# 03-Experiments/Utils/FeatureEngineering.py
# Advanced feature engineering for training data
def age_stuff(train_df):
    """Derive three age-based feature columns on ``train_df``.

    The frame is modified in place and the same object is returned.

    Columns added:
        * ``Age_Group`` -- ``Age`` binned with edges 0, 20, 30, 40, 50, 55
          and labelled ``'A'`` through ``'E'``.
        * ``Log_Age`` -- ``log(1 + Age)``.
        * ``Scaled_Age`` -- ``Age`` passed through ``MinMaxScaler``, fitted
          on this frame's own ``Age`` values.

    Args:
        train_df: DataFrame containing an ``Age`` column.

    Returns:
        ``train_df`` itself, with the three columns added.
    """
    ages = train_df['Age']

    # NOTE(review): the top edge is fixed at 55, so any age above it (or at
    # or below 0) gets no group (NaN) -- confirm this is intended.
    group_edges = [0, 20, 30, 40, 50, 55]
    group_names = ['A', 'B', 'C', 'D', 'E']
    train_df['Age_Group'] = pd.cut(ages, bins=group_edges, labels=group_names)

    train_df['Log_Age'] = np.log1p(ages)

    # The scaler is given a 2-D input, hence the single-column reshape.
    # Presumably scikit-learn's MinMaxScaler, imported elsewhere in the file.
    age_column = ages.values.reshape(-1, 1)
    train_df['Scaled_Age'] = MinMaxScaler().fit_transform(age_column)

    return train_df
8
9
def advanced_age_stuff(train):
    """Add age-group, BMI and age/gender interaction columns to ``train``.

    The frame is modified in place and the same object is returned.

    Columns added:
        * ``'Age group'`` -- ``Age`` binned into ``(0, 18]``, ``(18, 30]``,
          ``(30, 45]``, ``(45, 60]`` and everything above 60, labelled
          ``'0-18'``, ``'19-30'``, ``'31-45'``, ``'46-60'`` and ``'60+'``.
          Ages at or below 0 fall outside every bin and become NaN.
        * ``'BMI'`` -- ``Weight / Height ** 2``.
        * ``'Age * Gender'`` -- element-wise product of ``Age`` and ``Gender``.

    Args:
        train: DataFrame with ``Age``, ``Weight``, ``Height`` and ``Gender``
            columns.
            NOTE(review): the product with ``Gender`` presumably expects a
            numerically encoded column, and the BMI formula presumably
            expects kilograms and metres -- confirm against the caller.

    Returns:
        ``train`` itself, with the three columns added.
    """
    # The top bin is open-ended rather than capped at the column maximum:
    # a data-dependent last edge makes pd.cut raise ValueError whenever the
    # maximum age is <= 60, because the edges stop increasing monotonically.
    age_edges = [0, 18, 30, 45, 60, float('inf')]
    age_labels = ['0-18', '19-30', '31-45', '46-60', '60+']
    train['Age group'] = pd.cut(train['Age'], bins=age_edges, labels=age_labels)

    train['BMI'] = train['Weight'] / (train['Height'] ** 2)

    train['Age * Gender'] = train['Age'] * train['Gender']

    # NOTE: one-hot encoding of the categorical columns (Gender,
    # family_history_with_overweight, Age group, FAVC, CAEC, SMOKE, SCC,
    # CALC, MTRANS) and degree-2 polynomial features of Age and BMI are
    # not applied here.
    return train
22
23
24