|
a |
|
b/03-Experiments/Utils/FeatureEngineering.py |
|
|
1 |
# Advanced feature engineering for training data |
|
|
2 |
def age_stuff(train_df):
    """Add age-derived feature columns to ``train_df`` in place.

    Columns added:

    * ``Age_Group`` -- categorical bucket of ``Age`` labelled ``'A'``..``'E'``
      over the right-closed intervals (0, 20], (20, 30], (30, 40], (40, 50]
      and (50, 55].
    * ``Log_Age`` -- ``log(1 + Age)``.
    * ``Scaled_Age`` -- ``Age`` min-max scaled to the range ``[0, 1]``.

    Args:
        train_df: pandas DataFrame containing a numeric ``Age`` column.

    Returns:
        The same DataFrame object that was passed in, with the three
        columns added (the input is mutated, not copied).
    """
    # Local imports keep this helper self-contained: no module-level
    # imports of these names are visible in this file.
    import numpy as np
    import pandas as pd

    ages = train_df['Age']

    # NOTE(review): ages outside (0, 55] (including exactly 0) fall in no bin
    # and become NaN in ``Age_Group`` -- confirm this upper bound is intended.
    train_df['Age_Group'] = pd.cut(
        ages,
        bins=[0, 20, 30, 40, 50, 55],
        labels=['A', 'B', 'C', 'D', 'E'],
    )

    train_df['Log_Age'] = np.log1p(ages)

    # Min-max scaling done directly: the previously fitted scaler object was
    # discarded after use, so nothing is lost by not keeping one around.
    age_min = ages.min()
    age_span = ages.max() - age_min
    if not age_span > 0:
        # Constant (or empty / all-NaN) column: avoid dividing by zero so a
        # constant column maps to 0.0 instead of NaN.
        age_span = 1.0
    train_df['Scaled_Age'] = (ages - age_min) / age_span

    return train_df
|
|
8 |
|
|
|
9 |
def advanced_age_stuff(train):
    """Add age-group, BMI and age/gender interaction columns to ``train``.

    Columns added:

    * ``Age group`` -- categorical bucket of ``Age`` with labels ``'0-18'``,
      ``'19-30'``, ``'31-45'``, ``'46-60'`` and ``'60+'`` (right-closed bins).
    * ``BMI`` -- ``Weight / Height ** 2``.
    * ``Age * Gender`` -- element-wise product of ``Age`` and ``Gender``.

    Args:
        train: pandas DataFrame with ``Age``, ``Weight``, ``Height`` and
            ``Gender`` columns.

    Returns:
        The same DataFrame object that was passed in, with the three
        columns added (the input is mutated, not copied).
    """
    # Local imports keep this helper self-contained: no module-level
    # imports of these names are visible in this file.
    import numpy as np
    import pandas as pd

    # The top bin is open-ended. Using the column maximum as the last edge
    # made ``pd.cut`` raise whenever no age exceeded 60 (edges must be
    # strictly increasing); for data with ages above 60 the result is the same.
    train['Age group'] = pd.cut(
        train['Age'],
        bins=[0, 18, 30, 45, 60, np.inf],
        labels=['0-18', '19-30', '31-45', '46-60', '60+'],
    )

    train['BMI'] = train['Weight'] / (train['Height'] ** 2)

    # NOTE(review): presumably ``Gender`` is already numerically encoded at
    # this point -- confirm against the caller.
    train['Age * Gender'] = train['Age'] * train['Gender']

    # Disabled experiments, kept for reference:
    # categorical_features = ['Gender', 'family_history_with_overweight', 'Age group', 'FAVC','CAEC', 'SMOKE','SCC', 'CALC', 'MTRANS']
    # train = pd.get_dummies(train, columns=categorical_features)

    # polynomial_features = PolynomialFeatures(degree=2)
    # X_poly = polynomial_features.fit_transform(train[['Age', 'BMI']])
    # train = pd.concat([train, pd.DataFrame(X_poly, columns=['Age^2', 'Age^3', 'BMI^2', 'Age * BMI', 'Age * BMI2', 'Age * BMI3'])], axis=1)
    return train
|
|
22 |
|
|
|
23 |
|
|
|
24 |
|