|
a |
|
b/code/init_processing/calc_quartiles.py |
|
|
1 |
##### SETUP ######

import pickle

import numpy as np
import pandas as pd

import aggregate

##################

## Purpose: load the resampled BMI DataFrame, drop the single Pacific
## Islander/Hawaiian Native record, compute aggregate values (including the
## percentiles listed below) per group via aggregate.calculate_aggregations,
## and save the combined result as both a pickle and a CSV file.

##### VARIABLES #####

## Percentile ranks passed to aggregate.calculate_aggregations
percentiles = np.array([3, 5, 10, 25, 50, 75, 85, 90, 95, 97])

#####################

## Open pickle file, saved from bmi_initial_processing.py
## NOTE: pickle must only be used on trusted files; this one is produced by
## our own pipeline. The context manager guarantees the handle is closed.
with open('../../data/pkl/BMI_resampled_lin.pkl', 'rb') as pkl_in:
    df = pickle.load(pkl_in)

## Only one individual is a PI/HN; thus, take this patient out of the dataset.
## Filter with a boolean mask and reassign: the previous approach blanked the
## rows to NaN and then called df.dropna() without keeping its return value,
## so the rows were never actually removed. Filtering also leaves the dtypes
## of the remaining columns untouched.
df = df[df["race_ethnicity"] != "Pacific Islander/Hawaiian Native"]

## Group datapoints by gender, race/ethnicity, and age
groupby_attributes = ["gender", "age", "race_ethnicity"]
## Calculate aggregate values
df_aggregate = aggregate.calculate_aggregations(df, groupby_attributes, percentiles)

## Repeats the above calculation, except all races/ethnicities are lumped
## into one category, "All"
## NOTE(review): the "All" label is presumably assigned inside
## aggregate.calculate_aggregations when race_ethnicity is not a grouping
## key -- confirm against that module.

## Group datapoints by gender and age
groupby_attributes = ["gender", "age"]
## Calculate aggregate values and stack them under the per-race results.
## pd.concat replaces DataFrame.append, which was removed in pandas 2.0;
## with default arguments the result is the same.
df_aggregate = pd.concat(
    [
        df_aggregate,
        aggregate.calculate_aggregations(df, groupby_attributes, percentiles),
    ]
)

## Save aggregate DataFrame to pickle (highest available protocol)
with open('../../data/pkl/BMI_aggregate_percentiles.pkl', 'wb') as pkl_out:
    pickle.dump(df_aggregate, pkl_out, pickle.HIGHEST_PROTOCOL)

## Save aggregate DataFrame to CSV (index is not written)
df_aggregate.to_csv("../../data/csv/BMI_aggregate_percentiles.csv", index_label=False, index=False)