## Source file: code/init_processing/calc_quartiles.py
1
##### SETUP ######
2
3
import aggregate
4
5
import pickle
6
import numpy as np
7
import pandas as pd
8
9
##################
10
11
##### VARIABLES #####
12
13
## Percentile ranks handed to aggregate.calculate_aggregations for every grouping
percentiles = np.array(
    [3, 5, 10, 25, 50, 75, 85, 90, 95, 97],
)
14
15
#####################
16
17
## Open pickle file, saved from bmi_initial_processing.py
## NOTE: unpickling can execute arbitrary code; only load files this project produced.
with open('../../data/pkl/BMI_resampled_lin.pkl', 'rb') as pkl_file:
    df = pickle.load(pkl_file)

## Only one individual is a PI/HN; thus, take this patient out of the dataset.
## Filter the rows out directly: the previous approach blanked the rows to NaN
## and then called df.dropna() without keeping its result, so nothing was
## actually removed. A boolean mask also leaves rows with unrelated missing
## values (and the column dtypes) untouched.
df = df[df["race_ethnicity"] != "Pacific Islander/Hawaiian Native"]
23
24
## Group datapoints by gender, race/ethnicity, and age
groupby_attributes = ["gender", "age", "race_ethnicity"]
## Calculate aggregate values for each gender/age/race-ethnicity group
aggregate_by_race = aggregate.calculate_aggregations(df, groupby_attributes, percentiles)

## Repeat the above calculation, except all races/ethnicities are lumped into one category, "All"

## Group datapoints by gender and age only
groupby_attributes = ["gender", "age"]
## Calculate aggregate values for each gender/age group
aggregate_all_races = aggregate.calculate_aggregations(df, groupby_attributes, percentiles)

## Stack both result sets into one table. pd.concat replaces DataFrame.append,
## which was deprecated in pandas 1.4 and removed in pandas 2.0; row order and
## index labels are the same as append produced.
df_aggregate = pd.concat([aggregate_by_race, aggregate_all_races])
35
    
36
## Save aggregate DataFrame to pickle
## The context manager closes the file even if pickling raises.
with open('../../data/pkl/BMI_aggregate_percentiles.pkl', 'wb') as output:
    ## HIGHEST_PROTOCOL is what the former literal -1 selected.
    pickle.dump(df_aggregate, output, pickle.HIGHEST_PROTOCOL)

## Save aggregate DataFrame to CSV (the index is not written)
df_aggregate.to_csv("../../data/csv/BMI_aggregate_percentiles.csv", index_label=False, index=False)