[4af879]: / code / init_processing / calc_subset_quartiles.py

Download this file

69 lines (53 with data), 2.3 kB

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
##### SETUP ######
import aggregate
import pickle
import numpy as np
import pandas as pd
# Percentile levels handed to aggregate.calculate_aggregations further down.
percentiles = np.array([5, 25, 50, 75, 95])
##################
# Subset selection parameters.
# Age at which a patient's BMI is compared against the bounds below; patients
# whose recorded ages do not span this value are dropped.
selection_age = 5
# Chooses which side of selection_age is retained and the direction of the
# until_age cut-off applied to the aggregate table.
age_greater_than = True
# Age bound applied (as a strict inequality) to the aggregate table.
until_age = 8
# Lower BMI bound at selection_age: patients below it are dropped. NaN disables it.
bmi_greater_than = 17.3
# Upper BMI bound at selection_age: patients above it are dropped. NaN disables it.
bmi_less_than = np.nan
##################
# Load the resampled BMI table. The context manager closes the file even if
# unpickling fails. NOTE: pickle can execute arbitrary code while loading, so
# only trusted files should ever be read here.
with open('../../data/pkl/BMI_resampled.pkl', 'rb') as pkl_file:
    df = pickle.load(pkl_file)
## Restrict to patients who satisfy start criteria
# A patient is kept only when (a) their recorded ages span selection_age and
# (b) their BMI at selection_age lies inside the configured bounds. A bound
# set to NaN is treated as "not configured".
has_lower_bound = not np.isnan(bmi_greater_than)
has_upper_bound = not np.isnan(bmi_less_than)

excluded_ids = []
grouped = df.groupby("id")
for name_patient, group_patient in grouped:
    patient_age = group_patient["age"]

    # Patient has no data around selection_age at all.
    if patient_age.min() > selection_age or patient_age.max() < selection_age:
        excluded_ids.append(name_patient)
        continue

    # BMI sample(s) taken exactly at selection_age. The comparisons below are
    # reduced with .any() because a Series has no unambiguous truth value; an
    # empty selection therefore never excludes the patient.
    bmi_at_selection = group_patient.loc[patient_age == selection_age, "bmi"]
    too_low = has_lower_bound and (bmi_at_selection < bmi_greater_than).any()
    too_high = has_upper_bound and (bmi_at_selection > bmi_less_than).any()
    if too_low or too_high:
        excluded_ids.append(name_patient)

# Filter once by id instead of overwriting rows with NaN inside the loop: this
# avoids mutating the frame being iterated, avoids one full scan per patient,
# and keeps the column dtypes intact. Rows with missing values are still
# dropped afterwards.
df = df[~df["id"].isin(excluded_ids)]
df = df.dropna()
## Keep only the observations on the requested side of selection_age.
# With age_greater_than set, the window runs from selection_age towards
# until_age (which is applied to the aggregate table as "age < until_age"), so
# the rows to keep here are those at or above selection_age; otherwise the
# window runs downwards. The previous comparison was inverted, which made the
# until_age cut-off a no-op.
if age_greater_than:
    df = df[df["age"] >= selection_age]
else:
    df = df[df["age"] <= selection_age]
## Aggregate the datapoints per (gender, age) group
groupby_attributes = ["gender", "age"]
df_aggregate = aggregate.calculate_aggregations(
    df,
    groupby_attributes,
    percentiles,
)
## Cut the aggregate table off at until_age (strict bound on either side)
aggregate_ages = df_aggregate["age"]
if age_greater_than:
    inside_window = aggregate_ages < until_age
else:
    inside_window = aggregate_ages > until_age
df_aggregate = df_aggregate[inside_window]
## Save dataframes to pickle
# Context managers guarantee the files are flushed and closed even if
# pickling raises. Protocol -1 selects the highest protocol available.
with open('../../data/pkl/BMI_subset_aggregate.pkl', 'wb') as output:
    pickle.dump(df_aggregate, output, -1)
with open('../../data/pkl/BMI_subset_resampled.pkl', 'wb') as output:
    pickle.dump(df, output, -1)
## Save dataframes to CSV
# The row index is not written to either CSV file.
df.to_csv("../../data/csv/BMI_subset_resampled.csv", index_label=False, index=False)
df_aggregate.to_csv("../../data/csv/BMI_subset_aggregate.csv", index_label=False, index=False)