Diff of /data_visualization.py [000000] .. [ab5bfb]

Switch to unified view

a b/data_visualization.py
1
import json
import os
from urllib.parse import quote_plus

import matplotlib.pyplot as plt
import pandas as pd
import plotly.express as px
import pymongo
from dotenv import load_dotenv
8
9
10
# Read the MongoDB credentials from the environment, optionally seeded from a
# local .env file.
load_dotenv()

# NOTE(review): USERNAME is often predefined by the operating system or shell,
# and load_dotenv() does not override variables that already exist unless
# override=True is passed -- confirm the .env value is the one read here.
username = os.environ.get("USERNAME")
password = os.environ.get("PASSWORD")
if not username or not password:
    # Without this check the literal text "None" would be interpolated into
    # the connection string and only fail later, at query time.
    raise RuntimeError(
        "USERNAME and PASSWORD must be set (in the environment or a .env "
        "file) to connect to MongoDB"
    )

# Percent-escape the credentials: reserved characters such as "@", ":" or "/"
# in a raw username/password make the connection string invalid.
# The values stored in .env must therefore be the raw (unescaped) credentials.
client = pymongo.MongoClient(
    f"mongodb+srv://{quote_plus(username)}:{quote_plus(password)}"
    "@cluster0.lxsbb.mongodb.net/?retryWrites=true&w=majority"
)
db = client.medical_record
collection = db["report"]

# Load every document of the "report" collection into memory; the tables
# further down in this script are all derived from `b`.
a = collection.find({})
b = list(a)
22
23
24
# Patient table: one row per document, made of the fields of its
# "demographics" sub-document plus the document _id stored as "patient_id".
#
# Rows are assembled per document (instead of appending to column lists keyed
# by the first document's fields), so an empty collection no longer raises
# IndexError and a document with missing or extra demographic fields produces
# NaN cells rather than a KeyError or ragged columns.
patient_rows = [
    {**d["demographics"], "patient_id": d["_id"]}
    for d in b
]
df_patient = pd.DataFrame(patient_rows)
34
35
# Gender distribution: one bar per distinct value of the "gender" column,
# with the bar height being the number of patient rows carrying that value.
gender_counts = df_patient["gender"].value_counts()
fig_patient_gender = px.bar(x=gender_counts.index, y=gender_counts)
fig_patient_gender.update_layout(
    yaxis_title="Frequency",
    xaxis_title="Gender",
    title="Distribution of Gender",
)
45
46
# Race distribution: one bar per distinct value of the "race" column, with
# the bar height being the number of patient rows carrying that value.
race_counts = df_patient["race"].value_counts()
fig_patient_race = px.bar(x=race_counts.index, y=race_counts)
fig_patient_race.update_layout(
    yaxis_title="Frequency",
    xaxis_title="Race",
    title="Distribution of race",
)
56
57
# Allergy table: one row per allergy entry, tagged with the owning document's
# _id as "patient_id". A patient with no allergies still contributes a single
# row that carries only "patient_id" (all other columns are NaN), so every
# patient stays visible to the per-patient grouping done later.
#
# Building row dicts avoids deriving the column names from
# b[0]["allergies"][0], which raised IndexError whenever the first document
# had an empty allergy list.
allergy_rows = []
for d in b:
    # An absent or empty list is replaced by one empty placeholder entry.
    patient_allergies = d.get("allergies") or [{}]
    allergy_rows.extend(
        {**allergy, "patient_id": d["_id"]} for allergy in patient_allergies
    )

df_allergy = pd.DataFrame(allergy_rows)
74
75
# Label each patient by whether they have at least one allergy row with a
# non-null "to" value (presumably the allergen -- verify against the document
# schema). count() ignores nulls, so placeholder rows of allergy-free
# patients count as zero.
allergy_counts = df_allergy.groupby("patient_id")["to"].count()
allergic = allergy_counts.gt(0).map({True: "allergic", False: "not allergic"})

# Tally the labels once and reuse the result for both slice sizes and names.
allergic_share = allergic.value_counts()

fig_allergy = px.pie(
    values=allergic_share,
    names=allergic_share.index,
)

fig_allergy.update_layout(
    # Title fixed: the original read "... Non-allergic patients Patients".
    title="Percentage of Allergic and Non-allergic Patients",
)
87
88
# Allergy types: one bar per distinct value of the "type" column, with the
# bar height being the number of allergy rows of that type.
# NOTE(review): the y-axis says "Number of patients" but the count is per
# allergy row -- a patient with two allergies of one type is counted twice.
allergy_type_counts = df_allergy["type"].value_counts()
fig_allergy_type = px.bar(x=allergy_type_counts.index, y=allergy_type_counts)
fig_allergy_type.update_layout(
    yaxis_title="Number of patients",
    xaxis_title="Types of allergies",
    title="Different types of allergies",
)
98
99
100
# Condition table: one row per condition entry, tagged with the owning
# document's _id as "patient_id". A patient with no conditions contributes a
# single row that carries only "patient_id" (all other columns are NaN).
#
# Building row dicts avoids deriving the column names from
# b[0]["conditions"][0], which raised IndexError whenever the first document
# had an empty condition list.
condition_rows = []
for d in b:
    # An absent or empty list is replaced by one empty placeholder entry.
    patient_conditions = d.get("conditions") or [{}]
    condition_rows.extend(
        {**condition, "patient_id": d["_id"]} for condition in patient_conditions
    )

df_conditions = pd.DataFrame(condition_rows)

# The ten most frequent values of the "condition" column; value_counts()
# skips the null placeholders and sorts by descending frequency.
top_10_conditions = df_conditions["condition"].value_counts().head(10)
121
122
123
# top_10_conditions.plot(
124
#     kind="bar",
125
#     title="Top ten conditions of patients",
126
#     xlabel="Conditions",
127
#     ylabel="Number of patients"
128
# );
129
130
# Bar chart of the ten most frequent conditions: condition names on the
# x-axis, their occurrence counts on the y-axis.
top_condition_names = top_10_conditions.index
fig_top_conditions = px.bar(x=top_condition_names, y=top_10_conditions)
fig_top_conditions.update_layout(
    yaxis_title="Number of patients",
    xaxis_title="Different Conditions",
    title="Top ten conditions of patients",
)
140
141
142
# Pair every condition row with the gender of its patient.
merged_patient_conditions = pd.merge(
    df_patient, df_conditions, how="inner", on=["patient_id"]
)
# .copy() makes the two-column frame independent before a column is replaced.
df_demographic_condition = merged_patient_conditions[["gender", "condition"]].copy()

top_conds = top_10_conditions.index

# Spell out the gender codes; any other value is left as it is.
df_demographic_condition["gender"] = df_demographic_condition["gender"].replace(
    {"M": "male", "F": "female"}
)

# Count condition rows per (condition, gender), restricted to the top
# conditions. A cross-tabulation replaces the row-by-row tally, which raised
# KeyError for any gender value other than "male"/"female".
in_top = df_demographic_condition["condition"].isin(top_conds)
gender_counts_by_condition = pd.crosstab(
    df_demographic_condition.loc[in_top, "condition"],
    df_demographic_condition.loc[in_top, "gender"],
)

# Keep the original layout -- rows in top-ten order, "male" then "female"
# columns, zeros where a combination never occurs -- and append any other
# gender values that are present in the data.
expected_genders = ["male", "female"]
other_genders = [
    g for g in gender_counts_by_condition.columns if g not in expected_genders
]
df_cond_gender = gender_counts_by_condition.reindex(
    index=top_conds,
    columns=expected_genders + other_genders,
    fill_value=0,
).rename_axis(index=None, columns=None)  # unnamed axes, as before
159
160
# Grouped bar chart built from the wide table df_cond_gender: one group per
# row (condition) and one bar per column (gender) inside each group.
fig_cond_gender = px.bar(df_cond_gender, barmode='group')
fig_cond_gender.update_layout(
    yaxis_title="Number of patients",
    xaxis_title="Different Conditions",
    title="Top ten conditions of patients distributed among genders",
)
170
171
172
# Immunization table: one row per immunization entry, tagged with the owning
# document's _id as "patient_id". A patient with no immunizations contributes
# a single row that carries only "patient_id" (all other columns are NaN).
#
# Building row dicts avoids deriving the column names from
# b[0]["immunization"][0], which raised IndexError whenever the first
# document had an empty immunization list.
immunization_rows = []
for d in b:
    # An absent or empty list is replaced by one empty placeholder entry.
    patient_immunizations = d.get("immunization") or [{}]
    immunization_rows.extend(
        {**imm, "patient_id": d["_id"]} for imm in patient_immunizations
    )

df_imm = pd.DataFrame(immunization_rows)
192
# Monthly count of immunization rows whose name contains "COVID".
df_imm["date"] = pd.to_datetime(df_imm["date"])

# na=False maps missing immunization names to False directly, replacing the
# former `== True` comparison on a mask that could contain NaN.
is_covid = df_imm["immunization"].str.contains("COVID", na=False)
df_imm_covid = df_imm[is_covid].sort_values(by="date")

# The assignment aligns on the row index, so only COVID rows receive a month
# period; all other rows get a missing value and are dropped by the groupby
# below, which is what restricts the count to COVID immunizations.
df_imm["month_year"] = df_imm_covid["date"].dt.to_period("M")
covid_21_22 = df_imm.groupby("month_year")["immunization"].count()
# NOTE(review): no date filter is applied here, although the variable name and
# the chart title refer to Jan 2021 - June 2022 -- confirm the data only
# spans that window.
197
198
# Line chart of the monthly COVID immunization counts; the period index is
# rendered as "YYYY-MM" labels for the x-axis.
covid_month_labels = covid_21_22.index.strftime("%Y-%m")
fig_covid_21_22 = px.line(x=covid_month_labels, y=covid_21_22)
fig_covid_21_22.update_layout(
    yaxis_title="Number of patients",
    xaxis_title="Month and Year of vaccination",
    title="Time plot of COVID vaccination from Jan 2021 to June 2022",
)
208
# covid_21_22.plot(
209
#     title="Time plot of COVID vaccination from Jan 2021 to June 2022",
210
#     xlabel="Time",
211
#     ylabel="Frequency of vaccination"
212
# );