|
a |
|
b/MOA/stat_1.py |
|
|
1 |
import numpy as np |
|
|
2 |
import pandas as panda |
|
|
3 |
import matplotlib.pyplot as plt |
|
|
4 |
from scipy.stats import norm |
|
|
5 |
import statistics |
|
|
6 |
|
|
|
7 |
|
|
|
8 |
# Load the dataset and pick out the `trestbps` column, which this script
# refers to as blood pressure from here on.
db = panda.read_csv('heart.csv')
bloodPressure = db.trestbps

# Descriptive statistics of the raw (not yet filtered) values.
BPMin = bloodPressure.min()
BPMax = bloodPressure.max()
BPMean = bloodPressure.mean()
BPMedian = bloodPressure.median()
BPSD = bloodPressure.std()  # pandas' default for Series.std() is ddof=1

# Print the summary under a banner, one statistic per line.
print("========================================== Blood Pressure ==========================================")
print(f"Min Value = {BPMin!s}")
print(f"Max Value = {BPMax!s}")
print(f"Mean = {BPMean!s}")
print(f"Median = {BPMedian!s}")
print(f"Standard Deviation = {BPSD!s}")
|
|
27 |
|
|
|
28 |
|
|
|
29 |
# Box plot of the blood pressure values before any rows are removed.
plt.boxplot(bloodPressure)
plt.show()

# 75th / 25th percentiles and the interquartile range between them.
q75, q25 = np.percentile(bloodPressure, [75, 25])
IQR = q75 - q25

# Keep only the rows whose `trestbps` lies strictly between the two
# 1.5 * IQR fences; a value exactly on a fence is dropped as well.
lowerFence = q25 - 1.5 * IQR
upperFence = q75 + 1.5 * IQR
insideFences = (db.trestbps > lowerFence) & (db.trestbps < upperFence)
db = db.loc[insideFences]

# Re-derive the series from the filtered frame, then split it by the
# `target` flag (0 -> noDisease, 1 -> Disease).
bloodPressure = db.trestbps
noDisease = db.loc[db.target == 0, "trestbps"]
Disease = db.loc[db.target == 1, "trestbps"]

# Box plot of the values that survived the filter.
plt.boxplot(bloodPressure)
plt.show()
|
|
44 |
|
|
|
45 |
# Mean blood pressure of the `Disease` series (rows selected with
# target == 1 after the fence filter above).
diseaseMean = Disease.mean()
print(f"Mean = {diseaseMean!s}")

# Histogram of the same series, using matplotlib's default binning.
plt.hist(Disease)
plt.show()
|
|
51 |
|
|
|
52 |
|
|
|
53 |
|
|
|
54 |
# NOTE: The commented-out section below belongs to the diabetes.csv file, not heart.csv. |
|
|
55 |
# Getting Distribution Of Blood Pressure |
|
|
56 |
|
|
|
57 |
# print("========================= Blood Pressure ===================================") |
|
|
58 |
|
|
|
59 |
# bloodPressureMean = bloodPressure.mean() |
|
|
60 |
# bloodPressureSD = bloodPressure.std() |
|
|
61 |
|
|
|
62 |
# print("Standard Deviation : " + str(bloodPressureSD)) |
|
|
63 |
# print("Mean = " + str(bloodPressureMean)) |
|
|
64 |
|
|
|
65 |
# y = np.array(cleanBloodPressure) |
|
|
66 |
# unique, counts = np.unique(y, return_counts=True) |
|
|
67 |
|
|
|
68 |
|
|
|
69 |
# plt.hist(cleanBloodPressure, bins = int(180/20), density=False, alpha=0.6, color='b') |
|
|
70 |
# plt.title("Pressure Distribution") |
|
|
71 |
# plt.xlabel("Pressure Measurement") |
|
|
72 |
# plt.ylabel("Frequency") |
|
|
73 |
# plt.show() |
|
|
74 |
|
|
|
75 |
|
|
|
76 |
# plt.hist(cleanBloodPressure, bins = int(180/20), density=True, alpha=0.6, color='b') |
|
|
77 |
# plt.plot(unique,norm.pdf(unique,bloodPressureMean,bloodPressureSD),'k') |
|
|
78 |
# plt.ylabel("Probability") |
|
|
79 |
# plt.show() |
|
|
80 |
|
|
|
81 |
# # Checking the distribution of diabetes given that pressure is high ==> P > 80 is high (NOTE: the filter below uses >= 90 -- confirm which threshold is intended) |
|
|
82 |
|
|
|
83 |
# highPressureSamples = db.loc[db.trestbps >= 90] |
|
|
84 |
|
|
|
85 |
# lowPressureSamples = db.loc[db.trestbps <= 60] |
|
|
86 |
# lowPressureSamples = lowPressureSamples.loc[lowPressureSamples.trestbps > 30] |
|
|
87 |
|
|
|
88 |
# goodPressureSamples = db.loc[db.trestbps > 60] |
|
|
89 |
# goodPressureSamples = goodPressureSamples.loc[goodPressureSamples.trestbps < 90] |
|
|
90 |
# goodP = goodPressureSamples.loc[goodPressureSamples.Outcome == 1].trestbps |
|
|
91 |
|
|
|
92 |
# highP = highPressureSamples.trestbps |
|
|
93 |
# diabetes = highPressureSamples.loc[highPressureSamples.Outcome == 1] |
|
|
94 |
# highP = diabetes.trestbps |
|
|
95 |
|
|
|
96 |
# lowP = lowPressureSamples.trestbps |
|
|
97 |
# diabetes = lowPressureSamples.loc[lowPressureSamples.Outcome == 1] |
|
|
98 |
# lowP = diabetes.trestbps |
|
|
99 |
|
|
|
100 |
# print(goodP.count()) |
|
|
101 |
|
|
|
102 |
# # High Pressure Samples Plot |
|
|
103 |
# plt.hist(highP, 10) |
|
|
104 |
# plt.title("High Pressure Samples") |
|
|
105 |
# plt.xlabel("Pressure") |
|
|
106 |
# plt.ylabel("Frequency") |
|
|
107 |
# plt.show() |
|
|
108 |
|
|
|
109 |
# # Low Pressure Samples Plot |
|
|
110 |
# plt.hist(lowP, 4) |
|
|
111 |
# plt.title("Low Pressure Samples") |
|
|
112 |
# plt.xlabel("Pressure") |
|
|
113 |
# plt.ylabel("Frequency") |
|
|
114 |
# plt.show() |
|
|
115 |
|
|
|
116 |
# # Good Pressure Samples Plot |
|
|
117 |
# plt.hist(goodP,15) |
|
|
118 |
# plt.title("Good Pressure Samples") |
|
|
119 |
# plt.xlabel("Pressure") |
|
|
120 |
# plt.ylabel("Frequency") |
|
|
121 |
# plt.show() |
|
|
122 |
|
|
|
123 |
# # relation between pressure, age and diabetes |
|
|
124 |
|
|
|
125 |
# goodPressureSamples = db.loc[db.trestbps > 60] |
|
|
126 |
# goodPressureSamples = goodPressureSamples.loc[goodPressureSamples.trestbps < 90] |
|
|
127 |
# goodP = goodPressureSamples.loc[goodPressureSamples.Outcome == 1].trestbps |
|
|
128 |
|
|
|
129 |
|