Diff of /MOA/stat_1.py [000000] .. [1caa3f]

Switch to unified view

a b/MOA/stat_1.py
1
import numpy as np
2
import pandas as panda
3
import matplotlib.pyplot as plt
4
from scipy.stats import norm
5
import statistics
6
7
8
# Load heart.csv (path is relative to the current working directory).
db = panda.read_csv("heart.csv")

# The `trestbps` column, taken before any outlier removal.
bloodPressure = db["trestbps"]

# Blood Pressure Description
# Summary statistics of the unfiltered column. `std()` is called with pandas'
# defaults, i.e. the sample standard deviation.
BPMin = bloodPressure.min()
BPMax = bloodPressure.max()
BPMean = bloodPressure.mean()
BPMedian = bloodPressure.median()
BPSD = bloodPressure.std()

# `!s` forces str() on each value, matching plain string concatenation exactly.
print("========================================== Blood Pressure ==========================================")
print(f"Min Value = {BPMin!s}")
print(f"Max Value = {BPMax!s}")
print(f"Mean = {BPMean!s}")
print(f"Median = {BPMedian!s}")
print(f"Standard Deviation = {BPSD!s}")

# Box plot of the unfiltered values.
plt.boxplot(bloodPressure)
plt.show()

# First and third quartiles and the interquartile range of the unfiltered column.
q25, q75 = np.percentile(bloodPressure, [25, 75])
IQR = q75 - q25

# Keep only rows strictly inside the 1.5 * IQR fences. Both comparisons are
# exclusive, so a value lying exactly on a fence is dropped as well.
lowerFence = q25 - 1.5 * IQR
upperFence = q75 + 1.5 * IQR
db = db[(db["trestbps"] > lowerFence) & (db["trestbps"] < upperFence)]

# Re-derive the column from the filtered frame and split it by `target`:
# 0 -> noDisease, 1 -> Disease (naming as used by this script).
bloodPressure = db["trestbps"]
noDisease = db.loc[db["target"] == 0, "trestbps"]
Disease = db.loc[db["target"] == 1, "trestbps"]

# Box plot again, now on the filtered values.
plt.boxplot(bloodPressure)
plt.show()

# Mean of `trestbps` over the rows with target == 1 (after filtering).
print(f"Mean = {Disease.mean()!s}")

# Histogram of the same target == 1 subset, with matplotlib's default binning.
plt.hist(Disease)
plt.show()
51
52
53
54
# NOTE: the commented-out section below belongs to the diabetes.csv analysis (it filters on an `Outcome` column); it is not executed here.
55
# Getting Distribution Of Blood Pressure
56
57
# print("========================= Blood Pressure ===================================")
58
59
# bloodPressureMean = bloodPressure.mean()
60
# bloodPressureSD = bloodPressure.std()
61
62
# print("Standard Deviation : " + str(bloodPressureSD))
63
# print("Mean = " + str(bloodPressureMean))
64
65
# y = np.array(cleanBloodPressure)
66
# unique, counts = np.unique(y, return_counts=True)
67
68
69
# plt.hist(cleanBloodPressure, bins = int(180/20), density=False, alpha=0.6, color='b')
70
# plt.title("Pressure Distribution")
71
# plt.xlabel("Pressure Measurement")
72
# plt.ylabel("Frequency")
73
# plt.show()
74
75
76
# plt.hist(cleanBloodPressure, bins = int(180/20), density=True, alpha=0.6, color='b')
77
# plt.plot(unique,norm.pdf(unique,bloodPressureMean,bloodPressureSD),'k')
78
# plt.ylabel("Probability")
79
# plt.show()
80
81
# # Checking the distribution of diabetes given that pressure is high (this note says P > 80 is high, but the filter below uses >= 90 -- confirm the intended threshold)
82
83
# highPressureSamples = db.loc[db.trestbps >= 90]
84
85
# lowPressureSamples = db.loc[db.trestbps <= 60]
86
# lowPressureSamples = lowPressureSamples.loc[lowPressureSamples.trestbps > 30]
87
88
# goodPressureSamples = db.loc[db.trestbps > 60]
89
# goodPressureSamples = goodPressureSamples.loc[goodPressureSamples.trestbps < 90]
90
# goodP = goodPressureSamples.loc[goodPressureSamples.Outcome == 1].trestbps
91
92
# highP = highPressureSamples.trestbps
93
# diabetes = highPressureSamples.loc[highPressureSamples.Outcome == 1]
94
# highP = diabetes.trestbps
95
96
# lowP = lowPressureSamples.trestbps
97
# diabetes = lowPressureSamples.loc[lowPressureSamples.Outcome == 1]
98
# lowP = diabetes.trestbps
99
100
# print(goodP.count())
101
102
# # High Pressure Samples Plot
103
# plt.hist(highP, 10)
104
# plt.title("High Pressure Samples")
105
# plt.xlabel("Pressure")
106
# plt.ylabel("Frequency")
107
# plt.show()
108
109
# # Low Pressure Samples Plot
110
# plt.hist(lowP, 4)
111
# plt.title("Low Pressure Samples")
112
# plt.xlabel("Pressure")
113
# plt.ylabel("Frequency")
114
# plt.show()
115
116
# # Good Pressure Samples Plot
117
# plt.hist(goodP,15)
118
# plt.title("Good Pressure Samples")
119
# plt.xlabel("Pressure")
120
# plt.ylabel("Frequency")
121
# plt.show()
122
123
# # relation between pressure, age and diabetes
124
125
# goodPressureSamples = db.loc[db.trestbps > 60]
126
# goodPressureSamples = goodPressureSamples.loc[goodPressureSamples.trestbps < 90]
127
# goodP = goodPressureSamples.loc[goodPressureSamples.Outcome == 1].trestbps
128
129