[1caa3f]: / MOA / stat_1.py

Download this file

130 lines (87 with data), 3.4 kB

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
import numpy as np
import pandas as panda
import matplotlib.pyplot as plt
from scipy.stats import norm
import statistics
# ---------------------------------------------------------------------------
# Blood pressure analysis on heart.csv (column `trestbps`):
#   1. print descriptive statistics of the raw column,
#   2. box-plot the raw column,
#   3. drop rows whose `trestbps` lies outside the 1.5 * IQR fences,
#   4. box-plot the filtered column and inspect the `target == 1` subset.
# ---------------------------------------------------------------------------
db = panda.read_csv('heart.csv')
bloodPressure = db.trestbps

# Blood Pressure Description
BPMean = bloodPressure.mean()
BPMedian = bloodPressure.median()
BPMin = bloodPressure.min()
BPMax = bloodPressure.max()
BPSD = bloodPressure.std()
print("========================================== Blood Pressure ==========================================")
print("Min Value = " + str(BPMin))
print("Max Value = " + str(BPMax))
print("Mean = " + str(BPMean))
print("Median = " + str(BPMedian))
print("Standard Deviation = " + str(BPSD))

# Box plot of the raw column, before any filtering.
plt.boxplot(bloodPressure)
plt.show()

# Fences are computed once, from the unfiltered column.
q75, q25 = np.percentile(bloodPressure, [75, 25])
IQR = q75 - q25
lowerFence = q25 - 1.5 * IQR
upperFence = q75 + 1.5 * IQR

# Keep every row inside the fences, fence values included. A point lying
# exactly on a fence is not an outlier (the box plot above draws it as a
# whisker end, not a flier), so strict comparisons would wrongly discard it.
db = db.loc[(db.trestbps >= lowerFence) & (db.trestbps <= upperFence)]
bloodPressure = db.trestbps

# Split the filtered column by the `target` flag (0 / 1).
noDisease = db.loc[db.target == 0].trestbps
Disease = db.loc[db.target == 1].trestbps

# Box plot of the filtered column.
plt.boxplot(bloodPressure)
plt.show()

# Mean and histogram of the `target == 1` subset only.
print("Mean = " + str(Disease.mean()))
plt.hist(Disease)
plt.show()
# NOTE: the commented-out section below belongs to the diabetes.csv file, not to heart.csv.
# Getting Distribution Of Blood Pressure
# print("========================= Blood Pressure ===================================")
# bloodPressureMean = bloodPressure.mean()
# bloodPressureSD = bloodPressure.std()
# print("Standard Deviation : " + str(bloodPressureSD))
# print("Mean = " + str(bloodPressureMean))
# y = np.array(cleanBloodPressure)
# unique, counts = np.unique(y, return_counts=True)
# plt.hist(cleanBloodPressure, bins = int(180/20), density=False, alpha=0.6, color='b')
# plt.title("Pressure Distribution")
# plt.xlabel("Pressure Measurment")
# plt.ylabel("Frequency")
# plt.show()
# plt.hist(cleanBloodPressure, bins = int(180/20), density=True, alpha=0.6, color='b')
# plt.plot(unique,norm.pdf(unique,bloodPressureMean,bloodPressureSD),'k')
# plt.ylabel("Probability")
# plt.show()
# # Checking distribution of diabetes given that pressure is high (this note says P > 80 is high, but the filter below uses >= 90)
# highPressureSamples = db.loc[db.trestbps >= 90]
# lowPressureSamples = db.loc[db.trestbps <= 60]
# lowPressureSamples = lowPressureSamples.loc[lowPressureSamples.trestbps > 30]
# goodPressureSamples = db.loc[db.trestbps > 60]
# goodPressureSamples = goodPressureSamples.loc[goodPressureSamples.trestbps < 90]
# goodP = goodPressureSamples.loc[goodPressureSamples.Outcome == 1].trestbps
# highP = highPressureSamples.trestbps
# diabetes = highPressureSamples.loc[highPressureSamples.Outcome == 1]
# highP = diabetes.trestbps
# lowP = lowPressureSamples.trestbps
# diabetes = lowPressureSamples.loc[lowPressureSamples.Outcome == 1]
# lowP = diabetes.trestbps
# print(goodP.count())
# # High Pressure Samples Plot
# plt.hist(highP, 10)
# plt.title("High Pressure Samples")
# plt.xlabel("Pressure")
# plt.ylabel("Frequency")
# plt.show()
# # Low Pressure Samples Plot
# plt.hist(lowP, 4)
# plt.title("Low Pressure Samples")
# plt.xlabel("Pressure")
# plt.ylabel("Frequency")
# plt.show()
# # Good Pressure Samples Plot
# plt.hist(goodP,15)
# plt.title("Good Pressure Samples")
# plt.xlabel("Pressure")
# plt.ylabel("Frequency")
# plt.show()
# # relation between pressure, age and diabetes
# goodPressureSamples = db.loc[db.trestbps > 60]
# goodPressureSamples = goodPressureSamples.loc[goodPressureSamples.trestbps < 90]
# goodP = goodPressureSamples.loc[goodPressureSamples.Outcome == 1].trestbps