[1caa3f]: / MOA / HPressure.py

Download this file

141 lines (103 with data), 3.6 kB

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
import numpy as np
import pandas as panda
import matplotlib.pyplot as plt
from scipy.stats import norm
import statistics
import random
# Module-level placeholder. Every function in the visible file binds its own
# local `df`/`dt`, and the script section below never reads this value.
df = 1
def readAll(path='cardio_train.csv'):
    """Load the raw dataset and keep only the rows where ``cardio == 1``.

    Args:
        path: CSV file to read. Its first line is taken as the header and it
            must contain a ``cardio`` column. Defaults to
            ``'cardio_train.csv'`` (resolved against the working directory),
            which is what the original hard-coded.

    Returns:
        A DataFrame with the rows whose ``cardio`` value equals 1.
    """
    records = panda.read_csv(path, header=0)
    return records.loc[records.cardio == 1]
#===========================================================================================
def CleanData(df, column='ap_hi', whisker=1.5):
    """Drop outlier rows using the interquartile-range (Tukey fence) rule.

    A row is kept when its ``column`` value lies inside
    ``[Q1 - whisker * IQR, Q3 + whisker * IQR]`` (both ends inclusive).

    Args:
        df: DataFrame containing ``column``.
        column: Name of the numeric column to filter on. Defaults to
            ``'ap_hi'``, the column the original code hard-coded.
        whisker: Multiplier applied to the IQR to place the fences.
            Defaults to 1.5.

    Returns:
        A new DataFrame holding only the rows inside the fences. Rows whose
        ``column`` value is missing are dropped as well.
    """
    values = df[column]
    # Series.quantile skips NaN, so one missing reading no longer turns both
    # quartiles into NaN (which would silently discard every row). It uses
    # linear interpolation by default, matching np.percentile's default.
    q25, q75 = values.quantile([0.25, 0.75])
    spread = q75 - q25
    lower_fence = q25 - whisker * spread
    upper_fence = q75 + whisker * spread
    # between() is inclusive on both ends, equivalent to `>= lower` and `<= upper`.
    return df.loc[values.between(lower_fence, upper_fence)]
#===========================================================================================
def ChangeSample(p, path='CleanPressure.csv'):
    """Draw a fresh random subset of rows from the cleaned CSV.

    Each data row is kept independently: a row is skipped when
    ``random.random() > p``, so it is read with probability of about ``p``.
    The header line (index 0) is never skipped. The number of rows returned
    therefore varies from call to call.

    Args:
        p: Per-row keep probability.
        path: CSV file to sample from; must contain a ``cardio`` column.
            Defaults to ``'CleanPressure.csv'``, which the original
            hard-coded.

    Returns:
        A DataFrame with the sampled rows whose ``cardio`` value equals 1.
    """

    def _skip_row(index):
        # Index 0 is the header and must always be read.
        return index > 0 and random.random() > p

    drawn = panda.read_csv(path, header=0, skiprows=_skip_row)
    return drawn.loc[drawn.cardio == 1]
#===========================================================================================
def GenerateSamples(p, repetitions=1000):
    """Build an empirical sampling distribution of the mean of ``ap_hi``.

    Args:
        p: Per-row keep probability forwarded to ``ChangeSample``.
        repetitions: Number of random samples to draw and average.
            Defaults to 1000, the count the original hard-coded.

    Returns:
        A tuple ``(means, sampleSize)``: ``means`` is a list with one
        ``ap_hi`` mean per drawn sample; ``sampleSize`` is the row count of
        one extra reference draw. Sample sizes are random, so this is only a
        representative size, not the exact size of every sample.
    """
    # One reference draw, used only to report a typical sample size.
    sampleSize = ChangeSample(p).cardio.count()
    # One draw per mean. The original drew twice per iteration and threw the
    # first draw away, doubling the file reads for no effect on the result.
    SamplesArr = [ChangeSample(p).ap_hi.mean() for _ in range(repetitions)]
    return SamplesArr, sampleSize
#===========================================================================================
# One-time preprocessing: uncomment and run these lines once to build CleanPressure.csv from cardio_train.csv
# df = readAll()
# df = CleanData(df)
# df.to_csv('CleanPressure.csv',index=False)
#===========================================================================================
def _plot_mean_histogram(means, sample_size, color):
    """Show a histogram of sample means, titled with the given sample size."""
    plt.hist(means, color=color)
    plt.title("Data Distribution for means from sample size = " + str(sample_size))
    plt.xlabel("Mean")
    plt.ylabel("Freq")
    plt.show()


# Sampling distributions of the mean for three keep probabilities, each about
# double the previous one, so the typical sample size doubles each time.
p = 0.0002999
FirstSampleArr, FirstSampleSize = GenerateSamples(p)
_plot_mean_histogram(FirstSampleArr, FirstSampleSize, "red")

p = 0.0005998
SecondSampleArr, SecondSampleSize = GenerateSamples(p)
_plot_mean_histogram(SecondSampleArr, SecondSampleSize, "green")

p = 0.0011996
ThirdSampleArr, ThirdSampleSize = GenerateSamples(p)
_plot_mean_histogram(ThirdSampleArr, ThirdSampleSize, "blue")

# The histograms above show that the distribution of the sample means is
# approximately normal. Overlay all three to compare their spread.
plt.hist(FirstSampleArr, color="red")
plt.hist(SecondSampleArr, color="green")
plt.hist(ThirdSampleArr, color="blue")
plt.xlabel("Mean")
plt.ylabel("Freq")
plt.show()

# Summary statistics of the third (largest-sample) set of means.
sampleMin = np.min(ThirdSampleArr)
sampleMax = np.max(ThirdSampleArr)
sampleMean = np.mean(ThirdSampleArr)
# Standard deviation of the sample MEANS, i.e. the empirical standard error.
sampleSD = np.std(ThirdSampleArr)
sampleSize = ThirdSampleSize
n = ThirdSampleSize

x_axis = np.arange(sampleMin, sampleMax, 1)
print("Sample Mean = Population Mean = " + str(sampleMean))
print("Sample Standard Deviation = " + str(sampleSD))
plt.plot(x_axis, norm.pdf(x_axis, sampleMean, sampleSD))
plt.xlabel("Mean")
plt.ylabel("Freq")
plt.show()

# Assuming 95% Level Of Confidence ===> Z = 1.96
z = 1.96
# sampleSD already measures the spread of sample means (about sigma / sqrt(n)),
# so the margin of error is z * sampleSD. Dividing by sqrt(n) again, as the
# original did, applies the sqrt(n) factor twice and understates the error.
MaxError = z * sampleSD

# Same curve centred on zero, with the 95% bounds marked.
transformed = (sampleMax - sampleMin) / 2
x_axis = np.arange(-1 * transformed, transformed, 1)
plt.plot(x_axis, norm.pdf(x_axis, 0, sampleSD))
# The x axis is in data units, not standard units, so the critical values sit
# at +/- z * sampleSD (= MaxError) rather than at +/- z. The marker height
# follows the curve's peak instead of a fixed 0.09.
curve_peak = norm.pdf(0, 0, sampleSD)
plt.plot([MaxError, MaxError], [0, curve_peak])
plt.plot([-1 * MaxError, -1 * MaxError], [0, curve_peak])
plt.show()

print("Maximum Error Estimated For Sample Of Size " + str(n) + " = " + str(MaxError))
# MaxError is non-negative, so the bounds can be computed in order directly.
LeftX = sampleMean - MaxError
RightX = sampleMean + MaxError
print("The Population Mean Is Between [" + str(LeftX) + "," + str(RightX) + "] With Confidence Of 95%")