# File: MOA/HPressure.py (captured from a diff view, revisions 000000 .. 1caa3f)
1
import numpy as np
2
import pandas as panda
3
import matplotlib.pyplot as plt
4
from scipy.stats import norm
5
import statistics
6
import random
7
8
# Module-level placeholder. No code visible in this file reads this value:
# every function below binds its own local `df`.
# NOTE(review): looks safe to remove - confirm nothing imports it.
df = 1
9
10
def readAll(path='cardio_train.csv'):
    """Load the raw data set and keep only the rows with ``cardio == 1``.

    Args:
        path: CSV file to read. Defaults to ``'cardio_train.csv'`` in the
            current working directory, which is what the original code used.

    Returns:
        A DataFrame holding the rows whose ``cardio`` column equals 1.

    NOTE(review): the file is parsed with the default ``,`` separator; if the
    raw export uses another delimiter, a ``sep=`` argument is needed - confirm
    against the actual file.
    """
    dt = panda.read_csv(path, header=0)
    return dt.loc[dt.cardio == 1]
17
#===========================================================================================
18
19
def CleanData(df, column="ap_hi", whisker=1.5):
    """Drop outlier rows using the interquartile-range (IQR) fence rule.

    A row is kept when its value in ``column`` lies inside
    ``[Q1 - whisker * IQR, Q3 + whisker * IQR]`` (both ends inclusive).

    Args:
        df: DataFrame containing ``column``.
        column: Name of the numeric column to filter on. Defaults to
            ``"ap_hi"``, the column the original code hard-coded.
        whisker: Multiplier applied to the IQR to build the fences.

    Returns:
        A new DataFrame with the outlier rows removed. Rows whose value is
        NaN are removed as well, because NaN fails both fence comparisons.
    """
    # Nothing to measure on an empty frame; return early instead of asking
    # for quantiles of no data.
    if df.empty:
        return df.copy()

    values = df[column]
    # Series.quantile ignores NaN, so one missing reading no longer turns the
    # fences into NaN (which would silently filter out every row).
    q25, q75 = values.quantile([0.25, 0.75])
    iqr = q75 - q25
    lower = q25 - whisker * iqr
    upper = q75 + whisker * iqr

    keep = (values >= lower) & (values <= upper)
    return df.loc[keep]
25
26
27
#===========================================================================================
28
def ChangeSample(p, path='CleanPressure.csv'):
    """Draw a random subset of rows from the cleaned CSV file.

    Each data row is kept independently when ``random.random() <= p``, so the
    number of rows returned varies from call to call.

    Args:
        p: Per-row keep threshold compared against ``random.random()``.
        path: CSV file to sample from. Defaults to ``'CleanPressure.csv'`` in
            the current working directory, which is what the original code
            used.

    Returns:
        A DataFrame with the sampled rows whose ``cardio`` column equals 1.
    """
    def skip_row(i):
        # Row 0 is the header and must always be read; every other row is
        # skipped whenever the random draw exceeds p.
        return i > 0 and random.random() > p

    df = panda.read_csv(path, header=0, skiprows=skip_row)
    return df.loc[df.cardio == 1]
36
#===========================================================================================
37
def GenerateSamples(p, n_samples=1000):
    """Build an empirical sampling distribution of the mean of ``ap_hi``.

    Args:
        p: Per-row keep threshold forwarded to ``ChangeSample``.
        n_samples: Number of random samples to draw. Defaults to 1000, the
            count the original code hard-coded.

    Returns:
        A tuple ``(SamplesArr, sampleSize)``. ``SamplesArr`` is a list with
        the mean of ``ap_hi`` for each of the ``n_samples`` draws.
        ``sampleSize`` is the row count of one separate preliminary draw; it
        is only a representative size, because every draw has its own random
        number of rows.
    """
    # One preliminary draw, used only to report a representative sample size.
    sampleSize = ChangeSample(p).cardio.count()

    # Exactly one file read per mean. The original read the file twice per
    # iteration and threw the first result away.
    SamplesArr = [ChangeSample(p).ap_hi.mean() for _ in range(n_samples)]

    return SamplesArr, sampleSize
50
#===========================================================================================
51
# One-time preprocessing: uncomment the three lines below to rebuild
# 'CleanPressure.csv' from the raw data before running the analysis.
# df = readAll()
# df = CleanData(df)
# df.to_csv('CleanPressure.csv',index=False)
55
56
#===========================================================================================
57
58
def _show_means_histogram(means, size, color):
    """Show a histogram of sample means, titled with the given sample size."""
    plt.hist(means, color=color)
    plt.title("Data Distribution for means from sample size = " + str(size))
    plt.xlabel("Mean")
    plt.ylabel("Freq")
    plt.show()


# Three sampling thresholds, each double the previous one, so the typical
# sample size roughly doubles from one experiment to the next.
p = 0.0002999
FirstSampleArr, FirstSampleSize = GenerateSamples(p)
_show_means_histogram(FirstSampleArr, FirstSampleSize, "red")

p = 0.0005998
SecondSampleArr, SecondSampleSize = GenerateSamples(p)
_show_means_histogram(SecondSampleArr, SecondSampleSize, "green")

p = 0.0011996
ThirdSampleArr, ThirdSampleSize = GenerateSamples(p)
_show_means_histogram(ThirdSampleArr, ThirdSampleSize, "blue")

# The histograms above show the distribution of the sample MEANS, which looks
# approximately normal. Overlay all three to compare their spread.
plt.hist(FirstSampleArr, color="red")
plt.hist(SecondSampleArr, color="green")
plt.hist(ThirdSampleArr, color="blue")
plt.xlabel("Mean")
plt.ylabel("Freq")
plt.show()

# Summary statistics of the third (largest-sample) distribution of means.
sampleMin = np.min(ThirdSampleArr)
sampleMax = np.max(ThirdSampleArr)
sampleMean = np.mean(ThirdSampleArr)
# Standard deviation of the sample means, i.e. the empirical standard error
# of the mean for samples of this size.
sampleSD = np.std(ThirdSampleArr)
sampleSize = ThirdSampleSize
n = ThirdSampleSize

# A dense, evenly spaced grid gives a smooth curve even when the means span
# only a few units (a step of 1 produced just a handful of points).
x_axis = np.linspace(sampleMin, sampleMax, 200)

print("Sample Mean = Population Mean = " + str(sampleMean))
print("Sample Standard Deviation = " + str(sampleSD))

plt.plot(x_axis, norm.pdf(x_axis, sampleMean, sampleSD))
plt.xlabel("Mean")
plt.ylabel("Freq")
plt.show()

# Assuming 95% Level Of Confidence ===> Z = 1.96
z = 1.96

# sampleSD already is the standard error (it was measured on sample means),
# so it must not be divided by sqrt(n) a second time.
MaxError = z * sampleSD

# Same distribution re-centred on zero, with the +/- margin of error marked.
transformed = (sampleMax - sampleMin) / 2
x_axis = np.linspace(-1 * transformed, transformed, 200)

# The curve is in the data's own units, so the critical lines sit at
# +/- z * sampleSD (not at +/- z), and reach up to the peak of the curve.
peak = norm.pdf(0, 0, sampleSD)
plt.plot(x_axis, norm.pdf(x_axis, 0, sampleSD))
plt.plot([MaxError, MaxError], [0, peak])
plt.plot([-1 * MaxError, -1 * MaxError], [0, peak])
plt.show()

print("Maximum Error Estimated For Sample Of Size " + str(n) + " = " + str(MaxError))

# MaxError is non-negative, so the bounds are already ordered.
LeftX = sampleMean - MaxError
RightX = sampleMean + MaxError

print("The Population Mean Is Between [" + str(LeftX) + "," + str(RightX) + "] With Confidence Of 95%")
138
139
140