Diff of /HIMA/cardio.py [000000] .. [1caa3f]

Switch to side-by-side view

--- a
+++ b/HIMA/cardio.py
@@ -0,0 +1,35 @@
+import numpy as np
+import pandas as pd
+import matplotlib.pyplot as plt
+
+cardio_data = pd.read_csv('../cardio_train.csv', sep=';')
+hascardio = cardio_data[cardio_data['cardio'] == 1]
+hascardio['age'] = hascardio['age'] / 365
+mean_age = hascardio['age'].mean()
+mode_age = hascardio['age'].mode()[0]
+median_age = hascardio['age'].median()
+max_age = hascardio['age'].max()
+min_age = hascardio['age'].min()
+std_age = hascardio['age'].std()
+cardiosmokers = hascardio[hascardio['smoke'] == 1]
+cardio_not_smokers = hascardio[hascardio['smoke'] == 0]
+
+print('The average age that has cardio ', median_age)
+print('The median age that has cardio ', mean_age)
+print('The most frequent age that has cardio ', mode_age )
+print('The older age that has cardio ', max_age )
+print('The smaller age that has cardio ', min_age )
+print('The Standard Deviation of the ages that have cardio ', std_age )
+print('The number of people that have cardio and are smokers ',len(cardiosmokers['smoke']))
+print('The number of people that have cardio and are not smokers ',len(cardio_not_smokers['smoke']))
+#print(hascardio)
+plt.ylabel('Ages')
+plt.title("Has cardio age box plot")
+plt.boxplot(hascardio['age'])
+plt.show()
+
+plt.hist(hascardio['age'], bins = int(180/25), density=False, alpha=0.5, color='r')
+plt.title("Age - Cardio")
+plt.xlabel("Age")
+plt.ylabel("Frequency")
+plt.show()