|
a |
|
b/plot.py |
|
|
1 |
import preprocessing |
|
|
2 |
import matplotlib.pyplot as plt |
|
|
3 |
import seaborn as sns |
|
|
4 |
# Global seaborn configuration, applied once at import time.
# The order matters: each later call overrides part of what the earlier
# one configured (the "ticks" style chosen first is replaced by "whitegrid").
sns.set(color_codes=True, style="ticks")
sns.set_style(style="whitegrid")
sns.set_context(context="poster")
|
|
7 |
|
|
|
8 |
|
|
|
9 |
def show_data():
    """
    Draw a pairwise scatter-plot matrix of the AML data and display it.

    The data is loaded and prepared through the ``preprocessing`` module,
    every column whose name contains ``'Gene.'`` is removed, and the
    following columns are plotted against each other, coloured by
    ``caseflag``:
      - Total.Genes
      - Age
      - WBC       (white blood cell count)
      - PLATELET  (platelet)
      - HEMOGLBN  (hemoglobin)
      - HEMATOCR  (hematocrit)

    NOTE(review): an earlier revision of this docstring also listed
    "average allelic fraction", but no such column is selected below --
    confirm whether it should be part of the plot.

    Takes no arguments and returns nothing; the figure is shown with
    ``plt.show()``.
    """

    # Preprocessing: the helpers are called for their effect on aml_data,
    # their return values are not used here.
    aml_data = preprocessing.load_csv()
    preprocessing.fill_missing_values(aml_data)
    preprocessing.add_total_genes(aml_data)

    # Delete gene columns: collect the names first, then drop them in a
    # single call instead of deleting one by one while walking the columns.
    gene_columns = [column for column in aml_data.columns if 'Gene.' in column]
    aml_data.drop(gene_columns, axis=1, inplace=True)

    # Plot pairwise
    # NOTE(review): sns.set() also re-applies seaborn's default context, so
    # a plotting context configured earlier may be overridden -- confirm
    # this is intended.
    sns.set(style='whitegrid')
    cols = ['caseflag', 'Total.Genes', 'Age', 'WBC', 'PLATELET', 'HEMOGLBN', 'HEMATOCR']
    sns.pairplot(aml_data[cols],
                 hue='caseflag',         # different caseflags have different colors
                 markers=['.', r'$+$'],  # one marker per caseflag level (presumably two levels)
                 plot_kws={"s": 250},    # marker size
                 # Size of each subplot. seaborn 0.9 renamed this keyword
                 # from `size` to `height`; newer releases reject `size`.
                 height=5.0)
    plt.show()