|
a |
|
b/compare.py |
|
|
1 |
import numpy as np |
|
|
2 |
import pandas as pd |
|
|
3 |
from sklearn import decomposition |
|
|
4 |
from sklearn.manifold import TSNE |
|
|
5 |
import umap |
|
|
6 |
|
|
|
7 |
# Baseline comparison script: embed the PANCAN input matrix with PCA,
# kernel PCA, t-SNE and UMAP, and write each embedding to its own TSV file
# under results/.

print('Loading data...')
input_path = 'data/PANCAN/GDC-PANCAN_'
# Sample identifiers, used as the row index of every output table.
sample_id = np.loadtxt(input_path + 'both_samples.tsv', delimiter='\t', dtype='str')

input_df = pd.read_csv(input_path + 'preprocessed_both.tsv', sep='\t', header=0, index_col=0)
# Transposed before fitting; presumably the file is stored features x samples,
# so that rows line up with sample_id afterwards -- TODO confirm.
input_df = input_df.T

# Number of output dimensions requested from every reducer below; it is also
# embedded in each output file name.
latent_space_dimension = 2


def _save_latent_code(method_name, reducer):
    """Fit ``reducer`` on the input matrix and write the embedding as TSV.

    Args:
        method_name: Short label for the method (e.g. ``'PCA'``). It is
            printed as a progress message and used in the output file name.
        reducer: Object exposing ``fit_transform(X)`` that returns one row
            per row of ``input_df``.

    Side effects:
        Writes ``results/GDC-PANCAN_<dim>D_<method_name>_latent_sapce.tsv``.
    """
    print(method_name)
    z = reducer.fit_transform(input_df.values)
    latent_code = pd.DataFrame(z, index=sample_id)
    # NOTE: 'sapce' is a typo for 'space'; it is kept unchanged so the file
    # names stay identical to the ones this script has always produced.
    output_path = ('results/GDC-PANCAN_' + str(latent_space_dimension)
                   + 'D_' + method_name + '_latent_sapce.tsv')
    latent_code.to_csv(output_path, sep='\t')


# PCA
_save_latent_code('PCA', decomposition.PCA(n_components=latent_space_dimension))

# KPCA
_save_latent_code(
    'KPCA',
    decomposition.KernelPCA(n_components=latent_space_dimension, kernel='rbf'),
)

# TSNE
_save_latent_code('TSNE', TSNE(n_components=latent_space_dimension))

# UMAP
# n_components is passed explicitly so the embedding size follows
# latent_space_dimension (and therefore matches the "<dim>D" in the file
# name) instead of silently relying on the library default.
_save_latent_code('UMAP', umap.UMAP(n_components=latent_space_dimension))