Diff of /compare.py [000000] .. [2d53aa]

Switch to unified view

a b/compare.py
1
import numpy as np
2
import pandas as pd
3
from sklearn import decomposition
4
from sklearn.manifold import TSNE
5
import umap
6
7
def _save_embedding(reducer, method_name, features, index, n_dims):
    """Fit ``reducer`` on ``features`` and write the embedding to a TSV file.

    Args:
        reducer: Estimator exposing ``fit_transform`` (PCA, KernelPCA, TSNE,
            UMAP, ...).
        method_name: Short tag placed in the output file name, e.g. ``'PCA'``.
        features: 2-D array handed to ``fit_transform``.
        index: Row labels for the resulting table.
        n_dims: Latent dimension recorded in the output file name.

    Returns:
        The ``pandas.DataFrame`` that was written to disk.
    """
    embedding = reducer.fit_transform(features)
    latent_code = pd.DataFrame(embedding, index=index)
    # NOTE: the 'sapce' spelling is kept on purpose so that existing consumers
    # of these result files keep finding them under the same name.
    output_path = ('results/GDC-PANCAN_' + str(n_dims) + 'D_' + method_name
                   + '_latent_sapce.tsv')
    latent_code.to_csv(output_path, sep='\t')
    return latent_code


print('Loading data...')
input_path = 'data/PANCAN/GDC-PANCAN_'
sample_id = np.loadtxt(input_path + 'both_samples.tsv', delimiter='\t', dtype='str')

input_df = pd.read_csv(input_path + 'preprocessed_both.tsv', sep='\t', header=0, index_col=0)
# Transpose so that each row of the matrix is labelled by one entry of sample_id.
input_df = input_df.T

# Fail fast: a mismatch would otherwise only surface after the first fit,
# when the embedding is wrapped in a DataFrame indexed by sample_id.
if np.size(sample_id) != input_df.shape[0]:
    raise ValueError(
        'Number of sample IDs (%d) does not match number of rows in the '
        'input matrix (%d)' % (np.size(sample_id), input_df.shape[0])
    )

latent_space_dimension = 2
features = input_df.values

# PCA
print('PCA')
pca = decomposition.PCA(n_components=latent_space_dimension)
latent_code = _save_embedding(pca, 'PCA', features, sample_id, latent_space_dimension)

# KPCA
print('KPCA')
kpca = decomposition.KernelPCA(n_components=latent_space_dimension, kernel='rbf')
latent_code = _save_embedding(kpca, 'KPCA', features, sample_id, latent_space_dimension)

# TSNE
# NOTE(review): scikit-learn's default Barnes-Hut t-SNE is documented to need
# n_components < 4; confirm before raising latent_space_dimension.
print('TSNE')
tsne = TSNE(n_components=latent_space_dimension)
latent_code = _save_embedding(tsne, 'TSNE', features, sample_id, latent_space_dimension)

# UMAP
print('UMAP')
# Pass the target dimension explicitly so UMAP follows latent_space_dimension
# like the other methods instead of silently using its own default.
umap_reducer = umap.UMAP(n_components=latent_space_dimension)
latent_code = _save_embedding(umap_reducer, 'UMAP', features, sample_id, latent_space_dimension)