--- a +++ b/compare.py @@ -0,0 +1,46 @@ +import numpy as np +import pandas as pd +from sklearn import decomposition +from sklearn.manifold import TSNE +import umap + +print('Loading data...') +input_path = 'data/PANCAN/GDC-PANCAN_' +sample_id = np.loadtxt(input_path + 'both_samples.tsv', delimiter='\t', dtype='str') + +input_df = pd.read_csv(input_path + 'preprocessed_both.tsv', sep='\t', header=0, index_col=0) +input_df = input_df.T + +latent_space_dimension = 2 + +# PCA +print('PCA') +pca = decomposition.PCA(n_components=latent_space_dimension) +z = pca.fit_transform(input_df.values) +latent_code = pd.DataFrame(z, index=sample_id) +output_path = 'results/GDC-PANCAN_' + str(latent_space_dimension) + 'D_PCA_latent_sapce.tsv' +latent_code.to_csv(output_path, sep='\t') + +# KPCA +print('KPCA') +kpca = decomposition.KernelPCA(n_components=latent_space_dimension, kernel='rbf') +z = kpca.fit_transform(input_df.values) +latent_code = pd.DataFrame(z, index=sample_id) +output_path = 'results/GDC-PANCAN_' + str(latent_space_dimension) + 'D_KPCA_latent_sapce.tsv' +latent_code.to_csv(output_path, sep='\t') + +# TSNE +print('TSNE') +tsne = TSNE(n_components=latent_space_dimension) +z = tsne.fit_transform(input_df.values) +latent_code = pd.DataFrame(z, index=sample_id) +output_path = 'results/GDC-PANCAN_' + str(latent_space_dimension) + 'D_TSNE_latent_sapce.tsv' +latent_code.to_csv(output_path, sep='\t') + +# UMAP +print('UMAP') +umap_reducer = umap.UMAP() +z = umap_reducer.fit_transform(input_df.values) +latent_code = pd.DataFrame(z, index=sample_id) +output_path = 'results/GDC-PANCAN_' + str(latent_space_dimension) + 'D_UMAP_latent_sapce.tsv' +latent_code.to_csv(output_path, sep='\t')