#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time : 2021/8/8 14:01
# @Author : Li Xiao
# @File : SNF.py
import argparse
import os

import numpy as np
import pandas as pd
import seaborn as sns
import snf

# Output locations (kept identical to the paths the script has always used).
RESULT_DIR = 'result'
FUSED_MATRIX_PATH = 'result/SNF_fused_matrix.csv'
FUSED_CLUSTERMAP_PATH = 'result/SNF_fused_clustermap.png'


def build_parser():
    """Create the command-line parser for the SNF script.

    Returns:
        argparse.ArgumentParser: parser exposing ``--path/-p`` (exactly three
        input files), ``--metric/-m``, ``--K/-k`` and ``--mu/-mu``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--path', '-p', type=str, nargs=3, required=True,
                        help='Location of input files, must be 3 files')
    parser.add_argument('--metric', '-m', type=str,
                        choices=['braycurtis', 'canberra', 'chebyshev', 'cityblock',
                                 'correlation', 'cosine', 'dice', 'euclidean', 'hamming', 'jaccard', 'kulsinski',
                                 'mahalanobis', 'matching', 'minkowski', 'rogerstanimoto', 'russellrao', 'seuclidean',
                                 'sokalmichener', 'sokalsneath', 'sqeuclidean', 'wminkowski', 'yule'],
                        default='sqeuclidean',
                        help='Distance metric to compute. Must be one of available metrics in :py:func scipy.spatial.distance.pdist.')
    parser.add_argument('--K', '-k', type=int, default=20,
                        help='(0, N) int, number of neighbors to consider when creating affinity matrix. See Notes of :py:func snf.compute.affinity_matrix for more details. Default: 20.')
    # mu is documented as a float in (0, 1); parsing it as int rejected every
    # explicit value such as "-mu 0.3", so it must be parsed as float.
    parser.add_argument('--mu', '-mu', type=float, default=0.5,
                        help='(0, 1) float, Normalization factor to scale similarity kernel when constructing affinity matrix. See Notes of :py:func snf.compute.affinity_matrix for more details. Default: 0.5.')
    return parser


def load_omics_table(path):
    """Read one omics CSV and name its first column ``'Sample'``.

    Args:
        path: location of a CSV file whose first row is the header.

    Returns:
        pandas.DataFrame: the table with its first column renamed to
        ``'Sample'``; the remaining columns are left untouched.
    """
    table = pd.read_csv(path, header=0, index_col=None)
    return table.rename(columns={table.columns[0]: 'Sample'})


def samples_match(tables):
    """Return True when every table lists the same sample IDs in the same order.

    Args:
        tables: sequence of DataFrames that each have a ``'Sample'`` column.

    Returns:
        bool: False if any table differs from the first one in number of
        rows or in any sample ID.
    """
    reference = tables[0]['Sample'].tolist()
    return all(table['Sample'].tolist() == reference for table in tables[1:])


def fuse_networks(tables, metric, K, mu):
    """Build one affinity network per table and fuse them with SNF.

    Args:
        tables: DataFrames whose first column is the sample ID and whose
            remaining columns are numeric features.
        metric: distance metric name forwarded to ``snf.make_affinity``.
        K: neighbor count forwarded to ``snf.make_affinity`` and ``snf.snf``.
        mu: normalization factor forwarded to ``snf.make_affinity``.

    Returns:
        The fused network returned by ``snf.snf``.
    """
    # The builtin float replaces the np.float alias, which recent NumPy removed.
    features = [table.iloc[:, 1:].values.astype(float) for table in tables]
    affinity_nets = snf.make_affinity(features, metric=metric, K=K, mu=mu)
    return snf.snf(affinity_nets, K=K)


def zero_diagonal(frame):
    """Return a copy of ``frame`` with the main diagonal set to 0.

    Works on a copy of the underlying array so the result does not depend on
    ``frame.values`` being a writable view of the DataFrame.
    """
    values = frame.values.copy()
    np.fill_diagonal(values, 0)
    return pd.DataFrame(values, index=frame.index, columns=frame.columns)


def main(argv=None):
    """Run similarity network fusion on three omics tables and save the results.

    Args:
        argv: optional argument list; ``None`` means use ``sys.argv``.

    Returns:
        int: 0 on success, 1 when the input files do not share the same samples.
    """
    args = build_parser().parse_args(argv)

    print('Load data files...')
    tables = [load_omics_table(path) for path in args.path]
    print(*[table.shape for table in tables])

    # align samples of different data
    tables = [table.sort_values(by='Sample', ascending=True) for table in tables]

    # Equal row counts alone do not guarantee the rows describe the same
    # samples, so compare the sorted sample IDs themselves.
    if not samples_match(tables):
        print('Input files must have same samples.')
        return 1

    print('Start similarity network fusion...')
    fused_net = fuse_networks(tables, metric=args.metric, K=args.K, mu=args.mu)

    print('Save fused adjacency matrix...')
    sample_ids = tables[0]['Sample'].tolist()
    fused_df = pd.DataFrame(fused_net, index=sample_ids, columns=sample_ids)
    # Make sure the output directory exists before writing into it.
    os.makedirs(RESULT_DIR, exist_ok=True)
    fused_df.to_csv(FUSED_MATRIX_PATH, header=True, index=True)

    # The saved CSV keeps the diagonal; only the plotted matrix has it zeroed.
    fig = sns.clustermap(zero_diagonal(fused_df), cmap='vlag', figsize=(8, 8))
    fig.savefig(FUSED_CLUSTERMAP_PATH, dpi=300)
    print('Success! Results can be seen in result file')
    return 0


if __name__ == '__main__':
    raise SystemExit(main())