|
a |
|
b/matlab_source/netics_fun.m |
|
|
1 |
function [ ranked_list_genes, scores ] = netics_fun( varargin ) |
|
|
2 |
|
|
|
3 |
% - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
|
|
4 |
% INPUTS: |
|
|
5 |
% filenameMu(varargin{1}): input file that contains the genetically aberrant genes of each sample. |
|
|
6 |
% It contains two columns that map every gene (1st column) to the samples that |
|
|
7 |
% it is genetically aberrant (2nd column) |
|
|
8 |
% adj(varargin{2}): adjacency matrix of the directed interaction network |
|
|
9 |
% restart_prob(varargin{3}): restart probability for the insulated diffusion. For (Wu et al., 2010) network use 0.4. |
|
|
10 |
% rank_method_str(varargin{4}): 'MEDIAN' uses the median of the sample-specific ranks |
|
|
11 |
% 'RRA' uses the Robust Rank Aggregation method to integrate sample-specific ranked lists |
|
|
12 |
% 'SUM' uses the sum of the sample-specific ranks |
|
|
13 |
% filenameNet(varargin{5}): input file that contains the list of the genes that are present in the network. |
|
|
14 |
% They should be in the same order as in the rows of the adjacency matrix adj. |
|
|
15 |
% An example file is given that contains the gene names of the network described in (Wu et al, 2010). |
|
|
16 |
% filenameRNA(varargin{6}): tab delimited file with two columns. First column contains the genes for which differential expression |
|
|
17 |
% between the tumor and normal samples at the RNA level was measured. Second column contains the p-values |
|
|
18 |
% of these measurements. This file can be the result of a tool for differential expression analysis such as DESeq2. |
|
|
19 |
% Each gene in this file should have only one entry. |
|
|
20 |
% filenamePR(varargin{7}): tab delimited file with two columns. First column contains the proteins for which differential expression between |
|
|
21 |
% the tumor and normal samples at the protein level was measured. Second column contains the p-values of these |
|
|
22 |
% measurements. Each gene in this file should have only one entry. |
|
|
23 |
% - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
|
|
24 |
% OUTPUTS: |
|
|
25 |
% ranked_list_genes: list with the ranked gene names |
|
|
26 |
% scores: list with the scores of the ranked gene names |
|
|
27 |
% - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
|
|
28 |
|
|
|
29 |
filenameRNA = ''; |
|
|
30 |
filenamePR = ''; |
|
|
31 |
ranked_list_genes={}; |
|
|
32 |
|
|
|
33 |
% Accept arguments |
|
|
34 |
if nargin < 5, |
|
|
35 |
error('Error: Missing input arguments.') |
|
|
36 |
elseif nargin > 7, |
|
|
37 |
error('Error: Too many input arguments.') |
|
|
38 |
else |
|
|
39 |
mutation_data = read_mutations( varargin{1} ); |
|
|
40 |
adj = varargin{2}; |
|
|
41 |
restart_prob = varargin{3}; |
|
|
42 |
rank_method_str = varargin{4}; |
|
|
43 |
filenameNet = varargin{5}; |
|
|
44 |
choose_mut_diff = 1; |
|
|
45 |
end |
|
|
46 |
if nargin >= 6, |
|
|
47 |
filenameRNA = varargin{6}; |
|
|
48 |
choose_mut_diff = 3; |
|
|
49 |
end |
|
|
50 |
if nargin >= 7, |
|
|
51 |
filenamePR = varargin{7}; |
|
|
52 |
end |
|
|
53 |
|
|
|
54 |
%read network genes |
|
|
55 |
fidnet = fopen(filenameNet,'r'); |
|
|
56 |
g = textscan(fidnet, '%s', 'delimiter', '\n'); |
|
|
57 |
network_genes = g{1}; |
|
|
58 |
fclose(fidnet); |
|
|
59 |
|
|
|
60 |
%Extract first token |
|
|
61 |
rank_method_str1 = rank_method_str(1:strfind(rank_method_str,'=')); |
|
|
62 |
|
|
|
63 |
%Handle input |
|
|
64 |
rank_method_str = rank_method_str(strfind(rank_method_str,'=')+1:end); |
|
|
65 |
|
|
|
66 |
%Check input - Token 1 |
|
|
67 |
if ~strcmp(rank_method_str1,'RANK_AGGREG='), |
|
|
68 |
disp(strcat('Wrong input: ',rank_method_str1)); |
|
|
69 |
disp('Input should be RANK_AGGREG='); |
|
|
70 |
return; |
|
|
71 |
end |
|
|
72 |
|
|
|
73 |
%Check input - Token 2 |
|
|
74 |
if ~strcmp(rank_method_str,'SUM') && ~strcmp(rank_method_str,'MEDIAN') && ~strcmp(rank_method_str,'RRA'), |
|
|
75 |
disp(strcat('Wrong input: ',rank_method_str)); |
|
|
76 |
disp(strcat('Input should be MEDIAN, RRA or SUM')); |
|
|
77 |
return; |
|
|
78 |
end |
|
|
79 |
|
|
|
80 |
%copy data from input mutation_data |
|
|
81 |
samples = {}; |
|
|
82 |
for i = 1:length(mutation_data), |
|
|
83 |
samples.DNA{i} = upper(mutation_data{i}); |
|
|
84 |
samples.DNA_network{i} = upper(intersect(network_genes,samples.DNA{i})); |
|
|
85 |
end |
|
|
86 |
clear mutation_data; |
|
|
87 |
|
|
|
88 |
%read differentially expressed genes |
|
|
89 |
diff_expr_genes = {}; |
|
|
90 |
if choose_mut_diff~=1, |
|
|
91 |
diff_expr_genes = read_diff_expr( filenameRNA, filenamePR ); |
|
|
92 |
if isempty(diff_expr_genes), |
|
|
93 |
choose_mut_diff=1; |
|
|
94 |
end |
|
|
95 |
else |
|
|
96 |
disp('No Input files were provided for differentially expressed genes.'); |
|
|
97 |
end |
|
|
98 |
for i = 1:length(samples.DNA), |
|
|
99 |
samples.RNA{i} = diff_expr_genes; |
|
|
100 |
samples.RNA_network{i} = upper(intersect(network_genes,samples.RNA{i})); |
|
|
101 |
end |
|
|
102 |
|
|
|
103 |
%compute diffused matrix |
|
|
104 |
disp('Computing diffused matrix...'); |
|
|
105 |
F = insulated_diff(norm_adj(adj), restart_prob); |
|
|
106 |
F_opp = insulated_diff(norm_adj(adj'), restart_prob); |
|
|
107 |
disp('Running NetICS...'); |
|
|
108 |
[ranked_list_genes, scores] = prioritization( samples, F, F_opp, network_genes, choose_mut_diff, rank_method_str); |
|
|
109 |
|
|
|
110 |
end |
|
|
111 |
|
|
|
112 |
function [ mutation_data ] = read_mutations( filename ) |
|
|
113 |
fid = fopen(filename,'r'); |
|
|
114 |
g = textscan(fid, '%s%s', 'delimiter', '\t'); |
|
|
115 |
fclose(fid); |
|
|
116 |
unique_samples = unique(g{2}); |
|
|
117 |
mutation_data = cell(length(unique_samples),1); |
|
|
118 |
for i = 1:length(unique_samples), |
|
|
119 |
mutation_data{i} = g{1}(ismember(g{2},unique_samples{i})); |
|
|
120 |
end |
|
|
121 |
end |
|
|
122 |
|
|
|
123 |
function [ diff_expr_genes ] = read_diff_expr( filenameDE, filenamePR ) |
|
|
124 |
|
|
|
125 |
diff_expr_genes ={}; RNA_names={}; rppa_names={}; |
|
|
126 |
|
|
|
127 |
if ~isempty(filenamePR), |
|
|
128 |
[rppa_names, rppa_pval] = read_file(filenamePR); |
|
|
129 |
else |
|
|
130 |
disp('No Input file was provided for differentially expressed genes at the proteome level.'); |
|
|
131 |
end |
|
|
132 |
|
|
|
133 |
if ~isempty(filenameDE), |
|
|
134 |
[RNA_names, RNA_pval] = read_file(filenameDE); |
|
|
135 |
else |
|
|
136 |
disp('No Input file was provided for differentially expressed genes at the RNA level.'); |
|
|
137 |
end |
|
|
138 |
|
|
|
139 |
%check for duplicate genes in the input files |
|
|
140 |
if (length(unique(RNA_names)) ~= length(RNA_names)) || (length(unique(rppa_names)) ~= length(rppa_names)), |
|
|
141 |
disp('Input files for differentially expressed genes should contain only one entry per gene.'); |
|
|
142 |
disp('Input files for differentially expressed genes were ignored.'); |
|
|
143 |
return; |
|
|
144 |
end |
|
|
145 |
%check if the input files for differential expression are empty |
|
|
146 |
if isempty(rppa_names) && isempty(RNA_names), |
|
|
147 |
disp('Only genetically aberrant genes are used for diffusion.'); |
|
|
148 |
return; |
|
|
149 |
%only differential expressed genes at the RNA level are provided |
|
|
150 |
elseif isempty(rppa_names) && ~isempty(RNA_names), |
|
|
151 |
[~,~,pval_all] = fdr(RNA_pval,0.05); |
|
|
152 |
diff_expr_genes = RNA_names(pval_all < 0.05); |
|
|
153 |
%only differential expressed genes at the protein level are provided |
|
|
154 |
elseif isempty(RNA_names) && ~isempty(rppa_names), |
|
|
155 |
[~,~,pval_all] = fdr(rppa_pval,0.05); |
|
|
156 |
diff_expr_genes = rppa_names(pval_all < 0.05); |
|
|
157 |
else |
|
|
158 |
names_intersection = intersect(RNA_names,rppa_names); |
|
|
159 |
names_setdiff_rna = setdiff(RNA_names,rppa_names); |
|
|
160 |
names_setdiff_pr = setdiff(rppa_names,RNA_names); |
|
|
161 |
|
|
|
162 |
rna_pval_names_intersection = RNA_pval(ismember(RNA_names,names_intersection)); |
|
|
163 |
rppa_pval_names_intersection = rppa_pval(ismember(rppa_names,names_intersection)); |
|
|
164 |
|
|
|
165 |
rna_pval_names_setdiff_rna = RNA_pval(ismember(RNA_names,names_setdiff_rna)); |
|
|
166 |
pr_pval_names_setdiff_pr = rppa_pval(ismember(rppa_names,names_setdiff_pr)); |
|
|
167 |
|
|
|
168 |
all_names = [names_intersection' names_setdiff_rna' names_setdiff_pr']; |
|
|
169 |
%Fishers method |
|
|
170 |
pval_all = (1-pchisq(-2*(log(rna_pval_names_intersection)+log(rppa_pval_names_intersection)),4)); |
|
|
171 |
pval_all = [pval_all' rna_pval_names_setdiff_rna' pr_pval_names_setdiff_pr']; |
|
|
172 |
|
|
|
173 |
[~,~,pval_all] = fdr(pval_all,0.05); |
|
|
174 |
diff_expr_genes = all_names(pval_all < 0.05); |
|
|
175 |
end |
|
|
176 |
end |
|
|
177 |
|
|
|
178 |
function [names, pval] = read_file(filenamePR) |
|
|
179 |
fid = fopen(filenamePR,'r'); |
|
|
180 |
g = textscan(fid, '%s%s', 'delimiter', '\t'); |
|
|
181 |
fclose(fid); |
|
|
182 |
names = g{1}; pval = str2double(g{2}); |
|
|
183 |
end |