netics / Git / [11a160] /matlab_source/netics

Models:
AlyssaS/
netics
Downloads: 1
[11a160]: / matlab_source / netics_fun.m
History
Download this file
184 lines (160 with data), 8.1 kB

function [ ranked_list_genes, scores ] = netics_fun( varargin )

% - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -  
%   INPUTS:
%   filenameMu(varargin{1}):         input file that contains the genetically aberrant genes of each sample. 
%                                    It contains two columns that map every gene (1st column) to the samples that 
%                                    it is genetically aberrant (2nd column)
%   adj(varargin{2}):                adjacency matrix of the directed interaction network
%   restart_prob(varargin{3}):       restart probability for the insulated diffusion. For (Wu et al., 2010) network use 0.4.
%   rank_method_str(varargin{4}):    'MEDIAN' uses the median of the sample-specific ranks
%                                    'RRA' uses the Robust Rank Aggregation method to integrate sample-specific ranked lists
%                                    'SUM' uses the sum of the sample-specific ranks
%   filenameNet(varargin{5}):        input file that contains the list of the genes that are present in the network. 
%                                    They should be in the same order as in the rows of the adjacency matrix adj. 
%                                    An example file is given that contains the gene names of the network described in (Wu et al, 2010).
%   filenameRNA(varargin{6}):        tab delimited file with two columns. First column contains the genes for which differential expression 
%                                    between the tumor and normal samples at the RNA level was measured. Second column contains the p-values 
%                                    of these measurements. This file can be the result of a tool for differential expression analysis such as DESeq2. 
%                                    Each gene in this file should have only one entry.
%   filenamePR(varargin{7}):         tab delimited file with two columns. First column contains the proteins for which differential expression between 
%                                    the tumor and normal samples at the protein level was measured. Second column contains the p-values of these 
%                                    measurements. Each gene in this file should have only one entry.
% - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -  
%   OUTPUTS:
%   ranked_list_genes:  list with the ranked gene names
%   scores:             list with the scores of the ranked gene names
% - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -  

	filenameRNA = '';
	filenamePR = '';
	ranked_list_genes={};

	% Accept arguments    
	if nargin < 5,
		error('Error: Missing input arguments.')
	elseif nargin > 7,
		error('Error: Too many input arguments.')
	else            
		mutation_data = read_mutations( varargin{1} );
		adj = varargin{2};
		restart_prob = varargin{3};
		rank_method_str = varargin{4};
		filenameNet = varargin{5};
		choose_mut_diff = 1;
	end
	if nargin >= 6,            
		filenameRNA = varargin{6};
		choose_mut_diff = 3;        
	end
	if nargin >= 7,
		filenamePR = varargin{7};
	end

	%read network genes
	fidnet = fopen(filenameNet,'r');
	g = textscan(fidnet, '%s', 'delimiter', '\n');
	network_genes = g{1};
	fclose(fidnet);
	
	%Extract first token
	rank_method_str1 = rank_method_str(1:strfind(rank_method_str,'='));
	
	%Handle input
	rank_method_str = rank_method_str(strfind(rank_method_str,'=')+1:end);
	
	%Check input - Token 1
	if ~strcmp(rank_method_str1,'RANK_AGGREG='),
		disp(strcat('Wrong input: ',rank_method_str1));
		disp('Input should be RANK_AGGREG=');		
		return;	
	end

	%Check input - Token 2
	if  ~strcmp(rank_method_str,'SUM') && ~strcmp(rank_method_str,'MEDIAN') && ~strcmp(rank_method_str,'RRA'),
		disp(strcat('Wrong input: ',rank_method_str));
		disp(strcat('Input should be MEDIAN, RRA or SUM'));
		return;		
	end

	%copy data from input mutation_data 
	samples = {};
	for i = 1:length(mutation_data),
		samples.DNA{i} = upper(mutation_data{i});
		samples.DNA_network{i} = upper(intersect(network_genes,samples.DNA{i}));
	end
	clear mutation_data;

	%read differentially expressed genes
	diff_expr_genes = {};
	if choose_mut_diff~=1,
		diff_expr_genes = read_diff_expr( filenameRNA, filenamePR );
		if isempty(diff_expr_genes),
			choose_mut_diff=1;
		end
	else
		disp('No Input files were provided for differentially expressed genes.');        
	end
	for i = 1:length(samples.DNA),
		samples.RNA{i} = diff_expr_genes;
		samples.RNA_network{i} = upper(intersect(network_genes,samples.RNA{i}));
	end

	%compute diffused matrix
	disp('Computing diffused matrix...');
	F = insulated_diff(norm_adj(adj), restart_prob);
	F_opp = insulated_diff(norm_adj(adj'), restart_prob);    
	disp('Running NetICS...');
	[ranked_list_genes, scores] = prioritization( samples, F, F_opp, network_genes, choose_mut_diff, rank_method_str);

end

function [ mutation_data ] = read_mutations( filename )
	fid = fopen(filename,'r');
	g = textscan(fid, '%s%s', 'delimiter', '\t');
	fclose(fid);
	unique_samples = unique(g{2});
	mutation_data = cell(length(unique_samples),1);
	for i = 1:length(unique_samples),
		mutation_data{i} = g{1}(ismember(g{2},unique_samples{i}));
	end
end

function [ diff_expr_genes ] = read_diff_expr( filenameDE, filenamePR )

	diff_expr_genes ={}; RNA_names={}; rppa_names={};

	if ~isempty(filenamePR),
		[rppa_names, rppa_pval] = read_file(filenamePR);
	else
		disp('No Input file was provided for differentially expressed genes at the proteome level.');        
	end

	if ~isempty(filenameDE),
		[RNA_names, RNA_pval] = read_file(filenameDE);        
	else
		disp('No Input file was provided for differentially expressed genes at the RNA level.');        
	end

	%check for duplicate genes in the input files
	if (length(unique(RNA_names)) ~= length(RNA_names)) || (length(unique(rppa_names)) ~= length(rppa_names)),
		disp('Input files for differentially expressed genes should contain only one entry per gene.');
		disp('Input files for differentially expressed genes were ignored.');        
		return;	        
	end
	%check if the input files for differential expression are empty    
	if isempty(rppa_names) && isempty(RNA_names),
		disp('Only genetically aberrant genes are used for diffusion.');
		return;
	%only differential expressed genes at the RNA level are provided
	elseif isempty(rppa_names) && ~isempty(RNA_names),
		[~,~,pval_all] = fdr(RNA_pval,0.05);
		diff_expr_genes = RNA_names(pval_all < 0.05);
	%only differential expressed genes at the protein level are provided        
	elseif isempty(RNA_names) && ~isempty(rppa_names),
		[~,~,pval_all] = fdr(rppa_pval,0.05);
		diff_expr_genes = rppa_names(pval_all < 0.05);
	else
		names_intersection = intersect(RNA_names,rppa_names);
		names_setdiff_rna = setdiff(RNA_names,rppa_names);
		names_setdiff_pr = setdiff(rppa_names,RNA_names);

		rna_pval_names_intersection = RNA_pval(ismember(RNA_names,names_intersection));
		rppa_pval_names_intersection = rppa_pval(ismember(rppa_names,names_intersection));

		rna_pval_names_setdiff_rna = RNA_pval(ismember(RNA_names,names_setdiff_rna));
		pr_pval_names_setdiff_pr = rppa_pval(ismember(rppa_names,names_setdiff_pr));

		all_names = [names_intersection' names_setdiff_rna' names_setdiff_pr'];
		%Fishers method
		pval_all = (1-pchisq(-2*(log(rna_pval_names_intersection)+log(rppa_pval_names_intersection)),4));
		pval_all = [pval_all' rna_pval_names_setdiff_rna' pr_pval_names_setdiff_pr'];    

		[~,~,pval_all] = fdr(pval_all,0.05);
		diff_expr_genes = all_names(pval_all < 0.05);
	end
end

function [names, pval] = read_file(filenamePR)
	fid = fopen(filenamePR,'r');
	g = textscan(fid, '%s%s', 'delimiter', '\t');
	fclose(fid);
	names = g{1}; pval = str2double(g{2}); 
end