netics / Git / Diff of /matlab_source/netics

Models:

AlyssaS/

netics

Downloads: 1

Diff of /matlab_source/netics_fun.m [000000] .. [11a160]

Switch to unified view

 b/matlab_source/netics_fun.m
+function [ ranked_list_genes, scores ] = netics_fun( varargin )
+% - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+%   INPUTS:
+%   filenameMu(varargin{1}):         input file that contains the genetically aberrant genes of each sample.
+%                                    It contains two columns that map every gene (1st column) to the samples that
+%                                    it is genetically aberrant (2nd column)
+%   adj(varargin{2}):                adjacency matrix of the directed interaction network
+%   restart_prob(varargin{3}):       restart probability for the insulated diffusion. For (Wu et al., 2010) network use 0.4.
+%   rank_method_str(varargin{4}):    'MEDIAN' uses the median of the sample-specific ranks
+%                                    'RRA' uses the Robust Rank Aggregation method to integrate sample-specific ranked lists
+%                                    'SUM' uses the sum of the sample-specific ranks
+%   filenameNet(varargin{5}):        input file that contains the list of the genes that are present in the network.
+%                                    They should be in the same order as in the rows of the adjacency matrix adj.
+%                                    An example file is given that contains the gene names of the network described in (Wu et al, 2010).
+%   filenameRNA(varargin{6}):        tab delimited file with two columns. First column contains the genes for which differential expression
+%                                    between the tumor and normal samples at the RNA level was measured. Second column contains the p-values
+%                                    of these measurements. This file can be the result of a tool for differential expression analysis such as DESeq2.
+%                                    Each gene in this file should have only one entry.
+%   filenamePR(varargin{7}):         tab delimited file with two columns. First column contains the proteins for which differential expression between
+%                                    the tumor and normal samples at the protein level was measured. Second column contains the p-values of these
+%                                    measurements. Each gene in this file should have only one entry.
+% - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+%   OUTPUTS:
+%   ranked_list_genes:  list with the ranked gene names
+%   scores:             list with the scores of the ranked gene names
+% - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+    filenameRNA = '';
+    filenamePR = '';
+    ranked_list_genes={};
+    % Accept arguments
+    if nargin < 5,
+        error('Error: Missing input arguments.')
+    elseif nargin > 7,
+        error('Error: Too many input arguments.')
+    else
+        mutation_data = read_mutations( varargin{1} );
+        adj = varargin{2};
+        restart_prob = varargin{3};
+        rank_method_str = varargin{4};
+        filenameNet = varargin{5};
+        choose_mut_diff = 1;
+    end
+    if nargin >= 6,
+        filenameRNA = varargin{6};
+        choose_mut_diff = 3;
+    end
+    if nargin >= 7,
+        filenamePR = varargin{7};
+    end
+    %read network genes
+    fidnet = fopen(filenameNet,'r');
+    g = textscan(fidnet, '%s', 'delimiter', '\n');
+    network_genes = g{1};
+    fclose(fidnet);
+    %Extract first token
+    rank_method_str1 = rank_method_str(1:strfind(rank_method_str,'='));
+    %Handle input
+    rank_method_str = rank_method_str(strfind(rank_method_str,'=')+1:end);
+    %Check input - Token 1
+    if ~strcmp(rank_method_str1,'RANK_AGGREG='),
+        disp(strcat('Wrong input: ',rank_method_str1));
+        disp('Input should be RANK_AGGREG=');
+        return;
+    end
+    %Check input - Token 2
+    if  ~strcmp(rank_method_str,'SUM') && ~strcmp(rank_method_str,'MEDIAN') && ~strcmp(rank_method_str,'RRA'),
+        disp(strcat('Wrong input: ',rank_method_str));
+        disp(strcat('Input should be MEDIAN, RRA or SUM'));
+        return;
+    end
+    %copy data from input mutation_data
+    samples = {};
+    for i = 1:length(mutation_data),
+        samples.DNA{i} = upper(mutation_data{i});
+        samples.DNA_network{i} = upper(intersect(network_genes,samples.DNA{i}));
+    end
+    clear mutation_data;
+    %read differentially expressed genes
+    diff_expr_genes = {};
+    if choose_mut_diff~=1,
+        diff_expr_genes = read_diff_expr( filenameRNA, filenamePR );
+        if isempty(diff_expr_genes),
+            choose_mut_diff=1;
+        end
+    else
+        disp('No Input files were provided for differentially expressed genes.');
+    end
+    for i = 1:length(samples.DNA),
+        samples.RNA{i} = diff_expr_genes;
+        samples.RNA_network{i} = upper(intersect(network_genes,samples.RNA{i}));
+    end
+    %compute diffused matrix
+    disp('Computing diffused matrix...');
+    F = insulated_diff(norm_adj(adj), restart_prob);
+    F_opp = insulated_diff(norm_adj(adj'), restart_prob);
+    disp('Running NetICS...');
+    [ranked_list_genes, scores] = prioritization( samples, F, F_opp, network_genes, choose_mut_diff, rank_method_str);
+end
+function [ mutation_data ] = read_mutations( filename )
+    fid = fopen(filename,'r');
+    g = textscan(fid, '%s%s', 'delimiter', '\t');
+    fclose(fid);
+    unique_samples = unique(g{2});
+    mutation_data = cell(length(unique_samples),1);
+    for i = 1:length(unique_samples),
+        mutation_data{i} = g{1}(ismember(g{2},unique_samples{i}));
+    end
+end
+function [ diff_expr_genes ] = read_diff_expr( filenameDE, filenamePR )
+    diff_expr_genes ={}; RNA_names={}; rppa_names={};
+    if ~isempty(filenamePR),
+        [rppa_names, rppa_pval] = read_file(filenamePR);
+    else
+        disp('No Input file was provided for differentially expressed genes at the proteome level.');
+    end
+    if ~isempty(filenameDE),
+        [RNA_names, RNA_pval] = read_file(filenameDE);
+    else
+        disp('No Input file was provided for differentially expressed genes at the RNA level.');
+    end
+    %check for duplicate genes in the input files
+    if (length(unique(RNA_names)) ~= length(RNA_names)) || (length(unique(rppa_names)) ~= length(rppa_names)),
+        disp('Input files for differentially expressed genes should contain only one entry per gene.');
+        disp('Input files for differentially expressed genes were ignored.');
+        return;
+    end
+    %check if the input files for differential expression are empty
+    if isempty(rppa_names) && isempty(RNA_names),
+        disp('Only genetically aberrant genes are used for diffusion.');
+        return;
+    %only differential expressed genes at the RNA level are provided
+    elseif isempty(rppa_names) && ~isempty(RNA_names),
+        [~,~,pval_all] = fdr(RNA_pval,0.05);
+        diff_expr_genes = RNA_names(pval_all < 0.05);
+    %only differential expressed genes at the protein level are provided
+    elseif isempty(RNA_names) && ~isempty(rppa_names),
+        [~,~,pval_all] = fdr(rppa_pval,0.05);
+        diff_expr_genes = rppa_names(pval_all < 0.05);
+    else
+        names_intersection = intersect(RNA_names,rppa_names);
+        names_setdiff_rna = setdiff(RNA_names,rppa_names);
+        names_setdiff_pr = setdiff(rppa_names,RNA_names);
+        rna_pval_names_intersection = RNA_pval(ismember(RNA_names,names_intersection));
+        rppa_pval_names_intersection = rppa_pval(ismember(rppa_names,names_intersection));
+        rna_pval_names_setdiff_rna = RNA_pval(ismember(RNA_names,names_setdiff_rna));
+        pr_pval_names_setdiff_pr = rppa_pval(ismember(rppa_names,names_setdiff_pr));
+        all_names = [names_intersection' names_setdiff_rna' names_setdiff_pr'];
+        %Fishers method
+        pval_all = (1-pchisq(-2*(log(rna_pval_names_intersection)+log(rppa_pval_names_intersection)),4));
+        pval_all = [pval_all' rna_pval_names_setdiff_rna' pr_pval_names_setdiff_pr'];
+        [~,~,pval_all] = fdr(pval_all,0.05);
+        diff_expr_genes = all_names(pval_all < 0.05);
+    end
+end
+function [names, pval] = read_file(filenamePR)
+    fid = fopen(filenamePR,'r');
+    g = textscan(fid, '%s%s', 'delimiter', '\t');
+    fclose(fid);
+    names = g{1}; pval = str2double(g{2});
+end