COPD_multiomics / Git / Diff of /4-Multi-Omic Integration/scripts/4

Models:
AlyssaS/
COPD_multiomics
Downloads: 1
Diff of /4-Multi-Omic Integration/scripts/4_cmpd2metabo.pl [000000] .. [16eabd]
Switch to side-by-side view

--- a
+++ b/4-Multi-Omic Integration/scripts/4_cmpd2metabo.pl
@@ -0,0 +1,85 @@
+#!/usr/bin/env perl
+## This script was used to extract ID mapping information between metabolites in metabolomic data and MetaCyc compounds ##
+
+
+## Step 1: scan compounds.tab line by line and write one tab-separated row per
+## input line to metacyc_compounds_IDs_SMILEs.tab, with the columns
+##   UNIQUE-ID, PUBCHEM, CHEBI, LIGAND-CPD, HMDB, SMILES
+## A column is left empty when the line carries no matching attribute.
+## NOTE(review): this presumably expects each line of compounds.tab to hold one
+## complete compound record -- confirm against how compounds.tab is produced.
+open (my $cmpd_in, '<', "compounds.tab")
+	or die "Cannot read compounds.tab: $!\n";
+open (my $ids_out, '>', "metacyc_compounds_IDs_SMILEs.tab")
+	or die "Cannot write metacyc_compounds_IDs_SMILEs.tab: $!\n";
+
+## One capture pattern per output column, in output order.
+my @field_patterns = (
+	qr/^UNIQUE-ID - (\S+)/,
+	qr/DBLINKS - \(PUBCHEM "(\d+)"/,
+	qr/DBLINKS - \(CHEBI "(\d+)"/,
+	qr/DBLINKS - \(LIGAND-CPD "(C\d+)"/,
+	qr/DBLINKS - \(HMDB "(HMDB\d+)"/,
+	qr/SMILES - (\S+)/,
+);
+
+while (my $line = <$cmpd_in>) {
+	# chomp (not chop) so a final line without a newline keeps its last character
+	chomp $line;
+	my @fields;
+	for my $pattern (@field_patterns) {
+		push @fields, ($line =~ $pattern) ? $1 : '';
+	}
+	print {$ids_out} join("\t", @fields)."\n";
+}
+close ($cmpd_in);
+## The table is read back in by the next section of this script, so it must be
+## flushed and closed here; otherwise buffered rows may be missing from that read.
+close ($ids_out)
+	or die "Cannot finish writing metacyc_compounds_IDs_SMILEs.tab: $!\n";
+
+## Step 2: load metacyc_compounds_IDs_SMILEs.tab into lookup hashes that map an
+## external identifier to the value in column 0 of the same row:
+##   %kegg    keyed by column 3
+##   %hmdb    keyed by column 4, normalised to "HMDB" + 7 digits
+##   %pubchem keyed by column 1
+##   %chebi   keyed by column 2
+## The hashes are deliberately left as package globals because the matching
+## section further down in this script reads them by these names.
+open (my $ids_in, '<', "metacyc_compounds_IDs_SMILEs.tab")
+	or die "Cannot read metacyc_compounds_IDs_SMILEs.tab: $!\n";
+while (my $row = <$ids_in>) {
+	chomp $row;
+	my ($metacyc_id, $pubchem_id, $chebi_id, $kegg_id, $hmdb_id) = split("\t", $row);
+	if (defined $kegg_id && $kegg_id ne '') {
+		$kegg{$kegg_id} = $metacyc_id;
+	}
+	if (defined $hmdb_id && $hmdb_id =~ /HMDB(\d+)/) {
+		# Zero-pad the numeric part to 7 digits. For 5-digit accessions this
+		# gives the same key as prefixing "HMDB00"; accessions that already
+		# have 7 digits are kept as they are instead of growing to 9 digits.
+		$hmdb{ sprintf("HMDB%07d", $1) } = $metacyc_id;
+	}
+	if (defined $pubchem_id && $pubchem_id ne '') {
+		$pubchem{$pubchem_id} = $metacyc_id;
+	}
+	if (defined $chebi_id && $chebi_id ne '') {
+		$chebi{$chebi_id} = $metacyc_id;
+	}
+}
+close ($ids_in);
+
+## Step 3: for every data row of cmpd_description.txt, collect the distinct
+## values found in the lookup hashes (%kegg, %hmdb, %pubchem, %chebi, filled
+## earlier in this script) and write one "column0<TAB>match" line per distinct
+## match to cmpd2metabo_IDmatch.txt, sorted by match.
+open (my $desc_in, '<', "cmpd_description.txt")
+	or die "Cannot read cmpd_description.txt: $!\n";
+open (my $match_out, '>', "cmpd2metabo_IDmatch.txt")
+	or die "Cannot write cmpd2metabo_IDmatch.txt: $!\n";
+
+my $header_line = <$desc_in>;	# first line is discarded, not matched
+while (my $row = <$desc_in>) {
+	# Strip the line ending only (LF or CRLF) so the last column is never
+	# truncated and never carries a stray carriage return.
+	$row =~ s/\r?\n\z//;
+	my @col = split("\t", $row);
+	next unless @col;	# blank line: nothing to match
+
+	# Column 0 is looked up in %kegg when it starts with "C", otherwise in
+	# %hmdb; columns 3-6 are always looked up in the same tables.
+	my @lookups = (
+		[ ($col[0] =~ /^C/ ? \%kegg : \%hmdb), $col[0] ],
+		[ \%hmdb,    $col[3] ],
+		[ \%pubchem, $col[4] ],
+		[ \%chebi,   $col[5] ],
+		[ \%kegg,    $col[6] ],
+	);
+
+	my %match = ();
+	for my $lookup (@lookups) {
+		my ($table, $query) = @$lookup;
+		next unless defined $query && $query ne '';
+		my $hit = $table->{$query};
+		next unless defined $hit && $hit ne '';
+		$match{$hit} = 1;	# hash keys de-duplicate repeated matches
+	}
+	for my $hit (sort keys %match) {
+		print {$match_out} $col[0]."\t".$hit."\n";
+	}
+}
+close ($desc_in);
+close ($match_out)
+	or die "Cannot finish writing cmpd2metabo_IDmatch.txt: $!\n";