[16eabd]: / 4-Multi-Omic Integration / scripts / 4_cmpd2metabo.pl

Download this file

86 lines (81 with data), 2.0 kB

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
#!/usr/bin/env perl
## This script extracts ID mapping information between the metabolites in the
## metabolomic data and MetaCyc compounds.
##
## Inputs (read from the current directory):
##   compounds.tab         MetaCyc compound export. Every pattern below is
##                         applied to one line at a time, so all attributes of
##                         a compound are presumably on a single line
##                         (TODO confirm against the file that is actually used).
##   cmpd_description.txt  Tab-separated metabolite table with one header line.
##                         Column 0 is looked up as a KEGG ID when it starts
##                         with "C" and as an HMDB ID otherwise; columns 3, 4,
##                         5 and 6 are looked up as HMDB, PubChem, ChEBI and
##                         KEGG IDs respectively.
##
## Outputs (written to the current directory):
##   metacyc_compounds_IDs_SMILEs.tab  one row per input line:
##                                     id, pubchem, chebi, kegg, hmdb, smiles
##   cmpd2metabo_IDmatch.txt           one "<column 0>\t<MetaCyc id>" row per
##                                     distinct match
use strict;
use warnings;

my $COMPOUNDS_FILE   = 'compounds.tab';
my $ID_TABLE_FILE    = 'metacyc_compounds_IDs_SMILEs.tab';
my $DESCRIPTION_FILE = 'cmpd_description.txt';
my $MATCH_FILE       = 'cmpd2metabo_IDmatch.txt';

# Column order of the intermediate table, with the pattern that fills each
# column. Only the UNIQUE-ID pattern is anchored to the start of the line.
my @FIELD_PATTERNS = (
    [ id      => qr/^UNIQUE-ID - (\S+)/ ],
    [ pubchem => qr/DBLINKS - \(PUBCHEM "(\d+)"/ ],
    [ chebi   => qr/DBLINKS - \(CHEBI "(\d+)"/ ],
    [ kegg    => qr/DBLINKS - \(LIGAND-CPD "(C\d+)"/ ],
    [ hmdb    => qr/DBLINKS - \(HMDB "(HMDB\d+)"/ ],
    [ smiles  => qr/SMILES - (\S+)/ ],
);

# Databases for which an "external ID -> MetaCyc ID" lookup is built.
my @LOOKUP_DATABASES = qw(kegg hmdb pubchem chebi);

# Return an HMDB accession zero-padded to seven digits (HMDB0000123).
# The previous code prepended "00" unconditionally, which only gives seven
# digits for five-digit accessions; padding to a fixed width yields the same
# result for those and also handles accessions that are already seven digits.
# Anything that is not "HMDB" followed only by digits is returned unchanged.
sub normalize_hmdb {
    my ($accession) = @_;
    return $accession unless defined $accession && $accession =~ /^HMDB(\d+)$/;
    return sprintf 'HMDB%07d', $1;
}

# Remove a trailing LF or CRLF. Unlike chop, this never eats a real character
# when the last line of a file has no line terminator.
sub strip_eol {
    my ($text) = @_;
    $text =~ s/\r?\n\z//;
    return $text;
}

## Step 1: extract the IDs of every compound, write them to the intermediate
## table and build the lookups in the same pass. Building the lookups here
## (instead of re-reading the table) avoids reading a file whose last rows
## are still sitting in an unflushed output buffer.
my %metacyc_id_for = map { $_ => {} } @LOOKUP_DATABASES;

open my $compounds_fh, '<', $COMPOUNDS_FILE
    or die "Cannot open $COMPOUNDS_FILE for reading: $!";
open my $id_table_fh, '>', $ID_TABLE_FILE
    or die "Cannot open $ID_TABLE_FILE for writing: $!";

while (my $line = <$compounds_fh>) {
    $line = strip_eol($line);

    my @row;
    my %field;
    for my $spec (@FIELD_PATTERNS) {
        my ($name, $pattern) = @{$spec};
        my ($value) = $line =~ $pattern;
        $value = '' unless defined $value;    # missing attribute -> empty column
        push @row, $value;
        $field{$name} = $value;
    }
    print {$id_table_fh} join("\t", @row), "\n";

    # A line without a UNIQUE-ID has nothing to map to, so it must not
    # overwrite a mapping recorded for a real compound.
    next if $field{id} eq '';

    for my $db (@LOOKUP_DATABASES) {
        my $external_id = $field{$db};
        next if $external_id eq '';
        $external_id = normalize_hmdb($external_id) if $db eq 'hmdb';
        # Last compound wins when several share an external ID (as before).
        $metacyc_id_for{$db}{$external_id} = $field{id};
    }
}

close $compounds_fh or die "Error while reading $COMPOUNDS_FILE: $!";
close $id_table_fh  or die "Error while writing $ID_TABLE_FILE: $!";

## Step 2: match every metabolite against the MetaCyc lookups.
open my $description_fh, '<', $DESCRIPTION_FILE
    or die "Cannot open $DESCRIPTION_FILE for reading: $!";
open my $match_fh, '>', $MATCH_FILE
    or die "Cannot open $MATCH_FILE for writing: $!";

my $header = <$description_fh>;    # the first line is a header; discard it

while (my $line = <$description_fh>) {
    my @col = split /\t/, strip_eol($line);
    my $metabolite = defined $col[0] ? $col[0] : '';

    # [ database, external ID ] pairs to try for this metabolite.
    my @queries = (
        [ ($metabolite =~ /^C/ ? 'kegg' : 'hmdb'), $col[0] ],
        [ hmdb    => $col[3] ],
        [ pubchem => $col[4] ],
        [ chebi   => $col[5] ],
        [ kegg    => $col[6] ],
    );

    # Collect the distinct MetaCyc IDs reached through any of the queries.
    my %matched;
    for my $query (@queries) {
        my ($db, $external_id) = @{$query};
        next unless defined $external_id && $external_id ne '';
        $external_id = normalize_hmdb($external_id) if $db eq 'hmdb';
        my $metacyc_id = $metacyc_id_for{$db}{$external_id};
        $matched{$metacyc_id} = 1 if defined $metacyc_id;
    }

    for my $metacyc_id (sort keys %matched) {
        print {$match_fh} $metabolite . "\t" . $metacyc_id . "\n";
    }
}

close $description_fh or die "Error while reading $DESCRIPTION_FILE: $!";
close $match_fh       or die "Error while writing $MATCH_FILE: $!";