[16eabd]: / 4-Multi-Omic Integration / scripts / 4_metabolome2stitch.pl

Download this file

77 lines (70 with data), 1.8 kB

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
#!/usr/bin/perl
## This script was used to extract metabolite-CIDm and CIDm-host target
## mapping information from the STITCH database.
##
## NOTE: the lookup tables filled in below (%pc, %ps, %merge, %chebi, %kegg)
## are deliberately package globals, because later sections of this script
## read them.

## ---- Load the STITCH ID mapping file ------------------------------------
## For every non-comment line:
##   * every "CID" followed by one non-whitespace character is removed
##     (so a value such as "CIDm00000001" becomes "00000001");
##   * the line is split on tabs;
##   * field 0, re-prefixed with "CIDm", is stored under several keys:
##       %pc    - number following "PC<TAB>"
##       %ps    - number following "PS<TAB>"
##       %merge - field 1 of the line
##       %chebi - number following "CHEBI:"
##       %kegg  - field 3 of any line containing "KEGG"
my $sources_file = "chemical.sources.v5.0.tsv";    #### stitch id mapping file ####
open(my $sources_fh, '<', $sources_file)
    or die "Cannot open $sources_file for reading: $!\n";

while (my $line = <$sources_fh>) {
    # chomp (not chop) so a final line without a newline keeps its last character.
    chomp $line;
    next if $line =~ /^#/;

    $line =~ s/CID\S//g;
    my @fields = split("\t", $line);
    my $cidm   = "CIDm" . $fields[0];

    if ($line =~ /PC\t(\d+)/) {
        $pc{$1} = $cidm;
    }
    if ($line =~ /PS\t(\d+)/) {
        $ps{$1} = $cidm;
    }

    # Recorded for every line, whatever the source database is.
    $merge{$fields[1]} = $cidm;

    if ($line =~ /CHEBI:(\d+)/) {
        $chebi{$1} = $cidm;
    }
    if ($line =~ /KEGG/) {
        $kegg{$fields[3]} = $cidm;
    }
}
close($sources_fh);
## ---- Map every metabolite to its STITCH CIDm identifiers ----------------
## Reads metabolite_IDs.txt (tab-separated) and, using the global lookup
## tables %ps, %merge, %pc, %chebi and %kegg, writes one tab-separated row
## per metabolite to metabo2CIDm.txt. Rows whose two lookups in the
## consistency check differ are reported on STDERR instead of being written.
my $metabolite_file = "metabolite_IDs.txt";    ### metabolite IDs list ###
my $metabo2cidm_file = "metabo2CIDm.txt";      ## output ##
open(my $metabolite_fh, '<', $metabolite_file)
    or die "Cannot open $metabolite_file for reading: $!\n";
open(my $metabo2cidm_fh, '>', $metabo2cidm_file)
    or die "Cannot open $metabo2cidm_file for writing: $!\n";

# Iterate over the metabolite list handle opened just above.
while (my $row = <$metabolite_fh>) {
    # chomp (not chop) so a final line without a newline keeps its last character.
    chomp $row;
    my @col = split("\t", $row);

    # NOTE(review): this check keys %chebi by column 5 and %merge by column 4,
    # whereas the output row below keys %merge by column 3, %chebi by column 4
    # and %kegg by column 5. The indices are kept as found - confirm them
    # against the real column layout of metabolite_IDs.txt.
    if ($chebi{$col[5]} ne $merge{$col[4]}) {
        ### raise a flag if IDs do not match based on different mapping rules
        print STDERR "chebi and pubchem IDs for $col[0] do not match\n";
    }
    else {
        print {$metabo2cidm_fh} join("\t",
            $col[0],
            $ps{$col[3]},
            $merge{$col[3]},
            $pc{$col[3]},
            $chebi{$col[4]},
            $kegg{$col[0]},
            $kegg{$col[5]},
        ) . "\n";
    }
}
close($metabolite_fh);
# Buffered write errors only surface at close, so check it.
close($metabo2cidm_fh)
    or die "Cannot finish writing $metabo2cidm_file: $!\n";
## ---- Load protein-chemical link scores ----------------------------------
## Fills the global %score: $score{field0}{field1} holds fields 2..6 of the
## line joined by tabs. The "9606." prefix is stripped from the whole line
## first. Lines whose field 6 is numerically below the threshold are skipped
## (a non-numeric header value evaluates to 0 and is therefore skipped too).
## NOTE(review): field 6 is presumably the STITCH combined score - confirm
## against the header of the links file.
my $links_file     = "9606.protein_chemical.links.detailed.v5.0.tsv";
my $min_link_score = 700;
open(my $links_fh, '<', $links_file)
    or die "Cannot open $links_file for reading: $!\n";

while (my $line = <$links_fh>) {
    # chomp (not chop) so a final line without a newline keeps its last character.
    chomp $line;
    $line =~ s/9606\.//g;
    my @fields = split("\t", $line);
    next if $fields[6] < $min_link_score;
    $score{$fields[0]}{$fields[1]} = join("\t", @fields[2 .. 6]);
}
close($links_fh);
## ---- Load interaction actions --------------------------------------------
## Fills the global %int symmetrically: field 2 of each kept line is stored
## under both $int{field0}{field1} and $int{field1}{field0}. The "9606."
## prefix is stripped from the whole line first. Lines whose field 5 is
## numerically below the threshold are skipped (a non-numeric header value
## evaluates to 0 and is therefore skipped too).
## NOTE(review): field 5 is presumably the action score and field 2 the
## interaction mode - confirm against the header of the actions file.
my $actions_file     = "9606.actions.v5.0.tsv";
my $min_action_score = 700;
open(my $actions_fh, '<', $actions_file)
    or die "Cannot open $actions_file for reading: $!\n";

while (my $line = <$actions_fh>) {
    # chomp (not chop) so a final line without a newline keeps its last character.
    chomp $line;
    $line =~ s/9606\.//g;
    my @fields = split("\t", $line);
    next if $fields[5] < $min_action_score;

    my ($left, $right, $mode) = @fields[0 .. 2];
    $int{$left}{$right} = $mode;
    $int{$right}{$left} = $mode;
}
close($actions_fh);
## ---- Load the human gene ID table ----------------------------------------
## Fills the global %geneid: keyed by field 2 of each tab-separated line, the
## value is field 9 and field 8 (in that order) joined by a tab.
## NOTE(review): the meaning of these columns is not visible here - confirm
## against the layout of human_gene_ids.txt.
my $gene_ids_file = "human_gene_ids.txt";
open(my $gene_ids_fh, '<', $gene_ids_file)
    or die "Cannot open $gene_ids_file for reading: $!\n";

while (my $line = <$gene_ids_fh>) {
    # chomp (not chop) so a final line without a newline keeps its last character.
    chomp $line;
    my @fields = split("\t", $line);
    $geneid{$fields[2]} = join("\t", $fields[9], $fields[8]);
}
close($gene_ids_fh);
## ---- Write the interaction partners of every listed CIDm compound -------
## Reads one compound ID per line from all_CIDm.txt. For each ID present in
## the global %int, writes one tab-separated row per interaction partner to
## all_CIDm_targets.txt:
##   compound ID, partner ID, $geneid{partner}, $int{ID}{partner},
##   $score{ID}{partner}
## Lookups that have no entry are written as empty fields.
my $cidm_list_file = "all_CIDm.txt";            #### stitch CIDm compound ID list #####
my $targets_file   = "all_CIDm_targets.txt";    ## output ##
open(my $cidm_list_fh, '<', $cidm_list_file)
    or die "Cannot open $cidm_list_file for reading: $!\n";
open(my $targets_fh, '>', $targets_file)
    or die "Cannot open $targets_file for writing: $!\n";

# Iterate over the compound list handle opened just above.
while (my $id = <$cidm_list_fh>) {
    # chomp (not chop) so a final line without a newline keeps its last character.
    chomp $id;
    next unless exists $int{$id};

    # Partners are sorted so the output order is reproducible between runs.
    for my $partner (sort keys %{ $int{$id} }) {
        print {$targets_fh} join("\t",
            $id,
            $partner,
            $geneid{$partner},
            $int{$id}{$partner},
            $score{$id}{$partner},
        ) . "\n";
    }
}
close($cidm_list_fh);
# Buffered write errors only surface at close, so check it.
close($targets_fh)
    or die "Cannot finish writing $targets_file: $!\n";