|
a |
|
b/src/trust-barcoderep-to-10X-v0.2.pl |
|
|
1 |
#!/usr/bin/env perl |
|
|
2 |
|
|
|
3 |
use strict ; |
|
|
4 |
use warnings ; |
|
|
5 |
|
|
|
6 |
# Both positional arguments are required: the TRUST4 barcode report to read
# ($ARGV[0]) and the prefix of the two CSV files to write ($ARGV[1]).
# Checking only for an empty @ARGV would let a single-argument call run with
# an undefined output prefix.
die "Usage: ./trust-barcoderep-to-10X.pl trust_barcode_report.tsv 10X_report_prefix\n" if (@ARGV < 2) ;
|
|
7 |
|
|
|
8 |
# Map a list of gene names to a numeric chain type.
#
# Arguments: any number of gene-name strings (the caller passes the V, J and
#            C gene of one chain).
# Returns:   0..6 for the first argument that starts with IGH, IGK, IGL, TRA,
#            TRB, TRG or TRD respectively; 7 when no argument starts with any
#            of these prefixes (including an empty argument list).
#
# Arguments are examined in order, so the first gene with a recognized prefix
# decides the result.
sub GetDetailChainType
{
    my @chainPrefix = ("IGH", "IGK", "IGL", "TRA", "TRB", "TRG", "TRD") ;

    foreach my $gene (@_)
    {
        foreach my $type (0 .. $#chainPrefix)
        {
            my $prefix = $chainPrefix[$type] ;
            return $type if ( $gene =~ /^\Q$prefix\E/ ) ;
        }
    }

    # No recognized prefix: one past the last known chain type.
    return scalar(@chainPrefix) ;
}
|
|
43 |
|
|
|
44 |
# Decide whether a CDR3 amino-acid string counts as productive.
#
# Argument: the string to inspect.
# Returns:  0 when the string is exactly "partial" or contains an underscore
#           or a question mark; 1 otherwise.
sub IsProductive
{
    my ($aaSeq) = @_ ;

    if ( $aaSeq ne "partial" && $aaSeq !~ /_/ && $aaSeq !~ /\?/ )
    {
        return 1 ;
    }
    return 0 ;
}
|
|
50 |
|
|
|
51 |
# Main conversion: read the barcode report named by $ARGV[0] and write one CSV
# row per reported chain to "<prefix>_t.csv" or "<prefix>_b.csv".

# Chain labels for the "chain" column, indexed by the value returned from
# GetDetailChainType (index 7 is the "None" fallback).
my @chainName = ("IGH", "IGK", "IGL", "TRA", "TRB", "TRG", "TRD", "None") ;
my $prefix = $ARGV[1] ;

# Three-argument open with lexical handles; fail loudly instead of silently
# reading from / writing to an unopened handle.
open(my $fpIn, "<", $ARGV[0])
    or die "Could not open $ARGV[0] for reading: $!\n" ;
open(my $fpOutT, ">", $prefix."_t.csv")
    or die "Could not open ${prefix}_t.csv for writing: $!\n" ;
open(my $fpOutB, ">", $prefix."_b.csv")
    or die "Could not open ${prefix}_b.csv for writing: $!\n" ;

# Both output files share the same header row.
my $csvHeader = "barcode,is_cell,contig_id,high_confidence,length,chain,v_gene,d_gene,j_gene,c_gene,full_length,productive,cdr3,cdr3_nt,reads,umis,raw_clonotype_id,raw_consensus_id\n" ;
print {$fpOutT} $csvHeader ;
print {$fpOutB} $csvHeader ;

# The first line of the report is a header and is discarded.
my $header = <$fpIn> ;
while (my $line = <$fpIn>)
{
    chomp $line ;
    # Whitespace-separated columns: barcode, cell type, then chain fields.
    my @cols = split ' ', $line ;
    #AACTCTTGTTATCCGA-1 abT * TRAV6*01,*,TRAJ43*01,*,TGTGCTCTAGCCGGGGAGGGCATGCGCTTT,CALAGEGMRF,2.80,AACTCTTGTTATCCGA-1_26095,100.00 * *

    # Only columns 2 and 3 are converted; "*" marks an absent chain.
    foreach my $i (2, 3)
    {
        # Short or blank lines have no such column; skip them as well.
        next if ( !defined $cols[$i] || $cols[$i] eq "*" ) ;

        # Comma-separated chain fields. As used below: [0]..[3] are the
        # V, D, J and C genes, [4] the CDR3 nucleotide sequence, [5] the CDR3
        # amino-acid sequence and [6] the value reported as reads/umis.
        my @chainCols = split /,/, $cols[$i] ;
        #barcode,is_cell,contig_id,high_confidence,length,chain,v_gene,d_gene,j_gene,c_gene,full_length,productive,cdr3,cdr3_nt,reads,umis,raw_clonotype_id,raw_consensus_id
        #AAACCTGAGTCGATAA-1,True,AAACCTGAGTCGATAA-1_contig_1,True,551,IGK,IGKV1-5,None,IGKJ1,IGKC,True,True,CQQYNSYSRTF,TGCCAACAGTATAATAGTTATTCTCGAACGTTC,1197,22,clonotype77,clonotype77_consensus_2
        my @outputCols = ($cols[0], "True", "None", "True", "None",
            $chainName[GetDetailChainType($chainCols[0], $chainCols[2], $chainCols[3])],
            $chainCols[0] eq "*" ? "None": $chainCols[0],
            $chainCols[1] eq "*" ? "None": $chainCols[1],
            $chainCols[2] eq "*" ? "None": $chainCols[2],
            $chainCols[3] eq "*" ? "None": $chainCols[3],
            # Productivity is judged on this chain's CDR3 amino-acid
            # sequence (the same field written to the cdr3 column), not on
            # a column of the whole report line.
            "None", IsProductive($chainCols[5]) ? "True" : "False",
            $chainCols[5], $chainCols[4], $chainCols[6], $chainCols[6], "None", "None"
        ) ;

        # Cell types ending in "T" go to the T file, everything else to the
        # B file.
        my $fpOut = (substr($cols[1], -1) eq "T") ? $fpOutT : $fpOutB ;
        print {$fpOut} join(",", @outputCols), "\n" ;
    }
}

# Buffered write errors only surface at close, so check the output handles.
close($fpOutT) or die "Could not close ${prefix}_t.csv: $!\n" ;
close($fpOutB) or die "Could not close ${prefix}_b.csv: $!\n" ;
close($fpIn) ;