#!/usr/bin/env perl
# Source file: src/trust-barcoderep-to-10X-v0.2.pl
#
# Converts a TRUST barcode report (TSV) into two 10X-style CSV reports,
# <prefix>_t.csv and <prefix>_b.csv.

use strict ;
use warnings ;

# Both arguments are required: the script reads $ARGV[0] as the input report
# and uses $ARGV[1] as the prefix of the output files.
die "Usage: ./trust-barcoderep-to-10X.pl trust_barcode_report.tsv 10X_report_prefix\n" if (@ARGV < 2) ;

# Determine which receptor chain a set of gene names belongs to.
#
# Arguments: any number of gene-name strings (the caller passes the V, J and
#            C gene of one chain). Undefined entries are skipped.
# Returns:   the index of the first recognised prefix found while scanning the
#            arguments in order:
#              0 IGH, 1 IGK, 2 IGL, 3 TRA, 4 TRB, 5 TRG, 6 TRD
#            or 7 when no argument starts with one of these prefixes.
sub GetDetailChainType
{
    my %chainIndex = (
        "IGH" => 0, "IGK" => 1, "IGL" => 2,
        "TRA" => 3, "TRB" => 4, "TRG" => 5, "TRD" => 6
    ) ;

    foreach my $g (@_)
    {
        # A chain record with missing fields yields undef here; ignore it
        # instead of triggering an "uninitialized value" warning.
        next if ( !defined $g ) ;

        if ( $g =~ /^(IGH|IGK|IGL|TRA|TRB|TRG|TRD)/ )
        {
            return $chainIndex{$1} ;
        }
    }
    return 7 ;
}

# Decide whether a CDR3 amino-acid string counts as productive.
#
# Argument: the CDR3 amino-acid string of one chain.
# Returns:  0 when the string is undefined, equals "partial", or contains
#           an underscore or a question mark; 1 otherwise.
sub IsProductive
{
    my ($aa) = @_ ;

    # A missing CDR3 cannot be productive (and must not reach the string
    # comparisons below, which would warn on undef).
    return 0 if ( !defined $aa ) ;

    return 0 if ( $aa eq "partial" || $aa =~ /[_?]/ ) ;
    return 1 ;
}

# Main conversion: read the TRUST barcode report named by $ARGV[0] and write
# one 10X-style CSV row per reported chain to <prefix>_t.csv (cell type ending
# in "T") or <prefix>_b.csv (everything else), where <prefix> is $ARGV[1].

# Chain names indexed by the return value of GetDetailChainType.
my @chainName = ("IGH", "IGK", "IGL", "TRA", "TRB", "TRG", "TRD", "None") ;

my $prefix = $ARGV[1] ;
my $tPath = $prefix."_t.csv" ;
my $bPath = $prefix."_b.csv" ;

open my $fpIn, "<", $ARGV[0] or die "Cannot open $ARGV[0] for reading: $!\n" ;
open my $fpOutT, ">", $tPath or die "Cannot open $tPath for writing: $!\n" ;
open my $fpOutB, ">", $bPath or die "Cannot open $bPath for writing: $!\n" ;

my $csvHeader = "barcode,is_cell,contig_id,high_confidence,length,chain,v_gene,d_gene,j_gene,c_gene,full_length,productive,cdr3,cdr3_nt,reads,umis,raw_clonotype_id,raw_consensus_id\n" ;
print {$fpOutT} $csvHeader ;
print {$fpOutB} $csvHeader ;

# The first line of the report is its header; read and discard it.
<$fpIn> ;

while (my $line = <$fpIn>)
{
    chomp $line ;
    my @cols = split ' ', $line ;

    # Blank or truncated lines carry no barcode/cell-type pair to report.
    next if (@cols < 2) ;

    # Example input line:
    #AACTCTTGTTATCCGA-1 abT *   TRAV6*01,*,TRAJ43*01,*,TGTGCTCTAGCCGGGGAGGGCATGCGCTTT,CALAGEGMRF,2.80,AACTCTTGTTATCCGA-1_26095,100.00   *   *
    # Columns 2 and 3 hold the two chains of the cell; "*" means absent.
    foreach my $i (2 .. 3)
    {
        next if (!defined $cols[$i] || $cols[$i] eq "*") ;
        my @chainCols = split /,/, $cols[$i] ;

        # Target format:
        #barcode,is_cell,contig_id,high_confidence,length,chain,v_gene,d_gene,j_gene,c_gene,full_length,productive,cdr3,cdr3_nt,reads,umis,raw_clonotype_id,raw_consensus_id
        #AAACCTGAGTCGATAA-1,True,AAACCTGAGTCGATAA-1_contig_1,True,551,IGK,IGKV1-5,None,IGKJ1,IGKC,True,True,CQQYNSYSRTF,TGCCAACAGTATAATAGTTATTCTCGAACGTTC,1197,22,clonotype77,clonotype77_consensus_2

        # V, D, J, C genes: the report's "*" placeholder becomes "None".
        my @genes = map { $_ eq "*" ? "None" : $_ } @chainCols[0 .. 3] ;

        # Productivity is judged on this chain's CDR3 amino-acid sequence
        # (chain field 5, the same value written to the cdr3 column), not on
        # a column of the whole report line.
        my $productive = IsProductive($chainCols[5]) ? "True" : "False" ;

        my @outputCols = ($cols[0], "True", "None", "True", "None",
            $chainName[GetDetailChainType($chainCols[0], $chainCols[2], $chainCols[3])],
            @genes,
            "None", $productive,
            $chainCols[5], $chainCols[4], $chainCols[6], $chainCols[6], "None", "None"
            ) ;

        # Cell types ending in "T" go to the T-cell report, the rest to the
        # B-cell report.
        my $fpOut = (substr($cols[1], -1) eq "T") ? $fpOutT : $fpOutB ;
        print {$fpOut} join(",", @outputCols), "\n" ;
    }
}

# Buffered write errors only surface at close, so check the output handles.
close $fpOutT or die "Error closing $tPath: $!\n" ;
close $fpOutB or die "Error closing $bPath: $!\n" ;
close $fpIn ;