Diff of /src/combine.py [000000] .. [0aa069]

Switch to unified view

a b/src/combine.py
1
import csv
2
from glob import glob
3
import re
4
5
# Combine per-sample miRNA quantification tables into one whitespace-separated
# text file: one record per input file, made of
#   <patient number> <cancer type> <tissue type> <value> <value> ...
# where each value is column index 2 of a data row of the input table.

# NOTE(review): without ``recursive=True`` (Python 3.5+) the ``**`` component
# behaves like ``*`` and matches exactly one directory level -- confirm that
# this is the intended layout before changing it.
FILES = glob("F:/NYU/Hackathon/**/*.mirna.quantification.txt")
print(len(FILES))
# Patient numbers are handed out in iteration order, so sort for a
# reproducible numbering between runs.
FILES.sort()
OUT_FILE = 'F:/NYU/Hackathon/output_file'

# Barcode embedded in each path; group 1 is used as the patient code and
# group 2 (two digits) as the tissue type.
BARCODE_RE = re.compile(r"TCGA-..-([A-Za-z0-9]{4,4})-(\d{2,2})[A-Za-z]")

# Maps a patient code to a sequential integer, starting at 1.
patients = dict()

# Append mode: records from earlier runs are kept. The ``with`` block makes
# sure the output is flushed and closed even if a file fails half-way.
with open(OUT_FILE, 'a') as output_writer:
    for path in FILES:
        print("processing file  {0}".format(path))

        # Validate the name before touching the output so that a bad file
        # does not leave a dangling empty record behind.
        match = BARCODE_RE.search(path)
        if match is None:
            raise ValueError(
                "Cannot find a TCGA barcode in file name: {0}".format(path))
        patient_code = match.group(1)
        tissue_type = match.group(2)

        # First sighting of a patient gets the next free number.
        patient_id = patients.setdefault(patient_code, len(patients) + 1)

        # Paths containing "PAAD" are cancer type 1, everything else is 2.
        cancer_type = 1 if "PAAD" in path else 2

        # Each record starts on a fresh line (the file therefore never ends
        # with a newline).
        output_writer.write("\n")
        output_writer.write(
            "{0} {1} {2} ".format(patient_id, cancer_type, tissue_type))

        with open(path, 'r') as table:
            reader = csv.reader(table, delimiter='\t')
            next(reader, None)  # the first row is the header, not data
            for row in reader:
                if not row:
                    # Blank line: not a record, nothing to emit.
                    continue

                # A row that is too short is handled like an unparsable
                # value: float('') raises ValueError below, so the record
                # keeps its column alignment.
                raw_value = row[2] if len(row) > 2 else ''
                try:
                    if raw_value == 'NA':
                        reads_per_million = 0.0
                    else:
                        reads_per_million = float(raw_value)
                except ValueError:
                    reads_per_million = 0.0
                    print(
                        "Exception wile reading line  {0}  from file  {1}"
                        "   Expeced a float. Got  {2}".format(
                            reader.line_num, path, raw_value))

                output_writer.write(str(reads_per_million))
                output_writer.write(" ")