[c09aa8]: / scripts / EligibilityBulletPointsSuccessful.py

Download this file

161 lines (118 with data), 5.9 kB

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
#start with first demarcator
#have a flag that indicates if you've hit demarcator, if flag = 0, append cell to list
#remove words followed by colon
#when you hit colon, start deleting from end of buffer until reaching space
# search for either three spaces + hyphen + space ("   - ") or three spaces + a number + a period ("   1.")
# keep scanning cell until you hit a demarcator, then add everything into a bullet point
import csv
# Default file locations used when the script is run directly.
INPUT_PATH = "EligibilityCriteriaDocument.txt"
OUTPUT_PATH = "InclusionExclusionBulletPoint.tsv"


def _marker_end(text, start):
    """Return the index just past a bullet marker beginning at *start*.

    Two marker shapes are recognised, both introduced by three spaces:
    ``"   - "`` (hyphen bullet) and ``"   <digits>."`` (numbered bullet).
    Returns ``None`` when no marker begins at *start*.
    """
    if text[start:start + 3] != "   ":
        return None
    if text[start + 3:start + 5] == "- ":
        return start + 5
    length = len(text)
    if start + 4 < length and text[start + 3].isdigit():
        k = start + 4
        # Bounds-checked so a string that ends in digits cannot overrun.
        while k < length and text[k].isdigit():
            k += 1
        if k < length and text[k] == ".":
            return k + 1
    return None


def _split_bullets(text):
    """Split a criteria string into its bullet items.

    Text before the first marker is discarded.  A colon removes itself and
    the word immediately before it from the item being built.  Items are
    stripped of surrounding spaces.  When *text* contains no marker at all,
    the result is a single item holding *text* unchanged.
    """
    items = []
    buffer = ""
    found_marker = False
    length = len(text)
    i = 0
    # An explicit index is required: the scan has to jump over markers, and
    # reassigning the variable of a ``for ... in range`` loop would not.
    while i < length:
        char = text[i]
        if char == ":":
            # Delete back to (and including) the last space in the buffer;
            # with no space present the whole buffer is the labelled word.
            last_space = buffer.rfind(" ")
            buffer = buffer[:last_space] if last_space != -1 else ""
            i += 1
            continue
        end = _marker_end(text, i)
        if end is not None:
            if found_marker:
                items.append(buffer.strip(" "))
            buffer = ""
            found_marker = True
            i = end
            continue
        buffer += char
        i += 1
    if not found_marker:
        return [text]
    items.append(buffer.strip(" "))
    return items


def main(input_path=INPUT_PATH, output_path=OUTPUT_PATH):
    """Explode eligibility criteria into one TSV line per bullet point.

    Reads the tab-separated file *input_path* (latin-1) and writes
    *output_path* (latin-1).  Each output line has the form
    ``nctNumber \\t condition \\t label \\t item \\t intervention``.

    Columns read from each input row: 1 = NCT number, 3 = condition,
    4 = intervention, 5 = gender, 6 = age, 10 = inclusion text,
    11 = exclusion text.  Inclusion bullets, then gender and age, are
    written with the label ``indication``; exclusion bullets with the label
    ``counterindication``.  Rows with fewer than 12 columns (including
    blank lines) are skipped.
    """
    # The output file is opened first and both files are closed by the
    # ``with`` blocks even if a row raises.
    with open(output_path, "w", encoding="latin-1") as bullet_file:
        with open(input_path, "r", encoding="latin-1") as tsv_file:
            for row in csv.reader(tsv_file, delimiter='\t'):
                if len(row) < 12:
                    continue
                nct_number = row[1]
                condition = row[3]
                intervention = row[4]
                gender = row[5]
                age = row[6]

                def write_item(label, item):
                    bullet_file.write(
                        "\t".join(
                            (nct_number, condition, label, item, intervention)
                        )
                        + "\n"
                    )

                for item in _split_bullets(row[10]):
                    write_item("indication", item)
                write_item("indication", gender)
                write_item("indication", age)
                for item in _split_bullets(row[11]):
                    write_item("counterindication", item)


if __name__ == "__main__":
    main()