|
a |
|
b/predictions.ipynb |
|
|
1 |
{ |
|
|
2 |
"cells": [ |
|
|
3 |
{ |
|
|
4 |
"cell_type": "code", |
|
|
5 |
"execution_count": 1, |
|
|
6 |
"metadata": {}, |
|
|
7 |
"outputs": [], |
|
|
8 |
"source": [ |
|
|
9 |
"import pytesseract\n", |
|
|
10 |
"tessdata_dir_config = '--tessdata-dir \"C:\\\\Program Files\\\\Tesseract-OCR\\\\tessdata\"'  # flag string naming the tessdata folder; not referenced by the other visible cells\n", |
|
|
11 |
"pytesseract.pytesseract.tesseract_cmd = 'C:\\\\Program Files\\\\Tesseract-OCR\\\\tesseract.exe'  # path of the tesseract.exe binary for pytesseract" |
|
|
12 |
] |
|
|
13 |
}, |
|
|
14 |
{ |
|
|
15 |
"cell_type": "code", |
|
|
16 |
"execution_count": 2, |
|
|
17 |
"metadata": {}, |
|
|
18 |
"outputs": [], |
|
|
19 |
"source": [ |
|
|
20 |
"import spacy\n", |
|
|
21 |
"from spacy import displacy" |
|
|
22 |
] |
|
|
23 |
}, |
|
|
24 |
{ |
|
|
25 |
"cell_type": "code", |
|
|
26 |
"execution_count": 3, |
|
|
27 |
"metadata": {}, |
|
|
28 |
"outputs": [], |
|
|
29 |
"source": [ |
|
|
30 |
"colors = {'DOCTOR': '#55fdf9', 'PATIENT': '#54b52d', 'BIRTHDATE': '#8752a1', 'DATE': '#ff91af', 'TYPE': '#999900', 'ADRESS': '#cd0000', 'REFERING_DOCTOR': '#0071f1'}  # entity label -> highlight color\n", |
|
|
31 |
"options = {'ents': list(colors), 'colors': colors}  # 'ents' lists every label that has a color, in the same order" |
|
|
32 |
] |
|
|
33 |
}, |
|
|
34 |
{ |
|
|
35 |
"cell_type": "code", |
|
|
36 |
"execution_count": 5, |
|
|
37 |
"metadata": {}, |
|
|
38 |
"outputs": [ |
|
|
39 |
{ |
|
|
40 |
"name": "stderr", |
|
|
41 |
"output_type": "stream", |
|
|
42 |
"text": [ |
|
|
43 |
"D:\\Anaconda\\lib\\site-packages\\spacy\\util.py:833: UserWarning: [W095] Model 'fr_pipeline' (0.0.0) was trained with spaCy v3.2 and may not be 100% compatible with the current version (3.2.0). If you see errors or degraded performance, download a newer compatible model or retrain your custom model with the current spaCy version. For more details and available updates, run: python -m spacy validate\n", |
|
|
44 |
" warnings.warn(warn_msg)\n" |
|
|
45 |
] |
|
|
46 |
} |
|
|
47 |
], |
|
|
48 |
"source": [ |
|
|
49 |
"nlp = spacy.load(name=\"./ner_model/output/model-best\")  # load the pipeline stored at this relative path" |
|
|
50 |
] |
|
|
51 |
}, |
|
|
52 |
{ |
|
|
53 |
"cell_type": "code", |
|
|
54 |
"execution_count": 6, |
|
|
55 |
"metadata": {}, |
|
|
56 |
"outputs": [ |
|
|
57 |
{ |
|
|
58 |
"data": { |
|
|
59 |
"text/html": [ |
|
|
60 |
"<span class=\"tex2jax_ignore\"><div class=\"entities\" style=\"line-height: 2.5; direction: ltr\">\n", |
|
|
61 |
"<mark class=\"entity\" style=\"background: #cd0000; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n", |
|
|
62 |
" Gien\n", |
|
|
63 |
" <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">ADRESS</span>\n", |
|
|
64 |
"</mark>\n", |
|
|
65 |
", le \n", |
|
|
66 |
"<mark class=\"entity\" style=\"background: #ff91af; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n", |
|
|
67 |
" 28/09/2010\n", |
|
|
68 |
" <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">DATE</span>\n", |
|
|
69 |
"</mark>\n", |
|
|
70 |
"</br>Monsieur \n", |
|
|
71 |
"<mark class=\"entity\" style=\"background: #54b52d; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n", |
|
|
72 |
" LEBROC Bernard\n", |
|
|
73 |
" <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">PATIENT</span>\n", |
|
|
74 |
"</mark>\n", |
|
|
75 |
"</br>BM/0077</br>Examen demandé par la Médecine Jaune.</br>\n", |
|
|
76 |
"<mark class=\"entity\" style=\"background: #999900; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n", |
|
|
77 |
" ECHOGRAPHIE\n", |
|
|
78 |
" <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">TYPE</span>\n", |
|
|
79 |
"</mark>\n", |
|
|
80 |
" VESICO-PROSTATIQUE ET RENALE (Hitachi Elite 8500</br>concept 26.02.2008)</br></br>Indication : néoplasie prostatique.</br>Rétention urinaire aigue.</br>RESULTAT :</br>Les coupes échographiques réalisées montrent des reins de volume et</br>d’échostructure normaux.</br></br>Il n’y à pas de dilatation des cavités pyélocalicielles ou de calcul rénal</br>visible.</br></br>Il existe une bonne différenciation cortico-médullaire et le parenchyme</br>rénal est d'épaisseur normale.</br></br>Il n’y à pas de syndrome tumoral tissulaire liquidien décelable.</br></br>Il n’y à pas d’épanchement péri ou para-rénal.</br>Au niveau pelvien, les bas uretères sont virtuels et la vessie est faiblement</br>remplie, transsonore à parois régulières sans image proliférative</br>endovésicale ou de calcul intraluminal.</br></br>La prostate par voie endorectale pèse environ 5 g ce jour et ses contours</br>sont réguliers et son échostructure est habituelle.</br></br>Le résidu post-mictionnel est non significatif.</br>CONCLUSION</br>L’examen retient un appareil urinaire échographiquement normal</br>ce jour et un petit résidu prostatique est d’environ 5g .</br>II n’y a pas d’autre anomalie.</br>\n", |
|
|
81 |
"<mark class=\"entity\" style=\"background: #55fdf9; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n", |
|
|
82 |
" Docteur J. NGUYEN HUU\n", |
|
|
83 |
"\n", |
|
|
84 |
" <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">DOCTOR</span>\n", |
|
|
85 |
"</mark>\n", |
|
|
86 |
"Nous avons conseillé au patient(e) de transmettre le compte rendu à son médecin traitant.</br></div></span>" |
|
|
87 |
], |
|
|
88 |
"text/plain": [ |
|
|
89 |
"<IPython.core.display.HTML object>" |
|
|
90 |
] |
|
|
91 |
}, |
|
|
92 |
"metadata": {}, |
|
|
93 |
"output_type": "display_data" |
|
|
94 |
} |
|
|
95 |
], |
|
|
96 |
"source": [ |
|
|
97 |
"with open('medical_report_text/repport_330.txt', mode='r') as report_file:\n", |
|
|
98 |
"    data = report_file.read()  # whole report as one string\n", |
|
|
99 |
"doc = nlp(data)  # the file is already closed here; only the text is needed\n", |
|
|
100 |
"displacy.render(doc, options=options, style=\"ent\", jupyter=True)" |
|
|
101 |
] |
|
|
102 |
}, |
|
|
103 |
{ |
|
|
104 |
"cell_type": "code", |
|
|
105 |
"execution_count": 7, |
|
|
106 |
"metadata": {}, |
|
|
107 |
"outputs": [ |
|
|
108 |
{ |
|
|
109 |
"data": { |
|
|
110 |
"text/html": [ |
|
|
111 |
"<span class=\"tex2jax_ignore\"><div class=\"entities\" style=\"line-height: 2.5; direction: ltr\">\n", |
|
|
112 |
"<mark class=\"entity\" style=\"background: #cd0000; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n", |
|
|
113 |
" Hirson\n", |
|
|
114 |
" <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">ADRESS</span>\n", |
|
|
115 |
"</mark>\n", |
|
|
116 |
" le \n", |
|
|
117 |
"<mark class=\"entity\" style=\"background: #ff91af; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n", |
|
|
118 |
" 02/12/2019\n", |
|
|
119 |
" <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">DATE</span>\n", |
|
|
120 |
"</mark>\n", |
|
|
121 |
"</br>Mon Cher Confrère,</br>Voici les clichés du thorax de :</br>M. \n", |
|
|
122 |
"<mark class=\"entity\" style=\"background: #54b52d; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n", |
|
|
123 |
" HERVIEU VINCENT\n", |
|
|
124 |
" <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">PATIENT</span>\n", |
|
|
125 |
"</mark>\n", |
|
|
126 |
" 6160221066</br></br>IPP: 709805</br>ACCESSION NUMBER : 72134051</br>\n", |
|
|
127 |
"<mark class=\"entity\" style=\"background: #999900; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n", |
|
|
128 |
" Conventionnelle Radio\n", |
|
|
129 |
" <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">TYPE</span>\n", |
|
|
130 |
"</mark>\n", |
|
|
131 |
" DLP : 6.3 dGy.cm° Contraste : N/C cc N/C</br>INDICATION :</br>radio de contrôle</br>TECHNIQUE :</br>POUMONS FACE + PROFIL</br>RESULTAT :</br></br>Pas d’anomalie de la silhouette cardio-médiastinale. Pas d’anomalie en projection des hiles pulmonaires. Pas de</br>syndrome bronchique ni d’image de dilatation des bronches. Pas d’anomalie du parenchyme pulmonaire. Pas</br>d’anomalie pleurale. Pas d’anomalie du cadre osseux et des parties molles.</br></br>CONCLUSION DE L'EXAMEN DE Monsieur \n", |
|
|
132 |
"<mark class=\"entity\" style=\"background: #54b52d; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n", |
|
|
133 |
" HERVIEU Vincent\n", |
|
|
134 |
" <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">PATIENT</span>\n", |
|
|
135 |
"</mark>\n", |
|
|
136 |
" :</br>Pas d'anomalie radiologique visible.</br></br>RADIOLOGUE SIGNATAIRE : \n", |
|
|
137 |
"<mark class=\"entity\" style=\"background: #55fdf9; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n", |
|
|
138 |
" Radiologue MBAPTE WAMBA John\n", |
|
|
139 |
" <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">DOCTOR</span>\n", |
|
|
140 |
"</mark>\n", |
|
|
141 |
" , Validation électronique</br>MEDECIN REQUERANT : \n", |
|
|
142 |
"<mark class=\"entity\" style=\"background: #0071f1; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n", |
|
|
143 |
" MIART LAURENT\n", |
|
|
144 |
" <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">REFERING_DOCTOR</span>\n", |
|
|
145 |
"</mark>\n", |
|
|
146 |
"</br></br>TRACABILITE HORAIRE : Demande : 15:59 - Protocole : 15:59 - Images reçues : 15:59 - Validation : 16:48</br></div></span>" |
|
|
147 |
], |
|
|
148 |
"text/plain": [ |
|
|
149 |
"<IPython.core.display.HTML object>" |
|
|
150 |
] |
|
|
151 |
}, |
|
|
152 |
"metadata": {}, |
|
|
153 |
"output_type": "display_data" |
|
|
154 |
} |
|
|
155 |
], |
|
|
156 |
"source": [ |
|
|
157 |
"with open('medical_report_text/repport_350.txt', mode='r') as report_file:\n", |
|
|
158 |
"    data = report_file.read()  # whole report as one string\n", |
|
|
159 |
"doc = nlp(data)  # the file is already closed here; only the text is needed\n", |
|
|
160 |
"displacy.render(doc, options=options, style=\"ent\", jupyter=True)" |
|
|
161 |
] |
|
|
162 |
}, |
|
|
163 |
{ |
|
|
164 |
"cell_type": "code", |
|
|
165 |
"execution_count": null, |
|
|
166 |
"metadata": {}, |
|
|
167 |
"outputs": [], |
|
|
168 |
"source": [] |
|
|
169 |
} |
|
|
170 |
], |
|
|
171 |
"metadata": { |
|
|
172 |
"kernelspec": { |
|
|
173 |
"display_name": "Python 3", |
|
|
174 |
"language": "python", |
|
|
175 |
"name": "python3" |
|
|
176 |
}, |
|
|
177 |
"language_info": { |
|
|
178 |
"codemirror_mode": { |
|
|
179 |
"name": "ipython", |
|
|
180 |
"version": 3 |
|
|
181 |
}, |
|
|
182 |
"file_extension": ".py", |
|
|
183 |
"mimetype": "text/x-python", |
|
|
184 |
"name": "python", |
|
|
185 |
"nbconvert_exporter": "python", |
|
|
186 |
"pygments_lexer": "ipython3", |
|
|
187 |
"version": "3.7.9" |
|
|
188 |
} |
|
|
189 |
}, |
|
|
190 |
"nbformat": 4, |
|
|
191 |
"nbformat_minor": 4 |
|
|
192 |
} |