Diff of /predictions.ipynb [000000] .. [036ed5]

Switch to unified view

a b/predictions.ipynb
1
{
2
 "cells": [
3
  {
4
   "cell_type": "code",
5
   "execution_count": 1,
6
   "metadata": {},
7
   "outputs": [],
8
   "source": [
9
    "import pytesseract\n",
10
    "pytesseract.pytesseract.tesseract_cmd = r'C:\\Program Files\\Tesseract-OCR\\tesseract.exe'  # machine-specific location of the Tesseract binary\n",
11
    "tessdata_dir_config = r'--tessdata-dir \"C:\\Program Files\\Tesseract-OCR\\tessdata\"'  # not referenced by any other cell visible in this notebook"
12
   ]
13
  },
14
  {
15
   "cell_type": "code",
16
   "execution_count": 2,
17
   "metadata": {},
18
   "outputs": [],
19
   "source": [
20
    "import spacy\n",
21
    "from spacy import displacy"
22
   ]
23
  },
24
  {
25
   "cell_type": "code",
26
   "execution_count": 3,
27
   "metadata": {},
28
   "outputs": [],
29
   "source": [
30
    "colors = {'DOCTOR': '#55fdf9', 'PATIENT': '#54b52d', 'BIRTHDATE': '#8752a1', 'DATE': '#ff91af', 'TYPE': '#999900', 'ADRESS': '#cd0000', 'REFERING_DOCTOR': '#0071f1'}  # entity label -> highlight colour; spellings such as 'ADRESS' are the labels shown in the rendered output, keep them verbatim\n",
31
    "options = {'ents': list(colors), 'colors': colors}  # label list derived from colors (dict order is insertion order) so the two cannot drift apart"
32
   ]
33
  },
34
  {
35
   "cell_type": "code",
36
   "execution_count": 5,
37
   "metadata": {},
38
   "outputs": [
39
    {
40
     "name": "stderr",
41
     "output_type": "stream",
42
     "text": [
43
      "D:\\Anaconda\\lib\\site-packages\\spacy\\util.py:833: UserWarning: [W095] Model 'fr_pipeline' (0.0.0) was trained with spaCy v3.2 and may not be 100% compatible with the current version (3.2.0). If you see errors or degraded performance, download a newer compatible model or retrain your custom model with the current spaCy version. For more details and available updates, run: python -m spacy validate\n",
44
      "  warnings.warn(warn_msg)\n"
45
     ]
46
    }
47
   ],
48
   "source": [
49
    "nlp = spacy.load(\"./ner_model/output/model-best\")"
50
   ]
51
  },
52
  {
53
   "cell_type": "code",
54
   "execution_count": 6,
55
   "metadata": {},
56
   "outputs": [
57
    {
58
     "data": {
59
      "text/html": [
60
       "<span class=\"tex2jax_ignore\"><div class=\"entities\" style=\"line-height: 2.5; direction: ltr\">\n",
61
       "<mark class=\"entity\" style=\"background: #cd0000; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n",
62
       "    Gien\n",
63
       "    <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">ADRESS</span>\n",
64
       "</mark>\n",
65
       ", le \n",
66
       "<mark class=\"entity\" style=\"background: #ff91af; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n",
67
       "    28/09/2010\n",
68
       "    <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">DATE</span>\n",
69
       "</mark>\n",
70
       "</br>Monsieur \n",
71
       "<mark class=\"entity\" style=\"background: #54b52d; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n",
72
       "    LEBROC Bernard\n",
73
       "    <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">PATIENT</span>\n",
74
       "</mark>\n",
75
       "</br>BM/0077</br>Examen demandé par la Médecine Jaune.</br>\n",
76
       "<mark class=\"entity\" style=\"background: #999900; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n",
77
       "    ECHOGRAPHIE\n",
78
       "    <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">TYPE</span>\n",
79
       "</mark>\n",
80
       " VESICO-PROSTATIQUE ET RENALE (Hitachi Elite 8500</br>concept 26.02.2008)</br></br>Indication : néoplasie prostatique.</br>Rétention urinaire aigue.</br>RESULTAT :</br>Les coupes échographiques réalisées montrent des reins de volume et</br>d’échostructure normaux.</br></br>Il n’y à pas de dilatation des cavités pyélocalicielles ou de calcul rénal</br>visible.</br></br>Il existe une bonne différenciation cortico-médullaire et le parenchyme</br>rénal est d'épaisseur normale.</br></br>Il n’y à pas de syndrome tumoral tissulaire liquidien décelable.</br></br>Il n’y à pas d’épanchement péri ou para-rénal.</br>Au niveau pelvien, les bas uretères sont virtuels et la vessie est faiblement</br>remplie, transsonore à parois régulières sans image proliférative</br>endovésicale ou de calcul intraluminal.</br></br>La prostate par voie endorectale pèse environ 5 g ce jour et ses contours</br>sont réguliers et son échostructure est habituelle.</br></br>Le résidu post-mictionnel est non significatif.</br>CONCLUSION</br>L’examen retient un appareil urinaire échographiquement normal</br>ce jour et un petit résidu prostatique est d’environ 5g .</br>II n’y a pas d’autre anomalie.</br>\n",
81
       "<mark class=\"entity\" style=\"background: #55fdf9; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n",
82
       "    Docteur J. NGUYEN HUU\n",
83
       "\n",
84
       "    <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">DOCTOR</span>\n",
85
       "</mark>\n",
86
       "Nous avons conseillé au patient(e) de transmettre le compte rendu à son médecin traitant.</br></div></span>"
87
      ],
88
      "text/plain": [
89
       "<IPython.core.display.HTML object>"
90
      ]
91
     },
92
     "metadata": {},
93
     "output_type": "display_data"
94
    }
95
   ],
96
   "source": [
97
    "with open('medical_report_text/repport_330.txt', 'r') as file:  # NOTE(review): no explicit encoding, so the platform default is used - confirm the file's encoding\n",
98
    "    data = file.read()\n",
99
    "doc = nlp(data)  # run the NER pipeline only after the file handle has been released\n",
100
    "displacy.render(doc, style=\"ent\", jupyter=True, options=options)"
101
   ]
102
  },
103
  {
104
   "cell_type": "code",
105
   "execution_count": 7,
106
   "metadata": {},
107
   "outputs": [
108
    {
109
     "data": {
110
      "text/html": [
111
       "<span class=\"tex2jax_ignore\"><div class=\"entities\" style=\"line-height: 2.5; direction: ltr\">\n",
112
       "<mark class=\"entity\" style=\"background: #cd0000; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n",
113
       "    Hirson\n",
114
       "    <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">ADRESS</span>\n",
115
       "</mark>\n",
116
       " le \n",
117
       "<mark class=\"entity\" style=\"background: #ff91af; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n",
118
       "    02/12/2019\n",
119
       "    <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">DATE</span>\n",
120
       "</mark>\n",
121
       "</br>Mon Cher Confrère,</br>Voici les clichés du thorax de :</br>M. \n",
122
       "<mark class=\"entity\" style=\"background: #54b52d; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n",
123
       "    HERVIEU VINCENT\n",
124
       "    <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">PATIENT</span>\n",
125
       "</mark>\n",
126
       " 6160221066</br></br>IPP: 709805</br>ACCESSION NUMBER : 72134051</br>\n",
127
       "<mark class=\"entity\" style=\"background: #999900; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n",
128
       "    Conventionnelle Radio\n",
129
       "    <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">TYPE</span>\n",
130
       "</mark>\n",
131
       " DLP : 6.3 dGy.cm° Contraste : N/C cc N/C</br>INDICATION :</br>radio de contrôle</br>TECHNIQUE :</br>POUMONS FACE + PROFIL</br>RESULTAT :</br></br>Pas d’anomalie de la silhouette cardio-médiastinale. Pas d’anomalie en projection des hiles pulmonaires. Pas de</br>syndrome bronchique ni d’image de dilatation des bronches. Pas d’anomalie du parenchyme pulmonaire. Pas</br>d’anomalie pleurale. Pas d’anomalie du cadre osseux et des parties molles.</br></br>CONCLUSION DE L'EXAMEN DE Monsieur \n",
132
       "<mark class=\"entity\" style=\"background: #54b52d; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n",
133
       "    HERVIEU Vincent\n",
134
       "    <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">PATIENT</span>\n",
135
       "</mark>\n",
136
       " :</br>Pas d'anomalie radiologique visible.</br></br>RADIOLOGUE SIGNATAIRE : \n",
137
       "<mark class=\"entity\" style=\"background: #55fdf9; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n",
138
       "    Radiologue MBAPTE WAMBA John\n",
139
       "    <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">DOCTOR</span>\n",
140
       "</mark>\n",
141
       " , Validation électronique</br>MEDECIN REQUERANT : \n",
142
       "<mark class=\"entity\" style=\"background: #0071f1; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n",
143
       "    MIART LAURENT\n",
144
       "    <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">REFERING_DOCTOR</span>\n",
145
       "</mark>\n",
146
       "</br></br>TRACABILITE HORAIRE : Demande : 15:59 - Protocole : 15:59 - Images reçues : 15:59 - Validation : 16:48</br></div></span>"
147
      ],
148
      "text/plain": [
149
       "<IPython.core.display.HTML object>"
150
      ]
151
     },
152
     "metadata": {},
153
     "output_type": "display_data"
154
    }
155
   ],
156
   "source": [
157
    "with open('medical_report_text/repport_350.txt', 'r') as file:  # NOTE(review): no explicit encoding, so the platform default is used - confirm the file's encoding\n",
158
    "    data = file.read()\n",
159
    "doc = nlp(data)  # run the NER pipeline only after the file handle has been released\n",
160
    "displacy.render(doc, style=\"ent\", jupyter=True, options=options)"
161
   ]
162
  },
163
  {
164
   "cell_type": "code",
165
   "execution_count": null,
166
   "metadata": {},
167
   "outputs": [],
168
   "source": []
169
  }
170
 ],
171
 "metadata": {
172
  "kernelspec": {
173
   "display_name": "Python 3",
174
   "language": "python",
175
   "name": "python3"
176
  },
177
  "language_info": {
178
   "codemirror_mode": {
179
    "name": "ipython",
180
    "version": 3
181
   },
182
   "file_extension": ".py",
183
   "mimetype": "text/x-python",
184
   "name": "python",
185
   "nbconvert_exporter": "python",
186
   "pygments_lexer": "ipython3",
187
   "version": "3.7.9"
188
  }
189
 },
190
 "nbformat": 4,
191
 "nbformat_minor": 4
192
}