--- a +++ b/predictions.ipynb @@ -0,0 +1,192 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import pytesseract\n", + "pytesseract.pytesseract.tesseract_cmd = 'C:\\\\Program Files\\\\Tesseract-OCR\\\\tesseract.exe'\n", + "tessdata_dir_config = '--tessdata-dir \"C:\\\\Program Files\\\\Tesseract-OCR\\\\tessdata\"'" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "import spacy\n", + "from spacy import displacy" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "colors = {'DOCTOR': '#55fdf9', 'PATIENT': '#54b52d', 'BIRTHDATE': '#8752a1', 'DATE': '#ff91af','TYPE': '#999900', 'ADRESS': '#cd0000', 'REFERING_DOCTOR': '#0071f1' }\n", + "options = {'ents': ['DOCTOR', 'PATIENT', 'BIRTHDATE', 'DATE','TYPE', 'ADRESS', 'REFERING_DOCTOR'], 'colors':colors}" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "D:\\Anaconda\\lib\\site-packages\\spacy\\util.py:833: UserWarning: [W095] Model 'fr_pipeline' (0.0.0) was trained with spaCy v3.2 and may not be 100% compatible with the current version (3.2.0). If you see errors or degraded performance, download a newer compatible model or retrain your custom model with the current spaCy version. For more details and available updates, run: python -m spacy validate\n", + " warnings.warn(warn_msg)\n" + ] + } + ], + "source": [ + "nlp = spacy.load(\"./ner_model/output/model-best\")" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<span class=\"tex2jax_ignore\"><div class=\"entities\" style=\"line-height: 2.5; direction: ltr\">\n", + "<mark class=\"entity\" style=\"background: #cd0000; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n", + " Gien\n", + " <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">ADRESS</span>\n", + "</mark>\n", + ", le \n", + "<mark class=\"entity\" style=\"background: #ff91af; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n", + " 28/09/2010\n", + " <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">DATE</span>\n", + "</mark>\n", + "</br>Monsieur \n", + "<mark class=\"entity\" style=\"background: #54b52d; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n", + " LEBROC Bernard\n", + " <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">PATIENT</span>\n", + "</mark>\n", + "</br>BM/0077</br>Examen demandé par la Médecine Jaune.</br>\n", + "<mark class=\"entity\" style=\"background: #999900; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n", + " ECHOGRAPHIE\n", + " <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">TYPE</span>\n", + "</mark>\n", + " VESICO-PROSTATIQUE ET RENALE (Hitachi Elite 8500</br>concept 26.02.2008)</br></br>Indication : néoplasie prostatique.</br>Rétention urinaire aigue.</br>RESULTAT :</br>Les coupes échographiques réalisées montrent des reins de volume et</br>d’échostructure normaux.</br></br>Il n’y à pas de dilatation des cavités pyélocalicielles ou de calcul rénal</br>visible.</br></br>Il existe une bonne différenciation cortico-médullaire et le parenchyme</br>rénal est d'épaisseur normale.</br></br>Il n’y à pas de syndrome tumoral tissulaire liquidien décelable.</br></br>Il n’y à pas d’épanchement péri ou para-rénal.</br>Au niveau pelvien, les bas uretères sont virtuels et la vessie est faiblement</br>remplie, transsonore à parois régulières sans image proliférative</br>endovésicale ou de calcul intraluminal.</br></br>La prostate par voie endorectale pèse environ 5 g ce jour et ses contours</br>sont réguliers et son échostructure est habituelle.</br></br>Le résidu post-mictionnel est non significatif.</br>CONCLUSION</br>L’examen retient un appareil urinaire échographiquement normal</br>ce jour et un petit résidu prostatique est d’environ 5g .</br>II n’y a pas d’autre anomalie.</br>\n", + "<mark class=\"entity\" style=\"background: #55fdf9; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n", + " Docteur J. NGUYEN HUU\n", + "\n", + " <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">DOCTOR</span>\n", + "</mark>\n", + "Nous avons conseillé au patient(e) de transmettre le compte rendu à son médecin traitant.</br></div></span>" + ], + "text/plain": [ + "<IPython.core.display.HTML object>" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "with open('medical_report_text/repport_330.txt', 'r') as file:\n", + " data = file.read()\n", + " doc = nlp(data)\n", + "displacy.render(doc, style=\"ent\", jupyter=True, options=options)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<span class=\"tex2jax_ignore\"><div class=\"entities\" style=\"line-height: 2.5; direction: ltr\">\n", + "<mark class=\"entity\" style=\"background: #cd0000; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n", + " Hirson\n", + " <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">ADRESS</span>\n", + "</mark>\n", + " le \n", + "<mark class=\"entity\" style=\"background: #ff91af; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n", + " 02/12/2019\n", + " <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">DATE</span>\n", + "</mark>\n", + "</br>Mon Cher Confrère,</br>Voici les clichés du thorax de :</br>M. \n", + "<mark class=\"entity\" style=\"background: #54b52d; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n", + " HERVIEU VINCENT\n", + " <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">PATIENT</span>\n", + "</mark>\n", + " 6160221066</br></br>IPP: 709805</br>ACCESSION NUMBER : 72134051</br>\n", + "<mark class=\"entity\" style=\"background: #999900; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n", + " Conventionnelle Radio\n", + " <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">TYPE</span>\n", + "</mark>\n", + " DLP : 6.3 dGy.cm° Contraste : N/C cc N/C</br>INDICATION :</br>radio de contrôle</br>TECHNIQUE :</br>POUMONS FACE + PROFIL</br>RESULTAT :</br></br>Pas d’anomalie de la silhouette cardio-médiastinale. Pas d’anomalie en projection des hiles pulmonaires. Pas de</br>syndrome bronchique ni d’image de dilatation des bronches. Pas d’anomalie du parenchyme pulmonaire. Pas</br>d’anomalie pleurale. Pas d’anomalie du cadre osseux et des parties molles.</br></br>CONCLUSION DE L'EXAMEN DE Monsieur \n", + "<mark class=\"entity\" style=\"background: #54b52d; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n", + " HERVIEU Vincent\n", + " <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">PATIENT</span>\n", + "</mark>\n", + " :</br>Pas d'anomalie radiologique visible.</br></br>RADIOLOGUE SIGNATAIRE : \n", + "<mark class=\"entity\" style=\"background: #55fdf9; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n", + " Radiologue MBAPTE WAMBA John\n", + " <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">DOCTOR</span>\n", + "</mark>\n", + " , Validation électronique</br>MEDECIN REQUERANT : \n", + "<mark class=\"entity\" style=\"background: #0071f1; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n", + " MIART LAURENT\n", + " <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">REFERING_DOCTOR</span>\n", + "</mark>\n", + "</br></br>TRACABILITE HORAIRE : Demande : 15:59 - Protocole : 15:59 - Images reçues : 15:59 - Validation : 16:48</br></div></span>" + ], + "text/plain": [ + "<IPython.core.display.HTML object>" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "with open('medical_report_text/repport_350.txt', 'r') as file:\n", + " data = file.read()\n", + " doc = nlp(data)\n", + "displacy.render(doc, style=\"ent\", jupyter=True, options=options)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.9" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +}