193 lines (192 with data), 10.3 kB
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import pytesseract\n",
"pytesseract.pytesseract.tesseract_cmd = 'C:\\\\Program Files\\\\Tesseract-OCR\\\\tesseract.exe'\n",
"tessdata_dir_config = '--tessdata-dir \"C:\\\\Program Files\\\\Tesseract-OCR\\\\tessdata\"'"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"import spacy\n",
"from spacy import displacy"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"colors = {'DOCTOR': '#55fdf9', 'PATIENT': '#54b52d', 'BIRTHDATE': '#8752a1', 'DATE': '#ff91af','TYPE': '#999900', 'ADRESS': '#cd0000', 'REFERING_DOCTOR': '#0071f1' }\n",
"options = {'ents': ['DOCTOR', 'PATIENT', 'BIRTHDATE', 'DATE','TYPE', 'ADRESS', 'REFERING_DOCTOR'], 'colors':colors}"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"D:\\Anaconda\\lib\\site-packages\\spacy\\util.py:833: UserWarning: [W095] Model 'fr_pipeline' (0.0.0) was trained with spaCy v3.2 and may not be 100% compatible with the current version (3.2.0). If you see errors or degraded performance, download a newer compatible model or retrain your custom model with the current spaCy version. For more details and available updates, run: python -m spacy validate\n",
" warnings.warn(warn_msg)\n"
]
}
],
"source": [
"nlp = spacy.load(\"./ner_model/output/model-best\")"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<span class=\"tex2jax_ignore\"><div class=\"entities\" style=\"line-height: 2.5; direction: ltr\">\n",
"<mark class=\"entity\" style=\"background: #cd0000; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n",
" Gien\n",
" <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">ADRESS</span>\n",
"</mark>\n",
", le \n",
"<mark class=\"entity\" style=\"background: #ff91af; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n",
" 28/09/2010\n",
" <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">DATE</span>\n",
"</mark>\n",
"</br>Monsieur \n",
"<mark class=\"entity\" style=\"background: #54b52d; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n",
" LEBROC Bernard\n",
" <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">PATIENT</span>\n",
"</mark>\n",
"</br>BM/0077</br>Examen demandé par la Médecine Jaune.</br>\n",
"<mark class=\"entity\" style=\"background: #999900; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n",
" ECHOGRAPHIE\n",
" <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">TYPE</span>\n",
"</mark>\n",
" VESICO-PROSTATIQUE ET RENALE (Hitachi Elite 8500</br>concept 26.02.2008)</br></br>Indication : néoplasie prostatique.</br>Rétention urinaire aigue.</br>RESULTAT :</br>Les coupes échographiques réalisées montrent des reins de volume et</br>d’échostructure normaux.</br></br>Il n’y à pas de dilatation des cavités pyélocalicielles ou de calcul rénal</br>visible.</br></br>Il existe une bonne différenciation cortico-médullaire et le parenchyme</br>rénal est d'épaisseur normale.</br></br>Il n’y à pas de syndrome tumoral tissulaire liquidien décelable.</br></br>Il n’y à pas d’épanchement péri ou para-rénal.</br>Au niveau pelvien, les bas uretères sont virtuels et la vessie est faiblement</br>remplie, transsonore à parois régulières sans image proliférative</br>endovésicale ou de calcul intraluminal.</br></br>La prostate par voie endorectale pèse environ 5 g ce jour et ses contours</br>sont réguliers et son échostructure est habituelle.</br></br>Le résidu post-mictionnel est non significatif.</br>CONCLUSION</br>L’examen retient un appareil urinaire échographiquement normal</br>ce jour et un petit résidu prostatique est d’environ 5g .</br>II n’y a pas d’autre anomalie.</br>\n",
"<mark class=\"entity\" style=\"background: #55fdf9; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n",
" Docteur J. NGUYEN HUU\n",
"\n",
" <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">DOCTOR</span>\n",
"</mark>\n",
"Nous avons conseillé au patient(e) de transmettre le compte rendu à son médecin traitant.</br></div></span>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"with open('medical_report_text/repport_330.txt', 'r') as file:\n",
" data = file.read()\n",
" doc = nlp(data)\n",
"displacy.render(doc, style=\"ent\", jupyter=True, options=options)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<span class=\"tex2jax_ignore\"><div class=\"entities\" style=\"line-height: 2.5; direction: ltr\">\n",
"<mark class=\"entity\" style=\"background: #cd0000; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n",
" Hirson\n",
" <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">ADRESS</span>\n",
"</mark>\n",
" le \n",
"<mark class=\"entity\" style=\"background: #ff91af; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n",
" 02/12/2019\n",
" <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">DATE</span>\n",
"</mark>\n",
"</br>Mon Cher Confrère,</br>Voici les clichés du thorax de :</br>M. \n",
"<mark class=\"entity\" style=\"background: #54b52d; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n",
" HERVIEU VINCENT\n",
" <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">PATIENT</span>\n",
"</mark>\n",
" 6160221066</br></br>IPP: 709805</br>ACCESSION NUMBER : 72134051</br>\n",
"<mark class=\"entity\" style=\"background: #999900; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n",
" Conventionnelle Radio\n",
" <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">TYPE</span>\n",
"</mark>\n",
" DLP : 6.3 dGy.cm° Contraste : N/C cc N/C</br>INDICATION :</br>radio de contrôle</br>TECHNIQUE :</br>POUMONS FACE + PROFIL</br>RESULTAT :</br></br>Pas d’anomalie de la silhouette cardio-médiastinale. Pas d’anomalie en projection des hiles pulmonaires. Pas de</br>syndrome bronchique ni d’image de dilatation des bronches. Pas d’anomalie du parenchyme pulmonaire. Pas</br>d’anomalie pleurale. Pas d’anomalie du cadre osseux et des parties molles.</br></br>CONCLUSION DE L'EXAMEN DE Monsieur \n",
"<mark class=\"entity\" style=\"background: #54b52d; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n",
" HERVIEU Vincent\n",
" <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">PATIENT</span>\n",
"</mark>\n",
" :</br>Pas d'anomalie radiologique visible.</br></br>RADIOLOGUE SIGNATAIRE : \n",
"<mark class=\"entity\" style=\"background: #55fdf9; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n",
" Radiologue MBAPTE WAMBA John\n",
" <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">DOCTOR</span>\n",
"</mark>\n",
" , Validation électronique</br>MEDECIN REQUERANT : \n",
"<mark class=\"entity\" style=\"background: #0071f1; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n",
" MIART LAURENT\n",
" <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">REFERING_DOCTOR</span>\n",
"</mark>\n",
"</br></br>TRACABILITE HORAIRE : Demande : 15:59 - Protocole : 15:59 - Images reçues : 15:59 - Validation : 16:48</br></div></span>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"with open('medical_report_text/repport_350.txt', 'r') as file:\n",
" data = file.read()\n",
" doc = nlp(data)\n",
"displacy.render(doc, style=\"ent\", jupyter=True, options=options)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.9"
}
},
"nbformat": 4,
"nbformat_minor": 4
}