308 lines (307 with data), 12.4 kB
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "4c9d40b1",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"import cv2\n",
"import matplotlib.pyplot as plt\n",
"import os.path, sys, re\n",
"import time\n",
"from PIL import Image"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "92e8df2f",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"/home/moise/Desktop/Data_Science/Erdos_Institute/ecg-proj/ecg-copy\n"
]
}
],
"source": [
"# Explicit line magic: the bare `cd` form relies on IPython automagic, which is\n",
"# skipped when a variable named `cd` exists or automagic is switched off.\n",
"# NOTE(review): this is a machine-specific location; adjust it to your checkout.\n",
"%cd ~/Desktop/Data_Science/Erdos_Institute/ecg-proj/ecg-copy/"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "c5a0e302",
"metadata": {},
"outputs": [],
"source": [
"# Root folder of the raw ECG images, relative to the working directory.\n",
"# Every entry directly below it is processed as one class folder.\n",
"pathroot = 'data_v1/'"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "3c9b391a",
"metadata": {},
"outputs": [],
"source": [
"# Crop boxes for every class folder. Each layout maps 'Lead1'..'LeadN' to a\n",
"# 4-tuple that processor() hands to Image.crop(box=...).\n",
"#\n",
"# Notes\n",
"# -----\n",
"# 1) 'ECGImagesofMyocardialInfarctionPatients' holds a list of two layouts:\n",
"#      [0] files numbered MI <= 37\n",
"#      [1] files numbered MI >= 38\n",
"# 2) 'ECGImagesofCOVID-19Patients' holds a list of two layouts:\n",
"#      [0] files named Binder1_Page...\n",
"#      [1] files named COVID...\n",
"# 3) 'ECGImagesofPatientthathaveHistoryofMI' holds a list of three layouts:\n",
"#      [0] PMI < 113\n",
"#      [1] 113 <= PMI <= 161  -- ATTENTION: only 12 leads in this case!\n",
"#      [2] PMI >= 162\n",
"\n",
"def _lead_boxes(col_spans, row_spans, strip=None):\n",
"    \"\"\"Build one {'LeadN': (x0, y0, x1, y1)} crop table from a grid.\n",
"\n",
"    Leads are numbered row by row: all columns of the first row span, then all\n",
"    columns of the second row span, and so on. When ``strip`` is given it is\n",
"    appended unchanged as one extra lead after the grid.\n",
"    \"\"\"\n",
"    boxes = {}\n",
"    for y0, y1 in row_spans:\n",
"        for x0, x1 in col_spans:\n",
"            boxes['Lead' + str(len(boxes) + 1)] = (x0, y0, x1, y1)\n",
"    if strip is not None:\n",
"        boxes['Lead' + str(len(boxes) + 1)] = strip\n",
"    return boxes\n",
"\n",
"# 4 x 3 grid plus bottom strip shared by MI[0], COVID[1] and history-of-MI[0].\n",
"_WIDE_COLS = ((125, 625), (640, 1125), (1140, 1625), (1640, 2125))\n",
"_WIDE_ROWS = ((310, 600), (605, 900), (905, 1200))\n",
"_WIDE_STRIP = (125, 1205, 2125, 1495)\n",
"# Two side-by-side columns used by the 2 x 6 layouts.\n",
"_PAIR_COLS = ((125, 1125), (1125, 2125))\n",
"# Row spans shared by the normal and abnormal-heartbeat folders.\n",
"_STD_ROWS = ((300, 600), (600, 900), (900, 1200))\n",
"\n",
"superDict = {\n",
"    'NormalPersonECGImages': _lead_boxes(\n",
"        ((130, 640), (641, 1125), (1130, 1625), (1630, 2120)),\n",
"        _STD_ROWS,\n",
"        (130, 1205, 2120, 1450)),\n",
"\n",
"    # Same as the normal layout except the right edge is 2110 instead of 2120.\n",
"    'ECGImagesofPatientthathaveabnormalheartbeats': _lead_boxes(\n",
"        ((130, 640), (641, 1125), (1130, 1625), (1630, 2110)),\n",
"        _STD_ROWS,\n",
"        (130, 1205, 2110, 1450)),\n",
"\n",
"    'ECGImagesofMyocardialInfarctionPatients': [\n",
"        _lead_boxes(_WIDE_COLS, _WIDE_ROWS, _WIDE_STRIP),\n",
"        _lead_boxes(\n",
"            _PAIR_COLS,\n",
"            ((300, 500), (500, 680), (680, 845), (845, 1000), (1000, 1150), (1150, 1300)),\n",
"            (125, 1300, 2125, 1490)),\n",
"    ],\n",
"\n",
"    'ECGImagesofCOVID-19Patients': [\n",
"        _lead_boxes(\n",
"            ((100, 300), (300, 500), (500, 700), (700, 900)),\n",
"            ((110, 240), (241, 350), (351, 450)),\n",
"            (100, 451, 900, 545)),\n",
"        _lead_boxes(_WIDE_COLS, _WIDE_ROWS, _WIDE_STRIP),\n",
"    ],\n",
"\n",
"    'ECGImagesofPatientthathaveHistoryofMI': [\n",
"        _lead_boxes(_WIDE_COLS, _WIDE_ROWS, _WIDE_STRIP),\n",
"        # Only 12 leads here: no bottom strip.\n",
"        _lead_boxes(\n",
"            _PAIR_COLS,\n",
"            ((410, 590), (600, 815), (815, 1035), (1035, 1235), (1235, 1350), (1350, 1490))),\n",
"        _lead_boxes(\n",
"            _PAIR_COLS,\n",
"            ((290, 490), (490, 650), (650, 810), (810, 1000), (1000, 1190), (1190, 1315)),\n",
"            (125, 1315, 2125, 1550)),\n",
"    ],\n",
"}"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "310cafaf",
"metadata": {},
"outputs": [],
"source": [
"# Class folder names. The ORDER matters: crop2csv() selects the folders that\n",
"# have several layouts by position (indices 2, 3 and 4 of this list).\n",
"dirList = [\n",
"    'NormalPersonECGImages',\n",
"    'ECGImagesofPatientthathaveabnormalheartbeats',\n",
"    'ECGImagesofMyocardialInfarctionPatients',\n",
"    'ECGImagesofCOVID-19Patients',\n",
"    'ECGImagesofPatientthathaveHistoryofMI',\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "76c9879b",
"metadata": {},
"outputs": [],
"source": [
"def processor(dirs,path2,im,of,crop_dict,numLeads=13,threshold_level=50,csv_root=None):\n",
"    \"\"\"Crop each lead out of one ECG image and write its dark pixels to a file.\n",
"\n",
"    For every lead 1..numLeads the box ``crop_dict['Lead<i>']`` is cropped from\n",
"    ``im`` and saved as ``<of>-Cropped_lead<i>.png`` inside ``path2``. The PNG is\n",
"    then re-read with OpenCV, converted to grayscale, and the positions of all\n",
"    pixels whose gray value is below ``threshold_level`` are written with\n",
"    ``np.savetxt`` to ``<csv_root>/<dirs>/<of>-Cropped_lead<i>.csv``.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    dirs : name of the class folder; used as the sub-folder for the CSV files.\n",
"    path2 : folder that receives the cropped PNG files.\n",
"    im : image object exposing ``crop(box=...)`` (crop2csv passes a PIL image).\n",
"    of : file name stem of the source image; prefix of every output file.\n",
"    crop_dict : mapping 'Lead<i>' -> crop box.\n",
"    numLeads : number of leads to extract (keys Lead1..Lead<numLeads>).\n",
"    threshold_level : gray values strictly below this count as trace pixels.\n",
"    csv_root : root folder for the CSV output. When omitted, the notebook-level\n",
"        global ``targetFolder`` is used, exactly as before this parameter existed.\n",
"\n",
"    Raises\n",
"    ------\n",
"    KeyError : ``crop_dict`` has no entry for one of the requested leads.\n",
"    IOError : OpenCV could not read back the PNG that was just written.\n",
"    \"\"\"\n",
"    if csv_root is None:\n",
"        # Backward-compatible default: fall back to the global set by the driver cell.\n",
"        csv_root = targetFolder\n",
"\n",
"    # The output folder does not depend on the lead, so create it once up front.\n",
"    csvFolder = os.path.join(csv_root,dirs)\n",
"    os.makedirs(csvFolder, exist_ok=True)\n",
"\n",
"    for i in range(1,numLeads+1):\n",
"        target = of+'-Cropped_lead'+str(i)\n",
"        png_path = os.path.join(path2,target+'.png')\n",
"\n",
"        # PNG is lossless; the JPEG-style `quality` option has no effect on it.\n",
"        im.crop(box=crop_dict['Lead'+str(i)]).save(png_path, 'PNG')\n",
"\n",
"        img = cv2.imread(png_path)\n",
"        if img is None:\n",
"            # cv2.imread signals failure by returning None instead of raising.\n",
"            raise IOError('OpenCV could not read ' + png_path)\n",
"        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)\n",
"\n",
"        # np.where yields (row, column) indices; store them as (column, row),\n",
"        # i.e. x first and y second.\n",
"        rows, cols = np.where(gray < threshold_level)\n",
"        coords = np.column_stack((cols, rows))\n",
"\n",
"        # np.savetxt defaults are kept on purpose so the on-disk format is unchanged.\n",
"        np.savetxt(os.path.join(csvFolder,target+'.csv'),coords)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "15742532",
"metadata": {},
"outputs": [],
"source": [
"def crop2csv(dirs,path,path2,dirList,superDict,pattern='.jpg'):\n",
"    \"\"\"Run processor() on every image of one class folder and count them.\n",
"\n",
"    Every regular file in ``path`` whose extension equals ``pattern`` (exact,\n",
"    case-sensitive match) is opened and handed to ``processor`` together with\n",
"    the crop layout that fits it:\n",
"\n",
"    * ``dirs == dirList[2]``: the number after the first 3 characters of the\n",
"      file stem selects layout 0 (<= 37) or layout 1.\n",
"    * ``dirs == dirList[3]``: stems starting with 'COVID' use layout 1, all\n",
"      other stems use layout 0.\n",
"    * ``dirs == dirList[4]``: the number after the first 4 characters of the\n",
"      stem selects layout 0 (< 113), layout 1 (113..161, 12 leads only) or\n",
"      layout 2.\n",
"    * any other folder: ``superDict[dirs]`` is itself the layout.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    dirs : name of the class folder (key into ``superDict``).\n",
"    path : folder containing the source images.\n",
"    path2 : folder that receives the cropped PNG files.\n",
"    dirList : ordered list of class folder names (positions 2, 3, 4 are used).\n",
"    superDict : crop layouts per class folder.\n",
"    pattern : file extension, including the dot, of the images to process.\n",
"\n",
"    Returns\n",
"    -------\n",
"    int : number of images that were processed.\n",
"\n",
"    Raises\n",
"    ------\n",
"    ValueError : a file stem in a numbered folder does not end in an integer.\n",
"    KeyError : ``dirs`` is not a key of ``superDict``.\n",
"    \"\"\"\n",
"    count = 0\n",
"    for item in os.listdir(path):\n",
"        source_path = os.path.join(path,item)\n",
"        if not os.path.isfile(source_path):\n",
"            continue\n",
"        of, oe = os.path.splitext(item)\n",
"        if oe != pattern:\n",
"            continue\n",
"\n",
"        layouts = superDict[dirs]\n",
"        # The context manager guarantees the image file handle is released even\n",
"        # when processor() raises part-way through.\n",
"        with Image.open(source_path) as im:\n",
"            if dirs == dirList[2]:\n",
"                layout = layouts[0] if int(of[3:]) <= 37 else layouts[1]\n",
"                processor(dirs,path2,im,of,layout)\n",
"            elif dirs == dirList[3]:\n",
"                layout = layouts[1] if of[0:5] == 'COVID' else layouts[0]\n",
"                processor(dirs,path2,im,of,layout)\n",
"            elif dirs == dirList[4]:\n",
"                number = int(of[4:])  # parsed once instead of per comparison\n",
"                if number < 113:\n",
"                    processor(dirs,path2,im,of,layouts[0])\n",
"                elif number <= 161:\n",
"                    # This range of files only has 12 leads.\n",
"                    processor(dirs,path2,im,of,layouts[1],numLeads=12)\n",
"                else:\n",
"                    processor(dirs,path2,im,of,layouts[2])\n",
"            else:\n",
"                processor(dirs,path2,im,of,layouts)\n",
"        count += 1\n",
"    return count"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "61da0528",
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Processing ECGImagesofPatientthathaveabnormalheartbeats folder ...\n",
"546 files processed in this folder in 229 sec...\n",
"\n",
"Processing ECGImagesofPatientthathaveHistoryofMI folder ...\n",
"203 files processed in this folder in 81 sec...\n",
"\n",
"Processing ECGImagesofCOVID-19Patients folder ...\n",
"250 files processed in this folder in 27 sec...\n",
"\n",
"Processing NormalPersonECGImages folder ...\n",
"859 files processed in this folder in 348 sec...\n",
"\n",
"Processing ECGImagesofMyocardialInfarctionPatients folder ...\n",
"74 files processed in this folder in 29 sec...\n",
"\n"
]
}
],
"source": [
"## Cropping\n",
"# Root folder for the CSV output; processor() reads this global by default.\n",
"targetFolder = 'CSV_data_v1'\n",
"os.makedirs(targetFolder, exist_ok=True)\n",
"\n",
"t0 = time.time()\n",
"for dirs in os.listdir(pathroot):\n",
"    path = os.path.join(pathroot,dirs)\n",
"    # The entry has to be tested relative to pathroot, not the working\n",
"    # directory; anything that is not a folder (stray files) is skipped.\n",
"    if not os.path.isdir(path):\n",
"        continue\n",
"    t = time.time()\n",
"    print('Processing {0} folder ...'.format(dirs))\n",
"    path2 = os.path.join(path,\"Cropped_Images\")\n",
"    os.makedirs(path2, exist_ok=True)\n",
"    fileCount = crop2csv(dirs,path,path2,dirList,superDict)\n",
"    t = time.time()-t\n",
"    print('{0} files processed in this folder in {1} sec...\\n'.format(fileCount,round(t)))\n",
"# Total wall-clock seconds for the whole run.\n",
"t0 = time.time()-t0"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ab0d386d",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "bb1756ac",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "7ff6cfcd",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "5ad56a7b",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "f2b319f8",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.10"
}
},
"nbformat": 4,
"nbformat_minor": 5
}