Diff of /leukemia detection.ipynb [000000] .. [198e90]

Switch to unified view

a b/leukemia detection.ipynb
1
{
2
 "cells": [
3
  {
4
   "cell_type": "code",
5
   "execution_count": 1,
6
   "id": "59654c10",
7
   "metadata": {},
8
   "outputs": [
9
    {
10
     "name": "stdout",
11
     "output_type": "stream",
12
     "text": [
13
      "modules loaded\n"
14
     ]
15
    }
16
   ],
17
   "source": [
18
    "import os\n",
19
    "os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'\n",
20
    "\n",
21
    "import tensorflow as tf\n",
22
    "from tensorflow import keras\n",
23
    "from tensorflow.keras import backend as K\n",
24
    "from tensorflow.keras.layers import Dense, Activation,Dropout,Conv2D, MaxPooling2D,BatchNormalization, Flatten\n",
25
    "from tensorflow.keras.optimizers import Adam, Adamax\n",
26
    "from tensorflow.keras.metrics import categorical_crossentropy\n",
27
    "from tensorflow.keras import regularizers\n",
28
    "from tensorflow.keras.preprocessing.image import ImageDataGenerator\n",
29
    "from tensorflow.keras.models import Model, load_model, Sequential\n",
30
    "import numpy as np\n",
31
    "import pandas as pd\n",
32
    "import shutil\n",
33
    "import time\n",
34
    "import cv2 as cv2\n",
35
    "from tqdm import tqdm\n",
36
    "from sklearn.model_selection import train_test_split\n",
37
    "import matplotlib.pyplot as plt\n",
38
    "from matplotlib.pyplot import imshow\n",
39
    "import seaborn as sns\n",
40
    "sns.set_style('darkgrid')\n",
41
    "from PIL import Image\n",
42
    "from sklearn.metrics import confusion_matrix, classification_report\n",
43
    "from IPython.core.display import display, HTML\n",
44
    "# stop annoying tensorflow warning messages\n",
45
    "import logging\n",
46
    "logging.getLogger(\"tensorflow\").setLevel(logging.ERROR)\n",
47
    "print ('modules loaded')"
48
   ]
49
  },
50
  {
51
   "cell_type": "code",
52
   "execution_count": 2,
53
   "id": "fa7de5f3",
54
   "metadata": {},
55
   "outputs": [],
56
   "source": [
57
    "def show_image_samples(gen ):\n",
58
    "    # Show up to 25 images from one batch of gen on a 5 x 5 grid, each titled with its class name.\n",
59
    "    # gen has to provide a class_indices dict and return an (images, labels) batch from next(gen).\n",
60
    "    # class names are listed in the order of the class_indices keys\n",
61
    "    class_names=list(gen.class_indices.keys())\n",
62
    "    batch_images,batch_labels=next(gen) # pull a single batch from the generator\n",
63
    "    # the grid has 25 cells: smaller batches are shown in full, larger ones are cut off at 25\n",
64
    "    shown=min(len(batch_labels), 25)\n",
65
    "    plt.figure(figsize=(20, 20))\n",
66
    "    for slot in range(shown):\n",
67
    "        plt.subplot(5, 5, slot + 1)\n",
68
    "        # pixel values are divided by 255 before being handed to imshow\n",
69
    "        scaled=batch_images[slot]/255\n",
70
    "        plt.imshow(scaled)\n",
71
    "        # the position of the largest label entry selects the class name\n",
72
    "        label_pos=np.argmax(batch_labels[slot])\n",
73
    "        plt.title(class_names[label_pos], color='blue', fontsize=12)\n",
74
    "        plt.axis('off')\n",
75
    "    plt.show()"
76
   ]
77
  },
78
  {
79
   "cell_type": "code",
80
   "execution_count": 3,
81
   "id": "3cc88fda",
82
   "metadata": {},
83
   "outputs": [],
84
   "source": [
85
    "def show_images(tdir):\n",
86
    "    # Plot the file named 1.jpg found under every entry of tdir, five images per row,\n",
87
    "    # using the name of the entry as the title of its image.\n",
88
    "    class_names=os.listdir(tdir)\n",
89
    "    per_row=5\n",
90
    "    row_count=int(np.ceil(len(class_names)/per_row))\n",
91
    "    plt.figure(figsize=(20, row_count * 4))\n",
92
    "    for slot, klass in enumerate(class_names, start=1):\n",
93
    "        # every entry of tdir is treated as a directory that holds a file called 1.jpg\n",
94
    "        sample=plt.imread(os.path.join(tdir, klass, '1.jpg'))\n",
95
    "        plt.subplot(row_count, per_row, slot)\n",
96
    "        plt.axis('off')\n",
97
    "        plt.title(klass, color='blue', fontsize=12)\n",
98
    "        plt.imshow(sample)"
99
   ]
100
  },
101
  {
102
   "cell_type": "code",
103
   "execution_count": 4,
104
   "id": "564a3b4b",
105
   "metadata": {},
106
   "outputs": [],
107
   "source": [
108
    "def print_in_color(txt_msg,fore_tupple,back_tupple,):\n",
109
    "    # prints txt_msg in the foreground color given by fore_tupple on the background color given by back_tupple\n",
110
    "    # txt_msg is the text, fore_tupple is the foreground color tuple (r,g,b), back_tupple is the background color tuple (r,g,b)\n",
111
    "    rf,gf,bf=fore_tupple\n",
112
    "    rb,gb,bb=back_tupple\n",
113
    "    mat='\\33[38;2;' + str(rf) +';' + str(gf) + ';' + str(bf) + ';48;2;' + str(rb) + ';' +str(gb) + ';' + str(bb) +'m' \n",
114
    "    # the text is appended to the color code instead of being run through str.format, so braces in txt_msg are printed as they are\n",
115
    "    print(mat + txt_msg, flush=True)\n",
116
    "    print('\\33[0m', flush=True) # switch the terminal back to its default colors\n",
117
    "    return"
118
   ]
119
  },
120
  {
121
   "cell_type": "code",
122
   "execution_count": 5,
123
   "id": "9bbdb8ab",
124
   "metadata": {},
125
   "outputs": [],
126
   "source": [
127
    "class LRA(keras.callbacks.Callback):\n",
128
    "    # Keras callback that lowers the learning rate when the monitored metric stops improving, restores the best\n",
129
    "    # weights when training ends and, once ask_epoch is reached, asks the user how training should continue.\n",
130
    "    def __init__(self,model, base_model, patience,stop_patience, threshold, factor, dwell, batches, initial_epoch,epochs, ask_epoch):\n",
131
    "        super(LRA, self).__init__()\n",
132
    "        self.model=model\n",
133
    "        self.base_model=base_model\n",
134
    "        self.patience=patience # specifies how many epochs without improvement before learning rate is adjusted\n",
135
    "        self.stop_patience=stop_patience # specifies how many times to adjust lr without improvement to stop training\n",
136
    "        self.threshold=threshold # specifies training accuracy threshold when lr will be adjusted based on validation loss\n",
137
    "        self.factor=factor # factor by which to reduce the learning rate\n",
138
    "        self.dwell=dwell # when true, the best weights are loaded back each time the learning rate is reduced\n",
139
    "        self.batches=batches # number of training batches to run per epoch\n",
140
    "        self.initial_epoch=initial_epoch\n",
141
    "        self.epochs=epochs\n",
142
    "        self.ask_epoch=ask_epoch # epoch from which the user is asked how to continue (None disables asking)\n",
143
    "        self.ask_epoch_initial=ask_epoch # save this value to restore if restarting training\n",
144
    "        # callback variables \n",
145
    "        self.count=0 # consecutive epochs without improvement (patience counter)\n",
146
    "        self.stop_count=0 # consecutive learning rate reductions without improvement\n",
147
    "        self.best_epoch=1   # epoch with the lowest loss        \n",
148
    "        self.initial_lr=float(tf.keras.backend.get_value(model.optimizer.lr)) # get the initial learning rate and save it\n",
149
    "        self.highest_tracc=0.0 # set highest training accuracy to 0 initially\n",
150
    "        self.lowest_vloss=np.inf # set lowest validation loss to infinity initially\n",
151
    "        self.best_weights=self.model.get_weights() # set best weights to model's initial weights\n",
152
    "        self.initial_weights=self.model.get_weights()   # save initial weights if they have to get restored \n",
153
    "    # on_train_begin: report the base_model state, print the column header and start the training timer\n",
154
    "    def on_train_begin(self, logs=None):        \n",
155
    "        if self.base_model is not None:\n",
156
    "            status=self.base_model.trainable # use the base_model handed to the callback, not a notebook global\n",
157
    "            if status:\n",
158
    "                msg=' initializing callback starting training with base_model trainable'\n",
159
    "            else:\n",
160
    "                msg='initializing callback starting training with base_model not trainable'\n",
161
    "        else:\n",
162
    "            msg='initialing callback and starting training'                        \n",
163
    "        print_in_color (msg, (244, 252, 3), (55,65,80)) \n",
164
    "        msg='{0:^8s}{1:^10s}{2:^9s}{3:^9s}{4:^9s}{5:^9s}{6:^9s}{7:^10s}{8:10s}{9:^8s}'.format('Epoch', 'Loss', 'Accuracy',\n",
165
    "                                                                                              'V_loss','V_acc', 'LR', 'Next LR', 'Monitor','% Improv', 'Duration')\n",
166
    "        print_in_color(msg, (244,252,3), (55,65,80)) \n",
167
    "        self.start_time= time.time()\n",
168
    "    # on_train_end: load the best weights back into the model and report the elapsed training time\n",
169
    "    def on_train_end(self, logs=None):\n",
170
    "        stop_time=time.time()\n",
171
    "        tr_duration= stop_time- self.start_time            \n",
172
    "        hours = tr_duration // 3600\n",
173
    "        minutes = (tr_duration - (hours * 3600)) // 60\n",
174
    "        seconds = tr_duration - ((hours * 3600) + (minutes * 60))\n",
175
    "        self.model.set_weights(self.best_weights) # set the weights of the model to the best weights\n",
176
    "        msg=f'Training is completed - model is set with weights from epoch {self.best_epoch} '\n",
177
    "        print_in_color(msg, (0,255,0), (55,65,80))\n",
178
    "        msg = f'training elapsed time was {str(hours)} hours, {minutes:4.1f} minutes, {seconds:4.2f} seconds)'\n",
179
    "        print_in_color(msg, (0,255,0), (55,65,80))   \n",
180
    "    # on_train_batch_end: overwrite one console line with the running batch number, accuracy and loss\n",
181
    "    def on_train_batch_end(self, batch, logs=None):\n",
182
    "        acc=logs.get('accuracy')* 100  # get training accuracy \n",
183
    "        loss=logs.get('loss')\n",
184
    "        msg='{0:20s}processing batch {1:4s} of {2:5s} accuracy= {3:8.3f}  loss: {4:8.5f}'.format(' ', str(batch), str(self.batches), acc, loss)\n",
185
    "        print(msg, '\\r', end='') # prints over on the same line to show running batch count        \n",
186
    "    # on_epoch_begin: remember when the epoch started so that its duration can be reported\n",
187
    "    def on_epoch_begin(self,epoch, logs=None):\n",
188
    "        self.now= time.time()\n",
189
    "    # on_epoch_end: update the best weights, adjust the learning rate if needed, print the epoch row and decide whether to stop or ask\n",
190
    "    def on_epoch_end(self, epoch, logs=None):  # method runs on the end of each epoch\n",
191
    "        later=time.time()\n",
192
    "        duration=later-self.now \n",
193
    "        lr=float(tf.keras.backend.get_value(self.model.optimizer.lr)) # get the current learning rate\n",
194
    "        current_lr=lr\n",
195
    "        v_loss=logs.get('val_loss')  # get the validation loss for this epoch\n",
196
    "        acc=logs.get('accuracy')  # get training accuracy \n",
197
    "        v_acc=logs.get('val_accuracy')\n",
198
    "        loss=logs.get('loss')        \n",
199
    "        color=(255,255,0) # row color when nothing improved and the lr was left alone; replaced below otherwise\n",
200
    "        if acc < self.threshold: # if training accuracy is below threshold adjust lr based on training accuracy\n",
201
    "            monitor='accuracy'\n",
202
    "            if epoch ==0 or self.highest_tracc==0: # no earlier accuracy to compare with, and avoids dividing by zero\n",
203
    "                pimprov=0.0\n",
204
    "            else:\n",
205
    "                pimprov= (acc-self.highest_tracc )*100/self.highest_tracc\n",
206
    "            if acc>self.highest_tracc: # training accuracy improved in the epoch                \n",
207
    "                self.highest_tracc=acc # set new highest training accuracy\n",
208
    "                self.best_weights=self.model.get_weights() # training accuracy improved so save the weights\n",
209
    "                self.count=0 # set count to 0 since training accuracy improved\n",
210
    "                self.stop_count=0 # set stop counter to 0\n",
211
    "                if v_loss<self.lowest_vloss:\n",
212
    "                    self.lowest_vloss=v_loss\n",
213
    "                color= (0,255,0)\n",
214
    "                self.best_epoch=epoch + 1  # set the value of best epoch for this epoch              \n",
215
    "            else: \n",
216
    "                # training accuracy did not improve check if this has happened for patience number of epochs\n",
217
    "                # if so adjust learning rate\n",
218
    "                if self.count>=self.patience -1: # lr should be adjusted\n",
219
    "                    color=(245, 170, 66)\n",
220
    "                    lr= lr* self.factor # adjust the learning by factor\n",
221
    "                    tf.keras.backend.set_value(self.model.optimizer.lr, lr) # set the learning rate in the optimizer\n",
222
    "                    self.count=0 # reset the count to 0\n",
223
    "                    self.stop_count=self.stop_count + 1 # count the number of consecutive lr adjustments\n",
224
    "                    if self.dwell:\n",
225
    "                        self.model.set_weights(self.best_weights) # return to better point in N space                        \n",
226
    "                    else:\n",
227
    "                        if v_loss<self.lowest_vloss:\n",
228
    "                            self.lowest_vloss=v_loss                                    \n",
229
    "                else:\n",
230
    "                    self.count=self.count +1 # increment patience counter                    \n",
231
    "        else: # training accuracy is above threshold so adjust learning rate based on validation loss\n",
232
    "            monitor='val_loss'\n",
233
    "            if epoch ==0 or self.lowest_vloss==0 or not np.isfinite(self.lowest_vloss): # no usable earlier loss to compare with\n",
234
    "                pimprov=0.0\n",
235
    "            else:\n",
236
    "                pimprov= (self.lowest_vloss- v_loss )*100/self.lowest_vloss\n",
237
    "            if v_loss< self.lowest_vloss: # check if the validation loss improved \n",
238
    "                self.lowest_vloss=v_loss # replace lowest validation loss with new validation loss                \n",
239
    "                self.best_weights=self.model.get_weights() # validation loss improved so save the weights\n",
240
    "                self.count=0 # reset count since validation loss improved  \n",
241
    "                self.stop_count=0  \n",
242
    "                color=(0,255,0)                \n",
243
    "                self.best_epoch=epoch + 1 # set the value of the best epoch to this epoch\n",
244
    "            else: # validation loss did not improve\n",
245
    "                if self.count>=self.patience-1: # need to adjust lr\n",
246
    "                    color=(245, 170, 66)\n",
247
    "                    lr=lr * self.factor # adjust the learning rate                    \n",
248
    "                    self.stop_count=self.stop_count + 1 # increment stop counter because lr was adjusted \n",
249
    "                    self.count=0 # reset counter\n",
250
    "                    tf.keras.backend.set_value(self.model.optimizer.lr, lr) # set the learning rate in the optimizer\n",
251
    "                    if self.dwell:\n",
252
    "                        self.model.set_weights(self.best_weights) # return to better point in N space\n",
253
    "                else: \n",
254
    "                    self.count =self.count +1 # increment the patience counter                    \n",
255
    "                if acc>self.highest_tracc:\n",
256
    "                    self.highest_tracc= acc\n",
257
    "        msg=f'{str(epoch+1):^3s}/{str(self.epochs):4s} {loss:^9.3f}{acc*100:^9.3f}{v_loss:^9.5f}{v_acc*100:^9.3f}{current_lr:^9.5f}{lr:^9.5f}{monitor:^11s}{pimprov:^10.2f}{duration:^8.2f}'\n",
258
    "        print_in_color (msg,color, (55,65,80))\n",
259
    "        if self.stop_count> self.stop_patience - 1: # check if learning rate has been adjusted stop_count times with no improvement\n",
260
    "            msg=f' training has been halted at epoch {epoch + 1} after {self.stop_patience} adjustments of learning rate with no improvement'\n",
261
    "            print_in_color(msg, (0,255,255), (55,65,80))\n",
262
    "            self.model.stop_training = True # stop training\n",
263
    "        else: \n",
264
    "            if self.ask_epoch !=None:\n",
265
    "                if epoch + 1 >= self.ask_epoch:\n",
266
    "                    if self.base_model is None or self.base_model.trainable: # fine tuning is only offered when a frozen base_model exists\n",
267
    "                        msg='enter H to halt training or an integer for number of epochs to run then ask again'\n",
268
    "                    else:\n",
269
    "                        msg='enter H to halt training ,F to fine tune model, or an integer for number of epochs to run then ask again'\n",
270
    "                    print_in_color(msg, (0,255,255), (55,65,80))\n",
271
    "                    ans=input('')\n",
272
    "                    if ans=='H' or ans=='h':\n",
273
    "                        msg=f'training has been halted at epoch {epoch + 1} due to user input'\n",
274
    "                        print_in_color(msg, (0,255,255), (55,65,80))\n",
275
    "                        self.model.stop_training = True # stop training\n",
276
    "                    elif ans == 'F' or ans=='f':\n",
277
    "                        if self.base_model is None or self.base_model.trainable: # nothing to unfreeze in either case\n",
278
    "                            msg='base_model is already set as trainable'\n",
279
    "                        else:\n",
280
    "                            msg='setting base_model as trainable for fine tuning of model'\n",
281
    "                            self.base_model.trainable=True # NOTE(review): Keras normally needs the model to be compiled again before this takes effect - confirm\n",
282
    "                        print_in_color(msg, (0, 255,255), (55,65,80))\n",
283
    "                        msg='{0:^8s}{1:^10s}{2:^9s}{3:^9s}{4:^9s}{5:^9s}{6:^9s}{7:^10s}{8:10s}{9:^8s}'.format('Epoch', 'Loss', 'Accuracy',\n",
284
    "                                                                                              'V_loss','V_acc', 'LR', 'Next LR', 'Monitor','% Improv', 'Duration')\n",
285
    "                        print_in_color(msg, (244,252,3), (55,65,80))                         \n",
286
    "                        self.count=0\n",
287
    "                        self.stop_count=0                        \n",
288
    "                        self.ask_epoch = epoch + 1 + self.ask_epoch_initial \n",
289
    "                    else:\n",
290
    "                        ans=int(ans)\n",
291
    "                        self.ask_epoch +=ans\n",
292
    "                        msg=f' training will continue until epoch ' + str(self.ask_epoch)                         \n",
293
    "                        print_in_color(msg, (0, 255,255), (55,65,80))\n",
294
    "                        msg='{0:^8s}{1:^10s}{2:^9s}{3:^9s}{4:^9s}{5:^9s}{6:^9s}{7:^10s}{8:10s}{9:^8s}'.format('Epoch', 'Loss', 'Accuracy',\n",
295
    "                                                                                              'V_loss','V_acc', 'LR', 'Next LR', 'Monitor','% Improv', 'Duration')\n",
296
    "                        print_in_color(msg, (244,252,3), (55,65,80))"
297
   ]
298
  },
299
  {
300
   "cell_type": "code",
301
   "execution_count": 6,
302
   "id": "e155beef",
303
   "metadata": {},
304
   "outputs": [],
305
   "source": [
306
    "def tr_plot(tr_data, start_epoch):\n",
307
    "    # Plot the training and validation loss (left) and accuracy (right) stored in tr_data.history\n",
308
    "    # start_epoch shifts the x axis: the plotted epochs run from start_epoch+1 to start_epoch+len(history)\n",
309
    "    tacc=tr_data.history['accuracy']\n",
310
    "    tloss=tr_data.history['loss']\n",
311
    "    vacc=tr_data.history['val_accuracy']\n",
312
    "    vloss=tr_data.history['val_loss']\n",
313
    "    Epochs=list(range(start_epoch + 1, start_epoch + len(tacc) + 1)) # one x value per recorded epoch\n",
314
    "    # the highlighted epochs are the one with the lowest validation loss and the one with the highest validation accuracy\n",
315
    "    index_loss=np.argmin(vloss)#  this is the epoch with the lowest validation loss\n",
316
    "    val_lowest=vloss[index_loss]\n",
317
    "    index_acc=np.argmax(vacc)\n",
318
    "    acc_highest=vacc[index_acc]\n",
319
    "    plt.style.use('fivethirtyeight')\n",
320
    "    sc_label='best epoch= '+ str(index_loss+1 +start_epoch)\n",
321
    "    vc_label='best epoch= '+ str(index_acc + 1+ start_epoch)\n",
322
    "    fig,axes=plt.subplots(nrows=1, ncols=2, figsize=(20,8))\n",
323
    "    # left panel: loss curves with the lowest validation loss marked\n",
324
    "    axes[0].plot(Epochs,tloss, 'r', label='Training loss')\n",
325
    "    axes[0].plot(Epochs,vloss,'g',label='Validation loss' )\n",
326
    "    axes[0].scatter(index_loss+1 +start_epoch,val_lowest, s=150, c= 'blue', label=sc_label)\n",
327
    "    axes[0].set_title('Training and Validation Loss')\n",
328
    "    axes[0].set_xlabel('Epochs')\n",
329
    "    axes[0].set_ylabel('Loss')\n",
330
    "    axes[0].legend()\n",
331
    "    # right panel: accuracy curves with the highest validation accuracy marked\n",
332
    "    axes[1].plot (Epochs,tacc,'r',label= 'Training Accuracy')\n",
333
    "    axes[1].plot (Epochs,vacc,'g',label= 'Validation Accuracy')\n",
334
    "    axes[1].scatter(index_acc+1 +start_epoch,acc_highest, s=150, c= 'blue', label=vc_label)\n",
335
    "    axes[1].set_title('Training and Validation Accuracy')\n",
336
    "    axes[1].set_xlabel('Epochs')\n",
337
    "    axes[1].set_ylabel('Accuracy')\n",
338
    "    axes[1].legend()\n",
339
    "    plt.tight_layout() # the call parentheses were missing, so the layout was never adjusted\n",
340
    "    plt.show()"
341
   ]
342
  },
343
  {
344
   "cell_type": "code",
345
   "execution_count": 7,
346
   "id": "35f70802",
347
   "metadata": {},
348
   "outputs": [],
349
   "source": [
350
    "def print_info( test_gen, preds, print_code, save_dir, subject ):\n",
351
    "    # Summarise test predictions: error count and accuracy, a list of misclassified files, errors per class,\n",
352
    "    # a confusion matrix (only when there are at most 30 classes) and a classification report.\n",
353
    "    # test_gen supplies class_indices, labels and filenames; preds holds one row of class scores per test file\n",
354
    "    # print_code is the maximum number of misclassified files to list (0 lists none); save_dir and subject are not used\n",
355
    "    # returns the accuracy as a fraction between 0 and 1\n",
356
    "    class_dict=test_gen.class_indices\n",
357
    "    labels= test_gen.labels\n",
358
    "    file_names= test_gen.filenames \n",
359
    "    error_list, true_class, pred_class, prob_list=[], [], [], []\n",
360
    "    new_dict={}\n",
361
    "    error_indices, y_pred=[], []\n",
362
    "    for key,value in class_dict.items():\n",
363
    "        new_dict[value]=key             # dictionary {integer of class number: string of class name}\n",
364
    "    class_ids=list(new_dict.keys())     # list of integer class numbers, in the same order as classes\n",
365
    "    classes=list(new_dict.values())     # list of string of class names     \n",
366
    "    errors=0      \n",
367
    "    for i, p in enumerate(preds):\n",
368
    "        pred_index=np.argmax(p)         \n",
369
    "        true_index=labels[i]  # labels are integer values\n",
370
    "        if pred_index != true_index: # a misclassification has occurred\n",
371
    "            error_list.append(file_names[i])\n",
372
    "            true_class.append(new_dict[true_index])\n",
373
    "            pred_class.append(new_dict[pred_index])\n",
374
    "            prob_list.append(p[pred_index])\n",
375
    "            error_indices.append(true_index)            \n",
376
    "            errors=errors + 1\n",
377
    "        y_pred.append(pred_index) \n",
378
    "    tests=len(preds)\n",
379
    "    acc= (1-errors/tests) *100\n",
380
    "    msg= f'There were {errors} errors in {tests} test cases Model accuracy= {acc: 6.2f} %'\n",
381
    "    print_in_color(msg,(0,255,255),(55,65,80))\n",
382
    "    if print_code !=0:\n",
383
    "        if errors>0:\n",
384
    "            if print_code>errors:\n",
385
    "                r=errors\n",
386
    "            else:\n",
387
    "                r=print_code           \n",
388
    "            msg='{0:^28s}{1:^28s}{2:^28s}{3:^16s}'.format('Filename', 'Predicted Class' , 'True Class', 'Probability')\n",
389
    "            print_in_color(msg, (0,255,0),(55,65,80))\n",
390
    "            for i in range(r):                \n",
391
    "                split1=os.path.split(error_list[i])                \n",
392
    "                split2=os.path.split(split1[0])                \n",
393
    "                fname=split2[1] + '/' + split1[1]\n",
394
    "                msg='{0:^28s}{1:^28s}{2:^28s}{3:4s}{4:^6.4f}'.format(fname, pred_class[i],true_class[i], ' ', prob_list[i])\n",
395
    "                print_in_color(msg, (255,255,255), (55,65,60))\n",
396
    "        else:\n",
397
    "            msg='With accuracy of 100 % there are no errors to print'\n",
398
    "            print_in_color(msg, (0,255,0),(55,65,80))\n",
399
    "    if errors>0:\n",
400
    "        plot_bar=[]\n",
401
    "        plot_class=[]\n",
402
    "        for  key, value in new_dict.items():        \n",
403
    "            count=error_indices.count(key) \n",
404
    "            if count!=0:\n",
405
    "                plot_bar.append(count) # list containing how many times a class c had an error\n",
406
    "                plot_class.append(value)   # stores the class \n",
407
    "        fig=plt.figure()\n",
408
    "        fig.set_figheight(len(plot_class)/3)\n",
409
    "        fig.set_figwidth(10)\n",
410
    "        plt.style.use('fivethirtyeight')\n",
411
    "        for i in range(0, len(plot_class)):\n",
412
    "            c=plot_class[i]\n",
413
    "            x=plot_bar[i]\n",
414
    "            plt.barh(c, x, )\n",
415
    "            plt.title( ' Errors by Class on Test Set')\n",
416
    "    y_true= np.array(labels)        \n",
417
    "    y_pred=np.array(y_pred)\n",
418
    "    if len(classes)<= 30:\n",
419
    "        # create a confusion matrix \n",
420
    "        cm = confusion_matrix(y_true, y_pred, labels=class_ids) # fixed label order keeps rows and columns aligned with the tick names\n",
421
    "        length=len(classes)\n",
422
    "        if length<8:\n",
423
    "            fig_width=8\n",
424
    "            fig_height=8\n",
425
    "        else:\n",
426
    "            fig_width= int(length * .5)\n",
427
    "            fig_height= int(length * .5)\n",
428
    "        plt.figure(figsize=(fig_width, fig_height))\n",
429
    "        sns.heatmap(cm, annot=True, vmin=0, fmt='g', cmap='Blues', cbar=False)       \n",
430
    "        plt.xticks(np.arange(length)+.5, classes, rotation= 90)\n",
431
    "        plt.yticks(np.arange(length)+.5, classes, rotation=0)\n",
432
    "        plt.xlabel(\"Predicted\")\n",
433
    "        plt.ylabel(\"Actual\")\n",
434
    "        plt.title(\"Confusion Matrix\")\n",
435
    "        plt.show()\n",
436
    "    clr = classification_report(y_true, y_pred, labels=class_ids, target_names=classes, digits= 4) # labels keeps target_names valid when a class is absent\n",
437
    "    print(\"Classification Report:\\n----------------------\\n\", clr)\n",
438
    "    return acc/100"
439
   ]
440
  },
441
  {
442
   "cell_type": "code",
443
   "execution_count": 8,
444
   "id": "e7d27934",
445
   "metadata": {},
446
   "outputs": [],
447
   "source": [
448
    "def saver(save_path, model, model_name, subject, accuracy,img_size, scalar, generator):    \n",
449
    "    # Save the model as an .h5 file in save_path and write a class_dict.csv describing the classes next to it.\n",
450
    "    # The model file is named model_name-subject-accuracy.h5, with the accuracy text cut two characters after its decimal point.\n",
451
    "    # Every csv row holds class_index, class, height, width and scale by (img_size[0], img_size[1] and scalar).\n",
452
    "    # Returns the pair (model file path, csv file path).\n",
453
    "    # first save the model\n",
454
    "    save_id=str (model_name +  '-' + subject +'-'+ str(accuracy)[:str(accuracy).rfind('.')+3] + '.h5') # uses the accuracy parameter; the name acc was never defined here\n",
455
    "    model_save_loc=os.path.join(save_path, save_id)\n",
456
    "    model.save(model_save_loc)\n",
457
    "    print_in_color ('model was saved as ' + model_save_loc, (0,255,0),(55,65,80)) \n",
458
    "    # now create the class_df and convert to csv file    \n",
459
    "    class_dict=generator.class_indices \n",
460
    "    height=[img_size[0] for _ in class_dict] # the same image size and scale value is repeated for every class\n",
461
    "    width=[img_size[1] for _ in class_dict]\n",
462
    "    scale=[scalar for _ in class_dict]\n",
463
    "    Index_series=pd.Series(list(class_dict.values()), name='class_index')\n",
464
    "    Class_series=pd.Series(list(class_dict.keys()), name='class') \n",
465
    "    Height_series=pd.Series(height, name='height')\n",
466
    "    Width_series=pd.Series(width, name='width')\n",
467
    "    Scale_series=pd.Series(scale, name='scale by')\n",
468
    "    class_df=pd.concat([Index_series, Class_series, Height_series, Width_series, Scale_series], axis=1)    \n",
469
    "    csv_name='class_dict.csv'\n",
470
    "    csv_save_loc=os.path.join(save_path, csv_name)\n",
471
    "    class_df.to_csv(csv_save_loc, index=False) \n",
472
    "    print_in_color ('class csv file was saved as ' + csv_save_loc, (0,255,0),(55,65,80)) \n",
473
    "    return model_save_loc, csv_save_loc\n"
474
   ]
475
  },
476
  {
477
   "cell_type": "code",
478
   "execution_count": 9,
479
   "id": "6268a332",
480
   "metadata": {},
481
   "outputs": [],
482
   "source": [
483
    "def predictor(sdir, csv_path,  model_path, averaged=True, verbose=True):    \n",
484
    "    # Classify the images found in sdir with the model stored at model_path.\n",
485
    "    # csv_path is the class csv (columns class, height, width, scale by) that supplies the class names, input size and pixel scaling.\n",
486
    "    # averaged=True: returns (class name, average probability in percent, an image, None) for the class with the largest summed probability.\n",
487
    "    # averaged=False: returns (None, None, None, dataframe) with one row of image file, species and probability per image.\n",
488
    "    # A single readable image always takes the averaged path; files that cannot be processed are skipped and reported when verbose.\n",
489
    "    # read in the csv file\n",
490
    "    class_df=pd.read_csv(csv_path)    \n",
491
    "    class_count=len(class_df['class'].unique())\n",
492
    "    img_height=int(class_df['height'].iloc[0])\n",
493
    "    img_width =int(class_df['width'].iloc[0])\n",
494
    "    img_size=(img_width, img_height)    \n",
495
    "    scale=class_df['scale by'].iloc[0]    \n",
496
    "    # determine value to scale image pixels by\n",
497
    "    try: \n",
498
    "        int(scale) # only checks that scale is a plain number; the value itself is not used\n",
499
    "        s2=1\n",
500
    "        s1=0\n",
501
    "    except (ValueError, TypeError): # scale is text of the form a*b-c: multiply by b and subtract c\n",
502
    "        split=scale.split('-')\n",
503
    "        s1=float(split[1])\n",
504
    "        s2=float(split[0].split('*')[1])\n",
505
    "    path_list=[os.path.join(sdir,f) for f in os.listdir(sdir)]\n",
506
    "    if verbose:\n",
507
    "        print (' Model is being loaded- this will take about 10 seconds')\n",
508
    "    model=load_model(model_path)\n",
509
    "    image_count=len(path_list) \n",
510
    "    image_list=[]\n",
511
    "    file_list=[]\n",
512
    "    good_image_count=0\n",
513
    "    for i in range (image_count):        \n",
514
    "        try:\n",
515
    "            img=cv2.imread(path_list[i])\n",
516
    "            img=cv2.resize(img, img_size)\n",
517
    "            img=cv2.cvtColor(img, cv2.COLOR_BGR2RGB)            \n",
518
    "            good_image_count +=1\n",
519
    "            img=img*s2 - s1             \n",
520
    "            image_list.append(img)\n",
521
    "            file_name=os.path.split(path_list[i])[1]\n",
522
    "            file_list.append(file_name)\n",
523
    "        except Exception: # skip files that cannot be processed, but let KeyboardInterrupt and SystemExit through\n",
524
    "            if verbose:\n",
525
    "                print ( path_list[i], ' is an invalid image file')\n",
526
    "    if good_image_count==0: # nothing could be read, so there is nothing to predict on\n",
527
    "        raise ValueError('no valid image files were found in ' + str(sdir))\n",
528
    "    if good_image_count==1: # if only a single image need to expand dimensions\n",
529
    "        averaged=True\n",
530
    "    image_array=np.array(image_list)    \n",
531
    "    # make predictions on images, sum the probabilities of each class then find class index with\n",
532
    "    # highest probability\n",
533
    "    preds=model.predict(image_array)    \n",
534
    "    if averaged:\n",
535
    "        psum=[0]*class_count # one running probability sum per class\n",
536
    "        for p in preds: # iterate over all predictions\n",
537
    "            for i in range (class_count):\n",
538
    "                psum[i]=psum[i] + p[i]  # sum the probabilities   \n",
539
    "        index=np.argmax(psum) # find the class index with the highest probability sum        \n",
540
    "        klass=class_df['class'].iloc[index] # get the class name that corresponds to the index\n",
541
    "        prob=psum[index]/good_image_count * 100  # get the probability average         \n",
542
    "        # to show the correct image run predict again and select first image that has same index\n",
543
    "        for img in image_array:  #iterate through the images    \n",
544
    "            test_img=np.expand_dims(img, axis=0) # since it is a single image expand dimensions \n",
545
    "            test_index=np.argmax(model.predict(test_img)) # for this image find the class index with highest probability\n",
546
    "            if test_index== index: # see if this image has the same index as was selected previously\n",
547
    "                if verbose: # show image and print result if verbose=1\n",
548
    "                    plt.axis('off')\n",
549
    "                    plt.imshow(img) # show the image\n",
550
    "                    print (f'predicted species is {klass} with a probability of {prob:6.4f} % ')\n",
551
    "                break # found an image that represents the predicted class      \n",
552
    "        return klass, prob, img, None\n",
553
    "    else: # create individual predictions for each image\n",
554
    "        pred_class=[]\n",
555
    "        prob_list=[]\n",
556
    "        for i, p in enumerate(preds):\n",
557
    "            index=np.argmax(p) # find the class index with the highest probability\n",
558
    "            klass=class_df['class'].iloc[index] # get the class name that corresponds to the index\n",
559
    "            pred_class.append(klass)\n",
560
    "            prob_list.append(p[index])            \n",
561
    "        Fseries=pd.Series(file_list, name='image file')\n",
562
    "        Lseries=pd.Series(pred_class, name= 'species')\n",
563
    "        Pseries=pd.Series(prob_list, name='probability')\n",
564
    "        df=pd.concat([Fseries, Lseries, Pseries], axis=1)\n",
565
    "        if verbose:\n",
566
    "            print (df.head(len(df))) # print every row of the result\n",
567
    "        return None, None, None, df"
568
   ]
569
  },
570
  {
571
   "cell_type": "code",
572
   "execution_count": 10,
573
   "id": "516211e9",
574
   "metadata": {},
575
   "outputs": [],
576
   "source": [
577
    "def trim (df, max_size, min_size, column):\n",
    "    \"\"\"Limit every class in df to at most max_size rows and drop classes below min_size.\n",
    "\n",
    "    Args:\n",
    "        df: DataFrame with one row per sample; it is copied, never modified.\n",
    "        max_size: classes with more rows than this are randomly down-sampled to\n",
    "            exactly max_size rows (fixed random_state, so the draw is repeatable).\n",
    "        min_size: classes with fewer rows than this are removed entirely.\n",
    "        column: name of the column holding the class label.\n",
    "\n",
    "    Returns:\n",
    "        A new DataFrame with a fresh 0..n-1 index. It is empty (same columns)\n",
    "        when no class reaches min_size.\n",
    "    \"\"\"\n",
    "    df=df.copy()\n",
    "    original_class_count= len(list(df[column].unique()))\n",
    "    print ('Original Number of classes in dataframe: ', original_class_count)\n",
    "    sample_list=[] \n",
    "    groups=df.groupby(column)\n",
    "    for label in df[column].unique():        \n",
    "        group=groups.get_group(label)\n",
    "        sample_count=len(group)         \n",
    "        if sample_count> max_size :\n",
    "            # keep a random subset of exactly max_size rows; the remainder is discarded\n",
    "            strat=group[column]\n",
    "            samples,_=train_test_split(group, train_size=max_size, shuffle=True, random_state=123, stratify=strat)            \n",
    "            sample_list.append(samples)\n",
    "        elif sample_count>= min_size:\n",
    "            sample_list.append(group)\n",
    "    if sample_list:\n",
    "        df=pd.concat(sample_list, axis=0).reset_index(drop=True)\n",
    "    else:\n",
    "        # pd.concat raises ValueError on an empty list, so hand back an empty frame instead\n",
    "        df=df.iloc[0:0].reset_index(drop=True)\n",
    "    final_class_count= len(list(df[column].unique())) \n",
    "    if final_class_count != original_class_count:\n",
    "        print ('*** WARNING***  dataframe has a reduced number of classes' )\n",
    "    balance=list(df[column].value_counts())\n",
    "    print (balance)\n",
    "    return df"
599
   ]
600
  },
601
  {
602
   "cell_type": "code",
603
   "execution_count": 11,
604
   "id": "0b66bd08",
605
   "metadata": {},
606
   "outputs": [],
607
   "source": [
608
    "def balance(train_df,max_samples, min_samples, column, working_dir, image_size):\n",
    "    \"\"\"Trim over-represented classes, then augment the others up to max_samples images each.\n",
    "\n",
    "    Args:\n",
    "        train_df: DataFrame with image paths in 'filepaths' and class labels in `column`.\n",
    "        max_samples: target number of images per class.\n",
    "        min_samples: classes with fewer rows than this are dropped by trim().\n",
    "        column: name of the label column; it is used for every grouping and as the\n",
    "            label column of the augmented rows.\n",
    "        working_dir: directory in which the 'aug' folder is created (an existing\n",
    "            'aug' folder is deleted first).\n",
    "        image_size: passed to flow_from_dataframe as target_size.\n",
    "\n",
    "    Returns:\n",
    "        The trimmed DataFrame plus one row per file found in the 'aug' folder,\n",
    "        with a fresh index.\n",
    "    \"\"\"\n",
    "    train_df=train_df.copy()\n",
    "    train_df=trim (train_df, max_samples, min_samples, column)    \n",
    "    # make directories to store augmented images\n",
    "    aug_dir=os.path.join(working_dir, 'aug')\n",
    "    if os.path.isdir(aug_dir):\n",
    "        shutil.rmtree(aug_dir)\n",
    "    os.mkdir(aug_dir)\n",
    "    for label in train_df[column].unique():    \n",
    "        dir_path=os.path.join(aug_dir,label)    \n",
    "        os.mkdir(dir_path)\n",
    "    # create and store the augmented images  \n",
    "    total=0\n",
    "    gen=ImageDataGenerator(horizontal_flip=True,  rotation_range=20, width_shift_range=.2,\n",
    "                                  height_shift_range=.2, zoom_range=.2)\n",
    "    groups=train_df.groupby(column) # group by class\n",
    "    for label in train_df[column].unique():  # for every class               \n",
    "        group=groups.get_group(label)  # a dataframe holding only rows with the specified label \n",
    "        sample_count=len(group)   # determine how many samples there are in this class  \n",
    "        if sample_count< max_samples: # if the class has less than target number of images\n",
    "            aug_img_count=0\n",
    "            delta=max_samples-sample_count  # number of augmented images to create\n",
    "            target_dir=os.path.join(aug_dir, label)  # define where to write the images    \n",
    "            aug_gen=gen.flow_from_dataframe( group,  x_col='filepaths', y_col=None, target_size=image_size,\n",
    "                                            class_mode=None, batch_size=1, shuffle=False, \n",
    "                                            save_to_dir=target_dir, save_prefix='aug-', color_mode='rgb',\n",
    "                                            save_format='jpg')\n",
    "            # every next() call writes one batch (batch_size=1) into target_dir as a side effect\n",
    "            while aug_img_count<delta:\n",
    "                images=next(aug_gen)            \n",
    "                aug_img_count += len(images)\n",
    "            total +=aug_img_count\n",
    "    print('Total Augmented images created= ', total)\n",
    "    # create aug_df and merge with train_df to create composite training set ndf\n",
    "    if total>0:\n",
    "        aug_fpaths=[]\n",
    "        aug_labels=[]\n",
    "        classlist=os.listdir(aug_dir)\n",
    "        for klass in classlist:\n",
    "            classpath=os.path.join(aug_dir, klass)     \n",
    "            flist=os.listdir(classpath)    \n",
    "            for f in flist:        \n",
    "                fpath=os.path.join(classpath,f)         \n",
    "                aug_fpaths.append(fpath)\n",
    "                aug_labels.append(klass)\n",
    "        Fseries=pd.Series(aug_fpaths, name='filepaths')\n",
    "        # name the label column after `column` so it lines up with train_df in the concat\n",
    "        Lseries=pd.Series(aug_labels, name=column)\n",
    "        aug_df=pd.concat([Fseries, Lseries], axis=1)\n",
    "        train_df=pd.concat([train_df,aug_df], axis=0).reset_index(drop=True)\n",
    "   \n",
    "    print (list(train_df[column].value_counts()) )\n",
    "    return train_df "
659
   ]
660
  },
661
  {
662
   "cell_type": "code",
663
   "execution_count": 12,
664
   "id": "8fc60cf6",
665
   "metadata": {},
666
   "outputs": [
667
    {
668
     "name": "stdout",
669
     "output_type": "stream",
670
     "text": [
671
      "Input image shape is  (450, 450, 3)\n"
672
     ]
673
    },
674
    {
675
     "data": {
676
      "text/plain": [
677
       "<matplotlib.image.AxesImage at 0x154c002b0a0>"
678
      ]
679
     },
680
     "execution_count": 12,
681
     "metadata": {},
682
     "output_type": "execute_result"
683
    },
684
    {
685
     "data": {
686
      "image/png": "\n",
687
      "text/plain": [
688
       "<Figure size 432x288 with 1 Axes>"
689
      ]
690
     },
691
     "metadata": {},
692
     "output_type": "display_data"
693
    }
694
   ],
695
   "source": [
696
    "# Read one image from the training data, print its shape and display it.\n",
    "# NOTE(review): absolute, machine-specific Windows path; this cell fails on any other machine.\n",
    "img_path=r'C:\\Users\\Vinay Wadhwa\\Downloads\\archive\\C-NMC_Leukemia\\training_data\\fold_0\\all\\UID_11_10_1_all.bmp'\n",
696
    "img=plt.imread(img_path)\n",
697
    "print ('Input image shape is ',img.shape)\n",
698
    "plt.axis('off')\n",
699
    "imshow(img)"
701
   ]
702
  },
703
  {
704
   "cell_type": "code",
705
   "execution_count": 13,
706
   "id": "ed426a89",
707
   "metadata": {},
708
   "outputs": [],
709
   "source": [
710
    "def preprocess (sdir, trsplit, vsplit):\n",
    "    \"\"\"Index the images under sdir and split them into train, test and validation frames.\n",
    "\n",
    "    The expected layout is sdir/<fold>/<class>/<image file>; the class directory name\n",
    "    becomes the label. Entries that are not directories are ignored at the fold and\n",
    "    class levels.\n",
    "\n",
    "    Args:\n",
    "        sdir: root directory of the dataset.\n",
    "        trsplit: fraction of all images used for training.\n",
    "        vsplit: fraction of all images used for validation; whatever is left after\n",
    "            trsplit and vsplit becomes the test set.\n",
    "\n",
    "    Returns:\n",
    "        (train_df, test_df, valid_df), each with 'filepaths' and 'labels' columns.\n",
    "    \"\"\"\n",
    "    filepaths=[]\n",
    "    labels=[]    \n",
    "    folds=os.listdir(sdir)\n",
    "    for fold in folds:\n",
    "        foldpath=os.path.join(sdir,fold)\n",
    "        if not os.path.isdir(foldpath): # skip stray files such as Thumbs.db or .DS_Store\n",
    "            continue\n",
    "        classlist=os.listdir(foldpath)\n",
    "        for klass in classlist:\n",
    "            classpath=os.path.join(foldpath,klass)\n",
    "            if not os.path.isdir(classpath): # a plain file here is not a class folder\n",
    "                continue\n",
    "            flist=os.listdir(classpath)\n",
    "            for f in flist:\n",
    "                fpath=os.path.join(classpath,f)\n",
    "                filepaths.append(fpath)\n",
    "                labels.append(klass)\n",
    "    Fseries=pd.Series(filepaths, name='filepaths')\n",
    "    Lseries=pd.Series(labels, name='labels')\n",
    "    df=pd.concat([Fseries, Lseries], axis=1)            \n",
    "    # share of the held-out rows (everything that is not training data) that goes to validation\n",
    "    dsplit=vsplit/(1-trsplit)\n",
    "    strat=df['labels']\n",
    "    train_df, dummy_df=train_test_split(df, train_size=trsplit, shuffle=True, random_state=123, stratify=strat)\n",
    "    strat=dummy_df['labels']\n",
    "    valid_df, test_df= train_test_split(dummy_df, train_size=dsplit, shuffle=True, random_state=123, stratify=strat)\n",
    "    print('train_df length: ', len(train_df), '  test_df length: ',len(test_df), '  valid_df length: ', len(valid_df))\n",
    "    # check that each dataframe has the same number of classes to prevent model.fit errors\n",
    "    trcount=len(train_df['labels'].unique())\n",
    "    tecount=len(test_df['labels'].unique())\n",
    "    vcount=len(valid_df['labels'].unique())\n",
    "    if trcount < tecount :         \n",
    "        msg='** WARNING ** number of classes in training set is less than the number of classes in test set'\n",
    "        print_in_color(msg, (255,0,0), (55,65,80))\n",
    "        msg='This will throw an error in either model.evaluate or model.predict'\n",
    "        print_in_color(msg, (255,0,0), (55,65,80))\n",
    "    if trcount != vcount:\n",
    "        msg='** WARNING ** number of classes in training set not equal to number of classes in validation set' \n",
    "        print_in_color(msg, (255,0,0), (55,65,80))\n",
    "        msg=' this will throw an error in model.fit'\n",
    "        print_in_color(msg, (255,0,0), (55,65,80))\n",
    "        print ('train df class count: ', trcount, 'test df class count: ', tecount, ' valid df class count: ', vcount) \n",
    "        # blocks on user input; any answer other than H/h continues\n",
    "        ans=input('Enter C to continue execution or H to halt execution')\n",
    "        if ans =='H' or ans == 'h':\n",
    "            print_in_color('Halting Execution', (255,0,0), (55,65,80))\n",
    "            import sys\n",
    "            sys.exit('program halted by user')            \n",
    "    print(list(train_df['labels'].value_counts()))\n",
    "    return train_df, test_df, valid_df"
755
   ]
756
  },
757
  {
758
   "cell_type": "code",
759
   "execution_count": null,
760
   "id": "beb2abf0",
761
   "metadata": {},
762
   "outputs": [],
763
   "source": [
764
    "# Build the train/test/validation dataframes: 90% train, 5% validation, the remainder is the test set.\n",
    "# NOTE(review): sdir is an absolute, machine-specific Windows path.\n",
    "sdir=r'C:\\Users\\Vinay Wadhwa\\Downloads\\archive\\C-NMC_Leukemia\\training_data'\n",
764
    "trsplit=.9\n",
765
    "vsplit=.05\n",
766
    "train_df, test_df, valid_df= preprocess(sdir,trsplit, vsplit)"
768
   ]
769
  },
770
  {
771
   "cell_type": "code",
772
   "execution_count": null,
773
   "id": "6c3cd360",
774
   "metadata": {},
775
   "outputs": [],
776
   "source": [
777
    "# Cap each class of the training set at max_samples rows.\n",
    "# Only trim() runs here; balance() is not called in this cell, so no augmented images are created.\n",
    "max_samples= 3050\n",
777
    "min_samples=0\n",
778
    "column='labels'\n",
779
    "working_dir = r'./'\n",
780
    "img_size=(300,300)\n",
781
    "train_df=trim(train_df, max_samples, min_samples, column)"
783
   ]
784
  },
785
  {
786
   "cell_type": "code",
787
   "execution_count": null,
788
   "id": "e337f302",
789
   "metadata": {},
790
   "outputs": [],
791
   "source": [
792
    "# Build the train / test / validation generators from the dataframes.\n",
    "channels=3\n",
792
    "batch_size=10\n",
793
    "img_shape=(img_size[0], img_size[1], channels)\n",
794
    "length=len(test_df)\n",
795
    "# largest batch size <= 80 that divides the test set evenly, so that\n",
    "# test_batch_size * test_steps == length and every test sample is covered exactly once\n",
    "test_batch_size=sorted([int(length/n) for n in range(1,length+1) if length % n ==0 and length/n<=80],reverse=True)[0]  \n",
796
    "test_steps=int(length/test_batch_size)\n",
797
    "print ( 'test batch size: ' ,test_batch_size, '  test steps: ', test_steps)\n",
798
    "def scalar(img):    \n",
799
    "    return img  # EfficientNet expects pixels in range 0 to 255 so no scaling is required\n",
800
    "trgen=ImageDataGenerator(preprocessing_function=scalar, horizontal_flip=True)\n",
801
    "tvgen=ImageDataGenerator(preprocessing_function=scalar)\n",
802
    "# each msg is printed followed by a carriage return and no newline, so the cursor returns to the\n",
    "# start of the line; presumably the generator's own console message then lands in front of it\n",
    "msg='                                                              for the train generator'\n",
803
    "print(msg, '\\r', end='') \n",
804
    "train_gen=trgen.flow_from_dataframe( train_df, x_col='filepaths', y_col='labels', target_size=img_size, class_mode='categorical',\n",
805
    "                                    color_mode='rgb', shuffle=True, batch_size=batch_size)\n",
806
    "msg='                                                              for the test generator'\n",
807
    "print(msg, '\\r', end='') \n",
808
    "# shuffle=False keeps the test batches in dataframe order\n",
    "test_gen=tvgen.flow_from_dataframe( test_df, x_col='filepaths', y_col='labels', target_size=img_size, class_mode='categorical',\n",
809
    "                                    color_mode='rgb', shuffle=False, batch_size=test_batch_size)\n",
810
    "msg='                                                             for the validation generator'\n",
811
    "print(msg, '\\r', end='')\n",
812
    "valid_gen=tvgen.flow_from_dataframe( valid_df, x_col='filepaths', y_col='labels', target_size=img_size, class_mode='categorical',\n",
813
    "                                    color_mode='rgb', shuffle=True, batch_size=batch_size)\n",
814
    "classes=list(train_gen.class_indices.keys())\n",
815
    "class_count=len(classes)\n",
816
    "train_steps=int(np.ceil(len(train_gen.labels)/batch_size))\n",
817
    "labels=test_gen.labels"
819
   ]
820
  },
821
  {
822
   "cell_type": "code",
823
   "execution_count": null,
824
   "id": "45192270",
825
   "metadata": {},
826
   "outputs": [],
827
   "source": [
828
    "# show_image_samples is not defined in this cell; presumably an earlier cell defines it to preview training images\n",
    "show_image_samples(train_gen)"
829
   ]
830
  },
831
  {
832
   "cell_type": "code",
833
   "execution_count": null,
834
   "id": "be76b54c",
835
   "metadata": {},
836
   "outputs": [],
837
   "source": [
838
    "# Transfer-learning model: EfficientNetB3 backbone plus a small classification head.\n",
    "model_name='EfficientNetB3'\n",
838
    "# ImageNet weights, no original classifier (include_top=False); pooling='max' applies global max pooling to the backbone output\n",
    "base_model=tf.keras.applications.efficientnet.EfficientNetB3(include_top=False, weights=\"imagenet\",input_shape=img_shape, pooling='max') \n",
839
    "x=base_model.output\n",
840
    "x=keras.layers.BatchNormalization(axis=-1, momentum=0.99, epsilon=0.001 )(x)\n",
841
    "# 32-unit ReLU layer with L2 kernel and L1 activity/bias regularization\n",
    "x = Dense(32, kernel_regularizer = regularizers.l2(l = 0.016),activity_regularizer=regularizers.l1(0.006),\n",
842
    "                bias_regularizer=regularizers.l1(0.006) ,activation='relu')(x)\n",
843
    "x=Dropout(rate=.45, seed=123)(x)        \n",
844
    "# one softmax output per class found by the training generator\n",
    "output=Dense(class_count, activation='softmax')(x)\n",
845
    "model=Model(inputs=base_model.input, outputs=output)\n",
846
    "model.compile(Adamax(learning_rate=.001), loss='categorical_crossentropy', metrics=['accuracy']) "
848
   ]
849
  },
850
  {
851
   "cell_type": "code",
852
   "execution_count": null,
853
   "id": "3a18c5d9",
854
   "metadata": {},
855
   "outputs": [],
856
   "source": [
857
    "# Settings for the LRA callback (the LRA class is not defined in this cell).\n",
    "epochs =20\n",
857
    "patience= 1 # number of epochs to wait before adjusting the learning rate if the monitored value does not improve\n",
858
    "stop_patience =3 # number of epochs to wait before stopping training if the monitored value does not improve\n",
859
    "threshold=.9 # if training accuracy is < threshold monitor training accuracy, else monitor validation loss\n",
860
    "factor=.5 # factor to reduce the learning rate by\n",
861
    "dwell=True # experimental: if True and the monitored metric does not improve on the current epoch, reset the model weights to those of the previous epoch\n",
862
    "freeze=False # if True, freeze the weights of the base model; NOTE(review): not passed to LRA or used anywhere in this cell\n",
863
    "ask_epoch=5 # number of epochs to run before asking whether to halt training\n",
864
    "batches=train_steps\n",
865
    "callbacks=[LRA(model=model,base_model= base_model,patience=patience,stop_patience=stop_patience, threshold=threshold,\n",
866
    "                   factor=factor,dwell=dwell, batches=batches,initial_epoch=0,epochs=epochs, ask_epoch=ask_epoch )]"
868
   ]
869
  },
870
  {
871
   "cell_type": "code",
872
   "execution_count": null,
873
   "id": "af65bfd1",
874
   "metadata": {},
875
   "outputs": [],
876
   "source": [
877
    "# Train the model. verbose=0 turns off the built-in Keras progress output; presumably the LRA callback reports progress instead.\n",
    "# Per the Keras Model.fit docs, shuffle is ignored when x is a generator, so batch order is controlled by train_gen itself.\n",
    "history=model.fit(x=train_gen,  epochs=epochs, verbose=0, callbacks=callbacks,  validation_data=valid_gen,\n",
877
    "               validation_steps=None,  shuffle=False,  initial_epoch=0)"
879
   ]
880
  },
881
  {
882
   "cell_type": "code",
883
   "execution_count": null,
884
   "id": "bf5aff88",
885
   "metadata": {},
886
   "outputs": [],
887
   "source": [
888
    "# Predict on the test generator and pass the predictions to print_info (not defined in this cell);\n",
    "# its return value is stored as acc -- presumably the test accuracy, confirm against print_info.\n",
    "subject='leukemia'\n",
888
    "print_code=0\n",
889
    "preds=model.predict(test_gen) \n",
890
    "acc=print_info( test_gen, preds, print_code, working_dir, subject ) "
892
   ]
893
  },
894
  {
895
   "cell_type": "code",
896
   "execution_count": null,
897
   "id": "09616c28",
898
   "metadata": {},
899
   "outputs": [],
900
   "source": [
901
    "# saver is not defined in this cell; judging by the names bound here it presumably writes the model and a csv file\n",
    "# under working_dir and returns both locations -- confirm against its definition.\n",
    "model_save_loc, csv_save_loc=saver(working_dir, model, model_name, subject, acc, img_size, 1,  train_gen)"
902
   ]
903
  },
904
  {
905
   "cell_type": "code",
906
   "execution_count": null,
907
   "id": "4926bf84",
908
   "metadata": {},
909
   "outputs": [],
910
   "source": []
911
  },
912
  {
913
   "cell_type": "code",
914
   "execution_count": null,
915
   "id": "c14e1862",
916
   "metadata": {},
917
   "outputs": [],
918
   "source": []
919
  }
920
 ],
921
 "metadata": {
922
  "kernelspec": {
923
   "display_name": "Python 3 (ipykernel)",
924
   "language": "python",
925
   "name": "python3"
926
  },
927
  "language_info": {
928
   "codemirror_mode": {
929
    "name": "ipython",
930
    "version": 3
931
   },
932
   "file_extension": ".py",
933
   "mimetype": "text/x-python",
934
   "name": "python",
935
   "nbconvert_exporter": "python",
936
   "pygments_lexer": "ipython3",
937
   "version": "3.9.2"
938
  }
939
 },
940
 "nbformat": 4,
941
 "nbformat_minor": 5
942
}