Switch to unified view

a b/talk/.ipynb_checkpoints/DiseaseGroup-checkpoint.ipynb
1
{
2
 "metadata": {
3
  "celltoolbar": "Slideshow",
4
  "name": "",
5
  "signature": "sha256:5dfa5098b5370799316e25d5c2a66062a5f6b75c6502b8192cbf3896a72c9b71"
6
 },
7
 "nbformat": 3,
8
 "nbformat_minor": 0,
9
 "worksheets": [
10
  {
11
   "cells": [
12
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "import numpy as np\n",
      "import pandas as pd\n",
      "import matplotlib.pyplot as plt\n",
      "import seaborn\n",
      "from matplotlib import rcParams\n",
      "\n",
      "# Global matplotlib styling for the figure generated below.\n",
      "rcParams[\"figure.figsize\"] = (14, 8)\n",
      "rcParams[\"xtick.labelsize\"] = 12\n",
      "rcParams[\"ytick.labelsize\"] = 12\n",
      "rcParams[\"font.size\"] = 14\n",
      "rcParams[\"axes.titlesize\"] = 16\n",
      "rcParams[\"font.family\"] = \"Serif\"\n",
      "rcParams[\"figure.dpi\"] = 600\n",
      "\n",
      "# Look the palette up once instead of once per plotting call.\n",
      "palette = seaborn.color_palette(\"deep\", 8)\n",
      "\n",
      "population = pd.read_csv(\"../data/villagebay_population.csv\")\n",
      "survival = pd.read_csv(\"../data/exposure.csv\")\n",
      "\n",
      "# Two stacked panels sharing the year axis.\n",
      "fig, (ax_population, ax_survival) = plt.subplots(2, 1, sharex=True)\n",
      "\n",
      "# Top panel: line plus markers, both in the first palette colour.\n",
      "ax_population.plot(population.Year, population.VillageBay, color=palette[0], lw=3)\n",
      "ax_population.scatter(population.Year, population.VillageBay, color=palette[0], s=50)\n",
      "ax_population.set_title(\"Village Bay Population\")\n",
      "ax_population.set_ylim([180, 700])\n",
      "\n",
      "# Bottom panel: line plus markers, both in the third palette colour.\n",
      "ax_survival.plot(survival.BirthYear, survival.AvgOfLambWS, color=palette[2], lw=3)\n",
      "ax_survival.scatter(survival.BirthYear, survival.AvgOfLambWS, color=palette[2], s=50)\n",
      "ax_survival.set_title(\"Lamb Winter Survival\")\n",
      "# The x axis is shared, so these limits apply to both panels.\n",
      "ax_survival.set_xlim([1984.5, 2013.5])\n",
      "ax_survival.set_ylim([0, 0.8])\n",
      "\n",
      "# Save into figures/graphics/, which is where the slide showing this figure loads it from.\n",
      "# JPEG quality is passed through pil_kwargs; jpeg_quality is not a savefig keyword.\n",
      "fig.savefig(\"figures/graphics/population2.jpg\", dpi=300, pil_kwargs={\"quality\": 100})\n"
     ],
     "language": "python",
     "metadata": {
      "slideshow": {
       "slide_type": "skip"
      }
     },
     "outputs": [],
     "prompt_number": 30
    },
62
    {
63
     "cell_type": "heading",
64
     "level": 1,
65
     "metadata": {
66
      "slideshow": {
67
       "slide_type": "slide"
68
      }
69
     },
70
     "source": [
71
      "Robust Extraction of Quantitative Information from Histology Images"
72
     ]
73
    },
74
    {
75
     "cell_type": "heading",
76
     "level": 4,
77
     "metadata": {},
78
     "source": [
79
      "Quentin Caudron"
80
     ]
81
    },
82
    {
83
     "cell_type": "heading",
84
     "level": 2,
85
     "metadata": {
86
      "slideshow": {
87
       "slide_type": "slide"
88
      }
89
     },
90
     "source": [
91
      "The Soay Sheep"
92
     ]
93
    },
94
    {
95
     "cell_type": "markdown",
96
     "metadata": {
97
      "slideshow": {
98
       "slide_type": "subslide"
99
      }
100
     },
101
     "source": [
102
      "<img src=\"figures/graphics/soay.jpg\" />"
103
     ]
104
    },
105
    {
106
     "cell_type": "markdown",
107
     "metadata": {
108
      "slideshow": {
109
       "slide_type": "subslide"
110
      }
111
     },
112
     "source": [
113
      "<img src=\"figures/graphics/population2.jpg\" />"
114
     ]
115
    },
116
    {
117
     "cell_type": "markdown",
118
     "metadata": {
119
      "slideshow": {
120
       "slide_type": "subslide"
121
      }
122
     },
123
     "source": [
124
      "<img src=\"figures/graphics/lit1.jpg\" />"
125
     ]
126
    },
127
    {
128
     "cell_type": "markdown",
129
     "metadata": {
130
      "slideshow": {
131
       "slide_type": "fragment"
132
      }
133
     },
134
     "source": [
135
      "<img src=\"figures/graphics/lit2.jpg\" />"
136
     ]
137
    },
138
    {
139
     "cell_type": "markdown",
140
     "metadata": {
141
      "slideshow": {
142
       "slide_type": "fragment"
143
      }
144
     },
145
     "source": [
146
      "<img src=\"figures/graphics/lit4.jpg\" />"
147
     ]
148
    },
149
    {
150
     "cell_type": "heading",
151
     "level": 2,
152
     "metadata": {
153
      "slideshow": {
154
       "slide_type": "slide"
155
      }
156
     },
157
     "source": [
158
      "Outline"
159
     ]
160
    },
161
    {
162
     "cell_type": "markdown",
163
     "metadata": {},
164
     "source": [
165
      "- Methods and data collection\n",
166
      "- Image processing\n",
167
      "- Extracted measures\n",
168
      "- Preliminary analysis\n",
169
      "- Future directions"
170
     ]
171
    },
172
    {
173
     "cell_type": "heading",
174
     "level": 2,
175
     "metadata": {
176
      "slideshow": {
177
       "slide_type": "slide"
178
      }
179
     },
180
     "source": [
181
      "Data"
182
     ]
183
    },
184
    {
185
     "cell_type": "markdown",
186
     "metadata": {},
187
     "source": [
188
      "**In the field, winter of 2011 - 2012 :**\n",
189
      "    \n",
190
      "- Daily study area monitoring for deaths\n",
191
      "- 143 liver samples collected within a day of death"
192
     ]
193
    },
194
    {
195
     "cell_type": "markdown",
196
     "metadata": {
197
      "slideshow": {
198
       "slide_type": "fragment"
199
      }
200
     },
201
     "source": [
202
      "**In the lab :**\n",
203
      "\n",
204
      "- Sectioning after paraffin treatment\n",
205
      "- H&E staining of about 1000 slides"
206
     ]
207
    },
208
    {
209
     "cell_type": "markdown",
210
     "metadata": {
211
      "slideshow": {
212
       "slide_type": "fragment"
213
      }
214
     },
215
     "source": [
216
      "**Analysis :**\n",
217
      "\n",
218
      "- Pathology standard : semi-quantitative scoring\n",
219
      "- Image processing"
220
     ]
221
    },
222
    {
223
     "cell_type": "heading",
224
     "level": 3,
225
     "metadata": {
226
      "slideshow": {
227
       "slide_type": "subslide"
228
      }
229
     },
230
     "source": [
231
      "The Field &copy;"
232
     ]
233
    },
234
    {
235
     "cell_type": "markdown",
236
     "metadata": {},
237
     "source": [
238
      "Sweat-and-blood-collected in cold, cold Scotland."
239
     ]
240
    },
241
    {
242
     "cell_type": "markdown",
243
     "metadata": {
244
      "slideshow": {
245
       "slide_type": "fragment"
246
      }
247
     },
248
     "source": [
249
      "Eight physical measurements :\n",
      "\n",
250
      "- Age at death\n",
251
      "- Weight\n",
252
      "- Sex\n",
253
      "- Limb length\n",
254
      "- Environmental \"stress\""
255
     ]
256
    },
257
    {
258
     "cell_type": "heading",
259
     "level": 3,
260
     "metadata": {
261
      "slideshow": {
262
       "slide_type": "subslide"
263
      }
264
     },
265
     "source": [
266
      "Clinical Pathology"
267
     ]
268
    },
269
    {
270
     "cell_type": "markdown",
271
     "metadata": {},
272
     "source": [
273
      "Operator-driven visual analysis of 98 slides under microscopy."
274
     ]
275
    },
276
    {
277
     "cell_type": "markdown",
278
     "metadata": {
279
      "slideshow": {
280
       "slide_type": "fragment"
281
      }
282
     },
283
     "source": [
284
      "Eleven discrete and continuous measures :\n",
285
      "\n",
286
      "- Inflammation\n",
287
      "- Necrosis\n",
288
      "- Apoptosis\n",
289
      "- Hyperplasia\n",
290
      "- Fibrosis\n",
291
      "- Hepatitis"
292
     ]
293
    },
294
    {
295
     "cell_type": "heading",
296
     "level": 3,
297
     "metadata": {
298
      "slideshow": {
299
       "slide_type": "subslide"
300
      }
301
     },
302
     "source": [
303
      "Image Processing"
304
     ]
305
    },
306
    {
307
     "cell_type": "markdown",
308
     "metadata": {},
309
     "source": [
310
      "Automated analysis of 4430 images of slides representing 143 sheep."
311
     ]
312
    },
313
    {
314
     "cell_type": "markdown",
315
     "metadata": {
316
      "slideshow": {
317
       "slide_type": "fragment"
318
      }
319
     },
320
     "source": [
321
      "Seven structural and textural measures with varying levels of biological interpretation :\n",
322
      "\n",
323
      "- Inflammation\n",
324
      "- Hyperplasia / tissue density\n",
325
      "- Best-guess proxies for \"generic degeneration\""
326
     ]
327
    },
328
    {
329
     "cell_type": "heading",
330
     "level": 2,
331
     "metadata": {
332
      "slideshow": {
333
       "slide_type": "slide"
334
      }
335
     },
336
     "source": [
337
      "Image Processing"
338
     ]
339
    },
340
    {
341
     "cell_type": "markdown",
342
     "metadata": {
343
      "slideshow": {
344
       "slide_type": "subslide"
345
      }
346
     },
347
     "source": [
348
      "<img src=\"figures/graphics/sheep.jpg\" />"
349
     ]
350
    },
351
    {
352
     "cell_type": "markdown",
353
     "metadata": {
354
      "slideshow": {
355
       "slide_type": "subslide"
356
      }
357
     },
358
     "source": [
359
      "<img src=\"figures/graphics/processed.jpg\" />"
360
     ]
361
    },
362
    {
363
     "cell_type": "heading",
364
     "level": 3,
365
     "metadata": {
366
      "slideshow": {
367
       "slide_type": "subslide"
368
      }
369
     },
370
     "source": [
371
      "The Challenge"
372
     ]
373
    },
374
    {
375
     "cell_type": "markdown",
376
     "metadata": {
377
      "slideshow": {
378
       "slide_type": "-"
379
      }
380
     },
381
     "source": [
382
      "**Information extraction must be**\n",
      "\n",
383
      "- automagical - no operator input\n",
384
      "- reasonably quick - restricted computing time\n",
385
      "- robust - invariant to slicing, staining, field-related variation \n",
386
      "- unbiased - same algorithms for everyone"
387
     ]
388
    },
389
    {
390
     "cell_type": "markdown",
391
     "metadata": {
392
      "slideshow": {
393
       "slide_type": "subslide"
394
      }
395
     },
396
     "source": [
397
      "![image](figures/graphics/robust3.jpg)"
398
     ]
399
    },
400
    {
401
     "cell_type": "markdown",
402
     "metadata": {
403
      "slideshow": {
404
       "slide_type": "subslide"
405
      }
406
     },
407
     "source": [
408
      "![image](figures/graphics/robust4.jpg)"
409
     ]
410
    },
411
    {
412
     "cell_type": "markdown",
413
     "metadata": {
414
      "slideshow": {
415
       "slide_type": "subslide"
416
      }
417
     },
418
     "source": [
419
      "![image](figures/graphics/robust1.jpg)"
420
     ]
421
    },
422
    {
423
     "cell_type": "markdown",
424
     "metadata": {
425
      "slideshow": {
426
       "slide_type": "subslide"
427
      }
428
     },
429
     "source": [
430
      "![image](figures/graphics/robust2.jpg)"
431
     ]
432
    },
433
    {
434
     "cell_type": "markdown",
435
     "metadata": {
436
      "slideshow": {
437
       "slide_type": "subslide"
438
      }
439
     },
440
     "source": [
441
      "<img src=\"figures/graphics/gif.gif\" />"
442
     ]
443
    },
444
    {
445
     "cell_type": "heading",
446
     "level": 2,
447
     "metadata": {
448
      "slideshow": {
449
       "slide_type": "slide"
450
      }
451
     },
452
     "source": [
453
      "Structural and Textural Measures"
454
     ]
455
    },
456
    {
457
     "cell_type": "markdown",
458
     "metadata": {
459
      "slideshow": {
460
       "slide_type": "subslide"
461
      }
462
     },
463
     "source": [
464
      "- characteristic **scale** of sinusoid widths\n",
465
      "- **directional** amplitude of preferred sinusoid alignment\n",
466
      "- **tissue to sinusoid** ratio\n",
467
      "- **count** of inflammatory foci per image\n",
468
      "- **mean size** of inflammatory foci per image\n",
469
      "- information **entropy** of sinusoid distribution\n",
470
      "- **lacunarity** ( clustering ) of sinusoids"
471
     ]
472
    },
473
    {
474
     "cell_type": "markdown",
475
     "metadata": {
476
      "slideshow": {
477
       "slide_type": "subslide"
478
      }
479
     },
480
     "source": [
481
      "![image](figures/graphics/intra.png)"
482
     ]
483
    },
484
    {
485
     "cell_type": "markdown",
486
     "metadata": {
487
      "slideshow": {
488
       "slide_type": "subslide"
489
      }
490
     },
491
     "source": [
492
      "![image](figures/graphics/inter2.png)"
493
     ]
494
    },
495
    {
496
     "cell_type": "heading",
497
     "level": 2,
498
     "metadata": {
499
      "slideshow": {
500
       "slide_type": "slide"
501
      }
502
     },
503
     "source": [
504
      "Exploratory Analysis"
505
     ]
506
    },
507
    {
508
     "cell_type": "heading",
509
     "level": 3,
510
     "metadata": {},
511
     "source": [
512
      "by individual"
513
     ]
514
    },
515
    {
516
     "cell_type": "markdown",
517
     "metadata": {
518
      "slideshow": {
519
       "slide_type": "subslide"
520
      }
521
     },
522
     "source": [
523
      "<img src=\"figures/regressions/BDHyperplasia/lm-0.png\" />"
524
     ]
525
    },
526
    {
527
     "cell_type": "markdown",
528
     "metadata": {
529
      "slideshow": {
530
       "slide_type": "subslide"
531
      }
532
     },
533
     "source": [
534
      "<img src=\"figures/regressions/PortalInflammation/lm-0.png\" />"
535
     ]
536
    },
537
    {
538
     "cell_type": "markdown",
539
     "metadata": {
540
      "slideshow": {
541
       "slide_type": "subslide"
542
      }
543
     },
544
     "source": [
545
      "<img src=\"figures/regressions/PortalInflammation/lm-1.png\" />"
546
     ]
547
    },
548
    {
549
     "cell_type": "heading",
550
     "level": 2,
551
     "metadata": {
552
      "slideshow": {
553
       "slide_type": "slide"
554
      }
555
     },
556
     "source": [
557
      "Exploratory Analysis"
558
     ]
559
    },
560
    {
561
     "cell_type": "heading",
562
     "level": 3,
563
     "metadata": {},
564
     "source": [
565
      "controlled for age / cohort"
566
     ]
567
    },
568
    {
569
     "cell_type": "markdown",
570
     "metadata": {
571
      "slideshow": {
572
       "slide_type": "subslide"
573
      }
574
     },
575
     "source": [
576
      "<img src=\"figures/regressions/PortalInflammation/mm_0.png\" />"
577
     ]
578
    },
579
    {
580
     "cell_type": "markdown",
581
     "metadata": {
582
      "slideshow": {
583
       "slide_type": "subslide"
584
      }
585
     },
586
     "source": [
587
      "<img src=\"figures/regressions/BDHyperplasia/mm_0.png\" />"
588
     ]
589
    },
590
    {
591
     "cell_type": "markdown",
592
     "metadata": {
593
      "slideshow": {
594
       "slide_type": "subslide"
595
      }
596
     },
597
     "source": [
598
      "<img src=\"figures/regressions/BDHyperplasia/mm_1.png\" />"
599
     ]
600
    },
601
    {
602
     "cell_type": "markdown",
603
     "metadata": {
604
      "slideshow": {
605
       "slide_type": "subslide"
606
      }
607
     },
608
     "source": [
609
      "<img src=\"figures/regressions/TawfikTotal/mm_0.png\" />"
610
     ]
611
    },
612
    {
613
     "cell_type": "markdown",
614
     "metadata": {
615
      "slideshow": {
616
       "slide_type": "subslide"
617
      }
618
     },
619
     "source": [
620
      "<img src=\"figures/regressions/Fibrosis/mm_0.png\" />"
621
     ]
622
    },
623
    {
624
     "cell_type": "markdown",
625
     "metadata": {
626
      "slideshow": {
627
       "slide_type": "subslide"
628
      }
629
     },
630
     "source": [
631
      "<img src=\"figures/regressions/PortalInflammation/mm_0.png\" />"
632
     ]
633
    },
634
    {
635
     "cell_type": "markdown",
636
     "metadata": {
637
      "slideshow": {
638
       "slide_type": "subslide"
639
      }
640
     },
641
     "source": [
642
      "<img src=\"figures/regressions/Hindleg/mm_0.png\" />"
643
     ]
644
    },
645
    {
646
     "cell_type": "markdown",
647
     "metadata": {
648
      "slideshow": {
649
       "slide_type": "subslide"
650
      }
651
     },
652
     "source": [
653
      "<img src=\"figures/regressions/Weight/mm_0.png\" />"
654
     ]
655
    },
656
    {
657
     "cell_type": "heading",
658
     "level": 2,
659
     "metadata": {
660
      "slideshow": {
661
       "slide_type": "slide"
662
      }
663
     },
664
     "source": [
665
      "Further analysis"
666
     ]
667
    },
668
    {
669
     "cell_type": "heading",
670
     "level": 3,
671
     "metadata": {},
672
     "source": [
673
      "Age or cohort effect ?"
674
     ]
675
    },
676
    {
677
     "cell_type": "markdown",
678
     "metadata": {
679
      "slideshow": {
680
       "slide_type": "subslide"
681
      }
682
     },
683
     "source": [
684
      "<img src=\"figures/regressions/BDHyperplasia/mm_coefs_color_E.png\" />"
685
     ]
686
    },
687
    {
688
     "cell_type": "markdown",
689
     "metadata": {
690
      "slideshow": {
691
       "slide_type": "subslide"
692
      }
693
     },
694
     "source": [
695
      "<img src=\"figures/regressions/BDHyperplasia/mm_coefs_color_CES.png\" />"
696
     ]
697
    },
698
    {
699
     "cell_type": "markdown",
700
     "metadata": {
701
      "slideshow": {
702
       "slide_type": "subslide"
703
      }
704
     },
705
     "source": [
706
      "<img src=\"figures/regressions/BDHyperplasia/mm_coefs_color_RES.png\" />"
707
     ]
708
    },
709
    {
710
     "cell_type": "heading",
711
     "level": 2,
712
     "metadata": {
713
      "slideshow": {
714
       "slide_type": "slide"
715
      }
716
     },
717
     "source": [
718
      "Conclusions"
719
     ]
720
    },
721
    {
722
     "cell_type": "markdown",
723
     "metadata": {},
724
     "source": [
725
      "- our image measures capture **relevant** and **useful** information\n",
726
      "- a number of correlations can be **explained** biologically\n",
727
      "- underlying **structure** in the data needs thought\n",
728
      "- still no **map** from image or histological measures to condition of individual"
729
     ]
730
    },
731
    {
732
     "cell_type": "heading",
733
     "level": 2,
734
     "metadata": {
735
      "slideshow": {
736
       "slide_type": "slide"
737
      }
738
     },
739
     "source": [
740
      "Future directions"
741
     ]
742
    },
743
    {
744
     "cell_type": "heading",
745
     "level": 3,
746
     "metadata": {
747
      "slideshow": {
748
       "slide_type": "subslide"
749
      }
750
     },
751
     "source": [
752
      "Further exploration of the dataset"
753
     ]
754
    },
755
    {
756
     "cell_type": "markdown",
757
     "metadata": {
758
      "slideshow": {
759
       "slide_type": "-"
760
      }
761
     },
762
     "source": [
763
      "- 145 sheep ( 89 females )\n",
764
      "- 12 age classes\n",
765
      "- potential redundancy in various measures"
766
     ]
767
    },
768
    {
769
     "cell_type": "markdown",
770
     "metadata": {
771
      "slideshow": {
772
       "slide_type": "fragment"
773
      }
774
     },
775
     "source": [
776
      "- 4460 entries across 27 variables\n",
777
      "- 3330 with full image and histological information\n",
778
      "- 1196 for which **complete** information is available"
779
     ]
780
    },
781
    {
782
     "cell_type": "heading",
783
     "level": 3,
784
     "metadata": {
785
      "slideshow": {
786
       "slide_type": "subslide"
787
      }
788
     },
789
     "source": [
790
      "More data"
791
     ]
792
    },
793
    {
794
     "cell_type": "markdown",
795
     "metadata": {},
796
     "source": [
797
      "- nutritional information\n",
798
      "- immunity data"
799
     ]
800
    },
801
    {
802
     "cell_type": "heading",
803
     "level": 3,
804
     "metadata": {
805
      "slideshow": {
806
       "slide_type": "subslide"
807
      }
808
     },
809
     "source": [
810
      "Narrow-field images"
811
     ]
812
    },
813
    {
814
     "cell_type": "markdown",
815
     "metadata": {},
816
     "source": [
817
      "- 12536 images\n",
818
      "- spatial distribution of nuclei"
819
     ]
820
    },
821
    {
822
     "cell_type": "markdown",
823
     "metadata": {
824
      "slideshow": {
825
       "slide_type": "subslide"
826
      }
827
     },
828
     "source": [
829
      "![image](figures/graphics/10.jpg)"
830
     ]
831
    },
832
    {
833
     "cell_type": "markdown",
834
     "metadata": {
835
      "slideshow": {
836
       "slide_type": "subslide"
837
      }
838
     },
839
     "source": [
840
      "![image](figures/graphics/Processed2.jpg)"
841
     ]
842
    },
843
    {
844
     "cell_type": "markdown",
845
     "metadata": {
846
      "slideshow": {
847
       "slide_type": "subslide"
848
      }
849
     },
850
     "source": [
851
      "![image](figures/graphics/Segmented.jpg)"
852
     ]
853
    },
854
    {
855
     "cell_type": "markdown",
856
     "metadata": {
857
      "slideshow": {
858
       "slide_type": "subslide"
859
      }
860
     },
861
     "source": [
862
      "<img src=\"figures/graphics/10x.png\" width=\"100%\" />"
863
     ]
864
    },
865
    {
866
     "cell_type": "heading",
867
     "level": 2,
868
     "metadata": {
869
      "slideshow": {
870
       "slide_type": "slide"
871
      }
872
     },
873
     "source": [
874
      "With thanks to"
875
     ]
876
    },
877
    {
878
     "cell_type": "markdown",
879
     "metadata": {},
880
     "source": [
881
      "Romain Garnier\n",
882
      "\n",
883
      "Andrea Graham\n",
884
      "\n",
885
      "Tawfik Aboellail (CSU)\n",
886
      "\n",
887
      "Bryan Grenfell\n"
888
     ]
889
    }
890
   ],
891
   "metadata": {}
892
  }
893
 ]
894
}