Diff of /talk/GrahamGroup.ipynb [000000] .. [171cba]

Switch to unified view

a b/talk/GrahamGroup.ipynb
1
{
2
 "metadata": {
3
  "celltoolbar": "Slideshow",
4
  "name": "",
5
  "signature": "sha256:683ceb13e563077b8da74ca91d75c910b368c1da6290dcded253f1373c1c887b"
6
 },
7
 "nbformat": 3,
8
 "nbformat_minor": 0,
9
 "worksheets": [
10
  {
11
   "cells": [
12
    {
13
     "cell_type": "heading",
14
     "level": 1,
15
     "metadata": {
16
      "slideshow": {
17
       "slide_type": "slide"
18
      }
19
     },
20
     "source": [
21
      "Robust Extraction of Quantitative Information from Histology Images"
22
     ]
23
    },
24
    {
25
     "cell_type": "heading",
26
     "level": 4,
27
     "metadata": {},
28
     "source": [
29
      "Quentin Caudron\n",
30
      "<br /><br />\n",
31
      "\n",
32
      "Romain Garnier\n",
33
      "<br /><br />\n",
34
      "\n",
35
      "*with Bryan Grenfell and Andrea Graham*"
36
     ]
37
    },
38
    {
39
     "cell_type": "heading",
40
     "level": 3,
41
     "metadata": {
42
      "slideshow": {
43
       "slide_type": "slide"
44
      }
45
     },
46
     "source": [
47
      "Outline"
48
     ]
49
    },
50
    {
51
     "cell_type": "markdown",
52
     "metadata": {},
53
     "source": [
54
      "- Image processing\n",
55
      "- Extracted measures\n",
56
      "- Preliminary analysis\n",
57
      "- Future directions"
58
     ]
59
    },
60
    {
61
     "cell_type": "markdown",
62
     "metadata": {
63
      "slideshow": {
64
       "slide_type": "skip"
65
      }
66
     },
67
     "source": [
68
      "4. Age as random effect <---\n",
69
      "\n",
70
      "[\"interface_hepatitis\", \"confluent_necrosis\", \"portal_inflammation\", \"ln_ap_ri\"]"
71
     ]
72
    },
73
    {
74
     "cell_type": "code",
75
     "collapsed": false,
76
     "input": [
77
      "def normalise(df, skip = []) :\n",
78
      "\t\"\"\"Subtract each column's mean and divide by its standard deviation, for every column of df not named in skip. Modifies df in place and returns it.\"\"\"\n",
79
      "\tfor column in df.columns :\n",
80
      "\t\tif column in skip :\n",
81
      "\t\t\tcontinue\n",
82
      "\t\tcentred = df[column] - df[column].mean()\n",
83
      "\t\tdf[column] = centred / centred.std()\n",
84
      "\treturn df\n",
85
      "\n",
86
      "\n",
87
      "\n",
88
      "\n",
89
      "def rescale(df, skip = []) :\n",
90
      "    \"\"\"Shift each column of df not named in skip so that its minimum is 0, then divide by the shifted maximum. Modifies df in place and returns it.\"\"\"\n",
91
      "    for column in df.columns :\n",
92
      "        if column in skip :\n",
93
      "            continue\n",
94
      "        shifted = df[column] - df[column].min()\n",
95
      "        df[column] = shifted / shifted.max()\n",
96
      "    return df\n",
97
      "\n",
98
      "# Remove a layer from a list\n",
99
      "def delayer(m) :\n",
100
      "\t\"\"\"Return a new list in which every element of m that is itself a list is replaced by its own elements ( one level of nesting only ).\"\"\"\n",
101
      "\tout = []\n",
102
      "\tfor item in m :\n",
103
      "\t\tif isinstance(item, list) :\n",
104
      "\t\t\tout.extend(item)\n",
105
      "\t\telse :\n",
106
      "\t\t\tout.append(item)\n",
107
      "\treturn out\n",
108
      "\n",
109
      "\n",
110
      "\n",
111
      "\n",
112
      "\n",
113
      "\n",
114
      "\n",
115
      "# Remove all layers from a list\n",
116
      "def flatten(m) :\n",
117
      "\t\"\"\"Return a flat list built from m[:] by applying delayer until another pass changes nothing.\"\"\"\n",
118
      "\tflat = m[:]\n",
119
      "\twhile True :\n",
120
      "\t\tpeeled = delayer(flat)\n",
121
      "\t\tif peeled == flat :\n",
122
      "\t\t\treturn flat\n",
123
      "\t\tflat = peeled\n",
124
      "\n",
125
      "\n",
126
      "\n",
127
      "\n",
128
      "\n",
129
      "\n",
130
      "\n",
131
      "# Generate all combinations of objects in a list\n",
132
      "def combinatorial(l) :\n",
133
      "\t\"\"\"Return every non-empty combination of the items of l as a list of lists : all of size 1 first, then size 2, and so on up to len(l).\"\"\"\n",
134
      "\treturn [\n",
135
      "\t\tlist(combo)\n",
136
      "\t\tfor size in range(1, len(l) + 1)\n",
137
      "\t\tfor combo in itertools.combinations(l, size)\n",
138
      "\t]\n",
139
      "\n",
140
      "\n",
141
      "\n",
142
      "\n",
143
      "\n",
144
      "\n",
145
      "\n",
146
      "\n",
147
      "\n",
148
      "\n",
149
      "\n",
150
      "def pcaplot(df) :\n",
151
      "\t\"\"\"Plot the loadings of the first two principal components of df inside a unit circle, labelling each point with its column name.\"\"\"\n",
152
      "\n",
153
      "\t# PCA : each component is scaled so that its largest absolute loading is sqrt(2)/2\n",
154
      "\tpca = decomposition.PCA(whiten = True)\n",
155
      "\tpca.fit(df)\n",
156
      "\tp1 = pca.components_[0] / np.abs(pca.components_[0]).max() * np.sqrt(2)/2\n",
157
      "\tp2 = pca.components_[1] / np.abs(pca.components_[1]).max() * np.sqrt(2)/2\n",
158
      "\n",
159
      "\t# Normalise by the longest ( p1, p2 ) vector, so that no point falls outside the circle\n",
160
      "\tnorms = np.sqrt(p1**2 + p2**2).max()\n",
161
      "\tx, y = p1 / norms, p2 / norms\n",
162
      "\n",
163
      "\tplt.axes(aspect = 1)\n",
164
      "\tplt.gca().add_artist(plt.Circle( (0, 0), radius = 1, alpha = 0.2))\n",
165
      "\tplt.scatter(x, y)\n",
166
      "\tplt.xlim([-1, 1])\n",
167
      "\tplt.ylim([-1, 1])\n",
168
      "\n",
169
      "\t# Label each point where it is actually drawn, i.e. at the normalised coordinates\n",
170
      "\tfor i, text in enumerate(df.columns) :\n",
171
      "\t\tplt.annotate(text, xy = [x[i], y[i]])\n",
172
      "\n",
173
      "\tplt.tight_layout()\n",
174
      "\n",
175
      "\n",
176
      "\n",
177
      "\n",
178
      "\n",
179
      "\n",
180
      "\n",
181
      "\n",
182
      "\n",
183
      "def test_all_linear(df, y, x, return_significant = False, group = None) :\n",
184
      "\t\"\"\"Regress each dependent variable in y on every combination of the covariates in x.\n",
185
      "\n",
186
      "\tWith group = None, each fit is sm.GLS with an added Intercept column ; if return_significant\n",
187
      "\tis set, only fits whose F-test p-value is below 0.05 are kept. Returns (linmodels, fits, pval, aic),\n",
188
      "\tdicts keyed by dependent variable, where fits holds the R^2 of each kept model.\n",
189
      "\n",
190
      "\tWith a group column, each fit is a MixedLM grouped on it ( no Intercept column is added, and\n",
191
      "\treturn_significant is ignored ). Returns (linmodels, fits, pval, qsum), where fits holds\n",
192
      "\t2 * (k_fe + 1) - 2 * llf for each model, and qsum[dependent] holds the model(s) chosen among those\n",
193
      "\twithin 2 of the lowest such value : fewest fixed effects first, then lowest value.\n",
194
      "\t\"\"\"\n",
195
      "\n",
196
      "\t# All possible combinations of independent variables\n",
197
      "\tindependent = combinatorial(x)\n",
198
      "\n",
199
      "\tfits = {}\n",
200
      "\tpval = {}\n",
201
      "\tlinmodels = {}\n",
202
      "\tqsum = {}\n",
203
      "\taic = {}\n",
204
      "\n",
205
      "\t# For all dependent variables, one at a time\n",
206
      "\tfor dependent in y :\n",
207
      "\n",
208
      "\t\tprint(\"Fitting for %s.\" % dependent)\n",
209
      "\n",
210
      "\t\t# For all combinations of independent variables\n",
211
      "\t\tfor covariate in independent :\n",
212
      "\n",
213
      "\t\t\t# Fixed effects only\n",
214
      "\t\t\tif group is None :\n",
215
      "\n",
216
      "\t\t\t\t# Fit a linear model on the rows with no missing values\n",
217
      "\t\t\t\tdf2 = df[delayer([covariate, dependent])].dropna()\n",
218
      "\t\t\t\tdf2[\"Intercept\"] = np.ones(len(df2))\n",
219
      "\n",
220
      "\t\t\t\tols = sm.GLS(endog = df2[dependent], exog = df2[delayer([covariate, \"Intercept\"])]).fit()\n",
221
      "\n",
222
      "\t\t\t\t# Save the results\n",
223
      "\t\t\t\tif (return_significant and ols.f_pvalue < 0.05) or (not return_significant) :\n",
224
      "\t\t\t\t\tlinmodels.setdefault(dependent, []).append(ols)\n",
225
      "\t\t\t\t\tfits.setdefault(dependent, []).append(ols.rsquared)\n",
226
      "\t\t\t\t\tpval.setdefault(dependent, []).append(ols.f_pvalue)\n",
227
      "\t\t\t\t\taic.setdefault(dependent, []).append(ols.aic)\n",
228
      "\n",
229
      "\n",
230
      "\t\t\t# Mixed effects model\n",
231
      "\t\t\telse :\n",
232
      "\t\t\t\tdf2 = df[delayer([covariate, dependent, group])].dropna()\n",
233
      "\n",
234
      "\t\t\t\t# Fit a mixed effects model\n",
235
      "\t\t\t\tols = MixedLM(endog = df2[dependent], exog = df2[covariate], groups = df2[group]).fit()\n",
236
      "\n",
237
      "\t\t\t\t# Calculate AIC\n",
238
      "\t\t\t\tlinmodels.setdefault(dependent, []).append(ols)\n",
239
      "\t\t\t\tfits.setdefault(dependent, []).append(2 * (ols.k_fe + 1) - 2 * ols.llf)\n",
240
      "\t\t\t\tpval.setdefault(dependent, []).append(ols.pvalues)\n",
241
      "\n",
242
      "\tif group is not None :\n",
243
      "\t\tfor i in y :\n",
244
      "\t\t\tf = np.array(fits[i])\n",
245
      "\t\t\tmodels = np.array(linmodels[i])\n",
246
      "\n",
247
      "\t\t\t# Candidates : models whose AIC is within 2 of the lowest\n",
248
      "\t\t\tidx = np.where(f - f.min() <= 2)[0]\n",
249
      "\n",
250
      "\t\t\t# Of those, keep the ones with the fewest fixed effects ; bestmodels indexes f and models directly\n",
251
      "\t\t\tdof = np.array([j.k_fe for j in models[idx]])\n",
252
      "\t\t\tbestmodels = idx[dof == dof.min()]\n",
253
      "\n",
254
      "\t\t\t# Break remaining ties on AIC. The mask is relative to bestmodels, so it must index bestmodels, not idx\n",
255
      "\t\t\tqsum[i] = models[bestmodels[f[bestmodels] == f[bestmodels].min()]]\n",
256
      "\n",
257
      "\t\treturn linmodels, fits, pval, qsum\n",
258
      "\n",
259
      "\treturn linmodels, fits, pval, aic\n",
260
      "\n",
261
      "\n",
262
      "\n",
263
      "\n",
264
      "def summary(models) :\n",
265
      "\t\"\"\"Print how many regressions models holds, then the ten with the highest R^2.\n",
266
      "\n",
267
      "\tmodels maps each dependent variable to a list of fitted models.\n",
268
      "\tNOTE(review) : this reads m.r2, m.f_stat['p-value'] and m.x.columns, whereas test_all_linear reads\n",
269
      "\trsquared and f_pvalue from the fits it stores -- confirm which kind of model is passed in.\n",
270
      "\t\"\"\"\n",
271
      "\n",
272
      "\t# One entry per ( dependent variable, model ) pair\n",
273
      "\tpairs = [(name, m) for name in models.keys() for m in models[name]]\n",
274
      "\tr2 = np.array([m.r2 for name, m in pairs])\n",
275
      "\tp = np.array([m.f_stat[\"p-value\"] for name, m in pairs])\n",
276
      "\n",
277
      "\t# Highest R2 first\n",
278
      "\tidx = np.argsort(r2)[::-1]\n",
279
      "\n",
280
      "\t# Output string\n",
281
      "\ts = \"%d significant regressions.\\n\\n\" % len(r2)\n",
282
      "\ts += \"Ten most correlated :\\n\\n\"\n",
283
      "\n",
284
      "\t# The last column of m.x is left out of the printed formula\n",
285
      "\tfor i in idx[:10] :\n",
286
      "\t\tname, m = pairs[i]\n",
287
      "\t\ts += (\"%s ~ %s\\n\" % (name, \" + \".join(m.x.columns[:-1])))\n",
288
      "\t\ts += (\"R^2 = %f\\tp = %f\\n\\n\" % (r2[i], p[i]))\n",
289
      "\n",
290
      "\tprint(s)\n",
291
      "    \n",
292
      "def rstr(y, x) :\n",
293
      "    \"\"\"Build the formula string 'y ~ x1 + x2 + ...' from a dependent variable y and a sequence of terms x ; an empty x gives 'y ~ ' rather than raising IndexError.\"\"\"\n",
294
      "    return \"%s ~ \" % y + \" + \".join(str(i) for i in x)\n",
295
      "\n",
296
      "\n",
297
      "\n",
298
      "\n",
299
      "\n",
300
      "\n",
301
      "\n",
302
      "\n",
303
      "\n",
304
      "\n",
305
      "\n"
306
     ],
307
     "language": "python",
308
     "metadata": {
309
      "slideshow": {
310
       "slide_type": "skip"
311
      }
312
     },
313
     "outputs": [],
314
     "prompt_number": 3
315
    },
316
    {
317
     "cell_type": "code",
318
     "collapsed": false,
319
     "input": [
320
      "import numpy as np\n",
321
      "from sklearn.neighbors import KernelDensity\n",
322
      "from matplotlib import rcParams\n",
323
      "import matplotlib.pyplot as plt\n",
324
      "import seaborn\n",
325
      "import pandas as pd\n",
326
      "import itertools\n",
327
      "from sklearn import linear_model, ensemble, decomposition, cross_validation, preprocessing\n",
328
      "from statsmodels.regression.mixed_linear_model import MixedLM\n",
329
      "import statsmodels.api as sm\n",
330
      "from statsmodels.regression.linear_model import OLSResults\n",
331
      "from statsmodels.tools.tools import add_constant\n",
332
      "\n",
333
      "\n",
334
      "%matplotlib inline\n",
335
      "rcParams[\"figure.figsize\"] = (14, 8)\n",
336
      "\n",
337
      "\n",
338
      "# RAW DATA\n",
339
      "\n",
340
      "raw_physical = pd.read_csv(\"../data/physical.csv\")\n",
341
      "raw_histo = pd.read_csv(\"../data/tawfik.csv\")\n",
342
      "# Image measures ; axis is passed by keyword, as pandas 2.0 and later reject it positionally\n",
343
      "ent = pd.read_csv(\"../4x/results/entropy.csv\").drop([\"Unnamed: 0\"], axis = 1)\n",
344
      "foci = pd.read_csv(\"../4x/results/foci.csv\").drop([\"Unnamed: 0\"], axis = 1)\n",
345
      "lac = pd.read_csv(\"../4x/results/normalised_lacunarity.csv\").drop([\"Unnamed: 0\"], axis = 1)\n",
346
      "gabor = pd.read_csv(\"../4x/results/gabor_filters.csv\").drop([\"Unnamed: 0\"], axis = 1)\n",
347
      "ts = pd.read_csv(\"../4x/results/tissue_sinusoid_ratio.csv\").drop([\"Unnamed: 0\"], axis = 1)\n",
348
      "\n",
349
      "# Join the measures on ( Sheep, Image ), then give the foci and ratio columns descriptive names\n",
350
      "raw_image = (lac\n",
351
      "\t.merge(ent, on = [\"Sheep\", \"Image\"])\n",
352
      "\t.merge(foci, on = [\"Sheep\", \"Image\"])\n",
353
      "\t.merge(gabor, on = [\"Sheep\", \"Image\"])\n",
354
      "\t.merge(ts, on = [\"Sheep\", \"Image\"])\n",
355
      "\t.rename(columns = {\"meanSize\" : \"FociSize\", \"TSRatio\" : \"TissueToSinusoid\", \"Count\" : \"FociCount\"}))\n",
356
      "\n",
357
      "\n",
358
      "\n",
359
      "# CLEAN DATA\n",
360
      "\n",
361
      "physcols = [\"Weight\", \"Sex\", \"AgeAtDeath\", \"Foreleg\", \"Hindleg\"]\n",
362
      "imagecols = [\"Entropy\", \"Lacunarity\", \"Inflammation\", \"Scale\", \"Directionality\", \"FociCount\", \"FociSize\", \"TissueToSinusoid\"]\n",
363
      "histcols = [\"Lobular_collapse\", \"Interface_hepatitis\", \"Confluent_necrosis\", \"Ln_ap_ri\", \"Portal_inflammation\", \"BD_hyperplasia\", \"Fibrosis\", \"TawfikTotal\", \"Mean_hep_size\", \"Min_hep_size\", \"Max_hep_size\"]\n",
364
      "\n",
365
      "\n",
366
      "\n",
367
      "\n",
368
      "\n",
369
      "# IMAGE\n",
370
      "\n",
371
      "# Drop stdSize, then set FociSize to zero wherever FociCount is zero.\n",
372
      "# .loc writes into image itself ; the chained form image.FociSize[mask] = 0\n",
373
      "# may assign to a temporary copy and leave image unchanged.\n",
374
      "image = raw_image.drop(\"stdSize\", axis = 1)\n",
375
      "image.loc[image.FociCount == 0, \"FociSize\"] = 0\n",
376
      "\n",
377
      "\n",
378
      "\n",
379
      "# HISTO\n",
380
      "\n",
381
      "histo = raw_histo.drop([\"Vessels\", \"Vacuol\", \"Pigment\", \"Std_hep_size\"], axis = 1)\n",
382
      "\n",
383
      "\n",
384
      "\n",
385
      "\n",
386
      "# PHYSICAL\n",
387
      "\n",
388
      "physical = raw_physical.drop([\"CurrTag\", \"DeathDate\", \"Category\"], axis = 1)\n",
389
      "\n",
390
      "\n",
391
      "\n",
392
      "\n",
393
      "\n",
394
      "\n",
395
      "# COMPLETE DATASET\n",
396
      "\n",
397
      "# Outer joins on Sheep, so a sheep missing from one table is still kept\n",
398
      "raw_data = image.merge(histo, on = \"Sheep\", how = \"outer\").merge(physical, on = \"Sheep\", how = \"outer\")\n",
399
      "raw_data.to_csv(\"../data/tentative_complete.csv\")\n",
400
      "\n",
401
      "\n",
402
      "\n",
403
      "# AVERAGED BY SHEEP, AND BY AGE AT DEATH\n",
404
      "# data is another name for raw_data, so the Inflammation column is added to both\n",
405
      "data = raw_data\n",
406
      "data[\"Inflammation\"] = data[\"FociCount\"] * data[\"FociSize\"]\n",
407
      "sheep = rescale(data.groupby(\"Sheep\").mean())\n",
408
      "age = rescale(data.groupby(\"AgeAtDeath\").mean())\n",
409
      "\n",
410
      "\n",
411
      "\n",
412
      "\n",
413
      "\n",
414
      "\n",
415
      "\n",
416
      "# REGRESSIONS : fixed effects, grouped by sheep\n",
417
      "\n",
418
      "def gls_summary(frame, endog, exog) :\n",
419
      "\t\"\"\"Fit endog on the exog columns plus an Intercept column of ones with sm.GLS, on the rows of frame where none of those columns is missing.\n",
420
      "\n",
421
      "\tendog is a column name and exog a list of column names. Returns the fit's summary with its third table deleted.\n",
422
      "\t\"\"\"\n",
423
      "\tdf = frame[[endog] + exog].dropna()\n",
424
      "\tdf[\"Intercept\"] = np.ones(len(df))\n",
425
      "\tfitted = sm.GLS(endog = df[endog], exog = df[exog + [\"Intercept\"]]).fit().summary()\n",
426
      "\tdel fitted.tables[2]\n",
427
      "\treturn fitted\n",
428
      "\n",
429
      "\n",
430
      "portal_inflammation = gls_summary(sheep, \"Portal_inflammation\", [\"FociSize\"])\n",
431
      "hyperplasia = gls_summary(sheep, \"BD_hyperplasia\", [\"FociSize\", \"Scale\", \"Directionality\"])\n",
432
      "\n",
433
      "\n",
434
      "\n",
435
      "\n",
436
      "\n",
437
      "\n",
438
      "\n",
439
      "\n",
440
      "\n",
441
      "\n",
442
      "\n",
443
      "# REGRESSIONS : fixed effects, grouped by age\n",
444
      "\n",
445
      "maxhepsize = gls_summary(age, \"Max_hep_size\", [\"Entropy\", \"Directionality\"])\n",
446
      "lobular_collapse = gls_summary(age, \"Lobular_collapse\", [\"FociSize\"])\n",
447
      "interface_hepatitis = gls_summary(age, \"Interface_hepatitis\", [\"Lacunarity\"])\n",
448
      "fibrosis = gls_summary(age, \"Fibrosis\", [\"Inflammation\"])\n",
449
      "\n",
450
      "\n",
451
      "\n",
452
      "\n",
453
      "\n",
454
      "\n",
455
      "\n",
456
      "\n",
457
      "\n",
458
      "\n",
459
      "\n",
460
      "# PCA : first component of the histology columns regressed on first component of the image columns\n",
461
      "\n",
462
      "s = sheep.dropna(subset=delayer([imagecols, histcols]))\n",
463
      "first_component = decomposition.PCA(n_components=1)\n",
464
      "pcax = first_component.fit_transform(s[imagecols])\n",
465
      "pcay = first_component.fit_transform(s[histcols])\n",
466
      "pca = sm.GLS(endog = pcay[:, 0][:, np.newaxis], exog = add_constant(pcax)).fit().summary()\n",
467
      "del pca.tables[2]\n",
468
      "\n",
469
      "\n",
470
      "\n",
471
      "\n",
472
      "\n",
473
      "\n",
474
      "\n",
475
      "\n",
476
      "\n",
477
      "\n",
478
      "\n",
479
      "\n",
480
      "# REGRESSIONS : mixed effects, intercept on age at death\n",
481
      "\n",
482
      "# NOTE(review) : no mixed-effects model is fitted in this cell. fibrosis keeps the\n",
483
      "# fixed-effects, by-age summary computed above.\n",
484
      "# Presumably a MixedLM of Fibrosis on Inflammation with groups = AgeAtDeath is intended,\n",
485
      "# fitted on data rather than on age -- confirm before presenting it as mixed effects."
486
     ],
487
     "language": "python",
488
     "metadata": {
489
      "slideshow": {
490
       "slide_type": "skip"
491
      }
492
     },
493
     "outputs": [],
494
     "prompt_number": 14
495
    },
496
    {
497
     "cell_type": "code",
498
     "collapsed": false,
499
     "input": [
500
      "# portal_inflammation is already a Summary with tables[2] deleted ( see the setup cell ) : it has no .summary() to call, and a second del would fail\n",
501
      "a = portal_inflammation\n",
502
      "a"
503
     ],
504
     "language": "python",
505
     "metadata": {
506
      "slideshow": {
507
       "slide_type": "skip"
508
      }
509
     },
510
     "outputs": [
511
      {
512
       "html": [
513
        "<table class=\"simpletable\">\n",
514
        "<caption>GLS Regression Results</caption>\n",
515
        "<tr>\n",
516
        "  <th>Dep. Variable:</th>    <td>Portal_inflammation</td> <th>  R-squared:         </th> <td>   0.280</td>\n",
517
        "</tr>\n",
518
        "<tr>\n",
519
        "  <th>Model:</th>                    <td>GLS</td>         <th>  Adj. R-squared:    </th> <td>   0.273</td>\n",
520
        "</tr>\n",
521
        "<tr>\n",
522
        "  <th>Method:</th>              <td>Least Squares</td>    <th>  F-statistic:       </th> <td>   37.34</td>\n",
523
        "</tr>\n",
524
        "<tr>\n",
525
        "  <th>Date:</th>              <td>Tue, 28 Oct 2014</td>   <th>  Prob (F-statistic):</th> <td>2.12e-08</td>\n",
526
        "</tr>\n",
527
        "<tr>\n",
528
        "  <th>Time:</th>                  <td>23:35:30</td>       <th>  Log-Likelihood:    </th> <td>  14.996</td>\n",
529
        "</tr>\n",
530
        "<tr>\n",
531
        "  <th>No. Observations:</th>       <td>    98</td>        <th>  AIC:               </th> <td>  -25.99</td>\n",
532
        "</tr>\n",
533
        "<tr>\n",
534
        "  <th>Df Residuals:</th>           <td>    96</td>        <th>  BIC:               </th> <td>  -20.82</td>\n",
535
        "</tr>\n",
536
        "<tr>\n",
537
        "  <th>Df Model:</th>               <td>     1</td>        <th>                     </th>     <td> </td>   \n",
538
        "</tr>\n",
539
        "<tr>\n",
540
        "  <th>Covariance Type:</th>       <td>nonrobust</td>      <th>                     </th>     <td> </td>   \n",
541
        "</tr>\n",
542
        "</table>\n",
543
        "<table class=\"simpletable\">\n",
544
        "<tr>\n",
545
        "      <td></td>         <th>coef</th>     <th>std err</th>      <th>t</th>      <th>P>|t|</th> <th>[95.0% Conf. Int.]</th> \n",
546
        "</tr>\n",
547
        "<tr>\n",
548
        "  <th>FociSize</th>  <td>    0.5627</td> <td>    0.092</td> <td>    6.111</td> <td> 0.000</td> <td>    0.380     0.746</td>\n",
549
        "</tr>\n",
550
        "<tr>\n",
551
        "  <th>Intercept</th> <td>    0.3368</td> <td>    0.043</td> <td>    7.855</td> <td> 0.000</td> <td>    0.252     0.422</td>\n",
552
        "</tr>\n",
553
        "</table>"
554
       ],
555
       "metadata": {},
556
       "output_type": "pyout",
557
       "prompt_number": 19,
558
       "text": [
559
        "<class 'statsmodels.iolib.summary.Summary'>\n",
560
        "\"\"\"\n",
561
        "                             GLS Regression Results                            \n",
562
        "===============================================================================\n",
563
        "Dep. Variable:     Portal_inflammation   R-squared:                       0.280\n",
564
        "Model:                             GLS   Adj. R-squared:                  0.273\n",
565
        "Method:                  Least Squares   F-statistic:                     37.34\n",
566
        "Date:                 Tue, 28 Oct 2014   Prob (F-statistic):           2.12e-08\n",
567
        "Time:                         23:35:30   Log-Likelihood:                 14.996\n",
568
        "No. Observations:                   98   AIC:                            -25.99\n",
569
        "Df Residuals:                       96   BIC:                            -20.82\n",
570
        "Df Model:                            1                                         \n",
571
        "Covariance Type:             nonrobust                                         \n",
572
        "==============================================================================\n",
573
        "                 coef    std err          t      P>|t|      [95.0% Conf. Int.]\n",
574
        "------------------------------------------------------------------------------\n",
575
        "FociSize       0.5627      0.092      6.111      0.000         0.380     0.746\n",
576
        "Intercept      0.3368      0.043      7.855      0.000         0.252     0.422\n",
577
        "==============================================================================\n",
578
        "\n",
579
        "Warnings:\n",
580
        "[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n",
581
        "\"\"\""
582
       ]
583
      }
584
     ],
585
     "prompt_number": 19
586
    },
587
    {
588
     "cell_type": "heading",
589
     "level": 2,
590
     "metadata": {
591
      "slideshow": {
592
       "slide_type": "slide"
593
      }
594
     },
595
     "source": [
596
      "Image Processing"
597
     ]
598
    },
599
    {
600
     "cell_type": "markdown",
601
     "metadata": {
602
      "slideshow": {
603
       "slide_type": "subslide"
604
      }
605
     },
606
     "source": [
607
      "<img src=\"figures/sheep.jpg\"></img>"
608
     ]
609
    },
610
    {
611
     "cell_type": "markdown",
612
     "metadata": {
613
      "slideshow": {
614
       "slide_type": "subslide"
615
      }
616
     },
617
     "source": [
618
      "<img src=\"figures/processed.jpg\"></img>"
619
     ]
620
    },
621
    {
622
     "cell_type": "heading",
623
     "level": 3,
624
     "metadata": {
625
      "slideshow": {
626
       "slide_type": "subslide"
627
      }
628
     },
629
     "source": [
630
      "Extraction"
631
     ]
632
    },
633
    {
634
     "cell_type": "markdown",
635
     "metadata": {
636
      "slideshow": {
637
       "slide_type": "-"
638
      }
639
     },
640
     "source": [
641
      "- Automagical\n",
642
      "- Reasonably quick"
643
     ]
644
    },
645
    {
646
     "cell_type": "heading",
647
     "level": 3,
648
     "metadata": {
649
      "slideshow": {
650
       "slide_type": "subslide"
651
      }
652
     },
653
     "source": [
654
      "Robust"
655
     ]
656
    },
657
    {
658
     "cell_type": "markdown",
659
     "metadata": {
660
      "slideshow": {
661
       "slide_type": "-"
662
      }
663
     },
664
     "source": [
665
      "- Invariant to staining, slicing, field-related variation\n",
666
      "- Capture intersample variation"
667
     ]
668
    },
669
    {
670
     "cell_type": "markdown",
671
     "metadata": {
672
      "slideshow": {
673
       "slide_type": "subslide"
674
      }
675
     },
676
     "source": [
677
      "![image](figures/robust3.jpg)"
678
     ]
679
    },
680
    {
681
     "cell_type": "markdown",
682
     "metadata": {
683
      "slideshow": {
684
       "slide_type": "subslide"
685
      }
686
     },
687
     "source": [
688
      "![image](figures/robust4.jpg)"
689
     ]
690
    },
691
    {
692
     "cell_type": "markdown",
693
     "metadata": {
694
      "slideshow": {
695
       "slide_type": "subslide"
696
      }
697
     },
698
     "source": [
699
      "![image](figures/robust1.jpg)"
700
     ]
701
    },
702
    {
703
     "cell_type": "markdown",
704
     "metadata": {
705
      "slideshow": {
706
       "slide_type": "subslide"
707
      }
708
     },
709
     "source": [
710
      "![image](figures/robust2.jpg)"
711
     ]
712
    },
713
    {
714
     "cell_type": "heading",
715
     "level": 2,
716
     "metadata": {
717
      "slideshow": {
718
       "slide_type": "slide"
719
      }
720
     },
721
     "source": [
722
      "Structural and Textural Measures"
723
     ]
724
    },
725
    {
726
     "cell_type": "markdown",
727
     "metadata": {
728
      "slideshow": {
729
       "slide_type": "subslide"
730
      }
731
     },
732
     "source": [
733
      "- characteristic **scale** of sinusoid widths\n",
734
      "- **directional** amplitude of preferred sinusoid alignment\n",
735
      "- **tissue to sinusoid** ratio\n",
736
      "- **count** of inflammatory foci per image\n",
737
      "- **mean size** of inflammatory foci per image\n",
738
      "- information **entropy** of sinusoid distribution\n",
739
      "- **lacunarity** ( clustering ) of sinusoids"
740
     ]
741
    },
742
    {
743
     "cell_type": "markdown",
744
     "metadata": {
745
      "slideshow": {
746
       "slide_type": "subslide"
747
      }
748
     },
749
     "source": [
750
      "<img src=\"figures/gif.gif\"></img>"
751
     ]
752
    },
753
    {
754
     "cell_type": "markdown",
755
     "metadata": {
756
      "slideshow": {
757
       "slide_type": "subslide"
758
      }
759
     },
760
     "source": [
761
      "![image](figures/intra.png)"
762
     ]
763
    },
764
    {
765
     "cell_type": "markdown",
766
     "metadata": {
767
      "slideshow": {
768
       "slide_type": "subslide"
769
      }
770
     },
771
     "source": [
772
      "![image](figures/inter2.png)"
773
     ]
774
    },
775
    {
776
     "cell_type": "heading",
777
     "level": 2,
778
     "metadata": {
779
      "slideshow": {
780
       "slide_type": "slide"
781
      }
782
     },
783
     "source": [
784
      "Exploratory Analysis"
785
     ]
786
    },
787
    {
788
     "cell_type": "heading",
789
     "level": 3,
790
     "metadata": {},
791
     "source": [
792
      "by individual"
793
     ]
794
    },
795
    {
796
     "cell_type": "code",
797
     "collapsed": false,
798
     "input": [
799
      "portal_inflammation"
800
     ],
801
     "language": "python",
802
     "metadata": {
803
      "slideshow": {
804
       "slide_type": "subslide"
805
      }
806
     },
807
     "outputs": [
808
      {
809
       "html": [
810
        "<table class=\"simpletable\">\n",
811
        "<caption>GLS Regression Results</caption>\n",
812
        "<tr>\n",
813
        "  <th>Dep. Variable:</th>    <td>Portal_inflammation</td> <th>  R-squared:         </th> <td>   0.280</td>\n",
814
        "</tr>\n",
815
        "<tr>\n",
816
        "  <th>Model:</th>                    <td>GLS</td>         <th>  Adj. R-squared:    </th> <td>   0.273</td>\n",
817
        "</tr>\n",
818
        "<tr>\n",
819
        "  <th>Method:</th>              <td>Least Squares</td>    <th>  F-statistic:       </th> <td>   37.34</td>\n",
820
        "</tr>\n",
821
        "<tr>\n",
822
        "  <th>Date:</th>              <td>Tue, 28 Oct 2014</td>   <th>  Prob (F-statistic):</th> <td>2.12e-08</td>\n",
823
        "</tr>\n",
824
        "<tr>\n",
825
        "  <th>Time:</th>                  <td>23:40:10</td>       <th>  Log-Likelihood:    </th> <td>  14.996</td>\n",
826
        "</tr>\n",
827
        "<tr>\n",
828
        "  <th>No. Observations:</th>       <td>    98</td>        <th>  AIC:               </th> <td>  -25.99</td>\n",
829
        "</tr>\n",
830
        "<tr>\n",
831
        "  <th>Df Residuals:</th>           <td>    96</td>        <th>  BIC:               </th> <td>  -20.82</td>\n",
832
        "</tr>\n",
833
        "<tr>\n",
834
        "  <th>Df Model:</th>               <td>     1</td>        <th>                     </th>     <td> </td>   \n",
835
        "</tr>\n",
836
        "<tr>\n",
837
        "  <th>Covariance Type:</th>       <td>nonrobust</td>      <th>                     </th>     <td> </td>   \n",
838
        "</tr>\n",
839
        "</table>\n",
840
        "<table class=\"simpletable\">\n",
841
        "<tr>\n",
842
        "      <td></td>         <th>coef</th>     <th>std err</th>      <th>t</th>      <th>P>|t|</th> <th>[95.0% Conf. Int.]</th> \n",
843
        "</tr>\n",
844
        "<tr>\n",
845
        "  <th>FociSize</th>  <td>    0.5627</td> <td>    0.092</td> <td>    6.111</td> <td> 0.000</td> <td>    0.380     0.746</td>\n",
846
        "</tr>\n",
847
        "<tr>\n",
848
        "  <th>Intercept</th> <td>    0.3368</td> <td>    0.043</td> <td>    7.855</td> <td> 0.000</td> <td>    0.252     0.422</td>\n",
849
        "</tr>\n",
850
        "</table>"
851
       ],
852
       "metadata": {},
853
       "output_type": "pyout",
854
       "prompt_number": 29,
855
       "text": [
856
        "<class 'statsmodels.iolib.summary.Summary'>\n",
857
        "\"\"\"\n",
858
        "                             GLS Regression Results                            \n",
859
        "===============================================================================\n",
860
        "Dep. Variable:     Portal_inflammation   R-squared:                       0.280\n",
861
        "Model:                             GLS   Adj. R-squared:                  0.273\n",
862
        "Method:                  Least Squares   F-statistic:                     37.34\n",
863
        "Date:                 Tue, 28 Oct 2014   Prob (F-statistic):           2.12e-08\n",
864
        "Time:                         23:40:10   Log-Likelihood:                 14.996\n",
865
        "No. Observations:                   98   AIC:                            -25.99\n",
866
        "Df Residuals:                       96   BIC:                            -20.82\n",
867
        "Df Model:                            1                                         \n",
868
        "Covariance Type:             nonrobust                                         \n",
869
        "==============================================================================\n",
870
        "                 coef    std err          t      P>|t|      [95.0% Conf. Int.]\n",
871
        "------------------------------------------------------------------------------\n",
872
        "FociSize       0.5627      0.092      6.111      0.000         0.380     0.746\n",
873
        "Intercept      0.3368      0.043      7.855      0.000         0.252     0.422\n",
874
        "==============================================================================\n",
875
        "\n",
876
        "Warnings:\n",
877
        "[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n",
878
        "\"\"\""
879
       ]
880
      }
881
     ],
882
     "prompt_number": 29
883
    },
884
    {
885
     "cell_type": "markdown",
886
     "metadata": {
887
      "slideshow": {
888
       "slide_type": "subslide"
889
      }
890
     },
891
     "source": [
892
      "![image](figures/portal_inflammation.png)"
893
     ]
894
    },
895
    {
896
     "cell_type": "code",
897
     "collapsed": false,
898
     "input": [
899
      "hyperplasia"
900
     ],
901
     "language": "python",
902
     "metadata": {
903
      "slideshow": {
904
       "slide_type": "subslide"
905
      }
906
     },
907
     "outputs": [
908
      {
909
       "html": [
910
        "<table class=\"simpletable\">\n",
911
        "<caption>GLS Regression Results</caption>\n",
912
        "<tr>\n",
913
        "  <th>Dep. Variable:</th>     <td>BD_hyperplasia</td>  <th>  R-squared:         </th> <td>   0.306</td>\n",
914
        "</tr>\n",
915
        "<tr>\n",
916
        "  <th>Model:</th>                   <td>GLS</td>       <th>  Adj. R-squared:    </th> <td>   0.284</td>\n",
917
        "</tr>\n",
918
        "<tr>\n",
919
        "  <th>Method:</th>             <td>Least Squares</td>  <th>  F-statistic:       </th> <td>   13.83</td>\n",
920
        "</tr>\n",
921
        "<tr>\n",
922
        "  <th>Date:</th>             <td>Tue, 28 Oct 2014</td> <th>  Prob (F-statistic):</th> <td>1.52e-07</td>\n",
923
        "</tr>\n",
924
        "<tr>\n",
925
        "  <th>Time:</th>                 <td>23:40:10</td>     <th>  Log-Likelihood:    </th> <td> -3.9632</td>\n",
926
        "</tr>\n",
927
        "<tr>\n",
928
        "  <th>No. Observations:</th>      <td>    98</td>      <th>  AIC:               </th> <td>   15.93</td>\n",
929
        "</tr>\n",
930
        "<tr>\n",
931
        "  <th>Df Residuals:</th>          <td>    94</td>      <th>  BIC:               </th> <td>   26.27</td>\n",
932
        "</tr>\n",
933
        "<tr>\n",
934
        "  <th>Df Model:</th>              <td>     3</td>      <th>                     </th>     <td> </td>   \n",
935
        "</tr>\n",
936
        "<tr>\n",
937
        "  <th>Covariance Type:</th>      <td>nonrobust</td>    <th>                     </th>     <td> </td>   \n",
938
        "</tr>\n",
939
        "</table>\n",
940
        "<table class=\"simpletable\">\n",
941
        "<tr>\n",
942
        "         <td></td>           <th>coef</th>     <th>std err</th>      <th>t</th>      <th>P>|t|</th> <th>[95.0% Conf. Int.]</th> \n",
943
        "</tr>\n",
944
        "<tr>\n",
945
        "  <th>FociSize</th>       <td>    0.6698</td> <td>    0.113</td> <td>    5.902</td> <td> 0.000</td> <td>    0.444     0.895</td>\n",
946
        "</tr>\n",
947
        "<tr>\n",
948
        "  <th>Scale</th>          <td>    0.5811</td> <td>    0.243</td> <td>    2.394</td> <td> 0.019</td> <td>    0.099     1.063</td>\n",
949
        "</tr>\n",
950
        "<tr>\n",
951
        "  <th>Directionality</th> <td>   -0.4419</td> <td>    0.190</td> <td>   -2.330</td> <td> 0.022</td> <td>   -0.819    -0.065</td>\n",
952
        "</tr>\n",
953
        "<tr>\n",
954
        "  <th>Intercept</th>      <td>   -0.0504</td> <td>    0.079</td> <td>   -0.642</td> <td> 0.523</td> <td>   -0.206     0.105</td>\n",
955
        "</tr>\n",
956
        "</table>"
957
       ],
958
       "metadata": {},
959
       "output_type": "pyout",
960
       "prompt_number": 31,
961
       "text": [
962
        "<class 'statsmodels.iolib.summary.Summary'>\n",
963
        "\"\"\"\n",
964
        "                            GLS Regression Results                            \n",
965
        "==============================================================================\n",
966
        "Dep. Variable:         BD_hyperplasia   R-squared:                       0.306\n",
967
        "Model:                            GLS   Adj. R-squared:                  0.284\n",
968
        "Method:                 Least Squares   F-statistic:                     13.83\n",
969
        "Date:                Tue, 28 Oct 2014   Prob (F-statistic):           1.52e-07\n",
970
        "Time:                        23:40:10   Log-Likelihood:                -3.9632\n",
971
        "No. Observations:                  98   AIC:                             15.93\n",
972
        "Df Residuals:                      94   BIC:                             26.27\n",
973
        "Df Model:                           3                                         \n",
974
        "Covariance Type:            nonrobust                                         \n",
975
        "==================================================================================\n",
976
        "                     coef    std err          t      P>|t|      [95.0% Conf. Int.]\n",
977
        "----------------------------------------------------------------------------------\n",
978
        "FociSize           0.6698      0.113      5.902      0.000         0.444     0.895\n",
979
        "Scale              0.5811      0.243      2.394      0.019         0.099     1.063\n",
980
        "Directionality    -0.4419      0.190     -2.330      0.022        -0.819    -0.065\n",
981
        "Intercept         -0.0504      0.079     -0.642      0.523        -0.206     0.105\n",
982
        "==================================================================================\n",
983
        "\n",
984
        "Warnings:\n",
985
        "[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n",
986
        "\"\"\""
987
       ]
988
      }
989
     ],
990
     "prompt_number": 31
991
    },
992
    {
993
     "cell_type": "markdown",
994
     "metadata": {
995
      "slideshow": {
996
       "slide_type": "subslide"
997
      }
998
     },
999
     "source": [
1000
      "![image](figures/hyperplasia.png)"
1001
     ]
1002
    },
1003
    {
1004
     "cell_type": "code",
1005
     "collapsed": false,
1006
     "input": [
1007
      "pca"
1008
     ],
1009
     "language": "python",
1010
     "metadata": {
1011
      "slideshow": {
1012
       "slide_type": "subslide"
1013
      }
1014
     },
1015
     "outputs": [
1016
      {
1017
       "html": [
1018
        "<table class=\"simpletable\">\n",
1019
        "<caption>GLS Regression Results</caption>\n",
1020
        "<tr>\n",
1021
        "  <th>Dep. Variable:</th>            <td>y</td>        <th>  R-squared:         </th> <td>   0.075</td>\n",
1022
        "</tr>\n",
1023
        "<tr>\n",
1024
        "  <th>Model:</th>                   <td>GLS</td>       <th>  Adj. R-squared:    </th> <td>   0.065</td>\n",
1025
        "</tr>\n",
1026
        "<tr>\n",
1027
        "  <th>Method:</th>             <td>Least Squares</td>  <th>  F-statistic:       </th> <td>   7.723</td>\n",
1028
        "</tr>\n",
1029
        "<tr>\n",
1030
        "  <th>Date:</th>             <td>Wed, 29 Oct 2014</td> <th>  Prob (F-statistic):</th>  <td>0.00657</td>\n",
1031
        "</tr>\n",
1032
        "<tr>\n",
1033
        "  <th>Time:</th>                 <td>14:38:47</td>     <th>  Log-Likelihood:    </th> <td> -70.082</td>\n",
1034
        "</tr>\n",
1035
        "<tr>\n",
1036
        "  <th>No. Observations:</th>      <td>    97</td>      <th>  AIC:               </th> <td>   144.2</td>\n",
1037
        "</tr>\n",
1038
        "<tr>\n",
1039
        "  <th>Df Residuals:</th>          <td>    95</td>      <th>  BIC:               </th> <td>   149.3</td>\n",
1040
        "</tr>\n",
1041
        "<tr>\n",
1042
        "  <th>Df Model:</th>              <td>     1</td>      <th>                     </th>     <td> </td>   \n",
1043
        "</tr>\n",
1044
        "<tr>\n",
1045
        "  <th>Covariance Type:</th>      <td>nonrobust</td>    <th>                     </th>     <td> </td>   \n",
1046
        "</tr>\n",
1047
        "</table>\n",
1048
        "<table class=\"simpletable\">\n",
1049
        "<tr>\n",
1050
        "    <td></td>       <th>coef</th>     <th>std err</th>      <th>t</th>      <th>P>|t|</th> <th>[95.0% Conf. Int.]</th> \n",
1051
        "</tr>\n",
1052
        "<tr>\n",
1053
        "  <th>const</th> <td>-2.949e-17</td> <td>    0.051</td> <td>-5.77e-16</td> <td> 1.000</td> <td>   -0.102     0.102</td>\n",
1054
        "</tr>\n",
1055
        "<tr>\n",
1056
        "  <th>x1</th>    <td>    0.3865</td> <td>    0.139</td> <td>    2.779</td> <td> 0.007</td> <td>    0.110     0.663</td>\n",
1057
        "</tr>\n",
1058
        "</table>"
1059
       ],
1060
       "metadata": {},
1061
       "output_type": "pyout",
1062
       "prompt_number": 15,
1063
       "text": [
1064
        "<class 'statsmodels.iolib.summary.Summary'>\n",
1065
        "\"\"\"\n",
1066
        "                            GLS Regression Results                            \n",
1067
        "==============================================================================\n",
1068
        "Dep. Variable:                      y   R-squared:                       0.075\n",
1069
        "Model:                            GLS   Adj. R-squared:                  0.065\n",
1070
        "Method:                 Least Squares   F-statistic:                     7.723\n",
1071
        "Date:                Wed, 29 Oct 2014   Prob (F-statistic):            0.00657\n",
1072
        "Time:                        14:38:47   Log-Likelihood:                -70.082\n",
1073
        "No. Observations:                  97   AIC:                             144.2\n",
1074
        "Df Residuals:                      95   BIC:                             149.3\n",
1075
        "Df Model:                           1                                         \n",
1076
        "Covariance Type:            nonrobust                                         \n",
1077
        "==============================================================================\n",
1078
        "                 coef    std err          t      P>|t|      [95.0% Conf. Int.]\n",
1079
        "------------------------------------------------------------------------------\n",
1080
        "const      -2.949e-17      0.051  -5.77e-16      1.000        -0.102     0.102\n",
1081
        "x1             0.3865      0.139      2.779      0.007         0.110     0.663\n",
1082
        "==============================================================================\n",
1083
        "\n",
1084
        "Warnings:\n",
1085
        "[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n",
1086
        "\"\"\""
1087
       ]
1088
      }
1089
     ],
1090
     "prompt_number": 15
1091
    },
1092
    {
1093
     "cell_type": "markdown",
1094
     "metadata": {
1095
      "slideshow": {
1096
       "slide_type": "subslide"
1097
      }
1098
     },
1099
     "source": [
1100
      "![image](figures/pca.png)"
1101
     ]
1102
    },
1103
    {
1104
     "cell_type": "heading",
1105
     "level": 2,
1106
     "metadata": {
1107
      "slideshow": {
1108
       "slide_type": "slide"
1109
      }
1110
     },
1111
     "source": [
1112
      "Exploratory analysis"
1113
     ]
1114
    },
1115
    {
1116
     "cell_type": "heading",
1117
     "level": 3,
1118
     "metadata": {},
1119
     "source": [
1120
      "by age class"
1121
     ]
1122
    },
1123
    {
1124
     "cell_type": "code",
1125
     "collapsed": false,
1126
     "input": [
1127
      "fibrosis"
1128
     ],
1129
     "language": "python",
1130
     "metadata": {
1131
      "slideshow": {
1132
       "slide_type": "subslide"
1133
      }
1134
     },
1135
     "outputs": [
1136
      {
1137
       "html": [
1138
        "<table class=\"simpletable\">\n",
1139
        "<caption>GLS Regression Results</caption>\n",
1140
        "<tr>\n",
1141
        "  <th>Dep. Variable:</th>        <td>Fibrosis</td>     <th>  R-squared:         </th> <td>   0.800</td>\n",
1142
        "</tr>\n",
1143
        "<tr>\n",
1144
        "  <th>Model:</th>                   <td>GLS</td>       <th>  Adj. R-squared:    </th> <td>   0.778</td>\n",
1145
        "</tr>\n",
1146
        "<tr>\n",
1147
        "  <th>Method:</th>             <td>Least Squares</td>  <th>  F-statistic:       </th> <td>   36.07</td>\n",
1148
        "</tr>\n",
1149
        "<tr>\n",
1150
        "  <th>Date:</th>             <td>Wed, 29 Oct 2014</td> <th>  Prob (F-statistic):</th> <td>0.000201</td>\n",
1151
        "</tr>\n",
1152
        "<tr>\n",
1153
        "  <th>Time:</th>                 <td>11:13:48</td>     <th>  Log-Likelihood:    </th> <td>  7.8003</td>\n",
1154
        "</tr>\n",
1155
        "<tr>\n",
1156
        "  <th>No. Observations:</th>      <td>    11</td>      <th>  AIC:               </th> <td>  -11.60</td>\n",
1157
        "</tr>\n",
1158
        "<tr>\n",
1159
        "  <th>Df Residuals:</th>          <td>     9</td>      <th>  BIC:               </th> <td>  -10.80</td>\n",
1160
        "</tr>\n",
1161
        "<tr>\n",
1162
        "  <th>Df Model:</th>              <td>     1</td>      <th>                     </th>     <td> </td>   \n",
1163
        "</tr>\n",
1164
        "<tr>\n",
1165
        "  <th>Covariance Type:</th>      <td>nonrobust</td>    <th>                     </th>     <td> </td>   \n",
1166
        "</tr>\n",
1167
        "</table>\n",
1168
        "<table class=\"simpletable\">\n",
1169
        "<tr>\n",
1170
        "        <td></td>          <th>coef</th>     <th>std err</th>      <th>t</th>      <th>P>|t|</th> <th>[95.0% Conf. Int.]</th> \n",
1171
        "</tr>\n",
1172
        "<tr>\n",
1173
        "  <th>Inflammation</th> <td>    1.0159</td> <td>    0.169</td> <td>    6.006</td> <td> 0.000</td> <td>    0.633     1.399</td>\n",
1174
        "</tr>\n",
1175
        "<tr>\n",
1176
        "  <th>Intercept</th>    <td>   -0.0105</td> <td>    0.083</td> <td>   -0.126</td> <td> 0.902</td> <td>   -0.198     0.177</td>\n",
1177
        "</tr>\n",
1178
        "</table>"
1179
       ],
1180
       "metadata": {},
1181
       "output_type": "pyout",
1182
       "prompt_number": 6,
1183
       "text": [
1184
        "<class 'statsmodels.iolib.summary.Summary'>\n",
1185
        "\"\"\"\n",
1186
        "                            GLS Regression Results                            \n",
1187
        "==============================================================================\n",
1188
        "Dep. Variable:               Fibrosis   R-squared:                       0.800\n",
1189
        "Model:                            GLS   Adj. R-squared:                  0.778\n",
1190
        "Method:                 Least Squares   F-statistic:                     36.07\n",
1191
        "Date:                Wed, 29 Oct 2014   Prob (F-statistic):           0.000201\n",
1192
        "Time:                        11:13:48   Log-Likelihood:                 7.8003\n",
1193
        "No. Observations:                  11   AIC:                            -11.60\n",
1194
        "Df Residuals:                       9   BIC:                            -10.80\n",
1195
        "Df Model:                           1                                         \n",
1196
        "Covariance Type:            nonrobust                                         \n",
1197
        "================================================================================\n",
1198
        "                   coef    std err          t      P>|t|      [95.0% Conf. Int.]\n",
1199
        "--------------------------------------------------------------------------------\n",
1200
        "Inflammation     1.0159      0.169      6.006      0.000         0.633     1.399\n",
1201
        "Intercept       -0.0105      0.083     -0.126      0.902        -0.198     0.177\n",
1202
        "================================================================================\n",
1203
        "\n",
1204
        "Warnings:\n",
1205
        "[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n",
1206
        "\"\"\""
1207
       ]
1208
      }
1209
     ],
1210
     "prompt_number": 6
1211
    },
1212
    {
1213
     "cell_type": "markdown",
1214
     "metadata": {
1215
      "slideshow": {
1216
       "slide_type": "subslide"
1217
      }
1218
     },
1219
     "source": [
1220
      "![image](figures/fibrosis.png)"
1221
     ]
1222
    },
1223
    {
1224
     "cell_type": "code",
1225
     "collapsed": false,
1226
     "input": [
1227
      "lobular_collapse"
1228
     ],
1229
     "language": "python",
1230
     "metadata": {
1231
      "slideshow": {
1232
       "slide_type": "subslide"
1233
      }
1234
     },
1235
     "outputs": [
1236
      {
1237
       "html": [
1238
        "<table class=\"simpletable\">\n",
1239
        "<caption>GLS Regression Results</caption>\n",
1240
        "<tr>\n",
1241
        "  <th>Dep. Variable:</th>    <td>Lobular_collapse</td> <th>  R-squared:         </th> <td>   0.586</td>\n",
1242
        "</tr>\n",
1243
        "<tr>\n",
1244
        "  <th>Model:</th>                   <td>GLS</td>       <th>  Adj. R-squared:    </th> <td>   0.540</td>\n",
1245
        "</tr>\n",
1246
        "<tr>\n",
1247
        "  <th>Method:</th>             <td>Least Squares</td>  <th>  F-statistic:       </th> <td>   12.73</td>\n",
1248
        "</tr>\n",
1249
        "<tr>\n",
1250
        "  <th>Date:</th>             <td>Wed, 29 Oct 2014</td> <th>  Prob (F-statistic):</th>  <td>0.00605</td>\n",
1251
        "</tr>\n",
1252
        "<tr>\n",
1253
        "  <th>Time:</th>                 <td>11:13:48</td>     <th>  Log-Likelihood:    </th> <td>  2.2626</td>\n",
1254
        "</tr>\n",
1255
        "<tr>\n",
1256
        "  <th>No. Observations:</th>      <td>    11</td>      <th>  AIC:               </th> <td> -0.5252</td>\n",
1257
        "</tr>\n",
1258
        "<tr>\n",
1259
        "  <th>Df Residuals:</th>          <td>     9</td>      <th>  BIC:               </th> <td>  0.2706</td>\n",
1260
        "</tr>\n",
1261
        "<tr>\n",
1262
        "  <th>Df Model:</th>              <td>     1</td>      <th>                     </th>     <td> </td>   \n",
1263
        "</tr>\n",
1264
        "<tr>\n",
1265
        "  <th>Covariance Type:</th>      <td>nonrobust</td>    <th>                     </th>     <td> </td>   \n",
1266
        "</tr>\n",
1267
        "</table>\n",
1268
        "<table class=\"simpletable\">\n",
1269
        "<tr>\n",
1270
        "      <td></td>         <th>coef</th>     <th>std err</th>      <th>t</th>      <th>P>|t|</th> <th>[95.0% Conf. Int.]</th> \n",
1271
        "</tr>\n",
1272
        "<tr>\n",
1273
        "  <th>FociSize</th>  <td>    1.1379</td> <td>    0.319</td> <td>    3.567</td> <td> 0.006</td> <td>    0.416     1.860</td>\n",
1274
        "</tr>\n",
1275
        "<tr>\n",
1276
        "  <th>Intercept</th> <td>    0.0460</td> <td>    0.159</td> <td>    0.289</td> <td> 0.779</td> <td>   -0.314     0.406</td>\n",
1277
        "</tr>\n",
1278
        "</table>"
1279
       ],
1280
       "metadata": {},
1281
       "output_type": "pyout",
1282
       "prompt_number": 7,
1283
       "text": [
1284
        "<class 'statsmodels.iolib.summary.Summary'>\n",
1285
        "\"\"\"\n",
1286
        "                            GLS Regression Results                            \n",
1287
        "==============================================================================\n",
1288
        "Dep. Variable:       Lobular_collapse   R-squared:                       0.586\n",
1289
        "Model:                            GLS   Adj. R-squared:                  0.540\n",
1290
        "Method:                 Least Squares   F-statistic:                     12.73\n",
1291
        "Date:                Wed, 29 Oct 2014   Prob (F-statistic):            0.00605\n",
1292
        "Time:                        11:13:48   Log-Likelihood:                 2.2626\n",
1293
        "No. Observations:                  11   AIC:                           -0.5252\n",
1294
        "Df Residuals:                       9   BIC:                            0.2706\n",
1295
        "Df Model:                           1                                         \n",
1296
        "Covariance Type:            nonrobust                                         \n",
1297
        "==============================================================================\n",
1298
        "                 coef    std err          t      P>|t|      [95.0% Conf. Int.]\n",
1299
        "------------------------------------------------------------------------------\n",
1300
        "FociSize       1.1379      0.319      3.567      0.006         0.416     1.860\n",
1301
        "Intercept      0.0460      0.159      0.289      0.779        -0.314     0.406\n",
1302
        "==============================================================================\n",
1303
        "\n",
1304
        "Warnings:\n",
1305
        "[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n",
1306
        "\"\"\""
1307
       ]
1308
      }
1309
     ],
1310
     "prompt_number": 7
1311
    },
1312
    {
1313
     "cell_type": "markdown",
1314
     "metadata": {
1315
      "slideshow": {
1316
       "slide_type": "subslide"
1317
      }
1318
     },
1319
     "source": [
1320
      "![image](figures/lobular_collapse.png)"
1321
     ]
1322
    },
1323
    {
1324
     "cell_type": "code",
1325
     "collapsed": false,
1326
     "input": [
1327
      "interface_hepatitis"
1328
     ],
1329
     "language": "python",
1330
     "metadata": {
1331
      "slideshow": {
1332
       "slide_type": "subslide"
1333
      }
1334
     },
1335
     "outputs": [
1336
      {
1337
       "html": [
1338
        "<table class=\"simpletable\">\n",
1339
        "<caption>GLS Regression Results</caption>\n",
1340
        "<tr>\n",
1341
        "  <th>Dep. Variable:</th>    <td>Interface_hepatitis</td> <th>  R-squared:         </th> <td>   0.659</td>\n",
1342
        "</tr>\n",
1343
        "<tr>\n",
1344
        "  <th>Model:</th>                    <td>GLS</td>         <th>  Adj. R-squared:    </th> <td>   0.621</td>\n",
1345
        "</tr>\n",
1346
        "<tr>\n",
1347
        "  <th>Method:</th>              <td>Least Squares</td>    <th>  F-statistic:       </th> <td>   17.38</td>\n",
1348
        "</tr>\n",
1349
        "<tr>\n",
1350
        "  <th>Date:</th>              <td>Wed, 29 Oct 2014</td>   <th>  Prob (F-statistic):</th>  <td>0.00242</td>\n",
1351
        "</tr>\n",
1352
        "<tr>\n",
1353
        "  <th>Time:</th>                  <td>11:13:48</td>       <th>  Log-Likelihood:    </th> <td>  2.3063</td>\n",
1354
        "</tr>\n",
1355
        "<tr>\n",
1356
        "  <th>No. Observations:</th>       <td>    11</td>        <th>  AIC:               </th> <td> -0.6126</td>\n",
1357
        "</tr>\n",
1358
        "<tr>\n",
1359
        "  <th>Df Residuals:</th>           <td>     9</td>        <th>  BIC:               </th> <td>  0.1832</td>\n",
1360
        "</tr>\n",
1361
        "<tr>\n",
1362
        "  <th>Df Model:</th>               <td>     1</td>        <th>                     </th>     <td> </td>   \n",
1363
        "</tr>\n",
1364
        "<tr>\n",
1365
        "  <th>Covariance Type:</th>       <td>nonrobust</td>      <th>                     </th>     <td> </td>   \n",
1366
        "</tr>\n",
1367
        "</table>\n",
1368
        "<table class=\"simpletable\">\n",
1369
        "<tr>\n",
1370
        "       <td></td>         <th>coef</th>     <th>std err</th>      <th>t</th>      <th>P>|t|</th> <th>[95.0% Conf. Int.]</th> \n",
1371
        "</tr>\n",
1372
        "<tr>\n",
1373
        "  <th>Lacunarity</th> <td>   -1.0224</td> <td>    0.245</td> <td>   -4.168</td> <td> 0.002</td> <td>   -1.577    -0.468</td>\n",
1374
        "</tr>\n",
1375
        "<tr>\n",
1376
        "  <th>Intercept</th>  <td>    0.9504</td> <td>    0.143</td> <td>    6.669</td> <td> 0.000</td> <td>    0.628     1.273</td>\n",
1377
        "</tr>\n",
1378
        "</table>"
1379
       ],
1380
       "metadata": {},
1381
       "output_type": "pyout",
1382
       "prompt_number": 8,
1383
       "text": [
1384
        "<class 'statsmodels.iolib.summary.Summary'>\n",
1385
        "\"\"\"\n",
1386
        "                             GLS Regression Results                            \n",
1387
        "===============================================================================\n",
1388
        "Dep. Variable:     Interface_hepatitis   R-squared:                       0.659\n",
1389
        "Model:                             GLS   Adj. R-squared:                  0.621\n",
1390
        "Method:                  Least Squares   F-statistic:                     17.38\n",
1391
        "Date:                 Wed, 29 Oct 2014   Prob (F-statistic):            0.00242\n",
1392
        "Time:                         11:13:48   Log-Likelihood:                 2.3063\n",
1393
        "No. Observations:                   11   AIC:                           -0.6126\n",
1394
        "Df Residuals:                        9   BIC:                            0.1832\n",
1395
        "Df Model:                            1                                         \n",
1396
        "Covariance Type:             nonrobust                                         \n",
1397
        "==============================================================================\n",
1398
        "                 coef    std err          t      P>|t|      [95.0% Conf. Int.]\n",
1399
        "------------------------------------------------------------------------------\n",
1400
        "Lacunarity    -1.0224      0.245     -4.168      0.002        -1.577    -0.468\n",
1401
        "Intercept      0.9504      0.143      6.669      0.000         0.628     1.273\n",
1402
        "==============================================================================\n",
1403
        "\n",
1404
        "Warnings:\n",
1405
        "[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n",
1406
        "\"\"\""
1407
       ]
1408
      }
1409
     ],
1410
     "prompt_number": 8
1411
    },
1412
    {
1413
     "cell_type": "markdown",
1414
     "metadata": {
1415
      "slideshow": {
1416
       "slide_type": "subslide"
1417
      }
1418
     },
1419
     "source": [
1420
      "![image](figures/interface_hepatitis.png)"
1421
     ]
1422
    },
1423
    {
1424
     "cell_type": "heading",
1425
     "level": 2,
1426
     "metadata": {
1427
      "slideshow": {
1428
       "slide_type": "slide"
1429
      }
1430
     },
1431
     "source": [
1432
      "Exploratory analysis"
1433
     ]
1434
    },
1435
    {
1436
     "cell_type": "heading",
1437
     "level": 3,
1438
     "metadata": {},
1439
     "source": [
1440
      "with a random effect on age at death"
1441
     ]
1442
    },
1443
    {
1444
     "cell_type": "markdown",
1445
     "metadata": {
1446
      "slideshow": {
1447
       "slide_type": "subslide"
1448
      }
1449
     },
1450
     "source": [
1451
      "| Dependent variable                       | Models<br />AIC &lt; 2 + AIC<sub>min</sub> | Primary explanatory variables                           |\n",
1452
      "|------------------------------------------|:----------------------------------:|---------------------------------------------------------------------|\n",
1453
      "| Ishak score                              |                  7                 | entropy, tissue-to-sinusoid, focus count, focus size                |\n",
1454
      "| Lobular collapse                         |                  5                 | entropy, lacunarity, tissue-to-sinusoid, focus count                |\n",
1455
      "| Confluent necrosis                       |                  1                 | entropy                                                             |\n",
1456
      "| Interface hepatitis                      |                  2                 | entropy, tissue-to-sinusoid                                         |\n",
1457
      "| Portal inflammation                      |                  4                 | entropy, focus size, lacunarity, focus count, scale, directionality |\n",
1458
      "| Fibrosis                                 |                  2                 | entropy, lacunarity, tissue-to-sinusoid                             |\n",
1459
      "| Biliary hyperplasia                      |                  1                 | focus size                                                          |\n",
1460
      "| Necrosis, apoptosis, random inflammation |    <font color=\"white\">This_is_bla</font>2<font color=\"white\">This_is_bla</font>           | entropy, lacunarity                                                 |"
1461
     ]
1462
    },
1463
    {
1464
     "cell_type": "markdown",
1465
     "metadata": {
1466
      "slideshow": {
1467
       "slide_type": "subslide"
1468
      }
1469
     },
1470
     "source": [
1471
      "- entropy consistently explains histological measures when controlled for age\n",
1472
      "- also important: tissue-to-sinusoid ratio, focus count and size, lacunarity"
1473
     ]
1474
    },
1475
    {
1476
     "cell_type": "markdown",
1477
     "metadata": {
1478
      "slideshow": {
1479
       "slide_type": "fragment"
1480
      }
1481
     },
1482
     "source": [
1483
      "- biological / historical reasoning for this potential cohort effect\n",
1484
      "- interpretation of these models\n",
1485
      "- quality of fit"
1486
     ]
1487
    },
1488
    {
1489
     "cell_type": "heading",
1490
     "level": 2,
1491
     "metadata": {
1492
      "slideshow": {
1493
       "slide_type": "slide"
1494
      }
1495
     },
1496
     "source": [
1497
      "Conclusions"
1498
     ]
1499
    },
1500
    {
1501
     "cell_type": "markdown",
1502
     "metadata": {},
1503
     "source": [
1504
      "- our **semi-educated guess** measures may capture relevant information\n",
1505
      "- underlying **structure** in the data needs thought\n",
1506
      "- still no **map** from image or histological measures to condition of individual\n"
1507
     ]
1508
    },
1509
    {
1510
     "cell_type": "heading",
1511
     "level": 2,
1512
     "metadata": {
1513
      "slideshow": {
1514
       "slide_type": "slide"
1515
      }
1516
     },
1517
     "source": [
1518
      "Future directions"
1519
     ]
1520
    },
1521
    {
1522
     "cell_type": "heading",
1523
     "level": 3,
1524
     "metadata": {
1525
      "slideshow": {
1526
       "slide_type": "subslide"
1527
      }
1528
     },
1529
     "source": [
1530
      "Further exploration of the dataset"
1531
     ]
1532
    },
1533
    {
1534
     "cell_type": "markdown",
1535
     "metadata": {
1536
      "slideshow": {
1537
       "slide_type": "-"
1538
      }
1539
     },
1540
     "source": [
1541
      "- 145 sheep ( 89 females )\n",
1542
      "- 11 age classes\n",
1543
      "- potential redundancy in various measures"
1544
     ]
1545
    },
1546
    {
1547
     "cell_type": "markdown",
1548
     "metadata": {
1549
      "slideshow": {
1550
       "slide_type": "fragment"
1551
      }
1552
     },
1553
     "source": [
1554
      "- 4460 entries across 27 variables\n",
1555
      "- 3330 with full image and histological information\n",
1556
      "- 1196 for which **complete** information is available"
1557
     ]
1558
    },
1559
    {
1560
     "cell_type": "heading",
1561
     "level": 3,
1562
     "metadata": {
1563
      "slideshow": {
1564
       "slide_type": "subslide"
1565
      }
1566
     },
1567
     "source": [
1568
      "More data"
1569
     ]
1570
    },
1571
    {
1572
     "cell_type": "markdown",
1573
     "metadata": {},
1574
     "source": [
1575
      "- nutritional information\n",
1576
      "- immunity data"
1577
     ]
1578
    },
1579
    {
1580
     "cell_type": "heading",
1581
     "level": 3,
1582
     "metadata": {
1583
      "slideshow": {
1584
       "slide_type": "subslide"
1585
      }
1586
     },
1587
     "source": [
1588
      "Narrow-field images"
1589
     ]
1590
    },
1591
    {
1592
     "cell_type": "markdown",
1593
     "metadata": {},
1594
     "source": [
1595
      "- 12536 images\n",
1596
      "- spatial distribution of nuclei"
1597
     ]
1598
    },
1599
    {
1600
     "cell_type": "markdown",
1601
     "metadata": {
1602
      "slideshow": {
1603
       "slide_type": "subslide"
1604
      }
1605
     },
1606
     "source": [
1607
      "![image](figures/10.jpg)"
1608
     ]
1609
    },
1610
    {
1611
     "cell_type": "markdown",
1612
     "metadata": {
1613
      "slideshow": {
1614
       "slide_type": "subslide"
1615
      }
1616
     },
1617
     "source": [
1618
      "![image](figures/Processed2.jpg)"
1619
     ]
1620
    },
1621
    {
1622
     "cell_type": "markdown",
1623
     "metadata": {
1624
      "slideshow": {
1625
       "slide_type": "subslide"
1626
      }
1627
     },
1628
     "source": [
1629
      "![image](figures/Segmented.jpg)"
1630
     ]
1631
    },
1632
    {
1633
     "cell_type": "markdown",
1634
     "metadata": {
1635
      "slideshow": {
1636
       "slide_type": "subslide"
1637
      }
1638
     },
1639
     "source": [
1640
      "<img src=\"figures/10x.png\" width=\"100%\" />"
1641
     ]
1642
    }
1643
   ],
1644
   "metadata": {}
1645
  }
1646
 ]
1647
}