Diff of /thesis/references.bib [000000] .. [7e66db]


--- /dev/null
+++ b/thesis/references.bib
@article{Stern2014,
abstract = {There has recently been an increased demand in bone age estimation (BAE) of living individuals and human remains in legal medicine applications. A severe drawback of established BAE techniques based on X-ray images is radiation exposure, since many countries prohibit scanning involving ionizing radiation without diagnostic reasons. We propose a completely automated method for BAE based on volumetric hand MRI images. On our database of 56 male Caucasian subjects between 13 and 19 years, we are able to estimate the subject's age with a mean difference of 0.85 +/- 0.58 years compared to the chronological age, which is in line with radiologist results using established radiographic methods. We see this work as a promising first step towards a novel MRI based bone age estimation system, with the key benefits of lacking exposure to ionizing radiation and higher accuracy due to exploitation of volumetric data.},
author = {Stern, Darko and Ebner, Thomas and Bischof, Horst and Grassegger, Sabine and Ehammer, Thomas and Urschler, Martin},
file = {:Users/pietz/Documents/Mendeley/HandMRT-automaticAnalysis.pdf:pdf},
isbn = {978-3-319-10470-6; 978-3-319-10469-0},
journal = {Medical Image Computing and Computer-Assisted Intervention (MICCAI)},
keywords = {Adolescent,Adult,Age Determination by Skeleton,Aging,Algorithms,Artificial Intelligence,Functional Laterality,Hand Bones,Humans,Image Interpretation, Computer-Assisted,Magnetic Resonance Imaging,Male,Middle Aged,Pattern Recognition, Automated,Reproducibility of Results,Sensitivity and Specificity,Young Adult,anatomy {\&} histology,methods,pathology,physiology},
number = {Pt 2},
pages = {220--227},
pmid = {25485382},
title = {{Fully automatic bone age estimation from left hand MR images}},
volume = {17},
year = {2014}
}
@article{Prasoon2013,
abstract = {Segmentation of anatomical structures in medical images is often based on a voxel/pixel classification approach. Deep learning systems, such as convolutional neural networks (CNNs), can infer a hierarchical representation of images that fosters categorization. We propose a novel system for voxel classification integrating three 2D CNNs, which have a one-to-one association with the xy, yz and zx planes of the 3D image, respectively. We applied our method to the segmentation of tibial cartilage in low field knee MRI scans and tested it on 114 unseen scans. Although our method uses only 2D features at a single scale, it performs better than a state-of-the-art method using 3D multi-scale features. In the latter approach, the features and the classifier have been carefully adapted to the problem at hand. That we were able to get better results by a deep learning architecture that autonomously learns the features from the images is the main insight of this study.},
author = {Prasoon, Adhish and Petersen, Kersten and Igel, Christian and Lauze, Fran{\c{c}}ois and Dam, Erik and Nielsen, Mads},
file = {:Users/pietz/Documents/Mendeley/Prasoon et al. - Unknown - Deep Feature Learning for Knee Cartilage Segmentation Using a Triplanar Convolutional Neural Network.pdf:pdf},
title = {{Deep Feature Learning for Knee Cartilage Segmentation Using a Triplanar Convolutional Neural Network}},
url = {https://pdfs.semanticscholar.org/9574/35e5b8fc318f4fca90ef5e6015ce736e5e8f.pdf},
year = {2013}
}
@article{Folkesson2007,
abstract = {We present a fully automatic method for articular cartilage segmentation from magnetic resonance imaging (MRI) which we use as the foundation of a quantitative cartilage assessment. We evaluate our method by comparisons to manual segmentations by a radiologist and by examining the interscan reproducibility of the volume and area estimates. Training and evaluation of the method is performed on a data set consisting of 139 scans of knees with a status ranging from healthy to severely osteoarthritic. This is, to our knowledge, the only fully automatic cartilage segmentation method that has good agreement with manual segmentations, an interscan reproducibility as good as that of a human expert, and enables the separation between healthy and osteoarthritic populations. While high-field scanners offer high-quality imaging from which the articular cartilage has been evaluated extensively using manual and automated image analysis techniques, low-field scanners on the other hand produce lower quality images but at a fraction of the cost of their high-field counterpart. For low-field MRI, there is no well-established accuracy validation for quantitative cartilage estimates, but we show that differences between healthy and osteoarthritic populations are statistically significant using our cartilage volume and surface area estimates, which suggests that low-field MRI analysis can become a useful, affordable tool in clinical studies.},
author = {Folkesson, Jenny and Dam, Erik B and Olsen, Ole F and Pettersen, Paola C and Christiansen, Claus},
doi = {10.1109/TMI.2006.886808},
file = {:Users/pietz/Documents/Mendeley/Folkesson et al. - 2007 - Segmenting Articular Cartilage Automatically Using a Voxel Classification Approach.pdf:pdf},
journal = {IEEE Transactions on Medical Imaging},
number = {1},
title = {{Segmenting Articular Cartilage Automatically Using a Voxel Classification Approach}},
url = {http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.488.2760{\&}rep=rep1{\&}type=pdf},
volume = {26},
year = {2007}
}
@book{Weishaupt2009,
author = {Weishaupt, D and K{\"{o}}chli, V D and Marincek, B and Froehlich, J M and Nanz, D and Pr{\"{u}}{\ss}mann, K P},
file = {:Users/pietz/Documents/Mendeley/-Spr.-{\_}Weishaupt{\_}u.a.,{\_}Wie{\_}funktioniert{\_}MRI{\_}(2009).pdf:pdf},
isbn = {9783540895725},
pages = {172},
publisher = {Springer},
title = {{Wie funktioniert MRI?}},
year = {2009}
}
@book{Jopp2007,
author = {Jopp, Eilin},
isbn = {9783830028949},
publisher = {Kovac},
title = {{Methoden zur Alters- und Geschlechtsbestimmung auf dem Pruefstand - eine rechtsmedizinische empirische Studie}},
url = {http://www.verlagdrkovac.de/978-3-8300-2894-9.htm},
year = {2007}
}
@inproceedings{Drozdzal2016,
abstract = {We introduce a novel system that integrates several modules including a breast segmentation module and a fibroglandular tissue segmentation module into a modified cascaded region-based convolutional network.},
archivePrefix = {arXiv},
arxivId = {1608.04117},
author = {Drozdzal, Michal and Vorontsov, Eugene and Chartrand, Gabriel and Kadoury, Samuel and Pal, Chris},
booktitle = {Lecture Notes in Computer Science (including subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics)},
doi = {10.1007/978-3-319-46976-8_19},
eprint = {1608.04117},
isbn = {9783319469751},
issn = {16113349},
pmid = {25246403},
title = {{The importance of skip connections in biomedical image segmentation}},
volume = {10008 LNCS},
year = {2016}
}
@article{Moeskops2016,
abstract = {Automatic segmentation in MR brain images is important for quantitative analysis in large-scale studies with images acquired at all ages. This paper presents a method for the automatic segmentation of MR brain images into a number of tissue classes using a convolutional neural network. To ensure that the method obtains accurate segmentation details as well as spatial consistency, the network uses multiple patch sizes and multiple convolution kernel sizes to acquire multi-scale information about each voxel. The method is not dependent on explicit features, but learns to recognise the information that is important for the classification based on training data. The method requires a single anatomical MR image only. The segmentation method is applied to five different data sets: coronal T2-weighted images of preterm infants acquired at 30 weeks postmenstrual age (PMA) and 40 weeks PMA, axial T2-weighted images of preterm infants acquired at 40 weeks PMA, axial T1-weighted images of ageing adults acquired at an average age of 70 years, and T1-weighted images of young adults acquired at an average age of 23 years. The method obtained the following average Dice coefficients over all segmented tissue classes for each data set, respectively: 0.87, 0.82, 0.84, 0.86, and 0.91. The results demonstrate that the method obtains accurate segmentations in all five sets, and hence demonstrates its robustness to differences in age and acquisition protocol.},
archivePrefix = {arXiv},
arxivId = {1704.03295},
author = {Moeskops, Pim and Viergever, Max A. and Mendrik, Adrienne M. and {De Vries}, Linda S. and Benders, Manon J.N.L. and Isgum, Ivana},
doi = {10.1109/TMI.2016.2548501},
eprint = {1704.03295},
isbn = {0278-0062},
issn = {1558254X},
journal = {IEEE Transactions on Medical Imaging},
number = {5},
pmid = {27046893},
title = {{Automatic Segmentation of MR Brain Images with a Convolutional Neural Network}},
volume = {35},
year = {2016}
}
@misc{WorldHealthOrganization2016,
author = {{World Health Organization}},
booktitle = {WHO},
publisher = {World Health Organization},
title = {{Ionizing radiation, health effects and protective measures}},
url = {http://www.who.int/mediacentre/factsheets/fs371/en/},
urldate = {2017-08-08},
year = {2016}
}
@incollection{Saring2014,
author = {S{\"{a}}ring, Dennis and Mauer, Markus and Jopp, Eilin},
booktitle = {Bildverarbeitung f{\"{u}}r die Medizin 2014},
doi = {10.1007/978-3-642-54111-7_16},
file = {:Users/pietz/Documents/Mendeley/BVM2014-age.pdf:pdf},
pages = {60--65},
publisher = {Springer, Berlin, Heidelberg},
title = {{Klassifikation des Verschlussgrades der Epiphyse der proximalen Tibia zur Altersbestimmung}},
url = {http://link.springer.com/10.1007/978-3-642-54111-7{\_}16},
year = {2014}
}
@article{Setiono1997,
abstract = {Feature selection is an integral part of most learning algorithms. Due to the existence of irrelevant and redundant attributes, by selecting only the relevant attributes of the data, higher predictive accuracy can be expected from a machine learning method. In this paper, we propose the use of a three-layer feedforward neural network to select those input attributes that are most useful for discriminating classes in a given set of input patterns. A network pruning algorithm is the foundation of the proposed algorithm. By adding a penalty term to the error function of the network, redundant network connections can be distinguished from those relevant ones by their small weights when the network training process has been completed. A simple criterion to remove an attribute based on the accuracy rate of the network is developed. The network is retrained after removal of an attribute, and the selection process is repeated until no attribute meets the criterion for removal. Our experimental results suggest that the proposed method works very well on a wide variety of classification problems.},
author = {Setiono, R and Liu, H},
doi = {10.1109/72.572104},
file = {:Users/pietz/Documents/Mendeley/tnn97.pdf:pdf},
issn = {10459227},
journal = {IEEE Transactions on Neural Networks},
month = {may},
number = {3},
pages = {654--662},
pmid = {18255668},
title = {{Neural-network feature selector}},
url = {http://ieeexplore.ieee.org/document/572104/},
volume = {8},
year = {1997}
}
@article{Saxe2015,
abstract = {Malware remains a serious problem for corporations, government agencies, and individuals, as attackers continue to use it as a tool to effect frequent and costly network intrusions. Machine learning holds the promise of automating the work required to detect newly discovered malware families, and could potentially learn generalizations about malware and benign software that support the detection of entirely new, unknown malware families. Unfortunately, few proposed machine learning based malware detection methods have achieved the low false positive rates required to deliver deployable detectors. In this paper we present a deep neural network malware classifier that achieves a usable detection rate at an extremely low false positive rate and scales to real world training example volumes on commodity hardware. Specifically, we show that our system achieves a 95{\%} detection rate at 0.1{\%} false positive rate (FPR), based on more than 400,000 software binaries sourced directly from our customers and internal malware databases. We achieve these results by directly learning on all binaries, without any filtering, unpacking, or manually separating binary files into categories. Further, we confirm our false positive rates directly on a live stream of files coming in from Invincea's deployed endpoint solution, provide an estimate of how many new binary files we expected to see a day on an enterprise network, and describe how that relates to the false positive rate and translates into an intuitive threat score. Our results demonstrate that it is now feasible to quickly train and deploy a low resource, highly accurate machine learning classification model, with false positive rates that approach traditional labor intensive signature based methods, while also detecting previously unseen malware.},
archivePrefix = {arXiv},
arxivId = {1508.03096},
author = {Saxe, Joshua and Berlin, Konstantin},
eprint = {1508.03096},
file = {:Users/pietz/Documents/Mendeley/Saxe, Berlin - 2015 - Deep Neural Network Based Malware Detection Using Two Dimensional Binary Program Features.pdf:pdf},
month = {aug},
title = {{Deep Neural Network Based Malware Detection Using Two Dimensional Binary Program Features}},
url = {http://arxiv.org/abs/1508.03096},
year = {2015}
}
@article{Bojarski2017,
abstract = {As part of a complete software stack for autonomous driving, NVIDIA has created a neural-network-based system, known as PilotNet, which outputs steering angles given images of the road ahead. PilotNet is trained using road images paired with the steering angles generated by a human driving a data-collection car. It derives the necessary domain knowledge by observing human drivers. This eliminates the need for human engineers to anticipate what is important in an image and foresee all the necessary rules for safe driving. Road tests demonstrated that PilotNet can successfully perform lane keeping in a wide variety of driving conditions, regardless of whether lane markings are present or not. The goal of the work described here is to explain what PilotNet learns and how it makes its decisions. To this end we developed a method for determining which elements in the road image most influence PilotNet's steering decision. Results show that PilotNet indeed learns to recognize relevant objects on the road. In addition to learning the obvious features such as lane markings, edges of roads, and other cars, PilotNet learns more subtle features that would be hard to anticipate and program by engineers, for example, bushes lining the edge of the road and atypical vehicle classes.},
archivePrefix = {arXiv},
arxivId = {1704.07911},
author = {Bojarski, Mariusz and Yeres, Philip and Choromanska, Anna and Choromanski, Krzysztof and Firner, Bernhard and Jackel, Lawrence and Muller, Urs},
eprint = {1704.07911},
file = {:Users/pietz/Documents/Mendeley/Bojarski et al. - 2017 - Explaining How a Deep Neural Network Trained with End-to-End Learning Steers a Car.pdf:pdf},
month = {apr},
title = {{Explaining How a Deep Neural Network Trained with End-to-End Learning Steers a Car}},
url = {http://arxiv.org/abs/1704.07911},
year = {2017}
}
@article{Wu2016,
abstract = {Neural Machine Translation (NMT) is an end-to-end learning approach for automated translation, with the potential to overcome many of the weaknesses of conventional phrase-based translation systems. Unfortunately, NMT systems are known to be computationally expensive both in training and in translation inference. Also, most NMT systems have difficulty with rare words. These issues have hindered NMT's use in practical deployments and services, where both accuracy and speed are essential. In this work, we present GNMT, Google's Neural Machine Translation system, which attempts to address many of these issues. Our model consists of a deep LSTM network with 8 encoder and 8 decoder layers using attention and residual connections. To improve parallelism and therefore decrease training time, our attention mechanism connects the bottom layer of the decoder to the top layer of the encoder. To accelerate the final translation speed, we employ low-precision arithmetic during inference computations. To improve handling of rare words, we divide words into a limited set of common sub-word units ("wordpieces") for both input and output. This method provides a good balance between the flexibility of "character"-delimited models and the efficiency of "word"-delimited models, naturally handles translation of rare words, and ultimately improves the overall accuracy of the system. Our beam search technique employs a length-normalization procedure and uses a coverage penalty, which encourages generation of an output sentence that is most likely to cover all the words in the source sentence. On the WMT'14 English-to-French and English-to-German benchmarks, GNMT achieves competitive results to state-of-the-art. Using a human side-by-side evaluation on a set of isolated simple sentences, it reduces translation errors by an average of 60{\%} compared to Google's phrase-based production system.},
archivePrefix = {arXiv},
arxivId = {1609.08144},
author = {Wu, Yonghui and Schuster, Mike and Chen, Zhifeng and Le, Quoc V. and Norouzi, Mohammad and Macherey, Wolfgang and Krikun, Maxim and Cao, Yuan and Gao, Qin and Macherey, Klaus and Klingner, Jeff and Shah, Apurva and Johnson, Melvin and Liu, Xiaobing and Kaiser, {\L}ukasz and Gouws, Stephan and Kato, Yoshikiyo and Kudo, Taku and Kazawa, Hideto and Stevens, Keith and Kurian, George and Patil, Nishant and Wang, Wei and Young, Cliff and Smith, Jason and Riesa, Jason and Rudnick, Alex and Vinyals, Oriol and Corrado, Greg and Hughes, Macduff and Dean, Jeffrey},
eprint = {1609.08144},
file = {:Users/pietz/Documents/Mendeley/Wu et al. - 2016 - Google's Neural Machine Translation System Bridging the Gap between Human and Machine Translation.pdf:pdf},
month = {sep},
title = {{Google's Neural Machine Translation System: Bridging the Gap between Human and Machine Translation}},
url = {http://arxiv.org/abs/1609.08144},
year = {2016}
}
@article{EuropeanAsylumSupportOffice2013,
author = {{European Asylum Support Office}},
file = {:Users/pietz/Documents/Mendeley/European Asylum Support Office - 2013 - Age assessment practice in Europe.pdf:pdf},
title = {{Age assessment practice in Europe}},
url = {https://www.easo.europa.eu/sites/default/files/public/EASO-Age-assessment-practice-in-Europe1.pdf},
year = {2013}
}
@article{Feltz2015,
author = {Feltz, Vivien},
file = {:Users/pietz/Documents/Mendeley/Feltz - 2015 - Age Assessment for Unaccompanied Minors.pdf:pdf},
title = {{Age Assessment for Unaccompanied Minors}},
url = {https://mdmeuroblog.files.wordpress.com/2014/01/age-determination-def.pdf},
year = {2015}
}
@article{Dodin2011,
abstract = {This study aimed at developing a fully automated bone segmentation method for the human knee (femur and tibia) from magnetic resonance (MR) images. MR imaging was acquired on a whole body 1.5T scanner with a gradient echo fat suppressed sequence using an extremity coil. The method was based on the Ray Casting technique which relies on the decomposition of the MR images into multiple surface layers to localize the boundaries of the bones and several partial segmentation objects being automatically merged to obtain the final complete segmentation of the bones. Validation analyses were performed on 161 MR images from knee osteoarthritis patients, comparing the developed fully automated to a validated semi-automated segmentation method, using the average surface distance (ASD), volume correlation coefficient, and Dice similarity coefficient (DSC). For both femur and tibia, respectively, data showed excellent bone surface ASD (0.50 ± 0.12 mm; 0.37 ± 0.09 mm), average oriented distance between bone surfaces within the cartilage domain (0.02 ± 0.07 mm; −0.05 ± 0.10 mm), and bone volume DSC (0.94 ± 0.05; 0.92 ± 0.07). This newly developed fully automated bone segmentation method will enable large scale studies to be conducted within shorter time durations, as well as increase stability in the reading of pathological bone.},
author = {Dodin, Pierre and Martel-Pelletier, Johanne and Pelletier, Jean-Pierre and Abram, Francois},
doi = {10.1007/s11517-011-0838-8},
file = {:Users/pietz/Documents/Mendeley/Martel-Pelletier - 2011 - A fully automated human knee 3D MRI bone segmentation using the ray casting technique.pdf:pdf},
journal = {Medical {\&} Biological Engineering {\&} Computing},
keywords = {ray casting,MRI,3D knee segmentation},
pages = {1413--1424},
title = {{A fully automated human knee 3D MRI bone segmentation using the ray casting technique}},
year = {2011}
}
@article{Dam,
abstract = {Clinical studies including thousands of magnetic resonance imaging (MRI) scans offer potential for pathogenesis research in osteoarthritis. However, comprehensive quantification of all bone, cartilage, and meniscus compartments is challenging. We propose a segmentation framework for fully automatic segmentation of knee MRI. The framework combines multi-atlas rigid registration with voxel classification and was trained on manual segmentations with varying configurations of bones, cartilages, and menisci. The validation included high- and low-field knee MRI cohorts from the Center for Clinical and Basic Research, the osteoarthritis initiative (OAI), and the segmentation of knee images (SKI10) challenge. In total, 1907 knee MRIs were segmented during the evaluation. No segmentations were excluded. Our resulting OAI cartilage volume scores are available upon request. The precision and accuracy performances matched manual reader re-segmentation well. The cartilage volume scan-rescan precision was 4.9{\%} (RMS CV). The Dice volume overlaps in the medial/lateral tibial/femoral cartilage compartments were 0.80 to 0.87. The correlations with volumes from independent methods were between 0.90 and 0.96 on the OAI scans. Thus, the framework demonstrated precision and accuracy comparable to manual segmentations. Finally, our method placed second for cartilage segmentation in the SKI10 challenge. The comprehensive validation suggested that automatic segmentation is appropriate for cohorts with thousands of scans.},
author = {Dam, Erik B and Lillholm, Martin and Marques, Joselene and Nielsen, Mads},
doi = {10.1117/1.JMI},
file = {:Users/pietz/Documents/Mendeley/Dam et al. - Unknown - Automatic segmentation of high-and low-field knee MRIs using knee image quantification with data from the osteoar.pdf:pdf},
keywords = {MRI,knee,osteoarthritis,segmentation},
title = {{Automatic segmentation of high- and low-field knee MRIs using knee image quantification with data from the osteoarthritis initiative}},
url = {https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4478858/pdf/JMI-002-024001.pdf}
}
@article{Ringenbach2012,
abstract = {For the production of cutting blocks for knee-joint implant positioning a precise segmentation of the femur and tibia is essential. Due to low bone density and osteophytes the segmentation of knee bones from CT data can be a major challenge. As part of an industrial project, we have developed a hybrid segmentation method – based on a pre-segmentation with statistical shape model and a fine-segmentation with the Fast Marching algorithm.},
author = {Ringenbach, Alex and Schw{\"{a}}gli, Tobias},
doi = {10.1515/bmt-2012-4500},
file = {:Users/pietz/Documents/Mendeley/Ringenbach, Schw{\"{a}}gli, Tobias - 2012 - A robust and accurate segmentation of the knee bones from CT data.pdf:pdf},
journal = {Biomed Tech},
title = {{A robust and accurate segmentation of the knee bones from CT data}},
url = {https://www.degruyter.com/downloadpdf/j/bmte.2012.57.issue-s1-B/bmt-2012-4500/bmt-2012-4500.pdf},
volume = {57},
year = {2012}
}
@article{Ahn,
author = {Ahn, Chunsoo and Bui, Toan Duc and Lee, Yong-woo and Shin, Jitae and Park, Hyunjin},
doi = {10.1186/s12938-016-0225-7},
file = {:Users/pietz/Documents/Mendeley/Ahn et al. - Unknown - Fully automated, level set‑based segmentation for knee MRIs using an adaptive force function and template data.pdf:pdf},
journal = {BioMedical Engineering OnLine},
keywords = {Cartilage,Knee segmentation,Magnetic resonance imaging,Medical image processing},
title = {{Fully automated, level set-based segmentation for knee MRIs using an adaptive force function and template: data from the osteoarthritis initiative}},
url = {https://biomedical-engineering-online.biomedcentral.com/track/pdf/10.1186/s12938-016-0225-7?site=biomedical-engineering-online.biomedcentral.com},
year = {2016}
}
@article{Zhao2016,
abstract = {Scene parsing is challenging for unrestricted open vocabulary and diverse scenes. In this paper, we exploit the capability of global context information by different-region-based context aggregation through our pyramid pooling module together with the proposed pyramid scene parsing network (PSPNet). Our global prior representation is effective to produce good quality results on the scene parsing task, while PSPNet provides a superior framework for pixel-level prediction tasks. The proposed approach achieves state-of-the-art performance on various datasets. It came first in ImageNet scene parsing challenge 2016, PASCAL VOC 2012 benchmark and Cityscapes benchmark. A single PSPNet yields new record of mIoU accuracy 85.4{\%} on PASCAL VOC 2012 and accuracy 80.2{\%} on Cityscapes.},
archivePrefix = {arXiv},
arxivId = {1612.01105},
author = {Zhao, Hengshuang and Shi, Jianping and Qi, Xiaojuan and Wang, Xiaogang and Jia, Jiaya},
eprint = {1612.01105},
file = {:Users/pietz/Documents/Mendeley/Zhao et al. - 2016 - Pyramid Scene Parsing Network.pdf:pdf},
month = {dec},
title = {{Pyramid Scene Parsing Network}},
url = {http://arxiv.org/abs/1612.01105},
year = {2016}
}
@article{Lin2016,
abstract = {Recently, very deep convolutional neural networks (CNNs) have shown outstanding performance in object recognition and have also been the first choice for dense classification problems such as semantic segmentation. However, repeated subsampling operations like pooling or convolution striding in deep CNNs lead to a significant decrease in the initial image resolution. Here, we present RefineNet, a generic multi-path refinement network that explicitly exploits all the information available along the down-sampling process to enable high-resolution prediction using long-range residual connections. In this way, the deeper layers that capture high-level semantic features can be directly refined using fine-grained features from earlier convolutions. The individual components of RefineNet employ residual connections following the identity mapping mindset, which allows for effective end-to-end training. Further, we introduce chained residual pooling, which captures rich background context in an efficient manner. We carry out comprehensive experiments and set new state-of-the-art results on seven public datasets. In particular, we achieve an intersection-over-union score of 83.4 on the challenging PASCAL VOC 2012 dataset, which is the best reported result to date.},
archivePrefix = {arXiv},
arxivId = {1611.06612},
author = {Lin, Guosheng and Milan, Anton and Shen, Chunhua and Reid, Ian},
eprint = {1611.06612},
file = {:Users/pietz/Documents/Mendeley/Lin et al. - 2016 - RefineNet Multi-Path Refinement Networks for High-Resolution Semantic Segmentation.pdf:pdf},
month = {nov},
title = {{RefineNet: Multi-Path Refinement Networks for High-Resolution Semantic Segmentation}},
url = {http://arxiv.org/abs/1611.06612},
year = {2016}
}
@article{Chen2016,
abstract = {In this work we address the task of semantic image segmentation with Deep Learning and make three main contributions that are experimentally shown to have substantial practical merit. First, we highlight convolution with upsampled filters, or 'atrous convolution', as a powerful tool in dense prediction tasks. Atrous convolution allows us to explicitly control the resolution at which feature responses are computed within Deep Convolutional Neural Networks. It also allows us to effectively enlarge the field of view of filters to incorporate larger context without increasing the number of parameters or the amount of computation. Second, we propose atrous spatial pyramid pooling (ASPP) to robustly segment objects at multiple scales. ASPP probes an incoming convolutional feature layer with filters at multiple sampling rates and effective fields-of-views, thus capturing objects as well as image context at multiple scales. Third, we improve the localization of object boundaries by combining methods from DCNNs and probabilistic graphical models. The commonly deployed combination of max-pooling and downsampling in DCNNs achieves invariance but has a toll on localization accuracy. We overcome this by combining the responses at the final DCNN layer with a fully connected Conditional Random Field (CRF), which is shown both qualitatively and quantitatively to improve localization performance. Our proposed "DeepLab" system sets the new state-of-art at the PASCAL VOC-2012 semantic image segmentation task, reaching 79.7{\%} mIOU in the test set, and advances the results on three other datasets: PASCAL-Context, PASCAL-Person-Part, and Cityscapes. All of our code is made publicly available online.},
archivePrefix = {arXiv},
arxivId = {1606.00915},
author = {Chen, Liang-Chieh and Papandreou, George and Kokkinos, Iasonas and Murphy, Kevin and Yuille, Alan L.},
eprint = {1606.00915},
file = {:Users/pietz/Documents/Mendeley/Chen et al. - 2016 - DeepLab Semantic Image Segmentation with Deep Convolutional Nets, Atrous Convolution, and Fully Connected CRFs.pdf:pdf},
month = {jun},
title = {{DeepLab: Semantic Image Segmentation with Deep Convolutional Nets, Atrous Convolution, and Fully Connected CRFs}},
url = {http://arxiv.org/abs/1606.00915},
year = {2016}
}
@article{Nekrasov2016,
abstract = {Semantic image segmentation is a principal problem in computer vision, where the aim is to correctly classify each individual pixel of an image into a semantic label. Its widespread use in many areas, including medical imaging and autonomous driving, has fostered extensive research in recent years. Empirical improvements in tackling this task have primarily been motivated by successful exploitation of Convolutional Neural Networks (CNNs) pre-trained for image classification and object recognition. However, the pixel-wise labelling with CNNs has its own unique challenges: (1) an accurate deconvolution, or upsampling, of low-resolution output into a higher-resolution segmentation mask and (2) an inclusion of global information, or context, within locally extracted features. To address these issues, we propose a novel architecture to conduct the equivalent of the deconvolution operation globally and acquire dense predictions. We demonstrate that it leads to improved performance of state-of-the-art semantic segmentation models on the PASCAL VOC 2012 benchmark, reaching 74.0{\%} mean IU accuracy on the test set.},
archivePrefix = {arXiv},
arxivId = {1602.03930},
author = {Nekrasov, Vladimir and Ju, Janghoon and Choi, Jaesik},
eprint = {1602.03930},
file = {:Users/pietz/Documents/Mendeley/Nekrasov, Ju, Choi - 2016 - Global Deconvolutional Networks for Semantic Segmentation.pdf:pdf},
month = {feb},
title = {{Global Deconvolutional Networks for Semantic Segmentation}},
url = {http://arxiv.org/abs/1602.03930},
year = {2016}
}
@misc{NVIDIA,
author = {NVIDIA},
title = {{GPU vs CPU? What is GPU Computing?}},
url = {http://www.nvidia.com/object/what-is-gpu-computing.html},
urldate = {2017-07-31}
}
@article{Jager2010,
author = {J{\"{a}}ger, Florian},
file = {:Users/pietz/Documents/Mendeley/J{\"{a}}ger - 2010 - Normalization of Magnetic Resonance Images and its Application to the Diagnosis of the Scoliotic Spine.pdf:pdf},
title = {{Normalization of Magnetic Resonance Images and its Application to the Diagnosis of the Scoliotic Spine}},
url = {http://www5.informatik.uni-erlangen.de/Forschung/Publikationen/2011/Jaeger11-NOM.pdf},
year = {2010}
}
@article{Kapur,
abstract = {A method for model based segmentation of 3D Magnetic Resonance Imaging (MRI) scans of the human knee is presented. A probabilistic model describing the spatial relationships between features of the human knee is constructed from 3D manually segmented data. In conjunction with feature detection techniques from low-level computer vision, this model is used to segment knee MRI scans in a Bayesian framework.},
author = {Kapur, Tina and Beardsley, Paul A and Gibson, Sarah F and Grimson, W Eric L and Wells, William M},
file = {:Users/pietz/Documents/Mendeley/Kapur et al. - Unknown - Model Based Segmentation of Clinical Knee MRI.pdf:pdf},
title = {{Model Based Segmentation of Clinical Knee MRI}},
url = {http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.49.6825{\&}rep=rep1{\&}type=pdf}
}
@article{LeCun1990,
abstract = {We present an application of back-propagation networks to handwritten digit recognition. Minimal preprocessing of the data was required, but the architecture of the network was highly constrained and specifically designed for the task. The input of the network consists of normalized images of isolated digits. The method has a 1{\%} error rate and about a 9{\%} reject rate on zipcode digits provided by the U.S. Postal Service.},
author = {{Le Cun}, Y. and Boser, B. and Denker, J. S. and Henderson, D. and Howard, R. E. and Hubbard, W. and Jackel, L. D.},
file = {:Users/pietz/Documents/Mendeley/Unknown - Unknown - Handwritten Digit Recognition with a Back-Propagation Network.pdf:pdf},
isbn = {1-55860-100-7},
journal = {Advances in Neural Information Processing Systems},
pages = {396--404},
title = {{Handwritten Digit Recognition with a Back-Propagation Network}},
url = {http://yann.lecun.com/exdb/publis/pdf/lecun-90c.pdf},
year = {1990}
}
@misc{Attarian2013,
author = {Attarian, David E.},
booktitle = {Duke Medicine},
title = {{Your Bones: What are growth plates?}},
url = {http://serkadis.net/index/151288},
urldate = {2017-07-27},
year = {2013}
}
@misc{DukeHealth,
abstract = {Growth plates are zones of cartilage at each end of our long bones (femur, tibia, etc.), explains David E. Attarian, MD, a joint replacement orthopaedic surgeon at Duke. These bones grow by the contribution of new bone from the growth plate.  Because of their soft nature, these parts of the bone are vulnerable to injury during the development of a child. This is a region of the bone that is sometimes weaker than the surrounding tendons and ligaments. As a result, up to 30 percent of fractures in children can occur around the growth plates.},
author = {{Duke Health}},
title = {{Growth plates: what you need to know}},
url = {https://www.dukehealth.org/blog/growth-plates-what-you-need-know},
urldate = {2017-07-27}
}
@book{Aumuller2010,
author = {Aum{\"{u}}ller, Gerhard and Aust, Gabriela and Doll, Andreas and Engele, J{\"{u}}rgen and Kirsch, Joachim and Mense, Siegfried and Wurzinger, Laurenz},
edition = {2},
isbn = {978-3131360410},
pages = {1218},
publisher = {Thieme},
title = {{Anatomie}},
year = {2010}
}
@misc{TheDanaFoundation2011,
abstract = {Magnetic Resonance Imaging (MRI) is based on the principle of nuclear magnetic resonance and uses radiofrequency waves to probe tissue structure and function without requiring exposure to ionizing radiation. The two researchers who made MRI clinically feasible in the 1980s by building on initial discoveries of the 1930s won the Nobel Prize in Physiology or Medicine in 2003.},
author = {{The Dana Foundation}},
pages = {1},
title = {{Non-invasive Structural and Physiological Imaging: MRI Technologies}},
url = {http://dana.org/Publications/ReportDetails.aspx?id=44357},
urldate = {2017-07-25},
year = {2011}
}
@book{Westbrook2016,
abstract = {MRI at a Glance encapsulates essential MRI physics knowledge. Illustrated in full colour throughout, its concise text explains complex information, to provide the perfect revision aid. It includes topics ranging from magnetism to safety, K space to pulse sequences, and image contrast to artefacts. This third edition has been fully updated, with revised diagrams and new pedagogy, including 55 key points, tables, scan tips, equations, and learning points. There is also an expanded glossary and new appendices on optimizing image quality, parameters and trade-offs.},
author = {Westbrook, Catherine},
edition = {3},
file = {:Users/pietz/Documents/Mendeley/Westbrook - 2016 - MRI at A Glance.pdf:pdf},
isbn = {978-1-119-05355-2},
keywords = {MRI,medical imaging},
mendeley-tags = {MRI,medical imaging},
pages = {136},
publisher = {Wiley Blackwell},
title = {{MRI at A Glance}},
year = {2016}
}
@article{Pereira2016,
abstract = {Among brain tumors, gliomas are the most common and aggressive, leading to a very short life expectancy in their highest grade. Thus, treatment planning is a key stage to improve the quality of life of oncological patients. Magnetic Resonance Imaging (MRI) is a widely used imaging technique to assess these tumors, but the large amount of data produced by MRI prevents manual segmentation in a reasonable time, limiting the use of precise quantitative measurements in the clinical practice. So, automatic and reliable segmentation methods are required; however, the large spatial and structural variability among brain tumors make automatic segmentation a challenging problem. In this paper, we propose an automatic segmentation method based on Convolutional Neural Networks (CNN), exploring small 3x3 kernels. The use of small kernels allows designing a deeper architecture, besides having a positive effect against overfitting, given the fewer number of weights in the network. We also investigated the use of intensity normalization as a pre-processing step, which though not common in CNN-based segmentation methods, proved together with data augmentation to be very effective for brain tumor segmentation in MRI images. Our proposal was validated in the Brain Tumor Segmentation Challenge 2013 database (BRATS 2013), obtaining simultaneously the first position for the complete, core, and enhancing regions in Dice Similarity Coefficient metric (0.88, 0.83, 0.77) for the Challenge data set. Also, it obtained the overall first position by the online evaluation platform. We also participated in the on-site BRATS 2015 Challenge using the same model, obtaining the second place, with Dice Similarity Coefficient metric of 0.78, 0.65, and 0.75 for the complete, core, and enhancing regions, respectively.},
archivePrefix = {arXiv},
arxivId = {arXiv:1502.02445v2},
author = {Pereira, Sergio and Pinto, Adriano and Alves, Victor and Silva, Carlos A.},
doi = {10.1109/TMI.2016.2538465},
eprint = {arXiv:1502.02445v2},
isbn = {0278-0062},
issn = {1558254X},
journal = {IEEE Transactions on Medical Imaging},
number = {5},
pmid = {26960222},
title = {{Brain Tumor Segmentation Using Convolutional Neural Networks in MRI Images}},
volume = {35},
year = {2016}
}
@article{Kamnitsas2017,
abstract = {We propose a dual pathway, 11-layers deep, three-dimensional Convolutional Neural Network for the challenging task of brain lesion segmentation. The devised architecture is the result of an in-depth analysis of the limitations of current networks proposed for similar applications. To overcome the computational burden of processing 3D medical scans, we have devised an efficient and effective dense training scheme which joins the processing of adjacent image patches into one pass through the network while automatically adapting to the inherent class imbalance present in the data. Further, we analyze the development of deeper, thus more discriminative 3D CNNs. In order to incorporate both local and larger contextual information, we employ a dual pathway architecture that processes the input images at multiple scales simultaneously. For post-processing of the network's soft segmentation, we use a 3D fully connected Conditional Random Field which effectively removes false positives. Our pipeline is extensively evaluated on three challenging tasks of lesion segmentation in multi-channel MRI patient data with traumatic brain injuries, brain tumours, and ischemic stroke. We improve on the state-of-the-art for all three applications, with top ranking performance on the public benchmarks BRATS 2015 and ISLES 2015. Our method is computationally efficient, which allows its adoption in a variety of research and clinical settings. The source code of our implementation is made publicly available.},
archivePrefix = {arXiv},
arxivId = {1603.05959},
author = {Kamnitsas, Konstantinos and Ledig, Christian and Newcombe, Virginia F.J. and Simpson, Joanna P. and Kane, Andrew D. and Menon, David K. and Rueckert, Daniel and Glocker, Ben},
doi = {10.1016/j.media.2016.10.004},
eprint = {1603.05959},
issn = {13618423},
journal = {Medical Image Analysis},
pmid = {25191215},
title = {{Efficient multi-scale 3D CNN with fully connected CRF for accurate brain lesion segmentation}},
volume = {36},
year = {2017}
}
@article{Iandola2016a,
abstract = {Recent research on deep neural networks has focused primarily on improving accuracy. For a given accuracy level, it is typically possible to identify multiple DNN architectures that achieve that accuracy level. With equivalent accuracy, smaller DNN architectures offer at least three advantages: (1) Smaller DNNs require less communication across servers during distributed training. (2) Smaller DNNs require less bandwidth to export a new model from the cloud to an autonomous car. (3) Smaller DNNs are more feasible to deploy on FPGAs and other hardware with limited memory. To provide all of these advantages, we propose a small DNN architecture called SqueezeNet. SqueezeNet achieves AlexNet-level accuracy on ImageNet with 50x fewer parameters. Additionally, with model compression techniques we are able to compress SqueezeNet to less than 0.5MB (510x smaller than AlexNet). The SqueezeNet architecture is available for download here: https://github.com/DeepScale/SqueezeNet},
archivePrefix = {arXiv},
arxivId = {1602.07360},
author = {Iandola, Forrest N. and Han, Song and Moskewicz, Matthew W. and Ashraf, Khalid and Dally, William J. and Keutzer, Kurt},
eprint = {1602.07360},
file = {:Users/pietz/Documents/Mendeley/Iandola et al. - 2016 - SqueezeNet AlexNet-level accuracy with 50x fewer parameters and 0.5MB model size.pdf:pdf},
month = {feb},
title = {{SqueezeNet: AlexNet-level accuracy with 50x fewer parameters and {\textless}0.5MB model size}},
url = {http://arxiv.org/abs/1602.07360},
year = {2016}
}
@article{Simonyan2014a,
abstract = {In this work we investigate the effect of the convolutional network depth on its accuracy in the large-scale image recognition setting. Our main contribution is a thorough evaluation of networks of increasing depth using an architecture with very small (3x3) convolution filters, which shows that a significant improvement on the prior-art configurations can be achieved by pushing the depth to 16-19 weight layers. These findings were the basis of our ImageNet Challenge 2014 submission, where our team secured the first and the second places in the localisation and classification tracks respectively. We also show that our representations generalise well to other datasets, where they achieve state-of-the-art results. We have made our two best-performing ConvNet models publicly available to facilitate further research on the use of deep visual representations in computer vision.},
archivePrefix = {arXiv},
arxivId = {1409.1556},
author = {Simonyan, Karen and Zisserman, Andrew},
eprint = {1409.1556},
file = {:Users/pietz/Documents/Mendeley/Simonyan, Zisserman - 2014 - Very Deep Convolutional Networks for Large-Scale Image Recognition.pdf:pdf},
keywords = {classification,cnn,imagenet,vgg16},
mendeley-tags = {classification,cnn,imagenet,vgg16},
month = {sep},
title = {{Very Deep Convolutional Networks for Large-Scale Image Recognition}},
url = {http://arxiv.org/abs/1409.1556},
year = {2014}
}
@article{He2015b,
abstract = {Deeper neural networks are more difficult to train. We present a residual learning framework to ease the training of networks that are substantially deeper than those used previously. We explicitly reformulate the layers as learning residual functions with reference to the layer inputs, instead of learning unreferenced functions. We provide comprehensive empirical evidence showing that these residual networks are easier to optimize, and can gain accuracy from considerably increased depth. On the ImageNet dataset we evaluate residual nets with a depth of up to 152 layers---8x deeper than VGG nets but still having lower complexity. An ensemble of these residual nets achieves 3.57{\%} error on the ImageNet test set. This result won the 1st place on the ILSVRC 2015 classification task. We also present analysis on CIFAR-10 with 100 and 1000 layers. The depth of representations is of central importance for many visual recognition tasks. Solely due to our extremely deep representations, we obtain a 28{\%} relative improvement on the COCO object detection dataset. Deep residual nets are foundations of our submissions to ILSVRC {\&} COCO 2015 competitions, where we also won the 1st places on the tasks of ImageNet detection, ImageNet localization, COCO detection, and COCO segmentation.},
archivePrefix = {arXiv},
arxivId = {1512.03385},
author = {He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
eprint = {1512.03385},
file = {:Users/pietz/Documents/Mendeley/He et al. - 2015 - Deep Residual Learning for Image Recognition.pdf:pdf},
month = {dec},
title = {{Deep Residual Learning for Image Recognition}},
url = {http://arxiv.org/abs/1512.03385},
year = {2015}
}
@article{Dumoulin2016,
abstract = {We introduce a guide to help deep learning practitioners understand and manipulate convolutional neural network architectures. The guide clarifies the relationship between various properties (input shape, kernel shape, zero padding, strides and output shape) of convolutional, pooling and transposed convolutional layers, as well as the relationship between convolutional and transposed convolutional layers. Relationships are derived for various cases, and are illustrated in order to make them intuitive.},
archivePrefix = {arXiv},
arxivId = {arXiv:1603.07285v1},
author = {Dumoulin, Vincent and Visin, Francesco},
eprint = {arXiv:1603.07285v1},
file = {:Users/pietz/Documents/Mendeley/Dumoulin, Visin, Box - 2016 - A guide to convolution arithmetic for deep learning.pdf:pdf},
journal = {arXiv},
month = {mar},
pages = {1--28},
title = {{A guide to convolution arithmetic for deep learning}},
url = {http://arxiv.org/abs/1603.07285},
year = {2016}
}
@article{Keskar2016,
abstract = {The stochastic gradient descent (SGD) method and its variants are algorithms of choice for many Deep Learning tasks. These methods operate in a small-batch regime wherein a fraction of the training data, say {\$}32{\$}-{\$}512{\$} data points, is sampled to compute an approximation to the gradient. It has been observed in practice that when using a larger batch there is a degradation in the quality of the model, as measured by its ability to generalize. We investigate the cause for this generalization drop in the large-batch regime and present numerical evidence that supports the view that large-batch methods tend to converge to sharp minimizers of the training and testing functions - and as is well known, sharp minima lead to poorer generalization. In contrast, small-batch methods consistently converge to flat minimizers, and our experiments support a commonly held view that this is due to the inherent noise in the gradient estimation. We discuss several strategies to attempt to help large-batch methods eliminate this generalization gap.},
archivePrefix = {arXiv},
arxivId = {1609.04836},
author = {Keskar, Nitish Shirish and Mudigere, Dheevatsa and Nocedal, Jorge and Smelyanskiy, Mikhail and Tang, Ping Tak Peter},
eprint = {1609.04836},
file = {:Users/pietz/Documents/Mendeley/Keskar et al. - 2016 - On Large-Batch Training for Deep Learning Generalization Gap and Sharp Minima.pdf:pdf},
month = {sep},
title = {{On Large-Batch Training for Deep Learning: Generalization Gap and Sharp Minima}},
url = {http://arxiv.org/abs/1609.04836},
year = {2016}
}
@article{Goyal2017,
abstract = {Deep learning thrives with large neural networks and large datasets. However, larger networks and larger datasets result in longer training times that impede research and development progress. Distributed synchronous SGD offers a potential solution to this problem by dividing SGD minibatches over a pool of parallel workers. Yet to make this scheme efficient, the per-worker workload must be large, which implies nontrivial growth in the SGD minibatch size. In this paper, we empirically show that on the ImageNet dataset large minibatches cause optimization difficulties, but when these are addressed the trained networks exhibit good generalization. Specifically, we show no loss of accuracy when training with large minibatch sizes up to 8192 images. To achieve this result, we adopt a linear scaling rule for adjusting learning rates as a function of minibatch size and develop a new warmup scheme that overcomes optimization challenges early in training. With these simple techniques, our Caffe2-based system trains ResNet-50 with a minibatch size of 8192 on 256 GPUs in one hour, while matching small minibatch accuracy. Using commodity hardware, our implementation achieves {\~{}}90{\%} scaling efficiency when moving from 8 to 256 GPUs. This system enables us to train visual recognition models on internet-scale data with high efficiency.},
archivePrefix = {arXiv},
arxivId = {1706.02677},
author = {Goyal, Priya and Doll{\'{a}}r, Piotr and Girshick, Ross and Noordhuis, Pieter and Wesolowski, Lukasz and Kyrola, Aapo and Tulloch, Andrew and Jia, Yangqing and He, Kaiming},
eprint = {1706.02677},
file = {:Users/pietz/Documents/Mendeley/Goyal et al. - 2017 - Accurate, Large Minibatch SGD Training ImageNet in 1 Hour.pdf:pdf},
month = {jun},
title = {{Accurate, Large Minibatch SGD: Training ImageNet in 1 Hour}},
url = {http://arxiv.org/abs/1706.02677},
year = {2017}
}
@article{Hoffer2017,
abstract = {Background: Deep learning models are typically trained using stochastic gradient descent or one of its variants. These methods update the weights using their gradient, estimated from a small fraction of the training data. It has been observed that when using large batch sizes there is a persistent degradation in generalization performance - known as the "generalization gap" phenomena. Identifying the origin of this gap and closing it had remained an open problem. Contributions: We examine the initial high learning rate training phase. We find that the weight distance from its initialization grows logarithmically with the number of weight updates. We therefore propose a "random walk on random landscape" statistical model which is known to exhibit similar "ultra-slow" diffusion behavior. Following this hypothesis we conducted experiments to show empirically that the "generalization gap" stems from the relatively small number of updates rather than the batch size, and can be completely eliminated by adapting the training regime used. We further investigate different techniques to train models in the large-batch regime and present a novel algorithm named "Ghost Batch Normalization" which enables significant decrease in the generalization gap without increasing the number of updates. To validate our findings we conduct several additional experiments on MNIST, CIFAR-10, CIFAR-100 and ImageNet. Finally, we reassess common practices and beliefs concerning training of deep models and suggest they may not be optimal to achieve good generalization.},
archivePrefix = {arXiv},
arxivId = {1705.08741},
author = {Hoffer, Elad and Hubara, Itay and Soudry, Daniel},
eprint = {1705.08741},
file = {:Users/pietz/Documents/Mendeley/Hoffer, Hubara, Soudry - 2017 - Train longer, generalize better closing the generalization gap in large batch training of neural network.pdf:pdf},
month = {may},
title = {{Train longer, generalize better: closing the generalization gap in large batch training of neural networks}},
url = {http://arxiv.org/abs/1705.08741},
year = {2017}
}
@article{Kingma2014,
abstract = {We introduce Adam, an algorithm for first-order gradient-based optimization of stochastic objective functions, based on adaptive estimates of lower-order moments. The method is straightforward to implement, is computationally efficient, has little memory requirements, is invariant to diagonal rescaling of the gradients, and is well suited for problems that are large in terms of data and/or parameters. The method is also appropriate for non-stationary objectives and problems with very noisy and/or sparse gradients. The hyper-parameters have intuitive interpretations and typically require little tuning. Some connections to related algorithms, on which Adam was inspired, are discussed. We also analyze the theoretical convergence properties of the algorithm and provide a regret bound on the convergence rate that is comparable to the best known results under the online convex optimization framework. Empirical results demonstrate that Adam works well in practice and compares favorably to other stochastic optimization methods. Finally, we discuss AdaMax, a variant of Adam based on the infinity norm.},
archivePrefix = {arXiv},
arxivId = {1412.6980},
author = {Kingma, Diederik P. and Ba, Jimmy},
eprint = {1412.6980},
file = {:Users/pietz/Documents/Mendeley/Kingma, Ba - 2014 - Adam A Method for Stochastic Optimization.pdf:pdf},
month = {dec},
title = {{Adam: A Method for Stochastic Optimization}},
url = {http://arxiv.org/abs/1412.6980},
year = {2014}
}
@article{He2015a,
abstract = {Rectified activation units (rectifiers) are essential for state-of-the-art neural networks. In this work, we study rectifier neural networks for image classification from two aspects. First, we propose a Parametric Rectified Linear Unit (PReLU) that generalizes the traditional rectified unit. PReLU improves model fitting with nearly zero extra computational cost and little overfitting risk. Second, we derive a robust initialization method that particularly considers the rectifier nonlinearities. This method enables us to train extremely deep rectified models directly from scratch and to investigate deeper or wider network architectures. Based on our PReLU networks (PReLU-nets), we achieve 4.94{\%} top-5 test error on the ImageNet 2012 classification dataset. This is a 26{\%} relative improvement over the ILSVRC 2014 winner (GoogLeNet, 6.66{\%}). To our knowledge, our result is the first to surpass human-level performance (5.1{\%}, Russakovsky et al.) on this visual recognition challenge.},
archivePrefix = {arXiv},
arxivId = {1502.01852},
author = {He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
eprint = {1502.01852},
file = {:Users/pietz/Documents/Mendeley/He et al. - 2015 - Delving Deep into Rectifiers Surpassing Human-Level Performance on ImageNet Classification.pdf:pdf},
month = {feb},
title = {{Delving Deep into Rectifiers: Surpassing Human-Level Performance on ImageNet Classification}},
url = {http://arxiv.org/abs/1502.01852},
year = {2015}
}
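
The two contributions named in the He2015a abstract, sketched in NumPy under their usual formulations; 0.25 is the paper's initial PReLU slope, and the weight variance 2/fan_in is the initialization derived for rectifiers:

import numpy as np

def he_init(fan_in, fan_out):
    # Weights drawn from N(0, 2 / fan_in), the variance derived for
    # rectifier nonlinearities ("He initialization")
    return np.random.randn(fan_in, fan_out) * np.sqrt(2.0 / fan_in)

def prelu(x, a=0.25):
    # Parametric ReLU: identity for x > 0, learned slope a for x <= 0
    return np.where(x > 0, x, a * x)
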
@article{Clevert2015,
abstract = {We introduce the "exponential linear unit" (ELU) which speeds up learning in deep neural networks and leads to higher classification accuracies. Like rectified linear units (ReLUs), leaky ReLUs (LReLUs) and parametrized ReLUs (PReLUs), ELUs alleviate the vanishing gradient problem via the identity for positive values. However, ELUs have improved learning characteristics compared to the units with other activation functions. In contrast to ReLUs, ELUs have negative values which allows them to push mean unit activations closer to zero like batch normalization but with lower computational complexity. Mean shifts toward zero speed up learning by bringing the normal gradient closer to the unit natural gradient because of a reduced bias shift effect. While LReLUs and PReLUs have negative values, too, they do not ensure a noise-robust deactivation state. ELUs saturate to a negative value with smaller inputs and thereby decrease the forward propagated variation and information. Therefore, ELUs code the degree of presence of particular phenomena in the input, while they do not quantitatively model the degree of their absence. In experiments, ELUs lead not only to faster learning, but also to significantly better generalization performance than ReLUs and LReLUs on networks with more than 5 layers. On CIFAR-100 ELUs networks significantly outperform ReLU networks with batch normalization while batch normalization does not improve ELU networks. ELU networks are among the top 10 reported CIFAR-10 results and yield the best published result on CIFAR-100, without resorting to multi-view evaluation or model averaging. On ImageNet, ELU networks considerably speed up learning compared to a ReLU network with the same architecture, obtaining less than 10{\%} classification error for a single crop, single model network.},
archivePrefix = {arXiv},
arxivId = {1511.07289},
author = {Clevert, Djork-Arn{\'{e}} and Unterthiner, Thomas and Hochreiter, Sepp},
eprint = {1511.07289},
file = {:Users/pietz/Documents/Mendeley/Clevert, Unterthiner, Hochreiter - 2015 - Fast and Accurate Deep Network Learning by Exponential Linear Units (ELUs).pdf:pdf},
month = {nov},
title = {{Fast and Accurate Deep Network Learning by Exponential Linear Units (ELUs)}},
url = {http://arxiv.org/abs/1511.07289},
year = {2015}
}
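
The activation introduced in Clevert2015 as a small NumPy function; alpha = 1 is the common default:

import numpy as np

def elu(x, alpha=1.0):
    # Identity for positive inputs; saturates smoothly to -alpha for large
    # negative inputs, pushing mean activations toward zero
    return np.where(x > 0, x, alpha * (np.exp(x) - 1.0))
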
@article{Nair,
abstract = {Restricted Boltzmann machines were developed using binary stochastic hidden units. These can be generalized by replacing each binary unit by an infinite number of copies that all have the same weights but have progressively more negative biases. The learning and inference rules for these "Stepped Sigmoid Units" are unchanged. They can be approximated efficiently by noisy, rectified linear units. Compared with binary units, these units learn features that are better for object recognition on the NORB dataset and face verification on the Labeled Faces in the Wild dataset. Unlike binary units, rectified linear units preserve information about relative intensities as information travels through multiple layers of feature detectors.},
author = {Nair, Vinod and Hinton, Geoffrey E},
file = {:Users/pietz/Documents/Mendeley/Nair, Hinton - Unknown - Rectified Linear Units Improve Restricted Boltzmann Machines.pdf:pdf},
title = {{Rectified Linear Units Improve Restricted Boltzmann Machines}},
url = {http://www.cs.toronto.edu/{~}fritz/absps/reluICML.pdf}
}
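
The rectified linear unit from the Nair entry, plus the noisy variant (NReLU) its abstract describes, as a NumPy sketch; the noise variance sigmoid(x) follows the paper's construction, and x is assumed to be an array:

import numpy as np

def relu(x):
    return np.maximum(0.0, x)

def noisy_relu(x):
    # NReLU: rectify after adding Gaussian noise with variance sigmoid(x),
    # approximating a sum of binary stochastic units with shifted biases
    sigma2 = 1.0 / (1.0 + np.exp(-x))
    return np.maximum(0.0, x + np.random.randn(*x.shape) * np.sqrt(sigma2))
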
@article{Simard,
abstract = {Neural networks are a powerful technology for classification of visual inputs arising from documents. However, there is a confusing plethora of different neural network methods that are used in the literature and in industry. This paper describes a set of concrete best practices that document analysis researchers can use to get good results with neural networks. The most important practice is getting a training set as large as possible: we expand the training set by adding a new form of distorted data. The next most important practice is that convolutional neural networks are better suited for visual document tasks than fully connected networks. We propose that a simple "do-it-yourself" implementation of convolution with a flexible architecture is suitable for many visual document problems. This simple convolutional neural network does not require complex methods, such as momentum, weight decay, structure-dependent learning rates, averaging layers, tangent prop, or even finely-tuning the architecture. The end result is a very simple yet general architecture which can yield state-of-the-art performance for document analysis. We illustrate our claims on the MNIST set of English digit images.},
author = {Simard, Patrice Y and Steinkraus, Dave and Platt, John C},
file = {:Users/pietz/Documents/Mendeley/Simard, Steinkraus, Platt - Unknown - Best Practices for Convolutional Neural Networks Applied to Visual Document Analysis.pdf:pdf},
title = {{Best Practices for Convolutional Neural Networks Applied to Visual Document Analysis}},
url = {https://pdfs.semanticscholar.org/7b1c/c19dec9289c66e7ab45e80e8c42273509ab6.pdf}
}
@article{LeCun1998,
abstract = {Finding an appropriate set of features is an essential problem in the design of shape recognition systems. This paper attempts to show that for recognizing simple objects with high shape variability such as handwritten characters, it is possible, and even advantageous, to feed the system directly with minimally processed images and to rely on learning to extract the right set of features. Convolutional Neural Networks are shown to be particularly well suited to this task. We also show that these networks can be used to recognize multiple objects without requiring explicit segmentation of the objects from their surrounding. The second part of the paper presents the Graph Transformer Network model which extends the applicability of gradient-based learning to systems that use graphs to represents features, objects, and their combinations.},
author = {LeCun, Yann and Haffner, Patrick and Bottou, L{\'{e}}on and Bengio, Yoshua},
file = {:Users/pietz/Documents/Mendeley/LeCun et al. - 1998 - Object Recognition with Gradient-Based Learning.pdf:pdf},
keywords = {cnn,convolution,neural network},
mendeley-tags = {cnn,convolution,neural network},
title = {{Object Recognition with Gradient-Based Learning}},
url = {http://yann.lecun.com/exdb/publis/pdf/lecun-99.pdf},
year = {1998}
}
@article{Krizhevsky,
abstract = {We trained a large, deep convolutional neural network to classify the 1.2 million high-resolution images in the ImageNet LSVRC-2010 contest into the 1000 different classes. On the test data, we achieved top-1 and top-5 error rates of 37.5{\%} and 17.0{\%} which is considerably better than the previous state-of-the-art. The neural network, which has 60 million parameters and 650,000 neurons, consists of five convolutional layers, some of which are followed by max-pooling layers, and three fully-connected layers with a final 1000-way softmax. To make training faster, we used non-saturating neurons and a very efficient GPU implementation of the convolution operation. To reduce overfitting in the fully-connected layers we employed a recently-developed regularization method called "dropout" that proved to be very effective. We also entered a variant of this model in the ILSVRC-2012 competition and achieved a winning top-5 test error rate of 15.3{\%}, compared to 26.2{\%} achieved by the second-best entry.},
author = {Krizhevsky, Alex and Sutskever, Ilya and Hinton, Geoffrey E},
file = {:Users/pietz/Documents/Mendeley/Krizhevsky, Sutskever, Hinton - Unknown - ImageNet Classification with Deep Convolutional Neural Networks.pdf:pdf},
title = {{ImageNet Classification with Deep Convolutional Neural Networks}}
}
@article{Albarqouni2016,
abstract = {The lack of publicly available ground-truth data has been identified as the major challenge for transferring recent developments in deep learning to the biomedical imaging domain. Though crowdsourcing has enabled annotation of large scale databases for real world images, its application for biomedical purposes requires a deeper understanding and hence, more precise definition of the actual annotation task. The fact that expert tasks are being outsourced to non-expert users may lead to noisy annotations introducing disagreement between users. Despite being a valuable resource for learning annotation models from crowdsourcing, conventional machine-learning methods may have difficulties dealing with noisy annotations during training. In this manuscript, we present a new concept for learning from crowds that handle data aggregation directly as part of the learning process of the convolutional neural network (CNN) via additional crowdsourcing layer (AggNet). Besides, we present an experimental study on learning from crowds designed to answer the following questions. 1) Can deep CNN be trained with data collected from crowdsourcing? 2) How to adapt the CNN to train on multiple types of annotation datasets (ground truth and crowd-based)? 3) How does the choice of annotation and aggregation affect the accuracy? Our experimental setup involved Annot8, a self-implemented web-platform based on Crowdflower API realizing image annotation tasks for a publicly available biomedical image database. Our results give valuable insights into the functionality of deep CNN learning from crowd annotations and prove the necessity of data aggregation integration.},
author = {Albarqouni, Shadi and Baur, Christoph and Achilles, Felix and Belagiannis, Vasileios and Demirci, Stefanie and Navab, Nassir},
doi = {10.1109/TMI.2016.2528120},
isbn = {1558-254X (Electronic)$\backslash$r0278-0062 (Linking)},
issn = {1558254X},
journal = {IEEE Transactions on Medical Imaging},
number = {5},
pmid = {26891484},
title = {{AggNet: Deep Learning From Crowds for Mitosis Detection in Breast Cancer Histology Images}},
volume = {35},
year = {2016}
}
@article{Havaei2017,
abstract = {In this paper, we present a fully automatic brain tumor segmentation method based on Deep Neural Networks (DNNs). The proposed networks are tailored to glioblastomas (both low and high grade) pictured in MR images. By their very nature, these tumors can appear anywhere in the brain and have almost any kind of shape, size, and contrast. These reasons motivate our exploration of a machine learning solution that exploits a flexible, high capacity DNN while being extremely efficient. Here, we give a description of different model choices that we've found to be necessary for obtaining competitive performance. We explore in particular different architectures based on Convolutional Neural Networks (CNN), i.e. DNNs specifically adapted to image data. We present a novel CNN architecture which differs from those traditionally used in computer vision. Our CNN exploits both local features as well as more global contextual features simultaneously. Also, different from most traditional uses of CNNs, our networks use a final layer that is a convolutional implementation of a fully connected layer which allows a 40 fold speed up. We also describe a 2-phase training procedure that allows us to tackle difficulties related to the imbalance of tumor labels. Finally, we explore a cascade architecture in which the output of a basic CNN is treated as an additional source of information for a subsequent CNN. Results reported on the 2013 BRATS test dataset reveal that our architecture improves over the currently published state-of-the-art while being over 30 times faster.},
archivePrefix = {arXiv},
arxivId = {1505.03540},
author = {Havaei, Mohammad and Davy, Axel and Warde-Farley, David and Biard, Antoine and Courville, Aaron and Bengio, Yoshua and Pal, Chris and Jodoin, Pierre Marc and Larochelle, Hugo},
doi = {10.1016/j.media.2016.05.004},
eprint = {1505.03540},
isbn = {1361-8415},
issn = {13618423},
journal = {Medical Image Analysis},
pmid = {27310171},
title = {{Brain tumor segmentation with Deep Neural Networks}},
volume = {35},
year = {2017}
}
@article{Chollet2016,
abstract = {We present an interpretation of Inception modules in convolutional neural networks as being an intermediate step in-between regular convolution and the depthwise separable convolution operation (a depthwise convolution followed by a pointwise convolution). In this light, a depthwise separable convolution can be understood as an Inception module with a maximally large number of towers. This observation leads us to propose a novel deep convolutional neural network architecture inspired by Inception, where Inception modules have been replaced with depthwise separable convolutions. We show that this architecture, dubbed Xception, slightly outperforms Inception V3 on the ImageNet dataset (which Inception V3 was designed for), and significantly outperforms Inception V3 on a larger image classification dataset comprising 350 million images and 17,000 classes. Since the Xception architecture has the same number of parameters as Inception V3, the performance gains are not due to increased capacity but rather to a more efficient use of model parameters.},
archivePrefix = {arXiv},
arxivId = {1610.02357},
author = {Chollet, Fran{\c{c}}ois},
eprint = {1610.02357},
file = {:Users/pietz/Documents/Mendeley/Chollet - 2016 - Xception Deep Learning with Depthwise Separable Convolutions.pdf:pdf},
month = {oct},
title = {{Xception: Deep Learning with Depthwise Separable Convolutions}},
url = {http://arxiv.org/abs/1610.02357},
year = {2016}
}
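
A naive NumPy illustration of the depthwise separable convolution that the Chollet2016 abstract builds on: one spatial filter per input channel, then a 1x1 pointwise convolution that mixes channels ('valid' padding, stride 1; shapes and names are illustrative, and the loops favor clarity over speed):

import numpy as np

def depthwise_separable_conv(x, dw_kernels, pw_weights):
    # x: (H, W, C); dw_kernels: (k, k, C); pw_weights: (C, C_out)
    H, W, C = x.shape
    k = dw_kernels.shape[0]
    Ho, Wo = H - k + 1, W - k + 1
    dw = np.zeros((Ho, Wo, C))
    for c in range(C):                      # depthwise: filter each channel separately
        for i in range(Ho):
            for j in range(Wo):
                dw[i, j, c] = np.sum(x[i:i+k, j:j+k, c] * dw_kernels[:, :, c])
    return np.matmul(dw, pw_weights)        # pointwise: 1x1 conv mixes channels
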
@article{Sled1997,
abstract = {A novel approach to correcting for intensity non-uniformity in MR data is described that achieves high performance without requiring supervision. By making relatively few assumptions about the data, the method can be applied at an early stage in an automated data analysis, before a tissue intensity or geometric model is available. Described as Non-parametric Non-uniform intensity Normalization (N3), the method is independent of pulse sequence and insensitive to pathological data that might otherwise violate model assumptions. To eliminate the dependence of the field estimate on anatomy, an iterative approach is employed to estimate both the multiplicative bias field and the distribution of the true tissue intensities. The performance of this method is evaluated using both real and simulated MR data. Preprocessing of MR data using N3 is shown to substantially improve the accuracy of anatomical analysis techniques such as tissue classification and cortical surface extraction.},
author = {Sled, John G},
file = {:Users/pietz/Documents/Mendeley/Sled - 1997 - A Non-parametric Method for Automatic Correction of Intensity Non-uniformity in MRI Data.pdf:pdf},
title = {{A Non-parametric Method for Automatic Correction of Intensity Non-uniformity in MRI Data}},
url = {http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.72.2659{\&}rep=rep1{\&}type=pdf},
year = {1997}
}
@article{Lemley2017,
abstract = {A recurring problem faced when training neural networks is that there is typically not enough data to maximize the generalization capability of deep neural networks (DNN). There are many techniques to address this, including data augmentation, dropout, and transfer learning. In this paper, we introduce an additional method which we call Smart Augmentation and we show how to use it to increase the accuracy and reduce overfitting on a target network. Smart Augmentation works by creating a network that learns how to generate augmented data during the training process of a target network in a way that reduces that network's loss. This allows us to learn augmentations that minimize the error of that network. Smart Augmentation has shown the potential to increase accuracy by demonstrably significant measures on all datasets tested. In addition, it has shown potential to achieve similar or improved performance levels with significantly smaller network sizes in a number of tested cases.},
archivePrefix = {arXiv},
arxivId = {1703.08383},
author = {Lemley, Joseph and Bazrafkan, Shabab and Corcoran, Peter},
doi = {10.1109/ACCESS.2017.2696121},
eprint = {1703.08383},
file = {:Users/pietz/Documents/Mendeley/Lemley, Bazrafkan, Corcoran - 2017 - Smart Augmentation - Learning an Optimal Data Augmentation Strategy.pdf:pdf},
month = {mar},
title = {{Smart Augmentation - Learning an Optimal Data Augmentation Strategy}},
url = {http://arxiv.org/abs/1703.08383 http://dx.doi.org/10.1109/ACCESS.2017.2696121},
year = {2017}
}
@article{Wang2013,
abstract = {Label fusion based multi-atlas segmentation has proven to be one of the most competitive techniques for medical image segmentation. This technique transfers segmentations from expert-labeled images, called atlases, to a novel image using deformable image registration. Errors produced by label transfer are further reduced by label fusion that combines the results produced by all atlases into a consensus solution. Among the proposed label fusion strategies, weighted voting with spatially varying weight distributions derived from atlas-target intensity similarity is a simple and highly effective label fusion technique. However, one limitation of most weighted voting methods is that the weights are computed independently for each atlas, without taking into account the fact that different atlases may produce similar label errors. To address this problem, we recently developed the joint label fusion technique and the corrective learning technique, which won the first place of the 2012 MICCAI Multi-Atlas Labeling Challenge and was one of the top performers in 2013 MICCAI Segmentation: Algorithms, Theory and Applications (SATA) challenge. To make our techniques more accessible to the scientific research community, we describe an Insight-Toolkit based open source implementation of our label fusion methods. Our implementation extends our methods to work with multi-modality imaging data and is more suitable for segmentation problems with multiple labels. We demonstrate the usage of our tools through applying them to the 2012 MICCAI Multi-Atlas Labeling Challenge brain image dataset and the 2013 SATA challenge canine leg image dataset. We report the best results on these two datasets so far.},
author = {Wang, Hongzhi and Yushkevich, Paul A},
doi = {10.3389/fninf.2013.00027},
file = {:Users/pietz/Documents/Mendeley/Wang, Yushkevich - 2013 - Multi-atlas segmentation with joint label fusion and corrective learning-an open source implementation.pdf:pdf},
issn = {1662-5196},
journal = {Frontiers in neuroinformatics},
keywords = {Insight-Toolkit,corrective learning,joint label fusion,multi-atlas label fusion,open source implementation},
pages = {27},
pmid = {24319427},
publisher = {Frontiers Media SA},
title = {{Multi-atlas segmentation with joint label fusion and corrective learning-an open source implementation.}},
url = {http://www.ncbi.nlm.nih.gov/pubmed/24319427 http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=PMC3837555},
volume = {7},
year = {2013}
}
@article{Iglesias2014,
abstract = {Multi-atlas segmentation (MAS), first introduced and popularized by the pioneering work of Rohlfing, Brandt, Menzel and Maurer Jr (2004), Klein, Mensh, Ghosh, Tourville and Hirsch (2005), and Heckemann, Hajnal, Aljabar, Rueckert and Hammers (2006), is becoming one of the most widely-used and successful image segmentation techniques in biomedical applications. By manipulating and utilizing the entire dataset of "atlases" (training images that have been previously labeled, e.g., manually by an expert), rather than some model-based average representation, MAS has the flexibility to better capture anatomical variation, thus offering superior segmentation accuracy. This benefit, however, typically comes at a high computational cost. Recent advancements in computer hardware and image processing software have been instrumental in addressing this challenge and facilitated the wide adoption of MAS. Today, MAS has come a long way and the approach includes a wide array of sophisticated algorithms that employ ideas from machine learning, probabilistic modeling, optimization, and computer vision, among other fields. This paper presents a survey of published MAS algorithms and studies that have applied these methods to various biomedical problems. In writing this survey, we have three distinct aims. Our primary goal is to document how MAS was originally conceived, later evolved, and now relates to alternative methods. Second, this paper is intended to be a detailed reference of past research activity in MAS, which now spans over a decade (2003 - 2014) and entails novel methodological developments and application-specific solutions. Finally, our goal is to also present a perspective on the future of MAS, which, we believe, will be one of the dominant approaches in biomedical image segmentation.},
archivePrefix = {arXiv},
arxivId = {1412.3421},
author = {Iglesias, Juan Eugenio and Sabuncu, Mert Rory},
eprint = {1412.3421},
file = {:Users/pietz/Documents/Mendeley/Iglesias, Sabuncu - 2014 - Multi-Atlas Segmentation of Biomedical Images A Survey.pdf:pdf},
month = {dec},
title = {{Multi-Atlas Segmentation of Biomedical Images: A Survey}},
url = {http://arxiv.org/abs/1412.3421},
year = {2014}
}
@article{Wang2017a,
abstract = {Accurate medical image segmentation is essential for diagnosis, surgical planning and many other applications. Convolutional Neural Networks (CNNs) have shown to be state-of-the-art automatic segmentation methods while the result still needs to be refined to become accurate and robust enough for clinical use. We propose a deep learning-based interactive segmentation method in order to improve the segmentation obtained by an automatic CNN as well as reduce user interactions during refinement for better results. We use one CNN to obtain an initial segmentation automatically, on which user interactions are added to indicate mis-segmentations. Another CNN takes as input the user interactions with the initial segmentation and gives a refined result. We propose a new way to combine user interactions with CNNs through geodesic distance maps, and propose a resolution-preserving network that can give better dense prediction. In addition, we integrate user interactions as hard constraints into back-propagatable Conditional Random Fields. We validated the proposed framework in the application of placenta segmentation from fetal MRI and clavicle segmentation from chest radiographs. Experimental results show our method achieves a large improvement from automatic CNNs, and obtains comparable accuracy with fewer user interventions and less time compared with traditional interactive methods.},
archivePrefix = {arXiv},
arxivId = {1707.00652},
author = {Wang, Guotai and Zuluaga, Maria A. and Li, Wenqi and Pratt, Rosalind and Patel, Premal A. and Aertsen, Michael and Doel, Tom and David, Anna L. and Deprest, Jan and Ourselin, Sebastien and Vercauteren, Tom},
eprint = {1707.00652},
file = {:Users/pietz/Documents/Mendeley/Wang et al. - 2017 - DeepIGeoS A Deep Interactive Geodesic Framework for Medical Image Segmentation.pdf:pdf},
keywords = {Index Terms—Interactive image segmentation,conditional random fields,convolutional neural network,geodesic distance},
month = {jul},
title = {{DeepIGeoS: A Deep Interactive Geodesic Framework for Medical Image Segmentation}},
url = {http://arxiv.org/abs/1707.00652 https://arxiv.org/pdf/1707.00652v1.pdf},
year = {2017}
}
@article{Salehi2017,
abstract = {Fully convolutional deep neural networks carry out excellent potential for fast and accurate image segmentation. One of the main challenges in training these networks is data imbalance, which is particularly problematic in medical imaging applications such as lesion segmentation where the number of lesion voxels is often much lower than the number of non-lesion voxels. Training with unbalanced data can lead to predictions that are severely biased towards high precision but low recall (sensitivity), which is undesired especially in medical applications where false negatives are much less tolerable than false positives. Several methods have been proposed to deal with this problem including balanced sampling, two step training, sample re-weighting, and similarity loss functions. In this paper, we propose a generalized loss function based on the Tversky index to address the issue of data imbalance and achieve much better trade-off between precision and recall in training 3D fully convolutional deep neural networks. Experimental results in multiple sclerosis lesion segmentation on magnetic resonance images show improved F2 score, Dice coefficient, and the area under the precision-recall curve in test data. Based on these results we suggest Tversky loss function as a generalized framework to effectively train deep neural networks.},
archivePrefix = {arXiv},
arxivId = {1706.05721},
author = {Salehi, Seyed Sadegh Mohseni and Erdogmus, Deniz and Gholipour, Ali},
eprint = {1706.05721},
file = {:Users/pietz/Documents/Mendeley/Salehi, Erdogmus, Gholipour - 2017 - Tversky loss function for image segmentation using 3D fully convolutional deep networks(2).pdf:pdf},
month = {jun},
title = {{Tversky loss function for image segmentation using 3D fully convolutional deep networks}},
url = {http://arxiv.org/abs/1706.05721},
year = {2017}
}
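
The Tversky index proposed in Salehi2017, as a differentiable NumPy loss; alpha = beta = 0.5 recovers the Dice coefficient, and weighting false negatives more heavily (beta > alpha, as in the paper's 0.3/0.7 setting) trades precision for recall as the abstract describes:

import numpy as np

def tversky_loss(p, g, alpha=0.3, beta=0.7, eps=1e-7):
    # p: predicted foreground probabilities, g: binary ground truth
    tp = np.sum(p * g)                 # soft true positives
    fp = np.sum(p * (1.0 - g))         # soft false positives
    fn = np.sum((1.0 - p) * g)         # soft false negatives
    return 1.0 - tp / (tp + alpha * fp + beta * fn + eps)
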
@article{Ioffe2015,
abstract = {Training Deep Neural Networks is complicated by the fact that the distribution of each layer's inputs changes during training, as the parameters of the previous layers change. This slows down the training by requiring lower learning rates and careful parameter initialization, and makes it notoriously hard to train models with saturating nonlinearities. We refer to this phenomenon as internal covariate shift, and address the problem by normalizing layer inputs. Our method draws its strength from making normalization a part of the model architecture and performing the normalization for each training mini-batch. Batch Normalization allows us to use much higher learning rates and be less careful about initialization. It also acts as a regularizer, in some cases eliminating the need for Dropout. Applied to a state-of-the-art image classification model, Batch Normalization achieves the same accuracy with 14 times fewer training steps, and beats the original model by a significant margin. Using an ensemble of batch-normalized networks, we improve upon the best published result on ImageNet classification: reaching 4.9{\%} top-5 validation error (and 4.8{\%} test error), exceeding the accuracy of human raters.},
archivePrefix = {arXiv},
arxivId = {1502.03167},
author = {Ioffe, Sergey and Szegedy, Christian},
eprint = {1502.03167},
file = {:Users/pietz/Documents/Mendeley/Ioffe, Szegedy - 2015 - Batch Normalization Accelerating Deep Network Training by Reducing Internal Covariate Shift.pdf:pdf},
month = {feb},
title = {{Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift}},
url = {http://arxiv.org/abs/1502.03167},
year = {2015}
}
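
The per-mini-batch transform at the core of Ioffe2015, sketched for a 2D batch in NumPy; gamma and beta are the learnable scale and shift, and the inference-time running statistics are omitted from this sketch:

import numpy as np

def batch_norm(x, gamma, beta, eps=1e-5):
    mu = x.mean(axis=0)                        # mini-batch mean per feature
    var = x.var(axis=0)                        # mini-batch variance per feature
    x_hat = (x - mu) / np.sqrt(var + eps)      # normalize
    return gamma * x_hat + beta                # restore representational power
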
@article{Krahenbuhl2012,
abstract = {Most state-of-the-art techniques for multi-class image segmentation and labeling use conditional random fields defined over pixels or image regions. While region-level models often feature dense pairwise connectivity, pixel-level models are considerably larger and have only permitted sparse graph structures. In this paper, we consider fully connected CRF models defined on the complete set of pixels in an image. The resulting graphs have billions of edges, making traditional inference algorithms impractical. Our main contribution is a highly efficient approximate inference algorithm for fully connected CRF models in which the pairwise edge potentials are defined by a linear combination of Gaussian kernels. Our experiments demonstrate that dense connectivity at the pixel level substantially improves segmentation and labeling accuracy.},
archivePrefix = {arXiv},
arxivId = {1210.5644},
author = {Kr{\"{a}}henb{\"{u}}hl, Philipp and Koltun, Vladlen},
eprint = {1210.5644},
file = {:Users/pietz/Documents/Mendeley/Kr{\"{a}}henb{\"{u}}hl, Koltun - 2012 - Efficient Inference in Fully Connected CRFs with Gaussian Edge Potentials.pdf:pdf},
month = {oct},
title = {{Efficient Inference in Fully Connected CRFs with Gaussian Edge Potentials}},
url = {http://arxiv.org/abs/1210.5644},
year = {2012}
}
@article{Tustison2010,
abstract = {A variant of the popular nonparametric nonuniform intensity normalization (N3) algorithm is proposed for bias field correction. Given the superb performance of N3 and its public availability, it has been the subject of several evaluation studies. These studies have demonstrated the importance of certain parameters associated with the B-spline least-squares fitting. We propose the substitution of a recently developed fast and robust B-spline approximation routine and a modified hierarchical optimization scheme for improved bias field correction over the original N3 algorithm. Similar to the N3 algorithm, we also make the source code, testing, and technical documentation of our contribution, which we denote as "N4ITK," available to the public through the Insight Toolkit of the National Institutes of Health. Performance assessment is demonstrated using simulated data from the publicly available Brainweb database, hyperpolarized 3He lung image data, and 9.4T postmortem hippocampus data.},
author = {Tustison, Nicholas J. and Avants, Brian B. and Cook, Philip A. and Zheng, Yuanjie and Egan, Alexander and Yushkevich, Paul A. and Gee, James C.},
doi = {10.1109/TMI.2010.2046908},
file = {:Users/pietz/Documents/Mendeley/N4ITK.pdf:pdf},
isbn = {1558-254X (Electronic)$\backslash$r0278-0062 (Linking)},
issn = {02780062},
journal = {IEEE Transactions on Medical Imaging},
keywords = {B-spline approximation,Bias field,Inhomogeneity,N3},
number = {6},
pages = {1310--1320},
pmid = {20378467},
title = {{N4ITK: Improved N3 bias correction}},
volume = {29},
year = {2010}
}
@incollection{Pastor-Pellicer2013,
author = {Pastor-Pellicer, Joan and Zamora-Mart{\'{i}}nez, Francisco and Espa{\~{n}}a-Boquera, Salvador and Castro-Bleda, Mar{\'{i}}a Jos{\'{e}}},
doi = {10.1007/978-3-642-38679-4_37},
file = {:Users/pietz/Documents/Mendeley/F-Measure.pdf:pdf},
pages = {376--384},
publisher = {Springer, Berlin, Heidelberg},
title = {{F-Measure as the Error Function to Train Neural Networks}},
url = {http://link.springer.com/10.1007/978-3-642-38679-4{\_}37},
year = {2013}
}
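
A common soft formulation of the idea in Pastor-Pellicer2013: computing the F-measure from probabilities instead of hard decisions makes it differentiable, so it can be minimized directly as a training criterion (a sketch only; the chapter's exact formulation may differ):

import numpy as np

def soft_f1_loss(p, g, eps=1e-7):
    tp = np.sum(p * g)                          # soft true-positive count
    precision = tp / (np.sum(p) + eps)
    recall = tp / (np.sum(g) + eps)
    f1 = 2.0 * precision * recall / (precision + recall + eps)
    return 1.0 - f1
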
@article{Csurka2013,
abstract = {In this work, we consider the evaluation of the semantic segmentation task. We discuss the strengths and limitations of the few existing measures, and propose new ways to evaluate semantic segmentation. First, we argue that a per-image score instead of one computed over the entire dataset brings a lot more insight. Second, we propose to take contours more carefully into account. Based on the conducted experiments, we suggest best practices for the evaluation. Finally, we present a user study we conducted to better understand how the quality of image segmentations is perceived by humans.},
author = {Csurka, Gabriela and Larlus, Diane and Perronnin, Florent},
file = {:Users/pietz/Documents/Mendeley/Csurka, Larlus, Perronnin - Unknown - What is a good evaluation measure for semantic segmentation.pdf:pdf},
title = {{What is a good evaluation measure for semantic segmentation?}},
url = {http://www.bmva.org/bmvc/2013/Papers/paper0032/paper0032.pdf},
year = {2013}
}
@article{Chen2017,
abstract = {In this work, we revisit atrous convolution, a powerful tool to explicitly adjust filter's field-of-view as well as control the resolution of feature responses computed by Deep Convolutional Neural Networks, in the application of semantic image segmentation. To handle the problem of segmenting objects at multiple scales, we design modules which employ atrous convolution in cascade or in parallel to capture multi-scale context by adopting multiple atrous rates. Furthermore, we propose to augment our previously proposed Atrous Spatial Pyramid Pooling module, which probes convolutional features at multiple scales, with image-level features encoding global context and further boost performance. We also elaborate on implementation details and share our experience on training our system. The proposed `DeepLabv3' system significantly improves over our previous DeepLab versions without DenseCRF post-processing and attains comparable performance with other state-of-art models on the PASCAL VOC 2012 semantic image segmentation benchmark.},
archivePrefix = {arXiv},
arxivId = {1706.05587},
author = {Chen, Liang-Chieh and Papandreou, George and Schroff, Florian and Adam, Hartwig},
eprint = {1706.05587},
file = {:Users/pietz/Documents/Mendeley/Chen et al. - 2017 - Rethinking Atrous Convolution for Semantic Image Segmentation.pdf:pdf},
month = {jun},
title = {{Rethinking Atrous Convolution for Semantic Image Segmentation}},
url = {http://arxiv.org/abs/1706.05587},
year = {2017}
}
@article{Yu2015,
abstract = {State-of-the-art models for semantic segmentation are based on adaptations of convolutional networks that had originally been designed for image classification. However, dense prediction and image classification are structurally different. In this work, we develop a new convolutional network module that is specifically designed for dense prediction. The presented module uses dilated convolutions to systematically aggregate multi-scale contextual information without losing resolution. The architecture is based on the fact that dilated convolutions support exponential expansion of the receptive field without loss of resolution or coverage. We show that the presented context module increases the accuracy of state-of-the-art semantic segmentation systems. In addition, we examine the adaptation of image classification networks to dense prediction and show that simplifying the adapted network can increase accuracy.},
archivePrefix = {arXiv},
arxivId = {1511.07122},
author = {Yu, Fisher and Koltun, Vladlen},
eprint = {1511.07122},
file = {:Users/pietz/Documents/Mendeley/1511.07122.pdf:pdf},
month = {nov},
title = {{Multi-Scale Context Aggregation by Dilated Convolutions}},
url = {http://arxiv.org/abs/1511.07122},
year = {2015}
}
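
A 1D NumPy illustration of the dilated convolution underlying Yu2015 (the atrous convolution of Chen2017 is the same operation): the filter taps are spaced `rate` samples apart, so the same three weights cover a nine-sample receptive field at rate 4 without adding parameters:

import numpy as np

def dilated_conv1d(x, w, rate=1):
    # 'valid' dilated convolution: taps are `rate` samples apart
    k = len(w)
    span = (k - 1) * rate + 1                   # effective receptive field
    return np.array([sum(w[j] * x[i + j * rate] for j in range(k))
                     for i in range(len(x) - span + 1)])

x = np.arange(16.0)
print(dilated_conv1d(x, np.array([1.0, 1.0, 1.0]), rate=1))  # ordinary conv
print(dilated_conv1d(x, np.array([1.0, 1.0, 1.0]), rate=4))  # same weights, wider field
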
@article{Hafiane2017,
abstract = {Ultrasound-Guided Regional Anesthesia (UGRA) has been gaining importance in the last few years, offering numerous advantages over alternative methods of nerve localization (neurostimulation or paraesthesia). However, nerve detection is one of the most difficult tasks that anaesthetists can encounter in the UGRA procedure. A computer aided system that can automatically detect the region of the nerve would help the practitioner to concentrate more on anaesthetic delivery. In this paper we propose a new method based on deep learning combined with spatiotemporal information to robustly segment the nerve region. The proposed method is based on two phases, localisation and segmentation. The first phase consists in using a convolutional neural network combined with spatial and temporal consistency to detect the nerve zone. The second phase utilises an active contour model to delineate the region of interest. Obtained results show the validity of the proposed approach and its robustness.},
archivePrefix = {arXiv},
arxivId = {1706.05870},
author = {Hafiane, Adel and Vieyres, Pierre and Delbos, Alain},
eprint = {1706.05870},
file = {:Users/pietz/Documents/Mendeley/1706.05870.pdf:pdf},
month = {jun},
title = {{Deep learning with spatiotemporal consistency for nerve segmentation in ultrasound images}},
url = {http://arxiv.org/abs/1706.05870},
year = {2017}
}
@phdthesis{Mauer2015,
author = {{Auf der Mauer}, Markus},
file = {:Users/pietz/Documents/Mendeley/Auf der Mauer - 2015 - Automated Quantification of the Growth Plate of the Proximal Tibia for the Age Assessment in 3D MR Images Using a.pdf:pdf},
title = {{Automated Quantification of the Growth Plate of the Proximal Tibia for the Age Assessment in 3D MR Images Using a Fuzzy-Logic Classification Approach}},
year = {2015}
}
@article{Chlebus2017,
abstract = {We present a fully automatic method employing convolutional neural networks based on the 2D U-net architecture and random forest classifier to solve the automatic liver lesion segmentation problem of the ISBI 2017 Liver Tumor Segmentation Challenge (LiTS). In order to constrain the ROI in which the tumors could be located, a liver segmentation is performed first. For the organ segmentation, an ensemble of convolutional networks is trained to segment a liver using a set of 179 liver CT datasets from liver surgery planning. Inside of the liver ROI a neural network, trained using 127 challenge training datasets, identifies tumor candidates, which are subsequently filtered with a random forest classifier yielding the final tumor segmentation. The evaluation on the 70 challenge test cases resulted in a mean Dice coefficient of 0.65, ranking our method in the second place.},
archivePrefix = {arXiv},
arxivId = {1706.00842},
author = {Chlebus, Grzegorz and Meine, Hans and Moltz, Jan Hendrik and Schenk, Andrea},
eprint = {1706.00842},
file = {:Users/pietz/Documents/Mendeley/Chlebus et al. - 2017 - Neureal Network-Based Automatic Liver Tumor Segmentation With Random Forest-Based Candidate Filtering.pdf:pdf},
month = {jun},
title = {{Neural Network-Based Automatic Liver Tumor Segmentation With Random Forest-Based Candidate Filtering}},
url = {http://arxiv.org/abs/1706.00842},
year = {2017}
}
@article{Son2017,
abstract = {Retinal vessel segmentation is an indispensable step for automatic detection of retinal diseases with fundoscopic images. Though many approaches have been proposed, existing methods tend to miss fine vessels or allow false positives at terminal branches. Let alone under-segmentation, over-segmentation is also problematic when quantitative studies need to measure the precise width of vessels. In this paper, we present a method that generates the precise map of retinal vessels using generative adversarial training. Our methods achieve dice coefficient of 0.829 on DRIVE dataset and 0.834 on STARE dataset which is the state-of-the-art performance on both datasets.},
archivePrefix = {arXiv},
arxivId = {1706.09318},
author = {Son, Jaemin and Park, Sang Jun and Jung, Kyu-Hwan},
eprint = {1706.09318},
file = {:Users/pietz/Documents/Mendeley/1706.09318.pdf:pdf},
month = {jun},
title = {{Retinal Vessel Segmentation in Fundoscopic Images with Generative Adversarial Networks}},
url = {http://arxiv.org/abs/1706.09318},
year = {2017}
}
@article{Yuheng2017,
abstract = {The technology of image segmentation is widely used in medical image processing, face recognition, pedestrian detection, etc. The current image segmentation techniques include region-based segmentation, edge detection segmentation, segmentation based on clustering, segmentation based on weakly-supervised learning in CNN, etc. This paper analyzes and summarizes these algorithms of image segmentation, and compares the advantages and disadvantages of different algorithms. Finally, we make a prediction of the development trend of image segmentation with the combination of these algorithms.},
archivePrefix = {arXiv},
arxivId = {1707.02051},
author = {Yuheng, Song and Hao, Yan},
eprint = {1707.02051},
file = {:Users/pietz/Documents/Mendeley/Yuheng, Hao - 2017 - Image Segmentation Algorithms Overview.pdf:pdf},
month = {jul},
title = {{Image Segmentation Algorithms Overview}},
url = {http://arxiv.org/abs/1707.02051},
year = {2017}
}
@article{Feng2017,
abstract = {Automated detection and segmentation of pulmonary nodules on lung computed tomography (CT) scans can facilitate early lung cancer diagnosis. Existing supervised approaches for automated nodule segmentation on CT scans require voxel-based annotations for training, which are labor- and time-consuming to obtain. In this work, we propose a weakly-supervised method that generates accurate voxel-level nodule segmentation trained with image-level labels only. By adapting a convolutional neural network (CNN) trained for image classification, our proposed method learns discriminative regions from the activation maps of convolution units at different scales, and identifies the true nodule location with a novel candidate-screening framework. Experimental results on the public LIDC-IDRI dataset demonstrate that our weakly-supervised nodule segmentation framework achieves competitive performance compared to a fully-supervised CNN-based segmentation method.},
archivePrefix = {arXiv},
arxivId = {1707.01086},
author = {Feng, Xinyang and Yang, Jie and Laine, Andrew F. and Angelini, Elsa D.},
eprint = {1707.01086},
file = {:Users/pietz/Documents/Mendeley/Feng et al. - 2017 - Discriminative Localization in CNNs for Weakly-Supervised Segmentation of Pulmonary Nodules.pdf:pdf},
month = {jul},
title = {{Discriminative Localization in CNNs for Weakly-Supervised Segmentation of Pulmonary Nodules}},
url = {http://arxiv.org/abs/1707.01086 https://arxiv.org/pdf/1707.01086v1.pdf},
year = {2017}
}
@article{Fidon2017,
abstract = {The Dice score is widely used for binary segmentation due to its robustness to class imbalance. Soft generalisations of the Dice score allow it to be used as a loss function for training convolutional neural networks (CNN). Although CNNs trained using mean-class Dice score achieve state-of-the-art results on multi-class segmentation, this loss function does neither take advantage of inter-class relationships nor multi-scale information. We argue that an improved loss function should balance misclassifications to favour predictions that are semantically meaningful. This paper investigates these issues in the context of multi-class brain tumour segmentation. Our contribution is threefold. 1) We propose a semantically-informed generalisation of the Dice score for multi-class segmentation based on the Wasserstein distance on the probabilistic label space. 2) We propose a holistic CNN that embeds spatial information at multiple scales with deep supervision. 3) We show that the joint use of holistic CNNs and generalised Wasserstein Dice scores achieves segmentations that are more semantically meaningful for brain tumour segmentation.},
archivePrefix = {arXiv},
arxivId = {1707.00478},
author = {Fidon, Lucas and Li, Wenqi and Garc{\'{i}}a-Peraza-Herrera, Luis C.},
eprint = {1707.00478},
file = {:Users/pietz/Documents/Mendeley/Fidon et al. - 2017 - Generalised Wasserstein Dice Score for Imbalanced Multi-class Segmentation using Holistic Convolutional Networks.pdf:pdf},
month = {jul},
pages = {1--11},
title = {{Generalised Wasserstein Dice Score for Imbalanced Multi-class Segmentation using Holistic Convolutional Networks}},
url = {http://arxiv.org/abs/1707.00478},
year = {2017}
}
@article{Moeskops2017,
abstract = {Convolutional neural networks (CNNs) have been applied to various automatic image segmentation tasks in medical image analysis, including brain MRI segmentation. Generative adversarial networks have recently gained popularity because of their power in generating images that are difficult to distinguish from real images. In this study we use an adversarial training approach to improve CNN-based brain MRI segmentation. To this end, we include an additional loss function that motivates the network to generate segmentations that are difficult to distinguish from manual segmentations. During training, this loss function is optimised together with the conventional average per-voxel cross entropy loss. The results show improved segmentation performance using this adversarial training procedure for segmentation of two different sets of images and using two different network architectures, both visually and in terms of Dice coefficients.},
archivePrefix = {arXiv},
arxivId = {1707.03195},
author = {Moeskops, Pim and Veta, Mitko and Lafarge, Maxime W. and Eppenhof, Koen A. J. and Pluim, Josien P. W.},
eprint = {1707.03195},
file = {:Users/pietz/Documents/Mendeley/Moeskops et al. - 2017 - Adversarial training and dilated convolutions for brain MRI segmentation.pdf:pdf},
month = {jul},
title = {{Adversarial training and dilated convolutions for brain MRI segmentation}},
url = {http://arxiv.org/abs/1707.03195},
year = {2017}
}
@article{Tai2016,
abstract = {Semantic segmentation of functional magnetic resonance imaging (fMRI) makes great sense for pathology diagnosis and decision system of medical robots. The multi-channel fMRI data provide more information of the pathological features. But the increased amount of data causes complexity in feature detection. This paper proposes a principal component analysis (PCA)-aided fully convolutional network to particularly deal with multi-channel fMRI. We transfer the learned weights of contemporary classification networks to the segmentation task by fine-tuning. The experiments results are compared with various methods e.g. k-NN. A new labelling strategy is proposed to solve the semantic segmentation problem with unclear boundaries. Even with a small-sized training dataset, the test results demonstrate that our model outperforms other pathological feature detection methods. Besides, its forward inference only takes 90 milliseconds for a single set of fMRI data. To our knowledge, this is the first time to realize pixel-wise labeling of multi-channel magnetic resonance image using FCN.},
archivePrefix = {arXiv},
arxivId = {1610.01732},
author = {Tai, Lei and Ye, Qiong and Liu, Ming},
eprint = {1610.01732},
file = {:Users/pietz/Documents/Mendeley/Tai et al. - 2016 - PCA-aided Fully Convolutional Networks for Semantic Segmentation of Multi-channel fMRI.pdf:pdf},
month = {oct},
title = {{PCA-aided Fully Convolutional Networks for Semantic Segmentation of Multi-channel fMRI}},
url = {http://arxiv.org/abs/1610.01732},
year = {2016}
}
@article{Badrinarayanan2015,
abstract = {We present a novel and practical deep fully convolutional neural network architecture for semantic pixel-wise segmentation termed SegNet. This core trainable segmentation engine consists of an encoder network, a corresponding decoder network followed by a pixel-wise classification layer. The architecture of the encoder network is topologically identical to the 13 convolutional layers in the VGG16 network. The role of the decoder network is to map the low resolution encoder feature maps to full input resolution feature maps for pixel-wise classification. The novelty of SegNet lies is in the manner in which the decoder upsamples its lower resolution input feature map(s). Specifically, the decoder uses pooling indices computed in the max-pooling step of the corresponding encoder to perform non-linear upsampling. This eliminates the need for learning to upsample. The upsampled maps are sparse and are then convolved with trainable filters to produce dense feature maps. We compare our proposed architecture with the widely adopted FCN and also with the well known DeepLab-LargeFOV, DeconvNet architectures. This comparison reveals the memory versus accuracy trade-off involved in achieving good segmentation performance. SegNet was primarily motivated by scene understanding applications. Hence, it is designed to be efficient both in terms of memory and computational time during inference. It is also significantly smaller in the number of trainable parameters than other competing architectures. We also performed a controlled benchmark of SegNet and other architectures on both road scenes and SUN RGB-D indoor scene segmentation tasks. We show that SegNet provides good performance with competitive inference time and more efficient inference memory-wise as compared to other architectures. We also provide a Caffe implementation of SegNet and a web demo at http://mi.eng.cam.ac.uk/projects/segnet/.},
archivePrefix = {arXiv},
arxivId = {1511.00561},
author = {Badrinarayanan, Vijay and Kendall, Alex and Cipolla, Roberto},
eprint = {1511.00561},
file = {:Users/pietz/Documents/Mendeley/1511.00561.pdf:pdf},
month = {nov},
title = {{SegNet: A Deep Convolutional Encoder-Decoder Architecture for Image Segmentation}},
url = {http://arxiv.org/abs/1511.00561},
year = {2015}
}
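
The decoder trick highlighted in the Badrinarayanan2015 abstract, sketched in NumPy: max pooling records the argmax positions, and the decoder unpools by writing each value back at exactly that position (2x2 windows assumed; names are illustrative):

import numpy as np

def max_pool_with_indices(x, s=2):
    H, W = x.shape
    pooled = np.zeros((H // s, W // s))
    idx = np.zeros((H // s, W // s), dtype=int)
    for i in range(H // s):
        for j in range(W // s):
            win = x[i*s:(i+1)*s, j*s:(j+1)*s]
            k = int(np.argmax(win))
            pooled[i, j] = win.flat[k]
            idx[i, j] = (i*s + k // s) * W + (j*s + k % s)  # flat index into x
    return pooled, idx

def max_unpool(pooled, idx, shape):
    # Sparse, non-linear upsampling: values return to their argmax locations
    out = np.zeros(shape)
    out.flat[idx.ravel()] = pooled.ravel()
    return out
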
@article{Lieman-Sifry2017,
abstract = {Cardiac Magnetic Resonance (CMR) imaging is commonly used to assess cardiac structure and function. One disadvantage of CMR is that post-processing of exams is tedious. Without automation, precise assessment of cardiac function via CMR typically requires an annotator to spend tens of minutes per case manually contouring ventricular structures. Automatic contouring can lower the required time per patient by generating contour suggestions that can be lightly modified by the annotator. Fully convolutional networks (FCNs), a variant of convolutional neural networks, have been used to rapidly advance the state-of-the-art in automated segmentation, which makes FCNs a natural choice for ventricular segmentation. However, FCNs are limited by their computational cost, which increases the monetary cost and degrades the user experience of production systems. To combat this shortcoming, we have developed the FastVentricle architecture, an FCN architecture for ventricular segmentation based on the recently developed ENet architecture. FastVentricle is 4x faster and runs with 6x less memory than the previous state-of-the-art ventricular segmentation architecture while still maintaining excellent clinical accuracy.},
archivePrefix = {arXiv},
arxivId = {1704.04296},
author = {Lieman-Sifry, Jesse and Le, Matthieu and Lau, Felix and Sall, Sean and Golden, Daniel},
doi = {10.1007/978-3-319-59448-4_13},
eprint = {1704.04296},
file = {:Users/pietz/Documents/Mendeley/1704.04296.pdf:pdf},
isbn = {9783319594477},
issn = {16113349},
title = {{FastVentricle: Cardiac Segmentation with ENet}},
year = {2017}
}
@article{Srivastava2014,
author = {Srivastava, Nitish and Hinton, Geoffrey and Krizhevsky, Alex and Sutskever, Ilya and Salakhutdinov, Ruslan},
file = {:Users/pietz/Documents/Mendeley/Srivastava et al. - 2014 - Dropout A Simple Way to Prevent Neural Networks from Overfitting.pdf:pdf},
journal = {Journal of Machine Learning Research},
pages = {1929--1958},
title = {{Dropout: A Simple Way to Prevent Neural Networks from Overfitting}},
url = {http://jmlr.org/papers/v15/srivastava14a.html},
volume = {15},
year = {2014}
}
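
The regularizer of Srivastava2014 in its common "inverted" form, as a NumPy sketch: units are zeroed with probability p during training and the survivors rescaled by 1/(1-p), which replaces the paper's original test-time weight scaling with an identity at test time:

import numpy as np

def dropout(x, p=0.5, train=True):
    if not train or p == 0.0:
        return x                                   # identity at test time
    mask = (np.random.rand(*x.shape) >= p) / (1.0 - p)
    return x * mask                                # drop and rescale
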
@book{Chollet2017,
author = {Chollet, Fran{\c{c}}ois},
publisher = {Manning},
file = {:Users/pietz/Documents/Mendeley/Chollet - 2017 - Deep Learning With Python.pdf:pdf},
keywords = {ai,artificial intelligence,deep learning,dl,keras,machine learning,ml},
mendeley-tags = {ai,artificial intelligence,deep learning,dl,keras,machine learning,ml},
title = {{Deep Learning With Python}},
volume = {1},
year = {2017}
}
@book{Goodfellow2016,
author = {Goodfellow, Ian and Bengio, Yoshua and Courville, Aaron},
file = {:Users/pietz/Documents/Mendeley/Goodfellow, Bengio, Courville - 2016 - Deep Learning.pdf:pdf},
isbn = {0262035618},
publisher = {The MIT Press},
title = {{Deep Learning}},
year = {2016}
}
@inproceedings{Milletari2016,
abstract = {Convolutional Neural Networks (CNNs) have been recently employed to solve problems from both the computer vision and medical image analysis fields. Despite their popularity, most approaches are only able to process 2D images while most medical data used in clinical practice consists of 3D volumes. In this work we propose an approach to 3D image segmentation based on a volumetric, fully convolutional, neural network. Our CNN is trained end-to-end on MRI volumes depicting prostate, and learns to predict segmentation for the whole volume at once. We introduce a novel objective function, that we optimise during training, based on Dice coefficient. In this way we can deal with situations where there is a strong imbalance between the number of foreground and background voxels. To cope with the limited number of annotated volumes available for training, we augment the data applying random non-linear transformations and histogram matching. We show in our experimental evaluation that our approach achieves good performances on challenging test data while requiring only a fraction of the processing time needed by other previous methods.},
archivePrefix = {arXiv},
arxivId = {1606.04797},
author = {Milletari, Fausto and Navab, Nassir and Ahmadi, Seyed Ahmad},
booktitle = {Proceedings - 2016 4th International Conference on 3D Vision, 3DV 2016},
doi = {10.1109/3DV.2016.79},
eprint = {1606.04797},
file = {:Users/pietz/Documents/Mendeley/Milletari, Navab, Ahmadi - 2016 - V-Net Fully Convolutional Neural Networks for Volumetric Medical Image Segmentation.pdf:pdf},
isbn = {9781509054077},
keywords = {Deep learning,convolutional neural networks,machine learning,prostate,segmentation},
month = {jun},
pages = {565--571},
title = {{V-Net: Fully convolutional neural networks for volumetric medical image segmentation}},
url = {http://arxiv.org/abs/1606.04797},
year = {2016}
}
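
The Dice-based objective introduced in Milletari2016, as a differentiable NumPy sketch; the squared terms in the denominator follow the V-Net formulation:

import numpy as np

def soft_dice_loss(p, g, eps=1e-7):
    # p: predicted foreground probabilities, g: binary ground truth
    num = 2.0 * np.sum(p * g)
    den = np.sum(p * p) + np.sum(g * g) + eps
    return 1.0 - num / den
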
@article{Kayalibay2017,
abstract = {Convolutional neural networks have been applied to a wide variety of computer vision tasks. Recent advances in semantic segmentation have enabled their application to medical image segmentation. While most CNNs use two-dimensional kernels, recent CNN-based publications on medical image segmentation featured three-dimensional kernels, allowing full access to the three-dimensional structure of medical images. Though closely related to semantic segmentation, medical image segmentation includes specific challenges that need to be addressed, such as the scarcity of labelled data, the high class imbalance found in the ground truth and the high memory demand of three-dimensional images. In this work, a CNN-based method with three-dimensional filters is demonstrated and applied to hand and brain MRI. Two modifications to an existing CNN architecture are discussed, along with methods on addressing the aforementioned challenges. While most of the existing literature on medical image segmentation focuses on soft tissue and the major organs, this work is validated on data both from the central nervous system as well as the bones of the hand.},
archivePrefix = {arXiv},
arxivId = {1701.03056},
author = {Kayalibay, Baris and Jensen, Grady and van der Smagt, Patrick},
eprint = {1701.03056},
file = {:Users/pietz/Documents/Mendeley/Kayalibay, Jensen, van der Smagt - 2017 - CNN-based Segmentation of Medical Imaging Data.pdf:pdf},
month = {jan},
title = {{CNN-based Segmentation of Medical Imaging Data}},
url = {http://arxiv.org/abs/1701.03056},
year = {2017}
}
@article{Shelhamer2016,
abstract = {Convolutional networks are powerful visual models that yield hierarchies of features. We show that convolutional networks by themselves, trained end-to-end, pixels-to-pixels, improve on the previous best result in semantic segmentation. Our key insight is to build "fully convolutional" networks that take input of arbitrary size and produce correspondingly-sized output with efficient inference and learning. We define and detail the space of fully convolutional networks, explain their application to spatially dense prediction tasks, and draw connections to prior models. We adapt contemporary classification networks (AlexNet, the VGG net, and GoogLeNet) into fully convolutional networks and transfer their learned representations by fine-tuning to the segmentation task. We then define a skip architecture that combines semantic information from a deep, coarse layer with appearance information from a shallow, fine layer to produce accurate and detailed segmentations. Our fully convolutional network achieves improved segmentation of PASCAL VOC (30{\%} relative improvement to 67.2{\%} mean IU on 2012), NYUDv2, SIFT Flow, and PASCAL-Context, while inference takes one tenth of a second for a typical image.},
archivePrefix = {arXiv},
arxivId = {1605.06211},
author = {Shelhamer, Evan and Long, Jonathan and Darrell, Trevor},
eprint = {1605.06211},
file = {:Users/pietz/Documents/Mendeley/Shelhamer, Long, Darrell - 2016 - Fully Convolutional Networks for Semantic Segmentation.pdf:pdf},
month = {may},
title = {{Fully Convolutional Networks for Semantic Segmentation}},
url = {http://arxiv.org/abs/1605.06211},
year = {2016}
}
@article{Wang2017,
abstract = {Recent advances in deep learning, especially deep convolutional neural networks (CNNs), have led to significant improvement over previous semantic segmentation systems. Here we show how to improve pixel-wise semantic segmentation by manipulating convolution-related operations that are better for practical use. First, we implement dense upsampling convolution (DUC) to generate pixel-level prediction, which is able to capture and decode more detailed information that is generally missing in bilinear upsampling. Second, we propose a hybrid dilated convolution (HDC) framework in the encoding phase. This framework 1) effectively enlarges the receptive fields of the network to aggregate global information; 2) alleviates what we call the "gridding issue" caused by the standard dilated convolution operation. We evaluate our approaches thoroughly on the Cityscapes dataset, and achieve a new state-of-the-art result of 80.1{\%} mIOU in the test set. We are also state-of-the-art overall on the KITTI road estimation benchmark and the PASCAL VOC2012 segmentation task. Pretrained models are available at https://goo.gl/DQMeun},
archivePrefix = {arXiv},
arxivId = {1702.08502},
author = {Wang, Panqu and Chen, Pengfei and Yuan, Ye and Liu, Ding and Huang, Zehua and Hou, Xiaodi and Cottrell, Garrison},
eprint = {1702.08502},
file = {:Users/pietz/Documents/Mendeley/Wang et al. - 2017 - Understanding Convolution for Semantic Segmentation.pdf:pdf},
month = {feb},
title = {{Understanding Convolution for Semantic Segmentation}},
url = {http://arxiv.org/abs/1702.08502},
year = {2017}
}
@inproceedings{Cicek2016,
abstract = {This paper introduces a network for volumetric segmentation that learns from sparsely annotated volumetric images. We outline two attractive use cases of this method: (1) In a semi-automated setup, the user annotates some slices in the volume to be segmented. The network learns from these sparse annotations and provides a dense 3D segmentation. (2) In a fully-automated setup, we assume that a representative, sparsely annotated training set exists. Trained on this data set, the network densely segments new volumetric images. The proposed network extends the previous u-net architecture from Ronneberger et al. by replacing all 2D operations with their 3D counterparts. The implementation performs on-the-fly elastic deformations for efficient data augmentation during training. It is trained end-to-end from scratch, i.e., no pre-trained network is required. We test the performance of the proposed method on a complex, highly variable 3D structure, the Xenopus kidney, and achieve good results for both use cases.},
archivePrefix = {arXiv},
arxivId = {1606.06650},
author = {{\c{C}}i{\c{c}}ek, {\"{O}}zg{\"{u}}n and Abdulkadir, Ahmed and Lienkamp, Soeren S. and Brox, Thomas and Ronneberger, Olaf},
booktitle = {Lecture Notes in Computer Science (including subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics)},
doi = {10.1007/978-3-319-46723-8_49},
eprint = {1606.06650},
file = {:Users/pietz/Documents/Mendeley/{\c{C}}i{\c{c}}ek et al. - 2016 - 3D U-net Learning dense volumetric segmentation from sparse annotation.pdf:pdf},
isbn = {9783319467221},
issn = {16113349},
keywords = {3D,Biomedical volumetric image segmentation,Convolutional neural networks,Fully-automated,Semi-automated,Sparse annotation,Xenopus kidney},
month = {jun},
pages = {424--432},
title = {{3D U-net: Learning dense volumetric segmentation from sparse annotation}},
url = {http://arxiv.org/abs/1606.06650},
volume = {9901 LNCS},
year = {2016}
}
@article{Zhang2017,
abstract = {We introduce an extremely computation efficient CNN architecture named ShuffleNet, designed specially for mobile devices with very limited computing power (e.g., 10-150 MFLOPs). The new architecture utilizes two proposed operations, pointwise group convolution and channel shuffle, to greatly reduce computation cost while maintaining accuracy. Experiments on ImageNet classification and MS COCO object detection demonstrate the superior performance of ShuffleNet over other structures, e.g. lower top-1 error (absolute 6.7{\%}) than the recent MobileNet system on ImageNet classification under the computation budget of 40 MFLOPs. On an ARM-based mobile device, ShuffleNet achieves {\textasciitilde}13x actual speedup over AlexNet while maintaining comparable accuracy.},
archivePrefix = {arXiv},
arxivId = {1707.01083},
author = {Zhang, Xiangyu and Zhou, Xinyu and Lin, Mengxiao and Sun, Jian},
eprint = {1707.01083},
file = {:Users/pietz/Documents/Mendeley/Zhang et al. - 2017 - ShuffleNet An Extremely Efficient Convolutional Neural Network for Mobile Devices.pdf:pdf},
month = {jul},
pages = {1--10},
title = {{ShuffleNet: An Extremely Efficient Convolutional Neural Network for Mobile Devices}},
url = {http://arxiv.org/abs/1707.01083},
year = {2017}
}
@article{Rajpurkar2017,
abstract = {We develop an algorithm which exceeds the performance of board certified cardiologists in detecting a wide range of heart arrhythmias from electrocardiograms recorded with a single-lead wearable monitor. We build a dataset with more than 500 times the number of unique patients than previously studied corpora. On this dataset, we train a 34-layer convolutional neural network which maps a sequence of ECG samples to a sequence of rhythm classes. Committees of board-certified cardiologists annotate a gold standard test set on which we compare the performance of our model to that of 6 other individual cardiologists. We exceed the average cardiologist performance in both recall (sensitivity) and precision (positive predictive value).},
archivePrefix = {arXiv},
arxivId = {1707.01836},
author = {Rajpurkar, Pranav and Hannun, Awni Y. and Haghpanahi, Masoumeh and Bourn, Codie and Ng, Andrew Y.},
eprint = {1707.01836},
file = {:Users/pietz/Documents/Mendeley/Rajpurkar et al. - 2017 - Cardiologist-Level Arrhythmia Detection with Convolutional Neural Networks.pdf:pdf},
month = {jul},
title = {{Cardiologist-Level Arrhythmia Detection with Convolutional Neural Networks}},
url = {http://arxiv.org/abs/1707.01836},
year = {2017}
}
@article{Ronneberger2015a,
abstract = {There is large consent that successful training of deep networks requires many thousand annotated training samples. In this paper, we present a network and training strategy that relies on the strong use of data augmentation to use the available annotated samples more efficiently. The architecture consists of a contracting path to capture context and a symmetric expanding path that enables precise localization. We show that such a network can be trained end-to-end from very few images and outperforms the prior best method (a sliding-window convolutional network) on the ISBI challenge for segmentation of neuronal structures in electron microscopic stacks. Using the same network trained on transmitted light microscopy images (phase contrast and DIC) we won the ISBI cell tracking challenge 2015 in these categories by a large margin. Moreover, the network is fast. Segmentation of a 512x512 image takes less than a second on a recent GPU. The full implementation (based on Caffe) and the trained networks are available at http://lmb.informatik.uni-freiburg.de/people/ronneber/u-net .},
archivePrefix = {arXiv},
arxivId = {1505.04597},
author = {Ronneberger, Olaf and Fischer, Philipp and Brox, Thomas},
doi = {10.1007/978-3-319-24574-4_28},
eprint = {1505.04597},
file = {:Users/pietz/Documents/Mendeley/Ronneberger, Fischer, Brox - 2015 - U-Net Convolutional Networks for Biomedical Image Segmentation.pdf:pdf},
isbn = {978-3-319-24573-7},
issn = {16113349},
journal = {MICCAI},
month = {may},
pages = {234--241},
pmid = {23285570},
title = {{U-Net: Convolutional Networks for Biomedical Image Segmentation}},
url = {http://arxiv.org/abs/1505.04597},
year = {2015}
}
@article{Noh2015,
abstract = {We propose a novel semantic segmentation algorithm by learning a deconvolution network. We learn the network on top of the convolutional layers adopted from VGG 16-layer net. The deconvolution network is composed of deconvolution and unpooling layers, which identify pixel-wise class labels and predict segmentation masks. We apply the trained network to each proposal in an input image, and construct the final semantic segmentation map by combining the results from all proposals in a simple manner. The proposed algorithm mitigates the limitations of the existing methods based on fully convolutional networks by integrating deep deconvolution network and proposal-wise prediction; our segmentation method typically identifies detailed structures and handles objects in multiple scales naturally. Our network demonstrates outstanding performance in PASCAL VOC 2012 dataset, and we achieve the best accuracy (72.5{\%}) among the methods trained with no external data through ensemble with the fully convolutional network.},
archivePrefix = {arXiv},
arxivId = {1505.04366},
author = {Noh, Hyeonwoo and Hong, Seunghoon and Han, Bohyung},
eprint = {1505.04366},
file = {:Users/pietz/Documents/Mendeley/1505.04366.pdf:pdf},
month = {may},
title = {{Learning Deconvolution Network for Semantic Segmentation}},
url = {http://arxiv.org/abs/1505.04366},
year = {2015}
}
@article{Springenberg2014,
abstract = {Most modern convolutional neural networks (CNNs) used for object recognition are built using the same principles: Alternating convolution and max-pooling layers followed by a small number of fully connected layers. We re-evaluate the state of the art for object recognition from small images with convolutional networks, questioning the necessity of different components in the pipeline. We find that max-pooling can simply be replaced by a convolutional layer with increased stride without loss in accuracy on several image recognition benchmarks. Following this finding -- and building on other recent work for finding simple network structures -- we propose a new architecture that consists solely of convolutional layers and yields competitive or state of the art performance on several object recognition datasets (CIFAR-10, CIFAR-100, ImageNet). To analyze the network we introduce a new variant of the "deconvolution approach" for visualizing features learned by CNNs, which can be applied to a broader range of network structures than existing approaches.},
archivePrefix = {arXiv},
arxivId = {1412.6806},
author = {Springenberg, Jost Tobias and Dosovitskiy, Alexey and Brox, Thomas and Riedmiller, Martin},
eprint = {1412.6806},
file = {:Users/pietz/Documents/Mendeley/Springenberg et al. - 2014 - Striving for Simplicity The All Convolutional Net.pdf:pdf},
month = {dec},
title = {{Striving for Simplicity: The All Convolutional Net}},
url = {http://arxiv.org/abs/1412.6806},
year = {2014}
}
@article{Roth2015,
abstract = {Automated classification of human anatomy is an important prerequisite for many computer-aided diagnosis systems. The spatial complexity and variability of anatomy throughout the human body makes classification difficult. "Deep learning" methods such as convolutional networks (ConvNets) outperform other state-of-the-art methods in image classification tasks. In this work, we present a method for organ- or body-part-specific anatomical classification of medical images acquired using computed tomography (CT) with ConvNets. We train a ConvNet, using 4,298 separate axial 2D key-images to learn 5 anatomical classes. Key-images were mined from a hospital PACS archive, using a set of 1,675 patients. We show that a data augmentation approach can help to enrich the data set and improve classification performance. Using ConvNets and data augmentation, we achieve anatomy-specific classification error of 5.9 {\%} and area-under-the-curve (AUC) values of an average of 0.998 in testing. We demonstrate that deep learning can be used to train very reliable and accurate classifiers that could initialize further computer-aided diagnosis.},
archivePrefix = {arXiv},
arxivId = {1504.04003},
author = {Roth, Holger R. and Lee, Christopher T. and Shin, Hoo-Chang and Seff, Ari and Kim, Lauren and Yao, Jianhua and Lu, Le and Summers, Ronald M.},
doi = {10.1109/ISBI.2015.7163826},
eprint = {1504.04003},
file = {:Users/pietz/Documents/Mendeley/Roth et al. - 2015 - Anatomy-specific classification of medical images using deep convolutional nets.pdf:pdf},
month = {apr},
title = {{Anatomy-specific classification of medical images using deep convolutional nets}},
url = {http://arxiv.org/abs/1504.04003 http://dx.doi.org/10.1109/ISBI.2015.7163826},
year = {2015}
}
@article{Camlica2015,
abstract = {Good results on image classification and retrieval using support vector machines (SVM) with local binary patterns (LBPs) as features have been extensively reported in the literature where an entire image is retrieved or classified. In contrast, in medical imaging, not all parts of the image may be equally significant or relevant to the image retrieval application at hand. For instance, in lung x-ray image, the lung region may contain a tumour, hence being highly significant whereas the surrounding area does not contain significant information from medical diagnosis perspective. In this paper, we propose to detect salient regions of images during training and fold the data to reduce the effect of irrelevant regions. As a result, smaller image areas will be used for LBP features calculation and consequently classification by SVM. We use IRMA 2009 dataset with 14,410 x-ray images to verify the performance of the proposed approach. The results demonstrate the benefits of saliency-based folding approach that delivers comparable classification accuracies with state-of-the-art but exhibits lower computational cost and storage requirements, factors highly important for big data analytics.},
archivePrefix = {arXiv},
arxivId = {1509.04619},
author = {Camlica, Zehra and Tizhoosh, H. R. and Khalvati, Farzad},
doi = {10.1109/ICMLA.2015.131},
eprint = {1509.04619},
file = {:Users/pietz/Documents/Mendeley/Camlica, Tizhoosh, Khalvati - 2015 - Medical Image Classification via SVM using LBP Features from Saliency-Based Folded Data.pdf:pdf},
month = {sep},
title = {{Medical Image Classification via SVM using LBP Features from Saliency-Based Folded Data}},
url = {http://arxiv.org/abs/1509.04619 http://dx.doi.org/10.1109/ICMLA.2015.131},
year = {2015}
}
@article{Cho2015,
abstract = {The use of Convolutional Neural Networks (CNN) in natural image classification systems has produced very impressive results. Combined with the inherent nature of medical images that make them ideal for deep-learning, further application of such systems to medical image classification holds much promise. However, the usefulness and potential impact of such a system can be completely negated if it does not reach a target accuracy. In this paper, we present a study on determining the optimum size of the training data set necessary to achieve high classification accuracy with low variance in medical image classification systems. The CNN was applied to classify axial Computed Tomography (CT) images into six anatomical classes. We trained the CNN using six different sizes of training data set (5, 10, 20, 50, 100, and 200) and then tested the resulting system with a total of 6000 CT images. All images were acquired from the Massachusetts General Hospital (MGH) Picture Archiving and Communication System (PACS). Using this data, we employ the learning curve approach to predict classification accuracy at a given training sample size. Our research will present a general methodology for determining the training data set size necessary to achieve a certain target classification accuracy that can be easily applied to other problems within such systems.},
archivePrefix = {arXiv},
arxivId = {1511.06348},
author = {Cho, Junghwan and Lee, Kyewook and Shin, Ellie and Choy, Garry and Do, Synho},
eprint = {1511.06348},
file = {:Users/pietz/Documents/Mendeley/Cho et al. - 2015 - How much data is needed to train a medical image deep learning system to achieve necessary high accuracy.pdf:pdf},
month = {nov},
title = {{How much data is needed to train a medical image deep learning system to achieve necessary high accuracy?}},
url = {http://arxiv.org/abs/1511.06348},
year = {2015}
}
@article{Mishkin2016,
abstract = {The paper systematically studies the impact of a range of recent advances in CNN architectures and learning methods on the object categorization (ILSVRC) problem. The evaluation tests the influence of the following choices of the architecture: non-linearity (ReLU, ELU, maxout, compatibility with batch normalization), pooling variants (stochastic, max, average, mixed), network width, classifier design (convolutional, fully-connected, SPP), image pre-processing, and of learning parameters: learning rate, batch size, cleanliness of the data, etc. The performance gains of the proposed modifications are first tested individually and then in combination. The sum of individual gains is bigger than the observed improvement when all modifications are introduced, but the "deficit" is small suggesting independence of their benefits. We show that the use of 128x128 pixel images is sufficient to make qualitative conclusions about optimal network structure that hold for the full size Caffe and VGG nets. The results are obtained an order of magnitude faster than with the standard 224 pixel images.},
archivePrefix = {arXiv},
arxivId = {1606.02228},
author = {Mishkin, Dmytro and Sergievskiy, Nikolay and Matas, Jiri},
doi = {10.1016/j.cviu.2017.05.007},
eprint = {1606.02228},
file = {:Users/pietz/Documents/Mendeley/Mishkin, Sergievskiy, Matas - 2016 - Systematic evaluation of CNN advances on the ImageNet.pdf:pdf},
month = {jun},
title = {{Systematic evaluation of CNN advances on the ImageNet}},
url = {http://arxiv.org/abs/1606.02228 http://dx.doi.org/10.1016/j.cviu.2017.05.007},
year = {2016}
}
@article{Anderson2016,
abstract = {In this paper we present a technique to train neural network models on small amounts of data. Current methods for training neural networks on small amounts of rich data typically rely on strategies such as fine-tuning a pre-trained neural network or the use of domain-specific hand-engineered features. Here we take the approach of treating network layers, or entire networks, as modules and combine pre-trained modules with untrained modules, to learn the shift in distributions between data sets. The central impact of using a modular approach comes from adding new representations to a network, as opposed to replacing representations via fine-tuning. Using this technique, we are able to surpass results using standard fine-tuning transfer learning approaches, and we are also able to significantly increase performance over such approaches when using smaller amounts of data.},
archivePrefix = {arXiv},
arxivId = {1611.01714},
author = {Anderson, Ark and Shaffer, Kyle and Yankov, Artem and Corley, Court D. and Hodas, Nathan O.},
eprint = {1611.01714},
file = {:Users/pietz/Documents/Mendeley/Anderson et al. - 2016 - Beyond Fine Tuning A Modular Approach to Learning on Small Data.pdf:pdf},
month = {nov},
title = {{Beyond Fine Tuning: A Modular Approach to Learning on Small Data}},
url = {http://arxiv.org/abs/1611.01714},
year = {2016}
}
@article{Cole2016,
abstract = {Machine learning analysis of neuroimaging data can accurately predict chronological age in healthy people and deviations from healthy brain ageing have been associated with cognitive impairment and disease. Here we sought to further establish the credentials of "brain-predicted age" as a biomarker of individual differences in the brain ageing process, using a predictive modelling approach based on deep learning, and specifically convolutional neural networks (CNN), and applied to both pre-processed and raw T1-weighted MRI data. Firstly, we aimed to demonstrate the accuracy of CNN brain-predicted age using a large dataset of healthy adults (N = 2001). Next, we sought to establish the heritability of brain-predicted age using a sample of monozygotic and dizygotic female twins (N = 62). Thirdly, we examined the test-retest and multi-centre reliability of brain-predicted age using two samples (within-scanner N = 20; between-scanner N = 11). CNN brain-predicted ages were generated and compared to a Gaussian Process Regression (GPR) approach, on all datasets. Input data were grey matter (GM) or white matter (WM) volumetric maps generated by Statistical Parametric Mapping (SPM) or raw data. Brain-predicted age represents an accurate, highly reliable and genetically-valid phenotype, that has potential to be used as a biomarker of brain ageing. Moreover, age predictions can be accurately generated on raw T1-MRI data, substantially reducing computation time for novel data, bringing the process closer to giving real-time information on brain health in clinical settings.},
archivePrefix = {arXiv},
arxivId = {1612.02572},
author = {Cole, James H and Poudel, Rudra PK and Tsagkrasoulis, Dimosthenis and Caan, Matthan WA and Steves, Claire and Spector, Tim D and Montana, Giovanni},
eprint = {1612.02572},
file = {:Users/pietz/Documents/Mendeley/Cole et al. - 2016 - Predicting brain age with deep learning from raw imaging data results in a reliable and heritable biomarker.pdf:pdf},
month = {dec},
title = {{Predicting brain age with deep learning from raw imaging data results in a reliable and heritable biomarker}},
url = {http://arxiv.org/abs/1612.02572},
year = {2016}
}
@article{Wu2017,
abstract = {Recently, DNN model compression based on network architecture design, e.g., SqueezeNet, attracted a lot of attention. No accuracy drop on image classification is observed on these extremely compact networks, compared to well-known models. An emerging question, however, is whether these model compression techniques hurt DNN's learning ability other than classifying images on a single dataset. Our preliminary experiment shows that these compression methods could degrade domain adaptation (DA) ability, though the classification performance is preserved. Therefore, we propose a new compact network architecture and unsupervised DA method in this paper. The DNN is built on a new basic module Conv-M which provides more diverse feature extractors without significantly increasing parameters. The unified framework of our DA method will simultaneously learn invariance across domains, reduce divergence of feature representations, and adapt label prediction. Our DNN has 4.1M parameters, which is only 6.7{\%} of AlexNet or 59{\%} of GoogLeNet. Experiments show that our DNN obtains GoogLeNet-level accuracy both on classification and DA, and our DA method slightly outperforms previous competitive ones. Putting it all together, our DA strategy based on our DNN achieves state-of-the-art on sixteen of total eighteen DA tasks on popular Office-31 and Office-Caltech datasets.},
archivePrefix = {arXiv},
arxivId = {1703.04071},
author = {Wu, Chunpeng and Wen, Wei and Afzal, Tariq and Zhang, Yongmei and Chen, Yiran and Li, Hai},
eprint = {1703.04071},
file = {:Users/pietz/Documents/Mendeley/Wu et al. - 2017 - A Compact DNN Approaching GoogLeNet-Level Accuracy of Classification and Domain Adaptation.pdf:pdf},
month = {mar},
title = {{A Compact DNN: Approaching GoogLeNet-Level Accuracy of Classification and Domain Adaptation}},
url = {http://arxiv.org/abs/1703.04071},
year = {2017}
}
@article{Smith2015,
abstract = {It is known that the learning rate is the most important hyper-parameter to tune for training deep neural networks. This paper describes a new method for setting the learning rate, named cyclical learning rates, which practically eliminates the need to experimentally find the best values and schedule for the global learning rates. Instead of monotonically decreasing the learning rate, this method lets the learning rate cyclically vary between reasonable boundary values. Training with cyclical learning rates instead of fixed values achieves improved classification accuracy without a need to tune and often in fewer iterations. This paper also describes a simple way to estimate "reasonable bounds" -- linearly increasing the learning rate of the network for a few epochs. In addition, cyclical learning rates are demonstrated on the CIFAR-10 and CIFAR-100 datasets with ResNets, Stochastic Depth networks, and DenseNets, and the ImageNet dataset with the AlexNet and GoogLeNet architectures. These are practical tools for everyone who trains neural networks.},
archivePrefix = {arXiv},
arxivId = {1506.01186},
author = {Smith, Leslie N.},
eprint = {1506.01186},
file = {:Users/pietz/Documents/Mendeley/Smith - 2015 - Cyclical Learning Rates for Training Neural Networks.pdf:pdf},
month = {jun},
title = {{Cyclical Learning Rates for Training Neural Networks}},
url = {http://arxiv.org/abs/1506.01186},
year = {2015}
}
@article{Xu2017,
abstract = {Image matting is a fundamental computer vision problem and has many applications. Previous algorithms have poor performance when an image has similar foreground and background colors or complicated textures. The main reasons are prior methods 1) only use low-level features and 2) lack high-level context. In this paper, we propose a novel deep learning based algorithm that can tackle both these problems. Our deep model has two parts. The first part is a deep convolutional encoder-decoder network that takes an image and the corresponding trimap as inputs and predict the alpha matte of the image. The second part is a small convolutional network that refines the alpha matte predictions of the first network to have more accurate alpha values and sharper edges. In addition, we also create a large-scale image matting dataset including 49300 training images and 1000 testing images. We evaluate our algorithm on the image matting benchmark, our testing set, and a wide variety of real images. Experimental results clearly demonstrate the superiority of our algorithm over previous methods.},
archivePrefix = {arXiv},
arxivId = {1703.03872},
author = {Xu, Ning and Price, Brian and Cohen, Scott and Huang, Thomas},
eprint = {1703.03872},
file = {:Users/pietz/Documents/Mendeley/Xu et al. - 2017 - Deep Image Matting.pdf:pdf},
month = {mar},
title = {{Deep Image Matting}},
url = {http://arxiv.org/abs/1703.03872},
year = {2017}
}
@article{Ravishankar2017,
abstract = {The ability to automatically learn task specific feature representations has led to a huge success of deep learning methods. When large training data is scarce, such as in medical imaging problems, transfer learning has been very effective. In this paper, we systematically investigate the process of transferring a Convolutional Neural Network, trained on ImageNet images to perform image classification, to the kidney detection problem in ultrasound images. We study how the detection performance depends on the extent of transfer. We show that a transferred and tuned CNN can outperform a state-of-the-art feature engineered pipeline and a hybridization of these two techniques achieves 20{\%} higher performance. We also investigate the evolution of intermediate response images from our network. Finally, we compare these responses to state-of-the-art image processing filters in order to gain greater insight into how transfer learning is able to effectively manage widely varying imaging regimes.},
archivePrefix = {arXiv},
arxivId = {1704.06040},
author = {Ravishankar, Hariharan and Sudhakar, Prasad and Venkataramani, Rahul and Thiruvenkadam, Sheshadri and Annangi, Pavan and Babu, Narayanan and Vaidya, Vivek},
eprint = {1704.06040},
file = {:Users/pietz/Documents/Mendeley/Ravishankar et al. - 2017 - Understanding the Mechanisms of Deep Transfer Learning for Medical Images.pdf:pdf},
month = {apr},
title = {{Understanding the Mechanisms of Deep Transfer Learning for Medical Images}},
url = {http://arxiv.org/abs/1704.06040},
year = {2017}
}
@article{Razzak2017,
abstract = {The healthcare sector is totally different from other industries. It is a high-priority sector, and people expect the highest level of care and service regardless of cost. Even though it consumes a huge share of the budget, it has not met social expectations. Most interpretation of medical data is done by medical experts, and image interpretation by human experts is quite limited due to its subjectivity, the complexity of the images, extensive variation across interpreters, and fatigue. After its success in other real-world applications, deep learning is also providing exciting solutions with good accuracy for medical imaging and is seen as a key method for future applications in the health sector. In this chapter, we discuss state-of-the-art deep learning architectures and their optimization as used for medical image segmentation and classification. In the last section, we discuss the challenges of deep learning based methods for medical imaging and open research issues.},
archivePrefix = {arXiv},
arxivId = {1704.06825},
author = {Razzak, Muhammad Imran and Naz, Saeeda and Zaib, Ahmad},
eprint = {1704.06825},
file = {:Users/pietz/Documents/Mendeley/Razzak, Naz, Zaib - 2017 - Deep Learning for Medical Image Processing Overview, Challenges and Future.pdf:pdf},
month = {apr},
title = {{Deep Learning for Medical Image Processing: Overview, Challenges and Future}},
url = {http://arxiv.org/abs/1704.06825},
year = {2017}
}
@article{Shwartz-Ziv2017,
abstract = {Despite their great success, there is still no comprehensive theoretical understanding of learning with Deep Neural Networks (DNNs) or their inner organization. Previous work proposed to analyze DNNs in the \textit{Information Plane}; i.e., the plane of the Mutual Information values that each layer preserves on the input and output variables. They suggested that the goal of the network is to optimize the Information Bottleneck (IB) tradeoff between compression and prediction, successively, for each layer. In this work we follow up on this idea and demonstrate the effectiveness of the Information-Plane visualization of DNNs. Our main results are: (i) most of the training epochs in standard DL are spent on \emph{compression} of the input to an efficient representation and not on fitting the training labels. (ii) The representation compression phase begins when the training errors become small and the Stochastic Gradient Descent (SGD) epochs change from a fast drift to smaller training error into a stochastic relaxation, or random diffusion, constrained by the training error value. (iii) The converged layers lie on or very close to the Information Bottleneck (IB) theoretical bound, and the maps from the input to any hidden layer and from this hidden layer to the output satisfy the IB self-consistent equations. This generalization through noise mechanism is unique to Deep Neural Networks and absent in one layer networks. (iv) The training time is dramatically reduced when adding more hidden layers. Thus the main advantage of the hidden layers is computational. This can be explained by the reduced relaxation time, as it scales super-linearly (exponentially for simple diffusion) with the information compression from the previous layer.},
archivePrefix = {arXiv},
arxivId = {1703.00810},
author = {Shwartz-Ziv, Ravid and Tishby, Naftali},
eprint = {1703.00810},
file = {:Users/pietz/Documents/Mendeley/Shwartz-Ziv, Tishby - 2017 - Opening the Black Box of Deep Neural Networks via Information.pdf:pdf},
month = {mar},
title = {{Opening the Black Box of Deep Neural Networks via Information}},
url = {http://arxiv.org/abs/1703.00810},
year = {2017}
}
@article{Qassim2017,
abstract = {Deep learning has given way to a new era of machine learning, apart from computer vision. Convolutional neural networks have been implemented in image classification, segmentation and object detection. Despite recent advancements, we are still in the very early stages and have yet to settle on best practices for network architecture in terms of deep design, small size and a short training time. In this work, we propose a very deep neural network comprised of 16 convolutional layers compressed with the Fire Module adapted from the SQUEEZENET model. We also call for the addition of residual connections to help suppress degradation. This model can be implemented on almost every neural network model with fully incorporated residual learning. This proposed model, Residual-Squeeze-VGG16 (ResSquVGG16), was trained on the large-scale MIT Places365-Standard scene dataset. In our tests, the model performed with accuracy similar to the pre-trained VGG16 model in Top-1 and Top-5 validation accuracy while also enjoying a 23.86{\%} reduction in training time and an 88.4{\%} reduction in size. In our tests, this model was trained from scratch.},
archivePrefix = {arXiv},
arxivId = {1705.03004},
author = {Qassim, Hussam and Feinzimer, David and Verma, Abhishek},
eprint = {1705.03004},
file = {:Users/pietz/Documents/Mendeley/Qassim, Feinzimer, Verma - 2017 - Residual Squeeze VGG16.pdf:pdf},
month = {may},
title = {{Residual Squeeze VGG16}},
url = {http://arxiv.org/abs/1705.03004},
year = {2017}
}
@article{Sarraf2016,
abstract = {Recently, machine learning techniques, especially predictive modeling and pattern recognition, have become important methods in biomedical sciences, from drug delivery systems to medical imaging, assisting researchers to gain a deeper understanding of the entire issue and to solve complex medical problems. Deep learning is a powerful machine learning algorithm for classification that extracts low- to high-level features. In this paper, we used a convolutional neural network to classify Alzheimer's brains from normal healthy brains. The importance of classifying this kind of medical data lies in potentially developing a predictive model or system to recognize the type of disease against normal subjects or to estimate the stage of the disease. Classification of clinical data such as Alzheimer's disease has always been challenging, and the most problematic part has always been selecting the most discriminative features. Using a Convolutional Neural Network (CNN) and the famous LeNet-5 architecture, we successfully classified structural MRI data of Alzheimer's subjects from normal controls, where the accuracy of test data on trained data reached 98.84{\%}. This experiment suggests that the shift- and scale-invariant features extracted by a CNN, followed by deep learning classification, are the most powerful method to distinguish clinical data from healthy data in fMRI. This approach also enables us to expand our methodology to predict more complicated systems.},
archivePrefix = {arXiv},
arxivId = {1607.06583},
author = {Sarraf, Saman and Tofighi, Ghassem},
eprint = {1607.06583},
file = {:Users/pietz/Documents/Mendeley/Sarraf, Tofighi - 2016 - Classification of Alzheimer's Disease using fMRI Data and Deep Learning Convolutional Neural Networks.pdf:pdf},
month = {jul},
title = {{Classification of Alzheimer's Disease Structural MRI Data by Deep Learning Convolutional Neural Networks}},
url = {http://arxiv.org/abs/1607.06583},
year = {2016}
}
@article{Grace2017,
abstract = {Advances in artificial intelligence (AI) will transform modern life by reshaping transportation, health, science, finance, and the military. To adapt public policy, we need to better anticipate these advances. Here we report the results from a large survey of machine learning researchers on their beliefs about progress in AI. Researchers predict AI will outperform humans in many activities in the next ten years, such as translating languages (by 2024), writing high-school essays (by 2026), driving a truck (by 2027), working in retail (by 2031), writing a bestselling book (by 2049), and working as a surgeon (by 2053). Researchers believe there is a 50{\%} chance of AI outperforming humans in all tasks in 45 years and of automating all human jobs in 120 years, with Asian respondents expecting these dates much sooner than North Americans. These results will inform discussion amongst researchers and policymakers about anticipating and managing trends in AI.},
archivePrefix = {arXiv},
arxivId = {1705.08807},
author = {Grace, Katja and Salvatier, John and Dafoe, Allan and Zhang, Baobao and Evans, Owain},
eprint = {1705.08807},
file = {:Users/pietz/Documents/Mendeley/Grace et al. - 2017 - When will AI exceed human performance Evidence from AI experts.pdf:pdf},
month = {may},
title = {{When Will AI Exceed Human Performance? Evidence from AI Experts}},
url = {http://arxiv.org/abs/1705.08807},
year = {2017}
}
@article{Rolnick2017,
abstract = {Deep neural networks trained on large supervised datasets have led to impressive results in recent years. However, since well-annotated datasets can be prohibitively expensive and time-consuming to collect, recent work has explored the use of larger but noisy datasets that can be more easily obtained. In this paper, we investigate the behavior of deep neural networks on training sets with massively noisy labels. We show that successful learning is possible even with an essentially arbitrary amount of noise. For example, on MNIST we find that accuracy of above 90 percent is still attainable even when the dataset has been diluted with 100 noisy examples for each clean example. Such behavior holds across multiple patterns of label noise, even when noisy labels are biased towards confusing classes. Further, we show how the required dataset size for successful training increases with higher label noise. Finally, we present simple actionable techniques for improving learning in the regime of high label noise.},
archivePrefix = {arXiv},
arxivId = {1705.10694},
author = {Rolnick, David and Veit, Andreas and Belongie, Serge and Shavit, Nir},
eprint = {1705.10694},
file = {:Users/pietz/Documents/Mendeley/Rolnick et al. - 2017 - Deep Learning is Robust to Massive Label Noise.pdf:pdf},
month = {may},
title = {{Deep Learning is Robust to Massive Label Noise}},
url = {http://arxiv.org/abs/1705.10694},
year = {2017}
}
@article{Tajbakhsh2017,
abstract = {Training a deep convolutional neural network (CNN) from scratch is difficult because it requires a large amount of labeled training data and a great deal of expertise to ensure proper convergence. A promising alternative is to fine-tune a CNN that has been pre-trained using, for instance, a large set of labeled natural images. However, the substantial differences between natural and medical images may advise against such knowledge transfer. In this paper, we seek to answer the following central question in the context of medical image analysis: \emph{Can the use of pre-trained deep CNNs with sufficient fine-tuning eliminate the need for training a deep CNN from scratch?} To address this question, we considered 4 distinct medical imaging applications in 3 specialties (radiology, cardiology, and gastroenterology) involving classification, detection, and segmentation from 3 different imaging modalities, and investigated how the performance of deep CNNs trained from scratch compared with the pre-trained CNNs fine-tuned in a layer-wise manner. Our experiments consistently demonstrated that (1) the use of a pre-trained CNN with adequate fine-tuning outperformed or, in the worst case, performed as well as a CNN trained from scratch; (2) fine-tuned CNNs were more robust to the size of training sets than CNNs trained from scratch; (3) neither shallow tuning nor deep tuning was the optimal choice for a particular application; and (4) our layer-wise fine-tuning scheme could offer a practical way to reach the best performance for the application at hand based on the amount of available data.},
archivePrefix = {arXiv},
arxivId = {1706.00712},
author = {Tajbakhsh, Nima and Shin, Jae Y. and Gurudu, Suryakanth R. and Hurst, R. Todd and Kendall, Christopher B. and Gotway, Michael B. and Liang, Jianming},
doi = {10.1109/TMI.2016.2535302},
eprint = {1706.00712},
file = {:Users/pietz/Documents/Mendeley/Tajbakhsh et al. - 2016 - Convolutional Neural Networks for Medical Image Analysis Full Training or Fine Tuning.pdf:pdf},
month = {jun},
title = {{Convolutional Neural Networks for Medical Image Analysis: Full Training or Fine Tuning?}},
url = {http://arxiv.org/abs/1706.00712 http://dx.doi.org/10.1109/TMI.2016.2535302},
year = {2017}
}
@article{Dong2017,
abstract = {A major challenge in brain tumor treatment planning and quantitative evaluation is determination of the tumor extent. The noninvasive magnetic resonance imaging (MRI) technique has emerged as a front-line diagnostic tool for brain tumors without ionizing radiation. Manual segmentation of brain tumor extent from 3D MRI volumes is a very time-consuming task, and its performance relies heavily on the operator's experience. In this context, a reliable fully automatic method for brain tumor segmentation is necessary for an efficient measurement of the tumor extent. In this study, we propose a fully automatic method for brain tumor segmentation, which is developed using U-Net based deep convolutional networks. Our method was evaluated on Multimodal Brain Tumor Image Segmentation (BRATS 2015) datasets, which contain 220 high-grade brain tumor and 54 low-grade tumor cases. Cross-validation has shown that our method can obtain promising segmentation efficiently.},
archivePrefix = {arXiv},
arxivId = {1705.03820},
author = {Dong, Hao and Yang, Guang and Liu, Fangde and Mo, Yuanhan and Guo, Yike},
eprint = {1705.03820},
file = {:Users/pietz/Documents/Mendeley/Dong, Liu, Mo - 2011 - Automatic Brain Tumor Detection and Segmentation Using U-Net Based Fully Convolutional Networks.pdf:pdf},
month = {may},
title = {{Automatic Brain Tumor Detection and Segmentation Using U-Net Based Fully Convolutional Networks}},
url = {http://arxiv.org/abs/1705.03820},
year = {2017}
}
@article{Litjens2017,
abstract = {Deep learning algorithms, in particular convolutional networks, have rapidly become a methodology of choice for analyzing medical images. This paper reviews the major deep learning concepts pertinent to medical image analysis and summarizes over 300 contributions to the field, most of which appeared in the last year. We survey the use of deep learning for image classification, object detection, segmentation, registration, and other tasks and provide concise overviews of studies per application area. Open challenges and directions for future research are discussed.},
archivePrefix = {arXiv},
arxivId = {1702.05747},
author = {Litjens, Geert and Kooi, Thijs and Bejnordi, Babak Ehteshami and Setio, Arnaud Arindra Adiyoso and Ciompi, Francesco and Ghafoorian, Mohsen and van der Laak, Jeroen A. W. M. and van Ginneken, Bram and S{\'{a}}nchez, Clara I.},
eprint = {1702.05747},
file = {:Users/pietz/Documents/Mendeley/Litjens et al. - 2017 - A Survey on Deep Learning in Medical Image Analysis.pdf:pdf},
month = {feb},
title = {{A Survey on Deep Learning in Medical Image Analysis}},
url = {http://arxiv.org/abs/1702.05747},
year = {2017}
}