% thesis/references.bib
@article{Stern2014,
abstract = {There has recently been an increased demand in bone age estimation (BAE) of living individuals and human remains in legal medicine applications. A severe drawback of established BAE techniques based on X-ray images is radiation exposure, since many countries prohibit scanning involving ionizing radiation without diagnostic reasons. We propose a completely automated method for BAE based on volumetric hand MRI images. On our database of 56 male Caucasian subjects between 13 and 19 years, we are able to estimate the subject's age with a mean difference of 0.85 +/- 0.58 years compared to the chronological age, which is in line with radiologist results using established radiographic methods. We see this work as a promising first step towards a novel MRI based bone age estimation system, with the key benefits of lacking exposure to ionizing radiation and higher accuracy due to exploitation of volumetric data.},
author = {Stern, Darko and Ebner, Thomas and Bischof, Horst and Grassegger, Sabine and Ehammer, Thomas and Urschler, Martin},
isbn = {978-3-319-10470-6; 978-3-319-10469-0},
journal = {Medical Image Computing and Computer-Assisted Intervention (MICCAI)},
keywords = {Adolescent,Adult,Age Determination by Skeleton,Aging,Algorithms,Artificial Intelligence,Functional Laterality,Hand Bones,Humans,Image Interpretation, Computer-Assisted,Magnetic Resonance Imaging,Male,Middle Aged,Pattern Recognition, Automated,Reproducibility of Results,Sensitivity and Specificity,Young Adult,anatomy {\&} histology,methods,pathology,physiology},
number = {Pt 2},
pages = {220--227},
pmid = {25485382},
title = {{Fully automatic bone age estimation from left hand MR images}},
volume = {17},
year = {2014}
}
@article{Prasoon2013,
abstract = {Segmentation of anatomical structures in medical images is often based on a voxel/pixel classification approach. Deep learning systems, such as convolutional neural networks (CNNs), can infer a hierarchical representation of images that fosters categorization. We propose a novel system for voxel classification integrating three 2D CNNs, which have a one-to-one association with the xy, yz and zx planes of the 3D image, respectively. We applied our method to the segmentation of tibial cartilage in low field knee MRI scans and tested it on 114 unseen scans. Although our method uses only 2D features at a single scale, it performs better than a state-of-the-art method using 3D multi-scale features. In the latter approach, the features and the classifier have been carefully adapted to the problem at hand. That we were able to get better results by a deep learning architecture that autonomously learns the features from the images is the main insight of this study.},
author = {Prasoon, Adhish and Petersen, Kersten and Igel, Christian and Lauze, Fran{\c{c}}ois and Dam, Erik and Nielsen, Mads},
journal = {Medical Image Computing and Computer-Assisted Intervention (MICCAI)},
title = {{Deep Feature Learning for Knee Cartilage Segmentation Using a Triplanar Convolutional Neural Network}},
url = {https://pdfs.semanticscholar.org/9574/35e5b8fc318f4fca90ef5e6015ce736e5e8f.pdf},
year = {2013}
}
@article{Folkesson2007,
abstract = {We present a fully automatic method for articular cartilage segmentation from magnetic resonance imaging (MRI) which we use as the foundation of a quantitative cartilage assessment. We evaluate our method by comparisons to manual segmentations by a radiologist and by examining the interscan reproducibility of the volume and area estimates. Training and evaluation of the method is performed on a data set consisting of 139 scans of knees with a status ranging from healthy to severely osteoarthritic. This is, to our knowledge, the only fully automatic cartilage segmentation method that has good agreement with manual segmentations, an interscan reproducibility as good as that of a human expert, and enables the separation between healthy and osteoarthritic populations. While high-field scanners offer high-quality imaging from which the articular cartilage has been evaluated extensively using manual and automated image analysis techniques, low-field scanners on the other hand produce lower quality images but at a fraction of the cost of their high-field counterpart. For low-field MRI, there is no well-established accuracy validation for quantitative cartilage estimates, but we show that differences between healthy and osteoarthritic populations are statistically significant using our cartilage volume and surface area estimates, which suggests that low-field MRI analysis can become a useful, affordable tool in clinical studies.},
author = {Folkesson, Jenny and Dam, Erik B and Olsen, Ole F and Pettersen, Paola C and Christiansen, Claus},
doi = {10.1109/TMI.2006.886808},
journal = {IEEE Transactions on Medical Imaging},
keywords = {articular cartilage, image segmentation, osteoarthritis, magnetic resonance imaging (MRI), pattern classification},
number = {1},
title = {{Segmenting Articular Cartilage Automatically Using a Voxel Classification Approach}},
url = {http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.488.2760{\&}rep=rep1{\&}type=pdf},
volume = {26},
year = {2007}
}
@book{Weishaupt2009,
author = {Weishaupt, D and K{\"{o}}chli, V D and Marincek, B and Froehlich, J M and Nanz, D and Pr{\"{u}}{\ss}mann, K P},
isbn = {9783540895725},
pages = {172},
publisher = {Springer},
title = {{Wie funktioniert MRI?}},
year = {2009}
}
@book{Jopp2007,
author = {Jopp, Eilin},
isbn = {9783830028949},
publisher = {Verlag Dr. Kova{\v{c}}},
title = {{Methoden zur Alters- und Geschlechtsbestimmung auf dem Pr{\"{u}}fstand - eine rechtsmedizinische empirische Studie}},
url = {http://www.verlagdrkovac.de/978-3-8300-2894-9.htm},
year = {2007}
}
@inproceedings{Drozdzal2016,
archivePrefix = {arXiv},
arxivId = {1608.04117},
author = {Drozdzal, Michal and Vorontsov, Eugene and Chartrand, Gabriel and Kadoury, Samuel and Pal, Chris},
booktitle = {Lecture Notes in Computer Science (including subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics)},
doi = {10.1007/978-3-319-46976-8_19},
eprint = {1608.04117},
isbn = {9783319469751},
issn = {16113349},
title = {{The importance of skip connections in biomedical image segmentation}},
volume = {10008 LNCS},
year = {2016}
}
@article{Moeskops2016,
abstract = {Automatic segmentation in MR brain images is important for quantitative analysis in large-scale studies with images acquired at all ages. This paper presents a method for the automatic segmentation of MR brain images into a number of tissue classes using a convolutional neural network. To ensure that the method obtains accurate segmentation details as well as spatial consistency, the network uses multiple patch sizes and multiple convolution kernel sizes to acquire multi-scale information about each voxel. The method is not dependent on explicit features, but learns to recognise the information that is important for the classification based on training data. The method requires a single anatomical MR image only. The segmentation method is applied to five different data sets: coronal T2-weighted images of preterm infants acquired at 30 weeks postmenstrual age (PMA) and 40 weeks PMA, axial T2-weighted images of preterm infants acquired at 40 weeks PMA, axial T1-weighted images of ageing adults acquired at an average age of 70 years, and T1-weighted images of young adults acquired at an average age of 23 years. The method obtained the following average Dice coefficients over all segmented tissue classes for each data set, respectively: 0.87, 0.82, 0.84, 0.86, and 0.91. The results demonstrate that the method obtains accurate segmentations in all five sets, and hence demonstrates its robustness to differences in age and acquisition protocol.},
archivePrefix = {arXiv},
arxivId = {1704.03295},
author = {Moeskops, Pim and Viergever, Max A. and Mendrik, Adrienne M. and {De Vries}, Linda S. and Benders, Manon J.N.L. and Isgum, Ivana},
doi = {10.1109/TMI.2016.2548501},
eprint = {1704.03295},
isbn = {0278-0062},
issn = {1558254X},
journal = {IEEE Transactions on Medical Imaging},
keywords = {adult brain, automatic image segmentation, convolutional neural networks, deep learning, MRI, preterm neonatal brain},
number = {5},
pmid = {27046893},
title = {{Automatic Segmentation of MR Brain Images with a Convolutional Neural Network}},
volume = {35},
year = {2016}
}
@misc{WorldHealthOrganization2016,
author = {{World Health Organization}},
booktitle = {WHO},
publisher = {World Health Organization},
title = {{Ionizing radiation, health effects and protective measures}},
url = {http://www.who.int/mediacentre/factsheets/fs371/en/},
urldate = {2017-08-08},
year = {2016}
}
@incollection{Saring2014,
author = {S{\"{a}}ring, Dennis and Mauer, Markus and Jopp, Eilin},
doi = {10.1007/978-3-642-54111-7_16},
booktitle = {Bildverarbeitung f{\"{u}}r die Medizin 2014},
pages = {60--65},
publisher = {Springer, Berlin, Heidelberg},
title = {{Klassifikation des Verschlussgrades der Epiphyse der proximalen Tibia zur Altersbestimmung}},
url = {http://link.springer.com/10.1007/978-3-642-54111-7{\_}16},
year = {2014}
}
@article{Setiono1997,
abstract = {Feature selection is an integral part of most learning algorithms. Due to the existence of irrelevant and redundant attributes, by selecting only the relevant attributes of the data, higher predictive accuracy can be expected from a machine learning method. In this paper, we propose the use of a three-layer feedforward neural network to select those input attributes that are most useful for discriminating classes in a given set of input patterns. A network pruning algorithm is the foundation of the proposed algorithm. By adding a penalty term to the error function of the network, redundant network connections can be distinguished from those relevant ones by their small weights when the network training process has been completed. A simple criterion to remove an attribute based on the accuracy rate of the network is developed. The network is retrained after removal of an attribute, and the selection process is repeated until no attribute meets the criterion for removal. Our experimental results suggest that the proposed method works very well on a wide variety of classification problems.},
author = {Setiono, R and Liu, H},
doi = {10.1109/72.572104},
issn = {10459227},
journal = {IEEE Transactions on Neural Networks},
month = {may},
number = {3},
pages = {654--662},
pmid = {18255668},
title = {{Neural-network feature selector}},
url = {http://ieeexplore.ieee.org/document/572104/},
volume = {8},
year = {1997}
}
@article{Saxe2015,
abstract = {Malware remains a serious problem for corporations, government agencies, and individuals, as attackers continue to use it as a tool to effect frequent and costly network intrusions. Machine learning holds the promise of automating the work required to detect newly discovered malware families, and could potentially learn generalizations about malware and benign software that support the detection of entirely new, unknown malware families. Unfortunately, few proposed machine learning based malware detection methods have achieved the low false positive rates required to deliver deployable detectors. In this paper we introduce a deep neural network malware classifier that achieves a usable detection rate at an extremely low false positive rate and scales to real world training example volumes on commodity hardware. Specifically, we show that our system achieves a 95{\%} detection rate at 0.1{\%} false positive rate (FPR), based on more than 400,000 software binaries sourced directly from our customers and internal malware databases. We achieve these results by directly learning on all binaries, without any filtering, unpacking, or manually separating binary files into categories. Further, we confirm our false positive rates directly on a live stream of files coming in from Invincea's deployed endpoint solution, provide an estimate of how many new binary files we expected to see a day on an enterprise network, and describe how that relates to the false positive rate and translates into an intuitive threat score. Our results demonstrate that it is now feasible to quickly train and deploy a low resource, highly accurate machine learning classification model, with false positive rates that approach traditional labor intensive signature based methods, while also detecting previously unseen malware.},
archivePrefix = {arXiv},
arxivId = {1508.03096},
author = {Saxe, Joshua and Berlin, Konstantin},
eprint = {1508.03096},
month = {aug},
title = {{Deep Neural Network Based Malware Detection Using Two Dimensional Binary Program Features}},
url = {http://arxiv.org/abs/1508.03096},
year = {2015}
}
@article{Bojarski2017,
abstract = {As part of a complete software stack for autonomous driving, NVIDIA has created a neural-network-based system, known as PilotNet, which outputs steering angles given images of the road ahead. PilotNet is trained using road images paired with the steering angles generated by a human driving a data-collection car. It derives the necessary domain knowledge by observing human drivers. This eliminates the need for human engineers to anticipate what is important in an image and foresee all the necessary rules for safe driving. Road tests demonstrated that PilotNet can successfully perform lane keeping in a wide variety of driving conditions, regardless of whether lane markings are present or not. The goal of the work described here is to explain what PilotNet learns and how it makes its decisions. To this end we developed a method for determining which elements in the road image most influence PilotNet's steering decision. Results show that PilotNet indeed learns to recognize relevant objects on the road. In addition to learning the obvious features such as lane markings, edges of roads, and other cars, PilotNet learns more subtle features that would be hard to anticipate and program by engineers, for example, bushes lining the edge of the road and atypical vehicle classes.},
archivePrefix = {arXiv},
arxivId = {1704.07911},
author = {Bojarski, Mariusz and Yeres, Philip and Choromanska, Anna and Choromanski, Krzysztof and Firner, Bernhard and Jackel, Lawrence and Muller, Urs},
eprint = {1704.07911},
month = {apr},
title = {{Explaining How a Deep Neural Network Trained with End-to-End Learning Steers a Car}},
url = {http://arxiv.org/abs/1704.07911},
year = {2017}
}
@article{Wu2016,
abstract = {Neural Machine Translation (NMT) is an end-to-end learning approach for automated translation, with the potential to overcome many of the weaknesses of conventional phrase-based translation systems. Unfortunately, NMT systems are known to be computationally expensive both in training and in translation inference. Also, most NMT systems have difficulty with rare words. These issues have hindered NMT's use in practical deployments and services, where both accuracy and speed are essential. In this work, we present GNMT, Google's Neural Machine Translation system, which attempts to address many of these issues. Our model consists of a deep LSTM network with 8 encoder and 8 decoder layers using attention and residual connections. To improve parallelism and therefore decrease training time, our attention mechanism connects the bottom layer of the decoder to the top layer of the encoder. To accelerate the final translation speed, we employ low-precision arithmetic during inference computations. To improve handling of rare words, we divide words into a limited set of common sub-word units ("wordpieces") for both input and output. This method provides a good balance between the flexibility of "character"-delimited models and the efficiency of "word"-delimited models, naturally handles translation of rare words, and ultimately improves the overall accuracy of the system. Our beam search technique employs a length-normalization procedure and uses a coverage penalty, which encourages generation of an output sentence that is most likely to cover all the words in the source sentence. On the WMT'14 English-to-French and English-to-German benchmarks, GNMT achieves competitive results to state-of-the-art. Using a human side-by-side evaluation on a set of isolated simple sentences, it reduces translation errors by an average of 60{\%} compared to Google's phrase-based production system.},
archivePrefix = {arXiv},
arxivId = {1609.08144},
author = {Wu, Yonghui and Schuster, Mike and Chen, Zhifeng and Le, Quoc V. and Norouzi, Mohammad and Macherey, Wolfgang and Krikun, Maxim and Cao, Yuan and Gao, Qin and Macherey, Klaus and Klingner, Jeff and Shah, Apurva and Johnson, Melvin and Liu, Xiaobing and Kaiser, {\L}ukasz and Gouws, Stephan and Kato, Yoshikiyo and Kudo, Taku and Kazawa, Hideto and Stevens, Keith and Kurian, George and Patil, Nishant and Wang, Wei and Young, Cliff and Smith, Jason and Riesa, Jason and Rudnick, Alex and Vinyals, Oriol and Corrado, Greg and Hughes, Macduff and Dean, Jeffrey},
eprint = {1609.08144},
month = {sep},
title = {{Google's Neural Machine Translation System: Bridging the Gap between Human and Machine Translation}},
url = {http://arxiv.org/abs/1609.08144},
year = {2016}
}
@article{EuropeanAsylumSupportOffice2013,
author = {{European Asylum Support Office}},
title = {{Age assessment practice in Europe}},
url = {https://www.easo.europa.eu/sites/default/files/public/EASO-Age-assessment-practice-in-Europe1.pdf},
year = {2013}
}
@article{Feltz2015,
author = {Feltz, Vivien},
title = {{Age Assessment for Unaccompanied Minors}},
url = {https://mdmeuroblog.files.wordpress.com/2014/01/age-determination-def.pdf},
year = {2015}
}
@article{Dodin2011,
abstract = {This study aimed at developing a fully automated bone segmentation method for the human knee (femur and tibia) from magnetic resonance (MR) images. MR imaging was acquired on a whole body 1.5T scanner with a gradient echo fat suppressed sequence using an extremity coil. The method was based on the Ray Casting technique which relies on the decomposition of the MR images into multiple surface layers to localize the boundaries of the bones and several partial segmentation objects being automatically merged to obtain the final complete segmentation of the bones. Validation analyses were performed on 161 MR images from knee osteoarthritis patients, comparing the developed fully automated to a validated semi-automated segmentation method, using the average surface distance (ASD), volume correlation coefficient, and Dice similarity coefficient (DSC). For both femur and tibia, respectively, data showed excellent bone surface ASD (0.50 ± 0.12 mm; 0.37 ± 0.09 mm), average oriented distance between bone surfaces within the cartilage domain (0.02 ± 0.07 mm; −0.05 ± 0.10 mm), and bone volume DSC (0.94 ± 0.05; 0.92 ± 0.07). This newly developed fully automated bone segmentation method will enable large scale studies to be conducted within shorter time durations, as well as increase stability in the reading of pathological bone.},
author = {Dodin, Pierre and Martel-Pelletier, Johanne and Pelletier, Jean-Pierre and Abram, Francois},
doi = {10.1007/s11517-011-0838-8},
journal = {Medical {\&} Biological Engineering {\&} Computing},
keywords = {ray casting, MRI, 3D knee segmentation},
pages = {1413--1424},
title = {{A fully automated human knee 3D MRI bone segmentation using the ray casting technique}},
volume = {49},
year = {2011}
}
@article{Dam,
abstract = {Clinical studies including thousands of magnetic resonance imaging (MRI) scans offer potential for pathogenesis research in osteoarthritis. However, comprehensive quantification of all bone, cartilage, and meniscus compartments is challenging. We propose a segmentation framework for fully automatic segmentation of knee MRI. The framework combines multiatlas rigid registration with voxel classification and was trained on manual segmentations with varying configurations of bones, cartilages, and menisci. The validation included high- and low-field knee MRI cohorts from the Center for Clinical and Basic Research, the osteoarthritis initiative (OAI), and the segmentation of knee images (SKI10) challenge. In total, 1907 knee MRIs were segmented during the evaluation. No segmentations were excluded. Our resulting OAI cartilage volume scores are available upon request. The precision and accuracy performances matched manual reader re-segmentation well. The cartilage volume scan-rescan precision was 4.9{\%} (RMS CV). The Dice volume overlaps in the medial/lateral tibial/femoral cartilage compartments were 0.80 to 0.87. The correlations with volumes from independent methods were between 0.90 and 0.96 on the OAI scans. Thus, the framework demonstrated precision and accuracy comparable to manual segmentations. Finally, our method placed second for cartilage segmentation in the SKI10 challenge. The comprehensive validation suggested that automatic segmentation is appropriate for cohorts with thousands of scans.},
author = {Dam, Erik B and Lillholm, Martin and Marques, Joselene and Nielsen, Mads},
doi = {10.1117/1.JMI},
journal = {Journal of Medical Imaging},
keywords = {MRI, knee, osteoarthritis, segmentation},
title = {{Automatic segmentation of high- and low-field knee MRIs using knee image quantification with data from the osteoarthritis initiative}},
url = {https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4478858/pdf/JMI-002-024001.pdf},
volume = {2},
year = {2015}
}
@article{Ringenbach2012,
abstract = {For the production of cutting blocks for knee-joint implant positioning, a precise segmentation of the femur and tibia is essential. Due to low bone density and osteophytes, the segmentation of knee bones from CT data can be a major challenge. As part of an industrial project, we have developed a hybrid segmentation method based on a pre-segmentation with a statistical shape model and a fine-segmentation with the Fast Marching algorithm.},
author = {Ringenbach, Alex and Schw{\"{a}}gli, Tobias},
doi = {10.1515/bmt-2012-4500},
journal = {Biomed Tech},
title = {{A robust and accurate segmentation of the knee bones from CT data}},
url = {https://www.degruyter.com/downloadpdf/j/bmte.2012.57.issue-s1-B/bmt-2012-4500/bmt-2012-4500.pdf},
volume = {57},
year = {2012}
}
@article{Ahn,
author = {Ahn, Chunsoo and Bui, Toan Duc and Lee, Yong-woo and Shin, Jitae and Park, Hyunjin},
doi = {10.1186/s12938-016-0225-7},
journal = {BioMedical Engineering OnLine},
keywords = {Cartilage,Knee segmentation,Magnetic resonance imaging,Medical image processing},
title = {{Fully automated, level set-based segmentation for knee MRIs using an adaptive force function and template: data from the osteoarthritis initiative}},
url = {https://biomedical-engineering-online.biomedcentral.com/track/pdf/10.1186/s12938-016-0225-7?site=biomedical-engineering-online.biomedcentral.com},
year = {2016}
}
@article{Zhao2016,
abstract = {Scene parsing is challenging for unrestricted open vocabulary and diverse scenes. In this paper, we exploit the capability of global context information by different-region-based context aggregation through our pyramid pooling module together with the proposed pyramid scene parsing network (PSPNet). Our global prior representation is effective to produce good quality results on the scene parsing task, while PSPNet provides a superior framework for pixel-level prediction tasks. The proposed approach achieves state-of-the-art performance on various datasets. It came first in ImageNet scene parsing challenge 2016, PASCAL VOC 2012 benchmark and Cityscapes benchmark. A single PSPNet yields new record of mIoU accuracy 85.4{\%} on PASCAL VOC 2012 and accuracy 80.2{\%} on Cityscapes.},
archivePrefix = {arXiv},
arxivId = {1612.01105},
author = {Zhao, Hengshuang and Shi, Jianping and Qi, Xiaojuan and Wang, Xiaogang and Jia, Jiaya},
eprint = {1612.01105},
month = {dec},
title = {{Pyramid Scene Parsing Network}},
url = {http://arxiv.org/abs/1612.01105},
year = {2016}
}
@article{Lin2016,
abstract = {Recently, very deep convolutional neural networks (CNNs) have shown outstanding performance in object recognition and have also been the first choice for dense classification problems such as semantic segmentation. However, repeated subsampling operations like pooling or convolution striding in deep CNNs lead to a significant decrease in the initial image resolution. Here, we present RefineNet, a generic multi-path refinement network that explicitly exploits all the information available along the down-sampling process to enable high-resolution prediction using long-range residual connections. In this way, the deeper layers that capture high-level semantic features can be directly refined using fine-grained features from earlier convolutions. The individual components of RefineNet employ residual connections following the identity mapping mindset, which allows for effective end-to-end training. Further, we introduce chained residual pooling, which captures rich background context in an efficient manner. We carry out comprehensive experiments and set new state-of-the-art results on seven public datasets. In particular, we achieve an intersection-over-union score of 83.4 on the challenging PASCAL VOC 2012 dataset, which is the best reported result to date.},
archivePrefix = {arXiv},
arxivId = {1611.06612},
author = {Lin, Guosheng and Milan, Anton and Shen, Chunhua and Reid, Ian},
eprint = {1611.06612},
month = {nov},
title = {{RefineNet: Multi-Path Refinement Networks for High-Resolution Semantic Segmentation}},
url = {http://arxiv.org/abs/1611.06612},
year = {2016}
}
@article{Chen2016,
abstract = {In this work we address the task of semantic image segmentation with Deep Learning and make three main contributions that are experimentally shown to have substantial practical merit. First, we highlight convolution with upsampled filters, or 'atrous convolution', as a powerful tool in dense prediction tasks. Atrous convolution allows us to explicitly control the resolution at which feature responses are computed within Deep Convolutional Neural Networks. It also allows us to effectively enlarge the field of view of filters to incorporate larger context without increasing the number of parameters or the amount of computation. Second, we propose atrous spatial pyramid pooling (ASPP) to robustly segment objects at multiple scales. ASPP probes an incoming convolutional feature layer with filters at multiple sampling rates and effective fields-of-views, thus capturing objects as well as image context at multiple scales. Third, we improve the localization of object boundaries by combining methods from DCNNs and probabilistic graphical models. The commonly deployed combination of max-pooling and downsampling in DCNNs achieves invariance but has a toll on localization accuracy. We overcome this by combining the responses at the final DCNN layer with a fully connected Conditional Random Field (CRF), which is shown both qualitatively and quantitatively to improve localization performance. Our proposed "DeepLab" system sets the new state-of-art at the PASCAL VOC-2012 semantic image segmentation task, reaching 79.7{\%} mIOU in the test set, and advances the results on three other datasets: PASCAL-Context, PASCAL-Person-Part, and Cityscapes. All of our code is made publicly available online.},
archivePrefix = {arXiv},
arxivId = {1606.00915},
author = {Chen, Liang-Chieh and Papandreou, George and Kokkinos, Iasonas and Murphy, Kevin and Yuille, Alan L.},
eprint = {1606.00915},
month = {jun},
title = {{DeepLab: Semantic Image Segmentation with Deep Convolutional Nets, Atrous Convolution, and Fully Connected CRFs}},
url = {http://arxiv.org/abs/1606.00915},
year = {2016}
}
@article{Nekrasov2016,
abstract = {Semantic image segmentation is a principal problem in computer vision, where the aim is to correctly classify each individual pixel of an image into a semantic label. Its widespread use in many areas, including medical imaging and autonomous driving, has fostered extensive research in recent years. Empirical improvements in tackling this task have primarily been motivated by successful exploitation of Convolutional Neural Networks (CNNs) pre-trained for image classification and object recognition. However, the pixel-wise labelling with CNNs has its own unique challenges: (1) an accurate deconvolution, or upsampling, of low-resolution output into a higher-resolution segmentation mask and (2) an inclusion of global information, or context, within locally extracted features. To address these issues, we propose a novel architecture to conduct the equivalent of the deconvolution operation globally and acquire dense predictions. We demonstrate that it leads to improved performance of state-of-the-art semantic segmentation models on the PASCAL VOC 2012 benchmark, reaching 74.0{\%} mean IU accuracy on the test set.},
archivePrefix = {arXiv},
arxivId = {1602.03930},
author = {Nekrasov, Vladimir and Ju, Janghoon and Choi, Jaesik},
eprint = {1602.03930},
month = {feb},
title = {{Global Deconvolutional Networks for Semantic Segmentation}},
url = {http://arxiv.org/abs/1602.03930},
year = {2016}
}
@misc{NVIDIA,
author = {NVIDIA},
title = {{GPU vs CPU? What is GPU Computing?}},
url = {http://www.nvidia.com/object/what-is-gpu-computing.html},
urldate = {2017-07-31}
}
@article{Jager2010,
author = {J{\"{a}}ger, Florian},
title = {{Normalization of Magnetic Resonance Images and its Application to the Diagnosis of the Scoliotic Spine}},
url = {http://www5.informatik.uni-erlangen.de/Forschung/Publikationen/2011/Jaeger11-NOM.pdf},
year = {2010}
}
@article{Kapur,
abstract = {A method for model based segmentation of 3D Magnetic Resonance Imaging (MRI) scans of the human knee is presented. A probabilistic model describing the spatial relationships between features of the human knee is constructed from 3D manually segmented data. In conjunction with feature detection techniques from low-level computer vision, this model is used to segment knee MRI scans in a Bayesian framework.},
author = {Kapur, Tina and Beardsley, Paul A. and Gibson, Sarah F. and Grimson, W. Eric L. and Wells, William M.},
title = {{Model Based Segmentation of Clinical Knee MRI}},
url = {http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.49.6825{\&}rep=rep1{\&}type=pdf}
}
@article{LeCun1990,
abstract = {We present an application of back-propagation networks to handwritten digit recognition. Minimal preprocessing of the data was required, but the architecture of the network was highly constrained and specifically designed for the task. The input of the network consists of normalized images of isolated digits. The method has a 1{\%} error rate and about a 9{\%} reject rate on zipcode digits provided by the U.S. Postal Service.},
author = {{Le Cun}, Y. and Boser, B. and Denker, J. S. and Henderson, D. and Howard, R. E. and Hubbard, W. and Jackel, L. D.},
isbn = {1-55860-100-7},
journal = {Advances in Neural Information Processing Systems},
pages = {396--404},
title = {{Handwritten Digit Recognition with a Back-Propagation Network}},
url = {http://yann.lecun.com/exdb/publis/pdf/lecun-90c.pdf},
year = {1990}
}
@misc{Attarian2013,
author = {Attarian, David E.},
booktitle = {Duke Medicine},
title = {{Your Bones: What are growth plates?}},
url = {http://serkadis.net/index/151288},
urldate = {2017-07-27},
year = {2013}
}
@misc{DukeHealth,
abstract = {Growth plates are zones of cartilage at each end of our long bones (femur, tibia, etc.), explains David E. Attarian, MD, a joint replacement orthopaedic surgeon at Duke. These bones grow by the contribution of new bone from the growth plate. Because of their soft nature, these parts of the bone are vulnerable to injury during the development of a child. This is a region of the bone that is sometimes weaker than the surrounding tendons and ligaments. As a result, up to 30 percent of fractures in children can occur around the growth plates.},
author = {{Duke Health}},
title = {{Growth plates: what you need to know}},
url = {https://www.dukehealth.org/blog/growth-plates-what-you-need-know},
urldate = {2017-07-27}
}
@book{Aumuller2010,
author = {Aum{\"{u}}ller, Gerhard and Aust, Gabriela and Doll, Andreas and Engele, J{\"{u}}rgen and Kirsch, Joachim and Mense, Siegfried and Wurzinger, Laurenz},
isbn = {978-3131360410},
edition = {2},
pages = {1218},
publisher = {Thieme},
title = {{Anatomie}},
year = {2010}
}
@misc{TheDanaFoundation2011,
abstract = {Magnetic Resonance Imaging (MRI) is based on the principle of nuclear magnetic resonance and uses radiofrequency waves to probe tissue structure and function without requiring exposure to ionizing radiation. The two researchers who made MRI clinically feasible in the 1980s by building on initial discoveries of the 1930s won the Nobel Prize in Physiology or Medicine in 2003.},
author = {{The Dana Foundation}},
pages = {1},
title = {{Non-invasive Structural and Physiological Imaging: MRI Technologies}},
url = {http://dana.org/Publications/ReportDetails.aspx?id=44357},
urldate = {2017-07-25},
year = {2011}
}
@book{Westbrook2016,
abstract = {MRI at a Glance encapsulates essential MRI physics knowledge. Illustrated in full colour throughout, its concise text explains complex information, to provide the perfect revision aid. It includes topics ranging from magnetism to safety, K space to pulse sequences, and image contrast to artefacts. This third edition has been fully updated, with revised diagrams and new pedagogy, including 55 key points, tables, scan tips, equations, and learning points. There is also an expanded glossary and new appendices on optimizing image quality, parameters and trade-offs.},
author = {Westbrook, Catherine},
edition = {3},
isbn = {978-1-119-05355-2},
keywords = {MRI,medical imaging},
pages = {136},
publisher = {Wiley Blackwell},
title = {{MRI at A Glance}},
year = {2016}
}
@article{Pereira2016,
abstract = {Among brain tumors, gliomas are the most common and aggressive, leading to a very short life expectancy in their highest grade. Thus, treatment planning is a key stage to improve the quality of life of oncological patients. Magnetic Resonance Imaging (MRI) is a widely used imaging technique to assess these tumors, but the large amount of data produced by MRI prevents manual segmentation in a reasonable time, limiting the use of precise quantitative measurements in the clinical practice. So, automatic and reliable segmentation methods are required; however, the large spatial and structural variability among brain tumors make automatic segmentation a challenging problem. In this paper, we propose an automatic segmentation method based on Convolutional Neural Networks (CNN), exploring small 3x3 kernels. The use of small kernels allows designing a deeper architecture, besides having a positive effect against overfitting, given the fewer number of weights in the network. We also investigated the use of intensity normalization as a pre-processing step, which though not common in CNN-based segmentation methods, proved together with data augmentation to be very effective for brain tumor segmentation in MRI images. Our proposal was validated in the Brain Tumor Segmentation Challenge 2013 database (BRATS 2013), obtaining simultaneously the first position for the complete, core, and enhancing regions in Dice Similarity Coefficient metric (0.88, 0.83, 0.77) for the Challenge data set. Also, it obtained the overall first position by the online evaluation platform. We also participated in the on-site BRATS 2015 Challenge using the same model, obtaining the second place, with Dice Similarity Coefficient metric of 0.78, 0.65, and 0.75 for the complete, core, and enhancing regions, respectively.},
archivePrefix = {arXiv},
arxivId = {arXiv:1502.02445v2},
author = {Pereira, Sergio and Pinto, Adriano and Alves, Victor and Silva, Carlos A.},
doi = {10.1109/TMI.2016.2538465},
eprint = {arXiv:1502.02445v2},
isbn = {0278-0062},
issn = {1558254X},
journal = {IEEE Transactions on Medical Imaging},
number = {5},
pmid = {26960222},
title = {{Brain Tumor Segmentation Using Convolutional Neural Networks in MRI Images}},
volume = {35},
year = {2016}
}
@article{Kamnitsas2017,
abstract = {We propose a dual pathway, 11-layers deep, three-dimensional Convolutional Neural Network for the challenging task of brain lesion segmentation. The devised architecture is the result of an in-depth analysis of the limitations of current networks proposed for similar applications. To overcome the computational burden of processing 3D medical scans, we have devised an efficient and effective dense training scheme which joins the processing of adjacent image patches into one pass through the network while automatically adapting to the inherent class imbalance present in the data. Further, we analyze the development of deeper, thus more discriminative 3D CNNs. In order to incorporate both local and larger contextual information, we employ a dual pathway architecture that processes the input images at multiple scales simultaneously. For post-processing of the network's soft segmentation, we use a 3D fully connected Conditional Random Field which effectively removes false positives. Our pipeline is extensively evaluated on three challenging tasks of lesion segmentation in multi-channel MRI patient data with traumatic brain injuries, brain tumours, and ischemic stroke. We improve on the state-of-the-art for all three applications, with top ranking performance on the public benchmarks BRATS 2015 and ISLES 2015. Our method is computationally efficient, which allows its adoption in a variety of research and clinical settings. The source code of our implementation is made publicly available.},
archivePrefix = {arXiv},
arxivId = {1603.05959},
author = {Kamnitsas, Konstantinos and Ledig, Christian and Newcombe, Virginia F.J. and Simpson, Joanna P. and Kane, Andrew D. and Menon, David K. and Rueckert, Daniel and Glocker, Ben},
doi = {10.1016/j.media.2016.10.004},
eprint = {1603.05959},
issn = {13618423},
journal = {Medical Image Analysis},
title = {{Efficient multi-scale 3D CNN with fully connected CRF for accurate brain lesion segmentation}},
volume = {36},
year = {2017}
}
@article{Iandola2016a,
abstract = {Recent research on deep neural networks has focused primarily on improving accuracy. For a given accuracy level, it is typically possible to identify multiple DNN architectures that achieve that accuracy level. With equivalent accuracy, smaller DNN architectures offer at least three advantages: (1) Smaller DNNs require less communication across servers during distributed training. (2) Smaller DNNs require less bandwidth to export a new model from the cloud to an autonomous car. (3) Smaller DNNs are more feasible to deploy on FPGAs and other hardware with limited memory. To provide all of these advantages, we propose a small DNN architecture called SqueezeNet. SqueezeNet achieves AlexNet-level accuracy on ImageNet with 50x fewer parameters. Additionally, with model compression techniques we are able to compress SqueezeNet to less than 0.5MB (510x smaller than AlexNet). The SqueezeNet architecture is available for download here: https://github.com/DeepScale/SqueezeNet},
archivePrefix = {arXiv},
arxivId = {1602.07360},
author = {Iandola, Forrest N. and Han, Song and Moskewicz, Matthew W. and Ashraf, Khalid and Dally, William J. and Keutzer, Kurt},
eprint = {1602.07360},
month = {feb},
title = {{SqueezeNet: AlexNet-level accuracy with 50x fewer parameters and {\textless}0.5MB model size}},
url = {http://arxiv.org/abs/1602.07360},
year = {2016}
}
@article{Simonyan2014a,
abstract = {In this work we investigate the effect of the convolutional network depth on its accuracy in the large-scale image recognition setting. Our main contribution is a thorough evaluation of networks of increasing depth using an architecture with very small (3x3) convolution filters, which shows that a significant improvement on the prior-art configurations can be achieved by pushing the depth to 16-19 weight layers. These findings were the basis of our ImageNet Challenge 2014 submission, where our team secured the first and the second places in the localisation and classification tracks respectively. We also show that our representations generalise well to other datasets, where they achieve state-of-the-art results. We have made our two best-performing ConvNet models publicly available to facilitate further research on the use of deep visual representations in computer vision.},
archivePrefix = {arXiv},
arxivId = {1409.1556},
author = {Simonyan, Karen and Zisserman, Andrew},
eprint = {1409.1556},
keywords = {classification,cnn,imagenet,vgg16},
month = {sep},
title = {{Very Deep Convolutional Networks for Large-Scale Image Recognition}},
url = {http://arxiv.org/abs/1409.1556},
year = {2014}
}
@article{He2015b,
abstract = {Deeper neural networks are more difficult to train. We present a residual learning framework to ease the training of networks that are substantially deeper than those used previously. We explicitly reformulate the layers as learning residual functions with reference to the layer inputs, instead of learning unreferenced functions. We provide comprehensive empirical evidence showing that these residual networks are easier to optimize, and can gain accuracy from considerably increased depth. On the ImageNet dataset we evaluate residual nets with a depth of up to 152 layers---8x deeper than VGG nets but still having lower complexity. An ensemble of these residual nets achieves 3.57{\%} error on the ImageNet test set. This result won the 1st place on the ILSVRC 2015 classification task. We also present analysis on CIFAR-10 with 100 and 1000 layers. The depth of representations is of central importance for many visual recognition tasks. Solely due to our extremely deep representations, we obtain a 28{\%} relative improvement on the COCO object detection dataset. Deep residual nets are foundations of our submissions to ILSVRC {\&} COCO 2015 competitions, where we also won the 1st places on the tasks of ImageNet detection, ImageNet localization, COCO detection, and COCO segmentation.},
archivePrefix = {arXiv},
arxivId = {1512.03385},
author = {He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
eprint = {1512.03385},
month = {dec},
title = {{Deep Residual Learning for Image Recognition}},
url = {http://arxiv.org/abs/1512.03385},
year = {2015}
}
@article{Dumoulin2016,
abstract = {We introduce a guide to help deep learning practitioners understand and manipulate convolutional neural network architectures. The guide clarifies the relationship between various properties (input shape, kernel shape, zero padding, strides and output shape) of convolutional, pooling and transposed convolutional layers, as well as the relationship between convolutional and transposed convolutional layers. Relationships are derived for various cases, and are illustrated in order to make them intuitive.},
archivePrefix = {arXiv},
arxivId = {1603.07285},
author = {Dumoulin, Vincent and Visin, Francesco},
eprint = {1603.07285},
month = {mar},
pages = {1--28},
title = {{A guide to convolution arithmetic for deep learning}},
url = {http://arxiv.org/abs/1603.07285},
year = {2016}
}
@article{Keskar2016,
abstract = {The stochastic gradient descent (SGD) method and its variants are algorithms of choice for many Deep Learning tasks. These methods operate in a small-batch regime wherein a fraction of the training data, say {\$}32{\$}-{\$}512{\$} data points, is sampled to compute an approximation to the gradient. It has been observed in practice that when using a larger batch there is a degradation in the quality of the model, as measured by its ability to generalize. We investigate the cause for this generalization drop in the large-batch regime and present numerical evidence that supports the view that large-batch methods tend to converge to sharp minimizers of the training and testing functions - and as is well known, sharp minima lead to poorer generalization. In contrast, small-batch methods consistently converge to flat minimizers, and our experiments support a commonly held view that this is due to the inherent noise in the gradient estimation. We discuss several strategies to attempt to help large-batch methods eliminate this generalization gap.},
archivePrefix = {arXiv},
arxivId = {1609.04836},
author = {Keskar, Nitish Shirish and Mudigere, Dheevatsa and Nocedal, Jorge and Smelyanskiy, Mikhail and Tang, Ping Tak Peter},
eprint = {1609.04836},
month = {sep},
title = {{On Large-Batch Training for Deep Learning: Generalization Gap and Sharp Minima}},
url = {http://arxiv.org/abs/1609.04836},
year = {2016}
}
@article{Goyal2017,
abstract = {Deep learning thrives with large neural networks and large datasets. However, larger networks and larger datasets result in longer training times that impede research and development progress. Distributed synchronous SGD offers a potential solution to this problem by dividing SGD minibatches over a pool of parallel workers. Yet to make this scheme efficient, the per-worker workload must be large, which implies nontrivial growth in the SGD minibatch size. In this paper, we empirically show that on the ImageNet dataset large minibatches cause optimization difficulties, but when these are addressed the trained networks exhibit good generalization. Specifically, we show no loss of accuracy when training with large minibatch sizes up to 8192 images. To achieve this result, we adopt a linear scaling rule for adjusting learning rates as a function of minibatch size and develop a new warmup scheme that overcomes optimization challenges early in training. With these simple techniques, our Caffe2-based system trains ResNet-50 with a minibatch size of 8192 on 256 GPUs in one hour, while matching small minibatch accuracy. Using commodity hardware, our implementation achieves {\~{}}90{\%} scaling efficiency when moving from 8 to 256 GPUs. This system enables us to train visual recognition models on internet-scale data with high efficiency.},
archivePrefix = {arXiv},
arxivId = {1706.02677},
author = {Goyal, Priya and Doll{\'{a}}r, Piotr and Girshick, Ross and Noordhuis, Pieter and Wesolowski, Lukasz and Kyrola, Aapo and Tulloch, Andrew and Jia, Yangqing and He, Kaiming},
eprint = {1706.02677},
file = {:Users/pietz/Documents/Mendeley/Goyal et al. - 2017 - Accurate, Large Minibatch SGD Training ImageNet in 1 Hour.pdf:pdf},
month = {jun},
title = {{Accurate, Large Minibatch SGD: Training ImageNet in 1 Hour}},
url = {http://arxiv.org/abs/1706.02677},
year = {2017}
}
@article{Hoffer2017,
abstract = {Background: Deep learning models are typically trained using stochastic gradient descent or one of its variants. These methods update the weights using their gradient, estimated from a small fraction of the training data. It has been observed that when using large batch sizes there is a persistent degradation in generalization performance - known as the "generalization gap" phenomenon. Identifying the origin of this gap and closing it had remained an open problem. Contributions: We examine the initial high learning rate training phase. We find that the weight distance from its initialization grows logarithmically with the number of weight updates. We therefore propose a "random walk on random landscape" statistical model which is known to exhibit similar "ultra-slow" diffusion behavior. Following this hypothesis we conducted experiments to show empirically that the "generalization gap" stems from the relatively small number of updates rather than the batch size, and can be completely eliminated by adapting the training regime used. We further investigate different techniques to train models in the large-batch regime and present a novel algorithm named "Ghost Batch Normalization" which enables significant decrease in the generalization gap without increasing the number of updates. To validate our findings we conduct several additional experiments on MNIST, CIFAR-10, CIFAR-100 and ImageNet. Finally, we reassess common practices and beliefs concerning training of deep models and suggest they may not be optimal to achieve good generalization.},
archivePrefix = {arXiv},
arxivId = {1705.08741},
author = {Hoffer, Elad and Hubara, Itay and Soudry, Daniel},
eprint = {1705.08741},
file = {:Users/pietz/Documents/Mendeley/Hoffer, Hubara, Soudry - 2017 - Train longer, generalize better closing the generalization gap in large batch training of neural network.pdf:pdf},
month = {may},
title = {{Train longer, generalize better: closing the generalization gap in large batch training of neural networks}},
url = {http://arxiv.org/abs/1705.08741},
year = {2017}
}
@article{Kingma2014,
abstract = {We introduce Adam, an algorithm for first-order gradient-based optimization of stochastic objective functions, based on adaptive estimates of lower-order moments. The method is straightforward to implement, is computationally efficient, has low memory requirements, is invariant to diagonal rescaling of the gradients, and is well suited for problems that are large in terms of data and/or parameters. The method is also appropriate for non-stationary objectives and problems with very noisy and/or sparse gradients. The hyper-parameters have intuitive interpretations and typically require little tuning. Some connections to related algorithms, on which Adam was inspired, are discussed. We also analyze the theoretical convergence properties of the algorithm and provide a regret bound on the convergence rate that is comparable to the best known results under the online convex optimization framework. Empirical results demonstrate that Adam works well in practice and compares favorably to other stochastic optimization methods. Finally, we discuss AdaMax, a variant of Adam based on the infinity norm.},
archivePrefix = {arXiv},
arxivId = {1412.6980},
author = {Kingma, Diederik P. and Ba, Jimmy},
eprint = {1412.6980},
file = {:Users/pietz/Documents/Mendeley/Kingma, Ba - 2014 - Adam A Method for Stochastic Optimization.pdf:pdf},
month = {dec},
title = {{Adam: A Method for Stochastic Optimization}},
url = {http://arxiv.org/abs/1412.6980},
year = {2014}
}
@article{He2015a,
abstract = {Rectified activation units (rectifiers) are essential for state-of-the-art neural networks. In this work, we study rectifier neural networks for image classification from two aspects. First, we propose a Parametric Rectified Linear Unit (PReLU) that generalizes the traditional rectified unit. PReLU improves model fitting with nearly zero extra computational cost and little overfitting risk. Second, we derive a robust initialization method that particularly considers the rectifier nonlinearities. This method enables us to train extremely deep rectified models directly from scratch and to investigate deeper or wider network architectures. Based on our PReLU networks (PReLU-nets), we achieve 4.94{\%} top-5 test error on the ImageNet 2012 classification dataset. This is a 26{\%} relative improvement over the ILSVRC 2014 winner (GoogLeNet, 6.66{\%}). To our knowledge, our result is the first to surpass human-level performance (5.1{\%}, Russakovsky et al.) on this visual recognition challenge.},
archivePrefix = {arXiv},
arxivId = {1502.01852},
author = {He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
eprint = {1502.01852},
file = {:Users/pietz/Documents/Mendeley/He et al. - 2015 - Delving Deep into Rectifiers Surpassing Human-Level Performance on ImageNet Classification.pdf:pdf},
month = {feb},
title = {{Delving Deep into Rectifiers: Surpassing Human-Level Performance on ImageNet Classification}},
url = {http://arxiv.org/abs/1502.01852},
year = {2015}
}
@article{Clevert2015,
abstract = {We introduce the "exponential linear unit" (ELU) which speeds up learning in deep neural networks and leads to higher classification accuracies. Like rectified linear units (ReLUs), leaky ReLUs (LReLUs) and parametrized ReLUs (PReLUs), ELUs alleviate the vanishing gradient problem via the identity for positive values. However, ELUs have improved learning characteristics compared to the units with other activation functions. In contrast to ReLUs, ELUs have negative values which allows them to push mean unit activations closer to zero like batch normalization but with lower computational complexity. Mean shifts toward zero speed up learning by bringing the normal gradient closer to the unit natural gradient because of a reduced bias shift effect. While LReLUs and PReLUs have negative values, too, they do not ensure a noise-robust deactivation state. ELUs saturate to a negative value with smaller inputs and thereby decrease the forward propagated variation and information. Therefore, ELUs code the degree of presence of particular phenomena in the input, while they do not quantitatively model the degree of their absence. In experiments, ELUs lead not only to faster learning, but also to significantly better generalization performance than ReLUs and LReLUs on networks with more than 5 layers. On CIFAR-100 ELUs networks significantly outperform ReLU networks with batch normalization while batch normalization does not improve ELU networks. ELU networks are among the top 10 reported CIFAR-10 results and yield the best published result on CIFAR-100, without resorting to multi-view evaluation or model averaging. On ImageNet, ELU networks considerably speed up learning compared to a ReLU network with the same architecture, obtaining less than 10{\%} classification error for a single crop, single model network.},
archivePrefix = {arXiv},
arxivId = {1511.07289},
author = {Clevert, Djork-Arn{\'{e}} and Unterthiner, Thomas and Hochreiter, Sepp},
eprint = {1511.07289},
file = {:Users/pietz/Documents/Mendeley/Clevert, Unterthiner, Hochreiter - 2015 - Fast and Accurate Deep Network Learning by Exponential Linear Units (ELUs).pdf:pdf},
month = {nov},
title = {{Fast and Accurate Deep Network Learning by Exponential Linear Units (ELUs)}},
url = {http://arxiv.org/abs/1511.07289},
year = {2015}
}
@article{Nair,
abstract = {Restricted Boltzmann machines were developed using binary stochastic hidden units. These can be generalized by replacing each binary unit by an infinite number of copies that all have the same weights but have progressively more negative biases. The learning and inference rules for these "Stepped Sigmoid Units" are unchanged. They can be approximated efficiently by noisy, rectified linear units. Compared with binary units, these units learn features that are better for object recognition on the NORB dataset and face verification on the Labeled Faces in the Wild dataset. Unlike binary units, rectified linear units preserve information about relative intensities as information travels through multiple layers of feature detectors.},
author = {Nair, Vinod and Hinton, Geoffrey E},
file = {:Users/pietz/Documents/Mendeley/Nair, Hinton - Unknown - Rectified Linear Units Improve Restricted Boltzmann Machines.pdf:pdf},
title = {{Rectified Linear Units Improve Restricted Boltzmann Machines}},
url = {http://www.cs.toronto.edu/{~}fritz/absps/reluICML.pdf},
year = {2010}
}
@article{Simard,
abstract = {Neural networks are a powerful technology for classification of visual inputs arising from documents. However, there is a confusing plethora of different neural network methods that are used in the literature and in industry. This paper describes a set of concrete best practices that document analysis researchers can use to get good results with neural networks. The most important practice is getting a training set as large as possible: we expand the training set by adding a new form of distorted data. The next most important practice is that convolutional neural networks are better suited for visual document tasks than fully connected networks. We propose that a simple "do-it-yourself" implementation of convolution with a flexible architecture is suitable for many visual document problems. This simple convolutional neural network does not require complex methods, such as momentum, weight decay, structure-dependent learning rates, averaging layers, tangent prop, or even finely-tuning the architecture. The end result is a very simple yet general architecture which can yield state-of-the-art performance for document analysis. We illustrate our claims on the MNIST set of English digit images.},
author = {Simard, Patrice Y and Steinkraus, Dave and Platt, John C},
file = {:Users/pietz/Documents/Mendeley/Simard, Steinkraus, Platt - Unknown - Best Practices for Convolutional Neural Networks Applied to Visual Document Analysis.pdf:pdf},
title = {{Best Practices for Convolutional Neural Networks Applied to Visual Document Analysis}},
url = {https://pdfs.semanticscholar.org/7b1c/c19dec9289c66e7ab45e80e8c42273509ab6.pdf},
year = {2003}
}
@article{LeCun1998,
abstract = {Finding an appropriate set of features is an essential problem in the design of shape recognition systems. This paper attempts to show that for recognizing simple objects with high shape variability such as handwritten characters, it is possible, and even advantageous, to feed the system directly with minimally processed images and to rely on learning to extract the right set of features. Convolutional Neural Networks are shown to be particularly well suited to this task. We also show that these networks can be used to recognize multiple objects without requiring explicit segmentation of the objects from their surrounding. The second part of the paper presents the Graph Transformer Network model which extends the applicability of gradient-based learning to systems that use graphs to represent features, objects, and their combinations.},
author = {LeCun, Yann and Haffner, Patrick and Bottou, L{\'{e}}on and Bengio, Yoshua},
file = {:Users/pietz/Documents/Mendeley/LeCun et al. - 1998 - Object Recognition with Gradient-Based Learning.pdf:pdf},
keywords = {cnn,convolution,neural network},
mendeley-tags = {cnn,convolution,neural network},
title = {{Object Recognition with Gradient-Based Learning}},
url = {http://yann.lecun.com/exdb/publis/pdf/lecun-99.pdf},
year = {1998}
}
@article{Krizhevsky,
abstract = {We trained a large, deep convolutional neural network to classify the 1.2 million high-resolution images in the ImageNet LSVRC-2010 contest into the 1000 different classes. On the test data, we achieved top-1 and top-5 error rates of 37.5{\%} and 17.0{\%} which is considerably better than the previous state-of-the-art. The neural network, which has 60 million parameters and 650,000 neurons, consists of five convolutional layers, some of which are followed by max-pooling layers, and three fully-connected layers with a final 1000-way softmax. To make training faster, we used non-saturating neurons and a very efficient GPU implementation of the convolution operation. To reduce overfitting in the fully-connected layers we employed a recently-developed regularization method called "dropout" that proved to be very effective. We also entered a variant of this model in the ILSVRC-2012 competition and achieved a winning top-5 test error rate of 15.3{\%}, compared to 26.2{\%} achieved by the second-best entry.},
author = {Krizhevsky, Alex and Sutskever, Ilya and Hinton, Geoffrey E},
file = {:Users/pietz/Documents/Mendeley/Krizhevsky, Sutskever, Hinton - Unknown - ImageNet Classification with Deep Convolutional Neural Networks.pdf:pdf},
title = {{ImageNet Classification with Deep Convolutional Neural Networks}},
year = {2012}
}
@article{Albarqouni2016,
abstract = {The lack of publicly available ground-truth data has been identified as the major challenge for transferring recent developments in deep learning to the biomedical imaging domain. Though crowdsourcing has enabled annotation of large scale databases for real world images, its application for biomedical purposes requires a deeper understanding and hence, more precise definition of the actual annotation task. The fact that expert tasks are being outsourced to non-expert users may lead to noisy annotations introducing disagreement between users. Despite being a valuable resource for learning annotation models from crowdsourcing, conventional machine-learning methods may have difficulties dealing with noisy annotations during training. In this manuscript, we present a new concept for learning from crowds that handle data aggregation directly as part of the learning process of the convolutional neural network (CNN) via additional crowdsourcing layer (AggNet). Besides, we present an experimental study on learning from crowds designed to answer the following questions. 1) Can deep CNN be trained with data collected from crowdsourcing? 2) How to adapt the CNN to train on multiple types of annotation datasets (ground truth and crowd-based)? 3) How does the choice of annotation and aggregation affect the accuracy? Our experimental setup involved Annot8, a self-implemented web-platform based on Crowdflower API realizing image annotation tasks for a publicly available biomedical image database. Our results give valuable insights into the functionality of deep CNN learning from crowd annotations and prove the necessity of data aggregation integration.},
author = {Albarqouni, Shadi and Baur, Christoph and Achilles, Felix and Belagiannis, Vasileios and Demirci, Stefanie and Navab, Nassir},
doi = {10.1109/TMI.2016.2528120},
issn = {1558254X},
journal = {IEEE Transactions on Medical Imaging},
number = {5},
pmid = {26891484},
title = {{AggNet: Deep Learning From Crowds for Mitosis Detection in Breast Cancer Histology Images}},
volume = {35},
year = {2016}
}
@article{Havaei2017,
abstract = {In this paper, we present a fully automatic brain tumor segmentation method based on Deep Neural Networks (DNNs). The proposed networks are tailored to glioblastomas (both low and high grade) pictured in MR images. By their very nature, these tumors can appear anywhere in the brain and have almost any kind of shape, size, and contrast. These reasons motivate our exploration of a machine learning solution that exploits a flexible, high capacity DNN while being extremely efficient. Here, we give a description of different model choices that we've found to be necessary for obtaining competitive performance. We explore in particular different architectures based on Convolutional Neural Networks (CNN), i.e. DNNs specifically adapted to image data. We present a novel CNN architecture which differs from those traditionally used in computer vision. Our CNN exploits both local features as well as more global contextual features simultaneously. Also, different from most traditional uses of CNNs, our networks use a final layer that is a convolutional implementation of a fully connected layer which allows a 40 fold speed up. We also describe a 2-phase training procedure that allows us to tackle difficulties related to the imbalance of tumor labels. Finally, we explore a cascade architecture in which the output of a basic CNN is treated as an additional source of information for a subsequent CNN. Results reported on the 2013 BRATS test data-set reveal that our architecture improves over the currently published state-of-the-art while being over 30 times faster.},
archivePrefix = {arXiv},
arxivId = {1505.03540},
author = {Havaei, Mohammad and Davy, Axel and Warde-Farley, David and Biard, Antoine and Courville, Aaron and Bengio, Yoshua and Pal, Chris and Jodoin, Pierre Marc and Larochelle, Hugo},
doi = {10.1016/j.media.2016.05.004},
eprint = {1505.03540},
issn = {13618423},
journal = {Medical Image Analysis},
pmid = {27310171},
title = {{Brain tumor segmentation with Deep Neural Networks}},
volume = {35},
year = {2017}
}
@article{Chollet2016,
abstract = {We present an interpretation of Inception modules in convolutional neural networks as being an intermediate step in-between regular convolution and the depthwise separable convolution operation (a depthwise convolution followed by a pointwise convolution). In this light, a depthwise separable convolution can be understood as an Inception module with a maximally large number of towers. This observation leads us to propose a novel deep convolutional neural network architecture inspired by Inception, where Inception modules have been replaced with depthwise separable convolutions. We show that this architecture, dubbed Xception, slightly outperforms Inception V3 on the ImageNet dataset (which Inception V3 was designed for), and significantly outperforms Inception V3 on a larger image classification dataset comprising 350 million images and 17,000 classes. Since the Xception architecture has the same number of parameters as Inception V3, the performance gains are not due to increased capacity but rather to a more efficient use of model parameters.},
archivePrefix = {arXiv},
arxivId = {1610.02357},
author = {Chollet, Fran{\c{c}}ois},
eprint = {1610.02357},
file = {:Users/pietz/Documents/Mendeley/Chollet - 2016 - Xception Deep Learning with Depthwise Separable Convolutions.pdf:pdf},
month = {oct},
title = {{Xception: Deep Learning with Depthwise Separable Convolutions}},
url = {http://arxiv.org/abs/1610.02357},
year = {2016}
}
@article{Sled1997,
abstract = {A novel approach to correcting for intensity non-uniformity in MR data is described that achieves high performance without requiring supervision. By making relatively few assumptions about the data, the method can be applied at an early stage in an automated data analysis, before a tissue intensity or geometric model is available. Described as Non-parametric Non-uniform intensity Normalization (N3), the method is independent of pulse sequence and insensitive to pathological data that might otherwise violate model assumptions. To eliminate the dependence of the field estimate on anatomy, an iterative approach is employed to estimate both the multiplicative bias field and the distribution of the true tissue intensities. The performance of this method is evaluated using both real and simulated MR data. Preprocessing of MR data using N3 is shown to substantially improve the accuracy of anatomical analysis techniques such as tissue classification and cortical surface extraction.},
author = {Sled, John G},
file = {:Users/pietz/Documents/Mendeley/Sled - 1997 - A Non-parametric Method for Automatic Correction of Intensity Non-uniformity in MRI Data.pdf:pdf},
title = {{A Non-parametric Method for Automatic Correction of Intensity Non-uniformity in MRI Data}},
url = {http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.72.2659{\&}rep=rep1{\&}type=pdf},
year = {1997}
}
@article{Lemley2017,
abstract = {A recurring problem faced when training neural networks is that there is typically not enough data to maximize the generalization capability of deep neural networks (DNN). There are many techniques to address this, including data augmentation, dropout, and transfer learning. In this paper, we introduce an additional method which we call Smart Augmentation and we show how to use it to increase the accuracy and reduce overfitting on a target network. Smart Augmentation works by creating a network that learns how to generate augmented data during the training process of a target network in a way that reduces that network's loss. This allows us to learn augmentations that minimize the error of that network. Smart Augmentation has shown the potential to increase accuracy by demonstrably significant measures on all datasets tested. In addition, it has shown potential to achieve similar or improved performance levels with significantly smaller network sizes in a number of tested cases.},
archivePrefix = {arXiv},
arxivId = {1703.08383},
author = {Lemley, Joseph and Bazrafkan, Shabab and Corcoran, Peter},
doi = {10.1109/ACCESS.2017.2696121},
eprint = {1703.08383},
file = {:Users/pietz/Documents/Mendeley/Lemley, Bazrafkan, Corcoran - 2017 - Smart Augmentation - Learning an Optimal Data Augmentation Strategy.pdf:pdf},
month = {mar},
title = {{Smart Augmentation - Learning an Optimal Data Augmentation Strategy}},
url = {http://arxiv.org/abs/1703.08383},
year = {2017}
}
@article{Wang2013,
abstract = {Label fusion based multi-atlas segmentation has proven to be one of the most competitive techniques for medical image segmentation. This technique transfers segmentations from expert-labeled images, called atlases, to a novel image using deformable image registration. Errors produced by label transfer are further reduced by label fusion that combines the results produced by all atlases into a consensus solution. Among the proposed label fusion strategies, weighted voting with spatially varying weight distributions derived from atlas-target intensity similarity is a simple and highly effective label fusion technique. However, one limitation of most weighted voting methods is that the weights are computed independently for each atlas, without taking into account the fact that different atlases may produce similar label errors. To address this problem, we recently developed the joint label fusion technique and the corrective learning technique, which won the first place of the 2012 MICCAI Multi-Atlas Labeling Challenge and was one of the top performers in 2013 MICCAI Segmentation: Algorithms, Theory and Applications (SATA) challenge. To make our techniques more accessible to the scientific research community, we describe an Insight-Toolkit based open source implementation of our label fusion methods. Our implementation extends our methods to work with multi-modality imaging data and is more suitable for segmentation problems with multiple labels. We demonstrate the usage of our tools through applying them to the 2012 MICCAI Multi-Atlas Labeling Challenge brain image dataset and the 2013 SATA challenge canine leg image dataset. We report the best results on these two datasets so far.},
author = {Wang, Hongzhi and Yushkevich, Paul A},
doi = {10.3389/fninf.2013.00027},
file = {:Users/pietz/Documents/Mendeley/Wang, Yushkevich - 2013 - Multi-atlas segmentation with joint label fusion and corrective learning-an open source implementation.pdf:pdf},
issn = {1662-5196},
journal = {Frontiers in neuroinformatics},
keywords = {Insight-Toolkit,corrective learning,joint label fusion,multi-atlas label fusion,open source implementation},
pages = {27},
pmid = {24319427},
publisher = {Frontiers Media SA},
title = {{Multi-atlas segmentation with joint label fusion and corrective learning-an open source implementation.}},
url = {http://www.ncbi.nlm.nih.gov/pubmed/24319427},
volume = {7},
year = {2013}
}
@article{Iglesias2014,
abstract = {Multi-atlas segmentation (MAS), first introduced and popularized by the pioneering work of Rohlfing, Brandt, Menzel and Maurer Jr (2004), Klein, Mensh, Ghosh, Tourville and Hirsch (2005), and Heckemann, Hajnal, Aljabar, Rueckert and Hammers (2006), is becoming one of the most widely-used and successful image segmentation techniques in biomedical applications. By manipulating and utilizing the entire dataset of "atlases" (training images that have been previously labeled, e.g., manually by an expert), rather than some model-based average representation, MAS has the flexibility to better capture anatomical variation, thus offering superior segmentation accuracy. This benefit, however, typically comes at a high computational cost. Recent advancements in computer hardware and image processing software have been instrumental in addressing this challenge and facilitated the wide adoption of MAS. Today, MAS has come a long way and the approach includes a wide array of sophisticated algorithms that employ ideas from machine learning, probabilistic modeling, optimization, and computer vision, among other fields. This paper presents a survey of published MAS algorithms and studies that have applied these methods to various biomedical problems. In writing this survey, we have three distinct aims. Our primary goal is to document how MAS was originally conceived, later evolved, and now relates to alternative methods. Second, this paper is intended to be a detailed reference of past research activity in MAS, which now spans over a decade (2003 - 2014) and entails novel methodological developments and application-specific solutions. Finally, our goal is to also present a perspective on the future of MAS, which, we believe, will be one of the dominant approaches in biomedical image segmentation.},
archivePrefix = {arXiv},
arxivId = {1412.3421},
author = {Iglesias, Juan Eugenio and Sabuncu, Mert Rory},
eprint = {1412.3421},
file = {:Users/pietz/Documents/Mendeley/Iglesias, Sabuncu - 2014 - Multi-Atlas Segmentation of Biomedical Images A Survey.pdf:pdf},
month = {dec},
title = {{Multi-Atlas Segmentation of Biomedical Images: A Survey}},
url = {http://arxiv.org/abs/1412.3421},
year = {2014}
}
@article{Wang2017a,
abstract = {Accurate medical image segmentation is essential for diagnosis, surgical planning and many other applications. Convolutional Neural Networks (CNNs) have shown to be state-of-the-art automatic segmentation methods while the result still needs to be refined to become accurate and robust enough for clinical use. We propose a deep learning-based interactive segmentation method in order to improve the segmentation obtained by an automatic CNN as well as reduce user interactions during refinement for better results. We use one CNN to obtain an initial segmentation automatically, on which user interactions are added to indicate mis-segmentations. Another CNN takes as input the user interactions with the initial segmentation and gives a refined result. We propose a new way to combine user interactions with CNNs through geodesic distance maps, and propose a resolution-preserving network that can give better dense prediction. In addition, we integrate user interactions as hard constraints into back-propagatable Conditional Random Fields. We validated the proposed framework in the application of placenta segmentation from fetal MRI and clavicle segmentation from chest radiographs. Experimental results show our method achieves a large improvement from automatic CNNs, and obtains comparable accuracy with fewer user interventions and less time compared with traditional interactive methods.},
archivePrefix = {arXiv},
arxivId = {1707.00652},
author = {Wang, Guotai and Zuluaga, Maria A. and Li, Wenqi and Pratt, Rosalind and Patel, Premal A. and Aertsen, Michael and Doel, Tom and David, Anna L. and Deprest, Jan and Ourselin, Sebastien and Vercauteren, Tom},
eprint = {1707.00652},
file = {:Users/pietz/Documents/Mendeley/Wang et al. - 2017 - DeepIGeoS A Deep Interactive Geodesic Framework for Medical Image Segmentation.pdf:pdf},
keywords = {interactive image segmentation,conditional random fields,convolutional neural network,geodesic distance},
month = {jul},
title = {{DeepIGeoS: A Deep Interactive Geodesic Framework for Medical Image Segmentation}},
url = {http://arxiv.org/abs/1707.00652},
year = {2017}
}
@article{Salehi2017,
abstract = {Fully convolutional deep neural networks carry out excellent potential for fast and accurate image segmentation. One of the main challenges in training these networks is data imbalance, which is particularly problematic in medical imaging applications such as lesion segmentation where the number of lesion voxels is often much lower than the number of non-lesion voxels. Training with unbalanced data can lead to predictions that are severely biased towards high precision but low recall (sensitivity), which is undesired especially in medical applications where false negatives are much less tolerable than false positives. Several methods have been proposed to deal with this problem including balanced sampling, two step training, sample re-weighting, and similarity loss functions. In this paper, we propose a generalized loss function based on the Tversky index to address the issue of data imbalance and achieve much better trade-off between precision and recall in training 3D fully convolutional deep neural networks. Experimental results in multiple sclerosis lesion segmentation on magnetic resonance images show improved F2 score, Dice coefficient, and the area under the precision-recall curve in test data. Based on these results we suggest Tversky loss function as a generalized framework to effectively train deep neural networks.},
archivePrefix = {arXiv},
arxivId = {1706.05721},
author = {Salehi, Seyed Sadegh Mohseni and Erdogmus, Deniz and Gholipour, Ali},
eprint = {1706.05721},
file = {:Users/pietz/Documents/Mendeley/Salehi, Erdogmus, Gholipour - 2017 - Tversky loss function for image segmentation using 3D fully convolutional deep networks(2).pdf:pdf},
month = {jun},
title = {{Tversky loss function for image segmentation using 3D fully convolutional deep networks}},
url = {http://arxiv.org/abs/1706.05721},
year = {2017}
}
@article{Ioffe2015,
abstract = {Training Deep Neural Networks is complicated by the fact that the distribution of each layer's inputs changes during training, as the parameters of the previous layers change. This slows down the training by requiring lower learning rates and careful parameter initialization, and makes it notoriously hard to train models with saturating nonlinearities. We refer to this phenomenon as internal covariate shift, and address the problem by normalizing layer inputs. Our method draws its strength from making normalization a part of the model architecture and performing the normalization for each training mini-batch. Batch Normalization allows us to use much higher learning rates and be less careful about initialization. It also acts as a regularizer, in some cases eliminating the need for Dropout. Applied to a state-of-the-art image classification model, Batch Normalization achieves the same accuracy with 14 times fewer training steps, and beats the original model by a significant margin. Using an ensemble of batch-normalized networks, we improve upon the best published result on ImageNet classification: reaching 4.9{\%} top-5 validation error (and 4.8{\%} test error), exceeding the accuracy of human raters.},
archivePrefix = {arXiv},
arxivId = {1502.03167},
author = {Ioffe, Sergey and Szegedy, Christian},
eprint = {1502.03167},
file = {:Users/pietz/Documents/Mendeley/Ioffe, Szegedy - 2015 - Batch Normalization Accelerating Deep Network Training by Reducing Internal Covariate Shift.pdf:pdf},
month = {feb},
title = {{Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift}},
url = {http://arxiv.org/abs/1502.03167},
year = {2015}
}
@article{Krahenbuhl2012,
abstract = {Most state-of-the-art techniques for multi-class image segmentation and labeling use conditional random fields defined over pixels or image regions. While region-level models often feature dense pairwise connectivity, pixel-level models are considerably larger and have only permitted sparse graph structures. In this paper, we consider fully connected CRF models defined on the complete set of pixels in an image. The resulting graphs have billions of edges, making traditional inference algorithms impractical. Our main contribution is a highly efficient approximate inference algorithm for fully connected CRF models in which the pairwise edge potentials are defined by a linear combination of Gaussian kernels. Our experiments demonstrate that dense connectivity at the pixel level substantially improves segmentation and labeling accuracy.},
archivePrefix = {arXiv},
arxivId = {1210.5644},
author = {Kr{\"{a}}henb{\"{u}}hl, Philipp and Koltun, Vladlen},
eprint = {1210.5644},
file = {:Users/pietz/Documents/Mendeley/Kr{\"{a}}henb{\"{u}}hl, Koltun - 2012 - Efficient Inference in Fully Connected CRFs with Gaussian Edge Potentials.pdf:pdf},
month = {oct},
title = {{Efficient Inference in Fully Connected CRFs with Gaussian Edge Potentials}},
url = {http://arxiv.org/abs/1210.5644},
year = {2012}
}
@article{Tustison2010,
abstract = {A variant of the popular nonparametric nonuniform intensity normalization (N3) algorithm is proposed for bias field correction. Given the superb performance of N3 and its public availability, it has been the subject of several evaluation studies. These studies have demonstrated the importance of certain parameters associated with the B-spline least-squares fitting. We propose the substitution of a recently developed fast and robust B-spline approximation routine and a modified hierarchical optimization scheme for improved bias field correction over the original N3 algorithm. Similar to the N3 algorithm, we also make the source code, testing, and technical documentation of our contribution, which we denote as "N4ITK," available to the public through the Insight Toolkit of the National Institutes of Health. Performance assessment is demonstrated using simulated data from the publicly available Brainweb database, hyperpolarized 3He lung image data, and 9.4T postmortem hippocampus data.},
author = {Tustison, Nicholas J. and Avants, Brian B. and Cook, Philip A. and Zheng, Yuanjie and Egan, Alexander and Yushkevich, Paul A. and Gee, James C.},
doi = {10.1109/TMI.2010.2046908},
file = {:Users/pietz/Documents/Mendeley/N4ITK.pdf:pdf},
issn = {02780062},
journal = {IEEE Transactions on Medical Imaging},
keywords = {B-spline approximation,Bias field,Inhomogeneity,N3},
number = {6},
pages = {1310--1320},
pmid = {20378467},
title = {{N4ITK: Improved N3 bias correction}},
volume = {29},
year = {2010}
}
@incollection{Pastor-Pellicer2013,
author = {Pastor-Pellicer, Joan and Zamora-Mart{\'{i}}nez, Francisco and Espa{\~{n}}a-Boquera, Salvador and Castro-Bleda, Mar{\'{i}}a Jos{\'{e}}},
doi = {10.1007/978-3-642-38679-4_37},
file = {:Users/pietz/Documents/Mendeley/F-Measure.pdf:pdf},
pages = {376--384},
publisher = {Springer, Berlin, Heidelberg},
title = {{F-Measure as the Error Function to Train Neural Networks}},
url = {http://link.springer.com/10.1007/978-3-642-38679-4{\_}37},
year = {2013}
}
@article{Csurka2013,
abstract = {In this work, we consider the evaluation of the semantic segmentation task. We discuss the strengths and limitations of the few existing measures, and propose new ways to evaluate semantic segmentation. First, we argue that a per-image score instead of one computed over the entire dataset brings a lot more insight. Second, we propose to take contours more carefully into account. Based on the conducted experiments, we suggest best practices for the evaluation. Finally, we present a user study we conducted to better understand how the quality of image segmentations is perceived by humans.},
author = {Csurka, Gabriela and Larlus, Diane and Perronnin, Florent},
file = {:Users/pietz/Documents/Mendeley/Csurka, Larlus, Perronnin - Unknown - What is a good evaluation measure for semantic segmentation.pdf:pdf},
title = {{What is a good evaluation measure for semantic segmentation?}},
url = {http://www.bmva.org/bmvc/2013/Papers/paper0032/paper0032.pdf},
year = {2013}
}
@article{Chen2017,
abstract = {In this work, we revisit atrous convolution, a powerful tool to explicitly adjust filter's field-of-view as well as control the resolution of feature responses computed by Deep Convolutional Neural Networks, in the application of semantic image segmentation. To handle the problem of segmenting objects at multiple scales, we design modules which employ atrous convolution in cascade or in parallel to capture multi-scale context by adopting multiple atrous rates. Furthermore, we propose to augment our previously proposed Atrous Spatial Pyramid Pooling module, which probes convolutional features at multiple scales, with image-level features encoding global context and further boost performance. We also elaborate on implementation details and share our experience on training our system. The proposed `DeepLabv3' system significantly improves over our previous DeepLab versions without DenseCRF post-processing and attains comparable performance with other state-of-the-art models on the PASCAL VOC 2012 semantic image segmentation benchmark.},
archivePrefix = {arXiv},
arxivId = {1706.05587},
author = {Chen, Liang-Chieh and Papandreou, George and Schroff, Florian and Adam, Hartwig},
eprint = {1706.05587},
file = {:Users/pietz/Documents/Mendeley/Chen et al. - 2017 - Rethinking Atrous Convolution for Semantic Image Segmentation.pdf:pdf},
month = {jun},
title = {{Rethinking Atrous Convolution for Semantic Image Segmentation}},
url = {http://arxiv.org/abs/1706.05587},
year = {2017}
}
@article{Yu2015,
abstract = {State-of-the-art models for semantic segmentation are based on adaptations of convolutional networks that had originally been designed for image classification. However, dense prediction and image classification are structurally different. In this work, we develop a new convolutional network module that is specifically designed for dense prediction. The presented module uses dilated convolutions to systematically aggregate multi-scale contextual information without losing resolution. The architecture is based on the fact that dilated convolutions support exponential expansion of the receptive field without loss of resolution or coverage. We show that the presented context module increases the accuracy of state-of-the-art semantic segmentation systems. In addition, we examine the adaptation of image classification networks to dense prediction and show that simplifying the adapted network can increase accuracy.},
archivePrefix = {arXiv},
arxivId = {1511.07122},
author = {Yu, Fisher and Koltun, Vladlen},
eprint = {1511.07122},
file = {:Users/pietz/Documents/Mendeley/1511.07122.pdf:pdf},
month = {nov},
title = {{Multi-Scale Context Aggregation by Dilated Convolutions}},
url = {http://arxiv.org/abs/1511.07122},
year = {2015}
}
@article{Hafiane2017,
abstract = {Ultrasound-Guided Regional Anesthesia (UGRA) has been gaining importance in the last few years, offering numerous advantages over alternative methods of nerve localization (neurostimulation or paraesthesia). However, nerve detection is one of the most difficult tasks that anaesthetists can encounter in the UGRA procedure. A computer-aided system that can automatically detect the nerve region would help the practitioner concentrate more on anaesthetic delivery. In this paper we propose a new method based on deep learning combined with spatiotemporal information to robustly segment the nerve region. The proposed method consists of two phases, localisation and segmentation. The first phase uses a convolutional neural network combined with spatial and temporal consistency to detect the nerve zone. The second phase utilises an active contour model to delineate the region of interest. The obtained results show the validity of the proposed approach and its robustness.},
archivePrefix = {arXiv},
arxivId = {1706.05870},
author = {Hafiane, Adel and Vieyres, Pierre and Delbos, Alain},
eprint = {1706.05870},
file = {:Users/pietz/Documents/Mendeley/1706.05870.pdf:pdf},
month = {jun},
title = {{Deep learning with spatiotemporal consistency for nerve segmentation in ultrasound images}},
url = {http://arxiv.org/abs/1706.05870},
year = {2017}
}
@phdthesis{Mauer2015,
author = {{Auf der Mauer}, Markus},
file = {:Users/pietz/Documents/Mendeley/Auf der Mauer - 2015 - Automated Quantification of the Growth Plate of the Proximal Tibia for the Age Assessment in 3D MR Images Using a.pdf:pdf},
title = {{Automated Quantification of the Growth Plate of the Proximal Tibia for the Age Assessment in 3D MR Images Using a Fuzzy-Logic Classification Approach}},
year = {2015}
}
@article{Chlebus2017,
abstract = {We present a fully automatic method employing convolutional neural networks based on the 2D U-net architecture and random forest classifier to solve the automatic liver lesion segmentation problem of the ISBI 2017 Liver Tumor Segmentation Challenge (LiTS). In order to constrain the ROI in which the tumors could be located, a liver segmentation is performed first. For the organ segmentation, an ensemble of convolutional networks is trained to segment a liver using a set of 179 liver CT datasets from liver surgery planning. Inside of the liver ROI a neural network, trained using 127 challenge training datasets, identifies tumor candidates, which are subsequently filtered with a random forest classifier yielding the final tumor segmentation. The evaluation on the 70 challenge test cases resulted in a mean Dice coefficient of 0.65, ranking our method in the second place.},
archivePrefix = {arXiv},
arxivId = {1706.00842},
author = {Chlebus, Grzegorz and Meine, Hans and Moltz, Jan Hendrik and Schenk, Andrea},
eprint = {1706.00842},
file = {:Users/pietz/Documents/Mendeley/Chlebus et al. - 2017 - Neureal Network-Based Automatic Liver Tumor Segmentation With Random Forest-Based Candidate Filtering.pdf:pdf},
month = {jun},
title = {{Neural Network-Based Automatic Liver Tumor Segmentation With Random Forest-Based Candidate Filtering}},
url = {http://arxiv.org/abs/1706.00842},
year = {2017}
}
@article{Son2017,
abstract = {Retinal vessel segmentation is an indispensable step for automatic detection of retinal diseases with fundoscopic images. Though many approaches have been proposed, existing methods tend to miss fine vessels or allow false positives at terminal branches. Let alone under-segmentation, over-segmentation is also problematic when quantitative studies need to measure the precise width of vessels. In this paper, we present a method that generates the precise map of retinal vessels using generative adversarial training. Our methods achieve dice coefficient of 0.829 on DRIVE dataset and 0.834 on STARE dataset which is the state-of-the-art performance on both datasets.},
archivePrefix = {arXiv},
arxivId = {1706.09318},
author = {Son, Jaemin and Park, Sang Jun and Jung, Kyu-Hwan},
eprint = {1706.09318},
file = {:Users/pietz/Documents/Mendeley/1706.09318.pdf:pdf},
month = {jun},
title = {{Retinal Vessel Segmentation in Fundoscopic Images with Generative Adversarial Networks}},
url = {http://arxiv.org/abs/1706.09318},
year = {2017}
}
@article{Yuheng2017,
abstract = {The technology of image segmentation is widely used in medical image processing, face recognition, pedestrian detection, etc. The current image segmentation techniques include region-based segmentation, edge detection segmentation, segmentation based on clustering, segmentation based on weakly-supervised learning in CNN, etc. This paper analyzes and summarizes these algorithms of image segmentation, and compares the advantages and disadvantages of different algorithms. Finally, we make a prediction of the development trend of image segmentation with the combination of these algorithms.},
archivePrefix = {arXiv},
arxivId = {1707.02051},
author = {Yuheng, Song and Hao, Yan},
eprint = {1707.02051},
file = {:Users/pietz/Documents/Mendeley/Yuheng, Hao - 2017 - Image Segmentation Algorithms Overview.pdf:pdf},
month = {jul},
title = {{Image Segmentation Algorithms Overview}},
url = {http://arxiv.org/abs/1707.02051},
year = {2017}
}
@article{Feng2017,
abstract = {Automated detection and segmentation of pulmonary nodules on lung computed tomography (CT) scans can facilitate early lung cancer diagnosis. Existing supervised approaches for automated nodule segmentation on CT scans require voxel-based annotations for training, which are labor- and time-consuming to obtain. In this work, we propose a weakly-supervised method that generates accurate voxel-level nodule segmentation trained with image-level labels only. By adapting a convolutional neural network (CNN) trained for image classification, our proposed method learns discriminative regions from the activation maps of convolution units at different scales, and identifies the true nodule location with a novel candidate-screening framework. Experimental results on the public LIDC-IDRI dataset demonstrate that our weakly-supervised nodule segmentation framework achieves competitive performance compared to a fully-supervised CNN-based segmentation method.},
archivePrefix = {arXiv},
arxivId = {1707.01086},
author = {Feng, Xinyang and Yang, Jie and Laine, Andrew F. and Angelini, Elsa D.},
eprint = {1707.01086},
file = {:Users/pietz/Documents/Mendeley/Feng et al. - 2017 - Discriminative Localization in CNNs for Weakly-Supervised Segmentation of Pulmonary Nodules.pdf:pdf},
month = {jul},
title = {{Discriminative Localization in CNNs for Weakly-Supervised Segmentation of Pulmonary Nodules}},
url = {http://arxiv.org/abs/1707.01086},
year = {2017}
}
@article{Fidon2017,
abstract = {The Dice score is widely used for binary segmentation due to its robustness to class imbalance. Soft generalisations of the Dice score allow it to be used as a loss function for training convolutional neural networks (CNN). Although CNNs trained using mean-class Dice score achieve state-of-the-art results on multi-class segmentation, this loss function takes advantage of neither inter-class relationships nor multi-scale information. We argue that an improved loss function should balance misclassifications to favour predictions that are semantically meaningful. This paper investigates these issues in the context of multi-class brain tumour segmentation. Our contribution is threefold. 1) We propose a semantically-informed generalisation of the Dice score for multi-class segmentation based on the Wasserstein distance on the probabilistic label space. 2) We propose a holistic CNN that embeds spatial information at multiple scales with deep supervision. 3) We show that the joint use of holistic CNNs and generalised Wasserstein Dice scores achieves segmentations that are more semantically meaningful for brain tumour segmentation.},
archivePrefix = {arXiv},
arxivId = {1707.00478},
author = {Fidon, Lucas and Li, Wenqi and Garcia-Peraza-Herrera, Luis C.},
eprint = {1707.00478},
file = {:Users/pietz/Documents/Mendeley/Fidon et al. - 2017 - Generalised Wasserstein Dice Score for Imbalanced Multi-class Segmentation using Holistic Convolutional Networks.pdf:pdf},
month = {jul},
pages = {1--11},
title = {{Generalised Wasserstein Dice Score for Imbalanced Multi-class Segmentation using Holistic Convolutional Networks}},
url = {http://arxiv.org/abs/1707.00478},
year = {2017}
}
@article{Moeskops2017,
abstract = {Convolutional neural networks (CNNs) have been applied to various automatic image segmentation tasks in medical image analysis, including brain MRI segmentation. Generative adversarial networks have recently gained popularity because of their power in generating images that are difficult to distinguish from real images. In this study we use an adversarial training approach to improve CNN-based brain MRI segmentation. To this end, we include an additional loss function that motivates the network to generate segmentations that are difficult to distinguish from manual segmentations. During training, this loss function is optimised together with the conventional average per-voxel cross entropy loss. The results show improved segmentation performance using this adversarial training procedure for segmentation of two different sets of images and using two different network architectures, both visually and in terms of Dice coefficients.},
archivePrefix = {arXiv},
arxivId = {1707.03195},
author = {Moeskops, Pim and Veta, Mitko and Lafarge, Maxime W. and Eppenhof, Koen A. J. and Pluim, Josien P. W.},
eprint = {1707.03195},
file = {:Users/pietz/Documents/Mendeley/Moeskops et al. - 2017 - Adversarial training and dilated convolutions for brain MRI segmentation.pdf:pdf},
month = {jul},
title = {{Adversarial training and dilated convolutions for brain MRI segmentation}},
url = {http://arxiv.org/abs/1707.03195},
year = {2017}
}
@article{Tai2016,
abstract = {Semantic segmentation of functional magnetic resonance imaging (fMRI) is of great value for pathology diagnosis and the decision systems of medical robots. The multi-channel fMRI data provide more information of the pathological features. But the increased amount of data causes complexity in feature detection. This paper proposes a principal component analysis (PCA)-aided fully convolutional network to particularly deal with multi-channel fMRI. We transfer the learned weights of contemporary classification networks to the segmentation task by fine-tuning. The experimental results are compared with various methods e.g. k-NN. A new labelling strategy is proposed to solve the semantic segmentation problem with unclear boundaries. Even with a small-sized training dataset, the test results demonstrate that our model outperforms other pathological feature detection methods. Besides, its forward inference only takes 90 milliseconds for a single set of fMRI data. To our knowledge, this is the first time to realize pixel-wise labeling of multi-channel magnetic resonance images using FCN.},
archivePrefix = {arXiv},
arxivId = {1610.01732},
author = {Tai, Lei and Ye, Qiong and Liu, Ming},
eprint = {1610.01732},
file = {:Users/pietz/Documents/Mendeley/Tai et al. - 2016 - PCA-aided Fully Convolutional Networks for Semantic Segmentation of Multi-channel fMRI.pdf:pdf},
month = {oct},
title = {{PCA-aided Fully Convolutional Networks for Semantic Segmentation of Multi-channel fMRI}},
url = {http://arxiv.org/abs/1610.01732},
year = {2016}
}
@article{Badrinarayanan2015,
abstract = {We present a novel and practical deep fully convolutional neural network architecture for semantic pixel-wise segmentation termed SegNet. This core trainable segmentation engine consists of an encoder network, a corresponding decoder network followed by a pixel-wise classification layer. The architecture of the encoder network is topologically identical to the 13 convolutional layers in the VGG16 network. The role of the decoder network is to map the low resolution encoder feature maps to full input resolution feature maps for pixel-wise classification. The novelty of SegNet lies in the manner in which the decoder upsamples its lower resolution input feature map(s). Specifically, the decoder uses pooling indices computed in the max-pooling step of the corresponding encoder to perform non-linear upsampling. This eliminates the need for learning to upsample. The upsampled maps are sparse and are then convolved with trainable filters to produce dense feature maps. We compare our proposed architecture with the widely adopted FCN and also with the well known DeepLab-LargeFOV, DeconvNet architectures. This comparison reveals the memory versus accuracy trade-off involved in achieving good segmentation performance. SegNet was primarily motivated by scene understanding applications. Hence, it is designed to be efficient both in terms of memory and computational time during inference. It is also significantly smaller in the number of trainable parameters than other competing architectures. We also performed a controlled benchmark of SegNet and other architectures on both road scenes and SUN RGB-D indoor scene segmentation tasks. We show that SegNet provides good performance with competitive inference time and more efficient inference memory-wise as compared to other architectures. We also provide a Caffe implementation of SegNet and a web demo at http://mi.eng.cam.ac.uk/projects/segnet/.},
archivePrefix = {arXiv},
arxivId = {1511.00561},
author = {Badrinarayanan, Vijay and Kendall, Alex and Cipolla, Roberto},
eprint = {1511.00561},
file = {:Users/pietz/Documents/Mendeley/1511.00561.pdf:pdf},
month = {nov},
title = {{SegNet: A Deep Convolutional Encoder-Decoder Architecture for Image Segmentation}},
url = {http://arxiv.org/abs/1511.00561},
year = {2015}
}
@article{Lieman-Sifry2017,
abstract = {Cardiac Magnetic Resonance (CMR) imaging is commonly used to assess cardiac structure and function. One disadvantage of CMR is that post-processing of exams is tedious. Without automation, precise assessment of cardiac function via CMR typically requires an annotator to spend tens of minutes per case manually contouring ventricular structures. Automatic contouring can lower the required time per patient by generating contour suggestions that can be lightly modified by the annotator. Fully convolutional networks (FCNs), a variant of convolutional neural networks, have been used to rapidly advance the state-of-the-art in automated segmentation, which makes FCNs a natural choice for ventricular segmentation. However, FCNs are limited by their computational cost, which increases the monetary cost and degrades the user experience of production systems. To combat this shortcoming, we have developed the FastVentricle architecture, an FCN architecture for ventricular segmentation based on the recently developed ENet architecture. FastVentricle is 4x faster and runs with 6x less memory than the previous state-of-the-art ventricular segmentation architecture while still maintaining excellent clinical accuracy.},
archivePrefix = {arXiv},
arxivId = {1704.04296},
author = {Lieman-Sifry, Jesse and Le, Matthieu and Lau, Felix and Sall, Sean and Golden, Daniel},
doi = {10.1007/978-3-319-59448-4_13},
eprint = {1704.04296},
file = {:Users/pietz/Documents/Mendeley/1704.04296.pdf:pdf},
isbn = {9783319594477},
issn = {16113349},
title = {{FastVentricle: Cardiac Segmentation with ENet}},
year = {2017}
}
@article{Srivastava2014,
author = {Srivastava, Nitish and Hinton, Geoffrey and Krizhevsky, Alex and Sutskever, Ilya and Salakhutdinov, Ruslan},
file = {:Users/pietz/Documents/Mendeley/Srivastava et al. - 2014 - Dropout A Simple Way to Prevent Neural Networks from Overfitting.pdf:pdf},
journal = {Journal of Machine Learning Research},
pages = {1929--1958},
title = {{Dropout: A Simple Way to Prevent Neural Networks from Overfitting}},
url = {http://jmlr.org/papers/v15/srivastava14a.html},
volume = {15},
year = {2014}
}
@book{Chollet2017,
author = {Chollet, Fran{\c{c}}ois},
publisher = {Manning},
file = {:Users/pietz/Documents/Mendeley/Chollet - 2017 - Deep Learning With Python.pdf:pdf},
keywords = {ai,artificial intelligence,deep learning,dl,keras,machine learning,ml},
mendeley-tags = {ai,artificial intelligence,deep learning,dl,keras,machine learning,ml},
title = {{Deep Learning With Python}},
volume = {1},
year = {2017}
}
@book{Goodfellow2016,
author = {Goodfellow, Ian and Bengio, Yoshua and Courville, Aaron},
file = {:Users/pietz/Documents/Mendeley/Goodfellow, Bengio, Courville - 2016 - Deep Learning.pdf:pdf},
isbn = {0262035618},
publisher = {The MIT Press},
title = {{Deep Learning}},
year = {2016}
}
@inproceedings{Milletari2016,
abstract = {Convolutional Neural Networks (CNNs) have been recently employed to solve problems from both the computer vision and medical image analysis fields. Despite their popularity, most approaches are only able to process 2D images while most medical data used in clinical practice consists of 3D volumes. In this work we propose an approach to 3D image segmentation based on a volumetric, fully convolutional, neural network. Our CNN is trained end-to-end on MRI volumes depicting prostate, and learns to predict segmentation for the whole volume at once. We introduce a novel objective function, that we optimise during training, based on Dice coefficient. In this way we can deal with situations where there is a strong imbalance between the number of foreground and background voxels. To cope with the limited number of annotated volumes available for training, we augment the data applying random non-linear transformations and histogram matching. We show in our experimental evaluation that our approach achieves good performances on challenging test data while requiring only a fraction of the processing time needed by other previous methods.},
archivePrefix = {arXiv},
arxivId = {1606.04797},
author = {Milletari, Fausto and Navab, Nassir and Ahmadi, Seyed Ahmad},
booktitle = {Proceedings - 2016 4th International Conference on 3D Vision, 3DV 2016},
doi = {10.1109/3DV.2016.79},
eprint = {1606.04797},
isbn = {9781509054077},
keywords = {Deep learning,convolutional neural networks,machine learning,prostate,segmentation},
month = {jun},
pages = {565--571},
title = {{V-Net: Fully convolutional neural networks for volumetric medical image segmentation}},
url = {http://arxiv.org/abs/1606.04797},
year = {2016}
}
@article{Kayalibay2017,
abstract = {Convolutional neural networks have been applied to a wide variety of computer vision tasks. Recent advances in semantic segmentation have enabled their application to medical image segmentation. While most CNNs use two-dimensional kernels, recent CNN-based publications on medical image segmentation featured three-dimensional kernels, allowing full access to the three-dimensional structure of medical images. Though closely related to semantic segmentation, medical image segmentation includes specific challenges that need to be addressed, such as the scarcity of labelled data, the high class imbalance found in the ground truth and the high memory demand of three-dimensional images. In this work, a CNN-based method with three-dimensional filters is demonstrated and applied to hand and brain MRI. Two modifications to an existing CNN architecture are discussed, along with methods for addressing the aforementioned challenges. While most of the existing literature on medical image segmentation focuses on soft tissue and the major organs, this work is validated on data from both the central nervous system and the bones of the hand.},
archivePrefix = {arXiv},
arxivId = {1701.03056},
author = {Kayalibay, Baris and Jensen, Grady and van der Smagt, Patrick},
eprint = {1701.03056},
month = {jan},
title = {{CNN-based Segmentation of Medical Imaging Data}},
url = {http://arxiv.org/abs/1701.03056},
year = {2017}
}
@article{Shelhamer2016,
abstract = {Convolutional networks are powerful visual models that yield hierarchies of features. We show that convolutional networks by themselves, trained end-to-end, pixels-to-pixels, improve on the previous best result in semantic segmentation. Our key insight is to build "fully convolutional" networks that take input of arbitrary size and produce correspondingly-sized output with efficient inference and learning. We define and detail the space of fully convolutional networks, explain their application to spatially dense prediction tasks, and draw connections to prior models. We adapt contemporary classification networks (AlexNet, the VGG net, and GoogLeNet) into fully convolutional networks and transfer their learned representations by fine-tuning to the segmentation task. We then define a skip architecture that combines semantic information from a deep, coarse layer with appearance information from a shallow, fine layer to produce accurate and detailed segmentations. Our fully convolutional network achieves improved segmentation of PASCAL VOC (30{\%} relative improvement to 67.2{\%} mean IU on 2012), NYUDv2, SIFT Flow, and PASCAL-Context, while inference takes one tenth of a second for a typical image.},
archivePrefix = {arXiv},
arxivId = {1605.06211},
author = {Shelhamer, Evan and Long, Jonathan and Darrell, Trevor},
eprint = {1605.06211},
month = {may},
title = {{Fully Convolutional Networks for Semantic Segmentation}},
url = {http://arxiv.org/abs/1605.06211},
year = {2016}
}
@article{Wang2017,
abstract = {Recent advances in deep learning, especially deep convolutional neural networks (CNNs), have led to significant improvement over previous semantic segmentation systems. Here we show how to improve pixel-wise semantic segmentation by manipulating convolution-related operations that are better for practical use. First, we implement dense upsampling convolution (DUC) to generate pixel-level prediction, which is able to capture and decode more detailed information that is generally missing in bilinear upsampling. Second, we propose a hybrid dilated convolution (HDC) framework in the encoding phase. This framework 1) effectively enlarges the receptive fields of the network to aggregate global information; 2) alleviates what we call the "gridding issue" caused by the standard dilated convolution operation. We evaluate our approaches thoroughly on the Cityscapes dataset, and achieve a new state-of-the-art result of 80.1{\%} mIOU in the test set. We are also state-of-the-art overall on the KITTI road estimation benchmark and the PASCAL VOC2012 segmentation task. Pretrained models are available at https://goo.gl/DQMeun},
archivePrefix = {arXiv},
arxivId = {1702.08502},
author = {Wang, Panqu and Chen, Pengfei and Yuan, Ye and Liu, Ding and Huang, Zehua and Hou, Xiaodi and Cottrell, Garrison},
eprint = {1702.08502},
month = {feb},
title = {{Understanding Convolution for Semantic Segmentation}},
url = {http://arxiv.org/abs/1702.08502},
year = {2017}
}
@inproceedings{Cicek2016,
abstract = {This paper introduces a network for volumetric segmentation that learns from sparsely annotated volumetric images. We outline two attractive use cases of this method: (1) In a semi-automated setup, the user annotates some slices in the volume to be segmented. The network learns from these sparse annotations and provides a dense 3D segmentation. (2) In a fully-automated setup, we assume that a representative, sparsely annotated training set exists. Trained on this data set, the network densely segments new volumetric images. The proposed network extends the previous u-net architecture from Ronneberger et al. by replacing all 2D operations with their 3D counterparts. The implementation performs on-the-fly elastic deformations for efficient data augmentation during training. It is trained end-to-end from scratch, i.e., no pre-trained network is required. We test the performance of the proposed method on a complex, highly variable 3D structure, the Xenopus kidney, and achieve good results for both use cases.},
archivePrefix = {arXiv},
arxivId = {1606.06650},
author = {{\c{C}}i{\c{c}}ek, {\"{O}}zg{\"{u}}n and Abdulkadir, Ahmed and Lienkamp, Soeren S. and Brox, Thomas and Ronneberger, Olaf},
booktitle = {Lecture Notes in Computer Science (including subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics)},
doi = {10.1007/978-3-319-46723-8_49},
eprint = {1606.06650},
isbn = {9783319467221},
issn = {16113349},
keywords = {3D,Biomedical volumetric image segmentation,Convolutional neural networks,Fully-automated,Semi-automated,Sparse annotation,Xenopus kidney},
month = {jun},
pages = {424--432},
title = {{3D U-Net: Learning Dense Volumetric Segmentation from Sparse Annotation}},
url = {http://arxiv.org/abs/1606.06650},
volume = {9901},
year = {2016}
}
@article{Zhang2017,
abstract = {We introduce an extremely computation efficient CNN architecture named ShuffleNet, designed specially for mobile devices with very limited computing power (e.g., 10-150 MFLOPs). The new architecture utilizes two proposed operations, pointwise group convolution and channel shuffle, to greatly reduce computation cost while maintaining accuracy. Experiments on ImageNet classification and MS COCO object detection demonstrate the superior performance of ShuffleNet over other structures, e.g. lower top-1 error (absolute 6.7{\%}) than the recent MobileNet system on ImageNet classification under the computation budget of 40 MFLOPs. On an ARM-based mobile device, ShuffleNet achieves $\sim$13$\times$ actual speedup over AlexNet while maintaining comparable accuracy.},
archivePrefix = {arXiv},
arxivId = {1707.01083},
author = {Zhang, Xiangyu and Zhou, Xinyu and Lin, Mengxiao and Sun, Jian},
eprint = {1707.01083},
month = {jul},
pages = {1--10},
title = {{ShuffleNet: An Extremely Efficient Convolutional Neural Network for Mobile Devices}},
url = {http://arxiv.org/abs/1707.01083},
year = {2017}
}
@article{Rajpurkar2017,
abstract = {We develop an algorithm which exceeds the performance of board certified cardiologists in detecting a wide range of heart arrhythmias from electrocardiograms recorded with a single-lead wearable monitor. We build a dataset with more than 500 times as many unique patients as previously studied corpora. On this dataset, we train a 34-layer convolutional neural network which maps a sequence of ECG samples to a sequence of rhythm classes. Committees of board-certified cardiologists annotate a gold standard test set on which we compare the performance of our model to that of 6 other individual cardiologists. We exceed the average cardiologist performance in both recall (sensitivity) and precision (positive predictive value).},
archivePrefix = {arXiv},
arxivId = {1707.01836},
author = {Rajpurkar, Pranav and Hannun, Awni Y. and Haghpanahi, Masoumeh and Bourn, Codie and Ng, Andrew Y.},
eprint = {1707.01836},
month = {jul},
title = {{Cardiologist-Level Arrhythmia Detection with Convolutional Neural Networks}},
url = {http://arxiv.org/abs/1707.01836},
year = {2017}
}
@article{Ronneberger2015a,
abstract = {It is widely agreed that successful training of deep networks requires many thousands of annotated training samples. In this paper, we present a network and training strategy that relies on the strong use of data augmentation to use the available annotated samples more efficiently. The architecture consists of a contracting path to capture context and a symmetric expanding path that enables precise localization. We show that such a network can be trained end-to-end from very few images and outperforms the prior best method (a sliding-window convolutional network) on the ISBI challenge for segmentation of neuronal structures in electron microscopic stacks. Using the same network trained on transmitted light microscopy images (phase contrast and DIC) we won the ISBI cell tracking challenge 2015 in these categories by a large margin. Moreover, the network is fast. Segmentation of a 512x512 image takes less than a second on a recent GPU. The full implementation (based on Caffe) and the trained networks are available at http://lmb.informatik.uni-freiburg.de/people/ronneber/u-net .},
archivePrefix = {arXiv},
arxivId = {1505.04597},
author = {Ronneberger, Olaf and Fischer, Philipp and Brox, Thomas},
doi = {10.1007/978-3-319-24574-4_28},
eprint = {1505.04597},
isbn = {978-3-319-24573-7},
issn = {16113349},
journal = {Medical Image Computing and Computer-Assisted Intervention (MICCAI)},
month = {may},
pages = {234--241},
title = {{U-Net: Convolutional Networks for Biomedical Image Segmentation}},
url = {http://arxiv.org/abs/1505.04597},
year = {2015}
}
@article{Noh2015,
abstract = {We propose a novel semantic segmentation algorithm by learning a deconvolution network. We learn the network on top of the convolutional layers adopted from VGG 16-layer net. The deconvolution network is composed of deconvolution and unpooling layers, which identify pixel-wise class labels and predict segmentation masks. We apply the trained network to each proposal in an input image, and construct the final semantic segmentation map by combining the results from all proposals in a simple manner. The proposed algorithm mitigates the limitations of the existing methods based on fully convolutional networks by integrating deep deconvolution network and proposal-wise prediction; our segmentation method typically identifies detailed structures and handles objects in multiple scales naturally. Our network demonstrates outstanding performance in PASCAL VOC 2012 dataset, and we achieve the best accuracy (72.5{\%}) among the methods trained with no external data through ensemble with the fully convolutional network.},
archivePrefix = {arXiv},
arxivId = {1505.04366},
author = {Noh, Hyeonwoo and Hong, Seunghoon and Han, Bohyung},
eprint = {1505.04366},
month = {may},
title = {{Learning Deconvolution Network for Semantic Segmentation}},
url = {http://arxiv.org/abs/1505.04366},
year = {2015}
}
@article{Springenberg2014,
abstract = {Most modern convolutional neural networks (CNNs) used for object recognition are built using the same principles: Alternating convolution and max-pooling layers followed by a small number of fully connected layers. We re-evaluate the state of the art for object recognition from small images with convolutional networks, questioning the necessity of different components in the pipeline. We find that max-pooling can simply be replaced by a convolutional layer with increased stride without loss in accuracy on several image recognition benchmarks. Following this finding -- and building on other recent work for finding simple network structures -- we propose a new architecture that consists solely of convolutional layers and yields competitive or state of the art performance on several object recognition datasets (CIFAR-10, CIFAR-100, ImageNet). To analyze the network we introduce a new variant of the "deconvolution approach" for visualizing features learned by CNNs, which can be applied to a broader range of network structures than existing approaches.},
archivePrefix = {arXiv},
arxivId = {1412.6806},
author = {Springenberg, Jost Tobias and Dosovitskiy, Alexey and Brox, Thomas and Riedmiller, Martin},
eprint = {1412.6806},
month = {dec},
title = {{Striving for Simplicity: The All Convolutional Net}},
url = {http://arxiv.org/abs/1412.6806},
year = {2014}
}
@article{Roth2015,
abstract = {Automated classification of human anatomy is an important prerequisite for many computer-aided diagnosis systems. The spatial complexity and variability of anatomy throughout the human body makes classification difficult. "Deep learning" methods such as convolutional networks (ConvNets) outperform other state-of-the-art methods in image classification tasks. In this work, we present a method for organ- or body-part-specific anatomical classification of medical images acquired using computed tomography (CT) with ConvNets. We train a ConvNet, using 4,298 separate axial 2D key-images to learn 5 anatomical classes. Key-images were mined from a hospital PACS archive, using a set of 1,675 patients. We show that a data augmentation approach can help to enrich the data set and improve classification performance. Using ConvNets and data augmentation, we achieve anatomy-specific classification error of 5.9 {\%} and area-under-the-curve (AUC) values of an average of 0.998 in testing. We demonstrate that deep learning can be used to train very reliable and accurate classifiers that could initialize further computer-aided diagnosis.},
archivePrefix = {arXiv},
arxivId = {1504.04003},
author = {Roth, Holger R. and Lee, Christopher T. and Shin, Hoo-Chang and Seff, Ari and Kim, Lauren and Yao, Jianhua and Lu, Le and Summers, Ronald M.},
doi = {10.1109/ISBI.2015.7163826},
eprint = {1504.04003},
month = {apr},
title = {{Anatomy-specific classification of medical images using deep convolutional nets}},
url = {http://arxiv.org/abs/1504.04003},
year = {2015}
}
@article{Camlica2015,
abstract = {Good results on image classification and retrieval using support vector machines (SVM) with local binary patterns (LBPs) as features have been extensively reported in the literature where an entire image is retrieved or classified. In contrast, in medical imaging, not all parts of the image may be equally significant or relevant to the image retrieval application at hand. For instance, in a lung x-ray image, the lung region may contain a tumour, hence being highly significant, whereas the surrounding area does not contain significant information from a medical diagnosis perspective. In this paper, we propose to detect salient regions of images during training and fold the data to reduce the effect of irrelevant regions. As a result, smaller image areas will be used for LBP features calculation and consequently classification by SVM. We use the IRMA 2009 dataset with 14,410 x-ray images to verify the performance of the proposed approach. The results demonstrate the benefits of the saliency-based folding approach that delivers classification accuracies comparable with the state-of-the-art but exhibits lower computational cost and storage requirements, factors highly important for big data analytics.},
archivePrefix = {arXiv},
arxivId = {1509.04619},
author = {Camlica, Zehra and Tizhoosh, H. R. and Khalvati, Farzad},
doi = {10.1109/ICMLA.2015.131},
eprint = {1509.04619},
month = {sep},
title = {{Medical Image Classification via SVM using LBP Features from Saliency-Based Folded Data}},
url = {http://arxiv.org/abs/1509.04619},
year = {2015}
}
@article{Cho2015,
abstract = {The use of Convolutional Neural Networks (CNN) in natural image classification systems has produced very impressive results. Combined with the inherent nature of medical images that make them ideal for deep-learning, further application of such systems to medical image classification holds much promise. However, the usefulness and potential impact of such a system can be completely negated if it does not reach a target accuracy. In this paper, we present a study on determining the optimum size of the training data set necessary to achieve high classification accuracy with low variance in medical image classification systems. The CNN was applied to classify axial Computed Tomography (CT) images into six anatomical classes. We trained the CNN using six different sizes of training data set (5, 10, 20, 50, 100, and 200) and then tested the resulting system with a total of 6000 CT images. All images were acquired from the Massachusetts General Hospital (MGH) Picture Archiving and Communication System (PACS). Using this data, we employ the learning curve approach to predict classification accuracy at a given training sample size. Our research will present a general methodology for determining the training data set size necessary to achieve a certain target classification accuracy that can be easily applied to other problems within such systems.},
archivePrefix = {arXiv},
arxivId = {1511.06348},
author = {Cho, Junghwan and Lee, Kyewook and Shin, Ellie and Choy, Garry and Do, Synho},
eprint = {1511.06348},
month = {nov},
title = {{How much data is needed to train a medical image deep learning system to achieve necessary high accuracy?}},
url = {http://arxiv.org/abs/1511.06348},
year = {2015}
}
@article{Mishkin2016,
abstract = {The paper systematically studies the impact of a range of recent advances in CNN architectures and learning methods on the object categorization (ILSVRC) problem. The evaluation tests the influence of the following choices of the architecture: non-linearity (ReLU, ELU, maxout, compatibility with batch normalization), pooling variants (stochastic, max, average, mixed), network width, classifier design (convolutional, fully-connected, SPP), image pre-processing, and of learning parameters: learning rate, batch size, cleanliness of the data, etc. The performance gains of the proposed modifications are first tested individually and then in combination. The sum of individual gains is bigger than the observed improvement when all modifications are introduced, but the "deficit" is small suggesting independence of their benefits. We show that the use of 128x128 pixel images is sufficient to make qualitative conclusions about optimal network structure that hold for the full size Caffe and VGG nets. The results are obtained an order of magnitude faster than with the standard 224 pixel images.},
archivePrefix = {arXiv},
arxivId = {1606.02228},
author = {Mishkin, Dmytro and Sergievskiy, Nikolay and Matas, Jiri},
doi = {10.1016/j.cviu.2017.05.007},
eprint = {1606.02228},
month = {jun},
title = {{Systematic evaluation of CNN advances on the ImageNet}},
url = {http://arxiv.org/abs/1606.02228},
year = {2016}
}
@article{Anderson2016,
abstract = {In this paper we present a technique to train neural network models on small amounts of data. Current methods for training neural networks on small amounts of rich data typically rely on strategies such as fine-tuning a pre-trained neural network or the use of domain-specific hand-engineered features. Here we take the approach of treating network layers, or entire networks, as modules and combine pre-trained modules with untrained modules, to learn the shift in distributions between data sets. The central impact of using a modular approach comes from adding new representations to a network, as opposed to replacing representations via fine-tuning. Using this technique, we are able to surpass results using standard fine-tuning transfer learning approaches, and we are also able to significantly increase performance over such approaches when using smaller amounts of data.},
archivePrefix = {arXiv},
arxivId = {1611.01714},
author = {Anderson, Ark and Shaffer, Kyle and Yankov, Artem and Corley, Court D. and Hodas, Nathan O.},
eprint = {1611.01714},
month = {nov},
title = {{Beyond Fine Tuning: A Modular Approach to Learning on Small Data}},
url = {http://arxiv.org/abs/1611.01714},
year = {2016}
}
@article{Cole2016,
abstract = {Machine learning analysis of neuroimaging data can accurately predict chronological age in healthy people and deviations from healthy brain ageing have been associated with cognitive impairment and disease. Here we sought to further establish the credentials of "brain-predicted age" as a biomarker of individual differences in the brain ageing process, using a predictive modelling approach based on deep learning, and specifically convolutional neural networks (CNN), and applied to both pre-processed and raw T1-weighted MRI data. Firstly, we aimed to demonstrate the accuracy of CNN brain-predicted age using a large dataset of healthy adults (N = 2001). Next, we sought to establish the heritability of brain-predicted age using a sample of monozygotic and dizygotic female twins (N = 62). Thirdly, we examined the test-retest and multi-centre reliability of brain-predicted age using two samples (within-scanner N = 20; between-scanner N = 11). CNN brain-predicted ages were generated and compared to a Gaussian Process Regression (GPR) approach, on all datasets. Input data were grey matter (GM) or white matter (WM) volumetric maps generated by Statistical Parametric Mapping (SPM) or raw data. Brain-predicted age represents an accurate, highly reliable and genetically-valid phenotype, that has potential to be used as a biomarker of brain ageing. Moreover, age predictions can be accurately generated on raw T1-MRI data, substantially reducing computation time for novel data, bringing the process closer to giving real-time information on brain health in clinical settings.},
archivePrefix = {arXiv},
arxivId = {1612.02572},
author = {Cole, James H. and Poudel, Rudra P. K. and Tsagkrasoulis, Dimosthenis and Caan, Matthan W. A. and Steves, Claire and Spector, Tim D. and Montana, Giovanni},
eprint = {1612.02572},
month = {dec},
title = {{Predicting brain age with deep learning from raw imaging data results in a reliable and heritable biomarker}},
url = {http://arxiv.org/abs/1612.02572},
year = {2016}
}
@article{Wu2017,
abstract = {Recently, DNN model compression based on network architecture design, e.g., SqueezeNet, attracted a lot of attention. No accuracy drop on image classification is observed on these extremely compact networks, compared to well-known models. An emerging question, however, is whether these model compression techniques hurt DNN's learning ability other than classifying images on a single dataset. Our preliminary experiment shows that these compression methods could degrade domain adaptation (DA) ability, though the classification performance is preserved. Therefore, we propose a new compact network architecture and unsupervised DA method in this paper. The DNN is built on a new basic module Conv-M which provides more diverse feature extractors without significantly increasing parameters. The unified framework of our DA method will simultaneously learn invariance across domains, reduce divergence of feature representations, and adapt label prediction. Our DNN has 4.1M parameters, which is only 6.7{\%} of AlexNet or 59{\%} of GoogLeNet. Experiments show that our DNN obtains GoogLeNet-level accuracy both on classification and DA, and our DA method slightly outperforms previous competitive ones. Put all together, our DA strategy based on our DNN achieves state-of-the-art on sixteen of the eighteen DA tasks on the popular Office-31 and Office-Caltech datasets.},
archivePrefix = {arXiv},
arxivId = {1703.04071},
author = {Wu, Chunpeng and Wen, Wei and Afzal, Tariq and Zhang, Yongmei and Chen, Yiran and Li, Hai},
eprint = {1703.04071},
month = {mar},
title = {{A Compact DNN: Approaching GoogLeNet-Level Accuracy of Classification and Domain Adaptation}},
url = {http://arxiv.org/abs/1703.04071},
year = {2017}
}
@article{Smith2015,
abstract = {It is known that the learning rate is the most important hyper-parameter to tune for training deep neural networks. This paper describes a new method for setting the learning rate, named cyclical learning rates, which practically eliminates the need to experimentally find the best values and schedule for the global learning rates. Instead of monotonically decreasing the learning rate, this method lets the learning rate cyclically vary between reasonable boundary values. Training with cyclical learning rates instead of fixed values achieves improved classification accuracy without a need to tune and often in fewer iterations. This paper also describes a simple way to estimate "reasonable bounds" -- linearly increasing the learning rate of the network for a few epochs. In addition, cyclical learning rates are demonstrated on the CIFAR-10 and CIFAR-100 datasets with ResNets, Stochastic Depth networks, and DenseNets, and the ImageNet dataset with the AlexNet and GoogLeNet architectures. These are practical tools for everyone who trains neural networks.},
archivePrefix = {arXiv},
arxivId = {1506.01186},
author = {Smith, Leslie N.},
eprint = {1506.01186},
month = {jun},
title = {{Cyclical Learning Rates for Training Neural Networks}},
url = {http://arxiv.org/abs/1506.01186},
year = {2015}
}
@article{Xu2017,
abstract = {Image matting is a fundamental computer vision problem and has many applications. Previous algorithms have poor performance when an image has similar foreground and background colors or complicated textures. The main reasons are prior methods 1) only use low-level features and 2) lack high-level context. In this paper, we propose a novel deep learning based algorithm that can tackle both these problems. Our deep model has two parts. The first part is a deep convolutional encoder-decoder network that takes an image and the corresponding trimap as inputs and predict the alpha matte of the image. The second part is a small convolutional network that refines the alpha matte predictions of the first network to have more accurate alpha values and sharper edges. In addition, we also create a large-scale image matting dataset including 49300 training images and 1000 testing images. We evaluate our algorithm on the image matting benchmark, our testing set, and a wide variety of real images. Experimental results clearly demonstrate the superiority of our algorithm over previous methods.},
archivePrefix = {arXiv},
arxivId = {1703.03872},
author = {Xu, Ning and Price, Brian and Cohen, Scott and Huang, Thomas},
eprint = {1703.03872},
month = {mar},
title = {{Deep Image Matting}},
url = {http://arxiv.org/abs/1703.03872},
year = {2017}
}
@article{Ravishankar2017,
abstract = {The ability to automatically learn task specific feature representations has led to a huge success of deep learning methods. When large training data is scarce, such as in medical imaging problems, transfer learning has been very effective. In this paper, we systematically investigate the process of transferring a Convolutional Neural Network, trained on ImageNet images to perform image classification, to the kidney detection problem in ultrasound images. We study how the detection performance depends on the extent of transfer. We show that a transferred and tuned CNN can outperform a state-of-the-art feature engineered pipeline and a hybridization of these two techniques achieves 20{\%} higher performance. We also investigate the evolution of intermediate response images from our network. Finally, we compare these responses to state-of-the-art image processing filters in order to gain greater insight into how transfer learning is able to effectively manage widely varying imaging regimes.},
archivePrefix = {arXiv},
arxivId = {1704.06040},
author = {Ravishankar, Hariharan and Sudhakar, Prasad and Venkataramani, Rahul and Thiruvenkadam, Sheshadri and Annangi, Pavan and Babu, Narayanan and Vaidya, Vivek},
eprint = {1704.06040},
month = {apr},
title = {{Understanding the Mechanisms of Deep Transfer Learning for Medical Images}},
url = {http://arxiv.org/abs/1704.06040},
year = {2017}
}
@article{Razzak2017,
abstract = {The healthcare sector is different from other industries: it is a high-priority sector, and people expect the highest level of care and service regardless of cost. Even so, it does not meet social expectations despite consuming a huge percentage of budgets. Most interpretation of medical data is done by medical experts, and human interpretation of images is quite limited due to its subjectivity, the complexity of the images, extensive variation across interpreters, and fatigue. Following its success in other real-world applications, deep learning is providing exciting, accurate solutions for medical imaging and is seen as a key method for future applications in the health sector. In this chapter, we discuss state-of-the-art deep learning architectures and their optimization as used for medical image segmentation and classification. In the last section, we discuss the challenges of deep learning based methods for medical imaging and open research issues.},
archivePrefix = {arXiv},
arxivId = {1704.06825},
author = {Razzak, Muhammad Imran and Naz, Saeeda and Zaib, Ahmad},
eprint = {1704.06825},
month = {apr},
title = {{Deep Learning for Medical Image Processing: Overview, Challenges and Future}},
url = {http://arxiv.org/abs/1704.06825},
year = {2017}
}
@article{Shwartz-Ziv2017,
abstract = {Despite their great success, there is still no comprehensive theoretical understanding of learning with Deep Neural Networks (DNNs) or their inner organization. Previous work proposed to analyze DNNs in the \textit{Information Plane}; i.e., the plane of the Mutual Information values that each layer preserves on the input and output variables. They suggested that the goal of the network is to optimize the Information Bottleneck (IB) tradeoff between compression and prediction, successively, for each layer. In this work we follow up on this idea and demonstrate the effectiveness of the Information-Plane visualization of DNNs. Our main results are: (i) most of the training epochs in standard DL are spent on \emph{compression} of the input to efficient representation and not on fitting the training labels. (ii) The representation compression phase begins when the training error becomes small and the Stochastic Gradient Descent (SGD) epochs change from a fast drift to smaller training error into a stochastic relaxation, or random diffusion, constrained by the training error value. (iii) The converged layers lie on or very close to the Information Bottleneck (IB) theoretical bound, and the maps from the input to any hidden layer and from this hidden layer to the output satisfy the IB self-consistent equations. This generalization through noise mechanism is unique to Deep Neural Networks and absent in one layer networks. (iv) The training time is dramatically reduced when adding more hidden layers. Thus the main advantage of the hidden layers is computational. This can be explained by the reduced relaxation time, as it scales super-linearly (exponentially for simple diffusion) with the information compression from the previous layer.},
archivePrefix = {arXiv},
arxivId = {1703.00810},
author = {Shwartz-Ziv, Ravid and Tishby, Naftali},
eprint = {1703.00810},
month = {mar},
title = {{Opening the Black Box of Deep Neural Networks via Information}},
url = {http://arxiv.org/abs/1703.00810},
year = {2017}
}
@article{Qassim2017,
abstract = {Deep learning has given way to a new era of machine learning, apart from computer vision. Convolutional neural networks have been implemented in image classification, segmentation and object detection. Despite recent advancements, we are still in the very early stages and have yet to settle on best practices for network architecture in terms of deep design, small size and short training time. In this work, we propose a very deep neural network comprised of 16 convolutional layers compressed with the Fire Module adapted from the SqueezeNet model. We also call for the addition of residual connections to help suppress degradation. This model can be implemented on almost every neural network model with fully incorporated residual learning. The proposed model, Residual-Squeeze-VGG16 (ResSquVGG16), was trained from scratch on the large-scale MIT Places365-Standard scene dataset. In our tests, the model performed with accuracy similar to the pre-trained VGG16 model in Top-1 and Top-5 validation accuracy while also enjoying a 23.86{\%} reduction in training time and an 88.4{\%} reduction in size.},
archivePrefix = {arXiv},
arxivId = {1705.03004},
author = {Qassim, Hussam and Feinzimer, David and Verma, Abhishek},
eprint = {1705.03004},
month = {may},
title = {{Residual Squeeze VGG16}},
url = {http://arxiv.org/abs/1705.03004},
year = {2017}
}
@article{Sarraf2016,
abstract = {Recently, machine learning techniques, especially predictive modeling and pattern recognition, have become important methods in the biomedical sciences, from drug delivery systems to medical imaging, assisting researchers in gaining a deeper understanding of entire issues and solving complex medical problems. Deep learning is a powerful machine learning approach for classification that extracts low- to high-level features. In this paper, we used a convolutional neural network to classify Alzheimer's brains against normal, healthy brains. The importance of classifying this kind of medical data lies in the potential to develop a predictive model or system that recognizes the disease in normal subjects or estimates the stage of the disease. Classification of clinical data such as Alzheimer's disease has always been challenging, and the most problematic part has always been selecting the most discriminative features. Using a Convolutional Neural Network (CNN) and the famous LeNet-5 architecture, we successfully classified structural MRI data of Alzheimer's subjects from normal controls, where the accuracy on test data reached 98.84{\%}. This experiment suggests that the shift- and scale-invariant features extracted by a CNN followed by deep learning classification are a powerful method for distinguishing clinical data from healthy data in fMRI. This approach also enables us to expand our methodology to predict more complicated systems.},
archivePrefix = {arXiv},
arxivId = {1607.06583},
author = {Sarraf, Saman and Tofighi, Ghassem},
eprint = {1607.06583},
month = {jul},
title = {{Classification of Alzheimer's Disease Structural MRI Data by Deep Learning Convolutional Neural Networks}},
url = {http://arxiv.org/abs/1607.06583},
year = {2016}
}
@article{Grace2017,
abstract = {Advances in artificial intelligence (AI) will transform modern life by reshaping transportation, health, science, finance, and the military. To adapt public policy, we need to better anticipate these advances. Here we report the results from a large survey of machine learning researchers on their beliefs about progress in AI. Researchers predict AI will outperform humans in many activities in the next ten years, such as translating languages (by 2024), writing high-school essays (by 2026), driving a truck (by 2027), working in retail (by 2031), writing a bestselling book (by 2049), and working as a surgeon (by 2053). Researchers believe there is a 50{\%} chance of AI outperforming humans in all tasks in 45 years and of automating all human jobs in 120 years, with Asian respondents expecting these dates much sooner than North Americans. These results will inform discussion amongst researchers and policymakers about anticipating and managing trends in AI.},
archivePrefix = {arXiv},
arxivId = {1705.08807},
author = {Grace, Katja and Salvatier, John and Dafoe, Allan and Zhang, Baobao and Evans, Owain},
eprint = {1705.08807},
month = {may},
title = {{When Will AI Exceed Human Performance? Evidence from AI Experts}},
url = {http://arxiv.org/abs/1705.08807},
year = {2017}
}
@article{Rolnick2017,
abstract = {Deep neural networks trained on large supervised datasets have led to impressive results in recent years. However, since well-annotated datasets can be prohibitively expensive and time-consuming to collect, recent work has explored the use of larger but noisy datasets that can be more easily obtained. In this paper, we investigate the behavior of deep neural networks on training sets with massively noisy labels. We show that successful learning is possible even with an essentially arbitrary amount of noise. For example, on MNIST we find that accuracy of above 90 percent is still attainable even when the dataset has been diluted with 100 noisy examples for each clean example. Such behavior holds across multiple patterns of label noise, even when noisy labels are biased towards confusing classes. Further, we show how the required dataset size for successful training increases with higher label noise. Finally, we present simple actionable techniques for improving learning in the regime of high label noise.},
archivePrefix = {arXiv},
arxivId = {1705.10694},
author = {Rolnick, David and Veit, Andreas and Belongie, Serge and Shavit, Nir},
eprint = {1705.10694},
month = {may},
title = {{Deep Learning is Robust to Massive Label Noise}},
url = {http://arxiv.org/abs/1705.10694},
year = {2017}
}
@article{Tajbakhsh2017,
abstract = {Training a deep convolutional neural network (CNN) from scratch is difficult because it requires a large amount of labeled training data and a great deal of expertise to ensure proper convergence. A promising alternative is to fine-tune a CNN that has been pre-trained using, for instance, a large set of labeled natural images. However, the substantial differences between natural and medical images may advise against such knowledge transfer. In this paper, we seek to answer the following central question in the context of medical image analysis: \emph{Can the use of pre-trained deep CNNs with sufficient fine-tuning eliminate the need for training a deep CNN from scratch?} To address this question, we considered 4 distinct medical imaging applications in 3 specialties (radiology, cardiology, and gastroenterology) involving classification, detection, and segmentation from 3 different imaging modalities, and investigated how the performance of deep CNNs trained from scratch compared with the pre-trained CNNs fine-tuned in a layer-wise manner. Our experiments consistently demonstrated that (1) the use of a pre-trained CNN with adequate fine-tuning outperformed or, in the worst case, performed as well as a CNN trained from scratch; (2) fine-tuned CNNs were more robust to the size of training sets than CNNs trained from scratch; (3) neither shallow tuning nor deep tuning was the optimal choice for a particular application; and (4) our layer-wise fine-tuning scheme could offer a practical way to reach the best performance for the application at hand based on the amount of available data.},
archivePrefix = {arXiv},
arxivId = {1706.00712},
author = {Tajbakhsh, Nima and Shin, Jae Y. and Gurudu, Suryakanth R. and Hurst, R. Todd and Kendall, Christopher B. and Gotway, Michael B. and Liang, Jianming},
doi = {10.1109/TMI.2016.2535302},
eprint = {1706.00712},
month = {jun},
title = {{Convolutional Neural Networks for Medical Image Analysis: Full Training or Fine Tuning?}},
url = {http://arxiv.org/abs/1706.00712},
year = {2017}
}
@article{Dong2017,
abstract = {A major challenge in brain tumor treatment planning and quantitative evaluation is determination of the tumor extent. The noninvasive magnetic resonance imaging (MRI) technique has emerged as a front-line diagnostic tool for brain tumors without ionizing radiation. Manual segmentation of brain tumor extent from 3D MRI volumes is a very time-consuming task and the performance relies heavily on the operator's experience. In this context, a reliable, fully automatic method for brain tumor segmentation is necessary for an efficient measurement of the tumor extent. In this study, we propose a fully automatic method for brain tumor segmentation, which is developed using U-Net based deep convolutional networks. Our method was evaluated on Multimodal Brain Tumor Image Segmentation (BRATS 2015) datasets, which contain 220 high-grade brain tumor and 54 low-grade tumor cases. Cross-validation has shown that our method can achieve promising segmentation results efficiently.},
archivePrefix = {arXiv},
arxivId = {1705.03820},
author = {Dong, Hao and Yang, Guang and Liu, Fangde and Mo, Yuanhan and Guo, Yike},
eprint = {1705.03820},
month = {may},
title = {{Automatic Brain Tumor Detection and Segmentation Using U-Net Based Fully Convolutional Networks}},
url = {http://arxiv.org/abs/1705.03820},
year = {2017}
}
@article{Litjens2017,
abstract = {Deep learning algorithms, in particular convolutional networks, have rapidly become a methodology of choice for analyzing medical images. This paper reviews the major deep learning concepts pertinent to medical image analysis and summarizes over 300 contributions to the field, most of which appeared in the last year. We survey the use of deep learning for image classification, object detection, segmentation, registration, and other tasks and provide concise overviews of studies per application area. Open challenges and directions for future research are discussed.},
archivePrefix = {arXiv},
arxivId = {1702.05747},
author = {Litjens, Geert and Kooi, Thijs and Bejnordi, Babak Ehteshami and Setio, Arnaud Arindra Adiyoso and Ciompi, Francesco and Ghafoorian, Mohsen and van der Laak, Jeroen A. W. M. and van Ginneken, Bram and S{\'{a}}nchez, Clara I.},
eprint = {1702.05747},
month = {feb},
title = {{A Survey on Deep Learning in Medical Image Analysis}},
url = {http://arxiv.org/abs/1702.05747},
year = {2017}
}