% ref.bib -- BibTeX bibliography database.
% (GitHub web-interface extraction residue -- page chrome and per-line gutter
% numbers -- removed; the database proper begins below.)
% Encoding: UTF-8
@article{wright2009robust,
  author    = {Wright, John and Yang, Allen Y. and Ganesh, Arvind and Sastry, S. Shankar and Ma, Yi},
  title     = {Robust Face Recognition via Sparse Representation},
  journal   = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
  year      = {2009},
  volume    = {31},
  number    = {2},
  pages     = {210--227},
  publisher = {IEEE},
}
@inproceedings{pang2002thumbs,
  author       = {Pang, Bo and Lee, Lillian and Vaithyanathan, Shivakumar},
  title        = {Thumbs Up? Sentiment Classification Using Machine Learning Techniques},
  booktitle    = {Proceedings of the {ACL}-02 Conference on Empirical Methods in Natural Language Processing -- Volume 10},
  pages        = {79--86},
  year         = {2002},
  organization = {Association for Computational Linguistics},
}
@article{androutsopoulos2000spam,
  author        = {Androutsopoulos, Ion and Koutsias, John and Chandrinos, Konstantinos V. and Paliouras, George and Spyropoulos, Constantine D.},
  title         = {An Evaluation of Naive {Bayesian} Anti-Spam Filtering},
  journal       = {arXiv preprint cs/0006013},
  eprint        = {cs/0006013},
  archiveprefix = {arXiv},
  year          = {2000},
}
@inproceedings{lecun1990handwritten,
  author    = {LeCun, Yann and Boser, Bernhard E. and Denker, John S. and Henderson, Donnie and Howard, Richard E. and Hubbard, Wayne E. and Jackel, Lawrence D.},
  title     = {Handwritten Digit Recognition with a Back-Propagation Network},
  booktitle = {Advances in Neural Information Processing Systems},
  pages     = {396--404},
  year      = {1990},
}
@inproceedings{he2016resnet,
  author    = {He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
  title     = {Deep Residual Learning for Image Recognition},
  booktitle = {2016 {IEEE} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  year      = {2016},
  pages     = {770--778},
  month     = jun,
  doi       = {10.1109/CVPR.2016.90},
  keywords  = {image classification;learning (artificial intelligence);neural nets;object detection;CIFAR-10;COCO object detection dataset;COCO segmentation;ILSVRC \& COCO 2015 competitions;ILSVRC 2015 classification task;ImageNet dataset;ImageNet localization;ImageNet test set;VGG nets;deep residual learning;deep residual nets;deeper neural network training;image recognition;residual function learning;residual nets;visual recognition tasks;Complexity theory;Degradation;Image recognition;Image segmentation;Neural networks;Training;Visualization},
}
@inproceedings{ribeiro2016kdd,
  author    = {Ribeiro, Marco Tulio and Singh, Sameer and Guestrin, Carlos},
  title     = {{``Why Should I Trust You?''}: Explaining the Predictions of Any Classifier},
  booktitle = {Proceedings of the 22nd {ACM} {SIGKDD} International Conference on Knowledge Discovery and Data Mining},
  year      = {2016},
  pages     = {1135--1144},
  numpages  = {10},
  location  = {San Francisco, California, USA},
  publisher = {ACM},
  address   = {New York, NY, USA},
  isbn      = {978-1-4503-4232-2},
  doi       = {10.1145/2939672.2939778},
  acmid     = {2939778},
  keywords  = {black box classifier, explaining machine learning, interpretability, interpretable machine learning},
}
@techreport{clancey1981tech,
  author      = {Clancey, William},
  title       = {The Epistemology of a Rule-Based Expert System: A Framework for Explanation},
  institution = {Stanford University},
  number      = {CS-TR-81-896},
  year        = {1981},
  address     = {Stanford, CA, USA},
  url         = {http://www.ncstrl.org:8900/ncstrl/servlet/search?formname=detail\&id=oai%3Ancstrlh%3Astan%3ASTAN%2F%2FCS-TR-81-896},
}
@article{neches1985tse,
  author   = {Neches, R. and Swartout, W. and Moore, J.},
  title    = {Enhanced Maintenance and Explanation of Expert Systems Through Explicit Models of Their Development},
  journal  = {IEEE Transactions on Software Engineering},
  year     = {1985},
  volume   = {SE-11},
  number   = {11},
  pages    = {1337--1351},
  month    = nov,
  doi      = {10.1109/TSE.1985.231882},
  issn     = {0098-5589},
  keywords = {Expert systems;explanation;natural language generation;software development;software maintenance;Collaborative work;Costs;Encoding;Expert systems;History;Knowledge engineering;Knowledge representation;Natural languages;Programming;Software maintenance;Expert systems;explanation;natural language generation;software development;software maintenance},
}
@article{swartout1991expert,
  author   = {Swartout, W. and Paris, C. and Moore, J.},
  title    = {Explanations in Knowledge Systems: Design for Explainable Expert Systems},
  journal  = {IEEE Expert},
  year     = {1991},
  volume   = {6},
  number   = {3},
  pages    = {58--64},
  month    = jun,
  doi      = {10.1109/64.87686},
  issn     = {0885-9000},
  keywords = {expert systems;knowledge representation;Common Lisp programs;DARPA;PEA;Program Enhancement Advisor;Strategic Computing Initiative;design;design aspects;explainable expert systems;general problem-solving strategies;knowledge systems;Drugs;Expert systems;Knowledge based systems;Medical diagnostic imaging;Medical expert systems;Problem-solving;Terminology},
}
@inbook{swartout1993expert,
  author    = {Swartout, William and Moore, Johanna},
  editor    = {David, Jean-Marc and Krivine, Jean-Paul and Simmons, Reid},
  title     = {Explanation in Second Generation Expert Systems},
  booktitle = {Second Generation Expert Systems},
  year      = {1993},
  publisher = {Springer Berlin Heidelberg},
  address   = {Berlin, Heidelberg},
  pages     = {543--585},
  isbn      = {978-3-642-77927-5},
  doi       = {10.1007/978-3-642-77927-5_24},
  abstract  = {What is needed for good explanation? This paper begins by considering some desiderata for expert system explanation. These desiderata concern not only the form and content of the explanations, but also the impact of explanation generation on the expert system itself--- how it is built and how it performs. In this paper, we use these desiderata as a yardstick for measuring progress in the field. The paper describes two major developments that have differentiated explanation in second generation systems from explanation in first generation systems: 1) new architectures have been developed that capture more of the knowledge that is needed for explanation, and 2) more powerful explanation generators have been developed in which explanation generation is viewed as a problem-solving activity in its own right. These developments have led to significant improvements in explanation facilities: the explanations they offer are richer and more coherent, they are better adapted to the user's needs and knowledge, and the explanation facilities can offer clarifying explanations to correct misunderstandings.},
}
@techreport{darpa2017xai,
  author      = {Gunning, David},
  title       = {Explainable Artificial Intelligence ({XAI})},
  institution = {Defense Advanced Research Projects Agency ({DARPA})},
  year        = {2017},
}
@online{google2017pair,
  author  = {{Google Inc.}},
  title   = {{PAIR} | People + AI Research Initiative},
  year    = {2017},
  url     = {http://ai.google/pair},
  urldate = {2017-10-14},
}
@phdthesis{brown1987aa,
  author  = {Brown, Marc H.},
  title   = {Algorithm Animation},
  school  = {Brown University},
  year    = {1987},
  address = {Providence, RI, USA},
  note    = {UMI Order No. GAX87-15461},
}
@article{stasko1990aa,
  author   = {Stasko, J. T.},
  title    = {{Tango}: A Framework and System for Algorithm Animation},
  journal  = {Computer},
  year     = {1990},
  volume   = {23},
  number   = {9},
  pages    = {27--39},
  month    = sep,
  doi      = {10.1109/2.58216},
  issn     = {0018-9162},
  keywords = {computer animation;software engineering;Tango;algorithm animation;first-fit bin-packing algorithm;path-transition;program development;program evaluation;program understanding;Algorithm design and analysis;Animation;Computer science;Computerized monitoring;Data visualization;Education;Operating systems;Programming profession;Rendering (computer graphics);System performance},
}
@inproceedings{price1992taxonomy,
  author    = {Price, B. A. and Small, I. S. and Baecker, R. M.},
  title     = {A Taxonomy of Software Visualization},
  booktitle = {System Sciences, 1992. Proceedings of the Twenty-Fifth Hawaii International Conference on},
  volume    = {ii},
  pages     = {597--606},
  year      = {1992},
  publisher = {IEEE Publishing},
  isbn      = {0-8186-2420-5},
  language  = {eng},
  abstract  = {Software visualization is the use of interactive computer graphics, typography, graphic design, animation, and cinematography to enhance the interface between the software engineer or the computer science student and their programs. Although several taxonomies of software visualization have been proposed, they use few dimensions and do not span the space of important distinctions between systems. The authors propose a novel and systematic taxonomy of six areas making up thirty characteristic features of software visualization technology. The taxonomy is presented and illustrated in terms of its application to seven systems of historic importance and technical interest.},
}
@inproceedings{zeiler2014eccv,
  author    = {Zeiler, Matthew and Fergus, Rob},
  editor    = {Fleet, David and Pajdla, Tomas and Schiele, Bernt and Tuytelaars, Tinne},
  title     = {Visualizing and Understanding Convolutional Networks},
  booktitle = {Computer Vision -- ECCV 2014: 13th European Conference, Zurich, Switzerland, September 6-12, 2014, Proceedings, Part I},
  year      = {2014},
  publisher = {Springer International Publishing},
  address   = {Cham},
  pages     = {818--833},
  isbn      = {978-3-319-10590-1},
  doi       = {10.1007/978-3-319-10590-1_53},
  abstract  = {Large Convolutional Network models have recently demonstrated impressive classification performance on the ImageNet benchmark Krizhevsky et al. [18]. However there is no clear understanding of why they perform so well, or how they might be improved. In this paper we explore both issues. We introduce a novel visualization technique that gives insight into the function of intermediate feature layers and the operation of the classifier. Used in a diagnostic role, these visualizations allow us to find model architectures that outperform Krizhevsky et al on the ImageNet classification benchmark. We also perform an ablation study to discover the performance contribution from different model layers. We show our ImageNet model generalizes well to other datasets: when the softmax classifier is retrained, it convincingly beats the current state-of-the-art results on Caltech-101 and Caltech-256 datasets.},
}
@article{bach15plos,
  author  = {Sebastian Bach and Alexander Binder and Gr{\'e}goire Montavon and Frederick Klauschen and Klaus-Robert M{\"u}ller and Wojciech Samek},
  title   = {On Pixel-wise Explanations for Non-Linear Classifier Decisions by Layer-wise Relevance Propagation},
  journal = {PLOS ONE},
  year    = {2015},
  volume  = {10},
  number  = {7},
  pages   = {e0130140},
  doi     = {10.1371/journal.pone.0130140},
}
@inproceedings{simonyan14saliency,
  author    = {Karen Simonyan and Andrea Vedaldi and Andrew Zisserman},
  title     = {Deep Inside Convolutional Networks: Visualising Image Classification Models and Saliency Maps},
  booktitle = {International Conference on Learning Representations (ICLR) Workshop},
  year      = {2014},
}
@article{smilkov2017smoothgrad,
  author        = {Daniel Smilkov and Nikhil Thorat and Been Kim and Fernanda B. Vi{\'{e}}gas and Martin Wattenberg},
  title         = {{SmoothGrad}: Removing Noise by Adding Noise},
  journal       = {CoRR},
  volume        = {abs/1706.03825},
  year          = {2017},
  eprint        = {1706.03825},
  archiveprefix = {arXiv},
  url           = {http://arxiv.org/abs/1706.03825},
}
@inproceedings{zintgraf17visualize,
  author    = {Luisa Zintgraf and Taco Cohen and Tameem Adel and Max Welling},
  title     = {Visualizing Deep Neural Network Decisions: Prediction Difference Analysis},
  booktitle = {International Conference on Learning Representations (ICLR)},
  year      = {2017},
}
@inproceedings{murdoch2017rule,
  author    = {Murdoch, W James and Szlam, Arthur},
  title     = {Automatic Rule Extraction from Long Short Term Memory Networks},
  booktitle = {International Conference on Learning Representations (ICLR)},
  year      = {2017},
}
@book{breiman1984classificationtree,
  author    = {Breiman, Leo and Friedman, Jerome and Stone, Charles J. and Olshen, Richard A.},
  title     = {Classification and Regression Trees},
  year      = {1984},
  publisher = {CRC Press},
}
@article{quinlan1987simplifying,
  author    = {Quinlan, J. Ross},
  title     = {Simplifying Decision Trees},
  journal   = {International Journal of Man-Machine Studies},
  volume    = {27},
  number    = {3},
  pages     = {221--234},
  year      = {1987},
  publisher = {Elsevier},
}
@article{letham2015stroke,
  author    = {Letham, Benjamin and Rudin, Cynthia and McCormick, Tyler and Madigan, David},
  title     = {Interpretable Classifiers Using Rules and {Bayesian} Analysis: Building a Better Stroke Prediction Model},
  journal   = {The Annals of Applied Statistics},
  volume    = {9},
  number    = {3},
  pages     = {1350--1371},
  year      = {2015},
  month     = sep,
  publisher = {The Institute of Mathematical Statistics},
  doi       = {10.1214/15-AOAS848},
}
@inproceedings{wang2015falling,
  author    = {Fulton Wang and Cynthia Rudin},
  title     = {Falling Rule Lists},
  booktitle = {Proceedings of the Eighteenth International Conference on Artificial Intelligence and Statistics},
  pages     = {1013--1022},
  year      = {2015},
  editor    = {Guy Lebanon and S. V. N. Vishwanathan},
  volume    = {38},
  series    = {Proceedings of Machine Learning Research},
  address   = {San Diego, California, USA},
  month     = {09--12 May},
  publisher = {PMLR},
  pdf       = {http://proceedings.mlr.press/v38/wang15a.pdf},
  abstract  = {Falling rule lists are classification models consisting of an ordered list of if-then rules, where (i) the order of rules determines which example should be classified by each rule, and (ii) the estimated probability of success decreases monotonically down the list. These kinds of rule lists are inspired by healthcare applications where patients would be stratified into risk sets and the highest at-risk patients should be considered first. We provide a Bayesian framework for learning falling rule lists that does not rely on traditional greedy decision tree learning methods.},
}
@article{wang2017rulesets,
  author  = {Tong Wang and Cynthia Rudin and Finale Doshi-Velez and Yimin Liu and Erica Klampfl and Perry MacNeille},
  title   = {A {Bayesian} Framework for Learning Rule Sets for Interpretable Classification},
  journal = {Journal of Machine Learning Research},
  year    = {2017},
  volume  = {18},
  number  = {70},
  pages   = {1--37},
}
@article{debock2010gam,
  author   = {De Bock, Koen and Coussement, Kristof and Van den Poel, Dirk},
  title    = {Ensemble Classification Based on Generalized Additive Models},
  journal  = {Computational Statistics \& Data Analysis},
  volume   = {54},
  number   = {6},
  pages    = {1535--1546},
  year     = {2010},
  issn     = {0167-9473},
  doi      = {10.1016/j.csda.2009.12.013},
  keywords = {Data mining, Classification, Ensemble learning, GAM, UCI},
}
@article{dudani1976weightedknn,
  author  = {Dudani, S. A.},
  title   = {The Distance-Weighted k-Nearest-Neighbor Rule},
  journal = {IEEE Transactions on Systems, Man, and Cybernetics},
  year    = {1976},
  volume  = {SMC-6},
  number  = {4},
  pages   = {325--327},
  month   = apr,
  doi     = {10.1109/TSMC.1976.5408784},
  issn    = {0018-9472},
}
@article{keller1985fuzzyknn,
  author   = {Keller, J. M. and Gray, M. R. and Givens, J. A.},
  title    = {A Fuzzy K-Nearest Neighbor Algorithm},
  journal  = {IEEE Transactions on Systems, Man, and Cybernetics},
  year     = {1985},
  volume   = {SMC-15},
  number   = {4},
  pages    = {580--585},
  month    = jul,
  doi      = {10.1109/TSMC.1985.6313426},
  issn     = {0018-9472},
  keywords = {Bayes methods;fuzzy set theory;pattern recognition;Bayes decision;K-nearest neighbor decision rule;classification;fuzzy memberships;fuzzy sets;labeled samples;pattern recognition;Classification algorithms;Error analysis;Iris;Pattern recognition;Prototypes;Support vector machine classification;Vectors},
}
@inproceedings{liu2015sparsecnn,
  author    = {Baoyuan Liu and Min Wang and H. Foroosh and M. Tappen and M. Penksy},
  title     = {Sparse Convolutional Neural Networks},
  booktitle = {2015 {IEEE} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  year      = {2015},
  pages     = {806--814},
  month     = jun,
  doi       = {10.1109/CVPR.2015.7298681},
  issn      = {1063-6919},
  keywords  = {matrix decomposition;matrix multiplication;neural nets;object detection;SCNN model;cascade model;object detection problem;sparse convolutional neural networks;sparse decomposition;sparse fully connected layers;sparse matrix multiplication algorithm;Accuracy;Convolutional codes;Kernel;Matrix decomposition;Neural networks;Redundancy;Sparse matrices},
}
@article{downs2001simplifysvm,
  author  = {Downs, Tom and Gates, Kevin E and Masters, Annette},
  title   = {Exact simplification of support vector solutions},
  journal = {Journal of Machine Learning Research},
  volume  = {2},
  number  = {Dec},
  pages   = {293--297},
  year    = {2001},
}
@inproceedings{tan2010sparsesvm,
  author    = {Tan, Mingkui and Wang, Li and Tsang, Ivor W.},
  title     = {Learning Sparse {SVM} for Feature Selection on Very High Dimensional Datasets},
  booktitle = {Proceedings of the 27th International Conference on Machine Learning (ICML-10)},
  pages     = {1047--1054},
  year      = {2010},
}
@article{tipping2001sparse,
  author  = {Tipping, Michael E.},
  title   = {Sparse {Bayesian} Learning and the Relevance Vector Machine},
  journal = {Journal of Machine Learning Research},
  volume  = {1},
  number  = {Jun},
  pages   = {211--244},
  year    = {2001},
}
@article{ustun2016supersparse,
  author   = {Ustun, Berk and Rudin, Cynthia},
  title    = {Supersparse linear integer models for optimized medical scoring systems},
  journal  = {Machine Learning},
  year     = {2016},
  month    = mar,
  day      = {01},
  volume   = {102},
  number   = {3},
  pages    = {349--391},
  issn     = {1573-0565},
  doi      = {10.1007/s10994-015-5528-6},
  abstract = {Scoring systems are linear classification models that only require users to add, subtract and multiply a few small numbers in order to make a prediction. These models are in widespread use by the medical community, but are difficult to learn from data because they need to be accurate and sparse, have coprime integer coefficients, and satisfy multiple operational constraints. We present a new method for creating data-driven scoring systems called a Supersparse Linear Integer Model (SLIM). SLIM scoring systems are built by using an integer programming problem that directly encodes measures of accuracy (the 0--1 loss) and sparsity while restricting coefficients to coprime integers. SLIM can seamlessly incorporate a wide range of operational constraints related to accuracy and sparsity, and can produce acceptable models without parameter tuning because of the direct control provided over these quantities. We provide bounds on the testing and training accuracy of SLIM scoring systems, and present a new data reduction technique that can improve scalability by eliminating a portion of the training data beforehand. Our paper includes results from a collaboration with the Massachusetts General Hospital Sleep Laboratory, where SLIM is being used to create a highly tailored scoring system for sleep apnea screening.},
}
@article{bahdanau2014translation,
  author        = {Dzmitry Bahdanau and Kyunghyun Cho and Yoshua Bengio},
  title         = {Neural Machine Translation by Jointly Learning to Align and Translate},
  journal       = {CoRR},
  volume        = {abs/1409.0473},
  year          = {2014},
  eprint        = {1409.0473},
  archiveprefix = {arXiv},
  url           = {http://arxiv.org/abs/1409.0473},
}
@inproceedings{xu15icml,
  author    = {Kelvin Xu and Jimmy Ba and Ryan Kiros and Kyunghyun Cho and Aaron Courville and Ruslan Salakhudinov and Rich Zemel and Yoshua Bengio},
  title     = {Show, Attend and Tell: Neural Image Caption Generation with Visual Attention},
  booktitle = {Proceedings of the 32nd International Conference on Machine Learning},
  pages     = {2048--2057},
  year      = {2015},
  editor    = {Francis Bach and David Blei},
  volume    = {37},
  series    = {Proceedings of Machine Learning Research},
  address   = {Lille, France},
  month     = {07--09 Jul},
  publisher = {PMLR},
  pdf       = {http://proceedings.mlr.press/v37/xuc15.pdf},
  url       = {http://proceedings.mlr.press/v37/xuc15.html},
  abstract  = {Inspired by recent work in machine translation and object detection, we introduce an attention based model that automatically learns to describe the content of images. We describe how we can train this model in a deterministic manner using standard backpropagation techniques and stochastically by maximizing a variational lower bound. We also show through visualization how the model is able to automatically learn to fix its gaze on salient objects while generating the corresponding words in the output sequence. We validate the use of attention with state-of-the-art performance on three benchmark datasets: Flickr8k, Flickr30k and MS COCO.},
}
@article{hendricks16generate,
  author    = {Lisa Hendricks and Zeynep Akata and Marcus Rohrbach and Jeff Donahue and Bernt Schiele and Trevor Darrell},
  title     = {Generating Visual Explanations},
  journal   = {CoRR},
  volume    = {abs/1603.08507},
  year      = {2016},
  url       = {http://arxiv.org/abs/1603.08507},
  timestamp = {Wed, 07 Jun 2017 14:42:49 +0200},
  biburl    = {http://dblp.uni-trier.de/rec/bib/journals/corr/HendricksARDSD16},
  bibsource = {dblp computer science bibliography, http://dblp.org},
}
@techreport{erhan2009techreport,
  author      = {Erhan, Dumitru and Bengio, Yoshua and Courville, Aaron and Vincent, Pascal},
  title       = {Visualizing Higher-Layer Features of a Deep Network},
  institution = {University of Montreal},
  number      = {1341},
  year        = {2009},
  month       = jun,
  note        = {Also presented at the ICML 2009 Workshop on Learning Feature Hierarchies, Montr{\'{e}}al, Canada.},
  abstract    = {Deep architectures have demonstrated state-of-the-art results in a variety of settings, especially with vision datasets. Beyond the model definitions and the quantitative analyses, there is a need for qualitative comparisons of the solutions learned by various deep architectures. The goal of this paper is to find good qualitative interpretations of high level features represented by such models. To this end, we contrast and compare several techniques applied on Stacked Denoising Autoencoders and Deep Belief Networks, trained on several vision datasets. We show that, perhaps counter-intuitively, such interpretation is possible at the unit level, that it is simple to accomplish and that the results are consistent across various techniques. We hope that such techniques will allow researchers in deep architectures to understand more of how and why deep architectures work},
}
@article{lombrozo2006explanation,
  author  = {Lombrozo, Tania},
  title   = {The Structure and Function of Explanations},
  journal = {Trends in Cognitive Sciences},
  volume  = {10},
  number  = {10},
  pages   = {464--470},
  year    = {2006},
  issn    = {1364-6613},
  doi     = {10.1016/j.tics.2006.08.004},
}
@article{hempel1948explanation,
  author  = {Hempel, Carl and Oppenheim, Paul},
  title   = {Studies in the Logic of Explanation},
  journal = {Philosophy of Science},
  volume  = {15},
  number  = {2},
  pages   = {135--175},
  year    = {1948},
  doi     = {10.1086/286983},
}
@article{doshi-velez2017interpretableml,
  author        = {Finale Doshi-Velez and Been Kim},
  title         = {Towards a Rigorous Science of Interpretable Machine Learning},
  journal       = {arXiv preprint arXiv:1702.08608},
  year          = {2017},
  eprint        = {1702.08608},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/1702.08608},
}
@inproceedings{karpathy16rnn,
  author    = {Andrej Karpathy and Justin Johnson and Li Fei-Fei},
  title     = {Visualizing and Understanding Recurrent Networks},
  booktitle = {International Conference on Learning Representations (ICLR) Workshop},
  year      = {2016},
}
@inproceedings{li2016naccl-hlt,
  author    = {Li, Jiwei and Chen, Xinlei and Hovy, Eduard and Jurafsky, Dan},
  title     = {Visualizing and Understanding Neural Models in {NLP}},
  booktitle = {Proceedings of the 2016 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies},
  month     = jun,
  year      = {2016},
  address   = {San Diego, California},
  publisher = {Association for Computational Linguistics},
  pages     = {681--691},
}
@inproceedings{ming2017vast,
  author    = {Yao Ming and Shaozu Cao and Ruixiang Zhang and Zhen Li and Yuanzhe Chen and Yangqiu Song and Huamin Qu},
  title     = {Understanding Hidden Memories of Recurrent Neural Networks},
  booktitle = {Proceedings of the IEEE Conference on Visual Analytics Science and Technology},
  year      = {2017},
}
@article{martens2014explaindocument,
  author     = {Martens, David and Provost, Foster},
  title      = {Explaining Data-Driven Document Classifications},
  journal    = {MIS Quarterly},
  issue_date = {March 2014},
  volume     = {38},
  number     = {1},
  month      = mar,
  year       = {2014},
  issn       = {0276-7783},
  pages      = {73--100},
  numpages   = {28},
  acmid      = {2600523},
  publisher  = {Society for Information Management and The Management Information Systems Research Center},
  address    = {Minneapolis, MN, USA},
  keywords   = {comprehensibility, document classification, instance level explanation, text mining},
}
@article{feraud2002nn,
  author     = {F{\'e}raud, Raphael and Cl{\'e}rot, Fabrice},
  title      = {A Methodology to Explain Neural Network Classification},
  journal    = {Neural Networks},
  issue_date = {March 2002},
  volume     = {15},
  number     = {2},
  month      = mar,
  year       = {2002},
  issn       = {0893-6080},
  pages      = {237--246},
  numpages   = {10},
  doi        = {10.1016/S0893-6080(01)00127-7},
  acmid      = {607596},
  publisher  = {Elsevier Science Ltd.},
  address    = {Oxford, UK, UK},
  keywords   = {classification, clustering, knowledge extraction, neural network, saliency},
}
@inproceedings{arras2017rnn-sentiment,
  author    = {Arras, Leila and Montavon, Gr\'{e}goire and M\"{u}ller, Klaus-Robert and Samek, Wojciech},
  title     = {Explaining Recurrent Neural Network Predictions in Sentiment Analysis},
  booktitle = {Proceedings of the 8th Workshop on Computational Approaches to Subjectivity, Sentiment and Social Media Analysis},
  month     = sep,
  year      = {2017},
  address   = {Copenhagen, Denmark},
  publisher = {Association for Computational Linguistics},
  pages     = {159--168},
  abstract  = {Recently, a technique called Layer-wise Relevance Propagation (LRP) was shown to deliver insightful explanations in the form of input space relevances for understanding feed-forward neural network classification decisions. In the present work, we extend the usage of LRP to recurrent neural networks. We propose a specific propagation rule applicable to multiplicative connections as they arise in recurrent network architectures such as LSTMs and GRUs. We apply our technique to a word-based bi-directional LSTM model on a five-class sentiment prediction task, and evaluate the resulting LRP relevances both qualitatively and quantitatively, obtaining better results than a gradient-based related method which was used in previous work.},
}
@inproceedings{bau2017netdissect,
  author    = {Bau, David and Zhou, Bolei and Khosla, Aditya and Oliva, Aude and Torralba, Antonio},
  title     = {Network Dissection: Quantifying Interpretability of Deep Visual Representations},
  booktitle = {Computer Vision and Pattern Recognition},
  year      = {2017},
}
@inproceedings{koh2017influencefunctions,
  author    = {Pang Wei Koh and Percy Liang},
  title     = {Understanding Black-box Predictions via Influence Functions},
  booktitle = {Proceedings of the 34th International Conference on Machine Learning},
  pages     = {1885--1894},
  year      = {2017},
  editor    = {Doina Precup and Yee Whye Teh},
  volume    = {70},
  series    = {Proceedings of Machine Learning Research},
  address   = {International Convention Centre, Sydney, Australia},
  month     = {06--11 Aug},
  publisher = {PMLR},
  pdf       = {http://proceedings.mlr.press/v70/koh17a/koh17a.pdf},
  abstract  = {How can we explain the predictions of a black-box model? In this paper, we use influence functions — a classic technique from robust statistics — to trace a model’s prediction through the learning algorithm and back to its training data, thereby identifying training points most responsible for a given prediction. To scale up influence functions to modern machine learning settings, we develop a simple, efficient implementation that requires only oracle access to gradients and Hessian-vector products. We show that even on non-convex and non-differentiable models where the theory breaks down, approximations to influence functions can still provide valuable information. On linear models and convolutional neural networks, we demonstrate that influence functions are useful for multiple purposes: understanding model behavior, debugging models, detecting dataset errors, and even creating visually-indistinguishable training-set attacks.},
}
@book{munzner2014visualization,
  author    = {Munzner, Tamara},
  title     = {Visualization Analysis and Design},
  year      = {2014},
  publisher = {CRC Press},
}
@article{alsallakh2017cnn-hierarchy,
  author   = {B. Alsallakh and A. Jourabloo and M. Ye and X. Liu and L. Ren},
  title    = {Do Convolutional Neural Networks Learn Class Hierarchy?},
  journal  = {IEEE Transactions on Visualization and Computer Graphics},
  year     = {2017},
  volume   = {PP},
  number   = {99},
  pages    = {1--1},
  doi      = {10.1109/TVCG.2017.2744683},
  issn     = {1077-2626},
  keywords = {Data visualization;Feature extraction;Image recognition;Neurons;Training;Training data;Convolutional Neural Networks;confusion matrix;deep learning;image classification;large-scale classification},
}
@article{silver2017mastering,
  author    = {Silver, David and Schrittwieser, Julian and Simonyan, Karen and Antonoglou, Ioannis and Huang, Aja and Guez, Arthur and Hubert, Thomas and Baker, Lucas and Lai, Matthew and Bolton, Adrian and Chen, Yutian and Lillicrap, Timothy and Hui, Fan and Sifre, Laurent and van den Driessche, George and Graepel, Thore and Hassabis, Demis},
  title     = {Mastering the Game of {Go} Without Human Knowledge},
  journal   = {Nature},
  volume    = {550},
  number    = {7676},
  pages     = {354--359},
  month     = oct,
  year      = {2017},
  publisher = {Macmillan Publishers Limited},
  issn      = {0028-0836},
  doi       = {10.1038/nature24270},
  abstract  = {A long-standing goal of artificial intelligence is an algorithm that learns, tabula rasa, superhuman proficiency in challenging domains. Recently, AlphaGo became the first program to defeat a world champion in the game of Go. The tree search in AlphaGo evaluated positions and selected moves using deep neural networks. These neural networks were trained by supervised learning from human expert moves, and by reinforcement learning from self-play. Here we introduce an algorithm based solely on reinforcement learning, without human data, guidance or domain knowledge beyond game rules. AlphaGo becomes its own teacher: a neural network is trained to predict AlphaGo’s own move selections and also the winner of AlphaGo’s games. This neural network improves the strength of the tree search, resulting in higher quality move selection and stronger self-play in the next iteration. Starting tabula rasa, our new program AlphaGo Zero achieved superhuman performance, winning 100–0 against the previously published, champion-defeating AlphaGo.}
}
@inproceedings{wirth2000crisp,
  title     = {{CRISP-DM}: Towards a Standard Process Model for Data Mining},
  author    = {Wirth, R{\"u}diger and Hipp, Jochen},
  booktitle = {Proceedings of the 4th International Conference on the Practical Applications of Knowledge Discovery and Data Mining},
  pages     = {29--39},
  year      = {2000}
}
@online{carlton2017ml,
  author  = {Sapp, Carlton},
  title   = {Preparing and Architecting for Machine Learning},
  year    = {2017},
  url     = {https://www.gartner.com/doc/3573617/preparing-architecting-machine-learning},
  urldate = {2017-10-22}
}
@inproceedings{lasalle1990expert-system,
  author    = {La Salle, A. J. and Medsker, L. R.},
  title     = {The Expert System Life Cycle: What Have We Learned from Software Engineering?},
  booktitle = {Proceedings of the 1990 ACM SIGBDP Conference on Trends and Directions in Expert Systems},
  series    = {SIGBDP '90},
  location  = {Orlando, Florida, USA},
  pages     = {17--26},
  numpages  = {10},
  year      = {1990},
  publisher = {ACM},
  address   = {New York, NY, USA},
  isbn      = {0-89791-416-3},
  doi       = {10.1145/97709.97711},
  acmid     = {97711}
}
@inproceedings{potter2010statistics,
  author    = {Potter, K. and Kniss, J. and Riesenfeld, R. and Johnson, C. R.},
  title     = {Visualizing Summary Statistics and Uncertainty},
  booktitle = {Proceedings of the 12th Eurographics / IEEE - VGTC Conference on Visualization},
  series    = {EuroVis'10},
  year      = {2010},
  location  = {Bordeaux, France},
  pages     = {823--832},
  numpages  = {10},
  doi       = {10.1111/j.1467-8659.2009.01677.x},
  publisher = {The Eurographics Association \& John Wiley \& Sons, Ltd.},
  address   = {Chichester, UK}
}
@article{strobelt2017lstmvis,
  author  = {Strobelt, Hendrik and Gehrmann, Sebastian and Pfister, Hanspeter and Rush, Alexander M.},
  journal = {IEEE Transactions on Visualization and Computer Graphics},
  title   = {{LSTMVis}: A Tool for Visual Analysis of Hidden State Dynamics in Recurrent Neural Networks},
  year    = {2018},
  volume  = {24},
  number  = {1},
  pages   = {667--676},
  month   = jan,
  doi     = {10.1109/TVCG.2017.2744158},
  issn    = {1077-2626}
}
@article{liu2017cnnvis,
  title    = {Towards Better Analysis of Machine Learning Models: A Visual Analytics Perspective},
  author   = {Liu, Shixia and Wang, Xiting and Liu, Mengchen and Zhu, Jun},
  journal  = {Visual Informatics},
  volume   = {1},
  number   = {1},
  pages    = {48--56},
  year     = {2017},
  issn     = {2468-502X},
  doi      = {10.1016/j.visinf.2017.01.006},
  keywords = {Interactive model analysis, Interactive visualization, Machine learning, Understanding, Diagnosis, Refinement}
}
@inproceedings{amershi2015modeltracker,
  author    = {Amershi, Saleema and Chickering, Max and Drucker, Steven and Lee, Bongshin and Simard, Patrice and Suh, Jina},
  title     = {{ModelTracker}: Redesigning Performance Analysis Tools for Machine Learning},
  booktitle = {Proceedings of the Conference on Human Factors in Computing Systems (CHI 2015)},
  year      = {2015},
  month     = apr,
  publisher = {ACM},
  url       = {https://www.microsoft.com/en-us/research/publication/modeltracker-redesigning-performance-analysis-tools-for-machine-learning/},
  abstract  = {Model building in machine learning is an iterative process. The performance analysis and debugging step typically involves a disruptive cognitive switch from model building to error analysis, discouraging an informed approach to model building. We present ModelTracker, an interactive visualization that subsumes information contained in numerous traditional summary statistics and graphs while displaying example-level performance and enabling direct error examination and debugging. Usage analysis from machine learning practitioners building real models with ModelTracker over six months shows ModelTracker is used often and throughout model building. A controlled experiment focusing on ModelTracker’s debugging capabilities shows participants prefer ModelTracker over traditional tools without a loss in model performance.}
}
@article{ren2017squares,
  author  = {Ren, D. and Amershi, S. and Lee, B. and Suh, J. and Williams, J. D.},
  journal = {IEEE Transactions on Visualization and Computer Graphics},
  title   = {{Squares}: Supporting Interactive Performance Analysis for Multiclass Classifiers},
  year    = {2017},
  volume  = {23},
  number  = {1},
  pages   = {61--70},
  month   = jan,
  doi     = {10.1109/TVCG.2016.2598828},
  issn    = {1077-2626}
}
@article{wongsuphasawat2017dataflow,
  author  = {Wongsuphasawat, K. and Smilkov, D. and Wexler, J. and Wilson, J. and Mané, D. and Fritz, D. and Krishnan, D. and Viégas, F. B. and Wattenberg, M.},
  journal = {IEEE Transactions on Visualization and Computer Graphics},
  title   = {Visualizing Dataflow Graphs of Deep Learning Models in {TensorFlow}},
  year    = {2018},
  volume  = {24},
  number  = {1},
  pages   = {1--12},
  month   = jan,
  doi     = {10.1109/TVCG.2017.2744878},
  issn    = {1077-2626}
}
@article{pezzotti2017deep-eyes,
  author  = {Pezzotti, N. and Höllt, T. and van Gemert, J. and Lelieveldt, B. P. F. and Eisemann, E. and Vilanova, A.},
  journal = {IEEE Transactions on Visualization and Computer Graphics},
  title   = {{DeepEyes}: Progressive Visual Analytics for Designing Deep Neural Networks},
  year    = {2018},
  volume  = {24},
  number  = {1},
  pages   = {98--108},
  month   = jan,
  doi     = {10.1109/TVCG.2017.2744358},
  issn    = {1077-2626}
}
@article{kahng2017activis,
  author  = {Kahng, M. and Andrews, P. Y. and Kalro, A. and Chau, D. H. Polo},
  journal = {IEEE Transactions on Visualization and Computer Graphics},
  title   = {{ActiVis}: Visual Exploration of Industry-Scale Deep Neural Network Models},
  year    = {2018},
  volume  = {24},
  number  = {1},
  pages   = {88--97},
  month   = jan,
  doi     = {10.1109/TVCG.2017.2744718},
  issn    = {1077-2626}
}
@article{liu2017gan,
  author  = {Liu, M. and Shi, J. and Cao, K. and Zhu, J. and Liu, S.},
  journal = {IEEE Transactions on Visualization and Computer Graphics},
  title   = {Analyzing the Training Processes of Deep Generative Models},
  year    = {2018},
  volume  = {24},
  number  = {1},
  pages   = {77--87},
  month   = jan,
  doi     = {10.1109/TVCG.2017.2744938},
  issn    = {1077-2626}
}
@article{liu2017tree,
  author  = {Liu, S. and Xiao, J. and Liu, J. and Wang, X. and Wu, J. and Zhu, J.},
  journal = {IEEE Transactions on Visualization and Computer Graphics},
  title   = {Visual Diagnosis of Tree Boosting Methods},
  year    = {2018},
  volume  = {24},
  number  = {1},
  pages   = {163--173},
  month   = jan,
  doi     = {10.1109/TVCG.2017.2744378},
  issn    = {1077-2626}
}
@article{muhlabacher2017treepod,
  author  = {Mühlbacher, T. and Linhardt, L. and Möller, T. and Piringer, H.},
  journal = {IEEE Transactions on Visualization and Computer Graphics},
  title   = {{TreePOD}: Sensitivity-Aware Selection of Pareto-Optimal Decision Trees},
  year    = {2018},
  volume  = {24},
  number  = {1},
  pages   = {174--183},
  month   = jan,
  doi     = {10.1109/TVCG.2017.2745158},
  issn    = {1077-2626}
}
@inproceedings{krause2017workflow,
  author    = {Krause, Josua and Dasgupta, Aritra and Swartz, Jordan and Aphinyanaphongs, Yindalon and Bertini, Enrico},
  title     = {A Workflow for Visual Diagnostics of Binary Classifiers using Instance-Level Explanations},
  booktitle = {Proceedings of the IEEE Conference on Visual Analytics Science and Technology},
  year      = {2017}
}
@book{kruskal1978mds,
  author    = {Kruskal, Joseph B and Wish, Myron},
  title     = {Multidimensional scaling},
  volume    = {11},
  publisher = {Sage},
  year      = {1978}
}
@article{maaten2008tsne,
  title   = {Visualizing Data Using {t-SNE}},
  author  = {van der Maaten, Laurens and Hinton, Geoffrey},
  journal = {Journal of Machine Learning Research},
  volume  = {9},
  number  = {Nov},
  pages   = {2579--2605},
  year    = {2008}
}
@article{smilkov2016projector,
  author  = {Smilkov, Daniel and Thorat, Nikhil and Nicholson, Charles and Reif, Emily and Vi{\'e}gas, Fernanda B and Wattenberg, Martin},
  title   = {Embedding projector: Interactive visualization and interpretation of embeddings},
  journal = {arXiv preprint arXiv:1611.05469},
  year    = {2016}
}
@article{hall2009weka,
  author   = {Hall, Mark and Frank, Eibe and Holmes, Geoffrey and Pfahringer, Bernhard and Reutemann, Peter and Witten, Ian H.},
  title    = {The {WEKA} Data Mining Software: An Update},
  journal  = {SIGKDD Explorations Newsletter},
  volume   = {11},
  number   = {1},
  month    = jun,
  year     = {2009},
  issn     = {1931-0145},
  pages    = {10--18},
  numpages = {9},
  doi      = {10.1145/1656274.1656278},
  publisher = {ACM},
  address  = {New York, NY, USA}
}
@inproceedings{tzeng2005visualize-nn,
  author    = {Tzeng, F. Y. and Ma, K. L.},
  booktitle = {IEEE Visualization 2005 (VIS '05)},
  title     = {Opening the Black Box -- Data Driven Visualization of Neural Networks},
  year      = {2005},
  pages     = {383--390},
  month     = oct,
  doi       = {10.1109/VISUAL.2005.1532820}
}
@article{rauber2017hidden-activity,
  author  = {Rauber, P. E. and Fadel, S. G. and Falcão, A. X. and Telea, A. C.},
  journal = {IEEE Transactions on Visualization and Computer Graphics},
  title   = {Visualizing the Hidden Activity of Artificial Neural Networks},
  year    = {2017},
  volume  = {23},
  number  = {1},
  pages   = {101--110},
  month   = jan,
  doi     = {10.1109/TVCG.2016.2598838},
  issn    = {1077-2626}
}
@inproceedings{harley2015isvc,
  title     = {An Interactive Node-Link Visualization of Convolutional Neural Networks},
  author    = {Harley, Adam W},
  booktitle = {International Symposium on Visual Computing (ISVC)},
  pages     = {867--877},
  year      = {2015}
}
@inproceedings{ritter2017cognitive,
  author    = {Samuel Ritter and David G. T. Barrett and Adam Santoro and Matt M. Botvinick},
  title     = {Cognitive Psychology for Deep Neural Networks: A Shape Bias Case Study},
  booktitle = {Proceedings of the 34th International Conference on Machine Learning},
  editor    = {Doina Precup and Yee Whye Teh},
  series    = {Proceedings of Machine Learning Research},
  volume    = {70},
  pages     = {2940--2949},
  year      = {2017},
  month     = {06--11 Aug},
  address   = {International Convention Centre, Sydney, Australia},
  publisher = {PMLR},
  url       = {http://proceedings.mlr.press/v70/ritter17a.html},
  pdf       = {http://proceedings.mlr.press/v70/ritter17a/ritter17a.pdf},
  abstract  = {Deep neural networks (DNNs) have advanced performance on a wide range of complex tasks, rapidly outpacing our understanding of the nature of their solutions. While past work sought to advance our understanding of these models, none has made use of the rich history of problem descriptions, theories, and experimental methods developed by cognitive psychologists to study the human mind. To explore the potential value of these tools, we chose a well-established analysis from developmental psychology that explains how children learn word labels for objects, and applied that analysis to DNNs. Using datasets of stimuli inspired by the original cognitive psychology experiments, we find that state-of-the-art one shot learning models trained on ImageNet exhibit a similar bias to that observed in humans: they prefer to categorize objects according to shape rather than color. The magnitude of this shape bias varies greatly among architecturally identical, but differently seeded models, and even fluctuates within seeds throughout training, despite nearly equivalent classification performance. These results demonstrate the capability of tools from cognitive psychology for exposing hidden computational properties of DNNs, while concurrently providing us with a computational model for human word learning.}
}