--- a +++ b/docs/articles/integrated_gradient.html @@ -0,0 +1,480 @@ +<!DOCTYPE html> +<!-- Generated by pkgdown: do not edit by hand --><html lang="en"> +<head> +<meta http-equiv="Content-Type" content="text/html; charset=UTF-8"> +<meta charset="utf-8"> +<meta http-equiv="X-UA-Compatible" content="IE=edge"> +<meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no"> +<meta name="description" content="deepG"> +<title>Integrated Gradient • deepG</title> +<!-- favicons --><link rel="icon" type="image/png" sizes="16x16" href="../favicon-16x16.png"> +<link rel="icon" type="image/png" sizes="32x32" href="../favicon-32x32.png"> +<link rel="apple-touch-icon" type="image/png" sizes="180x180" href="../apple-touch-icon.png"> +<link rel="apple-touch-icon" type="image/png" sizes="120x120" href="../apple-touch-icon-120x120.png"> +<link rel="apple-touch-icon" type="image/png" sizes="76x76" href="../apple-touch-icon-76x76.png"> +<link rel="apple-touch-icon" type="image/png" sizes="60x60" href="../apple-touch-icon-60x60.png"> +<script src="../deps/jquery-3.6.0/jquery-3.6.0.min.js"></script><meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no"> +<link href="../deps/bootstrap-5.3.1/bootstrap.min.css" rel="stylesheet"> +<script src="../deps/bootstrap-5.3.1/bootstrap.bundle.min.js"></script><!-- Font Awesome icons --><link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.12.1/css/all.min.css" integrity="sha256-mmgLkCYLUQbXn0B1SRqzHar6dCnv9oZFPEC1g1cwlkk=" crossorigin="anonymous"> +<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.12.1/css/v4-shims.min.css" integrity="sha256-wZjR52fzng1pJHwx4aV2AO3yyTOXrcDW7jBpJtTwVxw=" crossorigin="anonymous"> +<!-- bootstrap-toc --><script src="https://cdn.jsdelivr.net/gh/afeld/bootstrap-toc@v1.0.1/dist/bootstrap-toc.min.js" integrity="sha256-4veVQbu7//Lk5TSmc7YV48MxtMy98e26cf5MrgZYnwo=" crossorigin="anonymous"></script><!-- 
headroom.js --><script src="https://cdnjs.cloudflare.com/ajax/libs/headroom/0.11.0/headroom.min.js" integrity="sha256-AsUX4SJE1+yuDu5+mAVzJbuYNPHj/WroHuZ8Ir/CkE0=" crossorigin="anonymous"></script><script src="https://cdnjs.cloudflare.com/ajax/libs/headroom/0.11.0/jQuery.headroom.min.js" integrity="sha256-ZX/yNShbjqsohH1k95liqY9Gd8uOiE1S4vZc+9KQ1K4=" crossorigin="anonymous"></script><!-- clipboard.js --><script src="https://cdnjs.cloudflare.com/ajax/libs/clipboard.js/2.0.11/clipboard.min.js" integrity="sha512-7O5pXpc0oCRrxk8RUfDYFgn0nO1t+jLuIOQdOMRp4APB7uZ4vSjspzp5y6YDtDs4VzUSTbWzBFZ/LKJhnyFOKw==" crossorigin="anonymous" referrerpolicy="no-referrer"></script><!-- search --><script src="https://cdnjs.cloudflare.com/ajax/libs/fuse.js/6.4.6/fuse.js" integrity="sha512-zv6Ywkjyktsohkbp9bb45V6tEMoWhzFzXis+LrMehmJZZSys19Yxf1dopHx7WzIKxr5tK2dVcYmaCk2uqdjF4A==" crossorigin="anonymous"></script><script src="https://cdnjs.cloudflare.com/ajax/libs/autocomplete.js/0.38.0/autocomplete.jquery.min.js" integrity="sha512-GU9ayf+66Xx2TmpxqJpliWbT5PiGYxpaG8rfnBEk1LL8l1KGkRShhngwdXK1UgqhAzWpZHSiYPc09/NwDQIGyg==" crossorigin="anonymous"></script><script src="https://cdnjs.cloudflare.com/ajax/libs/mark.js/8.11.1/mark.min.js" integrity="sha512-5CYOlHXGh6QpOFA/TeTylKLWfB3ftPsde7AnmhuitiTX4K5SqCLBeKro6sPS8ilsz1Q4NRx3v8Ko2IBiszzdww==" crossorigin="anonymous"></script><!-- pkgdown --><script src="../pkgdown.js"></script><meta property="og:title" content="Integrated Gradient"> +<meta property="og:description" content="deepG"> +<meta property="og:image" content="https://genomenet.github.io/deepG/logo.png"> +<!-- mathjax --><script src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/MathJax.js" integrity="sha256-nvJJv9wWKEm88qvoQl9ekL2J+k/RWIsaSScxxlsrv8k=" crossorigin="anonymous"></script><script src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/config/TeX-AMS-MML_HTMLorMML.js" integrity="sha256-84DKXVJXs0/F8OTMzX4UR909+jtl4G7SPypPavF+GfA=" crossorigin="anonymous"></script><!--[if 
lt IE 9]> +<script src="https://oss.maxcdn.com/html5shiv/3.7.3/html5shiv.min.js"></script> +<script src="https://oss.maxcdn.com/respond/1.4.2/respond.min.js"></script> +<![endif]--> +</head> +<body> + <a href="#main" class="visually-hidden-focusable">Skip to contents</a> + + + <nav class="navbar fixed-top navbar-light navbar-expand-lg bg-light" data-bs-theme="light"><div class="container"> + + <a class="navbar-brand me-2" href="../index.html">deepG</a> + + <small class="nav-text text-default me-auto" data-bs-toggle="tooltip" data-bs-placement="bottom" title="Released version">0.3.0</small> + + + <button class="navbar-toggler" type="button" data-bs-toggle="collapse" data-bs-target="#navbar" aria-controls="navbar" aria-expanded="false" aria-label="Toggle navigation"> + <span class="navbar-toggler-icon"></span> + </button> + + <div id="navbar" class="collapse navbar-collapse ms-3"> + <ul class="navbar-nav me-auto"> +<li class="nav-item"> + <a class="nav-link" href="../reference/index.html"> + <span class="fa fa fa fa-file-alt"></span> + + Reference + </a> +</li> +<li class="nav-item dropdown"> + <a href="#" class="nav-link dropdown-toggle" data-bs-toggle="dropdown" role="button" aria-expanded="false" aria-haspopup="true" id="dropdown-notebooks">Notebooks</a> + <div class="dropdown-menu" aria-labelledby="dropdown-notebooks"> + <a class="external-link dropdown-item" href="https://colab.research.google.com/drive/175jIdXcDcgPUvaBo2rH2Lupbpjnp5O7G?usp=sharing">deepG tutorial</a> + <a class="external-link dropdown-item" href="https://colab.research.google.com/drive/1Eolc0koMNM1zkuO4XyVM58ImeF1BpRiH?usp=sharing">Read-length level: Human contamination</a> + <a class="external-link dropdown-item" href="https://colab.research.google.com/drive/1yiXSwFafXpMLHaov9iBTQLIDZ6bK1zYX?usp=sharing">Locus level: CRISPR detection</a> + <a class="external-link dropdown-item" href="https://colab.research.google.com/drive/1G7bOFEX87cZNrM2tdRtTdkrZn5fM__g0?usp=sharing">Gene level: 16S rRNA 
detection</a> + <a class="external-link dropdown-item" href="https://colab.research.google.com/drive/1BCggL-tfQF136YeJ8cKKi-zoBEDMgkNh?usp=sharing">Genome level: Bacterial morphology (Sporulation)</a> + <a class="external-link dropdown-item" href="https://colab.research.google.com/drive/10xpRzGd3JeBAbqQYSCxzQUMctt01sx9D?usp=sharing">Full metagenome level: Colorectal cancer prediction</a> + <a class="external-link dropdown-item" href="https://colab.research.google.com/drive/1kyYK7IU7GSfdpDzO_a8U3_qD4i3zTu6w?usp=sharing">BERT with deepG</a> + </div> +</li> +<li class="active nav-item dropdown"> + <a href="#" class="nav-link dropdown-toggle" data-bs-toggle="dropdown" role="button" aria-expanded="false" aria-haspopup="true" id="dropdown-tutorials">Tutorials</a> + <div class="dropdown-menu" aria-labelledby="dropdown-tutorials"> + <a class="dropdown-item" href="../articles/getting_started.html">Getting Started</a> + <a class="dropdown-item" href="../articles/training_types.html">Training types</a> + <a class="dropdown-item" href="../articles/data_generator.html">Data generator</a> + <a class="dropdown-item" href="../articles/using_tb.html">Using tensorboard</a> + <a class="dropdown-item" href="../articles/integrated_gradient.html">Integrated Gradient</a> + </div> +</li> + </ul> +<form class="form-inline my-2 my-lg-0" role="search"> + <input type="search" class="form-control me-sm-2" aria-label="Toggle navigation" name="search-input" data-search-index="../search.json" id="search-input" placeholder="Search for" autocomplete="off"> +</form> + + <ul class="navbar-nav"> +<li class="nav-item"> + <a class="external-link nav-link" href="https://github.com/GenomeNet/deepG/" aria-label="github"> + <span class="fab fa fab fa-github fa-lg"></span> + + </a> +</li> + </ul> +</div> + + + </div> +</nav><div class="container template-article"> + + + + +<div class="row"> + <main id="main" class="col-md-9"><div class="page-header"> + <img src="../logo.png" class="logo" 
alt=""><h1>Integrated Gradient</h1> + + + <small class="dont-index">Source: <a href="https://github.com/GenomeNet/deepG/blob/HEAD/vignettes/integrated_gradient.Rmd" class="external-link"><code>vignettes/integrated_gradient.Rmd</code></a></small> + <div class="d-none name"><code>integrated_gradient.Rmd</code></div> + </div> + + + +<div class="sourceCode" id="cb1"><pre class="downlit sourceCode r"> +<code class="sourceCode R"><span><span class="co"># devtools::install_github("GenomeNet/deepG")</span></span> +<span><span class="kw"><a href="https://rdrr.io/r/base/library.html" class="external-link">library</a></span><span class="op">(</span><span class="va"><a href="https://github.com/GenomeNet/deepG" class="external-link">deepG</a></span><span class="op">)</span></span> +<span><span class="kw"><a href="https://rdrr.io/r/base/library.html" class="external-link">library</a></span><span class="op">(</span><span class="va"><a href="https://magrittr.tidyverse.org" class="external-link">magrittr</a></span><span class="op">)</span></span> +<span><span class="kw"><a href="https://rdrr.io/r/base/library.html" class="external-link">library</a></span><span class="op">(</span><span class="va"><a href="https://ggplot2.tidyverse.org" class="external-link">ggplot2</a></span><span class="op">)</span></span></code></pre></div> +<style type="text/css"> +mark.in { +background-color: CornflowerBlue; +} + +mark.out { +background-color: IndianRed; +} + +</style> +<div class="section level2"> +<h2 id="introduction">Introduction<a class="anchor" aria-label="anchor" href="#introduction"></a> +</h2> +<p>The <a href="https://arxiv.org/abs/1703.01365" class="external-link">Integrated +Gradient</a> (IG) method can be used to determine what parts of an input +sequence are important for the model's decision. 
We start with training a +model that can differentiate sequences based on the GC content (as +described in the <a href="getting_started.html">Getting started +tutorial</a>).</p> +</div> +<div class="section level2"> +<h2 id="model-training">Model Training<a class="anchor" aria-label="anchor" href="#model-training"></a> +</h2> +<p>We create two simple dummy training and validation data sets. Both +consist of random <tt>ACGT</tt> sequences but the first category has a +probability of 40% each for drawing <tt>G</tt> or <tt>C</tt> and the +second has equal probability for each nucleotide (first category has +around 80% <tt>GC</tt> content and second one around 50%).</p> +<div class="sourceCode" id="cb2"><pre class="downlit sourceCode r"> +<code class="sourceCode R"><span><span class="fu"><a href="https://rdrr.io/r/base/Random.html" class="external-link">set.seed</a></span><span class="op">(</span><span class="fl">123</span><span class="op">)</span></span> +<span></span> +<span><span class="co"># Create data </span></span> +<span><span class="va">vocabulary</span> <span class="op"><-</span> <span class="fu"><a href="https://rdrr.io/r/base/c.html" class="external-link">c</a></span><span class="op">(</span><span class="st">"A"</span>, <span class="st">"C"</span>, <span class="st">"G"</span>, <span class="st">"T"</span><span class="op">)</span></span> +<span><span class="va">data_type</span> <span class="op"><-</span> <span class="fu"><a href="https://rdrr.io/r/base/c.html" class="external-link">c</a></span><span class="op">(</span><span class="st">"train_1"</span>, <span class="st">"train_2"</span>, <span class="st">"val_1"</span>, <span class="st">"val_2"</span><span class="op">)</span></span> +<span></span> +<span><span class="kw">for</span> <span class="op">(</span><span class="va">i</span> <span class="kw">in</span> <span class="fl">1</span><span class="op">:</span><span class="fu"><a href="https://rdrr.io/r/base/length.html" 
class="external-link">length</a></span><span class="op">(</span><span class="va">data_type</span><span class="op">)</span><span class="op">)</span> <span class="op">{</span></span> +<span> </span> +<span> <span class="va">temp_file</span> <span class="op"><-</span> <span class="fu"><a href="https://rdrr.io/r/base/tempfile.html" class="external-link">tempfile</a></span><span class="op">(</span><span class="op">)</span></span> +<span> <span class="fu"><a href="https://rdrr.io/r/base/assign.html" class="external-link">assign</a></span><span class="op">(</span><span class="fu"><a href="https://rdrr.io/r/base/paste.html" class="external-link">paste0</a></span><span class="op">(</span><span class="va">data_type</span><span class="op">[</span><span class="va">i</span><span class="op">]</span>, <span class="st">"_dir"</span><span class="op">)</span>, <span class="va">temp_file</span><span class="op">)</span></span> +<span> <span class="fu"><a href="https://rdrr.io/r/base/files2.html" class="external-link">dir.create</a></span><span class="op">(</span><span class="va">temp_file</span><span class="op">)</span></span> +<span> </span> +<span> <span class="kw">if</span> <span class="op">(</span><span class="va">i</span> <span class="op"><a href="https://rdrr.io/r/base/Arithmetic.html" class="external-link">%%</a></span> <span class="fl">2</span> <span class="op">==</span> <span class="fl">1</span><span class="op">)</span> <span class="op">{</span></span> +<span> <span class="va">header</span> <span class="op"><-</span> <span class="st">"label_1"</span></span> +<span> <span class="va">prob</span> <span class="op"><-</span> <span class="fu"><a href="https://rdrr.io/r/base/c.html" class="external-link">c</a></span><span class="op">(</span><span class="fl">0.1</span>, <span class="fl">0.4</span>, <span class="fl">0.4</span>, <span class="fl">0.1</span><span class="op">)</span></span> +<span> <span class="op">}</span> <span class="kw">else</span> <span class="op">{</span></span> 
+<span> <span class="va">header</span> <span class="op"><-</span> <span class="st">"label_2"</span></span> +<span> <span class="va">prob</span> <span class="op"><-</span> <span class="fu"><a href="https://rdrr.io/r/base/rep.html" class="external-link">rep</a></span><span class="op">(</span><span class="fl">0.25</span>, <span class="fl">4</span><span class="op">)</span></span> +<span> <span class="op">}</span></span> +<span> <span class="va">fasta_name_start</span> <span class="op"><-</span> <span class="fu"><a href="https://rdrr.io/r/base/paste.html" class="external-link">paste0</a></span><span class="op">(</span><span class="va">header</span>, <span class="st">"_"</span>, <span class="va">data_type</span><span class="op">[</span><span class="va">i</span><span class="op">]</span>, <span class="st">"file"</span><span class="op">)</span></span> +<span> </span> +<span> <span class="fu"><a href="../reference/create_dummy_data.html">create_dummy_data</a></span><span class="op">(</span>file_path <span class="op">=</span> <span class="va">temp_file</span>,</span> +<span> num_files <span class="op">=</span> <span class="fl">1</span>,</span> +<span> seq_length <span class="op">=</span> <span class="fl">20000</span>, </span> +<span> num_seq <span class="op">=</span> <span class="fl">1</span>,</span> +<span> header <span class="op">=</span> <span class="va">header</span>,</span> +<span> prob <span class="op">=</span> <span class="va">prob</span>,</span> +<span> fasta_name_start <span class="op">=</span> <span class="va">fasta_name_start</span>,</span> +<span> vocabulary <span class="op">=</span> <span class="va">vocabulary</span><span class="op">)</span></span> +<span> </span> +<span><span class="op">}</span></span> +<span></span> +<span><span class="co"># Create model</span></span> +<span><span class="va">maxlen</span> <span class="op"><-</span> <span class="fl">50</span></span> +<span><span class="va">model</span> <span class="op"><-</span> <span class="fu"><a 
href="../reference/create_model_lstm_cnn.html">create_model_lstm_cnn</a></span><span class="op">(</span>maxlen <span class="op">=</span> <span class="va">maxlen</span>,</span> +<span> filters <span class="op">=</span> <span class="fu"><a href="https://rdrr.io/r/base/c.html" class="external-link">c</a></span><span class="op">(</span><span class="fl">8</span>, <span class="fl">16</span><span class="op">)</span>,</span> +<span> kernel_size <span class="op">=</span> <span class="fu"><a href="https://rdrr.io/r/base/c.html" class="external-link">c</a></span><span class="op">(</span><span class="fl">8</span>, <span class="fl">8</span><span class="op">)</span>,</span> +<span> pool_size <span class="op">=</span> <span class="fu"><a href="https://rdrr.io/r/base/c.html" class="external-link">c</a></span><span class="op">(</span><span class="fl">3</span>, <span class="fl">3</span><span class="op">)</span>,</span> +<span> layer_lstm <span class="op">=</span> <span class="fl">8</span>,</span> +<span> layer_dense <span class="op">=</span> <span class="fu"><a href="https://rdrr.io/r/base/c.html" class="external-link">c</a></span><span class="op">(</span><span class="fl">4</span>, <span class="fl">2</span><span class="op">)</span>,</span> +<span> model_seed <span class="op">=</span> <span class="fl">3</span><span class="op">)</span></span></code></pre></div> +<pre><code><span><span class="co">## Model: "model"</span></span> +<span><span class="co">## _________________________________________________________________</span></span> +<span><span class="co">## Layer (type) Output Shape Param # </span></span> +<span><span class="co">## =================================================================</span></span> +<span><span class="co">## input_1 (InputLayer) [(None, 50, 4)] 0 </span></span> +<span><span class="co">## </span></span> +<span><span class="co">## conv1d (Conv1D) (None, 50, 8) 264 </span></span> +<span><span class="co">## </span></span> +<span><span class="co">## 
max_pooling1d (MaxPooling1 (None, 16, 8) 0 </span></span> +<span><span class="co">## D) </span></span> +<span><span class="co">## </span></span> +<span><span class="co">## batch_normalization (Batch (None, 16, 8) 32 </span></span> +<span><span class="co">## Normalization) </span></span> +<span><span class="co">## </span></span> +<span><span class="co">## conv1d_1 (Conv1D) (None, 16, 16) 1040 </span></span> +<span><span class="co">## </span></span> +<span><span class="co">## batch_normalization_1 (Bat (None, 16, 16) 64 </span></span> +<span><span class="co">## chNormalization) </span></span> +<span><span class="co">## </span></span> +<span><span class="co">## max_pooling1d_1 (MaxPoolin (None, 5, 16) 0 </span></span> +<span><span class="co">## g1D) </span></span> +<span><span class="co">## </span></span> +<span><span class="co">## lstm (LSTM) (None, 8) 800 </span></span> +<span><span class="co">## </span></span> +<span><span class="co">## dense (Dense) (None, 4) 36 </span></span> +<span><span class="co">## </span></span> +<span><span class="co">## dense_1 (Dense) (None, 2) 10 </span></span> +<span><span class="co">## </span></span> +<span><span class="co">## =================================================================</span></span> +<span><span class="co">## Total params: 2246 (8.77 KB)</span></span> +<span><span class="co">## Trainable params: 2198 (8.59 KB)</span></span> +<span><span class="co">## Non-trainable params: 48 (192.00 Byte)</span></span> +<span><span class="co">## _________________________________________________________________</span></span></code></pre> +<div class="sourceCode" id="cb4"><pre class="downlit sourceCode r"> +<code class="sourceCode R"><span><span class="co"># Train model</span></span> +<span><span class="va">hist</span> <span class="op"><-</span> <span class="fu"><a href="../reference/train_model.html">train_model</a></span><span class="op">(</span><span class="va">model</span>,</span> +<span> train_type <span class="op">=</span> 
<span class="st">"label_folder"</span>,</span> +<span> run_name <span class="op">=</span> <span class="st">"gc_model_1"</span>,</span> +<span> path <span class="op">=</span> <span class="fu"><a href="https://rdrr.io/r/base/c.html" class="external-link">c</a></span><span class="op">(</span><span class="va">train_1_dir</span>, <span class="va">train_2_dir</span><span class="op">)</span>,</span> +<span> path_val <span class="op">=</span> <span class="fu"><a href="https://rdrr.io/r/base/c.html" class="external-link">c</a></span><span class="op">(</span><span class="va">val_1_dir</span>, <span class="va">val_2_dir</span><span class="op">)</span>,</span> +<span> epochs <span class="op">=</span> <span class="fl">6</span>, </span> +<span> batch_size <span class="op">=</span> <span class="fl">64</span>,</span> +<span> steps_per_epoch <span class="op">=</span> <span class="fl">50</span>, </span> +<span> step <span class="op">=</span> <span class="fl">50</span>, </span> +<span> vocabulary_label <span class="op">=</span> <span class="fu"><a href="https://rdrr.io/r/base/c.html" class="external-link">c</a></span><span class="op">(</span><span class="st">"high_gc"</span>, <span class="st">"equal_dist"</span><span class="op">)</span><span class="op">)</span></span></code></pre></div> +<pre><code><span><span class="co">## Epoch 1/6</span></span> +<span><span class="co">## 1/50 [..............................] - ETA: 1:00 - loss: 0.7005 - acc: 0.3906 6/50 [==>...........................] - ETA: 0s - loss: 0.6881 - acc: 0.5417 10/50 [=====>........................] - ETA: 0s - loss: 0.6825 - acc: 0.578115/50 [========>.....................] - ETA: 0s - loss: 0.6681 - acc: 0.676021/50 [===========>..................] - ETA: 0s - loss: 0.6466 - acc: 0.746325/50 [==============>...............] - ETA: 0s - loss: 0.6319 - acc: 0.777529/50 [================>.............] - ETA: 0s - loss: 0.6159 - acc: 0.805034/50 [===================>..........] 
- ETA: 0s - loss: 0.5983 - acc: 0.826738/50 [=====================>........] - ETA: 0s - loss: 0.5831 - acc: 0.842543/50 [========================>.....] - ETA: 0s - loss: 0.5637 - acc: 0.858347/50 [===========================>..] - ETA: 0s - loss: 0.5501 - acc: 0.867450/50 [==============================] - ETA: 0s - loss: 0.5391 - acc: 0.874750/50 [==============================] - 2s 20ms/step - loss: 0.5391 - acc: 0.8747 - val_loss: 0.5296 - val_acc: 0.9578 - lr: 0.0010</span></span> +<span><span class="co">## Epoch 2/6</span></span> +<span><span class="co">## 1/50 [..............................] - ETA: 0s - loss: 0.3424 - acc: 0.9844 6/50 [==>...........................] - ETA: 0s - loss: 0.3381 - acc: 0.981811/50 [=====>........................] - ETA: 0s - loss: 0.3287 - acc: 0.984417/50 [=========>....................] - ETA: 0s - loss: 0.3093 - acc: 0.985320/50 [===========>..................] - ETA: 0s - loss: 0.3038 - acc: 0.985225/50 [==============>...............] - ETA: 0s - loss: 0.2914 - acc: 0.986231/50 [=================>............] - ETA: 0s - loss: 0.2775 - acc: 0.985936/50 [====================>.........] - ETA: 0s - loss: 0.2672 - acc: 0.987039/50 [======================>.......] - ETA: 0s - loss: 0.2604 - acc: 0.987242/50 [========================>.....] - ETA: 0s - loss: 0.2541 - acc: 0.987746/50 [==========================>...] - ETA: 0s - loss: 0.2471 - acc: 0.987849/50 [============================>.] - ETA: 0s - loss: 0.2413 - acc: 0.988250/50 [==============================] - 1s 16ms/step - loss: 0.2392 - acc: 0.9884 - val_loss: 0.3314 - val_acc: 0.9406 - lr: 0.0010</span></span> +<span><span class="co">## Epoch 3/6</span></span> +<span><span class="co">## 1/50 [..............................] - ETA: 0s - loss: 0.1552 - acc: 0.9844 6/50 [==>...........................] - ETA: 0s - loss: 0.1424 - acc: 0.992211/50 [=====>........................] - ETA: 0s - loss: 0.1349 - acc: 0.994316/50 [========>.....................] 
- ETA: 0s - loss: 0.1275 - acc: 0.995122/50 [============>.................] - ETA: 0s - loss: 0.1222 - acc: 0.995026/50 [==============>...............] - ETA: 0s - loss: 0.1183 - acc: 0.995232/50 [==================>...........] - ETA: 0s - loss: 0.1130 - acc: 0.995136/50 [====================>.........] - ETA: 0s - loss: 0.1090 - acc: 0.995741/50 [=======================>......] - ETA: 0s - loss: 0.1045 - acc: 0.995846/50 [==========================>...] - ETA: 0s - loss: 0.1011 - acc: 0.995650/50 [==============================] - 1s 14ms/step - loss: 0.0977 - acc: 0.9959 - val_loss: 0.1857 - val_acc: 0.9594 - lr: 0.0010</span></span> +<span><span class="co">## Epoch 4/6</span></span> +<span><span class="co">## 1/50 [..............................] - ETA: 0s - loss: 0.0734 - acc: 0.9844 7/50 [===>..........................] - ETA: 0s - loss: 0.0624 - acc: 0.995511/50 [=====>........................] - ETA: 0s - loss: 0.0580 - acc: 0.997217/50 [=========>....................] - ETA: 0s - loss: 0.0545 - acc: 0.997223/50 [============>.................] - ETA: 0s - loss: 0.0528 - acc: 0.997329/50 [================>.............] - ETA: 0s - loss: 0.0503 - acc: 0.997335/50 [====================>.........] - ETA: 0s - loss: 0.0488 - acc: 0.997340/50 [=======================>......] - ETA: 0s - loss: 0.0468 - acc: 0.997746/50 [==========================>...] - ETA: 0s - loss: 0.0454 - acc: 0.997650/50 [==============================] - 1s 14ms/step - loss: 0.0441 - acc: 0.9978 - val_loss: 0.1155 - val_acc: 0.9656 - lr: 0.0010</span></span> +<span><span class="co">## Epoch 5/6</span></span> +<span><span class="co">## 1/50 [..............................] - ETA: 0s - loss: 0.0293 - acc: 1.0000 7/50 [===>..........................] - ETA: 0s - loss: 0.0305 - acc: 0.997811/50 [=====>........................] - ETA: 0s - loss: 0.0288 - acc: 0.998616/50 [========>.....................] - ETA: 0s - loss: 0.0272 - acc: 0.999021/50 [===========>..................] 
- ETA: 0s - loss: 0.0266 - acc: 0.999325/50 [==============>...............] - ETA: 0s - loss: 0.0258 - acc: 0.999431/50 [=================>............] - ETA: 0s - loss: 0.0249 - acc: 0.999536/50 [====================>.........] - ETA: 0s - loss: 0.0242 - acc: 0.999641/50 [=======================>......] - ETA: 0s - loss: 0.0235 - acc: 0.999646/50 [==========================>...] - ETA: 0s - loss: 0.0229 - acc: 0.999750/50 [==============================] - ETA: 0s - loss: 0.0224 - acc: 0.999750/50 [==============================] - 1s 16ms/step - loss: 0.0224 - acc: 0.9997 - val_loss: 0.0869 - val_acc: 0.9750 - lr: 0.0010</span></span> +<span><span class="co">## Epoch 6/6</span></span> +<span><span class="co">## 1/50 [..............................] - ETA: 0s - loss: 0.0164 - acc: 1.0000 7/50 [===>..........................] - ETA: 0s - loss: 0.0161 - acc: 1.000011/50 [=====>........................] - ETA: 0s - loss: 0.0159 - acc: 1.000016/50 [========>.....................] - ETA: 0s - loss: 0.0154 - acc: 1.000022/50 [============>.................] - ETA: 0s - loss: 0.0151 - acc: 1.000026/50 [==============>...............] - ETA: 0s - loss: 0.0148 - acc: 1.000032/50 [==================>...........] - ETA: 0s - loss: 0.0145 - acc: 1.000037/50 [=====================>........] - ETA: 0s - loss: 0.0142 - acc: 1.000043/50 [========================>.....] - ETA: 0s - loss: 0.0138 - acc: 1.000048/50 [===========================>..] 
- ETA: 0s - loss: 0.0136 - acc: 1.000050/50 [==============================] - 1s 13ms/step - loss: 0.0135 - acc: 1.0000 - val_loss: 0.0858 - val_acc: 0.9766 - lr: 0.0010</span></span></code></pre> +<pre><code><span><span class="co">## Training done.</span></span></code></pre> +<div class="sourceCode" id="cb7"><pre class="downlit sourceCode r"> +<code class="sourceCode R"><span><span class="fu"><a href="https://rdrr.io/r/graphics/plot.default.html" class="external-link">plot</a></span><span class="op">(</span><span class="va">hist</span><span class="op">)</span></span></code></pre></div> +<p><img src="integrated_gradient_files/figure-html/unnamed-chunk-5-1.png" width="700"></p> +</div> +<div class="section level2"> +<h2 id="integrated-gradient">Integrated Gradient<a class="anchor" aria-label="anchor" href="#integrated-gradient"></a> +</h2> +<p>We can try to visualize which parts of an input sequence are important +for the model's decision, using Integrated Gradient. Let’s create a +sequence with a high GC content. 
We use the same number of Cs as Gs and of +As as Ts.</p> +<div class="sourceCode" id="cb8"><pre class="downlit sourceCode r"> +<code class="sourceCode R"><span><span class="fu"><a href="https://rdrr.io/r/base/Random.html" class="external-link">set.seed</a></span><span class="op">(</span><span class="fl">321</span><span class="op">)</span></span> +<span><span class="va">g_count</span> <span class="op"><-</span> <span class="fl">17</span></span> +<span><span class="fu"><a href="https://rdrr.io/r/base/stopifnot.html" class="external-link">stopifnot</a></span><span class="op">(</span><span class="va">g_count</span> <span class="op"><</span> <span class="fl">25</span><span class="op">)</span></span> +<span><span class="va">a_count</span> <span class="op"><-</span> <span class="op">(</span><span class="fl">50</span> <span class="op">-</span> <span class="op">(</span><span class="fl">2</span><span class="op">*</span><span class="va">g_count</span><span class="op">)</span><span class="op">)</span><span class="op">/</span><span class="fl">2</span> </span> +<span><span class="va">high_gc_seq</span> <span class="op"><-</span> <span class="fu"><a href="https://rdrr.io/r/base/c.html" class="external-link">c</a></span><span class="op">(</span><span class="fu"><a href="https://rdrr.io/r/base/rep.html" class="external-link">rep</a></span><span class="op">(</span><span class="st">"G"</span>, <span class="va">g_count</span><span class="op">)</span>, <span class="fu"><a href="https://rdrr.io/r/base/rep.html" class="external-link">rep</a></span><span class="op">(</span><span class="st">"C"</span>, <span class="va">g_count</span><span class="op">)</span>, <span class="fu"><a href="https://rdrr.io/r/base/rep.html" class="external-link">rep</a></span><span class="op">(</span><span class="st">"A"</span>, <span class="va">a_count</span><span class="op">)</span>, <span class="fu"><a href="https://rdrr.io/r/base/rep.html" class="external-link">rep</a></span><span class="op">(</span><span 
class="st">"T"</span>, <span class="va">a_count</span><span class="op">)</span><span class="op">)</span></span> +<span><span class="va">high_gc_seq</span> <span class="op"><-</span> <span class="va">high_gc_seq</span><span class="op">[</span><span class="fu"><a href="https://rdrr.io/r/base/sample.html" class="external-link">sample</a></span><span class="op">(</span><span class="va">maxlen</span><span class="op">)</span><span class="op">]</span> <span class="op"><a href="../reference/pipe.html">%>%</a></span> <span class="fu"><a href="https://rdrr.io/r/base/paste.html" class="external-link">paste</a></span><span class="op">(</span>collapse <span class="op">=</span> <span class="st">""</span><span class="op">)</span> <span class="co"># shuffle nt order</span></span> +<span><span class="va">high_gc_seq</span></span></code></pre></div> +<pre><code><span><span class="co">## [1] "TGCGCGAGCCCAGCTAAGCGGCCTCCTTAGGCTGCCGGCGGGATCAGCTA"</span></span></code></pre> +<p>We need to one-hot encode the sequence before applying Integrated +Gradient.</p> +<div class="sourceCode" id="cb10"><pre class="downlit sourceCode r"> +<code class="sourceCode R"><span><span class="va">high_gc_seq_one_hot</span> <span class="op"><-</span> <span class="fu"><a href="../reference/seq_encoding_label.html">seq_encoding_label</a></span><span class="op">(</span>char_sequence <span class="op">=</span> <span class="va">high_gc_seq</span>,</span> +<span> maxlen <span class="op">=</span> <span class="fl">50</span>,</span> +<span> start_ind <span class="op">=</span> <span class="fl">1</span>,</span> +<span> vocabulary <span class="op">=</span> <span class="va">vocabulary</span><span class="op">)</span></span> +<span><span class="fu"><a href="https://rdrr.io/r/utils/head.html" class="external-link">head</a></span><span class="op">(</span><span class="va">high_gc_seq_one_hot</span><span class="op">[</span><span class="fl">1</span>,,<span class="op">]</span><span class="op">)</span></span></code></pre></div> 
+<pre><code><span><span class="co">## [,1] [,2] [,3] [,4]</span></span> +<span><span class="co">## [1,] 0 0 0 1</span></span> +<span><span class="co">## [2,] 0 0 1 0</span></span> +<span><span class="co">## [3,] 0 1 0 0</span></span> +<span><span class="co">## [4,] 0 0 1 0</span></span> +<span><span class="co">## [5,] 0 1 0 0</span></span> +<span><span class="co">## [6,] 0 0 1 0</span></span></code></pre> +<p>Our model should be confident that this sequence belongs to the first +class.</p> +<div class="sourceCode" id="cb12"><pre class="downlit sourceCode r"> +<code class="sourceCode R"><span><span class="va">pred</span> <span class="op"><-</span> <span class="fu"><a href="https://rdrr.io/r/stats/predict.html" class="external-link">predict</a></span><span class="op">(</span><span class="va">model</span>, <span class="va">high_gc_seq_one_hot</span>, verbose <span class="op">=</span> <span class="fl">0</span><span class="op">)</span></span> +<span><span class="fu"><a href="https://rdrr.io/r/base/colnames.html" class="external-link">colnames</a></span><span class="op">(</span><span class="va">pred</span><span class="op">)</span> <span class="op"><-</span> <span class="fu"><a href="https://rdrr.io/r/base/c.html" class="external-link">c</a></span><span class="op">(</span><span class="st">"high_gc"</span>, <span class="st">"equal_dist"</span><span class="op">)</span></span> +<span><span class="va">pred</span></span></code></pre></div> +<pre><code><span><span class="co">## high_gc equal_dist</span></span> +<span><span class="co">## [1,] 0.9657075 0.0342925</span></span></code></pre> +<p>We can visualize which parts were important for the prediction.</p> +<div class="sourceCode" id="cb14"><pre class="downlit sourceCode r"> +<code class="sourceCode R"><span><span class="va">ig</span> <span class="op"><-</span> <span class="fu"><a href="../reference/integrated_gradients.html">integrated_gradients</a></span><span class="op">(</span></span> +<span> input_seq <span
class="op">=</span> <span class="va">high_gc_seq_one_hot</span>,</span> +<span> target_class_idx <span class="op">=</span> <span class="fl">1</span>,</span> +<span> model <span class="op">=</span> <span class="va">model</span><span class="op">)</span></span> +<span></span> +<span><span class="kw">if</span> <span class="op">(</span><span class="fu"><a href="https://rdrr.io/r/base/ns-load.html" class="external-link">requireNamespace</a></span><span class="op">(</span><span class="st">"ComplexHeatmap"</span>, quietly <span class="op">=</span> <span class="cn">TRUE</span><span class="op">)</span><span class="op">)</span> <span class="op">{</span></span> +<span> <span class="fu"><a href="../reference/heatmaps_integrated_grad.html">heatmaps_integrated_grad</a></span><span class="op">(</span>integrated_grads <span class="op">=</span> <span class="va">ig</span>,</span> +<span> input_seq <span class="op">=</span> <span class="va">high_gc_seq_one_hot</span><span class="op">)</span></span> +<span><span class="op">}</span> <span class="kw">else</span> <span class="op">{</span></span> +<span> <span class="fu"><a href="https://rdrr.io/r/base/message.html" class="external-link">message</a></span><span class="op">(</span><span class="st">"Skipping ComplexHeatmap-related code because the package is not installed."</span><span class="op">)</span></span> +<span><span class="op">}</span></span></code></pre></div> +<pre><code><span><span class="co">## [[1]]</span></span></code></pre> +<p><img src="integrated_gradient_files/figure-html/unnamed-chunk-9-1.png" alt="Heatmap of integrated gradient scores for the high GC input sequence" width="700"></p> +<p>We may test how our model’s prediction changes if we exchange certain +nucleotides in the input sequence. 
First, we look for the positions with +the smallest IG score.</p> +<div class="sourceCode" id="cb16"><pre class="downlit sourceCode r"> +<code class="sourceCode R"><span><span class="va">ig</span> <span class="op"><-</span> <span class="fu"><a href="https://rdrr.io/r/base/array.html" class="external-link">as.array</a></span><span class="op">(</span><span class="va">ig</span><span class="op">)</span></span> +<span><span class="va">smallest_index</span> <span class="op"><-</span> <span class="fu"><a href="https://rdrr.io/r/base/which.html" class="external-link">which</a></span><span class="op">(</span><span class="va">ig</span> <span class="op">==</span> <span class="fu"><a href="https://rdrr.io/r/base/Extremes.html" class="external-link">min</a></span><span class="op">(</span><span class="va">ig</span><span class="op">)</span>, arr.ind <span class="op">=</span> <span class="cn">TRUE</span><span class="op">)</span></span> +<span><span class="va">smallest_index</span></span></code></pre></div> +<pre><code><span><span class="co">## row col</span></span> +<span><span class="co">## [1,] 33 4</span></span></code></pre> +<p>We may change the nucleotide with the lowest score and observe the +change in prediction confidence</p> +<div class="sourceCode" id="cb18"><pre class="downlit sourceCode r"> +<code class="sourceCode R"><span><span class="co"># copy original sequence</span></span> +<span><span class="va">high_gc_seq_one_hot_changed</span> <span class="op"><-</span> <span class="va">high_gc_seq_one_hot</span> </span> +<span></span> +<span><span class="co"># prediction for original sequence</span></span> +<span><span class="fu"><a href="https://rdrr.io/r/stats/predict.html" class="external-link">predict</a></span><span class="op">(</span><span class="va">model</span>, <span class="va">high_gc_seq_one_hot</span>, verbose <span class="op">=</span> <span class="fl">0</span><span class="op">)</span></span></code></pre></div> +<pre><code><span><span class="co">## [,1] 
[,2]</span></span> +<span><span class="co">## [1,] 0.9657075 0.0342925</span></span></code></pre> +<div class="sourceCode" id="cb20"><pre class="downlit sourceCode r"> +<code class="sourceCode R"><span><span class="co"># change nt</span></span> +<span><span class="va">smallest_index</span> <span class="op"><-</span> <span class="fu"><a href="https://rdrr.io/r/base/which.html" class="external-link">which</a></span><span class="op">(</span><span class="va">ig</span> <span class="op">==</span> <span class="fu"><a href="https://rdrr.io/r/base/Extremes.html" class="external-link">min</a></span><span class="op">(</span><span class="va">ig</span><span class="op">)</span>, arr.ind <span class="op">=</span> <span class="cn">TRUE</span><span class="op">)</span></span> +<span><span class="va">smallest_index</span></span></code></pre></div> +<pre><code><span><span class="co">## row col</span></span> +<span><span class="co">## [1,] 33 4</span></span></code></pre> +<div class="sourceCode" id="cb22"><pre class="downlit sourceCode r"> +<code class="sourceCode R"><span><span class="va">row_index</span> <span class="op"><-</span> <span class="va">smallest_index</span><span class="op">[</span> , <span class="st">"row"</span><span class="op">]</span></span> +<span><span class="va">col_index</span> <span class="op"><-</span> <span class="va">smallest_index</span><span class="op">[</span> , <span class="st">"col"</span><span class="op">]</span> </span> +<span><span class="va">new_row</span> <span class="op"><-</span> <span class="fu"><a href="https://rdrr.io/r/base/rep.html" class="external-link">rep</a></span><span class="op">(</span><span class="fl">0</span>, <span class="fl">4</span><span class="op">)</span></span> +<span><span class="va">nt_index_old</span> <span class="op"><-</span> <span class="va">col_index</span></span> +<span><span class="va">nt_index_new</span> <span class="op"><-</span> <span class="fu"><a href="https://rdrr.io/r/base/which.min.html" 
class="external-link">which.max</a></span><span class="op">(</span><span class="va">ig</span><span class="op">[</span><span class="va">row_index</span>, <span class="op">]</span><span class="op">)</span></span> +<span><span class="va">new_row</span><span class="op">[</span><span class="va">nt_index_new</span><span class="op">]</span> <span class="op"><-</span> <span class="fl">1</span></span> +<span><span class="va">high_gc_seq_one_hot_changed</span><span class="op">[</span><span class="fl">1</span>, <span class="va">row_index</span>, <span class="op">]</span> <span class="op"><-</span> <span class="va">new_row</span></span> +<span><span class="fu"><a href="https://rdrr.io/r/base/cat.html" class="external-link">cat</a></span><span class="op">(</span><span class="st">"At position"</span>, <span class="va">row_index</span>, <span class="st">"changing"</span>, <span class="va">vocabulary</span><span class="op">[</span><span class="va">nt_index_old</span><span class="op">]</span>, <span class="st">"to"</span>, <span class="va">vocabulary</span><span class="op">[</span><span class="va">nt_index_new</span><span class="op">]</span>, <span class="st">"\n"</span><span class="op">)</span></span></code></pre></div> +<pre><code><span><span class="co">## At position 33 changing T to A</span></span></code></pre> +<div class="sourceCode" id="cb24"><pre class="downlit sourceCode r"> +<code class="sourceCode R"><span><span class="va">pred</span> <span class="op"><-</span> <span class="fu"><a href="https://rdrr.io/r/stats/predict.html" class="external-link">predict</a></span><span class="op">(</span><span class="va">model</span>, <span class="va">high_gc_seq_one_hot_changed</span>, verbose <span class="op">=</span> <span class="fl">0</span><span class="op">)</span></span> +<span><span class="fu"><a href="https://rdrr.io/r/base/print.html" class="external-link">print</a></span><span class="op">(</span><span class="va">pred</span><span class="op">)</span></span></code></pre></div> 
+<pre><code><span><span class="co">## [,1] [,2]</span></span> +<span><span class="co">## [1,] 0.9255649 0.07443508</span></span></code></pre> +<p>Let’s repeatedly apply the previous step and change the sequence +after each iteration.</p> +<div class="sourceCode" id="cb26"><pre class="downlit sourceCode r"> +<code class="sourceCode R"><span><span class="co"># copy original sequence</span></span> +<span><span class="va">high_gc_seq_one_hot_changed</span> <span class="op"><-</span> <span class="va">high_gc_seq_one_hot</span> </span> +<span></span> +<span><span class="va">pred_list</span> <span class="op"><-</span> <span class="fu"><a href="https://rdrr.io/r/base/list.html" class="external-link">list</a></span><span class="op">(</span><span class="op">)</span></span> +<span><span class="va">pred_list</span><span class="op">[[</span><span class="fl">1</span><span class="op">]</span><span class="op">]</span> <span class="op"><-</span> <span class="va">pred</span> <span class="op"><-</span> <span class="fu"><a href="https://rdrr.io/r/stats/predict.html" class="external-link">predict</a></span><span class="op">(</span><span class="va">model</span>, <span class="va">high_gc_seq_one_hot</span>, verbose <span class="op">=</span> <span class="fl">0</span><span class="op">)</span></span> +<span></span> +<span><span class="co"># change nts</span></span> +<span><span class="kw">for</span> <span class="op">(</span><span class="va">i</span> <span class="kw">in</span> <span class="fl">1</span><span class="op">:</span><span class="fl">20</span><span class="op">)</span> <span class="op">{</span></span> +<span> </span> +<span> <span class="co"># update ig scores for changed input</span></span> +<span> <span class="va">ig</span> <span class="op"><-</span> <span class="fu"><a href="../reference/integrated_gradients.html">integrated_gradients</a></span><span class="op">(</span></span> +<span> input_seq <span class="op">=</span> <span class="va">high_gc_seq_one_hot_changed</span>,</span> 
+<span> target_class_idx <span class="op">=</span> <span class="fl">1</span>,</span> +<span> model <span class="op">=</span> <span class="va">model</span><span class="op">)</span> <span class="op"><a href="../reference/pipe.html">%>%</a></span> <span class="fu"><a href="https://rdrr.io/r/base/array.html" class="external-link">as.array</a></span><span class="op">(</span><span class="op">)</span></span> +<span> </span> +<span> <span class="va">smallest_index</span> <span class="op"><-</span> <span class="fu"><a href="https://rdrr.io/r/base/which.html" class="external-link">which</a></span><span class="op">(</span><span class="va">ig</span> <span class="op">==</span> <span class="fu"><a href="https://rdrr.io/r/base/Extremes.html" class="external-link">min</a></span><span class="op">(</span><span class="va">ig</span><span class="op">)</span>, arr.ind <span class="op">=</span> <span class="cn">TRUE</span><span class="op">)</span></span> +<span> <span class="va">smallest_index</span></span> +<span> <span class="va">row_index</span> <span class="op"><-</span> <span class="va">smallest_index</span><span class="op">[</span> , <span class="st">"row"</span><span class="op">]</span></span> +<span> <span class="va">col_index</span> <span class="op"><-</span> <span class="va">smallest_index</span><span class="op">[</span> , <span class="st">"col"</span><span class="op">]</span> </span> +<span> <span class="va">new_row</span> <span class="op"><-</span> <span class="fu"><a href="https://rdrr.io/r/base/rep.html" class="external-link">rep</a></span><span class="op">(</span><span class="fl">0</span>, <span class="fl">4</span><span class="op">)</span></span> +<span> <span class="va">nt_index_old</span> <span class="op"><-</span> <span class="va">col_index</span></span> +<span> <span class="va">nt_index_new</span> <span class="op"><-</span> <span class="fu"><a href="https://rdrr.io/r/base/which.min.html" class="external-link">which.max</a></span><span class="op">(</span><span 
class="va">ig</span><span class="op">[</span><span class="va">row_index</span>, <span class="op">]</span><span class="op">)</span></span> +<span> <span class="va">new_row</span><span class="op">[</span><span class="va">nt_index_new</span><span class="op">]</span> <span class="op"><-</span> <span class="fl">1</span></span> +<span> <span class="va">high_gc_seq_one_hot_changed</span><span class="op">[</span><span class="fl">1</span>, <span class="va">row_index</span>, <span class="op">]</span> <span class="op"><-</span> <span class="va">new_row</span></span> +<span> <span class="fu"><a href="https://rdrr.io/r/base/cat.html" class="external-link">cat</a></span><span class="op">(</span><span class="st">"At position"</span>, <span class="va">row_index</span>, <span class="st">"changing"</span>, <span class="va">vocabulary</span><span class="op">[</span><span class="va">nt_index_old</span><span class="op">]</span>,</span> +<span> <span class="st">"to"</span>, <span class="va">vocabulary</span><span class="op">[</span><span class="va">nt_index_new</span><span class="op">]</span>, <span class="st">"\n"</span><span class="op">)</span></span> +<span> <span class="va">pred</span> <span class="op"><-</span> <span class="fu"><a href="https://rdrr.io/r/stats/predict.html" class="external-link">predict</a></span><span class="op">(</span><span class="va">model</span>, <span class="va">high_gc_seq_one_hot_changed</span>, verbose <span class="op">=</span> <span class="fl">0</span><span class="op">)</span></span> +<span> <span class="va">pred_list</span><span class="op">[[</span><span class="va">i</span> <span class="op">+</span> <span class="fl">1</span><span class="op">]</span><span class="op">]</span> <span class="op"><-</span> <span class="va">pred</span> </span> +<span> </span> +<span><span class="op">}</span></span></code></pre></div> +<pre><code><span><span class="co">## At position 33 changing T to A </span></span> +<span><span class="co">## At position 15 changing T to A 
</span></span> +<span><span class="co">## At position 46 changing A to C </span></span> +<span><span class="co">## At position 11 changing C to A </span></span> +<span><span class="co">## At position 19 changing C to A </span></span> +<span><span class="co">## At position 19 changing A to C </span></span> +<span><span class="co">## At position 19 changing C to A </span></span> +<span><span class="co">## At position 19 changing A to C </span></span> +<span><span class="co">## At position 19 changing C to A </span></span> +<span><span class="co">## At position 19 changing A to C </span></span> +<span><span class="co">## At position 19 changing C to A </span></span> +<span><span class="co">## At position 19 changing A to C </span></span> +<span><span class="co">## At position 19 changing C to A </span></span> +<span><span class="co">## At position 19 changing A to C </span></span> +<span><span class="co">## At position 19 changing C to A </span></span> +<span><span class="co">## At position 19 changing A to C </span></span> +<span><span class="co">## At position 19 changing C to A </span></span> +<span><span class="co">## At position 19 changing A to C </span></span> +<span><span class="co">## At position 19 changing C to A </span></span> +<span><span class="co">## At position 19 changing A to C</span></span></code></pre> +<div class="sourceCode" id="cb28"><pre class="downlit sourceCode r"> +<code class="sourceCode R"><span><span class="va">pred_df</span> <span class="op"><-</span> <span class="fu"><a href="https://rdrr.io/r/base/do.call.html" class="external-link">do.call</a></span><span class="op">(</span><span class="va">rbind</span>, <span class="va">pred_list</span><span class="op">)</span></span> +<span><span class="va">pred_df</span> <span class="op"><-</span> <span class="fu"><a href="https://rdrr.io/r/base/data.frame.html" class="external-link">data.frame</a></span><span class="op">(</span><span class="va">pred_df</span>, iteration <span class="op">=</span> 
<span class="fl">0</span><span class="op">:</span><span class="op">(</span><span class="fu"><a href="https://rdrr.io/r/base/nrow.html" class="external-link">nrow</a></span><span class="op">(</span><span class="va">pred_df</span><span class="op">)</span> <span class="op">-</span> <span class="fl">1</span><span class="op">)</span><span class="op">)</span></span> +<span><span class="fu"><a href="https://rdrr.io/r/base/names.html" class="external-link">names</a></span><span class="op">(</span><span class="va">pred_df</span><span class="op">)</span> <span class="op"><-</span> <span class="fu"><a href="https://rdrr.io/r/base/c.html" class="external-link">c</a></span><span class="op">(</span><span class="st">"high_gc"</span>, <span class="st">"equal_dist"</span>, <span class="st">"iteration"</span><span class="op">)</span></span> +<span><span class="fu"><a href="https://ggplot2.tidyverse.org/reference/ggplot.html" class="external-link">ggplot</a></span><span class="op">(</span><span class="va">pred_df</span>, <span class="fu"><a href="https://ggplot2.tidyverse.org/reference/aes.html" class="external-link">aes</a></span><span class="op">(</span>x <span class="op">=</span> <span class="va">iteration</span>, y <span class="op">=</span> <span class="va">high_gc</span><span class="op">)</span><span class="op">)</span> <span class="op">+</span> <span class="fu"><a href="https://ggplot2.tidyverse.org/reference/geom_path.html" class="external-link">geom_line</a></span><span class="op">(</span><span class="op">)</span> <span class="op">+</span> <span class="fu"><a href="https://ggplot2.tidyverse.org/reference/labs.html" class="external-link">ylab</a></span><span class="op">(</span><span class="st">"high GC confidence"</span><span class="op">)</span></span></code></pre></div> +<p><img src="integrated_gradient_files/figure-html/unnamed-chunk-12-1.png" alt="Line plot of high GC confidence per iteration when replacing the nucleotide with the smallest IG score" width="700"></p> +<p>We can try the same in the opposite direction, i.e. 
replace big IG +scores.</p> +<div class="sourceCode" id="cb29"><pre class="downlit sourceCode r"> +<code class="sourceCode R"><span><span class="co"># copy original sequence</span></span> +<span><span class="va">high_gc_seq_one_hot_changed</span> <span class="op"><-</span> <span class="va">high_gc_seq_one_hot</span> </span> +<span></span> +<span><span class="va">pred_list</span> <span class="op"><-</span> <span class="fu"><a href="https://rdrr.io/r/base/list.html" class="external-link">list</a></span><span class="op">(</span><span class="op">)</span></span> +<span><span class="va">pred</span> <span class="op"><-</span> <span class="fu"><a href="https://rdrr.io/r/stats/predict.html" class="external-link">predict</a></span><span class="op">(</span><span class="va">model</span>, <span class="va">high_gc_seq_one_hot</span>, verbose <span class="op">=</span> <span class="fl">0</span><span class="op">)</span></span> +<span><span class="va">pred_list</span><span class="op">[[</span><span class="fl">1</span><span class="op">]</span><span class="op">]</span> <span class="op"><-</span> <span class="va">pred</span></span> +<span></span> +<span><span class="co"># change nts</span></span> +<span><span class="kw">for</span> <span class="op">(</span><span class="va">i</span> <span class="kw">in</span> <span class="fl">1</span><span class="op">:</span><span class="fl">20</span><span class="op">)</span> <span class="op">{</span></span> +<span> </span> +<span> <span class="co"># update ig scores for changed input</span></span> +<span> <span class="va">ig</span> <span class="op"><-</span> <span class="fu"><a href="../reference/integrated_gradients.html">integrated_gradients</a></span><span class="op">(</span></span> +<span> input_seq <span class="op">=</span> <span class="va">high_gc_seq_one_hot_changed</span>,</span> +<span> target_class_idx <span class="op">=</span> <span class="fl">1</span>,</span> +<span> model <span class="op">=</span> <span class="va">model</span><span 
class="op">)</span> <span class="op"><a href="../reference/pipe.html">%>%</a></span> <span class="fu"><a href="https://rdrr.io/r/base/array.html" class="external-link">as.array</a></span><span class="op">(</span><span class="op">)</span></span> +<span> </span> +<span> <span class="va">biggest_index</span> <span class="op"><-</span> <span class="fu"><a href="https://rdrr.io/r/base/which.html" class="external-link">which</a></span><span class="op">(</span><span class="va">ig</span> <span class="op">==</span> <span class="fu"><a href="https://rdrr.io/r/base/Extremes.html" class="external-link">max</a></span><span class="op">(</span><span class="va">ig</span><span class="op">)</span>, arr.ind <span class="op">=</span> <span class="cn">TRUE</span><span class="op">)</span></span> +<span> <span class="va">biggest_index</span></span> +<span> <span class="va">row_index</span> <span class="op"><-</span> <span class="va">biggest_index</span><span class="op">[</span> , <span class="st">"row"</span><span class="op">]</span></span> +<span> <span class="va">col_index</span> <span class="op"><-</span> <span class="va">biggest_index</span><span class="op">[</span> , <span class="st">"col"</span><span class="op">]</span> </span> +<span> <span class="va">new_row</span> <span class="op"><-</span> <span class="fu"><a href="https://rdrr.io/r/base/rep.html" class="external-link">rep</a></span><span class="op">(</span><span class="fl">0</span>, <span class="fl">4</span><span class="op">)</span></span> +<span> <span class="va">nt_index_old</span> <span class="op"><-</span> <span class="va">col_index</span></span> +<span> <span class="va">nt_index_new</span> <span class="op"><-</span> <span class="fu"><a href="https://rdrr.io/r/base/which.min.html" class="external-link">which.min</a></span><span class="op">(</span><span class="va">ig</span><span class="op">[</span><span class="va">row_index</span>, <span class="op">]</span><span class="op">)</span></span> +<span> <span 
class="va">new_row</span><span class="op">[</span><span class="va">nt_index_new</span><span class="op">]</span> <span class="op"><-</span> <span class="fl">1</span></span> +<span> <span class="va">high_gc_seq_one_hot_changed</span><span class="op">[</span><span class="fl">1</span>, <span class="va">row_index</span>, <span class="op">]</span> <span class="op"><-</span> <span class="va">new_row</span></span> +<span> <span class="fu"><a href="https://rdrr.io/r/base/cat.html" class="external-link">cat</a></span><span class="op">(</span><span class="st">"At position"</span>, <span class="va">row_index</span>, <span class="st">"changing"</span>, <span class="va">vocabulary</span><span class="op">[</span><span class="va">nt_index_old</span><span class="op">]</span>, <span class="st">"to"</span>, <span class="va">vocabulary</span><span class="op">[</span><span class="va">nt_index_new</span><span class="op">]</span>, <span class="st">"\n"</span><span class="op">)</span></span> +<span> </span> +<span> <span class="va">pred</span> <span class="op"><-</span> <span class="fu"><a href="https://rdrr.io/r/stats/predict.html" class="external-link">predict</a></span><span class="op">(</span><span class="va">model</span>, <span class="va">high_gc_seq_one_hot_changed</span>, verbose <span class="op">=</span> <span class="fl">0</span><span class="op">)</span></span> +<span> <span class="va">pred_list</span><span class="op">[[</span><span class="va">i</span> <span class="op">+</span> <span class="fl">1</span><span class="op">]</span><span class="op">]</span> <span class="op"><-</span> <span class="va">pred</span> </span> +<span> </span> +<span><span class="op">}</span></span></code></pre></div> +<pre><code><span><span class="co">## At position 30 changing G to A </span></span> +<span><span class="co">## At position 20 changing G to A </span></span> +<span><span class="co">## At position 34 changing G to A </span></span> +<span><span class="co">## At position 38 changing G to A 
</span></span> +<span><span class="co">## At position 32 changing C to A </span></span> +<span><span class="co">## At position 18 changing G to A </span></span> +<span><span class="co">## At position 19 changing C to A </span></span> +<span><span class="co">## At position 23 changing C to A </span></span> +<span><span class="co">## At position 25 changing C to A </span></span> +<span><span class="co">## At position 48 changing C to A </span></span> +<span><span class="co">## At position 41 changing G to A </span></span> +<span><span class="co">## At position 10 changing C to A </span></span> +<span><span class="co">## At position 40 changing G to A </span></span> +<span><span class="co">## At position 37 changing G to A </span></span> +<span><span class="co">## At position 42 changing G to A </span></span> +<span><span class="co">## At position 35 changing C to A </span></span> +<span><span class="co">## At position 6 changing G to A </span></span> +<span><span class="co">## At position 36 changing C to A </span></span> +<span><span class="co">## At position 45 changing C to A </span></span> +<span><span class="co">## At position 13 changing G to A</span></span></code></pre> +<div class="sourceCode" id="cb31"><pre class="downlit sourceCode r"> +<code class="sourceCode R"><span><span class="va">pred_df</span> <span class="op"><-</span> <span class="fu"><a href="https://rdrr.io/r/base/do.call.html" class="external-link">do.call</a></span><span class="op">(</span><span class="va">rbind</span>, <span class="va">pred_list</span><span class="op">)</span></span> +<span><span class="va">pred_df</span> <span class="op"><-</span> <span class="fu"><a href="https://rdrr.io/r/base/data.frame.html" class="external-link">data.frame</a></span><span class="op">(</span><span class="va">pred_df</span>, iteration <span class="op">=</span> <span class="fl">0</span><span class="op">:</span><span class="op">(</span><span class="fu"><a href="https://rdrr.io/r/base/nrow.html" 
class="external-link">nrow</a></span><span class="op">(</span><span class="va">pred_df</span><span class="op">)</span> <span class="op">-</span> <span class="fl">1</span><span class="op">)</span><span class="op">)</span></span> +<span><span class="fu"><a href="https://rdrr.io/r/base/names.html" class="external-link">names</a></span><span class="op">(</span><span class="va">pred_df</span><span class="op">)</span> <span class="op"><-</span> <span class="fu"><a href="https://rdrr.io/r/base/c.html" class="external-link">c</a></span><span class="op">(</span><span class="st">"high_gc"</span>, <span class="st">"equal_dist"</span>, <span class="st">"iteration"</span><span class="op">)</span></span> +<span><span class="fu"><a href="https://ggplot2.tidyverse.org/reference/ggplot.html" class="external-link">ggplot</a></span><span class="op">(</span><span class="va">pred_df</span>, <span class="fu"><a href="https://ggplot2.tidyverse.org/reference/aes.html" class="external-link">aes</a></span><span class="op">(</span>x <span class="op">=</span> <span class="va">iteration</span>, y <span class="op">=</span> <span class="va">high_gc</span><span class="op">)</span><span class="op">)</span> <span class="op">+</span> <span class="fu"><a href="https://ggplot2.tidyverse.org/reference/geom_path.html" class="external-link">geom_line</a></span><span class="op">(</span><span class="op">)</span> <span class="op">+</span> <span class="fu"><a href="https://ggplot2.tidyverse.org/reference/labs.html" class="external-link">ylab</a></span><span class="op">(</span><span class="st">"high GC confidence"</span><span class="op">)</span></span></code></pre></div> +<p><img src="integrated_gradient_files/figure-html/unnamed-chunk-13-1.png" alt="Line plot of high GC confidence per iteration when replacing the nucleotide with the biggest IG score" width="700"></p> +</div> + </main><aside class="col-md-3"><nav id="toc"><h2>On this page</h2> + </nav></aside> +</div> + + + + <footer><div class="pkgdown-footer-left"> + <p>Developed by Philipp Münch, René Mreches, Martin Binder, Hüseyin Anil Gündüz, Xiao-Yin To, 
Alice McHardy.</p> +</div> + +<div class="pkgdown-footer-right"> + <p>Site built with <a href="https://pkgdown.r-lib.org/" class="external-link">pkgdown</a> 2.0.9.</p> +</div> + + </footer> +</div> + + + + + + </body> +</html>