--- a +++ b/docs/_modules/pathflowai/utils.html @@ -0,0 +1,1035 @@ + + +<!DOCTYPE html> +<!--[if IE 8]><html class="no-js lt-ie9" lang="en" > <![endif]--> +<!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]--> +<head> + <meta charset="utf-8"> + + <meta name="viewport" content="width=device-width, initial-scale=1.0"> + + <title>pathflowai.utils — PathFlowAI 0.1 documentation</title> + + + + + + + + + <script type="text/javascript" src="../../_static/js/modernizr.min.js"></script> + + + <script type="text/javascript" id="documentation_options" data-url_root="../../" src="../../_static/documentation_options.js"></script> + <script type="text/javascript" src="../../_static/jquery.js"></script> + <script type="text/javascript" src="../../_static/underscore.js"></script> + <script type="text/javascript" src="../../_static/doctools.js"></script> + <script type="text/javascript" src="../../_static/language_data.js"></script> + <script async="async" type="text/javascript" src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/latest.js?config=TeX-AMS-MML_HTMLorMML"></script> + + <script type="text/javascript" src="../../_static/js/theme.js"></script> + + + + + <link rel="stylesheet" href="../../_static/css/theme.css" type="text/css" /> + <link rel="stylesheet" href="../../_static/pygments.css" type="text/css" /> + <link rel="index" title="Index" href="../../genindex.html" /> + <link rel="search" title="Search" href="../../search.html" /> +</head> + +<body class="wy-body-for-nav"> + + + <div class="wy-grid-for-nav"> + + <nav data-toggle="wy-nav-shift" class="wy-nav-side"> + <div class="wy-side-scroll"> + <div class="wy-side-nav-search" > + + + + <a href="../../index.html" class="icon icon-home"> PathFlowAI + + + + </a> + + + + + + + +<div role="search"> + <form id="rtd-search-form" class="wy-form" action="../../search.html" method="get"> + <input type="text" name="q" placeholder="Search docs" /> + <input type="hidden" name="check_keywords" value="yes" /> 
+ <input type="hidden" name="area" value="default" /> + </form> +</div> + + + </div> + + <div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation"> + + + + + + + <!-- Local TOC --> + <div class="local-toc"></div> + + + </div> + </div> + </nav> + + <section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"> + + + <nav class="wy-nav-top" aria-label="top navigation"> + + <i data-toggle="wy-nav-top" class="fa fa-bars"></i> + <a href="../../index.html">PathFlowAI</a> + + </nav> + + + <div class="wy-nav-content"> + + <div class="rst-content"> + + + + + + + + + + + + + + + + + +<div role="navigation" aria-label="breadcrumbs navigation"> + + <ul class="wy-breadcrumbs"> + + <li><a href="../../index.html">Docs</a> »</li> + + <li><a href="../index.html">Module code</a> »</li> + + <li>pathflowai.utils</li> + + + <li class="wy-breadcrumbs-aside"> + + </li> + + </ul> + + + <hr/> +</div> + <div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article"> + <div itemprop="articleBody"> + + <h1>Source code for pathflowai.utils</h1><div class="highlight"><pre> +<span></span><span class="sd">"""</span> +<span class="sd">utils.py</span> +<span class="sd">=======================</span> +<span class="sd">General utilities that still need to be broken up into preprocessing, machine learning input preparation, and output submodules.</span> +<span class="sd">"""</span> + +<span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span> +<span class="kn">from</span> <span class="nn">bs4</span> <span class="k">import</span> <span class="n">BeautifulSoup</span> +<span class="kn">from</span> <span class="nn">shapely.geometry</span> <span class="k">import</span> <span class="n">Point</span> +<span class="kn">from</span> <span class="nn">shapely.geometry.polygon</span> <span class="k">import</span> <span class="n">Polygon</span> +<span class="kn">import</span> <span 
class="nn">glob</span> +<span class="kn">from</span> <span class="nn">os.path</span> <span class="k">import</span> <span class="n">join</span> +<span class="kn">import</span> <span class="nn">plotly.graph_objs</span> <span class="k">as</span> <span class="nn">go</span> +<span class="kn">import</span> <span class="nn">plotly.offline</span> <span class="k">as</span> <span class="nn">py</span> +<span class="kn">import</span> <span class="nn">pandas</span> <span class="k">as</span> <span class="nn">pd</span><span class="o">,</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span> +<span class="kn">import</span> <span class="nn">scipy.sparse</span> <span class="k">as</span> <span class="nn">sps</span> +<span class="kn">from</span> <span class="nn">PIL</span> <span class="k">import</span> <span class="n">Image</span><span class="p">,</span> <span class="n">ImageDraw</span> +<span class="n">Image</span><span class="o">.</span><span class="n">MAX_IMAGE_PIXELS</span><span class="o">=</span><span class="mf">1e10</span> +<span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span> +<span class="kn">import</span> <span class="nn">scipy.sparse</span> <span class="k">as</span> <span class="nn">sps</span> +<span class="kn">from</span> <span class="nn">os.path</span> <span class="k">import</span> <span class="n">join</span> +<span class="kn">import</span> <span class="nn">os</span><span class="o">,</span> <span class="nn">subprocess</span><span class="o">,</span> <span class="nn">pandas</span> <span class="k">as</span> <span class="nn">pd</span> +<span class="kn">import</span> <span class="nn">sqlite3</span> +<span class="kn">import</span> <span class="nn">torch</span> +<span class="kn">from</span> <span class="nn">torch.utils.data</span> <span class="k">import</span> <span class="n">Dataset</span><span class="c1">#, DataLoader</span> +<span class="kn">from</span> <span 
class="nn">sklearn.model_selection</span> <span class="k">import</span> <span class="n">train_test_split</span> +<span class="kn">import</span> <span class="nn">pysnooper</span> + +<span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span> +<span class="kn">import</span> <span class="nn">dask.array</span> <span class="k">as</span> <span class="nn">da</span> +<span class="kn">import</span> <span class="nn">dask</span> +<span class="kn">import</span> <span class="nn">openslide</span> +<span class="kn">from</span> <span class="nn">openslide</span> <span class="k">import</span> <span class="n">deepzoom</span> +<span class="c1">#import xarray as xr, sparse</span> +<span class="kn">import</span> <span class="nn">pickle</span> +<span class="kn">import</span> <span class="nn">copy</span> + +<span class="kn">import</span> <span class="nn">nonechucks</span> <span class="k">as</span> <span class="nn">nc</span> + +<span class="kn">from</span> <span class="nn">nonechucks</span> <span class="k">import</span> <span class="n">SafeDataLoader</span> <span class="k">as</span> <span class="n">DataLoader</span> + +<div class="viewcode-block" id="load_sql_df"><a class="viewcode-back" href="../../index.html#pathflowai.utils.load_sql_df">[docs]</a><span class="k">def</span> <span class="nf">load_sql_df</span><span class="p">(</span><span class="n">sql_file</span><span class="p">,</span> <span class="n">patch_size</span><span class="p">):</span> + <span class="sd">"""Load pandas dataframe from SQL, accessing particular patch size within SQL.</span> + +<span class="sd"> Parameters</span> +<span class="sd"> ----------</span> +<span class="sd"> sql_file:str</span> +<span class="sd"> SQL db.</span> +<span class="sd"> patch_size:int</span> +<span class="sd"> Patch size.</span> + +<span class="sd"> Returns</span> +<span class="sd"> -------</span> +<span class="sd"> dataframe</span> +<span class="sd"> Patch level information.</span> + +<span 
class="sd"> """</span> + <span class="n">conn</span> <span class="o">=</span> <span class="n">sqlite3</span><span class="o">.</span><span class="n">connect</span><span class="p">(</span><span class="n">sql_file</span><span class="p">)</span> + <span class="n">df</span><span class="o">=</span><span class="n">pd</span><span class="o">.</span><span class="n">read_sql</span><span class="p">(</span><span class="s1">'select * from "</span><span class="si">{}</span><span class="s1">";'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">patch_size</span><span class="p">),</span><span class="n">con</span><span class="o">=</span><span class="n">conn</span><span class="p">)</span> + <span class="n">conn</span><span class="o">.</span><span class="n">close</span><span class="p">()</span> + <span class="k">return</span> <span class="n">df</span></div> + +<div class="viewcode-block" id="df2sql"><a class="viewcode-back" href="../../index.html#pathflowai.utils.df2sql">[docs]</a><span class="k">def</span> <span class="nf">df2sql</span><span class="p">(</span><span class="n">df</span><span class="p">,</span> <span class="n">sql_file</span><span class="p">,</span> <span class="n">patch_size</span><span class="p">,</span> <span class="n">mode</span><span class="o">=</span><span class="s1">'replace'</span><span class="p">):</span> + <span class="sd">"""Write dataframe containing patch level information to SQL db.</span> + +<span class="sd"> Parameters</span> +<span class="sd"> ----------</span> +<span class="sd"> df:dataframe</span> +<span class="sd"> Dataframe containing patch information.</span> +<span class="sd"> sql_file:str</span> +<span class="sd"> SQL database.</span> +<span class="sd"> patch_size:int</span> +<span class="sd"> Size of patches.</span> +<span class="sd"> mode:str</span> +<span class="sd"> Replace or append.</span> + +<span class="sd"> """</span> + <span class="n">conn</span> <span class="o">=</span> <span 
class="n">sqlite3</span><span class="o">.</span><span class="n">connect</span><span class="p">(</span><span class="n">sql_file</span><span class="p">)</span> + <span class="n">df</span><span class="o">.</span><span class="n">set_index</span><span class="p">(</span><span class="s1">'index'</span><span class="p">)</span><span class="o">.</span><span class="n">to_sql</span><span class="p">(</span><span class="nb">str</span><span class="p">(</span><span class="n">patch_size</span><span class="p">),</span> <span class="n">con</span><span class="o">=</span><span class="n">conn</span><span class="p">,</span> <span class="n">if_exists</span><span class="o">=</span><span class="n">mode</span><span class="p">)</span> + <span class="n">conn</span><span class="o">.</span><span class="n">close</span><span class="p">()</span></div> + + +<span class="c1">#########</span> + +<span class="c1"># https://github.com/qupath/qupath/wiki/Supported-image-formats</span> +<div class="viewcode-block" id="svs2dask_array"><a class="viewcode-back" href="../../index.html#pathflowai.utils.svs2dask_array">[docs]</a><span class="k">def</span> <span class="nf">svs2dask_array</span><span class="p">(</span><span class="n">svs_file</span><span class="p">,</span> <span class="n">tile_size</span><span class="o">=</span><span class="mi">1000</span><span class="p">,</span> <span class="n">overlap</span><span class="o">=</span><span class="mi">0</span><span class="p">,</span> <span class="n">remove_last</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span> <span class="n">allow_unknown_chunksizes</span><span class="o">=</span><span class="kc">False</span><span class="p">):</span> + <span class="sd">"""Convert SVS, TIF or TIFF to dask array.</span> + +<span class="sd"> Parameters</span> +<span class="sd"> ----------</span> +<span class="sd"> svs_file:str</span> +<span class="sd"> Image file.</span> +<span class="sd"> tile_size:int</span> +<span class="sd"> Size of chunk to be 
read in.</span> +<span class="sd"> overlap:int</span> +<span class="sd"> Overlap between neighboring tiles; do not modify.</span> +<span class="sd"> remove_last:bool</span> +<span class="sd"> Remove last tile because it has a custom size.</span> +<span class="sd"> allow_unknown_chunksizes: bool</span> +<span class="sd"> Allow different chunk sizes; more flexible, but slower.</span> + +<span class="sd"> Returns</span> +<span class="sd"> -------</span> +<span class="sd"> dask.array</span> +<span class="sd"> Dask Array.</span> + +<span class="sd"> >>> arr=svs2dask_array(svs_file, tile_size=1000, overlap=0, remove_last=True, allow_unknown_chunksizes=False)</span> +<span class="sd"> >>> arr2=arr.compute()</span> +<span class="sd"> >>> arr3=to_pil(cv2.resize(arr2, dsize=(1440,700), interpolation=cv2.INTER_CUBIC))</span> +<span class="sd"> >>> arr3.save(test_image_name)"""</span> + <span class="n">img</span><span class="o">=</span><span class="n">openslide</span><span class="o">.</span><span class="n">open_slide</span><span class="p">(</span><span class="n">svs_file</span><span class="p">)</span> + <span class="n">gen</span><span class="o">=</span><span class="n">deepzoom</span><span class="o">.</span><span class="n">DeepZoomGenerator</span><span class="p">(</span><span class="n">img</span><span class="p">,</span> <span class="n">tile_size</span><span class="o">=</span><span class="n">tile_size</span><span class="p">,</span> <span class="n">overlap</span><span class="o">=</span><span class="n">overlap</span><span class="p">,</span> <span class="n">limit_bounds</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span> + <span class="n">max_level</span> <span class="o">=</span> <span class="nb">len</span><span class="p">(</span><span class="n">gen</span><span class="o">.</span><span class="n">level_dimensions</span><span class="p">)</span><span class="o">-</span><span class="mi">1</span> + <span class="n">n_tiles_x</span><span class="p">,</span> 
<span class="n">n_tiles_y</span> <span class="o">=</span> <span class="n">gen</span><span class="o">.</span><span class="n">level_tiles</span><span class="p">[</span><span class="n">max_level</span><span class="p">]</span> + <span class="n">get_tile</span> <span class="o">=</span> <span class="k">lambda</span> <span class="n">i</span><span class="p">,</span><span class="n">j</span><span class="p">:</span> <span class="n">np</span><span class="o">.</span><span class="n">array</span><span class="p">(</span><span class="n">gen</span><span class="o">.</span><span class="n">get_tile</span><span class="p">(</span><span class="n">max_level</span><span class="p">,(</span><span class="n">i</span><span class="p">,</span><span class="n">j</span><span class="p">)))</span><span class="o">.</span><span class="n">transpose</span><span class="p">((</span><span class="mi">1</span><span class="p">,</span><span class="mi">0</span><span class="p">,</span><span class="mi">2</span><span class="p">))</span> + <span class="n">sample_tile</span> <span class="o">=</span> <span class="n">get_tile</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span><span class="mi">0</span><span class="p">)</span> + <span class="n">sample_tile_shape</span> <span class="o">=</span> <span class="n">sample_tile</span><span class="o">.</span><span class="n">shape</span> + <span class="n">dask_get_tile</span> <span class="o">=</span> <span class="n">dask</span><span class="o">.</span><span class="n">delayed</span><span class="p">(</span><span class="n">get_tile</span><span class="p">,</span> <span class="n">pure</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span> + <span class="n">arr</span><span class="o">=</span><span class="n">da</span><span class="o">.</span><span class="n">concatenate</span><span class="p">([</span><span class="n">da</span><span class="o">.</span><span class="n">concatenate</span><span class="p">([</span><span class="n">da</span><span 
class="o">.</span><span class="n">from_delayed</span><span class="p">(</span><span class="n">dask_get_tile</span><span class="p">(</span><span class="n">i</span><span class="p">,</span><span class="n">j</span><span class="p">),</span><span class="n">sample_tile_shape</span><span class="p">,</span><span class="n">np</span><span class="o">.</span><span class="n">uint8</span><span class="p">)</span> <span class="k">for</span> <span class="n">j</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">n_tiles_y</span> <span class="o">-</span> <span class="p">(</span><span class="mi">0</span> <span class="k">if</span> <span class="ow">not</span> <span class="n">remove_last</span> <span class="k">else</span> <span class="mi">1</span><span class="p">))],</span><span class="n">allow_unknown_chunksizes</span><span class="o">=</span><span class="n">allow_unknown_chunksizes</span><span class="p">,</span><span class="n">axis</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">n_tiles_x</span> <span class="o">-</span> <span class="p">(</span><span class="mi">0</span> <span class="k">if</span> <span class="ow">not</span> <span class="n">remove_last</span> <span class="k">else</span> <span class="mi">1</span><span class="p">))],</span><span class="n">allow_unknown_chunksizes</span><span class="o">=</span><span class="n">allow_unknown_chunksizes</span><span class="p">)</span><span class="c1">#.transpose([1,0,2])</span> + <span class="k">return</span> <span class="n">arr</span></div> + +<div class="viewcode-block" id="img2npy_"><a class="viewcode-back" href="../../index.html#pathflowai.utils.img2npy_">[docs]</a><span class="k">def</span> <span class="nf">img2npy_</span><span class="p">(</span><span class="n">input_dir</span><span class="p">,</span><span 
class="n">basename</span><span class="p">,</span> <span class="n">svs_file</span><span class="p">):</span> + <span class="sd">"""Convert SVS, TIF, TIFF to NPY.</span> + +<span class="sd"> Parameters</span> +<span class="sd"> ----------</span> +<span class="sd"> input_dir:str</span> +<span class="sd"> Output file dir.</span> +<span class="sd"> basename:str</span> +<span class="sd"> Basename of output file</span> +<span class="sd"> svs_file:str</span> +<span class="sd"> SVS, TIF, TIFF file input.</span> + +<span class="sd"> Returns</span> +<span class="sd"> -------</span> +<span class="sd"> str</span> +<span class="sd"> NPY output file.</span> +<span class="sd"> """</span> + <span class="n">npy_out_file</span> <span class="o">=</span> <span class="n">join</span><span class="p">(</span><span class="n">input_dir</span><span class="p">,</span><span class="s1">'</span><span class="si">{}</span><span class="s1">.npy'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">basename</span><span class="p">))</span> + <span class="n">arr</span> <span class="o">=</span> <span class="n">svs2dask_array</span><span class="p">(</span><span class="n">svs_file</span><span class="p">)</span> + <span class="n">np</span><span class="o">.</span><span class="n">save</span><span class="p">(</span><span class="n">npy_out_file</span><span class="p">,</span><span class="n">arr</span><span class="o">.</span><span class="n">compute</span><span class="p">())</span> + <span class="k">return</span> <span class="n">npy_out_file</span></div> + +<div class="viewcode-block" id="load_image"><a class="viewcode-back" href="../../index.html#pathflowai.utils.load_image">[docs]</a><span class="k">def</span> <span class="nf">load_image</span><span class="p">(</span><span class="n">svs_file</span><span class="p">):</span> + <span class="sd">"""Load SVS, TIF, TIFF</span> + +<span class="sd"> Parameters</span> +<span class="sd"> ----------</span> +<span class="sd"> 
svs_file:str</span> +<span class="sd"> Path to the image file, opened with PIL.</span> + +<span class="sd"> Returns</span> +<span class="sd"> -------</span> +<span class="sd"> tuple</span> +<span class="sd"> Transposed image array and the image size reported by PIL.</span> +<span class="sd"> """</span> + <span class="n">im</span> <span class="o">=</span> <span class="n">Image</span><span class="o">.</span><span class="n">open</span><span class="p">(</span><span class="n">svs_file</span><span class="p">)</span> + <span class="k">return</span> <span class="n">np</span><span class="o">.</span><span class="n">transpose</span><span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">array</span><span class="p">(</span><span class="n">im</span><span class="p">),(</span><span class="mi">1</span><span class="p">,</span><span class="mi">0</span><span class="p">)),</span> <span class="n">im</span><span class="o">.</span><span class="n">size</span></div> + +<div class="viewcode-block" id="create_purple_mask"><a class="viewcode-back" href="../../index.html#pathflowai.utils.create_purple_mask">[docs]</a><span class="k">def</span> <span class="nf">create_purple_mask</span><span class="p">(</span><span class="n">arr</span><span class="p">,</span> <span class="n">img_size</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">sparse</span><span class="o">=</span><span class="kc">True</span><span class="p">):</span> + <span class="sd">"""Create a gray scale intensity mask. 
This will be changed soon to support other thresholding QC methods.</span> + +<span class="sd"> Parameters</span> +<span class="sd"> ----------</span> +<span class="sd"> arr:dask.array</span> +<span class="sd"> Dask array containing image information.</span> +<span class="sd"> img_size:int</span> +<span class="sd"> Deprecated.</span> +<span class="sd"> sparse:bool</span> +<span class="sd"> Deprecated</span> + +<span class="sd"> Returns</span> +<span class="sd"> -------</span> +<span class="sd"> dask.array</span> +<span class="sd"> Intensity, grayscale array over image.</span> + +<span class="sd"> """</span> + <span class="n">r</span><span class="p">,</span><span class="n">b</span><span class="p">,</span><span class="n">g</span><span class="o">=</span><span class="n">arr</span><span class="p">[:,:,</span><span class="mi">0</span><span class="p">],</span><span class="n">arr</span><span class="p">[:,:,</span><span class="mi">1</span><span class="p">],</span><span class="n">arr</span><span class="p">[:,:,</span><span class="mi">2</span><span class="p">]</span> + <span class="n">gray</span> <span class="o">=</span> <span class="mf">0.2989</span> <span class="o">*</span> <span class="n">r</span> <span class="o">+</span> <span class="mf">0.5870</span> <span class="o">*</span> <span class="n">g</span> <span class="o">+</span> <span class="mf">0.1140</span> <span class="o">*</span> <span class="n">b</span> + <span class="c1">#rb_avg = (r+b)/2</span> + <span class="n">mask</span><span class="o">=</span> <span class="p">((</span><span class="mf">255.</span><span class="o">-</span><span class="n">gray</span><span class="p">))</span><span class="c1"># >= threshold)#(r > g - 10) & (b > g - 10) & (rb_avg > g + 20)#np.vectorize(is_purple)(arr).astype(int)</span> + <span class="k">if</span> <span class="mi">0</span> <span class="ow">and</span> <span class="n">sparse</span><span class="p">:</span> + <span class="n">mask</span> <span class="o">=</span> <span 
class="n">mask</span><span class="o">.</span><span class="n">nonzero</span><span class="p">()</span> + <span class="n">mask</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">array</span><span class="p">([</span><span class="n">mask</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">compute</span><span class="p">(),</span> <span class="n">mask</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span><span class="o">.</span><span class="n">compute</span><span class="p">()])</span><span class="o">.</span><span class="n">T</span> + <span class="c1">#mask = (np.ones(len(mask[0])),mask)</span> + <span class="c1">#mask = sparse.COO.from_scipy_sparse(sps.coo_matrix(mask, img_size, dtype=np.uint8).tocsr())</span> + <span class="k">return</span> <span class="n">mask</span></div> + +<div class="viewcode-block" id="add_purple_mask"><a class="viewcode-back" href="../../index.html#pathflowai.utils.add_purple_mask">[docs]</a><span class="k">def</span> <span class="nf">add_purple_mask</span><span class="p">(</span><span class="n">arr</span><span class="p">):</span> + <span class="sd">"""Optionally add an intensity mask to the dask array.</span> + +<span class="sd"> Parameters</span> +<span class="sd"> ----------</span> +<span class="sd"> arr:dask.array</span> +<span class="sd"> Image data.</span> + +<span class="sd"> Returns</span> +<span class="sd"> -------</span> +<span class="sd"> array</span> +<span class="sd"> Image data with intensity added as fourth channel.</span> + +<span class="sd"> """</span> + <span class="k">return</span> <span class="n">np</span><span class="o">.</span><span class="n">concatenate</span><span class="p">((</span><span class="n">arr</span><span class="p">,</span><span class="n">create_purple_mask</span><span class="p">(</span><span class="n">arr</span><span class="p">)),</span><span class="n">axis</span><span 
class="o">=</span><span class="mi">0</span><span class="p">)</span></div> + +<div class="viewcode-block" id="create_sparse_annotation_arrays"><a class="viewcode-back" href="../../index.html#pathflowai.utils.create_sparse_annotation_arrays">[docs]</a><span class="k">def</span> <span class="nf">create_sparse_annotation_arrays</span><span class="p">(</span><span class="n">xml_file</span><span class="p">,</span> <span class="n">img_size</span><span class="p">,</span> <span class="n">annotations</span><span class="o">=</span><span class="p">[]):</span> + <span class="sd">"""Convert annotation xml to shapely objects and store in dictionary.</span> + +<span class="sd"> Parameters</span> +<span class="sd"> ----------</span> +<span class="sd"> xml_file:str</span> +<span class="sd"> XML file containing annotations.</span> +<span class="sd"> img_size:int</span> +<span class="sd"> Deprecated.</span> +<span class="sd"> annotations:list</span> +<span class="sd"> Annotations to look for in xml export.</span> + +<span class="sd"> Returns</span> +<span class="sd"> -------</span> +<span class="sd"> dict</span> +<span class="sd"> Dictionary with annotation-shapely object pairs.</span> + +<span class="sd"> """</span> + <span class="n">interior_points_dict</span> <span class="o">=</span> <span class="p">{</span><span class="n">annotation</span><span class="p">:</span><span class="n">parse_coord_return_boxes</span><span class="p">(</span><span class="n">xml_file</span><span class="p">,</span> <span class="n">annotation_name</span> <span class="o">=</span> <span class="n">annotation</span><span class="p">,</span> <span class="n">return_coords</span> <span class="o">=</span> <span class="kc">False</span><span class="p">)</span> <span class="k">for</span> <span class="n">annotation</span> <span class="ow">in</span> <span class="n">annotations</span><span class="p">}</span><span class="c1">#grab_interior_points(xml_file, img_size, annotations=annotations) if annotations else {}</span> + 
<span class="k">return</span> <span class="p">{</span><span class="n">annotation</span><span class="p">:</span><span class="n">interior_points_dict</span><span class="p">[</span><span class="n">annotation</span><span class="p">]</span> <span class="k">for</span> <span class="n">annotation</span> <span class="ow">in</span> <span class="n">annotations</span><span class="p">}</span><span class="c1">#sparse.COO.from_scipy_sparse((sps.coo_matrix(interior_points_dict[annotation],img_size, dtype=np.uint8) if interior_points_dict[annotation] not None else sps.coo_matrix(img_size, dtype=np.uint8)).tocsr()) for annotation in annotations} # [sps.coo_matrix(img_size, dtype=np.uint8)]+</span></div> + +<div class="viewcode-block" id="load_process_image"><a class="viewcode-back" href="../../index.html#pathflowai.utils.load_process_image">[docs]</a><span class="k">def</span> <span class="nf">load_process_image</span><span class="p">(</span><span class="n">svs_file</span><span class="p">,</span> <span class="n">xml_file</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">npy_mask</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">annotations</span><span class="o">=</span><span class="p">[]):</span> + <span class="sd">"""Load SVS-like image (including NPY), segmentation/classification annotations, generate dask array and dictionary of annotations.</span> + +<span class="sd"> Parameters</span> +<span class="sd"> ----------</span> +<span class="sd"> svs_file:str</span> +<span class="sd"> Image file</span> +<span class="sd"> xml_file:str</span> +<span class="sd"> Annotation file.</span> +<span class="sd"> npy_mask:array</span> +<span class="sd"> Numpy segmentation mask.</span> +<span class="sd"> annotations:list</span> +<span class="sd"> List of annotations in xml.</span> + +<span class="sd"> Returns</span> +<span class="sd"> -------</span> +<span class="sd"> array</span> +<span class="sd"> 
Dask array of image.</span> +<span class="sd"> dict</span> +<span class="sd"> Annotation masks.</span> + +<span class="sd"> """</span> + <span class="n">arr</span> <span class="o">=</span> <span class="n">npy2da</span><span class="p">(</span><span class="n">svs_file</span><span class="p">)</span> <span class="k">if</span> <span class="n">svs_file</span><span class="o">.</span><span class="n">endswith</span><span class="p">(</span><span class="s1">'.npy'</span><span class="p">)</span> <span class="k">else</span> <span class="n">svs2dask_array</span><span class="p">(</span><span class="n">svs_file</span><span class="p">,</span> <span class="n">tile_size</span><span class="o">=</span><span class="mi">1000</span><span class="p">,</span> <span class="n">overlap</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span><span class="c1">#load_image(svs_file)</span> + <span class="n">img_size</span> <span class="o">=</span> <span class="n">arr</span><span class="o">.</span><span class="n">shape</span><span class="p">[:</span><span class="mi">2</span><span class="p">]</span> + <span class="n">masks</span> <span class="o">=</span> <span class="p">{}</span><span class="c1">#{'purple': create_purple_mask(arr,img_size,sparse=False)}</span> + <span class="k">if</span> <span class="n">xml_file</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span> + <span class="n">masks</span><span class="o">.</span><span class="n">update</span><span class="p">(</span><span class="n">create_sparse_annotation_arrays</span><span class="p">(</span><span class="n">xml_file</span><span class="p">,</span> <span class="n">img_size</span><span class="p">,</span> <span class="n">annotations</span><span class="o">=</span><span class="n">annotations</span><span class="p">))</span> + <span class="k">if</span> <span class="n">npy_mask</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span 
class="p">:</span> + <span class="n">masks</span><span class="o">.</span><span class="n">update</span><span class="p">({</span><span class="s1">'annotations'</span><span class="p">:</span><span class="n">npy_mask</span><span class="p">})</span> + <span class="c1">#data = dict(image=(['x','y','rgb'],arr),**masks)</span> + <span class="c1">#data_arr = {'image':xr.Variable(['x','y','color'], arr)}</span> + <span class="c1">#purple_arr = {'mask':xr.Variable(['x','y'], masks['purple'])}</span> + <span class="c1">#mask_arr = {m:xr.Variable(['row','col'],masks[m]) for m in masks if m != 'purple'} if 'annotations' not in annotations else {'annotations':xr.Variable(['x','y'],masks['annotations'])}</span> + <span class="c1">#masks['purple'] = masks['purple'].reshape(*masks['purple'].shape,1)</span> + <span class="c1">#arr = da.concatenate([arr,masks.pop('purple')],axis=2)</span> + <span class="k">return</span> <span class="n">arr</span><span class="p">,</span> <span class="n">masks</span><span class="c1">#xr.Dataset.from_dict({k:v for k,v in list(data_arr.items())+list(purple_arr.items())+list(mask_arr.items())})#list(dict(image=data_arr,purple=purple_arr,annotations=mask_arr).items()))#arr, masks</span></div> + +<div class="viewcode-block" id="save_dataset"><a class="viewcode-back" href="../../index.html#pathflowai.utils.save_dataset">[docs]</a><span class="k">def</span> <span class="nf">save_dataset</span><span class="p">(</span><span class="n">arr</span><span class="p">,</span> <span class="n">masks</span><span class="p">,</span> <span class="n">out_zarr</span><span class="p">,</span> <span class="n">out_pkl</span><span class="p">):</span> + <span class="sd">"""Saves dask array image, dictionary of annotations to zarr and pickle respectively.</span> + +<span class="sd"> Parameters</span> +<span class="sd"> ----------</span> +<span class="sd"> arr:array</span> +<span class="sd"> Image.</span> +<span class="sd"> masks:dict</span> +<span class="sd"> Dictionary of annotation 
shapes.</span> +<span class="sd"> out_zarr:str</span> +<span class="sd"> Zarr output file for image.</span> +<span class="sd"> out_pkl:str</span> +<span class="sd"> Pickle output file.</span> +<span class="sd"> """</span> + <span class="n">arr</span><span class="o">.</span><span class="n">astype</span><span class="p">(</span><span class="s1">'uint8'</span><span class="p">)</span><span class="o">.</span><span class="n">to_zarr</span><span class="p">(</span><span class="n">out_zarr</span><span class="p">,</span> <span class="n">overwrite</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span> + <span class="n">pickle</span><span class="o">.</span><span class="n">dump</span><span class="p">(</span><span class="n">masks</span><span class="p">,</span><span class="nb">open</span><span class="p">(</span><span class="n">out_pkl</span><span class="p">,</span><span class="s1">'wb'</span><span class="p">))</span></div> + + <span class="c1">#dataset.to_netcdf(out_netcdf, compute=False)</span> + <span class="c1">#pickle.dump(dataset, open(out_pkl,'wb'), protocol=-1)</span> + +<div class="viewcode-block" id="run_preprocessing_pipeline"><a class="viewcode-back" href="../../index.html#pathflowai.utils.run_preprocessing_pipeline">[docs]</a><span class="k">def</span> <span class="nf">run_preprocessing_pipeline</span><span class="p">(</span><span class="n">svs_file</span><span class="p">,</span> <span class="n">xml_file</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">npy_mask</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">annotations</span><span class="o">=</span><span class="p">[],</span> <span class="n">out_zarr</span><span class="o">=</span><span class="s1">'output_zarr.zarr'</span><span class="p">,</span> <span class="n">out_pkl</span><span class="o">=</span><span class="s1">'output.pkl'</span><span class="p">):</span> + <span class="sd">"""Run 
preprocessing pipeline. Store image into zarr format, segmentations maintain as npy, and xml annotations as pickle.</span> + +<span class="sd"> Parameters</span> +<span class="sd"> ----------</span> +<span class="sd"> svs_file:str</span> +<span class="sd"> Input image file.</span> +<span class="sd"> xml_file:str</span> +<span class="sd"> Input annotation file.</span> +<span class="sd"> npy_mask:str</span> +<span class="sd"> NPY segmentation mask.</span> +<span class="sd"> annotations:list</span> +<span class="sd"> List of annotations.</span> +<span class="sd"> out_zarr:str</span> +<span class="sd"> Output zarr for image.</span> +<span class="sd"> out_pkl:str</span> +<span class="sd"> Output pickle for annotations.</span> +<span class="sd"> """</span> + <span class="c1">#save_dataset(load_process_image(svs_file, xml_file, npy_mask, annotations), out_netcdf)</span> + <span class="n">arr</span><span class="p">,</span> <span class="n">masks</span> <span class="o">=</span> <span class="n">load_process_image</span><span class="p">(</span><span class="n">svs_file</span><span class="p">,</span> <span class="n">xml_file</span><span class="p">,</span> <span class="n">npy_mask</span><span class="p">,</span> <span class="n">annotations</span><span class="p">)</span> + <span class="n">save_dataset</span><span class="p">(</span><span class="n">arr</span><span class="p">,</span> <span class="n">masks</span><span class="p">,</span><span class="n">out_zarr</span><span class="p">,</span> <span class="n">out_pkl</span><span class="p">)</span></div> + +<span class="c1">###################</span> + +<div class="viewcode-block" id="adjust_mask"><a class="viewcode-back" href="../../index.html#pathflowai.utils.adjust_mask">[docs]</a><span class="k">def</span> <span class="nf">adjust_mask</span><span class="p">(</span><span class="n">mask_file</span><span class="p">,</span> <span class="n">dask_img_array_file</span><span class="p">,</span> <span class="n">out_npy</span><span 
class="p">,</span> <span class="n">n_neighbors</span><span class="p">):</span> + <span class="sd">"""Fixes segmentation masks to reduce coarse annotations over empty regions.</span> + +<span class="sd"> Parameters</span> +<span class="sd"> ----------</span> +<span class="sd"> mask_file:str</span> +<span class="sd"> NPY segmentation mask.</span> +<span class="sd"> dask_img_array_file:str</span> +<span class="sd"> Dask image file.</span> +<span class="sd"> out_npy:str</span> +<span class="sd"> Output numpy file.</span> +<span class="sd"> n_neighbors:int</span> +<span class="sd"> Number of nearest neighbors for dilation and erosion of mask from background to not background.</span> + +<span class="sd"> Returns</span> +<span class="sd"> -------</span> +<span class="sd"> str</span> +<span class="sd"> Output numpy file.</span> + +<span class="sd"> """</span> + <span class="kn">from</span> <span class="nn">dask_image.ndmorph</span> <span class="k">import</span> <span class="n">binary_opening</span> + <span class="kn">from</span> <span class="nn">dask.distributed</span> <span class="k">import</span> <span class="n">Client</span> + <span class="c1">#c=Client()</span> + <span class="n">dask_img_array</span><span class="o">=</span><span class="n">da</span><span class="o">.</span><span class="n">from_zarr</span><span class="p">(</span><span class="n">dask_img_array_file</span><span class="p">)</span> + <span class="n">mask</span><span class="o">=</span><span class="n">npy2da</span><span class="p">(</span><span class="n">mask_file</span><span class="p">)</span> + <span class="n">is_tissue_mask</span> <span class="o">=</span> <span class="n">mask</span><span class="o">></span><span class="mf">0.</span> + <span class="n">is_tissue_mask_img</span><span class="o">=</span><span class="p">((</span><span class="n">dask_img_array</span><span class="p">[</span><span class="o">...</span><span class="p">,</span><span class="mi">0</span><span class="p">]</span><span class="o">></span><span
class="mf">200.</span><span class="p">)</span> <span class="o">&</span> <span class="p">(</span><span class="n">dask_img_array</span><span class="p">[</span><span class="o">...</span><span class="p">,</span><span class="mi">1</span><span class="p">]</span><span class="o">></span><span class="mf">200.</span><span class="p">)</span><span class="o">&</span> <span class="p">(</span><span class="n">dask_img_array</span><span class="p">[</span><span class="o">...</span><span class="p">,</span><span class="mi">2</span><span class="p">]</span><span class="o">></span><span class="mf">200.</span><span class="p">))</span> <span class="o">==</span> <span class="mi">0</span> + <span class="n">opening</span><span class="o">=</span><span class="n">binary_opening</span><span class="p">(</span><span class="n">is_tissue_mask_img</span><span class="p">,</span><span class="n">structure</span><span class="o">=</span><span class="n">da</span><span class="o">.</span><span class="n">ones</span><span class="p">((</span><span class="n">n_neighbors</span><span class="p">,</span><span class="n">n_neighbors</span><span class="p">)))</span><span class="c1">#,mask=is_tissue_mask)</span> + <span class="n">mask</span><span class="p">[(</span><span class="n">opening</span><span class="o">==</span><span class="mi">0</span><span class="p">)</span><span class="o">&</span><span class="p">(</span><span class="n">is_tissue_mask</span><span class="o">==</span><span class="mi">1</span><span class="p">)]</span><span class="o">=</span><span class="mi">0</span> + <span class="n">np</span><span class="o">.</span><span class="n">save</span><span class="p">(</span><span class="n">out_npy</span><span class="p">,</span><span class="n">mask</span><span class="o">.</span><span class="n">compute</span><span class="p">())</span> + <span class="c1">#c.close()</span> + <span class="k">return</span> <span class="n">out_npy</span></div> + +<span class="c1">###################</span> + +<div class="viewcode-block" 
id="process_svs"><a class="viewcode-back" href="../../index.html#pathflowai.utils.process_svs">[docs]</a><span class="k">def</span> <span class="nf">process_svs</span><span class="p">(</span><span class="n">svs_file</span><span class="p">,</span> <span class="n">xml_file</span><span class="p">,</span> <span class="n">annotations</span><span class="o">=</span><span class="p">[],</span> <span class="n">output_dir</span><span class="o">=</span><span class="s1">'./'</span><span class="p">):</span> + <span class="sd">"""Store images into npy format and store annotations into pickle dictionary.</span> + +<span class="sd"> Parameters</span> +<span class="sd"> ----------</span> +<span class="sd"> svs_file:str</span> +<span class="sd"> Image file.</span> +<span class="sd"> xml_file:str</span> +<span class="sd"> Annotations file.</span> +<span class="sd"> annotations:list</span> +<span class="sd"> List of annotations in image.</span> +<span class="sd"> output_dir:str</span> +<span class="sd"> Output directory.</span> +<span class="sd"> """</span> + <span class="n">os</span><span class="o">.</span><span class="n">makedirs</span><span class="p">(</span><span class="n">output_dir</span><span class="p">,</span><span class="n">exist_ok</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span> + <span class="n">basename</span> <span class="o">=</span> <span class="n">svs_file</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s1">'/'</span><span class="p">)[</span><span class="o">-</span><span class="mi">1</span><span class="p">]</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s1">'.'</span><span class="p">)[</span><span class="mi">0</span><span class="p">]</span> + <span class="n">arr</span><span class="p">,</span> <span class="n">masks</span> <span class="o">=</span> <span class="n">load_process_image</span><span class="p">(</span><span 
class="n">svs_file</span><span class="p">,</span> <span class="n">xml_file</span><span class="p">)</span> + <span class="n">np</span><span class="o">.</span><span class="n">save</span><span class="p">(</span><span class="n">join</span><span class="p">(</span><span class="n">output_dir</span><span class="p">,</span><span class="s1">'</span><span class="si">{}</span><span class="s1">.npy'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">basename</span><span class="p">)),</span><span class="n">arr</span><span class="p">)</span> + <span class="n">pickle</span><span class="o">.</span><span class="n">dump</span><span class="p">(</span><span class="n">masks</span><span class="p">,</span> <span class="nb">open</span><span class="p">(</span><span class="n">join</span><span class="p">(</span><span class="n">output_dir</span><span class="p">,</span><span class="s1">'</span><span class="si">{}</span><span class="s1">.pkl'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">basename</span><span class="p">)),</span><span class="s1">'wb'</span><span class="p">),</span> <span class="n">protocol</span><span class="o">=-</span><span class="mi">1</span><span class="p">)</span></div> + +<span class="c1">####################</span> + +<div class="viewcode-block" id="load_dataset"><a class="viewcode-back" href="../../index.html#pathflowai.utils.load_dataset">[docs]</a><span class="k">def</span> <span class="nf">load_dataset</span><span class="p">(</span><span class="n">in_zarr</span><span class="p">,</span> <span class="n">in_pkl</span><span class="p">):</span> + <span class="sd">"""Load ZARR image and annotations pickle.</span> + +<span class="sd"> Parameters</span> +<span class="sd"> ----------</span> +<span class="sd"> in_zarr:str</span> +<span class="sd"> Input image.</span> +<span class="sd"> in_pkl:str</span> +<span class="sd"> Input annotations.</span> + +<span class="sd"> 
Returns</span> +<span class="sd"> -------</span> +<span class="sd"> dask.array</span> +<span class="sd"> Image array.</span> +<span class="sd"> dict</span> +<span class="sd"> Annotations dictionary.</span> + +<span class="sd"> """</span> + <span class="k">return</span> <span class="n">da</span><span class="o">.</span><span class="n">from_zarr</span><span class="p">(</span><span class="n">in_zarr</span><span class="p">),</span> <span class="n">pickle</span><span class="o">.</span><span class="n">load</span><span class="p">(</span><span class="nb">open</span><span class="p">(</span><span class="n">in_pkl</span><span class="p">,</span><span class="s1">'rb'</span><span class="p">))</span><span class="c1">#xr.open_dataset(in_netcdf)</span></div> + +<div class="viewcode-block" id="is_valid_patch"><a class="viewcode-back" href="../../index.html#pathflowai.utils.is_valid_patch">[docs]</a><span class="k">def</span> <span class="nf">is_valid_patch</span><span class="p">(</span><span class="n">xs</span><span class="p">,</span><span class="n">ys</span><span class="p">,</span><span class="n">patch_size</span><span class="p">,</span><span class="n">purple_mask</span><span class="p">,</span><span class="n">intensity_threshold</span><span class="p">,</span><span class="n">threshold</span><span class="o">=</span><span class="mf">0.5</span><span class="p">):</span> + <span class="sd">"""Deprecated, computes whether patch is valid."""</span> + <span class="nb">print</span><span class="p">(</span><span class="n">xs</span><span class="p">,</span><span class="n">ys</span><span class="p">)</span> + <span class="k">return</span> <span class="p">(</span><span class="n">purple_mask</span><span class="p">[</span><span class="n">xs</span><span class="p">:</span><span class="n">xs</span><span class="o">+</span><span class="n">patch_size</span><span class="p">,</span><span class="n">ys</span><span class="p">:</span><span class="n">ys</span><span class="o">+</span><span 
class="n">patch_size</span><span class="p">]</span><span class="o">>=</span><span class="n">intensity_threshold</span><span class="p">)</span><span class="o">.</span><span class="n">mean</span><span class="p">()</span> <span class="o">></span> <span class="n">threshold</span></div> + +<span class="c1">#@pysnooper.snoop("extract_patch.log")</span> +<div class="viewcode-block" id="extract_patch_information"><a class="viewcode-back" href="../../index.html#pathflowai.utils.extract_patch_information">[docs]</a><span class="k">def</span> <span class="nf">extract_patch_information</span><span class="p">(</span><span class="n">basename</span><span class="p">,</span> <span class="n">input_dir</span><span class="o">=</span><span class="s1">'./'</span><span class="p">,</span> <span class="n">annotations</span><span class="o">=</span><span class="p">[],</span> <span class="n">threshold</span><span class="o">=</span><span class="mf">0.5</span><span class="p">,</span> <span class="n">patch_size</span><span class="o">=</span><span class="mi">224</span><span class="p">,</span> <span class="n">generate_finetune_segmentation</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span> <span class="n">target_class</span><span class="o">=</span><span class="mi">0</span><span class="p">,</span> <span class="n">intensity_threshold</span><span class="o">=</span><span class="mf">100.</span><span class="p">,</span> <span class="n">target_threshold</span><span class="o">=</span><span class="mf">0.</span><span class="p">,</span> <span class="n">adj_mask</span><span class="o">=</span><span class="s1">''</span><span class="p">,</span> <span class="n">basic_preprocess</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span> <span class="n">tries</span><span class="o">=</span><span class="mi">0</span><span class="p">):</span> + <span class="sd">"""Final step of preprocessing pipeline. 
Break up image into patches, include if not background and of a certain intensity, find area of each annotation type in patch, spatial information, image ID and dump data to SQL table.</span> + +<span class="sd"> Parameters</span> +<span class="sd"> ----------</span> +<span class="sd"> basename:str</span> +<span class="sd"> Patient ID.</span> +<span class="sd"> input_dir:str</span> +<span class="sd"> Input directory.</span> +<span class="sd"> annotations:list</span> +<span class="sd"> List of annotations to record, these can be different tissue types, must correspond with XML labels.</span> +<span class="sd"> threshold:float</span> +<span class="sd"> Value between 0 and 1 that indicates the minimum fraction of the patch that mustn't be background for inclusion.</span> +<span class="sd"> patch_size:int</span> +<span class="sd"> Patch size of patches; this will become one of the tables.</span> +<span class="sd"> generate_finetune_segmentation:bool</span> +<span class="sd"> Deprecated.</span> +<span class="sd"> target_class:int</span> +<span class="sd"> Number of segmentation classes desired, from 0th class to target_class-1 will be annotated in SQL.</span> +<span class="sd"> intensity_threshold:float</span> +<span class="sd"> Value between 0 and 255 that represents minimum intensity to not include as background.
Will be modified with new transforms.</span> +<span class="sd"> target_threshold:float</span> +<span class="sd"> Deprecated.</span> +<span class="sd"> adj_mask:str</span> +<span class="sd"> Adjusted mask if performed binary opening operations in previous preprocessing step.</span> +<span class="sd"> basic_preprocess:bool</span> +<span class="sd"> Do not store patch level information.</span> +<span class="sd"> tries:int</span> +<span class="sd"> Number of tries in case there is a Dask timeout, run again.</span> + +<span class="sd"> Returns</span> +<span class="sd"> -------</span> +<span class="sd"> dataframe</span> +<span class="sd"> Patch information.</span> + +<span class="sd"> """</span> + <span class="c1">#from collections import OrderedDict</span> + <span class="c1">#annotations=OrderedDict(annotations)</span> + <span class="c1">#from dask.multiprocessing import get</span> + <span class="kn">import</span> <span class="nn">dask</span> + <span class="kn">import</span> <span class="nn">time</span> + <span class="kn">from</span> <span class="nn">dask</span> <span class="k">import</span> <span class="n">dataframe</span> <span class="k">as</span> <span class="n">dd</span> + <span class="kn">import</span> <span class="nn">dask.array</span> <span class="k">as</span> <span class="nn">da</span> + <span class="kn">import</span> <span class="nn">multiprocessing</span> + <span class="kn">from</span> <span class="nn">shapely.ops</span> <span class="k">import</span> <span class="n">unary_union</span> + <span class="kn">from</span> <span class="nn">shapely.geometry</span> <span class="k">import</span> <span class="n">MultiPolygon</span> + <span class="kn">from</span> <span class="nn">itertools</span> <span class="k">import</span> <span class="n">product</span> + <span class="c1">#from distributed import Client,LocalCluster</span> + <span class="n">max_tries</span><span class="o">=</span><span class="mi">4</span> + <span class="n">kargs</span><span class="o">=</span><span 
class="nb">dict</span><span class="p">(</span><span class="n">basename</span><span class="o">=</span><span class="n">basename</span><span class="p">,</span> <span class="n">input_dir</span><span class="o">=</span><span class="n">input_dir</span><span class="p">,</span> <span class="n">annotations</span><span class="o">=</span><span class="n">annotations</span><span class="p">,</span> <span class="n">threshold</span><span class="o">=</span><span class="n">threshold</span><span class="p">,</span> <span class="n">patch_size</span><span class="o">=</span><span class="n">patch_size</span><span class="p">,</span> <span class="n">generate_finetune_segmentation</span><span class="o">=</span><span class="n">generate_finetune_segmentation</span><span class="p">,</span> <span class="n">target_class</span><span class="o">=</span><span class="n">target_class</span><span class="p">,</span> <span class="n">intensity_threshold</span><span class="o">=</span><span class="n">intensity_threshold</span><span class="p">,</span> <span class="n">target_threshold</span><span class="o">=</span><span class="n">target_threshold</span><span class="p">,</span> <span class="n">adj_mask</span><span class="o">=</span><span class="n">adj_mask</span><span class="p">,</span> <span class="n">basic_preprocess</span><span class="o">=</span><span class="n">basic_preprocess</span><span class="p">,</span> <span class="n">tries</span><span class="o">=</span><span class="n">tries</span><span class="p">)</span> + <span class="k">try</span><span class="p">:</span> + <span class="c1">#,</span> + <span class="c1"># 'distributed.scheduler.allowed-failures':20,</span> + <span class="c1"># 'num-workers':20}):</span> + <span class="c1">#cluster=LocalCluster()</span> + <span class="c1">#cluster.adapt(minimum=10, maximum=100)</span> + <span class="c1">#cluster = LocalCluster(threads_per_worker=1, n_workers=20, memory_limit="80G")</span> + <span 
class="c1">#client=Client()#Client(cluster)#processes=True)#cluster,</span> + + <span class="n">arr</span><span class="p">,</span> <span class="n">masks</span> <span class="o">=</span> <span class="n">load_dataset</span><span class="p">(</span><span class="n">join</span><span class="p">(</span><span class="n">input_dir</span><span class="p">,</span><span class="s1">'</span><span class="si">{}</span><span class="s1">.zarr'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">basename</span><span class="p">)),</span><span class="n">join</span><span class="p">(</span><span class="n">input_dir</span><span class="p">,</span><span class="s1">'</span><span class="si">{}</span><span class="s1">_mask.pkl'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">basename</span><span class="p">)))</span> + <span class="k">if</span> <span class="s1">'annotations'</span> <span class="ow">in</span> <span class="n">masks</span><span class="p">:</span> + <span class="n">segmentation</span> <span class="o">=</span> <span class="kc">True</span> + <span class="c1">#if generate_finetune_segmentation:</span> + <span class="n">segmentation_mask</span> <span class="o">=</span> <span class="n">npy2da</span><span class="p">(</span><span class="n">join</span><span class="p">(</span><span class="n">input_dir</span><span class="p">,</span><span class="s1">'</span><span class="si">{}</span><span class="s1">_mask.npy'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">basename</span><span class="p">))</span> <span class="k">if</span> <span class="ow">not</span> <span class="n">adj_mask</span> <span class="k">else</span> <span class="n">adj_mask</span><span class="p">)</span> + <span class="k">else</span><span class="p">:</span> + <span class="n">segmentation</span> <span class="o">=</span> <span class="kc">False</span> + <span 
class="c1">#masks=np.load(masks['annotations'])</span> + <span class="c1">#npy_file = join(input_dir,'{}.npy'.format(basename))</span> + <span class="n">purple_mask</span> <span class="o">=</span> <span class="n">create_purple_mask</span><span class="p">(</span><span class="n">arr</span><span class="p">)</span> + <span class="n">x_max</span> <span class="o">=</span> <span class="nb">float</span><span class="p">(</span><span class="n">arr</span><span class="o">.</span><span class="n">shape</span><span class="p">[</span><span class="mi">0</span><span class="p">])</span> + <span class="n">y_max</span> <span class="o">=</span> <span class="nb">float</span><span class="p">(</span><span class="n">arr</span><span class="o">.</span><span class="n">shape</span><span class="p">[</span><span class="mi">1</span><span class="p">])</span> + <span class="n">x_steps</span> <span class="o">=</span> <span class="nb">int</span><span class="p">((</span><span class="n">x_max</span><span class="o">-</span><span class="n">patch_size</span><span class="p">)</span> <span class="o">/</span> <span class="n">patch_size</span> <span class="p">)</span> + <span class="n">y_steps</span> <span class="o">=</span> <span class="nb">int</span><span class="p">((</span><span class="n">y_max</span><span class="o">-</span><span class="n">patch_size</span><span class="p">)</span> <span class="o">/</span> <span class="n">patch_size</span> <span class="p">)</span> + <span class="k">for</span> <span class="n">annotation</span> <span class="ow">in</span> <span class="n">annotations</span><span class="p">:</span> + <span class="k">try</span><span class="p">:</span> + <span class="n">masks</span><span class="p">[</span><span class="n">annotation</span><span class="p">]</span><span class="o">=</span><span class="p">[</span><span class="n">unary_union</span><span class="p">(</span><span class="n">masks</span><span class="p">[</span><span class="n">annotation</span><span class="p">])]</span> <span 
class="k">if</span> <span class="n">masks</span><span class="p">[</span><span class="n">annotation</span><span class="p">]</span> <span class="k">else</span> <span class="p">[]</span> + <span class="k">except</span><span class="p">:</span> + <span class="n">masks</span><span class="p">[</span><span class="n">annotation</span><span class="p">]</span><span class="o">=</span><span class="p">[</span><span class="n">MultiPolygon</span><span class="p">(</span><span class="n">masks</span><span class="p">[</span><span class="n">annotation</span><span class="p">])]</span> <span class="k">if</span> <span class="n">masks</span><span class="p">[</span><span class="n">annotation</span><span class="p">]</span> <span class="k">else</span> <span class="p">[]</span> + + <span class="n">patch_info</span><span class="o">=</span><span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">([([</span><span class="n">basename</span><span class="p">,</span><span class="n">i</span><span class="o">*</span><span class="n">patch_size</span><span class="p">,</span><span class="n">j</span><span class="o">*</span><span class="n">patch_size</span><span class="p">,</span><span class="n">patch_size</span><span class="p">,</span><span class="s1">'NA'</span><span class="p">]</span><span class="o">+</span><span class="p">[</span><span class="mf">0.</span><span class="p">]</span><span class="o">*</span><span class="p">(</span><span class="n">target_class</span> <span class="k">if</span> <span class="n">segmentation</span> <span class="k">else</span> <span class="nb">len</span><span class="p">(</span><span class="n">annotations</span><span class="p">)))</span> <span class="k">for</span> <span class="n">i</span><span class="p">,</span><span class="n">j</span> <span class="ow">in</span> <span class="n">product</span><span class="p">(</span><span class="nb">range</span><span class="p">(</span><span class="n">x_steps</span><span class="o">+</span><span 
class="mi">1</span><span class="p">),</span><span class="nb">range</span><span class="p">(</span><span class="n">y_steps</span><span class="o">+</span><span class="mi">1</span><span class="p">))],</span><span class="n">columns</span><span class="o">=</span><span class="p">([</span><span class="s1">'ID'</span><span class="p">,</span><span class="s1">'x'</span><span class="p">,</span><span class="s1">'y'</span><span class="p">,</span><span class="s1">'patch_size'</span><span class="p">,</span><span class="s1">'annotation'</span><span class="p">]</span><span class="o">+</span><span class="p">(</span><span class="n">annotations</span> <span class="k">if</span> <span class="ow">not</span> <span class="n">segmentation</span> <span class="k">else</span> <span class="nb">list</span><span class="p">([</span><span class="nb">str</span><span class="p">(</span><span class="n">i</span><span class="p">)</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">target_class</span><span class="p">)]))))</span><span class="c1">#[dask.delayed(return_line_info)(i,j) for (i,j) in product(range(x_steps+1),range(y_steps+1))]</span> + <span class="k">if</span> <span class="n">basic_preprocess</span><span class="p">:</span> + <span class="n">patch_info</span><span class="o">=</span><span class="n">patch_info</span><span class="o">.</span><span class="n">iloc</span><span class="p">[:,:</span><span class="mi">4</span><span class="p">]</span> + <span class="n">valid_patches</span><span class="o">=</span><span class="p">[]</span> + <span class="k">for</span> <span class="n">xs</span><span class="p">,</span><span class="n">ys</span> <span class="ow">in</span> <span class="n">patch_info</span><span class="p">[[</span><span class="s1">'x'</span><span class="p">,</span><span class="s1">'y'</span><span class="p">]]</span><span class="o">.</span><span class="n">values</span><span class="o">.</span><span 
class="n">tolist</span><span class="p">():</span> + <span class="n">valid_patches</span><span class="o">.</span><span class="n">append</span><span class="p">((</span><span class="n">purple_mask</span><span class="p">[</span><span class="n">xs</span><span class="p">:</span><span class="n">xs</span><span class="o">+</span><span class="n">patch_size</span><span class="p">,</span><span class="n">ys</span><span class="p">:</span><span class="n">ys</span><span class="o">+</span><span class="n">patch_size</span><span class="p">]</span><span class="o">>=</span><span class="n">intensity_threshold</span><span class="p">)</span><span class="o">.</span><span class="n">mean</span><span class="p">()</span> <span class="o">></span> <span class="n">threshold</span><span class="p">)</span> <span class="c1"># dask.delayed(is_valid_patch)(xs,ys,patch_size,purple_mask,intensity_threshold,threshold)</span> + <span class="n">valid_patches</span><span class="o">=</span><span class="n">np</span><span class="o">.</span><span class="n">array</span><span class="p">(</span><span class="n">da</span><span class="o">.</span><span class="n">compute</span><span class="p">(</span><span class="o">*</span><span class="n">valid_patches</span><span class="p">))</span> + <span class="nb">print</span><span class="p">(</span><span class="s1">'Valid Patches Complete'</span><span class="p">)</span> + <span class="c1">#print(valid_patches)</span> + <span class="n">patch_info</span><span class="o">=</span><span class="n">patch_info</span><span class="o">.</span><span class="n">loc</span><span class="p">[</span><span class="n">valid_patches</span><span class="p">]</span> + <span class="k">if</span> <span class="ow">not</span> <span class="n">basic_preprocess</span><span class="p">:</span> + <span class="n">area_info</span><span class="o">=</span><span class="p">[]</span> + <span class="k">if</span> <span class="n">segmentation</span><span class="p">:</span> + <span class="n">patch_info</span><span 
class="o">.</span><span class="n">loc</span><span class="p">[:,</span><span class="s1">'annotation'</span><span class="p">]</span><span class="o">=</span><span class="s1">'segment'</span> + <span class="k">for</span> <span class="n">xs</span><span class="p">,</span><span class="n">ys</span> <span class="ow">in</span> <span class="n">patch_info</span><span class="p">[[</span><span class="s1">'x'</span><span class="p">,</span><span class="s1">'y'</span><span class="p">]]</span><span class="o">.</span><span class="n">values</span><span class="o">.</span><span class="n">tolist</span><span class="p">():</span> + <span class="n">xf</span><span class="o">=</span><span class="n">xs</span><span class="o">+</span><span class="n">patch_size</span> + <span class="n">yf</span><span class="o">=</span><span class="n">ys</span><span class="o">+</span><span class="n">patch_size</span> + <span class="c1">#print(xs,ys)</span> + <span class="n">area_info</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">da</span><span class="o">.</span><span class="n">histogram</span><span class="p">(</span><span class="n">segmentation_mask</span><span class="p">[</span><span class="n">xs</span><span class="p">:</span><span class="n">xf</span><span class="p">,</span><span class="n">ys</span><span class="p">:</span><span class="n">yf</span><span class="p">],</span><span class="nb">range</span><span class="o">=</span><span class="p">[</span><span class="mi">0</span><span class="p">,</span><span class="n">target_class</span><span class="o">-</span><span class="mi">1</span><span class="p">],</span><span class="n">bins</span><span class="o">=</span><span class="n">target_class</span><span class="p">)[</span><span class="mi">0</span><span class="p">])</span> + <span class="c1">#area_info.append(dask.delayed(seg_line)(xs,ys,patch_size,segmentation_mask,target_class))</span> + <span class="k">else</span><span class="p">:</span> + <span class="k">for</span> 
<span class="n">xs</span><span class="p">,</span><span class="n">ys</span> <span class="ow">in</span> <span class="n">patch_info</span><span class="p">[[</span><span class="s1">'x'</span><span class="p">,</span><span class="s1">'y'</span><span class="p">]]</span><span class="o">.</span><span class="n">values</span><span class="o">.</span><span class="n">tolist</span><span class="p">():</span> + <span class="n">area_info</span><span class="o">.</span><span class="n">append</span><span class="p">([</span><span class="n">dask</span><span class="o">.</span><span class="n">delayed</span><span class="p">(</span><span class="n">is_coords_in_box</span><span class="p">)([</span><span class="n">xs</span><span class="p">,</span><span class="n">ys</span><span class="p">],</span><span class="n">patch_size</span><span class="p">,</span><span class="n">masks</span><span class="p">[</span><span class="n">annotation</span><span class="p">])</span> <span class="k">for</span> <span class="n">annotation</span> <span class="ow">in</span> <span class="n">annotations</span><span class="p">])</span> + <span class="c1">#area_info=da.concatenate(area_info,axis=0).compute()</span> + <span class="n">area_info</span><span class="o">=</span><span class="n">np</span><span class="o">.</span><span class="n">array</span><span class="p">(</span><span class="n">dask</span><span class="o">.</span><span class="n">compute</span><span class="p">(</span><span class="o">*</span><span class="n">area_info</span><span class="p">))</span><span class="c1">#da.concatenate(area_info,axis=0).compute(dtype=np.float16,scheduler='threaded')).astype(np.float16)</span> + <span class="nb">print</span><span class="p">(</span><span class="s1">'Area Info Complete'</span><span class="p">)</span> + <span class="k">if</span> <span class="n">segmentation</span><span class="p">:</span> + <span class="n">area_info</span> <span class="o">=</span> <span class="n">area_info</span><span class="o">/</span><span 
class="n">np</span><span class="o">.</span><span class="n">float16</span><span class="p">(</span><span class="n">patch_size</span><span class="o">*</span><span class="n">patch_size</span><span class="p">)</span> + <span class="c1">#print(area_info)</span> + <span class="n">patch_info</span><span class="o">.</span><span class="n">iloc</span><span class="p">[:,</span><span class="mi">5</span><span class="p">:]</span><span class="o">=</span><span class="n">area_info</span> + <span class="c1">#print(patch_info)</span> + <span class="c1">#print(patch_info.dtypes)</span> + <span class="n">annot</span><span class="o">=</span><span class="nb">list</span><span class="p">(</span><span class="n">patch_info</span><span class="o">.</span><span class="n">iloc</span><span class="p">[:,</span><span class="mi">5</span><span class="p">:])</span> + <span class="n">patch_info</span><span class="o">.</span><span class="n">loc</span><span class="p">[:,</span><span class="s1">'annotation'</span><span class="p">]</span><span class="o">=</span><span class="n">np</span><span class="o">.</span><span class="n">vectorize</span><span class="p">(</span><span class="k">lambda</span> <span class="n">i</span><span class="p">:</span> <span class="n">annot</span><span class="p">[</span><span class="n">patch_info</span><span class="o">.</span><span class="n">iloc</span><span class="p">[</span><span class="n">i</span><span class="p">,</span><span class="mi">5</span><span class="p">:]</span><span class="o">.</span><span class="n">values</span><span class="o">.</span><span class="n">argmax</span><span class="p">()])(</span><span class="n">np</span><span class="o">.</span><span class="n">arange</span><span class="p">(</span><span class="n">patch_info</span><span class="o">.</span><span class="n">shape</span><span class="p">[</span><span class="mi">0</span><span class="p">]))</span><span class="c1">#patch_info[np.arange(target_class).astype(str).tolist()].values.argmax(1).astype(str)</span> + <span 
class="c1">#client.close()</span> + <span class="k">except</span> <span class="ne">Exception</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span> + <span class="nb">print</span><span class="p">(</span><span class="n">e</span><span class="p">)</span> + <span class="n">kargs</span><span class="p">[</span><span class="s1">'tries'</span><span class="p">]</span><span class="o">+=</span><span class="mi">1</span> + <span class="k">if</span> <span class="n">kargs</span><span class="p">[</span><span class="s1">'tries'</span><span class="p">]</span><span class="o">==</span><span class="n">max_tries</span><span class="p">:</span> + <span class="k">raise</span> <span class="ne">Exception</span><span class="p">(</span><span class="s1">'Exceeded past maximum number of tries.'</span><span class="p">)</span> + <span class="k">else</span><span class="p">:</span> + <span class="nb">print</span><span class="p">(</span><span class="s1">'Restarting preprocessing again.'</span><span class="p">)</span> + <span class="n">extract_patch_information</span><span class="p">(</span><span class="o">**</span><span class="n">kargs</span><span class="p">)</span> + + <span class="k">return</span> <span class="n">patch_info</span></div> + +<div class="viewcode-block" id="generate_patch_pipeline"><a class="viewcode-back" href="../../index.html#pathflowai.utils.generate_patch_pipeline">[docs]</a><span class="k">def</span> <span class="nf">generate_patch_pipeline</span><span class="p">(</span><span class="n">basename</span><span class="p">,</span> <span class="n">input_dir</span><span class="o">=</span><span class="s1">'./'</span><span class="p">,</span> <span class="n">annotations</span><span class="o">=</span><span class="p">[],</span> <span class="n">threshold</span><span class="o">=</span><span class="mf">0.5</span><span class="p">,</span> <span class="n">patch_size</span><span class="o">=</span><span class="mi">224</span><span class="p">,</span> <span 
class="n">out_db</span><span class="o">=</span><span class="s1">'patch_info.db'</span><span class="p">,</span> <span class="n">generate_finetune_segmentation</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span> <span class="n">target_class</span><span class="o">=</span><span class="mi">0</span><span class="p">,</span> <span class="n">intensity_threshold</span><span class="o">=</span><span class="mf">100.</span><span class="p">,</span> <span class="n">target_threshold</span><span class="o">=</span><span class="mf">0.</span><span class="p">,</span> <span class="n">adj_mask</span><span class="o">=</span><span class="s1">''</span><span class="p">,</span> <span class="n">basic_preprocess</span><span class="o">=</span><span class="kc">False</span><span class="p">):</span> + <span class="sd">"""Short summary.</span> + +<span class="sd"> Parameters</span> +<span class="sd"> ----------</span> +<span class="sd"> basename:str</span> +<span class="sd"> Patient ID.</span> +<span class="sd"> input_dir:str</span> +<span class="sd"> Input directory.</span> +<span class="sd"> annotations:list</span> +<span class="sd"> List of annotations to record, these can be different tissue types, must correspond with XML labels.</span> +<span class="sd"> threshold:float</span> +<span class="sd"> Value between 0 and 1 that indicates the minimum amount of patch that musn't be background for inclusion.</span> +<span class="sd"> patch_size:int</span> +<span class="sd"> Patch size of patches; this will become one of the tables.</span> +<span class="sd"> out_db:str</span> +<span class="sd"> Output SQL database.</span> +<span class="sd"> generate_finetune_segmentation:bool</span> +<span class="sd"> Deprecated.</span> +<span class="sd"> target_class:int</span> +<span class="sd"> Number of segmentation classes desired, from 0th class to target_class-1 will be annotated in SQL.</span> +<span class="sd"> intensity_threshold:float</span> +<span class="sd"> Value between 0 
and 255 that represents minimum intensity to not include as background. Will be modified with new transforms.</span> +<span class="sd"> target_threshold:float</span> +<span class="sd"> Deprecated.</span> +<span class="sd"> adj_mask:str</span> +<span class="sd"> Adjusted mask if performed binary opening operations in previous preprocessing step.</span> +<span class="sd"> basic_preprocess:bool</span> +<span class="sd"> Do not store patch level information.</span> +<span class="sd"> """</span> + <span class="n">patch_info</span> <span class="o">=</span> <span class="n">extract_patch_information</span><span class="p">(</span><span class="n">basename</span><span class="p">,</span> <span class="n">input_dir</span><span class="p">,</span> <span class="n">annotations</span><span class="p">,</span> <span class="n">threshold</span><span class="p">,</span> <span class="n">patch_size</span><span class="p">,</span> <span class="n">generate_finetune_segmentation</span><span class="o">=</span><span class="n">generate_finetune_segmentation</span><span class="p">,</span> <span class="n">target_class</span><span class="o">=</span><span class="n">target_class</span><span class="p">,</span> <span class="n">intensity_threshold</span><span class="o">=</span><span class="n">intensity_threshold</span><span class="p">,</span> <span class="n">target_threshold</span><span class="o">=</span><span class="n">target_threshold</span><span class="p">,</span> <span class="n">adj_mask</span><span class="o">=</span><span class="n">adj_mask</span><span class="p">,</span> <span class="n">basic_preprocess</span><span class="o">=</span><span class="n">basic_preprocess</span><span class="p">)</span> + <span class="n">conn</span> <span class="o">=</span> <span class="n">sqlite3</span><span class="o">.</span><span class="n">connect</span><span class="p">(</span><span class="n">out_db</span><span class="p">)</span> + <span class="n">patch_info</span><span class="o">.</span><span class="n">to_sql</span><span 
class="p">(</span><span class="nb">str</span><span class="p">(</span><span class="n">patch_size</span><span class="p">),</span> <span class="n">con</span><span class="o">=</span><span class="n">conn</span><span class="p">,</span> <span class="n">if_exists</span><span class="o">=</span><span class="s1">'append'</span><span class="p">)</span> + <span class="n">conn</span><span class="o">.</span><span class="n">close</span><span class="p">()</span></div> + + +<span class="c1"># now output csv</span> +<div class="viewcode-block" id="save_all_patch_info"><a class="viewcode-back" href="../../index.html#pathflowai.utils.save_all_patch_info">[docs]</a><span class="k">def</span> <span class="nf">save_all_patch_info</span><span class="p">(</span><span class="n">basenames</span><span class="p">,</span> <span class="n">input_dir</span><span class="o">=</span><span class="s1">'./'</span><span class="p">,</span> <span class="n">annotations</span><span class="o">=</span><span class="p">[],</span> <span class="n">threshold</span><span class="o">=</span><span class="mf">0.5</span><span class="p">,</span> <span class="n">patch_size</span><span class="o">=</span><span class="mi">224</span><span class="p">,</span> <span class="n">output_pkl</span><span class="o">=</span><span class="s1">'patch_info.pkl'</span><span class="p">):</span> + <span class="sd">"""Deprecated."""</span> + <span class="n">df</span><span class="o">=</span><span class="n">pd</span><span class="o">.</span><span class="n">concat</span><span class="p">([</span><span class="n">extract_patch_information</span><span class="p">(</span><span class="n">basename</span><span class="p">,</span> <span class="n">input_dir</span><span class="p">,</span> <span class="n">annotations</span><span class="p">,</span> <span class="n">threshold</span><span class="p">,</span> <span class="n">patch_size</span><span class="p">)</span> <span class="k">for</span> <span class="n">basename</span> <span class="ow">in</span> <span 
class="n">basenames</span><span class="p">])</span><span class="o">.</span><span class="n">reset_index</span><span class="p">(</span><span class="n">drop</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span> + <span class="n">df</span><span class="o">.</span><span class="n">to_pickle</span><span class="p">(</span><span class="n">output_pkl</span><span class="p">)</span></div> + +<span class="c1">#########</span> + + +<div class="viewcode-block" id="create_train_val_test"><a class="viewcode-back" href="../../index.html#pathflowai.utils.create_train_val_test">[docs]</a><span class="k">def</span> <span class="nf">create_train_val_test</span><span class="p">(</span><span class="n">train_val_test_pkl</span><span class="p">,</span> <span class="n">input_info_db</span><span class="p">,</span> <span class="n">patch_size</span><span class="p">):</span> + <span class="sd">"""Create dataframe that splits slides into training validation and test.</span> + +<span class="sd"> Parameters</span> +<span class="sd"> ----------</span> +<span class="sd"> train_val_test_pkl:str</span> +<span class="sd"> Pickle for training validation and test slides.</span> +<span class="sd"> input_info_db:str</span> +<span class="sd"> Patch information SQL database.</span> +<span class="sd"> patch_size:int</span> +<span class="sd"> Patch size looking to access.</span> + +<span class="sd"> Returns</span> +<span class="sd"> -------</span> +<span class="sd"> dataframe</span> +<span class="sd"> Train test validation splits.</span> + +<span class="sd"> """</span> + <span class="k">if</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">exists</span><span class="p">(</span><span class="n">train_val_test_pkl</span><span class="p">):</span> + <span class="n">IDs</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">read_pickle</span><span class="p">(</span><span 
class="n">train_val_test_pkl</span><span class="p">)</span> + <span class="k">else</span><span class="p">:</span> + <span class="n">conn</span> <span class="o">=</span> <span class="n">sqlite3</span><span class="o">.</span><span class="n">connect</span><span class="p">(</span><span class="n">input_info_db</span><span class="p">)</span> + <span class="n">df</span><span class="o">=</span><span class="n">pd</span><span class="o">.</span><span class="n">read_sql</span><span class="p">(</span><span class="s1">'select * from "</span><span class="si">{}</span><span class="s1">";'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">patch_size</span><span class="p">),</span><span class="n">con</span><span class="o">=</span><span class="n">conn</span><span class="p">)</span> + <span class="n">conn</span><span class="o">.</span><span class="n">close</span><span class="p">()</span> + <span class="n">IDs</span><span class="o">=</span><span class="n">df</span><span class="p">[</span><span class="s1">'ID'</span><span class="p">]</span><span class="o">.</span><span class="n">unique</span><span class="p">()</span> + <span class="n">IDs</span><span class="o">=</span><span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">(</span><span class="n">IDs</span><span class="p">,</span><span class="n">columns</span><span class="o">=</span><span class="p">[</span><span class="s1">'ID'</span><span class="p">])</span> + <span class="n">IDs_train</span><span class="p">,</span> <span class="n">IDs_test</span> <span class="o">=</span> <span class="n">train_test_split</span><span class="p">(</span><span class="n">IDs</span><span class="p">)</span> + <span class="n">IDs_train</span><span class="p">,</span> <span class="n">IDs_val</span> <span class="o">=</span> <span class="n">train_test_split</span><span class="p">(</span><span class="n">IDs_train</span><span class="p">)</span> + <span 
class="n">IDs_train</span><span class="p">[</span><span class="s1">'set'</span><span class="p">]</span><span class="o">=</span><span class="s1">'train'</span> + <span class="n">IDs_val</span><span class="p">[</span><span class="s1">'set'</span><span class="p">]</span><span class="o">=</span><span class="s1">'val'</span> + <span class="n">IDs_test</span><span class="p">[</span><span class="s1">'set'</span><span class="p">]</span><span class="o">=</span><span class="s1">'test'</span> + <span class="n">IDs</span><span class="o">=</span><span class="n">pd</span><span class="o">.</span><span class="n">concat</span><span class="p">([</span><span class="n">IDs_train</span><span class="p">,</span><span class="n">IDs_val</span><span class="p">,</span><span class="n">IDs_test</span><span class="p">])</span> + <span class="n">IDs</span><span class="o">.</span><span class="n">to_pickle</span><span class="p">(</span><span class="n">train_val_test_pkl</span><span class="p">)</span> + <span class="k">return</span> <span class="n">IDs</span></div> + +<span class="k">def</span> <span class="nf">modify_patch_info</span><span class="p">(</span><span class="n">input_info_db</span><span class="o">=</span><span class="s1">'patch_info.db'</span><span class="p">,</span> <span class="n">slide_labels</span><span class="o">=</span><span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">(),</span> <span class="n">pos_annotation_class</span><span class="o">=</span><span class="s1">''</span><span class="p">,</span> <span class="n">patch_size</span><span class="o">=</span><span class="mi">224</span><span class="p">,</span> <span class="n">segmentation</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span> <span class="n">other_annotations</span><span class="o">=</span><span class="p">[],</span> <span class="n">target_segmentation_class</span><span class="o">=-</span><span class="mi">1</span><span class="p">,</span> <span 
class="n">target_threshold</span><span class="o">=</span><span class="mf">0.</span><span class="p">,</span> <span class="n">classify_annotations</span><span class="o">=</span><span class="kc">False</span><span class="p">):</span> + <span class="sd">"""Modify the patch information to get ready for deep learning, incorporate whole slide labels if needed.</span> + +<span class="sd"> Parameters</span> +<span class="sd"> ----------</span> +<span class="sd"> input_info_db:str</span> +<span class="sd"> SQL DB file.</span> +<span class="sd"> slide_labels:dataframe</span> +<span class="sd"> Dataframe with whole slide labels.</span> +<span class="sd"> pos_annotation_class:str</span> +<span class="sd"> Tissue/annotation label to label with whole slide image label, if not supplied, any slide's patches receive the whole slide label.</span> +<span class="sd"> patch_size:int</span> +<span class="sd"> Patch size.</span> +<span class="sd"> segmentation:bool</span> +<span class="sd"> Segmentation?</span> +<span class="sd"> other_annotations:list</span> +<span class="sd"> Other annotations to access from patch information.</span> +<span class="sd"> target_segmentation_class:int</span> +<span class="sd"> Segmentation class to threshold.</span> +<span class="sd"> target_threshold:float</span> +<span class="sd"> Include patch if patch has target area greater than this.</span> +<span class="sd"> classify_annotations:bool</span> +<span class="sd"> Classifying annotations for pretraining, or final model?</span> + +<span class="sd"> Returns</span> +<span class="sd"> -------</span> +<span class="sd"> dataframe</span> +<span class="sd"> Modified patch information.</span> + +<span class="sd"> """</span> + <span class="n">conn</span> <span class="o">=</span> <span class="n">sqlite3</span><span class="o">.</span><span class="n">connect</span><span class="p">(</span><span class="n">input_info_db</span><span class="p">)</span> + <span class="n">df</span><span class="o">=</span><span 
class="n">pd</span><span class="o">.</span><span class="n">read_sql</span><span class="p">(</span><span class="s1">'select * from "</span><span class="si">{}</span><span class="s1">";'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">patch_size</span><span class="p">),</span><span class="n">con</span><span class="o">=</span><span class="n">conn</span><span class="p">)</span> + <span class="n">conn</span><span class="o">.</span><span class="n">close</span><span class="p">()</span> + <span class="c1">#print(df)</span> + <span class="n">df</span><span class="o">=</span><span class="n">df</span><span class="o">.</span><span class="n">drop_duplicates</span><span class="p">()</span> + <span class="n">df</span><span class="o">=</span><span class="n">df</span><span class="o">.</span><span class="n">loc</span><span class="p">[</span><span class="n">np</span><span class="o">.</span><span class="n">isin</span><span class="p">(</span><span class="n">df</span><span class="p">[</span><span class="s1">'ID'</span><span class="p">],</span><span class="n">slide_labels</span><span class="o">.</span><span class="n">index</span><span class="p">)]</span> + <span class="c1">#print(classify_annotations)</span> + <span class="k">if</span> <span class="ow">not</span> <span class="n">segmentation</span><span class="p">:</span> + <span class="k">if</span> <span class="n">classify_annotations</span><span class="p">:</span> + <span class="n">targets</span><span class="o">=</span><span class="n">df</span><span class="p">[</span><span class="s1">'annotation'</span><span class="p">]</span><span class="o">.</span><span class="n">unique</span><span class="p">()</span><span class="o">.</span><span class="n">tolist</span><span class="p">()</span> + <span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">targets</span><span class="p">)</span><span class="o">==</span><span class="mi">1</span><span class="p">:</span> + 
<span class="n">targets</span><span class="o">=</span><span class="nb">list</span><span class="p">(</span><span class="n">df</span><span class="o">.</span><span class="n">iloc</span><span class="p">[:,</span><span class="mi">5</span><span class="p">:])</span> + <span class="k">else</span><span class="p">:</span> + <span class="n">targets</span> <span class="o">=</span> <span class="nb">list</span><span class="p">(</span><span class="n">slide_labels</span><span class="p">)</span> + <span class="k">if</span> <span class="nb">type</span><span class="p">(</span><span class="n">pos_annotation_class</span><span class="p">)</span><span class="o">==</span><span class="nb">type</span><span class="p">(</span><span class="s1">''</span><span class="p">):</span> + <span class="n">included_annotations</span> <span class="o">=</span> <span class="p">[</span><span class="n">pos_annotation_class</span><span class="p">]</span> + <span class="k">else</span><span class="p">:</span> + <span class="n">included_annotations</span> <span class="o">=</span> <span class="n">copy</span><span class="o">.</span><span class="n">deepcopy</span><span class="p">(</span><span class="n">pos_annotation_class</span><span class="p">)</span> + <span class="n">included_annotations</span><span class="o">.</span><span class="n">extend</span><span class="p">(</span><span class="n">other_annotations</span><span class="p">)</span> + <span class="n">df</span><span class="o">=</span><span class="n">df</span><span class="p">[</span><span class="n">np</span><span class="o">.</span><span class="n">isin</span><span class="p">(</span><span class="n">df</span><span class="p">[</span><span class="s1">'annotation'</span><span class="p">],</span><span class="n">included_annotations</span><span class="p">)]</span> + <span class="k">for</span> <span class="n">target</span> <span class="ow">in</span> <span class="n">targets</span><span class="p">:</span> + <span class="n">df</span><span class="p">[</span><span 
class="n">target</span><span class="p">]</span><span class="o">=</span><span class="mf">0.</span> + <span class="k">for</span> <span class="n">slide</span> <span class="ow">in</span> <span class="n">slide_labels</span><span class="o">.</span><span class="n">index</span><span class="p">:</span> + <span class="n">slide_bool</span><span class="o">=</span><span class="p">((</span><span class="n">df</span><span class="p">[</span><span class="s1">'ID'</span><span class="p">]</span><span class="o">==</span><span class="n">slide</span><span class="p">)</span> <span class="o">&</span> <span class="n">df</span><span class="p">[</span><span class="n">pos_annotation_class</span><span class="p">]</span><span class="o">></span><span class="mf">0.</span><span class="p">)</span> <span class="k">if</span> <span class="n">pos_annotation_class</span> <span class="k">else</span> <span class="p">(</span><span class="n">df</span><span class="p">[</span><span class="s1">'ID'</span><span class="p">]</span><span class="o">==</span><span class="n">slide</span><span class="p">)</span> <span class="c1"># (df['annotation']==pos_annotation_class)</span> + <span class="k">if</span> <span class="n">slide_bool</span><span class="o">.</span><span class="n">sum</span><span class="p">():</span> + <span class="n">df</span><span class="o">.</span><span class="n">loc</span><span class="p">[</span><span class="n">slide_bool</span><span class="p">,</span><span class="n">targets</span><span class="p">]</span> <span class="o">=</span> <span class="n">slide_labels</span><span class="o">.</span><span class="n">loc</span><span class="p">[</span><span class="n">slide</span><span class="p">,</span><span class="n">targets</span><span class="p">]</span><span class="o">.</span><span class="n">values</span><span class="c1">#1.</span> + <span class="n">df</span><span class="p">[</span><span class="s1">'area'</span><span class="p">]</span><span class="o">=</span><span class="n">np</span><span class="o">.</span><span 
class="n">vectorize</span><span class="p">(</span><span class="k">lambda</span> <span class="n">i</span><span class="p">:</span> <span class="n">df</span><span class="o">.</span><span class="n">iloc</span><span class="p">[</span><span class="n">i</span><span class="p">][</span><span class="n">df</span><span class="o">.</span><span class="n">iloc</span><span class="p">[</span><span class="n">i</span><span class="p">][</span><span class="s1">'annotation'</span><span class="p">]])(</span><span class="n">np</span><span class="o">.</span><span class="n">arange</span><span class="p">(</span><span class="n">df</span><span class="o">.</span><span class="n">shape</span><span class="p">[</span><span class="mi">0</span><span class="p">]))</span> + <span class="k">if</span> <span class="s1">'area'</span> <span class="ow">in</span> <span class="nb">list</span><span class="p">(</span><span class="n">df</span><span class="p">)</span> <span class="ow">and</span> <span class="n">target_threshold</span><span class="o">></span><span class="mf">0.</span><span class="p">:</span> + <span class="n">df</span><span class="o">=</span><span class="n">df</span><span class="o">.</span><span class="n">loc</span><span class="p">[</span><span class="n">df</span><span class="p">[</span><span class="s1">'area'</span><span class="p">]</span><span class="o">>=</span><span class="n">target_threshold</span><span class="p">]</span> + <span class="k">else</span><span class="p">:</span> + <span class="n">df</span><span class="p">[</span><span class="s1">'target'</span><span class="p">]</span><span class="o">=</span><span class="mf">0.</span> + <span class="k">if</span> <span class="n">target_segmentation_class</span> <span class="o">>=</span><span class="mi">0</span><span class="p">:</span> + <span class="n">df</span><span class="o">=</span><span class="n">df</span><span class="o">.</span><span class="n">loc</span><span class="p">[</span><span class="n">df</span><span class="p">[</span><span 
class="nb">str</span><span class="p">(</span><span class="n">target_segmentation_class</span><span class="p">)]</span><span class="o">>=</span><span class="n">target_threshold</span><span class="p">]</span> + <span class="k">return</span> <span class="n">df</span> + +<div class="viewcode-block" id="npy2da"><a class="viewcode-back" href="../../index.html#pathflowai.utils.npy2da">[docs]</a><span class="k">def</span> <span class="nf">npy2da</span><span class="p">(</span><span class="n">npy_file</span><span class="p">):</span> + <span class="sd">"""Numpy to dask array.</span> + +<span class="sd"> Parameters</span> +<span class="sd"> ----------</span> +<span class="sd"> npy_file:str</span> +<span class="sd"> Input npy file.</span> + +<span class="sd"> Returns</span> +<span class="sd"> -------</span> +<span class="sd"> dask.array</span> +<span class="sd"> Converted numpy array to dask.</span> + +<span class="sd"> """</span> + <span class="k">return</span> <span class="n">da</span><span class="o">.</span><span class="n">from_array</span><span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">load</span><span class="p">(</span><span class="n">npy_file</span><span class="p">,</span> <span class="n">mmap_mode</span> <span class="o">=</span> <span class="s1">'r+'</span><span class="p">))</span></div> + +<div class="viewcode-block" id="grab_interior_points"><a class="viewcode-back" href="../../index.html#pathflowai.utils.grab_interior_points">[docs]</a><span class="k">def</span> <span class="nf">grab_interior_points</span><span class="p">(</span><span class="n">xml_file</span><span class="p">,</span> <span class="n">img_size</span><span class="p">,</span> <span class="n">annotations</span><span class="o">=</span><span class="p">[]):</span> + <span class="sd">"""Deprecated."""</span> + <span class="n">interior_point_dict</span> <span class="o">=</span> <span class="p">{}</span> + <span class="k">for</span> <span class="n">annotation</span> <span 
class="ow">in</span> <span class="n">annotations</span><span class="p">:</span> + <span class="k">try</span><span class="p">:</span> + <span class="n">interior_point_dict</span><span class="p">[</span><span class="n">annotation</span><span class="p">]</span> <span class="o">=</span> <span class="n">parse_coord_return_boxes</span><span class="p">(</span><span class="n">xml_file</span><span class="p">,</span> <span class="n">annotation</span><span class="p">,</span> <span class="n">return_coords</span> <span class="o">=</span> <span class="kc">False</span><span class="p">)</span> <span class="c1"># boxes2interior(img_size,</span> + <span class="k">except</span><span class="p">:</span> + <span class="n">interior_point_dict</span><span class="p">[</span><span class="n">annotation</span><span class="p">]</span> <span class="o">=</span> <span class="p">[]</span><span class="c1">#np.array([[],[]])</span> + <span class="k">return</span> <span class="n">interior_point_dict</span></div> + +<div class="viewcode-block" id="boxes2interior"><a class="viewcode-back" href="../../index.html#pathflowai.utils.boxes2interior">[docs]</a><span class="k">def</span> <span class="nf">boxes2interior</span><span class="p">(</span><span class="n">img_size</span><span class="p">,</span> <span class="n">polygons</span><span class="p">):</span> + <span class="sd">"""Deprecated."""</span> + <span class="n">img</span> <span class="o">=</span> <span class="n">Image</span><span class="o">.</span><span class="n">new</span><span class="p">(</span><span class="s1">'L'</span><span class="p">,</span> <span class="n">img_size</span><span class="p">,</span> <span class="mi">0</span><span class="p">)</span> + <span class="k">for</span> <span class="n">polygon</span> <span class="ow">in</span> <span class="n">polygons</span><span class="p">:</span> + <span class="n">ImageDraw</span><span class="o">.</span><span class="n">Draw</span><span class="p">(</span><span class="n">img</span><span 
class="p">)</span><span class="o">.</span><span class="n">polygon</span><span class="p">(</span><span class="n">polygon</span><span class="p">,</span> <span class="n">outline</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">fill</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span> + <span class="n">mask</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">array</span><span class="p">(</span><span class="n">img</span><span class="p">)</span><span class="o">.</span><span class="n">nonzero</span><span class="p">()</span> + <span class="c1">#mask = (np.ones(len(mask[0])),mask)</span> + <span class="k">return</span> <span class="n">mask</span></div> + +<div class="viewcode-block" id="parse_coord_return_boxes"><a class="viewcode-back" href="../../index.html#pathflowai.utils.parse_coord_return_boxes">[docs]</a><span class="k">def</span> <span class="nf">parse_coord_return_boxes</span><span class="p">(</span><span class="n">xml_file</span><span class="p">,</span> <span class="n">annotation_name</span> <span class="o">=</span> <span class="s1">''</span><span class="p">,</span> <span class="n">return_coords</span> <span class="o">=</span> <span class="kc">False</span><span class="p">):</span> + <span class="sd">"""Get list of shapely objects for each annotation in the XML object.</span> + +<span class="sd"> Parameters</span> +<span class="sd"> ----------</span> +<span class="sd"> xml_file:str</span> +<span class="sd"> Annotation file.</span> +<span class="sd"> annotation_name:str</span> +<span class="sd"> Name of xml annotation.</span> +<span class="sd"> return_coords:bool</span> +<span class="sd"> Just return list of coords over shapes.</span> + +<span class="sd"> Returns</span> +<span class="sd"> -------</span> +<span class="sd"> list</span> +<span class="sd"> List of shapely objects.</span> + +<span class="sd"> """</span> + <span class="n">boxes</span> 
<span class="o">=</span> <span class="p">[]</span> + <span class="n">xml_data</span> <span class="o">=</span> <span class="n">BeautifulSoup</span><span class="p">(</span><span class="nb">open</span><span class="p">(</span><span class="n">xml_file</span><span class="p">),</span><span class="s1">'html'</span><span class="p">)</span> + <span class="c1">#print(xml_data.findAll('annotation'))</span> + <span class="c1">#print(xml_data.findAll('Annotation'))</span> + <span class="k">for</span> <span class="n">annotation</span> <span class="ow">in</span> <span class="n">xml_data</span><span class="o">.</span><span class="n">findAll</span><span class="p">(</span><span class="s1">'annotation'</span><span class="p">):</span> + <span class="k">if</span> <span class="n">annotation</span><span class="p">[</span><span class="s1">'partofgroup'</span><span class="p">]</span> <span class="o">==</span> <span class="n">annotation_name</span><span class="p">:</span> + <span class="k">for</span> <span class="n">coordinates</span> <span class="ow">in</span> <span class="n">annotation</span><span class="o">.</span><span class="n">findAll</span><span class="p">(</span><span class="s1">'coordinates'</span><span class="p">):</span> + <span class="c1"># FIXME may need to change x and y coordinates</span> + <span class="n">coords</span> <span class="o">=</span> <span class="p">[(</span><span class="n">coordinate</span><span class="p">[</span><span class="s1">'x'</span><span class="p">],</span><span class="n">coordinate</span><span class="p">[</span><span class="s1">'y'</span><span class="p">])</span> <span class="k">for</span> <span class="n">coordinate</span> <span class="ow">in</span> <span class="n">coordinates</span><span class="o">.</span><span class="n">findAll</span><span class="p">(</span><span class="s1">'coordinate'</span><span class="p">)]</span> + <span class="k">if</span> <span class="n">return_coords</span><span class="p">:</span> + <span class="n">boxes</span><span 
class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">coords</span><span class="p">)</span> + <span class="k">else</span><span class="p">:</span> + <span class="n">boxes</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">Polygon</span><span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">array</span><span class="p">(</span><span class="n">coords</span><span class="p">)</span><span class="o">.</span><span class="n">astype</span><span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">float</span><span class="p">)))</span> + <span class="k">return</span> <span class="n">boxes</span></div> + +<div class="viewcode-block" id="is_coords_in_box"><a class="viewcode-back" href="../../index.html#pathflowai.utils.is_coords_in_box">[docs]</a><span class="k">def</span> <span class="nf">is_coords_in_box</span><span class="p">(</span><span class="n">coords</span><span class="p">,</span><span class="n">patch_size</span><span class="p">,</span><span class="n">boxes</span><span class="p">):</span> + <span class="sd">"""Get area of annotation in patch.</span> + +<span class="sd"> Parameters</span> +<span class="sd"> ----------</span> +<span class="sd"> coords:array</span> +<span class="sd"> X,Y coordinates of patch.</span> +<span class="sd"> patch_size:int</span> +<span class="sd"> Patch size.</span> +<span class="sd"> boxes:list</span> +<span class="sd"> Shapely objects for annotations.</span> + +<span class="sd"> Returns</span> +<span class="sd"> -------</span> +<span class="sd"> float</span> +<span class="sd"> Area of annotation type.</span> + +<span class="sd"> """</span> + <span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">boxes</span><span class="p">):</span> + <span class="n">points</span><span class="o">=</span><span class="n">Polygon</span><span class="p">(</span><span 
class="n">np</span><span class="o">.</span><span class="n">array</span><span class="p">([[</span><span class="mi">0</span><span class="p">,</span><span class="mi">0</span><span class="p">],[</span><span class="mi">1</span><span class="p">,</span><span class="mi">0</span><span class="p">],[</span><span class="mi">1</span><span class="p">,</span><span class="mi">1</span><span class="p">],[</span><span class="mi">0</span><span class="p">,</span><span class="mi">1</span><span class="p">]])</span><span class="o">*</span><span class="n">patch_size</span><span class="o">+</span><span class="n">coords</span><span class="p">)</span> + <span class="n">area</span><span class="o">=</span><span class="n">points</span><span class="o">.</span><span class="n">intersection</span><span class="p">(</span><span class="n">boxes</span><span class="p">[</span><span class="mi">0</span><span class="p">])</span><span class="o">.</span><span class="n">area</span><span class="o">/</span><span class="nb">float</span><span class="p">(</span><span class="n">points</span><span class="o">.</span><span class="n">area</span><span class="p">)</span><span class="c1">#any(list(map(lambda x: x.intersects(points),boxes)))#return_image_coord(nx=nx,ny=ny,xi=xi,yi=yi, output_point=output_point)</span> + <span class="k">else</span><span class="p">:</span> + <span class="n">area</span><span class="o">=</span><span class="mf">0.</span> + <span class="k">return</span> <span class="n">area</span></div> + +<div class="viewcode-block" id="is_image_in_boxes"><a class="viewcode-back" href="../../index.html#pathflowai.utils.is_image_in_boxes">[docs]</a><span class="k">def</span> <span class="nf">is_image_in_boxes</span><span class="p">(</span><span class="n">image_coord_dict</span><span class="p">,</span> <span class="n">boxes</span><span class="p">):</span> + <span class="sd">"""Find if image intersects with annotations.</span> + +<span class="sd"> Parameters</span> +<span class="sd"> ----------</span> +<span 
class="sd"> image_coord_dict:dict</span> +<span class="sd"> Dictionary of patches.</span> +<span class="sd"> boxes:list</span> +<span class="sd"> Shapely annotation shapes.</span> + +<span class="sd"> Returns</span> +<span class="sd"> -------</span> +<span class="sd"> dict</span> +<span class="sd"> Dictionary of whether image intersects with any of the annotations.</span> + +<span class="sd"> """</span> + <span class="k">return</span> <span class="p">{</span><span class="n">image</span><span class="p">:</span> <span class="nb">any</span><span class="p">(</span><span class="nb">list</span><span class="p">(</span><span class="nb">map</span><span class="p">(</span><span class="k">lambda</span> <span class="n">x</span><span class="p">:</span> <span class="n">x</span><span class="o">.</span><span class="n">intersects</span><span class="p">(</span><span class="n">image_coord_dict</span><span class="p">[</span><span class="n">image</span><span class="p">]),</span><span class="n">boxes</span><span class="p">)))</span> <span class="k">for</span> <span class="n">image</span> <span class="ow">in</span> <span class="n">image_coord_dict</span><span class="p">}</span></div> + +<div class="viewcode-block" id="images2coord_dict"><a class="viewcode-back" href="../../index.html#pathflowai.utils.images2coord_dict">[docs]</a><span class="k">def</span> <span class="nf">images2coord_dict</span><span class="p">(</span><span class="n">images</span><span class="p">,</span> <span class="n">output_point</span><span class="o">=</span><span class="kc">False</span><span class="p">):</span> + <span class="sd">"""Deprecated"""</span> + <span class="k">return</span> <span class="p">{</span><span class="n">image</span><span class="p">:</span> <span class="n">image2coords</span><span class="p">(</span><span class="n">image</span><span class="p">,</span> <span class="n">output_point</span><span class="p">)</span> <span class="k">for</span> <span class="n">image</span> <span class="ow">in</span> <span 
class="n">images</span><span class="p">}</span></div> + +<div class="viewcode-block" id="dir2images"><a class="viewcode-back" href="../../index.html#pathflowai.utils.dir2images">[docs]</a><span class="k">def</span> <span class="nf">dir2images</span><span class="p">(</span><span class="n">image_dir</span><span class="p">):</span> + <span class="sd">"""Deprecated"""</span> + <span class="k">return</span> <span class="n">glob</span><span class="o">.</span><span class="n">glob</span><span class="p">(</span><span class="n">join</span><span class="p">(</span><span class="n">image_dir</span><span class="p">,</span><span class="s1">'*.jpg'</span><span class="p">))</span></div> + +<div class="viewcode-block" id="return_image_in_boxes_dict"><a class="viewcode-back" href="../../index.html#pathflowai.utils.return_image_in_boxes_dict">[docs]</a><span class="k">def</span> <span class="nf">return_image_in_boxes_dict</span><span class="p">(</span><span class="n">image_dir</span><span class="p">,</span> <span class="n">xml_file</span><span class="p">,</span> <span class="n">annotation</span><span class="o">=</span><span class="s1">''</span><span class="p">):</span> + <span class="sd">"""Deprecated"""</span> + <span class="n">boxes</span> <span class="o">=</span> <span class="n">parse_coord_return_boxes</span><span class="p">(</span><span class="n">xml_file</span><span class="p">,</span> <span class="n">annotation</span><span class="p">)</span> + <span class="n">images</span> <span class="o">=</span> <span class="n">dir2images</span><span class="p">(</span><span class="n">image_dir</span><span class="p">)</span> + <span class="n">coord_dict</span> <span class="o">=</span> <span class="n">images2coord_dict</span><span class="p">(</span><span class="n">images</span><span class="p">)</span> + <span class="k">return</span> <span class="n">is_image_in_boxes</span><span class="p">(</span><span class="n">image_coord_dict</span><span class="o">=</span><span class="n">coord_dict</span><span 
class="p">,</span><span class="n">boxes</span><span class="o">=</span><span class="n">boxes</span><span class="p">)</span></div> + +<div class="viewcode-block" id="image2coords"><a class="viewcode-back" href="../../index.html#pathflowai.utils.image2coords">[docs]</a><span class="k">def</span> <span class="nf">image2coords</span><span class="p">(</span><span class="n">image_file</span><span class="p">,</span> <span class="n">output_point</span><span class="o">=</span><span class="kc">False</span><span class="p">):</span> + <span class="sd">"""Deprecated."""</span> + <span class="n">nx</span><span class="p">,</span><span class="n">ny</span><span class="p">,</span><span class="n">yi</span><span class="p">,</span><span class="n">xi</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">array</span><span class="p">(</span><span class="n">image_file</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s1">'/'</span><span class="p">)[</span><span class="o">-</span><span class="mi">1</span><span class="p">]</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s1">'.'</span><span class="p">)[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s1">'_'</span><span class="p">)[</span><span class="mi">1</span><span class="p">:])</span><span class="o">.</span><span class="n">astype</span><span class="p">(</span><span class="nb">int</span><span class="p">)</span><span class="o">.</span><span class="n">tolist</span><span class="p">()</span> + <span class="k">return</span> <span class="n">return_image_coord</span><span class="p">(</span><span class="n">nx</span><span class="o">=</span><span class="n">nx</span><span class="p">,</span><span class="n">ny</span><span class="o">=</span><span class="n">ny</span><span class="p">,</span><span class="n">xi</span><span 
class="o">=</span><span class="n">xi</span><span class="p">,</span><span class="n">yi</span><span class="o">=</span><span class="n">yi</span><span class="p">,</span> <span class="n">output_point</span><span class="o">=</span><span class="n">output_point</span><span class="p">)</span></div> + +<div class="viewcode-block" id="retain_images"><a class="viewcode-back" href="../../index.html#pathflowai.utils.retain_images">[docs]</a><span class="k">def</span> <span class="nf">retain_images</span><span class="p">(</span><span class="n">image_dir</span><span class="p">,</span><span class="n">xml_file</span><span class="p">,</span> <span class="n">annotation</span><span class="o">=</span><span class="s1">''</span><span class="p">):</span> + <span class="sd">"""Deprecated"""</span> + <span class="n">image_in_boxes_dict</span><span class="o">=</span><span class="n">return_image_in_boxes_dict</span><span class="p">(</span><span class="n">image_dir</span><span class="p">,</span><span class="n">xml_file</span><span class="p">,</span> <span class="n">annotation</span><span class="p">)</span> + <span class="k">return</span> <span class="p">[</span><span class="n">img</span> <span class="k">for</span> <span class="n">img</span> <span class="ow">in</span> <span class="n">image_in_boxes_dict</span> <span class="k">if</span> <span class="n">image_in_boxes_dict</span><span class="p">[</span><span class="n">img</span><span class="p">]]</span></div> + +<div class="viewcode-block" id="return_image_coord"><a class="viewcode-back" href="../../index.html#pathflowai.utils.return_image_coord">[docs]</a><span class="k">def</span> <span class="nf">return_image_coord</span><span class="p">(</span><span class="n">nx</span><span class="o">=</span><span class="mi">0</span><span class="p">,</span><span class="n">ny</span><span class="o">=</span><span class="mi">0</span><span class="p">,</span><span class="n">xl</span><span class="o">=</span><span class="mi">3333</span><span class="p">,</span><span 
class="n">yl</span><span class="o">=</span><span class="mi">3333</span><span class="p">,</span><span class="n">xi</span><span class="o">=</span><span class="mi">0</span><span class="p">,</span><span class="n">yi</span><span class="o">=</span><span class="mi">0</span><span class="p">,</span><span class="n">xc</span><span class="o">=</span><span class="mi">3</span><span class="p">,</span><span class="n">yc</span><span class="o">=</span><span class="mi">3</span><span class="p">,</span><span class="n">dimx</span><span class="o">=</span><span class="mi">224</span><span class="p">,</span><span class="n">dimy</span><span class="o">=</span><span class="mi">224</span><span class="p">,</span> <span class="n">output_point</span><span class="o">=</span><span class="kc">False</span><span class="p">):</span> + <span class="sd">"""Deprecated"""</span> + <span class="k">if</span> <span class="n">output_point</span><span class="p">:</span> + <span class="k">return</span> <span class="n">np</span><span class="o">.</span><span class="n">array</span><span class="p">([</span><span class="n">xc</span><span class="p">,</span><span class="n">yc</span><span class="p">])</span><span class="o">*</span><span class="n">np</span><span class="o">.</span><span class="n">array</span><span class="p">([</span><span class="n">nx</span><span class="o">*</span><span class="n">xl</span><span class="o">+</span><span class="n">xi</span><span class="o">+</span><span class="n">dimx</span><span class="o">/</span><span class="mi">2</span><span class="p">,</span><span class="n">ny</span><span class="o">*</span><span class="n">yl</span><span class="o">+</span><span class="n">yi</span><span class="o">+</span><span class="n">dimy</span><span class="o">/</span><span class="mi">2</span><span class="p">])</span> + <span class="k">else</span><span class="p">:</span> + <span class="n">static_point</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">array</span><span 
class="p">([</span><span class="n">nx</span><span class="o">*</span><span class="n">xl</span><span class="o">+</span><span class="n">xi</span><span class="p">,</span><span class="n">ny</span><span class="o">*</span><span class="n">yl</span><span class="o">+</span><span class="n">yi</span><span class="p">])</span> + <span class="n">points</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">array</span><span class="p">([(</span><span class="n">np</span><span class="o">.</span><span class="n">array</span><span class="p">([</span><span class="n">xc</span><span class="p">,</span><span class="n">yc</span><span class="p">])</span><span class="o">*</span><span class="p">(</span><span class="n">static_point</span><span class="o">+</span><span class="n">np</span><span class="o">.</span><span class="n">array</span><span class="p">(</span><span class="n">new_point</span><span class="p">)))</span><span class="o">.</span><span class="n">tolist</span><span class="p">()</span> <span class="k">for</span> <span class="n">new_point</span> <span class="ow">in</span> <span class="p">[[</span><span class="mi">0</span><span class="p">,</span><span class="mi">0</span><span class="p">],[</span><span class="n">dimx</span><span class="p">,</span><span class="mi">0</span><span class="p">],[</span><span class="n">dimx</span><span class="p">,</span><span class="n">dimy</span><span class="p">],[</span><span class="mi">0</span><span class="p">,</span><span class="n">dimy</span><span class="p">]]])</span> + <span class="k">return</span> <span class="n">Polygon</span><span class="p">(</span><span class="n">points</span><span class="p">)</span><span class="c1">#Point(*((np.array([xc,yc])*np.array([nx*xl+xi+dimx/2,ny*yl+yi+dimy/2])).tolist())) # [::-1]</span></div> + +<div class="viewcode-block" id="fix_name"><a class="viewcode-back" href="../../index.html#pathflowai.utils.fix_name">[docs]</a><span class="k">def</span> <span class="nf">fix_name</span><span 
class="p">(</span><span class="n">basename</span><span class="p">):</span> + <span class="sd">"""Fixes illegitimate basename, deprecated."""</span> + <span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">basename</span><span class="p">)</span> <span class="o">&lt;</span> <span class="mi">3</span><span class="p">:</span> + <span class="k">return</span> <span class="s1">'</span><span class="si">{}</span><span class="s1">0</span><span class="si">{}</span><span class="s1">'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="o">*</span><span class="n">basename</span><span class="p">)</span> + <span class="k">return</span> <span class="n">basename</span></div> + +<div class="viewcode-block" id="fix_names"><a class="viewcode-back" href="../../index.html#pathflowai.utils.fix_names">[docs]</a><span class="k">def</span> <span class="nf">fix_names</span><span class="p">(</span><span class="n">file_dir</span><span class="p">):</span> + <span class="sd">"""Fixes basenames, deprecated."""</span> + <span class="k">for</span> <span class="n">filename</span> <span class="ow">in</span> <span class="n">glob</span><span class="o">.</span><span class="n">glob</span><span class="p">(</span><span class="n">join</span><span class="p">(</span><span class="n">file_dir</span><span class="p">,</span><span class="s1">'*'</span><span class="p">)):</span> + <span class="n">basename</span> <span class="o">=</span> <span class="n">filename</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s1">'/'</span><span class="p">)[</span><span class="o">-</span><span class="mi">1</span><span class="p">]</span> + <span class="n">basename</span><span class="p">,</span> <span class="n">suffix</span> <span class="o">=</span> <span class="n">basename</span><span class="p">[:</span><span class="n">basename</span><span class="o">.</span><span class="n">rfind</span><span 
class="p">(</span><span class="s1">'.'</span><span class="p">)],</span> <span class="n">basename</span><span class="p">[</span><span class="n">basename</span><span class="o">.</span><span class="n">rfind</span><span class="p">(</span><span class="s1">'.'</span><span class="p">):]</span> + <span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">basename</span><span class="p">)</span> <span class="o">&lt;</span> <span class="mi">3</span><span class="p">:</span> + <span class="n">new_filename</span><span class="o">=</span><span class="n">join</span><span class="p">(</span><span class="n">file_dir</span><span class="p">,</span><span class="s1">'</span><span class="si">{}</span><span class="s1">0</span><span class="si">{}{}</span><span class="s1">'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="o">*</span><span class="n">basename</span><span class="p">,</span><span class="n">suffix</span><span class="p">))</span> + <span class="nb">print</span><span class="p">(</span><span class="n">filename</span><span class="p">,</span><span class="n">new_filename</span><span class="p">)</span> + <span class="n">subprocess</span><span class="o">.</span><span class="n">call</span><span class="p">(</span><span class="s1">'mv </span><span class="si">{}</span><span class="s1"> </span><span class="si">{}</span><span class="s1">'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">filename</span><span class="p">,</span><span class="n">new_filename</span><span class="p">),</span><span class="n">shell</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span></div> + +<span class="c1">#######</span> + +<span class="c1">#@pysnooper.snoop('seg2npy.log')</span> +<div class="viewcode-block" id="segmentation_predictions2npy"><a class="viewcode-back" href="../../index.html#pathflowai.utils.segmentation_predictions2npy">[docs]</a><span 
class="k">def</span> <span class="nf">segmentation_predictions2npy</span><span class="p">(</span><span class="n">y_pred</span><span class="p">,</span> <span class="n">patch_info</span><span class="p">,</span> <span class="n">segmentation_map</span><span class="p">,</span> <span class="n">npy_output</span><span class="p">):</span> + <span class="sd">"""Convert segmentation predictions from model to numpy masks.</span> + +<span class="sd"> Parameters</span> +<span class="sd"> ----------</span> +<span class="sd"> y_pred:list</span> +<span class="sd"> List of patch segmentation masks</span> +<span class="sd"> patch_info:dataframe</span> +<span class="sd"> Patch information from DB.</span> +<span class="sd"> segmentation_map:array</span> +<span class="sd"> Existing segmentation mask.</span> +<span class="sd"> npy_output:str</span> +<span class="sd"> Output npy file.</span> +<span class="sd"> """</span> + <span class="n">segmentation_map</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">zeros</span><span class="p">(</span><span class="n">segmentation_map</span><span class="o">.</span><span class="n">shape</span><span class="p">[</span><span class="o">-</span><span class="mi">2</span><span class="p">:])</span> + <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">patch_info</span><span class="o">.</span><span class="n">shape</span><span class="p">[</span><span class="mi">0</span><span class="p">]):</span> + <span class="n">patch_info_i</span> <span class="o">=</span> <span class="n">patch_info</span><span class="o">.</span><span class="n">iloc</span><span class="p">[</span><span class="n">i</span><span class="p">]</span> + <span class="n">ID</span> <span class="o">=</span> <span class="n">patch_info_i</span><span class="p">[</span><span class="s1">'ID'</span><span class="p">]</span> + <span class="n">xs</span> <span 
class="o">=</span> <span class="n">patch_info_i</span><span class="p">[</span><span class="s1">'x'</span><span class="p">]</span> + <span class="n">ys</span> <span class="o">=</span> <span class="n">patch_info_i</span><span class="p">[</span><span class="s1">'y'</span><span class="p">]</span> + <span class="n">patch_size</span> <span class="o">=</span> <span class="n">patch_info_i</span><span class="p">[</span><span class="s1">'patch_size'</span><span class="p">]</span> + <span class="n">prediction</span><span class="o">=</span><span class="n">y_pred</span><span class="p">[</span><span class="n">i</span><span class="p">,</span><span class="o">...</span><span class="p">]</span> + <span class="n">pred_shape</span><span class="o">=</span><span class="n">prediction</span><span class="o">.</span><span class="n">shape</span> + <span class="n">segmentation_map</span><span class="p">[</span><span class="n">xs</span><span class="p">:</span><span class="n">xs</span><span class="o">+</span><span class="n">patch_size</span><span class="p">,</span><span class="n">ys</span><span class="p">:</span><span class="n">ys</span><span class="o">+</span><span class="n">patch_size</span><span class="p">]</span> <span class="o">=</span> <span class="n">prediction</span> + <span class="n">os</span><span class="o">.</span><span class="n">makedirs</span><span class="p">(</span><span class="n">npy_output</span><span class="p">[:</span><span class="n">npy_output</span><span class="o">.</span><span class="n">rfind</span><span class="p">(</span><span class="s1">'/'</span><span class="p">)],</span><span class="n">exist_ok</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span> + <span class="n">np</span><span class="o">.</span><span class="n">save</span><span class="p">(</span><span class="n">npy_output</span><span class="p">,</span><span class="n">segmentation_map</span><span class="o">.</span><span class="n">astype</span><span class="p">(</span><span 
class="n">np</span><span class="o">.</span><span class="n">uint8</span><span class="p">))</span></div> +</pre></div> + + </div> + + </div> + <footer> + + + <hr/> + + <div role="contentinfo"> + <p> + © Copyright 2019, Joshua Levy + + </p> + </div> + Built with <a href="http://sphinx-doc.org/">Sphinx</a> using a <a href="https://github.com/rtfd/sphinx_rtd_theme">theme</a> provided by <a href="https://readthedocs.org">Read the Docs</a>. + +</footer> + + </div> + </div> + + </section> + + </div> + + + + <script type="text/javascript"> + jQuery(function () { + SphinxRtdTheme.Navigation.enable(true); + }); + </script> + + + + + + +</body> +</html> \ No newline at end of file