Diff of /aggmap/utils/matrixopt.py [000000] .. [9e8054]

Switch to side-by-side view

--- a
+++ b/aggmap/utils/matrixopt.py
@@ -0,0 +1,235 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Sun Aug 25 20:29:36 2019
+
+@author: wanxiang.shen@u.nus.edu
+
+matrix operation
+
+"""
+
+import numpy as np
+from lapjv import lapjv
+from scipy.signal import convolve2d
+from scipy.spatial.distance import cdist
+
+
+class Scatter2Grid:
+    
+    def __init__(self):  
+        """assign x,y coords to gird numpy array"""
+        self.fmap_shape = None
+        self.indices = None
+        self.indices_list = None
+        
+    def fit(self, df, split_channels = True, channel_col = 'Channels'):
+        """
+        parameters
+        ------------------
+        df: dataframe with x, y columns
+        split_channels: bool, if True, will apply split by group
+        channel_col: column in df.columns, split to groups by this col        
+        
+        """
+        df['idx'] = range(len(df))
+        
+        embedding_2d = df[['x','y']].values
+        N = len(df)
+
+        size1 = int(np.ceil(np.sqrt(N)))
+        size2 = int(np.ceil(N/size1))
+        grid_size = (size1, size2)
+        
+        grid = np.dstack(np.meshgrid(np.linspace(0, 1, size2), 
+                                     np.linspace(0, 1, size1))).reshape(-1, 2)
+        grid_map = grid[:N]
+        cost_matrix = cdist(grid_map, embedding_2d, "sqeuclidean").astype(float)
+        cost_matrix = cost_matrix * (100000 / cost_matrix.max())
+        row_asses, col_asses, _ = lapjv(cost_matrix)
+
+        self.row_asses = row_asses
+        self.col_asses = col_asses
+        self.fmap_shape = grid_size
+        self.indices = col_asses
+
+        self.channel_col = channel_col
+        self.split_channels = split_channels
+        df['indices'] = self.indices
+        self.df = df
+        
+        if self.split_channels:
+            def _apply_split(x):
+                return x[['idx', 'indices']].to_dict('list')
+            sidx = df.groupby(channel_col).apply(_apply_split)      
+            channels = sidx.index.tolist()
+            indices_list = sidx.tolist()            
+            self.channels = channels
+            self.indices_list = indices_list
+
+            
+    def refit_c(self, df):
+        """
+        parameters
+        ------------------
+        df: dataframe with x, y columns
+      
+        
+        """
+        df['idx'] = range(len(df))
+        df['indices'] = self.indices
+        self.df = df
+        
+        if self.split_channels:
+            def _apply_split(x):
+                return x[['idx', 'indices']].to_dict('list')
+            sidx = df.groupby(self.channel_col).apply(_apply_split)      
+            channels = sidx.index.tolist()
+            indices_list = sidx.tolist()            
+            self.channels = channels
+            self.indices_list = indices_list
+            
+            
+    def transform(self, vector_1d):
+        """vector_1d: extracted features
+        """             
+        ### linear assignment map ###
+        M, N = self.fmap_shape
+
+        if self.split_channels:
+            arr_res = []
+            for idict in self.indices_list:
+
+                indices = idict['indices']
+                idx = idict['idx']
+
+                arr = np.zeros(self.fmap_shape)
+                arr_1d = arr.reshape(M*N, )
+                arr_1d[indices] = vector_1d[idx]
+                arr = arr_1d.reshape(M, N)  
+                arr_res.append(arr) 
+            arr_res = np.stack(arr_res, axis=-1)
+        else:
+            arr_res = np.zeros(self.fmap_shape)
+            arr_1d = arr_res.reshape(M*N, )
+            arr_1d[self.indices] = vector_1d
+            arr_res = arr_1d.reshape(M, N, 1)          
+        return arr_res
+    
+
+    
+class Scatter2Array:
+    
+    def __init__(self, fmap_shape = (128,128)):  
+        """convert x,y coords to numpy array"""
+        self.fmap_shape = fmap_shape
+        self.indices = None
+        self.indices_list = None
+        
+    def _fit(self, df):
+        """df: dataframe with x, y columns"""
+        M, N = self.fmap_shape
+        self.X = np.linspace(df.x.min(), df.x.max(), M)
+        self.Y = np.linspace(df.y.min(), df.y.max(), N)
+
+    
+    def _transform(self, dfnew):
+        """dfnew: dataframe with x, y columns
+           in case we need to split channels
+        """             
+        x = dfnew.x.values
+        y = dfnew.y.values
+        M, N = self.fmap_shape
+        indices = []
+        for i in range(len(dfnew)):
+            #perform a l1 distance
+            idx = np.argmin(abs(self.X-x[i]))
+            idy = np.argmin(abs(self.Y-y[i]))     
+            indice = N*idy + idx
+            indices.append(indice)
+        return indices
+    
+    
+    def fit(self, df, split_channels = True, channel_col = 'Channels'):
+        """
+        parameters
+        ---------------
+        df: embedding_df, dataframe
+        split_channels: bool, if True, will apply split by group
+        channel_col: column in df.columns, split to groups by this col
+        """
+        df['idx'] = range(len(df))
+        self.df = df
+        self.channel_col = channel_col
+        self.split_channels = split_channels
+        _ = self._fit(df)
+        
+        if self.split_channels:
+            g = df.groupby(channel_col)
+            sidx = g.apply(self._transform)            
+            self.channels = sidx.index.tolist()
+            self.indices_list = sidx.tolist()
+        else:    
+            self.indices = self._transform(df)
+            
+            
+    def transform(self, vector_1d):
+        """vector_1d: feature values 1d array"""
+        
+        M, N = self.fmap_shape
+        arr = np.zeros(self.fmap_shape)
+        arr_1d = arr.reshape(M*N, )
+            
+        if self.split_channels:
+            df = self.df
+            arr_res = []
+            for indices, channel in zip(self.indices_list, self.channels):
+                arr = np.zeros(self.fmap_shape)
+                df1 = df[df[self.channel_col] == channel]
+                idx = df1.idx.tolist()
+                arr_1d_copy = arr_1d.copy()
+                arr_1d_copy[indices] = vector_1d[idx]
+                arr_1d_copy = arr_1d_copy.reshape(M, N) 
+                arr_res.append(arr_1d_copy)
+            arr_res = np.stack(arr_res, axis=-1)
+        else:
+            arr_1d_copy = arr_1d.copy()
+            arr_1d_copy[self.indices] = vector_1d
+            arr_res = arr_1d_copy.reshape(M, N, 1) 
+        return arr_res
+
+
+def smartpadding(array, target_size, mode='constant', constant_values=0):
+    """
+    array: 2d array to be padded
+    target_size: tuple of target array's shape
+    """
+    X, Y = array.shape
+    M, N = target_size
+    top = int(np.ceil((M-X)/2))
+    bottom = int(M - X - top)
+    right = int(np.ceil((N-Y)/2))
+    left = int(N - Y - right)
+    array_pad = np.pad(array, pad_width=[(top, bottom),
+                                         (left, right)], 
+                       mode=mode, 
+                       constant_values=constant_values)
+    
+    return array_pad
+
+
+def fspecial_gauss(size = 31, sigma = 2):
+
+    """Function to mimic the 'fspecial' gaussian MATLAB function
+      size should be odd value
+    """
+    x, y = np.mgrid[-size//2 + 1:size//2 + 1, -size//2 + 1:size//2 + 1]
+    g = np.exp(-((x**2 + y**2)/(2.0*sigma**2)))
+    return g/g.sum()
+
+
+def conv2(array, kernel_size = 31, sigma = 2,  mode='same', fillvalue = 0):
+    kernel = fspecial_gauss(kernel_size, sigma)
+    return np.rot90(convolve2d(np.rot90(array, 2), np.rot90(kernel, 2), 
+                               mode=mode, 
+                               fillvalue = fillvalue), 2)