__all__ = ['eig_seg',
'initialize_eigenanatomy',
'sparse_decom2']
import numpy as np
from scipy.stats import pearsonr
import pandas as pd
from pandas import DataFrame
import statsmodels.api as sm
import ants
from ants.internal import get_lib_fn, get_pointer_string
def sparse_decom2(inmatrix,
inmask=(None, None),
sparseness=(0.01, 0.01),
nvecs=3,
its=20,
cthresh=(0,0),
statdir=None,
perms=0,
uselong=0,
z=0,
smooth=0,
robust=0,
mycoption=0,
initialization_list=[],
initialization_list2=[],
ell1=10,
prior_weight=0,
verbose=False,
rejector=0,
max_based=False,
version=1):
"""
Decomposes two matrices into paired sparse eigenevectors to
maximize canonical correlation - aka Sparse CCA.
Note: we do not scale the matrices internally. We leave
scaling choices to the user.
ANTsR function: `sparseDecom2`
Arguments
---------
inmatrix : 2-tuple of ndarrays
input as inmatrix=(mat1,mat2). n by p input matrix and n by q input matrix , spatial variable lies along columns.
inmask : 2-tuple of ANTsImage types (optional - one or both)
optional pair of image masks
sparseness : tuple
a pair of float values e.g c(0.01,0.1) enforces an unsigned 99 percent and 90 percent sparse solution for each respective view
nvecs : integer
number of eigenvector pairs
its : integer
number of iterations, 10 or 20 usually sufficient
cthresh : 2-tuple
cluster threshold pair
statdir : string (optional)
temporary directory if you want to look at full output
perms : integer
number of permutations. settings permutations greater than 0 will estimate significance per vector empirically. For small datasets, these may be conservative. p-values depend on how one scales the input matrices.
uselong : boolean
enforce solutions of both views to be the same - requires matrices to be the same size
z : float
subject space (low-dimensional space) sparseness value
smooth : float
smooth the data (only available when mask is used)
robust : boolean
rank transform input matrices
mycoption : integer
enforce 1 - spatial orthogonality, 2 - low-dimensional orthogonality or 0 - both
initialization_list : list
initialization for first view
initialization_list2 : list
initialization for 2nd view
ell1 : float
gradient descent parameter, if negative then l0 otherwise use l1
prior_weight : scalar
Scalar value weight on prior between 0 (prior is weak) and 1 (prior is strong). Only engaged if initialization is used
verbose : boolean
activates verbose output to screen
rejector : scalar
rejects small correlation solutions
max_based : boolean
whether to choose max-based thresholding
Returns
-------
dict w/ following key/value pairs:
`projections` : ndarray
X projections
`projections2` : ndarray
Y projections
`eig1` : ndarray
X components
`eig2` : ndarray
Y components
`summary` : pd.DataFrame
first column is canonical correlations,
second column is p-values (these are `None` if perms > 0)
Example
-------
>>> import numpy as np
>>> import ants
>>> mat = np.random.randn(20, 100)
>>> mat2 = np.random.randn(20, 90)
>>> mydecom = ants.sparse_decom2(inmatrix = (mat,mat2),
sparseness=(0.1,0.3), nvecs=3,
its=3, perms=0)
"""
if inmatrix[0].shape[0] != inmatrix[1].shape[0]:
raise ValueError('Matrices must have same number of rows (samples)')
idim = 3
if ants.is_image(inmask[0]):
maskx = inmask[0].clone('float')
idim = inmask[0].dimension
hasmaskx = 1
elif isinstance(inmask[0], np.ndarray):
maskx = ants.from_numpy(inmask[0], pixeltype='float')
idim = inmask[0].ndim
hasmaskx = 1
else:
maskx = ants.make_image([1]*idim, pixeltype='float')
hasmaskx = -1
if ants.is_image(inmask[1]):
masky = inmask[1].clone('float')
idim = inmask[1].dimension
hasmasky = 1
elif isinstance(inmask[1], np.ndarray):
masky = ants.from_numpy(inmask[1], pixeltype='float')
idim = inmask[1].ndim
hasmasky = 1
else:
masky = ants.make_image([1]*idim, pixeltype='float')
hasmasky = -1
inmask = [maskx, masky]
if robust > 0:
raise NotImplementedError('robust > 0 not currently implemented')
else:
input_matrices = inmatrix
if idim == 2:
if version == 1:
sccancpp_fn = get_lib_fn('sccanCpp2D')
elif version == 2:
sccancpp_fn = get_lib_fn('sccanCpp2DV2')
input_matrices = (input_matrices[0].tolist(), input_matrices[1].tolist())
elif idim ==3:
if version == 1:
sccancpp_fn = get_lib_fn('sccanCpp3D')
elif version == 2:
sccancpp_fn = get_lib_fn('sccanCpp3DV2')
input_matrices = (input_matrices[0].tolist(), input_matrices[1].tolist())
outval = sccancpp_fn(input_matrices[0], input_matrices[1],
inmask[0].pointer, inmask[1].pointer,
hasmaskx, hasmasky,
sparseness[0], sparseness[1],
nvecs, its,
cthresh[0], cthresh[1],
z, smooth,
initialization_list, initialization_list2,
ell1, verbose,
prior_weight, mycoption, max_based)
outval['eig1'] = np.array(outval['eig1'])
outval['eig2'] = np.array(outval['eig2'])
p1 = np.dot(input_matrices[0], outval['eig1'].T)
p2 = np.dot(input_matrices[1], outval['eig2'].T)
outcorrs = np.array([pearsonr(p1[:,i],p2[:,i])[0] for i in range(p1.shape[1])])
if prior_weight < 1e-10:
myord = np.argsort(np.abs(outcorrs))[::-1]
outcorrs = outcorrs[myord]
p1 = p1[:, myord]
p2 = p2[:, myord]
outval['eig1'] = outval['eig1'][myord,:]
outval['eig2'] = outval['eig2'][myord,:]
cca_summary = np.vstack((outcorrs,[None]*len(outcorrs))).T
if perms > 0:
cca_summary[:,1] = 0
nsubs = input_matrices[0].shape[0]
for permer in range(perms):
m1 = input_matrices[0][np.random.permutation(nsubs),:]
m2 = input_matrices[1][np.random.permutation(nsubs),:]
outvalperm = sccancpp_fn(m1, m2,
inmask[0].pointer, inmask[1].pointer,
hasmaskx, hasmasky,
sparseness[0], sparseness[1],
nvecs, its,
cthresh[0], cthresh[1],
z, smooth,
initialization_list, initialization_list2,
ell1, verbose,
prior_weight, mycoption, max_based)
outvalperm['eig1'] = np.array(outvalperm['eig1'])
outvalperm['eig2'] = np.array(outvalperm['eig2'])
p1perm = np.dot(m1, outvalperm['eig1'].T)
p2perm = np.dot(m2, outvalperm['eig2'].T)
outcorrsperm = np.array([pearsonr(p1perm[:,i],p2perm[:,i])[0] for i in range(p1perm.shape[1])])
if prior_weight < 1e-10:
myord = np.argsort(np.abs(outcorrsperm))[::-1]
outcorrsperm = outcorrsperm[myord]
counter = np.abs(cca_summary[:,0]) < np.abs(outcorrsperm)
counter = counter.astype('int')
cca_summary[:,1] = cca_summary[:,1] + counter
cca_summary[:,1] = cca_summary[:,1] / float(perms)
return {'projections': p1,
'projections2': p2,
'eig1': outval['eig1'].T,
'eig2': outval['eig2'].T,
'summary': pd.DataFrame(cca_summary,columns=['corrs','pvalues'])}
def initialize_eigenanatomy(initmat, mask=None, initlabels=None, nreps=1, smoothing=0):
"""
InitializeEigenanatomy is a helper function to initialize sparseDecom
and sparseDecom2. Can be used to estimate sparseness parameters per
eigenvector. The user then only chooses nvecs and optional
regularization parameters.
Arguments
---------
initmat : np.ndarray or ANTsImage
input matrix where rows provide initial vector values.
alternatively, this can be an antsImage which contains labeled regions.
mask : ANTsImage
mask if available
initlabels : list/tuple of integers
which labels in initmat to use as initial components
nreps : integer
nrepetitions to use
smoothing : float
if using an initial label image, optionally smooth each roi
Returns
-------
dict w/ the following key/value pairs:
`initlist` : list of ANTsImage types
initialization list(s) for sparseDecom(2)
`mask` : ANTsImage
mask(s) for sparseDecom(2)
`enames` : list of strings
string names of components for sparseDecom(2)
Example
-------
>>> import ants
>>> import numpy as np
>>> mat = np.random.randn(4,100).astype('float32')
>>> init = ants.initialize_eigenanatomy(mat)
"""
if ants.is_image(initmat):
# create initmat from each of the unique labels
if mask is not None:
selectvec = mask > 0
else:
selectvec = initmat > 0
initmatvec = initmat[selectvec]
if initlabels is None:
ulabs = np.sort(np.unique(initmatvec))
ulabs = ulabs[ulabs > 0]
else:
ulabs = initlabels
nvox = len(initmatvec)
temp = np.zeros((len(ulabs), nvox))
for x in range(len(ulabs)):
timg = ants.threshold_image(initmat, ulabs[x]-1e-4, ulabs[x]+1e-4)
if smoothing > 0:
timg = ants.smooth_image(timg, smoothing)
temp[x,:] = timg[selectvec]
initmat = temp
nclasses = initmat.shape[0]
classlabels = ['init%i'%i for i in range(nclasses)]
initlist = []
if mask is None:
maskmat = np.zeros(initmat.shape)
maskmat[0,:] = 1
mask = ants.from_numpy(maskmat.astype('float32'))
eanatnames = ['A'] * (nclasses*nreps)
ct = 0
for i in range(nclasses):
vecimg = mask.clone('float')
initf = initmat[i,:]
vecimg[mask==1] = initf
for nr in range(nreps):
initlist.append(vecimg)
eanatnames[ct+nr-1] = str(classlabels[i])
ct = ct + 1
return {'initlist': initlist, 'mask': mask, 'enames': eanatnames}
def eig_seg(mask, img_list, apply_segmentation_to_images=False, cthresh=0, smooth=1):
"""
Segment a mask into regions based on the max value in an image list.
At a given voxel the segmentation label will contain the index to the image
that has the largest value. If the 3rd image has the greatest value,
the segmentation label will be 3 at that voxel.
Arguments
---------
mask : ANTsImage
D-dimensional mask > 0 defining segmentation region.
img_list : collection of ANTsImage or np.ndarray
images to use
apply_segmentation_to_images : boolean
determines if original image list is modified by the segmentation.
cthresh : integer
throw away isolated clusters smaller than this value
smooth : float
smooth the input data first by this value
Returns
-------
ANTsImage
Example
-------
>>> import ants
>>> mylist = [ants.image_read(ants.get_ants_data('r16')),
ants.image_read(ants.get_ants_data('r27')),
ants.image_read(ants.get_ants_data('r85'))]
>>> myseg = ants.eig_seg(ants.get_mask(mylist[0]), mylist)
"""
maskvox = mask > 0
maskseg = mask.clone()
maskseg[maskvox] = 0
if isinstance(img_list, np.ndarray):
mydata = img_list
elif isinstance(img_list, (tuple, list)):
mydata = ants.image_list_to_matrix(img_list, mask)
if (smooth > 0):
for i in range(mydata.shape[0]):
temp_img = ants.make_image(mask, mydata[i,:], pixeltype='float')
temp_img = ants.smooth_image(temp_img, smooth, sigma_in_physical_coordinates=True)
mydata[i,:] = temp_img[mask >= 0.5]
segids = np.argmax(np.abs(mydata), axis=0)+1
segmax = np.max(np.abs(mydata), axis=0)
maskseg[maskvox] = (segids * (segmax > 1e-09))
if cthresh > 0:
for kk in range(int(maskseg.max())):
timg = ants.threshold_image(maskseg, kk, kk)
timg = ants.label_clusters(timg, cthresh)
timg = ants.threshold_image(timg, 1, 1e15) * float(kk)
maskseg[maskseg == kk] = timg[maskseg == kk]
if (apply_segmentation_to_images) and (not isinstance(img_list, np.ndarray)):
for i in range(len(img_list)):
img = img_list[i]
img[maskseg != float(i)] = 0
img_list[i] = img
return maskseg