# ants/math/quantile.py
__all__ = ['ilr',
           'rank_intensity',
           'quantile',
           'regress_poly',
           'regress_components',
           'get_average_of_timeseries',
           'compcor',
           'bandpass_filter_matrix']

import numpy as np
from numpy.polynomial import Legendre
from scipy import linalg
from scipy.stats import pearsonr
from scipy.stats import rankdata
import pandas as pd
from pandas import DataFrame
import statsmodels.api as sm
import statsmodels.formula.api as smf

import ants
from ants.decorators import image_method


def rank_intensity(x, mask=None, get_mask=True, method='max'):
    """
    Rank transform the intensity of the input image, with or without masking.
    Intensities such as [0, 1, 2, 55] will transform to [0, 1, 2, 3], so this
    may not be appropriate for quantitative images. However, rank
    transformations generally improve robustness, so whether this helps is an
    empirical question that should be evaluated per application.

    Arguments
    ---------
    x : ANTsImage
        input image

    mask : ANTsImage
        optional mask

    get_mask : boolean
        will estimate a mask when none is provided

    method : string
        a scipy rank method (max, min, average, dense)

    Returns
    -------
    transformed image

    Example
    -------
    >>> img = ants.image_read(ants.get_data('r16'))
    >>> ants.rank_intensity(img)
    """
    if mask is not None:
        fir = rankdata((x * mask).numpy(), method=method)
    elif mask is None and get_mask == True:
        mask = ants.get_mask(x)
        fir = rankdata((x * mask).numpy(), method=method)
    else:
        fir = rankdata(x.numpy(), method=method)
    fir = fir - 1
    fir = fir.reshape(x.shape)
    rimg = ants.from_numpy(fir.astype(float))
    rimg = ants.iMath(rimg, "Normalize")
    ants.copy_image_info(x, rimg)
    if mask is not None:
        rimg = rimg * mask
    return rimg


def ilr(data_frame, voxmats, ilr_formula, verbose=False):
    """
    Image-based linear regression.

    This function simplifies calculating p-values from linear models
    in which the same formula is applied many times with a change in
    image-based predictors. Image-based variables are stored in the input
    matrix dictionary. They should be named consistently in the input
    formula and in the image list; if they are not, an error will be thrown.
    All input matrices should have the same number of rows and columns.

    This function takes advantage of statsmodels R-style formulas.

    ANTsR function: `ilr`

    Arguments
    ---------
    data_frame : data frame containing all relevant predictors except for
        the matrices associated with the image variables. One should convert
        any categorical predictors ahead of time using `pd.get_dummies`.

    voxmats : named dictionary of matrices that contains the changing
        predictors.

    ilr_formula : character string that defines a valid R-style regression
        formula.

    verbose : boolean; will print a little bit of diagnostic information that
        allows a degree of model checking.

    Returns
    -------
    A dictionary of arrays whose names are derived from the formula and the
    coefficients of the regression model. The size of the output values
    (p-values, t-values, parameter values) will match the input matrices and,
    as such, can be converted to an image via `make_image`.

    Example
    -------
    >>> nsub = 20
    >>> mu, sigma = 0, 1
    >>> outcome = np.random.normal( mu, sigma, nsub )
    >>> covar = np.random.normal( mu, sigma, nsub )
    >>> mat = np.random.normal( mu, sigma, (nsub, 500 ) )
    >>> mat2 = np.random.normal( mu, sigma, (nsub, 500 ) )
    >>> data = {'covar':covar,'outcome':outcome}
    >>> df = pd.DataFrame( data )
    >>> vlist = { "mat1": mat, "mat2": mat2 }
    >>> myform = " outcome ~ covar * mat1 "
    >>> result = ants.ilr( df, vlist, myform)
    >>> myform = " mat2 ~ covar + mat1 "
    >>> result = ants.ilr( df, vlist, myform)
    """
    nvoxmats = len(voxmats)
    if nvoxmats < 1:
        raise ValueError('Pass at least one matrix in the voxmats dictionary')
    keylist = list(voxmats.keys())
    firstmat = keylist[0]
    voxshape = voxmats[firstmat].shape
    nvox = voxshape[1]
    nmats = len(keylist)
    for k in keylist:
        if voxmats[firstmat].shape != voxmats[k].shape:
            raise ValueError('Matrices must all have the same shape (samples by voxels)')
    # test voxel
    vox = 0
    nrows = data_frame.shape[0]
    # fit the model once on random data to recover the model (exogenous) names
    data_frame_vox = data_frame.copy()
    for k in range(nmats):
        data = {keylist[k]: np.random.normal(0, 1, nrows)}
        temp = pd.DataFrame(data)
        data_frame_vox = pd.concat([data_frame_vox.reset_index(drop=True), temp], axis=1)
    mod = smf.ols(formula=ilr_formula, data=data_frame_vox)
    res = mod.fit()
    modelNames = res.model.exog_names
    if verbose:
        print(data_frame_vox)
        print(res.summary())
    nOutcomes = len(modelNames)
    tValsOut = list()
    pValsOut = list()
    bValsOut = list()
    for k in range(len(modelNames)):
        bValsOut.append(np.zeros(nvox))
        pValsOut.append(np.zeros(nvox))
        tValsOut.append(np.zeros(nvox))
    data_frame_vox = data_frame.copy()
    for v in range(nmats):
        data = {keylist[v]: voxmats[keylist[v]][:, k]}
        temp = pd.DataFrame(data)
        data_frame_vox = pd.concat([data_frame_vox.reset_index(drop=True), temp], axis=1)
    for k in range(nvox):
        # first get the correct data frame
        for v in range(nmats):
            data_frame_vox[keylist[v]] = voxmats[keylist[v]][:, k]
        # then get the local model results
        mod = smf.ols(formula=ilr_formula, data=data_frame_vox)
        res = mod.fit()
        tvals = res.tvalues
        pvals = res.pvalues
        bvals = res.params
        for v in range(len(modelNames)):
            bValsOut[v][k] = bvals.iloc[v]
            pValsOut[v][k] = pvals.iloc[v]
            tValsOut[v][k] = tvals.iloc[v]
    bValsOutDict = {}
    tValsOutDict = {}
    pValsOutDict = {}
    for v in range(len(modelNames)):
        bValsOutDict['coef_' + modelNames[v]] = bValsOut[v]
        tValsOutDict['tval_' + modelNames[v]] = tValsOut[v]
        pValsOutDict['pval_' + modelNames[v]] = pValsOut[v]
    return {
        'modelNames': modelNames,
        'coefficientValues': bValsOutDict,
        'pValues': pValsOutDict,
        'tValues': tValsOutDict}


@image_method
def quantile(image, q, nonzero=True):
    """
    Get the quantile values from an ANTsImage.

    Examples
    --------
    >>> img = ants.image_read(ants.get_data('r16'))
    >>> ants.quantile(img, 0.5)
    >>> ants.quantile(img, (0.5, 0.75))
    """
    img_arr = image.numpy()
    if isinstance(q, (list, tuple)):
        q = [qq * 100. if qq <= 1. else qq for qq in q]
        if nonzero:
            img_arr = img_arr[img_arr > 0]
        vals = [np.percentile(img_arr, qq) for qq in q]
        return tuple(vals)
    elif isinstance(q, (float, int)):
        if q <= 1.:
            q = q * 100.
        if nonzero:
            img_arr = img_arr[img_arr > 0]
        return np.percentile(img_arr, q)
    else:
        raise ValueError('q argument must be list/tuple or float/int')


def regress_poly(degree, data, remove_mean=True, axis=-1):
    """
    Returns data with the specified degree of Legendre polynomial regressed out.

    :param int degree: degree of the polynomial trend to remove
    :param data: timepoints-by-features array to be detrended
    :param bool remove_mean: whether or not to demean the data (i.e. degree 0)
    :param int axis: numpy array axis along which regression is performed
    """
    timepoints = data.shape[0]

    # Generate design matrix
    X = np.ones((timepoints, 1))  # quick way to calc degree 0
    for i in range(degree):
        polynomial_func = Legendre.basis(i + 1)
        value_array = np.linspace(-1, 1, timepoints)
        X = np.hstack((X, polynomial_func(value_array)[:, np.newaxis]))
    non_constant_regressors = X[:, :-1] if X.shape[1] > 1 else np.array([])

    betas = np.linalg.pinv(X).dot(data)
    if remove_mean:
        datahat = X.dot(betas)
    else:  # disregard the first layer of X, which is degree 0
        datahat = X[:, 1:].dot(betas[1:, ...])
    regressed_data = data - datahat
    return regressed_data, non_constant_regressors


def regress_components(data, components, remove_mean=True):
    """
    Returns data with the given components regressed out.

    :param data: timepoints-by-features array to be cleaned
    :param components: timepoints-by-components matrix of regressors
    :param bool remove_mean: whether or not to demean the data (i.e. degree 0)
    """
    timepoints = data.shape[0]
    betas = np.linalg.pinv(components).dot(data)
    if remove_mean:
        datahat = components.dot(betas)
    else:  # disregard the first column, assumed to be the constant (degree 0) term
        datahat = components[:, 1:].dot(betas[1:, ...])
    regressed_data = data - datahat
    return regressed_data


def get_average_of_timeseries(image, idx=None):
    """
    Average the time series image into a single image of one lower dimension.

    image: input time series image

    idx: indices over which to average
    """
    imagedim = image.dimension
    if idx is None:
        idx = range(image.shape[imagedim - 1])
    i0 = ants.slice_image(image, axis=image.dimension - 1, idx=idx[0]) * 0
    wt = 1.0 / len(idx)
    for k in idx:
        i0 = i0 + ants.slice_image(image, axis=image.dimension - 1, idx=k) * wt
    return i0
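
# Illustrative usage (not part of the original module); 'my_bold.nii.gz' is a
# hypothetical 4D fMRI file used only to sketch the call:
# >>> bold = ants.image_read('my_bold.nii.gz')
# >>> mean3d = get_average_of_timeseries(bold)                       # average all frames
# >>> mean3d = get_average_of_timeseries(bold, idx=range(10, 20))    # average frames 10-19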


def bandpass_filter_matrix(matrix,
                           tr=1, lowf=0.01, highf=0.1, order=3):
    """
    Bandpass filter the input time series matrix.

    ANTsR function: `frequencyFilterfMRI`

    Arguments
    ---------
    matrix: input time series matrix (timepoints by channels/voxels)

    tr: sampling time interval (inverse of sampling rate)

    lowf: low frequency cutoff

    highf: high frequency cutoff

    order: order of the Butterworth filter run using `filtfilt`

    Returns
    -------
    filtered matrix

    Example
    -------
    >>> import numpy as np
    >>> import ants
    >>> import matplotlib.pyplot as plt
    >>> brainSignal = np.random.randn( 400, 1000 )
    >>> tr = 1
    >>> filtered = ants.bandpass_filter_matrix( brainSignal, tr = tr )
    >>> nsamples = brainSignal.shape[0]
    >>> t = np.linspace(0, tr*nsamples, nsamples, endpoint=False)
    >>> k = 20
    >>> plt.plot(t, brainSignal[:,k], label='Noisy signal')
    >>> plt.plot(t, filtered[:,k], label='Filtered signal')
    >>> plt.xlabel('time (seconds)')
    >>> plt.grid(True)
    >>> plt.axis('tight')
    >>> plt.legend(loc='upper left')
    >>> plt.show()
    """
    from scipy.signal import butter, filtfilt

    def butter_bandpass(lowcut, highcut, fs, order):
        nyq = 0.5 * fs
        low = lowcut / nyq
        high = highcut / nyq
        b, a = butter(order, [low, high], btype='band')
        return b, a

    def butter_bandpass_filter(data, lowcut, highcut, fs, order):
        b, a = butter_bandpass(lowcut, highcut, fs, order=order)
        y = filtfilt(b, a, data)
        return y

    fs = 1 / tr  # sampling rate based on tr
    nsamples = matrix.shape[0]
    ncolumns = matrix.shape[1]
    matrixOut = matrix.copy()
    for k in range(ncolumns):
        matrixOut[:, k] = butter_bandpass_filter(
            matrix[:, k], lowf, highf, fs, order=order)
    return matrixOut


def clean_data(arr, standardize=True):
    """
    Remove columns from a NumPy array that have no variation or contain NA/Inf values.
    Optionally standardize the remaining data.

    :param arr: NumPy array to be cleaned.
    :param standardize: Boolean, if True standardize the data.
    :return: Cleaned (and optionally standardized) NumPy array.
    """
    valid_columns = []
    for i in range(arr.shape[1]):
        column = arr[:, i]
        if np.any(column != column[0]) and not np.any(np.isnan(column)) and not np.any(np.isinf(column)):
            valid_columns.append(i)
    cleaned_data = arr[:, valid_columns]

    if standardize:
        mean = np.mean(cleaned_data, axis=0)
        std_dev = np.std(cleaned_data, axis=0)
        # Avoid division by zero in case of zero standard deviation
        std_dev[std_dev == 0] = 1
        cleaned_data = (cleaned_data - mean) / std_dev

    return cleaned_data
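
# Illustrative usage (not part of the original module): constant columns and
# columns containing NaN/Inf are dropped, and the remaining column is z-scored.
# >>> arr = np.array([[1., 0., np.nan],
# ...                 [2., 0., 5.],
# ...                 [3., 0., 6.]])
# >>> clean_data(arr).shape     # only the first column survives
# (3, 1)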


def compcor(boldImage, ncompcor=4, quantile=0.975, mask=None, filter_type=False, degree=2):
    """
    Compute noise components from the input image.

    ANTsR function: `compcor`

    This is adapted from nipype code:
    https://github.com/nipy/nipype/blob/e29ac95fc0fc00fedbcaa0adaf29d5878408ca7c/nipype/algorithms/confounds.py

    Arguments
    ---------
    boldImage: input time series image

    ncompcor: number of noise components to return

    quantile: quantile defining high-variance voxels

    mask: mask defining brain or specific tissues

    filter_type: type of filter to apply to the time series before computing
        noise components.
        'polynomial' - Legendre polynomial basis
        False - None (mean-removal only)

    degree: order of polynomial used to remove trends from the timeseries

    Returns
    -------
    dictionary containing:

        components: a numpy array

        basis: a numpy array containing the (non-constant) filter regressors

    Example
    -------
    >>> cc = ants.compcor( ants.image_read(ants.get_ants_data("ch2")) )
    """
    def compute_tSTD(M, quantile, x=0, axis=0):
        stdM = np.std(M, axis=axis)
        # set bad values to x
        stdM[stdM == 0] = x
        stdM[np.isnan(stdM)] = x
        tt = round(quantile * 100)
        threshold_std = np.percentile(stdM, tt)
        # threshold_std = quantile( stdM, quantile )
        return {'tSTD': stdM, 'threshold_std': threshold_std}

    if mask is None:
        temp = ants.slice_image(boldImage, axis=boldImage.dimension - 1, idx=0)
        mask = ants.get_mask(temp)
    imagematrix = ants.timeseries_to_matrix(boldImage, mask)
    temp = compute_tSTD(imagematrix, quantile, 0)
    tsnrmask = ants.make_image(mask, temp['tSTD'])
    tsnrmask = ants.threshold_image(tsnrmask, temp['threshold_std'], temp['tSTD'].max())
    M = ants.timeseries_to_matrix(boldImage, tsnrmask)
    components = None
    basis = np.array([])
    if filter_type in ('polynomial', False):
        M, basis = regress_poly(degree, M)
    # M = M / compute_tSTD(M, 1.)['tSTD']
    # "The covariance matrix C = M M.T was constructed and decomposed into its
    # principal components using a singular value decomposition."
    M = clean_data(M, standardize=True)
    u, _, _ = linalg.svd(M, full_matrices=False)
    if components is None:
        components = u[:, :ncompcor]
    else:
        components = np.hstack((components, u[:, :ncompcor]))
    if components is None and ncompcor > 0:
        raise ValueError('No components found')
    return {'components': components, 'basis': basis}