|
a |
|
b/ants/math/quantile.py |
|
|
1 |
|
|
|
2 |
__all__ = ['ilr', |
|
|
3 |
'rank_intensity', |
|
|
4 |
'quantile', |
|
|
5 |
'regress_poly', |
|
|
6 |
'regress_components', |
|
|
7 |
'get_average_of_timeseries', |
|
|
8 |
'compcor', |
|
|
9 |
'bandpass_filter_matrix' ] |
|
|
10 |
|
|
|
11 |
import numpy as np |
|
|
12 |
from numpy.polynomial import Legendre |
|
|
13 |
from scipy import linalg |
|
|
14 |
from scipy.stats import pearsonr |
|
|
15 |
from scipy.stats import rankdata |
|
|
16 |
import pandas as pd |
|
|
17 |
from pandas import DataFrame |
|
|
18 |
import statsmodels.api as sm |
|
|
19 |
import statsmodels.formula.api as smf |
|
|
20 |
|
|
|
21 |
import ants |
|
|
22 |
from ants.decorators import image_method |
|
|
23 |
|
|
|
24 |
def rank_intensity( x, mask=None, get_mask=True, method='max', ): |
|
|
25 |
""" |
|
|
26 |
Rank transform the intensity of the input image with or without masking. |
|
|
27 |
Intensities will transform from [0,1,2,55] to [0,1,2,3] so this may not be |
|
|
28 |
appropriate for quantitative images - however, you never know. rank |
|
|
29 |
transformations generally improve robustness so it is an empirical question |
|
|
30 |
that should be evaluated. |
|
|
31 |
|
|
|
32 |
Arguments |
|
|
33 |
--------- |
|
|
34 |
|
|
|
35 |
x : ANTsImage |
|
|
36 |
input image |
|
|
37 |
|
|
|
38 |
mask : ANTsImage |
|
|
39 |
optional mask |
|
|
40 |
|
|
|
41 |
get_mask: boolean |
|
|
42 |
will estimate a mask when none provided |
|
|
43 |
|
|
|
44 |
method : a scipy rank method (max,min,average,dense) |
|
|
45 |
|
|
|
46 |
|
|
|
47 |
return: transformed image |
|
|
48 |
|
|
|
49 |
Example |
|
|
50 |
------- |
|
|
51 |
>>> img = ants.image_read(ants.get_data('r16')) |
|
|
52 |
>>> ants.rank_intensity(img) |
|
|
53 |
""" |
|
|
54 |
if mask is not None: |
|
|
55 |
fir = rankdata( (x*mask).numpy(), method=method ) |
|
|
56 |
elif mask is None and get_mask == True: |
|
|
57 |
mask = ants.get_mask( x ) |
|
|
58 |
fir = rankdata( (x*mask).numpy(), method=method ) |
|
|
59 |
else: |
|
|
60 |
fir = rankdata( x.numpy(), method=method ) |
|
|
61 |
fir = fir - 1 |
|
|
62 |
fir = fir.reshape( x.shape ) |
|
|
63 |
rimg = ants.from_numpy( fir.astype(float) ) |
|
|
64 |
rimg = ants.iMath(rimg,"Normalize") |
|
|
65 |
ants.copy_image_info( x, rimg ) |
|
|
66 |
if mask is not None: |
|
|
67 |
rimg = rimg * mask |
|
|
68 |
return( rimg ) |
|
|
69 |
|
|
|
70 |
|
|
|
71 |
def ilr( data_frame, voxmats, ilr_formula, verbose = False ): |
|
|
72 |
""" |
|
|
73 |
Image-based linear regression. |
|
|
74 |
|
|
|
75 |
This function simplifies calculating p-values from linear models |
|
|
76 |
in which there is a similar formula that is applied many times |
|
|
77 |
with a change in image-based predictors. Image-based variables |
|
|
78 |
are stored in the input matrix list. They should be named |
|
|
79 |
consistently in the input formula and in the image list. If they |
|
|
80 |
are not, an error will be thrown. All input matrices should have |
|
|
81 |
the same number of rows and columns. |
|
|
82 |
|
|
|
83 |
This function takes advantage of statsmodels R-style formulas. |
|
|
84 |
|
|
|
85 |
ANTsR function: `ilr` |
|
|
86 |
|
|
|
87 |
Arguments |
|
|
88 |
--------- |
|
|
89 |
|
|
|
90 |
data_frame: This data frame contains all relevant predictors except for |
|
|
91 |
the matrices associated with the image variables. One should convert |
|
|
92 |
any categorical predictors ahead of time using `pd.get_dummies`. |
|
|
93 |
|
|
|
94 |
voxmats: The named list of matrices that contains the changing |
|
|
95 |
predictors. |
|
|
96 |
|
|
|
97 |
ilr_formula: This is a character string that defines a valid regression |
|
|
98 |
formula in the R-style. |
|
|
99 |
|
|
|
100 |
verbose: will print a little bit of diagnostic information that allows |
|
|
101 |
a degree of model checking |
|
|
102 |
|
|
|
103 |
Returns |
|
|
104 |
------- |
|
|
105 |
|
|
|
106 |
A list of different matrices that contain names derived from the |
|
|
107 |
formula and the coefficients of the regression model. The size of |
|
|
108 |
the output values ( p-values, t-values, parameter values ) will match |
|
|
109 |
the input matrix and, as such, can be converted to an image via `make_image` |
|
|
110 |
|
|
|
111 |
Example |
|
|
112 |
------- |
|
|
113 |
|
|
|
114 |
>>> nsub = 20 |
|
|
115 |
>>> mu, sigma = 0, 1 |
|
|
116 |
>>> outcome = np.random.normal( mu, sigma, nsub ) |
|
|
117 |
>>> covar = np.random.normal( mu, sigma, nsub ) |
|
|
118 |
>>> mat = np.random.normal( mu, sigma, (nsub, 500 ) ) |
|
|
119 |
>>> mat2 = np.random.normal( mu, sigma, (nsub, 500 ) ) |
|
|
120 |
>>> data = {'covar':covar,'outcome':outcome} |
|
|
121 |
>>> df = pd.DataFrame( data ) |
|
|
122 |
>>> vlist = { "mat1": mat, "mat2": mat2 } |
|
|
123 |
>>> myform = " outcome ~ covar * mat1 " |
|
|
124 |
>>> result = ants.ilr( df, vlist, myform) |
|
|
125 |
>>> myform = " mat2 ~ covar + mat1 " |
|
|
126 |
>>> result = ants.ilr( df, vlist, myform) |
|
|
127 |
|
|
|
128 |
""" |
|
|
129 |
|
|
|
130 |
nvoxmats = len( voxmats ) |
|
|
131 |
if nvoxmats < 1 : |
|
|
132 |
raise ValueError('Pass at least one matrix to voxmats list') |
|
|
133 |
keylist = list(voxmats.keys()) |
|
|
134 |
firstmat = keylist[0] |
|
|
135 |
voxshape = voxmats[firstmat].shape |
|
|
136 |
nvox = voxshape[1] |
|
|
137 |
nmats = len( keylist ) |
|
|
138 |
for k in keylist: |
|
|
139 |
if voxmats[firstmat].shape != voxmats[k].shape: |
|
|
140 |
raise ValueError('Matrices must have same number of rows (samples)') |
|
|
141 |
|
|
|
142 |
# test voxel |
|
|
143 |
vox = 0 |
|
|
144 |
nrows = data_frame.shape[0] |
|
|
145 |
data_frame_vox = data_frame.copy() |
|
|
146 |
for k in range( nmats ): |
|
|
147 |
data = {keylist[k]: np.random.normal(0,1,nrows) } |
|
|
148 |
temp = pd.DataFrame( data ) |
|
|
149 |
data_frame_vox = pd.concat([data_frame_vox.reset_index(drop=True),temp], axis=1 ) |
|
|
150 |
mod = smf.ols(formula=ilr_formula, data=data_frame_vox ) |
|
|
151 |
res = mod.fit() |
|
|
152 |
modelNames = res.model.exog_names |
|
|
153 |
if verbose: |
|
|
154 |
print( data_frame_vox ) |
|
|
155 |
print(res.summary()) |
|
|
156 |
nOutcomes = len( modelNames ) |
|
|
157 |
tValsOut = list() |
|
|
158 |
pValsOut = list() |
|
|
159 |
bValsOut = list() |
|
|
160 |
for k in range( len( modelNames ) ): |
|
|
161 |
bValsOut.append( np.zeros( nvox ) ) |
|
|
162 |
pValsOut.append( np.zeros( nvox ) ) |
|
|
163 |
tValsOut.append( np.zeros( nvox ) ) |
|
|
164 |
|
|
|
165 |
data_frame_vox = data_frame.copy() |
|
|
166 |
for v in range( nmats ): |
|
|
167 |
data = {keylist[v]: voxmats[keylist[v]][:,k] } |
|
|
168 |
temp = pd.DataFrame( data ) |
|
|
169 |
data_frame_vox = pd.concat([data_frame_vox.reset_index(drop=True),temp], axis=1 ) |
|
|
170 |
for k in range( nvox ): |
|
|
171 |
# first get the correct data frame |
|
|
172 |
for v in range( nmats ): |
|
|
173 |
data_frame_vox[ keylist[v] ] = voxmats[keylist[v]][:,k] |
|
|
174 |
# then get the local model results |
|
|
175 |
mod = smf.ols(formula=ilr_formula, data=data_frame_vox ) |
|
|
176 |
res = mod.fit() |
|
|
177 |
tvals = res.tvalues |
|
|
178 |
pvals = res.pvalues |
|
|
179 |
bvals = res.params |
|
|
180 |
for v in range( len( modelNames ) ): |
|
|
181 |
bValsOut[v][k] = bvals.iloc[v] |
|
|
182 |
pValsOut[v][k] = pvals.iloc[v] |
|
|
183 |
tValsOut[v][k] = tvals.iloc[v] |
|
|
184 |
|
|
|
185 |
bValsOutDict = { } |
|
|
186 |
tValsOutDict = { } |
|
|
187 |
pValsOutDict = { } |
|
|
188 |
for v in range( len( modelNames ) ): |
|
|
189 |
bValsOutDict[ 'coef_' + modelNames[v] ] = bValsOut[v] |
|
|
190 |
tValsOutDict[ 'tval_' + modelNames[v] ] = tValsOut[v] |
|
|
191 |
pValsOutDict[ 'pval_' + modelNames[v] ] = pValsOut[v] |
|
|
192 |
|
|
|
193 |
return { |
|
|
194 |
'modelNames': modelNames, |
|
|
195 |
'coefficientValues': bValsOutDict, |
|
|
196 |
'pValues': pValsOutDict, |
|
|
197 |
'tValues': tValsOutDict } |
|
|
198 |
|
|
|
199 |
|
|
|
200 |
@image_method |
|
|
201 |
def quantile(image, q, nonzero=True): |
|
|
202 |
""" |
|
|
203 |
Get the quantile values from an ANTsImage |
|
|
204 |
|
|
|
205 |
Examples |
|
|
206 |
-------- |
|
|
207 |
>>> img = ants.image_read(ants.get_data('r16')) |
|
|
208 |
>>> ants.quantile(img, 0.5) |
|
|
209 |
>>> ants.quantile(img, (0.5, 0.75)) |
|
|
210 |
""" |
|
|
211 |
img_arr = image.numpy() |
|
|
212 |
if isinstance(q, (list,tuple)): |
|
|
213 |
q = [qq*100. if qq <= 1. else qq for qq in q] |
|
|
214 |
if nonzero: |
|
|
215 |
img_arr = img_arr[img_arr>0] |
|
|
216 |
vals = [np.percentile(img_arr, qq) for qq in q] |
|
|
217 |
return tuple(vals) |
|
|
218 |
elif isinstance(q, (float,int)): |
|
|
219 |
if q <= 1.: |
|
|
220 |
q = q*100. |
|
|
221 |
if nonzero: |
|
|
222 |
img_arr = img_arr[img_arr>0] |
|
|
223 |
return np.percentile(img_arr[img_arr>0], q) |
|
|
224 |
else: |
|
|
225 |
raise ValueError('q argument must be list/tuple or float/int') |
|
|
226 |
|
|
|
227 |
|
|
|
228 |
def regress_poly(degree, data, remove_mean=True, axis=-1): |
|
|
229 |
""" |
|
|
230 |
Returns data with degree polynomial regressed out. |
|
|
231 |
:param bool remove_mean: whether or not demean data (i.e. degree 0), |
|
|
232 |
:param int axis: numpy array axes along which regression is performed |
|
|
233 |
""" |
|
|
234 |
timepoints = data.shape[0] |
|
|
235 |
# Generate design matrix |
|
|
236 |
X = np.ones((timepoints, 1)) # quick way to calc degree 0 |
|
|
237 |
for i in range(degree): |
|
|
238 |
polynomial_func = Legendre.basis(i + 1) |
|
|
239 |
value_array = np.linspace(-1, 1, timepoints) |
|
|
240 |
X = np.hstack((X, polynomial_func(value_array)[:, np.newaxis])) |
|
|
241 |
non_constant_regressors = X[:, :-1] if X.shape[1] > 1 else np.array([]) |
|
|
242 |
betas = np.linalg.pinv(X).dot(data) |
|
|
243 |
if remove_mean: |
|
|
244 |
datahat = X.dot(betas) |
|
|
245 |
else: # disregard the first layer of X, which is degree 0 |
|
|
246 |
datahat = X[:, 1:].dot(betas[1:, ...]) |
|
|
247 |
regressed_data = data - datahat |
|
|
248 |
return regressed_data, non_constant_regressors |
|
|
249 |
|
|
|
250 |
def regress_components( data, components, remove_mean=True ): |
|
|
251 |
""" |
|
|
252 |
Returns data with components regressed out. |
|
|
253 |
:param bool remove_mean: whether or not demean data (i.e. degree 0), |
|
|
254 |
:param int axis: numpy array axes along which regression is performed |
|
|
255 |
""" |
|
|
256 |
timepoints = data.shape[0] |
|
|
257 |
betas = np.linalg.pinv(components).dot(data) |
|
|
258 |
if remove_mean: |
|
|
259 |
datahat = components.dot(betas) |
|
|
260 |
else: # disregard the first layer of X, which is degree 0 |
|
|
261 |
datahat = components[:, 1:].dot(betas[1:, ...]) |
|
|
262 |
regressed_data = data - datahat |
|
|
263 |
return regressed_data |
|
|
264 |
|
|
|
265 |
|
|
|
266 |
def get_average_of_timeseries( image, idx=None ): |
|
|
267 |
"""Average the timeseries into a dimension-1 image. |
|
|
268 |
image: input time series image |
|
|
269 |
idx: indices over which to average |
|
|
270 |
""" |
|
|
271 |
imagedim = image.dimension |
|
|
272 |
if idx is None: |
|
|
273 |
idx = range( image.shape[ imagedim - 1 ] ) |
|
|
274 |
i0 = ants.slice_image( image, axis=image.dimension-1, idx=idx[0] ) * 0 |
|
|
275 |
wt = 1.0 / len( idx ) |
|
|
276 |
for k in idx: |
|
|
277 |
i0 = i0 + ants.slice_image( image, axis=image.dimension-1, idx=k ) * wt |
|
|
278 |
return( i0 ) |
|
|
279 |
|
|
|
280 |
def bandpass_filter_matrix( matrix, |
|
|
281 |
tr=1, lowf=0.01, highf=0.1, order = 3): |
|
|
282 |
""" |
|
|
283 |
Bandpass filter the input time series image |
|
|
284 |
|
|
|
285 |
ANTsR function: `frequencyFilterfMRI` |
|
|
286 |
|
|
|
287 |
Arguments |
|
|
288 |
--------- |
|
|
289 |
|
|
|
290 |
image: input time series image |
|
|
291 |
|
|
|
292 |
tr: sampling time interval (inverse of sampling rate) |
|
|
293 |
|
|
|
294 |
lowf: low frequency cutoff |
|
|
295 |
|
|
|
296 |
highf: high frequency cutoff |
|
|
297 |
|
|
|
298 |
order: order of the butterworth filter run using `filtfilt` |
|
|
299 |
|
|
|
300 |
Returns |
|
|
301 |
------- |
|
|
302 |
filtered matrix |
|
|
303 |
|
|
|
304 |
Example |
|
|
305 |
------- |
|
|
306 |
|
|
|
307 |
>>> import numpy as np |
|
|
308 |
>>> import ants |
|
|
309 |
>>> import matplotlib.pyplot as plt |
|
|
310 |
>>> brainSignal = np.random.randn( 400, 1000 ) |
|
|
311 |
>>> tr = 1 |
|
|
312 |
>>> filtered = ants.bandpass_filter_matrix( brainSignal, tr = tr ) |
|
|
313 |
>>> nsamples = brainSignal.shape[0] |
|
|
314 |
>>> t = np.linspace(0, tr*nsamples, nsamples, endpoint=False) |
|
|
315 |
>>> k = 20 |
|
|
316 |
>>> plt.plot(t, brainSignal[:,k], label='Noisy signal') |
|
|
317 |
>>> plt.plot(t, filtered[:,k], label='Filtered signal') |
|
|
318 |
>>> plt.xlabel('time (seconds)') |
|
|
319 |
>>> plt.grid(True) |
|
|
320 |
>>> plt.axis('tight') |
|
|
321 |
>>> plt.legend(loc='upper left') |
|
|
322 |
>>> plt.show() |
|
|
323 |
""" |
|
|
324 |
from scipy.signal import butter, filtfilt |
|
|
325 |
|
|
|
326 |
def butter_bandpass(lowcut, highcut, fs, order ): |
|
|
327 |
nyq = 0.5 * fs |
|
|
328 |
low = lowcut / nyq |
|
|
329 |
high = highcut / nyq |
|
|
330 |
b, a = butter(order, [low, high], btype='band') |
|
|
331 |
return b, a |
|
|
332 |
|
|
|
333 |
def butter_bandpass_filter(data, lowcut, highcut, fs, order ): |
|
|
334 |
b, a = butter_bandpass(lowcut, highcut, fs, order=order) |
|
|
335 |
y = filtfilt(b, a, data) |
|
|
336 |
return y |
|
|
337 |
|
|
|
338 |
fs = 1/tr # sampling rate based on tr |
|
|
339 |
nsamples = matrix.shape[0] |
|
|
340 |
ncolumns = matrix.shape[1] |
|
|
341 |
matrixOut = matrix.copy() |
|
|
342 |
for k in range( ncolumns ): |
|
|
343 |
matrixOut[:,k] = butter_bandpass_filter( |
|
|
344 |
matrix[:,k], lowf, highf, fs, order=order ) |
|
|
345 |
return matrixOut |
|
|
346 |
|
|
|
347 |
def clean_data(arr, standardize=True): |
|
|
348 |
""" |
|
|
349 |
Remove columns from a NumPy array that have no variation or contain NA/Inf values. |
|
|
350 |
Optionally standardize the remaining data. |
|
|
351 |
|
|
|
352 |
:param arr: NumPy array to be cleaned. |
|
|
353 |
:param standardize: Boolean, if True standardize the data. |
|
|
354 |
:return: Cleaned (and optionally standardized) NumPy array. |
|
|
355 |
""" |
|
|
356 |
valid_columns = [] |
|
|
357 |
|
|
|
358 |
for i in range(arr.shape[1]): |
|
|
359 |
column = arr[:, i] |
|
|
360 |
if np.any(column != column[0]) and not np.any(np.isnan(column)) and not np.any(np.isinf(column)): |
|
|
361 |
valid_columns.append(i) |
|
|
362 |
|
|
|
363 |
cleaned_data = arr[:, valid_columns] |
|
|
364 |
|
|
|
365 |
if standardize: |
|
|
366 |
mean = np.mean(cleaned_data, axis=0) |
|
|
367 |
std_dev = np.std(cleaned_data, axis=0) |
|
|
368 |
# Avoid division by zero in case of zero standard deviation |
|
|
369 |
std_dev[std_dev == 0] = 1 |
|
|
370 |
cleaned_data = (cleaned_data - mean) / std_dev |
|
|
371 |
|
|
|
372 |
return cleaned_data |
|
|
373 |
|
|
|
374 |
def compcor( boldImage, ncompcor=4, quantile=0.975, mask=None, filter_type=False, degree=2 ): |
|
|
375 |
""" |
|
|
376 |
Compute noise components from the input image |
|
|
377 |
|
|
|
378 |
ANTsR function: `compcor` |
|
|
379 |
|
|
|
380 |
this is adapted from nipy code https://github.com/nipy/nipype/blob/e29ac95fc0fc00fedbcaa0adaf29d5878408ca7c/nipype/algorithms/confounds.py |
|
|
381 |
|
|
|
382 |
Arguments |
|
|
383 |
--------- |
|
|
384 |
|
|
|
385 |
boldImage: input time series image |
|
|
386 |
|
|
|
387 |
ncompcor: number of noise components to return |
|
|
388 |
|
|
|
389 |
quantile: quantile defining high-variance |
|
|
390 |
|
|
|
391 |
mask: mask defining brain or specific tissues |
|
|
392 |
|
|
|
393 |
filter_type: type off filter to apply to time series before computing |
|
|
394 |
noise components. |
|
|
395 |
|
|
|
396 |
'polynomial' - Legendre polynomial basis |
|
|
397 |
False - None (mean-removal only) |
|
|
398 |
|
|
|
399 |
degree: order of polynomial used to remove trends from the timeseries |
|
|
400 |
|
|
|
401 |
Returns |
|
|
402 |
------- |
|
|
403 |
dictionary containing: |
|
|
404 |
|
|
|
405 |
components: a numpy array |
|
|
406 |
|
|
|
407 |
basis: a numpy array containing the (non-constant) filter regressors |
|
|
408 |
|
|
|
409 |
Example |
|
|
410 |
------- |
|
|
411 |
>>> cc = ants.compcor( ants.image_read(ants.get_ants_data("ch2")) ) |
|
|
412 |
|
|
|
413 |
""" |
|
|
414 |
|
|
|
415 |
def compute_tSTD(M, quantile, x=0, axis=0): |
|
|
416 |
stdM = np.std(M, axis=axis) |
|
|
417 |
# set bad values to x |
|
|
418 |
stdM[stdM == 0] = x |
|
|
419 |
stdM[np.isnan(stdM)] = x |
|
|
420 |
tt = round( quantile*100 ) |
|
|
421 |
threshold_std = np.percentile( stdM, tt ) |
|
|
422 |
# threshold_std = quantile( stdM, quantile ) |
|
|
423 |
return { 'tSTD': stdM, 'threshold_std': threshold_std} |
|
|
424 |
if mask is None: |
|
|
425 |
temp = ants.slice_image( boldImage, axis=boldImage.dimension-1, idx=0 ) |
|
|
426 |
mask = ants.get_mask( temp ) |
|
|
427 |
imagematrix = ants.timeseries_to_matrix( boldImage, mask ) |
|
|
428 |
temp = compute_tSTD( imagematrix, quantile, 0 ) |
|
|
429 |
tsnrmask = ants.make_image( mask, temp['tSTD'] ) |
|
|
430 |
tsnrmask = ants.threshold_image( tsnrmask, temp['threshold_std'], temp['tSTD'].max() ) |
|
|
431 |
M = ants.timeseries_to_matrix( boldImage, tsnrmask ) |
|
|
432 |
components = None |
|
|
433 |
basis = np.array([]) |
|
|
434 |
if filter_type in ('polynomial', False): |
|
|
435 |
M, basis = regress_poly(degree, M) |
|
|
436 |
# M = M / compute_tSTD(M, 1.)['tSTD'] |
|
|
437 |
# "The covariance matrix C = MMT was constructed and decomposed into its |
|
|
438 |
# principal components using a singular value decomposition." |
|
|
439 |
M = clean_data( M, standardize=True ) |
|
|
440 |
u, _, _ = linalg.svd(M, full_matrices=False) |
|
|
441 |
if components is None: |
|
|
442 |
components = u[:, :ncompcor] |
|
|
443 |
else: |
|
|
444 |
components = np.hstack((components, u[:, :ncompcor])) |
|
|
445 |
if components is None and ncompcor > 0: |
|
|
446 |
raise ValueError('No components found') |
|
|
447 |
return { 'components': components, 'basis': basis } |