Diff of /ants/math/quantile.py [000000] .. [5d12a0]

Switch to unified view

a b/ants/math/quantile.py
1
2
__all__ = ['ilr',
3
           'rank_intensity',
4
           'quantile',
5
           'regress_poly',
6
           'regress_components',
7
           'get_average_of_timeseries',
8
           'compcor',
9
           'bandpass_filter_matrix' ]
10
11
import numpy as np
12
from numpy.polynomial import Legendre
13
from scipy import linalg
14
from scipy.stats import pearsonr
15
from scipy.stats import rankdata
16
import pandas as pd
17
from pandas import DataFrame
18
import statsmodels.api as sm
19
import statsmodels.formula.api as smf
20
21
import ants
22
from ants.decorators import image_method
23
24
def rank_intensity( x, mask=None, get_mask=True, method='max',  ):
25
    """
26
    Rank transform the intensity of the input image with or without masking.
27
    Intensities will transform from [0,1,2,55] to [0,1,2,3] so this may not be
28
    appropriate for quantitative images - however, you never know.  rank
29
    transformations generally improve robustness so it is an empirical question
30
    that should be evaluated.
31
32
    Arguments
33
    ---------
34
35
    x : ANTsImage
36
        input image
37
38
    mask : ANTsImage
39
        optional mask
40
41
    get_mask: boolean
42
        will estimate a mask when none provided
43
44
    method : a scipy rank method (max,min,average,dense)
45
46
47
    return: transformed image
48
49
    Example
50
    -------
51
    >>> img = ants.image_read(ants.get_data('r16'))
52
    >>> ants.rank_intensity(img)
53
    """
54
    if mask is not None:
55
        fir = rankdata( (x*mask).numpy(), method=method )
56
    elif mask is None and get_mask == True:
57
        mask = ants.get_mask( x )
58
        fir = rankdata( (x*mask).numpy(), method=method )
59
    else:
60
        fir = rankdata( x.numpy(), method=method )
61
    fir = fir - 1
62
    fir = fir.reshape( x.shape )
63
    rimg = ants.from_numpy( fir.astype(float)  )
64
    rimg = ants.iMath(rimg,"Normalize")
65
    ants.copy_image_info( x, rimg )
66
    if mask is not None:
67
        rimg = rimg * mask
68
    return( rimg )
69
70
71
def ilr( data_frame, voxmats, ilr_formula, verbose = False ):
72
    """
73
    Image-based linear regression.
74
75
    This function simplifies calculating p-values from linear models
76
    in which there is a similar formula that is applied many times
77
    with a change in image-based predictors.  Image-based variables
78
    are stored in the input matrix list. They should be named
79
    consistently in the input formula and in the image list.  If they
80
    are not, an error will be thrown.  All input matrices should have
81
    the same number of rows and columns.
82
83
    This function takes advantage of statsmodels R-style formulas.
84
85
    ANTsR function: `ilr`
86
87
    Arguments
88
    ---------
89
90
    data_frame: This data frame contains all relevant predictors except for
91
        the matrices associated with the image variables.  One should convert
92
        any categorical predictors ahead of time using `pd.get_dummies`.
93
94
    voxmats: The named list of matrices that contains the changing
95
        predictors.
96
97
    ilr_formula: This is a character string that defines a valid regression
98
        formula in the R-style.
99
100
    verbose:  will print a little bit of diagnostic information that allows
101
        a degree of model checking
102
103
    Returns
104
    -------
105
106
    A list of different matrices that contain names derived from the
107
    formula and the coefficients of the regression model.  The size of
108
    the output values ( p-values, t-values, parameter values ) will match
109
    the input matrix and, as such, can be converted to an image via `make_image`
110
111
    Example
112
    -------
113
114
    >>> nsub = 20
115
    >>> mu, sigma = 0, 1
116
    >>> outcome = np.random.normal( mu, sigma, nsub )
117
    >>> covar = np.random.normal( mu, sigma, nsub )
118
    >>> mat = np.random.normal( mu, sigma, (nsub, 500 ) )
119
    >>> mat2 = np.random.normal( mu, sigma, (nsub, 500 ) )
120
    >>> data = {'covar':covar,'outcome':outcome}
121
    >>> df = pd.DataFrame( data )
122
    >>> vlist = { "mat1": mat, "mat2": mat2 }
123
    >>> myform = " outcome ~ covar * mat1 "
124
    >>> result = ants.ilr( df, vlist, myform)
125
    >>> myform = " mat2 ~ covar + mat1 "
126
    >>> result = ants.ilr( df, vlist, myform)
127
128
    """
129
130
    nvoxmats = len( voxmats )
131
    if nvoxmats < 1 :
132
        raise ValueError('Pass at least one matrix to voxmats list')
133
    keylist = list(voxmats.keys())
134
    firstmat = keylist[0]
135
    voxshape = voxmats[firstmat].shape
136
    nvox = voxshape[1]
137
    nmats = len( keylist )
138
    for k in keylist:
139
        if voxmats[firstmat].shape != voxmats[k].shape:
140
            raise ValueError('Matrices must have same number of rows (samples)')
141
142
    # test voxel
143
    vox = 0
144
    nrows = data_frame.shape[0]
145
    data_frame_vox = data_frame.copy()
146
    for k in range( nmats ):
147
        data = {keylist[k]: np.random.normal(0,1,nrows) }
148
        temp = pd.DataFrame( data )
149
        data_frame_vox = pd.concat([data_frame_vox.reset_index(drop=True),temp], axis=1 )
150
    mod = smf.ols(formula=ilr_formula, data=data_frame_vox )
151
    res = mod.fit()
152
    modelNames = res.model.exog_names
153
    if verbose:
154
        print( data_frame_vox )
155
        print(res.summary())
156
    nOutcomes = len( modelNames )
157
    tValsOut = list()
158
    pValsOut = list()
159
    bValsOut = list()
160
    for k in range( len( modelNames ) ):
161
        bValsOut.append( np.zeros( nvox ) )
162
        pValsOut.append( np.zeros( nvox ) )
163
        tValsOut.append( np.zeros( nvox ) )
164
165
    data_frame_vox = data_frame.copy()
166
    for v in range( nmats ):
167
        data = {keylist[v]: voxmats[keylist[v]][:,k] }
168
        temp = pd.DataFrame( data )
169
        data_frame_vox = pd.concat([data_frame_vox.reset_index(drop=True),temp], axis=1 )
170
    for k in range( nvox ):
171
        # first get the correct data frame
172
        for v in range( nmats ):
173
            data_frame_vox[ keylist[v] ] = voxmats[keylist[v]][:,k]
174
        # then get the local model results
175
        mod = smf.ols(formula=ilr_formula, data=data_frame_vox )
176
        res = mod.fit()
177
        tvals = res.tvalues
178
        pvals = res.pvalues
179
        bvals = res.params
180
        for v in range( len( modelNames ) ):
181
            bValsOut[v][k] = bvals.iloc[v]
182
            pValsOut[v][k] = pvals.iloc[v]
183
            tValsOut[v][k] = tvals.iloc[v]
184
185
    bValsOutDict = { }
186
    tValsOutDict = { }
187
    pValsOutDict = { }
188
    for v in range( len( modelNames ) ):
189
        bValsOutDict[ 'coef_' + modelNames[v] ] = bValsOut[v]
190
        tValsOutDict[ 'tval_' + modelNames[v] ] = tValsOut[v]
191
        pValsOutDict[ 'pval_' + modelNames[v] ] = pValsOut[v]
192
193
    return {
194
        'modelNames': modelNames,
195
        'coefficientValues': bValsOutDict,
196
        'pValues': pValsOutDict,
197
        'tValues': tValsOutDict }
198
199
200
@image_method
201
def quantile(image, q, nonzero=True):
202
    """
203
    Get the quantile values from an ANTsImage
204
    
205
    Examples
206
    --------
207
    >>> img = ants.image_read(ants.get_data('r16'))
208
    >>> ants.quantile(img, 0.5)
209
    >>> ants.quantile(img, (0.5, 0.75))
210
    """
211
    img_arr = image.numpy()
212
    if isinstance(q, (list,tuple)):
213
        q = [qq*100. if qq <= 1. else qq for qq in q]
214
        if nonzero:
215
            img_arr = img_arr[img_arr>0]
216
        vals = [np.percentile(img_arr, qq) for qq in q]
217
        return tuple(vals)
218
    elif isinstance(q, (float,int)):
219
        if q <= 1.:
220
            q = q*100.
221
        if nonzero:
222
            img_arr = img_arr[img_arr>0]
223
        return np.percentile(img_arr[img_arr>0], q)
224
    else:
225
        raise ValueError('q argument must be list/tuple or float/int')
226
227
228
def regress_poly(degree, data, remove_mean=True, axis=-1):
229
    """
230
    Returns data with degree polynomial regressed out.
231
    :param bool remove_mean: whether or not demean data (i.e. degree 0),
232
    :param int axis: numpy array axes along which regression is performed
233
    """
234
    timepoints = data.shape[0]
235
    # Generate design matrix
236
    X = np.ones((timepoints, 1))  # quick way to calc degree 0
237
    for i in range(degree):
238
        polynomial_func = Legendre.basis(i + 1)
239
        value_array = np.linspace(-1, 1, timepoints)
240
        X = np.hstack((X, polynomial_func(value_array)[:, np.newaxis]))
241
    non_constant_regressors = X[:, :-1] if X.shape[1] > 1 else np.array([])
242
    betas = np.linalg.pinv(X).dot(data)
243
    if remove_mean:
244
        datahat = X.dot(betas)
245
    else:  # disregard the first layer of X, which is degree 0
246
        datahat = X[:, 1:].dot(betas[1:, ...])
247
    regressed_data = data - datahat
248
    return regressed_data, non_constant_regressors
249
250
def regress_components( data, components, remove_mean=True ):
251
    """
252
    Returns data with components regressed out.
253
    :param bool remove_mean: whether or not demean data (i.e. degree 0),
254
    :param int axis: numpy array axes along which regression is performed
255
    """
256
    timepoints = data.shape[0]
257
    betas = np.linalg.pinv(components).dot(data)
258
    if remove_mean:
259
        datahat = components.dot(betas)
260
    else:  # disregard the first layer of X, which is degree 0
261
        datahat = components[:, 1:].dot(betas[1:, ...])
262
    regressed_data = data - datahat
263
    return regressed_data
264
265
266
def get_average_of_timeseries( image, idx=None ):
267
    """Average the timeseries into a dimension-1 image.
268
    image: input time series image
269
    idx:  indices over which to average
270
    """
271
    imagedim = image.dimension
272
    if idx is None:
273
        idx = range( image.shape[ imagedim - 1 ] )
274
    i0 = ants.slice_image( image, axis=image.dimension-1, idx=idx[0] ) * 0
275
    wt = 1.0 / len( idx )
276
    for k in idx:
277
        i0 = i0 + ants.slice_image( image, axis=image.dimension-1, idx=k ) * wt
278
    return( i0 )
279
280
def bandpass_filter_matrix( matrix,
281
    tr=1, lowf=0.01, highf=0.1, order = 3):
282
    """
283
    Bandpass filter the input time series image
284
285
    ANTsR function: `frequencyFilterfMRI`
286
287
    Arguments
288
    ---------
289
290
    image: input time series image
291
292
    tr:    sampling time interval (inverse of sampling rate)
293
294
    lowf:  low frequency cutoff
295
296
    highf: high frequency cutoff
297
298
    order: order of the butterworth filter run using `filtfilt`
299
300
    Returns
301
    -------
302
    filtered matrix
303
304
    Example
305
    -------
306
307
    >>> import numpy as np
308
    >>> import ants
309
    >>> import matplotlib.pyplot as plt
310
    >>> brainSignal = np.random.randn( 400, 1000 )
311
    >>> tr = 1
312
    >>> filtered = ants.bandpass_filter_matrix( brainSignal, tr = tr )
313
    >>> nsamples = brainSignal.shape[0]
314
    >>> t = np.linspace(0, tr*nsamples, nsamples, endpoint=False)
315
    >>> k = 20
316
    >>> plt.plot(t, brainSignal[:,k], label='Noisy signal')
317
    >>> plt.plot(t, filtered[:,k], label='Filtered signal')
318
    >>> plt.xlabel('time (seconds)')
319
    >>> plt.grid(True)
320
    >>> plt.axis('tight')
321
    >>> plt.legend(loc='upper left')
322
    >>> plt.show()
323
    """
324
    from scipy.signal import butter, filtfilt
325
326
    def butter_bandpass(lowcut, highcut, fs, order ):
327
        nyq = 0.5 * fs
328
        low = lowcut / nyq
329
        high = highcut / nyq
330
        b, a = butter(order, [low, high], btype='band')
331
        return b, a
332
333
    def butter_bandpass_filter(data, lowcut, highcut, fs, order ):
334
        b, a = butter_bandpass(lowcut, highcut, fs, order=order)
335
        y = filtfilt(b, a, data)
336
        return y
337
338
    fs = 1/tr   # sampling rate based on tr
339
    nsamples = matrix.shape[0]
340
    ncolumns = matrix.shape[1]
341
    matrixOut = matrix.copy()
342
    for k in range( ncolumns ):
343
        matrixOut[:,k] = butter_bandpass_filter(
344
            matrix[:,k], lowf, highf, fs, order=order )
345
    return matrixOut
346
347
def clean_data(arr, standardize=True):
348
    """
349
    Remove columns from a NumPy array that have no variation or contain NA/Inf values.
350
    Optionally standardize the remaining data.
351
352
    :param arr: NumPy array to be cleaned.
353
    :param standardize: Boolean, if True standardize the data.
354
    :return: Cleaned (and optionally standardized) NumPy array.
355
    """
356
    valid_columns = []
357
358
    for i in range(arr.shape[1]):
359
        column = arr[:, i]
360
        if np.any(column != column[0]) and not np.any(np.isnan(column)) and not np.any(np.isinf(column)):
361
            valid_columns.append(i)
362
363
    cleaned_data = arr[:, valid_columns]
364
365
    if standardize:
366
        mean = np.mean(cleaned_data, axis=0)
367
        std_dev = np.std(cleaned_data, axis=0)
368
        # Avoid division by zero in case of zero standard deviation
369
        std_dev[std_dev == 0] = 1
370
        cleaned_data = (cleaned_data - mean) / std_dev
371
372
    return cleaned_data
373
374
def compcor( boldImage, ncompcor=4, quantile=0.975, mask=None, filter_type=False, degree=2 ):
375
    """
376
    Compute noise components from the input image
377
378
    ANTsR function: `compcor`
379
380
    this is adapted from nipy code https://github.com/nipy/nipype/blob/e29ac95fc0fc00fedbcaa0adaf29d5878408ca7c/nipype/algorithms/confounds.py
381
382
    Arguments
383
    ---------
384
385
    boldImage: input time series image
386
387
    ncompcor:  number of noise components to return
388
389
    quantile:  quantile defining high-variance
390
391
    mask:      mask defining brain or specific tissues
392
393
    filter_type: type off filter to apply to time series before computing
394
                 noise components.
395
396
        'polynomial' - Legendre polynomial basis
397
        False - None (mean-removal only)
398
399
    degree: order of polynomial used to remove trends from the timeseries
400
401
    Returns
402
    -------
403
    dictionary containing:
404
405
        components: a numpy array
406
407
        basis: a numpy array containing the (non-constant) filter regressors
408
409
    Example
410
    -------
411
    >>> cc = ants.compcor( ants.image_read(ants.get_ants_data("ch2")) )
412
413
    """
414
415
    def compute_tSTD(M, quantile, x=0, axis=0):
416
        stdM = np.std(M, axis=axis)
417
        # set bad values to x
418
        stdM[stdM == 0] = x
419
        stdM[np.isnan(stdM)] = x
420
        tt = round( quantile*100 )
421
        threshold_std = np.percentile( stdM, tt )
422
    #    threshold_std = quantile( stdM, quantile )
423
        return { 'tSTD': stdM, 'threshold_std': threshold_std}
424
    if mask is None:
425
        temp = ants.slice_image( boldImage, axis=boldImage.dimension-1, idx=0 )
426
        mask = ants.get_mask( temp )
427
    imagematrix = ants.timeseries_to_matrix( boldImage, mask )
428
    temp = compute_tSTD( imagematrix, quantile, 0 )
429
    tsnrmask = ants.make_image( mask,  temp['tSTD'] )
430
    tsnrmask = ants.threshold_image( tsnrmask, temp['threshold_std'], temp['tSTD'].max() )
431
    M = ants.timeseries_to_matrix( boldImage, tsnrmask )
432
    components = None
433
    basis = np.array([])
434
    if filter_type in ('polynomial', False):
435
        M, basis = regress_poly(degree, M)
436
#    M = M / compute_tSTD(M, 1.)['tSTD']
437
    # "The covariance matrix C = MMT was constructed and decomposed into its
438
    # principal components using a singular value decomposition."
439
    M = clean_data( M, standardize=True )
440
    u, _, _ = linalg.svd(M, full_matrices=False)
441
    if components is None:
442
        components = u[:, :ncompcor]
443
    else:
444
        components = np.hstack((components, u[:, :ncompcor]))
445
    if components is None and ncompcor > 0:
446
        raise ValueError('No components found')
447
    return { 'components': components, 'basis': basis }