Diff of /import_data.py [000000] .. [0f2bcf]

Switch to unified view

a b/import_data.py
1
import numpy as np
2
import pandas as pd
3
import random
4
5
6
7
## Every sample MUST have at least one observed (non-missing) view.
8
def import_incomplete_handwritten(path='./data/Handwritten_Missing/data_with_missingviews.npz'):
    """Load the multi-view Handwritten dataset and mean-impute missing views.

    A sample is treated as missing in a view when *every* feature of its row
    in that view is NaN. Such rows are overwritten with the feature-wise mean
    of the rows that are observed in the same view.

    Parameters
    ----------
    path : str, optional
        Location of the ``.npz`` archive. It must contain the entries
        ``'X_set'`` and ``'Y_onehot'``. Defaults to the path that was
        previously hard-coded.

    Returns
    -------
    X_set : list (or dict) of arrays
        One array per view, as stored in the archive, with missing rows
        imputed in place. NOTE(review): assumes each view is a 2-D
        (samples x features) float array and that views are indexable by
        ``0 .. M-1`` -- confirm against the code that writes the archive.
    Y_onehot : numpy.ndarray
        The ``'Y_onehot'`` entry of the archive, returned unchanged.
    Mask : numpy.ndarray
        Float array of shape (n_samples, n_views); 1 where the view is
        available for the sample and 0 where it was missing.
    """
    # SECURITY NOTE: allow_pickle=True unpickles arbitrary objects stored in
    # the archive. Only load files from a trusted source.
    # The context manager closes the underlying file handle once the arrays
    # have been read into memory.
    with np.load(path, allow_pickle=True) as npz:
        X_set    = npz['X_set'].tolist()
        Y_onehot = npz['Y_onehot']

    M = len(X_set)                   # number of views
    N = np.shape(X_set[0])[0]        # number of samples (rows of the first view)

    ### Construct Mask Vector to indicate available (m=1) or missing (m=0) values
    Mask = np.ones([N, M])
    for m_idx in range(M):
        view    = X_set[m_idx]
        missing = np.isnan(view).all(axis=1)   # rows that are entirely NaN

        Mask[missing, m_idx] = 0
        # Impute with the mean over observed rows only. If a view has no
        # observed rows at all this yields NaN (plus a NumPy RuntimeWarning).
        view[missing] = np.mean(view[~missing], axis=0)

    return X_set, Y_onehot, Mask
23
24
25
26
def _binary_onehot(labels):
    """Encode a 1-D vector of 0/1 labels as an (n, 2) one-hot float matrix.

    Rows whose label is neither 0 nor 1 are left as all zeros.
    """
    onehot = np.zeros([np.shape(labels)[0], 2])
    onehot[labels == 0, 0] = 1
    onehot[labels == 1, 1] = 1
    return onehot


def import_dataset_TCGA(year=1, data_dir='/media/vdslab/Genomics/TCGA/dataset/FINAL/cleaned'):
    """Load the incomplete multi-view TCGA dataset and split it by completeness.

    Reads ``incomplete_multi_view_pca_{year}yr.npz`` from ``data_dir``. The
    archive must contain a mask ``'m'``, labels ``'y'`` and one entry
    ``'x1' .. 'xM'`` per view, where ``M`` is the number of mask columns.
    Rows of a view whose first feature is NaN are overwritten with the
    feature-wise NaN-ignoring mean of that view.

    A sample is "complete" when its mask row sums to the number of views
    ``M``; every other sample is "incomplete".

    Parameters
    ----------
    year : int, optional
        Selects the archive file name; converted with ``int()``.
    data_dir : str, optional
        Directory containing the archive. Defaults to the location that was
        previously hard-coded.

    Returns
    -------
    X_set_comp : dict[int, numpy.ndarray]
        View index (0-based) -> imputed features of the complete samples.
    Y_onehot_comp : numpy.ndarray
        (n_complete, 2) one-hot labels; column 0 for label 0, column 1 for
        label 1.
    Mask_comp : numpy.ndarray
        Mask rows of the complete samples.
    X_set_incomp : dict[int, numpy.ndarray]
        Same as ``X_set_comp`` for the incomplete samples.
    Y_onehot_incomp : numpy.ndarray
        (n_incomplete, 2) one-hot labels of the incomplete samples.
    Mask_incomp : numpy.ndarray
        Mask rows of the incomplete samples.
    """
    filename = '{}/incomplete_multi_view_pca_{}yr.npz'.format(data_dir, int(year))

    # Read everything inside the context manager so the archive's file handle
    # is released as soon as the arrays are in memory.
    with np.load(filename) as npz:
        Mask = npz['m']
        M    = np.shape(Mask)[1]     # number of views

        X_set = {}
        for m in range(M):
            tmp = npz['x{}'.format(m + 1)]   # views are stored 1-based
            # A NaN in the first feature marks the whole row as missing.
            tmp[np.isnan(tmp[:, 0]), :] = np.nanmean(tmp, axis=0)
            X_set[m] = tmp

        # Flatten so a column vector of labels works with the boolean
        # indexing used for the one-hot encoding.
        Y = np.reshape(npz['y'], -1)

    # Compare against the actual number of views rather than a fixed 4, and
    # compute the selector once instead of once per use.
    is_complete   = np.sum(Mask, axis=1) == M
    is_incomplete = ~is_complete

    X_set_comp   = {m: X_set[m][is_complete] for m in range(M)}
    X_set_incomp = {m: X_set[m][is_incomplete] for m in range(M)}

    Mask_comp    = Mask[is_complete]
    Mask_incomp  = Mask[is_incomplete]

    Y_onehot_comp   = _binary_onehot(Y[is_complete])
    Y_onehot_incomp = _binary_onehot(Y[is_incomplete])

    return X_set_comp, Y_onehot_comp, Mask_comp, X_set_incomp, Y_onehot_incomp, Mask_incomp