Switch to unified view

a b/data/simulated/generate_oned_data.py
1
import numpy as np
2
import matplotlib.pyplot as plt
3
import pandas as pd
4
5
import seaborn as sns
6
import sys
7
from gpsa.util import rbf_kernel_numpy as rbf_covariance
8
from scipy.stats import multivariate_normal as mvnpy
9
10
11
def generate_oned_data_affine_warp(
    n_views,
    n_outputs,
    n_samples_per_view,
    noise_variance=0.0,
    n_latent_gps=None,
    scale_factor=1.1,
    additive_factor=0.3,
):
    """Simulate multi-view 1-D data whose coordinates differ by an affine warp.

    A single set of spatial locations is drawn uniformly from [-10, 10] and
    latent functions are sampled at those locations from a zero-mean GP whose
    covariance is ``rbf_covariance`` with both log-parameters equal to
    ``log(1.0)``. The latent draws are mixed into ``n_outputs`` observed
    outputs through a random Gaussian matrix. Every view shares the same
    noiseless outputs; the first view keeps the original coordinates and each
    later view has its coordinates mapped to
    ``x * scale_factor + additive_factor``.

    Parameters
    ----------
    n_views : int
        Number of views to generate (must be at least 1).
    n_outputs : int
        Number of output features (columns of ``Y``).
    n_samples_per_view : int
        Number of spatial locations in each view.
    noise_variance : float, optional
        Variance of the i.i.d. Gaussian noise added to ``Y``.
    n_latent_gps : int or None, optional
        Number of latent GP draws mixed into the outputs. ``None`` uses 2,
        which is the value this function has always used.
    scale_factor : float, optional
        Multiplicative part of the affine warp.
    additive_factor : float, optional
        Additive part of the affine warp.

    Returns
    -------
    X : numpy.ndarray, shape (n_views * n_samples_per_view, 1)
        Stacked coordinates of all views.
    Y : numpy.ndarray, shape (n_views * n_samples_per_view, n_outputs)
        Stacked (noisy) outputs of all views.
    n_samples_list : list of int
        Number of samples in each view.
    view_idx : numpy.ndarray, shape (n_views, n_samples_per_view)
        Row indices into ``X``/``Y`` belonging to each view.

    Raises
    ------
    ValueError
        If ``n_views`` is smaller than 1.
    """
    if n_views < 1:
        raise ValueError("n_views must be at least 1")

    kernel = rbf_covariance
    kernel_params_true = np.array([np.log(1.0), np.log(1.0)])

    # Previously the argument was unconditionally overwritten with 2; keep 2
    # as the default but respect an explicitly requested number of latents.
    if n_latent_gps is None:
        n_latent_gps = 2

    X_orig_single = np.random.uniform(-10, 10, size=(n_samples_per_view, 1))

    # Per-view bookkeeping: contiguous index ranges into the stacked arrays.
    n_samples_list = [n_samples_per_view] * n_views
    cumulative_sums = np.insert(np.cumsum(n_samples_list), 0, 0)
    view_idx = np.array(
        [
            np.arange(cumulative_sums[ii], cumulative_sums[ii + 1])
            for ii in range(n_views)
        ]
    )

    # The covariance is identical for every latent draw, so build it once.
    latent_mean = np.zeros(X_orig_single.shape[0])
    latent_cov = kernel(X_orig_single, X_orig_single, kernel_params_true)
    Y_latent = np.vstack(
        [mvnpy.rvs(mean=latent_mean, cov=latent_cov) for _ in range(n_latent_gps)]
    ).T

    # Mix the latent functions into the observed outputs.
    W_mat = np.random.normal(size=(n_latent_gps, n_outputs))
    Y_orig = Y_latent @ W_mat

    # Replicate once per view so X/Y agree with n_samples_list and view_idx
    # (the arrays used to be hard-wired to exactly two copies).
    Y = np.concatenate([Y_orig] * n_views, axis=0)
    Y += np.random.normal(scale=np.sqrt(noise_variance), size=Y.shape)

    X = np.concatenate([X_orig_single] * n_views, axis=0)
    # The first view is the reference; all remaining views are warped.
    X[n_samples_per_view:] = X[n_samples_per_view:] * scale_factor + additive_factor

    return X, Y, n_samples_list, view_idx
64
65
66
def generate_oned_data_gp_warp(
    n_views,
    n_outputs,
    n_samples_per_view,
    noise_variance=0.0,
    n_latent_gps=None,
    kernel_variance=1.0,
    kernel_lengthscale=1.0,
    mean_slope=1.,
    mean_intercept=0.,
):
    """Simulate multi-view 1-D data whose coordinates are warped by a GP.

    Spatial locations are an evenly spaced grid on [-10, 10]. Output functions
    are sampled at those locations from a zero-mean GP with covariance
    ``rbf_covariance`` (both log-parameters equal to ``log(1.0)``), optionally
    as a linear mixture of ``n_latent_gps`` latent draws. Every view shares the
    same noiseless outputs. The coordinates of each view are an independent
    draw from a GP with mean ``x * mean_slope + mean_intercept`` and covariance
    ``rbf_covariance`` parameterised by
    ``[log(kernel_variance), log(kernel_lengthscale)]``.

    Parameters
    ----------
    n_views : int
        Number of views to generate (must be at least 1).
    n_outputs : int
        Number of output features (columns of ``Y``).
    n_samples_per_view : int
        Number of spatial locations in each view.
    noise_variance : float, optional
        Variance of the i.i.d. Gaussian noise added to ``Y``.
    n_latent_gps : int or None, optional
        If ``None``, each output is an independent GP draw. Otherwise this
        many latent draws are mixed into the outputs; for one latent and two
        outputs the fixed mixing matrix ``[[1, -1]]`` is used, in every other
        case the mixing matrix is standard normal.
    kernel_variance : float, optional
        First parameter of the warp kernel (passed as its logarithm).
    kernel_lengthscale : float, optional
        Second parameter of the warp kernel (passed as its logarithm).
    mean_slope : float, optional
        Slope of the warp GP's linear mean function.
    mean_intercept : float, optional
        Intercept of the warp GP's linear mean function.

    Returns
    -------
    X : numpy.ndarray, shape (n_views * n_samples_per_view, 1)
        Stacked warped coordinates of all views.
    Y : numpy.ndarray, shape (n_views * n_samples_per_view, n_outputs)
        Stacked (noisy) outputs of all views.
    n_samples_list : list of int
        Number of samples in each view.
    view_idx : numpy.ndarray, shape (n_views, n_samples_per_view)
        Row indices into ``X``/``Y`` belonging to each view.

    Raises
    ------
    ValueError
        If ``n_views`` is smaller than 1.
    """
    if n_views < 1:
        raise ValueError("n_views must be at least 1")

    kernel = rbf_covariance
    kernel_params_true = np.array([np.log(1.0), np.log(1.0)])

    X_orig_single = np.linspace(-10, 10, n_samples_per_view).reshape(-1, 1)

    # Per-view bookkeeping: contiguous index ranges into the stacked arrays.
    n_samples_list = [n_samples_per_view] * n_views
    cumulative_sums = np.insert(np.cumsum(n_samples_list), 0, 0)
    view_idx = np.array(
        [
            np.arange(cumulative_sums[ii], cumulative_sums[ii + 1])
            for ii in range(n_views)
        ]
    )

    nY = n_outputs if n_latent_gps is None else n_latent_gps

    # The covariance is identical for every output draw, so build it once.
    output_mean = np.zeros(X_orig_single.shape[0])
    output_cov = kernel(X_orig_single, X_orig_single, kernel_params_true)
    Y_orig = np.vstack(
        [mvnpy.rvs(mean=output_mean, cov=output_cov) for _ in range(nY)]
    ).T

    if n_latent_gps is not None:
        if n_outputs == 2 and n_latent_gps == 1:
            # Deterministic mixing: the two outputs are mirror images.
            # Its shape is (1, 2), so it is only valid for a single latent;
            # applying it with more latents raised a matmul shape error.
            W_mat = np.array([[1, -1]])
        else:
            W_mat = np.random.normal(size=(n_latent_gps, n_outputs))

        Y_orig = Y_orig @ W_mat

    Y = np.concatenate([Y_orig] * n_views, axis=0)
    Y += np.random.normal(scale=np.sqrt(noise_variance), size=Y.shape)

    # Draw warped coordinates from a GP, one independent draw per view.
    warp_kernel_params_true = np.array(
        [np.log(kernel_variance), np.log(kernel_lengthscale)]
    )
    warp_mean = X_orig_single[:, 0] * mean_slope + mean_intercept
    warp_cov = kernel(X_orig_single, X_orig_single, warp_kernel_params_true)

    # reshape (rather than expand_dims) also handles the scalar that rvs
    # returns when there is a single sample per view.
    X = np.concatenate(
        [
            np.reshape(mvnpy.rvs(mean=warp_mean, cov=warp_cov), (-1, 1))
            for _ in range(n_views)
        ],
        axis=0,
    )

    return X, Y, n_samples_list, view_idx
152
153
154
if __name__ == "__main__":

    # Quick visual check: simulate two GP-warped views of a single output
    # and scatter each view's outputs against its warped coordinates.
    n_views = 2
    n_samples_per_view = 100
    X, Y, n_samples_list, view_idx = generate_oned_data_gp_warp(
        n_views=n_views, n_outputs=1, n_samples_per_view=n_samples_per_view
    )

    # view_idx holds the row indices of each view in the stacked arrays.
    for curr_view_idx in view_idx:
        plt.scatter(X[curr_view_idx], Y[curr_view_idx])
    plt.show()