|
a |
|
b/baselines/common/math_util.py |
|
|
1 |
import numpy as np |
|
|
2 |
import scipy.signal |
|
|
3 |
|
|
|
4 |
|
|
|
5 |
def discount(x, gamma):
    """
    Discounted cumulative sum of x along axis 0, accumulated from the end.

    inputs
    ------
    x: ndarray with at least one dimension
    gamma: float discount factor

    outputs
    -------
    y: ndarray with same shape as x, where

            y[t] = sum over j = 0..k of gamma^j * x[t+j],
                   with k = len(x) - t - 1
    """
    assert x.ndim >= 1
    # The recursion y[n] = x[n] + gamma * y[n-1] is a first-order IIR filter.
    # Applying it to the time-reversed signal accumulates *future* values;
    # flipping the result restores the original time order.
    reversed_x = x[::-1]
    filtered = scipy.signal.lfilter([1], [1, -gamma], reversed_x, axis=0)
    return filtered[::-1]
|
|
24 |
|
|
|
25 |
def explained_variance(ypred, y):
    """
    Fraction of the variance of y that is accounted for by ypred.
    Returns 1 - Var[y-ypred] / Var[y], or NaN when Var[y] is exactly zero.

    interpretation:
        ev=0  =>  the residual varies as much as y itself
        ev=1  =>  the residual y-ypred has zero variance
        ev<0  =>  the residual varies more than y itself

    Both arguments must be 1d ndarrays.
    """
    assert y.ndim == 1 and ypred.ndim == 1
    target_var = np.var(y)
    # A constant target has nothing to explain; the ratio is undefined.
    if target_var == 0:
        return np.nan
    return 1 - np.var(y - ypred) / target_var
|
|
39 |
|
|
|
40 |
def explained_variance_2d(ypred, y):
    """
    Per-column explained variance of ypred with respect to y.

    inputs
    ------
    ypred: 2d ndarray of predictions
    y: 2d ndarray of targets

    outputs
    -------
    out: 1d ndarray with one entry per column, equal to

            1 - Var[y-ypred] / Var[y]

        where both variances are taken along axis 0. Columns whose target
        variance is below 1e-10 are reported as 0.
    """
    assert y.ndim == 2 and ypred.ndim == 2
    vary = np.var(y, axis=0)
    # The residual variance has to be per column as well: without axis=0 a
    # single variance pooled over every element would be divided by each
    # column's target variance, mixing the columns together.
    residual_var = np.var(y - ypred, axis=0)
    degenerate = vary < 1e-10
    # Swap in a harmless denominator for (near-)constant columns so the
    # division never produces inf/NaN or a runtime warning; those entries are
    # overwritten with 0 below anyway.
    safe_vary = np.where(degenerate, 1.0, vary)
    return np.where(degenerate, 0.0, 1 - residual_var / safe_vary)
|
|
46 |
|
|
|
47 |
def ncc(ypred, y):
    """Correlation coefficient between ypred and y, as computed by np.corrcoef."""
    corr_matrix = np.corrcoef(ypred, y)
    # Row 1 / column 0 is the cross term between the second and first input.
    return corr_matrix[1, 0]
|
|
49 |
|
|
|
50 |
def flatten_arrays(arrs):
    """
    Concatenate the elements of every array in arrs into one 1d array.

    Each array contributes its elements in the order given by its `.flat`
    iterator, and the arrays are joined in the order they appear in arrs.
    """
    flat_views = []
    for arr in arrs:
        flat_views.append(arr.flat)
    return np.concatenate(flat_views)
|
|
52 |
|
|
|
53 |
def unflatten_vector(vec, shapes):
    """
    Split a flat vector into consecutive arrays with the given shapes.

    inputs
    ------
    vec: 1d ndarray holding the elements of all arrays back to back
    shapes: iterable of shapes (e.g. tuples of ints; () denotes a scalar)

    outputs
    -------
    arrs: list of ndarrays; arrs[j] has shape shapes[j] and is built from the
        next prod(shapes[j]) elements of vec. Elements of vec beyond the
        total requested size are ignored.
    """
    offset = 0
    arrs = []
    for shape in shapes:
        # np.prod(()) is the float 1.0, which is not a valid slice bound;
        # casting to int keeps scalar shapes working.
        size = int(np.prod(shape))
        arrs.append(vec[offset:offset + size].reshape(shape))
        offset += size
    return arrs
|
|
62 |
|
|
|
63 |
def discount_with_boundaries(X, New, gamma):
    """
    Discounted sums along axis 0 of X that restart at episode boundaries.

    X: array of floats indexed by time along axis 0 (e.g. time x features)
    New: episode-start flags aligned with X; a value of 1/True at index t
        means a new episode starts at step t, so nothing from step t onward
        is carried back into step t-1
    gamma: discount factor

    Returns an array like X with
        Y[t] = X[t] + gamma * Y[t+1] * (1 - New[t+1])
    and Y[T-1] = X[T-1] for the final step.
    """
    Y = np.zeros_like(X)
    T = X.shape[0]
    last = T - 1
    Y[last] = X[last]
    # Sweep backwards so each step can reuse the already-computed next step.
    for t in reversed(range(last)):
        nxt = t + 1
        Y[t] = X[t] + gamma * Y[nxt] * (1 - New[nxt])
    return Y
|
|
74 |
|
|
|
75 |
def test_discount_with_boundaries():
    """Discounting accumulates inside an episode and stops at an episode start."""
    gamma = 0.9
    rewards = np.array([1.0, 2.0, 3.0, 4.0], 'float32')
    episode_starts = [1.0, 0.0, 0.0, 1.0]
    expected = [
        1 + gamma * 2 + gamma**2 * 3,
        2 + gamma * 3,
        3,  # the next step starts a new episode, so nothing is carried back
        4,
    ]
    actual = discount_with_boundaries(rewards, episode_starts, gamma)
    assert np.allclose(actual, expected)