Switch to unified view

a b/baselines/common/math_util.py
1
import numpy as np
2
import scipy.signal
3
4
5
def discount(x, gamma):
    """
    Discounted cumulative sum of x along its first axis.

    inputs
    ------
    x: ndarray with at least one dimension; axis 0 is treated as time
    gamma: float discount factor

    outputs
    -------
    y: ndarray with the same shape as x, where

        y[t] = sum over k >= 0 of gamma^k * x[t+k],  for t+k < len(x)

    """
    assert x.ndim >= 1
    # Running the recursion out[n] = in[n] + gamma * out[n-1] over the
    # time-reversed input accumulates future values; flipping the result
    # restores the original time order.
    time_reversed = x[::-1]
    accumulated = scipy.signal.lfilter([1], [1, -gamma], time_reversed, axis=0)
    return accumulated[::-1]
24
25
def explained_variance(ypred,y):
    """
    Fraction of the variance of y that is accounted for by ypred.

    Returns 1 - Var[y-ypred] / Var[y], or nan when Var[y] is exactly zero.

    interpretation:
        ev=1  =>  residual y-ypred has zero variance
        ev=0  =>  residual varies as much as y itself
        ev<0  =>  residual varies more than y itself

    Both arguments must be 1-dimensional arrays.
    """
    assert y.ndim == 1 and ypred.ndim == 1
    target_var = np.var(y)
    if target_var == 0:
        # The ratio is undefined for a constant target.
        return np.nan
    residual_var = np.var(y - ypred)
    return 1 - residual_var / target_var
39
40
def explained_variance_2d(ypred, y):
    """
    Per-column explained variance for 2d arrays.

    For every column j this computes
        1 - Var[y[:, j] - ypred[:, j]] / Var[y[:, j]]
    with variances taken along axis 0.

    inputs
    ------
    ypred: 2d ndarray of predictions
    y: 2d ndarray of targets

    outputs
    -------
    out: 1d ndarray with one entry per column; columns whose target
         variance is below 1e-10 are reported as 0.
    """
    assert y.ndim == 2 and ypred.ndim == 2
    vary = np.var(y, axis=0)
    # The residual variance must be taken per column (axis=0) as well;
    # without the axis argument it is pooled over the whole array and every
    # column is scored against the same global residual.
    residual_var = np.var(y - ypred, axis=0)
    degenerate = vary < 1e-10
    # Substitute a harmless denominator for (near-)constant columns so the
    # division does not emit divide-by-zero warnings; those entries are
    # overwritten with 0 right after.
    safe_vary = np.where(degenerate, 1.0, vary)
    out = 1 - residual_var / safe_vary
    out[degenerate] = 0
    return out
46
47
def ncc(ypred, y):
    """
    Correlation coefficient between ypred and y.

    Returns the off-diagonal entry of the matrix produced by
    np.corrcoef(ypred, y).
    """
    correlation_matrix = np.corrcoef(ypred, y)
    return correlation_matrix[1, 0]
49
50
def flatten_arrays(arrs):
    """
    Concatenate the flattened contents of several arrays into one 1d array.

    arrs: iterable of ndarrays (each must expose a `.flat` attribute)
    returns: 1d ndarray holding the elements of every input, in input order
    """
    flat_parts = []
    for arr in arrs:
        flat_parts.append(arr.flat)
    return np.concatenate(flat_parts)
52
53
def unflatten_vector(vec, shapes):
    """
    Split a flat vector into consecutive arrays with the given shapes.

    inputs
    ------
    vec: 1d ndarray
    shapes: iterable of shapes (tuples of ints; the empty tuple () denotes
            a scalar that consumes a single element)

    outputs
    -------
    arrs: list with one array per entry of shapes; the k-th array is the
          next prod(shape_k) elements of vec reshaped to shape_k
    """
    i = 0
    arrs = []
    for shape in shapes:
        # np.prod returns the float 1.0 for an empty shape, which is not a
        # valid slice bound; cast so scalar shapes work too.
        size = int(np.prod(shape))
        arr = vec[i:i + size].reshape(shape)
        arrs.append(arr)
        i += size
    return arrs
62
63
def discount_with_boundaries(X, New, gamma):
    """
    Discounted sums along axis 0 of X that do not cross episode boundaries.

    inputs
    ------
    X: ndarray with time along axis 0 (e.g. time x features, or 1d)
    New: sequence indexable like X along axis 0; New[t] is 1/True when a
         new episode starts at step t, otherwise 0/False. New[0] is never
         read.
    gamma: float discount factor

    outputs
    -------
    Y: ndarray with the same shape as X, satisfying
        Y[T-1] = X[T-1]
        Y[t]   = X[t] + gamma * Y[t+1] * (1 - New[t+1])
       Floating (or complex) inputs keep their dtype; any other dtype is
       promoted to float64 so the discounted values are not truncated.
    """
    # zeros_like would inherit an integer dtype from X and silently truncate
    # the fractional discounted sums, so only inexact dtypes are kept as-is.
    out_dtype = X.dtype if np.issubdtype(X.dtype, np.inexact) else np.float64
    Y = np.zeros_like(X, dtype=out_dtype)
    T = X.shape[0]
    if T == 0:
        # Nothing to accumulate; Y[T-1] below would raise on an empty array.
        return Y
    Y[T-1] = X[T-1]
    # Walk backwards so Y[t+1] is already final when Y[t] is computed.
    for t in range(T-2, -1, -1):
        Y[t] = X[t] + gamma * Y[t+1] * (1 - New[t+1])
    return Y
74
75
def test_discount_with_boundaries():
    """
    Check discount_with_boundaries on a 4-step sequence whose last step
    begins a new episode, so discounting must stop before it.
    """
    gamma = 0.9
    rewards = np.array([1.0, 2.0, 3.0, 4.0], 'float32')
    episode_starts = [1.0, 0.0, 0.0, 1.0]
    # Steps 0-2 belong to one episode; step 3 starts the next one.
    expected = [
        1 + gamma * 2 + gamma**2 * 3,
        2 + gamma * 3,
        3,
        4,
    ]
    actual = discount_with_boundaries(rewards, episode_starts, gamma)
    assert np.allclose(actual, expected)