Switch to side-by-side view

--- a
+++ b/2-Generating Synthetic ECG Data/ecg_simulation_multichannel.py
@@ -0,0 +1,418 @@
+#!/usr/bin/env python
+# coding: utf-8
+
+# In[5]:
+
+
+import neurokit2 as nk
+import numpy as np
+import pandas as pd
+import parameters as para
+
+
+# -*- coding: utf-8 -*-
+import math
+
+import numpy as np
+import scipy
+
+from neurokit2.signal import signal_distort, signal_resample
+
+
+def ecg_simulate_multichannel(duration=10, length=None, sampling_rate=1000, noise=0.01, Anoise=0.01, heart_rate=70, method="ecgsyn", 
+    random_state=None, ti=(-70, -15, 0, 15, 100), ai=(1.2, -5, 30, -7.5, 0.75), bi=(0.25, 0.1, 0.1, 0.1, 0.4), gamma=np.ones((12,5))):
+    """Simulate an ECG/EKG signal.
+
+    Generate an artificial (synthetic) ECG signal of a given duration and sampling rate using either
+    the ECGSYN dynamical model (McSharry et al., 2003) or a simpler model based on Daubechies wavelets
+    to roughly approximate cardiac cycles.
+
+    Parameters
+    ----------
+    duration : int
+        Desired recording length in seconds.
+    sampling_rate : int
+        The desired sampling rate (in Hz, i.e., samples/second).
+    length : int
+        The desired length of the signal (in samples).
+    noise : float
+        Noise level (amplitude of the laplace noise).
+    heart_rate : int
+        Desired simulated heart rate (in beats per minute). The default is 70. Note that for the
+        ECGSYN method, random fluctuations are to be expected to mimick a real heart rate. These
+        fluctuations can cause some slight discrepancies between the requested heart rate and the
+        empirical heart rate, especially for shorter signals.
+    method : str
+        The model used to generate the signal. Can be 'simple' for a simulation based on Daubechies
+        wavelets that roughly approximates a single cardiac cycle. If 'ecgsyn' (default), will use an
+        advanced model desbribed `McSharry et al. (2003) <https://physionet.org/content/ecgsyn/>`_.
+    random_state : int
+        Seed for the random number generator.
+
+    Returns
+    -------
+    array
+        Vector containing the ECG signal.
+
+    Examples
+    ----------
+    >>> import pandas as pd
+    >>> import neurokit2 as nk
+    >>>
+    >>> ecg1 = nk.ecg_simulate_multichannel(duration=10, method="simple")
+    >>> ecg2 = nk.ecg_simulate_multichannel(duration=10, method="ecgsyn")
+    >>> pd.DataFrame({"ECG_Simple": ecg1,
+    ...               "ECG_Complex": ecg2}).plot(subplots=True) #doctest: +ELLIPSIS
+    array([<AxesSubplot:>, <AxesSubplot:>], dtype=object)
+
+    See Also
+    --------
+    rsp_simulate, eda_simulate, ppg_simulate, emg_simulate
+
+
+    References
+    -----------
+    - McSharry, P. E., Clifford, G. D., Tarassenko, L., & Smith, L. A. (2003). A dynamical model for
+    generating synthetic electrocardiogram signals. IEEE transactions on biomedical engineering, 50(3), 289-294.
+    - https://github.com/diarmaidocualain/ecg_simulation
+
+    """
+    # Seed the random generator for reproducible results
+    np.random.seed(random_state)
+
+    # Generate number of samples automatically if length is unspecified
+    if length is None:
+        length = duration * sampling_rate
+    if duration is None:
+        duration = length / sampling_rate
+
+    # Run appropriate method
+    if method.lower() in ["simple", "daubechies"]:
+        ecg = _ecg_simulate_multichannel_daubechies(
+            duration=duration, length=length, sampling_rate=sampling_rate, heart_rate=heart_rate
+        )
+    else:
+        approx_number_beats = int(np.round(duration * (heart_rate / 60)))
+        ecgs, results = _ecg_simulate_multichannel_ecgsyn_multichannel(
+            sfecg=sampling_rate,
+            N=approx_number_beats,
+            Anoise=Anoise,
+            hrmean=heart_rate,
+            hrstd=1,
+            lfhfratio=0.5,
+            sfint=sampling_rate,
+            ti=ti,#(-70, -15, 0, 15, 100),
+            ai=ai,#(1.2, -5, 30, -7.5, 0.75),
+            bi=bi,#(0.25, 0.1, 0.1, 0.1, 0.4),
+            gamma=gamma
+        )
+        # Cut to match expected length
+        for i in range(len(ecgs)):
+            ecgs[i] = ecgs[i][0:length]
+
+    for i in range(len(ecgs)):
+        # Add random noise
+        if noise > 0:
+            ecgs[i] = signal_distort(
+                ecgs[i],
+                sampling_rate=sampling_rate,
+                noise_amplitude=noise,
+                noise_frequency=[5, 10, 100],
+                noise_shape="laplace",
+                random_state=random_state,
+                silent=True,
+            )
+
+    # Reset random seed (so it doesn't affect global)
+    np.random.seed(None)
+    return ecgs, results
+
+
+
+
+# =============================================================================
+# Daubechies
+# =============================================================================
+def _ecg_simulate_multichannel_daubechies(duration=10, length=None, sampling_rate=1000, heart_rate=70):
+    """Generate an artificial (synthetic) ECG signal of a given duration and sampling rate.
+
+    It uses a 'Daubechies' wavelet that roughly approximates a single cardiac cycle.
+    This function is based on `this script <https://github.com/diarmaidocualain/ecg_simulation>`_.
+
+    """
+    # The "Daubechies" wavelet is a rough approximation to a real, single, cardiac cycle
+    cardiac = scipy.signal.wavelets.daub(10)
+
+    # Add the gap after the pqrst when the heart is resting.
+    cardiac = np.concatenate([cardiac, np.zeros(10)])
+
+    # Caculate the number of beats in capture time period
+    num_heart_beats = int(duration * heart_rate / 60)
+
+    # Concatenate together the number of heart beats needed
+    ecg = np.tile(cardiac, num_heart_beats)
+
+    # Change amplitude
+    ecg = ecg * 10
+
+    # Resample
+    ecg = signal_resample(
+        ecg, sampling_rate=int(len(ecg) / 10), desired_length=length, desired_sampling_rate=sampling_rate
+    )
+
+    return ecg
+
+
+# =============================================================================
+# ECGSYN
+# =============================================================================
+def _ecg_simulate_multichannel_ecgsyn_multichannel(
+    sfecg=256,
+    N=256,
+    Anoise=0,
+    hrmean=60,
+    hrstd=1,
+    lfhfratio=0.5,
+    sfint=512,
+    ti=(-70, -15, 0, 15, 100),
+    ai=(1.2, -5, 30, -7.5, 0.75),
+    bi=(0.25, 0.1, 0.1, 0.1, 0.4),
+    gamma=np.ones((12,5))
+):
+    """This function is a python translation of the matlab script by `McSharry & Clifford (2013)
+
+    <https://physionet.org/content/ecgsyn>`_.
+
+    Parameters
+    ----------
+    % Operation uses the following parameters (default values in []s):
+    % sfecg: ECG sampling frequency [256 Hertz]
+    % N: approximate number of heart beats [256]
+    % Anoise: Additive uniformly distributed measurement noise [0 mV]
+    % hrmean: Mean heart rate [60 beats per minute]
+    % hrstd: Standard deviation of heart rate [1 beat per minute]
+    % lfhfratio: LF/HF ratio [0.5]
+    % sfint: Internal sampling frequency [256 Hertz]
+    % Order of extrema: (P Q R S T)
+    % ti = angles of extrema (in degrees)
+    % ai = z-position of extrema
+    % bi = Gaussian width of peaks
+
+    Returns
+    -------
+    array
+        Vector containing simulated ecg signal.
+
+#    Examples
+#    --------
+#    >>> import matplotlib.pyplot as plt
+#    >>> import neurokit2 as nk
+#    >>>
+#    >>> s = _ecg_simulate_multichannel_ecgsyn_multichannelth()
+#    >>> x = np.linspace(0, len(s)-1, len(s))
+#    >>> num_points = 4000
+#    >>>
+#    >>> num_points = min(num_points, len(s))
+#    >>> plt.plot(x[:num_points], s[:num_points]) #doctest: +SKIP
+#    >>> plt.show() #doctest: +SKIP
+
+    """
+
+    if not isinstance(ti, np.ndarray):
+        ti = np.array(ti)
+    if not isinstance(ai, np.ndarray):
+        ai = np.array(ai)
+    if not isinstance(bi, np.ndarray):
+        bi = np.array(bi)
+
+    ti = ti * np.pi / 180
+
+    # Adjust extrema parameters for mean heart rate
+    hrfact = np.sqrt(hrmean / 60)
+    hrfact2 = np.sqrt(hrfact)
+    bi = hrfact * bi
+    ti = np.array([hrfact2, hrfact, 1, hrfact, hrfact2]) * ti
+
+    # Check that sfint is an integer multiple of sfecg
+    q = np.round(sfint / sfecg)
+    qd = sfint / sfecg
+    if q != qd:
+        raise ValueError(
+            "Internal sampling frequency (sfint) must be an integer multiple of the ECG sampling frequency"
+            " (sfecg). Your current choices are: sfecg = " + str(sfecg) + " and sfint = " + str(sfint) + "."
+        )
+
+    # Define frequency parameters for rr process
+    # flo and fhi correspond to the Mayer waves and respiratory rate respectively
+    flo = 0.1
+    fhi = 0.25
+    flostd = 0.01
+    fhistd = 0.01
+
+    # Calculate time scales for rr and total output
+    sfrr = 1
+    trr = 1 / sfrr
+    rrmean = 60 / hrmean
+    n = 2 ** (np.ceil(np.log2(N * rrmean / trr)))
+
+    rr0 = _ecg_simulate_multichannel_rrprocess(flo, fhi, flostd, fhistd, lfhfratio, hrmean, hrstd, sfrr, n)
+
+    # Upsample rr time series from 1 Hz to sfint Hz
+    rr = signal_resample(rr0, sampling_rate=1, desired_sampling_rate=sfint)
+
+    # Make the rrn time series
+    dt = 1 / sfint
+    rrn = np.zeros(len(rr))
+    tecg = 0
+    i = 0
+    while i < len(rr):
+        tecg += rr[i]
+        ip = int(np.round(tecg / dt))
+        rrn[i:ip] = rr[i]
+        i = ip
+    Nt = ip
+
+    # Integrate system using fourth order Runge-Kutta
+    x0 = np.array([1, 0, 0.04])
+
+    # tspan is a tuple of (min, max) which defines the lower and upper bound of t in ODE
+    # t_eval is the list of desired t points for ODE
+    # in Matlab, ode45 can accepts both tspan and t_eval in one argument
+    Tspan = [0, (Nt - 1) * dt]
+    t_eval = np.linspace(0, (Nt - 1) * dt, Nt)
+
+    # AJP: Loop over the twelve leads modifying ai in the loop to generate each lead's data
+    # AJP: Because these are all starting at the same position, it may make sense to grab a random segment within the series to simulate random phase and to forget the initial conditions
+    results = []
+    single_leads = []
+    for lead in range(12):
+
+        # as passing extra arguments to derivative function is not supported yet in solve_ivp
+        # lambda function is used to serve the purpose
+        result = scipy.integrate.solve_ivp(
+            lambda t, x: _ecg_simulate_multichannel_derivsecgsyn(t, x, rrn, ti, sfint, gamma[lead]*ai, bi), Tspan, x0, t_eval=t_eval
+        )
+        results.append(result)
+        X0 = result.y
+
+        # downsample to required sfecg
+        X = X0[:, np.arange(0, X0.shape[1], q).astype(int)]
+
+        # Scale signal to lie between -0.4 and 1.2 mV
+        z = X[2, :].copy()
+        zmin = np.min(z)
+        zmax = np.max(z)
+        zrange = zmax - zmin
+        z = (z - zmin) * 1.6 / zrange - 0.4
+
+        # include additive uniformly distributed measurement noise
+        eta = np.random.normal(0, 1, len(z))
+        #eta = 2 * np.random.uniform(len(z)) - 1 #AJP: this doesn't make any sense
+        single_lead = z + Anoise * eta#, result  # Return signal
+        single_leads.append(single_lead)
+    return single_leads, results
+    #return z + Anoise * eta, result  # Return signal
+
+
+def _ecg_simulate_multichannel_derivsecgsyn(t, x, rr, ti, sfint, ai, bi):
+
+    ta = math.atan2(x[1], x[0])
+    r0 = 1
+    a0 = 1.0 - np.sqrt(x[0] ** 2 + x[1] ** 2) / r0
+
+    ip = np.floor(t * sfint).astype(int)
+    w0 = 2 * np.pi / rr[min(ip, len(rr) - 1)]
+    # w0 = 2*np.pi/rr[ip[ip <= np.max(rr)]]
+
+    fresp = 0.25
+    zbase = 0.005 * np.sin(2 * np.pi * fresp * t)
+
+    dx1dt = a0 * x[0] - w0 * x[1]
+    dx2dt = a0 * x[1] + w0 * x[0]
+
+    # matlab rem and numpy rem are different
+    # dti = np.remainder(ta - ti, 2*np.pi)
+    dti = (ta - ti) - np.round((ta - ti) / 2 / np.pi) * 2 * np.pi
+    dx3dt = -np.sum(ai * dti * np.exp(-0.5 * (dti / bi) ** 2)) - 1 * (x[2] - zbase)
+
+    dxdt = np.array([dx1dt, dx2dt, dx3dt])
+    return dxdt
+
+
+def _ecg_simulate_multichannel_rrprocess(
+    flo=0.1, fhi=0.25, flostd=0.01, fhistd=0.01, lfhfratio=0.5, hrmean=60, hrstd=1, sfrr=1, n=256
+):
+    w1 = 2 * np.pi * flo
+    w2 = 2 * np.pi * fhi
+    c1 = 2 * np.pi * flostd
+    c2 = 2 * np.pi * fhistd
+    sig2 = 1
+    sig1 = lfhfratio
+    rrmean = 60 / hrmean
+    rrstd = 60 * hrstd / (hrmean * hrmean)
+
+    df = sfrr / n
+    w = np.arange(n) * 2 * np.pi * df
+    dw1 = w - w1
+    dw2 = w - w2
+
+    Hw1 = sig1 * np.exp(-0.5 * (dw1 / c1) ** 2) / np.sqrt(2 * np.pi * c1 ** 2)
+    Hw2 = sig2 * np.exp(-0.5 * (dw2 / c2) ** 2) / np.sqrt(2 * np.pi * c2 ** 2)
+    Hw = Hw1 + Hw2
+    Hw0 = np.concatenate((Hw[0 : int(n / 2)], Hw[int(n / 2) - 1 :: -1]))
+    Sw = (sfrr / 2) * np.sqrt(Hw0)
+
+    ph0 = 2 * np.pi * np.random.uniform(size=int(n / 2 - 1))
+    ph = np.concatenate([[0], ph0, [0], -np.flipud(ph0)])
+    SwC = Sw * np.exp(1j * ph)
+    x = (1 / n) * np.real(np.fft.ifft(SwC))
+
+    xstd = np.std(x)
+    ratio = rrstd / xstd
+    return rrmean + x * ratio  # Return RR
+
+
+
+def simulation(normal_N,abnormal_N, save_params = False):
+    normal_data = []
+    normal_params = []
+    print('Creating normal dataset')
+    for i in range(normal_N):
+        ti = np.random.normal(para.mu_t_1, para.sigma_t_1)
+        ai = np.random.normal(para.mu_a_1, para.sigma_a_1)
+        bi = np.random.normal(para.mu_b_1, para.sigma_b_1)
+        hr = np.random.normal(para.mu_hr_1, para.sigma_hr_1)
+        noise = np.random.uniform(low=para.min_noise_1, high=para.max_noise_1)
+        ecgs, _ = ecg_simulate_multichannel(duration=para.duration*2, sampling_rate=para.sampling_rate, noise=noise, Anoise=para.Anoise, heart_rate=hr, gamma=para.gamma, ti=ti, ai=ai, bi=bi)
+        ecgs = np.array(ecgs)
+        start_i = np.random.randint(len(ecgs[0])//4, len(ecgs[0])//2)
+        normal_data.append(ecgs[:,start_i:start_i+para.sampling_rate*para.duration])
+        normal_params.append({'ti':ti, 'ai':ai, 'bi':bi, 'hr':hr, 'noise':noise, 'gamma': para.gamma})
+
+    abnormal_data = []
+    abnormal_params = []
+    print('Creating abnormal dataset')
+    for i in range(abnormal_N):
+        ti = np.random.normal(para.mu_t_2, para.sigma_t_2)
+        ai = np.random.normal(para.mu_a_2, para.sigma_a_2)
+        bi = np.random.normal(para.mu_b_2, para.sigma_b_2)
+        hr = np.random.normal(para.mu_hr_2, para.sigma_hr_2)
+        noise = np.random.uniform(low=para.min_noise_2, high=para.max_noise_2)
+        ecgs, _ = ecg_simulate_multichannel(duration=para.duration*2, sampling_rate=para.sampling_rate, noise=noise, Anoise=para.Anoise, heart_rate=hr, gamma=para.gamma, ti=ti, ai=ai, bi=bi)
+        ecgs = np.array(ecgs)
+        start_i = np.random.randint(len(ecgs[0])//4, len(ecgs[0])//2)
+        abnormal_data.append(ecgs[:,start_i:start_i+para.sampling_rate*para.duration])
+        abnormal_params.append({'ti':ti, 'ai':ai, 'bi':bi, 'hr':hr, 'noise':noise, 'gamma': para.gamma})
+
+    labels = np.array([0]*len(normal_data) + [1]*len(abnormal_data))
+    permutation = np.random.permutation(len(labels))
+    data = np.array(normal_data+abnormal_data)
+    data_params = np.array(normal_params+abnormal_params)
+    labels = labels[permutation]
+    data = data[permutation]
+    data_params = data_params[permutation]
+
+    np.save('sim_ecg_data', data) # save ECG data
+    np.save('sim_ecg_labels', labels) # save label
+    if (save_params):
+        np.save('sim_ecg_params',data_params) # save parameters for each ecg sample (12,2500)
\ No newline at end of file