AI-for-Healthcare-EHR / Git / [d88862] /4. Applying AI to Wearable Device Data/3. Activity Classification/walkthroughs/feature-extraction/feature

Downloads: 1
[d88862]: / 4. Applying AI to Wearable Device Data / 3. Activity Classification / walkthroughs / feature-extraction / feature_extraction.ipynb
History
Download this file
182 lines (181 with data), 4.9 kB

{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Feature Extraction I\n",
    "\n",
    "Let's begin extracting features from our dataset so that we can throw them into a classifier."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import scipy as sp\n",
    "import scipy.signal\n",
    "import scipy.stats\n",
    "\n",
    "import activity_classifier_utils"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Load the data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "fs = 256\n",
    "data = activity_classifier_utils.LoadWristPPGDataset()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Features\n",
    "Time Domain:\n",
    "* mean\n",
    "* std\n",
    "* 5, 10, 15, 20, 25 percentile\n",
    "* correlation of all pairs of channels\n",
    "* total energy\n",
    "\n",
    "Frequency Domain:\n",
    "* dominant frequency\n",
    "* fraction of energy in each 1Hz bin from 0 to 6 Hz\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Low-pass filter at 12 Hz"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def LowpassFilter(signal, fs):\n",
    "    b, a = sp.signal.butter(3, 12, btype='lowpass', fs=fs)\n",
    "    return sp.signal.filtfilt(b, a, signal)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Compute Features"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def Featurize(accx, accy, accz, fs):\n",
    "    \"\"\"A partial featurization of the accelerometer signal.\n",
    "    \n",
    "    Args:\n",
    "        accx: (np.array) x-channel of the accelerometer.\n",
    "        accy: (np.array) y-channel of the accelerometer.\n",
    "        accz: (np.array) z-channel of the accelerometer.\n",
    "        fs: (number) the sampling rate of the accelerometer\n",
    "        \n",
    "    Returns:\n",
    "        n-tuple of accelerometer features\n",
    "    \"\"\"\n",
    "    \n",
    "    accx = LowpassFilter(accx, fs)\n",
    "    accy = LowpassFilter(accy, fs)\n",
    "    accz = LowpassFilter(accz, fs)\n",
    "    \n",
    "    # The mean of the x-channel\n",
    "    mn_x = None\n",
    "\n",
    "    # The standard deviation of the x-channel\n",
    "    std_x = None\n",
    "\n",
    "    # The 5th percentile of the x-channel\n",
    "    p5_x = None\n",
    "\n",
    "    # The pearson correlation coefficient between the x and y channels\n",
    "    corr_xy = None\n",
    "\n",
    "    # The total AC energy of the x-axis\n",
    "    energy_x = np.sum(np.square(accx - np.mean(accx)))\n",
    "    \n",
    "    # Take an FFT of the signal. If the signal is too short, 0-pad it so we have at least 2046 points in the FFT.\n",
    "    fft_len = max(len(accx), 2046)\n",
    "    \n",
    "    # Create an array of frequency bins\n",
    "    fft_freqs = np.fft.rfftfreq(fft_len, 1 / fs)\n",
    "    \n",
    "    # Take an FFT of the centered signal\n",
    "    fft_x = np.fft.rfft(accx - np.mean(accx), fft_len)\n",
    "    \n",
    "    # The frequency with the most power between 0.25 and 12 Hz \n",
    "    dominant_frequency_x = fft_freq[np.argmax(np.abs(fft_x)[(fft_freqs >= 0.25) \n",
    "                                                            & (fft_freqs <= 12)])]\n",
    "\n",
    "    # The fraction of energy between 2 and 3 Hz in the x-channel\n",
    "    spectral_energy_x = np.square(np.abs(fft_x))\n",
    "    energy_23_x = np.sum(spectral_energy_x[(fft_freqs >= 2) & (fft_freqs <= 3)]) / np.sum(spectral_energy_x)\n",
    "    \n",
    "    return (mn_x,\n",
    "            std_x,\n",
    "            p5_x,\n",
    "            corr_xy,\n",
    "            energy_x,\n",
    "            dominant_frequency_x,\n",
    "            energy_23_x)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}