182 lines (181 with data), 4.9 kB
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Feature Extraction I\n",
"\n",
"Let's begin extracting features from our dataset so that we can throw them into a classifier."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"\n",
"import numpy as np\n",
"import pandas as pd\n",
"import scipy as sp\n",
"import scipy.signal\n",
"import scipy.stats\n",
"\n",
"import activity_classifier_utils"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Load the data"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"fs = 256\n",
"data = activity_classifier_utils.LoadWristPPGDataset()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Features\n",
"Time Domain:\n",
"* mean\n",
"* std\n",
"* 5, 10, 15, 20, 25 percentile\n",
"* correlation of all pairs of channels\n",
"* total energy\n",
"\n",
"Frequency Domain:\n",
"* dominant frequency\n",
"* fraction of energy in each 1Hz bin from 0 to 6 Hz\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Low-pass filter at 12 Hz"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def LowpassFilter(signal, fs):\n",
" b, a = sp.signal.butter(3, 12, btype='lowpass', fs=fs)\n",
" return sp.signal.filtfilt(b, a, signal)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Compute Features"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def Featurize(accx, accy, accz, fs):\n",
" \"\"\"A partial featurization of the accelerometer signal.\n",
" \n",
" Args:\n",
" accx: (np.array) x-channel of the accelerometer.\n",
" accy: (np.array) y-channel of the accelerometer.\n",
" accz: (np.array) z-channel of the accelerometer.\n",
" fs: (number) the sampling rate of the accelerometer\n",
" \n",
" Returns:\n",
" n-tuple of accelerometer features\n",
" \"\"\"\n",
" \n",
" accx = LowpassFilter(accx, fs)\n",
" accy = LowpassFilter(accy, fs)\n",
" accz = LowpassFilter(accz, fs)\n",
" \n",
" # The mean of the x-channel\n",
" mn_x = None\n",
"\n",
" # The standard deviation of the x-channel\n",
" std_x = None\n",
"\n",
" # The 5th percentile of the x-channel\n",
" p5_x = None\n",
"\n",
" # The pearson correlation coefficient between the x and y channels\n",
" corr_xy = None\n",
"\n",
" # The total AC energy of the x-axis\n",
" energy_x = np.sum(np.square(accx - np.mean(accx)))\n",
" \n",
" # Take an FFT of the signal. If the signal is too short, 0-pad it so we have at least 2046 points in the FFT.\n",
" fft_len = max(len(accx), 2046)\n",
" \n",
" # Create an array of frequency bins\n",
" fft_freqs = np.fft.rfftfreq(fft_len, 1 / fs)\n",
" \n",
" # Take an FFT of the centered signal\n",
" fft_x = np.fft.rfft(accx - np.mean(accx), fft_len)\n",
" \n",
" # The frequency with the most power between 0.25 and 12 Hz \n",
" dominant_frequency_x = fft_freq[np.argmax(np.abs(fft_x)[(fft_freqs >= 0.25) \n",
" & (fft_freqs <= 12)])]\n",
"\n",
" # The fraction of energy between 2 and 3 Hz in the x-channel\n",
" spectral_energy_x = np.square(np.abs(fft_x))\n",
" energy_23_x = np.sum(spectral_energy_x[(fft_freqs >= 2) & (fft_freqs <= 3)]) / np.sum(spectral_energy_x)\n",
" \n",
" return (mn_x,\n",
" std_x,\n",
" p5_x,\n",
" corr_xy,\n",
" energy_x,\n",
" dominant_frequency_x,\n",
" energy_23_x)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}