666 lines (665 with data), 54.7 kB
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"from datetime import date, datetime, timedelta\n",
"\n",
"import numpy as np\n",
"import pandas as pd\n",
"import matplotlib.pyplot as plt\n",
"\n",
"from scipy.stats import kurtosis, skew\n",
"from scipy.signal import find_peaks\n",
"\n",
"from sklearn.preprocessing import MinMaxScaler, OneHotEncoder\n",
"from sklearn.model_selection import train_test_split\n",
"from sklearn.feature_selection import SelectFromModel\n",
"from sklearn.naive_bayes import GaussianNB\n",
"from sklearn.ensemble import RandomForestClassifier\n",
"from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, classification_report, confusion_matrix\n",
"\n",
"import ruptures as rpt"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Feature Extraction"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"def statistical_features(arr):\n",
"    \"\"\"Summarise an array of samples with four descriptive statistics.\n",
"\n",
"    Returns the tuple ``(minimum, maximum, mean, standard deviation)``.  The\n",
"    standard deviation uses NumPy's default settings.\n",
"    \"\"\"\n",
"    return np.min(arr), np.max(arr), np.mean(arr), np.std(arr)\n",
"\n",
"def shape_features(arr):\n",
"    \"\"\"Return ``(skewness, kurtosis)`` of ``arr`` computed with SciPy's defaults.\"\"\"\n",
"    return skew(arr), kurtosis(arr)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"# Identifier of the recording to process; it is also used as a folder name and\n",
"# as part of the output file names.\n",
"user = 'DF'"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Pre-processing"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"# Input and output folders are both derived from the recording identifier.\n",
"import_path = export_path = f'{user}'\n",
"\n",
"# Locations of the three sensor CSV exports inside the input folder.\n",
"rightEDAdatapath, rightHRdatapath, rightTEMPdatapath = (\n",
"    f'{import_path}/{sensor}.csv' for sensor in ('EDA', 'HR', 'TEMP')\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"# Load the heart-rate export as a flat float array.  The path variable is kept\n",
"# intact (not rebound to the loaded data) so this cell can be re-run without\n",
"# restarting the kernel.\n",
"hr_values = np.loadtxt(rightHRdatapath, delimiter=',')\n",
"\n",
"# Repeat every value four times -- presumably to bring HR up to the sampling\n",
"# rate of the EDA/TEMP series; TODO confirm the rates against the device docs.\n",
"# NOTE(review): if HR.csv starts with metadata rows, those are repeated as well.\n",
"hr_upsampled = np.repeat(hr_values, 4)\n",
"np.savetxt('hr_new.csv', hr_upsampled, delimiter=',')"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"# With header=N and explicit names, pandas treats line N as the (replaced)\n",
"# header and starts the data on line N + 1, so these numbers control how many\n",
"# leading lines of each file are skipped.\n",
"edat = pd.read_csv(rightEDAdatapath, header=2, names=['EDA'])\n",
"hrt = pd.read_csv('hr_new.csv', header=12, names=['HR'])\n",
"# Read through the shared path variable so the file name ('TEMP.csv') has a\n",
"# single spelling; a differently-cased literal fails on case-sensitive filesystems.\n",
"tempt = pd.read_csv(rightTEMPdatapath, header=2, names=['TEMP'])\n",
"\n",
"# First line of the EDA export, read as data (header=None).  With the default\n",
"# header handling that line becomes the column label and iloc[0, 0] returns the\n",
"# second line instead.\n",
"# NOTE(review): assumes line 1 holds the recording start as a Unix timestamp and\n",
"# line 2 the sampling rate -- confirm against the export format.\n",
"gt = pd.read_csv(rightEDAdatapath, header=None, nrows=1)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"# Reference value used later to turn window indices into clock times.\n",
"reference_time = gt.iloc[0, 0]\n",
"\n",
"# Trim all three signals to the shortest one so the rows line up one-to-one.\n",
"min_len = min(len(edat), len(hrt), len(tempt))\n",
"eda = edat.iloc[:min_len, 0]\n",
"\n",
"# Slice inline instead of rebinding hrt/tempt: they stay DataFrames, so running\n",
"# this cell a second time does not fail on a two-axis .iloc of a Series.\n",
"df_original = pd.concat(\n",
"    [eda, hrt.iloc[:min_len, 0], tempt.iloc[:min_len, 0]],\n",
"    axis=1,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"# Re-wrap the raw values so the columns carry the names used by the feature loop.\n",
"array = df_original.values\n",
"data = pd.DataFrame(array, columns=['EDA', 'HR', 'temp'])\n",
"\n",
"cols = [\n",
"    'EDA_Mean', 'EDA_Min', 'EDA_Max', 'EDA_Std', 'EDA_Kurtosis', 'EDA_Skew',\n",
"    'EDA_Num_Peaks', 'EDA_Amphitude', 'EDA_Duration',\n",
"    'HR_Mean', 'HR_Min', 'HR_Max', 'HR_Std', 'HR_RMS',\n",
"    'temp_Mean', 'temp_Min', 'temp_Max', 'temp_Std',\n",
"]\n",
"\n",
"WINDOW_SIZE = 40  # samples per feature window\n",
"WINDOW_STEP = 20  # samples between consecutive window starts (50% overlap)\n",
"\n",
"# Collect one list per window and build the frame once at the end; appending\n",
"# rows to a DataFrame inside the loop re-allocates it on every iteration.\n",
"feature_rows = []\n",
"\n",
"# The range stops at the last start index that still yields a full window, so\n",
"# a trailing partial window is never produced.\n",
"for i in range(0, len(data) - WINDOW_SIZE + 1, WINDOW_STEP):\n",
"    df_partial = data.iloc[i:i + WINDOW_SIZE]\n",
"\n",
"    eda = df_partial['EDA'].values\n",
"    hr = df_partial['HR'].values\n",
"    temp = df_partial['temp'].values\n",
"\n",
"    eda_min, eda_max, eda_mean, eda_std = statistical_features(eda)\n",
"    hr_min, hr_max, hr_mean, hr_std = statistical_features(hr)\n",
"    temp_min, temp_max, temp_mean, temp_std = statistical_features(temp)\n",
"    eda_skew, eda_kurtosis = shape_features(eda)\n",
"\n",
"    # Root mean square of the successive HR differences within the window.\n",
"    hr_rms = np.sqrt(np.mean(np.square(np.ediff1d(hr))))\n",
"\n",
"    # Passing width= makes find_peaks report 'prominences' and 'widths'.\n",
"    peaks, properties = find_peaks(eda, width=5)\n",
"    num_peaks = len(peaks)\n",
"    amplitude = np.sum(properties['prominences'])  # total peak prominence\n",
"    duration = np.sum(properties['widths'])        # total peak width, in samples\n",
"\n",
"    # Order must match `cols`.\n",
"    feature_rows.append([\n",
"        eda_mean, eda_min, eda_max, eda_std, eda_kurtosis, eda_skew,\n",
"        num_peaks, amplitude, duration,\n",
"        hr_mean, hr_min, hr_max, hr_std, hr_rms,\n",
"        temp_mean, temp_min, temp_max, temp_std,\n",
"    ])\n",
"\n",
"# Row labels are 0..n-1, i.e. the index of the window each row describes.\n",
"df_features = pd.DataFrame(feature_rows, columns=cols, dtype=float)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"# Column labels '30' ... '1' for the thirty lag columns built below.\n",
"cols = [str(label) for label in range(30, 0, -1)]\n",
"\n",
"# For each windowed mean (HR, then temperature, then EDA) take the values from\n",
"# 10 down to 1 windows earlier, giving 3 x 10 lag columns in that order.\n",
"lagged = [\n",
"    df_features[source].shift(lag)\n",
"    for source in ('HR_Mean', 'temp_Mean', 'EDA_Mean')\n",
"    for lag in range(10, 0, -1)\n",
"]\n",
"df_lag_features = pd.concat(lagged, axis=1)\n",
"df_lag_features.columns = cols\n",
"\n",
"# Rows without a complete lag history contain NaN and are dropped.\n",
"df_lag_features = df_lag_features.dropna()"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"# Keep every feature row from position 30 onwards, with all columns.  No upper\n",
"# row bound is applied, so recordings of any length are processed in full.\n",
"# NOTE(review): only the first 10 rows lack lag values -- confirm why 30 rows\n",
"# are skipped here.\n",
"df_temp = df_features.iloc[30:]\n",
"\n",
"# Column-wise concat aligns on the row labels, so lag and feature values of the\n",
"# same window end up side by side; rows present in only one input contain NaN.\n",
"df_total = pd.concat([df_lag_features, df_temp], axis=1)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Stress detection model"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"acc = 0.950572193815437\n",
"pre = 0.9493557683135115\n",
"recall = 0.931917414618591\n",
"f1 = 0.9395137969324274\n"
]
}
],
"source": [
"# Labelled training table: the first 48 columns are features, column 48 is the label.\n",
"df_lag = pd.read_csv('combined_lagEDA.csv')\n",
"train_set = df_lag.iloc[:, 0:48]\n",
"labels = df_lag.iloc[:, 48:49]\n",
"\n",
"# Hold out a third of the rows for evaluation.\n",
"train, test, train_labels, test_labels = train_test_split(\n",
"    train_set, labels, test_size=0.33, random_state=30\n",
")\n",
"\n",
"# Seed the forest as well as the split; without it the fitted model -- and every\n",
"# metric and prediction derived from it -- changes from run to run.\n",
"clf = RandomForestClassifier(n_estimators=100, max_depth=15, random_state=30)\n",
"\n",
"# Fit on the training split; ravel() turns the one-column label frame into a 1-D array.\n",
"clf.fit(train, train_labels.values.ravel())\n",
"\n",
"# Evaluate on the held-out split.\n",
"y_pred = clf.predict(test)\n",
"\n",
"f1score = f1_score(test_labels, y_pred, average='macro')\n",
"recall = recall_score(test_labels, y_pred, average='macro')\n",
"precision = precision_score(test_labels, y_pred, average='macro')\n",
"accuracy = accuracy_score(test_labels, y_pred)\n",
"\n",
"print('acc =', accuracy)\n",
"print('pre =', precision)\n",
"print('recall =', recall)\n",
"print('f1 =', f1score)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Predicting the stress"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"F:\\Anaconda3\\envs\\stress\\lib\\site-packages\\sklearn\\base.py:450: UserWarning: X does not have valid feature names, but RandomForestClassifier was fitted with feature names\n",
" warnings.warn(\n"
]
}
],
"source": [
"# Only rows with a complete set of lag and window features can be scored.\n",
"df_total = df_total.dropna()\n",
"\n",
"# Scale each of the 48 feature columns to [0, 1] using this recording's own range.\n",
"scalar = MinMaxScaler()\n",
"x_scaled = scalar.fit_transform(df_total.iloc[:, 0:48])\n",
"data = pd.DataFrame(x_scaled).fillna(0)\n",
"\n",
"# Features are matched to the trained model by position, not by column name.\n",
"pred_t = pd.DataFrame(clf.predict(data), columns=['pred'])\n",
"pred_t.to_csv('pred' + user + '.csv')"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
"# Attach the predictions by position.  pred_t is labelled 0..n-1 while df_total\n",
"# keeps its original window labels, so a label-aligned assignment would shift\n",
"# the predictions and leave NaN in the rows whose labels pred_t does not have.\n",
"# A length mismatch raises here instead of being silently padded.\n",
"df_total['pred'] = pred_t['pred'].values\n",
"\n",
"# f-string so the recording id is substituted into the file name.\n",
"# NOTE: this is the same file name the prediction cell writes; the full table\n",
"# (features + prediction) replaces the predictions-only file.\n",
"df_total.to_csv(f'pred{user}.csv')"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [],
"source": [
"# Column 48 is the first column after the 48 features.\n",
"pred_window = df_total.iloc[:, 48:49]\n",
"\n",
"# Trailing 31-row moving average; rows without a full window of values are NaN.\n",
"moving_average = pred_window.rolling(window=31).mean()\n",
"\n",
"# Round up so any fractional average counts towards the next higher level.\n",
"df_total['MA_3'] = np.ceil(moving_average)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Change detection"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 1152x432 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[0, 155, 220, 235, 250, 300, 395, 490, 570, 685, 715, 760, 860, 940, 980, 990, 1020, 1040, 1070, 1115]\n"
]
}
],
"source": [
"# Column 49 as an (n_rows, 1) array -- the signal handed to the change-point search.\n",
"signal = df_total.iloc[:, 49:50].values\n",
"\n",
"# Sliding-window change-point search with an L2 cost, asking for up to 60 breakpoints.\n",
"algo = rpt.Window(model='l2', width=10).fit(signal)\n",
"result = algo.predict(n_bkps=60)\n",
"\n",
"rpt.display(signal, result, figsize=(16, 6))\n",
"# The title names the algorithm that is actually used (rpt.Window, not Pelt).\n",
"plt.title('Change Point Detection: Window-Based Search Method')\n",
"plt.show()\n",
"\n",
"# Prepend 0 so consecutive entries of `result` delimit every segment, including the first.\n",
"result = [0, *result]\n",
"print(result)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Generating stress events (start index, end index, level) from the change points"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>start</th>\n",
" <th>end</th>\n",
" <th>stress</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0.0</td>\n",
" <td>155.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>155.0</td>\n",
" <td>220.0</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>220.0</td>\n",
" <td>235.0</td>\n",
" <td>2.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>235.0</td>\n",
" <td>250.0</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>250.0</td>\n",
" <td>300.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>300.0</td>\n",
" <td>395.0</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>395.0</td>\n",
" <td>490.0</td>\n",
" <td>2.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>490.0</td>\n",
" <td>570.0</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>570.0</td>\n",
" <td>685.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>685.0</td>\n",
" <td>715.0</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>715.0</td>\n",
" <td>760.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>760.0</td>\n",
" <td>860.0</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>860.0</td>\n",
" <td>940.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13</th>\n",
" <td>940.0</td>\n",
" <td>980.0</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14</th>\n",
" <td>980.0</td>\n",
" <td>990.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>15</th>\n",
" <td>990.0</td>\n",
" <td>1020.0</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16</th>\n",
" <td>1020.0</td>\n",
" <td>1040.0</td>\n",
" <td>2.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>17</th>\n",
" <td>1040.0</td>\n",
" <td>1070.0</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>18</th>\n",
" <td>1070.0</td>\n",
" <td>1115.0</td>\n",
" <td>2.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" start end stress\n",
"0 0.0 155.0 0.0\n",
"1 155.0 220.0 1.0\n",
"2 220.0 235.0 2.0\n",
"3 235.0 250.0 1.0\n",
"4 250.0 300.0 0.0\n",
"5 300.0 395.0 1.0\n",
"6 395.0 490.0 2.0\n",
"7 490.0 570.0 1.0\n",
"8 570.0 685.0 0.0\n",
"9 685.0 715.0 1.0\n",
"10 715.0 760.0 0.0\n",
"11 760.0 860.0 1.0\n",
"12 860.0 940.0 0.0\n",
"13 940.0 980.0 1.0\n",
"14 980.0 990.0 0.0\n",
"15 990.0 1020.0 1.0\n",
"16 1020.0 1040.0 2.0\n",
"17 1040.0 1070.0 1.0\n",
"18 1070.0 1115.0 2.0"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Classify every segment between consecutive change points by the mean of column 49.\n",
"stress = []\n",
"for seg_start, seg_end in zip(result[:-1], result[1:]):\n",
"    # iloc slices already exclude the end position, so the whole segment is\n",
"    # covered without subtracting one from the end.\n",
"    seg_mean = df_total.iloc[seg_start:seg_end, 49].mean()\n",
"\n",
"    # A NaN mean (segment without valid values) fails both tests and maps to 0.\n",
"    if seg_mean > 1.3:\n",
"        level = 2\n",
"    elif seg_mean >= .65:\n",
"        level = 1\n",
"    else:\n",
"        level = 0\n",
"    stress.append(level)\n",
"\n",
"# One row per segment, built in a single step; all three columns are floats.\n",
"df_temp = pd.DataFrame(\n",
"    {'start': result[:-1], 'end': result[1:], 'stress': stress},\n",
"    columns=['start', 'end', 'stress'],\n",
"    dtype=float,\n",
")\n",
"\n",
"df_temp"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Convert event times to datetimes"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {
"scrolled": true,
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Prev: 0:12:55 18:00:04 18:12:59 0.0\n",
"Prev: 0:05:25 18:12:59 18:18:24 1.0\n",
"Prev: 0:01:15 18:18:24 18:19:39 2.0\n",
"Prev: 0:01:15 18:19:39 18:20:54 1.0\n",
"Prev: 0:04:10 18:20:54 18:25:04 0.0\n",
"Prev: 0:07:55 18:25:04 18:32:59 1.0\n",
"Prev: 0:07:55 18:32:59 18:40:54 2.0\n",
"Prev: 0:06:40 18:40:54 18:47:34 1.0\n",
"Prev: 0:09:35 18:47:34 18:57:09 0.0\n",
"Prev: 0:02:30 18:57:09 18:59:39 1.0\n",
"Prev: 0:03:45 18:59:39 19:03:24 0.0\n",
"Prev: 0:08:20 19:03:24 19:11:44 1.0\n",
"Prev: 0:06:40 19:11:44 19:18:24 0.0\n",
"Prev: 0:03:20 19:18:24 19:21:44 1.0\n",
"Prev: 0:00:50 19:21:44 19:22:34 0.0\n",
"Prev: 0:02:30 19:22:34 19:25:04 1.0\n",
"Prev: 0:01:40 19:25:04 19:26:44 2.0\n",
"Prev: 0:02:30 19:26:44 19:29:14 1.0\n",
"Final: 0:02:30 19:29:14 19:32:59 2.0\n"
]
}
],
"source": [
"# Seconds added per window index when converting to clock time.\n",
"# NOTE(review): presumably the 20-sample window step at 4 Hz -- TODO confirm.\n",
"SECONDS_PER_WINDOW = 5\n",
"\n",
"\n",
"def event_time(position):\n",
"    \"\"\"Translate a row position of df_total into a local-time datetime.\n",
"\n",
"    The row label (not the position) is used as the window index, so rows that\n",
"    were dropped before the prediction step are accounted for.  A position one\n",
"    past the last row maps to the window following the last label.\n",
"    \"\"\"\n",
"    position = int(position)\n",
"    if position < len(df_total):\n",
"        window = df_total.index[position]\n",
"    else:\n",
"        window = df_total.index[-1] + 1\n",
"    return datetime.fromtimestamp(reference_time + window * SECONDS_PER_WINDOW)\n",
"\n",
"\n",
"def print_event(label, start_pos, end_pos, level):\n",
"    \"\"\"Print one event line: label, duration, start time, end time, stress level.\"\"\"\n",
"    start = event_time(start_pos)\n",
"    end = event_time(end_pos)\n",
"    print(label, end - start, start.strftime('%H:%M:%S'), end.strftime('%H:%M:%S'), level)\n",
"\n",
"\n",
"stress_start = 0\n",
"stress_end = 0\n",
"previous_stress = -1  # sentinel: no event has been opened yet\n",
"\n",
"for index, row in df_temp.iterrows():\n",
"    # Consecutive segments with the same level are merged into one event.\n",
"    if row['stress'] == previous_stress:\n",
"        stress_end = row['end']\n",
"        continue\n",
"\n",
"    # The level changed: report the event that just ended, then open a new one.\n",
"    if previous_stress != -1:\n",
"        print_event('Prev:', stress_start, stress_end, previous_stress)\n",
"\n",
"    stress_start = row['start']\n",
"    stress_end = row['end']\n",
"    previous_stress = row['stress']\n",
"\n",
"# Report the last open event with its own duration; nothing is printed when\n",
"# df_temp is empty.\n",
"if previous_stress != -1:\n",
"    print_event('Final:', stress_start, stress_end, previous_stress)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.11"
}
},
"nbformat": 4,
"nbformat_minor": 4
}