1248 lines (1247 with data), 122.7 kB
{
"cells": [
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Using TensorFlow backend.\n"
]
}
],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"import librosa\n",
"import wave as wav\n",
"from scipy import stats\n",
"\n",
"from keras.models import Sequential\n",
"from keras.layers import Dense, Dropout, Conv2D, MaxPooling2D, GlobalAveragePooling2D\n",
"from keras.callbacks.callbacks import EarlyStopping\n",
"from keras import regularizers\n",
"from keras.optimizers import adam\n",
"from keras.utils import to_categorical\n",
"import scipy\n",
"import matplotlib.pyplot as plt\n",
"import librosa.display\n",
"import IPython.display as ipd\n",
"from sklearn import metrics\n",
"from sklearn.model_selection import cross_validate\n",
"from sklearn.model_selection import train_test_split\n",
"from sklearn.metrics import classification_report, confusion_matrix\n",
"import os\n",
"import statistics\n",
"\n",
"import random\n",
"from random import randint"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"# Dataset locations: every path below is built relative to `root`.\n",
"root = \"respiratory_sound_database/\"\n",
"sound_dir = root + \"audio_and_txt_files/\"\n",
"\n",
"# Column names are passed explicitly, so pandas reads the first row of each file as data.\n",
"patient_diagnosis = pd.read_csv(\n",
"    root + \"patient_diagnosis.csv\",\n",
"    names=[\"patient\", \"diagnosis\"],\n",
")\n",
"demographic_info = pd.read_csv(\n",
"    root + \"demographic_info.txt\",\n",
"    delimiter=\" \",\n",
"    names=[\"patient\", \"age\", \"sex\", \"bmi\", \"weight\", \"height\"],\n",
")\n",
"\n",
"# File lists for the train and test sets (their `filename` column becomes the index in the next cell).\n",
"train_soundfiles = pd.read_csv(root + \"train_soundfiles.csv\")\n",
"test_soundfiles = pd.read_csv(root + \"test_soundfiles.csv\")"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"# Index each table by its key column so rows can be looked up with .loc\n",
"patient_diagnosis = patient_diagnosis.set_index(\"patient\")\n",
"train_soundfiles = train_soundfiles.set_index(\"filename\")\n",
"test_soundfiles = test_soundfiles.set_index(\"filename\")"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" </tr>\n",
" <tr>\n",
" <th>filename</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>101_1b1_Al_sc_Meditron.wav</th>\n",
" </tr>\n",
" <tr>\n",
" <th>101_1b1_Pr_sc_Meditron.wav</th>\n",
" </tr>\n",
" <tr>\n",
" <th>102_1b1_Ar_sc_Meditron.wav</th>\n",
" </tr>\n",
" <tr>\n",
" <th>104_1b1_Al_sc_Litt3200.wav</th>\n",
" </tr>\n",
" <tr>\n",
" <th>104_1b1_Ar_sc_Litt3200.wav</th>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
"Empty DataFrame\n",
"Columns: []\n",
"Index: [101_1b1_Al_sc_Meditron.wav, 101_1b1_Pr_sc_Meditron.wav, 102_1b1_Ar_sc_Meditron.wav, 104_1b1_Al_sc_Litt3200.wav, 104_1b1_Ar_sc_Litt3200.wav]"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"train_soundfiles.head()"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" </tr>\n",
" <tr>\n",
" <th>filename</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>101_1b1_Al_sc_Meditron.wav</th>\n",
" </tr>\n",
" <tr>\n",
" <th>101_1b1_Pr_sc_Meditron.wav</th>\n",
" </tr>\n",
" <tr>\n",
" <th>102_1b1_Ar_sc_Meditron.wav</th>\n",
" </tr>\n",
" <tr>\n",
" <th>104_1b1_Al_sc_Litt3200.wav</th>\n",
" </tr>\n",
" <tr>\n",
" <th>104_1b1_Ar_sc_Litt3200.wav</th>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" </tr>\n",
" <tr>\n",
" <th>224_1b2_Al_sc_Meditron.wav</th>\n",
" </tr>\n",
" <tr>\n",
" <th>225_1b1_Pl_sc_Meditron.wav</th>\n",
" </tr>\n",
" <tr>\n",
" <th>226_1b1_Al_sc_Meditron.wav</th>\n",
" </tr>\n",
" <tr>\n",
" <th>226_1b1_Ll_sc_Meditron.wav</th>\n",
" </tr>\n",
" <tr>\n",
" <th>226_1b1_Pl_sc_LittC2SE.wav</th>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>917 rows × 0 columns</p>\n",
"</div>"
],
"text/plain": [
"Empty DataFrame\n",
"Columns: []\n",
"Index: [101_1b1_Al_sc_Meditron.wav, 101_1b1_Pr_sc_Meditron.wav, 102_1b1_Ar_sc_Meditron.wav, 104_1b1_Al_sc_Litt3200.wav, 104_1b1_Ar_sc_Litt3200.wav, 104_1b1_Ll_sc_Litt3200.wav, 104_1b1_Lr_sc_Litt3200.wav, 104_1b1_Pl_sc_Litt3200.wav, 104_1b1_Pr_sc_Litt3200.wav, 105_1b1_Tc_sc_Meditron.wav, 106_2b1_Pl_mc_LittC2SE.wav, 106_2b1_Pr_mc_LittC2SE.wav, 107_2b3_Al_mc_AKGC417L.wav, 107_2b3_Ar_mc_AKGC417L.wav, 107_2b3_Ll_mc_AKGC417L.wav, 107_2b3_Lr_mc_AKGC417L.wav, 107_2b3_Pl_mc_AKGC417L.wav, 107_2b3_Pr_mc_AKGC417L.wav, 107_2b3_Tc_mc_AKGC417L.wav, 107_2b4_Al_mc_AKGC417L.wav, 107_2b4_Ar_mc_AKGC417L.wav, 107_2b4_Ll_mc_AKGC417L.wav, 107_2b4_Lr_mc_AKGC417L.wav, 107_2b4_Pl_mc_AKGC417L.wav, 107_2b4_Pr_mc_AKGC417L.wav, 107_2b4_Tc_mc_AKGC417L.wav, 107_2b5_Al_mc_AKGC417L.wav, 107_2b5_Ar_mc_AKGC417L.wav, 107_2b5_Ll_mc_AKGC417L.wav, 107_2b5_Lr_mc_AKGC417L.wav, 107_2b5_Pl_mc_AKGC417L.wav, 107_2b5_Pr_mc_AKGC417L.wav, 107_2b5_Tc_mc_AKGC417L.wav, 107_3p2_Al_mc_AKGC417L.wav, 107_3p2_Ar_mc_AKGC417L.wav, 107_3p2_Ll_mc_AKGC417L.wav, 107_3p2_Lr_mc_AKGC417L.wav, 107_3p2_Pl_mc_AKGC417L.wav, 107_3p2_Pr_mc_AKGC417L.wav, 107_3p2_Tc_mc_AKGC417L.wav, 109_1b1_Al_sc_Litt3200.wav, 109_1b1_Ar_sc_Litt3200.wav, 109_1b1_Ll_sc_Litt3200.wav, 109_1b1_Lr_sc_Litt3200.wav, 109_1b1_Pl_sc_Litt3200.wav, 109_1b1_Pr_sc_Litt3200.wav, 110_1b1_Pr_sc_Meditron.wav, 110_1p1_Al_sc_Meditron.wav, 110_1p1_Ll_sc_Meditron.wav, 110_1p1_Lr_sc_Meditron.wav, 110_1p1_Pr_sc_Meditron.wav, 111_1b2_Tc_sc_Meditron.wav, 111_1b3_Tc_sc_Meditron.wav, 112_1b1_Ar_sc_Meditron.wav, 112_1b1_Lr_sc_Meditron.wav, 112_1p1_Ll_sc_Litt3200.wav, 112_1p1_Pl_sc_Litt3200.wav, 112_1p1_Pr_sc_Litt3200.wav, 113_1b1_Al_sc_Litt3200.wav, 113_1b1_Ar_sc_Litt3200.wav, 113_1b1_Ll_sc_Litt3200.wav, 113_1b1_Lr_sc_Litt3200.wav, 113_1b1_Pl_sc_Litt3200.wav, 113_1b1_Pr_sc_Litt3200.wav, 114_1b4_Al_mc_AKGC417L.wav, 114_1b4_Ar_mc_AKGC417L.wav, 114_1b4_Lr_mc_AKGC417L.wav, 114_1b4_Pl_mc_AKGC417L.wav, 114_1b4_Pr_mc_AKGC417L.wav, 116_1b2_Pl_sc_Meditron.wav, 116_1b2_Tc_sc_Meditron.wav, 
117_1b2_Tc_mc_LittC2SE.wav, 117_1b3_Tc_mc_LittC2SE.wav, 118_1b1_Al_sc_Litt3200.wav, 118_1b1_Ar_sc_Litt3200.wav, 118_1b1_Ll_sc_Litt3200.wav, 118_1b1_Lr_sc_Litt3200.wav, 118_1b1_Pl_sc_Litt3200.wav, 118_1b1_Pr_sc_Litt3200.wav, 119_1b1_Ar_sc_Meditron.wav, 120_1b1_Al_sc_Meditron.wav, 120_1b1_Ar_sc_Meditron.wav, 120_1b1_Lr_sc_Meditron.wav, 120_1b1_Pl_sc_Meditron.wav, 120_1b1_Pr_sc_Meditron.wav, 121_1b1_Tc_sc_Meditron.wav, 121_1p1_Tc_sc_Meditron.wav, 122_2b1_Al_mc_LittC2SE.wav, 122_2b1_Ar_mc_LittC2SE.wav, 122_2b1_Tc_mc_LittC2SE.wav, 122_2b2_Al_mc_LittC2SE.wav, 122_2b2_Ar_mc_LittC2SE.wav, 122_2b2_Tc_mc_LittC2SE.wav, 122_2b3_Al_mc_LittC2SE.wav, 122_2b3_Ar_mc_LittC2SE.wav, 122_2b3_Tc_mc_LittC2SE.wav, 123_1b1_Al_sc_Meditron.wav, 124_1b1_Al_sc_Litt3200.wav, 124_1b1_Ar_sc_Litt3200.wav, 124_1b1_Ll_sc_Litt3200.wav, ...]\n",
"\n",
"[917 rows x 0 columns]"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"soundfiles = pd.concat([train_soundfiles, test_soundfiles])\n",
"soundfiles"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"#Creating train feature set"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"class Config(object):\n",
" def __init__(self, n_mfcc, max_frames, sample_rate, max_audio_duration, batch_size, epochs):\n",
" self.n_mfcc = n_mfcc\n",
" self.max_frames = max_frames\n",
" self.sample_rate = sample_rate\n",
" self.max_audio_duration = max_audio_duration\n",
" self.max_audio_length = max_audio_duration * sample_rate\n",
" self.batch_size = batch_size\n",
" self.epochs = epochs"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"# Settings shared by the cells below.\n",
"# NOTE(review): extract_features calls librosa.load without sr=, so sample_rate is not\n",
"# applied when the audio is loaded - confirm this is intended.\n",
"config = Config(\n",
"    n_mfcc=40,\n",
"    max_frames=862,  # extract_features pads every MFCC matrix to this many columns\n",
"    sample_rate=11025,\n",
"    max_audio_duration=20,\n",
"    batch_size=64,\n",
"    epochs=700,\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Preprocessing"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"def extract_features(filename, config):\n",
"    \"\"\"Load one recording and return its MFCC matrix at a fixed width.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    filename : str\n",
"        Name of the recording, relative to ``root + \"audio_and_txt_files/\"``.\n",
"    config : Config\n",
"        Supplies ``n_mfcc``, ``max_frames`` and ``max_audio_duration``.\n",
"\n",
"    Returns\n",
"    -------\n",
"    numpy.ndarray\n",
"        MFCC matrix with ``config.n_mfcc`` rows and exactly ``config.max_frames``\n",
"        columns: shorter recordings are zero-padded on the right, longer ones\n",
"        are truncated.\n",
"    \"\"\"\n",
"    sound_root = root + \"audio_and_txt_files/\"\n",
"    # No sr= is passed, so librosa loads at its own default rate (22050 Hz);\n",
"    # config.sample_rate is not used here.\n",
"    raw, sr = librosa.load(\n",
"        sound_root + filename,\n",
"        duration=config.max_audio_duration,\n",
"        res_type=\"kaiser_fast\",\n",
"    )\n",
"    mfccs = librosa.feature.mfcc(y=raw, sr=sr, n_mfcc=config.n_mfcc, hop_length=512, fmin=50, fmax=2000)\n",
"    # Clip first so the pad width below can never be negative (np.pad rejects negative widths).\n",
"    mfccs = mfccs[:, :config.max_frames]\n",
"    pad_width = config.max_frames - mfccs.shape[1]\n",
"    return np.pad(mfccs, pad_width=((0, 0), (0, pad_width)), mode='constant')"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"#Silences all sound segments longer than 25ms that have an amplitude less than 5% of max\n",
"def silence(raw, sr):\n",
" minimum = raw.min()\n",
" pos_raw = raw+abs(minimum)\n",
" thresh = pos_raw.max()*0.05\n",
" \n",
" start = 0\n",
" end = 0\n",
" quiet_segment = False\n",
" for idx, val in enumerate(raw):\n",
" if val < thresh and quiet_segment == False:\n",
" quiet_segment = True\n",
" start = idx\n",
" elif val < thresh and quiet_segment == True:\n",
" continue\n",
" elif val >= thresh and quiet_segment == True:\n",
" end = idx\n",
" if (end-start / sr > 0.025):\n",
" raw[start:end] = abs(minimum)\n",
" quiet_segment=False\n",
" elif val >= thresh and quiet_segment == False:\n",
" continue\n",
" \n",
" new_raw = pos_raw-abs(minimum)\n",
" return new_raw"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"154_2b4_Pl_mc_AKGC417L.wav\n"
]
},
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 720x288 with 2 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[[-546.64764 -546.64764 -546.64764 ... -286.8623 -289.67197\n",
" -292.94394 ]\n",
" [ 0. 0. 0. ... 112.28939 105.16373\n",
" 106.74376 ]\n",
" [ 0. 0. 0. ... 61.456947 52.787937\n",
" 51.120117 ]\n",
" ...\n",
" [ 0. 0. 0. ... 5.2202296 -2.8576035\n",
" -4.3707957]\n",
" [ 0. 0. 0. ... -0.9986781 -6.991159\n",
" -11.324665 ]\n",
" [ 0. 0. 0. ... 5.0589476 7.4100876\n",
" 7.800515 ]]\n",
"(40, 862)\n"
]
}
],
"source": [
"sample_mfcc = extract_features(\"154_2b4_Pl_mc_AKGC417L.wav\", config)\n",
"\n",
"plt.figure(figsize=(10,4))\n",
"librosa.display.specshow(sample_mfcc[2:], x_axis='time')\n",
"plt.colorbar()\n",
"plt.show()\n",
"\n",
"print(sample_mfcc)\n",
"print(sample_mfcc.shape)"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"COPD 566\n",
"Pneumonia 26\n",
"Healthy 24\n",
"URTI 14\n",
"Bronchiolitis 8\n",
"Bronchiectasis 7\n",
"Name: diagnosis, dtype: int64\n"
]
},
{
"data": {
"text/plain": [
"<matplotlib.axes._subplots.AxesSubplot at 0x150f95c9b9b0>"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"# Class distribution of the training recordings\n",
"\n",
"# The patient id is the part of the file name before the first underscore.\n",
"train_patient_ids = [int(name.split(\"_\")[0]) for name in train_soundfiles.index]\n",
"\n",
"# Label-based lookup of the diagnosis column; avoids positional Series[0] access,\n",
"# which newer pandas deprecates on a label-indexed Series.\n",
"train_diagnosis = pd.DataFrame(\n",
"    {\"diagnosis\": patient_diagnosis.loc[train_patient_ids, \"diagnosis\"].astype(str).values}\n",
")\n",
"print(train_diagnosis.diagnosis.value_counts())\n",
"train_diagnosis.diagnosis.value_counts().plot(kind=\"bar\")"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['104_1b1_Al_sc_Litt3200.wav',\n",
" '104_1b1_Ar_sc_Litt3200.wav',\n",
" '104_1b1_Ll_sc_Litt3200.wav',\n",
" '104_1b1_Lr_sc_Litt3200.wav',\n",
" '104_1b1_Pl_sc_Litt3200.wav',\n",
" '104_1b1_Pr_sc_Litt3200.wav',\n",
" '106_2b1_Pl_mc_LittC2SE.wav',\n",
" '106_2b1_Pr_mc_LittC2SE.wav',\n",
" '107_2b3_Al_mc_AKGC417L.wav',\n",
" '107_2b3_Ar_mc_AKGC417L.wav',\n",
" '107_2b3_Ll_mc_AKGC417L.wav',\n",
" '107_2b3_Lr_mc_AKGC417L.wav',\n",
" '107_2b3_Pl_mc_AKGC417L.wav',\n",
" '107_2b3_Pr_mc_AKGC417L.wav',\n",
" '107_2b3_Tc_mc_AKGC417L.wav',\n",
" '107_2b4_Al_mc_AKGC417L.wav',\n",
" '107_2b4_Ar_mc_AKGC417L.wav',\n",
" '107_2b4_Ll_mc_AKGC417L.wav',\n",
" '107_2b4_Lr_mc_AKGC417L.wav',\n",
" '107_2b4_Pl_mc_AKGC417L.wav',\n",
" '107_2b4_Pr_mc_AKGC417L.wav',\n",
" '107_2b4_Tc_mc_AKGC417L.wav',\n",
" '107_2b5_Al_mc_AKGC417L.wav',\n",
" '107_2b5_Ar_mc_AKGC417L.wav',\n",
" '107_2b5_Ll_mc_AKGC417L.wav',\n",
" '107_2b5_Lr_mc_AKGC417L.wav',\n",
" '107_2b5_Pl_mc_AKGC417L.wav',\n",
" '107_2b5_Pr_mc_AKGC417L.wav',\n",
" '107_2b5_Tc_mc_AKGC417L.wav',\n",
" '107_3p2_Al_mc_AKGC417L.wav',\n",
" '107_3p2_Ar_mc_AKGC417L.wav',\n",
" '107_3p2_Ll_mc_AKGC417L.wav',\n",
" '107_3p2_Lr_mc_AKGC417L.wav',\n",
" '107_3p2_Pl_mc_AKGC417L.wav',\n",
" '107_3p2_Pr_mc_AKGC417L.wav',\n",
" '107_3p2_Tc_mc_AKGC417L.wav',\n",
" '109_1b1_Al_sc_Litt3200.wav',\n",
" '109_1b1_Ar_sc_Litt3200.wav',\n",
" '109_1b1_Ll_sc_Litt3200.wav',\n",
" '109_1b1_Lr_sc_Litt3200.wav',\n",
" '109_1b1_Pl_sc_Litt3200.wav',\n",
" '109_1b1_Pr_sc_Litt3200.wav',\n",
" '110_1b1_Pr_sc_Meditron.wav',\n",
" '110_1p1_Al_sc_Meditron.wav',\n",
" '110_1p1_Ll_sc_Meditron.wav',\n",
" '110_1p1_Lr_sc_Meditron.wav',\n",
" '110_1p1_Pr_sc_Meditron.wav',\n",
" '112_1b1_Ar_sc_Meditron.wav',\n",
" '112_1b1_Lr_sc_Meditron.wav',\n",
" '112_1p1_Ll_sc_Litt3200.wav',\n",
" '112_1p1_Pl_sc_Litt3200.wav',\n",
" '112_1p1_Pr_sc_Litt3200.wav',\n",
" '113_1b1_Al_sc_Litt3200.wav',\n",
" '113_1b1_Ar_sc_Litt3200.wav',\n",
" '113_1b1_Ll_sc_Litt3200.wav',\n",
" '113_1b1_Lr_sc_Litt3200.wav',\n",
" '113_1b1_Pl_sc_Litt3200.wav',\n",
" '113_1b1_Pr_sc_Litt3200.wav',\n",
" '114_1b4_Al_mc_AKGC417L.wav',\n",
" '114_1b4_Ar_mc_AKGC417L.wav',\n",
" '114_1b4_Lr_mc_AKGC417L.wav',\n",
" '114_1b4_Pl_mc_AKGC417L.wav',\n",
" '114_1b4_Pr_mc_AKGC417L.wav',\n",
" '117_1b2_Tc_mc_LittC2SE.wav',\n",
" '117_1b3_Tc_mc_LittC2SE.wav',\n",
" '118_1b1_Al_sc_Litt3200.wav',\n",
" '118_1b1_Ar_sc_Litt3200.wav',\n",
" '118_1b1_Ll_sc_Litt3200.wav',\n",
" '118_1b1_Lr_sc_Litt3200.wav',\n",
" '118_1b1_Pl_sc_Litt3200.wav',\n",
" '118_1b1_Pr_sc_Litt3200.wav',\n",
" '120_1b1_Al_sc_Meditron.wav',\n",
" '120_1b1_Ar_sc_Meditron.wav',\n",
" '120_1b1_Lr_sc_Meditron.wav',\n",
" '120_1b1_Pl_sc_Meditron.wav',\n",
" '120_1b1_Pr_sc_Meditron.wav',\n",
" '124_1b1_Al_sc_Litt3200.wav',\n",
" '124_1b1_Ar_sc_Litt3200.wav',\n",
" '124_1b1_Ll_sc_Litt3200.wav',\n",
" '124_1b1_Lr_sc_Litt3200.wav',\n",
" '124_1b1_Pl_sc_Litt3200.wav',\n",
" '124_1b1_Pr_sc_Litt3200.wav',\n",
" '128_1b3_Tc_mc_LittC2SE.wav',\n",
" '130_1p2_Al_mc_AKGC417L.wav',\n",
" '130_1p2_Ar_mc_AKGC417L.wav',\n",
" '130_1p2_Ll_mc_AKGC417L.wav',\n",
" '130_1p2_Lr_mc_AKGC417L.wav',\n",
" '130_1p2_Pl_mc_AKGC417L.wav',\n",
" '130_1p2_Pr_mc_AKGC417L.wav',\n",
" '130_1p2_Tc_mc_AKGC417L.wav',\n",
" '130_1p3_Al_mc_AKGC417L.wav',\n",
" '130_1p3_Ar_mc_AKGC417L.wav',\n",
" '130_1p3_Ll_mc_AKGC417L.wav',\n",
" '130_1p3_Lr_mc_AKGC417L.wav',\n",
" '130_1p3_Pl_mc_AKGC417L.wav',\n",
" '130_1p3_Pr_mc_AKGC417L.wav',\n",
" '130_1p3_Tc_mc_AKGC417L.wav',\n",
" '130_1p4_Al_mc_AKGC417L.wav',\n",
" '130_1p4_Ar_mc_AKGC417L.wav',\n",
" '130_1p4_Ll_mc_AKGC417L.wav',\n",
" '130_1p4_Lr_mc_AKGC417L.wav',\n",
" '130_1p4_Pl_mc_AKGC417L.wav',\n",
" '130_1p4_Pr_mc_AKGC417L.wav',\n",
" '130_1p4_Tc_mc_AKGC417L.wav',\n",
" '130_2b2_Al_mc_AKGC417L.wav',\n",
" '130_2b2_Ar_mc_AKGC417L.wav',\n",
" '130_2b2_Ll_mc_AKGC417L.wav',\n",
" '130_2b2_Lr_mc_AKGC417L.wav',\n",
" '130_2b2_Pl_mc_AKGC417L.wav',\n",
" '130_2b2_Pr_mc_AKGC417L.wav',\n",
" '130_2b2_Tc_mc_AKGC417L.wav',\n",
" '130_2b3_Al_mc_AKGC417L.wav',\n",
" '130_2b3_Ar_mc_AKGC417L.wav',\n",
" '130_2b3_Ll_mc_AKGC417L.wav',\n",
" '130_2b3_Lr_mc_AKGC417L.wav',\n",
" '130_2b3_Pl_mc_AKGC417L.wav',\n",
" '130_2b3_Pr_mc_AKGC417L.wav',\n",
" '130_2b3_Tc_mc_AKGC417L.wav',\n",
" '130_2b4_Al_mc_AKGC417L.wav',\n",
" '130_2b4_Ar_mc_AKGC417L.wav',\n",
" '130_2b4_Ll_mc_AKGC417L.wav',\n",
" '130_2b4_Lr_mc_AKGC417L.wav',\n",
" '130_2b4_Pl_mc_AKGC417L.wav',\n",
" '130_2p3_Pl_mc_AKGC417L.wav',\n",
" '130_2p5_Al_mc_AKGC417L.wav',\n",
" '130_2p5_Ar_mc_AKGC417L.wav',\n",
" '130_2p5_Lr_mc_AKGC417L.wav',\n",
" '130_2p5_Pl_mc_AKGC417L.wav',\n",
" '130_2p5_Pr_mc_AKGC417L.wav',\n",
" '130_2p5_Tc_mc_AKGC417L.wav',\n",
" '130_3b3_Ll_mc_AKGC417L.wav',\n",
" '130_3b4_Al_mc_AKGC417L.wav',\n",
" '130_3b4_Ar_mc_AKGC417L.wav',\n",
" '130_3b4_Lr_mc_AKGC417L.wav',\n",
" '130_3b4_Pl_mc_AKGC417L.wav',\n",
" '130_3b4_Pr_mc_AKGC417L.wav',\n",
" '130_3p2_Al_mc_AKGC417L.wav',\n",
" '130_3p2_Ar_mc_AKGC417L.wav',\n",
" '130_3p2_Pl_mc_AKGC417L.wav',\n",
" '130_3p2_Pr_mc_AKGC417L.wav',\n",
" '130_3p2_Tc_mc_AKGC417L.wav',\n",
" '130_3p3_Al_mc_AKGC417L.wav',\n",
" '130_3p3_Pl_mc_AKGC417L.wav',\n",
" '130_3p3_Pr_mc_AKGC417L.wav',\n",
" '130_3p3_Tc_mc_AKGC417L.wav',\n",
" '130_3p4_Al_mc_AKGC417L.wav',\n",
" '130_3p4_Pl_mc_AKGC417L.wav',\n",
" '130_3p4_Pr_mc_AKGC417L.wav',\n",
" '130_3p4_Tc_mc_AKGC417L.wav',\n",
" '132_2b1_Lr_mc_LittC2SE.wav',\n",
" '132_2b2_Lr_mc_LittC2SE.wav',\n",
" '133_2p2_Al_mc_AKGC417L.wav',\n",
" '133_2p2_Ar_mc_AKGC417L.wav',\n",
" '133_2p2_Pl_mc_AKGC417L.wav',\n",
" '133_2p2_Tc_mc_AKGC417L.wav',\n",
" '133_2p3_Al_mc_AKGC417L.wav',\n",
" '133_2p3_Ar_mc_AKGC417L.wav',\n",
" '133_2p3_Pl_mc_AKGC417L.wav',\n",
" '133_2p3_Pr_mc_AKGC417L.wav',\n",
" '133_2p3_Tc_mc_AKGC417L.wav',\n",
" '133_2p4_Al_mc_AKGC417L.wav',\n",
" '133_2p4_Ar_mc_AKGC417L.wav',\n",
" '133_2p4_Pl_mc_AKGC417L.wav',\n",
" '133_2p4_Pr_mc_AKGC417L.wav',\n",
" '133_2p4_Tc_mc_AKGC417L.wav',\n",
" '133_3p2_Al_mc_AKGC417L.wav',\n",
" '133_3p2_Ar_mc_AKGC417L.wav',\n",
" '133_3p2_Pl_mc_AKGC417L.wav',\n",
" '133_3p2_Pr_mc_AKGC417L.wav',\n",
" '133_3p4_Tc_mc_AKGC417L.wav',\n",
" '134_2b1_Al_mc_LittC2SE.wav',\n",
" '134_2b1_Ar_mc_LittC2SE.wav',\n",
" '134_2b2_Al_mc_LittC2SE.wav',\n",
" '134_2b2_Ar_mc_LittC2SE.wav',\n",
" '134_2b3_Ar_mc_LittC2SE.wav',\n",
" '138_1p2_Al_mc_AKGC417L.wav',\n",
" '138_1p2_Ar_mc_AKGC417L.wav',\n",
" '138_1p2_Ll_mc_AKGC417L.wav',\n",
" '138_1p2_Lr_mc_AKGC417L.wav',\n",
" '138_1p2_Pl_mc_AKGC417L.wav',\n",
" '138_1p2_Pr_mc_AKGC417L.wav',\n",
" '138_1p2_Tc_mc_AKGC417L.wav',\n",
" '138_1p3_Al_mc_AKGC417L.wav',\n",
" '138_1p3_Ar_mc_AKGC417L.wav',\n",
" '138_1p3_Ll_mc_AKGC417L.wav',\n",
" '138_1p3_Lr_mc_AKGC417L.wav',\n",
" '138_1p3_Pl_mc_AKGC417L.wav',\n",
" '138_1p3_Pr_mc_AKGC417L.wav',\n",
" '138_1p3_Tc_mc_AKGC417L.wav',\n",
" '138_1p4_Ar_mc_AKGC417L.wav',\n",
" '138_1p4_Ll_mc_AKGC417L.wav',\n",
" '138_1p4_Lr_mc_AKGC417L.wav',\n",
" '138_1p4_Pl_mc_AKGC417L.wav',\n",
" '138_1p4_Pr_mc_AKGC417L.wav',\n",
" '138_1p4_Tc_mc_AKGC417L.wav',\n",
" '138_2p2_Al_mc_AKGC417L.wav',\n",
" '138_2p2_Ar_mc_AKGC417L.wav',\n",
" '138_2p2_Ll_mc_AKGC417L.wav',\n",
" '138_2p2_Lr_mc_AKGC417L.wav',\n",
" '138_2p2_Pl_mc_AKGC417L.wav',\n",
" '138_2p2_Pr_mc_AKGC417L.wav',\n",
" '138_2p2_Tc_mc_AKGC417L.wav',\n",
" '139_1b1_Al_sc_Litt3200.wav',\n",
" '139_1b1_Ar_sc_Litt3200.wav',\n",
" '139_1b1_Ll_sc_Litt3200.wav',\n",
" '139_1b1_Lr_sc_Litt3200.wav',\n",
" '139_1b1_Pl_sc_Litt3200.wav',\n",
" '139_1b1_Pr_sc_Litt3200.wav',\n",
" '141_1b1_Pr_mc_LittC2SE.wav',\n",
" '141_1b2_Ar_mc_LittC2SE.wav',\n",
" '141_1b2_Lr_mc_LittC2SE.wav',\n",
" '141_1b2_Pr_mc_LittC2SE.wav',\n",
" '141_1b2_Tc_mc_LittC2SE.wav',\n",
" '141_1b3_Al_mc_LittC2SE.wav',\n",
" '141_1b3_Ar_mc_LittC2SE.wav',\n",
" '141_1b3_Pr_mc_LittC2SE.wav',\n",
" '142_1b1_Pl_mc_LittC2SE.wav',\n",
" '145_2b2_Al_mc_AKGC417L.wav',\n",
" '145_2b2_Ar_mc_AKGC417L.wav',\n",
" '145_2b2_Lr_mc_AKGC417L.wav',\n",
" '145_2b2_Pr_mc_AKGC417L.wav',\n",
" '145_3b2_Ar_mc_AKGC417L.wav',\n",
" '145_3b2_Lr_mc_AKGC417L.wav',\n",
" '145_3b4_Pl_mc_AKGC417L.wav',\n",
" '146_2b2_Pl_mc_AKGC417L.wav',\n",
" '146_2b4_Al_mc_AKGC417L.wav',\n",
" '146_2b4_Ar_mc_AKGC417L.wav',\n",
" '146_2b4_Ll_mc_AKGC417L.wav',\n",
" '146_2b4_Lr_mc_AKGC417L.wav',\n",
" '146_2b4_Pr_mc_AKGC417L.wav',\n",
" '146_8p3_Al_mc_AKGC417L.wav',\n",
" '146_8p3_Ar_mc_AKGC417L.wav',\n",
" '146_8p3_Lr_mc_AKGC417L.wav',\n",
" '146_8p3_Pl_mc_AKGC417L.wav',\n",
" '146_8p3_Pr_mc_AKGC417L.wav',\n",
" '147_1b2_Tc_mc_AKGC417L.wav',\n",
" '147_1b3_Tc_mc_AKGC417L.wav',\n",
" '147_1b4_Tc_mc_AKGC417L.wav',\n",
" '147_2b2_Al_mc_AKGC417L.wav',\n",
" '147_2b2_Ar_mc_AKGC417L.wav',\n",
" '147_2b2_Pl_mc_AKGC417L.wav',\n",
" '147_2b3_Al_mc_AKGC417L.wav',\n",
" '147_2b3_Ar_mc_AKGC417L.wav',\n",
" '147_2b3_Ll_mc_AKGC417L.wav',\n",
" '147_2b3_Lr_mc_AKGC417L.wav',\n",
" '147_2b3_Pl_mc_AKGC417L.wav',\n",
" '147_2b4_Al_mc_AKGC417L.wav',\n",
" '147_2b4_Ar_mc_AKGC417L.wav',\n",
" '147_2b4_Ll_mc_AKGC417L.wav',\n",
" '147_2b4_Lr_mc_AKGC417L.wav',\n",
" '147_2b4_Pl_mc_AKGC417L.wav',\n",
" '151_2p2_Al_mc_AKGC417L.wav',\n",
" '151_2p2_Ar_mc_AKGC417L.wav',\n",
" '151_2p2_Ll_mc_AKGC417L.wav',\n",
" '151_2p2_Lr_mc_AKGC417L.wav',\n",
" '151_2p2_Pl_mc_AKGC417L.wav',\n",
" '151_2p2_Pr_mc_AKGC417L.wav',\n",
" '151_2p2_Tc_mc_AKGC417L.wav',\n",
" '151_2p3_Al_mc_AKGC417L.wav',\n",
" '151_2p3_Ar_mc_AKGC417L.wav',\n",
" '151_2p3_Ll_mc_AKGC417L.wav',\n",
" '151_2p3_Lr_mc_AKGC417L.wav',\n",
" '151_2p3_Pl_mc_AKGC417L.wav',\n",
" '151_2p3_Pr_mc_AKGC417L.wav',\n",
" '151_2p3_Tc_mc_AKGC417L.wav',\n",
" '151_2p4_Al_mc_AKGC417L.wav',\n",
" '151_2p4_Ar_mc_AKGC417L.wav',\n",
" '151_2p4_Ll_mc_AKGC417L.wav',\n",
" '151_2p4_Lr_mc_AKGC417L.wav',\n",
" '151_2p4_Pl_mc_AKGC417L.wav',\n",
" '151_2p4_Pr_mc_AKGC417L.wav',\n",
" '151_2p4_Tc_mc_AKGC417L.wav',\n",
" '151_3p2_Al_mc_AKGC417L.wav',\n",
" '151_3p2_Ar_mc_AKGC417L.wav',\n",
" '151_3p2_Lr_mc_AKGC417L.wav',\n",
" '151_3p2_Pl_mc_AKGC417L.wav',\n",
" '151_3p2_Pr_mc_AKGC417L.wav',\n",
" '151_3p2_Tc_mc_AKGC417L.wav',\n",
" '151_3p3_Ll_mc_AKGC417L.wav',\n",
" '154_1b3_Al_mc_AKGC417L.wav',\n",
" '154_1b3_Ar_mc_AKGC417L.wav',\n",
" '154_1b3_Ll_mc_AKGC417L.wav',\n",
" '154_1b3_Lr_mc_AKGC417L.wav',\n",
" '154_1b3_Pl_mc_AKGC417L.wav',\n",
" '154_1b3_Pr_mc_AKGC417L.wav',\n",
" '154_1b3_Tc_mc_AKGC417L.wav',\n",
" '154_2b4_Al_mc_AKGC417L.wav',\n",
" '154_2b4_Ar_mc_AKGC417L.wav',\n",
" '154_2b4_Ll_mc_AKGC417L.wav',\n",
" '154_2b4_Lr_mc_AKGC417L.wav',\n",
" '154_2b4_Pl_mc_AKGC417L.wav',\n",
" '154_2b4_Pr_mc_AKGC417L.wav',\n",
" '154_2b4_Tc_mc_AKGC417L.wav',\n",
" '154_3b3_Al_mc_AKGC417L.wav',\n",
" '154_3b3_Ar_mc_AKGC417L.wav',\n",
" '154_3b3_Ll_mc_AKGC417L.wav',\n",
" '154_4b4_Al_mc_AKGC417L.wav',\n",
" '154_4b4_Ar_mc_AKGC417L.wav',\n",
" '154_4b4_Ll_mc_AKGC417L.wav',\n",
" '154_4b4_Lr_mc_AKGC417L.wav',\n",
" '154_4b4_Pl_mc_AKGC417L.wav',\n",
" '154_4b4_Pr_mc_AKGC417L.wav',\n",
" '155_2b1_Al_mc_LittC2SE.wav',\n",
" '156_2b3_Al_mc_AKGC417L.wav',\n",
" '156_2b3_Ar_mc_AKGC417L.wav',\n",
" '156_2b3_Ll_mc_AKGC417L.wav',\n",
" '156_2b3_Lr_mc_AKGC417L.wav',\n",
" '156_2b3_Pl_mc_AKGC417L.wav',\n",
" '156_2b3_Pr_mc_AKGC417L.wav',\n",
" '156_5b3_Al_mc_AKGC417L.wav',\n",
" '156_5b3_Ar_mc_AKGC417L.wav',\n",
" '156_5b3_Ll_mc_AKGC417L.wav',\n",
" '156_5b3_Lr_mc_AKGC417L.wav',\n",
" '156_5b3_Pl_mc_AKGC417L.wav',\n",
" '156_5b3_Pr_mc_AKGC417L.wav',\n",
" '156_8b3_Al_mc_AKGC417L.wav',\n",
" '156_8b3_Ar_mc_AKGC417L.wav',\n",
" '156_8b3_Ll_mc_AKGC417L.wav',\n",
" '156_8b3_Lr_mc_AKGC417L.wav',\n",
" '156_8b3_Pl_mc_AKGC417L.wav',\n",
" '157_1b1_Al_sc_Meditron.wav',\n",
" '157_1b1_Ar_sc_Meditron.wav',\n",
" '157_1b1_Lr_sc_Meditron.wav',\n",
" '157_1b1_Pl_sc_Meditron.wav',\n",
" '157_1b1_Pr_sc_Meditron.wav',\n",
" '158_1b3_Ar_mc_LittC2SE.wav',\n",
" '158_1p2_Al_mc_AKGC417L.wav',\n",
" '158_1p2_Ar_mc_AKGC417L.wav',\n",
" '158_1p2_Ll_mc_AKGC417L.wav',\n",
" '158_1p2_Lr_mc_AKGC417L.wav',\n",
" '158_1p2_Pl_mc_AKGC417L.wav',\n",
" '158_1p2_Pr_mc_AKGC417L.wav',\n",
" '158_1p2_Tc_mc_AKGC417L.wav',\n",
" '158_1p3_Al_mc_AKGC417L.wav',\n",
" '158_1p3_Ar_mc_AKGC417L.wav',\n",
" '158_1p3_Ll_mc_AKGC417L.wav',\n",
" '158_1p3_Lr_mc_AKGC417L.wav',\n",
" '158_1p3_Pl_mc_AKGC417L.wav',\n",
" '158_1p3_Pr_mc_AKGC417L.wav',\n",
" '158_1p3_Tc_mc_AKGC417L.wav',\n",
" '158_1p4_Al_mc_AKGC417L.wav',\n",
" '158_1p4_Ar_mc_AKGC417L.wav',\n",
" '158_1p4_Lr_mc_AKGC417L.wav',\n",
" '158_1p4_Pl_mc_AKGC417L.wav',\n",
" '158_1p4_Pr_mc_AKGC417L.wav',\n",
" '158_1p4_Tc_mc_AKGC417L.wav',\n",
" '158_2p2_Ar_mc_AKGC417L.wav',\n",
" '158_2p3_Lr_mc_AKGC417L.wav',\n",
" '158_2p3_Tc_mc_AKGC417L.wav',\n",
" '160_1b2_Al_mc_AKGC417L.wav',\n",
" '160_1b2_Ar_mc_AKGC417L.wav',\n",
" '160_1b2_Lr_mc_AKGC417L.wav',\n",
" '160_1b2_Pl_mc_AKGC417L.wav',\n",
" '160_1b2_Pr_mc_AKGC417L.wav',\n",
" '160_1b2_Tc_mc_AKGC417L.wav',\n",
" '160_1b3_Al_mc_AKGC417L.wav',\n",
" '160_1b3_Ar_mc_AKGC417L.wav',\n",
" '160_1b3_Lr_mc_AKGC417L.wav',\n",
" '160_1b3_Pl_mc_AKGC417L.wav',\n",
" '160_1b3_Pr_mc_AKGC417L.wav',\n",
" '160_1b3_Tc_mc_AKGC417L.wav',\n",
" '160_1b4_Al_mc_AKGC417L.wav',\n",
" '160_1b4_Ar_mc_AKGC417L.wav',\n",
" '160_1b4_Lr_mc_AKGC417L.wav',\n",
" '160_1b4_Pl_mc_AKGC417L.wav',\n",
" '160_1b4_Pr_mc_AKGC417L.wav',\n",
" '160_1b4_Tc_mc_AKGC417L.wav',\n",
" '160_2b3_Lr_mc_AKGC417L.wav',\n",
" '160_2b4_Ar_mc_AKGC417L.wav',\n",
" '160_2b4_Pl_mc_AKGC417L.wav',\n",
" '160_2b4_Pr_mc_AKGC417L.wav',\n",
" '160_2b4_Tc_mc_AKGC417L.wav',\n",
" '162_1b2_Al_mc_AKGC417L.wav',\n",
" '162_1b2_Ar_mc_AKGC417L.wav',\n",
" '162_1b2_Ll_mc_AKGC417L.wav',\n",
" '162_1b2_Lr_mc_AKGC417L.wav',\n",
" '162_1b2_Pl_mc_AKGC417L.wav',\n",
" '162_1b2_Pr_mc_AKGC417L.wav',\n",
" '162_1b2_Tc_mc_AKGC417L.wav',\n",
" '162_2b2_Al_mc_AKGC417L.wav',\n",
" '162_2b2_Ar_mc_AKGC417L.wav',\n",
" '162_2b2_Pl_mc_AKGC417L.wav',\n",
" '162_2b2_Pr_mc_AKGC417L.wav',\n",
" '162_2b2_Tc_mc_AKGC417L.wav',\n",
" '162_2b3_Al_mc_AKGC417L.wav',\n",
" '162_2b3_Ar_mc_AKGC417L.wav',\n",
" '162_2b3_Lr_mc_AKGC417L.wav',\n",
" '162_2b3_Pl_mc_AKGC417L.wav',\n",
" '162_2b3_Pr_mc_AKGC417L.wav',\n",
" '162_2b3_Tc_mc_AKGC417L.wav',\n",
" '162_2b4_Al_mc_AKGC417L.wav',\n",
" '162_2b4_Ar_mc_AKGC417L.wav',\n",
" '162_2b4_Lr_mc_AKGC417L.wav',\n",
" '162_2b4_Pl_mc_AKGC417L.wav',\n",
" '162_2b4_Pr_mc_AKGC417L.wav',\n",
" '162_2b4_Tc_mc_AKGC417L.wav',\n",
" '163_2b2_Al_mc_AKGC417L.wav',\n",
" '163_2b2_Ar_mc_AKGC417L.wav',\n",
" '163_2b2_Ll_mc_AKGC417L.wav',\n",
" '163_2b2_Lr_mc_AKGC417L.wav',\n",
" '163_2b2_Pl_mc_AKGC417L.wav',\n",
" '163_2b2_Pr_mc_AKGC417L.wav',\n",
" '163_2b2_Tc_mc_AKGC417L.wav',\n",
" '163_8b3_Al_mc_AKGC417L.wav',\n",
" '163_8b3_Ar_mc_AKGC417L.wav',\n",
" '163_8b3_Ll_mc_AKGC417L.wav',\n",
" '163_8b3_Lr_mc_AKGC417L.wav',\n",
" '163_8b3_Pl_mc_AKGC417L.wav',\n",
" '163_8b3_Pr_mc_AKGC417L.wav',\n",
" '166_1p1_Al_sc_Meditron.wav',\n",
" '166_1p1_Ar_sc_Meditron.wav',\n",
" '166_1p1_Ll_sc_Meditron.wav',\n",
" '166_1p1_Pl_sc_Meditron.wav',\n",
" '166_1p1_Pr_sc_Meditron.wav',\n",
" '170_1b2_Al_mc_AKGC417L.wav',\n",
" '170_1b2_Ar_mc_AKGC417L.wav',\n",
" '170_1b2_Lr_mc_AKGC417L.wav',\n",
" '170_1b2_Pl_mc_AKGC417L.wav',\n",
" '170_1b2_Pr_mc_AKGC417L.wav',\n",
" '170_1b2_Tc_mc_AKGC417L.wav',\n",
" '170_1b3_Al_mc_AKGC417L.wav',\n",
" '170_1b3_Ar_mc_AKGC417L.wav',\n",
" '170_1b3_Ll_mc_AKGC417L.wav',\n",
" '170_1b3_Lr_mc_AKGC417L.wav',\n",
" '170_1b3_Pl_mc_AKGC417L.wav',\n",
" '170_1b3_Pr_mc_AKGC417L.wav',\n",
" '170_1b3_Tc_mc_AKGC417L.wav',\n",
" '170_1b4_Al_mc_AKGC417L.wav',\n",
" '170_1b4_Ar_mc_AKGC417L.wav',\n",
" '170_1b4_Lr_mc_AKGC417L.wav',\n",
" '170_1b4_Pl_mc_AKGC417L.wav',\n",
" '170_1b4_Pr_mc_AKGC417L.wav',\n",
" '170_1b4_Tc_mc_AKGC417L.wav',\n",
" '170_2b2_Al_mc_AKGC417L.wav',\n",
" '170_2b2_Ar_mc_AKGC417L.wav',\n",
" '170_2b2_Lr_mc_AKGC417L.wav',\n",
" '170_2b2_Pl_mc_AKGC417L.wav',\n",
" '170_2b2_Pr_mc_AKGC417L.wav',\n",
" '170_2b2_Tc_mc_AKGC417L.wav',\n",
" '172_1b3_Al_mc_AKGC417L.wav',\n",
" '172_1b3_Ar_mc_AKGC417L.wav',\n",
" '172_1b3_Ll_mc_AKGC417L.wav',\n",
" '172_1b3_Lr_mc_AKGC417L.wav',\n",
" '172_1b3_Pl_mc_AKGC417L.wav',\n",
" '172_1b3_Pr_mc_AKGC417L.wav',\n",
" '172_1b3_Tc_mc_AKGC417L.wav',\n",
" '172_1b4_Al_mc_AKGC417L.wav',\n",
" '172_1b4_Ar_mc_AKGC417L.wav',\n",
" '172_1b4_Ll_mc_AKGC417L.wav',\n",
" '172_1b4_Lr_mc_AKGC417L.wav',\n",
" '172_1b4_Pl_mc_AKGC417L.wav',\n",
" '172_1b4_Pr_mc_AKGC417L.wav',\n",
" '172_1b4_Tc_mc_AKGC417L.wav',\n",
" '172_1b5_Al_mc_AKGC417L.wav',\n",
" '172_1b5_Ar_mc_AKGC417L.wav',\n",
" '172_1b5_Ll_mc_AKGC417L.wav',\n",
" '172_1b5_Lr_mc_AKGC417L.wav',\n",
" '172_1b5_Pl_mc_AKGC417L.wav',\n",
" '172_1b5_Pr_mc_AKGC417L.wav',\n",
" '172_1b5_Tc_mc_AKGC417L.wav',\n",
" '172_2b5_Al_mc_AKGC417L.wav',\n",
" '172_2b5_Ar_mc_AKGC417L.wav',\n",
" '172_2b5_Lr_mc_AKGC417L.wav',\n",
" '172_2b5_Pl_mc_AKGC417L.wav',\n",
" '172_2b5_Pr_mc_AKGC417L.wav',\n",
" '172_2b5_Tc_mc_AKGC417L.wav',\n",
" '174_1p2_Ar_mc_AKGC417L.wav',\n",
" '174_1p2_Ll_mc_AKGC417L.wav',\n",
" '174_1p2_Lr_mc_AKGC417L.wav',\n",
" '174_1p2_Pl_mc_AKGC417L.wav',\n",
" '174_1p2_Pr_mc_AKGC417L.wav',\n",
" '174_1p2_Tc_mc_AKGC417L.wav',\n",
" '174_1p3_Ar_mc_AKGC417L.wav',\n",
" '174_1p3_Ll_mc_AKGC417L.wav',\n",
" '174_1p3_Lr_mc_AKGC417L.wav',\n",
" '174_1p3_Pl_mc_AKGC417L.wav',\n",
" '174_1p3_Pr_mc_AKGC417L.wav',\n",
" '174_1p3_Tc_mc_AKGC417L.wav',\n",
" '174_1p4_Ar_mc_AKGC417L.wav',\n",
" '174_1p4_Ll_mc_AKGC417L.wav',\n",
" '174_1p4_Lr_mc_AKGC417L.wav',\n",
" '174_1p4_Pl_mc_AKGC417L.wav',\n",
" '174_1p4_Pr_mc_AKGC417L.wav',\n",
" '174_1p4_Tc_mc_AKGC417L.wav',\n",
" '174_2p3_Al_mc_AKGC417L.wav',\n",
" '174_2p3_Ar_mc_AKGC417L.wav',\n",
" '174_2p3_Pl_mc_AKGC417L.wav',\n",
" '174_2p3_Pr_mc_AKGC417L.wav',\n",
" '174_2p3_Tc_mc_AKGC417L.wav',\n",
" '175_1b1_Al_sc_Litt3200.wav',\n",
" '175_1b1_Ar_sc_Litt3200.wav',\n",
" '175_1b1_Ll_sc_Litt3200.wav',\n",
" '175_1b1_Lr_sc_Litt3200.wav',\n",
" '175_1b1_Pl_sc_Litt3200.wav',\n",
" '175_1b1_Pr_sc_Litt3200.wav',\n",
" '176_1b3_Al_mc_AKGC417L.wav',\n",
" '176_1b3_Ar_mc_AKGC417L.wav',\n",
" '176_1b3_Ll_mc_AKGC417L.wav',\n",
" '176_1b3_Lr_mc_AKGC417L.wav',\n",
" '176_1b3_Pl_mc_AKGC417L.wav',\n",
" '176_1b3_Pr_mc_AKGC417L.wav',\n",
" '176_1b3_Tc_mc_AKGC417L.wav',\n",
" '176_1b4_Al_mc_AKGC417L.wav',\n",
" '176_1b4_Ar_mc_AKGC417L.wav',\n",
" '176_1b4_Ll_mc_AKGC417L.wav',\n",
" '176_1b4_Lr_mc_AKGC417L.wav',\n",
" '176_1b4_Pl_mc_AKGC417L.wav',\n",
" '176_1b4_Pr_mc_AKGC417L.wav',\n",
" '176_1b4_Tc_mc_AKGC417L.wav',\n",
" '176_2b3_Al_mc_AKGC417L.wav',\n",
" '176_2b3_Ar_mc_AKGC417L.wav',\n",
" '176_2b3_Ll_mc_AKGC417L.wav',\n",
" '176_2b3_Lr_mc_AKGC417L.wav',\n",
" '176_2b3_Pl_mc_AKGC417L.wav',\n",
" '176_2b3_Pr_mc_AKGC417L.wav',\n",
" '176_2b3_Tc_mc_AKGC417L.wav',\n",
" '177_1b2_Al_mc_AKGC417L.wav',\n",
" '177_1b2_Ar_mc_AKGC417L.wav',\n",
" '177_1b2_Lr_mc_AKGC417L.wav',\n",
" '177_1b2_Pl_mc_AKGC417L.wav',\n",
" '177_1b2_Pr_mc_AKGC417L.wav',\n",
" '177_1b2_Tc_mc_AKGC417L.wav',\n",
" '177_1b4_Al_mc_AKGC417L.wav',\n",
" '177_1b4_Ar_mc_AKGC417L.wav',\n",
" '177_1b4_Lr_mc_AKGC417L.wav',\n",
" '177_1b4_Pl_mc_AKGC417L.wav',\n",
" '177_1b4_Pr_mc_AKGC417L.wav',\n",
" '177_1b4_Tc_mc_AKGC417L.wav',\n",
" '177_2b4_Al_mc_AKGC417L.wav',\n",
" '177_2b4_Lr_mc_AKGC417L.wav',\n",
" '177_2b4_Pl_mc_AKGC417L.wav',\n",
" '177_2b4_Pr_mc_AKGC417L.wav',\n",
" '177_2b4_Tc_mc_AKGC417L.wav',\n",
" '178_1b2_Al_mc_AKGC417L.wav',\n",
" '178_1b2_Ar_mc_AKGC417L.wav',\n",
" '178_1b2_Lr_mc_AKGC417L.wav',\n",
" '178_1b2_Pl_mc_AKGC417L.wav',\n",
" '178_1b2_Pr_mc_AKGC417L.wav',\n",
" '178_1b2_Tc_mc_AKGC417L.wav',\n",
" '178_1b3_Al_mc_AKGC417L.wav',\n",
" '178_1b3_Ar_mc_AKGC417L.wav',\n",
" '178_1b3_Lr_mc_AKGC417L.wav',\n",
" '178_1b3_Pl_mc_AKGC417L.wav',\n",
" '178_1b3_Pr_mc_AKGC417L.wav',\n",
" '178_1b3_Tc_mc_AKGC417L.wav',\n",
" '178_1b6_Al_mc_AKGC417L.wav',\n",
" '178_1b6_Ar_mc_AKGC417L.wav',\n",
" '178_1b6_Ll_mc_AKGC417L.wav',\n",
" '178_1b6_Lr_mc_AKGC417L.wav',\n",
" '178_1b6_Pl_mc_AKGC417L.wav',\n",
" '178_1b6_Pr_mc_AKGC417L.wav',\n",
" '178_1b6_Tc_mc_AKGC417L.wav',\n",
" '178_2b2_Al_mc_AKGC417L.wav',\n",
" '178_2b2_Ar_mc_AKGC417L.wav',\n",
" '178_2b2_Lr_mc_AKGC417L.wav',\n",
" '178_2b2_Pr_mc_AKGC417L.wav',\n",
" '178_2b2_Tc_mc_AKGC417L.wav',\n",
" '180_1b4_Al_mc_AKGC417L.wav',\n",
" '180_1b4_Ar_mc_AKGC417L.wav',\n",
" '180_1b4_Lr_mc_AKGC417L.wav',\n",
" '180_1b4_Pl_mc_AKGC417L.wav',\n",
" '180_1b4_Pr_mc_AKGC417L.wav',\n",
" '181_1b1_Ar_mc_LittC2SE.wav',\n",
" '181_1b1_Tc_mc_LittC2SE.wav',\n",
" '181_1b2_Ar_mc_LittC2SE.wav',\n",
" '181_1b3_Tc_mc_LittC2SE.wav']"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#Iterate through all indexes and save those that have COPD\n",
"\n",
"train_soundfiles.sort_values(by=\"filename\", ascending=True, inplace=True)\n",
"copd_pats = [i\n",
" for i, row in train_soundfiles.iterrows() \n",
" if patient_diagnosis.loc[int(i[:3])].diagnosis == 'COPD']\n",
"copd_pats"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
"copd_pats = pd.DataFrame(np.array(copd_pats), columns=[\"soundfile\"])\n",
"copd_pats.set_index(\"soundfile\", inplace=True)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Extract MFCCs from soundfiles (X sets)"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"#Split patients into 5 folds\n",
"\n",
"def get_patient_folds(n_folds = 5):\n",
" n_folds = 5\n",
" patients_list = list(patient_diagnosis.index)\n",
" random_seed = random.Random(41)\n",
" random_seed.shuffle(patients_list)\n",
" fold_length = int(len(patients_list)/n_folds)\n",
" patient_folds = []\n",
" for i in range(n_folds-1):\n",
" patient_folds += [patients_list[i*fold_length : (i+1)*fold_length]]\n",
" patient_folds += [patients_list[(n_folds-1)*fold_length:]]\n",
" return patient_folds"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"def get_soundfiles_for_patients(patient_list):\n",
" patient_soundfiles = []\n",
" for soundfile in soundfiles.index:\n",
" soundfiles_patient = int(soundfile[:3])\n",
" if (soundfiles_patient in patient_list):\n",
" patient_soundfiles.append(soundfile)\n",
" return patient_soundfiles"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"#Let's see what the data distribution is right now\n",
"\n",
"def get_data_distribution(soundfiles, patient_diagnosis):\n",
" train_diagnosis = []\n",
" for i in soundfiles:\n",
" patient = int(i.split(\"_\")[0])\n",
" diagnosis = get_patient_diagnosis(patient=patient)\n",
" train_diagnosis.append(diagnosis)\n",
"\n",
" train_diagnosis = np.array(train_diagnosis)\n",
" train_diagnosis = pd.DataFrame(train_diagnosis, columns=[\"diagnosis\"])\n",
" return train_diagnosis.diagnosis.value_counts()"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"#Given the patient number, return their diagnosis\n",
"def get_patient_diagnosis(patient):\n",
" return patient_diagnosis.loc[patient].diagnosis"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"#Input is the audio time series\n",
"def extract_features_from_sound(raw, config, verbose = 0):\n",
" if (verbose == 1):\n",
" print(\"Extracting features from sound...\")\n",
" print(\"Length of raw is\", len(raw))\n",
" \n",
" max_audio_length = config.max_audio_length\n",
" \n",
" #Random padding\n",
" if len(raw) < config.max_audio_length:\n",
" maximum_padding = config.max_audio_length - len(raw)\n",
" begin_padding = randint(a = 0, b = maximum_padding)\n",
" end_padding = maximum_padding - begin_padding\n",
" raw = np.pad(raw, pad_width=(begin_padding, end_padding), mode=\"constant\")\n",
" if (verbose == 1):\n",
" print(len(raw))\n",
" mfccs = librosa.feature.mfcc(y=raw, sr=config.sample_rate, n_mfcc=config.n_mfcc, hop_length=512, fmin=50, fmax=2000) \n",
" \n",
" if (verbose == 1):\n",
" print(\"Padding complete, length of raw is\", len(raw))\n",
" print(\"Padding, complete, shape of MFCCs is\", np.array(mfccs).shape)\n",
" print(\"Features from sound extracted!\")\n",
" \n",
" return mfccs"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"def get_data_for_cnn(soundfiles, directory, verbose = 0):\n",
" all_features = [] #X\n",
" all_diagnosis = [] #y\n",
" \n",
" for soundfile in soundfiles:\n",
" raw, sr = librosa.load(directory + soundfile, sr=config.sample_rate, duration=config.max_audio_duration)\n",
" if (verbose == 1):\n",
" print(\"Soundfile\", soundfile)\n",
" print(\"Sample rate is\", sr)\n",
" print(\"Just loaded sound. Length is\", len(raw))\n",
" \n",
" if (verbose == 1):\n",
" print(\"Exctracting features...\")\n",
" features = extract_features_from_sound(raw, config, verbose = verbose)\n",
" all_features.append(features)\n",
" \n",
" diagnosis = get_patient_diagnosis(int(soundfile[:3]))\n",
" all_diagnosis.append(diagnosis)\n",
" \n",
" if(verbose ==1):\n",
" print(\"Features extracted!\")\n",
" \n",
" X = np.expand_dims(np.array(all_features), -1)\n",
" return X, all_diagnosis\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#Generating data\n",
"#We have split the patients into 5 folds already here\n",
"#And will generate data separately for each fold\n",
"#Alternatively it is possible to modify the code and generate data for all patients at once\n",
"#And then split them info folds later\n",
"\n",
"patient_folds = get_patient_folds()\n",
"all_results = []\n",
"\n",
"for i in range(5):\n",
" print(\"We're at fold\", i+1)\n",
" test_patients = patient_folds[i]\n",
" train_patients = patient_folds.copy()\n",
" del train_patients[i]\n",
" train_patients = [item for sublist in train_patients for item in sublist] #flatten list\n",
" \n",
" test_soundfiles = get_soundfiles_for_patients(test_patients)\n",
" train_soundfiles = get_soundfiles_for_patients(train_patients)\n",
"\n",
" X_train, labels_train = get_data_for_cnn(train_soundfiles, sound_dir)\n",
" X_test, labels_test = get_data_for_cnn(test_soundfiles, sound_dir)\n",
" \n",
" all_labels = labels_train + labels_test\n",
" test_labels_start = len(labels_train)\n",
" factorized_labels, classes = pd.factorize(all_labels)\n",
" print(classes)\n",
" categorical_labels = to_categorical(factorized_labels)\n",
" y_train = categorical_labels[:test_labels_start]\n",
" y_test = categorical_labels[test_labels_start:]\n",
" \n",
" np.save(root + \"dataframes/X_train_no_aug_04_08_split_\" + str(i), X_train)\n",
" np.save(root + \"dataframes/X_test_no_aug_04_08_split_\" + str(i), X_test)\n",
" np.save(root + \"dataframes/y_train_no_aug_04_08_split_\" + str(i), y_train)\n",
" np.save(root + \"dataframes/y_test_no_aug_04_08_split_\" + str(i), y_test)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python (venv)",
"language": "python",
"name": "venv"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.5"
}
},
"nbformat": 4,
"nbformat_minor": 2
}