Switch to side-by-side view

--- a
+++ b/notebooks/exploring_our_dataset.ipynb
@@ -0,0 +1,269 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "c7b5e954-4b22-4d55-8f33-235e427fe4ea",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "id": "748b57ba-2925-414f-bed0-15f0483ad8a0",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "'Videos Test'\t\t        labeled_dataset_drive.csv\n",
+      "'Videos Test.zip'\t        single_extracted_landmarks.csv\n",
+      " dataset_cluttered.zip\t        single_extracted_landmarks_ambiguous.csv\n",
+      " dataset_photos\t\t        single_extracted_landmarks_bad.csv\n",
+      " dataset_videos\t\t        single_extracted_landmarks_inclass.csv\n",
+      " extracted_landmarks.csv        single_extracted_landmarks_inclass_front.csv\n",
+      " extracted_landmarks_test.csv   videos\n",
+      " labeled_dataset.csv\n"
+     ]
+    }
+   ],
+   "source": [
+    "!ls ../assets"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "id": "03546cdf-4be5-4e52-8032-61cbf3e71992",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "data = pd.read_csv('../assets/labeled_dataset.csv')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "id": "d44d30ae-9231-466c-ad0c-b7c062c46566",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>video</th>\n",
+       "      <th>group</th>\n",
+       "      <th>frame</th>\n",
+       "      <th>landmarks</th>\n",
+       "      <th>Label</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>video_15.mp4</td>\n",
+       "      <td>1</td>\n",
+       "      <td>5</td>\n",
+       "      <td>[     334.75      178.55     0.98386      339....</td>\n",
+       "      <td>bad</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>video_15.mp4</td>\n",
+       "      <td>1</td>\n",
+       "      <td>11</td>\n",
+       "      <td>[     329.95      181.47     0.99063      334....</td>\n",
+       "      <td>bad</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>video_15.mp4</td>\n",
+       "      <td>1</td>\n",
+       "      <td>17</td>\n",
+       "      <td>[      329.7      182.92     0.99079       334...</td>\n",
+       "      <td>bad</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>video_15.mp4</td>\n",
+       "      <td>1</td>\n",
+       "      <td>23</td>\n",
+       "      <td>[     329.32      187.55     0.98055      334....</td>\n",
+       "      <td>bad</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>video_15.mp4</td>\n",
+       "      <td>1</td>\n",
+       "      <td>29</td>\n",
+       "      <td>[     331.31      194.96       0.985      335....</td>\n",
+       "      <td>bad</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "          video  group  frame  \\\n",
+       "0  video_15.mp4      1      5   \n",
+       "1  video_15.mp4      1     11   \n",
+       "2  video_15.mp4      1     17   \n",
+       "3  video_15.mp4      1     23   \n",
+       "4  video_15.mp4      1     29   \n",
+       "\n",
+       "                                           landmarks Label  \n",
+       "0  [     334.75      178.55     0.98386      339....   bad  \n",
+       "1  [     329.95      181.47     0.99063      334....   bad  \n",
+       "2  [      329.7      182.92     0.99079       334...   bad  \n",
+       "3  [     329.32      187.55     0.98055      334....   bad  \n",
+       "4  [     331.31      194.96       0.985      335....   bad  "
+      ]
+     },
+     "execution_count": 15,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "data.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "cca6bb1c-9bcf-4aec-9603-5825aac9ea29",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Number of distinct (video, group) pairs. A bare groupby() is lazy and only\n",
+    "# displays an object repr with a memory address, so ask it for a concrete value.\n",
+    "data.groupby(['video', 'group']).ngroups"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 22,
+   "id": "810fa636-8e7f-4b20-8853-085fad815eba",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "Label\n",
+       "good    54.928407\n",
+       "bad     45.071593\n",
+       "Name: proportion, dtype: float64"
+      ]
+     },
+     "execution_count": 22,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "data['Label'].value_counts(normalize=True)*100"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "id": "48bfad57-b974-4b3c-ae6f-ed655cca9d57",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# One row per (video, group) pair; 'count' is the number of rows in that pair.\n",
+    "video_group_counts = (\n",
+    "    data.groupby(['video', 'group'])\n",
+    "    .size()\n",
+    "    .to_frame('count')\n",
+    "    .reset_index()\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 21,
+   "id": "d94e6709-d3e2-4e3f-b7c7-7b21df17084c",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "count\n",
+       "10    0.900606\n",
+       "5     0.012727\n",
+       "7     0.012727\n",
+       "3     0.012121\n",
+       "2     0.012121\n",
+       "4     0.011515\n",
+       "1     0.010909\n",
+       "6     0.010303\n",
+       "9     0.008485\n",
+       "8     0.008485\n",
+       "Name: proportion, dtype: float64"
+      ]
+     },
+     "execution_count": 21,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "video_group_counts['count'].value_counts(normalize=True)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "b5fc30d0-412b-4de9-95b9-d7761e226989",
+   "metadata": {},
+   "source": [
+    "## Summary\n",
+    "\n",
+    "Figures are read off the saved outputs above and will change if `labeled_dataset.csv` does.\n",
+    "\n",
+    "- `Label` is split roughly 55% `good` / 45% `bad`.\n",
+    "- About 90% of `(video, group)` pairs contain exactly 10 rows; the remaining pairs hold 1 to 9 rows, each size accounting for about 1% of pairs."
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.5"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}