[a5e8ec]: / notebooks / exploring_our_dataset.ipynb

Download this file

270 lines (269 with data), 7.1 kB

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "748b57ba-2925-414f-bed0-15f0483ad8a0",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "'Videos Test'\t\t        labeled_dataset_drive.csv\n",
      "'Videos Test.zip'\t        single_extracted_landmarks.csv\n",
      " dataset_cluttered.zip\t        single_extracted_landmarks_ambiguous.csv\n",
      " dataset_photos\t\t        single_extracted_landmarks_bad.csv\n",
      " dataset_videos\t\t        single_extracted_landmarks_inclass.csv\n",
      " extracted_landmarks.csv        single_extracted_landmarks_inclass_front.csv\n",
      " extracted_landmarks_test.csv   videos\n",
      " labeled_dataset.csv\n"
     ]
    }
   ],
   "source": [
    "!ls ../assets"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "c7b5e954-4b22-4d55-8f33-235e427fe4ea",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "03546cdf-4be5-4e52-8032-61cbf3e71992",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Read the labeled dataset CSV from the assets directory into a DataFrame.\n",
    "data = pd.read_csv(filepath_or_buffer='../assets/labeled_dataset.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "d44d30ae-9231-466c-ad0c-b7c062c46566",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>video</th>\n",
       "      <th>group</th>\n",
       "      <th>frame</th>\n",
       "      <th>landmarks</th>\n",
       "      <th>Label</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>video_15.mp4</td>\n",
       "      <td>1</td>\n",
       "      <td>5</td>\n",
       "      <td>[     334.75      178.55     0.98386      339....</td>\n",
       "      <td>bad</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>video_15.mp4</td>\n",
       "      <td>1</td>\n",
       "      <td>11</td>\n",
       "      <td>[     329.95      181.47     0.99063      334....</td>\n",
       "      <td>bad</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>video_15.mp4</td>\n",
       "      <td>1</td>\n",
       "      <td>17</td>\n",
       "      <td>[      329.7      182.92     0.99079       334...</td>\n",
       "      <td>bad</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>video_15.mp4</td>\n",
       "      <td>1</td>\n",
       "      <td>23</td>\n",
       "      <td>[     329.32      187.55     0.98055      334....</td>\n",
       "      <td>bad</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>video_15.mp4</td>\n",
       "      <td>1</td>\n",
       "      <td>29</td>\n",
       "      <td>[     331.31      194.96       0.985      335....</td>\n",
       "      <td>bad</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          video  group  frame  \\\n",
       "0  video_15.mp4      1      5   \n",
       "1  video_15.mp4      1     11   \n",
       "2  video_15.mp4      1     17   \n",
       "3  video_15.mp4      1     23   \n",
       "4  video_15.mp4      1     29   \n",
       "\n",
       "                                           landmarks Label  \n",
       "0  [     334.75      178.55     0.98386      339....   bad  \n",
       "1  [     329.95      181.47     0.99063      334....   bad  \n",
       "2  [      329.7      182.92     0.99079       334...   bad  \n",
       "3  [     329.32      187.55     0.98055      334....   bad  \n",
       "4  [     331.31      194.96       0.985      335....   bad  "
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Preview the first five rows of the loaded dataset.\n",
    "data.head(n=5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "cca6bb1c-9bcf-4aec-9603-5825aac9ea29",
   "metadata": {},
   "outputs": [],
   "source": [
    "# A bare groupby only displays an opaque object repr (with a memory address\n",
    "# that changes on every run); report the number of (video, group) groups instead.\n",
    "len(data.groupby(['video', 'group']))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "id": "810fa636-8e7f-4b20-8853-085fad815eba",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Label\n",
       "good    54.928407\n",
       "bad     45.071593\n",
       "Name: proportion, dtype: float64"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Relative frequency of each Label value, scaled to percent.\n",
    "data['Label'].value_counts(normalize=True).mul(100)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "48bfad57-b974-4b3c-ae6f-ed655cca9d57",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Count the rows belonging to every (video, group) pair.\n",
    "video_group_counts = (\n",
    "    data.groupby(['video', 'group'])\n",
    "    .size()\n",
    "    .reset_index(name='count')\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "id": "d94e6709-d3e2-4e3f-b7c7-7b21df17084c",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "count\n",
       "10    0.900606\n",
       "5     0.012727\n",
       "7     0.012727\n",
       "3     0.012121\n",
       "2     0.012121\n",
       "4     0.011515\n",
       "1     0.010909\n",
       "6     0.010303\n",
       "9     0.008485\n",
       "8     0.008485\n",
       "Name: proportion, dtype: float64"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Fraction of (video, group) pairs that have each row count.\n",
    "video_group_counts.loc[:, 'count'].value_counts(normalize=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b5fc30d0-412b-4de9-95b9-d7761e226989",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}