--- a
+++ b/notebooks/exploring_our_dataset.ipynb
@@ -0,0 +1,310 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "md-title",
+   "metadata": {},
+   "source": [
+    "# Exploring our dataset\n",
+    "\n",
+    "A first look at `../assets/labeled_dataset.csv`: its columns, the balance of `Label` values, and how many rows each `(video, group)` pair contributes.\n",
+    "\n",
+    "**Findings (taken from the outputs recorded below)**\n",
+    "\n",
+    "- Labels are roughly balanced: about 55% `good` and 45% `bad`.\n",
+    "- About 90% of `(video, group)` pairs have exactly 10 rows; the remaining pairs have between 1 and 9."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "c7b5e954-4b22-4d55-8f33-235e427fe4ea",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from pathlib import Path\n",
+    "\n",
+    "import pandas as pd"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "config-paths",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Every path below is relative to the directory this notebook is run from.\n",
+    "ASSETS_DIR = Path('../assets')\n",
+    "LABELED_DATASET_CSV = ASSETS_DIR / 'labeled_dataset.csv'"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "md-assets",
+   "metadata": {},
+   "source": [
+    "## Available assets"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "748b57ba-2925-414f-bed0-15f0483ad8a0",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "'Videos Test'\t\t labeled_dataset_drive.csv\n",
+      "'Videos Test.zip'\t single_extracted_landmarks.csv\n",
+      " dataset_cluttered.zip\t single_extracted_landmarks_ambiguous.csv\n",
+      " dataset_photos\t\t single_extracted_landmarks_bad.csv\n",
+      " dataset_videos\t\t single_extracted_landmarks_inclass.csv\n",
+      " extracted_landmarks.csv single_extracted_landmarks_inclass_front.csv\n",
+      " extracted_landmarks_test.csv videos\n",
+      " labeled_dataset.csv\n"
+     ]
+    }
+   ],
+   "source": [
+    "!ls {ASSETS_DIR}"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "md-load",
+   "metadata": {},
+   "source": [
+    "## Load the labeled dataset"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "03546cdf-4be5-4e52-8032-61cbf3e71992",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "labeled_df = pd.read_csv(LABELED_DATASET_CSV)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "d44d30ae-9231-466c-ad0c-b7c062c46566",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       " .dataframe tbody tr th:only-of-type {\n",
+       " vertical-align: middle;\n",
+       " }\n",
+       "\n",
+       " .dataframe tbody tr th {\n",
+       " vertical-align: top;\n",
+       " }\n",
+       "\n",
+       " .dataframe thead th {\n",
+       " text-align: right;\n",
+       " }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       " <thead>\n",
+       " <tr style=\"text-align: right;\">\n",
+       " <th></th>\n",
+       " <th>video</th>\n",
+       " <th>group</th>\n",
+       " <th>frame</th>\n",
+       " <th>landmarks</th>\n",
+       " <th>Label</th>\n",
+       " </tr>\n",
+       " </thead>\n",
+       " <tbody>\n",
+       " <tr>\n",
+       " <th>0</th>\n",
+       " <td>video_15.mp4</td>\n",
+       " <td>1</td>\n",
+       " <td>5</td>\n",
+       " <td>[ 334.75 178.55 0.98386 339....</td>\n",
+       " <td>bad</td>\n",
+       " </tr>\n",
+       " <tr>\n",
+       " <th>1</th>\n",
+       " <td>video_15.mp4</td>\n",
+       " <td>1</td>\n",
+       " <td>11</td>\n",
+       " <td>[ 329.95 181.47 0.99063 334....</td>\n",
+       " <td>bad</td>\n",
+       " </tr>\n",
+       " <tr>\n",
+       " <th>2</th>\n",
+       " <td>video_15.mp4</td>\n",
+       " <td>1</td>\n",
+       " <td>17</td>\n",
+       " <td>[ 329.7 182.92 0.99079 334...</td>\n",
+       " <td>bad</td>\n",
+       " </tr>\n",
+       " <tr>\n",
+       " <th>3</th>\n",
+       " <td>video_15.mp4</td>\n",
+       " <td>1</td>\n",
+       " <td>23</td>\n",
+       " <td>[ 329.32 187.55 0.98055 334....</td>\n",
+       " <td>bad</td>\n",
+       " </tr>\n",
+       " <tr>\n",
+       " <th>4</th>\n",
+       " <td>video_15.mp4</td>\n",
+       " <td>1</td>\n",
+       " <td>29</td>\n",
+       " <td>[ 331.31 194.96 0.985 335....</td>\n",
+       " <td>bad</td>\n",
+       " </tr>\n",
+       " </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       " video group frame \\\n",
+       "0 video_15.mp4 1 5 \n",
+       "1 video_15.mp4 1 11 \n",
+       "2 video_15.mp4 1 17 \n",
+       "3 video_15.mp4 1 23 \n",
+       "4 video_15.mp4 1 29 \n",
+       "\n",
+       " landmarks Label \n",
+       "0 [ 334.75 178.55 0.98386 339.... bad \n",
+       "1 [ 329.95 181.47 0.99063 334.... bad \n",
+       "2 [ 329.7 182.92 0.99079 334... bad \n",
+       "3 [ 329.32 187.55 0.98055 334.... bad \n",
+       "4 [ 331.31 194.96 0.985 335.... bad "
+      ]
+     },
+     "execution_count": 5,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "labeled_df.head()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "md-label-balance",
+   "metadata": {},
+   "source": [
+    "## Label balance\n",
+    "\n",
+    "Share of each `Label` value, in percent."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "810fa636-8e7f-4b20-8853-085fad815eba",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "Label\n",
+       "good 54.928407\n",
+       "bad 45.071593\n",
+       "Name: proportion, dtype: float64"
+      ]
+     },
+     "execution_count": 6,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "labeled_df['Label'].value_counts(normalize=True) * 100"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "md-rows-per-group",
+   "metadata": {},
+   "source": [
+    "## Rows per `(video, group)` pair\n",
+    "\n",
+    "Count the rows in each `(video, group)` pair, then show what fraction of pairs has each row count."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "id": "48bfad57-b974-4b3c-ae6f-ed655cca9d57",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "video_group_counts = (\n",
+    "    labeled_df\n",
+    "    .groupby(['video', 'group'])\n",
+    "    .size()\n",
+    "    .reset_index(name='count')\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "id": "d94e6709-d3e2-4e3f-b7c7-7b21df17084c",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "count\n",
+       "10 0.900606\n",
+       "5 0.012727\n",
+       "7 0.012727\n",
+       "3 0.012121\n",
+       "2 0.012121\n",
+       "4 0.011515\n",
+       "1 0.010909\n",
+       "6 0.010303\n",
+       "9 0.008485\n",
+       "8 0.008485\n",
+       "Name: proportion, dtype: float64"
+      ]
+     },
+     "execution_count": 8,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "video_group_counts['count'].value_counts(normalize=True)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.5"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}