--- a
+++ b/notebooks/manual_model_test.ipynb
@@ -0,0 +1,184 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd\n",
+    "from sklearn.preprocessing import MinMaxScaler\n",
+    "import numpy as np\n",
+    "import re\n",
+    "import json"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "data = pd.read_csv('../assets/single_extracted_landmarks_inclass_front.csv')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def landmarks_to_arr(stringarr):\n",
+    "    arr = stringarr.replace(\"[\", \"\").replace(\"]\", \"\")\n",
+    "    # Drop the trailing value: extraction wrote one spurious extra number per frame (51 -> 50)\n",
+    "    return [float(x) for x in arr.split()[:-1]]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "data['landmarks'] = data['landmarks'].apply(landmarks_to_arr)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Preprocessing of the landmarks\n",
+    "### Consists of:\n",
+    "- Grouping the sequences:\n",
+    "  * Shapes must be (9, 10, 50)\n",
+    "  * numpy arrays\n",
+    "- Isolate each set within the sequence and normalize them.\n",
+    "  * Normalize with MinMaxScaler() no params."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "(9, 10, 50)\n"
+     ]
+    }
+   ],
+   "source": [
+    "sequences_list = []\n",
+    "SEQUENCE_LENGTH = 10\n",
+    "\n",
+    "# Group frames into sequences of SEQUENCE_LENGTH per (video, group); drop incomplete ones\n",
+    "grouped_data = data.groupby(['video', 'group'])\n",
+    "for _, group in grouped_data:\n",
+    "    landmarks = group['landmarks'].tolist()\n",
+    "    if len(landmarks) == SEQUENCE_LENGTH:\n",
+    "        sequences_list.append(landmarks)\n",
+    "\n",
+    "sequences = np.array(sequences_list)\n",
+    "print(sequences.shape)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "scaler = MinMaxScaler()\n",
+    "normalized_sequences = np.zeros_like(sequences)\n",
+    "# Min-max scale each frame independently: each 50-value frame is mapped to [0, 1]\n",
+    "for i in range(sequences.shape[0]):\n",
+    "    for j in range(sequences.shape[1]):\n",
+    "        # Flatten the landmarks for each set within the sequence\n",
+    "        landmarks_flattened = np.reshape(sequences[i, j], (-1, 1))\n",
+    "        # Normalize the landmarks\n",
+    "        landmarks_normalized = scaler.fit_transform(landmarks_flattened)\n",
+    "        # Reshape the normalized landmarks back to the original shape\n",
+    "        normalized_landmarks = np.reshape(landmarks_normalized, sequences[i, j].shape)\n",
+    "        # Update the normalized landmarks in the sequences array\n",
+    "        normalized_sequences[i, j] = normalized_landmarks"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(9, 10, 50)"
+      ]
+     },
+     "execution_count": 14,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "sequences.shape"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "(1, 10, 50)\n",
+      "(1, 10, 50)\n",
+      "(1, 10, 50)\n",
+      "(1, 10, 50)\n",
+      "(1, 10, 50)\n",
+      "(1, 10, 50)\n",
+      "(1, 10, 50)\n",
+      "(1, 10, 50)\n",
+      "(1, 10, 50)\n"
+     ]
+    }
+   ],
+   "source": [
+    "for seq in sequences:\n",
+    "    reshaped = seq[np.newaxis, :, :]\n",
+    "    print(reshaped.shape)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.5"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}