185 lines (184 with data), 4.3 kB
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"from sklearn.preprocessing import MinMaxScaler\n",
"import numpy as np\n",
"import re\n",
"import json"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"# Raw landmark extractions: one row per frame; the 'landmarks' column is a\n",
"# string-serialized array (parsed into floats in the cells below).\n",
"# NOTE(review): presumably front-camera in-class recordings — confirm provenance.\n",
"data = pd.read_csv('../assets/single_extracted_landmarks_inclass_front.csv')"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"def landmarks_to_arr(stringarr):\n",
"    \"\"\"Parse a bracketed, space-separated landmark string into a list of floats.\n",
"\n",
"    The last value is intentionally dropped: the extraction script appended\n",
"    one extra element per row by mistake, leaving 50 usable values per frame.\n",
"    An empty or bracket-only string yields an empty list.\n",
"    \"\"\"\n",
"    cleaned = stringarr.replace(\"[\", \"\").replace(\"]\", \"\")\n",
"    # [:-1] removes the erroneous trailing element added during extraction\n",
"    return [float(x) for x in cleaned.split()[:-1]]"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"# Convert each serialized landmark string into a list of floats\n",
"# (landmarks_to_arr also drops the erroneous trailing value per frame)\n",
"data['landmarks'] = data['landmarks'].apply(landmarks_to_arr)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Preprocessing of the landmarks\n",
"### Consists of:\n",
"- Grouping the sequences:\n",
"    * Shapes must be (9, 10, 50) — 51 values were extracted per frame, but the erroneous last one is dropped during parsing\n",
"    * numpy arrays\n",
"- Isolating each set within the sequence and normalizing it.\n",
"    * Normalize with MinMaxScaler() with no params."
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(9, 10, 50)\n"
]
}
],
"source": [
"sequences_list = []\n",
"SEQUENCE_LENGTH = 10  # frames per sequence\n",
"\n",
"# Group frames into fixed-length sequences, one per (video, group) pair.\n",
"# Incomplete groups are discarded so every sample has the same shape.\n",
"grouped_data = data.groupby(['video', 'group'])\n",
"for _, group in grouped_data:\n",
"    landmarks = group['landmarks'].tolist()\n",
"    # use the named constant instead of a magic 10 so the two stay in sync\n",
"    if len(landmarks) == SEQUENCE_LENGTH:\n",
"        sequences_list.append(landmarks)\n",
"\n",
"sequences = np.array(sequences_list)\n",
"print(sequences.shape)"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
"# Min-max normalize each frame independently: every 50-value landmark set\n",
"# is rescaled to [0, 1] on its own, so frames are comparable within a sequence.\n",
"scaler = MinMaxScaler()\n",
"normalized_sequences = np.zeros_like(sequences)\n",
"for seq_idx, sequence in enumerate(sequences):\n",
"    for frame_idx, frame in enumerate(sequence):\n",
"        # MinMaxScaler expects a 2-D column, so flatten the frame to (n, 1)\n",
"        column = np.reshape(frame, (-1, 1))\n",
"        scaled = scaler.fit_transform(column)\n",
"        # restore the frame's original shape and store the result\n",
"        normalized_sequences[seq_idx, frame_idx] = np.reshape(scaled, frame.shape)"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(9, 10, 50)"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Sanity check: sequences is already an ndarray (built above), so the\n",
"# redundant np.array() copy is unnecessary\n",
"sequences.shape"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(1, 10, 50)\n",
"(1, 10, 50)\n",
"(1, 10, 50)\n",
"(1, 10, 50)\n",
"(1, 10, 50)\n",
"(1, 10, 50)\n",
"(1, 10, 50)\n",
"(1, 10, 50)\n",
"(1, 10, 50)\n"
]
}
],
"source": [
"# Preview: each sequence gains a leading batch axis of size 1 -> (1, 10, 50)\n",
"for seq in sequences:\n",
"    batched = np.expand_dims(seq, axis=0)\n",
"    print(batched.shape)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.5"
}
},
"nbformat": 4,
"nbformat_minor": 4
}