--- a
+++ b/landmark_extraction/our-estimation.ipynb
@@ -0,0 +1,823 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import torch\n",
+    "import os\n",
+    "from models.experimental import attempt_load\n",
+    "import cv2\n",
+    "from utils.datasets import letterbox\n",
+    "from torchvision import transforms\n",
+    "import numpy as np\n",
+    "import time\n",
+    "from utils.general import non_max_suppression_kpt\n",
+    "from utils.plots import output_to_keypoint, colors, plot_one_box_kpt\n",
+    "import pandas as pd\n",
+    "import itertools\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "device = 'cuda' if torch.cuda.is_available() else 'cpu'\n",
+    "model_path = '../models/yolov7-w6-pose.pt'"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def strip_model(device, model_path):\n",
+    "    \"\"\"Strip training state from a checkpoint in place (and convert it to FP16 on GPU).\"\"\"\n",
+    "    x = torch.load(model_path, map_location=torch.device(device))\n",
+    "\n",
+    "    if x.get('ema'):\n",
+    "        x['model'] = x['ema']  # replace model with ema\n",
+    "    for k in 'optimizer', 'training_results', 'wandb_id', 'ema', 'updates':  # keys\n",
+    "        x[k] = None\n",
+    "    x['epoch'] = -1\n",
+    "    if device != 'cpu':\n",
+    "        x['model'].half()  # to FP16\n",
+    "    else:\n",
+    "        x['model'].float()\n",
+    "    for p in x['model'].parameters():\n",
+    "        p.requires_grad = False\n",
+    "\n",
+    "    torch.save(x, model_path)\n",
+    "    mb = os.path.getsize(model_path) / 1E6  # filesize\n",
+    "    print(f\"Optimizer stripped from {model_path}, saved as {model_path}, {mb:.1f}MB\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Optimizer stripped from ../models/yolov7-w6-pose.pt, saved as ../models/yolov7-w6-pose.pt, 161.1MB\n",
+      "Fusing layers... \n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/home/g/.local/lib/python3.11/site-packages/torch/functional.py:504: UserWarning: torch.meshgrid: in an upcoming release, it will be required to pass the indexing argument. 
(Triggered internally at ../aten/src/ATen/native/TensorShape.cpp:3483.)\n",
+      "  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]\n"
+     ]
+    }
+   ],
+   "source": [
+    "strip_model(device, model_path)\n",
+    "model = attempt_load(model_path, map_location=device)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Things to identify: ['person']\n"
+     ]
+    }
+   ],
+   "source": [
+    "_ = model.eval()\n",
+    "names = model.module.names if hasattr(model, 'module') else model.names  # get class names\n",
+    "print(f\"Things to identify: {names}\")"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "To open a video capture, pass either:\n",
+    " - an integer that identifies a webcam\n",
+    " - a path to a video file"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def load_source(source):\n",
+    "    if source.isnumeric():\n",
+    "        cap = cv2.VideoCapture(int(source))  # webcam index\n",
+    "    else:\n",
+    "        cap = cv2.VideoCapture(source)  # video file path\n",
+    "    if not cap.isOpened():  # check if videocapture could not be opened\n",
+    "        print('Source not found. Check path')\n",
+    "    return cap"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Iterating over frames\n",
+    "For the process of retrieving sequences of landmarks we use a `sequence_length`, the number of landmark captures that make up a single sequence, and a `separation`, the number of frames between one captured set of landmarks and the next inside a sequence (the frames in between are ignored).\n",
+    "\n",
+    "We capture landmarks every `separation` frames and group those captures into `N` arrays of length `sequence_length` (a small sanity check of this scheme is sketched in the next cell).\n",
+    "\n",
+    "### How will we identify the same person in every iteration?\n",
+    "First, take the detection with the most identified landmarks and store that set of landmarks in `base_landmarks`. Then, in every later frame, compare each new detection against `base_landmarks` and keep the one whose keypoints lie closest to it (a matching sketch follows the extraction function below)."
+   ]
+  },
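+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "A quick sanity check of the sampling scheme just described. This is our own illustration, not part of the extraction pipeline: `sampled_frames` is a hypothetical helper that only reproduces which 0-based frame indices are kept and how they are grouped."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Sketch (assumption): mirrors the counting in landmarks_sequence_for_video below.\n",
+    "# Every `separation`-th frame is considered; once a group holds `sequence_length`\n",
+    "# captures, the next considered frame only rolls the group over and is not stored.\n",
+    "def sampled_frames(total_frames, sequence_length=10, separation=6):\n",
+    "    groups, current = [], []\n",
+    "    for frame in range(total_frames):\n",
+    "        if (frame + 1) % separation == 0:  # the frame counter hits `separation` here\n",
+    "            if len(current) >= sequence_length:\n",
+    "                groups.append(current)  # group is full: start a new one;\n",
+    "                current = []            # this frame itself is not stored\n",
+    "            else:\n",
+    "                current.append(frame)\n",
+    "    if current:\n",
+    "        groups.append(current)\n",
+    "    return groups\n",
+    "\n",
+    "sampled_frames(75)  # -> [[5, 11, 17, 23, 29, 35, 41, 47, 53, 59], [71]]"
+   ]
+  },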
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def landmarks_sequence_for_video(video_path, sequence_length=10, separation=6):\n",
+    "    \"\"\"Extract sequences of pose landmarks from a video.\n",
+    "\n",
+    "    Args:\n",
+    "        video_path: path to the video (or a webcam index as a string).\n",
+    "        sequence_length: number of landmark captures per sequence.\n",
+    "        separation: number of frames between consecutive captures.\n",
+    "    Returns:\n",
+    "        A list of sequences; each row is [video, group, frame, landmarks].\n",
+    "    \"\"\"\n",
+    "    count = 0\n",
+    "    sequences = [[]]\n",
+    "\n",
+    "    cap = load_source(video_path)\n",
+    "    frame_width = int(cap.get(3))  # video frame width\n",
+    "    frame_height = int(cap.get(4))  # video frame height\n",
+    "\n",
+    "    base_landmarks = None\n",
+    "\n",
+    "    start = time.time()\n",
+    "    current_frame = 0\n",
+    "    video_name = video_path.split('/')[-1]\n",
+    "    current_group = 1\n",
+    "    while cap.isOpened():  # note the call: `cap.isOpened` without () is always truthy\n",
+    "        ret, frame = cap.read()\n",
+    "        if not ret:\n",
+    "            break\n",
+    "\n",
+    "        count += 1\n",
+    "\n",
+    "        image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)  # convert frame to RGB\n",
+    "        image = letterbox(image, frame_width, stride=64, auto=True)[0]\n",
+    "        image = transforms.ToTensor()(image)\n",
+    "        image = torch.tensor(np.array([image.numpy()]))\n",
+    "        image = image.to(device).float()\n",
+    "\n",
+    "        image = image.cpu().squeeze().numpy().transpose((1, 2, 0))\n",
+    "\n",
+    "        # This frame size works with yolov7; since we don't want to touch their model,\n",
+    "        # we just resize the frame.\n",
+    "        desired_width = 640\n",
+    "        desired_height = 512\n",
+    "        image = cv2.resize(image, (desired_width, desired_height), interpolation=cv2.INTER_LINEAR)\n",
+    "        image = image[:desired_height, :desired_width]\n",
+    "\n",
+    "        # Convert the resized image back to a torch.Tensor\n",
+    "        image = torch.from_numpy(image.transpose((2, 0, 1))).unsqueeze(0).to(device)\n",
+    "\n",
+    "        with torch.no_grad():  # get predictions\n",
+    "            output_data, _ = model(image)\n",
+    "\n",
+    "        output_data = non_max_suppression_kpt(output_data,  # apply non max suppression\n",
+    "                                    0.25,  # Conf. Threshold.\n",
+    "                                    0.65,  # IoU Threshold.\n",
+    "                                    nc=model.yaml['nc'],  # Number of classes.\n",
+    "                                    nkpt=model.yaml['nkpt'],  # Number of keypoints.\n",
+    "                                    kpt_label=True)\n",
+    "\n",
+    "        output = output_to_keypoint(output_data)\n",
+    "\n",
+    "        im0 = image[0].permute(1, 2, 0) * 255  # change [b, c, h, w] to [h, w, c] for displaying\n",
+    "        im0 = im0.cpu().numpy().astype(np.uint8)\n",
+    "        im0 = cv2.cvtColor(im0, cv2.COLOR_RGB2BGR)  # back to BGR\n",
+    "\n",
+    "        for pose in output_data:  # detections per image\n",
+    "            for det_index, (*xyxy, conf, cls) in enumerate(reversed(pose[:, :6])):  # draw each pose on the frame\n",
+    "                c = int(cls)  # integer class\n",
+    "                kpts = pose[det_index, 6:]\n",
+    "                plot_one_box_kpt(xyxy, im0, label=None, color=colors(c, True),\n",
+    "                            line_thickness=3, kpt_label=True, kpts=kpts, steps=3,\n",
+    "                            orig_shape=im0.shape[:2])\n",
+    "\n",
+    "        if count == separation:\n",
+    "            # cv2.imshow(\"YOLOv7 Pose Estimation Demo\", im0)\n",
+    "            if len(sequences[-1]) >= sequence_length:\n",
+    "                sequences += [[]]  # init new empty sequence\n",
+    "                current_group += 1\n",
+    "            else:\n",
+    "                # TODO: make sure that the landmarks stored are the desired ones.\n",
+    "                # Use position difference.\n",
+    "                if len(output):\n",
+    "                    sequences[-1] += [[video_name, current_group, current_frame, output[0, 7:].T]]\n",
+    "            count = 0\n",
+    "\n",
+    "        # Press Q on keyboard to exit\n",
+    "        if cv2.waitKey(25) & 0xFF == ord('q'):\n",
+    "            break\n",
+    "        current_frame += 1\n",
+    "\n",
+    "    cap.release()\n",
+    "    print(f\"\\tfinished after {round(time.time() - start)}s\")\n",
+    "    return sequences"
+   ]
+  },
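+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Note that the loop above still stores `output[0, 7:]` on every sampled frame (hence the TODO), so when several people are detected the stored landmarks can jump between them. Below is a minimal sketch of the distance-based matching described earlier; `closest_detection` is our own hypothetical helper, and the mean keypoint distance is an assumption, not something YOLOv7 provides."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Sketch (assumption): pick the detection closest to `base_landmarks`.\n",
+    "# Keypoints come as a flat array [x1, y1, conf1, x2, y2, conf2, ...]\n",
+    "# (what `output[i, 7:]` holds), so we compare only the (x, y) columns.\n",
+    "def closest_detection(base_landmarks, detections):\n",
+    "    base_xy = np.asarray(base_landmarks).reshape(-1, 3)[:, :2]\n",
+    "    best, best_dist = None, float('inf')\n",
+    "    for det in detections:\n",
+    "        det_xy = np.asarray(det).reshape(-1, 3)[:, :2]\n",
+    "        dist = np.linalg.norm(det_xy - base_xy, axis=1).mean()  # mean per-keypoint distance\n",
+    "        if dist < best_dist:\n",
+    "            best, best_dist = det, dist\n",
+    "    return best\n",
+    "\n",
+    "# Inside the sampling branch one could then store, e.g.:\n",
+    "#   kpts = closest_detection(base_landmarks, [o[7:] for o in output])\n",
+    "# instead of always taking output[0, 7:]."
+   ]
+  },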
+  {
+   "cell_type": "code",
+   "execution_count": 36,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Correctly loaded 88 videos from ../assets/dataset_videos/batch3/\n",
+      "From video_117.mp4 to video_204.mp4\n"
+     ]
+    }
+   ],
+   "source": [
+    "# landmark retrieval\n",
+    "videos_path = '../assets/dataset_videos/batch3/'\n",
+    "videos = [videos_path + v for v in os.listdir(videos_path) if v.endswith('.mp4')]\n",
+    "dataset = []\n",
+    "c = 0\n",
+    "first_vid, last_vid = '', ''\n",
+    "cf, cl = float('inf'), float('-inf')\n",
+    "\n",
+    "for v in videos:\n",
+    "\tif os.path.exists(v):\n",
+    "\t\tvn = os.path.basename(v)\n",
+    "\t\tdigit = vn.index('_') + 1\n",
+    "\t\ttry:\n",
+    "\t\t\tvideo_number = int(vn[digit:vn.index('.')])\n",
+    "\t\texcept ValueError:\n",
+    "\t\t\tprint(vn, 'has no index')\n",
+    "\t\t\tcontinue\n",
+    "\t\tif video_number < cf:\n",
+    "\t\t\tcf = video_number\n",
+    "\t\t\tfirst_vid = vn\n",
+    "\t\tif video_number > cl:\n",
+    "\t\t\tcl = video_number\n",
+    "\t\t\tlast_vid = vn\n",
+    "\telse:\n",
+    "\t\tc += 1\n",
+    "if c == 0:\n",
+    "\tprint(f\"Correctly loaded {len(videos)} videos from {videos_path}\")\n",
+    "\tprint(f\"From {first_vid} to {last_vid}\")"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### For a single video test"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 22,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
"dataset_photos\t\t labeled_dataset.csv\t\t single_video_good.mp4\n", + "dataset_videos\t\t single_extracted_landmarks.csv\n", + "extracted_landmarks.csv single_video_ambiguous.mp4\n" + ] + } + ], + "source": [ + "!ls ../assets" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\tfinished after 38s\n" + ] + } + ], + "source": [ + "video_path = '../assets/single_video_inclass_front.mp4'\n", + "single_sequence = landmarks_sequence_for_video(video_path)" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "metadata": {}, + "outputs": [], + "source": [ + "flatten_data = list(itertools.chain.from_iterable([single_sequence]))\n", + "flatten_data = list(itertools.chain.from_iterable(flatten_data))" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": {}, + "outputs": [], + "source": [ + "df = pd.DataFrame(columns='video group frame landmarks'.split(), data=flatten_data)" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "group\n", + "1 10\n", + "2 10\n", + "3 10\n", + "4 10\n", + "5 10\n", + "6 10\n", + "7 10\n", + "8 10\n", + "9 10\n", + "10 2\n", + "Name: count, dtype: int64" + ] + }, + "execution_count": 40, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.to_csv('../assets/single_extracted_landmarks_inclass_front.csv', index=False)\n", + "\n", + "df['group'].value_counts()" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### For multiple videos" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing video #0 at '../assets/dataset_videos/batch3/video_145.mp4'\n", + "\tfinished after 68s\n", + "Processing video #1 at '../assets/dataset_videos/batch3/video_195.mp4'\n", + "\tfinished after 22s\n", + "Processing video #2 at '../assets/dataset_videos/batch3/video_162.mp4'\n", + "\tfinished after 36s\n", + "Processing video #3 at '../assets/dataset_videos/batch3/video_142.mp4'\n", + "\tfinished after 56s\n", + "Processing video #4 at '../assets/dataset_videos/batch3/video_121.mp4'\n", + "\tfinished after 34s\n", + "Processing video #5 at '../assets/dataset_videos/batch3/video_165.mp4'\n", + "\tfinished after 29s\n", + "Processing video #6 at '../assets/dataset_videos/batch3/video_194.mp4'\n", + "\tfinished after 18s\n", + "Processing video #7 at '../assets/dataset_videos/batch3/video_149.mp4'\n", + "\tfinished after 78s\n", + "Processing video #8 at '../assets/dataset_videos/batch3/video_124.mp4'\n", + "\tfinished after 39s\n", + "Processing video #9 at '../assets/dataset_videos/batch3/video_180.mp4'\n", + "\tfinished after 24s\n", + "Processing video #10 at '../assets/dataset_videos/batch3/video_168.mp4'\n", + "\tfinished after 18s\n", + "Processing video #11 at '../assets/dataset_videos/batch3/video_157.mp4'\n", + "\tfinished after 57s\n", + "Processing video #12 at '../assets/dataset_videos/batch3/video_131.mp4'\n", + "\tfinished after 19s\n", + "Processing video #13 at '../assets/dataset_videos/batch3/video_204.mp4'\n", + "\tfinished after 36s\n", + "Processing video #14 at '../assets/dataset_videos/batch3/video_176.mp4'\n", + "\tfinished after 39s\n", + "Processing video #15 at '../assets/dataset_videos/batch3/video_161.mp4'\n", + "\tfinished after 30s\n", + "Processing video #16 at 
'../assets/dataset_videos/batch3/video_164.mp4'\n", + "\tfinished after 57s\n", + "Processing video #17 at '../assets/dataset_videos/batch3/video_118.mp4'\n", + "\tfinished after 36s\n", + "Processing video #18 at '../assets/dataset_videos/batch3/video_126.mp4'\n", + "\tfinished after 47s\n", + "Processing video #19 at '../assets/dataset_videos/batch3/video_197.mp4'\n", + "\tfinished after 29s\n", + "Processing video #20 at '../assets/dataset_videos/batch3/video_136.mp4'\n", + "\tfinished after 35s\n", + "Processing video #21 at '../assets/dataset_videos/batch3/video_134.mp4'\n", + "\tfinished after 40s\n", + "Processing video #22 at '../assets/dataset_videos/batch3/video_187.mp4'\n", + "\tfinished after 22s\n", + "Processing video #23 at '../assets/dataset_videos/batch3/video_151.mp4'\n", + "\tfinished after 16s\n", + "Processing video #24 at '../assets/dataset_videos/batch3/video_130.mp4'\n", + "\tfinished after 25s\n", + "Processing video #25 at '../assets/dataset_videos/batch3/video_117.mp4'\n", + "\tfinished after 19s\n", + "Processing video #26 at '../assets/dataset_videos/batch3/video_120.mp4'\n", + "\tfinished after 8s\n", + "Processing video #27 at '../assets/dataset_videos/batch3/video_170.mp4'\n", + "\tfinished after 28s\n", + "Processing video #28 at '../assets/dataset_videos/batch3/video_155.mp4'\n", + "\tfinished after 54s\n", + "Processing video #29 at '../assets/dataset_videos/batch3/video_179.mp4'\n", + "\tfinished after 12s\n", + "Processing video #30 at '../assets/dataset_videos/batch3/video_137.mp4'\n", + "\tfinished after 36s\n", + "Processing video #31 at '../assets/dataset_videos/batch3/video_119.mp4'\n", + "\tfinished after 17s\n", + "Processing video #32 at '../assets/dataset_videos/batch3/video_122.mp4'\n", + "\tfinished after 13s\n", + "Processing video #33 at '../assets/dataset_videos/batch3/video_152.mp4'\n", + "\tfinished after 35s\n", + "Processing video #34 at '../assets/dataset_videos/batch3/video_202.mp4'\n", + "\tfinished after 25s\n", + "Processing video #35 at '../assets/dataset_videos/batch3/video_150.mp4'\n", + "\tfinished after 21s\n", + "Processing video #36 at '../assets/dataset_videos/batch3/video_154.mp4'\n", + "\tfinished after 51s\n", + "Processing video #37 at '../assets/dataset_videos/batch3/video_153.mp4'\n", + "\tfinished after 21s\n", + "Processing video #38 at '../assets/dataset_videos/batch3/video_199.mp4'\n", + "\tfinished after 18s\n", + "Processing video #39 at '../assets/dataset_videos/batch3/video_141.mp4'\n", + "\tfinished after 93s\n", + "Processing video #40 at '../assets/dataset_videos/batch3/video_203.mp4'\n", + "\tfinished after 30s\n", + "Processing video #41 at '../assets/dataset_videos/batch3/video_139.mp4'\n", + "\tfinished after 21s\n", + "Processing video #42 at '../assets/dataset_videos/batch3/video_190.mp4'\n", + "\tfinished after 14s\n", + "Processing video #43 at '../assets/dataset_videos/batch3/video_178.mp4'\n", + "\tfinished after 33s\n", + "Processing video #44 at '../assets/dataset_videos/batch3/video_181.mp4'\n", + "\tfinished after 14s\n", + "Processing video #45 at '../assets/dataset_videos/batch3/video_148.mp4'\n", + "\tfinished after 74s\n", + "Processing video #46 at '../assets/dataset_videos/batch3/video_189.mp4'\n", + "\tfinished after 25s\n", + "Processing video #47 at '../assets/dataset_videos/batch3/video_183.mp4'\n", + "\tfinished after 28s\n", + "Processing video #48 at '../assets/dataset_videos/batch3/video_186.mp4'\n", + "\tfinished after 19s\n", + "Processing video #49 at 
'../assets/dataset_videos/batch3/video_163.mp4'\n", + "\tfinished after 32s\n", + "Processing video #50 at '../assets/dataset_videos/batch3/video_200.mp4'\n", + "\tfinished after 22s\n", + "Processing video #51 at '../assets/dataset_videos/batch3/video_158.mp4'\n", + "\tfinished after 16s\n", + "Processing video #52 at '../assets/dataset_videos/batch3/video_167.mp4'\n", + "\tfinished after 30s\n", + "Processing video #53 at '../assets/dataset_videos/batch3/video_166.mp4'\n", + "\tfinished after 27s\n", + "Processing video #54 at '../assets/dataset_videos/batch3/video_198.mp4'\n", + "\tfinished after 15s\n", + "Processing video #55 at '../assets/dataset_videos/batch3/video_123.mp4'\n", + "\tfinished after 10s\n", + "Processing video #56 at '../assets/dataset_videos/batch3/video_175.mp4'\n", + "\tfinished after 33s\n", + "Processing video #57 at '../assets/dataset_videos/batch3/video_146.mp4'\n", + "\tfinished after 31s\n", + "Processing video #58 at '../assets/dataset_videos/batch3/video_128.mp4'\n", + "\tfinished after 23s\n", + "Processing video #59 at '../assets/dataset_videos/batch3/video_127.mp4'\n", + "\tfinished after 16s\n", + "Processing video #60 at '../assets/dataset_videos/batch3/video_129.mp4'\n", + "\tfinished after 21s\n", + "Processing video #61 at '../assets/dataset_videos/batch3/video_191.mp4'\n", + "\tfinished after 30s\n", + "Processing video #62 at '../assets/dataset_videos/batch3/video_172.mp4'\n", + "\tfinished after 50s\n", + "Processing video #63 at '../assets/dataset_videos/batch3/video_185.mp4'\n", + "\tfinished after 22s\n", + "Processing video #64 at '../assets/dataset_videos/batch3/video_201.mp4'\n", + "\tfinished after 57s\n", + "Processing video #65 at '../assets/dataset_videos/batch3/video_193.mp4'\n", + "\tfinished after 19s\n", + "Processing video #66 at '../assets/dataset_videos/batch3/video_156.mp4'\n", + "\tfinished after 26s\n", + "Processing video #67 at '../assets/dataset_videos/batch3/video_169.mp4'\n", + "\tfinished after 29s\n", + "Processing video #68 at '../assets/dataset_videos/batch3/video_173.mp4'\n", + "\tfinished after 33s\n", + "Processing video #69 at '../assets/dataset_videos/batch3/video_133.mp4'\n", + "\tfinished after 25s\n", + "Processing video #70 at '../assets/dataset_videos/batch3/video_147.mp4'\n", + "\tfinished after 15s\n", + "Processing video #71 at '../assets/dataset_videos/batch3/video_171.mp4'\n", + "\tfinished after 31s\n", + "Processing video #72 at '../assets/dataset_videos/batch3/video_188.mp4'\n", + "\tfinished after 26s\n", + "Processing video #73 at '../assets/dataset_videos/batch3/video_182.mp4'\n", + "\tfinished after 28s\n", + "Processing video #74 at '../assets/dataset_videos/batch3/video_125.mp4'\n", + "\tfinished after 18s\n", + "Processing video #75 at '../assets/dataset_videos/batch3/video_135.mp4'\n", + "\tfinished after 27s\n", + "Processing video #76 at '../assets/dataset_videos/batch3/video_140.mp4'\n", + "\tfinished after 13s\n", + "Processing video #77 at '../assets/dataset_videos/batch3/video_174.mp4'\n", + "\tfinished after 41s\n", + "Processing video #78 at '../assets/dataset_videos/batch3/video_138.mp4'\n", + "\tfinished after 46s\n", + "Processing video #79 at '../assets/dataset_videos/batch3/video_184.mp4'\n", + "\tfinished after 14s\n", + "Processing video #80 at '../assets/dataset_videos/batch3/video_160.mp4'\n", + "\tfinished after 52s\n", + "Processing video #81 at '../assets/dataset_videos/batch3/video_132.mp4'\n", + "\tfinished after 44s\n", + "Processing video #82 at 
'../assets/dataset_videos/batch3/video_144.mp4'\n", + "\tfinished after 28s\n", + "Processing video #83 at '../assets/dataset_videos/batch3/video_159.mp4'\n", + "\tfinished after 37s\n", + "Processing video #84 at '../assets/dataset_videos/batch3/video_177.mp4'\n", + "\tfinished after 33s\n", + "Processing video #85 at '../assets/dataset_videos/batch3/video_192.mp4'\n", + "\tfinished after 22s\n", + "Processing video #86 at '../assets/dataset_videos/batch3/video_196.mp4'\n", + "\tfinished after 19s\n", + "Processing video #87 at '../assets/dataset_videos/batch3/video_143.mp4'\n", + "\tfinished after 26s\n" + ] + } + ], + "source": [ + "\n", + "for i, v in enumerate(videos):\n", + " print(f\"Processing video #{i} at '{v}'\")\n", + " sequences = landmarks_sequence_for_video(v)\n", + " dataset += [sequences]\n" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "metadata": {}, + "outputs": [], + "source": [ + "# this merges every set of landmarks. \n", + "flatten_data = list(itertools.chain.from_iterable(dataset))\n", + "flatten_data = list(itertools.chain.from_iterable(flatten_data))" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "metadata": {}, + "outputs": [], + "source": [ + "df = pd.DataFrame(columns='video group frame landmarks'.split(), data=flatten_data)" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>video</th>\n", + " <th>group</th>\n", + " <th>frame</th>\n", + " <th>landmarks</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>video_145.mp4</td>\n", + " <td>1</td>\n", + " <td>5</td>\n", + " <td>[322.3541259765625, 157.76251220703125, 0.9959...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>video_145.mp4</td>\n", + " <td>1</td>\n", + " <td>11</td>\n", + " <td>[322.62451171875, 157.82583618164062, 0.995745...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>video_145.mp4</td>\n", + " <td>1</td>\n", + " <td>17</td>\n", + " <td>[322.64154052734375, 157.83653259277344, 0.995...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>video_145.mp4</td>\n", + " <td>1</td>\n", + " <td>23</td>\n", + " <td>[321.91632080078125, 157.7036895751953, 0.9957...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>video_145.mp4</td>\n", + " <td>1</td>\n", + " <td>29</td>\n", + " <td>[320.7390441894531, 157.41201782226562, 0.9958...</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " video group frame \n", + "0 video_145.mp4 1 5 \\\n", + "1 video_145.mp4 1 11 \n", + "2 video_145.mp4 1 17 \n", + "3 video_145.mp4 1 23 \n", + "4 video_145.mp4 1 29 \n", + "\n", + " landmarks \n", + "0 [322.3541259765625, 157.76251220703125, 0.9959... \n", + "1 [322.62451171875, 157.82583618164062, 0.995745... \n", + "2 [322.64154052734375, 157.83653259277344, 0.995... \n", + "3 [321.91632080078125, 157.7036895751953, 0.9957... \n", + "4 [320.7390441894531, 157.41201782226562, 0.9958... 
" + ] + }, + "execution_count": 45, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "metadata": {}, + "outputs": [], + "source": [ + "df.to_csv('../assets/extracted_landmarks.csv', index=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>video</th>\n", + " <th>group</th>\n", + " <th>frame</th>\n", + " <th>landmarks</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + "Empty DataFrame\n", + "Columns: [video, group, frame, landmarks]\n", + "Index: []" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df[df['video'] == 'video3.mp4']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}