# -----------------------------------------------------------------------------
# Adapted from https://github.com/anibali/h36m-fetch
# Original license: Copyright (c) Aiden Nibali, under the Apache License.
# -----------------------------------------------------------------------------
import argparse
import os
import pickle
import tarfile
import xml.etree.ElementTree as ET
from os.path import join
import cv2
import numpy as np
from spacepy import pycdf
class PreprocessH36m:
"""Preprocess Human3.6M dataset.
Args:
metadata (str): Path to metadata.xml.
original_dir (str): Directory of the original dataset with all files
compressed. Specifically, .tgz files belonging to subject 1
should be placed under the subdirectory 's1'.
extracted_dir (str): Directory of the extracted files. If not given, it
will be placed under the same parent directory as original_dir.
        processed_dir (str): Directory of the processed files. If not given,
            it will be placed under the same parent directory as
            original_dir.
        sample_rate (int): Keep one frame out of every `sample_rate` frames.
            The source videos are 50 fps, so the processed data is
            `50 / sample_rate` fps. Default: 5.
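    Example:
        Expected layout of `original_dir` before processing (a sketch; only
        the archives this script reads are shown)::
            original_dir/
                s1/
                    Videos.tgz
                    D2_Positions.tgz
                    D3_Positions_mono.tgz
                s5/
                    ...
                s11/
                    ...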
"""
def __init__(self,
metadata,
original_dir,
extracted_dir=None,
processed_dir=None,
sample_rate=5):
self.metadata = metadata
self.original_dir = original_dir
self.sample_rate = sample_rate
if extracted_dir is None:
self.extracted_dir = join(
os.path.dirname(os.path.abspath(self.original_dir)),
'extracted')
else:
self.extracted_dir = extracted_dir
if processed_dir is None:
self.processed_dir = join(
os.path.dirname(os.path.abspath(self.original_dir)),
'processed')
else:
self.processed_dir = processed_dir
self.subjects = []
self.sequence_mappings = {}
self.action_names = {}
self.camera_ids = []
self._load_metadata()
self.subjects_annot = ['S1', 'S5', 'S6', 'S7', 'S8', 'S9', 'S11']
self.subjects_splits = {
'train': ['S1', 'S5', 'S6', 'S7', 'S8'],
'test': ['S9', 'S11']
}
self.extract_files = ['Videos', 'D2_Positions', 'D3_Positions_mono']
self.movable_joints = [
0, 1, 2, 3, 6, 7, 8, 12, 13, 14, 15, 17, 18, 19, 25, 26, 27
]
self.scale_factor = 1.2
self.image_sizes = {
'54138969': {
'width': 1000,
'height': 1002
},
'55011271': {
'width': 1000,
'height': 1000
},
'58860488': {
'width': 1000,
'height': 1000
},
'60457274': {
'width': 1000,
'height': 1002
}
}
def extract_tgz(self):
"""Extract files from self.extrct_files."""
os.makedirs(self.extracted_dir, exist_ok=True)
for subject in self.subjects_annot:
cur_dir = join(self.original_dir, subject.lower())
for file in self.extract_files:
filename = join(cur_dir, file + '.tgz')
print(f'Extracting {filename} ...')
with tarfile.open(filename) as tar:
tar.extractall(self.extracted_dir)
print('Extraction done.\n')
def generate_cameras_file(self):
"""Generate cameras.pkl which contains camera parameters for 11
subjects each with 4 cameras."""
cameras = {}
for subject in range(1, 12):
for camera in range(4):
key = (f'S{subject}', self.camera_ids[camera])
cameras[key] = self._get_camera_params(camera, subject)
        out_file = join(self.processed_dir, 'annotation_body3d', 'cameras.pkl')
        os.makedirs(os.path.dirname(out_file), exist_ok=True)
with open(out_file, 'wb') as fout:
pickle.dump(cameras, fout)
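        # The pickle maps (subject, camera_id) tuples to parameter dicts, so
        # a consumer can read it back with, e.g.:
        #     with open(out_file, 'rb') as f:
        #         cameras = pickle.load(f)
        #     param = cameras[('S1', '54138969')]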
print(f'Camera parameters have been written to "{out_file}".\n')
def generate_annotations(self):
"""Generate annotations for training and testing data."""
output_dir = join(self.processed_dir, 'annotation_body3d',
f'fps{50 // self.sample_rate}')
os.makedirs(output_dir, exist_ok=True)
for data_split in ('train', 'test'):
imgnames_all = []
centers_all = []
scales_all = []
kps2d_all = []
kps3d_all = []
for subject in self.subjects_splits[data_split]:
                for action, subaction in self.sequence_mappings[subject]:
if action == '1':
# exclude action "_ALL"
continue
for camera in self.camera_ids:
imgnames, centers, scales, kps2d, kps3d\
= self._load_annotations(
subject, action, subaction, camera)
imgnames_all.append(imgnames)
centers_all.append(centers)
scales_all.append(scales)
kps2d_all.append(kps2d)
kps3d_all.append(kps3d)
imgnames_all = np.concatenate(imgnames_all)
centers_all = np.concatenate(centers_all)
scales_all = np.concatenate(scales_all)
kps2d_all = np.concatenate(kps2d_all)
kps3d_all = np.concatenate(kps3d_all)
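            # Each sample pairs an image name with its bbox center [N, 2],
            # scale [N], 2D keypoints `part` [N, 17, 3] (x, y, visibility)
            # and camera-frame 3D keypoints `S` [N, 17, 4] (x, y, z in
            # meters, visibility).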
out_file = join(output_dir, f'h36m_{data_split}.npz')
np.savez(
out_file,
imgname=imgnames_all,
center=centers_all,
scale=scales_all,
part=kps2d_all,
S=kps3d_all)
print(
f'All annotations of {data_split}ing data have been written to'
f' "{out_file}". {len(imgnames_all)} samples in total.\n')
if data_split == 'train':
kps_3d_all = kps3d_all[..., :3] # remove visibility
mean_3d, std_3d = self._get_pose_stats(kps_3d_all)
kps_2d_all = kps2d_all[..., :2] # remove visibility
mean_2d, std_2d = self._get_pose_stats(kps_2d_all)
# centered around root
# the root keypoint is 0-index
kps_3d_rel = kps_3d_all[..., 1:, :] - kps_3d_all[..., :1, :]
mean_3d_rel, std_3d_rel = self._get_pose_stats(kps_3d_rel)
kps_2d_rel = kps_2d_all[..., 1:, :] - kps_2d_all[..., :1, :]
mean_2d_rel, std_2d_rel = self._get_pose_stats(kps_2d_rel)
stats = {
'joint3d_stats': {
'mean': mean_3d,
'std': std_3d
},
'joint2d_stats': {
'mean': mean_2d,
'std': std_2d
},
'joint3d_rel_stats': {
'mean': mean_3d_rel,
'std': std_3d_rel
},
'joint2d_rel_stats': {
'mean': mean_2d_rel,
'std': std_2d_rel
}
}
for name, stat_dict in stats.items():
out_file = join(output_dir, f'{name}.pkl')
with open(out_file, 'wb') as f:
pickle.dump(stat_dict, f)
                    print(f'Created statistics file: {out_file}')
@staticmethod
def _get_pose_stats(kps):
"""Get statistic information `mean` and `std` of pose data.
Args:
kps (ndarray): keypoints in shape [..., K, C] where K and C is
the keypoint category number and dimension.
Returns:
mean (ndarray): [K, C]
"""
assert kps.ndim > 2
K, C = kps.shape[-2:]
kps = kps.reshape(-1, K, C)
mean = kps.mean(axis=0)
std = kps.std(axis=0)
return mean, std
def _load_metadata(self):
"""Load meta data from metadata.xml."""
assert os.path.exists(self.metadata)
tree = ET.parse(self.metadata)
root = tree.getroot()
for i, tr in enumerate(root.find('mapping')):
if i == 0:
_, _, *self.subjects = [td.text for td in tr]
self.sequence_mappings \
= {subject: {} for subject in self.subjects}
elif i < 33:
action_id, subaction_id, *prefixes = [td.text for td in tr]
for subject, prefix in zip(self.subjects, prefixes):
self.sequence_mappings[subject][(action_id, subaction_id)]\
= prefix
for i, elem in enumerate(root.find('actionnames')):
action_id = str(i + 1)
self.action_names[action_id] = elem.text
self.camera_ids \
= [elem.text for elem in root.find('dbcameras/index2id')]
w0 = root.find('w0')
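        # w0.text is a whitespace-separated vector wrapped in brackets,
        # e.g. '[0.1 0.2 ...]'; strip the brackets before parsing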
self.cameras_raw = [float(num) for num in w0.text[1:-1].split()]
def _get_base_filename(self, subject, action, subaction, camera):
"""Get base filename given subject, action, subaction and camera."""
        prefix = self.sequence_mappings[subject][(action, subaction)]
        return f'{prefix}.{camera}'
def _get_camera_params(self, camera, subject):
"""Get camera parameters given camera id and subject id."""
metadata_slice = np.zeros(15)
start = 6 * (camera * 11 + (subject - 1))
metadata_slice[:6] = self.cameras_raw[start:start + 6]
metadata_slice[6:] = self.cameras_raw[265 + camera * 9 - 1:265 +
(camera + 1) * 9 - 1]
# extrinsics
x, y, z = -metadata_slice[0], metadata_slice[1], -metadata_slice[2]
R_x = np.array([[1, 0, 0], [0, np.cos(x), np.sin(x)],
[0, -np.sin(x), np.cos(x)]])
R_y = np.array([[np.cos(y), 0, np.sin(y)], [0, 1, 0],
[-np.sin(y), 0, np.cos(y)]])
R_z = np.array([[np.cos(z), np.sin(z), 0], [-np.sin(z),
np.cos(z), 0], [0, 0, 1]])
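        # compose the per-axis rotations into the extrinsic rotation matrix;
        # the angle signs and the transpose follow the original h36m-fetch
        # conversion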
R = (R_x @ R_y @ R_z).T
T = metadata_slice[3:6].reshape(-1, 1)
# convert unit from millimeter to meter
T *= 0.001
# intrinsics
c = metadata_slice[8:10, None]
f = metadata_slice[6:8, None]
# distortion
k = metadata_slice[10:13, None]
p = metadata_slice[13:15, None]
return {
'R': R,
'T': T,
'c': c,
'f': f,
'k': k,
'p': p,
'w': self.image_sizes[self.camera_ids[camera]]['width'],
'h': self.image_sizes[self.camera_ids[camera]]['height'],
'name': f'camera{camera + 1}',
'id': self.camera_ids[camera]
}
def _load_annotations(self, subject, action, subaction, camera):
"""Load annotations for a sequence."""
subj_dir = join(self.extracted_dir, subject)
basename = self._get_base_filename(subject, action, subaction, camera)
# load 2D keypoints
with pycdf.CDF(
join(subj_dir, 'MyPoseFeatures', 'D2_Positions',
basename + '.cdf')) as cdf:
kps_2d = np.array(cdf['Pose'])
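        # cdf['Pose'] has shape (1, num_frames, 64), i.e. 32 joints * (x, y)
        # flattened per frame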
num_frames = kps_2d.shape[1]
kps_2d = kps_2d.reshape((num_frames, 32, 2))[::self.sample_rate,
self.movable_joints]
kps_2d = np.concatenate([kps_2d, np.ones((len(kps_2d), 17, 1))],
axis=2)
# load 3D keypoints
with pycdf.CDF(
join(subj_dir, 'MyPoseFeatures', 'D3_Positions_mono',
basename + '.cdf')) as cdf:
kps_3d = np.array(cdf['Pose'])
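        # shape (1, num_frames, 96): 32 joints * (x, y, z), in millimeters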
kps_3d = kps_3d.reshape(
(num_frames, 32, 3))[::self.sample_rate,
self.movable_joints] / 1000.
kps_3d = np.concatenate([kps_3d, np.ones((len(kps_3d), 17, 1))],
axis=2)
# calculate bounding boxes
bboxes = np.stack([
np.min(kps_2d[:, :, 0], axis=1),
np.min(kps_2d[:, :, 1], axis=1),
np.max(kps_2d[:, :, 0], axis=1),
np.max(kps_2d[:, :, 1], axis=1)
],
axis=1)
centers = np.stack([(bboxes[:, 0] + bboxes[:, 2]) / 2,
(bboxes[:, 1] + bboxes[:, 3]) / 2],
axis=1)
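        # convert the padded bbox side length to the conventional scale unit
        # of 200 pixels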
scales = self.scale_factor * np.max(
bboxes[:, 2:] - bboxes[:, :2], axis=1) / 200
# extract frames and save imgnames
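        # Keep every `sample_rate`-th frame so the saved images align 1:1
        # with the downsampled keypoints; file names carry the 1-based
        # frame index.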
imgnames = []
video_path = join(subj_dir, 'Videos', basename + '.mp4')
sub_base = subject + '_' + basename.replace(' ', '_')
img_dir = join(self.processed_dir, 'images', subject, sub_base)
os.makedirs(img_dir, exist_ok=True)
prefix = join(subject, sub_base, sub_base)
cap = cv2.VideoCapture(video_path)
i = 0
while True:
success, img = cap.read()
if not success:
break
if i % self.sample_rate == 0:
imgname = f'{prefix}_{i + 1:06d}.jpg'
imgnames.append(imgname)
dest_path = join(self.processed_dir, 'images', imgname)
if not os.path.exists(dest_path):
cv2.imwrite(dest_path, img)
if len(imgnames) == len(centers):
break
i += 1
cap.release()
imgnames = np.array(imgnames)
        print(f'Annotations for sequence "{subject} {basename}" are loaded. '
f'{len(imgnames)} samples in total.')
return imgnames, centers, scales, kps_2d, kps_3d
def parse_args():
parser = argparse.ArgumentParser()
parser.add_argument(
'--metadata', type=str, required=True, help='Path to metadata.xml')
parser.add_argument(
'--original',
type=str,
required=True,
help='Directory of the original dataset with all files compressed. '
'Specifically, .tgz files belonging to subject 1 should be placed '
'under the subdirectory \"s1\".')
parser.add_argument(
'--extracted',
type=str,
default=None,
help='Directory of the extracted files. If not given, it will be '
'placed under the same parent directory as original_dir.')
parser.add_argument(
'--processed',
type=str,
default=None,
help='Directory of the processed files. If not given, it will be '
'placed under the same parent directory as original_dir.')
parser.add_argument(
'--sample_rate',
type=int,
default=5,
        help='Keep one frame out of every `sample_rate` frames. The source '
        'videos are 50 fps, so the output is `50 / sample_rate` fps. '
        'Default: 5.')
args = parser.parse_args()
return args
if __name__ == '__main__':
args = parse_args()
h36m = PreprocessH36m(
metadata=args.metadata,
original_dir=args.original,
extracted_dir=args.extracted,
processed_dir=args.processed,
sample_rate=args.sample_rate)
h36m.extract_tgz()
h36m.generate_cameras_file()
h36m.generate_annotations()