| import sys |
| import argparse |
| import cv2 |
| from lib.preprocess import h36m_coco_format, revise_kpts |
| from lib.hrnet.gen_kpts import gen_video_kpts as hrnet_pose |
| import os |
| import numpy as np |
| import torch |
| import torch.nn as nn |
| import glob |
| from tqdm import tqdm |
| import copy |
|
|
| sys.path.append(os.getcwd()) |
| from common.model_poseformer import PoseTransformerV2 as Model |
| from common.camera import * |
|
|
| import matplotlib |
| import matplotlib.pyplot as plt |
| from mpl_toolkits.mplot3d import Axes3D |
| import matplotlib.gridspec as gridspec |
|
|
| plt.switch_backend('agg') |
| matplotlib.rcParams['pdf.fonttype'] = 42 |
| matplotlib.rcParams['ps.fonttype'] = 42 |
|
|
| def show2Dpose(kps, img): |
| connections = [[0, 1], [1, 2], [2, 3], [0, 4], [4, 5], |
| [5, 6], [0, 7], [7, 8], [8, 9], [9, 10], |
| [8, 11], [11, 12], [12, 13], [8, 14], [14, 15], [15, 16]] |
|
|
| LR = np.array([0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0], dtype=bool) |
|
|
| lcolor = (255, 0, 0) |
| rcolor = (0, 0, 255) |
| thickness = 3 |
|
|
| for j,c in enumerate(connections): |
| start = map(int, kps[c[0]]) |
| end = map(int, kps[c[1]]) |
| start = list(start) |
| end = list(end) |
| cv2.line(img, (start[0], start[1]), (end[0], end[1]), lcolor if LR[j] else rcolor, thickness) |
| cv2.circle(img, (start[0], start[1]), thickness=-1, color=(0, 255, 0), radius=3) |
| cv2.circle(img, (end[0], end[1]), thickness=-1, color=(0, 255, 0), radius=3) |
|
|
| return img |
|
|
|
|
| def show3Dpose(vals, ax): |
| ax.view_init(elev=15., azim=70) |
|
|
| lcolor=(0,0,1) |
| rcolor=(1,0,0) |
|
|
| I = np.array( [0, 0, 1, 4, 2, 5, 0, 7, 8, 8, 14, 15, 11, 12, 8, 9]) |
| J = np.array( [1, 4, 2, 5, 3, 6, 7, 8, 14, 11, 15, 16, 12, 13, 9, 10]) |
|
|
| LR = np.array([0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0], dtype=bool) |
|
|
| for i in np.arange( len(I) ): |
| x, y, z = [np.array( [vals[I[i], j], vals[J[i], j]] ) for j in range(3)] |
| ax.plot(x, y, z, lw=2, color = lcolor if LR[i] else rcolor) |
|
|
| RADIUS = 0.72 |
| RADIUS_Z = 0.7 |
|
|
| xroot, yroot, zroot = vals[0,0], vals[0,1], vals[0,2] |
| ax.set_xlim3d([-RADIUS+xroot, RADIUS+xroot]) |
| ax.set_ylim3d([-RADIUS+yroot, RADIUS+yroot]) |
| ax.set_zlim3d([-RADIUS_Z+zroot, RADIUS_Z+zroot]) |
| ax.set_aspect('auto') |
|
|
| white = (1.0, 1.0, 1.0, 0.0) |
| ax.xaxis.set_pane_color(white) |
| ax.yaxis.set_pane_color(white) |
| ax.zaxis.set_pane_color(white) |
|
|
| ax.tick_params('x', labelbottom = False) |
| ax.tick_params('y', labelleft = False) |
| ax.tick_params('z', labelleft = False) |
|
|
|
|
| def get_pose2D(video_path, output_dir): |
| cap = cv2.VideoCapture(video_path) |
| width = cap.get(cv2.CAP_PROP_FRAME_WIDTH) |
| height = cap.get(cv2.CAP_PROP_FRAME_HEIGHT) |
|
|
| print('\nGenerating 2D pose...') |
| keypoints, scores = hrnet_pose(video_path, det_dim=416, num_peroson=1, gen_output=True) |
| keypoints, scores, valid_frames = h36m_coco_format(keypoints, scores) |
| re_kpts = revise_kpts(keypoints, scores, valid_frames) |
| print('Generating 2D pose successful!') |
|
|
| output_dir += 'input_2D/' |
| os.makedirs(output_dir, exist_ok=True) |
|
|
| output_npz = output_dir + 'keypoints.npz' |
| np.savez_compressed(output_npz, reconstruction=keypoints) |
|
|
|
|
| def img2video(video_path, output_dir): |
| cap = cv2.VideoCapture(video_path) |
| fps = int(cap.get(cv2.CAP_PROP_FPS)) + 5 |
|
|
| fourcc = cv2.VideoWriter_fourcc(*"mp4v") |
|
|
| names = sorted(glob.glob(os.path.join(output_dir + 'pose/', '*.png'))) |
| img = cv2.imread(names[0]) |
| size = (img.shape[1], img.shape[0]) |
|
|
| videoWrite = cv2.VideoWriter(output_dir + video_name + '.mp4', fourcc, fps, size) |
|
|
| for name in names: |
| img = cv2.imread(name) |
| videoWrite.write(img) |
|
|
| videoWrite.release() |
|
|
|
|
| def showimage(ax, img): |
| ax.set_xticks([]) |
| ax.set_yticks([]) |
| plt.axis('off') |
| ax.imshow(img) |
|
|
|
|
| def get_pose3D(video_path, output_dir): |
| args, _ = argparse.ArgumentParser().parse_known_args() |
| args.embed_dim_ratio, args.depth, args.frames = 32, 4, 243 |
| args.number_of_kept_frames, args.number_of_kept_coeffs = 27, 27 |
| args.pad = (args.frames - 1) // 2 |
| args.previous_dir = 'checkpoint/' |
| args.n_joints, args.out_joints = 17, 17 |
|
|
| |
| cuda_available = torch.cuda.is_available() |
| print(f"CUDA available in get_pose3D: {cuda_available}") |
| if cuda_available: |
| print(f"CUDA device count: {torch.cuda.device_count()}") |
| print(f"CUDA device name: {torch.cuda.get_device_name(0)}") |
| |
| device = torch.device('cuda' if cuda_available else 'cpu') |
| print(f"Using device: {device}") |
| |
| base_model = Model(args=args) |
| |
| |
| if cuda_available: |
| model = nn.DataParallel(base_model).to(device) |
| else: |
| model = base_model.to(device) |
|
|
| model_dict = model.state_dict() |
| |
| |
| |
| if os.path.exists("./demo/lib/checkpoint/27_243_45.2.bin"): |
| model_path = "./demo/lib/checkpoint/27_243_45.2.bin" |
| elif os.path.exists("./lib/checkpoint/27_243_45.2.bin"): |
| model_path = "./lib/checkpoint/27_243_45.2.bin" |
| else: |
| model_path = "./checkpoint/27_243_45.2.bin" |
|
|
| map_location = device |
| pre_dict = torch.load(model_path, map_location=map_location, weights_only=False) |
| |
| |
| state_dict = pre_dict['model_pos'] |
| from collections import OrderedDict |
| new_state_dict = OrderedDict() |
| |
| |
| checkpoint_has_module = any(k.startswith('module.') for k in state_dict.keys()) |
| model_has_module = isinstance(model, nn.DataParallel) |
| |
| if checkpoint_has_module and not model_has_module: |
| |
| for k, v in state_dict.items(): |
| name = k[7:] if k.startswith('module.') else k |
| new_state_dict[name] = v |
| elif not checkpoint_has_module and model_has_module: |
| |
| for k, v in state_dict.items(): |
| name = 'module.' + k if not k.startswith('module.') else k |
| new_state_dict[name] = v |
| else: |
| |
| new_state_dict = state_dict |
| |
| model.load_state_dict(new_state_dict, strict=True) |
|
|
| model.eval() |
|
|
| |
| keypoints = np.load(output_dir + 'input_2D/keypoints.npz', allow_pickle=True)['reconstruction'] |
|
|
| cap = cv2.VideoCapture(video_path) |
| video_length = int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) |
|
|
| |
| print('\nGenerating 3D pose...') |
| keypoints_3D = [] |
| for i in tqdm(range(video_length)): |
| ret, img = cap.read() |
| if img is None: |
| continue |
| img_size = img.shape |
|
|
| |
| start = max(0, i - args.pad) |
| end = min(i + args.pad, len(keypoints[0])-1) |
|
|
| input_2D_no = keypoints[0][start:end+1] |
| |
| left_pad, right_pad = 0, 0 |
| if input_2D_no.shape[0] != args.frames: |
| if i < args.pad: |
| left_pad = args.pad - i |
| if i > len(keypoints[0]) - args.pad - 1: |
| right_pad = i + args.pad - (len(keypoints[0]) - 1) |
|
|
| input_2D_no = np.pad(input_2D_no, ((left_pad, right_pad), (0, 0), (0, 0)), 'edge') |
| |
| joints_left = [4, 5, 6, 11, 12, 13] |
| joints_right = [1, 2, 3, 14, 15, 16] |
|
|
| |
| input_2D = normalize_screen_coordinates(input_2D_no, w=img_size[1], h=img_size[0]) |
|
|
| input_2D_aug = copy.deepcopy(input_2D) |
| input_2D_aug[ :, :, 0] *= -1 |
| input_2D_aug[ :, joints_left + joints_right] = input_2D_aug[ :, joints_right + joints_left] |
| input_2D = np.concatenate((np.expand_dims(input_2D, axis=0), np.expand_dims(input_2D_aug, axis=0)), 0) |
| |
| |
| input_2D = input_2D[np.newaxis, :, :, :, :] |
|
|
| input_2D = torch.from_numpy(input_2D.astype('float32')).to(device) |
|
|
| N = input_2D.size(0) |
|
|
| |
| output_3D_non_flip = model(input_2D[:, 0]) |
| output_3D_flip = model(input_2D[:, 1]) |
| |
|
|
| output_3D_flip[:, :, :, 0] *= -1 |
| output_3D_flip[:, :, joints_left + joints_right, :] = output_3D_flip[:, :, joints_right + joints_left, :] |
|
|
| output_3D = (output_3D_non_flip + output_3D_flip) / 2 |
|
|
| output_3D[:, :, 0, :] = 0 |
| post_out = output_3D[0, 0].cpu().detach().numpy() |
| keypoints_3D.append(post_out) |
| |
|
|
| rot = [0.1407056450843811, -0.1500701755285263, -0.755240797996521, 0.6223280429840088] |
| rot = np.array(rot, dtype='float32') |
| post_out = camera_to_world(post_out, R=rot, t=0) |
| post_out[:, 2] -= np.min(post_out[:, 2]) |
|
|
| input_2D_no = input_2D_no[args.pad] |
|
|
| |
| image = show2Dpose(input_2D_no, copy.deepcopy(img)) |
|
|
| output_dir_2D = output_dir +'pose2D/' |
| os.makedirs(output_dir_2D, exist_ok=True) |
| cv2.imwrite(output_dir_2D + str(('%04d'% i)) + '_2D.png', image) |
|
|
| |
| fig = plt.figure(figsize=(9.6, 5.4)) |
| gs = gridspec.GridSpec(1, 1) |
| gs.update(wspace=-0.00, hspace=0.05) |
| ax = plt.subplot(gs[0], projection='3d') |
| show3Dpose( post_out, ax) |
|
|
| output_dir_3D = output_dir +'pose3D/' |
| os.makedirs(output_dir_3D, exist_ok=True) |
| plt.savefig(output_dir_3D + str(('%04d'% i)) + '_3D.png', dpi=200, format='png', bbox_inches = 'tight') |
| plt.clf() |
| plt.close(fig) |
| |
| output_npz = output_dir + 'keypoints_3D.npz' |
| np.savez_compressed(output_npz, reconstruction=keypoints_3D) |
| print('Generating 3D pose successful!') |
|
|
| |
| image_dir = 'results/' |
| image_2d_dir = sorted(glob.glob(os.path.join(output_dir_2D, '*.png'))) |
| image_3d_dir = sorted(glob.glob(os.path.join(output_dir_3D, '*.png'))) |
|
|
| print('\nGenerating demo...') |
| for i in tqdm(range(len(image_2d_dir))): |
| image_2d = plt.imread(image_2d_dir[i]) |
| image_3d = plt.imread(image_3d_dir[i]) |
|
|
| |
| edge = (image_2d.shape[1] - image_2d.shape[0]) // 2 |
| image_2d = image_2d[:, edge:image_2d.shape[1] - edge] |
|
|
| edge = 130 |
| image_3d = image_3d[edge:image_3d.shape[0] - edge, edge:image_3d.shape[1] - edge] |
|
|
| |
| font_size = 12 |
| fig = plt.figure(figsize=(15.0, 5.4)) |
| ax = plt.subplot(121) |
| showimage(ax, image_2d) |
| ax.set_title("Input", fontsize = font_size) |
|
|
| ax = plt.subplot(122) |
| showimage(ax, image_3d) |
| ax.set_title("Reconstruction", fontsize = font_size) |
|
|
| |
| output_dir_pose = output_dir +'pose/' |
| os.makedirs(output_dir_pose, exist_ok=True) |
| plt.subplots_adjust(top=1, bottom=0, right=1, left=0, hspace=0, wspace=0) |
| plt.margins(0, 0) |
| plt.savefig(output_dir_pose + str(('%04d'% i)) + '_pose.png', dpi=200, bbox_inches = 'tight') |
| plt.clf() |
| plt.close(fig) |
|
|
| if __name__ == "__main__": |
| parser = argparse.ArgumentParser() |
| parser.add_argument('--video', type=str, default='sample_video.mp4', help='input video') |
| parser.add_argument('--gpu', type=str, default='0', help='GPU device ID (set CUDA_VISIBLE_DEVICES before running if needed)') |
| args = parser.parse_args() |
|
|
| |
| |
| |
| |
| |
| print(f"CUDA available: {torch.cuda.is_available()}") |
| if torch.cuda.is_available(): |
| print(f"CUDA device count: {torch.cuda.device_count()}") |
| print(f"Current device: {torch.cuda.current_device()}") |
| print(f"Device name: {torch.cuda.get_device_name(0)}") |
| if "CUDA_VISIBLE_DEVICES" in os.environ: |
| print(f"CUDA_VISIBLE_DEVICES={os.environ['CUDA_VISIBLE_DEVICES']}") |
| else: |
| print("WARNING: CUDA is not available!") |
| print("This might be because:") |
| print(" 1. CUDA_VISIBLE_DEVICES was set incorrectly") |
| print(" 2. PyTorch was installed without CUDA support") |
| print(" 3. GPU drivers are not installed") |
| print("\nTo use GPU, set CUDA_VISIBLE_DEVICES BEFORE running Python:") |
| print(" PowerShell: $env:CUDA_VISIBLE_DEVICES='0'") |
| print(" Bash: export CUDA_VISIBLE_DEVICES=0") |
| print("\nOr don't set it at all to use the default GPU") |
|
|
| video_path = './demo/video/' + args.video |
| video_name = video_path.split('/')[-1].split('.')[0] |
| output_dir = './demo/output/' + video_name + '/' |
|
|
| get_pose2D(video_path, output_dir) |
| get_pose3D(video_path, output_dir) |
| img2video(video_path, output_dir) |
| print('Generating demo successful!') |
|
|
|
|
|
|