From f00d1f68a3888237fd2851b45358cff90a873333 Mon Sep 17 00:00:00 2001 From: Sixone Jiang Date: Thu, 27 Mar 2025 14:56:31 +0800 Subject: [PATCH] Fix crop_driving is not track right --- src/live_portrait_pipeline.py | 2 +- src/utils/cropper.py | 51 +++++++++++++++-------------------- 2 files changed, 23 insertions(+), 30 deletions(-) diff --git a/src/live_portrait_pipeline.py b/src/live_portrait_pipeline.py index 2a58d403..444aa90a 100644 --- a/src/live_portrait_pipeline.py +++ b/src/live_portrait_pipeline.py @@ -151,7 +151,7 @@ def execute(self, args: ArgumentConfig): else: n_frames = driving_n_frames if inf_cfg.flag_crop_driving_video or (not is_square_video(args.driving)): - ret_d = self.cropper.crop_driving_video(driving_rgb_lst) + ret_d = self.cropper.crop_driving_video(driving_rgb_lst, crop_cfg) log(f'Driving video is cropped, {len(ret_d["frame_crop_lst"])} frames are processed.') if len(ret_d["frame_crop_lst"]) is not n_frames and flag_is_driving_video: n_frames = min(n_frames, len(ret_d["frame_crop_lst"])) diff --git a/src/utils/cropper.py b/src/utils/cropper.py index 64fd75ec..347c2782 100644 --- a/src/utils/cropper.py +++ b/src/utils/cropper.py @@ -222,7 +222,8 @@ def crop_source_video(self, source_rgb_lst, crop_cfg: CropConfig, **kwargs): "M_c2o_lst": trajectory.M_c2o_lst, } - def crop_driving_video(self, driving_rgb_lst, **kwargs): + # Fast fix untrack bug + def crop_driving_video(self, driving_rgb_lst, crop_cfg: CropConfig, **kwargs): """Tracking based landmarks/alignment and cropping""" trajectory = Trajectory() direction = kwargs.get("direction", "large-small") @@ -231,7 +232,8 @@ def crop_driving_video(self, driving_rgb_lst, **kwargs): src_face = self.face_analysis_wrapper.get( contiguous(frame_rgb[..., ::-1]), flag_do_landmark_2d_106=True, - direction=direction, + direction=crop_cfg.direction, + max_face_num=crop_cfg.max_face_num, ) if len(src_face) == 0: log(f"No face detected in the frame #{idx}") @@ -247,34 +249,25 @@ def crop_driving_video(self, driving_rgb_lst, **kwargs): trajectory.end = idx trajectory.lmk_lst.append(lmk) - ret_bbox = parse_bbox_from_landmark( - lmk, - scale=self.crop_cfg.scale_crop_driving_video, - vx_ratio_crop_driving_video=self.crop_cfg.vx_ratio_crop_driving_video, - vy_ratio=self.crop_cfg.vy_ratio_crop_driving_video, - )["bbox"] - bbox = [ - ret_bbox[0, 0], - ret_bbox[0, 1], - ret_bbox[2, 0], - ret_bbox[2, 1], - ] # 4, - trajectory.bbox_lst.append(bbox) # bbox - trajectory.frame_rgb_lst.append(frame_rgb) - - global_bbox = average_bbox_lst(trajectory.bbox_lst) - - for idx, (frame_rgb, lmk) in enumerate(zip(trajectory.frame_rgb_lst, trajectory.lmk_lst)): - ret_dct = crop_image_by_bbox( - frame_rgb, - global_bbox, - lmk=lmk, - dsize=kwargs.get("dsize", 512), - flag_rot=False, - borderValue=(0, 0, 0), + + # crop the face + ret_dct = crop_image( + frame_rgb, # ndarray + lmk, # 106x2 or Nx2 + dsize=crop_cfg.dsize, + scale=crop_cfg.scale_crop_driving_video, + vx_ratio=crop_cfg.vx_ratio_crop_driving_video, + vy_ratio=crop_cfg.vy_ratio_crop_driving_video, + flag_do_rot=crop_cfg.flag_do_rot, ) - trajectory.frame_rgb_crop_lst.append(ret_dct["img_crop"]) - trajectory.lmk_crop_lst.append(ret_dct["lmk_crop"]) + + # update a 256x256 version for network input + ret_dct["img_crop_256x256"] = cv2.resize(ret_dct["img_crop"], (256, 256), interpolation=cv2.INTER_AREA) + ret_dct["lmk_crop_256x256"] = ret_dct["pt_crop"] * 256 / crop_cfg.dsize + + trajectory.frame_rgb_crop_lst.append(ret_dct["img_crop_256x256"]) + trajectory.lmk_crop_lst.append(ret_dct["lmk_crop_256x256"]) + # trajectory.M_c2o_lst.append(ret_dct['M_c2o']) return { "frame_crop_lst": trajectory.frame_rgb_crop_lst,