From f00d1f68a3888237fd2851b45358cff90a873333 Mon Sep 17 00:00:00 2001
From: Sixone Jiang <sixonejiang@SixonedeMacBook-Pro.local>
Date: Thu, 27 Mar 2025 14:56:31 +0800
Subject: [PATCH] Fix crop_driving_video not tracking the face correctly

---
 src/live_portrait_pipeline.py |  2 +-
 src/utils/cropper.py          | 51 +++++++++++++++--------------------
 2 files changed, 23 insertions(+), 30 deletions(-)

diff --git a/src/live_portrait_pipeline.py b/src/live_portrait_pipeline.py
index 2a58d403..444aa90a 100644
--- a/src/live_portrait_pipeline.py
+++ b/src/live_portrait_pipeline.py
@@ -151,7 +151,7 @@ def execute(self, args: ArgumentConfig):
             else:
                 n_frames = driving_n_frames
             if inf_cfg.flag_crop_driving_video or (not is_square_video(args.driving)):
-                ret_d = self.cropper.crop_driving_video(driving_rgb_lst)
+                ret_d = self.cropper.crop_driving_video(driving_rgb_lst, crop_cfg)
                 log(f'Driving video is cropped, {len(ret_d["frame_crop_lst"])} frames are processed.')
                 if len(ret_d["frame_crop_lst"]) is not n_frames and flag_is_driving_video:
                     n_frames = min(n_frames, len(ret_d["frame_crop_lst"]))
diff --git a/src/utils/cropper.py b/src/utils/cropper.py
index 64fd75ec..347c2782 100644
--- a/src/utils/cropper.py
+++ b/src/utils/cropper.py
@@ -222,7 +222,8 @@ def crop_source_video(self, source_rgb_lst, crop_cfg: CropConfig, **kwargs):
             "M_c2o_lst": trajectory.M_c2o_lst,
         }
 
-    def crop_driving_video(self, driving_rgb_lst, **kwargs):
+    # Quick fix for the face-tracking bug: each frame is cropped from its own landmarks.
+    def crop_driving_video(self, driving_rgb_lst, crop_cfg: CropConfig, **kwargs):
         """Tracking based landmarks/alignment and cropping"""
         trajectory = Trajectory()
         direction = kwargs.get("direction", "large-small")
@@ -231,7 +232,8 @@ def crop_driving_video(self, driving_rgb_lst, **kwargs):
                 src_face = self.face_analysis_wrapper.get(
                     contiguous(frame_rgb[..., ::-1]),
                     flag_do_landmark_2d_106=True,
-                    direction=direction,
+                    direction=crop_cfg.direction,
+                    max_face_num=crop_cfg.max_face_num,
                 )
                 if len(src_face) == 0:
                     log(f"No face detected in the frame #{idx}")
@@ -247,34 +249,25 @@ def crop_driving_video(self, driving_rgb_lst, **kwargs):
                 trajectory.end = idx
 
             trajectory.lmk_lst.append(lmk)
-            ret_bbox = parse_bbox_from_landmark(
-                lmk,
-                scale=self.crop_cfg.scale_crop_driving_video,
-                vx_ratio_crop_driving_video=self.crop_cfg.vx_ratio_crop_driving_video,
-                vy_ratio=self.crop_cfg.vy_ratio_crop_driving_video,
-            )["bbox"]
-            bbox = [
-                ret_bbox[0, 0],
-                ret_bbox[0, 1],
-                ret_bbox[2, 0],
-                ret_bbox[2, 1],
-            ]  # 4,
-            trajectory.bbox_lst.append(bbox)  # bbox
-            trajectory.frame_rgb_lst.append(frame_rgb)
-
-        global_bbox = average_bbox_lst(trajectory.bbox_lst)
-
-        for idx, (frame_rgb, lmk) in enumerate(zip(trajectory.frame_rgb_lst, trajectory.lmk_lst)):
-            ret_dct = crop_image_by_bbox(
-                frame_rgb,
-                global_bbox,
-                lmk=lmk,
-                dsize=kwargs.get("dsize", 512),
-                flag_rot=False,
-                borderValue=(0, 0, 0),
+
+            # crop the face
+            ret_dct = crop_image(
+                frame_rgb,  # ndarray
+                lmk,  # 106x2 or Nx2
+                dsize=crop_cfg.dsize,
+                scale=crop_cfg.scale_crop_driving_video,
+                vx_ratio=crop_cfg.vx_ratio_crop_driving_video,
+                vy_ratio=crop_cfg.vy_ratio_crop_driving_video,
+                flag_do_rot=crop_cfg.flag_do_rot,
             )
-            trajectory.frame_rgb_crop_lst.append(ret_dct["img_crop"])
-            trajectory.lmk_crop_lst.append(ret_dct["lmk_crop"])
+
+            # update a 256x256 version for network input
+            ret_dct["img_crop_256x256"] = cv2.resize(ret_dct["img_crop"], (256, 256), interpolation=cv2.INTER_AREA)
+            ret_dct["lmk_crop_256x256"] = ret_dct["pt_crop"] * 256 / crop_cfg.dsize
+
+            trajectory.frame_rgb_crop_lst.append(ret_dct["img_crop_256x256"])
+            trajectory.lmk_crop_lst.append(ret_dct["lmk_crop_256x256"])
+            # trajectory.M_c2o_lst.append(ret_dct['M_c2o'])
 
         return {
             "frame_crop_lst": trajectory.frame_rgb_crop_lst,