SEA/segmentation.py at main · J22Melody/SEA · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
#!/usr/bin/env python3
import argparse
import os
import subprocess
import shlex
from itertools import product
from tqdm import tqdm
import multiprocessing


def process_video(vid, args, model, sign_b, sign_o):
    # Pin this worker process to a dedicated CPU from its allowed set.
    try:
        available_cpus = os.sched_getaffinity(0)
        cpu_list = sorted(available_cpus)
        cpu_id = cpu_list[os.getpid() % len(cpu_list)]
        os.sched_setaffinity(0, {cpu_id})
    except Exception as e:
        print(f"Error setting CPU affinity for video {vid}: {e}")

    # Determine the sub-directory name.
    model_name = model
    if model_name.startswith("model_"):
        model_name = model_name[len("model_"):]
    if model_name.endswith(".pth"):
        model_name = model_name[:-4]
    sub_save_dir = os.path.join(args.save_dir, f"{model_name}_{sign_b}_{sign_o}")
    os.makedirs(sub_save_dir, exist_ok=True)

    pose_file = os.path.join(args.pose_dir, f"{vid}.pose")
    elan_file = os.path.join(sub_save_dir, f"{vid}.eaf")

    # Skip processing if output already exists and overwrite is not set.
    if not args.overwrite and os.path.exists(elan_file):
        return f"Skipping {vid} for {model_name}_{sign_b}_{sign_o}: output already exists at {elan_file}"

    # Build the pose_to_segments command using the current combination.
    cmd = (
        f"pose_to_segments --no-pose-link --model={shlex.quote(model)} "
        f"--pose={shlex.quote(pose_file)} --elan={shlex.quote(elan_file)} "
        f"--sign-b-threshold {sign_b} --sign-o-threshold {sign_o}"
    )

    # Check for the video file.
    video_file = os.path.join(args.video_dir, f"{vid}.mp4")
    if os.path.exists(video_file):
        cmd += f" --video={shlex.quote(f'./{vid}.mp4')}"

    # Check for the automatic subtitles file (.vtt or .srt)
    subtitle_file = None
    for ext in ['.vtt', '.srt']:
        candidate = os.path.join(args.subtitle_dir, f"{vid}{ext}")
        if os.path.exists(candidate):
            subtitle_file = candidate
            break
    if subtitle_file:
        cmd += f" --subtitles={shlex.quote(subtitle_file)}"

    # Check for the manually corrected subtitles file (.vtt or .srt)
    subtitle_corrected_file = None
    for ext in ['.vtt', '.srt']:
        candidate = os.path.join(args.subtitle_dir_corrected, f"{vid}{ext}")
        if os.path.exists(candidate):
            subtitle_corrected_file = candidate
            break
    if subtitle_corrected_file:
        cmd += f" --subtitles-corrected={shlex.quote(subtitle_corrected_file)}"

    # Run the command.
    print(cmd)
    result = subprocess.run(cmd, shell=True)
    if result.returncode != 0:
        return f"Error processing video id {vid} for {model_name}_{sign_b}_{sign_o} (return code {result.returncode}): {cmd}"
    return f"Processed {vid} for {model_name}_{sign_b}_{sign_o}"

def process_task(task):
    vid, model, sign_b, sign_o, args = task
    return process_video(vid, args, model, sign_b, sign_o)


def main():
    parser = argparse.ArgumentParser(
        description="Segment videos based on their pose files and save results."
    )
    parser.add_argument(
        "--video_ids",
        type=str,
        default="/users/zifan/subtitle_align/data/bobsl_align.txt",
        help="Path to text file containing video ids (one per line), or 'all' to auto-discover from pose_dir."
    )
    parser.add_argument(
        "--pose_dir",
        type=str,
        default="/scratch/shared/beegfs/zifan/bobsl/video_features/mediapipe_v2_refine_face_complexity_2",
        help="Directory where pose files are stored."
    )
    parser.add_argument(
        "--save_dir",
        type=str,
        default="/scratch/shared/beegfs/zifan/bobsl/segmentation",
        help="Directory to store segmentation results."
    )
    parser.add_argument(
        "--overwrite",
        action='store_true',
        help="Overwrite existing feature files if set"
    )
    parser.add_argument(
        "--video_dir",
        type=str,
        default="/users/zifan/BOBSL/derivatives/original_videos",
        help="Directory containing original videos."
    )
    parser.add_argument(
        "--subtitle_dir",
        type=str,
        default="/users/zifan/BOBSL/v1.4/automatic_annotations/signing_aligned_subtitles/audio_aligned_heuristic_correction",
        help="Directory containing automatically aligned subtitles."
    )
    parser.add_argument(
        "--subtitle_dir_corrected",
        type=str,
        default="/users/zifan/BOBSL/v1.4/manual_annotations/signing_aligned_subtitles",
        help="Directory containing manually corrected subtitles."
    )
    parser.add_argument(
        "--num_workers",
        type=int,
        default=1,
        help="Number of parallel workers to process videos. Default is 1 (sequential processing)."
    )
    parser.add_argument("--model", nargs='+', default=["model_E4s-1.pth"], type=str, help="Path(s) to model file")
    parser.add_argument("--sign-b-threshold", nargs='+', default=[60], type=int, help="Threshold(s) for sign B")
    parser.add_argument("--sign-o-threshold", nargs='+', default=[50], type=int, help="Threshold(s) for sign O")
    args = parser.parse_args()

    # Ensure that the save directory exists.
    os.makedirs(args.save_dir, exist_ok=True)

    # Determine video IDs
    if args.video_ids.lower() == "all":
        # Discover all pose files
        try:
            files = os.listdir(args.pose_dir)
            video_ids = [os.path.splitext(f)[0] for f in files if f.endswith('.pose')]
            print(f"Discovered {len(video_ids)} videos from pose_dir: {video_ids}")
        except Exception as e:
            print(f"Error listing pose_dir '{args.pose_dir}': {e}")
            return
    else:
        # Read video ids from the provided file.
        with open(args.video_ids, "r") as file:
            video_ids = [line.strip() for line in file if line.strip()]

    # Create all combinations of model, sign-b-threshold, and sign-o-threshold.
    combinations = list(product(args.model, args.sign_b_threshold, args.sign_o_threshold))

    # Build tasks as (video_id, model, sign_b, sign_o, args) for each video and each combination.
    tasks = []
    for vid in video_ids:
        for combo in combinations:
            tasks.append((vid, combo[0], combo[1], combo[2], args))

    # Process tasks
    if args.num_workers > 1:
        with multiprocessing.Pool(args.num_workers) as pool:
            for res in tqdm(pool.imap_unordered(process_task, tasks),
                            total=len(tasks), desc="Processing videos"):
                tqdm.write(res)
    else:
        for task in tqdm(tasks, desc="Processing videos"):
            res = process_task(task)
            tqdm.write(res)

if __name__ == "__main__":
    main()