From b51b46f00f8ce8b929aecf1e0d5ce9918859123f Mon Sep 17 00:00:00 2001
From: happys <50853067+guifaChild@users.noreply.github.com>
Date: Sat, 7 Jun 2025 08:33:18 +0800
Subject: [PATCH] Remove pycache

---
 README.md                          | 36 +++++++++++++-
 video_matcher/__init__.py          | 11 +++++
 video_matcher/audio_transcriber.py | 27 ++++++++++
 video_matcher/cli.py               | 31 ++++++++++++
 video_matcher/frame_extractor.py   | 38 ++++++++++++++
 video_matcher/matching.py          | 79 ++++++++++++++++++++++++++++++
 6 files changed, 221 insertions(+), 1 deletion(-)
 create mode 100644 video_matcher/__init__.py
 create mode 100644 video_matcher/audio_transcriber.py
 create mode 100644 video_matcher/cli.py
 create mode 100644 video_matcher/frame_extractor.py
 create mode 100644 video_matcher/matching.py

diff --git a/README.md b/README.md
index 2840109..ed3d872 100644
--- a/README.md
+++ b/README.md
@@ -1,2 +1,36 @@
 # codextest
-This is a project generated by codex.
+
+This is a sample project generated by Codex. It contains a simple "video_matcher" package for locating where a short clip appears inside a longer video.
+
+## Features
+- Uses OpenCV to extract the mean grayscale intensity of each frame as a feature.
+- Compares feature sequences with dynamic time warping (DTW) implemented via dynamic programming.
+- Provides a command-line tool, `video_matcher.cli`, that finds the best matching position between two videos.
+
+## Installing dependencies
+The sample code depends on the following third-party libraries:
+- `opencv-python`
+- `SpeechRecognition` (optional, for speech-to-text)
+
+Make sure these libraries are installed before running the code, for example:
+
+```bash
+pip install opencv-python SpeechRecognition
+```
+
+## Usage
+```
+python -m video_matcher.cli path/to/long.mp4 path/to/short.mp4 --stride 5 --window 10
+```
+Sample output:
+```
+Extracting features from long video...
+Extracting features from short video...
+Matching videos...
+Best match starts at frame 123 with cost 456.7
+```
+
+This output means the segment most similar to the short video was found around frame 123 of the long video, with a matching cost of 456.7 (lower means more similar). Note that when `--stride` is greater than 1, the reported index refers to the sampled frame sequence, so the original frame number is approximately the index multiplied by the stride.
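+
+## Python API
+The matching functions can also be called directly from Python instead of through the CLI. Below is a minimal sketch; the file paths are placeholders:
+
+```python
+from video_matcher import extract_frame_features, find_best_match
+
+# Sample every 5th frame of each video and use its mean grayscale value as the feature.
+long_features = extract_frame_features("long.mp4", stride=5)
+short_features = extract_frame_features("short.mp4", stride=5)
+
+# Try start positions every 10 sampled frames and keep the one with the lowest DTW cost.
+index, cost = find_best_match(long_features, short_features, window=10)
+print(index, cost)
+```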
+
+## Limitations
+This project is for demonstration purposes only. Real-world use may require more sophisticated features and algorithms, such as keyframe matching, speech recognition, or deep-learning features; users can extend it as needed.
diff --git a/video_matcher/__init__.py b/video_matcher/__init__.py
new file mode 100644
index 0000000..3be76de
--- /dev/null
+++ b/video_matcher/__init__.py
@@ -0,0 +1,11 @@
+"""Video matcher package."""
+
+from .frame_extractor import extract_frame_features
+from .matching import find_best_match
+from .audio_transcriber import transcribe_audio
+
+__all__ = [
+    "extract_frame_features",
+    "find_best_match",
+    "transcribe_audio",
+]
diff --git a/video_matcher/audio_transcriber.py b/video_matcher/audio_transcriber.py
new file mode 100644
index 0000000..deba36b
--- /dev/null
+++ b/video_matcher/audio_transcriber.py
@@ -0,0 +1,27 @@
+"""Audio transcription utilities."""
+
+from typing import List
+
+
+def transcribe_audio(audio_path: str) -> List[str]:
+    """Transcribe an audio file into a list of text chunks.
+
+    Parameters
+    ----------
+    audio_path:
+        Path to an audio file extracted from a video.
+
+    Returns
+    -------
+    List[str]
+        Whitespace-separated chunks of the recognized text; empty if
+        nothing was recognized.
+    """
+    # Imported lazily so that SpeechRecognition stays an optional
+    # dependency, as stated in the README.
+    import speech_recognition as sr
+
+    recognizer = sr.Recognizer()
+    with sr.AudioFile(audio_path) as source:
+        audio = recognizer.record(source)
+    try:
+        text = recognizer.recognize_google(audio, language="zh-CN")
+    except sr.UnknownValueError:
+        text = ""
+    return text.split()
diff --git a/video_matcher/cli.py b/video_matcher/cli.py
new file mode 100644
index 0000000..388a572
--- /dev/null
+++ b/video_matcher/cli.py
@@ -0,0 +1,31 @@
+"""Command line interface for video matching."""
+
+import argparse
+from pathlib import Path
+from . import extract_frame_features, find_best_match
+
+
+def main(argv=None):
+    parser = argparse.ArgumentParser(description="Match a short video to a long video")
+    parser.add_argument("long_video", type=Path, help="Path to the long video")
+    parser.add_argument("short_video", type=Path, help="Path to the short video")
+    parser.add_argument("--stride", type=int, default=1, help="Frame stride for feature extraction")
+    parser.add_argument("--window", type=int, default=10, help="Step between candidate start positions when scanning the long video")
+
+    args = parser.parse_args(argv)
+
+    print("Extracting features from long video...")
+    long_features = extract_frame_features(str(args.long_video), stride=args.stride)
+    print("Extracting features from short video...")
+    short_features = extract_frame_features(str(args.short_video), stride=args.stride)
+
+    print("Matching videos...")
+    index, cost = find_best_match(long_features, short_features, window=args.window)
+    if index >= 0:
+        print(f"Best match starts at frame {index} with cost {cost}")
+    else:
+        print("No match found")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/video_matcher/frame_extractor.py b/video_matcher/frame_extractor.py
new file mode 100644
index 0000000..20c0a03
--- /dev/null
+++ b/video_matcher/frame_extractor.py
@@ -0,0 +1,38 @@
+"""Utilities for extracting frame features from videos."""
+
+from typing import List
+import cv2
+
+
+def extract_frame_features(video_path: str, stride: int = 1) -> List[float]:
+    """Extract simple frame features from ``video_path``.
+
+    Parameters
+    ----------
+    video_path:
+        Path to the video file.
+    stride:
+        Interval between sampled frames. Defaults to 1 (use every frame).
+
+    Returns
+    -------
+    List[float]
+        Sequence of grayscale average intensities for each sampled frame.
+    """
+    cap = cv2.VideoCapture(video_path)
+    if not cap.isOpened():
+        raise IOError(f"Cannot open video: {video_path}")
+
+    features = []
+    index = 0
+    while True:
+        ret, frame = cap.read()
+        if not ret:
+            break
+        if index % stride == 0:
+            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
+            features.append(float(gray.mean()))
+        index += 1
+
+    cap.release()
+    return features
diff --git a/video_matcher/matching.py b/video_matcher/matching.py
new file mode 100644
index 0000000..c67529f
--- /dev/null
+++ b/video_matcher/matching.py
@@ -0,0 +1,79 @@
+"""Algorithms for matching two sequences of frame features."""
+
+from typing import List, Tuple
+
+
+def dynamic_time_warping(seq_a: List[float], seq_b: List[float]) -> Tuple[float, List[Tuple[int, int]]]:
+    """Compute the DTW distance between two sequences.
+
+    Parameters
+    ----------
+    seq_a, seq_b:
+        Feature sequences.
+
+    Returns
+    -------
+    Tuple containing the DTW cost and the alignment path.
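+
+    Examples
+    --------
+    A tiny worked example with toy values; the cost of 1.0 comes from
+    aligning the middle element 2.0 with 1.0:
+
+    >>> dynamic_time_warping([1.0, 2.0, 3.0], [1.0, 3.0])
+    (1.0, [(0, 0), (1, 0), (2, 1)])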
+    """
+    n = len(seq_a)
+    m = len(seq_b)
+    # Initialize cost matrix
+    cost = [[float('inf')] * (m + 1) for _ in range(n + 1)]
+    cost[0][0] = 0.0
+    # Compute dynamic programming table
+    for i in range(1, n + 1):
+        for j in range(1, m + 1):
+            diff = seq_a[i - 1] - seq_b[j - 1]
+            val = diff * diff
+            cost[i][j] = val + min(
+                cost[i - 1][j],    # insertion
+                cost[i][j - 1],    # deletion
+                cost[i - 1][j - 1] # match
+            )
+    # Backtrack to find path
+    i, j = n, m
+    path = []
+    while i > 0 and j > 0:
+        path.append((i - 1, j - 1))
+        diag = cost[i - 1][j - 1]
+        left = cost[i][j - 1]
+        up = cost[i - 1][j]
+        if diag <= left and diag <= up:
+            i -= 1
+            j -= 1
+        elif left < up:
+            j -= 1
+        else:
+            i -= 1
+    path.reverse()
+    return cost[n][m], path
+
+
+def find_best_match(long_seq: List[float], short_seq: List[float], window: int = 10) -> Tuple[int, float]:
+    """Find the index in ``long_seq`` that best matches ``short_seq``.
+
+    Parameters
+    ----------
+    long_seq:
+        Feature sequence from the longer video.
+    short_seq:
+        Feature sequence from the shorter video.
+    window:
+        Step between candidate start indices; larger values make the
+        search faster but coarser.
+
+    Returns
+    -------
+    Tuple[int, float]
+        Start index of the best match and the matching cost, or
+        ``(-1, inf)`` when ``long_seq`` is shorter than ``short_seq``.
+    """
+    best_index = -1
+    best_cost = float('inf')
+    # Compare ``short_seq`` against a segment of ``long_seq`` starting every
+    # ``window`` positions and keep the cheapest DTW alignment.
+    for start in range(0, max(1, len(long_seq) - len(short_seq) + 1), window):
+        segment = long_seq[start : start + len(short_seq)]
+        if len(segment) < len(short_seq):
+            break
+        cost, _ = dynamic_time_warping(segment, short_seq)
+        if cost < best_cost:
+            best_cost = cost
+            best_index = start
+    return best_index, best_cost