Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions app/engines/stt/qwen3asr/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
"""Qwen3-ASR STT Engine"""

from app.engines.stt.qwen3asr.config import Qwen3ASRConfig
from app.engines.stt.qwen3asr.engine import Qwen3ASREngine

__all__ = ["Qwen3ASRConfig", "Qwen3ASREngine"]
67 changes: 67 additions & 0 deletions app/engines/stt/qwen3asr/config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
"""Configuration for Qwen3-ASR STT Engine"""

from pydantic import Field, field_validator

from app.models.engine import EngineConfig


class Qwen3ASRConfig(EngineConfig):
    """
    Configuration for the Qwen3-ASR STT engine (vLLM backend).

    Extends EngineConfig with Qwen3-ASR specific parameters.

    Attributes:
        model_name: HuggingFace model ID (Qwen/Qwen3-ASR-1.7B or
            Qwen/Qwen3-ASR-0.6B) or a local filesystem path.
        gpu_memory_utilization: Fraction of GPU memory to use for vLLM.
        max_inference_batch_size: Maximum batch size for inference
            (-1 for unlimited).
        max_new_tokens: Maximum tokens to generate (increase for long audio).
        forced_aligner: Path or name of ForcedAligner model for timestamps.
        unfixed_chunk_num: Number of unfixed chunks for streaming state.
        unfixed_token_num: Number of unfixed tokens for streaming state.
        chunk_size_sec: Size of each audio chunk in seconds for streaming.
        language: Default language code for transcription (e.g., 'en', 'vi').
    """

    @field_validator("model_name")
    @classmethod
    def validate_model(cls, v: str) -> str:
        """Validate that model_name is a known Qwen3-ASR checkpoint or a local path.

        Raises:
            ValueError: If the value is neither a supported HuggingFace model
                ID nor a local path (absolute or relative).
        """
        valid = [
            "Qwen/Qwen3-ASR-1.7B",
            "Qwen/Qwen3-ASR-0.6B",
        ]
        # Allow local paths (absolute "/..." or relative "./...") in addition
        # to the whitelisted HuggingFace IDs.
        if v in valid or v.startswith("/") or v.startswith("./"):
            return v
        raise ValueError(f"Qwen3-ASR model must be one of {valid} or a local path")

    # vLLM backend specific fields
    gpu_memory_utilization: float = Field(
        default=0.7,
        ge=0.1,
        le=0.95,
        description="Fraction of GPU memory to use for vLLM",
    )
    # ge=-1 admits the documented "-1 = unlimited" sentinel; the validator
    # below rejects 0 (and any other non-positive value besides -1), which a
    # plain range constraint cannot express.
    max_inference_batch_size: int = Field(
        default=32,
        ge=-1,
        description="Maximum batch size for inference. -1 for unlimited.",
    )

    @field_validator("max_inference_batch_size")
    @classmethod
    def validate_max_inference_batch_size(cls, v: int) -> int:
        """Ensure max_inference_batch_size is -1 (unlimited) or a positive integer.

        Raises:
            ValueError: If the value is 0 or any negative number other than -1.
        """
        if v == -1 or v > 0:
            return v
        raise ValueError(
            "max_inference_batch_size must be -1 (for unlimited) or a positive integer"
        )

    max_new_tokens: int = Field(
        default=512,
        gt=0,
        description="Maximum tokens to generate. Increase for long audio.",
    )
    forced_aligner: str | None = Field(
        default=None, description="Path or name of ForcedAligner model for timestamps."
    )
    unfixed_chunk_num: int = Field(
        default=4, description="Number of unfixed chunks for streaming state"
    )
    unfixed_token_num: int = Field(
        default=5, description="Number of unfixed tokens for streaming state"
    )
    chunk_size_sec: float = Field(
        default=1.0, description="Size of each audio chunk in seconds for streaming"
    )
    language: str | None = Field(
        default=None,
        description="Default language for transcription (e.g., 'en', 'vi').",
    )
Loading