56 lines
1.5 KiB
Python
56 lines
1.5 KiB
Python
|
|
"""Whisper transcription service — CPU-only, async-ready."""
|
||
|
|
|
||
|
|
import io
|
||
|
|
import tempfile
|
||
|
|
import time
|
||
|
|
from pathlib import Path
|
||
|
|
|
||
|
|
# Process-wide cache of the object returned by whisper.load_model();
# stays None until _load_model() performs the first load.
_model = None
|
||
|
|
# Name the cached model was loaded under; _load_model() compares it with the
# requested name to decide whether the cache must be replaced.
_model_name = None
|
||
|
|
|
||
|
|
|
||
|
|
def _load_model(name: str = "base"):
    """Return the Whisper model called *name*, reusing the cached one if possible.

    A single model is kept in the module-level ``_model`` / ``_model_name``
    pair. Asking for the name that is already cached returns that object;
    asking for any other name loads it and replaces the cache.

    Args:
        name: Model identifier passed straight to ``whisper.load_model``.

    Returns:
        The object produced by ``whisper.load_model(name)``.
    """
    global _model, _model_name

    # Imported inside the function, so the whisper package is only required
    # once a model is actually requested (the original note says the model
    # is downloaded on first use).
    import whisper

    already_cached = _model is not None and _model_name == name
    if already_cached:
        return _model

    _model = whisper.load_model(name)
    _model_name = name
    return _model
|
||
|
|
|
||
|
|
|
||
|
|
def transcribe_bytes(audio_bytes: bytes, model_name: str = "base") -> dict:
    """Transcribe in-memory audio with Whisper and return a plain-dict summary.

    The bytes are written to a temporary file with a ``.wav`` suffix because
    the model is handed a file path. The temporary file is always removed
    before this function returns or raises.

    Args:
        audio_bytes: Contents of the audio file to transcribe.
        model_name: Whisper model to use; forwarded to ``_load_model``.

    Returns:
        A dict with the keys:

        * ``"text"``: the full transcript, stripped of surrounding whitespace.
        * ``"segments"``: a list of ``{"start", "end", "text"}`` dicts, with
          ``start``/``end`` rounded to 2 decimals (presumably seconds, as
          reported by Whisper; confirm against the Whisper docs).
        * ``"language"``: the language reported by the model, or
          ``"unknown"`` when the result carries none.
        * ``"duration_seconds"``: time spent inside this call (model load
          plus transcription), rounded to 0.1 s. This is processing time,
          NOT the length of the audio.
        * ``"model"``: the ``model_name`` that was requested.

    Raises:
        Any exception raised by ``_load_model``, by writing the temporary
        file, or by ``model.transcribe`` propagates unchanged.
    """
    # perf_counter is monotonic, so the measured interval cannot be skewed by
    # wall-clock adjustments the way time.time() can.
    started = time.perf_counter()

    # Load the model first: if that fails, no temporary file is ever created.
    model = _load_model(model_name)

    # Whisper is given a file path, so spool the bytes to disk. delete=False
    # lets the file outlive the handle so it can be reopened by path.
    tmp = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
    tmp_path = Path(tmp.name)
    try:
        # The write is inside the try so a failed write (e.g. disk full)
        # still reaches the cleanup below instead of leaking the file.
        with tmp:
            tmp.write(audio_bytes)
        result = model.transcribe(str(tmp_path), fp16=False)  # fp16=False for CPU
    finally:
        tmp_path.unlink(missing_ok=True)

    elapsed = round(time.perf_counter() - started, 1)

    segments = [
        {
            "start": round(seg["start"], 2),
            "end": round(seg["end"], 2),
            "text": seg["text"].strip(),
        }
        for seg in result.get("segments", [])
    ]

    return {
        "text": result["text"].strip(),
        "segments": segments,
        "language": result.get("language", "unknown"),
        "duration_seconds": elapsed,
        "model": model_name,
    }
|