Files
micro-api/app/services/transcriber.py
T

56 lines
1.5 KiB
Python
Raw Normal View History

2026-06-05 03:02:42 +02:00
"""Whisper transcription service — CPU-only, async-ready."""
import io
import tempfile
import time
from pathlib import Path
# Single-slot cache for the most recently loaded Whisper model; populated by
# _load_model() so repeated calls with the same name skip reloading.
_model = None
# Name the cached model was loaded under; stays None until the first load.
_model_name = None
def _load_model(name: str = "base"):
    """Return the Whisper model called *name*, loading it only when needed.

    A single model is cached at module level. The cached instance is reused
    when it was loaded under the same name; otherwise it is replaced by a
    freshly loaded one (the original docstring notes that the first use
    triggers a download).
    """
    global _model, _model_name
    # Imported here rather than at module top so that merely importing this
    # module does not import whisper.
    import whisper

    cache_hit = _model is not None and _model_name == name
    if not cache_hit:
        _model = whisper.load_model(name)
        _model_name = name
    return _model
def transcribe_bytes(audio_bytes: bytes, model_name: str = "base") -> dict:
    """Transcribe in-memory audio with Whisper.

    The bytes are written to a temporary file because the model is handed a
    file path. The file is removed again whether writing or transcription
    succeeds or fails.

    Args:
        audio_bytes: Raw contents of the audio file to transcribe.
        model_name: Name of the Whisper model, passed to ``_load_model``.

    Returns:
        A dict with the keys:

        * ``text``: full transcript with surrounding whitespace stripped.
        * ``segments``: list of ``{"start", "end", "text"}`` dicts, with
          times rounded to two decimals.
        * ``language``: language reported by the model, or ``"unknown"``.
        * ``duration_seconds``: elapsed processing time rounded to 0.1 s.
          This covers model loading plus transcription; it is not the
          length of the audio.
        * ``model``: the ``model_name`` that was requested.

    Raises:
        Whatever ``_load_model`` or ``model.transcribe`` raise is propagated
        unchanged, as are OS errors from writing the temporary file.
    """
    # perf_counter is monotonic, so the measured interval cannot be skewed
    # (or go negative) when the system clock is adjusted mid-call.
    started = time.perf_counter()
    model = _load_model(model_name)

    # Whisper needs a file path or numpy array, so spool the bytes to disk.
    # NOTE(review): the suffix is always ".wav" regardless of the real
    # format; presumably the decoder sniffs the content -- confirm.
    tmp = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
    tmp_path = Path(tmp.name)
    try:
        # The write sits inside the try so a failed write (e.g. disk full)
        # still reaches the cleanup below. Leaving the ``with`` closes and
        # flushes the file before the model opens it by path.
        with tmp:
            tmp.write(audio_bytes)
        result = model.transcribe(str(tmp_path), fp16=False)  # fp16=False for CPU
    finally:
        tmp_path.unlink(missing_ok=True)

    elapsed = round(time.perf_counter() - started, 1)
    segments = [
        {
            "start": round(seg["start"], 2),
            "end": round(seg["end"], 2),
            "text": seg["text"].strip(),
        }
        for seg in result.get("segments", [])
    ]
    return {
        "text": result["text"].strip(),
        "segments": segments,
        "language": result.get("language", "unknown"),
        "duration_seconds": elapsed,
        "model": model_name,
    }