Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
"""add anthropic + google-vertex to provider_enum and seed test model_config rows

Revision ID: 064
Revises: 063
Create Date: 2026-05-28 00:00:00.000000

"""

from alembic import op


# Alembic revision identifiers: this migration's id and the id of the
# revision it is applied on top of.
revision = "064"
down_revision = "063"
# No branch labels and no dependencies on other revisions are declared.
branch_labels = None
depends_on = None


def upgrade():
    """Extend ``global.provider_enum`` and seed ``global.model_config`` rows.

    Step 1 adds the ``anthropic`` and ``google-vertex`` enum values. Step 2
    inserts three Anthropic text models, eight Google Vertex STT models and
    two Google Vertex TTS models.

    Both steps tolerate being re-run: the enum additions use
    ``IF NOT EXISTS`` and the insert uses ``ON CONFLICT ... DO NOTHING``.
    NOTE(review): the ``ON CONFLICT (provider, model_name)`` target presumably
    relies on a unique constraint over those two columns — confirm.
    """
    # ALTER TYPE ... ADD VALUE cannot run inside a transaction block, so the
    # enum changes are issued in autocommit mode, following the existing
    # pattern (see migration 056). The new values are visible to later
    # statements once the autocommit_block exits.
    new_providers = ("anthropic", "google-vertex")
    with op.get_context().autocommit_block():
        for provider in new_providers:
            op.execute(
                f"ALTER TYPE global.provider_enum ADD VALUE IF NOT EXISTS '{provider}'"
            )

    # Pass-through seed rows for testing. Pricing values are placeholders;
    # revise once real cost data is available.
    seed_rows_sql = """
        INSERT INTO global.model_config
        (provider, model_name, completion_type, config, input_modalities, output_modalities, pricing, is_active, inserted_at, updated_at)
        VALUES
        -- Anthropic text models
        ('anthropic', 'claude-opus-4-7', 'text',
        '{"temperature": {"type": "float", "default": 1.0, "min": 0.0, "max": 1.0, "description": "Sampling temperature."}}',
        '{TEXT,IMAGE,PDF}', '{TEXT}',
        '{"response": {"input_token_cost": 15.0, "output_token_cost": 75.0}, "batch": {"input_token_cost": 7.5, "output_token_cost": 37.5}}',
        true, NOW(), NOW()),
        ('anthropic', 'claude-sonnet-4-6', 'text',
        '{"temperature": {"type": "float", "default": 1.0, "min": 0.0, "max": 1.0, "description": "Sampling temperature."}}',
        '{TEXT,IMAGE,PDF}', '{TEXT}',
        '{"response": {"input_token_cost": 3.0, "output_token_cost": 15.0}, "batch": {"input_token_cost": 1.5, "output_token_cost": 7.5}}',
        true, NOW(), NOW()),
        ('anthropic', 'claude-haiku-4-5-20251001', 'text',
        '{"temperature": {"type": "float", "default": 1.0, "min": 0.0, "max": 1.0, "description": "Sampling temperature."}}',
        '{TEXT,IMAGE,PDF}', '{TEXT}',
        '{"response": {"input_token_cost": 1.0, "output_token_cost": 5.0}, "batch": {"input_token_cost": 0.5, "output_token_cost": 2.5}}',
        true, NOW(), NOW()),
        -- Google Vertex STT models (Gemini 3.x family — GA per
        -- https://docs.cloud.google.com/gemini-enterprise-agent-platform/models/google-models)
        ('google-vertex', 'gemini-3.1-pro-preview', 'stt',
        '{"thinking_level": {"type": "enum", "default": "high", "options": ["low", "medium", "high"], "description": "Max reasoning depth before output. high = best quality, low = faster/cheaper."}}',
        '{AUDIO}', '{TEXT}',
        '{"response": {"input_token_cost": 2.0, "output_token_cost": 12.0}, "audio": {"input_token_cost": 3.5, "output_token_cost": 12.0}}',
        true, NOW(), NOW()),
        ('google-vertex', 'gemini-3-pro', 'stt',
        '{"thinking_level": {"type": "enum", "default": "high", "options": ["low", "medium", "high"], "description": "Max reasoning depth before output."}}',
        '{AUDIO}', '{TEXT}',
        '{"response": {"input_token_cost": 1.5, "output_token_cost": 10.0}, "audio": {"input_token_cost": 3.0, "output_token_cost": 10.0}}',
        true, NOW(), NOW()),
        ('google-vertex', 'gemini-3.5-flash', 'stt',
        '{"thinking_level": {"type": "enum", "default": "high", "options": ["minimal", "low", "medium", "high"], "description": "Max reasoning depth before output."}}',
        '{AUDIO}', '{TEXT}',
        '{"response": {"input_token_cost": 0.6, "output_token_cost": 3.5}, "audio": {"input_token_cost": 1.2, "output_token_cost": 3.5}}',
        true, NOW(), NOW()),
        ('google-vertex', 'gemini-3-flash-preview', 'stt',
        '{"thinking_level": {"type": "enum", "default": "high", "options": ["minimal", "low", "medium", "high"], "description": "Max reasoning depth before output."}}',
        '{AUDIO}', '{TEXT}',
        '{"response": {"input_token_cost": 0.5, "output_token_cost": 3.0}, "audio": {"input_token_cost": 1.0, "output_token_cost": 3.0}}',
        true, NOW(), NOW()),
        ('google-vertex', 'gemini-3.1-flash-lite', 'stt',
        '{"temperature": {"type": "float", "default": 0.0, "min": 0.0, "max": 2.0, "description": "Controls randomness. Lower = more deterministic."}}',
        '{AUDIO}', '{TEXT}',
        '{"response": {"input_token_cost": 0.1, "output_token_cost": 0.4}, "audio": {"input_token_cost": 0.3, "output_token_cost": 0.4}}',
        true, NOW(), NOW()),
        ('google-vertex', 'gemini-2.5-flash', 'stt',
        '{"temperature": {"type": "float", "default": 0.0, "min": 0.0, "max": 2.0, "description": "Controls randomness. Lower = more deterministic."}}',
        '{AUDIO}', '{TEXT}',
        '{"response": {"input_token_cost": 0.3, "output_token_cost": 2.5}, "audio": {"input_token_cost": 1.0, "output_token_cost": 2.5}}',
        true, NOW(), NOW()),
        ('google-vertex', 'gemini-2.5-pro', 'stt',
        '{"temperature": {"type": "float", "default": 0.0, "min": 0.0, "max": 2.0, "description": "Controls randomness. Lower = more deterministic."}}',
        '{AUDIO}', '{TEXT}',
        '{"response": {"input_token_cost": 1.25, "output_token_cost": 10.0}, "audio": {"input_token_cost": 3.5, "output_token_cost": 10.0}}',
        true, NOW(), NOW()),
        -- Google Vertex TTS models
        ('google-vertex', 'gemini-2.5-flash-preview-tts', 'tts',
        '{"voice": {"type": "enum", "default": "Kore", "options": ["Aoede", "Charon", "Fenrir", "Kore", "Puck"], "description": "TTS voice."}}',
        '{TEXT}', '{AUDIO}',
        '{"response": {"input_token_cost": 0.5, "output_token_cost": 10.0}, "audio": {"input_token_cost": 0.5, "output_token_cost": 10.0}}',
        true, NOW(), NOW()),
        ('google-vertex', 'gemini-2.5-pro-preview-tts', 'tts',
        '{"voice": {"type": "enum", "default": "Kore", "options": ["Aoede", "Charon", "Fenrir", "Kore", "Puck"], "description": "TTS voice."}}',
        '{TEXT}', '{AUDIO}',
        '{"response": {"input_token_cost": 1.0, "output_token_cost": 20.0}, "audio": {"input_token_cost": 1.0, "output_token_cost": 20.0}}',
        true, NOW(), NOW())
        ON CONFLICT (provider, model_name) DO NOTHING
        """
    op.execute(seed_rows_sql)


def downgrade():
    """Remove the model rows belonging to the two providers added on upgrade.

    The delete matches on provider alone, so every ``global.model_config`` row
    for ``anthropic`` or ``google-vertex`` is removed, not only the rows this
    migration seeded. The enum values themselves stay in place.
    """
    delete_provider_rows_sql = """
        DELETE FROM global.model_config
        WHERE provider IN ('anthropic', 'google-vertex')
        """
    op.execute(delete_provider_rows_sql)
    # Enum value removal requires rebuilding the type and re-pointing every
    # referencing column. Skipped — see migrations 035 / 056 for the same
    # convention.
50 changes: 44 additions & 6 deletions backend/app/core/audio_utils.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,56 @@
"""
Audio processing utilities for format conversion.
"""Audio processing utilities: format conversion + STT input carrier."""

This module provides utilities for converting audio between different formats,
particularly for TTS output post-processing.
"""
import io
import logging
import os
import tempfile
import wave
from contextlib import contextmanager
from dataclasses import dataclass
from typing import Iterator

from pydub import AudioSegment


# Module-level logger, named after this module via ``__name__``.
logger = logging.getLogger(__name__)

# Maps an audio MIME type (lowercase, no parameters) to the filename suffix
# used when the audio is written to a temporary file.
_MIME_TO_EXT = {
    "audio/wav": ".wav",
    "audio/mpeg": ".mp3",
    "audio/mp3": ".mp3",
    "audio/ogg": ".ogg",
    "audio/flac": ".flac",
    "audio/webm": ".webm",
    "audio/mp4": ".mp4",
    "audio/m4a": ".m4a",
    "audio/aac": ".aac",
    "audio/aiff": ".aiff",
}


@dataclass(frozen=True)
class AudioRef:
    """In-memory STT input. Providers consume ``bytes_`` directly or call
    ``to_path()`` when an SDK needs a filesystem path. Temp files are owned
    by the provider's ``with`` scope — no framework-level cleanup needed.
    """

    # Raw audio payload.
    bytes_: bytes
    # MIME type of ``bytes_``; selects the temp-file suffix in ``to_path()``.
    mime_type: str = "audio/wav"

    @contextmanager
    def to_path(self) -> Iterator[str]:
        """Write the audio to a temporary file and yield its path.

        The file name starts with ``audio_`` and ends with the suffix mapped
        from ``mime_type``. MIME parameters and letter case are ignored for
        the lookup (``"Audio/WebM; codecs=opus"`` maps to ``.webm``); an
        unrecognised type falls back to ``.audio``.

        Yields:
            Path of a closed temporary file containing ``bytes_``.

        The file is deleted when the ``with`` block exits, whether normally
        or through an exception.
        """
        mime = self.mime_type
        if isinstance(mime, str):
            # Drop parameters such as ";codecs=opus" and normalise case so
            # equivalent MIME spellings resolve to the same suffix.
            mime = mime.split(";", 1)[0].strip().lower()
        ext = _MIME_TO_EXT.get(mime, ".audio")

        # delete=False: the file must outlive its handle so the consumer can
        # reopen it by path; removal is handled explicitly below.
        tmp = tempfile.NamedTemporaryFile(suffix=ext, delete=False, prefix="audio_")
        try:
            # The handle's own context manager closes it even when write()
            # raises, so no descriptor leaks and the unlink below cannot be
            # blocked by a still-open file.
            with tmp:
                tmp.write(self.bytes_)
            yield tmp.name
        finally:
            try:
                os.unlink(tmp.name)
            except OSError:
                # Best-effort cleanup: the consumer may already have removed
                # or moved the file.
                pass


def convert_pcm_to_mp3(
pcm_bytes: bytes, sample_rate: int = 24000
Expand Down
1 change: 1 addition & 0 deletions backend/app/core/cloud/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,5 @@
CloudStorage,
CloudStorageError,
get_cloud_storage,
upload_audio_to_gcs,
)
Loading
Loading