diff --git a/docs/README.md b/docs/README.md index 7ac873bd1..2623d0807 100644 --- a/docs/README.md +++ b/docs/README.md @@ -28,9 +28,9 @@ Use these when you want deeper customization, integration, or extension details. | Topic | Repo docs | What it covers | |---|---|---| +| Development | [`development.md`](./development.md) | Contributor notes for adding providers and transcription adapters | | Memory | [`memory.md`](./memory.md) | How nanobot stores, consolidates, and restores memory | | Python SDK | [`python-sdk.md`](./python-sdk.md) | Use nanobot programmatically from Python | | Channel plugin guide | [`channel-plugin-guide.md`](./channel-plugin-guide.md) | Build and test custom chat channel plugins | | WebSocket channel | [`websocket.md`](./websocket.md) | Real-time WebSocket access and protocol details | | Custom tools | [`my-tool.md`](./my-tool.md) | Inspect and tune runtime state with the `my` tool | - diff --git a/docs/configuration.md b/docs/configuration.md index 1ae86d5fc..1fbbd5db5 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -119,7 +119,7 @@ ANTHROPIC_API_KEY="$(bw get password api/anthropic)" nanobot agent ## Providers > [!TIP] -> - **Voice transcription**: Voice messages and WebUI/desktop microphone input use the shared top-level `transcription` settings. By default Groq Whisper is used; set `transcription.provider` to `"openai"` for OpenAI Whisper, `"openrouter"` for OpenRouter speech-to-text models, or `"xiaomi_mimo"` for Xiaomi MiMo ASR. API keys still live in the matching `providers.<name>` config. +> - **Voice transcription**: Voice messages and WebUI/desktop microphone input use the shared top-level `transcription` settings. By default Groq Whisper is used; set `transcription.provider` to `"openai"` for OpenAI Whisper, `"openrouter"` for OpenRouter speech-to-text models, `"xiaomi_mimo"` for Xiaomi MiMo ASR, or `"assemblyai"` for AssemblyAI. API keys still live in the matching `providers.<name>` config. 
> - **MiniMax Coding Plan**: Exclusive discount links for the nanobot community: [Overseas](https://platform.minimax.io/subscribe/coding-plan?code=9txpdXw04g&source=link) · [Mainland China](https://platform.minimaxi.com/subscribe/token-plan?code=GILTJpMTqZ&source=link) > - **MiniMax (Mainland China)**: If your API key is from MiniMax's mainland China platform (minimaxi.com), set `"apiBase": "https://api.minimaxi.com/v1"` in your minimax provider config. > - **MiniMax thinking mode**: Use `providers.minimaxAnthropic` when you want `reasoningEffort` / thinking mode. MiniMax exposes that capability through its Anthropic-compatible endpoint, so nanobot keeps it as a separate provider instead of guessing MiniMax-specific thinking parameters on the generic OpenAI-compatible `minimax` endpoint. It uses the same `MINIMAX_API_KEY`. Default Anthropic-compatible base URL: `https://api.minimax.io/anthropic`; for mainland China use `https://api.minimaxi.com/anthropic`. @@ -143,6 +143,7 @@ ANTHROPIC_API_KEY="$(bw get password api/anthropic)" nanobot agent | `azure_openai` | LLM (Azure OpenAI) | [portal.azure.com](https://portal.azure.com) | | `bedrock` | LLM (AWS Bedrock Converse, Claude/Nova/Llama/etc.) | [aws.amazon.com/bedrock](https://aws.amazon.com/bedrock/) | | `openai` | LLM + Voice transcription (Whisper) | [platform.openai.com](https://platform.openai.com) | +| `assemblyai` | Voice transcription only | [assemblyai.com](https://www.assemblyai.com/) | | `deepseek` | LLM (DeepSeek direct) | [platform.deepseek.com](https://platform.deepseek.com) | | `groq` | LLM + Voice transcription (Whisper, default) | [console.groq.com](https://console.groq.com) | | `minimax` | LLM (MiniMax direct) | [platform.minimaxi.com](https://platform.minimaxi.com) | @@ -957,48 +958,8 @@ vllm serve meta-llama/Llama-3.1-8B-Instruct --port 8000 -
-Adding a New Provider (Developer Guide) - -nanobot uses a **Provider Registry** (`nanobot/providers/registry.py`) as the single source of truth. -Adding a new provider only takes **2 steps** — no if-elif chains to touch. - -**Step 1.** Add a `ProviderSpec` entry to `PROVIDERS` in `nanobot/providers/registry.py`: - -```python -ProviderSpec( - name="myprovider", # config field name - keywords=("myprovider", "mymodel"), # model-name keywords for auto-matching - env_key="MYPROVIDER_API_KEY", # env var name - display_name="My Provider", # shown in `nanobot status` - default_api_base="https://api.myprovider.com/v1", # OpenAI-compatible endpoint -) -``` - -**Step 2.** Add a field to `ProvidersConfig` in `nanobot/config/schema.py`: - -```python -class ProvidersConfig(BaseModel): - ... - myprovider: ProviderConfig = ProviderConfig() -``` - -That's it! Environment variables, model routing, config matching, and `nanobot status` display will all work automatically. - -**Common `ProviderSpec` options:** - -| Field | Description | Example | -|-------|-------------|---------| -| `default_api_base` | OpenAI-compatible base URL | `"https://api.deepseek.com"` | -| `env_extras` | Additional env vars to set | `(("ZHIPUAI_API_KEY", "{api_key}"),)` | -| `model_overrides` | Per-model parameter overrides | `(("kimi-k2.5", {"temperature": 1.0}), ("kimi-k2.6", {"temperature": 1.0}),)` | -| `is_gateway` | Can route any model (like OpenRouter) | `True` | -| `detect_by_key_prefix` | Detect gateway by API key prefix | `"sk-or-"` | -| `detect_by_base_keyword` | Detect gateway by API base URL | `"openrouter"` | -| `strip_model_prefix` | Strip provider prefix before sending to gateway | `True` (for AiHubMix) | -| `supports_max_completion_tokens` | Use `max_completion_tokens` instead of `max_tokens`; required for providers that reject both being set simultaneously (e.g. VolcEngine) | `True` | - -
+Contributor notes for adding new providers live in +[`development.md`](./development.md#adding-an-llm-provider). ## Model Presets @@ -1122,8 +1083,8 @@ Configure transcription under the top-level `transcription` section: | Setting | Default | Description | |---------|---------|-------------| | `enabled` | `true` | Enables audio transcription for both chat-channel voice messages and WebUI/desktop microphone input. | -| `provider` | `"groq"` | Transcription backend: `"groq"`, `"openai"`, `"openrouter"`, or `"xiaomi_mimo"`. | -| `model` | provider default | Optional transcription model override. Defaults to `whisper-large-v3` for Groq, `whisper-1` for OpenAI, `openai/whisper-1` for OpenRouter, and `mimo-v2.5-asr` for Xiaomi MiMo ASR. OpenRouter accepts only speech-to-text models on its transcription endpoint, such as `nvidia/parakeet-tdt-0.6b-v3`, `openai/whisper-1`, or `openai/gpt-4o-transcribe`; chat LLMs are rejected there. | +| `provider` | `"groq"` | Transcription backend: `"groq"`, `"openai"`, `"openrouter"`, `"xiaomi_mimo"`, or `"assemblyai"`. | +| `model` | provider default | Optional transcription model override. Defaults to `whisper-large-v3` for Groq, `whisper-1` for OpenAI, `openai/whisper-1` for OpenRouter, `mimo-v2.5-asr` for Xiaomi MiMo ASR, and `universal-3-pro,universal-2` for AssemblyAI. OpenRouter accepts only speech-to-text models on its transcription endpoint, such as `nvidia/parakeet-tdt-0.6b-v3`, `openai/whisper-1`, or `openai/gpt-4o-transcribe`; chat LLMs are rejected there. AssemblyAI accepts a comma-separated model fallback list. | | `language` | `null` | Optional ISO-639 language hint, e.g. `"en"`, `"zh"`, `"ko"`, or `"ja"`. | | `maxDurationSec` | `120` | Maximum WebUI/desktop recording duration. | | `maxUploadMb` | `25` | Maximum WebUI/desktop audio upload size. | @@ -1155,6 +1116,9 @@ Transcription credentials are intentionally not stored in `transcription`. 
Put t Selecting a transcription provider does not configure credentials by itself. For example, the effective provider may default to Groq for compatibility, but transcription is only usable when `providers.groq.apiKey` or the matching environment-backed config is available. The Settings UI writes only the top-level `transcription` fields. +If you are adding a new transcription provider, see +[`development.md`](./development.md#adding-a-transcription-provider). + ## Channel Settings Global settings that apply to all channels. Configure under the `channels` section in `~/.nanobot/config.json`: diff --git a/docs/development.md b/docs/development.md new file mode 100644 index 000000000..f19014314 --- /dev/null +++ b/docs/development.md @@ -0,0 +1,132 @@ +# Development + +This page collects contributor-facing notes for extending nanobot. User-facing setup +and runtime options live in [`configuration.md`](./configuration.md). + +## Adding an LLM Provider + +nanobot uses the provider registry in `nanobot/providers/registry.py` as the +source of truth for LLM provider metadata. Most OpenAI-compatible providers need +only two changes. + +1. Add a `ProviderSpec` entry to `PROVIDERS`: + +```python +ProviderSpec( + name="myprovider", + keywords=("myprovider", "mymodel"), + env_key="MYPROVIDER_API_KEY", + display_name="My Provider", + default_api_base="https://api.myprovider.com/v1", +) +``` + +2. Add a field to `ProvidersConfig` in `nanobot/config/schema.py`: + +```python +class ProvidersConfig(BaseModel): + ... + myprovider: ProviderConfig = Field(default_factory=ProviderConfig) +``` + +Environment variables, config matching, provider status, and WebUI credential +display derive from those two entries. + +Useful `ProviderSpec` options: + +| Field | Description | +|---|---| +| `default_api_base` | Default OpenAI-compatible base URL. | +| `env_extras` | Additional environment variables derived from the provider config. 
| +| `model_overrides` | Per-model request parameter overrides. | +| `is_gateway` | Provider can route many model families, like OpenRouter. | +| `detect_by_key_prefix` | Match configured gateways by API-key prefix. | +| `detect_by_base_keyword` | Match configured gateways by API base URL. | +| `strip_model_prefix` | Strip `provider/` before sending the model to the upstream API. | +| `supports_max_completion_tokens` | Use `max_completion_tokens` instead of `max_tokens`. | +| `is_transcription_only` | Provider has credentials but cannot serve chat completions. | + +## Adding a Transcription Provider + +Transcription is intentionally split into two layers: + +- `nanobot/audio/transcription_registry.py` owns provider names, aliases, default + models, and adapter loading. +- `nanobot/providers/transcription.py` owns provider-specific HTTP behavior. + +Credentials still live under `providers.<name>` so chat channels, WebUI, and +desktop resolve API keys and API bases the same way. + +1. Add provider credentials to `ProvidersConfig`. + +```python +class ProvidersConfig(BaseModel): + ... + my_stt: ProviderConfig = Field(default_factory=ProviderConfig) +``` + +2. Add a `ProviderSpec` in `nanobot/providers/registry.py`. + +For transcription-only providers, set `is_transcription_only=True` so they show up +in credential/settings surfaces but stay out of chat model selection. + +```python +ProviderSpec( + name="my_stt", + keywords=("my_stt",), + env_key="MY_STT_API_KEY", + display_name="My STT", + default_api_base="https://api.example.com/v1", + is_transcription_only=True, +) +``` + +3. Add an adapter class in `nanobot/providers/transcription.py`. + +Adapters receive resolved credentials and settings. They return an empty string +for provider errors so channel voice messages fail quietly instead of crashing the +agent loop. 
+ +```python +class MySTTTranscriptionProvider: + def __init__( + self, + api_key: str | None = None, + api_base: str | None = None, + language: str | None = None, + model: str | None = None, + ): + self.api_key = api_key or os.environ.get("MY_STT_API_KEY") + self.api_base = api_base or "https://api.example.com/v1" + self.language = language or None + self.model = model or "my-default-stt-model" + + async def transcribe(self, file_path: str | Path) -> str: + ... +``` + +4. Register the adapter in `nanobot/audio/transcription_registry.py`. + +```python +TranscriptionProviderSpec( + name="my_stt", + default_model="my-default-stt-model", + adapter="nanobot.providers.transcription:MySTTTranscriptionProvider", + aliases=("mystt",), +) +``` + +5. Add tests. + +At minimum, cover: + +- config resolution in `tests/providers/test_transcription.py` +- adapter request/response behavior and retry/error handling +- WebUI settings payload/update behavior in `tests/webui/test_settings_api.py` +- provider brand mapping if the provider appears in Settings + +6. Update user-facing docs. + +Add the provider to [`configuration.md`](./configuration.md) where users choose +`transcription.provider`, but keep implementation details in this development +guide. 
diff --git a/nanobot/audio/transcription.py b/nanobot/audio/transcription.py index 7e97517fa..fa46dbb23 100644 --- a/nanobot/audio/transcription.py +++ b/nanobot/audio/transcription.py @@ -11,26 +11,20 @@ from __future__ import annotations from contextlib import suppress from dataclasses import dataclass, field from pathlib import Path -from typing import Any, Literal +from typing import Any from loguru import logger +from nanobot.audio.transcription_registry import ( + get_transcription_provider, + resolve_transcription_provider, +) from nanobot.config.paths import get_media_dir from nanobot.utils.media_decode import FileSizeExceeded, save_base64_data_url -TranscriptionProviderName = Literal["groq", "openai", "openrouter", "xiaomi_mimo"] +TranscriptionProviderName = str _DEFAULT_PROVIDER: TranscriptionProviderName = "groq" -_DEFAULT_MODELS: dict[TranscriptionProviderName, str] = { - "groq": "whisper-large-v3", - "openai": "whisper-1", - "openrouter": "openai/whisper-1", - "xiaomi_mimo": "mimo-v2.5-asr", -} -_PROVIDER_ALIASES: dict[str, TranscriptionProviderName] = { - "mimo": "xiaomi_mimo", - "xiaomi": "xiaomi_mimo", -} _MAX_AUDIO_BYTES_FALLBACK = 25 * 1024 * 1024 _AUDIO_MIME_ALLOWED: frozenset[str] = frozenset({ "audio/aac", @@ -72,13 +66,8 @@ class TranscriptionIngressError(Exception): def _as_provider(value: Any) -> TranscriptionProviderName | None: - if isinstance(value, str): - name = value.strip().lower() - if name in _PROVIDER_ALIASES: - return _PROVIDER_ALIASES[name] - if name in _DEFAULT_MODELS: - return name # type: ignore[return-value] - return None + spec = resolve_transcription_provider(value) + return spec.name if spec else None def _provider_config(config: Any, provider: str) -> Any: @@ -101,11 +90,17 @@ def resolve_transcription_config(config: Any) -> EffectiveTranscriptionConfig: or _as_provider(getattr(channels, "transcription_provider", None)) or _DEFAULT_PROVIDER ) + spec = get_transcription_provider(provider) + if spec is None: + 
logger.warning("Unknown transcription provider {}; falling back to {}", provider, _DEFAULT_PROVIDER) + provider = _DEFAULT_PROVIDER + spec = get_transcription_provider(provider) + default_model = spec.default_model if spec else "" provider_cfg = _provider_config(config, provider) return EffectiveTranscriptionConfig( enabled=bool(getattr(top, "enabled", True)), provider=provider, - model=(getattr(top, "model", None) or _DEFAULT_MODELS[provider]).strip(), + model=(getattr(top, "model", None) or default_model).strip(), language=getattr(top, "language", None) or getattr(channels, "transcription_language", None), api_key=getattr(provider_cfg, "api_key", None) or "", api_base=getattr(provider_cfg, "api_base", None) or "", @@ -170,40 +165,14 @@ async def transcribe_audio_file( """Transcribe *file_path* using the already-resolved transcription config.""" if not config.enabled or not config.configured: return "" - if config.provider == "openai": - from nanobot.providers.transcription import OpenAITranscriptionProvider - - provider = OpenAITranscriptionProvider( - api_key=config.api_key, - api_base=config.api_base or None, - language=config.language, - model=config.model, - ) - elif config.provider == "openrouter": - from nanobot.providers.transcription import OpenRouterTranscriptionProvider - - provider = OpenRouterTranscriptionProvider( - api_key=config.api_key, - api_base=config.api_base or None, - language=config.language, - model=config.model, - ) - elif config.provider == "xiaomi_mimo": - from nanobot.providers.transcription import XiaomiMiMoTranscriptionProvider - - provider = XiaomiMiMoTranscriptionProvider( - api_key=config.api_key, - api_base=config.api_base or None, - language=config.language, - model=config.model, - ) - else: - from nanobot.providers.transcription import GroqTranscriptionProvider - - provider = GroqTranscriptionProvider( - api_key=config.api_key, - api_base=config.api_base or None, - language=config.language, - model=config.model, - ) + spec = 
get_transcription_provider(config.provider) + if spec is None: + logger.warning("Unknown transcription provider: {}", config.provider) + return "" + provider = spec.load_adapter()( + api_key=config.api_key, + api_base=config.api_base or None, + language=config.language, + model=config.model, + ) return await provider.transcribe(file_path) diff --git a/nanobot/audio/transcription_registry.py b/nanobot/audio/transcription_registry.py new file mode 100644 index 000000000..3cea122fb --- /dev/null +++ b/nanobot/audio/transcription_registry.py @@ -0,0 +1,90 @@ +"""Registry for speech-to-text providers. + +Provider-specific HTTP adapters live in ``nanobot.providers.transcription``. +This module is the app-level source of truth for provider names, aliases, +default models, and adapter class paths. +""" + +from __future__ import annotations + +from dataclasses import dataclass +from importlib import import_module +from pathlib import Path +from typing import Any, Protocol + + +class TranscriptionProviderAdapter(Protocol): + """Runtime protocol implemented by provider-specific transcription adapters.""" + + def __init__( + self, + api_key: str | None = None, + api_base: str | None = None, + language: str | None = None, + model: str | None = None, + ) -> None: ... + + async def transcribe(self, file_path: str | Path) -> str: ... + + +@dataclass(frozen=True) +class TranscriptionProviderSpec: + name: str + default_model: str + adapter: str + aliases: tuple[str, ...] = () + + def load_adapter(self) -> type[TranscriptionProviderAdapter]: + module_name, _, class_name = self.adapter.partition(":") + if not module_name or not class_name: + raise RuntimeError(f"Invalid transcription adapter path: {self.adapter}") + adapter = getattr(import_module(module_name), class_name) + return adapter + + +TRANSCRIPTION_PROVIDERS: tuple[TranscriptionProviderSpec, ...] 
= ( + TranscriptionProviderSpec( + name="groq", + default_model="whisper-large-v3", + adapter="nanobot.providers.transcription:GroqTranscriptionProvider", + ), + TranscriptionProviderSpec( + name="openai", + default_model="whisper-1", + adapter="nanobot.providers.transcription:OpenAITranscriptionProvider", + ), + TranscriptionProviderSpec( + name="openrouter", + default_model="openai/whisper-1", + adapter="nanobot.providers.transcription:OpenRouterTranscriptionProvider", + ), + TranscriptionProviderSpec( + name="xiaomi_mimo", + default_model="mimo-v2.5-asr", + adapter="nanobot.providers.transcription:XiaomiMiMoTranscriptionProvider", + aliases=("mimo", "xiaomi"), + ), + TranscriptionProviderSpec( + name="assemblyai", + default_model="universal-3-pro,universal-2", + adapter="nanobot.providers.transcription:AssemblyAITranscriptionProvider", + ), +) + +_BY_NAME = {spec.name: spec for spec in TRANSCRIPTION_PROVIDERS} +_BY_ALIAS = {alias: spec for spec in TRANSCRIPTION_PROVIDERS for alias in spec.aliases} + + +def transcription_provider_names() -> tuple[str, ...]: + return tuple(spec.name for spec in TRANSCRIPTION_PROVIDERS) + + +def get_transcription_provider(name: str) -> TranscriptionProviderSpec | None: + return _BY_NAME.get(name) + + +def resolve_transcription_provider(value: Any) -> TranscriptionProviderSpec | None: + if not isinstance(value, str): + return None + name = value.strip().lower() + return _BY_NAME.get(name) or _BY_ALIAS.get(name) diff --git a/nanobot/config/schema.py b/nanobot/config/schema.py index e597052d6..53a8eacd5 100644 --- a/nanobot/config/schema.py +++ b/nanobot/config/schema.py @@ -47,7 +47,7 @@ class TranscriptionConfig(Base): """Cross-channel audio transcription configuration.""" enabled: bool = True - provider: Literal["groq", "openai", "openrouter", "xiaomi_mimo"] | None = None + provider: str | None = None # Validated by nanobot.audio.transcription_registry. 
model: str | None = None language: str | None = Field(default=None, pattern=r"^[a-z]{2,3}$") max_duration_sec: int = Field(default=120, ge=1, le=600) @@ -202,6 +202,7 @@ class ProvidersConfig(Base): anthropic: ProviderConfig = Field(default_factory=ProviderConfig) openai: ProviderConfig = Field(default_factory=ProviderConfig) openrouter: ProviderConfig = Field(default_factory=ProviderConfig) + assemblyai: ProviderConfig = Field(default_factory=ProviderConfig) # AssemblyAI voice transcription huggingface: ProviderConfig = Field(default_factory=ProviderConfig) skywork: ProviderConfig = Field(default_factory=ProviderConfig) # Skywork / APIFree API gateway deepseek: ProviderConfig = Field(default_factory=ProviderConfig) @@ -402,6 +403,8 @@ class Config(BaseSettings): # Explicit provider prefix wins — prevents `github-copilot/...codex` matching openai_codex. for spec in PROVIDERS: + if spec.is_transcription_only: + continue p = getattr(self.providers, spec.name, None) if p and model_prefix and normalized_prefix == spec.name: if spec.is_oauth or spec.is_local or spec.is_direct or p.api_key: @@ -409,6 +412,8 @@ class Config(BaseSettings): # Match by keyword (order follows PROVIDERS registry) for spec in PROVIDERS: + if spec.is_transcription_only: + continue p = getattr(self.providers, spec.name, None) if p and any(_kw_matches(kw) for kw in spec.keywords): if spec.is_oauth or spec.is_local or spec.is_direct or p.api_key: @@ -435,7 +440,7 @@ class Config(BaseSettings): # Fallback: gateways first, then others (follows registry order) # OAuth providers are NOT valid fallbacks — they require explicit model selection for spec in PROVIDERS: - if spec.is_oauth: + if spec.is_oauth or spec.is_transcription_only: continue p = getattr(self.providers, spec.name, None) if p and p.api_key: diff --git a/nanobot/providers/factory.py b/nanobot/providers/factory.py index 2e6b68c7d..d4371bd10 100644 --- a/nanobot/providers/factory.py +++ b/nanobot/providers/factory.py @@ -41,6 +41,8 @@ def 
_make_provider_core( provider_name = config.get_provider_name(model, preset=resolved) p = config.get_provider(model, preset=resolved) spec = find_by_name(provider_name) if provider_name else None + if spec and spec.is_transcription_only: + raise ValueError(f"Provider '{provider_name}' only supports transcription.") backend = spec.backend if spec else "openai_compat" if backend == "azure_openai": diff --git a/nanobot/providers/registry.py b/nanobot/providers/registry.py index ab7e2cf1e..1beb14cdf 100644 --- a/nanobot/providers/registry.py +++ b/nanobot/providers/registry.py @@ -60,6 +60,9 @@ class ProviderSpec: # Direct providers skip API-key validation (user supplies everything) is_direct: bool = False + # Provider is listed for shared credentials but cannot serve chat completions. + is_transcription_only: bool = False + # Provider supports cache_control on content blocks (e.g. Anthropic prompt caching) supports_prompt_caching: bool = False @@ -507,6 +510,17 @@ PROVIDERS: tuple[ProviderSpec, ...] = ( backend="openai_compat", default_api_base="https://api.groq.com/openai/v1", ), + # AssemblyAI: voice transcription only. It appears in provider settings so + # users can manage credentials, but WebUI excludes it from chat model pickers. + ProviderSpec( + name="assemblyai", + keywords=("assemblyai",), + env_key="ASSEMBLYAI_API_KEY", + display_name="AssemblyAI", + backend="openai_compat", + default_api_base="https://api.assemblyai.com/v2", + is_transcription_only=True, + ), # Qianfan (百度千帆): OpenAI-compatible API ProviderSpec( name="qianfan", diff --git a/nanobot/providers/transcription.py b/nanobot/providers/transcription.py index 997228bd0..f2b7051c3 100644 --- a/nanobot/providers/transcription.py +++ b/nanobot/providers/transcription.py @@ -1,7 +1,7 @@ """Provider-specific voice transcription adapters. This module only knows how to call external transcription APIs such as Groq, -OpenAI Whisper, OpenRouter, and Xiaomi MiMo ASR. 
Product-level config fallback, +OpenAI Whisper, OpenRouter, Xiaomi MiMo ASR, and AssemblyAI. Product-level config fallback, WebUI upload validation, and channel integration live in ``nanobot.audio.transcription``. """ @@ -19,6 +19,9 @@ from loguru import logger _CHAT_COMPLETIONS_PATH = "chat/completions" _TRANSCRIPTIONS_PATH = "audio/transcriptions" +_ASSEMBLYAI_DEFAULT_API_BASE = "https://api.assemblyai.com/v2" +_ASSEMBLYAI_POLL_ATTEMPTS = 60 +_ASSEMBLYAI_POLL_INTERVAL_S = 2.0 _AUDIO_MIME_OVERRIDES = { ".m4a": "audio/mp4", ".mpga": "audio/mpeg", @@ -63,6 +66,11 @@ def _resolve_chat_completions_url(api_base: str | None, default_url: str) -> str return f"{base}/{_CHAT_COMPLETIONS_PATH}" +def _resolve_api_path(api_base: str | None, default_base: str, path: str) -> str: + base = (api_base or default_base).rstrip("/") + return f"{base}/{path.lstrip('/')}" + + def _audio_mime_type(path: Path) -> str: return ( _AUDIO_MIME_OVERRIDES.get(path.suffix.lower()) @@ -93,6 +101,90 @@ _RETRYABLE_EXCEPTIONS = ( ) +async def _request_json_with_retry( + client: httpx.AsyncClient, + method: str, + url: str, + *, + provider_label: str, + **kwargs: object, +) -> dict[str, Any] | None: + for attempt in range(_MAX_RETRIES + 1): + try: + request = getattr(client, method.lower(), None) + if request is None: + response = await client.request(method, url, **kwargs) + else: + response = await request(url, **kwargs) + except _RETRYABLE_EXCEPTIONS as e: + if attempt < _MAX_RETRIES: + logger.warning( + "{} transcription transient error (attempt {}/{}): {}", + provider_label, + attempt + 1, + _MAX_RETRIES + 1, + e, + ) + await asyncio.sleep(_BACKOFF_S[attempt]) + continue + logger.exception( + "{} transcription error after {} attempts: {}", + provider_label, + _MAX_RETRIES + 1, + e, + ) + return None + except Exception as e: + logger.exception("{} transcription error: {}", provider_label, e) + return None + + if response.status_code in _RETRYABLE_STATUS and attempt < _MAX_RETRIES: + 
logger.warning( + "{} transcription transient HTTP {} (attempt {}/{})", + provider_label, + response.status_code, + attempt + 1, + _MAX_RETRIES + 1, + ) + await asyncio.sleep(_BACKOFF_S[attempt]) + continue + + try: + response.raise_for_status() + except httpx.HTTPStatusError: + body = response.text.strip().replace("\n", " ")[:500] + logger.error( + "{} transcription HTTP {}{}{}", + provider_label, + response.status_code, + f" {response.reason_phrase}" if response.reason_phrase else "", + f": {body}" if body else "", + ) + return None + except Exception as e: + logger.exception("{} transcription error: {}", provider_label, e) + return None + + try: + payload = response.json() + except Exception as e: + logger.exception( + "{} transcription error: malformed response body: {}", + provider_label, + e, + ) + return None + if not isinstance(payload, dict): + logger.error( + "{} transcription error: unexpected response shape: {!r}", + provider_label, + type(payload).__name__, + ) + return None + return payload + return None + + async def _post_transcription_with_retry( url: str, *, @@ -305,6 +397,107 @@ def _text_from_chat_payload(payload: dict[str, Any]) -> str: return text if isinstance(text, str) else "" +def _assemblyai_speech_models(model: str | None) -> list[str]: + return [part for part in (part.strip() for part in (model or "").split(",")) if part] + + +class AssemblyAITranscriptionProvider: + """Voice transcription provider using AssemblyAI's asynchronous REST API.""" + + def __init__( + self, + api_key: str | None = None, + api_base: str | None = None, + language: str | None = None, + model: str | None = None, + ): + base = api_base or os.environ.get("ASSEMBLYAI_BASE_URL") + self.api_key = api_key or os.environ.get("ASSEMBLYAI_API_KEY") + self.upload_url = _resolve_api_path(base, _ASSEMBLYAI_DEFAULT_API_BASE, "upload") + self.transcript_url = _resolve_api_path(base, _ASSEMBLYAI_DEFAULT_API_BASE, "transcript") + self.language = language or None + self.model = 
model or "universal-3-pro,universal-2" + logger.debug("AssemblyAI transcription endpoint: {}", self.transcript_url) + + async def transcribe(self, file_path: str | Path) -> str: + if not self.api_key: + logger.warning("AssemblyAI API key not configured for transcription") + return "" + path = Path(file_path) + if not path.exists(): + logger.error("Audio file not found: {}", file_path) + return "" + try: + data = path.read_bytes() + except OSError as e: + logger.exception("AssemblyAI transcription error: cannot read audio file: {}", e) + return "" + + headers = {"Authorization": self.api_key} + async with httpx.AsyncClient() as client: + upload = await _request_json_with_retry( + client, + "POST", + self.upload_url, + provider_label="AssemblyAI", + headers={**headers, "Content-Type": "application/octet-stream"}, + content=data, + timeout=60.0, + ) + upload_url = upload.get("upload_url") if upload else None + if not isinstance(upload_url, str) or not upload_url: + logger.error("AssemblyAI transcription error: upload_url missing") + return "" + + body: dict[str, object] = {"audio_url": upload_url} + speech_models = _assemblyai_speech_models(self.model) + if speech_models: + body["speech_models"] = speech_models + if self.language: + body["language_code"] = self.language + + transcript = await _request_json_with_retry( + client, + "POST", + self.transcript_url, + provider_label="AssemblyAI", + headers=headers, + json=body, + timeout=30.0, + ) + transcript_id = transcript.get("id") if transcript else None + if not isinstance(transcript_id, str) or not transcript_id: + logger.error("AssemblyAI transcription error: transcript id missing") + return "" + + poll_url = f"{self.transcript_url.rstrip('/')}/{transcript_id}" + for attempt in range(_ASSEMBLYAI_POLL_ATTEMPTS): + payload = await _request_json_with_retry( + client, + "GET", + poll_url, + provider_label="AssemblyAI", + headers=headers, + timeout=30.0, + ) + if not payload: + return "" + status = 
str(payload.get("status") or "").lower() + if status == "completed": + text = payload.get("text") + return text if isinstance(text, str) else "" + if status in {"error", "failed"}: + logger.error( + "AssemblyAI transcription failed: {}", + payload.get("error") or payload, + ) + return "" + if attempt < _ASSEMBLYAI_POLL_ATTEMPTS - 1: + await asyncio.sleep(_ASSEMBLYAI_POLL_INTERVAL_S) + logger.error("AssemblyAI transcription timed out while polling transcript") + return "" + + class OpenAITranscriptionProvider: """Voice transcription provider using OpenAI's Whisper API.""" diff --git a/nanobot/webui/settings_api.py b/nanobot/webui/settings_api.py index 71c7e08bf..87d0b77e1 100644 --- a/nanobot/webui/settings_api.py +++ b/nanobot/webui/settings_api.py @@ -16,6 +16,10 @@ from zoneinfo import ZoneInfo import httpx from nanobot.audio.transcription import resolve_transcription_config +from nanobot.audio.transcription_registry import ( + resolve_transcription_provider, + transcription_provider_names, +) from nanobot.config.loader import get_config_path, load_config, save_config from nanobot.config.schema import ModelPresetConfig from nanobot.providers.image_generation import ( @@ -91,7 +95,6 @@ _IMAGE_GENERATION_ASPECT_RATIOS = { "2:3", "21:9", } -_TRANSCRIPTION_PROVIDERS = ("groq", "openai", "openrouter", "xiaomi_mimo") _CONTEXT_WINDOW_TOKEN_OPTIONS = {65_536, 262_144} _MODEL_CONFIGURATION_SLUG_RE = re.compile(r"[^a-z0-9_-]+") _ENV_REF_RE = re.compile(r"\$\{([A-Za-z_][A-Za-z0-9_]*)\}") @@ -424,9 +427,13 @@ def provider_models_payload(query: QueryParams) -> dict[str, Any]: "fetched_at": time.time(), } if ( - spec.backend in _MODEL_LIST_UNSUPPORTED_BACKENDS - and spec.name != "minimax_anthropic" - ) or spec.is_oauth: + spec.is_transcription_only + or ( + spec.backend in _MODEL_LIST_UNSUPPORTED_BACKENDS + and spec.name != "minimax_anthropic" + ) + or spec.is_oauth + ): return { **base_payload, "status": "unsupported", @@ -542,6 +549,8 @@ def 
_validate_configured_provider(config: Any, provider: str) -> None: spec = find_by_name(provider) if spec is None: raise WebUISettingsError("unknown provider") + if spec.is_transcription_only: + raise WebUISettingsError("provider does not support chat models") provider_config = getattr(config.providers, provider, None) if ( provider_config is None @@ -580,7 +589,7 @@ def _image_generation_provider_rows(config: Any) -> list[dict[str, Any]]: def _transcription_provider_rows(config: Any) -> list[dict[str, Any]]: rows: list[dict[str, Any]] = [] - for name in _TRANSCRIPTION_PROVIDERS: + for name in transcription_provider_names(): spec = find_by_name(name) provider_config = getattr(config.providers, name, None) rows.append({ @@ -640,6 +649,7 @@ def settings_payload( "api_key_hint": _mask_secret_hint(provider_config.api_key), "api_base": provider_config.api_base, "default_api_base": spec.default_api_base or None, + "model_selectable": not spec.is_transcription_only, } if oauth_status is not None: row["oauth_account"] = oauth_status["account"] @@ -1357,10 +1367,12 @@ def update_transcription_settings(query: QueryParams) -> dict[str, Any]: provider = _query_first(query, "provider") if provider is not None: provider = provider.strip().lower() - if provider not in _TRANSCRIPTION_PROVIDERS: + provider_spec = resolve_transcription_provider(provider) + if provider_spec is None: raise WebUISettingsError("unknown transcription provider") + provider = provider_spec.name if transcription.provider != provider: - transcription.provider = provider # type: ignore[assignment] + transcription.provider = provider changed = True model = _query_first(query, "model") diff --git a/tests/config/test_model_presets.py b/tests/config/test_model_presets.py index 06e015746..d36127df9 100644 --- a/tests/config/test_model_presets.py +++ b/tests/config/test_model_presets.py @@ -245,3 +245,18 @@ def test_match_provider_routes_forced_novita_model_api_models() -> None: assert config.get_provider_name() == 
"novita" assert config.get_api_base() == "https://api.novita.ai/openai" + + +def test_transcription_only_provider_is_not_chat_fallback() -> None: + config = Config.model_validate({ + "providers": { + "assemblyai": {"apiKey": "aai-test"}, + }, + "agents": { + "defaults": { + "model": "assemblyai/universal-3-pro", + } + }, + }) + + assert config.get_provider_name() is None diff --git a/tests/providers/test_transcription.py b/tests/providers/test_transcription.py index 574d5a44b..dadf59440 100644 --- a/tests/providers/test_transcription.py +++ b/tests/providers/test_transcription.py @@ -14,8 +14,14 @@ from nanobot.audio.transcription import ( resolve_transcription_config, transcribe_audio_file, ) +from nanobot.audio.transcription_registry import ( + get_transcription_provider, + resolve_transcription_provider, + transcription_provider_names, +) from nanobot.config.schema import Config from nanobot.providers.transcription import ( + AssemblyAITranscriptionProvider, GroqTranscriptionProvider, OpenAITranscriptionProvider, OpenRouterTranscriptionProvider, @@ -44,6 +50,17 @@ def _raw_response(status: int, content: bytes) -> httpx.Response: return httpx.Response(status_code=status, content=content, request=request) +def _json_response( + status: int, + payload: dict[str, object], + *, + method: str = "POST", + url: str = "https://example.test/audio/transcriptions", +) -> httpx.Response: + request = httpx.Request(method, url) + return httpx.Response(status_code=status, json=payload, request=request) + + def test_resolver_uses_legacy_channel_provider_when_top_level_is_unset() -> None: config = Config() config.channels.transcription_provider = "openai" @@ -128,6 +145,29 @@ def test_resolver_accepts_legacy_xiaomi_transcription_alias() -> None: assert resolved.api_key == "mimo-test" +def test_transcription_registry_lists_providers_and_aliases() -> None: + assert "assemblyai" in transcription_provider_names() + assert get_transcription_provider("assemblyai").default_model == 
"universal-3-pro,universal-2" + assert resolve_transcription_provider("mimo").name == "xiaomi_mimo" + + +def test_resolver_supports_assemblyai_provider_config() -> None: + config = Config() + config.transcription.provider = "assemblyai" + config.transcription.model = "universal-3-pro" + config.transcription.language = "en" + config.providers.assemblyai.api_key = "aai-test" + config.providers.assemblyai.api_base = "https://assembly.example/v2" + + resolved = resolve_transcription_config(config) + + assert resolved.provider == "assemblyai" + assert resolved.model == "universal-3-pro" + assert resolved.language == "en" + assert resolved.api_key == "aai-test" + assert resolved.api_base == "https://assembly.example/v2" + + @pytest.mark.asyncio async def test_transcribe_audio_file_routes_openrouter_provider(audio_file: Path) -> None: captured: dict[str, object] = {} @@ -200,6 +240,42 @@ async def test_transcribe_audio_file_routes_xiaomi_mimo_provider(audio_file: Pat } +@pytest.mark.asyncio +async def test_transcribe_audio_file_routes_assemblyai_provider(audio_file: Path) -> None: + captured: dict[str, object] = {} + + class StubAssemblyAI: + def __init__(self, **kwargs): + captured.update(kwargs) + + async def transcribe(self, file_path: str | Path) -> str: + captured["file_path"] = Path(file_path) + return "assembly ok" + + config = EffectiveTranscriptionConfig( + enabled=True, + provider="assemblyai", + model="universal-3-pro", + language="en", + api_key="aai-test", + api_base="https://assembly.example/v2", + max_duration_sec=120, + max_upload_mb=25, + ) + + with patch("nanobot.providers.transcription.AssemblyAITranscriptionProvider", StubAssemblyAI): + result = await transcribe_audio_file(audio_file, config) + + assert result == "assembly ok" + assert captured == { + "api_key": "aai-test", + "api_base": "https://assembly.example/v2", + "language": "en", + "model": "universal-3-pro", + "file_path": audio_file, + } + + def 
test_resolved_transcription_repr_hides_api_key() -> None: config = Config() config.providers.groq.api_key = "gsk-secret" @@ -628,6 +704,126 @@ async def test_xiaomi_mimo_shares_retry_contract(audio_file: Path) -> None: assert post.await_count == 2 +def test_assemblyai_defaults_and_base_normalization() -> None: + provider = AssemblyAITranscriptionProvider(api_key="aai-test") + assert provider.upload_url == "https://api.assemblyai.com/v2/upload" + assert provider.transcript_url == "https://api.assemblyai.com/v2/transcript" + assert provider.model == "universal-3-pro,universal-2" + + custom = AssemblyAITranscriptionProvider( + api_key="aai-test", + api_base="https://assembly.example/v2", + model="universal-3-pro", + ) + assert custom.upload_url == "https://assembly.example/v2/upload" + assert custom.transcript_url == "https://assembly.example/v2/transcript" + assert custom.model == "universal-3-pro" + + +@pytest.mark.asyncio +async def test_assemblyai_uploads_creates_and_polls(audio_file: Path) -> None: + provider = AssemblyAITranscriptionProvider( + api_key="aai-test", + api_base="https://assembly.example/v2", + language="en", + model="universal-3-pro,universal-2", + ) + post = AsyncMock( + side_effect=[ + _json_response(200, {"upload_url": "https://cdn.example/audio"}, url=provider.upload_url), + _json_response(200, {"id": "tr_123"}, url=provider.transcript_url), + ] + ) + get = AsyncMock( + return_value=_json_response( + 200, + {"status": "completed", "text": "assembly ok"}, + method="GET", + url=f"{provider.transcript_url}/tr_123", + ) + ) + + with patch("httpx.AsyncClient.post", post), patch("httpx.AsyncClient.get", get), patch( + "asyncio.sleep", AsyncMock() + ): + result = await provider.transcribe(audio_file) + + assert result == "assembly ok" + assert post.await_count == 2 + assert get.await_count == 1 + upload_call, create_call = post.await_args_list + assert upload_call.args == ("https://assembly.example/v2/upload",) + assert 
upload_call.kwargs["headers"]["Authorization"] == "aai-test" + assert upload_call.kwargs["headers"]["Content-Type"] == "application/octet-stream" + assert upload_call.kwargs["content"] == audio_file.read_bytes() + assert create_call.args == ("https://assembly.example/v2/transcript",) + assert create_call.kwargs["json"] == { + "audio_url": "https://cdn.example/audio", + "speech_models": ["universal-3-pro", "universal-2"], + "language_code": "en", + } + assert get.await_args.args == ("https://assembly.example/v2/transcript/tr_123",) + + +@pytest.mark.asyncio +async def test_assemblyai_polls_until_completed(audio_file: Path) -> None: + provider = AssemblyAITranscriptionProvider(api_key="aai-test") + post = AsyncMock( + side_effect=[ + _json_response(200, {"upload_url": "https://cdn.example/audio"}, url=provider.upload_url), + _json_response(200, {"id": "tr_123"}, url=provider.transcript_url), + ] + ) + get = AsyncMock( + side_effect=[ + _json_response(200, {"status": "processing"}, method="GET"), + _json_response(200, {"status": "completed", "text": "done"}, method="GET"), + ] + ) + sleep = AsyncMock() + + with patch("httpx.AsyncClient.post", post), patch("httpx.AsyncClient.get", get), patch( + "asyncio.sleep", sleep + ): + assert await provider.transcribe(audio_file) == "done" + + assert get.await_count == 2 + assert sleep.await_count == 1 + + +@pytest.mark.asyncio +async def test_assemblyai_returns_empty_on_failed_transcript(audio_file: Path) -> None: + provider = AssemblyAITranscriptionProvider(api_key="aai-test") + post = AsyncMock( + side_effect=[ + _json_response(200, {"upload_url": "https://cdn.example/audio"}, url=provider.upload_url), + _json_response(200, {"id": "tr_123"}, url=provider.transcript_url), + ] + ) + get = AsyncMock( + return_value=_json_response( + 200, + {"status": "error", "error": "bad audio"}, + method="GET", + ) + ) + + with patch("httpx.AsyncClient.post", post), patch("httpx.AsyncClient.get", get), patch( + "asyncio.sleep", AsyncMock() + 
): + assert await provider.transcribe(audio_file) == "" + + +@pytest.mark.asyncio +async def test_assemblyai_missing_api_key_short_circuits(audio_file: Path) -> None: + with patch.dict("os.environ", {}, clear=True): + provider = AssemblyAITranscriptionProvider(api_key=None) + post = AsyncMock() + with patch("httpx.AsyncClient.post", post): + assert await provider.transcribe(audio_file) == "" + assert post.await_count == 0 + + @pytest.mark.parametrize("status", [408, 429, 500, 502, 503, 504]) @pytest.mark.asyncio async def test_retries_on_every_advertised_transient_status( diff --git a/tests/webui/test_settings_api.py b/tests/webui/test_settings_api.py index 754a74449..76518c576 100644 --- a/tests/webui/test_settings_api.py +++ b/tests/webui/test_settings_api.py @@ -299,6 +299,50 @@ def test_settings_payload_exposes_xiaomi_mimo_transcription_provider( assert providers["xiaomi_mimo"]["configured"] is True +def test_settings_payload_exposes_assemblyai_transcription_provider( + tmp_path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + config_path = tmp_path / "config.json" + config = Config() + config.transcription.provider = "assemblyai" + config.providers.assemblyai.api_key = "aai-test" + save_config(config, config_path) + monkeypatch.setattr("nanobot.config.loader._current_config_path", config_path) + + payload = settings_payload() + + assert payload["transcription"]["provider"] == "assemblyai" + assert payload["transcription"]["provider_configured"] is True + providers = {provider["name"]: provider for provider in payload["transcription"]["providers"]} + assert providers["assemblyai"]["label"] == "AssemblyAI" + assert providers["assemblyai"]["configured"] is True + assert providers["assemblyai"]["default_api_base"] == "https://api.assemblyai.com/v2" + provider_rows = {provider["name"]: provider for provider in payload["providers"]} + assert provider_rows["assemblyai"]["configured"] is True + assert provider_rows["assemblyai"]["model_selectable"] is False + + +def 
test_model_configuration_rejects_transcription_only_provider( + tmp_path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + config_path = tmp_path / "config.json" + config = Config() + config.providers.assemblyai.api_key = "aai-test" + save_config(config, config_path) + monkeypatch.setattr("nanobot.config.loader._current_config_path", config_path) + + with pytest.raises(WebUISettingsError, match="does not support chat models"): + create_model_configuration( + { + "label": ["Voice only"], + "provider": ["assemblyai"], + "model": ["universal-3-pro"], + } + ) + + def test_update_transcription_settings_writes_top_level_only( tmp_path, monkeypatch: pytest.MonkeyPatch, @@ -385,6 +429,30 @@ def test_update_transcription_settings_accepts_xiaomi_mimo( assert payload["transcription"]["provider_configured"] is True +def test_update_transcription_settings_accepts_assemblyai( + tmp_path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + config_path = tmp_path / "config.json" + config = Config() + config.providers.assemblyai.api_key = "aai-test" + save_config(config, config_path) + monkeypatch.setattr("nanobot.config.loader._current_config_path", config_path) + + payload = update_transcription_settings( + { + "provider": ["assemblyai"], + "model": ["universal-3-pro"], + } + ) + + saved = load_config(config_path) + assert saved.transcription.provider == "assemblyai" + assert saved.transcription.model == "universal-3-pro" + assert payload["transcription"]["provider"] == "assemblyai" + assert payload["transcription"]["provider_configured"] is True + + def test_update_transcription_settings_validates_language( tmp_path, monkeypatch: pytest.MonkeyPatch, diff --git a/webui/src/components/settings/SettingsView.tsx b/webui/src/components/settings/SettingsView.tsx index c06bd41ae..27f37e60d 100644 --- a/webui/src/components/settings/SettingsView.tsx +++ b/webui/src/components/settings/SettingsView.tsx @@ -779,7 +779,7 @@ export function SettingsView({ const configuredModelProviderOptions = 
useMemo( () => settings?.providers - .filter((provider) => provider.configured) + .filter((provider) => provider.configured && provider.model_selectable !== false) .map((provider) => ({ name: provider.name, label: provider.label })) ?? [], [settings], ); diff --git a/webui/src/lib/provider-brand.ts b/webui/src/lib/provider-brand.ts index 93571238b..10fc5a6d7 100644 --- a/webui/src/lib/provider-brand.ts +++ b/webui/src/lib/provider-brand.ts @@ -113,6 +113,7 @@ const PROVIDER_BRANDS: Record = { aihubmix: brand("aihubmix.com", "#111827", "AH"), ant_ling: brand("ant-ling.com", "#7C3AED", "AL"), anthropic: brand("anthropic.com", "#D97757", "A"), + assemblyai: brand("assemblyai.com", "#111827", "AA"), atomic_chat: brand("atomic.chat", "#111827", "AC"), azure_openai: brand("azure.microsoft.com", "#0078D4", "AZ"), bedrock: brand("aws.amazon.com", "#FF9900", "AWS"), diff --git a/webui/src/lib/types.ts b/webui/src/lib/types.ts index 9b858e360..2731c9ddd 100644 --- a/webui/src/lib/types.ts +++ b/webui/src/lib/types.ts @@ -343,6 +343,7 @@ export interface SettingsPayload { api_key_hint?: string | null; api_base?: string | null; default_api_base?: string | null; + model_selectable?: boolean; api_type?: "auto" | "chat_completions" | "responses"; oauth_account?: string | null; oauth_expires_at?: number | null; diff --git a/webui/src/tests/provider-brand.test.ts b/webui/src/tests/provider-brand.test.ts index c0babc874..6110fe46e 100644 --- a/webui/src/tests/provider-brand.test.ts +++ b/webui/src/tests/provider-brand.test.ts @@ -47,4 +47,9 @@ describe("provider brand logos", () => { expect(providerBrand("openrouter")?.logoUrls).toContain("https://openrouter.ai/favicon.ico"); expect(providerBrand("openrouter")?.initials).toBe("OR"); }); + + it("keeps AssemblyAI voice settings on the first-party brand domain", () => { + expect(providerBrand("assemblyai")?.logoUrls).toContain("https://assemblyai.com/favicon.ico"); + expect(providerBrand("assemblyai")?.initials).toBe("AA"); + }); });