mirror of
https://github.com/HKUDS/nanobot.git
synced 2026-06-16 07:44:06 +00:00
feat(transcription): add shared voice input support (#4232)
* feat(webui): add voice transcription input * feat(webui): render ANSI output in code blocks * refactor(webui): isolate voice recorder logic * refactor(transcription): keep websocket ingress thin * refactor(transcription): resolve channel audio settings on demand * style(webui): neutralize voice waveform color * feat(webui): add voice input tooltip * feat(webui): add voice input keyboard shortcut * fix(webui): distinguish voice shortcut platforms * fix(webui): place voice button after model selector * refactor(webui): share voice hold recording helpers * fix(desktop): allow microphone voice input * fix(webui): stabilize token usage month labels * feat(webui): show voice input on settings overview * fix(webui): label voice capability as recognition * fix(webui): align capability overview status * refactor(webui): isolate transcription socket handling * fix(webui): soften silent voice waveform * refactor(audio): clarify transcription service location * docs(transcription): clarify audio and provider boundaries * fix(exec): reduce session output polling flake
This commit is contained in:
parent
06d454a225
commit
9c81280300
@ -47,6 +47,9 @@
|
|||||||
],
|
],
|
||||||
"mac": {
|
"mac": {
|
||||||
"category": "public.app-category.developer-tools",
|
"category": "public.app-category.developer-tools",
|
||||||
|
"extendInfo": {
|
||||||
|
"NSMicrophoneUsageDescription": "nanobot uses the microphone to transcribe voice input before you send messages."
|
||||||
|
},
|
||||||
"target": [
|
"target": [
|
||||||
"dmg"
|
"dmg"
|
||||||
]
|
]
|
||||||
|
|||||||
@ -15,6 +15,7 @@ import {
|
|||||||
protocol,
|
protocol,
|
||||||
session,
|
session,
|
||||||
shell,
|
shell,
|
||||||
|
systemPreferences,
|
||||||
} from "electron";
|
} from "electron";
|
||||||
import type { IpcMainInvokeEvent, WebContents } from "electron";
|
import type { IpcMainInvokeEvent, WebContents } from "electron";
|
||||||
|
|
||||||
@ -100,6 +101,58 @@ function isTrustedAppUrl(rawUrl: string): boolean {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Decide whether a permission request/check originates from nanobot's own UI.
 *
 * The decision is made on the single most specific identity available, in
 * priority order:
 *   1. `details.requestingUrl`  – URL of the frame asking for the permission
 *   2. `details.securityOrigin` – origin attached to the media request/check
 *   3. `webContents.getURL()`   – top-level page URL, used only as a last resort
 *
 * Accepting the request when *any* of these is trusted would let an untrusted
 * sub-frame inherit the microphone permission from the trusted page that
 * embeds it, because the top-level URL is trusted even when the requesting
 * frame is not.
 *
 * NOTE(review): per the Electron docs `requestingUrl` is omitted for
 * cross-origin sub-frame permission checks, which is why `securityOrigin` is
 * consulted before falling back to the top-level URL — confirm against the
 * Electron version in use.
 *
 * @param webContents - Contents that issued the request; may be `null`.
 * @param details - Opaque Electron permission details object.
 * @returns `true` only when the chosen identity passes `isTrustedAppUrl`.
 */
function isTrustedPermissionRequest(
  webContents: WebContents | null,
  details: unknown,
): boolean {
  const identities: unknown[] = [
    permissionDetail(details, "requestingUrl"),
    permissionDetail(details, "securityOrigin"),
    webContents?.getURL(),
  ];
  // Empty strings carry no identity, so skip them instead of letting them
  // shadow a later, meaningful value.
  const requester = identities.find(
    (url): url is string => typeof url === "string" && url.length > 0,
  );
  return requester !== undefined && isTrustedAppUrl(requester);
}
|
||||||
|
|
||||||
|
/**
 * Read one property from an untyped permission `details` value.
 *
 * @param details - Any value; only non-null objects are inspected.
 * @param key - Property name to look up.
 * @returns The property value, or `undefined` when `details` is not an object
 *          (or the property is absent).
 */
function permissionDetail(details: unknown, key: string): unknown {
  if (details === null || typeof details !== "object") {
    return undefined;
  }
  const bag = details as Record<string, unknown>;
  return bag[key];
}
|
||||||
|
|
||||||
|
/**
 * Report whether a media permission asks for audio and nothing else.
 *
 * When `details.mediaTypes` is an array it must contain `"audio"` and must not
 * contain `"video"`. Otherwise the singular `details.mediaType` field must be
 * exactly `"audio"`.
 *
 * @param details - Opaque Electron permission details object.
 * @returns `true` for audio-only requests, `false` for everything else.
 */
function isAudioOnlyMediaRequest(details: unknown): boolean {
  const requested = permissionDetail(details, "mediaTypes");
  if (!Array.isArray(requested)) {
    return permissionDetail(details, "mediaType") === "audio";
  }
  const wantsAudio = requested.includes("audio");
  const wantsVideo = requested.includes("video");
  return wantsAudio && !wantsVideo;
}
|
||||||
|
|
||||||
|
/**
 * Resolve the OS-level microphone permission.
 *
 * On platforms other than macOS this resolves to `true` without consulting the
 * system. On macOS the current status is read first: `"granted"` resolves to
 * `true`, `"denied"` / `"restricted"` resolve to `false`, and any other status
 * asks the system for access.
 *
 * @returns Whether microphone capture is permitted at the OS level.
 */
async function requestNativeMicrophoneAccess(): Promise<boolean> {
  if (process.platform !== "darwin") {
    return true;
  }
  switch (systemPreferences.getMediaAccessStatus("microphone")) {
    case "granted":
      return true;
    case "denied":
    case "restricted":
      return false;
    default: {
      const granted = await systemPreferences.askForMediaAccess("microphone");
      return granted;
    }
  }
}
|
||||||
|
|
||||||
|
/**
 * Install the default session's permission check and request handlers.
 *
 * Both handlers apply the same policy: only `"media"` permissions that come
 * from a trusted request and are audio-only are allowed. The request handler
 * additionally waits for the native microphone permission before answering,
 * and answers `false` if that lookup rejects.
 */
function registerPermissionHandlers(): void {
  const { defaultSession } = session;

  defaultSession.setPermissionCheckHandler((webContents, permission, _origin, details) => {
    if (permission !== "media") {
      return false;
    }
    if (!isTrustedPermissionRequest(webContents, details)) {
      return false;
    }
    return isAudioOnlyMediaRequest(details);
  });

  defaultSession.setPermissionRequestHandler((webContents, permission, callback, details) => {
    const allowed =
      permission === "media"
      && isTrustedPermissionRequest(webContents, details)
      && isAudioOnlyMediaRequest(details);
    if (!allowed) {
      callback(false);
      return;
    }
    // Fire-and-forget: the result is reported through `callback` either way.
    void requestNativeMicrophoneAccess().then(callback, () => callback(false));
  });
}
|
||||||
|
|
||||||
function assertTrustedIpc(event: IpcMainInvokeEvent): void {
|
function assertTrustedIpc(event: IpcMainInvokeEvent): void {
|
||||||
const frameUrl = event.senderFrame?.url || event.sender.getURL();
|
const frameUrl = event.senderFrame?.url || event.sender.getURL();
|
||||||
if (!isTrustedAppUrl(frameUrl)) {
|
if (!isTrustedAppUrl(frameUrl)) {
|
||||||
@ -749,6 +802,7 @@ app.whenReady().then(async () => {
|
|||||||
}
|
}
|
||||||
|
|
||||||
registerIpcHandlers();
|
registerIpcHandlers();
|
||||||
|
registerPermissionHandlers();
|
||||||
registerAppProtocol(webDist, devUrl);
|
registerAppProtocol(webDist, devUrl);
|
||||||
|
|
||||||
mainWindow = createWindow();
|
mainWindow = createWindow();
|
||||||
|
|||||||
@ -234,7 +234,7 @@ nanobot channels login <channel_name> --force # re-authenticate
|
|||||||
| `_handle_message(sender_id, chat_id, content, media?, metadata?, session_key?)` | **Call this when you receive a message.** Checks `is_allowed()`, then publishes to the bus. Automatically sets `_wants_stream` if `supports_streaming` is true. |
|
| `_handle_message(sender_id, chat_id, content, media?, metadata?, session_key?)` | **Call this when you receive a message.** Checks `is_allowed()`, then publishes to the bus. Automatically sets `_wants_stream` if `supports_streaming` is true. |
|
||||||
| `is_allowed(sender_id)` | Checks against `config.allow_from`; `"*"` allows all, `[]` denies all. |
|
| `is_allowed(sender_id)` | Checks against `config.allow_from`; `"*"` allows all, `[]` denies all. |
|
||||||
| `default_config()` (classmethod) | Returns default config dict for `nanobot onboard`. Override to declare your fields. |
|
| `default_config()` (classmethod) | Returns default config dict for `nanobot onboard`. Override to declare your fields. |
|
||||||
| `transcribe_audio(file_path)` | Transcribes audio via Groq Whisper (if configured). |
|
| `transcribe_audio(file_path)` | Transcribes audio via the shared top-level `transcription` config (if configured). |
|
||||||
| `supports_streaming` (property) | `True` when config has `"streaming": true` **and** subclass overrides `send_delta()`. |
|
| `supports_streaming` (property) | `True` when config has `"streaming": true` **and** subclass overrides `send_delta()`. |
|
||||||
| `is_running` | Returns `self._running`. |
|
| `is_running` | Returns `self._running`. |
|
||||||
| `login(force=False)` | Perform interactive login (e.g. QR code scan). Returns `True` if already authenticated or login succeeds. Override in subclasses that support interactive login. |
|
| `login(force=False)` | Perform interactive login (e.g. QR code scan). Returns `True` if already authenticated or login succeeds. Override in subclasses that support interactive login. |
|
||||||
|
|||||||
@ -119,7 +119,7 @@ ANTHROPIC_API_KEY="$(bw get password api/anthropic)" nanobot agent
|
|||||||
## Providers
|
## Providers
|
||||||
|
|
||||||
> [!TIP]
|
> [!TIP]
|
||||||
> - **Voice transcription**: Voice messages (Telegram, WhatsApp) are automatically transcribed using Whisper. By default Groq is used (free tier). Set `"transcriptionProvider": "openai"` under `channels` to use OpenAI Whisper instead, and optionally set `"transcriptionLanguage": "en"` (or another ISO-639-1 code) for more accurate transcription. The API key is picked from the matching provider config.
|
> - **Voice transcription**: Voice messages and WebUI/desktop microphone input use the shared top-level `transcription` settings. By default Groq Whisper is used; set `transcription.provider` to `"openai"` to use OpenAI Whisper. API keys still live in the matching `providers.<provider>` config.
|
||||||
> - **MiniMax Coding Plan**: Exclusive discount links for the nanobot community: [Overseas](https://platform.minimax.io/subscribe/coding-plan?code=9txpdXw04g&source=link) · [Mainland China](https://platform.minimaxi.com/subscribe/token-plan?code=GILTJpMTqZ&source=link)
|
> - **MiniMax Coding Plan**: Exclusive discount links for the nanobot community: [Overseas](https://platform.minimax.io/subscribe/coding-plan?code=9txpdXw04g&source=link) · [Mainland China](https://platform.minimaxi.com/subscribe/token-plan?code=GILTJpMTqZ&source=link)
|
||||||
> - **MiniMax (Mainland China)**: If your API key is from MiniMax's mainland China platform (minimaxi.com), set `"apiBase": "https://api.minimaxi.com/v1"` in your minimax provider config.
|
> - **MiniMax (Mainland China)**: If your API key is from MiniMax's mainland China platform (minimaxi.com), set `"apiBase": "https://api.minimaxi.com/v1"` in your minimax provider config.
|
||||||
> - **MiniMax thinking mode**: Use `providers.minimaxAnthropic` when you want `reasoningEffort` / thinking mode. MiniMax exposes that capability through its Anthropic-compatible endpoint, so nanobot keeps it as a separate provider instead of guessing MiniMax-specific thinking parameters on the generic OpenAI-compatible `minimax` endpoint. It uses the same `MINIMAX_API_KEY`. Default Anthropic-compatible base URL: `https://api.minimax.io/anthropic`; for mainland China use `https://api.minimaxi.com/anthropic`.
|
> - **MiniMax thinking mode**: Use `providers.minimaxAnthropic` when you want `reasoningEffort` / thinking mode. MiniMax exposes that capability through its Anthropic-compatible endpoint, so nanobot keeps it as a separate provider instead of guessing MiniMax-specific thinking parameters on the generic OpenAI-compatible `minimax` endpoint. It uses the same `MINIMAX_API_KEY`. Default Anthropic-compatible base URL: `https://api.minimax.io/anthropic`; for mainland China use `https://api.minimaxi.com/anthropic`.
|
||||||
@ -1100,6 +1100,61 @@ Set `agents.defaults.modelPreset` to start with a named preset:
|
|||||||
|
|
||||||
When `modelPreset` is `null` or omitted, startup uses the implicit `default` preset from `agents.defaults.*`. Runtime changes made with `/model <preset>` are not written back to `config.json`; they affect future turns until the process restarts or another model/config change replaces them.
|
When `modelPreset` is `null` or omitted, startup uses the implicit `default` preset from `agents.defaults.*`. Runtime changes made with `/model <preset>` are not written back to `config.json`; they affect future turns until the process restarts or another model/config change replaces them.
|
||||||
|
|
||||||
|
## Transcription Settings
|
||||||
|
|
||||||
|
Audio transcription is a shared capability used by chat-channel voice messages and by WebUI/desktop microphone input. Chat-channel voice messages are transcribed automatically before they enter the agent. WebUI and desktop microphone input is transcribed into the composer first, so you can edit the text before sending.
|
||||||
|
|
||||||
|
Configure transcription under the top-level `transcription` section:
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"transcription": {
|
||||||
|
"enabled": true,
|
||||||
|
"provider": "groq",
|
||||||
|
"model": null,
|
||||||
|
"language": null,
|
||||||
|
"maxDurationSec": 120,
|
||||||
|
"maxUploadMb": 25
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
| Setting | Default | Description |
|
||||||
|
|---------|---------|-------------|
|
||||||
|
| `enabled` | `true` | Enables audio transcription for both chat-channel voice messages and WebUI/desktop microphone input. |
|
||||||
|
| `provider` | `"groq"` | Transcription backend: `"groq"` or `"openai"`. |
|
||||||
|
| `model` | provider default | Optional transcription model override. Defaults to `whisper-large-v3` for Groq and `whisper-1` for OpenAI. |
|
||||||
|
| `language` | `null` | Optional ISO-639-1 (two-letter) language hint, e.g. `"en"`, `"zh"`, `"ko"`, or `"ja"`. |
|
||||||
|
| `maxDurationSec` | `120` | Maximum WebUI/desktop recording duration. |
|
||||||
|
| `maxUploadMb` | `25` | Maximum WebUI/desktop audio upload size. |
|
||||||
|
|
||||||
|
Provider and language resolution is intentionally ordered for backwards compatibility:
|
||||||
|
|
||||||
|
1. `transcription.provider` / `transcription.language`
|
||||||
|
2. Legacy `channels.transcriptionProvider` / `channels.transcriptionLanguage`
|
||||||
|
3. Built-in defaults (`provider: "groq"`, no language hint)
|
||||||
|
|
||||||
|
The legacy `channels.*` transcription fields existed before transcription became a shared capability across chat channels and WebUI/desktop microphone input. They are still read so older `config.json` files keep working, but they are no longer the preferred configuration surface. If both old and new fields are present, the top-level `transcription` values are the source of truth.
|
||||||
|
|
||||||
|
Transcription credentials are intentionally not stored in `transcription`. Put the API key and optional endpoint in the matching provider config:
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"providers": {
|
||||||
|
"groq": {
|
||||||
|
"apiKey": "gsk-...",
|
||||||
|
"apiBase": "https://api.groq.com/openai/v1"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"transcription": {
|
||||||
|
"provider": "groq",
|
||||||
|
"language": "zh"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
Selecting a transcription provider does not configure credentials by itself. For example, the effective provider may default to Groq for compatibility, but transcription is only usable when `providers.groq.apiKey` or the matching environment-backed config is available. The Settings UI writes only the top-level `transcription` fields.
|
||||||
|
|
||||||
## Channel Settings
|
## Channel Settings
|
||||||
|
|
||||||
Global settings that apply to all channels. Configure under the `channels` section in `~/.nanobot/config.json`:
|
Global settings that apply to all channels. Configure under the `channels` section in `~/.nanobot/config.json`:
|
||||||
@ -1111,8 +1166,6 @@ Global settings that apply to all channels. Configure under the `channels` secti
|
|||||||
"sendToolHints": false,
|
"sendToolHints": false,
|
||||||
"extractDocumentText": true,
|
"extractDocumentText": true,
|
||||||
"sendMaxRetries": 3,
|
"sendMaxRetries": 3,
|
||||||
"transcriptionProvider": "groq",
|
|
||||||
"transcriptionLanguage": null,
|
|
||||||
"telegram": { ... }
|
"telegram": { ... }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -1125,8 +1178,8 @@ Global settings that apply to all channels. Configure under the `channels` secti
|
|||||||
| `showReasoning` | `true` | Allow channels to surface model reasoning/thinking content (DeepSeek-R1 `reasoning_content`, Anthropic `thinking_blocks`, inline `<think>` tags). Reasoning flows as a dedicated stream with `_reasoning_delta` / `_reasoning_end` markers — channels override `send_reasoning_delta` / `send_reasoning_end` to render in-place updates. Even with `true`, channels without those overrides stay no-op silently. Currently surfaced on CLI and WebSocket/WebUI (italic shimmer header, auto-collapses after the stream ends); Telegram / Slack / Discord / Feishu / WeChat / Matrix keep the base no-op until their bubble UI is adapted. Independent of `sendProgress`. |
|
| `showReasoning` | `true` | Allow channels to surface model reasoning/thinking content (DeepSeek-R1 `reasoning_content`, Anthropic `thinking_blocks`, inline `<think>` tags). Reasoning flows as a dedicated stream with `_reasoning_delta` / `_reasoning_end` markers — channels override `send_reasoning_delta` / `send_reasoning_end` to render in-place updates. Even with `true`, channels without those overrides stay no-op silently. Currently surfaced on CLI and WebSocket/WebUI (italic shimmer header, auto-collapses after the stream ends); Telegram / Slack / Discord / Feishu / WeChat / Matrix keep the base no-op until their bubble UI is adapted. Independent of `sendProgress`. |
|
||||||
| `extractDocumentText` | `true` | Extract supported document/text attachments into the model prompt. Set to `false` to keep document content out of the prompt and include attachment path references instead. |
|
| `extractDocumentText` | `true` | Extract supported document/text attachments into the model prompt. Set to `false` to keep document content out of the prompt and include attachment path references instead. |
|
||||||
| `sendMaxRetries` | `3` | Max delivery attempts per outbound message, including the initial send (0-10 configured, minimum 1 actual attempt) |
|
| `sendMaxRetries` | `3` | Max delivery attempts per outbound message, including the initial send (0-10 configured, minimum 1 actual attempt) |
|
||||||
| `transcriptionProvider` | `"groq"` | Voice transcription backend: `"groq"` (free tier, default) or `"openai"`. API key and optional `apiBase` are auto-resolved from the matching provider config. Chat-style bases such as `https://api.groq.com/openai/v1` are normalized to the audio transcription endpoint. |
|
|
||||||
| `transcriptionLanguage` | `null` | Optional ISO-639-1 language hint for audio transcription, e.g. `"en"`, `"ko"`, `"ja"`. |
|
`channels.transcriptionProvider` and `channels.transcriptionLanguage` are deprecated compatibility fields. They remain as a read-only fallback for older configs, but new configuration should use top-level `transcription.provider` and `transcription.language`.
|
||||||
|
|
||||||
`sendProgress` and `sendToolHints` can also be overridden per channel. The
|
`sendProgress` and `sendToolHints` can also be overridden per channel. The
|
||||||
global values stay as defaults for channels that do not set their own value:
|
global values stay as defaults for channels that do not set their own value:
|
||||||
|
|||||||
@ -24,6 +24,7 @@ DEFAULT_WAIT_FOR_MS = 10_000
|
|||||||
MAX_WAIT_FOR_MS = 120_000
|
MAX_WAIT_FOR_MS = 120_000
|
||||||
DEFAULT_MAX_OUTPUT_CHARS = 10_000
|
DEFAULT_MAX_OUTPUT_CHARS = 10_000
|
||||||
MAX_OUTPUT_CHARS = 50_000
|
MAX_OUTPUT_CHARS = 50_000
|
||||||
|
OUTPUT_DRAIN_GRACE_S = 0.1
|
||||||
|
|
||||||
|
|
||||||
@dataclass(slots=True)
|
@dataclass(slots=True)
|
||||||
@ -139,6 +140,8 @@ class _ExecSession:
|
|||||||
asyncio.gather(self._stdout_task, self._stderr_task),
|
asyncio.gather(self._stdout_task, self._stderr_task),
|
||||||
timeout=2.0,
|
timeout=2.0,
|
||||||
)
|
)
|
||||||
|
elif yield_time_ms > 0:
|
||||||
|
await self._wait_for_buffered_output()
|
||||||
|
|
||||||
async with self._lock:
|
async with self._lock:
|
||||||
output = "".join(self._chunks)
|
output = "".join(self._chunks)
|
||||||
@ -163,6 +166,14 @@ class _ExecSession:
|
|||||||
with suppress(asyncio.TimeoutError):
|
with suppress(asyncio.TimeoutError):
|
||||||
await asyncio.wait_for(self.process.wait(), timeout=5.0)
|
await asyncio.wait_for(self.process.wait(), timeout=5.0)
|
||||||
|
|
||||||
|
async def _wait_for_buffered_output(self) -> None:
    """Wait briefly for at least one output chunk to be buffered.

    Polls ``self._chunks`` under ``self._lock`` every 10 ms and returns as
    soon as it is non-empty, or once ``OUTPUT_DRAIN_GRACE_S`` seconds have
    elapsed, whichever comes first.
    """
    give_up_at = time.monotonic() + OUTPUT_DRAIN_GRACE_S
    while True:
        if time.monotonic() >= give_up_at:
            return
        async with self._lock:
            has_output = bool(self._chunks)
        if has_output:
            return
        await asyncio.sleep(0.01)
|
||||||
|
|
||||||
|
|
||||||
class ExecSessionManager:
|
class ExecSessionManager:
|
||||||
def __init__(self, *, max_sessions: int = 8, idle_timeout: int = 1800) -> None:
|
def __init__(self, *, max_sessions: int = 8, idle_timeout: int = 1800) -> None:
|
||||||
|
|||||||
2
nanobot/audio/__init__.py
Normal file
2
nanobot/audio/__init__.py
Normal file
@ -0,0 +1,2 @@
|
|||||||
|
"""Shared audio service helpers."""
|
||||||
|
|
||||||
183
nanobot/audio/transcription.py
Normal file
183
nanobot/audio/transcription.py
Normal file
@ -0,0 +1,183 @@
|
|||||||
|
"""Application-level audio transcription service.
|
||||||
|
|
||||||
|
This module owns nanobot's transcription behavior: config resolution,
|
||||||
|
legacy channel fallback, upload validation, temporary-file handling, and
|
||||||
|
dispatch to provider adapters. It deliberately does not know provider-specific
|
||||||
|
HTTP details; those live in ``nanobot.providers.transcription``.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from contextlib import suppress
|
||||||
|
from dataclasses import dataclass, field
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Any, Literal
|
||||||
|
|
||||||
|
from loguru import logger
|
||||||
|
|
||||||
|
from nanobot.config.paths import get_media_dir
|
||||||
|
from nanobot.utils.media_decode import FileSizeExceeded, save_base64_data_url
|
||||||
|
|
||||||
|
TranscriptionProviderName = Literal["groq", "openai"]
|
||||||
|
|
||||||
|
_DEFAULT_PROVIDER: TranscriptionProviderName = "groq"
|
||||||
|
_DEFAULT_MODELS: dict[TranscriptionProviderName, str] = {
|
||||||
|
"groq": "whisper-large-v3",
|
||||||
|
"openai": "whisper-1",
|
||||||
|
}
|
||||||
|
_MAX_AUDIO_BYTES_FALLBACK = 25 * 1024 * 1024
|
||||||
|
_AUDIO_MIME_ALLOWED: frozenset[str] = frozenset({
|
||||||
|
"audio/aac",
|
||||||
|
"audio/flac",
|
||||||
|
"audio/m4a",
|
||||||
|
"audio/mp4",
|
||||||
|
"audio/mpeg",
|
||||||
|
"audio/ogg",
|
||||||
|
"audio/wav",
|
||||||
|
"audio/webm",
|
||||||
|
"audio/x-m4a",
|
||||||
|
"audio/x-wav",
|
||||||
|
})
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(frozen=True)
class EffectiveTranscriptionConfig:
    """Immutable snapshot of the transcription settings after resolution.

    Instances are produced by ``resolve_transcription_config`` and consumed by
    the transcription entry points in this module.
    """

    # Master switch for transcription.
    enabled: bool
    # Selected backend name.
    provider: TranscriptionProviderName
    # Model identifier handed to the backend.
    model: str
    # Optional language hint; ``None`` means no hint.
    language: str | None
    # Credential for the selected backend; kept out of ``repr`` output.
    api_key: str = field(repr=False)
    # Optional endpoint override; empty string means "not set".
    api_base: str
    # Longest accepted recording, in seconds.
    max_duration_sec: int
    # Largest accepted upload, in mebibytes.
    max_upload_mb: int

    @property
    def configured(self) -> bool:
        """Return ``True`` when a non-empty API key is present."""
        return True if self.api_key else False
|
||||||
|
|
||||||
|
|
||||||
|
class TranscriptionIngressError(Exception):
    """Upload/transcription failure identified by a short, stable detail code.

    Attributes:
        detail: The detail code; also used as the exception message.
        extra: Additional keyword data supplied by the raiser.
    """

    def __init__(self, detail: str, **extra: Any):
        self.detail = detail
        self.extra = extra
        super().__init__(detail)
|
||||||
|
|
||||||
|
|
||||||
|
def _as_provider(value: Any) -> TranscriptionProviderName | None:
    """Normalize *value* to a known provider name, or ``None``.

    Only strings are considered; they are stripped and lower-cased and must
    then be a key of ``_DEFAULT_MODELS``.
    """
    if not isinstance(value, str):
        return None
    candidate = value.strip().lower()
    if candidate not in _DEFAULT_MODELS:
        return None
    return candidate  # type: ignore[return-value]
|
||||||
|
|
||||||
|
|
||||||
|
def _provider_config(config: Any, provider: str) -> Any:
|
||||||
|
return getattr(getattr(config, "providers", None), provider, None)
|
||||||
|
|
||||||
|
|
||||||
|
def _extract_data_url_mime(url: str) -> str | None:
|
||||||
|
header, _, _ = url.partition(",")
|
||||||
|
if not header.startswith("data:") or ";base64" not in header:
|
||||||
|
return None
|
||||||
|
return header[5:].split(";", 1)[0].strip().lower() or None
|
||||||
|
|
||||||
|
|
||||||
|
def resolve_transcription_config(config: Any) -> EffectiveTranscriptionConfig:
    """Resolve top-level transcription settings with legacy channel fallback.

    Provider and language are taken from ``config.transcription`` first, then
    from the legacy ``config.channels.transcription_*`` fields, then from the
    built-in defaults. Credentials come from ``config.providers.<provider>``.

    Blank or non-string ``model`` / ``language`` values are treated as unset,
    so a whitespace-only model override falls back to the provider default
    instead of producing an empty model name. Numeric limits explicitly set to
    ``None`` fall back to their defaults instead of raising.

    Args:
        config: Application config object; every attribute is read
            defensively, so missing sections are tolerated.

    Returns:
        The fully resolved, immutable transcription configuration.
    """
    top = getattr(config, "transcription", None)
    channels = getattr(config, "channels", None)

    def _text_or_none(value: Any) -> str | None:
        # Only non-blank strings count as a configured value.
        if isinstance(value, str):
            return value.strip() or None
        return None

    def _int_setting(name: str, default: int) -> int:
        # A missing attribute and an explicit ``None`` both mean "use default".
        raw = getattr(top, name, None)
        return default if raw is None else int(raw)

    provider = (
        _as_provider(getattr(top, "provider", None))
        or _as_provider(getattr(channels, "transcription_provider", None))
        or _DEFAULT_PROVIDER
    )
    provider_cfg = _provider_config(config, provider)
    return EffectiveTranscriptionConfig(
        enabled=bool(getattr(top, "enabled", True)),
        provider=provider,
        model=_text_or_none(getattr(top, "model", None)) or _DEFAULT_MODELS[provider],
        language=(
            _text_or_none(getattr(top, "language", None))
            or _text_or_none(getattr(channels, "transcription_language", None))
        ),
        api_key=getattr(provider_cfg, "api_key", None) or "",
        api_base=getattr(provider_cfg, "api_base", None) or "",
        max_duration_sec=_int_setting("max_duration_sec", 120),
        max_upload_mb=_int_setting("max_upload_mb", 25),
    )
|
||||||
|
|
||||||
|
|
||||||
|
async def transcribe_audio_data_url(
    data_url: Any,
    config: EffectiveTranscriptionConfig,
    *,
    duration_ms: Any = None,
) -> str:
    """Transcribe an uploaded audio data URL and return the recognized text.

    The upload is validated, written to a temporary file under the
    ``webui-transcription`` media directory, transcribed, and the temporary
    file is removed again whether or not transcription succeeds.

    Args:
        data_url: Base64 ``data:`` URL carrying the audio; must be a non-empty
            string.
        config: Already-resolved transcription settings.
        duration_ms: Optional client-reported recording length in
            milliseconds; ignored unless it is an ``int`` or ``float``.

    Raises:
        TranscriptionIngressError: with detail ``missing_audio``, ``disabled``,
            ``not_configured``, ``duration``, ``mime``, ``size``, ``decode``
            or ``empty``, checked in that order.
    """
    if not (isinstance(data_url, str) and data_url):
        raise TranscriptionIngressError("missing_audio")
    if not config.enabled:
        raise TranscriptionIngressError("disabled")
    if not config.configured:
        raise TranscriptionIngressError("not_configured", provider=config.provider)

    if isinstance(duration_ms, (int, float)):
        # One extra second of slack on top of the configured maximum.
        longest_allowed_ms = config.max_duration_sec * 1000 + 1000
        if duration_ms > longest_allowed_ms:
            raise TranscriptionIngressError("duration")

    declared_mime = _extract_data_url_mime(data_url)
    if declared_mime not in _AUDIO_MIME_ALLOWED:
        raise TranscriptionIngressError("mime")

    if config.max_upload_mb:
        byte_limit = config.max_upload_mb * 1024 * 1024
    else:
        byte_limit = _MAX_AUDIO_BYTES_FALLBACK
    byte_limit = max(1, byte_limit)

    saved_path: str | None
    try:
        saved_path = save_base64_data_url(
            data_url,
            get_media_dir("webui-transcription"),
            max_bytes=byte_limit,
        )
    except FileSizeExceeded as exc:
        raise TranscriptionIngressError("size") from exc
    except Exception as exc:
        logger.warning("transcription audio decode failed: {}", exc)
        saved_path = None
    # Raised outside the handler so a decode failure and an empty save result
    # surface identically.
    if not saved_path:
        raise TranscriptionIngressError("decode")

    try:
        transcript = await transcribe_audio_file(saved_path, config)
    finally:
        # Best-effort cleanup of the temporary upload.
        try:
            Path(saved_path).unlink(missing_ok=True)
        except OSError:
            pass

    if not transcript:
        raise TranscriptionIngressError("empty")
    return transcript
|
||||||
|
|
||||||
|
|
||||||
|
async def transcribe_audio_file(
    file_path: str | Path,
    config: EffectiveTranscriptionConfig,
) -> str:
    """Send *file_path* to the configured transcription backend.

    Returns an empty string without contacting any backend when transcription
    is disabled or no API key is configured. Otherwise the OpenAI adapter is
    used for provider ``"openai"`` and the Groq adapter for anything else.
    """
    if not (config.enabled and config.configured):
        return ""

    # Import lazily so only the adapter that is actually needed is loaded.
    if config.provider == "openai":
        from nanobot.providers.transcription import (
            OpenAITranscriptionProvider as adapter_cls,
        )
    else:
        from nanobot.providers.transcription import (
            GroqTranscriptionProvider as adapter_cls,
        )

    adapter = adapter_cls(
        api_key=config.api_key,
        api_base=config.api_base or None,
        language=config.language,
        model=config.model,
    )
    return await adapter.transcribe(file_path)
|
||||||
@ -28,10 +28,6 @@ class BaseChannel(ABC):
|
|||||||
|
|
||||||
name: str = "base"
|
name: str = "base"
|
||||||
display_name: str = "Base"
|
display_name: str = "Base"
|
||||||
transcription_provider: str = "groq"
|
|
||||||
transcription_api_key: str = ""
|
|
||||||
transcription_api_base: str = ""
|
|
||||||
transcription_language: str | None = None
|
|
||||||
send_progress: bool = True
|
send_progress: bool = True
|
||||||
send_tool_hints: bool = False
|
send_tool_hints: bool = False
|
||||||
show_reasoning: bool = True
|
show_reasoning: bool = True
|
||||||
@ -51,24 +47,14 @@ class BaseChannel(ABC):
|
|||||||
|
|
||||||
async def transcribe_audio(self, file_path: str | Path) -> str:
|
async def transcribe_audio(self, file_path: str | Path) -> str:
|
||||||
"""Transcribe an audio file via Whisper (OpenAI or Groq). Returns empty string on failure."""
|
"""Transcribe an audio file via Whisper (OpenAI or Groq). Returns empty string on failure."""
|
||||||
if not self.transcription_api_key:
|
|
||||||
return ""
|
|
||||||
try:
|
try:
|
||||||
if self.transcription_provider == "openai":
|
from nanobot.audio.transcription import (
|
||||||
from nanobot.providers.transcription import OpenAITranscriptionProvider
|
resolve_transcription_config,
|
||||||
provider = OpenAITranscriptionProvider(
|
transcribe_audio_file,
|
||||||
api_key=self.transcription_api_key,
|
)
|
||||||
api_base=self.transcription_api_base or None,
|
from nanobot.config.loader import load_config
|
||||||
language=self.transcription_language or None,
|
|
||||||
)
|
return await transcribe_audio_file(file_path, resolve_transcription_config(load_config()))
|
||||||
else:
|
|
||||||
from nanobot.providers.transcription import GroqTranscriptionProvider
|
|
||||||
provider = GroqTranscriptionProvider(
|
|
||||||
api_key=self.transcription_api_key,
|
|
||||||
api_base=self.transcription_api_base or None,
|
|
||||||
language=self.transcription_language or None,
|
|
||||||
)
|
|
||||||
return await provider.transcribe(file_path)
|
|
||||||
except Exception:
|
except Exception:
|
||||||
self.logger.exception("Audio transcription failed")
|
self.logger.exception("Audio transcription failed")
|
||||||
return ""
|
return ""
|
||||||
|
|||||||
@ -80,11 +80,6 @@ class ChannelManager:
|
|||||||
"""Initialize channels discovered via pkgutil scan + entry_points plugins."""
|
"""Initialize channels discovered via pkgutil scan + entry_points plugins."""
|
||||||
from nanobot.channels.registry import discover_channel_names, discover_enabled
|
from nanobot.channels.registry import discover_channel_names, discover_enabled
|
||||||
|
|
||||||
transcription_provider = self.config.channels.transcription_provider
|
|
||||||
transcription_key = self._resolve_transcription_key(transcription_provider)
|
|
||||||
transcription_base = self._resolve_transcription_base(transcription_provider)
|
|
||||||
transcription_language = self.config.channels.transcription_language
|
|
||||||
|
|
||||||
# Collect enabled module names first, then only import those.
|
# Collect enabled module names first, then only import those.
|
||||||
# Channel configs live in ChannelsConfig's extra fields (via
|
# Channel configs live in ChannelsConfig's extra fields (via
|
||||||
# extra="allow"), so we enumerate candidates from pkgutil scan
|
# extra="allow"), so we enumerate candidates from pkgutil scan
|
||||||
@ -135,10 +130,6 @@ class ChannelManager:
|
|||||||
)
|
)
|
||||||
kwargs["gateway"] = gateway
|
kwargs["gateway"] = gateway
|
||||||
channel = cls(section, self.bus, **kwargs)
|
channel = cls(section, self.bus, **kwargs)
|
||||||
channel.transcription_provider = transcription_provider
|
|
||||||
channel.transcription_api_key = transcription_key
|
|
||||||
channel.transcription_api_base = transcription_base
|
|
||||||
channel.transcription_language = transcription_language
|
|
||||||
channel.send_progress = self._resolve_bool_override(
|
channel.send_progress = self._resolve_bool_override(
|
||||||
section, "send_progress", self.config.channels.send_progress,
|
section, "send_progress", self.config.channels.send_progress,
|
||||||
)
|
)
|
||||||
@ -155,24 +146,6 @@ class ChannelManager:
|
|||||||
|
|
||||||
self._validate_allow_from()
|
self._validate_allow_from()
|
||||||
|
|
||||||
def _resolve_transcription_key(self, provider: str) -> str:
|
|
||||||
"""Pick the API key for the configured transcription provider."""
|
|
||||||
try:
|
|
||||||
if provider == "openai":
|
|
||||||
return self.config.providers.openai.api_key
|
|
||||||
return self.config.providers.groq.api_key
|
|
||||||
except AttributeError:
|
|
||||||
return ""
|
|
||||||
|
|
||||||
def _resolve_transcription_base(self, provider: str) -> str:
|
|
||||||
"""Pick the API base URL for the configured transcription provider."""
|
|
||||||
try:
|
|
||||||
if provider == "openai":
|
|
||||||
return self.config.providers.openai.api_base or ""
|
|
||||||
return self.config.providers.groq.api_base or ""
|
|
||||||
except AttributeError:
|
|
||||||
return ""
|
|
||||||
|
|
||||||
def _validate_allow_from(self) -> None:
|
def _validate_allow_from(self) -> None:
|
||||||
for name, ch in self.channels.items():
|
for name, ch in self.channels.items():
|
||||||
cfg = ch.config
|
cfg = ch.config
|
||||||
|
|||||||
@ -45,6 +45,7 @@ from nanobot.webui.http_utils import (
|
|||||||
query_first as _query_first,
|
query_first as _query_first,
|
||||||
)
|
)
|
||||||
from nanobot.webui.mcp_presets_api import normalize_mcp_preset_mentions
|
from nanobot.webui.mcp_presets_api import normalize_mcp_preset_mentions
|
||||||
|
from nanobot.webui.transcription_ws import webui_transcription_event
|
||||||
from nanobot.webui.websocket_logging import websockets_server_logger
|
from nanobot.webui.websocket_logging import websockets_server_logger
|
||||||
|
|
||||||
|
|
||||||
@ -235,7 +236,7 @@ _VIDEO_MIME_ALLOWED: frozenset[str] = frozenset({
|
|||||||
|
|
||||||
_UPLOAD_MIME_ALLOWED: frozenset[str] = _IMAGE_MIME_ALLOWED | _VIDEO_MIME_ALLOWED
|
_UPLOAD_MIME_ALLOWED: frozenset[str] = _IMAGE_MIME_ALLOWED | _VIDEO_MIME_ALLOWED
|
||||||
|
|
||||||
_DATA_URL_MIME_RE = re.compile(r"^data:([^;]+);base64,", re.DOTALL)
|
_DATA_URL_MIME_RE = re.compile(r"^data:([^;,]+)(?:;[^,]*)*;base64,", re.DOTALL)
|
||||||
|
|
||||||
|
|
||||||
def _extract_data_url_mime(url: str) -> str | None:
|
def _extract_data_url_mime(url: str) -> str | None:
|
||||||
@ -419,7 +420,6 @@ class WebSocketChannel(BaseChannel):
|
|||||||
return None
|
return None
|
||||||
|
|
||||||
# -- Server lifecycle and connection ingress ---------------------------
|
# -- Server lifecycle and connection ingress ---------------------------
|
||||||
# -- Server lifecycle and connection ingress ---------------------------
|
|
||||||
|
|
||||||
async def start(self) -> None:
|
async def start(self) -> None:
|
||||||
from nanobot.utils.logging_bridge import redirect_lib_logging
|
from nanobot.utils.logging_bridge import redirect_lib_logging
|
||||||
@ -703,6 +703,10 @@ class WebSocketChannel(BaseChannel):
|
|||||||
workspace_scope=scope.payload(),
|
workspace_scope=scope.payload(),
|
||||||
)
|
)
|
||||||
return
|
return
|
||||||
|
if t == "transcribe_audio":
|
||||||
|
event, payload = await webui_transcription_event(envelope)
|
||||||
|
await self._send_event(connection, event, **payload)
|
||||||
|
return
|
||||||
if t == "message":
|
if t == "message":
|
||||||
cid = envelope.get("chat_id")
|
cid = envelope.get("chat_id")
|
||||||
content = envelope.get("content")
|
content = envelope.get("content")
|
||||||
|
|||||||
@ -39,8 +39,19 @@ class ChannelsConfig(Base):
|
|||||||
show_reasoning: bool = True # surface model reasoning when channel implements it
|
show_reasoning: bool = True # surface model reasoning when channel implements it
|
||||||
extract_document_text: bool = True # extract text from document attachments before sending to the model
|
extract_document_text: bool = True # extract text from document attachments before sending to the model
|
||||||
send_max_retries: int = Field(default=3, ge=0, le=10) # Max delivery attempts (initial send included)
|
send_max_retries: int = Field(default=3, ge=0, le=10) # Max delivery attempts (initial send included)
|
||||||
transcription_provider: str = "groq" # Voice transcription backend: "groq" or "openai"
|
transcription_provider: str = "groq" # Deprecated: use top-level transcription.provider
|
||||||
transcription_language: str | None = Field(default=None, pattern=r"^[a-z]{2,3}$") # Optional ISO-639-1 hint for audio transcription
|
transcription_language: str | None = Field(default=None, pattern=r"^[a-z]{2,3}$") # Deprecated: use top-level transcription.language
|
||||||
|
|
||||||
|
|
||||||
|
class TranscriptionConfig(Base):
|
||||||
|
"""Cross-channel audio transcription configuration."""
|
||||||
|
|
||||||
|
enabled: bool = True
|
||||||
|
provider: Literal["groq", "openai"] | None = None
|
||||||
|
model: str | None = None
|
||||||
|
language: str | None = Field(default=None, pattern=r"^[a-z]{2,3}$")
|
||||||
|
max_duration_sec: int = Field(default=120, ge=1, le=600)
|
||||||
|
max_upload_mb: int = Field(default=25, ge=1, le=100)
|
||||||
|
|
||||||
|
|
||||||
class DreamConfig(Base):
|
class DreamConfig(Base):
|
||||||
@ -167,7 +178,7 @@ class AgentsConfig(Base):
|
|||||||
class ProviderConfig(Base):
|
class ProviderConfig(Base):
|
||||||
"""LLM provider configuration."""
|
"""LLM provider configuration."""
|
||||||
|
|
||||||
api_key: str | None = None
|
api_key: str | None = Field(default=None, repr=False)
|
||||||
api_base: str | None = None
|
api_base: str | None = None
|
||||||
api_type: Literal["auto", "chat_completions", "responses"] = "auto" # Request API surface
|
api_type: Literal["auto", "chat_completions", "responses"] = "auto" # Request API surface
|
||||||
extra_headers: dict[str, str] | None = None # Custom headers (e.g. APP-Code for AiHubMix)
|
extra_headers: dict[str, str] | None = None # Custom headers (e.g. APP-Code for AiHubMix)
|
||||||
@ -312,6 +323,7 @@ class Config(BaseSettings):
|
|||||||
|
|
||||||
agents: AgentsConfig = Field(default_factory=AgentsConfig)
|
agents: AgentsConfig = Field(default_factory=AgentsConfig)
|
||||||
channels: ChannelsConfig = Field(default_factory=ChannelsConfig)
|
channels: ChannelsConfig = Field(default_factory=ChannelsConfig)
|
||||||
|
transcription: TranscriptionConfig = Field(default_factory=TranscriptionConfig)
|
||||||
providers: ProvidersConfig = Field(default_factory=ProvidersConfig)
|
providers: ProvidersConfig = Field(default_factory=ProvidersConfig)
|
||||||
api: ApiConfig = Field(default_factory=ApiConfig)
|
api: ApiConfig = Field(default_factory=ApiConfig)
|
||||||
gateway: GatewayConfig = Field(default_factory=GatewayConfig)
|
gateway: GatewayConfig = Field(default_factory=GatewayConfig)
|
||||||
|
|||||||
@ -1,6 +1,12 @@
|
|||||||
"""Voice transcription providers (Groq and OpenAI Whisper)."""
|
"""Provider-specific voice transcription adapters.
|
||||||
|
|
||||||
|
This module only knows how to call external transcription APIs such as Groq
|
||||||
|
and OpenAI Whisper. Product-level config fallback, WebUI upload validation,
|
||||||
|
and channel integration live in ``nanobot.audio.transcription``.
|
||||||
|
"""
|
||||||
|
|
||||||
import asyncio
|
import asyncio
|
||||||
|
import mimetypes
|
||||||
import os
|
import os
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
@ -8,6 +14,15 @@ import httpx
|
|||||||
from loguru import logger
|
from loguru import logger
|
||||||
|
|
||||||
_TRANSCRIPTIONS_PATH = "audio/transcriptions"
|
_TRANSCRIPTIONS_PATH = "audio/transcriptions"
|
||||||
|
_AUDIO_MIME_OVERRIDES = {
|
||||||
|
".m4a": "audio/mp4",
|
||||||
|
".mpga": "audio/mpeg",
|
||||||
|
".ogg": "audio/ogg",
|
||||||
|
".opus": "audio/ogg",
|
||||||
|
".wav": "audio/wav",
|
||||||
|
".weba": "audio/webm",
|
||||||
|
".webm": "audio/webm",
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
def _resolve_transcription_url(api_base: str | None, default_url: str) -> str:
|
def _resolve_transcription_url(api_base: str | None, default_url: str) -> str:
|
||||||
@ -26,6 +41,14 @@ def _resolve_transcription_url(api_base: str | None, default_url: str) -> str:
|
|||||||
return f"{base}/{_TRANSCRIPTIONS_PATH}"
|
return f"{base}/{_TRANSCRIPTIONS_PATH}"
|
||||||
|
|
||||||
|
|
||||||
|
def _audio_mime_type(path: Path) -> str:
|
||||||
|
return (
|
||||||
|
_AUDIO_MIME_OVERRIDES.get(path.suffix.lower())
|
||||||
|
or mimetypes.guess_type(path.name)[0]
|
||||||
|
or "application/octet-stream"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
# Up to 3 retries (4 attempts total) with exponential backoff on transient
|
# Up to 3 retries (4 attempts total) with exponential backoff on transient
|
||||||
# failures. Whisper endpoints occasionally return 502/503 under load, and
|
# failures. Whisper endpoints occasionally return 502/503 under load, and
|
||||||
# mobile-network transcription callers hit sporadic connect/read errors.
|
# mobile-network transcription callers hit sporadic connect/read errors.
|
||||||
@ -71,7 +94,7 @@ async def _post_transcription_with_retry(
|
|||||||
async with httpx.AsyncClient() as client:
|
async with httpx.AsyncClient() as client:
|
||||||
for attempt in range(_MAX_RETRIES + 1):
|
for attempt in range(_MAX_RETRIES + 1):
|
||||||
files = {
|
files = {
|
||||||
"file": (path.name, data),
|
"file": (path.name, data, _audio_mime_type(path)),
|
||||||
"model": (None, model),
|
"model": (None, model),
|
||||||
}
|
}
|
||||||
if language:
|
if language:
|
||||||
@ -113,6 +136,16 @@ async def _post_transcription_with_retry(
|
|||||||
|
|
||||||
try:
|
try:
|
||||||
response.raise_for_status()
|
response.raise_for_status()
|
||||||
|
except httpx.HTTPStatusError:
|
||||||
|
body = response.text.strip().replace("\n", " ")[:500]
|
||||||
|
logger.error(
|
||||||
|
"{} transcription HTTP {}{}{}",
|
||||||
|
provider_label,
|
||||||
|
response.status_code,
|
||||||
|
f" {response.reason_phrase}" if response.reason_phrase else "",
|
||||||
|
f": {body}" if body else "",
|
||||||
|
)
|
||||||
|
return ""
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.exception("{} transcription error: {}", provider_label, e)
|
logger.exception("{} transcription error: {}", provider_label, e)
|
||||||
return ""
|
return ""
|
||||||
@ -144,6 +177,7 @@ class OpenAITranscriptionProvider:
|
|||||||
api_key: str | None = None,
|
api_key: str | None = None,
|
||||||
api_base: str | None = None,
|
api_base: str | None = None,
|
||||||
language: str | None = None,
|
language: str | None = None,
|
||||||
|
model: str | None = None,
|
||||||
):
|
):
|
||||||
self.api_key = api_key or os.environ.get("OPENAI_API_KEY")
|
self.api_key = api_key or os.environ.get("OPENAI_API_KEY")
|
||||||
self.api_url = _resolve_transcription_url(
|
self.api_url = _resolve_transcription_url(
|
||||||
@ -151,6 +185,7 @@ class OpenAITranscriptionProvider:
|
|||||||
"https://api.openai.com/v1/audio/transcriptions",
|
"https://api.openai.com/v1/audio/transcriptions",
|
||||||
)
|
)
|
||||||
self.language = language or None
|
self.language = language or None
|
||||||
|
self.model = model or "whisper-1"
|
||||||
logger.debug("OpenAI transcription endpoint: {}", self.api_url)
|
logger.debug("OpenAI transcription endpoint: {}", self.api_url)
|
||||||
|
|
||||||
async def transcribe(self, file_path: str | Path) -> str:
|
async def transcribe(self, file_path: str | Path) -> str:
|
||||||
@ -165,7 +200,7 @@ class OpenAITranscriptionProvider:
|
|||||||
self.api_url,
|
self.api_url,
|
||||||
api_key=self.api_key,
|
api_key=self.api_key,
|
||||||
path=path,
|
path=path,
|
||||||
model="whisper-1",
|
model=self.model,
|
||||||
provider_label="OpenAI",
|
provider_label="OpenAI",
|
||||||
language=self.language,
|
language=self.language,
|
||||||
)
|
)
|
||||||
@ -183,6 +218,7 @@ class GroqTranscriptionProvider:
|
|||||||
api_key: str | None = None,
|
api_key: str | None = None,
|
||||||
api_base: str | None = None,
|
api_base: str | None = None,
|
||||||
language: str | None = None,
|
language: str | None = None,
|
||||||
|
model: str | None = None,
|
||||||
):
|
):
|
||||||
self.api_key = api_key or os.environ.get("GROQ_API_KEY")
|
self.api_key = api_key or os.environ.get("GROQ_API_KEY")
|
||||||
self.api_url = _resolve_transcription_url(
|
self.api_url = _resolve_transcription_url(
|
||||||
@ -190,6 +226,7 @@ class GroqTranscriptionProvider:
|
|||||||
"https://api.groq.com/openai/v1/audio/transcriptions",
|
"https://api.groq.com/openai/v1/audio/transcriptions",
|
||||||
)
|
)
|
||||||
self.language = language or None
|
self.language = language or None
|
||||||
|
self.model = model or "whisper-large-v3"
|
||||||
logger.debug("Groq transcription endpoint: {}", self.api_url)
|
logger.debug("Groq transcription endpoint: {}", self.api_url)
|
||||||
|
|
||||||
async def transcribe(self, file_path: str | Path) -> str:
|
async def transcribe(self, file_path: str | Path) -> str:
|
||||||
@ -215,7 +252,7 @@ class GroqTranscriptionProvider:
|
|||||||
self.api_url,
|
self.api_url,
|
||||||
api_key=self.api_key,
|
api_key=self.api_key,
|
||||||
path=path,
|
path=path,
|
||||||
model="whisper-large-v3",
|
model=self.model,
|
||||||
provider_label="Groq",
|
provider_label="Groq",
|
||||||
language=self.language,
|
language=self.language,
|
||||||
)
|
)
|
||||||
|
|||||||
@ -18,13 +18,30 @@ from nanobot.utils.helpers import safe_filename
|
|||||||
DEFAULT_MAX_BYTES = 10 * 1024 * 1024
|
DEFAULT_MAX_BYTES = 10 * 1024 * 1024
|
||||||
MAX_FILE_SIZE = DEFAULT_MAX_BYTES
|
MAX_FILE_SIZE = DEFAULT_MAX_BYTES
|
||||||
|
|
||||||
_DATA_URL_RE = re.compile(r"^data:([^;]+);base64,(.+)$", re.DOTALL)
|
_DATA_URL_RE = re.compile(r"^data:([^;,]+)(?:;[^,]*)*;base64,(.+)$", re.DOTALL)
|
||||||
|
_MIME_EXTENSION_OVERRIDES = {
|
||||||
|
# Python's ``mimetypes`` maps browser-recorded audio/webm to ``.weba`` and
|
||||||
|
# audio/ogg to ``.oga`` on macOS. Some transcription APIs validate by the
|
||||||
|
# file extension and accept the canonical container extensions instead.
|
||||||
|
"application/ogg": ".ogg",
|
||||||
|
"audio/ogg": ".ogg",
|
||||||
|
"audio/mpga": ".mpga",
|
||||||
|
"audio/wav": ".wav",
|
||||||
|
"audio/webm": ".webm",
|
||||||
|
"audio/x-m4a": ".m4a",
|
||||||
|
"audio/x-wav": ".wav",
|
||||||
|
"audio/vnd.wave": ".wav",
|
||||||
|
"video/webm": ".webm",
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
class FileSizeExceeded(Exception):
|
class FileSizeExceededError(Exception):
|
||||||
"""Raised when a decoded payload exceeds the caller's size limit."""
|
"""Raised when a decoded payload exceeds the caller's size limit."""
|
||||||
|
|
||||||
|
|
||||||
|
FileSizeExceeded = FileSizeExceededError
|
||||||
|
|
||||||
|
|
||||||
def save_base64_data_url(
|
def save_base64_data_url(
|
||||||
data_url: str,
|
data_url: str,
|
||||||
media_dir: Path,
|
media_dir: Path,
|
||||||
@ -40,7 +57,7 @@ def save_base64_data_url(
|
|||||||
m = _DATA_URL_RE.match(data_url)
|
m = _DATA_URL_RE.match(data_url)
|
||||||
if not m:
|
if not m:
|
||||||
return None
|
return None
|
||||||
mime_type, b64_payload = m.group(1), m.group(2)
|
mime_type, b64_payload = m.group(1).strip().lower(), m.group(2)
|
||||||
try:
|
try:
|
||||||
raw = base64.b64decode(b64_payload)
|
raw = base64.b64decode(b64_payload)
|
||||||
except Exception:
|
except Exception:
|
||||||
@ -48,7 +65,7 @@ def save_base64_data_url(
|
|||||||
limit = DEFAULT_MAX_BYTES if max_bytes is None else max_bytes
|
limit = DEFAULT_MAX_BYTES if max_bytes is None else max_bytes
|
||||||
if len(raw) > limit:
|
if len(raw) > limit:
|
||||||
raise FileSizeExceeded(f"File exceeds {limit // (1024 * 1024)}MB limit")
|
raise FileSizeExceeded(f"File exceeds {limit // (1024 * 1024)}MB limit")
|
||||||
ext = mimetypes.guess_extension(mime_type) or ".bin"
|
ext = _MIME_EXTENSION_OVERRIDES.get(mime_type) or mimetypes.guess_extension(mime_type) or ".bin"
|
||||||
filename = f"{uuid.uuid4().hex[:12]}{ext}"
|
filename = f"{uuid.uuid4().hex[:12]}{ext}"
|
||||||
dest = media_dir / safe_filename(filename)
|
dest = media_dir / safe_filename(filename)
|
||||||
dest.write_bytes(raw)
|
dest.write_bytes(raw)
|
||||||
|
|||||||
@ -15,6 +15,7 @@ from zoneinfo import ZoneInfo
|
|||||||
|
|
||||||
import httpx
|
import httpx
|
||||||
|
|
||||||
|
from nanobot.audio.transcription import resolve_transcription_config
|
||||||
from nanobot.config.loader import get_config_path, load_config, save_config
|
from nanobot.config.loader import get_config_path, load_config, save_config
|
||||||
from nanobot.config.schema import ModelPresetConfig
|
from nanobot.config.schema import ModelPresetConfig
|
||||||
from nanobot.providers.image_generation import (
|
from nanobot.providers.image_generation import (
|
||||||
@ -90,6 +91,7 @@ _IMAGE_GENERATION_ASPECT_RATIOS = {
|
|||||||
"2:3",
|
"2:3",
|
||||||
"21:9",
|
"21:9",
|
||||||
}
|
}
|
||||||
|
_TRANSCRIPTION_PROVIDERS = ("groq", "openai")
|
||||||
_CONTEXT_WINDOW_TOKEN_OPTIONS = {65_536, 262_144}
|
_CONTEXT_WINDOW_TOKEN_OPTIONS = {65_536, 262_144}
|
||||||
_MODEL_CONFIGURATION_SLUG_RE = re.compile(r"[^a-z0-9_-]+")
|
_MODEL_CONFIGURATION_SLUG_RE = re.compile(r"[^a-z0-9_-]+")
|
||||||
_ENV_REF_RE = re.compile(r"\$\{([A-Za-z_][A-Za-z0-9_]*)\}")
|
_ENV_REF_RE = re.compile(r"\$\{([A-Za-z_][A-Za-z0-9_]*)\}")
|
||||||
@ -576,6 +578,22 @@ def _image_generation_provider_rows(config: Any) -> list[dict[str, Any]]:
|
|||||||
return rows
|
return rows
|
||||||
|
|
||||||
|
|
||||||
|
def _transcription_provider_rows(config: Any) -> list[dict[str, Any]]:
|
||||||
|
rows: list[dict[str, Any]] = []
|
||||||
|
for name in _TRANSCRIPTION_PROVIDERS:
|
||||||
|
spec = find_by_name(name)
|
||||||
|
provider_config = getattr(config.providers, name, None)
|
||||||
|
rows.append({
|
||||||
|
"name": name,
|
||||||
|
"label": spec.label if spec is not None else name,
|
||||||
|
"configured": bool(getattr(provider_config, "api_key", None)),
|
||||||
|
"api_key_hint": _mask_secret_hint(getattr(provider_config, "api_key", None)),
|
||||||
|
"api_base": getattr(provider_config, "api_base", None),
|
||||||
|
"default_api_base": spec.default_api_base if spec and spec.default_api_base else None,
|
||||||
|
})
|
||||||
|
return rows
|
||||||
|
|
||||||
|
|
||||||
def settings_payload(
|
def settings_payload(
|
||||||
*,
|
*,
|
||||||
requires_restart: bool = False,
|
requires_restart: bool = False,
|
||||||
@ -633,6 +651,7 @@ def settings_payload(
|
|||||||
|
|
||||||
search_config = config.tools.web.search
|
search_config = config.tools.web.search
|
||||||
image_config = config.tools.image_generation
|
image_config = config.tools.image_generation
|
||||||
|
transcription = resolve_transcription_config(config)
|
||||||
search_provider = (
|
search_provider = (
|
||||||
search_config.provider
|
search_config.provider
|
||||||
if search_config.provider in _WEB_SEARCH_PROVIDER_BY_NAME
|
if search_config.provider in _WEB_SEARCH_PROVIDER_BY_NAME
|
||||||
@ -733,6 +752,16 @@ def settings_payload(
|
|||||||
"save_dir": image_config.save_dir,
|
"save_dir": image_config.save_dir,
|
||||||
"providers": image_providers,
|
"providers": image_providers,
|
||||||
},
|
},
|
||||||
|
"transcription": {
|
||||||
|
"enabled": transcription.enabled,
|
||||||
|
"provider": transcription.provider,
|
||||||
|
"provider_configured": transcription.configured,
|
||||||
|
"model": transcription.model,
|
||||||
|
"language": transcription.language,
|
||||||
|
"max_duration_sec": transcription.max_duration_sec,
|
||||||
|
"max_upload_mb": transcription.max_upload_mb,
|
||||||
|
"providers": _transcription_provider_rows(config),
|
||||||
|
},
|
||||||
"runtime": {
|
"runtime": {
|
||||||
"config_path": str(get_config_path().expanduser()),
|
"config_path": str(get_config_path().expanduser()),
|
||||||
"workspace_path": str(config.workspace_path),
|
"workspace_path": str(config.workspace_path),
|
||||||
@ -1311,3 +1340,71 @@ def update_image_generation_settings(query: QueryParams) -> dict[str, Any]:
|
|||||||
if changed:
|
if changed:
|
||||||
save_config(config)
|
save_config(config)
|
||||||
return settings_payload(requires_restart=changed)
|
return settings_payload(requires_restart=changed)
|
||||||
|
|
||||||
|
|
||||||
|
def update_transcription_settings(query: QueryParams) -> dict[str, Any]:
|
||||||
|
config = load_config()
|
||||||
|
transcription = config.transcription
|
||||||
|
changed = False
|
||||||
|
|
||||||
|
enabled = _query_first(query, "enabled")
|
||||||
|
if enabled is not None:
|
||||||
|
parsed_enabled = _parse_bool(enabled, "enabled")
|
||||||
|
if transcription.enabled != parsed_enabled:
|
||||||
|
transcription.enabled = parsed_enabled
|
||||||
|
changed = True
|
||||||
|
|
||||||
|
provider = _query_first(query, "provider")
|
||||||
|
if provider is not None:
|
||||||
|
provider = provider.strip().lower()
|
||||||
|
if provider not in _TRANSCRIPTION_PROVIDERS:
|
||||||
|
raise WebUISettingsError("unknown transcription provider")
|
||||||
|
if transcription.provider != provider:
|
||||||
|
transcription.provider = provider # type: ignore[assignment]
|
||||||
|
changed = True
|
||||||
|
|
||||||
|
model = _query_first(query, "model")
|
||||||
|
if model is not None:
|
||||||
|
model = model.strip() or None
|
||||||
|
if model is not None and len(model) > 200:
|
||||||
|
raise WebUISettingsError("transcription model is too long")
|
||||||
|
if transcription.model != model:
|
||||||
|
transcription.model = model
|
||||||
|
changed = True
|
||||||
|
|
||||||
|
language = _query_first(query, "language")
|
||||||
|
if language is not None:
|
||||||
|
language = language.strip().lower() or None
|
||||||
|
if language is not None and not re.fullmatch(r"[a-z]{2,3}", language):
|
||||||
|
raise WebUISettingsError("transcription language must be 2-3 lowercase letters")
|
||||||
|
if transcription.language != language:
|
||||||
|
transcription.language = language
|
||||||
|
changed = True
|
||||||
|
|
||||||
|
max_duration_sec = _query_first_alias(query, "max_duration_sec", "maxDurationSec")
|
||||||
|
if max_duration_sec is not None:
|
||||||
|
try:
|
||||||
|
parsed_duration = int(max_duration_sec)
|
||||||
|
except ValueError:
|
||||||
|
raise WebUISettingsError("max_duration_sec must be an integer") from None
|
||||||
|
if parsed_duration < 1 or parsed_duration > 600:
|
||||||
|
raise WebUISettingsError("max_duration_sec must be between 1 and 600")
|
||||||
|
if transcription.max_duration_sec != parsed_duration:
|
||||||
|
transcription.max_duration_sec = parsed_duration
|
||||||
|
changed = True
|
||||||
|
|
||||||
|
max_upload_mb = _query_first_alias(query, "max_upload_mb", "maxUploadMb")
|
||||||
|
if max_upload_mb is not None:
|
||||||
|
try:
|
||||||
|
parsed_upload = int(max_upload_mb)
|
||||||
|
except ValueError:
|
||||||
|
raise WebUISettingsError("max_upload_mb must be an integer") from None
|
||||||
|
if parsed_upload < 1 or parsed_upload > 100:
|
||||||
|
raise WebUISettingsError("max_upload_mb must be between 1 and 100")
|
||||||
|
if transcription.max_upload_mb != parsed_upload:
|
||||||
|
transcription.max_upload_mb = parsed_upload
|
||||||
|
changed = True
|
||||||
|
|
||||||
|
if changed:
|
||||||
|
save_config(config)
|
||||||
|
return settings_payload()
|
||||||
|
|||||||
@ -33,6 +33,7 @@ from nanobot.webui.settings_api import (
|
|||||||
update_model_configuration,
|
update_model_configuration,
|
||||||
update_network_safety_settings,
|
update_network_safety_settings,
|
||||||
update_provider_settings,
|
update_provider_settings,
|
||||||
|
update_transcription_settings,
|
||||||
update_web_search_settings,
|
update_web_search_settings,
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -100,6 +101,8 @@ class WebUISettingsRouter:
|
|||||||
return self._handle_settings_web_search_update(request)
|
return self._handle_settings_web_search_update(request)
|
||||||
if path == "/api/settings/image-generation/update":
|
if path == "/api/settings/image-generation/update":
|
||||||
return self._handle_settings_image_generation_update(request)
|
return self._handle_settings_image_generation_update(request)
|
||||||
|
if path == "/api/settings/transcription/update":
|
||||||
|
return self._handle_settings_transcription_update(request)
|
||||||
if path == "/api/settings/network-safety/update":
|
if path == "/api/settings/network-safety/update":
|
||||||
return self._handle_settings_network_safety_update(request)
|
return self._handle_settings_network_safety_update(request)
|
||||||
if path == "/api/settings/cli-apps":
|
if path == "/api/settings/cli-apps":
|
||||||
@ -275,6 +278,15 @@ class WebUISettingsRouter:
|
|||||||
return self._error_response(e.status, e.message)
|
return self._error_response(e.status, e.message)
|
||||||
return self._json_response(self._with_restart_state(payload, section="image"))
|
return self._json_response(self._with_restart_state(payload, section="image"))
|
||||||
|
|
||||||
|
def _handle_settings_transcription_update(self, request: WsRequest) -> Response:
|
||||||
|
if not self._authorized(request):
|
||||||
|
return self._unauthorized()
|
||||||
|
try:
|
||||||
|
payload = update_transcription_settings(self._query(request))
|
||||||
|
except WebUISettingsError as e:
|
||||||
|
return self._error_response(e.status, e.message)
|
||||||
|
return self._json_response(self._with_restart_state(payload))
|
||||||
|
|
||||||
def _handle_settings_network_safety_update(self, request: WsRequest) -> Response:
|
def _handle_settings_network_safety_update(self, request: WsRequest) -> Response:
|
||||||
if not self._authorized(request):
|
if not self._authorized(request):
|
||||||
return self._unauthorized()
|
return self._unauthorized()
|
||||||
|
|||||||
46
nanobot/webui/transcription_ws.py
Normal file
46
nanobot/webui/transcription_ws.py
Normal file
@ -0,0 +1,46 @@
|
|||||||
|
"""WebUI transcription envelope handling.
|
||||||
|
|
||||||
|
The WebSocket channel owns transport and subscription fan-out. This module owns
|
||||||
|
the WebUI-specific audio transcription action carried over that socket.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
from nanobot.audio.transcription import (
|
||||||
|
TranscriptionIngressError,
|
||||||
|
resolve_transcription_config,
|
||||||
|
transcribe_audio_data_url,
|
||||||
|
)
|
||||||
|
from nanobot.config.loader import load_config
|
||||||
|
|
||||||
|
_MAX_REQUEST_ID_LENGTH = 80
|
||||||
|
|
||||||
|
|
||||||
|
async def webui_transcription_event(envelope: dict[str, Any]) -> tuple[str, dict[str, Any]]:
|
||||||
|
"""Return the WS event name and payload for one WebUI transcription request."""
|
||||||
|
request_id = envelope.get("request_id")
|
||||||
|
valid_request_id = (
|
||||||
|
isinstance(request_id, str)
|
||||||
|
and 0 < len(request_id) <= _MAX_REQUEST_ID_LENGTH
|
||||||
|
)
|
||||||
|
|
||||||
|
def error(detail: str, **extra: Any) -> tuple[str, dict[str, Any]]:
|
||||||
|
payload: dict[str, Any] = {"detail": detail, **extra}
|
||||||
|
if valid_request_id:
|
||||||
|
payload["request_id"] = request_id
|
||||||
|
return "transcription_error", payload
|
||||||
|
|
||||||
|
if not valid_request_id:
|
||||||
|
return error("invalid_request")
|
||||||
|
|
||||||
|
try:
|
||||||
|
text = await transcribe_audio_data_url(
|
||||||
|
envelope.get("data_url"),
|
||||||
|
resolve_transcription_config(load_config()),
|
||||||
|
duration_ms=envelope.get("duration_ms"),
|
||||||
|
)
|
||||||
|
except TranscriptionIngressError as exc:
|
||||||
|
return error(exc.detail, **exc.extra)
|
||||||
|
return "transcription_result", {"request_id": request_id, "text": text}
|
||||||
@ -12,7 +12,8 @@ from nanobot.bus.events import OutboundMessage
|
|||||||
from nanobot.bus.queue import MessageBus
|
from nanobot.bus.queue import MessageBus
|
||||||
from nanobot.channels.base import BaseChannel
|
from nanobot.channels.base import BaseChannel
|
||||||
from nanobot.channels.manager import ChannelManager
|
from nanobot.channels.manager import ChannelManager
|
||||||
from nanobot.config.schema import ChannelsConfig
|
from nanobot.config.loader import save_config
|
||||||
|
from nanobot.config.schema import ChannelsConfig, Config
|
||||||
from nanobot.providers.transcription import GroqTranscriptionProvider as _GroqProvider
|
from nanobot.providers.transcription import GroqTranscriptionProvider as _GroqProvider
|
||||||
from nanobot.providers.transcription import OpenAITranscriptionProvider as _OpenAIProvider
|
from nanobot.providers.transcription import OpenAITranscriptionProvider as _OpenAIProvider
|
||||||
from nanobot.utils.restart import RestartNotice
|
from nanobot.utils.restart import RestartNotice
|
||||||
@ -238,102 +239,103 @@ async def test_manager_loads_plugin_from_dict_config():
|
|||||||
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_manager_propagates_groq_transcription_api_base_to_channels():
|
async def test_base_channel_reads_current_transcription_config_each_call(
|
||||||
from nanobot.channels.manager import ChannelManager
|
tmp_path,
|
||||||
|
monkeypatch: pytest.MonkeyPatch,
|
||||||
fake_config = SimpleNamespace(
|
):
|
||||||
channels=ChannelsConfig.model_validate({
|
"""BaseChannel.transcribe_audio resolves config at call time, not manager init time."""
|
||||||
"fakeplugin": {"enabled": True, "allowFrom": ["*"]},
|
|
||||||
"transcriptionLanguage": "en",
|
|
||||||
}),
|
|
||||||
providers=SimpleNamespace(
|
|
||||||
groq=SimpleNamespace(api_key="groq-key", api_base="http://proxy.local/v1/audio/transcriptions"),
|
|
||||||
openai=SimpleNamespace(api_key="openai-key", api_base="https://api.openai.com/v1/audio/transcriptions"),
|
|
||||||
),
|
|
||||||
)
|
|
||||||
|
|
||||||
with patch(
|
|
||||||
"nanobot.channels.registry.discover_enabled",
|
|
||||||
return_value={"fakeplugin": _FakePlugin},
|
|
||||||
):
|
|
||||||
mgr = ChannelManager.__new__(ChannelManager)
|
|
||||||
mgr.config = fake_config
|
|
||||||
mgr.bus = MessageBus()
|
|
||||||
mgr.channels = {}
|
|
||||||
mgr._dispatch_task = None
|
|
||||||
mgr._init_channels()
|
|
||||||
|
|
||||||
channel = mgr.channels["fakeplugin"]
|
|
||||||
assert channel.transcription_provider == "groq"
|
|
||||||
assert channel.transcription_api_key == "groq-key"
|
|
||||||
assert channel.transcription_api_base == "http://proxy.local/v1/audio/transcriptions"
|
|
||||||
assert channel.transcription_language == "en"
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
|
||||||
async def test_manager_propagates_openai_transcription_api_base_to_channels():
|
|
||||||
from nanobot.channels.manager import ChannelManager
|
|
||||||
|
|
||||||
fake_config = SimpleNamespace(
|
|
||||||
channels=ChannelsConfig.model_validate({
|
|
||||||
"fakeplugin": {"enabled": True, "allowFrom": ["*"]},
|
|
||||||
"transcriptionProvider": "openai",
|
|
||||||
}),
|
|
||||||
providers=SimpleNamespace(
|
|
||||||
openai=SimpleNamespace(
|
|
||||||
api_key="openai-key",
|
|
||||||
api_base="http://proxy.local/v1/audio/transcriptions",
|
|
||||||
),
|
|
||||||
groq=SimpleNamespace(api_key="groq-key", api_base=""),
|
|
||||||
),
|
|
||||||
)
|
|
||||||
|
|
||||||
with patch(
|
|
||||||
"nanobot.channels.registry.discover_enabled",
|
|
||||||
return_value={"fakeplugin": _FakePlugin},
|
|
||||||
):
|
|
||||||
mgr = ChannelManager.__new__(ChannelManager)
|
|
||||||
mgr.config = fake_config
|
|
||||||
mgr.bus = MessageBus()
|
|
||||||
mgr.channels = {}
|
|
||||||
mgr._dispatch_task = None
|
|
||||||
mgr._init_channels()
|
|
||||||
|
|
||||||
channel = mgr.channels["fakeplugin"]
|
|
||||||
assert channel.transcription_provider == "openai"
|
|
||||||
assert channel.transcription_api_key == "openai-key"
|
|
||||||
assert channel.transcription_api_base == "http://proxy.local/v1/audio/transcriptions"
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
|
||||||
async def test_base_channel_passes_api_base_to_openai_transcription_provider():
|
|
||||||
"""BaseChannel.transcribe_audio must forward transcription_api_base to OpenAI."""
|
|
||||||
from nanobot.providers import transcription as transcription_mod
|
from nanobot.providers import transcription as transcription_mod
|
||||||
|
|
||||||
channel = _FakePlugin({"enabled": True, "allowFrom": ["*"]}, MessageBus())
|
config_path = tmp_path / "config.json"
|
||||||
channel.transcription_provider = "openai"
|
config = Config()
|
||||||
channel.transcription_api_key = "k"
|
config.transcription.provider = "openai"
|
||||||
channel.transcription_api_base = "http://override/v1/audio/transcriptions"
|
config.transcription.model = "whisper-custom"
|
||||||
channel.transcription_language = "en"
|
config.transcription.language = "en"
|
||||||
|
config.providers.openai.api_key = "openai-key"
|
||||||
|
config.providers.openai.api_base = "http://openai.local/v1/audio/transcriptions"
|
||||||
|
save_config(config, config_path)
|
||||||
|
monkeypatch.setattr("nanobot.config.loader._current_config_path", config_path)
|
||||||
|
|
||||||
captured: dict[str, object] = {}
|
channel = _FakePlugin({"enabled": True, "allowFrom": ["*"]}, MessageBus())
|
||||||
|
|
||||||
|
calls: list[dict[str, object]] = []
|
||||||
|
|
||||||
class _StubOpenAI:
|
class _StubOpenAI:
|
||||||
def __init__(self, api_key=None, api_base=None, language=None):
|
def __init__(self, api_key=None, api_base=None, language=None, model=None):
|
||||||
captured["api_key"] = api_key
|
calls.append({
|
||||||
captured["api_base"] = api_base
|
"provider": "openai",
|
||||||
captured["language"] = language
|
"api_key": api_key,
|
||||||
|
"api_base": api_base,
|
||||||
|
"language": language,
|
||||||
|
"model": model,
|
||||||
|
})
|
||||||
|
|
||||||
async def transcribe(self, file_path):
|
async def transcribe(self, file_path):
|
||||||
return "ok"
|
return "openai-ok"
|
||||||
|
|
||||||
with patch.object(transcription_mod, "OpenAITranscriptionProvider", _StubOpenAI):
|
class _StubGroq:
|
||||||
result = await channel.transcribe_audio("/tmp/does-not-matter.wav")
|
def __init__(self, api_key=None, api_base=None, language=None, model=None):
|
||||||
|
calls.append({
|
||||||
|
"provider": "groq",
|
||||||
|
"api_key": api_key,
|
||||||
|
"api_base": api_base,
|
||||||
|
"language": language,
|
||||||
|
"model": model,
|
||||||
|
})
|
||||||
|
|
||||||
assert result == "ok"
|
async def transcribe(self, file_path):
|
||||||
assert captured["api_key"] == "k"
|
return "groq-ok"
|
||||||
assert captured["api_base"] == "http://override/v1/audio/transcriptions"
|
|
||||||
assert captured["language"] == "en"
|
with (
|
||||||
|
patch.object(transcription_mod, "OpenAITranscriptionProvider", _StubOpenAI),
|
||||||
|
patch.object(transcription_mod, "GroqTranscriptionProvider", _StubGroq),
|
||||||
|
):
|
||||||
|
assert await channel.transcribe_audio("/tmp/does-not-matter.wav") == "openai-ok"
|
||||||
|
|
||||||
|
config.transcription.provider = "groq"
|
||||||
|
config.transcription.model = "whisper-large-v3-turbo"
|
||||||
|
config.transcription.language = "ko"
|
||||||
|
config.providers.groq.api_key = "groq-key"
|
||||||
|
config.providers.groq.api_base = "http://groq.local/v1/audio/transcriptions"
|
||||||
|
save_config(config, config_path)
|
||||||
|
|
||||||
|
assert await channel.transcribe_audio("/tmp/does-not-matter.wav") == "groq-ok"
|
||||||
|
|
||||||
|
assert calls == [
|
||||||
|
{
|
||||||
|
"provider": "openai",
|
||||||
|
"api_key": "openai-key",
|
||||||
|
"api_base": "http://openai.local/v1/audio/transcriptions",
|
||||||
|
"language": "en",
|
||||||
|
"model": "whisper-custom",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"provider": "groq",
|
||||||
|
"api_key": "groq-key",
|
||||||
|
"api_base": "http://groq.local/v1/audio/transcriptions",
|
||||||
|
"language": "ko",
|
||||||
|
"model": "whisper-large-v3-turbo",
|
||||||
|
},
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_base_channel_respects_disabled_transcription_config(
|
||||||
|
tmp_path,
|
||||||
|
monkeypatch: pytest.MonkeyPatch,
|
||||||
|
):
|
||||||
|
config_path = tmp_path / "config.json"
|
||||||
|
config = Config()
|
||||||
|
config.transcription.enabled = False
|
||||||
|
config.providers.groq.api_key = "groq-key"
|
||||||
|
save_config(config, config_path)
|
||||||
|
monkeypatch.setattr("nanobot.config.loader._current_config_path", config_path)
|
||||||
|
|
||||||
|
channel = _FakePlugin({"enabled": True, "allowFrom": ["*"]}, MessageBus())
|
||||||
|
|
||||||
|
with patch("nanobot.providers.transcription.GroqTranscriptionProvider") as provider:
|
||||||
|
assert await channel.transcribe_audio("/tmp/does-not-matter.wav") == ""
|
||||||
|
provider.assert_not_called()
|
||||||
|
|
||||||
|
|
||||||
def test_openai_transcription_provider_honors_api_base_argument():
|
def test_openai_transcription_provider_honors_api_base_argument():
|
||||||
@ -348,37 +350,6 @@ def test_openai_transcription_provider_honors_api_base_argument():
|
|||||||
assert custom.api_url == "http://override/v1/audio/transcriptions"
|
assert custom.api_url == "http://override/v1/audio/transcriptions"
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
|
||||||
async def test_base_channel_passes_language_to_groq_transcription_provider():
|
|
||||||
"""BaseChannel.transcribe_audio must forward transcription_language to Groq."""
|
|
||||||
from nanobot.providers import transcription as transcription_mod
|
|
||||||
|
|
||||||
channel = _FakePlugin({"enabled": True, "allowFrom": ["*"]}, MessageBus())
|
|
||||||
channel.transcription_provider = "groq"
|
|
||||||
channel.transcription_api_key = "k"
|
|
||||||
channel.transcription_api_base = "http://override/v1/audio/transcriptions"
|
|
||||||
channel.transcription_language = "ko"
|
|
||||||
|
|
||||||
captured: dict[str, object] = {}
|
|
||||||
|
|
||||||
class _StubGroq:
|
|
||||||
def __init__(self, api_key=None, api_base=None, language=None):
|
|
||||||
captured["api_key"] = api_key
|
|
||||||
captured["api_base"] = api_base
|
|
||||||
captured["language"] = language
|
|
||||||
|
|
||||||
async def transcribe(self, file_path):
|
|
||||||
return "ok"
|
|
||||||
|
|
||||||
with patch.object(transcription_mod, "GroqTranscriptionProvider", _StubGroq):
|
|
||||||
result = await channel.transcribe_audio("/tmp/does-not-matter.wav")
|
|
||||||
|
|
||||||
assert result == "ok"
|
|
||||||
assert captured["api_key"] == "k"
|
|
||||||
assert captured["api_base"] == "http://override/v1/audio/transcriptions"
|
|
||||||
assert captured["language"] == "ko"
|
|
||||||
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
# Transcription provider HTTP tests
|
# Transcription provider HTTP tests
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
|||||||
@ -69,6 +69,7 @@ def _make_channel() -> WebSocketChannel:
|
|||||||
[
|
[
|
||||||
("data:image/png;base64,AAAA", "image/png"),
|
("data:image/png;base64,AAAA", "image/png"),
|
||||||
("data:image/jpeg;base64,AAAA", "image/jpeg"),
|
("data:image/jpeg;base64,AAAA", "image/jpeg"),
|
||||||
|
("data:audio/webm;codecs=opus;base64,AAAA", "audio/webm"),
|
||||||
("data:IMAGE/PNG;base64,AAAA", "image/png"),
|
("data:IMAGE/PNG;base64,AAAA", "image/png"),
|
||||||
("data:image/svg+xml;base64,AAAA", "image/svg+xml"),
|
("data:image/svg+xml;base64,AAAA", "image/svg+xml"),
|
||||||
("data:text/plain;base64,AAAA", "text/plain"),
|
("data:text/plain;base64,AAAA", "text/plain"),
|
||||||
|
|||||||
@ -271,8 +271,6 @@ async def test_lid_to_phone_cache_resolves_lid_only_messages():
|
|||||||
async def test_voice_message_transcription_uses_media_path():
|
async def test_voice_message_transcription_uses_media_path():
|
||||||
"""Voice messages are transcribed when media path is available."""
|
"""Voice messages are transcribed when media path is available."""
|
||||||
ch = WhatsAppChannel({"enabled": True, "allowFrom": ["*"]}, MagicMock())
|
ch = WhatsAppChannel({"enabled": True, "allowFrom": ["*"]}, MagicMock())
|
||||||
ch.transcription_provider = "openai"
|
|
||||||
ch.transcription_api_key = "sk-test"
|
|
||||||
ch._handle_message = AsyncMock()
|
ch._handle_message = AsyncMock()
|
||||||
ch.transcribe_audio = AsyncMock(return_value="Hello world")
|
ch.transcribe_audio = AsyncMock(return_value="Hello world")
|
||||||
|
|
||||||
|
|||||||
@ -8,6 +8,8 @@ from unittest.mock import AsyncMock, patch
|
|||||||
import httpx
|
import httpx
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
|
from nanobot.audio.transcription import resolve_transcription_config
|
||||||
|
from nanobot.config.schema import Config
|
||||||
from nanobot.providers.transcription import (
|
from nanobot.providers.transcription import (
|
||||||
GroqTranscriptionProvider,
|
GroqTranscriptionProvider,
|
||||||
OpenAITranscriptionProvider,
|
OpenAITranscriptionProvider,
|
||||||
@ -33,6 +35,65 @@ def _raw_response(status: int, content: bytes) -> httpx.Response:
|
|||||||
return httpx.Response(status_code=status, content=content, request=request)
|
return httpx.Response(status_code=status, content=content, request=request)
|
||||||
|
|
||||||
|
|
||||||
|
def test_resolver_uses_legacy_channel_provider_when_top_level_is_unset() -> None:
|
||||||
|
config = Config()
|
||||||
|
config.channels.transcription_provider = "openai"
|
||||||
|
config.channels.transcription_language = "en"
|
||||||
|
config.providers.openai.api_key = "sk-test"
|
||||||
|
config.providers.openai.api_base = "https://proxy.example/v1"
|
||||||
|
|
||||||
|
resolved = resolve_transcription_config(config)
|
||||||
|
|
||||||
|
assert resolved.provider == "openai"
|
||||||
|
assert resolved.model == "whisper-1"
|
||||||
|
assert resolved.language == "en"
|
||||||
|
assert resolved.api_key == "sk-test"
|
||||||
|
assert resolved.api_base == "https://proxy.example/v1"
|
||||||
|
assert resolved.configured is True
|
||||||
|
|
||||||
|
|
||||||
|
def test_resolver_prefers_top_level_transcription_over_legacy_channels() -> None:
|
||||||
|
config = Config()
|
||||||
|
config.channels.transcription_provider = "openai"
|
||||||
|
config.channels.transcription_language = "en"
|
||||||
|
config.transcription.provider = "groq"
|
||||||
|
config.transcription.model = "whisper-large-v3-turbo"
|
||||||
|
config.transcription.language = "ko"
|
||||||
|
config.providers.groq.api_key = "gsk-test"
|
||||||
|
config.providers.groq.api_base = "https://groq.example/openai/v1"
|
||||||
|
|
||||||
|
resolved = resolve_transcription_config(config)
|
||||||
|
|
||||||
|
assert resolved.provider == "groq"
|
||||||
|
assert resolved.model == "whisper-large-v3-turbo"
|
||||||
|
assert resolved.language == "ko"
|
||||||
|
assert resolved.api_key == "gsk-test"
|
||||||
|
assert resolved.api_base == "https://groq.example/openai/v1"
|
||||||
|
|
||||||
|
|
||||||
|
def test_resolved_transcription_repr_hides_api_key() -> None:
|
||||||
|
config = Config()
|
||||||
|
config.providers.groq.api_key = "gsk-secret"
|
||||||
|
|
||||||
|
resolved = resolve_transcription_config(config)
|
||||||
|
|
||||||
|
assert "gsk-secret" not in repr(resolved)
|
||||||
|
assert "api_key" not in repr(resolved)
|
||||||
|
|
||||||
|
|
||||||
|
def test_resolver_keeps_enabled_and_limits_on_effective_config() -> None:
|
||||||
|
config = Config()
|
||||||
|
config.transcription.enabled = False
|
||||||
|
config.transcription.max_duration_sec = 45
|
||||||
|
config.transcription.max_upload_mb = 12
|
||||||
|
|
||||||
|
resolved = resolve_transcription_config(config)
|
||||||
|
|
||||||
|
assert resolved.enabled is False
|
||||||
|
assert resolved.max_duration_sec == 45
|
||||||
|
assert resolved.max_upload_mb == 12
|
||||||
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
# OpenAI provider — retry on transient HTTP + network errors
|
# OpenAI provider — retry on transient HTTP + network errors
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
@ -215,6 +276,32 @@ async def test_provider_omits_language_when_unset(
|
|||||||
assert "language" not in files
|
assert "language" not in files
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_provider_forwards_custom_model_in_multipart(audio_file: Path) -> None:
|
||||||
|
provider = GroqTranscriptionProvider(api_key="k", model="whisper-large-v3-turbo")
|
||||||
|
post = AsyncMock(return_value=_response(200, {"text": "ok"}))
|
||||||
|
with patch("httpx.AsyncClient.post", post), patch("asyncio.sleep", AsyncMock()):
|
||||||
|
result = await provider.transcribe(audio_file)
|
||||||
|
|
||||||
|
assert result == "ok"
|
||||||
|
files = post.await_args_list[0].kwargs["files"]
|
||||||
|
assert files["model"] == (None, "whisper-large-v3-turbo")
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_provider_forwards_file_mime_type(tmp_path: Path) -> None:
|
||||||
|
audio = tmp_path / "voice.webm"
|
||||||
|
audio.write_bytes(b"audio")
|
||||||
|
provider = GroqTranscriptionProvider(api_key="k")
|
||||||
|
post = AsyncMock(return_value=_response(200, {"text": "ok"}))
|
||||||
|
with patch("httpx.AsyncClient.post", post), patch("asyncio.sleep", AsyncMock()):
|
||||||
|
result = await provider.transcribe(audio)
|
||||||
|
|
||||||
|
assert result == "ok"
|
||||||
|
files = post.await_args_list[0].kwargs["files"]
|
||||||
|
assert files["file"] == ("voice.webm", b"audio", "audio/webm")
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_language_survives_retry(audio_file: Path) -> None:
|
async def test_language_survives_retry(audio_file: Path) -> None:
|
||||||
"""Regression: language must be present on every retry attempt, not just the first."""
|
"""Regression: language must be present on every retry attempt, not just the first."""
|
||||||
|
|||||||
@ -6,8 +6,12 @@ import shlex
|
|||||||
import subprocess
|
import subprocess
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
|
from nanobot.agent.tools.exec_session import (
|
||||||
|
ExecSessionManager,
|
||||||
|
ListExecSessionsTool,
|
||||||
|
WriteStdinTool,
|
||||||
|
)
|
||||||
from nanobot.agent.tools.shell import ExecTool
|
from nanobot.agent.tools.shell import ExecTool
|
||||||
from nanobot.agent.tools.exec_session import ExecSessionManager, ListExecSessionsTool, WriteStdinTool
|
|
||||||
|
|
||||||
|
|
||||||
def _python_command(code: str) -> str:
|
def _python_command(code: str) -> str:
|
||||||
@ -141,7 +145,7 @@ def test_exec_can_continue_with_stdin(tmp_path):
|
|||||||
return initial, result
|
return initial, result
|
||||||
|
|
||||||
initial, result = asyncio.run(run())
|
initial, result = asyncio.run(run())
|
||||||
assert "ready" in initial
|
assert "ready" in initial + result
|
||||||
assert "Process running" in initial
|
assert "Process running" in initial
|
||||||
assert "Elapsed:" in initial
|
assert "Elapsed:" in initial
|
||||||
assert "got:ping" in result
|
assert "got:ping" in result
|
||||||
@ -170,7 +174,7 @@ def test_write_stdin_can_close_stdin(tmp_path):
|
|||||||
return initial, result
|
return initial, result
|
||||||
|
|
||||||
initial, result = asyncio.run(run())
|
initial, result = asyncio.run(run())
|
||||||
assert "ready" in initial
|
assert "ready" in initial + result
|
||||||
assert "got:payload" in result
|
assert "got:payload" in result
|
||||||
assert "Stdin closed." in result
|
assert "Stdin closed." in result
|
||||||
assert "Exit code: 0" in result
|
assert "Exit code: 0" in result
|
||||||
@ -185,14 +189,20 @@ def test_write_stdin_can_terminate_session(tmp_path):
|
|||||||
"import time; print('ready', flush=True); time.sleep(30)"
|
"import time; print('ready', flush=True); time.sleep(30)"
|
||||||
)
|
)
|
||||||
|
|
||||||
initial = await exec_tool.execute(command=command, yield_time_ms=500)
|
initial = await exec_tool.execute(command=command, yield_time_ms=100)
|
||||||
sid = _session_id(initial)
|
sid = _session_id(initial)
|
||||||
|
waited = await stdin_tool.execute(
|
||||||
|
session_id=sid,
|
||||||
|
wait_for="ready",
|
||||||
|
wait_timeout_ms=3000,
|
||||||
|
yield_time_ms=0,
|
||||||
|
)
|
||||||
result = await stdin_tool.execute(
|
result = await stdin_tool.execute(
|
||||||
session_id=sid,
|
session_id=sid,
|
||||||
terminate=True,
|
terminate=True,
|
||||||
yield_time_ms=0,
|
yield_time_ms=0,
|
||||||
)
|
)
|
||||||
return initial, result
|
return initial + waited, result
|
||||||
|
|
||||||
initial, result = asyncio.run(run())
|
initial, result = asyncio.run(run())
|
||||||
assert "ready" in initial
|
assert "ready" in initial
|
||||||
@ -243,7 +253,7 @@ def test_write_stdin_preserves_completed_session_output_until_polled(tmp_path):
|
|||||||
|
|
||||||
initial, final = asyncio.run(run())
|
initial, final = asyncio.run(run())
|
||||||
|
|
||||||
assert "ready" in initial
|
assert "ready" in initial + final
|
||||||
assert "done" in final
|
assert "done" in final
|
||||||
assert "Exit code: 0" in final
|
assert "Exit code: 0" in final
|
||||||
|
|
||||||
|
|||||||
@ -8,8 +8,8 @@ import pytest
|
|||||||
|
|
||||||
from nanobot.utils.media_decode import (
|
from nanobot.utils.media_decode import (
|
||||||
DEFAULT_MAX_BYTES,
|
DEFAULT_MAX_BYTES,
|
||||||
FileSizeExceeded,
|
|
||||||
MAX_FILE_SIZE,
|
MAX_FILE_SIZE,
|
||||||
|
FileSizeExceeded,
|
||||||
save_base64_data_url,
|
save_base64_data_url,
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -25,6 +25,31 @@ def test_saves_png_with_correct_extension(tmp_path) -> None:
|
|||||||
assert (tmp_path / result.split("/")[-1]).read_bytes() == b"fake png"
|
assert (tmp_path / result.split("/")[-1]).read_bytes() == b"fake png"
|
||||||
|
|
||||||
|
|
||||||
|
def test_saves_data_url_with_mime_parameters(tmp_path) -> None:
|
||||||
|
result = save_base64_data_url(_data_url(b"voice", mime="audio/webm;codecs=opus"), tmp_path)
|
||||||
|
assert result is not None
|
||||||
|
assert result.endswith(".webm")
|
||||||
|
assert (tmp_path / result.split("/")[-1]).read_bytes() == b"voice"
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize(
|
||||||
|
("mime", "suffix"),
|
||||||
|
[
|
||||||
|
("audio/webm", ".webm"),
|
||||||
|
("video/webm", ".webm"),
|
||||||
|
("audio/ogg", ".ogg"),
|
||||||
|
("audio/wav", ".wav"),
|
||||||
|
("audio/mpga", ".mpga"),
|
||||||
|
],
|
||||||
|
)
|
||||||
|
def test_saves_common_audio_with_api_friendly_extension(
|
||||||
|
tmp_path, mime: str, suffix: str
|
||||||
|
) -> None:
|
||||||
|
result = save_base64_data_url(_data_url(b"voice", mime=mime), tmp_path)
|
||||||
|
assert result is not None
|
||||||
|
assert result.endswith(suffix)
|
||||||
|
|
||||||
|
|
||||||
def test_returns_none_for_malformed_data_url(tmp_path) -> None:
|
def test_returns_none_for_malformed_data_url(tmp_path) -> None:
|
||||||
assert save_base64_data_url("not-a-data-url", tmp_path) is None
|
assert save_base64_data_url("not-a-data-url", tmp_path) is None
|
||||||
|
|
||||||
|
|||||||
@ -18,6 +18,7 @@ from nanobot.webui.settings_api import (
|
|||||||
update_agent_settings,
|
update_agent_settings,
|
||||||
update_model_configuration,
|
update_model_configuration,
|
||||||
update_network_safety_settings,
|
update_network_safety_settings,
|
||||||
|
update_transcription_settings,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@ -243,6 +244,75 @@ def test_settings_payload_includes_network_safety_fields(
|
|||||||
assert payload["advanced"]["ssrf_whitelist_count"] == 1
|
assert payload["advanced"]["ssrf_whitelist_count"] == 1
|
||||||
|
|
||||||
|
|
||||||
|
def test_settings_payload_includes_effective_transcription_config(
|
||||||
|
tmp_path,
|
||||||
|
monkeypatch: pytest.MonkeyPatch,
|
||||||
|
) -> None:
|
||||||
|
config_path = tmp_path / "config.json"
|
||||||
|
config = Config()
|
||||||
|
config.channels.transcription_provider = "openai"
|
||||||
|
config.channels.transcription_language = "en"
|
||||||
|
config.providers.openai.api_key = "sk-test"
|
||||||
|
save_config(config, config_path)
|
||||||
|
monkeypatch.setattr("nanobot.config.loader._current_config_path", config_path)
|
||||||
|
|
||||||
|
payload = settings_payload()
|
||||||
|
|
||||||
|
assert payload["transcription"]["enabled"] is True
|
||||||
|
assert payload["transcription"]["provider"] == "openai"
|
||||||
|
assert payload["transcription"]["provider_configured"] is True
|
||||||
|
assert payload["transcription"]["model"] == "whisper-1"
|
||||||
|
assert payload["transcription"]["language"] == "en"
|
||||||
|
|
||||||
|
|
||||||
|
def test_update_transcription_settings_writes_top_level_only(
|
||||||
|
tmp_path,
|
||||||
|
monkeypatch: pytest.MonkeyPatch,
|
||||||
|
) -> None:
|
||||||
|
config_path = tmp_path / "config.json"
|
||||||
|
config = Config()
|
||||||
|
config.channels.transcription_provider = "openai"
|
||||||
|
config.channels.transcription_language = "en"
|
||||||
|
config.providers.groq.api_key = "gsk-test"
|
||||||
|
save_config(config, config_path)
|
||||||
|
monkeypatch.setattr("nanobot.config.loader._current_config_path", config_path)
|
||||||
|
|
||||||
|
payload = update_transcription_settings(
|
||||||
|
{
|
||||||
|
"enabled": ["true"],
|
||||||
|
"provider": ["groq"],
|
||||||
|
"model": ["whisper-large-v3-turbo"],
|
||||||
|
"language": ["ko"],
|
||||||
|
"maxDurationSec": ["90"],
|
||||||
|
"maxUploadMb": ["20"],
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
saved = load_config(config_path)
|
||||||
|
assert saved.channels.transcription_provider == "openai"
|
||||||
|
assert saved.channels.transcription_language == "en"
|
||||||
|
assert saved.transcription.enabled is True
|
||||||
|
assert saved.transcription.provider == "groq"
|
||||||
|
assert saved.transcription.model == "whisper-large-v3-turbo"
|
||||||
|
assert saved.transcription.language == "ko"
|
||||||
|
assert saved.transcription.max_duration_sec == 90
|
||||||
|
assert saved.transcription.max_upload_mb == 20
|
||||||
|
assert payload["transcription"]["provider"] == "groq"
|
||||||
|
assert payload["transcription"]["provider_configured"] is True
|
||||||
|
|
||||||
|
|
||||||
|
def test_update_transcription_settings_validates_language(
|
||||||
|
tmp_path,
|
||||||
|
monkeypatch: pytest.MonkeyPatch,
|
||||||
|
) -> None:
|
||||||
|
config_path = tmp_path / "config.json"
|
||||||
|
save_config(Config(), config_path)
|
||||||
|
monkeypatch.setattr("nanobot.config.loader._current_config_path", config_path)
|
||||||
|
|
||||||
|
with pytest.raises(WebUISettingsError, match="transcription language"):
|
||||||
|
update_transcription_settings({"language": ["en-US"]})
|
||||||
|
|
||||||
|
|
||||||
def test_settings_payload_includes_token_usage_summary(
|
def test_settings_payload_includes_token_usage_summary(
|
||||||
tmp_path,
|
tmp_path,
|
||||||
monkeypatch: pytest.MonkeyPatch,
|
monkeypatch: pytest.MonkeyPatch,
|
||||||
|
|||||||
129
tests/webui/test_transcription_ws.py
Normal file
129
tests/webui/test_transcription_ws.py
Normal file
@ -0,0 +1,129 @@
|
|||||||
|
"""Tests for WebUI transcription envelopes carried over the gateway socket."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import base64
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from nanobot.config.loader import save_config
|
||||||
|
from nanobot.config.schema import Config
|
||||||
|
from nanobot.webui.transcription_ws import webui_transcription_event
|
||||||
|
|
||||||
|
|
||||||
|
def _audio_data_url(payload: bytes = b"voice", mime: str = "audio/webm") -> str:
|
||||||
|
return f"data:{mime};base64,{base64.b64encode(payload).decode('ascii')}"
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_webui_transcribe_audio_rejects_unconfigured_provider(
|
||||||
|
tmp_path,
|
||||||
|
monkeypatch: pytest.MonkeyPatch,
|
||||||
|
) -> None:
|
||||||
|
config_path = tmp_path / "config.json"
|
||||||
|
config = Config()
|
||||||
|
config.transcription.provider = "groq"
|
||||||
|
save_config(config, config_path)
|
||||||
|
monkeypatch.setattr("nanobot.config.loader._current_config_path", config_path)
|
||||||
|
|
||||||
|
event, payload = await webui_transcription_event({
|
||||||
|
"request_id": "voice-1",
|
||||||
|
"data_url": _audio_data_url(),
|
||||||
|
})
|
||||||
|
|
||||||
|
assert event == "transcription_error"
|
||||||
|
assert payload == {
|
||||||
|
"request_id": "voice-1",
|
||||||
|
"detail": "not_configured",
|
||||||
|
"provider": "groq",
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_webui_transcribe_audio_rejects_unsupported_mime(
|
||||||
|
tmp_path,
|
||||||
|
monkeypatch: pytest.MonkeyPatch,
|
||||||
|
) -> None:
|
||||||
|
config_path = tmp_path / "config.json"
|
||||||
|
config = Config()
|
||||||
|
config.transcription.provider = "groq"
|
||||||
|
config.providers.groq.api_key = "gsk-test"
|
||||||
|
save_config(config, config_path)
|
||||||
|
monkeypatch.setattr("nanobot.config.loader._current_config_path", config_path)
|
||||||
|
|
||||||
|
event, payload = await webui_transcription_event({
|
||||||
|
"request_id": "voice-1",
|
||||||
|
"data_url": _audio_data_url(mime="text/plain"),
|
||||||
|
})
|
||||||
|
|
||||||
|
assert event == "transcription_error"
|
||||||
|
assert payload["request_id"] == "voice-1"
|
||||||
|
assert payload["detail"] == "mime"
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_webui_transcribe_audio_rejects_oversized_audio(
|
||||||
|
tmp_path,
|
||||||
|
monkeypatch: pytest.MonkeyPatch,
|
||||||
|
) -> None:
|
||||||
|
config_path = tmp_path / "config.json"
|
||||||
|
config = Config()
|
||||||
|
config.transcription.provider = "groq"
|
||||||
|
config.transcription.max_upload_mb = 1
|
||||||
|
config.providers.groq.api_key = "gsk-test"
|
||||||
|
save_config(config, config_path)
|
||||||
|
monkeypatch.setattr("nanobot.config.loader._current_config_path", config_path)
|
||||||
|
monkeypatch.setattr("nanobot.audio.transcription.get_media_dir", lambda _channel=None: tmp_path)
|
||||||
|
|
||||||
|
event, payload = await webui_transcription_event({
|
||||||
|
"request_id": "voice-1",
|
||||||
|
"data_url": _audio_data_url(payload=b"x" * (1024 * 1024 + 1)),
|
||||||
|
})
|
||||||
|
|
||||||
|
assert event == "transcription_error"
|
||||||
|
assert payload["request_id"] == "voice-1"
|
||||||
|
assert payload["detail"] == "size"
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_webui_transcribe_audio_returns_text_and_removes_temp_file(
|
||||||
|
tmp_path,
|
||||||
|
monkeypatch: pytest.MonkeyPatch,
|
||||||
|
) -> None:
|
||||||
|
config_path = tmp_path / "config.json"
|
||||||
|
media_dir = tmp_path / "media"
|
||||||
|
media_dir.mkdir()
|
||||||
|
config = Config()
|
||||||
|
config.transcription.provider = "groq"
|
||||||
|
config.providers.groq.api_key = "gsk-test"
|
||||||
|
save_config(config, config_path)
|
||||||
|
monkeypatch.setattr("nanobot.config.loader._current_config_path", config_path)
|
||||||
|
monkeypatch.setattr(
|
||||||
|
"nanobot.audio.transcription.get_media_dir",
|
||||||
|
lambda _channel=None: media_dir,
|
||||||
|
)
|
||||||
|
captured_paths: list[Path] = []
|
||||||
|
|
||||||
|
async def fake_transcribe_audio_file(path: str | Path, _resolved: Any) -> str:
|
||||||
|
p = Path(path)
|
||||||
|
assert p.exists()
|
||||||
|
captured_paths.append(p)
|
||||||
|
return "hello voice"
|
||||||
|
|
||||||
|
monkeypatch.setattr(
|
||||||
|
"nanobot.audio.transcription.transcribe_audio_file",
|
||||||
|
fake_transcribe_audio_file,
|
||||||
|
)
|
||||||
|
|
||||||
|
event, payload = await webui_transcription_event({
|
||||||
|
"request_id": "voice-1",
|
||||||
|
"data_url": _audio_data_url(payload=b"webm voice", mime="audio/webm;codecs=opus"),
|
||||||
|
"duration_ms": 1200,
|
||||||
|
})
|
||||||
|
|
||||||
|
assert event == "transcription_result"
|
||||||
|
assert payload == {"request_id": "voice-1", "text": "hello voice"}
|
||||||
|
assert captured_paths
|
||||||
|
assert not captured_paths[0].exists()
|
||||||
@ -81,6 +81,7 @@ const SETTINGS_SECTION_KEYS: SettingsSectionKey[] = [
|
|||||||
"appearance",
|
"appearance",
|
||||||
"models",
|
"models",
|
||||||
"image",
|
"image",
|
||||||
|
"voice",
|
||||||
"browser",
|
"browser",
|
||||||
"apps",
|
"apps",
|
||||||
"skills",
|
"skills",
|
||||||
|
|||||||
@ -1,8 +1,9 @@
|
|||||||
import { Suspense, lazy, useCallback, useState } from "react";
|
import { Suspense, lazy, useCallback, useState, type ReactNode } from "react";
|
||||||
import { Check, Copy } from "lucide-react";
|
import { Check, Copy } from "lucide-react";
|
||||||
import { useTranslation } from "react-i18next";
|
import { useTranslation } from "react-i18next";
|
||||||
|
|
||||||
import { useThemeValue } from "@/hooks/useTheme";
|
import { useThemeValue } from "@/hooks/useTheme";
|
||||||
|
import { hasAnsi, parseAnsiSegments, stripAnsi } from "@/lib/ansi";
|
||||||
import { cn } from "@/lib/utils";
|
import { cn } from "@/lib/utils";
|
||||||
|
|
||||||
interface CodeBlockProps {
|
interface CodeBlockProps {
|
||||||
@ -36,6 +37,10 @@ const CODE_FONT_STACK = [
|
|||||||
"monospace",
|
"monospace",
|
||||||
].join(", ");
|
].join(", ");
|
||||||
|
|
||||||
|
const ANSI_LANGUAGES = new Set(["ansi", "ansi-output"]);
|
||||||
|
const CODE_SURFACE_LIGHT = "#f4f4f5";
|
||||||
|
const CODE_SURFACE_DARK = "#27272a";
|
||||||
|
|
||||||
const LazyHighlightedCode = lazy(async () => {
|
const LazyHighlightedCode = lazy(async () => {
|
||||||
const [
|
const [
|
||||||
{ default: SyntaxHighlighter },
|
{ default: SyntaxHighlighter },
|
||||||
@ -74,7 +79,11 @@ const LazyHighlightedCode = lazy(async () => {
|
|||||||
language={language || "text"}
|
language={language || "text"}
|
||||||
style={transparentTheme}
|
style={transparentTheme}
|
||||||
customStyle={{
|
customStyle={{
|
||||||
background: chrome === "none" ? "transparent" : undefined,
|
background: chrome === "none"
|
||||||
|
? "transparent"
|
||||||
|
: isDark
|
||||||
|
? CODE_SURFACE_DARK
|
||||||
|
: CODE_SURFACE_LIGHT,
|
||||||
margin: 0,
|
margin: 0,
|
||||||
padding: chrome === "none" ? "0.75rem 1rem" : "1rem",
|
padding: chrome === "none" ? "0.75rem 1rem" : "1rem",
|
||||||
fontFamily: CODE_FONT_STACK,
|
fontFamily: CODE_FONT_STACK,
|
||||||
@ -83,10 +92,10 @@ const LazyHighlightedCode = lazy(async () => {
|
|||||||
tabSize: 2,
|
tabSize: 2,
|
||||||
}}
|
}}
|
||||||
codeTagProps={{
|
codeTagProps={{
|
||||||
style: chrome === "none" ? {
|
style: {
|
||||||
background: "transparent",
|
background: "transparent",
|
||||||
fontFamily: CODE_FONT_STACK,
|
fontFamily: CODE_FONT_STACK,
|
||||||
} : undefined,
|
},
|
||||||
}}
|
}}
|
||||||
lineNumberStyle={{
|
lineNumberStyle={{
|
||||||
minWidth: "2.6em",
|
minWidth: "2.6em",
|
||||||
@ -106,14 +115,32 @@ const LazyHighlightedCode = lazy(async () => {
|
|||||||
};
|
};
|
||||||
});
|
});
|
||||||
|
|
||||||
function PlainCodeFallback({
|
function renderPlainText(value: string): ReactNode {
|
||||||
|
return value;
|
||||||
|
}
|
||||||
|
|
||||||
|
function renderAnsiText(value: string): ReactNode {
|
||||||
|
return parseAnsiSegments(value).map((segment, index) => (
|
||||||
|
<span key={index} style={segment.style}>
|
||||||
|
{segment.text}
|
||||||
|
</span>
|
||||||
|
));
|
||||||
|
}
|
||||||
|
|
||||||
|
function CodeTextBlock({
|
||||||
code,
|
code,
|
||||||
chrome,
|
chrome,
|
||||||
showLineNumbers,
|
showLineNumbers,
|
||||||
|
testId,
|
||||||
|
className,
|
||||||
|
renderText = renderPlainText,
|
||||||
}: {
|
}: {
|
||||||
code: string;
|
code: string;
|
||||||
chrome: "default" | "none";
|
chrome: "default" | "none";
|
||||||
showLineNumbers: boolean;
|
showLineNumbers: boolean;
|
||||||
|
testId: string;
|
||||||
|
className?: string;
|
||||||
|
renderText?: (value: string) => ReactNode;
|
||||||
}) {
|
}) {
|
||||||
const lines = code.split("\n");
|
const lines = code.split("\n");
|
||||||
return (
|
return (
|
||||||
@ -121,10 +148,11 @@ function PlainCodeFallback({
|
|||||||
className={cn(
|
className={cn(
|
||||||
"m-0 overflow-x-auto p-4 font-mono text-sm leading-[1.6] text-foreground/90",
|
"m-0 overflow-x-auto p-4 font-mono text-sm leading-[1.6] text-foreground/90",
|
||||||
showLineNumbers ? "whitespace-pre" : "whitespace-pre-wrap",
|
showLineNumbers ? "whitespace-pre" : "whitespace-pre-wrap",
|
||||||
chrome === "default" ? "bg-background" : "bg-transparent",
|
chrome === "default" ? "bg-zinc-100 dark:bg-zinc-800" : "bg-transparent",
|
||||||
chrome === "none" && "p-3 text-[13px] leading-[1.55]",
|
chrome === "none" && "p-3 text-[13px] leading-[1.55]",
|
||||||
|
className,
|
||||||
)}
|
)}
|
||||||
data-testid="plain-code-fallback"
|
data-testid={testId}
|
||||||
>
|
>
|
||||||
<code className="text-inherit">
|
<code className="text-inherit">
|
||||||
{showLineNumbers ? (
|
{showLineNumbers ? (
|
||||||
@ -133,16 +161,21 @@ function PlainCodeFallback({
|
|||||||
<span className="w-10 shrink-0 select-none pr-4 text-right text-muted-foreground/60">
|
<span className="w-10 shrink-0 select-none pr-4 text-right text-muted-foreground/60">
|
||||||
{index + 1}
|
{index + 1}
|
||||||
</span>
|
</span>
|
||||||
<span className="whitespace-pre">{line || " "}</span>
|
<span className="whitespace-pre">{renderText(line || " ")}</span>
|
||||||
{index < lines.length - 1 ? "\n" : null}
|
{index < lines.length - 1 ? "\n" : null}
|
||||||
</span>
|
</span>
|
||||||
))
|
))
|
||||||
) : code}
|
) : renderText(code)}
|
||||||
</code>
|
</code>
|
||||||
</pre>
|
</pre>
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
function shouldRenderAnsi(language: string | undefined, code: string): boolean {
|
||||||
|
const normalized = language?.trim().toLowerCase();
|
||||||
|
return Boolean((normalized && ANSI_LANGUAGES.has(normalized)) || hasAnsi(code));
|
||||||
|
}
|
||||||
|
|
||||||
export function CodeBlock({
|
export function CodeBlock({
|
||||||
language,
|
language,
|
||||||
code,
|
code,
|
||||||
@ -156,19 +189,20 @@ export function CodeBlock({
|
|||||||
const [copied, setCopied] = useState(false);
|
const [copied, setCopied] = useState(false);
|
||||||
const isDark = useThemeValue() === "dark";
|
const isDark = useThemeValue() === "dark";
|
||||||
const hasChrome = chrome === "default";
|
const hasChrome = chrome === "default";
|
||||||
|
const renderAnsi = shouldRenderAnsi(language, code);
|
||||||
|
|
||||||
const onCopy = useCallback(() => {
|
const onCopy = useCallback(() => {
|
||||||
if (!navigator.clipboard) return;
|
if (!navigator.clipboard) return;
|
||||||
navigator.clipboard.writeText(code).then(() => {
|
navigator.clipboard.writeText(renderAnsi ? stripAnsi(code) : code).then(() => {
|
||||||
setCopied(true);
|
setCopied(true);
|
||||||
setTimeout(() => setCopied(false), 1_500);
|
setTimeout(() => setCopied(false), 1_500);
|
||||||
});
|
});
|
||||||
}, [code]);
|
}, [code, renderAnsi]);
|
||||||
|
|
||||||
return (
|
return (
|
||||||
<div
|
<div
|
||||||
className={cn(
|
className={cn(
|
||||||
"overflow-hidden",
|
"not-prose overflow-hidden",
|
||||||
hasChrome && "rounded-lg border",
|
hasChrome && "rounded-lg border",
|
||||||
hasChrome && (isDark ? "border-white/10" : "border-black/10"),
|
hasChrome && (isDark ? "border-white/10" : "border-black/10"),
|
||||||
className,
|
className,
|
||||||
@ -177,7 +211,7 @@ export function CodeBlock({
|
|||||||
{hasChrome ? (
|
{hasChrome ? (
|
||||||
<div
|
<div
|
||||||
className={cn(
|
className={cn(
|
||||||
"flex items-center justify-between px-4 py-1.5 text-xs font-medium",
|
"flex items-center justify-between px-4 pb-1.5 pt-2 text-xs font-medium",
|
||||||
isDark
|
isDark
|
||||||
? "bg-zinc-800 text-zinc-300"
|
? "bg-zinc-800 text-zinc-300"
|
||||||
: "bg-zinc-100 text-zinc-600",
|
: "bg-zinc-100 text-zinc-600",
|
||||||
@ -206,13 +240,22 @@ export function CodeBlock({
|
|||||||
</button>
|
</button>
|
||||||
</div>
|
</div>
|
||||||
) : null}
|
) : null}
|
||||||
{highlight ? (
|
{renderAnsi ? (
|
||||||
|
<CodeTextBlock
|
||||||
|
code={code}
|
||||||
|
chrome={chrome}
|
||||||
|
showLineNumbers={showLineNumbers}
|
||||||
|
testId="ansi-code"
|
||||||
|
renderText={renderAnsiText}
|
||||||
|
/>
|
||||||
|
) : highlight ? (
|
||||||
<Suspense
|
<Suspense
|
||||||
fallback={
|
fallback={
|
||||||
<PlainCodeFallback
|
<CodeTextBlock
|
||||||
code={code}
|
code={code}
|
||||||
chrome={chrome}
|
chrome={chrome}
|
||||||
showLineNumbers={showLineNumbers}
|
showLineNumbers={showLineNumbers}
|
||||||
|
testId="plain-code-fallback"
|
||||||
/>
|
/>
|
||||||
}
|
}
|
||||||
>
|
>
|
||||||
@ -226,10 +269,11 @@ export function CodeBlock({
|
|||||||
/>
|
/>
|
||||||
</Suspense>
|
</Suspense>
|
||||||
) : (
|
) : (
|
||||||
<PlainCodeFallback
|
<CodeTextBlock
|
||||||
code={code}
|
code={code}
|
||||||
chrome={chrome}
|
chrome={chrome}
|
||||||
showLineNumbers={showLineNumbers}
|
showLineNumbers={showLineNumbers}
|
||||||
|
testId="plain-code-fallback"
|
||||||
/>
|
/>
|
||||||
)}
|
)}
|
||||||
</div>
|
</div>
|
||||||
|
|||||||
@ -31,6 +31,7 @@ import {
|
|||||||
Layers,
|
Layers,
|
||||||
Loader2,
|
Loader2,
|
||||||
LogOut,
|
LogOut,
|
||||||
|
Mic,
|
||||||
Moon,
|
Moon,
|
||||||
PlayCircle,
|
PlayCircle,
|
||||||
Plus,
|
Plus,
|
||||||
@ -92,6 +93,7 @@ import {
|
|||||||
updateNetworkSafetySettings,
|
updateNetworkSafetySettings,
|
||||||
updateProviderSettings,
|
updateProviderSettings,
|
||||||
updateSettings,
|
updateSettings,
|
||||||
|
updateTranscriptionSettings,
|
||||||
updateWebSearchSettings,
|
updateWebSearchSettings,
|
||||||
} from "@/lib/api";
|
} from "@/lib/api";
|
||||||
import { notifyCliAppsChanged } from "@/lib/cli-app-events";
|
import { notifyCliAppsChanged } from "@/lib/cli-app-events";
|
||||||
@ -115,6 +117,7 @@ import type {
|
|||||||
ProviderModelsPayload,
|
ProviderModelsPayload,
|
||||||
SettingsPayload,
|
SettingsPayload,
|
||||||
SkillSummary,
|
SkillSummary,
|
||||||
|
TranscriptionSettingsUpdate,
|
||||||
WebSearchSettingsUpdate,
|
WebSearchSettingsUpdate,
|
||||||
WebuiDefaultAccessMode,
|
WebuiDefaultAccessMode,
|
||||||
} from "@/lib/types";
|
} from "@/lib/types";
|
||||||
@ -124,6 +127,7 @@ export type SettingsSectionKey =
|
|||||||
| "appearance"
|
| "appearance"
|
||||||
| "models"
|
| "models"
|
||||||
| "image"
|
| "image"
|
||||||
|
| "voice"
|
||||||
| "browser"
|
| "browser"
|
||||||
| "apps"
|
| "apps"
|
||||||
| "skills"
|
| "skills"
|
||||||
@ -367,6 +371,26 @@ const DEFAULT_IMAGE_GENERATION_FORM: ImageGenerationSettingsUpdate = {
|
|||||||
maxImagesPerTurn: 4,
|
maxImagesPerTurn: 4,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
const DEFAULT_TRANSCRIPTION_FORM: TranscriptionSettingsUpdate = {
|
||||||
|
enabled: true,
|
||||||
|
provider: "groq",
|
||||||
|
model: "",
|
||||||
|
language: "",
|
||||||
|
maxDurationSec: 120,
|
||||||
|
maxUploadMb: 25,
|
||||||
|
};
|
||||||
|
|
||||||
|
const DEFAULT_TRANSCRIPTION_SETTINGS: NonNullable<SettingsPayload["transcription"]> = {
|
||||||
|
enabled: true,
|
||||||
|
provider: "groq",
|
||||||
|
provider_configured: false,
|
||||||
|
model: "whisper-large-v3",
|
||||||
|
language: null,
|
||||||
|
max_duration_sec: 120,
|
||||||
|
max_upload_mb: 25,
|
||||||
|
providers: [],
|
||||||
|
};
|
||||||
|
|
||||||
const DEFAULT_NETWORK_SAFETY_FORM: NetworkSafetySettingsUpdate = {
|
const DEFAULT_NETWORK_SAFETY_FORM: NetworkSafetySettingsUpdate = {
|
||||||
webuiAllowLocalServiceAccess: true,
|
webuiAllowLocalServiceAccess: true,
|
||||||
webuiDefaultAccessMode: "default",
|
webuiDefaultAccessMode: "default",
|
||||||
@ -419,6 +443,18 @@ function imageGenerationFormFromPayload(payload: SettingsPayload): ImageGenerati
|
|||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
function transcriptionFormFromPayload(payload: SettingsPayload): TranscriptionSettingsUpdate {
|
||||||
|
const transcription = payload.transcription ?? DEFAULT_TRANSCRIPTION_SETTINGS;
|
||||||
|
return {
|
||||||
|
enabled: transcription.enabled,
|
||||||
|
provider: transcription.provider,
|
||||||
|
model: transcription.model,
|
||||||
|
language: transcription.language ?? "",
|
||||||
|
maxDurationSec: transcription.max_duration_sec,
|
||||||
|
maxUploadMb: transcription.max_upload_mb,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
function networkSafetyFormFromPayload(payload: SettingsPayload): NetworkSafetySettingsUpdate {
|
function networkSafetyFormFromPayload(payload: SettingsPayload): NetworkSafetySettingsUpdate {
|
||||||
return {
|
return {
|
||||||
webuiAllowLocalServiceAccess:
|
webuiAllowLocalServiceAccess:
|
||||||
@ -479,6 +515,7 @@ export function SettingsView({
|
|||||||
const [providerSaving, setProviderSaving] = useState<string | null>(null);
|
const [providerSaving, setProviderSaving] = useState<string | null>(null);
|
||||||
const [webSearchSaving, setWebSearchSaving] = useState(false);
|
const [webSearchSaving, setWebSearchSaving] = useState(false);
|
||||||
const [imageGenerationSaving, setImageGenerationSaving] = useState(false);
|
const [imageGenerationSaving, setImageGenerationSaving] = useState(false);
|
||||||
|
const [transcriptionSaving, setTranscriptionSaving] = useState(false);
|
||||||
const [networkSafetySaving, setNetworkSafetySaving] = useState(false);
|
const [networkSafetySaving, setNetworkSafetySaving] = useState(false);
|
||||||
const [hostEngineApplying, setHostEngineApplying] = useState(false);
|
const [hostEngineApplying, setHostEngineApplying] = useState(false);
|
||||||
const [error, setError] = useState<string | null>(null);
|
const [error, setError] = useState<string | null>(null);
|
||||||
@ -511,6 +548,9 @@ export function SettingsView({
|
|||||||
? imageGenerationFormFromPayload(initialSettings)
|
? imageGenerationFormFromPayload(initialSettings)
|
||||||
: DEFAULT_IMAGE_GENERATION_FORM,
|
: DEFAULT_IMAGE_GENERATION_FORM,
|
||||||
);
|
);
|
||||||
|
const [transcriptionForm, setTranscriptionForm] = useState<TranscriptionSettingsUpdate>(
|
||||||
|
() => initialSettings ? transcriptionFormFromPayload(initialSettings) : DEFAULT_TRANSCRIPTION_FORM,
|
||||||
|
);
|
||||||
const [networkSafetyForm, setNetworkSafetyForm] = useState<NetworkSafetySettingsUpdate>(() =>
|
const [networkSafetyForm, setNetworkSafetyForm] = useState<NetworkSafetySettingsUpdate>(() =>
|
||||||
initialSettings ? networkSafetyFormFromPayload(initialSettings) : DEFAULT_NETWORK_SAFETY_FORM,
|
initialSettings ? networkSafetyFormFromPayload(initialSettings) : DEFAULT_NETWORK_SAFETY_FORM,
|
||||||
);
|
);
|
||||||
@ -543,6 +583,7 @@ export function SettingsView({
|
|||||||
setForm(agentDraftFromPayload(payload));
|
setForm(agentDraftFromPayload(payload));
|
||||||
setWebSearchForm((prev) => webSearchFormFromPayload(payload, prev));
|
setWebSearchForm((prev) => webSearchFormFromPayload(payload, prev));
|
||||||
setImageGenerationForm(imageGenerationFormFromPayload(payload));
|
setImageGenerationForm(imageGenerationFormFromPayload(payload));
|
||||||
|
setTranscriptionForm(transcriptionFormFromPayload(payload));
|
||||||
setNetworkSafetyForm(networkSafetyFormFromPayload(payload));
|
setNetworkSafetyForm(networkSafetyFormFromPayload(payload));
|
||||||
if (payload.restart_required_sections) {
|
if (payload.restart_required_sections) {
|
||||||
setPendingRestartSections(pendingRestartSectionsFromPayload(payload));
|
setPendingRestartSections(pendingRestartSectionsFromPayload(payload));
|
||||||
@ -711,6 +752,19 @@ export function SettingsView({
|
|||||||
);
|
);
|
||||||
}, [imageGenerationForm, settings]);
|
}, [imageGenerationForm, settings]);
|
||||||
|
|
||||||
|
const transcriptionDirty = useMemo(() => {
|
||||||
|
if (!settings) return false;
|
||||||
|
const transcription = settings.transcription ?? DEFAULT_TRANSCRIPTION_SETTINGS;
|
||||||
|
return (
|
||||||
|
transcriptionForm.enabled !== transcription.enabled ||
|
||||||
|
transcriptionForm.provider !== transcription.provider ||
|
||||||
|
transcriptionForm.model !== transcription.model ||
|
||||||
|
transcriptionForm.language !== (transcription.language ?? "") ||
|
||||||
|
transcriptionForm.maxDurationSec !== transcription.max_duration_sec ||
|
||||||
|
transcriptionForm.maxUploadMb !== transcription.max_upload_mb
|
||||||
|
);
|
||||||
|
}, [settings, transcriptionForm]);
|
||||||
|
|
||||||
const networkSafetyDirty = useMemo(() => {
|
const networkSafetyDirty = useMemo(() => {
|
||||||
if (!settings) return false;
|
if (!settings) return false;
|
||||||
const currentLocalServiceAccess =
|
const currentLocalServiceAccess =
|
||||||
@ -913,6 +967,24 @@ export function SettingsView({
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
const saveTranscriptionSettings = async () => {
|
||||||
|
if (!settings || !transcriptionDirty || transcriptionSaving) return;
|
||||||
|
setTranscriptionSaving(true);
|
||||||
|
try {
|
||||||
|
const payload = await updateTranscriptionSettings(token, transcriptionForm);
|
||||||
|
applyPayload(payload);
|
||||||
|
if (payload.requires_restart) {
|
||||||
|
setPendingRestartSections((prev) => ({ ...prev, browser: true }));
|
||||||
|
}
|
||||||
|
await maybeRestartHostEngine(payload);
|
||||||
|
setError(null);
|
||||||
|
} catch (err) {
|
||||||
|
setError((err as Error).message);
|
||||||
|
} finally {
|
||||||
|
setTranscriptionSaving(false);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
const saveNetworkSafetySettings = async () => {
|
const saveNetworkSafetySettings = async () => {
|
||||||
if (!settings || !networkSafetyDirty || networkSafetySaving) return;
|
if (!settings || !networkSafetyDirty || networkSafetySaving) return;
|
||||||
setNetworkSafetySaving(true);
|
setNetworkSafetySaving(true);
|
||||||
@ -1333,6 +1405,22 @@ export function SettingsView({
|
|||||||
requiresRestartPending={pendingRestartSections.image}
|
requiresRestartPending={pendingRestartSections.image}
|
||||||
/>
|
/>
|
||||||
);
|
);
|
||||||
|
case "voice":
|
||||||
|
return (
|
||||||
|
<TranscriptionSettings
|
||||||
|
settings={settings}
|
||||||
|
form={transcriptionForm}
|
||||||
|
dirty={transcriptionDirty}
|
||||||
|
saving={transcriptionSaving}
|
||||||
|
onChangeForm={setTranscriptionForm}
|
||||||
|
onSave={saveTranscriptionSettings}
|
||||||
|
onOpenProviders={() => selectSection("models")}
|
||||||
|
showBrandLogos={localPrefs.brandLogos}
|
||||||
|
onRestart={restartViaSettingsSurface}
|
||||||
|
isRestarting={isRestarting || hostEngineApplying}
|
||||||
|
requiresRestartPending={pendingRestartSections.browser}
|
||||||
|
/>
|
||||||
|
);
|
||||||
case "browser":
|
case "browser":
|
||||||
return (
|
return (
|
||||||
<WebSettings
|
<WebSettings
|
||||||
@ -1523,6 +1611,7 @@ const SETTINGS_NAV_ITEMS: Array<{ key: SettingsSectionKey; icon: LucideIcon; fal
|
|||||||
{ key: "appearance", icon: Palette, fallback: "Appearance" },
|
{ key: "appearance", icon: Palette, fallback: "Appearance" },
|
||||||
{ key: "models", icon: SlidersHorizontal, fallback: "Models" },
|
{ key: "models", icon: SlidersHorizontal, fallback: "Models" },
|
||||||
{ key: "image", icon: ImageIcon, fallback: "Image" },
|
{ key: "image", icon: ImageIcon, fallback: "Image" },
|
||||||
|
{ key: "voice", icon: Mic, fallback: "Voice" },
|
||||||
{ key: "browser", icon: Globe2, fallback: "Web" },
|
{ key: "browser", icon: Globe2, fallback: "Web" },
|
||||||
{ key: "runtime", icon: Server, fallback: "System" },
|
{ key: "runtime", icon: Server, fallback: "System" },
|
||||||
{ key: "advanced", icon: ShieldCheck, fallback: "Security" },
|
{ key: "advanced", icon: ShieldCheck, fallback: "Security" },
|
||||||
@ -1642,6 +1731,24 @@ function OverviewSettings({
|
|||||||
const webStatus = settings.web.enable
|
const webStatus = settings.web.enable
|
||||||
? tx("settings.values.enabled", "Enabled")
|
? tx("settings.values.enabled", "Enabled")
|
||||||
: tx("settings.values.disabled", "Disabled");
|
: tx("settings.values.disabled", "Disabled");
|
||||||
|
const webSearchProvider =
|
||||||
|
settings.web_search.providers.find((provider) => provider.name === settings.web_search.provider) ??
|
||||||
|
settings.web_search.providers[0];
|
||||||
|
const webSearchProviderLabel = providerDisplayLabel(
|
||||||
|
settings.web_search.providers,
|
||||||
|
settings.web_search.provider,
|
||||||
|
);
|
||||||
|
const webSearchCredentialStatus =
|
||||||
|
webSearchProvider?.credential === "none"
|
||||||
|
? tx("settings.byok.webSearch.noCredentialRequired", "No key required")
|
||||||
|
: webSearchProvider?.credential === "base_url"
|
||||||
|
? settings.web_search.base_url
|
||||||
|
? tx("settings.values.configured", "Configured")
|
||||||
|
: tx("settings.values.notConfigured", "Not configured")
|
||||||
|
: settings.web_search.api_key_hint
|
||||||
|
? tx("settings.values.configured", "Configured")
|
||||||
|
: tx("settings.values.notConfigured", "Not configured");
|
||||||
|
const webCaption = `${webSearchProviderLabel} · ${webSearchCredentialStatus}`;
|
||||||
const imageStatus = settings.image_generation.enabled
|
const imageStatus = settings.image_generation.enabled
|
||||||
? tx("settings.values.enabled", "Enabled")
|
? tx("settings.values.enabled", "Enabled")
|
||||||
: tx("settings.values.disabled", "Disabled");
|
: tx("settings.values.disabled", "Disabled");
|
||||||
@ -1650,6 +1757,15 @@ function OverviewSettings({
|
|||||||
? tx("settings.values.configured", "Configured")
|
? tx("settings.values.configured", "Configured")
|
||||||
: tx("settings.values.notConfigured", "Not configured")
|
: tx("settings.values.notConfigured", "Not configured")
|
||||||
}`;
|
}`;
|
||||||
|
const transcription = settings.transcription ?? DEFAULT_TRANSCRIPTION_SETTINGS;
|
||||||
|
const voiceStatus = transcription.enabled
|
||||||
|
? tx("settings.values.enabled", "Enabled")
|
||||||
|
: tx("settings.values.disabled", "Disabled");
|
||||||
|
const voiceCaption = `${providerDisplayLabel(transcription.providers, transcription.provider)} · ${
|
||||||
|
transcription.provider_configured
|
||||||
|
? tx("settings.values.configured", "Configured")
|
||||||
|
: tx("settings.values.notConfigured", "Not configured")
|
||||||
|
}`;
|
||||||
const isNativeHost = (settings.surface ?? settings.runtime_surface) === "native";
|
const isNativeHost = (settings.surface ?? settings.runtime_surface) === "native";
|
||||||
const workspaceCaption = shortWorkspacePath(settings.runtime.workspace_path);
|
const workspaceCaption = shortWorkspacePath(settings.runtime.workspace_path);
|
||||||
const runtimeTitle = isNativeHost
|
const runtimeTitle = isNativeHost
|
||||||
@ -1691,8 +1807,8 @@ function OverviewSettings({
|
|||||||
icon={Globe2}
|
icon={Globe2}
|
||||||
valueLogoProvider={settings.web_search.provider}
|
valueLogoProvider={settings.web_search.provider}
|
||||||
title={tx("settings.overview.webSearch", "Web search")}
|
title={tx("settings.overview.webSearch", "Web search")}
|
||||||
value={providerDisplayLabel(settings.web_search.providers, settings.web_search.provider)}
|
value={webStatus}
|
||||||
caption={webStatus}
|
caption={webCaption}
|
||||||
showBrandLogos={showBrandLogos}
|
showBrandLogos={showBrandLogos}
|
||||||
onClick={() => onSelectSection("browser")}
|
onClick={() => onSelectSection("browser")}
|
||||||
/>
|
/>
|
||||||
@ -1705,6 +1821,15 @@ function OverviewSettings({
|
|||||||
showBrandLogos={showBrandLogos}
|
showBrandLogos={showBrandLogos}
|
||||||
onClick={() => onSelectSection("image")}
|
onClick={() => onSelectSection("image")}
|
||||||
/>
|
/>
|
||||||
|
<OverviewListRow
|
||||||
|
icon={Mic}
|
||||||
|
valueLogoProvider={transcription.provider}
|
||||||
|
title={tx("settings.overview.voiceInput", "Voice input")}
|
||||||
|
value={voiceStatus}
|
||||||
|
caption={voiceCaption}
|
||||||
|
showBrandLogos={showBrandLogos}
|
||||||
|
onClick={() => onSelectSection("voice")}
|
||||||
|
/>
|
||||||
</SettingsGroup>
|
</SettingsGroup>
|
||||||
</section>
|
</section>
|
||||||
|
|
||||||
@ -2654,6 +2779,137 @@ function ImageGenerationSettings({
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
function TranscriptionSettings({
|
||||||
|
settings,
|
||||||
|
form,
|
||||||
|
dirty,
|
||||||
|
saving,
|
||||||
|
onChangeForm,
|
||||||
|
onSave,
|
||||||
|
onOpenProviders,
|
||||||
|
showBrandLogos,
|
||||||
|
onRestart,
|
||||||
|
isRestarting,
|
||||||
|
requiresRestartPending,
|
||||||
|
}: {
|
||||||
|
settings: SettingsPayload;
|
||||||
|
form: TranscriptionSettingsUpdate;
|
||||||
|
dirty: boolean;
|
||||||
|
saving: boolean;
|
||||||
|
onChangeForm: Dispatch<SetStateAction<TranscriptionSettingsUpdate>>;
|
||||||
|
onSave: () => void;
|
||||||
|
onOpenProviders: () => void;
|
||||||
|
showBrandLogos: boolean;
|
||||||
|
onRestart?: () => void;
|
||||||
|
isRestarting?: boolean;
|
||||||
|
requiresRestartPending: boolean;
|
||||||
|
}) {
|
||||||
|
const { t } = useTranslation();
|
||||||
|
const tx = (key: string, fallback: string) => t(key, { defaultValue: fallback });
|
||||||
|
const transcription = settings.transcription ?? DEFAULT_TRANSCRIPTION_SETTINGS;
|
||||||
|
const selectedProvider =
|
||||||
|
transcription.providers.find((provider) => provider.name === form.provider) ??
|
||||||
|
transcription.providers[0];
|
||||||
|
const providerConfigured = !!selectedProvider?.configured;
|
||||||
|
|
||||||
|
return (
|
||||||
|
<section>
|
||||||
|
<SettingsSectionTitle>{tx("settings.sections.voiceInput", "Voice input")}</SettingsSectionTitle>
|
||||||
|
<SettingsGroup>
|
||||||
|
<SettingsRow
|
||||||
|
title={tx("settings.rows.transcription", "Transcription")}
|
||||||
|
description={tx("settings.help.transcription", "Transcribe microphone input before sending it. Chat channel voice messages use the same settings.")}
|
||||||
|
>
|
||||||
|
<ToggleButton
|
||||||
|
checked={form.enabled}
|
||||||
|
onChange={(enabled) => onChangeForm((prev) => ({ ...prev, enabled }))}
|
||||||
|
ariaLabel={tx("settings.rows.transcription", "Transcription")}
|
||||||
|
label={form.enabled ? tx("settings.values.on", "On") : tx("settings.values.off", "Off")}
|
||||||
|
/>
|
||||||
|
</SettingsRow>
|
||||||
|
<SettingsRow
|
||||||
|
title={tx("settings.rows.transcriptionProvider", "Provider")}
|
||||||
|
description={tx("settings.help.transcriptionProvider", "Uses the matching provider credentials from Providers.")}
|
||||||
|
>
|
||||||
|
<ProviderPicker
|
||||||
|
providers={transcription.providers}
|
||||||
|
value={form.provider}
|
||||||
|
emptyLabel={tx("settings.voice.selectProvider", "Select provider")}
|
||||||
|
showProviderLogos={showBrandLogos}
|
||||||
|
onChange={(provider) => onChangeForm((prev) => ({ ...prev, provider }))}
|
||||||
|
/>
|
||||||
|
</SettingsRow>
|
||||||
|
<SettingsRow
|
||||||
|
title={tx("settings.rows.transcriptionProviderStatus", "Provider status")}
|
||||||
|
description={tx("settings.help.transcriptionProviderStatus", "API keys stay under providers, not in transcription settings.")}
|
||||||
|
>
|
||||||
|
<div className="flex flex-wrap items-center justify-end gap-2">
|
||||||
|
<StatusPill tone={providerConfigured ? "success" : "neutral"}>
|
||||||
|
{providerConfigured
|
||||||
|
? tx("settings.values.configured", "Configured")
|
||||||
|
: tx("settings.values.notConfigured", "Not configured")}
|
||||||
|
</StatusPill>
|
||||||
|
{!providerConfigured ? (
|
||||||
|
<Button size="sm" variant="outline" onClick={onOpenProviders} className="rounded-full">
|
||||||
|
{tx("settings.voice.configureProvider", "Configure provider")}
|
||||||
|
</Button>
|
||||||
|
) : null}
|
||||||
|
</div>
|
||||||
|
</SettingsRow>
|
||||||
|
<SettingsRow
|
||||||
|
title={tx("settings.rows.transcriptionModel", "Model")}
|
||||||
|
description={tx("settings.help.transcriptionModel", "Leave as the resolved default unless your provider needs a custom model id.")}
|
||||||
|
>
|
||||||
|
<Input
|
||||||
|
value={form.model}
|
||||||
|
onChange={(event) => onChangeForm((prev) => ({ ...prev, model: event.target.value }))}
|
||||||
|
className="h-8 w-[min(300px,70vw)] rounded-full text-[13px]"
|
||||||
|
/>
|
||||||
|
</SettingsRow>
|
||||||
|
<SettingsRow
|
||||||
|
title={tx("settings.rows.transcriptionLanguage", "Language")}
|
||||||
|
description={tx("settings.help.transcriptionLanguage", "Optional ISO-639 hint such as en, zh, ja, or ko.")}
|
||||||
|
>
|
||||||
|
<Input
|
||||||
|
value={form.language}
|
||||||
|
onChange={(event) => onChangeForm((prev) => ({ ...prev, language: event.target.value }))}
|
||||||
|
placeholder={tx("settings.voice.languageAuto", "Auto")}
|
||||||
|
className="h-8 w-[min(180px,60vw)] rounded-full text-[13px]"
|
||||||
|
/>
|
||||||
|
</SettingsRow>
|
||||||
|
<SettingsRow title={tx("settings.rows.voiceLimits", "Limits")}>
|
||||||
|
<div className="flex flex-wrap justify-end gap-2">
|
||||||
|
<NumberInput
|
||||||
|
value={form.maxDurationSec}
|
||||||
|
min={1}
|
||||||
|
max={600}
|
||||||
|
suffix="s"
|
||||||
|
onChange={(maxDurationSec) => onChangeForm((prev) => ({ ...prev, maxDurationSec }))}
|
||||||
|
/>
|
||||||
|
<NumberInput
|
||||||
|
value={form.maxUploadMb}
|
||||||
|
min={1}
|
||||||
|
max={100}
|
||||||
|
suffix="MB"
|
||||||
|
onChange={(maxUploadMb) => onChangeForm((prev) => ({ ...prev, maxUploadMb }))}
|
||||||
|
/>
|
||||||
|
</div>
|
||||||
|
</SettingsRow>
|
||||||
|
<RestartSettingsFooter
|
||||||
|
dirty={dirty}
|
||||||
|
saving={saving}
|
||||||
|
pendingRestart={requiresRestartPending}
|
||||||
|
dirtyMessage={tx("settings.status.restartAfterSaving", "Save changes, then restart when ready.")}
|
||||||
|
pendingMessage={tx("settings.status.savedRestartApply", "Saved. Restart when ready.")}
|
||||||
|
onSave={onSave}
|
||||||
|
onRestart={onRestart}
|
||||||
|
isRestarting={isRestarting}
|
||||||
|
/>
|
||||||
|
</SettingsGroup>
|
||||||
|
</section>
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
function WebSettings({
|
function WebSettings({
|
||||||
settings,
|
settings,
|
||||||
form,
|
form,
|
||||||
|
|||||||
@ -78,16 +78,13 @@ function buildTokenUsageCalendar(
|
|||||||
const today = utcDateFromIsoDay(isoDayInTimeZone(new Date(), timeZone));
|
const today = utcDateFromIsoDay(isoDayInTimeZone(new Date(), timeZone));
|
||||||
const end = addUtcDays(today, 6 - today.getUTCDay());
|
const end = addUtcDays(today, 6 - today.getUTCDay());
|
||||||
const start = addUtcDays(end, -(TOKEN_HEATMAP_CELLS - 1));
|
const start = addUtcDays(end, -(TOKEN_HEATMAP_CELLS - 1));
|
||||||
const seenMonths = new Set<string>();
|
|
||||||
const monthLabels: TokenUsageMonthLabel[] = [];
|
const monthLabels: TokenUsageMonthLabel[] = [];
|
||||||
|
|
||||||
const cells = Array.from({ length: TOKEN_HEATMAP_CELLS }, (_, index) => {
|
const cells = Array.from({ length: TOKEN_HEATMAP_CELLS }, (_, index) => {
|
||||||
const date = addUtcDays(start, index);
|
const date = addUtcDays(start, index);
|
||||||
const key = isoDay(date);
|
const key = isoDay(date);
|
||||||
const row = byDate.get(key);
|
const row = byDate.get(key);
|
||||||
const monthKey = key.slice(0, 7);
|
if (date.getUTCDate() === 1) {
|
||||||
if (!seenMonths.has(monthKey)) {
|
|
||||||
seenMonths.add(monthKey);
|
|
||||||
monthLabels.push({
|
monthLabels.push({
|
||||||
label: monthFormatter.format(date),
|
label: monthFormatter.format(date),
|
||||||
column: Math.floor(index / 7) + 1,
|
column: Math.floor(index / 7) + 1,
|
||||||
@ -186,16 +183,12 @@ export function TokenUsageHeatmap({
|
|||||||
{tx("settings.usage.shortTitle", "Token Usage")}
|
{tx("settings.usage.shortTitle", "Token Usage")}
|
||||||
</span>
|
</span>
|
||||||
</div>
|
</div>
|
||||||
<div
|
<div className="relative mb-2 h-4 text-[10px] font-normal leading-4 text-muted-foreground/62" aria-hidden>
|
||||||
className="mb-2 grid min-h-4 gap-1.5 text-[10px] font-normal leading-4 text-muted-foreground/62"
|
|
||||||
style={{ gridTemplateColumns: `repeat(${TOKEN_HEATMAP_COLUMNS}, minmax(0, 1fr))` }}
|
|
||||||
aria-hidden
|
|
||||||
>
|
|
||||||
{monthLabels.map((month) => (
|
{monthLabels.map((month) => (
|
||||||
<span
|
<span
|
||||||
key={`${month.label}-${month.column}`}
|
key={`${month.label}-${month.column}`}
|
||||||
className="whitespace-nowrap"
|
className="absolute top-0 whitespace-nowrap"
|
||||||
style={{ gridColumnStart: month.column, gridColumnEnd: "span 4" }}
|
style={{ left: `${((month.column - 1) / TOKEN_HEATMAP_COLUMNS) * 100}%` }}
|
||||||
>
|
>
|
||||||
{month.label}
|
{month.label}
|
||||||
</span>
|
</span>
|
||||||
|
|||||||
@ -31,6 +31,7 @@ import {
|
|||||||
History,
|
History,
|
||||||
ImageIcon,
|
ImageIcon,
|
||||||
Loader2,
|
Loader2,
|
||||||
|
Mic,
|
||||||
Plus,
|
Plus,
|
||||||
RotateCw,
|
RotateCw,
|
||||||
Shield,
|
Shield,
|
||||||
@ -46,6 +47,12 @@ import {
|
|||||||
import { useTranslation } from "react-i18next";
|
import { useTranslation } from "react-i18next";
|
||||||
|
|
||||||
import { Button } from "@/components/ui/button";
|
import { Button } from "@/components/ui/button";
|
||||||
|
import {
|
||||||
|
Tooltip,
|
||||||
|
TooltipContent,
|
||||||
|
TooltipProvider,
|
||||||
|
TooltipTrigger,
|
||||||
|
} from "@/components/ui/tooltip";
|
||||||
import {
|
import {
|
||||||
WorkspaceAccessMenu,
|
WorkspaceAccessMenu,
|
||||||
WorkspaceProjectPicker,
|
WorkspaceProjectPicker,
|
||||||
@ -59,6 +66,7 @@ import {
|
|||||||
} from "@/hooks/useAttachedImages";
|
} from "@/hooks/useAttachedImages";
|
||||||
import { useClipboardAndDrop } from "@/hooks/useClipboardAndDrop";
|
import { useClipboardAndDrop } from "@/hooks/useClipboardAndDrop";
|
||||||
import type { SendImage, SendOptions } from "@/hooks/useNanobotStream";
|
import type { SendImage, SendOptions } from "@/hooks/useNanobotStream";
|
||||||
|
import { useVoiceRecorder, type VoiceRecorderErrorKey } from "@/hooks/useVoiceRecorder";
|
||||||
import type {
|
import type {
|
||||||
CliAppInfo,
|
CliAppInfo,
|
||||||
GoalStateWsPayload,
|
GoalStateWsPayload,
|
||||||
@ -79,6 +87,9 @@ import { cn } from "@/lib/utils";
|
|||||||
/** ``<input accept>``: aligned with the server's MIME whitelist. SVG is
|
/** ``<input accept>``: aligned with the server's MIME whitelist. SVG is
|
||||||
* deliberately excluded to avoid an embedded-script XSS surface. */
|
* deliberately excluded to avoid an embedded-script XSS surface. */
|
||||||
const ACCEPT_ATTR = "image/png,image/jpeg,image/webp,image/gif";
|
const ACCEPT_ATTR = "image/png,image/jpeg,image/webp,image/gif";
|
||||||
|
const VOICE_SHORTCUT_CODE = "KeyD";
|
||||||
|
const VOICE_SHORTCUT_ARIA = "Control+Shift+D";
|
||||||
|
type VoiceShortcutPlatform = "apple" | "chromeos" | "linux" | "other" | "windows";
|
||||||
|
|
||||||
function formatBytes(n: number): string {
|
function formatBytes(n: number): string {
|
||||||
if (n < 1024) return `${n} B`;
|
if (n < 1024) return `${n} B`;
|
||||||
@ -86,6 +97,54 @@ function formatBytes(n: number): string {
|
|||||||
return `${(n / (1024 * 1024)).toFixed(1)} MB`;
|
return `${(n / (1024 * 1024)).toFixed(1)} MB`;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
function isVoiceShortcutDown(event: KeyboardEvent): boolean {
|
||||||
|
return (
|
||||||
|
event.code === VOICE_SHORTCUT_CODE
|
||||||
|
&& event.ctrlKey
|
||||||
|
&& event.shiftKey
|
||||||
|
&& !event.altKey
|
||||||
|
&& !event.metaKey
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
function isVoiceShortcutRelease(event: KeyboardEvent): boolean {
|
||||||
|
return (
|
||||||
|
event.code === VOICE_SHORTCUT_CODE
|
||||||
|
|| event.key === "Control"
|
||||||
|
|| event.key === "Shift"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Classifies the current runtime into a coarse platform family.
 *
 * The decision is made from a lower-cased concatenation of
 * `navigator.userAgentData.platform` (when present), `navigator.platform` and
 * `navigator.userAgent`. Rules are evaluated in order and the first match
 * wins; when `navigator` does not exist or nothing matches, the result is
 * `"other"`.
 *
 * @returns The detected platform family.
 */
function getVoiceShortcutPlatform(): VoiceShortcutPlatform {
  if (typeof navigator === "undefined") {
    return "other";
  }

  const hintedPlatform = (
    navigator as Navigator & { userAgentData?: { platform?: string } }
  ).userAgentData?.platform;
  const fingerprint = [hintedPlatform, navigator.platform, navigator.userAgent]
    .filter(Boolean)
    .join(" ")
    .toLowerCase();

  // A "MacIntel" platform with multi-touch is treated as an Apple device even
  // before the text rules run.
  if (navigator.platform === "MacIntel" && navigator.maxTouchPoints > 1) {
    return "apple";
  }

  // Order matters: the Apple rule must run before the generic ones.
  const rules: ReadonlyArray<readonly [RegExp, VoiceShortcutPlatform]> = [
    [/mac|iphone|ipad|ipod/, "apple"],
    [/win/, "windows"],
    [/cros/, "chromeos"],
    [/linux|x11|android/, "linux"],
  ];
  for (const [pattern, family] of rules) {
    if (pattern.test(fingerprint)) return family;
  }
  return "other";
}
|
||||||
|
|
||||||
|
/**
 * Builds the human-readable shortcut hint for the voice button tooltip.
 *
 * Apple platforms get the compact glyph form; every other platform family
 * ("chromeos", "linux", "windows", "other") shares the spelled-out "Ctrl" form.
 *
 * @returns The label text for the current platform.
 */
function getVoiceShortcutLabel(): string {
  const usesAppleGlyphs = getVoiceShortcutPlatform() === "apple";
  return usesAppleGlyphs ? "⌃⇧D" : "Ctrl ⇧ D";
}
|
||||||
|
|
||||||
interface ThreadComposerProps {
|
interface ThreadComposerProps {
|
||||||
onSend: (content: string, images?: SendImage[], options?: SendOptions) => void;
|
onSend: (content: string, images?: SendImage[], options?: SendOptions) => void;
|
||||||
disabled?: boolean;
|
disabled?: boolean;
|
||||||
@ -101,6 +160,7 @@ interface ThreadComposerProps {
|
|||||||
cliApps?: CliAppInfo[];
|
cliApps?: CliAppInfo[];
|
||||||
mcpPresets?: McpPresetInfo[];
|
mcpPresets?: McpPresetInfo[];
|
||||||
onStop?: () => void;
|
onStop?: () => void;
|
||||||
|
onTranscribeAudio?: (dataUrl: string, options?: { durationMs?: number }) => Promise<string>;
|
||||||
/** Unix seconds from server; turn elapsed timer above input while set. */
|
/** Unix seconds from server; turn elapsed timer above input while set. */
|
||||||
runStartedAt?: number | null;
|
runStartedAt?: number | null;
|
||||||
/** Sustained objective for this chat (WebSocket ``goal_state``). */
|
/** Sustained objective for this chat (WebSocket ``goal_state``). */
|
||||||
@ -138,6 +198,45 @@ const QUEUED_PROMPTS_STORAGE_PREFIX = "nanobot.webui.composerQueuedGuidance.v1:"
|
|||||||
const QUEUED_PROMPTS_LIMIT = 20;
|
const QUEUED_PROMPTS_LIMIT = 20;
|
||||||
const QUEUED_PROMPT_MAX_CHARS = 4000;
|
const QUEUED_PROMPT_MAX_CHARS = 4000;
|
||||||
|
|
||||||
|
/**
 * Inline recording indicator: a row of thin vertical bars followed by the
 * elapsed-time label.
 *
 * - `ariaLabel` is announced through the polite live region on the wrapper.
 * - `className` is merged last so callers can override layout classes.
 * - `elapsedLabel` is the preformatted time text shown on the right.
 * - `isHero` selects the shorter row height used by the hero composer.
 * - `levels` holds one CSS height per bar; the bars are decorative and hidden
 *   from assistive technology.
 */
function VoiceRecordingMeter(props: {
  ariaLabel: string;
  className?: string;
  elapsedLabel: string;
  isHero: boolean;
  levels: number[];
}) {
  const { ariaLabel, className, elapsedLabel, isHero, levels } = props;
  const rowHeightClass = isHero ? "h-8" : "h-9";

  // Bars are keyed by position: the list has a fixed length and is only ever
  // shifted, so positional keys keep each DOM node in place.
  const bars = levels.map((barHeight, barIndex) => (
    <span
      key={barIndex}
      className="w-[2px] rounded-full bg-current opacity-85 transition-[height] duration-75 ease-linear motion-reduce:transition-none"
      style={{ height: barHeight }}
    />
  ));

  return (
    <div
      className={cn(
        "flex min-w-0 items-center gap-2 text-neutral-700 dark:text-white",
        rowHeightClass,
        className,
      )}
      aria-live="polite"
      aria-label={ariaLabel}
    >
      <span className="flex h-5 min-w-0 flex-1 items-center justify-between overflow-hidden" aria-hidden>
        {bars}
      </span>
      <span className="min-w-[2.1rem] text-right text-[12px] font-medium tabular-nums text-muted-foreground">
        {elapsedLabel}
      </span>
    </div>
  );
}
|
||||||
|
|
||||||
type SlashPalettePlacement = "above" | "below";
|
type SlashPalettePlacement = "above" | "below";
|
||||||
|
|
||||||
interface SlashPaletteLayout {
|
interface SlashPaletteLayout {
|
||||||
@ -656,6 +755,7 @@ export function ThreadComposer({
|
|||||||
cliApps = [],
|
cliApps = [],
|
||||||
mcpPresets = [],
|
mcpPresets = [],
|
||||||
onStop,
|
onStop,
|
||||||
|
onTranscribeAudio,
|
||||||
runStartedAt = null,
|
runStartedAt = null,
|
||||||
goalState,
|
goalState,
|
||||||
workspaceScope = null,
|
workspaceScope = null,
|
||||||
@ -685,7 +785,9 @@ export function ThreadComposer({
|
|||||||
const wasStreamingRef = useRef(isStreaming);
|
const wasStreamingRef = useRef(isStreaming);
|
||||||
const skipNextQueuedFlushRef = useRef(false);
|
const skipNextQueuedFlushRef = useRef(false);
|
||||||
const skipQueuedPromptPersistRef = useRef(false);
|
const skipQueuedPromptPersistRef = useRef(false);
|
||||||
|
const voiceShortcutDownRef = useRef(false);
|
||||||
const isHero = variant === "hero";
|
const isHero = variant === "hero";
|
||||||
|
const voiceShortcutLabel = useMemo(getVoiceShortcutLabel, []);
|
||||||
const queuedPromptStorageKey = useMemo(
|
const queuedPromptStorageKey = useMemo(
|
||||||
() => queuedPromptsStorageKey(pendingQueueKey),
|
() => queuedPromptsStorageKey(pendingQueueKey),
|
||||||
[pendingQueueKey],
|
[pendingQueueKey],
|
||||||
@ -1026,6 +1128,65 @@ export function ThreadComposer({
|
|||||||
});
|
});
|
||||||
}, []);
|
}, []);
|
||||||
|
|
||||||
|
const appendTranscription = useCallback((text: string) => {
|
||||||
|
const transcript = text.trim();
|
||||||
|
if (!transcript) return;
|
||||||
|
setValue((current) => {
|
||||||
|
if (!current.trim()) return transcript;
|
||||||
|
const separator = /[\s\n]$/.test(current) ? "" : " ";
|
||||||
|
return `${current}${separator}${transcript}`;
|
||||||
|
});
|
||||||
|
setSlashMenuDismissed(false);
|
||||||
|
setCliAppMenuDismissed(false);
|
||||||
|
setInlineError(null);
|
||||||
|
resizeTextarea();
|
||||||
|
}, [resizeTextarea]);
|
||||||
|
|
||||||
|
const clearInlineError = useCallback(() => setInlineError(null), []);
|
||||||
|
const setVoiceError = useCallback((key: VoiceRecorderErrorKey) => {
|
||||||
|
setInlineError(t(`thread.composer.voiceErrors.${key}`));
|
||||||
|
}, [t]);
|
||||||
|
const voiceRecorder = useVoiceRecorder({
|
||||||
|
disabled,
|
||||||
|
onClearError: clearInlineError,
|
||||||
|
onError: setVoiceError,
|
||||||
|
onTranscript: appendTranscription,
|
||||||
|
onTranscribeAudio,
|
||||||
|
});
|
||||||
|
|
||||||
|
useEffect(() => {
|
||||||
|
if (!onTranscribeAudio) return;
|
||||||
|
|
||||||
|
function onKeyDown(event: KeyboardEvent): void {
|
||||||
|
if (!isVoiceShortcutDown(event) || event.repeat || voiceShortcutDownRef.current) return;
|
||||||
|
event.preventDefault();
|
||||||
|
voiceShortcutDownRef.current = true;
|
||||||
|
voiceRecorder.beginShortcutHold();
|
||||||
|
}
|
||||||
|
|
||||||
|
function onKeyUp(event: KeyboardEvent): void {
|
||||||
|
if (!voiceShortcutDownRef.current || !isVoiceShortcutRelease(event)) return;
|
||||||
|
event.preventDefault();
|
||||||
|
voiceShortcutDownRef.current = false;
|
||||||
|
voiceRecorder.endShortcutHold();
|
||||||
|
}
|
||||||
|
|
||||||
|
function onWindowBlur(): void {
|
||||||
|
if (!voiceShortcutDownRef.current) return;
|
||||||
|
voiceShortcutDownRef.current = false;
|
||||||
|
voiceRecorder.endShortcutHold();
|
||||||
|
}
|
||||||
|
|
||||||
|
window.addEventListener("keydown", onKeyDown);
|
||||||
|
window.addEventListener("keyup", onKeyUp);
|
||||||
|
window.addEventListener("blur", onWindowBlur);
|
||||||
|
return () => {
|
||||||
|
window.removeEventListener("keydown", onKeyDown);
|
||||||
|
window.removeEventListener("keyup", onKeyUp);
|
||||||
|
window.removeEventListener("blur", onWindowBlur);
|
||||||
|
};
|
||||||
|
}, [onTranscribeAudio, voiceRecorder.beginShortcutHold, voiceRecorder.endShortcutHold]);
|
||||||
|
|
||||||
const chooseSlashCommand = useCallback(
|
const chooseSlashCommand = useCallback(
|
||||||
(command: SlashCommand) => {
|
(command: SlashCommand) => {
|
||||||
if (command.command === "/stop" && isStreaming && onStop) {
|
if (command.command === "/stop" && isStreaming && onStop) {
|
||||||
@ -1341,6 +1502,23 @@ export function ThreadComposer({
|
|||||||
);
|
);
|
||||||
|
|
||||||
const attachButtonDisabled = disabled || full;
|
const attachButtonDisabled = disabled || full;
|
||||||
|
const showVoiceButton = Boolean(onTranscribeAudio);
|
||||||
|
const voiceRecordingStatusLabel = t("thread.composer.voice.recordingStatus", {
|
||||||
|
time: voiceRecorder.elapsedLabel,
|
||||||
|
defaultValue: `Recording ${voiceRecorder.elapsedLabel}`,
|
||||||
|
});
|
||||||
|
const voiceButtonLabel =
|
||||||
|
voiceRecorder.state === "recording"
|
||||||
|
? t("thread.composer.voice.stop")
|
||||||
|
: voiceRecorder.state === "transcribing"
|
||||||
|
? t("thread.composer.voice.transcribing")
|
||||||
|
: t("thread.composer.tools.voice");
|
||||||
|
const voiceButtonTooltip =
|
||||||
|
voiceRecorder.state === "recording"
|
||||||
|
? t("thread.composer.voice.stop")
|
||||||
|
: voiceRecorder.state === "transcribing"
|
||||||
|
? t("thread.composer.voice.transcribing")
|
||||||
|
: t("thread.composer.voice.hint");
|
||||||
const showStopButton = isStreaming && !!onStop;
|
const showStopButton = isStreaming && !!onStop;
|
||||||
const relaxedHeroInput = isHero && images.length === 0 && !isStreaming;
|
const relaxedHeroInput = isHero && images.length === 0 && !isStreaming;
|
||||||
const inputTextClasses = cn(
|
const inputTextClasses = cn(
|
||||||
@ -1531,7 +1709,15 @@ export function ThreadComposer({
|
|||||||
>
|
>
|
||||||
<Plus className={cn(isHero ? "h-[18px] w-[18px]" : "h-4 w-4")} />
|
<Plus className={cn(isHero ? "h-[18px] w-[18px]" : "h-4 w-4")} />
|
||||||
</Button>
|
</Button>
|
||||||
{workspaceScope ? (
|
{voiceRecorder.isRecording ? (
|
||||||
|
<VoiceRecordingMeter
|
||||||
|
ariaLabel={voiceRecordingStatusLabel}
|
||||||
|
className="mx-1 flex-1"
|
||||||
|
elapsedLabel={voiceRecorder.elapsedLabel}
|
||||||
|
isHero={isHero}
|
||||||
|
levels={voiceRecorder.levels}
|
||||||
|
/>
|
||||||
|
) : workspaceScope ? (
|
||||||
<WorkspaceAccessMenu
|
<WorkspaceAccessMenu
|
||||||
scope={workspaceScope}
|
scope={workspaceScope}
|
||||||
disabled={disabled || workspaceScopeDisabled}
|
disabled={disabled || workspaceScopeDisabled}
|
||||||
@ -1542,7 +1728,7 @@ export function ThreadComposer({
|
|||||||
) : null}
|
) : null}
|
||||||
</div>
|
</div>
|
||||||
<div className={cn("flex shrink-0 items-center", isHero ? "gap-1.5" : "gap-2")}>
|
<div className={cn("flex shrink-0 items-center", isHero ? "gap-1.5" : "gap-2")}>
|
||||||
{modelLabel ? (
|
{modelLabel && !voiceRecorder.isRecording ? (
|
||||||
<ComposerModelBadge
|
<ComposerModelBadge
|
||||||
label={modelLabel}
|
label={modelLabel}
|
||||||
provider={modelProvider}
|
provider={modelProvider}
|
||||||
@ -1552,6 +1738,53 @@ export function ThreadComposer({
|
|||||||
onClick={modelNeedsSetup ? onModelBadgeClick : undefined}
|
onClick={modelNeedsSetup ? onModelBadgeClick : undefined}
|
||||||
/>
|
/>
|
||||||
) : null}
|
) : null}
|
||||||
|
{showVoiceButton ? (
|
||||||
|
<TooltipProvider delayDuration={220} skipDelayDuration={80}>
|
||||||
|
<Tooltip>
|
||||||
|
<TooltipTrigger asChild>
|
||||||
|
<Button
|
||||||
|
type="button"
|
||||||
|
size="icon"
|
||||||
|
variant="ghost"
|
||||||
|
disabled={voiceRecorder.buttonDisabled}
|
||||||
|
aria-label={voiceButtonLabel}
|
||||||
|
aria-keyshortcuts={VOICE_SHORTCUT_ARIA}
|
||||||
|
title={voiceButtonTooltip}
|
||||||
|
onPointerDown={voiceRecorder.beginPress}
|
||||||
|
onPointerUp={voiceRecorder.endPress}
|
||||||
|
onPointerCancel={voiceRecorder.endPress}
|
||||||
|
onClick={voiceRecorder.handleClick}
|
||||||
|
className={cn(
|
||||||
|
"rounded-full border border-transparent text-muted-foreground hover:bg-muted/65 hover:text-foreground",
|
||||||
|
isHero ? "h-8 w-8" : "h-9 w-9",
|
||||||
|
voiceRecorder.isRecording &&
|
||||||
|
"bg-red-500 text-white shadow-[0_8px_20px_rgba(239,68,68,0.22)] hover:bg-red-500 hover:text-white",
|
||||||
|
)}
|
||||||
|
>
|
||||||
|
{voiceRecorder.state === "transcribing" ? (
|
||||||
|
<Loader2 className={cn(isHero ? "h-4 w-4" : "h-4 w-4", "animate-spin")} />
|
||||||
|
) : voiceRecorder.isRecording ? (
|
||||||
|
<Square className={cn(isHero ? "h-3.5 w-3.5" : "h-3.5 w-3.5")} fill="currentColor" />
|
||||||
|
) : (
|
||||||
|
<Mic className={cn(isHero ? "h-4 w-4" : "h-4 w-4")} />
|
||||||
|
)}
|
||||||
|
</Button>
|
||||||
|
</TooltipTrigger>
|
||||||
|
<TooltipContent
|
||||||
|
side="top"
|
||||||
|
align="center"
|
||||||
|
className="flex items-center gap-2 rounded-full border border-border/70 bg-background px-3 py-1.5 text-[13px] font-medium text-foreground shadow-[0_8px_24px_rgba(15,23,42,0.13)] dark:border-white/10 dark:bg-neutral-900 dark:text-white"
|
||||||
|
>
|
||||||
|
<span>{voiceButtonTooltip}</span>
|
||||||
|
{voiceRecorder.state === "idle" ? (
|
||||||
|
<kbd className="rounded-full bg-muted px-2 py-0.5 font-sans text-[12px] font-semibold leading-none text-muted-foreground dark:bg-white/10 dark:text-white/80">
|
||||||
|
{voiceShortcutLabel}
|
||||||
|
</kbd>
|
||||||
|
) : null}
|
||||||
|
</TooltipContent>
|
||||||
|
</Tooltip>
|
||||||
|
</TooltipProvider>
|
||||||
|
) : null}
|
||||||
<Button
|
<Button
|
||||||
type={showStopButton || modelNeedsSetup ? "button" : "submit"}
|
type={showStopButton || modelNeedsSetup ? "button" : "submit"}
|
||||||
size="icon"
|
size="icon"
|
||||||
|
|||||||
@ -302,6 +302,7 @@ export function ThreadShell({
|
|||||||
runStartedAt,
|
runStartedAt,
|
||||||
goalState,
|
goalState,
|
||||||
send,
|
send,
|
||||||
|
transcribeAudio,
|
||||||
stop,
|
stop,
|
||||||
setMessages,
|
setMessages,
|
||||||
streamError,
|
streamError,
|
||||||
@ -642,6 +643,7 @@ export function ThreadShell({
|
|||||||
cliApps={cliApps}
|
cliApps={cliApps}
|
||||||
mcpPresets={mcpPresets}
|
mcpPresets={mcpPresets}
|
||||||
onStop={stop}
|
onStop={stop}
|
||||||
|
onTranscribeAudio={transcribeAudio}
|
||||||
runStartedAt={runStartedAt}
|
runStartedAt={runStartedAt}
|
||||||
goalState={goalState}
|
goalState={goalState}
|
||||||
workspaceScope={workspaceScope}
|
workspaceScope={workspaceScope}
|
||||||
@ -672,6 +674,7 @@ export function ThreadShell({
|
|||||||
cliApps={cliApps}
|
cliApps={cliApps}
|
||||||
mcpPresets={mcpPresets}
|
mcpPresets={mcpPresets}
|
||||||
runStartedAt={runStartedAt}
|
runStartedAt={runStartedAt}
|
||||||
|
onTranscribeAudio={transcribeAudio}
|
||||||
goalState={goalState}
|
goalState={goalState}
|
||||||
workspaceScope={workspaceScope}
|
workspaceScope={workspaceScope}
|
||||||
workspaceDefaultScope={workspaceDefaultScope}
|
workspaceDefaultScope={workspaceDefaultScope}
|
||||||
|
|||||||
@ -438,6 +438,7 @@ export function useNanobotStream(
|
|||||||
/** Latest sustained goal for this ``chatId`` (``goal_state`` WS events). */
|
/** Latest sustained goal for this ``chatId`` (``goal_state`` WS events). */
|
||||||
goalState: GoalStateWsPayload | undefined;
|
goalState: GoalStateWsPayload | undefined;
|
||||||
send: (content: string, images?: SendImage[], options?: SendOptions) => void;
|
send: (content: string, images?: SendImage[], options?: SendOptions) => void;
|
||||||
|
transcribeAudio: (dataUrl: string, options?: { durationMs?: number }) => Promise<string>;
|
||||||
stop: () => void;
|
stop: () => void;
|
||||||
setMessages: React.Dispatch<React.SetStateAction<UIMessage[]>>;
|
setMessages: React.Dispatch<React.SetStateAction<UIMessage[]>>;
|
||||||
/** Latest transport-level fault raised since the last ``dismissStreamError``.
|
/** Latest transport-level fault raised since the last ``dismissStreamError``.
|
||||||
@ -1089,12 +1090,19 @@ export function useNanobotStream(
|
|||||||
client.sendMessage(chatId, "/stop");
|
client.sendMessage(chatId, "/stop");
|
||||||
}, [chatId, clearActivitySegment, client, flushPendingStreamEvents]);
|
}, [chatId, clearActivitySegment, client, flushPendingStreamEvents]);
|
||||||
|
|
||||||
|
const transcribeAudio = useCallback(
|
||||||
|
(dataUrl: string, options?: { durationMs?: number }) =>
|
||||||
|
client.transcribeAudio(dataUrl, options),
|
||||||
|
[client],
|
||||||
|
);
|
||||||
|
|
||||||
return {
|
return {
|
||||||
messages,
|
messages,
|
||||||
isStreaming,
|
isStreaming,
|
||||||
runStartedAt,
|
runStartedAt,
|
||||||
goalState,
|
goalState,
|
||||||
send,
|
send,
|
||||||
|
transcribeAudio,
|
||||||
stop,
|
stop,
|
||||||
setMessages,
|
setMessages,
|
||||||
streamError,
|
streamError,
|
||||||
|
|||||||
422
webui/src/hooks/useVoiceRecorder.ts
Normal file
422
webui/src/hooks/useVoiceRecorder.ts
Normal file
@ -0,0 +1,422 @@
|
|||||||
|
import {
|
||||||
|
useCallback,
|
||||||
|
useEffect,
|
||||||
|
useRef,
|
||||||
|
useState,
|
||||||
|
type PointerEvent as ReactPointerEvent,
|
||||||
|
} from "react";
|
||||||
|
|
||||||
|
const VOICE_RECORDING_MAX_MS = 120_000;
|
||||||
|
const VOICE_RECORDING_MIN_MS = 650;
|
||||||
|
const VOICE_NO_INPUT_HINT_MS = 1_100;
|
||||||
|
const VOICE_HOLD_START_MS = 140;
|
||||||
|
const VOICE_WAVEFORM_BAR_COUNT = 64;
|
||||||
|
const VOICE_WAVEFORM_SILENT_HEIGHT = 3;
|
||||||
|
const VOICE_WAVEFORM_MIN_HEIGHT = 7;
|
||||||
|
const VOICE_WAVEFORM_MAX_HEIGHT = 34;
|
||||||
|
const VOICE_MIN_LEVEL = 0.018;
|
||||||
|
const VOICE_WAVEFORM_IDLE_LEVELS = Array.from(
|
||||||
|
{ length: VOICE_WAVEFORM_BAR_COUNT },
|
||||||
|
() => VOICE_WAVEFORM_SILENT_HEIGHT,
|
||||||
|
);
|
||||||
|
const VOICE_MIME_CANDIDATES = [
|
||||||
|
"audio/webm;codecs=opus",
|
||||||
|
"audio/webm",
|
||||||
|
"audio/mp4",
|
||||||
|
"audio/ogg;codecs=opus",
|
||||||
|
] as const;
|
||||||
|
|
||||||
|
export type VoiceRecorderState = "idle" | "recording" | "transcribing";
|
||||||
|
export type VoiceRecorderErrorKey =
|
||||||
|
| "failed"
|
||||||
|
| "noInput"
|
||||||
|
| "notConfigured"
|
||||||
|
| "permission"
|
||||||
|
| "tooLong"
|
||||||
|
| "tooShort"
|
||||||
|
| "unsupported";
|
||||||
|
|
||||||
|
interface VoiceRecorderOptions {
|
||||||
|
disabled?: boolean;
|
||||||
|
onClearError: () => void;
|
||||||
|
onError: (key: VoiceRecorderErrorKey) => void;
|
||||||
|
onTranscript: (text: string) => void;
|
||||||
|
onTranscribeAudio?: (dataUrl: string, options?: { durationMs?: number }) => Promise<string>;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * React hook that records microphone audio, shows a live level waveform and
 * hands the finished clip to `onTranscribeAudio`.
 *
 * Recording can be driven three ways, all exposed on the returned object:
 * click-to-toggle (`handleClick`), press-and-hold on the button
 * (`beginPress` / `endPress`) and press-and-hold via a keyboard shortcut
 * (`beginShortcutHold` / `endShortcutHold`).
 *
 * @param options.disabled When true, press/shortcut starts are ignored and
 *   `buttonDisabled` is reported as true.
 * @param options.onClearError Called when a recording starts and when input is
 *   detected after a "noInput" hint was shown.
 * @param options.onError Called with an error key for unsupported browsers,
 *   start failures, too-short clips, silent clips and transcription failures.
 * @param options.onTranscript Receives the text resolved by `onTranscribeAudio`.
 * @param options.onTranscribeAudio Transcription backend; when omitted the
 *   hook never starts recording.
 * @returns Handlers plus derived UI state (`state`, `isRecording`, `levels`,
 *   `elapsedLabel`, `buttonDisabled`).
 */
export function useVoiceRecorder({
  disabled,
  onClearError,
  onError,
  onTranscript,
  onTranscribeAudio,
}: VoiceRecorderOptions) {
  const mediaRecorderRef = useRef<MediaRecorder | null>(null);
  const chunksRef = useRef<BlobPart[]>([]);
  const streamRef = useRef<MediaStream | null>(null);
  const audioRef = useRef<VoiceAudioState | null>(null);
  const startedAtRef = useRef(0);
  const maxTimerRef = useRef<ReturnType<typeof setTimeout> | null>(null);
  const inputHintTimerRef = useRef<ReturnType<typeof setTimeout> | null>(null);
  const holdTimerRef = useRef<ReturnType<typeof setTimeout> | null>(null);
  // True while a pointer press has been promoted to a hold-to-record gesture.
  const holdActiveRef = useRef(false);
  // True from the moment a start is requested until `cleanupRecording` runs.
  const startPendingRef = useRef(false);
  // Set when a hold is released before the recorder exists; honoured once the
  // pending start resolves.
  const stopAfterStartRef = useRef(false);
  const suppressClickRef = useRef(false);
  const suppressClickTimerRef = useRef<ReturnType<typeof setTimeout> | null>(null);
  const shortcutActiveRef = useRef(false);
  // Level bookkeeping: both flags are raised by the first analyser frame read
  // on a running audio context; `peakLevelRef` tracks the loudest frame seen.
  const levelObservedRef = useRef(false);
  const peakLevelRef = useRef(0);
  const levelReliableRef = useRef(false);
  const noInputHintVisibleRef = useRef(false);
  // Raised by the unmount cleanup so an in-flight microphone request can
  // release its stream instead of starting a recording nobody can stop.
  const disposedRef = useRef(false);
  const [state, setState] = useState<VoiceRecorderState>("idle");
  const [elapsedMs, setElapsedMs] = useState(0);
  const [levels, setLevels] = useState<number[]>(VOICE_WAVEFORM_IDLE_LEVELS);

  const clearInputHintTimer = useCallback(() => clearTimer(inputHintTimerRef), []);
  const clearSuppressClickTimer = useCallback(() => clearTimer(suppressClickTimerRef), []);

  // Ignore the synthetic click that follows a pointer hold; the flag clears
  // itself after 500 ms if no click arrives.
  const suppressNextClick = useCallback(() => {
    clearSuppressClickTimer();
    suppressClickRef.current = true;
    suppressClickTimerRef.current = setTimeout(() => {
      suppressClickRef.current = false;
      suppressClickTimerRef.current = null;
    }, 500);
  }, [clearSuppressClickTimer]);

  // Tear down the analyser graph and cancel the pending animation frame.
  const stopWaveform = useCallback(() => {
    const audio = audioRef.current;
    audioRef.current = null;
    if (!audio) return;
    if (audio.frame !== null) cancelAnimationFrame(audio.frame);
    audio.source.disconnect();
    audio.analyser.disconnect();
    void audio.context.close().catch(() => undefined);
  }, []);

  // Attach an analyser to the stream and push one bar height per animation
  // frame. Failure here is non-fatal: recording continues without a waveform.
  const startWaveform = useCallback((stream: MediaStream) => {
    const AudioContextCtor = audioContextConstructor();
    if (!AudioContextCtor) return;
    stopWaveform();
    setLevels(VOICE_WAVEFORM_IDLE_LEVELS);
    try {
      const context = new AudioContextCtor();
      const source = context.createMediaStreamSource(stream);
      const analyser = context.createAnalyser();
      analyser.fftSize = 256;
      analyser.smoothingTimeConstant = 0.68;
      source.connect(analyser);
      const audio: VoiceAudioState = {
        analyser,
        context,
        data: new Uint8Array(analyser.fftSize),
        frame: null,
        source,
      };
      const tick = () => {
        const current = audioRef.current;
        if (!current) return;
        if (current.context.state !== "running") {
          // Samples read from a non-running context are not counted; keep
          // asking for a resume and try again next frame.
          void current.context.resume().catch(() => undefined);
          current.frame = requestAnimationFrame(tick);
          return;
        }
        current.analyser.getByteTimeDomainData(current.data);
        const level = voiceLevelFromSamples(current.data);
        levelReliableRef.current = true;
        levelObservedRef.current = true;
        peakLevelRef.current = Math.max(peakLevelRef.current, level);
        if (level >= VOICE_MIN_LEVEL) {
          clearInputHintTimer();
          if (noInputHintVisibleRef.current) {
            noInputHintVisibleRef.current = false;
            onClearError();
          }
        }
        // Scroll the waveform: drop the oldest bar, append the newest.
        setLevels((currentLevels) => [
          ...currentLevels.slice(1),
          waveformHeightFromLevel(level),
        ]);
        current.frame = requestAnimationFrame(tick);
      };
      audioRef.current = audio;
      void context.resume().catch(() => undefined);
      audio.frame = requestAnimationFrame(tick);
    } catch {
      stopWaveform();
    }
  }, [clearInputHintTimer, onClearError, stopWaveform]);

  // Release every resource tied to the current recording attempt: timers,
  // analyser graph, microphone tracks and the per-attempt flags.
  const cleanupRecording = useCallback(() => {
    clearTimer(holdTimerRef);
    clearInputHintTimer();
    clearTimer(maxTimerRef);
    stopWaveform();
    streamRef.current?.getTracks().forEach((track) => track.stop());
    streamRef.current = null;
    mediaRecorderRef.current = null;
    startPendingRef.current = false;
    shortcutActiveRef.current = false;
    noInputHintVisibleRef.current = false;
  }, [clearInputHintTimer, stopWaveform]);

  const stopRecording = useCallback(() => {
    const recorder = mediaRecorderRef.current;
    if (!recorder || recorder.state === "inactive") return;
    recorder.stop();
  }, []);

  // Stop now if a recorder is live; otherwise remember the request so the
  // pending start stops itself as soon as it finishes.
  const stopRecordingWhenReady = useCallback(() => {
    const recorder = mediaRecorderRef.current;
    if (recorder && recorder.state !== "inactive") {
      stopRecording();
    } else if (startPendingRef.current) {
      stopAfterStartRef.current = true;
    }
  }, [stopRecording]);

  const startRecording = useCallback(async () => {
    if (!onTranscribeAudio || state !== "idle" || startPendingRef.current) return;
    if (!navigator.mediaDevices?.getUserMedia || typeof MediaRecorder === "undefined") {
      onError("unsupported");
      return;
    }
    startPendingRef.current = true;
    try {
      const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
      if (disposedRef.current) {
        // The component unmounted while the request was pending: release the
        // microphone right away instead of recording for a dead component.
        stream.getTracks().forEach((track) => track.stop());
        startPendingRef.current = false;
        return;
      }
      // Register the stream before anything else can throw, so the `catch`
      // path's `cleanupRecording()` is able to stop its tracks.
      streamRef.current = stream;
      const recorder = new MediaRecorder(stream, mediaRecorderOptions());
      chunksRef.current = [];
      mediaRecorderRef.current = recorder;
      startedAtRef.current = Date.now();
      levelObservedRef.current = false;
      peakLevelRef.current = 0;
      levelReliableRef.current = false;
      noInputHintVisibleRef.current = false;
      setElapsedMs(0);
      startWaveform(stream);
      recorder.ondataavailable = (event) => {
        if (event.data.size > 0) chunksRef.current.push(event.data);
      };
      recorder.onstop = () => {
        // Snapshot everything needed before cleanup resets the refs.
        const chunks = chunksRef.current.splice(0);
        const durationMs = Math.max(0, Date.now() - startedAtRef.current);
        const mimeType = recorder.mimeType || "audio/webm";
        const hasMeasuredSilence =
          levelReliableRef.current
          && levelObservedRef.current
          && peakLevelRef.current < VOICE_MIN_LEVEL;
        cleanupRecording();
        if (chunks.length === 0) {
          setState("idle");
          return;
        }
        if (durationMs < VOICE_RECORDING_MIN_MS) {
          setState("idle");
          onError("tooShort");
          return;
        }
        if (hasMeasuredSilence) {
          setState("idle");
          onError("noInput");
          return;
        }
        setState("transcribing");
        void blobToDataUrl(new Blob(chunks, { type: mimeType }))
          .then((dataUrl) => onTranscribeAudio(dataUrl, { durationMs }))
          .then(onTranscript)
          .catch((error) => onError(transcriptionErrorKey(error)))
          .finally(() => setState("idle"));
      };
      recorder.start();
      setState("recording");
      onClearError();
      // Hard cap on clip length.
      maxTimerRef.current = setTimeout(stopRecording, VOICE_RECORDING_MAX_MS);
      // Early hint: if levels are being measured but nothing audible has been
      // seen yet, surface "noInput" while the recording is still running.
      inputHintTimerRef.current = setTimeout(() => {
        const recording = mediaRecorderRef.current?.state === "recording";
        if (
          !recording
          || !levelReliableRef.current
          || !levelObservedRef.current
          || peakLevelRef.current >= VOICE_MIN_LEVEL
        ) {
          return;
        }
        noInputHintVisibleRef.current = true;
        onError("noInput");
      }, VOICE_NO_INPUT_HINT_MS);
    } catch {
      cleanupRecording();
      setState("idle");
      onError("permission");
    }
  }, [
    cleanupRecording,
    onClearError,
    onError,
    onTranscribeAudio,
    onTranscript,
    startWaveform,
    state,
    stopRecording,
  ]);

  // Start recording and, if the hold was released while the start was still
  // pending, stop again immediately afterwards.
  const startRecordingWithDeferredStop = useCallback(() => {
    stopAfterStartRef.current = false;
    void startRecording().then(() => {
      if (!stopAfterStartRef.current) return;
      stopAfterStartRef.current = false;
      stopRecording();
    });
  }, [startRecording, stopRecording]);

  const beginPress = useCallback((event: ReactPointerEvent<HTMLButtonElement>) => {
    // Only the primary mouse button starts a hold.
    if (event.pointerType === "mouse" && event.button !== 0) return;
    if (!onTranscribeAudio || disabled || state !== "idle") return;
    clearTimer(holdTimerRef);
    try {
      event.currentTarget.setPointerCapture(event.pointerId);
    } catch {
      // Some embedded runtimes do not expose pointer capture for toolbar buttons.
    }
    // A press becomes a hold only after VOICE_HOLD_START_MS; shorter presses
    // fall through to the regular click handler.
    holdTimerRef.current = setTimeout(() => {
      holdTimerRef.current = null;
      holdActiveRef.current = true;
      suppressNextClick();
      startRecordingWithDeferredStop();
    }, VOICE_HOLD_START_MS);
  }, [disabled, onTranscribeAudio, startRecordingWithDeferredStop, state, suppressNextClick]);

  const endPress = useCallback(() => {
    const wasHoldRecording = holdActiveRef.current;
    clearTimer(holdTimerRef);
    if (!wasHoldRecording) return;
    holdActiveRef.current = false;
    suppressNextClick();
    stopRecordingWhenReady();
  }, [stopRecordingWhenReady, suppressNextClick]);

  const handleClick = useCallback(() => {
    if (suppressClickRef.current) {
      // This click belongs to a hold gesture that was already handled.
      clearSuppressClickTimer();
      suppressClickRef.current = false;
      return;
    }
    if (state === "recording") stopRecording();
    else void startRecording();
  }, [clearSuppressClickTimer, startRecording, state, stopRecording]);

  const beginShortcutHold = useCallback(() => {
    if (!onTranscribeAudio || disabled || state !== "idle" || shortcutActiveRef.current) return;
    shortcutActiveRef.current = true;
    startRecordingWithDeferredStop();
  }, [disabled, onTranscribeAudio, startRecordingWithDeferredStop, state]);

  const endShortcutHold = useCallback(() => {
    if (!shortcutActiveRef.current) return;
    shortcutActiveRef.current = false;
    stopRecordingWhenReady();
  }, [stopRecordingWhenReady]);

  // Keep the elapsed counter ticking (every 250 ms) while recording and reset
  // it in every other state.
  useEffect(() => {
    if (state !== "recording") {
      setElapsedMs(0);
      return;
    }
    const updateElapsed = () => {
      setElapsedMs(Math.max(0, Date.now() - startedAtRef.current));
    };
    updateElapsed();
    const interval = window.setInterval(updateElapsed, 250);
    return () => window.clearInterval(interval);
  }, [state]);

  // Release the microphone and timers on unmount, and flag the instance as
  // disposed so a still-pending start does not begin recording afterwards.
  // The flag is reset on (re)mount so effect re-runs leave the hook usable.
  useEffect(() => {
    disposedRef.current = false;
    return () => {
      disposedRef.current = true;
      cleanupRecording();
    };
  }, [cleanupRecording]);
  useEffect(() => () => clearSuppressClickTimer(), [clearSuppressClickTimer]);

  return {
    beginShortcutHold,
    beginPress,
    buttonDisabled: disabled || state === "transcribing",
    elapsedLabel: formatVoiceElapsed(elapsedMs),
    endShortcutHold,
    endPress,
    handleClick,
    isRecording: state === "recording",
    levels,
    state,
  };
}
|
||||||
|
|
||||||
|
interface VoiceAudioState {
|
||||||
|
analyser: AnalyserNode;
|
||||||
|
context: AudioContext;
|
||||||
|
data: Uint8Array<ArrayBuffer>;
|
||||||
|
frame: number | null;
|
||||||
|
source: MediaStreamAudioSourceNode;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Cancels the timeout held in `ref` (if any) and resets the slot to `null`.
 * Calling it on an already-empty slot is a no-op.
 *
 * @param ref Mutable holder whose `current` is a timeout handle or `null`.
 */
function clearTimer(ref: { current: ReturnType<typeof setTimeout> | null }) {
  const handle = ref.current;
  if (handle === null) return;
  clearTimeout(handle);
  ref.current = null;
}
|
||||||
|
|
||||||
|
/**
 * Picks recorder options for the current browser.
 *
 * Walks `VOICE_MIME_CANDIDATES` in order and returns the first MIME type that
 * `MediaRecorder.isTypeSupported` accepts.
 *
 * @returns `{ mimeType }` for the first supported candidate, or `undefined`
 *   when `MediaRecorder` is unavailable or no candidate is supported (leaving
 *   the choice to the browser default).
 */
function mediaRecorderOptions(): MediaRecorderOptions | undefined {
  if (typeof MediaRecorder === "undefined") return undefined;
  for (const candidate of VOICE_MIME_CANDIDATES) {
    if (MediaRecorder.isTypeSupported(candidate)) {
      return { mimeType: candidate };
    }
  }
  return undefined;
}
|
||||||
|
|
||||||
|
/**
 * Formats a duration as `m:ss`.
 *
 * Fractions of a second are truncated, negative inputs are clamped to zero,
 * and minutes are not capped or padded.
 *
 * @param ms Duration in milliseconds.
 * @returns Text such as `"0:07"` or `"2:05"`.
 */
function formatVoiceElapsed(ms: number): string {
  const totalSeconds = Math.max(0, Math.floor(ms / 1000));
  const secondsPart = String(totalSeconds % 60).padStart(2, "0");
  const minutesPart = Math.floor(totalSeconds / 60);
  return `${minutesPart}:${secondsPart}`;
}
|
||||||
|
|
||||||
|
/**
 * Resolves the Web Audio context constructor for this runtime.
 *
 * @returns `window.AudioContext`, falling back to the prefixed
 *   `webkitAudioContext`; `undefined` when there is no `window` or neither
 *   constructor exists.
 */
function audioContextConstructor(): typeof AudioContext | undefined {
  if (typeof window === "undefined") return undefined;
  const prefixedHost = window as unknown as { webkitAudioContext?: typeof AudioContext };
  return window.AudioContext ?? prefixedHost.webkitAudioContext;
}
|
||||||
|
|
||||||
|
/**
 * Converts a buffer of samples centred on 128 into a loudness value in [0, 1].
 *
 * Each sample is re-centred to roughly [-1, 1], the RMS of the buffer is
 * computed, and the result is boosted (x4.2), shaped with a 0.72 exponent and
 * clamped to 1.
 *
 * @param samples Sample values; the maths treats 128 as silence.
 * @returns 0 for an empty buffer, otherwise the shaped level (at most 1).
 */
function voiceLevelFromSamples(samples: ArrayLike<number>): number {
  const count = samples.length;
  if (count === 0) return 0;

  let energy = 0;
  let cursor = 0;
  while (cursor < count) {
    const offset = (samples[cursor] - 128) / 128;
    energy += offset * offset;
    cursor += 1;
  }

  const rootMeanSquare = Math.sqrt(energy / count);
  const shaped = Math.pow(rootMeanSquare * 4.2, 0.72);
  return Math.min(1, shaped);
}
|
||||||
|
|
||||||
|
/**
 * Maps a level to a waveform bar height.
 *
 * Levels below `VOICE_MIN_LEVEL` get the fixed `VOICE_WAVEFORM_SILENT_HEIGHT`.
 * Other levels are rescaled so that `VOICE_MIN_LEVEL` maps to
 * `VOICE_WAVEFORM_MIN_HEIGHT` and 1 (or more) maps to
 * `VOICE_WAVEFORM_MAX_HEIGHT`, then rounded to the nearest integer.
 *
 * @param level - Input level.
 * @returns The bar height for that level.
 */
function waveformHeightFromLevel(level: number): number {
  if (level < VOICE_MIN_LEVEL) {
    return VOICE_WAVEFORM_SILENT_HEIGHT;
  }

  // Portion of the audible range covered by this level, capped at 1.
  const aboveFloor = (level - VOICE_MIN_LEVEL) / (1 - VOICE_MIN_LEVEL);
  const activeLevel = Math.min(1, aboveFloor);

  const heightRange = VOICE_WAVEFORM_MAX_HEIGHT - VOICE_WAVEFORM_MIN_HEIGHT;
  return Math.round(VOICE_WAVEFORM_MIN_HEIGHT + activeLevel * heightRange);
}
|
||||||
|
|
||||||
|
/**
 * Reads a blob through `FileReader.readAsDataURL` and exposes the outcome as
 * a promise.
 *
 * @param blob - The blob to read.
 * @returns A promise resolving to the reader's string result. It rejects with
 *   `Error("invalid_data_url")` when the load result is not a string, and
 *   with the reader's error (or `Error("read_failed")` when none is set) on
 *   a read error.
 */
function blobToDataUrl(blob: Blob): Promise<string> {
  return new Promise((resolve, reject) => {
    const reader = new FileReader();

    const handleLoad = () => {
      const loaded = reader.result;
      if (typeof loaded !== "string") {
        reject(new Error("invalid_data_url"));
        return;
      }
      resolve(loaded);
    };

    const handleError = () => {
      reject(reader.error ?? new Error("read_failed"));
    };

    reader.onload = handleLoad;
    reader.onerror = handleError;
    reader.readAsDataURL(blob);
  });
}
|
||||||
|
|
||||||
|
/**
 * Translates a thrown value into a recorder error key by inspecting its
 * message.
 *
 * @param error - Any thrown value; only `Error` instances are inspected.
 * @returns `"notConfigured"` for message `not_configured`, `"tooLong"` for
 *   message `duration`, and `"failed"` for everything else (including
 *   non-`Error` values).
 */
function transcriptionErrorKey(error: unknown): VoiceRecorderErrorKey {
  if (!(error instanceof Error)) {
    return "failed";
  }

  switch (error.message) {
    case "not_configured":
      return "notConfigured";
    case "duration":
      return "tooLong";
    default:
      return "failed";
  }
}
|
||||||
@ -73,6 +73,7 @@
|
|||||||
"models": "Models",
|
"models": "Models",
|
||||||
"providers": "Providers",
|
"providers": "Providers",
|
||||||
"image": "Image",
|
"image": "Image",
|
||||||
|
"voice": "Voice",
|
||||||
"browser": "Web",
|
"browser": "Web",
|
||||||
"cliApps": "CLI Apps",
|
"cliApps": "CLI Apps",
|
||||||
"mcp": "MCP",
|
"mcp": "MCP",
|
||||||
@ -99,7 +100,8 @@
|
|||||||
"capabilities": "Capabilities",
|
"capabilities": "Capabilities",
|
||||||
"apps": "Apps",
|
"apps": "Apps",
|
||||||
"nativeHost": "Native host",
|
"nativeHost": "Native host",
|
||||||
"hostSafety": "App safety"
|
"hostSafety": "App safety",
|
||||||
|
"voiceInput": "Voice input"
|
||||||
},
|
},
|
||||||
"models": {
|
"models": {
|
||||||
"selectModel": "Select model",
|
"selectModel": "Select model",
|
||||||
@ -161,7 +163,13 @@
|
|||||||
"engine": "Engine",
|
"engine": "Engine",
|
||||||
"logs": "Logs",
|
"logs": "Logs",
|
||||||
"diagnostics": "Diagnostics",
|
"diagnostics": "Diagnostics",
|
||||||
"contextWindow": "Context window"
|
"contextWindow": "Context window",
|
||||||
|
"transcription": "Transcription",
|
||||||
|
"transcriptionProvider": "Provider",
|
||||||
|
"transcriptionProviderStatus": "Provider status",
|
||||||
|
"transcriptionModel": "Model",
|
||||||
|
"transcriptionLanguage": "Language",
|
||||||
|
"voiceLimits": "Limits"
|
||||||
},
|
},
|
||||||
"help": {
|
"help": {
|
||||||
"theme": "Switch between light and dark appearance.",
|
"theme": "Switch between light and dark appearance.",
|
||||||
@ -200,7 +208,12 @@
|
|||||||
"diagnostics": "Export a small runtime report for support.",
|
"diagnostics": "Export a small runtime report for support.",
|
||||||
"localServiceAccessNative": "Allow Full Access shell commands to reach services on this Mac.",
|
"localServiceAccessNative": "Allow Full Access shell commands to reach services on this Mac.",
|
||||||
"webuiDefaultAccessNative": "Used by native chats without a project-specific permission.",
|
"webuiDefaultAccessNative": "Used by native chats without a project-specific permission.",
|
||||||
"contextWindow": "Choose the default context budget for this model configuration."
|
"contextWindow": "Choose the default context budget for this model configuration.",
|
||||||
|
"transcription": "Transcribe microphone input before sending it. Chat channel voice messages use the same settings.",
|
||||||
|
"transcriptionProvider": "Uses the matching provider credentials from Providers.",
|
||||||
|
"transcriptionProviderStatus": "API keys stay under providers, not in transcription settings.",
|
||||||
|
"transcriptionModel": "Leave as the resolved default unless your provider needs a custom model id.",
|
||||||
|
"transcriptionLanguage": "Optional ISO-639 hint such as en, zh, ja, or ko."
|
||||||
},
|
},
|
||||||
"timezone": {
|
"timezone": {
|
||||||
"select": "Select timezone",
|
"select": "Select timezone",
|
||||||
@ -391,6 +404,7 @@
|
|||||||
"totalProviders": "{{count}} available",
|
"totalProviders": "{{count}} available",
|
||||||
"webSearch": "Web search",
|
"webSearch": "Web search",
|
||||||
"imageGeneration": "Image generation",
|
"imageGeneration": "Image generation",
|
||||||
|
"voiceInput": "Voice input",
|
||||||
"workspace": "Workspace"
|
"workspace": "Workspace"
|
||||||
},
|
},
|
||||||
"usage": {
|
"usage": {
|
||||||
@ -486,6 +500,11 @@
|
|||||||
"rawInstructions": "Raw SKILL.md",
|
"rawInstructions": "Raw SKILL.md",
|
||||||
"rawInstructionsEmpty": "No raw instructions.",
|
"rawInstructionsEmpty": "No raw instructions.",
|
||||||
"detailDescription": "Details for {{name}}."
|
"detailDescription": "Details for {{name}}."
|
||||||
|
},
|
||||||
|
"voice": {
|
||||||
|
"selectProvider": "Select provider",
|
||||||
|
"configureProvider": "Configure provider",
|
||||||
|
"languageAuto": "Auto"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"chat": {
|
"chat": {
|
||||||
@ -678,6 +697,21 @@
|
|||||||
"deepResearch": "Deep research",
|
"deepResearch": "Deep research",
|
||||||
"voice": "Voice input"
|
"voice": "Voice input"
|
||||||
},
|
},
|
||||||
|
"voice": {
|
||||||
|
"hint": "Click to dictate or hold",
|
||||||
|
"stop": "Stop recording",
|
||||||
|
"transcribing": "Transcribing...",
|
||||||
|
"recordingStatus": "Recording {{time}}"
|
||||||
|
},
|
||||||
|
"voiceErrors": {
|
||||||
|
"unsupported": "Voice input is not supported in this browser.",
|
||||||
|
"permission": "Microphone permission is required.",
|
||||||
|
"notConfigured": "Configure a transcription provider first.",
|
||||||
|
"tooLong": "Recording is too long.",
|
||||||
|
"tooShort": "Hold a little longer to record voice.",
|
||||||
|
"noInput": "No microphone input detected.",
|
||||||
|
"failed": "Could not transcribe audio."
|
||||||
|
},
|
||||||
"slash": {
|
"slash": {
|
||||||
"ariaLabel": "Slash commands",
|
"ariaLabel": "Slash commands",
|
||||||
"label": "commands",
|
"label": "commands",
|
||||||
|
|||||||
@ -73,6 +73,7 @@
|
|||||||
"models": "Modelos",
|
"models": "Modelos",
|
||||||
"providers": "Proveedores",
|
"providers": "Proveedores",
|
||||||
"image": "Imagen",
|
"image": "Imagen",
|
||||||
|
"voice": "Voz",
|
||||||
"browser": "Internet",
|
"browser": "Internet",
|
||||||
"runtime": "Sistema",
|
"runtime": "Sistema",
|
||||||
"advanced": "Seguridad",
|
"advanced": "Seguridad",
|
||||||
@ -99,7 +100,8 @@
|
|||||||
"mcp": "Servicios MCP",
|
"mcp": "Servicios MCP",
|
||||||
"apps": "Aplicaciones",
|
"apps": "Aplicaciones",
|
||||||
"nativeHost": "Host nativo",
|
"nativeHost": "Host nativo",
|
||||||
"hostSafety": "Seguridad de la app"
|
"hostSafety": "Seguridad de la app",
|
||||||
|
"voiceInput": "Entrada de voz"
|
||||||
},
|
},
|
||||||
"rows": {
|
"rows": {
|
||||||
"theme": "Tema",
|
"theme": "Tema",
|
||||||
@ -142,7 +144,13 @@
|
|||||||
"engine": "Motor",
|
"engine": "Motor",
|
||||||
"logs": "Registros",
|
"logs": "Registros",
|
||||||
"diagnostics": "Diagnóstico",
|
"diagnostics": "Diagnóstico",
|
||||||
"contextWindow": "Ventana de contexto"
|
"contextWindow": "Ventana de contexto",
|
||||||
|
"transcription": "Transcripcion",
|
||||||
|
"transcriptionProvider": "Proveedor",
|
||||||
|
"transcriptionProviderStatus": "Estado del proveedor",
|
||||||
|
"transcriptionModel": "Modelo",
|
||||||
|
"transcriptionLanguage": "Idioma",
|
||||||
|
"voiceLimits": "Limites"
|
||||||
},
|
},
|
||||||
"help": {
|
"help": {
|
||||||
"theme": "Cambia entre apariencia clara y oscura.",
|
"theme": "Cambia entre apariencia clara y oscura.",
|
||||||
@ -181,7 +189,12 @@
|
|||||||
"diagnostics": "Exporta un pequeño informe de runtime para soporte.",
|
"diagnostics": "Exporta un pequeño informe de runtime para soporte.",
|
||||||
"localServiceAccessNative": "Permite que comandos shell con Full Access alcancen servicios en este Mac.",
|
"localServiceAccessNative": "Permite que comandos shell con Full Access alcancen servicios en este Mac.",
|
||||||
"webuiDefaultAccessNative": "Usado por chats nativos sin permiso específico de proyecto.",
|
"webuiDefaultAccessNative": "Usado por chats nativos sin permiso específico de proyecto.",
|
||||||
"contextWindow": "Elige el presupuesto de contexto predeterminado para esta configuración de modelo."
|
"contextWindow": "Elige el presupuesto de contexto predeterminado para esta configuración de modelo.",
|
||||||
|
"transcription": "Transcribe la entrada del microfono antes de enviarla. Los mensajes de voz de los canales usan la misma configuracion.",
|
||||||
|
"transcriptionProvider": "Usa las credenciales del proveedor correspondiente en Proveedores.",
|
||||||
|
"transcriptionProviderStatus": "Las claves API permanecen en proveedores, no en la configuracion de transcripcion.",
|
||||||
|
"transcriptionModel": "Dejalo como el valor predeterminado resuelto salvo que el proveedor necesite un id de modelo personalizado.",
|
||||||
|
"transcriptionLanguage": "Pista ISO-639 opcional, como en, zh, ja o ko."
|
||||||
},
|
},
|
||||||
"values": {
|
"values": {
|
||||||
"light": "Claro",
|
"light": "Claro",
|
||||||
@ -283,6 +296,7 @@
|
|||||||
"totalProviders": "{{count}} disponibles",
|
"totalProviders": "{{count}} disponibles",
|
||||||
"webSearch": "Búsqueda web",
|
"webSearch": "Búsqueda web",
|
||||||
"imageGeneration": "Generación de imágenes",
|
"imageGeneration": "Generación de imágenes",
|
||||||
|
"voiceInput": "Entrada de voz",
|
||||||
"workspace": "Espacio de trabajo"
|
"workspace": "Espacio de trabajo"
|
||||||
},
|
},
|
||||||
"usage": {
|
"usage": {
|
||||||
@ -486,6 +500,11 @@
|
|||||||
"rawInstructions": "SKILL.md original",
|
"rawInstructions": "SKILL.md original",
|
||||||
"rawInstructionsEmpty": "No hay instrucciones originales.",
|
"rawInstructionsEmpty": "No hay instrucciones originales.",
|
||||||
"detailDescription": "Detalles de {{name}}."
|
"detailDescription": "Detalles de {{name}}."
|
||||||
|
},
|
||||||
|
"voice": {
|
||||||
|
"selectProvider": "Seleccionar proveedor",
|
||||||
|
"configureProvider": "Configurar proveedor",
|
||||||
|
"languageAuto": "Auto"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"chat": {
|
"chat": {
|
||||||
@ -678,6 +697,21 @@
|
|||||||
"deepResearch": "Investigación profunda",
|
"deepResearch": "Investigación profunda",
|
||||||
"voice": "Entrada de voz"
|
"voice": "Entrada de voz"
|
||||||
},
|
},
|
||||||
|
"voice": {
|
||||||
|
"hint": "Haz clic para dictar o mantén",
|
||||||
|
"stop": "Detener grabación",
|
||||||
|
"transcribing": "Transcribiendo...",
|
||||||
|
"recordingStatus": "Grabando {{time}}"
|
||||||
|
},
|
||||||
|
"voiceErrors": {
|
||||||
|
"unsupported": "Este navegador no admite entrada de voz.",
|
||||||
|
"permission": "Se requiere permiso de micrófono.",
|
||||||
|
"notConfigured": "Configura primero un proveedor de transcripción.",
|
||||||
|
"tooLong": "La grabación es demasiado larga.",
|
||||||
|
"tooShort": "Mantén pulsado un poco más para grabar voz.",
|
||||||
|
"noInput": "No se detectó entrada del micrófono.",
|
||||||
|
"failed": "No se pudo transcribir el audio."
|
||||||
|
},
|
||||||
"slash": {
|
"slash": {
|
||||||
"ariaLabel": "Comandos slash",
|
"ariaLabel": "Comandos slash",
|
||||||
"label": "comandos",
|
"label": "comandos",
|
||||||
|
|||||||
@ -73,6 +73,7 @@
|
|||||||
"models": "Modèles",
|
"models": "Modèles",
|
||||||
"providers": "Fournisseurs",
|
"providers": "Fournisseurs",
|
||||||
"image": "Images",
|
"image": "Images",
|
||||||
|
"voice": "Voix",
|
||||||
"browser": "Internet",
|
"browser": "Internet",
|
||||||
"runtime": "Système",
|
"runtime": "Système",
|
||||||
"advanced": "Sécurité",
|
"advanced": "Sécurité",
|
||||||
@ -99,7 +100,8 @@
|
|||||||
"mcp": "Services MCP",
|
"mcp": "Services MCP",
|
||||||
"apps": "Applications",
|
"apps": "Applications",
|
||||||
"nativeHost": "Hôte natif",
|
"nativeHost": "Hôte natif",
|
||||||
"hostSafety": "Sécurité de l’app"
|
"hostSafety": "Sécurité de l’app",
|
||||||
|
"voiceInput": "Saisie vocale"
|
||||||
},
|
},
|
||||||
"rows": {
|
"rows": {
|
||||||
"theme": "Thème",
|
"theme": "Thème",
|
||||||
@ -142,7 +144,13 @@
|
|||||||
"engine": "Moteur",
|
"engine": "Moteur",
|
||||||
"logs": "Journaux",
|
"logs": "Journaux",
|
||||||
"diagnostics": "Diagnostic",
|
"diagnostics": "Diagnostic",
|
||||||
"contextWindow": "Fenêtre de contexte"
|
"contextWindow": "Fenêtre de contexte",
|
||||||
|
"transcription": "Transcription",
|
||||||
|
"transcriptionProvider": "Fournisseur",
|
||||||
|
"transcriptionProviderStatus": "Etat du fournisseur",
|
||||||
|
"transcriptionModel": "Modele",
|
||||||
|
"transcriptionLanguage": "Langue",
|
||||||
|
"voiceLimits": "Limites"
|
||||||
},
|
},
|
||||||
"help": {
|
"help": {
|
||||||
"theme": "Basculer entre l’apparence claire et sombre.",
|
"theme": "Basculer entre l’apparence claire et sombre.",
|
||||||
@ -181,7 +189,12 @@
|
|||||||
"diagnostics": "Exporte un petit rapport d’exécution pour le support.",
|
"diagnostics": "Exporte un petit rapport d’exécution pour le support.",
|
||||||
"localServiceAccessNative": "Autorise les commandes shell Full Access à atteindre les services sur ce Mac.",
|
"localServiceAccessNative": "Autorise les commandes shell Full Access à atteindre les services sur ce Mac.",
|
||||||
"webuiDefaultAccessNative": "Utilisé par les chats natifs sans permission propre au projet.",
|
"webuiDefaultAccessNative": "Utilisé par les chats natifs sans permission propre au projet.",
|
||||||
"contextWindow": "Choisissez le budget de contexte par défaut pour cette configuration de modèle."
|
"contextWindow": "Choisissez le budget de contexte par défaut pour cette configuration de modèle.",
|
||||||
|
"transcription": "Transcrit l'entree micro avant l'envoi. Les messages vocaux des canaux utilisent les memes reglages.",
|
||||||
|
"transcriptionProvider": "Utilise les identifiants du fournisseur correspondant dans Fournisseurs.",
|
||||||
|
"transcriptionProviderStatus": "Les cles API restent dans les fournisseurs, pas dans les reglages de transcription.",
|
||||||
|
"transcriptionModel": "Laissez le modele resolu par defaut sauf si votre fournisseur exige un id personnalise.",
|
||||||
|
"transcriptionLanguage": "Indice ISO-639 facultatif, comme en, zh, ja ou ko."
|
||||||
},
|
},
|
||||||
"values": {
|
"values": {
|
||||||
"light": "Clair",
|
"light": "Clair",
|
||||||
@ -283,6 +296,7 @@
|
|||||||
"totalProviders": "{{count}} disponibles",
|
"totalProviders": "{{count}} disponibles",
|
||||||
"webSearch": "Recherche web",
|
"webSearch": "Recherche web",
|
||||||
"imageGeneration": "Génération d’images",
|
"imageGeneration": "Génération d’images",
|
||||||
|
"voiceInput": "Saisie vocale",
|
||||||
"workspace": "Espace de travail"
|
"workspace": "Espace de travail"
|
||||||
},
|
},
|
||||||
"usage": {
|
"usage": {
|
||||||
@ -486,6 +500,11 @@
|
|||||||
"rawInstructions": "SKILL.md brut",
|
"rawInstructions": "SKILL.md brut",
|
||||||
"rawInstructionsEmpty": "Aucune instruction brute.",
|
"rawInstructionsEmpty": "Aucune instruction brute.",
|
||||||
"detailDescription": "Détails de {{name}}."
|
"detailDescription": "Détails de {{name}}."
|
||||||
|
},
|
||||||
|
"voice": {
|
||||||
|
"selectProvider": "Choisir un fournisseur",
|
||||||
|
"configureProvider": "Configurer le fournisseur",
|
||||||
|
"languageAuto": "Auto"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"chat": {
|
"chat": {
|
||||||
@ -678,6 +697,21 @@
|
|||||||
"deepResearch": "Recherche approfondie",
|
"deepResearch": "Recherche approfondie",
|
||||||
"voice": "Entrée vocale"
|
"voice": "Entrée vocale"
|
||||||
},
|
},
|
||||||
|
"voice": {
|
||||||
|
"hint": "Cliquez pour dicter ou maintenez",
|
||||||
|
"stop": "Arrêter l'enregistrement",
|
||||||
|
"transcribing": "Transcription...",
|
||||||
|
"recordingStatus": "Enregistrement {{time}}"
|
||||||
|
},
|
||||||
|
"voiceErrors": {
|
||||||
|
"unsupported": "La saisie vocale n'est pas prise en charge par ce navigateur.",
|
||||||
|
"permission": "L'autorisation du microphone est requise.",
|
||||||
|
"notConfigured": "Configurez d'abord un fournisseur de transcription.",
|
||||||
|
"tooLong": "L'enregistrement est trop long.",
|
||||||
|
"tooShort": "Maintenez un peu plus longtemps pour enregistrer la voix.",
|
||||||
|
"noInput": "Aucune entrée microphone détectée.",
|
||||||
|
"failed": "Impossible de transcrire l'audio."
|
||||||
|
},
|
||||||
"slash": {
|
"slash": {
|
||||||
"ariaLabel": "Commandes slash",
|
"ariaLabel": "Commandes slash",
|
||||||
"label": "commandes",
|
"label": "commandes",
|
||||||
|
|||||||
@ -73,6 +73,7 @@
|
|||||||
"models": "Model",
|
"models": "Model",
|
||||||
"providers": "Penyedia",
|
"providers": "Penyedia",
|
||||||
"image": "Gambar",
|
"image": "Gambar",
|
||||||
|
"voice": "Suara",
|
||||||
"browser": "Internet",
|
"browser": "Internet",
|
||||||
"runtime": "Sistem",
|
"runtime": "Sistem",
|
||||||
"advanced": "Keamanan",
|
"advanced": "Keamanan",
|
||||||
@ -99,7 +100,8 @@
|
|||||||
"mcp": "Layanan MCP",
|
"mcp": "Layanan MCP",
|
||||||
"apps": "Aplikasi",
|
"apps": "Aplikasi",
|
||||||
"nativeHost": "Host native",
|
"nativeHost": "Host native",
|
||||||
"hostSafety": "Keamanan aplikasi"
|
"hostSafety": "Keamanan aplikasi",
|
||||||
|
"voiceInput": "Input suara"
|
||||||
},
|
},
|
||||||
"rows": {
|
"rows": {
|
||||||
"theme": "Tema",
|
"theme": "Tema",
|
||||||
@ -142,7 +144,13 @@
|
|||||||
"engine": "Mesin",
|
"engine": "Mesin",
|
||||||
"logs": "Log",
|
"logs": "Log",
|
||||||
"diagnostics": "Diagnostik",
|
"diagnostics": "Diagnostik",
|
||||||
"contextWindow": "Jendela konteks"
|
"contextWindow": "Jendela konteks",
|
||||||
|
"transcription": "Transkripsi",
|
||||||
|
"transcriptionProvider": "Penyedia",
|
||||||
|
"transcriptionProviderStatus": "Status penyedia",
|
||||||
|
"transcriptionModel": "Model",
|
||||||
|
"transcriptionLanguage": "Bahasa",
|
||||||
|
"voiceLimits": "Batas"
|
||||||
},
|
},
|
||||||
"help": {
|
"help": {
|
||||||
"theme": "Beralih antara tampilan terang dan gelap.",
|
"theme": "Beralih antara tampilan terang dan gelap.",
|
||||||
@ -181,7 +189,12 @@
|
|||||||
"diagnostics": "Exporta un pequeño informe de runtime para soporte.",
|
"diagnostics": "Exporta un pequeño informe de runtime para soporte.",
|
||||||
"localServiceAccessNative": "Permite que comandos shell con Full Access alcancen servicios en este Mac.",
|
"localServiceAccessNative": "Permite que comandos shell con Full Access alcancen servicios en este Mac.",
|
||||||
"webuiDefaultAccessNative": "Usado por chats nativos sin permiso específico de proyecto.",
|
"webuiDefaultAccessNative": "Usado por chats nativos sin permiso específico de proyecto.",
|
||||||
"contextWindow": "Pilih anggaran konteks default untuk konfigurasi model ini."
|
"contextWindow": "Pilih anggaran konteks default untuk konfigurasi model ini.",
|
||||||
|
"transcription": "Transkripsikan input mikrofon sebelum dikirim. Pesan suara channel memakai pengaturan yang sama.",
|
||||||
|
"transcriptionProvider": "Menggunakan kredensial penyedia yang sesuai dari Providers.",
|
||||||
|
"transcriptionProviderStatus": "API key tetap berada di providers, bukan di pengaturan transkripsi.",
|
||||||
|
"transcriptionModel": "Biarkan memakai default yang teresolusi kecuali penyedia membutuhkan id model khusus.",
|
||||||
|
"transcriptionLanguage": "Petunjuk ISO-639 opsional, seperti en, zh, ja, atau ko."
|
||||||
},
|
},
|
||||||
"values": {
|
"values": {
|
||||||
"light": "Terang",
|
"light": "Terang",
|
||||||
@ -283,6 +296,7 @@
|
|||||||
"totalProviders": "{{count}} tersedia",
|
"totalProviders": "{{count}} tersedia",
|
||||||
"webSearch": "Pencarian web",
|
"webSearch": "Pencarian web",
|
||||||
"imageGeneration": "Pembuatan gambar",
|
"imageGeneration": "Pembuatan gambar",
|
||||||
|
"voiceInput": "Input suara",
|
||||||
"workspace": "Ruang kerja"
|
"workspace": "Ruang kerja"
|
||||||
},
|
},
|
||||||
"usage": {
|
"usage": {
|
||||||
@ -486,6 +500,11 @@
|
|||||||
"rawInstructions": "SKILL.md mentah",
|
"rawInstructions": "SKILL.md mentah",
|
||||||
"rawInstructionsEmpty": "Tidak ada instruksi mentah.",
|
"rawInstructionsEmpty": "Tidak ada instruksi mentah.",
|
||||||
"detailDescription": "Detail untuk {{name}}."
|
"detailDescription": "Detail untuk {{name}}."
|
||||||
|
},
|
||||||
|
"voice": {
|
||||||
|
"selectProvider": "Pilih penyedia",
|
||||||
|
"configureProvider": "Konfigurasi penyedia",
|
||||||
|
"languageAuto": "Auto"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"chat": {
|
"chat": {
|
||||||
@ -678,6 +697,21 @@
|
|||||||
"deepResearch": "Riset mendalam",
|
"deepResearch": "Riset mendalam",
|
||||||
"voice": "Input suara"
|
"voice": "Input suara"
|
||||||
},
|
},
|
||||||
|
"voice": {
|
||||||
|
"hint": "Klik untuk mendikte atau tahan",
|
||||||
|
"stop": "Hentikan rekaman",
|
||||||
|
"transcribing": "Mentranskripsi...",
|
||||||
|
"recordingStatus": "Merekam {{time}}"
|
||||||
|
},
|
||||||
|
"voiceErrors": {
|
||||||
|
"unsupported": "Input suara tidak didukung di browser ini.",
|
||||||
|
"permission": "Izin mikrofon diperlukan.",
|
||||||
|
"notConfigured": "Konfigurasikan penyedia transkripsi terlebih dahulu.",
|
||||||
|
"tooLong": "Rekaman terlalu panjang.",
|
||||||
|
"tooShort": "Tahan sedikit lebih lama untuk merekam suara.",
|
||||||
|
"noInput": "Tidak ada input mikrofon yang terdeteksi.",
|
||||||
|
"failed": "Tidak dapat mentranskripsi audio."
|
||||||
|
},
|
||||||
"slash": {
|
"slash": {
|
||||||
"ariaLabel": "Perintah slash",
|
"ariaLabel": "Perintah slash",
|
||||||
"label": "perintah",
|
"label": "perintah",
|
||||||
|
|||||||
@ -73,6 +73,7 @@
|
|||||||
"models": "モデル",
|
"models": "モデル",
|
||||||
"providers": "プロバイダー",
|
"providers": "プロバイダー",
|
||||||
"image": "画像",
|
"image": "画像",
|
||||||
|
"voice": "音声",
|
||||||
"browser": "ウェブ",
|
"browser": "ウェブ",
|
||||||
"runtime": "システム",
|
"runtime": "システム",
|
||||||
"advanced": "セキュリティ",
|
"advanced": "セキュリティ",
|
||||||
@ -99,7 +100,8 @@
|
|||||||
"mcp": "MCP サービス",
|
"mcp": "MCP サービス",
|
||||||
"apps": "アプリ",
|
"apps": "アプリ",
|
||||||
"nativeHost": "ネイティブホスト",
|
"nativeHost": "ネイティブホスト",
|
||||||
"hostSafety": "アプリの安全性"
|
"hostSafety": "アプリの安全性",
|
||||||
|
"voiceInput": "音声入力"
|
||||||
},
|
},
|
||||||
"rows": {
|
"rows": {
|
||||||
"theme": "テーマ",
|
"theme": "テーマ",
|
||||||
@ -142,7 +144,13 @@
|
|||||||
"engine": "エンジン",
|
"engine": "エンジン",
|
||||||
"logs": "ログ",
|
"logs": "ログ",
|
||||||
"diagnostics": "診断",
|
"diagnostics": "診断",
|
||||||
"contextWindow": "コンテキストウィンドウ"
|
"contextWindow": "コンテキストウィンドウ",
|
||||||
|
"transcription": "文字起こし",
|
||||||
|
"transcriptionProvider": "プロバイダー",
|
||||||
|
"transcriptionProviderStatus": "プロバイダー状態",
|
||||||
|
"transcriptionModel": "モデル",
|
||||||
|
"transcriptionLanguage": "言語",
|
||||||
|
"voiceLimits": "制限"
|
||||||
},
|
},
|
||||||
"help": {
|
"help": {
|
||||||
"theme": "ライト表示とダーク表示を切り替えます。",
|
"theme": "ライト表示とダーク表示を切り替えます。",
|
||||||
@ -181,7 +189,12 @@
|
|||||||
"diagnostics": "サポート用の小さなランタイムレポートを書き出します。",
|
"diagnostics": "サポート用の小さなランタイムレポートを書き出します。",
|
||||||
"localServiceAccessNative": "Full Access の shell コマンドがこの Mac 上のサービスにアクセスできるようにします。",
|
"localServiceAccessNative": "Full Access の shell コマンドがこの Mac 上のサービスにアクセスできるようにします。",
|
||||||
"webuiDefaultAccessNative": "プロジェクト固有の権限がないネイティブチャットで使用します。",
|
"webuiDefaultAccessNative": "プロジェクト固有の権限がないネイティブチャットで使用します。",
|
||||||
"contextWindow": "このモデル設定で使う既定のコンテキスト予算を選択します。"
|
"contextWindow": "このモデル設定で使う既定のコンテキスト予算を選択します。",
|
||||||
|
"transcription": "マイク入力を送信前に文字起こしします。チャネルの音声メッセージも同じ設定を使います。",
|
||||||
|
"transcriptionProvider": "プロバイダー設定にある対応する認証情報を使います。",
|
||||||
|
"transcriptionProviderStatus": "APIキーは文字起こし設定ではなくプロバイダー側に保存されます。",
|
||||||
|
"transcriptionModel": "プロバイダーがカスタムモデルIDを必要としない限り、解決済みのデフォルトのままにします。",
|
||||||
|
"transcriptionLanguage": "en、zh、ja、ko などの任意の ISO-639 ヒント。"
|
||||||
},
|
},
|
||||||
"values": {
|
"values": {
|
||||||
"light": "ライト",
|
"light": "ライト",
|
||||||
@ -283,6 +296,7 @@
|
|||||||
"totalProviders": "{{count}} 個利用可能",
|
"totalProviders": "{{count}} 個利用可能",
|
||||||
"webSearch": "Web 検索",
|
"webSearch": "Web 検索",
|
||||||
"imageGeneration": "画像生成",
|
"imageGeneration": "画像生成",
|
||||||
|
"voiceInput": "音声入力",
|
||||||
"workspace": "ワークスペース"
|
"workspace": "ワークスペース"
|
||||||
},
|
},
|
||||||
"usage": {
|
"usage": {
|
||||||
@ -486,6 +500,11 @@
|
|||||||
"rawInstructions": "元の SKILL.md",
|
"rawInstructions": "元の SKILL.md",
|
||||||
"rawInstructionsEmpty": "元の説明はありません。",
|
"rawInstructionsEmpty": "元の説明はありません。",
|
||||||
"detailDescription": "{{name}} の詳細。"
|
"detailDescription": "{{name}} の詳細。"
|
||||||
|
},
|
||||||
|
"voice": {
|
||||||
|
"selectProvider": "プロバイダーを選択",
|
||||||
|
"configureProvider": "プロバイダーを設定",
|
||||||
|
"languageAuto": "自動"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"chat": {
|
"chat": {
|
||||||
@ -678,6 +697,21 @@
|
|||||||
"deepResearch": "詳細調査",
|
"deepResearch": "詳細調査",
|
||||||
"voice": "音声入力"
|
"voice": "音声入力"
|
||||||
},
|
},
|
||||||
|
"voice": {
|
||||||
|
"hint": "クリックして音声入力、または長押し",
|
||||||
|
"stop": "録音を停止",
|
||||||
|
"transcribing": "文字起こし中...",
|
||||||
|
"recordingStatus": "録音中 {{time}}"
|
||||||
|
},
|
||||||
|
"voiceErrors": {
|
||||||
|
"unsupported": "このブラウザーは音声入力に対応していません。",
|
||||||
|
"permission": "マイクの許可が必要です。",
|
||||||
|
"notConfigured": "先に文字起こしプロバイダーを設定してください。",
|
||||||
|
"tooLong": "録音が長すぎます。",
|
||||||
|
"tooShort": "もう少し長く録音してください。",
|
||||||
|
"noInput": "マイク入力が検出されませんでした。",
|
||||||
|
"failed": "音声を文字起こしできませんでした。"
|
||||||
|
},
|
||||||
"slash": {
|
"slash": {
|
||||||
"ariaLabel": "スラッシュコマンド",
|
"ariaLabel": "スラッシュコマンド",
|
||||||
"label": "コマンド",
|
"label": "コマンド",
|
||||||
|
|||||||
@ -73,6 +73,7 @@
|
|||||||
"models": "모델",
|
"models": "모델",
|
||||||
"providers": "제공자",
|
"providers": "제공자",
|
||||||
"image": "이미지",
|
"image": "이미지",
|
||||||
|
"voice": "음성",
|
||||||
"browser": "웹",
|
"browser": "웹",
|
||||||
"runtime": "시스템",
|
"runtime": "시스템",
|
||||||
"advanced": "보안",
|
"advanced": "보안",
|
||||||
@ -99,7 +100,8 @@
|
|||||||
"mcp": "MCP 서비스",
|
"mcp": "MCP 서비스",
|
||||||
"apps": "앱",
|
"apps": "앱",
|
||||||
"nativeHost": "네이티브 호스트",
|
"nativeHost": "네이티브 호스트",
|
||||||
"hostSafety": "앱 보안"
|
"hostSafety": "앱 보안",
|
||||||
|
"voiceInput": "음성 입력"
|
||||||
},
|
},
|
||||||
"rows": {
|
"rows": {
|
||||||
"theme": "테마",
|
"theme": "테마",
|
||||||
@ -142,7 +144,13 @@
|
|||||||
"engine": "엔진",
|
"engine": "엔진",
|
||||||
"logs": "로그",
|
"logs": "로그",
|
||||||
"diagnostics": "진단",
|
"diagnostics": "진단",
|
||||||
"contextWindow": "컨텍스트 창"
|
"contextWindow": "컨텍스트 창",
|
||||||
|
"transcription": "전사",
|
||||||
|
"transcriptionProvider": "제공자",
|
||||||
|
"transcriptionProviderStatus": "제공자 상태",
|
||||||
|
"transcriptionModel": "모델",
|
||||||
|
"transcriptionLanguage": "언어",
|
||||||
|
"voiceLimits": "제한"
|
||||||
},
|
},
|
||||||
"help": {
|
"help": {
|
||||||
"theme": "밝은 모드와 어두운 모드를 전환합니다.",
|
"theme": "밝은 모드와 어두운 모드를 전환합니다.",
|
||||||
@ -181,7 +189,12 @@
|
|||||||
"diagnostics": "지원용 작은 런타임 보고서를 내보냅니다.",
|
"diagnostics": "지원용 작은 런타임 보고서를 내보냅니다.",
|
||||||
"localServiceAccessNative": "Full Access shell 명령이 이 Mac의 서비스에 접근할 수 있게 합니다.",
|
"localServiceAccessNative": "Full Access shell 명령이 이 Mac의 서비스에 접근할 수 있게 합니다.",
|
||||||
"webuiDefaultAccessNative": "프로젝트별 권한이 없는 네이티브 채팅에 사용됩니다.",
|
"webuiDefaultAccessNative": "프로젝트별 권한이 없는 네이티브 채팅에 사용됩니다.",
|
||||||
"contextWindow": "이 모델 구성의 기본 컨텍스트 예산을 선택합니다."
|
"contextWindow": "이 모델 구성의 기본 컨텍스트 예산을 선택합니다.",
|
||||||
|
"transcription": "마이크 입력을 보내기 전에 텍스트로 변환합니다. 채널 음성 메시지도 같은 설정을 사용합니다.",
|
||||||
|
"transcriptionProvider": "Providers에 저장된 해당 제공자의 인증 정보를 사용합니다.",
|
||||||
|
"transcriptionProviderStatus": "API 키는 transcription 설정이 아니라 providers 아래에 유지됩니다.",
|
||||||
|
"transcriptionModel": "제공자가 사용자 지정 모델 ID를 요구하지 않으면 해석된 기본값을 사용하세요.",
|
||||||
|
"transcriptionLanguage": "en, zh, ja, ko 같은 선택적 ISO-639 힌트입니다."
|
||||||
},
|
},
|
||||||
"values": {
|
"values": {
|
||||||
"light": "라이트",
|
"light": "라이트",
|
||||||
@ -283,6 +296,7 @@
|
|||||||
"totalProviders": "{{count}}개 사용 가능",
|
"totalProviders": "{{count}}개 사용 가능",
|
||||||
"webSearch": "웹 검색",
|
"webSearch": "웹 검색",
|
||||||
"imageGeneration": "이미지 생성",
|
"imageGeneration": "이미지 생성",
|
||||||
|
"voiceInput": "음성 입력",
|
||||||
"workspace": "작업공간"
|
"workspace": "작업공간"
|
||||||
},
|
},
|
||||||
"usage": {
|
"usage": {
|
||||||
@ -486,6 +500,11 @@
|
|||||||
"rawInstructions": "원본 SKILL.md",
|
"rawInstructions": "원본 SKILL.md",
|
||||||
"rawInstructionsEmpty": "원본 지침이 없습니다.",
|
"rawInstructionsEmpty": "원본 지침이 없습니다.",
|
||||||
"detailDescription": "{{name}} 세부 정보."
|
"detailDescription": "{{name}} 세부 정보."
|
||||||
|
},
|
||||||
|
"voice": {
|
||||||
|
"selectProvider": "제공자 선택",
|
||||||
|
"configureProvider": "제공자 설정",
|
||||||
|
"languageAuto": "자동"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"chat": {
|
"chat": {
|
||||||
@ -678,6 +697,21 @@
|
|||||||
"deepResearch": "심층 조사",
|
"deepResearch": "심층 조사",
|
||||||
"voice": "음성 입력"
|
"voice": "음성 입력"
|
||||||
},
|
},
|
||||||
|
"voice": {
|
||||||
|
"hint": "클릭해 받아쓰거나 길게 누르기",
|
||||||
|
"stop": "녹음 중지",
|
||||||
|
"transcribing": "변환 중...",
|
||||||
|
"recordingStatus": "녹음 중 {{time}}"
|
||||||
|
},
|
||||||
|
"voiceErrors": {
|
||||||
|
"unsupported": "이 브라우저는 음성 입력을 지원하지 않습니다.",
|
||||||
|
"permission": "마이크 권한이 필요합니다.",
|
||||||
|
"notConfigured": "먼저 음성 변환 제공업체를 설정하세요.",
|
||||||
|
"tooLong": "녹음 시간이 너무 깁니다.",
|
||||||
|
"tooShort": "음성을 녹음하려면 조금 더 길게 눌러 주세요.",
|
||||||
|
"noInput": "마이크 입력이 감지되지 않았습니다.",
|
||||||
|
"failed": "오디오를 변환하지 못했습니다."
|
||||||
|
},
|
||||||
"slash": {
|
"slash": {
|
||||||
"ariaLabel": "슬래시 명령",
|
"ariaLabel": "슬래시 명령",
|
||||||
"label": "명령",
|
"label": "명령",
|
||||||
|
|||||||
@ -73,6 +73,7 @@
|
|||||||
"models": "Mô hình",
|
"models": "Mô hình",
|
||||||
"providers": "Nhà cung cấp",
|
"providers": "Nhà cung cấp",
|
||||||
"image": "Hình ảnh",
|
"image": "Hình ảnh",
|
||||||
|
"voice": "Giọng nói",
|
||||||
"browser": "Trang web",
|
"browser": "Trang web",
|
||||||
"runtime": "Hệ thống",
|
"runtime": "Hệ thống",
|
||||||
"advanced": "Bảo mật",
|
"advanced": "Bảo mật",
|
||||||
@ -99,7 +100,8 @@
|
|||||||
"mcp": "Dịch vụ MCP",
|
"mcp": "Dịch vụ MCP",
|
||||||
"apps": "Ứng dụng",
|
"apps": "Ứng dụng",
|
||||||
"nativeHost": "Host gốc",
|
"nativeHost": "Host gốc",
|
||||||
"hostSafety": "An toàn ứng dụng"
|
"hostSafety": "An toàn ứng dụng",
|
||||||
|
"voiceInput": "Nhap giong noi"
|
||||||
},
|
},
|
||||||
"rows": {
|
"rows": {
|
||||||
"theme": "Chủ đề",
|
"theme": "Chủ đề",
|
||||||
@ -142,7 +144,13 @@
|
|||||||
"engine": "Bộ máy",
|
"engine": "Bộ máy",
|
||||||
"logs": "Nhật ký",
|
"logs": "Nhật ký",
|
||||||
"diagnostics": "Chẩn đoán",
|
"diagnostics": "Chẩn đoán",
|
||||||
"contextWindow": "Cửa sổ ngữ cảnh"
|
"contextWindow": "Cửa sổ ngữ cảnh",
|
||||||
|
"transcription": "Phien am",
|
||||||
|
"transcriptionProvider": "Nha cung cap",
|
||||||
|
"transcriptionProviderStatus": "Trang thai nha cung cap",
|
||||||
|
"transcriptionModel": "Mo hinh",
|
||||||
|
"transcriptionLanguage": "Ngon ngu",
|
||||||
|
"voiceLimits": "Gioi han"
|
||||||
},
|
},
|
||||||
"help": {
|
"help": {
|
||||||
"theme": "Chuyển giữa giao diện sáng và tối.",
|
"theme": "Chuyển giữa giao diện sáng và tối.",
|
||||||
@ -181,7 +189,12 @@
|
|||||||
"diagnostics": "Exporta un pequeño informe de runtime para soporte.",
|
"diagnostics": "Exporta un pequeño informe de runtime para soporte.",
|
||||||
"localServiceAccessNative": "Permite que comandos shell con Full Access alcancen servicios en este Mac.",
|
"localServiceAccessNative": "Permite que comandos shell con Full Access alcancen servicios en este Mac.",
|
||||||
"webuiDefaultAccessNative": "Usado por chats nativos sin permiso específico de proyecto.",
|
"webuiDefaultAccessNative": "Usado por chats nativos sin permiso específico de proyecto.",
|
||||||
"contextWindow": "Chọn ngân sách ngữ cảnh mặc định cho cấu hình mô hình này."
|
"contextWindow": "Chọn ngân sách ngữ cảnh mặc định cho cấu hình mô hình này.",
|
||||||
|
"transcription": "Phiên âm đầu vào micrô trước khi gửi. Tin nhắn giọng nói từ kênh chat dùng cùng cài đặt.",
|
||||||
|
"transcriptionProvider": "Dùng thông tin xác thực của nhà cung cấp từ Providers.",
|
||||||
|
"transcriptionProviderStatus": "API key nằm trong providers, không nằm trong cài đặt transcription.",
|
||||||
|
"transcriptionModel": "Giữ mặc định đã resolve trừ khi nhà cung cấp cần id model tùy chỉnh.",
|
||||||
|
"transcriptionLanguage": "Gợi ý ISO-639 tùy chọn, như en, zh, ja hoặc ko."
|
||||||
},
|
},
|
||||||
"values": {
|
"values": {
|
||||||
"light": "Sáng",
|
"light": "Sáng",
|
||||||
@ -283,6 +296,7 @@
|
|||||||
"totalProviders": "{{count}} khả dụng",
|
"totalProviders": "{{count}} khả dụng",
|
||||||
"webSearch": "Tìm kiếm web",
|
"webSearch": "Tìm kiếm web",
|
||||||
"imageGeneration": "Tạo hình ảnh",
|
"imageGeneration": "Tạo hình ảnh",
|
||||||
|
"voiceInput": "Nhập bằng giọng nói",
|
||||||
"workspace": "Không gian làm việc"
|
"workspace": "Không gian làm việc"
|
||||||
},
|
},
|
||||||
"usage": {
|
"usage": {
|
||||||
@ -486,6 +500,11 @@
|
|||||||
"rawInstructions": "SKILL.md gốc",
|
"rawInstructions": "SKILL.md gốc",
|
||||||
"rawInstructionsEmpty": "Không có hướng dẫn gốc.",
|
"rawInstructionsEmpty": "Không có hướng dẫn gốc.",
|
||||||
"detailDescription": "Chi tiết cho {{name}}."
|
"detailDescription": "Chi tiết cho {{name}}."
|
||||||
|
},
|
||||||
|
"voice": {
|
||||||
|
"selectProvider": "Chọn nhà cung cấp",
|
||||||
|
"configureProvider": "Cấu hình nhà cung cấp",
|
||||||
|
"languageAuto": "Tự động"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"chat": {
|
"chat": {
|
||||||
@ -678,6 +697,21 @@
|
|||||||
"deepResearch": "Nghiên cứu sâu",
|
"deepResearch": "Nghiên cứu sâu",
|
||||||
"voice": "Nhập bằng giọng nói"
|
"voice": "Nhập bằng giọng nói"
|
||||||
},
|
},
|
||||||
|
"voice": {
|
||||||
|
"hint": "Bấm để đọc chính tả hoặc nhấn giữ",
|
||||||
|
"stop": "Dừng ghi âm",
|
||||||
|
"transcribing": "Đang chép lời...",
|
||||||
|
"recordingStatus": "Đang ghi {{time}}"
|
||||||
|
},
|
||||||
|
"voiceErrors": {
|
||||||
|
"unsupported": "Trình duyệt này không hỗ trợ nhập bằng giọng nói.",
|
||||||
|
"permission": "Cần quyền truy cập micrô.",
|
||||||
|
"notConfigured": "Hãy cấu hình nhà cung cấp chép lời trước.",
|
||||||
|
"tooLong": "Bản ghi âm quá dài.",
|
||||||
|
"tooShort": "Giữ lâu hơn một chút để ghi âm giọng nói.",
|
||||||
|
"noInput": "Không phát hiện đầu vào micrô.",
|
||||||
|
"failed": "Không thể chép lời âm thanh."
|
||||||
|
},
|
||||||
"slash": {
|
"slash": {
|
||||||
"ariaLabel": "Lệnh slash",
|
"ariaLabel": "Lệnh slash",
|
||||||
"label": "lệnh",
|
"label": "lệnh",
|
||||||
|
|||||||
@ -73,6 +73,7 @@
|
|||||||
"models": "模型",
|
"models": "模型",
|
||||||
"providers": "提供商",
|
"providers": "提供商",
|
||||||
"image": "图片",
|
"image": "图片",
|
||||||
|
"voice": "语音",
|
||||||
"browser": "网页",
|
"browser": "网页",
|
||||||
"cliApps": "CLI 应用",
|
"cliApps": "CLI 应用",
|
||||||
"mcp": "MCP",
|
"mcp": "MCP",
|
||||||
@ -99,7 +100,8 @@
|
|||||||
"capabilities": "能力",
|
"capabilities": "能力",
|
||||||
"apps": "应用",
|
"apps": "应用",
|
||||||
"nativeHost": "原生宿主",
|
"nativeHost": "原生宿主",
|
||||||
"hostSafety": "应用安全"
|
"hostSafety": "应用安全",
|
||||||
|
"voiceInput": "语音识别"
|
||||||
},
|
},
|
||||||
"models": {
|
"models": {
|
||||||
"selectModel": "选择模型",
|
"selectModel": "选择模型",
|
||||||
@ -161,7 +163,13 @@
|
|||||||
"engine": "引擎",
|
"engine": "引擎",
|
||||||
"logs": "日志",
|
"logs": "日志",
|
||||||
"diagnostics": "诊断",
|
"diagnostics": "诊断",
|
||||||
"contextWindow": "上下文窗口"
|
"contextWindow": "上下文窗口",
|
||||||
|
"transcription": "语音转写",
|
||||||
|
"transcriptionProvider": "提供商",
|
||||||
|
"transcriptionProviderStatus": "提供商状态",
|
||||||
|
"transcriptionModel": "模型",
|
||||||
|
"transcriptionLanguage": "语言",
|
||||||
|
"voiceLimits": "限制"
|
||||||
},
|
},
|
||||||
"help": {
|
"help": {
|
||||||
"theme": "在浅色和深色外观之间切换。",
|
"theme": "在浅色和深色外观之间切换。",
|
||||||
@ -200,7 +208,12 @@
|
|||||||
"diagnostics": "导出一份用于支持排查的小型运行报告。",
|
"diagnostics": "导出一份用于支持排查的小型运行报告。",
|
||||||
"localServiceAccessNative": "允许完全访问权限下的 shell 命令访问这台 Mac 上的服务。",
|
"localServiceAccessNative": "允许完全访问权限下的 shell 命令访问这台 Mac 上的服务。",
|
||||||
"webuiDefaultAccessNative": "用于没有单独项目权限的原生聊天。",
|
"webuiDefaultAccessNative": "用于没有单独项目权限的原生聊天。",
|
||||||
"contextWindow": "选择此模型配置的默认上下文预算。"
|
"contextWindow": "选择此模型配置的默认上下文预算。",
|
||||||
|
"transcription": "发送前先把麦克风输入转写到输入框。聊天渠道里的语音消息也使用同一套设置。",
|
||||||
|
"transcriptionProvider": "使用「提供商」中对应提供商的凭据。",
|
||||||
|
"transcriptionProviderStatus": "API Key 仍保存在 providers 里,不写进 transcription 设置。",
|
||||||
|
"transcriptionModel": "除非提供商需要自定义模型 ID,否则保持解析后的默认值即可。",
|
||||||
|
"transcriptionLanguage": "可选 ISO-639 语言提示,例如 en、zh、ja 或 ko。"
|
||||||
},
|
},
|
||||||
"timezone": {
|
"timezone": {
|
||||||
"select": "选择时区",
|
"select": "选择时区",
|
||||||
@ -391,6 +404,7 @@
|
|||||||
"totalProviders": "共 {{count}} 个可用",
|
"totalProviders": "共 {{count}} 个可用",
|
||||||
"webSearch": "网页搜索",
|
"webSearch": "网页搜索",
|
||||||
"imageGeneration": "图片生成",
|
"imageGeneration": "图片生成",
|
||||||
|
"voiceInput": "语音识别",
|
||||||
"workspace": "工作区"
|
"workspace": "工作区"
|
||||||
},
|
},
|
||||||
"usage": {
|
"usage": {
|
||||||
@ -486,6 +500,11 @@
|
|||||||
"rawInstructions": "原始 SKILL.md",
|
"rawInstructions": "原始 SKILL.md",
|
||||||
"rawInstructionsEmpty": "没有原始说明。",
|
"rawInstructionsEmpty": "没有原始说明。",
|
||||||
"detailDescription": "{{name}} 的详情。"
|
"detailDescription": "{{name}} 的详情。"
|
||||||
|
},
|
||||||
|
"voice": {
|
||||||
|
"selectProvider": "选择提供商",
|
||||||
|
"configureProvider": "配置提供商",
|
||||||
|
"languageAuto": "自动"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"chat": {
|
"chat": {
|
||||||
@ -677,6 +696,21 @@
|
|||||||
"deepResearch": "深度研究",
|
"deepResearch": "深度研究",
|
||||||
"voice": "语音输入"
|
"voice": "语音输入"
|
||||||
},
|
},
|
||||||
|
"voice": {
|
||||||
|
"hint": "点击进行听写或长按",
|
||||||
|
"stop": "停止录音",
|
||||||
|
"transcribing": "正在转写...",
|
||||||
|
"recordingStatus": "正在录音 {{time}}"
|
||||||
|
},
|
||||||
|
"voiceErrors": {
|
||||||
|
"unsupported": "当前浏览器不支持语音输入。",
|
||||||
|
"permission": "需要麦克风权限。",
|
||||||
|
"notConfigured": "请先配置转写提供商。",
|
||||||
|
"tooLong": "录音时间太长。",
|
||||||
|
"tooShort": "请稍微多录一会儿。",
|
||||||
|
"noInput": "没有检测到麦克风输入。",
|
||||||
|
"failed": "语音转写失败。"
|
||||||
|
},
|
||||||
"slash": {
|
"slash": {
|
||||||
"ariaLabel": "斜杠命令",
|
"ariaLabel": "斜杠命令",
|
||||||
"label": "命令",
|
"label": "命令",
|
||||||
|
|||||||
@ -73,6 +73,7 @@
|
|||||||
"models": "模型",
|
"models": "模型",
|
||||||
"providers": "提供商",
|
"providers": "提供商",
|
||||||
"image": "圖片",
|
"image": "圖片",
|
||||||
|
"voice": "語音",
|
||||||
"browser": "網頁",
|
"browser": "網頁",
|
||||||
"runtime": "系統",
|
"runtime": "系統",
|
||||||
"advanced": "安全",
|
"advanced": "安全",
|
||||||
@ -99,7 +100,8 @@
|
|||||||
"mcp": "MCP 服務",
|
"mcp": "MCP 服務",
|
||||||
"apps": "應用",
|
"apps": "應用",
|
||||||
"nativeHost": "原生宿主",
|
"nativeHost": "原生宿主",
|
||||||
"hostSafety": "App 安全"
|
"hostSafety": "App 安全",
|
||||||
|
"voiceInput": "語音辨識"
|
||||||
},
|
},
|
||||||
"rows": {
|
"rows": {
|
||||||
"theme": "主題",
|
"theme": "主題",
|
||||||
@ -142,7 +144,13 @@
|
|||||||
"engine": "引擎",
|
"engine": "引擎",
|
||||||
"logs": "日誌",
|
"logs": "日誌",
|
||||||
"diagnostics": "診斷",
|
"diagnostics": "診斷",
|
||||||
"contextWindow": "上下文視窗"
|
"contextWindow": "上下文視窗",
|
||||||
|
"transcription": "語音轉寫",
|
||||||
|
"transcriptionProvider": "提供商",
|
||||||
|
"transcriptionProviderStatus": "提供商狀態",
|
||||||
|
"transcriptionModel": "模型",
|
||||||
|
"transcriptionLanguage": "語言",
|
||||||
|
"voiceLimits": "限制"
|
||||||
},
|
},
|
||||||
"help": {
|
"help": {
|
||||||
"theme": "在淺色與深色外觀之間切換。",
|
"theme": "在淺色與深色外觀之間切換。",
|
||||||
@ -181,7 +189,12 @@
|
|||||||
"diagnostics": "匯出一份用於支援排查的小型執行報告。",
|
"diagnostics": "匯出一份用於支援排查的小型執行報告。",
|
||||||
"localServiceAccessNative": "允許完全訪問權限下的 shell 命令訪問這台 Mac 上的服務。",
|
"localServiceAccessNative": "允許完全訪問權限下的 shell 命令訪問這台 Mac 上的服務。",
|
||||||
"webuiDefaultAccessNative": "用於沒有單獨專案權限的原生聊天。",
|
"webuiDefaultAccessNative": "用於沒有單獨專案權限的原生聊天。",
|
||||||
"contextWindow": "選擇此模型配置的預設上下文預算。"
|
"contextWindow": "選擇此模型配置的預設上下文預算。",
|
||||||
|
"transcription": "送出前先把麥克風輸入轉寫到輸入框。聊天渠道的語音訊息也使用同一組設定。",
|
||||||
|
"transcriptionProvider": "使用「提供商」中對應提供商的憑證。",
|
||||||
|
"transcriptionProviderStatus": "API Key 仍保存在 providers 裡,不寫進 transcription 設定。",
|
||||||
|
"transcriptionModel": "除非提供商需要自訂模型 ID,否則保持解析後的預設值即可。",
|
||||||
|
"transcriptionLanguage": "可選 ISO-639 語言提示,例如 en、zh、ja 或 ko。"
|
||||||
},
|
},
|
||||||
"values": {
|
"values": {
|
||||||
"light": "淺色",
|
"light": "淺色",
|
||||||
@ -283,6 +296,7 @@
|
|||||||
"totalProviders": "共 {{count}} 個可用",
|
"totalProviders": "共 {{count}} 個可用",
|
||||||
"webSearch": "網頁搜尋",
|
"webSearch": "網頁搜尋",
|
||||||
"imageGeneration": "圖片生成",
|
"imageGeneration": "圖片生成",
|
||||||
|
"voiceInput": "語音辨識",
|
||||||
"workspace": "工作區"
|
"workspace": "工作區"
|
||||||
},
|
},
|
||||||
"usage": {
|
"usage": {
|
||||||
@ -486,6 +500,11 @@
|
|||||||
"rawInstructions": "原始 SKILL.md",
|
"rawInstructions": "原始 SKILL.md",
|
||||||
"rawInstructionsEmpty": "沒有原始說明。",
|
"rawInstructionsEmpty": "沒有原始說明。",
|
||||||
"detailDescription": "{{name}} 的詳細資訊。"
|
"detailDescription": "{{name}} 的詳細資訊。"
|
||||||
|
},
|
||||||
|
"voice": {
|
||||||
|
"selectProvider": "選擇提供商",
|
||||||
|
"configureProvider": "設定提供商",
|
||||||
|
"languageAuto": "自動"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"chat": {
|
"chat": {
|
||||||
@ -678,6 +697,21 @@
|
|||||||
"deepResearch": "深度研究",
|
"deepResearch": "深度研究",
|
||||||
"voice": "語音輸入"
|
"voice": "語音輸入"
|
||||||
},
|
},
|
||||||
|
"voice": {
|
||||||
|
"hint": "點擊進行聽寫或長按",
|
||||||
|
"stop": "停止錄音",
|
||||||
|
"transcribing": "正在轉寫...",
|
||||||
|
"recordingStatus": "正在錄音 {{time}}"
|
||||||
|
},
|
||||||
|
"voiceErrors": {
|
||||||
|
"unsupported": "目前瀏覽器不支援語音輸入。",
|
||||||
|
"permission": "需要麥克風權限。",
|
||||||
|
"notConfigured": "請先設定轉寫提供商。",
|
||||||
|
"tooLong": "錄音時間太長。",
|
||||||
|
"tooShort": "請稍微多錄一會兒。",
|
||||||
|
"noInput": "沒有偵測到麥克風輸入。",
|
||||||
|
"failed": "語音轉寫失敗。"
|
||||||
|
},
|
||||||
"slash": {
|
"slash": {
|
||||||
"ariaLabel": "斜線命令",
|
"ariaLabel": "斜線命令",
|
||||||
"label": "命令",
|
"label": "命令",
|
||||||
|
|||||||
210
webui/src/lib/ansi.ts
Normal file
210
webui/src/lib/ansi.ts
Normal file
@ -0,0 +1,210 @@
|
|||||||
|
/** A run of plain text together with the style that was active when it was emitted. */
export type AnsiSegment = {
  /** Literal text of the run; matched escape sequences are not included. */
  text: string;
  /** Resolved style for the run; left undefined when no attribute or color is active. */
  style?: AnsiStyle;
};
|
||||||
|
|
||||||
|
/** Style properties derived from the active SGR attributes; every field is optional. */
export type AnsiStyle = {
  backgroundColor?: string;
  color?: string;
  /** Present only while the italic attribute is on. */
  fontStyle?: "italic";
  /** Set to 700 while bold is on. */
  fontWeight?: number;
  /** Set to 0.72 while dim is on. */
  opacity?: number;
  /** Present only while the underline attribute is on. */
  textDecorationLine?: "underline";
};
|
||||||
|
|
||||||
|
/** Mutable attribute state carried across escape sequences while a string is scanned. */
type AnsiState = {
  /** Explicit background color; absent means "no background set". */
  backgroundColor?: string;
  bold: boolean;
  /** Explicit foreground color; absent means "no foreground set". */
  color?: string;
  dim: boolean;
  /** When true, foreground and background are swapped at style-resolution time. */
  inverse: boolean;
  italic: boolean;
  underline: boolean;
};
|
||||||
|
|
||||||
|
// The ESC control character (char code 27) that introduces an escape sequence.
const ESC = String.fromCharCode(27);
// Matches one CSI sequence: ESC "[" followed by parameter bytes ("0"-"?"),
// intermediate bytes (" "-"/") and a single final byte ("@"-"~").
// The regex is global, so users reset `lastIndex` before relying on it.
const ANSI_PATTERN = new RegExp(`${ESC}\\[[0-?]*[ -/]*[@-~]`, "g");

// Standard palette: SGR 30-37 / 40-47 and 256-color indexes 0-7.
const ANSI_COLORS = [
  "#000000",
  "#cd3131",
  "#0dbc79",
  "#e5e510",
  "#2472c8",
  "#bc3fbc",
  "#11a8cd",
  "#e5e5e5",
];

// Bright palette: SGR 90-97 / 100-107 and 256-color indexes 8-15.
const ANSI_BRIGHT_COLORS = [
  "#666666",
  "#f14c4c",
  "#23d18b",
  "#f5f543",
  "#3b8eea",
  "#d670d6",
  "#29b8db",
  "#ffffff",
];

// Channel levels used for 256-color indexes 16-231 (six levels per channel).
const RGB_STEPS = [0, 95, 135, 175, 215, 255];
|
||||||
|
|
||||||
|
/**
 * Reports whether `value` contains at least one CSI escape sequence.
 *
 * @param value Text to inspect.
 * @returns `true` when the shared pattern matches anywhere in `value`.
 */
export function hasAnsi(value: string): boolean {
  // `test` on a global regex resumes from `lastIndex`, so rewind before searching.
  ANSI_PATTERN.lastIndex = 0;
  const found = ANSI_PATTERN.test(value);
  return found;
}
|
||||||
|
|
||||||
|
/**
 * Removes every CSI escape sequence from `value`.
 *
 * @param value Text that may contain escape sequences.
 * @returns The text with all matches of the shared pattern deleted.
 */
export function stripAnsi(value: string): string {
  ANSI_PATTERN.lastIndex = 0;
  const cleaned = value.replace(ANSI_PATTERN, "");
  return cleaned;
}
|
||||||
|
|
||||||
|
/**
 * Builds a fresh attribute state: every flag off and no colors set.
 *
 * @returns A new state object; callers are free to mutate it.
 */
function initialState(): AnsiState {
  const blank: AnsiState = {
    bold: false,
    dim: false,
    inverse: false,
    italic: false,
    underline: false,
  };
  return blank;
}
|
||||||
|
|
||||||
|
/**
 * Maps a 256-color palette index to a CSS color string.
 *
 * - 0-7 use the standard palette, 8-15 the bright palette.
 * - 16-231 are built from the six channel levels in `RGB_STEPS`.
 * - 232-255 are a gray ramp starting at 8 and rising by 10 per step.
 *
 * @param value Palette index taken from the escape sequence.
 * @returns The color, or `undefined` when `value` is not an integer in 0-255.
 */
function colorFrom256(value: number): string | undefined {
  // A truncated sequence such as `ESC[38;5m` hands us `undefined`/NaN. Every
  // range comparison on NaN is false, so without this guard the gray-ramp
  // branch would return the bogus (but truthy) string "rgb(NaN, NaN, NaN)".
  if (!Number.isInteger(value) || value < 0 || value > 255) return undefined;
  if (value < 8) return ANSI_COLORS[value];
  if (value < 16) return ANSI_BRIGHT_COLORS[value - 8];
  if (value < 232) {
    const offset = value - 16;
    const red = RGB_STEPS[Math.floor(offset / 36)];
    const green = RGB_STEPS[Math.floor((offset % 36) / 6)];
    const blue = RGB_STEPS[offset % 6];
    return `rgb(${red}, ${green}, ${blue})`;
  }
  const gray = 8 + ((value - 232) * 10);
  return `rgb(${gray}, ${gray}, ${gray})`;
}
|
||||||
|
|
||||||
|
/**
 * Formats a truecolor triple as a CSS `rgb(...)` string.
 *
 * @param red Red channel.
 * @param green Green channel.
 * @param blue Blue channel.
 * @returns The color string, or `undefined` if any channel is non-finite or outside 0-255.
 */
function colorFromRgb(red: number, green: number, blue: number): string | undefined {
  const isValidChannel = (channel: number): boolean =>
    Number.isFinite(channel) && channel >= 0 && channel <= 255;

  if (!(isValidChannel(red) && isValidChannel(green) && isValidChannel(blue))) {
    return undefined;
  }
  return `rgb(${red}, ${green}, ${blue})`;
}
|
||||||
|
|
||||||
|
/**
 * Extracts the numeric SGR parameters from a matched CSI sequence.
 *
 * @param sequence A full CSI sequence (ESC "[" ... final byte).
 * @returns The parameter list (`[0]` for an empty body, and `0` for each empty
 *   field), or `null` when the sequence is not an SGR sequence and must be
 *   ignored for styling.
 */
function normalizedSgrParams(sequence: string): number[] | null {
  if (!sequence.endsWith("m")) return null;
  // Drop the two-character introducer (ESC "[") and the final "m".
  const body = sequence.slice(2, -1).trim();
  if (!body) return [0];
  // "<", "=", ">" and "?" mark private-use parameter strings. Sequences such as
  // xterm's `ESC[>4;2m` end in "m" but are not SGR; parsing them would turn
  // ">4" into 0 and wrongly reset the current style.
  if (/[<=>?]/.test(body)) return null;
  return body.split(/[;:]/).map((part) => {
    const value = Number.parseInt(part || "0", 10);
    return Number.isFinite(value) ? value : 0;
  });
}
|
||||||
|
|
||||||
|
/**
 * Handles the extended-color forms introduced by SGR 38 / 48.
 *
 * Mode 5 reads one palette index; mode 2 reads three RGB channels. An
 * unresolvable color leaves the state untouched, but its parameters are still
 * consumed.
 *
 * @param state Attribute state to update.
 * @param params Full SGR parameter list.
 * @param index Position of the 38 / 48 code inside `params`.
 * @param key Which color slot to write.
 * @returns The index of the last parameter consumed (`index` itself when the mode is unknown).
 */
function applyExtendedColor(
  state: AnsiState,
  params: number[],
  index: number,
  key: "color" | "backgroundColor",
): number {
  switch (params[index + 1]) {
    case 5: {
      const resolved = colorFrom256(params[index + 2]);
      if (resolved) state[key] = resolved;
      return index + 2;
    }
    case 2: {
      const resolved = colorFromRgb(params[index + 2], params[index + 3], params[index + 4]);
      if (resolved) state[key] = resolved;
      return index + 4;
    }
    default:
      return index;
  }
}
|
||||||
|
|
||||||
|
/**
 * Applies a list of SGR parameters to `state`, in order.
 *
 * Unrecognized codes are ignored. Codes 38 / 48 consume their extra
 * parameters via `applyExtendedColor`.
 *
 * @param state Attribute state, mutated in place.
 * @param params Numeric SGR parameters from one escape sequence.
 */
function applySgrParams(state: AnsiState, params: number[]): void {
  for (let index = 0; index < params.length; index += 1) {
    const code = params[index];
    if (code === 0) {
      // SGR 0 resets everything, colors included. Merging `initialState()`
      // into the state is not enough: that object has no color keys, so
      // previously set colors would survive and leak into the text after
      // `ESC[0m`. Keep this list in sync with `initialState()`.
      state.bold = false;
      state.dim = false;
      state.inverse = false;
      state.italic = false;
      state.underline = false;
      delete state.color;
      delete state.backgroundColor;
    } else if (code === 1) {
      // Bold and dim are kept mutually exclusive.
      state.bold = true;
      state.dim = false;
    } else if (code === 2) {
      state.dim = true;
      state.bold = false;
    } else if (code === 3) {
      state.italic = true;
    } else if (code === 4) {
      state.underline = true;
    } else if (code === 7) {
      state.inverse = true;
    } else if (code === 22) {
      // Normal intensity: clears both bold and dim.
      state.bold = false;
      state.dim = false;
    } else if (code === 23) {
      state.italic = false;
    } else if (code === 24) {
      state.underline = false;
    } else if (code === 27) {
      state.inverse = false;
    } else if (code === 39) {
      delete state.color;
    } else if (code === 49) {
      delete state.backgroundColor;
    } else if (code >= 30 && code <= 37) {
      state.color = ANSI_COLORS[code - 30];
    } else if (code >= 40 && code <= 47) {
      state.backgroundColor = ANSI_COLORS[code - 40];
    } else if (code >= 90 && code <= 97) {
      state.color = ANSI_BRIGHT_COLORS[code - 90];
    } else if (code >= 100 && code <= 107) {
      state.backgroundColor = ANSI_BRIGHT_COLORS[code - 100];
    } else if (code === 38) {
      // Skip over the parameters the extended-color form consumed.
      index = applyExtendedColor(state, params, index, "color");
    } else if (code === 48) {
      index = applyExtendedColor(state, params, index, "backgroundColor");
    }
  }
}
|
||||||
|
|
||||||
|
/**
 * Resolves the current attribute state into a style object.
 *
 * When `inverse` is on, the foreground and background colors swap places.
 *
 * @param state Attribute state to read (not modified).
 * @returns The style, or `undefined` when nothing would be set.
 */
function styleFromState(state: AnsiState): AnsiStyle | undefined {
  const [foreground, background] = state.inverse
    ? [state.backgroundColor, state.color]
    : [state.color, state.backgroundColor];

  const style: AnsiStyle = {
    ...(foreground ? { color: foreground } : {}),
    ...(background ? { backgroundColor: background } : {}),
    ...(state.bold ? { fontWeight: 700 } : {}),
    ...(state.dim ? { opacity: 0.72 } : {}),
    ...(state.italic ? { fontStyle: "italic" as const } : {}),
    ...(state.underline ? { textDecorationLine: "underline" as const } : {}),
  };

  return Object.keys(style).length === 0 ? undefined : style;
}
|
||||||
|
|
||||||
|
/**
 * Splits `value` into styled text runs, dropping the escape sequences.
 *
 * SGR sequences update the running style; any other matched CSI sequence is
 * removed from the output without changing the style.
 *
 * @param value Text that may contain escape sequences.
 * @returns Non-empty segments in source order.
 */
export function parseAnsiSegments(value: string): AnsiSegment[] {
  const state = initialState();
  const segments: AnsiSegment[] = [];
  let cursor = 0;

  // Emits the text between `cursor` and `end` using the style active right now.
  const emitUpTo = (end: number): void => {
    if (end > cursor) {
      segments.push({
        text: value.slice(cursor, end),
        style: styleFromState(state),
      });
    }
  };

  // `matchAll` starts from the regex's current `lastIndex`, so rewind first.
  ANSI_PATTERN.lastIndex = 0;
  for (const match of value.matchAll(ANSI_PATTERN)) {
    const start = match.index ?? 0;
    emitUpTo(start);
    const params = normalizedSgrParams(match[0]);
    if (params !== null) applySgrParams(state, params);
    cursor = start + match[0].length;
  }
  emitUpTo(value.length);

  return segments.filter((segment) => segment.text.length > 0);
}
|
||||||
@ -16,6 +16,7 @@ import type {
|
|||||||
SkillDetail,
|
SkillDetail,
|
||||||
SkillsPayload,
|
SkillsPayload,
|
||||||
SlashCommand,
|
SlashCommand,
|
||||||
|
TranscriptionSettingsUpdate,
|
||||||
WebSearchSettingsUpdate,
|
WebSearchSettingsUpdate,
|
||||||
WorkspacesPayload,
|
WorkspacesPayload,
|
||||||
WebuiThreadPersistedPayload,
|
WebuiThreadPersistedPayload,
|
||||||
@ -547,3 +548,21 @@ export async function updateImageGenerationSettings(
|
|||||||
token,
|
token,
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Sends the transcription settings to the settings-update endpoint.
 *
 * All fields travel as query-string parameters; numbers and booleans are
 * stringified.
 *
 * @param token Token forwarded to `request`.
 * @param update New transcription settings.
 * @param base Optional URL prefix placed before `/api/...`.
 * @returns The settings payload returned by the server.
 */
export async function updateTranscriptionSettings(
  token: string,
  update: TranscriptionSettingsUpdate,
  base: string = "",
): Promise<SettingsPayload> {
  const query = new URLSearchParams({
    enabled: String(update.enabled),
    provider: update.provider,
    model: update.model,
    language: update.language,
    max_duration_sec: String(update.maxDurationSec),
    max_upload_mb: String(update.maxUploadMb),
  });
  const url = `${base}/api/settings/transcription/update?${query}`;
  return request<SettingsPayload>(url, token);
}
|
||||||
|
|||||||
@ -95,6 +95,12 @@ interface PendingNewChat {
|
|||||||
timer: ReturnType<typeof setTimeout>;
|
timer: ReturnType<typeof setTimeout>;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Bookkeeping for one in-flight transcription request, keyed by request id. */
interface PendingTranscription {
  /** Settles the caller's promise with the transcribed text. */
  resolve: (text: string) => void;
  /** Fails the caller's promise. */
  reject: (err: Error) => void;
  /** Timeout handle; cleared whenever the request settles first. */
  timer: ReturnType<typeof setTimeout>;
}
|
||||||
|
|
||||||
export interface NanobotClientOptions {
|
export interface NanobotClientOptions {
|
||||||
url: string;
|
url: string;
|
||||||
reconnect?: boolean;
|
reconnect?: boolean;
|
||||||
@ -132,6 +138,7 @@ export class NanobotClient {
|
|||||||
/** Latest ``goal_state`` snapshot per ``chat_id`` (multi-session isolation). */
|
/** Latest ``goal_state`` snapshot per ``chat_id`` (multi-session isolation). */
|
||||||
private goalStateByChatId = new Map<string, GoalStateWsPayload>();
|
private goalStateByChatId = new Map<string, GoalStateWsPayload>();
|
||||||
private pendingNewChat: PendingNewChat | null = null;
|
private pendingNewChat: PendingNewChat | null = null;
|
||||||
|
private pendingTranscriptions = new Map<string, PendingTranscription>();
|
||||||
// Frames queued while the socket is not yet OPEN
|
// Frames queued while the socket is not yet OPEN
|
||||||
private sendQueue: Outbound[] = [];
|
private sendQueue: Outbound[] = [];
|
||||||
private reconnectAttempts = 0;
|
private reconnectAttempts = 0;
|
||||||
@ -320,6 +327,27 @@ export class NanobotClient {
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Queues a `transcribe_audio` frame and waits for the matching reply.
 *
 * @param dataUrl Audio payload, sent as `data_url`.
 * @param options `durationMs` is forwarded as `duration_ms` when provided;
 *   `timeoutMs` overrides the 120 000 ms default wait.
 * @returns Resolves with the transcribed text; rejects on timeout or when the
 *   pending request is rejected.
 */
transcribeAudio(
  dataUrl: string,
  options?: { durationMs?: number; timeoutMs?: number },
): Promise<string> {
  const requestId = crypto.randomUUID();
  const durationMs = options?.durationMs;
  const timeoutMs = options?.timeoutMs ?? 120_000;

  return new Promise<string>((resolve, reject) => {
    const onTimeout = (): void => {
      this.pendingTranscriptions.delete(requestId);
      reject(new Error("transcription timed out"));
    };
    const timer = setTimeout(onTimeout, timeoutMs);
    // Register before sending so a reply can always find its entry.
    this.pendingTranscriptions.set(requestId, { resolve, reject, timer });
    this.queueSend({
      type: "transcribe_audio",
      request_id: requestId,
      data_url: dataUrl,
      ...(durationMs === undefined ? {} : { duration_ms: durationMs }),
    });
  });
}
|
||||||
|
|
||||||
attach(chatId: string): void {
|
attach(chatId: string): void {
|
||||||
this.knownChats.add(chatId);
|
this.knownChats.add(chatId);
|
||||||
if (this.socket?.readyState === WS_OPEN) {
|
if (this.socket?.readyState === WS_OPEN) {
|
||||||
@ -425,6 +453,16 @@ export class NanobotClient {
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (parsed.event === "transcription_result") {
|
||||||
|
this.resolveTranscription(parsed.request_id, parsed.text);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (parsed.event === "transcription_error") {
|
||||||
|
this.rejectTranscription(parsed.request_id, parsed.detail || "error");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
if (parsed.event === "session_updated") {
|
if (parsed.event === "session_updated") {
|
||||||
this.emitSessionUpdate(parsed.chat_id, parsed.scope, parsed.workspace_scope);
|
this.emitSessionUpdate(parsed.chat_id, parsed.scope, parsed.workspace_scope);
|
||||||
return;
|
return;
|
||||||
@ -500,6 +538,7 @@ export class NanobotClient {
|
|||||||
this.pendingNewChat.reject(new Error("socket closed"));
|
this.pendingNewChat.reject(new Error("socket closed"));
|
||||||
this.pendingNewChat = null;
|
this.pendingNewChat = null;
|
||||||
}
|
}
|
||||||
|
this.rejectAllTranscriptions("socket closed");
|
||||||
// Surface structured reasons *before* reconnect logic so the UI can
|
// Surface structured reasons *before* reconnect logic so the UI can
|
||||||
// display the error even while the client transparently reconnects.
|
// display the error even while the client transparently reconnects.
|
||||||
// Browsers populate ``CloseEvent.code`` with the wire-level close code;
|
// Browsers populate ``CloseEvent.code`` with the wire-level close code;
|
||||||
@ -528,6 +567,34 @@ export class NanobotClient {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Settles the pending transcription identified by `requestId` with `text`.
 * Unknown ids are ignored.
 */
private resolveTranscription(requestId: string, text: string): void {
  const entry = this.pendingTranscriptions.get(requestId);
  if (entry === undefined) return;
  this.pendingTranscriptions.delete(requestId);
  clearTimeout(entry.timer);
  entry.resolve(text);
}
|
||||||
|
|
||||||
|
/**
 * Fails a pending transcription with `detail` as the error message.
 *
 * Without a request id the error cannot be attributed, so every pending
 * transcription is rejected. Unknown ids are ignored.
 */
private rejectTranscription(requestId: string | undefined, detail: string): void {
  if (requestId) {
    const entry = this.pendingTranscriptions.get(requestId);
    if (entry) {
      this.pendingTranscriptions.delete(requestId);
      clearTimeout(entry.timer);
      entry.reject(new Error(detail));
    }
    return;
  }
  this.rejectAllTranscriptions(detail);
}
|
||||||
|
|
||||||
|
/**
 * Rejects every pending transcription with `detail` and empties the registry.
 */
private rejectAllTranscriptions(detail: string): void {
  for (const entry of this.pendingTranscriptions.values()) {
    clearTimeout(entry.timer);
    entry.reject(new Error(detail));
  }
  this.pendingTranscriptions.clear();
}
|
||||||
|
|
||||||
private scheduleReconnect(): void {
|
private scheduleReconnect(): void {
|
||||||
this.setStatus("reconnecting");
|
this.setStatus("reconnecting");
|
||||||
const attempt = this.reconnectAttempts++;
|
const attempt = this.reconnectAttempts++;
|
||||||
|
|||||||
@ -391,6 +391,23 @@ export interface SettingsPayload {
|
|||||||
default_api_base?: string | null;
|
default_api_base?: string | null;
|
||||||
}>;
|
}>;
|
||||||
};
|
};
|
||||||
|
transcription?: {
|
||||||
|
enabled: boolean;
|
||||||
|
provider: string;
|
||||||
|
provider_configured: boolean;
|
||||||
|
model: string;
|
||||||
|
language: string | null;
|
||||||
|
max_duration_sec: number;
|
||||||
|
max_upload_mb: number;
|
||||||
|
providers: Array<{
|
||||||
|
name: string;
|
||||||
|
label: string;
|
||||||
|
configured: boolean;
|
||||||
|
api_key_hint?: string | null;
|
||||||
|
api_base?: string | null;
|
||||||
|
default_api_base?: string | null;
|
||||||
|
}>;
|
||||||
|
};
|
||||||
runtime: {
|
runtime: {
|
||||||
config_path: string;
|
config_path: string;
|
||||||
workspace_path: string;
|
workspace_path: string;
|
||||||
@ -680,6 +697,15 @@ export interface ImageGenerationSettingsUpdate {
|
|||||||
maxImagesPerTurn: number;
|
maxImagesPerTurn: number;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Client-side shape of a transcription settings change (camelCase field names). */
export interface TranscriptionSettingsUpdate {
  enabled: boolean;
  provider: string;
  model: string;
  language: string;
  /** Maximum recording duration, in seconds. */
  maxDurationSec: number;
  /** Maximum upload size, in megabytes. */
  maxUploadMb: number;
}
|
||||||
|
|
||||||
export interface SlashCommand {
|
export interface SlashCommand {
|
||||||
command: string;
|
command: string;
|
||||||
title: string;
|
title: string;
|
||||||
@ -782,6 +808,13 @@ export type InboundEvent =
|
|||||||
scope?: "metadata" | "thread" | string;
|
scope?: "metadata" | "thread" | string;
|
||||||
workspace_scope?: WorkspaceScopePayload;
|
workspace_scope?: WorkspaceScopePayload;
|
||||||
}
|
}
|
||||||
|
| { event: "transcription_result"; request_id: string; text: string }
|
||||||
|
| {
|
||||||
|
event: "transcription_error";
|
||||||
|
request_id?: string;
|
||||||
|
detail?: string;
|
||||||
|
provider?: string;
|
||||||
|
}
|
||||||
| { event: "error"; chat_id?: string; detail?: string; reason?: string };
|
| { event: "error"; chat_id?: string; detail?: string; reason?: string };
|
||||||
|
|
||||||
/** Base64-encoded image attached to an outbound ``message`` envelope.
|
/** Base64-encoded image attached to an outbound ``message`` envelope.
|
||||||
@ -845,6 +878,7 @@ export type Outbound =
|
|||||||
| { type: "new_chat"; workspace_scope?: WorkspaceScopePayload }
|
| { type: "new_chat"; workspace_scope?: WorkspaceScopePayload }
|
||||||
| { type: "attach"; chat_id: string }
|
| { type: "attach"; chat_id: string }
|
||||||
| { type: "set_workspace_scope"; chat_id: string; workspace_scope: WorkspaceScopePayload }
|
| { type: "set_workspace_scope"; chat_id: string; workspace_scope: WorkspaceScopePayload }
|
||||||
|
| { type: "transcribe_audio"; request_id: string; data_url: string; duration_ms?: number }
|
||||||
| {
|
| {
|
||||||
type: "message";
|
type: "message";
|
||||||
chat_id: string;
|
chat_id: string;
|
||||||
|
|||||||
@ -1172,13 +1172,13 @@ describe("App layout", () => {
|
|||||||
|
|
||||||
it("restores the settings section from the URL hash after a page reload", async () => {
|
it("restores the settings section from the URL hash after a page reload", async () => {
|
||||||
mockFetchRoutes({ "/api/settings": baseSettingsPayload() });
|
mockFetchRoutes({ "/api/settings": baseSettingsPayload() });
|
||||||
window.history.replaceState(null, "", "/#/settings?section=models");
|
window.history.replaceState(null, "", "/#/settings?section=voice");
|
||||||
|
|
||||||
render(<App />);
|
render(<App />);
|
||||||
|
|
||||||
await waitFor(() => expect(connectSpy).toHaveBeenCalled());
|
await waitFor(() => expect(connectSpy).toHaveBeenCalled());
|
||||||
expect(await screen.findByRole("heading", { name: "Models" })).toBeInTheDocument();
|
expect(await screen.findByRole("heading", { name: "Voice input" })).toBeInTheDocument();
|
||||||
expect(window.location.hash).toBe("#/settings?section=models");
|
expect(window.location.hash).toBe("#/settings?section=voice");
|
||||||
});
|
});
|
||||||
|
|
||||||
it("updates the URL hash when switching settings sections", async () => {
|
it("updates the URL hash when switching settings sections", async () => {
|
||||||
@ -1197,6 +1197,11 @@ describe("App layout", () => {
|
|||||||
|
|
||||||
expect(await screen.findByRole("heading", { name: "Models" })).toBeInTheDocument();
|
expect(await screen.findByRole("heading", { name: "Models" })).toBeInTheDocument();
|
||||||
expect(window.location.hash).toBe("#/settings?section=models");
|
expect(window.location.hash).toBe("#/settings?section=models");
|
||||||
|
|
||||||
|
fireEvent.click(within(settingsNav).getByRole("button", { name: "Voice" }));
|
||||||
|
|
||||||
|
expect(await screen.findByRole("heading", { name: "Voice input" })).toBeInTheDocument();
|
||||||
|
expect(window.location.hash).toBe("#/settings?section=voice");
|
||||||
});
|
});
|
||||||
|
|
||||||
it("opens Apps from the main sidebar without replacing the sidebar", async () => {
|
it("opens Apps from the main sidebar without replacing the sidebar", async () => {
|
||||||
|
|||||||
@ -1,4 +1,5 @@
|
|||||||
import { act, render, screen } from "@testing-library/react";
|
import { act, render, screen } from "@testing-library/react";
|
||||||
|
import userEvent from "@testing-library/user-event";
|
||||||
import { describe, expect, it, vi } from "vitest";
|
import { describe, expect, it, vi } from "vitest";
|
||||||
|
|
||||||
import { CodeBlock } from "@/components/CodeBlock";
|
import { CodeBlock } from "@/components/CodeBlock";
|
||||||
@ -87,6 +88,64 @@ describe("CodeBlock", () => {
|
|||||||
expect(screen.getByText("const value = 1;")).toBeInTheDocument();
|
expect(screen.getByText("const value = 1;")).toBeInTheDocument();
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it("renders ANSI output without mounting the syntax highlighter", () => {
|
||||||
|
render(
|
||||||
|
<ThemeProvider theme="dark">
|
||||||
|
<CodeBlock
|
||||||
|
language="ansi"
|
||||||
|
code={"\x1b[32mPASS\x1b[0m <script>alert(1)</script>"}
|
||||||
|
/>
|
||||||
|
</ThemeProvider>,
|
||||||
|
);
|
||||||
|
|
||||||
|
expect(screen.queryByTestId("highlighted-code")).not.toBeInTheDocument();
|
||||||
|
expect(screen.getByTestId("ansi-code")).toBeInTheDocument();
|
||||||
|
expect(screen.getByTestId("ansi-code").closest(".not-prose")).toBeTruthy();
|
||||||
|
expect(screen.getByText("ansi")).toBeInTheDocument();
|
||||||
|
expect(screen.getByText("PASS")).toHaveStyle({ color: "#0dbc79" });
|
||||||
|
expect(screen.getByText("<script>alert(1)</script>")).toBeInTheDocument();
|
||||||
|
expect(document.querySelector("script")).toBeNull();
|
||||||
|
});
|
||||||
|
|
||||||
|
it("detects ANSI sequences in regular code blocks", () => {
|
||||||
|
render(
|
||||||
|
<ThemeProvider theme="light">
|
||||||
|
<CodeBlock
|
||||||
|
language="text"
|
||||||
|
code={"\x1b[38;2;35;209;139mtruecolor\x1b[0m"}
|
||||||
|
/>
|
||||||
|
</ThemeProvider>,
|
||||||
|
);
|
||||||
|
|
||||||
|
expect(screen.queryByTestId("highlighted-code")).not.toBeInTheDocument();
|
||||||
|
expect(screen.getByText("truecolor")).toHaveStyle({
|
||||||
|
color: "rgb(35, 209, 139)",
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
it("copies ANSI output as clean text", async () => {
|
||||||
|
const user = userEvent.setup();
|
||||||
|
const writeText = vi.fn().mockResolvedValue(undefined);
|
||||||
|
Object.defineProperty(navigator, "clipboard", {
|
||||||
|
configurable: true,
|
||||||
|
value: { writeText },
|
||||||
|
});
|
||||||
|
|
||||||
|
try {
|
||||||
|
render(
|
||||||
|
<ThemeProvider theme="dark">
|
||||||
|
<CodeBlock language="ansi" code={"\x1b[32mPASS\x1b[0m"} />
|
||||||
|
</ThemeProvider>,
|
||||||
|
);
|
||||||
|
|
||||||
|
await user.click(screen.getByRole("button", { name: /copy/i }));
|
||||||
|
|
||||||
|
expect(writeText).toHaveBeenCalledWith("PASS");
|
||||||
|
} finally {
|
||||||
|
Reflect.deleteProperty(navigator, "clipboard");
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
it("reads theme from context without creating per-block observers", async () => {
|
it("reads theme from context without creating per-block observers", async () => {
|
||||||
const originalMutationObserver = globalThis.MutationObserver;
|
const originalMutationObserver = globalThis.MutationObserver;
|
||||||
const observer = vi.fn();
|
const observer = vi.fn();
|
||||||
|
|||||||
@ -412,6 +412,61 @@ describe("NanobotClient", () => {
|
|||||||
);
|
);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it("sends transcription requests and resolves transcription results outside chat dispatch", async () => {
|
||||||
|
const client = new NanobotClient({
|
||||||
|
url: "ws://test",
|
||||||
|
reconnect: false,
|
||||||
|
socketFactory: (url) => new FakeSocket(url) as unknown as WebSocket,
|
||||||
|
});
|
||||||
|
const handler = vi.fn();
|
||||||
|
client.onChat("chat-a", handler);
|
||||||
|
client.connect();
|
||||||
|
lastSocket().fakeOpen();
|
||||||
|
|
||||||
|
const promise = client.transcribeAudio("data:audio/webm;base64,AAAA", {
|
||||||
|
durationMs: 1234,
|
||||||
|
timeoutMs: 1_000,
|
||||||
|
});
|
||||||
|
const frame = JSON.parse(lastSocket().sent.at(-1) as string);
|
||||||
|
expect(frame).toMatchObject({
|
||||||
|
type: "transcribe_audio",
|
||||||
|
data_url: "data:audio/webm;base64,AAAA",
|
||||||
|
duration_ms: 1234,
|
||||||
|
});
|
||||||
|
expect(typeof frame.request_id).toBe("string");
|
||||||
|
|
||||||
|
lastSocket().fakeMessage({
|
||||||
|
event: "transcription_result",
|
||||||
|
request_id: frame.request_id,
|
||||||
|
text: "hello from voice",
|
||||||
|
});
|
||||||
|
await expect(promise).resolves.toBe("hello from voice");
|
||||||
|
expect(handler).not.toHaveBeenCalled();
|
||||||
|
});
|
||||||
|
|
||||||
|
it("rejects pending transcription requests on server errors and socket close", async () => {
|
||||||
|
const client = new NanobotClient({
|
||||||
|
url: "ws://test",
|
||||||
|
reconnect: false,
|
||||||
|
socketFactory: (url) => new FakeSocket(url) as unknown as WebSocket,
|
||||||
|
});
|
||||||
|
client.connect();
|
||||||
|
lastSocket().fakeOpen();
|
||||||
|
|
||||||
|
const errored = client.transcribeAudio("data:audio/webm;base64,AAAA", { timeoutMs: 1_000 });
|
||||||
|
const errorFrame = JSON.parse(lastSocket().sent.at(-1) as string);
|
||||||
|
lastSocket().fakeMessage({
|
||||||
|
event: "transcription_error",
|
||||||
|
request_id: errorFrame.request_id,
|
||||||
|
detail: "not_configured",
|
||||||
|
});
|
||||||
|
await expect(errored).rejects.toThrow("not_configured");
|
||||||
|
|
||||||
|
const dropped = client.transcribeAudio("data:audio/webm;base64,BBBB", { timeoutMs: 1_000 });
|
||||||
|
lastSocket().close();
|
||||||
|
await expect(dropped).rejects.toThrow("socket closed");
|
||||||
|
});
|
||||||
|
|
||||||
it("queues sends while connecting and flushes on open", () => {
|
it("queues sends while connecting and flushes on open", () => {
|
||||||
const client = new NanobotClient({
|
const client = new NanobotClient({
|
||||||
url: "ws://test",
|
url: "ws://test",
|
||||||
|
|||||||
@ -1,4 +1,4 @@
|
|||||||
import { fireEvent, render, screen, waitFor, within } from "@testing-library/react";
|
import { act, fireEvent, render, screen, waitFor, within } from "@testing-library/react";
|
||||||
import { afterEach, describe, expect, it, vi } from "vitest";
|
import { afterEach, describe, expect, it, vi } from "vitest";
|
||||||
|
|
||||||
import { ThreadComposer } from "@/components/thread/ThreadComposer";
|
import { ThreadComposer } from "@/components/thread/ThreadComposer";
|
||||||
@ -121,6 +121,7 @@ const MCP_PRESETS: McpPresetInfo[] = [
|
|||||||
},
|
},
|
||||||
];
|
];
|
||||||
const ORIGINAL_INNER_HEIGHT = window.innerHeight;
|
const ORIGINAL_INNER_HEIGHT = window.innerHeight;
|
||||||
|
const ORIGINAL_MEDIA_DEVICES = navigator.mediaDevices;
|
||||||
|
|
||||||
function mockBlobUrls() {
|
function mockBlobUrls() {
|
||||||
Object.defineProperty(URL, "createObjectURL", {
|
Object.defineProperty(URL, "createObjectURL", {
|
||||||
@ -135,7 +136,16 @@ function mockBlobUrls() {
|
|||||||
|
|
||||||
afterEach(() => {
|
afterEach(() => {
|
||||||
vi.restoreAllMocks();
|
vi.restoreAllMocks();
|
||||||
|
vi.unstubAllGlobals();
|
||||||
Reflect.deleteProperty(window, "nanobotHost");
|
Reflect.deleteProperty(window, "nanobotHost");
|
||||||
|
if (ORIGINAL_MEDIA_DEVICES) {
|
||||||
|
Object.defineProperty(navigator, "mediaDevices", {
|
||||||
|
configurable: true,
|
||||||
|
value: ORIGINAL_MEDIA_DEVICES,
|
||||||
|
});
|
||||||
|
} else {
|
||||||
|
Reflect.deleteProperty(navigator, "mediaDevices");
|
||||||
|
}
|
||||||
window.localStorage.clear();
|
window.localStorage.clear();
|
||||||
Object.defineProperty(window, "innerHeight", {
|
Object.defineProperty(window, "innerHeight", {
|
||||||
value: ORIGINAL_INNER_HEIGHT,
|
value: ORIGINAL_INNER_HEIGHT,
|
||||||
@ -161,6 +171,75 @@ function rect(init: Partial<DOMRect>): DOMRect {
|
|||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
function mockVoiceRecorder(blob = new Blob(["voice"], { type: "audio/webm" })) {
|
||||||
|
const stopTrack = vi.fn();
|
||||||
|
const getUserMedia = vi.fn(async () => ({
|
||||||
|
getTracks: () => [{ stop: stopTrack }],
|
||||||
|
}));
|
||||||
|
Object.defineProperty(navigator, "mediaDevices", {
|
||||||
|
configurable: true,
|
||||||
|
value: { getUserMedia },
|
||||||
|
});
|
||||||
|
|
||||||
|
class FakeMediaRecorder {
|
||||||
|
static isTypeSupported = vi.fn((type: string) => type === "audio/webm");
|
||||||
|
|
||||||
|
state: RecordingState = "inactive";
|
||||||
|
mimeType = blob.type;
|
||||||
|
ondataavailable: ((event: BlobEvent) => void) | null = null;
|
||||||
|
onstop: (() => void) | null = null;
|
||||||
|
|
||||||
|
start() {
|
||||||
|
this.state = "recording";
|
||||||
|
}
|
||||||
|
|
||||||
|
stop() {
|
||||||
|
this.state = "inactive";
|
||||||
|
this.ondataavailable?.({ data: blob } as BlobEvent);
|
||||||
|
this.onstop?.();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
vi.stubGlobal("MediaRecorder", FakeMediaRecorder);
|
||||||
|
return { getUserMedia, stopTrack };
|
||||||
|
}
|
||||||
|
|
||||||
|
function mockVoiceAudioInput(sample = 128, state: AudioContextState = "running") {
|
||||||
|
class FakeAudioContext {
|
||||||
|
state = state;
|
||||||
|
|
||||||
|
createMediaStreamSource() {
|
||||||
|
return { connect: vi.fn(), disconnect: vi.fn() };
|
||||||
|
}
|
||||||
|
|
||||||
|
createAnalyser() {
|
||||||
|
return {
|
||||||
|
fftSize: 256,
|
||||||
|
smoothingTimeConstant: 0,
|
||||||
|
disconnect: vi.fn(),
|
||||||
|
getByteTimeDomainData: (data: Uint8Array) => data.fill(sample),
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
close = vi.fn(async () => undefined);
|
||||||
|
resume = vi.fn(async () => undefined);
|
||||||
|
}
|
||||||
|
|
||||||
|
vi.stubGlobal("AudioContext", FakeAudioContext);
|
||||||
|
vi.spyOn(window, "requestAnimationFrame").mockImplementation((callback) =>
|
||||||
|
window.setTimeout(() => callback(performance.now()), 16) as unknown as number
|
||||||
|
);
|
||||||
|
vi.spyOn(window, "cancelAnimationFrame").mockImplementation((id) =>
|
||||||
|
window.clearTimeout(id as unknown as number)
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
async function waitForVoiceCapture(): Promise<void> {
|
||||||
|
await act(async () => {
|
||||||
|
await new Promise((resolve) => setTimeout(resolve, 700));
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
describe("ThreadComposer", () => {
|
describe("ThreadComposer", () => {
|
||||||
it("renders a readonly hero model composer when provided", () => {
|
it("renders a readonly hero model composer when provided", () => {
|
||||||
render(
|
render(
|
||||||
@ -209,6 +288,245 @@ describe("ThreadComposer", () => {
|
|||||||
expect(screen.queryByText(/Enter to send/)).not.toBeInTheDocument();
|
expect(screen.queryByText(/Enter to send/)).not.toBeInTheDocument();
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it("transcribes voice input into the composer without sending", async () => {
|
||||||
|
mockVoiceRecorder();
|
||||||
|
const onSend = vi.fn();
|
||||||
|
const onTranscribeAudio = vi.fn(async () => "hello voice");
|
||||||
|
render(
|
||||||
|
<ThreadComposer
|
||||||
|
onSend={onSend}
|
||||||
|
onTranscribeAudio={onTranscribeAudio}
|
||||||
|
placeholder="Type your message..."
|
||||||
|
/>,
|
||||||
|
);
|
||||||
|
|
||||||
|
fireEvent.click(screen.getByRole("button", { name: "Voice input" }));
|
||||||
|
expect(await screen.findByLabelText("Recording 0:00")).toBeInTheDocument();
|
||||||
|
await waitForVoiceCapture();
|
||||||
|
fireEvent.click(await screen.findByRole("button", { name: "Stop recording" }));
|
||||||
|
|
||||||
|
await waitFor(() => expect(onTranscribeAudio).toHaveBeenCalledWith(
|
||||||
|
expect.stringMatching(/^data:audio\/webm;base64,/),
|
||||||
|
expect.objectContaining({ durationMs: expect.any(Number) }),
|
||||||
|
));
|
||||||
|
await waitFor(() => expect(screen.getByLabelText("Message input")).toHaveValue("hello voice"));
|
||||||
|
expect(onSend).not.toHaveBeenCalled();
|
||||||
|
});
|
||||||
|
|
||||||
|
it("does not start duplicate voice recordings while microphone access is pending", async () => {
|
||||||
|
const { getUserMedia, stopTrack } = mockVoiceRecorder();
|
||||||
|
let resolveStream: ((stream: MediaStream) => void) | undefined;
|
||||||
|
getUserMedia.mockImplementation(() => new Promise((resolve) => {
|
||||||
|
resolveStream = resolve as (stream: MediaStream) => void;
|
||||||
|
}));
|
||||||
|
const onTranscribeAudio = vi.fn(async () => "one recording");
|
||||||
|
render(
|
||||||
|
<ThreadComposer
|
||||||
|
onSend={vi.fn()}
|
||||||
|
onTranscribeAudio={onTranscribeAudio}
|
||||||
|
placeholder="Type your message..."
|
||||||
|
/>,
|
||||||
|
);
|
||||||
|
|
||||||
|
const voiceButton = screen.getByRole("button", { name: "Voice input" });
|
||||||
|
fireEvent.click(voiceButton);
|
||||||
|
fireEvent.click(voiceButton);
|
||||||
|
|
||||||
|
expect(getUserMedia).toHaveBeenCalledTimes(1);
|
||||||
|
|
||||||
|
await act(async () => {
|
||||||
|
resolveStream?.({ getTracks: () => [{ stop: stopTrack }] } as unknown as MediaStream);
|
||||||
|
});
|
||||||
|
expect(await screen.findByLabelText("Recording 0:00")).toBeInTheDocument();
|
||||||
|
await waitForVoiceCapture();
|
||||||
|
fireEvent.click(await screen.findByRole("button", { name: "Stop recording" }));
|
||||||
|
|
||||||
|
await waitFor(() => expect(onTranscribeAudio).toHaveBeenCalledTimes(1));
|
||||||
|
await waitFor(() => expect(screen.getByLabelText("Message input")).toHaveValue("one recording"));
|
||||||
|
});
|
||||||
|
|
||||||
|
it("supports press-and-hold voice recording", async () => {
|
||||||
|
mockVoiceRecorder();
|
||||||
|
const onSend = vi.fn();
|
||||||
|
const onTranscribeAudio = vi.fn(async () => "held voice");
|
||||||
|
render(
|
||||||
|
<ThreadComposer
|
||||||
|
onSend={onSend}
|
||||||
|
onTranscribeAudio={onTranscribeAudio}
|
||||||
|
placeholder="Type your message..."
|
||||||
|
/>,
|
||||||
|
);
|
||||||
|
|
||||||
|
const voiceButton = screen.getByRole("button", { name: "Voice input" });
|
||||||
|
fireEvent.pointerDown(voiceButton, { button: 0, pointerId: 1, pointerType: "touch" });
|
||||||
|
await act(async () => {
|
||||||
|
await new Promise((resolve) => setTimeout(resolve, 180));
|
||||||
|
});
|
||||||
|
expect(await screen.findByLabelText("Recording 0:00")).toBeInTheDocument();
|
||||||
|
await waitForVoiceCapture();
|
||||||
|
fireEvent.pointerUp(screen.getByRole("button", { name: "Stop recording" }), {
|
||||||
|
pointerId: 1,
|
||||||
|
pointerType: "touch",
|
||||||
|
});
|
||||||
|
|
||||||
|
await waitFor(() => expect(onTranscribeAudio).toHaveBeenCalled());
|
||||||
|
await waitFor(() => expect(screen.getByLabelText("Message input")).toHaveValue("held voice"));
|
||||||
|
expect(onSend).not.toHaveBeenCalled();
|
||||||
|
});
|
||||||
|
|
||||||
|
it("supports keyboard hold voice recording", async () => {
|
||||||
|
mockVoiceRecorder();
|
||||||
|
const onSend = vi.fn();
|
||||||
|
const onTranscribeAudio = vi.fn(async () => "shortcut voice");
|
||||||
|
render(
|
||||||
|
<ThreadComposer
|
||||||
|
onSend={onSend}
|
||||||
|
onTranscribeAudio={onTranscribeAudio}
|
||||||
|
placeholder="Type your message..."
|
||||||
|
/>,
|
||||||
|
);
|
||||||
|
|
||||||
|
const voiceButton = screen.getByRole("button", { name: "Voice input" });
|
||||||
|
expect(voiceButton).toHaveAttribute("title", "Click to dictate or hold");
|
||||||
|
expect(voiceButton).toHaveAttribute("aria-keyshortcuts", "Control+Shift+D");
|
||||||
|
fireEvent.keyDown(window, { code: "KeyD", ctrlKey: true, key: "D", shiftKey: true });
|
||||||
|
expect(await screen.findByLabelText("Recording 0:00")).toBeInTheDocument();
|
||||||
|
await waitForVoiceCapture();
|
||||||
|
fireEvent.keyUp(window, { code: "KeyD", ctrlKey: true, key: "D", shiftKey: true });
|
||||||
|
|
||||||
|
await waitFor(() => expect(onTranscribeAudio).toHaveBeenCalled());
|
||||||
|
await waitFor(() => expect(screen.getByLabelText("Message input")).toHaveValue("shortcut voice"));
|
||||||
|
expect(onSend).not.toHaveBeenCalled();
|
||||||
|
});
|
||||||
|
|
||||||
|
it("ignores the delayed click emitted after a long-press voice recording", async () => {
|
||||||
|
const { getUserMedia } = mockVoiceRecorder();
|
||||||
|
const onTranscribeAudio = vi.fn(async () => "held once");
|
||||||
|
render(
|
||||||
|
<ThreadComposer
|
||||||
|
onSend={vi.fn()}
|
||||||
|
onTranscribeAudio={onTranscribeAudio}
|
||||||
|
placeholder="Type your message..."
|
||||||
|
/>,
|
||||||
|
);
|
||||||
|
|
||||||
|
const voiceButton = screen.getByRole("button", { name: "Voice input" });
|
||||||
|
fireEvent.pointerDown(voiceButton, { button: 0, pointerId: 1, pointerType: "touch" });
|
||||||
|
await act(async () => {
|
||||||
|
await new Promise((resolve) => setTimeout(resolve, 180));
|
||||||
|
});
|
||||||
|
expect(await screen.findByLabelText("Recording 0:00")).toBeInTheDocument();
|
||||||
|
await waitForVoiceCapture();
|
||||||
|
fireEvent.pointerUp(screen.getByRole("button", { name: "Stop recording" }), {
|
||||||
|
pointerId: 1,
|
||||||
|
pointerType: "touch",
|
||||||
|
});
|
||||||
|
await waitFor(() => expect(screen.getByLabelText("Message input")).toHaveValue("held once"));
|
||||||
|
|
||||||
|
await act(async () => {
|
||||||
|
await new Promise((resolve) => setTimeout(resolve, 20));
|
||||||
|
});
|
||||||
|
fireEvent.click(screen.getByRole("button", { name: "Voice input" }));
|
||||||
|
|
||||||
|
expect(getUserMedia).toHaveBeenCalledTimes(1);
|
||||||
|
expect(onTranscribeAudio).toHaveBeenCalledTimes(1);
|
||||||
|
});
|
||||||
|
|
||||||
|
it("keeps existing text when voice transcription fails", async () => {
|
||||||
|
mockVoiceRecorder();
|
||||||
|
const onSend = vi.fn();
|
||||||
|
const onTranscribeAudio = vi.fn(async () => {
|
||||||
|
throw new Error("not_configured");
|
||||||
|
});
|
||||||
|
render(
|
||||||
|
<ThreadComposer
|
||||||
|
onSend={onSend}
|
||||||
|
onTranscribeAudio={onTranscribeAudio}
|
||||||
|
placeholder="Type your message..."
|
||||||
|
/>,
|
||||||
|
);
|
||||||
|
|
||||||
|
const input = screen.getByLabelText("Message input");
|
||||||
|
fireEvent.change(input, { target: { value: "draft" } });
|
||||||
|
fireEvent.click(screen.getByRole("button", { name: "Voice input" }));
|
||||||
|
await waitForVoiceCapture();
|
||||||
|
fireEvent.click(await screen.findByRole("button", { name: "Stop recording" }));
|
||||||
|
|
||||||
|
await waitFor(() => {
|
||||||
|
expect(screen.getByText("Configure a transcription provider first.")).toBeInTheDocument();
|
||||||
|
});
|
||||||
|
expect(input).toHaveValue("draft");
|
||||||
|
expect(onSend).not.toHaveBeenCalled();
|
||||||
|
});
|
||||||
|
|
||||||
|
it("does not transcribe recordings that are too short", async () => {
|
||||||
|
mockVoiceRecorder();
|
||||||
|
const onTranscribeAudio = vi.fn(async () => "should not appear");
|
||||||
|
render(
|
||||||
|
<ThreadComposer
|
||||||
|
onSend={vi.fn()}
|
||||||
|
onTranscribeAudio={onTranscribeAudio}
|
||||||
|
placeholder="Type your message..."
|
||||||
|
/>,
|
||||||
|
);
|
||||||
|
|
||||||
|
fireEvent.click(screen.getByRole("button", { name: "Voice input" }));
|
||||||
|
fireEvent.click(await screen.findByRole("button", { name: "Stop recording" }));
|
||||||
|
|
||||||
|
await waitFor(() => {
|
||||||
|
expect(screen.getByText("Hold a little longer to record voice.")).toBeInTheDocument();
|
||||||
|
});
|
||||||
|
expect(onTranscribeAudio).not.toHaveBeenCalled();
|
||||||
|
});
|
||||||
|
|
||||||
|
it("warns during recording when microphone input is silent", async () => {
|
||||||
|
mockVoiceRecorder();
|
||||||
|
mockVoiceAudioInput();
|
||||||
|
const onTranscribeAudio = vi.fn(async () => "should not appear");
|
||||||
|
render(
|
||||||
|
<ThreadComposer
|
||||||
|
onSend={vi.fn()}
|
||||||
|
onTranscribeAudio={onTranscribeAudio}
|
||||||
|
placeholder="Type your message..."
|
||||||
|
/>,
|
||||||
|
);
|
||||||
|
|
||||||
|
fireEvent.click(screen.getByRole("button", { name: "Voice input" }));
|
||||||
|
expect(await screen.findByLabelText("Recording 0:00")).toBeInTheDocument();
|
||||||
|
await act(async () => {
|
||||||
|
await new Promise((resolve) => setTimeout(resolve, 1_150));
|
||||||
|
});
|
||||||
|
|
||||||
|
expect(screen.getByText("No microphone input detected.")).toBeInTheDocument();
|
||||||
|
fireEvent.click(await screen.findByRole("button", { name: "Stop recording" }));
|
||||||
|
expect(onTranscribeAudio).not.toHaveBeenCalled();
|
||||||
|
});
|
||||||
|
|
||||||
|
it("does not treat unavailable microphone levels as silence", async () => {
|
||||||
|
mockVoiceRecorder();
|
||||||
|
mockVoiceAudioInput(128, "suspended");
|
||||||
|
const onTranscribeAudio = vi.fn(async () => "voice text");
|
||||||
|
render(
|
||||||
|
<ThreadComposer
|
||||||
|
onSend={vi.fn()}
|
||||||
|
onTranscribeAudio={onTranscribeAudio}
|
||||||
|
placeholder="Type your message..."
|
||||||
|
/>,
|
||||||
|
);
|
||||||
|
|
||||||
|
fireEvent.click(screen.getByRole("button", { name: "Voice input" }));
|
||||||
|
expect(await screen.findByLabelText("Recording 0:00")).toBeInTheDocument();
|
||||||
|
await act(async () => {
|
||||||
|
await new Promise((resolve) => setTimeout(resolve, 1_150));
|
||||||
|
});
|
||||||
|
|
||||||
|
expect(screen.queryByText("No microphone input detected.")).not.toBeInTheDocument();
|
||||||
|
fireEvent.click(await screen.findByRole("button", { name: "Stop recording" }));
|
||||||
|
|
||||||
|
await waitFor(() => expect(onTranscribeAudio).toHaveBeenCalledTimes(1));
|
||||||
|
expect(screen.getByDisplayValue("voice text")).toBeInTheDocument();
|
||||||
|
});
|
||||||
|
|
||||||
it("renders and changes workspace access mode", async () => {
|
it("renders and changes workspace access mode", async () => {
|
||||||
const onWorkspaceScopeChange = vi.fn();
|
const onWorkspaceScopeChange = vi.fn();
|
||||||
render(
|
render(
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user