Add new TTS/STT Logic

2026-05-16 19:55:51 +02:00
parent a13af83655
commit 726151d90b
2 changed files with 17 additions and 8 deletions
--- a/src/components/onboarding/wizard.tsx
+++ b/src/components/onboarding/wizard.tsx
@@ -199,10 +199,12 @@ export function OnboardingWizard({
      agentName: "Assistant",
      soulMd: FALLBACK_SOUL.replace("{company}", displayOrgName),
      agentsMd: FALLBACK_AGENTS,
-      // CORE defaults: heartbeat + cron pre-selected so the assistant
-      // can be proactive and run scheduled tasks out of the box.
-      // Customers can untoggle either before submitting. core-voice
-      // stays unselected — its toggle is disabled until Phase B.
+      // CORE defaults: heartbeat + cron + active-memory pre-selected so
+      // the assistant can be proactive, run scheduled tasks, and recall
+      // stable context out of the box. Customers can untoggle any of
+      // them before submitting. core-voice is fully wired (Phase B)
+      // but stays unselected — opt-in keeps audio spend predictable
+      // for tenants who don't intend to use voice channels.
      packages: [...DEFAULT_PACKAGE_IDS] as string[],
      billingAddress: {
        // For personal accounts, leave the company field empty — it'll
--- a/src/lib/packages.ts
+++ b/src/lib/packages.ts
@@ -13,8 +13,10 @@
 * Category model (Phase A rework):
 *   - core    — platform-behaviour toggles (heartbeat, cron,
 *               active-memory, voice). Mostly no secrets. core-voice is
- *               a catalog stub in Phase A — toggling stores customer
- *               intent only; the OCI config_patch lands in Phase B.
+ *               fully wired (Phase B): toggling installs the STT / TTS /
+ *               Talk surface via the operator's config_patch, routed
+ *               through LiteLLM (pieced-stt, pieced-tts-inbound,
+ *               pieced-tts-talk).
 *   - channel — messaging integration.
 *   - skill   — ClawHub skill install.
 */
@@ -271,8 +273,13 @@ export const CHANNEL_PACKAGE_IDS: string[] = PACKAGE_CATALOG
 *
 * Each adds some token cost — active-memory the most (one extra
 * sub-agent turn per inbound message) — so customers can untoggle any
- * of them before submitting. core-voice is deliberately excluded from
- * defaults until its config_patch lands in Phase B.
+ * of them before submitting.
+ *
+ * core-voice is intentionally NOT a default. It is fully wired (Phase B)
+ * and customers can enable it from the wizard, but it incurs separate
+ * audio spend on every inbound voice note (Whisper STT) and every
+ * outbound reply (kani-tts / kokoro-fastapi TTS), both via LiteLLM. Opt-in
+ * keeps cost predictable for tenants who don't intend to use voice channels.
 */
 export const DEFAULT_PACKAGE_IDS: string[] = [
  "core-heartbeat",