# infiplot-web/apps/web/.env.example

# =============================================================
# 云梦 — AI 视觉小说 (AI visual novel)
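# Recommended setup: Xiaomi MiMo Token Plan for TEXT / VISION / TTS
# (one API key covers all three) + Runware for IMAGE (FLUX.2 [klein]).
# NOTE: the TEXT_* values shipped below point at DeepSeek (option A in
# section 1), not MiMo; switch them to option C for the single-key setup.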
#
# TEXT / VISION / TTS use OpenAI-compatible endpoints (any OpenAI-
# compatible host works: OpenRouter, OpenAI, Anthropic via proxy,
# Gemini, DeepSeek, Ollama, ...).
#
# IMAGE uses Runware's own task-array protocol (not OpenAI-compatible);
# the adapter posts an `imageInference` task to IMAGE_BASE_URL.
# =============================================================

# ---- 1. Text LLM · scene director ----------------------------------
# Any OpenAI-compatible endpoint works: OpenAI, Anthropic (via proxy),
# Gemini, OpenRouter, DeepSeek, OpenCode, MiMo, local Ollama, …
# Recommended starters:
#   A. DeepSeek v4-flash direct (https://api.deepseek.com/v1) — pay-as-you-go,
#      fastest first-token latency, very stable JSON output.
#   B. OpenCode Go (https://opencode.ai/zen/go/v1) — $10/mo flat-rate bundle of
#      12 open-source models (DeepSeek v4-flash, Qwen, Kimi, GLM, MiMo, …).
#      Cheaper at high volume, slower at the tail.
#   C. MiMo v2.5 via Xiaomi Token Plan — bundles VISION + TTS in one tp- key.
# The values below are option A. sk-xxx is a placeholder: replace it with
# your own key. To use B or C instead, change all three variables together
# (base URL, key, and model name).
TEXT_BASE_URL=https://api.deepseek.com/v1
TEXT_API_KEY=sk-xxx
TEXT_MODEL=deepseek-v4-flash

# ---- 2. Image generator (renders the scene background) -------------
# Recommended: Runware + FLUX.2 [klein] 9B KV — distilled 4-step model,
# sub-second inference at ~$0.0008/image. Sign up at https://runware.ai
# This endpoint speaks Runware's task-array protocol, not the OpenAI API,
# so an OpenAI-compatible image host cannot be dropped in here.
# IMAGE_MODEL takes a Runware AIR id. AIR ids for FLUX.2 [klein] variants:
#   runware:400@1  · 4B (smaller)
#   runware:400@6  · 9B KV (recommended — fastest at 16:9)
# runware-xxx is a placeholder: replace it with your own Runware API key.
IMAGE_BASE_URL=https://api.runware.ai/v1
IMAGE_API_KEY=runware-xxx
IMAGE_MODEL=runware:400@6

# ---- 3. Vision model · multimodal click interpretation -------------
# Recommended: MiMo v2.5 — multimodal, accepts image_url content parts.
# Whatever model is configured here must accept image_url content parts.
# tp-xxx is a placeholder for a Xiaomi Token Plan key; the same tp- key
# can be reused for TTS_API_KEY (the plan bundles VISION + TTS).
VISION_BASE_URL=https://token-plan-sgp.xiaomimimo.com/v1
VISION_API_KEY=tp-xxx
VISION_MODEL=mimo-v2.5

# ---- 4. TTS · Xiaomi MiMo (optional — leave blank to disable) ------
# Per-character voice design → clone, with per-line delivery direction.
# Voice identity = the reference audio kept in the session (no server expiry).
# The adapter appends -voicedesign / -voiceclone to TTS_SPEECH_MODEL, so set
# only the base model name here (e.g. mimo-v2.5-tts, without either suffix).
# tp-xxx is a placeholder for a Xiaomi Token Plan key; the same tp- key
# can be reused for VISION_API_KEY (the plan bundles VISION + TTS).
# NOTE(review): this file does not say which of the three variables must
# be blank to disable TTS — confirm against the adapter before relying on it.
TTS_BASE_URL=https://token-plan-sgp.xiaomimimo.com/v1
TTS_API_KEY=tp-xxx
TTS_SPEECH_MODEL=mimo-v2.5-tts

# ---- 5. MOCK_IMAGE — skip image generation (cheap TTS testing) -----
# true  → return a placeholder image instead of calling the image model.
#         Text/story/voice still run normally. Great for iterating on TTS.
# false → generate images normally (the value shipped here).
MOCK_IMAGE=false