#!/usr/bin/env bash
# lettabot-tts - Generate speech audio via configurable TTS provider
#
# Usage: lettabot-tts <text> [output_path]
#
# Arguments:
#   text        - Required. Text to synthesize.
#   output_path - Optional. File to write the audio to. When omitted, a
#                 uniquely named voice-*.ogg file is created under
#                 ${LETTABOT_WORKING_DIR:-$(pwd)}/data/outbound.
#
# Output:
#   On success, prints the path of the generated audio file to stdout.
#   On failure, prints a diagnostic to stderr and exits non-zero.
#
# Requires: curl, jq (and optionally `file`, used only to decide whether an
# error response body is printable).
#
# Environment:
#   TTS_PROVIDER         - Optional. "elevenlabs" (default) or "openai".
#   LETTABOT_WORKING_DIR - Optional. Base directory containing data/outbound
#                          (default: current working directory).
#
# ElevenLabs:
#   ELEVENLABS_API_KEY   - Required. API key.
#   ELEVENLABS_VOICE_ID  - Optional. Voice ID (default: onwK4e9ZLuTAKqWW03F9).
#   ELEVENLABS_MODEL_ID  - Optional. Model ID (default: eleven_multilingual_v2).
#
# OpenAI:
#   OPENAI_API_KEY       - Required. API key.
#   OPENAI_TTS_VOICE     - Optional. Voice name (default: alloy).
#   OPENAI_TTS_MODEL     - Optional. Model (default: tts-1).

set -euo pipefail

TEXT="${1:?Usage: lettabot-tts <text> [output_path]}"

# The session subprocess CWD is set to workingDir (bot.ts:642), which is the
# same base directory that directives resolve from. This means
# $(pwd) and LETTABOT_WORKING_DIR produce paths in the correct coordinate space.
OUTBOUND_DIR="${LETTABOT_WORKING_DIR:-$(pwd)}/data/outbound"

PROVIDER="${TTS_PROVIDER:-elevenlabs}"

# Set to 1 only when this script created OUTPUT itself, so that failure
# cleanup never deletes a caller-supplied file that was not touched.
OUTPUT_IS_TEMP=0

# Print "Error: <message>" to stderr and abort with status 1.
die() {
  printf 'Error: %s\n' "$*" >&2
  exit 1
}

# EXIT handler: when the script fails, remove the auto-generated output file
# so aborted runs do not leave empty or partial voice-*.ogg files behind.
cleanup_on_failure() {
  local status=$?
  if [[ "$status" -ne 0 && "$OUTPUT_IS_TEMP" -eq 1 && -n "${OUTPUT:-}" ]]; then
    rm -f -- "$OUTPUT"
  fi
}
trap cleanup_on_failure EXIT

# Fail early with a clear message; otherwise a missing jq would only surface
# as an empty request body rejected by the remote API.
for required_tool in curl jq; do
  command -v "$required_tool" >/dev/null 2>&1 \
    || die "required command not found: $required_tool"
done

# Ensure output directory exists
mkdir -p "$OUTBOUND_DIR"

# Use collision-safe random filenames when output path is not explicitly provided.
if [ -n "${2:-}" ]; then
  OUTPUT="$2"
else
  # Clean stale voice files older than 1 hour. Best effort: a failed cleanup
  # must not prevent generating new audio, hence the deliberate "|| true".
  find "$OUTBOUND_DIR" -name 'voice-*.ogg' -mmin +60 -delete 2>/dev/null || true
  # NOTE(review): a template with a suffix after the X's relies on GNU mktemp
  # behaviour; confirm it randomizes the name on BSD/macOS targets.
  OUTPUT=$(mktemp "${OUTBOUND_DIR}/voice-XXXXXXXXXX.ogg")
  OUTPUT_IS_TEMP=1
fi

# ---------------------------------------------------------------------------
# Shared HTTP helper
# ---------------------------------------------------------------------------

#######################################
# POST a JSON payload to a TTS endpoint and store the response in OUTPUT.
# Globals:
#   OUTPUT (read) - path curl writes the response body to
# Arguments:
#   $1 - provider label used in error messages (e.g. "ElevenLabs API")
#   $2 - endpoint URL
#   $3 - authentication header line (e.g. "xi-api-key: ...")
#   $4 - JSON request body
# Outputs:
#   On failure, writes a diagnostic (and a textual error body, if any) to stderr.
# Returns:
#   0 on a 2xx response; exits the script with status 1 otherwise.
#######################################
request_audio() {
  local label=$1
  local url=$2
  local auth_header=$3
  local payload=$4
  local http_code
  local curl_status=0

  # -S keeps curl's own error message visible despite -s. The status is
  # captured explicitly so a transport failure is reported instead of the
  # script silently aborting under `set -e`.
  http_code=$(curl -sS -w '%{http_code}' -o "$OUTPUT" \
    "$url" \
    -H "$auth_header" \
    -H 'Content-Type: application/json' \
    -d "$payload") || curl_status=$?

  if [ "$curl_status" -ne 0 ]; then
    die "${label} request failed (curl exit status ${curl_status})"
  fi

  if [ "$http_code" -lt 200 ] || [ "$http_code" -ge 300 ]; then
    echo "Error: ${label} returned HTTP $http_code" >&2
    # Only dump the body when it looks like text; -b omits the file name so a
    # path containing "text" cannot cause a false match.
    if file -b "$OUTPUT" | grep -Eq 'text|JSON|ASCII'; then
      cat -- "$OUTPUT" >&2
    fi
    rm -f -- "$OUTPUT"
    exit 1
  fi
}

# ---------------------------------------------------------------------------
# Provider: ElevenLabs
# ---------------------------------------------------------------------------

# Synthesize TEXT with ElevenLabs and write the audio to OUTPUT.
# Reads ELEVENLABS_API_KEY (required), ELEVENLABS_VOICE_ID and
# ELEVENLABS_MODEL_ID (optional). Exits 1 on any failure.
tts_elevenlabs() {
  if [ -z "${ELEVENLABS_API_KEY:-}" ]; then
    echo "Error: ELEVENLABS_API_KEY is not set" >&2
    exit 1
  fi

  local voice_id="${ELEVENLABS_VOICE_ID:-onwK4e9ZLuTAKqWW03F9}"
  local model_id="${ELEVENLABS_MODEL_ID:-eleven_multilingual_v2}"
  local payload

  # jq performs the JSON escaping of the user-supplied text.
  # NOTE(review): ElevenLabs documents output_format as a URL query parameter;
  # confirm that sending it in the request body is honored.
  payload=$(jq -n \
    --arg text "$TEXT" \
    --arg model "$model_id" \
    '{
      text: $text,
      model_id: $model,
      output_format: "ogg_opus"
    }') || die "failed to build ElevenLabs request payload"

  request_audio \
    "ElevenLabs API" \
    "https://api.elevenlabs.io/v1/text-to-speech/${voice_id}" \
    "xi-api-key: ${ELEVENLABS_API_KEY}" \
    "$payload"
}

# ---------------------------------------------------------------------------
# Provider: OpenAI
# ---------------------------------------------------------------------------

# Synthesize TEXT with the OpenAI speech endpoint and write the audio to OUTPUT.
# Reads OPENAI_API_KEY (required), OPENAI_TTS_VOICE and OPENAI_TTS_MODEL
# (optional). Exits 1 on any failure.
tts_openai() {
  if [ -z "${OPENAI_API_KEY:-}" ]; then
    echo "Error: OPENAI_API_KEY is not set" >&2
    exit 1
  fi

  local voice="${OPENAI_TTS_VOICE:-alloy}"
  local model="${OPENAI_TTS_MODEL:-tts-1}"
  local payload

  # jq performs the JSON escaping of the user-supplied text.
  payload=$(jq -n \
    --arg text "$TEXT" \
    --arg model "$model" \
    --arg voice "$voice" \
    '{
      model: $model,
      input: $text,
      voice: $voice,
      response_format: "opus"
    }') || die "failed to build OpenAI request payload"

  request_audio \
    "OpenAI TTS API" \
    "https://api.openai.com/v1/audio/speech" \
    "Authorization: Bearer ${OPENAI_API_KEY}" \
    "$payload"
}

# ---------------------------------------------------------------------------
# Dispatch
# ---------------------------------------------------------------------------

case "$PROVIDER" in
  elevenlabs)
    tts_elevenlabs
    ;;
  openai)
    tts_openai
    ;;
  *)
    echo "Error: Unknown TTS_PROVIDER: $PROVIDER (supported: elevenlabs, openai)" >&2
    exit 1
    ;;
esac

# Report the generated file to the caller.
printf '%s\n' "$OUTPUT"