Files
lettabot/skills/voice-memo/lettabot-tts
2026-02-25 16:47:33 -08:00

131 lines
3.9 KiB
Bash
Executable File

#!/usr/bin/env bash
# lettabot-tts - Generate speech audio via configurable TTS provider
#
# Usage: lettabot-tts <text> [output_path]
#
# Environment:
# TTS_PROVIDER - Optional. "elevenlabs" (default) or "openai".
#
# ElevenLabs:
# ELEVENLABS_API_KEY - Required. API key.
# ELEVENLABS_VOICE_ID - Optional. Voice ID (default: onwK4e9ZLuTAKqWW03F9).
# ELEVENLABS_MODEL_ID - Optional. Model ID (default: eleven_multilingual_v2).
#
# OpenAI:
# OPENAI_API_KEY - Required. API key.
# OPENAI_TTS_VOICE - Optional. Voice name (default: alloy).
# OPENAI_TTS_MODEL - Optional. Model (default: tts-1).
# Abort on command failure, on use of an unset variable, and on a failure in
# any stage of a pipeline.
set -euo pipefail

# $1 is the text to speak; the :? expansion aborts with the usage message when
# it is missing or empty.
TEXT="${1:?Usage: lettabot-tts <text> [output_path]}"

# The session subprocess CWD is set to workingDir (bot.ts:642), which is the
# same base directory that <send-file> directives resolve from. This means
# $(pwd) and LETTABOT_WORKING_DIR produce paths in the correct coordinate space.
OUTBOUND_DIR="${LETTABOT_WORKING_DIR:-$(pwd)}/data/outbound"
PROVIDER="${TTS_PROVIDER:-elevenlabs}"

# Ensure output directory exists
mkdir -p "$OUTBOUND_DIR"

# EXIT handler, installed only for auto-generated output files: when the
# script ends with a non-zero status, remove the placeholder that mktemp
# created so failed runs (missing API key, unknown provider, transport error)
# do not leave empty voice-*.ogg files behind. The script's exit status is
# left unchanged.
remove_generated_output_on_failure() {
  local status=$?
  if [ "$status" -ne 0 ]; then
    rm -f -- "$OUTPUT" "${OUTPUT%.ogg}"
  fi
}

# Use collision-safe random filenames when output path is not explicitly provided.
if [ -n "${2:-}" ]; then
  OUTPUT="$2"
else
  # Clean stale voice files older than 1 hour. Best effort: errors are
  # deliberately ignored so a cleanup problem never blocks synthesis.
  find "$OUTBOUND_DIR" -name 'voice-*.ogg' -mmin +60 -delete 2>/dev/null || true

  # BSD/macOS mktemp only substitutes X's at the very end of the template, so
  # create the unique file without a suffix and then rename it to add ".ogg".
  output_stem=$(mktemp "${OUTBOUND_DIR}/voice-XXXXXXXXXX")
  OUTPUT="${output_stem}.ogg"
  trap remove_generated_output_on_failure EXIT
  mv -- "$output_stem" "$OUTPUT"
fi
# ---------------------------------------------------------------------------
# Provider: ElevenLabs
# ---------------------------------------------------------------------------
#######################################
# Synthesize $TEXT with the ElevenLabs text-to-speech API and store the
# response body in $OUTPUT.
# Globals:
#   TEXT                 (read) text to speak
#   OUTPUT               (read) destination file; removed on any failure
#   ELEVENLABS_API_KEY   (read) required API key
#   ELEVENLABS_VOICE_ID  (read) optional, default onwK4e9ZLuTAKqWW03F9
#   ELEVENLABS_MODEL_ID  (read) optional, default eleven_multilingual_v2
# Outputs:
#   Diagnostics (and a textual API error body, if any) on stderr.
# Exits:
#   1 when the API key is missing, the request body cannot be built, curl
#   fails, or the API answers with a non-2xx status.
#######################################
tts_elevenlabs() {
  if [ -z "${ELEVENLABS_API_KEY:-}" ]; then
    echo "Error: ELEVENLABS_API_KEY is not set" >&2
    exit 1
  fi

  local voice_id="${ELEVENLABS_VOICE_ID:-onwK4e9ZLuTAKqWW03F9}"
  local model_id="${ELEVENLABS_MODEL_ID:-eleven_multilingual_v2}"

  # Build the JSON body up front so a jq failure is reported directly instead
  # of being swallowed inside curl's argument list.
  # NOTE(review): ElevenLabs' API reference lists output_format as a URL query
  # parameter -- confirm that this body field is honored and that "ogg_opus"
  # is an accepted value.
  local payload
  if ! payload=$(jq -n \
    --arg text "$TEXT" \
    --arg model "$model_id" \
    '{
      text: $text,
      model_id: $model,
      output_format: "ogg_opus"
    }'); then
    echo "Error: failed to build ElevenLabs request body (is jq installed?)" >&2
    rm -f "$OUTPUT"
    exit 1
  fi

  # Capture curl's exit status explicitly: under `set -e` a transport failure
  # (DNS, TLS, timeout) would otherwise terminate the script silently and leave
  # a partial $OUTPUT behind. -S keeps curl's own error message visible.
  local http_code
  local curl_status=0
  http_code=$(curl -sS -w "%{http_code}" -o "$OUTPUT" \
    "https://api.elevenlabs.io/v1/text-to-speech/${voice_id}" \
    -H "xi-api-key: ${ELEVENLABS_API_KEY}" \
    -H "Content-Type: application/json" \
    -d "$payload") || curl_status=$?

  if [ "$curl_status" -ne 0 ]; then
    echo "Error: ElevenLabs request failed (curl exit $curl_status)" >&2
    rm -f "$OUTPUT"
    exit 1
  fi

  if [ "$http_code" -lt 200 ] || [ "$http_code" -ge 300 ]; then
    echo "Error: ElevenLabs API returned HTTP $http_code" >&2
    # On errors the body is usually a textual/JSON message; echo it only when
    # `file` classifies it as text so binary data never reaches the terminal.
    if file "$OUTPUT" | grep -Eq 'text|JSON|ASCII'; then
      cat "$OUTPUT" >&2
    fi
    rm -f "$OUTPUT"
    exit 1
  fi
}
# ---------------------------------------------------------------------------
# Provider: OpenAI
# ---------------------------------------------------------------------------
#######################################
# Synthesize $TEXT with the OpenAI speech API (Opus response format) and store
# the response body in $OUTPUT.
# Globals:
#   TEXT              (read) text to speak
#   OUTPUT            (read) destination file; removed on any failure
#   OPENAI_API_KEY    (read) required API key
#   OPENAI_TTS_VOICE  (read) optional, default alloy
#   OPENAI_TTS_MODEL  (read) optional, default tts-1
# Outputs:
#   Diagnostics (and a textual API error body, if any) on stderr.
# Exits:
#   1 when the API key is missing, the request body cannot be built, curl
#   fails, or the API answers with a non-2xx status.
#######################################
tts_openai() {
  if [ -z "${OPENAI_API_KEY:-}" ]; then
    echo "Error: OPENAI_API_KEY is not set" >&2
    exit 1
  fi

  local voice="${OPENAI_TTS_VOICE:-alloy}"
  local model="${OPENAI_TTS_MODEL:-tts-1}"

  # Build the JSON body up front so a jq failure is reported directly instead
  # of being swallowed inside curl's argument list.
  local payload
  if ! payload=$(jq -n \
    --arg text "$TEXT" \
    --arg model "$model" \
    --arg voice "$voice" \
    '{
      model: $model,
      input: $text,
      voice: $voice,
      response_format: "opus"
    }'); then
    echo "Error: failed to build OpenAI request body (is jq installed?)" >&2
    rm -f "$OUTPUT"
    exit 1
  fi

  # Capture curl's exit status explicitly: under `set -e` a transport failure
  # (DNS, TLS, timeout) would otherwise terminate the script silently and leave
  # a partial $OUTPUT behind. -S keeps curl's own error message visible.
  local http_code
  local curl_status=0
  http_code=$(curl -sS -w "%{http_code}" -o "$OUTPUT" \
    "https://api.openai.com/v1/audio/speech" \
    -H "Authorization: Bearer ${OPENAI_API_KEY}" \
    -H "Content-Type: application/json" \
    -d "$payload") || curl_status=$?

  if [ "$curl_status" -ne 0 ]; then
    echo "Error: OpenAI TTS request failed (curl exit $curl_status)" >&2
    rm -f "$OUTPUT"
    exit 1
  fi

  if [ "$http_code" -lt 200 ] || [ "$http_code" -ge 300 ]; then
    echo "Error: OpenAI TTS API returned HTTP $http_code" >&2
    # On errors the body is usually a textual/JSON message; echo it only when
    # `file` classifies it as text so binary data never reaches the terminal.
    if file "$OUTPUT" | grep -Eq 'text|JSON|ASCII'; then
      cat "$OUTPUT" >&2
    fi
    rm -f "$OUTPUT"
    exit 1
  fi
}
# ---------------------------------------------------------------------------
# Dispatch
# ---------------------------------------------------------------------------
# Hand off to the provider selected via TTS_PROVIDER. Each provider function
# either fills $OUTPUT with audio or terminates the script with status 1.
if [[ "$PROVIDER" == "elevenlabs" ]]; then
  tts_elevenlabs
elif [[ "$PROVIDER" == "openai" ]]; then
  tts_openai
else
  # Anything else is a configuration error.
  echo "Error: Unknown TTS_PROVIDER: $PROVIDER (supported: elevenlabs, openai)" >&2
  exit 1
fi

# Success: print the path of the generated audio file on stdout.
echo "$OUTPUT"