feat: add voice memo responses via TTS (#394)
This commit is contained in:
130
skills/voice-memo/lettabot-tts
Executable file
130
skills/voice-memo/lettabot-tts
Executable file
@@ -0,0 +1,130 @@
|
||||
#!/usr/bin/env bash
|
||||
# lettabot-tts - Generate speech audio via configurable TTS provider
|
||||
#
|
||||
# Usage: lettabot-tts <text> [output_path]
|
||||
#
|
||||
# Environment:
|
||||
# TTS_PROVIDER - Optional. "elevenlabs" (default) or "openai".
|
||||
#
|
||||
# ElevenLabs:
|
||||
# ELEVENLABS_API_KEY - Required. API key.
|
||||
# ELEVENLABS_VOICE_ID - Optional. Voice ID (default: onwK4e9ZLuTAKqWW03F9).
|
||||
# ELEVENLABS_MODEL_ID - Optional. Model ID (default: eleven_multilingual_v2).
|
||||
#
|
||||
# OpenAI:
|
||||
# OPENAI_API_KEY - Required. API key.
|
||||
# OPENAI_TTS_VOICE - Optional. Voice name (default: alloy).
|
||||
# OPENAI_TTS_MODEL - Optional. Model (default: tts-1).
|
||||
|
||||
set -euo pipefail

# First positional argument is the text to synthesize; abort with the usage
# message when it is missing or empty.
TEXT="${1:?Usage: lettabot-tts <text> [output_path]}"

# The session subprocess CWD is set to workingDir (bot.ts:642), which is the
# same base directory that <send-file> directives resolve from. This means
# $(pwd) and LETTABOT_WORKING_DIR produce paths in the correct coordinate space.
OUTBOUND_DIR="${LETTABOT_WORKING_DIR:-$(pwd)}/data/outbound"

# Backend selection; falls back to ElevenLabs when TTS_PROVIDER is unset/empty.
PROVIDER="${TTS_PROVIDER:-elevenlabs}"

# Ensure output directory exists
mkdir -p "$OUTBOUND_DIR"

# Use collision-safe random filenames when output path is not explicitly provided.
if [ -n "${2:-}" ]; then
  OUTPUT="$2"
else
  # Clean stale voice files older than 1 hour. Deliberately best-effort: a
  # failed cleanup must never prevent generating a new voice memo.
  find "$OUTBOUND_DIR" -name 'voice-*.ogg' -mmin +60 -delete 2>/dev/null || true

  # BSD/macOS mktemp only substitutes X's at the very end of the template, so
  # a "voice-XXXXXXXXXX.ogg" template would yield a literal, non-unique name
  # there. Reserve a unique suffix-less name first, then add the extension.
  output_stem=$(mktemp "${OUTBOUND_DIR}/voice-XXXXXXXXXX")
  OUTPUT="${output_stem}.ogg"
  mv -- "$output_stem" "$OUTPUT"
fi
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Provider: ElevenLabs
|
||||
# ---------------------------------------------------------------------------
|
||||
# Synthesize $TEXT with the ElevenLabs text-to-speech API and write the
# response body to $OUTPUT.
#
# Globals read:
#   TEXT, OUTPUT          - text to speak and destination file
#   ELEVENLABS_API_KEY    - required
#   ELEVENLABS_VOICE_ID   - optional, defaults to onwK4e9ZLuTAKqWW03F9
#   ELEVENLABS_MODEL_ID   - optional, defaults to eleven_multilingual_v2
# Outputs: diagnostics on stderr only.
# Exits the script with status 1 when the API key is missing, the request
# body cannot be built, curl itself fails, or the API answers with a non-2xx
# status. In the last two cases $OUTPUT is removed first.
tts_elevenlabs() {
  if [ -z "${ELEVENLABS_API_KEY:-}" ]; then
    echo "Error: ELEVENLABS_API_KEY is not set" >&2
    exit 1
  fi

  local voice_id="${ELEVENLABS_VOICE_ID:-onwK4e9ZLuTAKqWW03F9}"
  local model_id="${ELEVENLABS_MODEL_ID:-eleven_multilingual_v2}"

  # Build the JSON body up front so a jq failure is detected instead of being
  # masked inside curl's argument list.
  # NOTE(review): ElevenLabs' API reference appears to take output_format as a
  # query parameter (e.g. opus_48000_64) rather than a body field -- confirm
  # that this field is honored and the response really is Ogg/Opus.
  local payload
  if ! payload=$(jq -n \
    --arg text "$TEXT" \
    --arg model "$model_id" \
    '{
      text: $text,
      model_id: $model,
      output_format: "ogg_opus"
    }'); then
    echo "Error: failed to build ElevenLabs request body" >&2
    exit 1
  fi

  # Capture curl's own exit status explicitly: with -s a transport failure
  # prints nothing, and under `set -e` the script would otherwise die silently
  # and leave an empty/partial $OUTPUT behind. -S re-enables curl's error text.
  local http_code curl_status=0
  http_code=$(curl -sS -w "%{http_code}" -o "$OUTPUT" \
    "https://api.elevenlabs.io/v1/text-to-speech/${voice_id}" \
    -H "xi-api-key: ${ELEVENLABS_API_KEY}" \
    -H "Content-Type: application/json" \
    -d "$payload") || curl_status=$?

  if [ "$curl_status" -ne 0 ]; then
    echo "Error: curl request to ElevenLabs failed (exit $curl_status)" >&2
    rm -f -- "$OUTPUT"
    exit 1
  fi

  # Only 200-299 counts as success; anything else (including an empty or
  # non-numeric status) is treated as an API error.
  case "$http_code" in
    2[0-9][0-9]) ;;
    *)
      echo "Error: ElevenLabs API returned HTTP $http_code" >&2
      # Echo the error body only when it looks textual. `file -b` omits the
      # path so a directory named e.g. "text" cannot cause a false match.
      if file -b "$OUTPUT" 2>/dev/null | grep -Eq 'text|JSON|ASCII'; then
        cat -- "$OUTPUT" >&2
      fi
      rm -f -- "$OUTPUT"
      exit 1
      ;;
  esac
}
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Provider: OpenAI
|
||||
# ---------------------------------------------------------------------------
|
||||
# Synthesize $TEXT with the OpenAI speech endpoint and write the response
# body to $OUTPUT.
#
# Globals read:
#   TEXT, OUTPUT       - text to speak and destination file
#   OPENAI_API_KEY     - required
#   OPENAI_TTS_VOICE   - optional, defaults to alloy
#   OPENAI_TTS_MODEL   - optional, defaults to tts-1
# Outputs: diagnostics on stderr only.
# Exits the script with status 1 when the API key is missing, the request
# body cannot be built, curl itself fails, or the API answers with a non-2xx
# status. In the last two cases $OUTPUT is removed first.
tts_openai() {
  if [ -z "${OPENAI_API_KEY:-}" ]; then
    echo "Error: OPENAI_API_KEY is not set" >&2
    exit 1
  fi

  local voice="${OPENAI_TTS_VOICE:-alloy}"
  local model="${OPENAI_TTS_MODEL:-tts-1}"

  # Build the JSON body up front so a jq failure is detected instead of being
  # masked inside curl's argument list.
  local payload
  if ! payload=$(jq -n \
    --arg text "$TEXT" \
    --arg model "$model" \
    --arg voice "$voice" \
    '{
      model: $model,
      input: $text,
      voice: $voice,
      response_format: "opus"
    }'); then
    echo "Error: failed to build OpenAI TTS request body" >&2
    exit 1
  fi

  # Capture curl's own exit status explicitly: with -s a transport failure
  # prints nothing, and under `set -e` the script would otherwise die silently
  # and leave an empty/partial $OUTPUT behind. -S re-enables curl's error text.
  local http_code curl_status=0
  http_code=$(curl -sS -w "%{http_code}" -o "$OUTPUT" \
    "https://api.openai.com/v1/audio/speech" \
    -H "Authorization: Bearer ${OPENAI_API_KEY}" \
    -H "Content-Type: application/json" \
    -d "$payload") || curl_status=$?

  if [ "$curl_status" -ne 0 ]; then
    echo "Error: curl request to OpenAI TTS failed (exit $curl_status)" >&2
    rm -f -- "$OUTPUT"
    exit 1
  fi

  # Only 200-299 counts as success; anything else (including an empty or
  # non-numeric status) is treated as an API error.
  case "$http_code" in
    2[0-9][0-9]) ;;
    *)
      echo "Error: OpenAI TTS API returned HTTP $http_code" >&2
      # Echo the error body only when it looks textual. `file -b` omits the
      # path so a directory named e.g. "text" cannot cause a false match.
      if file -b "$OUTPUT" 2>/dev/null | grep -Eq 'text|JSON|ASCII'; then
        cat -- "$OUTPUT" >&2
      fi
      rm -f -- "$OUTPUT"
      exit 1
      ;;
  esac
}
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Dispatch
|
||||
# ---------------------------------------------------------------------------
|
||||
# Run the backend named by $PROVIDER; any other value is a fatal error.
if [ "$PROVIDER" = "elevenlabs" ]; then
  tts_elevenlabs
elif [ "$PROVIDER" = "openai" ]; then
  tts_openai
else
  echo "Error: Unknown TTS_PROVIDER: $PROVIDER (supported: elevenlabs, openai)" >&2
  exit 1
fi

# Print the path of the audio file on stdout.
echo "$OUTPUT"
|
||||
Reference in New Issue
Block a user