#!/usr/bin/env bash
#
# Chunked TTS for long text: splits the text into groups of sentences,
# requests audio for each group from the TTS endpoint, and concatenates the
# resulting files into a single output file.
#
# Usage: voice-long.sh "text" [voice] [format]
#   text    Text to synthesize (required).
#   voice   Voice name sent to the service (default: af_alloy).
#   format  Audio format, also used as the file extension (default: ogg).
#
# Output:      path of the generated audio file on stdout.
# Exit status: 0 on success, 1 on failure (diagnostics go to stderr).
# Tools used:  python3, curl, file, md5sum, mktemp; sox or ffmpeg for joining
#              when installed, otherwise plain cat.

set -uo pipefail

readonly TTS_URL="https://hf4uwho-pocket-tts.hf.space/tts"
TEXT="${1:?Usage: voice-long.sh 'text' [voice] [format]}"
VOICE="${2:-af_alloy}"
FORMAT="${3:-ogg}"
OUTDIR="/tmp/tts"

# Print an error message to stderr and abort.
die() {
  printf 'ERROR: %s\n' "$*" >&2
  exit 1
}

# URL-encode $1. The value is handed to Python through argv, never spliced
# into the Python source, so quotes and backslashes in it are harmless.
urlencode() {
  python3 -c 'import sys, urllib.parse; print(urllib.parse.quote(sys.argv[1]))' "$1"
}

# FORMAT ends up in file names and in the ffmpeg concat list, so restrict it
# to characters that cannot escape a path or a quoted string.
[[ "$FORMAT" =~ ^[A-Za-z0-9]+$ ]] \
  || die "Invalid format '$FORMAT' (letters and digits only)"

mkdir -p -- "$OUTDIR" || die "Cannot create $OUTDIR"

# The timestamp makes the name unique per invocation even for identical text.
HASH=$(printf '%s' "$TEXT$VOICE$(date +%s%N)" | md5sum | cut -c1-12)
OUTFILE="$OUTDIR/voice_${HASH}.${FORMAT}"

# Private scratch directory; removed on every exit path by the trap.
CHUNKDIR=$(mktemp -d "$OUTDIR/chunks_${HASH}.XXXXXX") \
  || die "Cannot create chunk directory"
trap 'rm -rf -- "$CHUNKDIR"' EXIT

# Split the text on sentence boundaries and pack sentences into chunks: a new
# chunk is started once adding the next sentence would exceed 300 characters.
# Each chunk is printed on exactly one line (internal whitespace, including
# newlines, is collapsed) so the reader loop below cannot lose text.
CHUNKS=$(python3 -c '
import re
import sys

text = sys.argv[1]
sentences = re.split(r"(?<=[.!?])\s+", text)
chunks = []
current = ""
for s in sentences:
    if len(current) + len(s) > 300 and current:
        chunks.append(current.strip())
        current = s
    else:
        current = (current + " " + s).strip()
if current.strip():
    chunks.append(current.strip())
for c in chunks:
    line = " ".join(c.split())
    if line:
        print(line)
' "$TEXT") || die "Failed to split text into chunks"

VOICE_ENC=$(urlencode "$VOICE") || die "Failed to URL-encode voice"

# Generate audio for each chunk, keeping the files in input order.
INDEX=0
CHUNK_FILES=()
while IFS= read -r CHUNK_TEXT; do
  [[ -n "$CHUNK_TEXT" ]] || continue

  ENCODED=$(urlencode "$CHUNK_TEXT") || die "Failed to URL-encode chunk $INDEX"
  CHUNKFILE="$CHUNKDIR/chunk_${INDEX}.${FORMAT}"

  # On a transport failure curl reports the code as 000, which is handled
  # by the same check as a non-200 response.
  HTTP_CODE=$(curl -s -w '%{http_code}' -o "$CHUNKFILE" --max-time 60 \
    "${TTS_URL}?text=${ENCODED}&voice=${VOICE_ENC}&format=${FORMAT}" \
    2>/dev/null)
  if [[ "$HTTP_CODE" != "200" ]]; then
    echo "ERROR: Chunk $INDEX failed with HTTP $HTTP_CODE" >&2
    exit 1
  fi

  # A 200 response may still carry a non-audio body; such chunks are
  # skipped (best effort) rather than aborting the whole run.
  MIME=$(file -b --mime-type "$CHUNKFILE" 2>/dev/null)
  if [[ "$MIME" == audio/* || "$MIME" == application/ogg ]]; then
    CHUNK_FILES+=("$CHUNKFILE")
  else
    echo "WARNING: Chunk $INDEX not audio ($MIME), skipping" >&2
  fi
  INDEX=$((INDEX + 1))
done <<< "$CHUNKS"

if [[ ${#CHUNK_FILES[@]} -eq 0 ]]; then
  echo "ERROR: No chunks generated" >&2
  exit 1
fi

if [[ ${#CHUNK_FILES[@]} -eq 1 ]]; then
  # A single chunk needs no joining.
  cp -- "${CHUNK_FILES[0]}" "$OUTFILE"
else
  # Join with sox, then ffmpeg; plain cat is used only when neither tool is
  # installed (works for raw Ogg but may have glitches).
  MERGED=0
  HAVE_TOOL=0

  if command -v sox >/dev/null 2>&1; then
    HAVE_TOOL=1
    if sox "${CHUNK_FILES[@]}" "$OUTFILE" 2>/dev/null; then
      MERGED=1
    fi
  fi

  if [[ "$MERGED" -eq 0 ]] && command -v ffmpeg >/dev/null 2>&1; then
    HAVE_TOOL=1
    # Concat demuxer list. Paths are built from a fixed directory, a hex
    # hash, the mktemp suffix and the validated FORMAT, so single-quoting
    # them here is safe.
    CONCFILE="$CHUNKDIR/concat.txt"
    : > "$CONCFILE"
    for f in "${CHUNK_FILES[@]}"; do
      printf "file '%s'\n" "$f" >> "$CONCFILE"
    done
    if ffmpeg -y -f concat -safe 0 -i "$CONCFILE" -c copy "$OUTFILE" \
      </dev/null 2>/dev/null; then
      MERGED=1
    fi
  fi

  if [[ "$HAVE_TOOL" -eq 0 ]]; then
    if cat -- "${CHUNK_FILES[@]}" > "$OUTFILE"; then
      MERGED=1
    fi
  fi

  # Do not let a partial file from a failed join pass the final check.
  if [[ "$MERGED" -eq 0 ]]; then
    rm -f -- "$OUTFILE"
  fi
fi

if [[ -s "$OUTFILE" ]]; then
  echo "$OUTFILE"
else
  echo "ERROR: Final file empty or missing" >&2
  exit 1
fi