Spaces:
Running
Running
# Chunked TTS for long text - splits the text into sentence groups, generates
# audio for each group, and concatenates the results into one file.
# Usage: voice-long.sh "text" [voice] [format]
#   text   - text to synthesize (required)
#   voice  - voice name sent to the TTS endpoint (default: af_alloy)
#   format - audio format / file extension (default: ogg)
TTS_URL="https://hf4uwho-pocket-tts.hf.space/tts"

TEXT="${1:?Usage: voice-long.sh 'text' [voice] [format]}"
VOICE="${2:-af_alloy}"
FORMAT="${3:-ogg}"

OUTDIR="/tmp/tts"
if ! mkdir -p "$OUTDIR"; then
  echo "ERROR: Cannot create $OUTDIR" >&2
  exit 1
fi

# Short id derived from the text, the voice and the current time; it names
# both the final file and the per-run chunk directory.
# printf is used instead of `echo -n` so text that looks like an echo option
# or contains backslashes is hashed literally.
HASH=$(printf '%s' "$TEXT$VOICE$(date +%s%N)" | md5sum | cut -c1-12)
OUTFILE="$OUTDIR/voice_${HASH}.${FORMAT}"
CHUNKDIR="$OUTDIR/chunks_${HASH}"
if ! mkdir -p "$CHUNKDIR"; then
  echo "ERROR: Cannot create $CHUNKDIR" >&2
  exit 1
fi

# Remove the intermediate chunk directory whenever the script exits, so an
# aborted run does not leave partial chunks behind. Single quotes defer the
# expansion of CHUNKDIR until the trap actually runs.
trap 'rm -rf -- "$CHUNKDIR"' EXIT
# Split text into sentence-aligned chunks (rough but effective).
#
# Arguments: $1 - the text to split
# Outputs:   one line per chunk on stdout, formatted "CHUNK_<index>:<text>"
#
# The text is passed to Python as an argument instead of being pasted into
# the Python source, so quotes and backslashes in it are taken literally and
# cannot break or alter the program.
split_text() {
  python3 - "$1" <<'PY'
import re
import sys

# A new chunk is started once adding the next sentence would push the
# current chunk past this many characters.
MAX_CHARS = 300

text = sys.argv[1]

# Split after sentence-ending punctuation followed by whitespace.
sentences = re.split(r'(?<=[.!?])\s+', text)

chunks = []
current = ''
for s in sentences:
    if len(current) + len(s) > MAX_CHARS and current:
        chunks.append(current.strip())
        current = s
    else:
        current = (current + ' ' + s).strip()
if current.strip():
    chunks.append(current.strip())

for i, c in enumerate(chunks):
    # Collapse runs of whitespace (including newlines) so every chunk is
    # emitted on exactly one line; the consumer reads line by line and
    # ignores lines that lack the CHUNK_ prefix.
    flat = ' '.join(c.split())
    print(f'CHUNK_{i}:{flat}')
PY
}

CHUNKS=$(split_text "$TEXT")
# Percent-encode a string for use in a URL query.
#
# Arguments: $1 - the raw string
# Outputs:   the encoded string on stdout
#
# The value reaches Python through argv, never through the Python source, so
# quotes and backslashes in it are encoded literally.
urlencode() {
  python3 -c 'import sys, urllib.parse; print(urllib.parse.quote(sys.argv[1]))' "$1"
}

# Generate each chunk
INDEX=0
CHUNK_FILES=()

# Voice and format are loop-invariant, so encode them once up front.
VOICE_ENC=$(urlencode "$VOICE")
FORMAT_ENC=$(urlencode "$FORMAT")

while IFS= read -r line; do
  # Only lines of the form "CHUNK_<n>:<text>" carry chunk text.
  [[ "$line" == CHUNK_* ]] || continue

  # Strip the index prefix (everything up to the first colon).
  CHUNK_TEXT="${line#*:}"

  if ! ENCODED=$(urlencode "$CHUNK_TEXT"); then
    echo "ERROR: Chunk $INDEX could not be URL-encoded" >&2
    rm -rf -- "${CHUNKDIR:?}"
    exit 1
  fi

  CHUNKFILE="$CHUNKDIR/chunk_${INDEX}.${FORMAT}"
  # -w prints only the HTTP status on stdout; the response body goes to
  # CHUNKFILE. A transport failure leaves a non-200 code and is handled below.
  HTTP_CODE=$(curl -s -w "%{http_code}" -o "$CHUNKFILE" \
    "${TTS_URL}?text=${ENCODED}&voice=${VOICE_ENC}&format=${FORMAT_ENC}" \
    --max-time 60 2>/dev/null)
  if [[ "$HTTP_CODE" != "200" ]]; then
    echo "ERROR: Chunk $INDEX failed with HTTP $HTTP_CODE" >&2
    rm -rf -- "${CHUNKDIR:?}"
    exit 1
  fi

  # Verify audio: keep the chunk only if the response body is an audio file.
  MIME=$(file -b --mime-type "$CHUNKFILE" 2>/dev/null)
  if [[ "$MIME" == audio/* || "$MIME" == application/ogg ]]; then
    CHUNK_FILES+=("$CHUNKFILE")
    INDEX=$((INDEX + 1))
  else
    # INDEX is not advanced, so the next chunk reuses this file name.
    echo "WARNING: Chunk $INDEX not audio ($MIME), skipping" >&2
  fi
done <<< "$CHUNKS"
# Bail out when the generation loop produced no usable audio chunk at all.
(( ${#CHUNK_FILES[@]} > 0 )) || {
  echo "ERROR: No chunks generated" >&2
  rm -rf "$CHUNKDIR"
  exit 1
}
# Combine audio chunk files into a single output file.
#
# Arguments: $1    - output file path
#            $2... - chunk files, in playback order
# Returns:   0 when an output file was written, non-zero otherwise
#
# A single chunk is copied as-is. Several chunks are joined with sox, then
# ffmpeg, then plain cat; a tool that is missing or fails falls through to
# the next one instead of silently leaving no output.
concat_audio() {
  local out=$1
  shift
  (( $# > 0 )) || return 1

  # If only one chunk, just use it
  if (( $# == 1 )); then
    cp -- "$1" "$out"
    return
  fi

  if command -v sox >/dev/null 2>&1 \
    && sox "$@" "$out" 2>/dev/null && [[ -s "$out" ]]; then
    return 0
  fi

  if command -v ffmpeg >/dev/null 2>&1; then
    local list f ok=1
    local quote="'" escaped="'\\''"
    if list=$(mktemp); then
      # Create concat list; a single quote inside a path is written as '\''
      # so it cannot end the quoted path early.
      for f in "$@"; do
        printf "file '%s'\n" "${f//"$quote"/"$escaped"}"
      done > "$list"
      # -nostdin keeps ffmpeg from reading the script's stdin.
      if ffmpeg -nostdin -y -f concat -safe 0 -i "$list" -c copy "$out" 2>/dev/null \
        && [[ -s "$out" ]]; then
        ok=0
      fi
      rm -f -- "$list"
      (( ok == 0 )) && return 0
    fi
  fi

  # Simple cat (works for raw Ogg but may have glitches)
  cat -- "$@" > "$out"
}

concat_audio "$OUTFILE" "${CHUNK_FILES[@]}" \
  || echo "WARNING: Could not assemble $OUTFILE" >&2
# Drop the intermediate chunks, then report the result: the path of the final
# file on stdout, or an error when that file is missing or empty.
rm -rf "$CHUNKDIR"
if [[ ! -f "$OUTFILE" || ! -s "$OUTFILE" ]]; then
  echo "ERROR: Final file empty or missing" >&2
  exit 1
fi
printf '%s\n' "$OUTFILE"