Pocket-TTS

Running

App Files Files Community

Pocket-TTS / scripts /voice-long.sh

hf4uwho

Add utility scripts: voice tools, chunking, batch generation, space restart

7135eb0 about 2 months ago

Raw

History Blame

3.12 kB

	#!/usr/bin/env bash
	# voice-long.sh - chunked text-to-speech for long text.
	#
	# Splits the text into sentence-based chunks, requests audio for each chunk
	# from the TTS endpoint, and joins the resulting audio into a single file.
	#
	# Usage: voice-long.sh "text" [voice] [format]
	#   text    Text to synthesize (required).
	#   voice   Voice name sent to the endpoint (default: af_alloy).
	#   format  Audio format, also used as file extension; alphanumeric only
	#           (default: ogg).
	#
	# Output: prints the path of the generated file on stdout; diagnostics go
	# to stderr. Exits non-zero when a request fails or no audio was produced.
	# Uses: python3, curl, file, md5sum, mktemp; sox or ffmpeg when installed.

	set -euo pipefail

	TTS_URL="https://hf4uwho-pocket-tts.hf.space/tts"
	TEXT="${1:?Usage: voice-long.sh 'text' [voice] [format]}"
	VOICE="${2:-af_alloy}"
	FORMAT="${3:-ogg}"
	OUTDIR="/tmp/tts"

	# Print an error message to stderr and abort the script.
	die() {
	  printf 'ERROR: %s\n' "$*" >&2
	  exit 1
	}

	# Split $1 into chunks of whole sentences and print one chunk per line.
	# The text is handed to Python as an argument (never pasted into the Python
	# source), so quotes and backslashes in the text cannot alter the program.
	split_chunks() {
	  python3 - "$1" <<-'PY'
	import re
	import sys

	# Soft limit: a single sentence longer than this still becomes one chunk.
	MAX_CHARS = 300

	# Collapse every whitespace run (including newlines) to one space so that
	# each chunk is guaranteed to fit on a single output line.
	text = " ".join(sys.argv[1].split())

	chunks = []
	current = ""
	for sentence in re.split(r"(?<=[.!?])\s+", text):
	    if current and len(current) + len(sentence) > MAX_CHARS:
	        chunks.append(current)
	        current = sentence
	    else:
	        current = (current + " " + sentence).strip()
	if current:
	    chunks.append(current)

	for chunk in chunks:
	    print(chunk)
	PY
	}

	# Join the audio files "$2"... into "$1".
	# Tries sox, then ffmpeg's concat demuxer, then plain cat; a method that is
	# missing or fails falls through to the next one.
	concat_chunks() {
	  local out=$1
	  shift

	  if command -v sox &>/dev/null; then
	    if sox "$@" "$out" 2>/dev/null; then
	      return 0
	    fi
	    echo "WARNING: sox failed, trying another method" >&2
	  fi

	  if command -v ffmpeg &>/dev/null; then
	    local list="$CHUNKDIR/concat.txt" f
	    : > "$list"
	    for f in "$@"; do
	      printf "file '%s'\n" "$f" >> "$list"
	    done
	    if ffmpeg -nostdin -y -f concat -safe 0 -i "$list" -c copy "$out" 2>/dev/null; then
	      return 0
	    fi
	    echo "WARNING: ffmpeg failed, falling back to cat" >&2
	  fi

	  # Simple cat (works for raw Ogg but may have glitches)
	  cat -- "$@" > "$out"
	}

	# FORMAT becomes part of file names, so refuse anything that could escape
	# the output directory or break the ffmpeg concat list.
	[[ "$FORMAT" =~ ^[A-Za-z0-9]+$ ]] \
	  || die "Invalid format '$FORMAT' (expected alphanumeric, e.g. ogg)"

	mkdir -p "$OUTDIR"

	# The timestamp makes the name differ between runs with identical input.
	HASH=$(printf '%s' "$TEXT$VOICE$(date +%s%N)" | md5sum | cut -c1-12)
	OUTFILE="$OUTDIR/voice_${HASH}.${FORMAT}"

	# Private scratch directory, removed on every exit path.
	CHUNKDIR=$(mktemp -d "$OUTDIR/chunks_${HASH}.XXXXXX") \
	  || die "Cannot create temporary directory in $OUTDIR"
	trap 'rm -rf -- "${CHUNKDIR:?}"' EXIT

	CHUNK_LIST=$(split_chunks "$TEXT") || die "Failed to split text into chunks"

	# Generate each chunk
	INDEX=0
	CHUNK_FILES=()
	while IFS= read -r CHUNK_TEXT; do
	  [[ -n "$CHUNK_TEXT" ]] || continue
	  CHUNKFILE="$CHUNKDIR/chunk_${INDEX}.${FORMAT}"

	  # -G turns the --data-urlencode pairs into a URL-encoded query string.
	  # "|| true": a transport failure is reported through the status code that
	  # -w still prints, and is handled by the check below.
	  HTTP_CODE=$(curl -sS -G --max-time 60 -o "$CHUNKFILE" -w '%{http_code}' \
	    --data-urlencode "text=${CHUNK_TEXT}" \
	    --data-urlencode "voice=${VOICE}" \
	    --data-urlencode "format=${FORMAT}" \
	    "$TTS_URL") || true

	  if [[ "$HTTP_CODE" != "200" ]]; then
	    die "Chunk $INDEX failed with HTTP $HTTP_CODE"
	  fi

	  # Verify audio
	  MIME=$(file -b --mime-type "$CHUNKFILE" 2>/dev/null) || MIME=""
	  if [[ "$MIME" == audio/* || "$MIME" == application/ogg ]]; then
	    CHUNK_FILES+=("$CHUNKFILE")
	  else
	    echo "WARNING: Chunk $INDEX not audio ($MIME), skipping" >&2
	  fi

	  # Advance for every chunk so file names and reported numbers stay unique.
	  INDEX=$((INDEX + 1))
	done <<< "$CHUNK_LIST"

	if (( ${#CHUNK_FILES[@]} == 0 )); then
	  die "No chunks generated"
	fi

	if (( ${#CHUNK_FILES[@]} == 1 )); then
	  # If only one chunk, just use it
	  cp -- "${CHUNK_FILES[0]}" "$OUTFILE" || die "Cannot write $OUTFILE"
	elif ! concat_chunks "$OUTFILE" "${CHUNK_FILES[@]}"; then
	  rm -f -- "$OUTFILE"
	  die "Failed to concatenate chunks"
	fi

	if [[ -s "$OUTFILE" ]]; then
	  printf '%s\n' "$OUTFILE"
	else
	  die "Final file empty or missing"
	fi