File size: 2,462 Bytes
1e7709f 4c992d0 c45f0d6 5406818 9930d31 a61d0cb a71355d 9c9026a a71355d c45f0d6 9c9026a 4c992d0 c45f0d6 9c9026a c45f0d6 9c9026a c45f0d6 9c9026a c45f0d6 9c9026a c45f0d6 4c992d0 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 |
import uuid
import base64
from pathlib import Path
from config import GROQ_TTS_API_KEY, GROQ_TTS_MODEL
from gtts import gTTS
from fastapi import HTTPException
def text_to_speech(
    text: str,
    voice: str = "en",
    fmt: str = "mp3",
) -> str:
    """
    Convert text to speech using gTTS and save the result as an MP3 file.

    Args:
        text: Text to synthesize; surrounding whitespace is stripped
            before it is handed to gTTS.
        voice: Passed to gTTS as its ``lang`` argument (falls back to
            "en" when empty). The parameter name is kept for compatibility.
        fmt: Output format; only "mp3" is accepted.

    Returns:
        Path (as a string) of the generated file inside ``temp_audio/``.
        The file is not removed by this function.

    Raises:
        ValueError: If ``text`` is empty/whitespace-only or ``fmt`` is
            not "mp3".
        Exception: If anything fails while generating or saving the audio.
            The underlying error is attached as ``__cause__``.
    """
    if not text or not text.strip():
        raise ValueError("Text cannot be empty")
    if fmt != "mp3":
        raise ValueError("Only MP3 format is supported by the free TTS backend")

    output_path = None
    try:
        temp_dir = Path("temp_audio")
        temp_dir.mkdir(exist_ok=True)
        output_filename = f"tts_{uuid.uuid4().hex[:8]}.{fmt}"
        output_path = temp_dir / output_filename

        # gTTS uses language codes; voice kept for compatibility.
        tts = gTTS(text=text.strip(), lang=voice or "en")
        tts.save(str(output_path))
        return str(output_path)
    except Exception as e:
        # A failed save can leave an empty or partial file behind; remove
        # it so callers never pick up a broken artifact.
        if output_path is not None:
            try:
                output_path.unlink(missing_ok=True)
            except OSError:
                # Best-effort cleanup: never mask the original failure.
                pass
        # Chain the cause so the real error type and traceback survive.
        raise Exception(f"Unexpected error in text_to_speech: {str(e)}") from e
def text_to_speech_base64(
    text: str,
    voice: str = "en",
    fmt: str = "mp3",
) -> dict:
    """
    Convert text to speech using gTTS and return the audio as Base64.

    The audio is written to a temporary file under ``temp_audio/``, read
    back, encoded, and the temporary file is removed whether or not the
    conversion succeeds.

    Args:
        text: Text to synthesize; surrounding whitespace is stripped
            before it is handed to gTTS.
        voice: Passed to gTTS as its ``lang`` argument (falls back to
            "en" when empty). The parameter name is kept for compatibility.
        fmt: Output format; only "mp3" is accepted.

    Returns:
        Dict with keys ``audio_base64`` (Base64 text), ``mime_type``
        ("audio/mpeg"), ``format``, ``filename`` (name used for the
        temporary file), ``size_bytes`` (raw audio length) and
        ``size_base64`` (encoded length).

    Raises:
        ValueError: If ``text`` is empty/whitespace-only or ``fmt`` is
            not "mp3".
        Exception: If anything fails while generating, reading or encoding
            the audio. The underlying error is attached as ``__cause__``.
    """
    if not text or not text.strip():
        raise ValueError("Text cannot be empty")
    if fmt != "mp3":
        raise ValueError("Only MP3 format is supported by the free TTS backend")

    output_path = None
    try:
        temp_dir = Path("temp_audio")
        temp_dir.mkdir(exist_ok=True)
        output_filename = f"tts_{uuid.uuid4().hex[:8]}.{fmt}"
        output_path = temp_dir / output_filename

        # Generate speech
        tts = gTTS(text=text.strip(), lang=voice or "en")
        tts.save(str(output_path))

        # Read file and convert to Base64
        audio_bytes = output_path.read_bytes()
        audio_base64 = base64.b64encode(audio_bytes).decode("utf-8")

        return {
            "audio_base64": audio_base64,
            "mime_type": "audio/mpeg",
            "format": fmt,
            "filename": output_filename,
            "size_bytes": len(audio_bytes),
            "size_base64": len(audio_base64),
        }
    except Exception as e:
        # Chain the cause so the real error type and traceback survive.
        raise Exception(
            f"Unexpected error in text_to_speech_base64: {str(e)}"
        ) from e
    finally:
        # Clean up the temp file on success AND on failure, so a failed
        # save/read/encode does not leak files into temp_audio/.
        if output_path is not None:
            try:
                output_path.unlink(missing_ok=True)
            except OSError:
                # Best-effort cleanup: never mask the result or the
                # original failure with a cleanup error.
                pass