import base64
import uuid
from pathlib import Path

from fastapi import HTTPException
from gtts import gTTS

# Directory (relative to the current working directory) for generated audio.
_TEMP_AUDIO_DIR = Path("temp_audio")

# The only output format the gTTS backend is used for here.
_SUPPORTED_FORMAT = "mp3"


def _validate_request(text: str, fmt: str) -> None:
    """Raise ValueError when the text is blank or the format is not MP3."""
    if not text or not text.strip():
        raise ValueError("Text cannot be empty")
    if fmt != _SUPPORTED_FORMAT:
        raise ValueError("Only MP3 format is supported by the free TTS backend")


def _new_output_path(fmt: str) -> Path:
    """Create the temp directory if needed and return a fresh output path."""
    _TEMP_AUDIO_DIR.mkdir(parents=True, exist_ok=True)
    return _TEMP_AUDIO_DIR / f"tts_{uuid.uuid4().hex[:8]}.{fmt}"


def _synthesize(text: str, voice: str, destination: Path) -> None:
    """Generate speech for the stripped text with gTTS and save it."""
    # gTTS uses language codes; `voice` is kept for compatibility and falls
    # back to English when empty.
    tts = gTTS(text=text.strip(), lang=voice or "en")
    tts.save(str(destination))


def text_to_speech(
    text: str,
    voice: str = "en",
    fmt: str = "mp3",
) -> str:
    """
    Convert text to speech using gTTS (Google Translate, free).

    Only MP3 is supported. The audio is written to a uniquely named file
    under ``temp_audio/`` and is NOT deleted by this function; the caller
    owns the file.

    Args:
        text: Text to synthesize; must contain non-whitespace characters.
        voice: gTTS language code (defaults to "en" when empty).
        fmt: Output format; must be "mp3".

    Returns:
        Path of the generated file, as a string.

    Raises:
        ValueError: If the text is blank or the format is not "mp3".
        Exception: If generation fails; the original error is chained as
            ``__cause__``.
    """
    _validate_request(text, fmt)

    try:
        output_path = _new_output_path(fmt)
        try:
            _synthesize(text, voice, output_path)
        except Exception:
            # Do not leave a partial/empty file behind for a failed request.
            output_path.unlink(missing_ok=True)
            raise
        return str(output_path)
    except Exception as e:
        raise Exception(f"Unexpected error in text_to_speech: {e}") from e


def text_to_speech_base64(
    text: str,
    voice: str = "en",
    fmt: str = "mp3",
) -> dict:
    """
    Convert text to speech and return the audio as Base64.

    Only MP3 is supported. The audio is generated into a temporary file
    under ``temp_audio/`` which is always removed before returning or
    raising.

    Args:
        text: Text to synthesize; must contain non-whitespace characters.
        voice: gTTS language code (defaults to "en" when empty).
        fmt: Output format; must be "mp3".

    Returns:
        Dict with keys ``audio_base64``, ``mime_type``, ``format``,
        ``filename``, ``size_bytes`` and ``size_base64``.

    Raises:
        ValueError: If the text is blank or the format is not "mp3".
        Exception: If generation or reading fails; the original error is
            chained as ``__cause__``.
    """
    _validate_request(text, fmt)

    try:
        output_path = _new_output_path(fmt)
        try:
            _synthesize(text, voice, output_path)
            audio_bytes = output_path.read_bytes()
        finally:
            # Clean up the temp file on success AND on failure.
            output_path.unlink(missing_ok=True)

        audio_base64 = base64.b64encode(audio_bytes).decode('utf-8')
        return {
            "audio_base64": audio_base64,
            "mime_type": "audio/mpeg",
            "format": fmt,
            "filename": output_path.name,
            "size_bytes": len(audio_bytes),
            "size_base64": len(audio_base64)
        }
    except Exception as e:
        raise Exception(f"Unexpected error in text_to_speech_base64: {e}") from e