malek-messaoudii
feat: Enhance TTS functionality to support Base64 audio output alongside file response. Introduce new text_to_speech_base64 function for Base64 conversion and update generate_tts endpoint to handle return_base64 parameter.
4c992d0
| import uuid | |
| import base64 | |
| from pathlib import Path | |
| from gtts import gTTS | |
| from fastapi import HTTPException | |
def text_to_speech(
    text: str,
    voice: str = "en",
    fmt: str = "mp3",
) -> str:
    """
    Convert text to speech using gTTS (Google Translate, free).

    Only MP3 is supported. The audio is written to a uniquely named file
    inside the relative ``temp_audio`` directory (created on demand). This
    function does not delete the file after a successful call.

    Args:
        text: Text to synthesise; surrounding whitespace is stripped.
        voice: Passed to gTTS as its ``lang`` argument (kept under this name
            for compatibility). Falls back to ``"en"`` when falsy.
        fmt: Output format; must be exactly ``"mp3"``.

    Returns:
        Path of the generated MP3 file, as a string.

    Raises:
        ValueError: If ``text`` is empty or whitespace-only, or ``fmt`` is
            not ``"mp3"``.
        Exception: If anything fails while generating or saving the audio.
            The original error is attached as ``__cause__``.
    """
    if not text or not text.strip():
        raise ValueError("Text cannot be empty")
    if fmt != "mp3":
        raise ValueError("Only MP3 format is supported by the free TTS backend")

    # Stays None until a target path exists, so the error handler knows
    # whether there is anything to clean up.
    output_path = None
    try:
        temp_dir = Path("temp_audio")
        temp_dir.mkdir(exist_ok=True)
        output_filename = f"tts_{uuid.uuid4().hex[:8]}.{fmt}"
        output_path = temp_dir / output_filename

        # gTTS uses language codes; voice kept for compatibility.
        tts = gTTS(text=text.strip(), lang=voice or "en")
        tts.save(str(output_path))
        return str(output_path)
    except Exception as e:
        # A failed save may leave an empty or partial file behind; remove it
        # so failed requests do not accumulate junk in temp_audio.
        if output_path is not None:
            try:
                output_path.unlink(missing_ok=True)
            except OSError:
                # Best-effort cleanup: never mask the original failure.
                pass
        raise Exception(f"Unexpected error in text_to_speech: {str(e)}") from e
def text_to_speech_base64(
    text: str,
    voice: str = "en",
    fmt: str = "mp3",
) -> dict:
    """
    Convert text to speech and return the audio as Base64.

    Only MP3 is supported. The audio is first saved to a uniquely named
    temporary file inside the relative ``temp_audio`` directory, read back,
    Base64-encoded, and the temporary file is then removed whether or not
    the conversion succeeded.

    Args:
        text: Text to synthesise; surrounding whitespace is stripped.
        voice: Passed to gTTS as its ``lang`` argument. Falls back to
            ``"en"`` when falsy.
        fmt: Output format; must be exactly ``"mp3"``.

    Returns:
        A dict with the keys:
            ``audio_base64``: Base64 text of the MP3 bytes.
            ``mime_type``: always ``"audio/mpeg"``.
            ``format``: the ``fmt`` argument.
            ``filename``: name that was used for the temporary file.
            ``size_bytes``: length of the raw audio in bytes.
            ``size_base64``: length of the Base64 string.

    Raises:
        ValueError: If ``text`` is empty or whitespace-only, or ``fmt`` is
            not ``"mp3"``.
        Exception: If anything fails while generating, reading or encoding
            the audio. The original error is attached as ``__cause__``.
    """
    if not text or not text.strip():
        raise ValueError("Text cannot be empty")
    if fmt != "mp3":
        raise ValueError("Only MP3 format is supported by the free TTS backend")

    # Stays None until a target path exists, so the cleanup step knows
    # whether there is anything to remove.
    output_path = None
    try:
        temp_dir = Path("temp_audio")
        temp_dir.mkdir(exist_ok=True)
        output_filename = f"tts_{uuid.uuid4().hex[:8]}.{fmt}"
        output_path = temp_dir / output_filename

        # Generate speech
        tts = gTTS(text=text.strip(), lang=voice or "en")
        tts.save(str(output_path))

        # Read file and convert to Base64
        audio_bytes = output_path.read_bytes()
        audio_base64 = base64.b64encode(audio_bytes).decode("utf-8")

        return {
            "audio_base64": audio_base64,
            "mime_type": "audio/mpeg",
            "format": fmt,
            "filename": output_filename,
            "size_bytes": len(audio_bytes),
            "size_base64": len(audio_base64),
        }
    except Exception as e:
        raise Exception(
            f"Unexpected error in text_to_speech_base64: {str(e)}"
        ) from e
    finally:
        # Clean up the temp file on every exit path, not only on success,
        # so failed conversions do not leak files into temp_audio.
        if output_path is not None:
            try:
                output_path.unlink(missing_ok=True)
            except OSError:
                # Best-effort cleanup: a cleanup failure must neither hide
                # the real error nor discard successfully generated audio.
                pass