Spaces:

NLP-Debater-Project
/

FastAPI-Backend-Models

Running

FastAPI-Backend-Models / services /tts_service.py

malek-messaoudii

feat: Enhance TTS functionality to support Base64 audio output alongside file response. Introduce new text_to_speech_base64 function for Base64 conversion and update generate_tts endpoint to handle return_base64 parameter.

4c992d0 about 23 hours ago

raw

history blame contribute delete

2.41 kB

	import uuid
	import base64
	from pathlib import Path
	from gtts import gTTS
	from fastapi import HTTPException


	def text_to_speech(
	    text: str,
	    voice: str = "en",
	    fmt: str = "mp3",
	) -> str:
	    """
	    Convert text to speech using gTTS and save it as an MP3 file.

	    The audio is written to ``temp_audio/tts_<8 hex chars>.mp3`` relative to
	    the current working directory. The file is NOT deleted here; the caller
	    owns it once the path is returned.

	    Args:
	        text: Text to synthesize. Leading/trailing whitespace is stripped.
	        voice: Passed to gTTS as its ``lang`` code; a falsy value falls
	            back to ``"en"``. The name is kept for API compatibility.
	        fmt: Output format. Only ``"mp3"`` is accepted.

	    Returns:
	        The path of the generated file, as a string.

	    Raises:
	        ValueError: If ``text`` is empty/whitespace or ``fmt`` is not "mp3".
	        Exception: If directory creation, synthesis or saving fails. The
	            original error is attached as ``__cause__``.
	    """
	    if not text or not text.strip():
	        raise ValueError("Text cannot be empty")

	    if fmt != "mp3":
	        raise ValueError("Only MP3 format is supported by the free TTS backend")

	    temp_dir = Path("temp_audio")
	    output_path = temp_dir / f"tts_{uuid.uuid4().hex[:8]}.{fmt}"

	    try:
	        temp_dir.mkdir(exist_ok=True)

	        # gTTS uses language codes; voice kept for compatibility.
	        tts = gTTS(text=text.strip(), lang=voice or "en")
	        tts.save(str(output_path))
	    except Exception as e:
	        # A failed save can leave a partial file behind that nobody will
	        # ever reference (the path is not returned), so remove it. Cleanup
	        # is best-effort: it must not mask the error being reported.
	        try:
	            output_path.unlink(missing_ok=True)
	        except OSError:
	            pass
	        # Chain the cause so the real traceback is preserved for debugging.
	        raise Exception(f"Unexpected error in text_to_speech: {str(e)}") from e

	    return str(output_path)


	def text_to_speech_base64(
	    text: str,
	    voice: str = "en",
	    fmt: str = "mp3",
	) -> dict:
	    """
	    Convert text to speech using gTTS and return the audio as Base64.

	    The audio is first saved to a temporary file under ``temp_audio/``,
	    read back, Base64-encoded, and the temporary file is then removed
	    whether or not the conversion succeeded.

	    Args:
	        text: Text to synthesize. Leading/trailing whitespace is stripped.
	        voice: Passed to gTTS as its ``lang`` code; a falsy value falls
	            back to ``"en"``.
	        fmt: Output format. Only ``"mp3"`` is accepted.

	    Returns:
	        A dict with the keys ``audio_base64`` (Base64 text), ``mime_type``
	        (always ``"audio/mpeg"``), ``format``, ``filename`` (name the temp
	        file had), ``size_bytes`` (raw audio length) and ``size_base64``
	        (length of the encoded string).

	    Raises:
	        ValueError: If ``text`` is empty/whitespace or ``fmt`` is not "mp3".
	        Exception: If directory creation, synthesis, saving or reading
	            fails. The original error is attached as ``__cause__``.
	    """
	    if not text or not text.strip():
	        raise ValueError("Text cannot be empty")

	    if fmt != "mp3":
	        raise ValueError("Only MP3 format is supported by the free TTS backend")

	    temp_dir = Path("temp_audio")
	    output_filename = f"tts_{uuid.uuid4().hex[:8]}.{fmt}"
	    output_path = temp_dir / output_filename

	    try:
	        temp_dir.mkdir(exist_ok=True)

	        # Generate speech
	        tts = gTTS(text=text.strip(), lang=voice or "en")
	        tts.save(str(output_path))

	        # Read file and convert to Base64
	        audio_bytes = output_path.read_bytes()
	        audio_base64 = base64.b64encode(audio_bytes).decode('utf-8')
	    except Exception as e:
	        # Chain the cause so the real traceback is preserved for debugging.
	        raise Exception(f"Unexpected error in text_to_speech_base64: {str(e)}") from e
	    finally:
	        # Always clean up the temp file, including on failure (previously it
	        # leaked whenever save/read raised). Best-effort: a cleanup problem
	        # must neither mask the real error nor fail a successful conversion.
	        try:
	            output_path.unlink(missing_ok=True)
	        except OSError:
	            pass

	    return {
	        "audio_base64": audio_base64,
	        "mime_type": "audio/mpeg",
	        "format": fmt,
	        "filename": output_filename,
	        "size_bytes": len(audio_bytes),
	        "size_base64": len(audio_base64),
	    }