Spaces:

NLP-Debater-Project
/

FastAPI-Backend-Models

Running

File size: 2,462 Bytes

1e7709f
4c992d0
c45f0d6
5406818
 
9930d31
a61d0cb
a71355d
9c9026a
 
 
a71355d
c45f0d6
9c9026a
 
4c992d0
c45f0d6
9c9026a
 
 
 
 
 
c45f0d6
 
 
9c9026a
c45f0d6
 
9c9026a
 
 
 
 
c45f0d6
9c9026a
c45f0d6
4c992d0

import uuid
import base64
from pathlib import Path
from config import GROQ_TTS_API_KEY, GROQ_TTS_MODEL
from gtts import gTTS
from fastapi import HTTPException

def text_to_speech(
    text: str,
    voice: str = "en",
    fmt: str = "mp3",
) -> str:
    """
    Convert text to speech using gTTS (Google Translate, free) and save it
    as an MP3 file.

    The audio is written to ``temp_audio/tts_<8 hex chars>.mp3`` relative to
    the current working directory; the directory is created when missing.
    The file is NOT deleted here -- the caller owns the returned path.

    Args:
        text: Text to synthesize. Surrounding whitespace is stripped.
        voice: Forwarded to gTTS as its ``lang`` argument; an empty/None
            value falls back to ``"en"``.
        fmt: Output format. Only ``"mp3"`` is accepted.

    Returns:
        Path of the generated audio file, as a string.

    Raises:
        ValueError: If ``text`` is empty or whitespace-only, or ``fmt`` is
            not ``"mp3"``.
        Exception: If creating the directory or generating the audio fails.
            The original error is attached as ``__cause__``.
    """
    if not text or not text.strip():
        raise ValueError("Text cannot be empty")

    if fmt != "mp3":
        raise ValueError("Only MP3 format is supported by the free TTS backend")

    # Tracked outside the try so the error handler knows whether a file
    # may already exist on disk.
    output_path = None
    try:
        temp_dir = Path("temp_audio")
        temp_dir.mkdir(exist_ok=True)

        output_filename = f"tts_{uuid.uuid4().hex[:8]}.{fmt}"
        output_path = temp_dir / output_filename

        # gTTS uses language codes; voice kept for compatibility.
        tts = gTTS(text=text.strip(), lang=voice or "en")
        tts.save(str(output_path))

        return str(output_path)

    except Exception as e:
        # A failed save can leave a partial/empty file behind; since the
        # path is never returned on failure, nobody else could clean it up.
        if output_path is not None:
            try:
                output_path.unlink(missing_ok=True)
            except OSError:
                # Best-effort cleanup: report the original failure instead.
                pass
        raise Exception(f"Unexpected error in text_to_speech: {str(e)}") from e


def text_to_speech_base64(
    text: str,
    voice: str = "en",
    fmt: str = "mp3",
) -> dict:
    """
    Convert text to speech and return the audio as a Base64 string.

    The audio is generated with gTTS into a temporary file under
    ``temp_audio/`` (relative to the current working directory), read back,
    Base64-encoded, and the temporary file is removed whether or not the
    conversion succeeds.

    Args:
        text: Text to synthesize. Surrounding whitespace is stripped.
        voice: Forwarded to gTTS as its ``lang`` argument; an empty/None
            value falls back to ``"en"``.
        fmt: Output format. Only ``"mp3"`` is accepted.

    Returns:
        Dict with keys ``audio_base64`` (encoded audio), ``mime_type``
        (``"audio/mpeg"``), ``format``, ``filename`` (name the temporary
        file had), ``size_bytes`` (raw audio length) and ``size_base64``
        (encoded string length).

    Raises:
        ValueError: If ``text`` is empty or whitespace-only, or ``fmt`` is
            not ``"mp3"``.
        Exception: If creating the directory, generating, or reading the
            audio fails. The original error is attached as ``__cause__``.
    """
    if not text or not text.strip():
        raise ValueError("Text cannot be empty")

    if fmt != "mp3":
        raise ValueError("Only MP3 format is supported by the free TTS backend")

    # Tracked outside the try so the cleanup step knows whether a file may
    # exist on disk.
    output_path = None
    try:
        temp_dir = Path("temp_audio")
        temp_dir.mkdir(exist_ok=True)

        output_filename = f"tts_{uuid.uuid4().hex[:8]}.{fmt}"
        output_path = temp_dir / output_filename

        # Generate speech
        tts = gTTS(text=text.strip(), lang=voice or "en")
        tts.save(str(output_path))

        # Read file and convert to Base64
        audio_bytes = output_path.read_bytes()
        audio_base64 = base64.b64encode(audio_bytes).decode("utf-8")

        return {
            "audio_base64": audio_base64,
            "mime_type": "audio/mpeg",
            "format": fmt,
            "filename": output_filename,
            "size_bytes": len(audio_bytes),
            "size_base64": len(audio_base64),
        }

    except Exception as e:
        raise Exception(f"Unexpected error in text_to_speech_base64: {str(e)}") from e

    finally:
        # Clean up the temp file on success AND on failure, so a failed
        # save/read does not leave files accumulating in temp_audio/.
        if output_path is not None:
            try:
                output_path.unlink(missing_ok=True)
            except OSError:
                # Best-effort: a cleanup failure must not discard audio that
                # was generated successfully, nor mask the original error.
                pass