File size: 2,462 Bytes
1e7709f
4c992d0
c45f0d6
5406818
 
9930d31
a61d0cb
a71355d
9c9026a
 
 
a71355d
c45f0d6
9c9026a
 
4c992d0
c45f0d6
9c9026a
 
 
 
 
 
c45f0d6
 
 
9c9026a
c45f0d6
 
9c9026a
 
 
 
 
c45f0d6
9c9026a
c45f0d6
4c992d0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
import uuid
import base64
from pathlib import Path
from config import GROQ_TTS_API_KEY, GROQ_TTS_MODEL
from gtts import gTTS
from fastapi import HTTPException

def text_to_speech(
    text: str,
    voice: str = "en",
    fmt: str = "mp3",
) -> str:
    """
    Convert text to speech using gTTS (Google Translate, free) and save it to disk.

    The audio is written to ``temp_audio/tts_<8 hex chars>.mp3`` relative to
    the current working directory; the directory is created when missing.
    The file is NOT removed on success -- the caller owns it.

    Args:
        text: Text to synthesize. Surrounding whitespace is stripped.
        voice: Passed to gTTS as the ``lang`` argument (the parameter name is
            kept for compatibility). A falsy value falls back to ``"en"``.
        fmt: Output format. Only ``"mp3"`` is supported.

    Returns:
        Path of the generated audio file, as a string.

    Raises:
        ValueError: If ``text`` is empty/whitespace-only or ``fmt`` is not "mp3".
        Exception: If anything fails while generating or saving the audio.
            The original error is available as ``__cause__``.
    """
    if not text or not text.strip():
        raise ValueError("Text cannot be empty")

    if fmt != "mp3":
        raise ValueError("Only MP3 format is supported by the free TTS backend")

    output_path = None
    try:
        temp_dir = Path("temp_audio")
        temp_dir.mkdir(exist_ok=True)

        output_filename = f"tts_{uuid.uuid4().hex[:8]}.{fmt}"
        output_path = temp_dir / output_filename

        # gTTS uses language codes; voice kept for compatibility.
        tts = gTTS(text=text.strip(), lang=voice or "en")
        tts.save(str(output_path))

        return str(output_path)

    except Exception as e:
        # A failed save can leave an empty or truncated file behind; remove it
        # so the temp directory does not accumulate unusable audio files.
        if output_path is not None:
            try:
                output_path.unlink(missing_ok=True)
            except OSError:
                pass  # best-effort cleanup; the original error matters more
        # Chain the cause so the real traceback is preserved for debugging.
        raise Exception(f"Unexpected error in text_to_speech: {str(e)}") from e


def text_to_speech_base64(
    text: str,
    voice: str = "en",
    fmt: str = "mp3",
) -> dict:
    """
    Convert text to speech and return the audio as a Base64 string.

    The audio is synthesized with gTTS into a temporary file under
    ``temp_audio/`` (created when missing), read back, and the temporary file
    is always removed afterwards -- on success and on failure.

    Args:
        text: Text to synthesize. Surrounding whitespace is stripped.
        voice: Passed to gTTS as the ``lang`` argument. A falsy value falls
            back to ``"en"``.
        fmt: Output format. Only ``"mp3"`` is supported.

    Returns:
        A dict with the keys ``audio_base64`` (Base64 text), ``mime_type``
        (always ``"audio/mpeg"``), ``format``, ``filename`` (name the temp
        file had), ``size_bytes`` (raw audio length) and ``size_base64``
        (length of the Base64 text).

    Raises:
        ValueError: If ``text`` is empty/whitespace-only or ``fmt`` is not "mp3".
        Exception: If anything fails while generating or encoding the audio.
            The original error is available as ``__cause__``.
    """
    if not text or not text.strip():
        raise ValueError("Text cannot be empty")

    if fmt != "mp3":
        raise ValueError("Only MP3 format is supported by the free TTS backend")

    try:
        temp_dir = Path("temp_audio")
        temp_dir.mkdir(exist_ok=True)

        output_filename = f"tts_{uuid.uuid4().hex[:8]}.{fmt}"
        output_path = temp_dir / output_filename

        try:
            # Generate speech
            tts = gTTS(text=text.strip(), lang=voice or "en")
            tts.save(str(output_path))

            # Read the generated audio back into memory
            audio_bytes = output_path.read_bytes()
        finally:
            # Clean up the temp file even when save/read fails, so failed
            # requests do not leak files into temp_audio/.
            output_path.unlink(missing_ok=True)

        audio_base64 = base64.b64encode(audio_bytes).decode('utf-8')

        return {
            "audio_base64": audio_base64,
            "mime_type": "audio/mpeg",
            "format": fmt,
            "filename": output_filename,
            "size_bytes": len(audio_bytes),
            "size_base64": len(audio_base64)
        }

    except Exception as e:
        # Chain the cause so the real traceback is preserved for debugging.
        raise Exception(f"Unexpected error in text_to_speech_base64: {str(e)}") from e