malek-messaoudii committed on
Commit
9c9026a
·
1 Parent(s): 16153ee

feat: Update TTS functionality to use gTTS for text-to-speech conversion, supporting only MP3 format. Adjust requirements and model fields for compatibility.

Browse files
models/tts.py CHANGED
@@ -2,14 +2,16 @@ from pydantic import BaseModel, Field
2
 
3
  class TTSRequest(BaseModel):
4
  text: str = Field(..., min_length=1, max_length=5000)
5
- voice: str = Field(default="Aaliyah-PlayAI")
6
- format: str = Field(default="wav", pattern="^(wav|mp3)$")
7
-
 
 
8
  class Config:
9
  json_schema_extra = {
10
  "example": {
11
  "text": "Hello, this is a test of text-to-speech.",
12
- "voice": "Aaliyah-PlayAI",
13
- "format": "wav"
14
  }
15
  }
 
2
 
3
  class TTSRequest(BaseModel):
4
  text: str = Field(..., min_length=1, max_length=5000)
5
+ # gTTS uses language codes; keep voice field for compatibility.
6
+ voice: str = Field(default="en")
7
+ # Free backend supports only mp3.
8
+ format: str = Field(default="mp3", pattern="^(mp3)$")
9
+
10
  class Config:
11
  json_schema_extra = {
12
  "example": {
13
  "text": "Hello, this is a test of text-to-speech.",
14
+ "voice": "en",
15
+ "format": "mp3",
16
  }
17
  }
requirements.txt CHANGED
@@ -19,6 +19,7 @@ langsmith>=0.1.0
19
  # Fix urllib3 compatibility issues
20
  urllib3>=1.26.0,<3.0.0
21
  requests-toolbelt>=1.0.0
 
22
 
23
  # Audio processing (optionnel si vous avez besoin de traitement local)
24
  soundfile>=0.12.1
 
19
  # Fix urllib3 compatibility issues
20
  urllib3>=1.26.0,<3.0.0
21
  requests-toolbelt>=1.0.0
22
+ gTTS>=2.5.3
23
 
24
  # Audio processing (optionnel si vous avez besoin de traitement local)
25
  soundfile>=0.12.1
routes/tts_routes.py CHANGED
@@ -9,32 +9,28 @@ router = APIRouter(prefix="/tts", tags=["Text To Speech"])
9
  @router.post("/")
10
  async def generate_tts(request: TTSRequest):
11
  """
12
- Convert text to speech (English only)
13
  """
14
  try:
15
- # Generate audio
16
  audio_path = text_to_speech(
17
  text=request.text,
18
  voice=request.voice,
19
- fmt=request.format
20
  )
21
-
22
- # Verify file exists
23
  if not Path(audio_path).exists():
24
  raise HTTPException(status_code=500, detail="Audio file generation failed")
25
-
26
- # Determine MIME type
27
- media_type = "audio/wav" if request.format == "wav" else "audio/mpeg"
28
-
29
- # Return audio file
30
  return FileResponse(
31
  path=audio_path,
32
  filename=f"speech.{request.format}",
33
  media_type=media_type,
34
  headers={
35
  "Content-Disposition": f"attachment; filename=speech.{request.format}"
36
- }
37
  )
38
-
39
  except Exception as e:
40
  raise HTTPException(status_code=500, detail=str(e))
 
9
  @router.post("/")
10
  async def generate_tts(request: TTSRequest):
11
  """
12
+ Convert text to speech using the free gTTS backend (MP3 only).
13
  """
14
  try:
 
15
  audio_path = text_to_speech(
16
  text=request.text,
17
  voice=request.voice,
18
+ fmt=request.format,
19
  )
20
+
 
21
  if not Path(audio_path).exists():
22
  raise HTTPException(status_code=500, detail="Audio file generation failed")
23
+
24
+ media_type = "audio/mpeg"
25
+
 
 
26
  return FileResponse(
27
  path=audio_path,
28
  filename=f"speech.{request.format}",
29
  media_type=media_type,
30
  headers={
31
  "Content-Disposition": f"attachment; filename=speech.{request.format}"
32
+ },
33
  )
34
+
35
  except Exception as e:
36
  raise HTTPException(status_code=500, detail=str(e))
services/tts_service.py CHANGED
@@ -1,17 +1,21 @@
1
- import requests
2
  import uuid
3
- import os
4
  from pathlib import Path
5
  from config import GROQ_TTS_API_KEY, GROQ_TTS_MODEL
 
 
 
 
6
 
7
  def text_to_speech(
8
- text: str,
9
- voice: str = "Aaliyah-PlayAI",
10
- fmt: str = "wav"
11
  ) -> str:
12
  """
13
- Convert text to speech using Groq's TTS API (English only)
 
14
  """
 
15
  if not GROQ_TTS_API_KEY:
16
  raise RuntimeError("GROQ_TTS_API_KEY is not set in config")
17
 
@@ -32,26 +36,26 @@ def text_to_speech(
32
  "response_format": fmt
33
  }
34
 
 
 
 
 
 
 
 
 
35
  try:
36
- # Create temp directory for audio files
37
  temp_dir = Path("temp_audio")
38
  temp_dir.mkdir(exist_ok=True)
39
-
40
- # Unique filename
41
  output_filename = f"tts_{uuid.uuid4().hex[:8]}.{fmt}"
42
  output_path = temp_dir / output_filename
43
-
44
- # Call Groq API
45
- response = requests.post(url, headers=headers, json=payload, timeout=30)
46
- response.raise_for_status()
47
-
48
- # Save audio file
49
- with open(output_path, "wb") as f:
50
- f.write(response.content)
51
-
52
  return str(output_path)
53
-
54
- except requests.exceptions.RequestException as e:
55
- raise Exception(f"Groq TTS API error: {str(e)}")
56
  except Exception as e:
57
  raise Exception(f"Unexpected error in text_to_speech: {str(e)}")
 
 
1
  import uuid
 
2
  from pathlib import Path
3
  from config import GROQ_TTS_API_KEY, GROQ_TTS_MODEL
4
+ <<<<<<< Updated upstream
5
+ =======
6
+ from gtts import gTTS
7
+ >>>>>>> Stashed changes
8
 
9
  def text_to_speech(
10
+ text: str,
11
+ voice: str = "en",
12
+ fmt: str = "mp3",
13
  ) -> str:
14
  """
15
+ Convert text to speech using gTTS (Google Translate, free).
16
+ Only MP3 is supported.
17
  """
18
+ <<<<<<< Updated upstream
19
  if not GROQ_TTS_API_KEY:
20
  raise RuntimeError("GROQ_TTS_API_KEY is not set in config")
21
 
 
36
  "response_format": fmt
37
  }
38
 
39
+ =======
40
+ if not text or not text.strip():
41
+ raise ValueError("Text cannot be empty")
42
+
43
+ if fmt != "mp3":
44
+ raise ValueError("Only MP3 format is supported by the free TTS backend")
45
+
46
+ >>>>>>> Stashed changes
47
  try:
 
48
  temp_dir = Path("temp_audio")
49
  temp_dir.mkdir(exist_ok=True)
50
+
 
51
  output_filename = f"tts_{uuid.uuid4().hex[:8]}.{fmt}"
52
  output_path = temp_dir / output_filename
53
+
54
+ # gTTS uses language codes; voice kept for compatibility.
55
+ tts = gTTS(text=text.strip(), lang=voice or "en")
56
+ tts.save(str(output_path))
57
+
 
 
 
 
58
  return str(output_path)
59
+
 
 
60
  except Exception as e:
61
  raise Exception(f"Unexpected error in text_to_speech: {str(e)}")