malek-messaoudii
committed on
Commit
·
129ec23
1
Parent(s):
4c992d0
feat: Update generate_speech endpoint to enforce MP3 format, modify request and response models for clarity, and enhance documentation to reflect language code usage and format restrictions.
Browse files
- models/mcp_models.py +2 -2
- routes/mcp_routes.py +40 -20
models/mcp_models.py
CHANGED
|
@@ -98,12 +98,12 @@ class GenerateSpeechResponse(BaseModel):
|
|
| 98 |
model_config = ConfigDict(
|
| 99 |
json_schema_extra={
|
| 100 |
"example": {
|
| 101 |
-
"audio_path": "temp_audio/tts_e9b78164.
|
| 102 |
}
|
| 103 |
}
|
| 104 |
)
|
| 105 |
|
| 106 |
-
audio_path: str = Field(..., description="Path to generated audio file")
|
| 107 |
|
| 108 |
class ExtractTopicResponse(BaseModel):
|
| 109 |
"""Response model for topic extraction"""
|
|
|
|
| 98 |
model_config = ConfigDict(
|
| 99 |
json_schema_extra={
|
| 100 |
"example": {
|
| 101 |
+
"audio_path": "temp_audio/tts_e9b78164.mp3"
|
| 102 |
}
|
| 103 |
}
|
| 104 |
)
|
| 105 |
|
| 106 |
+
audio_path: str = Field(..., description="Path to generated MP3 audio file")
|
| 107 |
|
| 108 |
class ExtractTopicResponse(BaseModel):
|
| 109 |
"""Response model for topic extraction"""
|
routes/mcp_routes.py
CHANGED
|
@@ -65,17 +65,19 @@ class MatchKeypointRequest(BaseModel):
|
|
| 65 |
|
| 66 |
|
| 67 |
class GenerateSpeechRequest(BaseModel):
|
| 68 |
-
"""Request pour générer de la parole"""
|
| 69 |
text: str = Field(..., description="Texte à convertir en parole")
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
|
|
|
|
|
|
|
| 73 |
class Config:
|
| 74 |
json_schema_extra = {
|
| 75 |
"example": {
|
| 76 |
"text": "Hello, this is a test",
|
| 77 |
-
"voice": "
|
| 78 |
-
"format": "
|
| 79 |
}
|
| 80 |
}
|
| 81 |
|
|
@@ -174,16 +176,27 @@ async def list_mcp_tools():
|
|
| 174 |
),
|
| 175 |
ToolInfo(
|
| 176 |
name="generate_speech",
|
| 177 |
-
description="Convertit du texte en fichier audio",
|
| 178 |
input_schema={
|
| 179 |
"type": "object",
|
| 180 |
"properties": {
|
| 181 |
-
"text": {
|
| 182 |
-
|
| 183 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 184 |
},
|
| 185 |
-
"required": ["text"]
|
| 186 |
-
}
|
| 187 |
),
|
| 188 |
ToolInfo(
|
| 189 |
name="generate_argument",
|
|
@@ -286,14 +299,14 @@ async def call_mcp_tool(request: ToolCallRequest):
|
|
| 286 |
}
|
| 287 |
```
|
| 288 |
|
| 289 |
-
5. **generate_speech** - Générer de la parole:
|
| 290 |
```json
|
| 291 |
{
|
| 292 |
"tool_name": "generate_speech",
|
| 293 |
"arguments": {
|
| 294 |
"text": "Hello, this is a test",
|
| 295 |
-
"voice": "
|
| 296 |
-
"format": "
|
| 297 |
}
|
| 298 |
}
|
| 299 |
```
|
|
@@ -488,10 +501,17 @@ async def mcp_transcribe_audio(file: UploadFile = File(...)):
|
|
| 488 |
if os.path.exists(temp_path):
|
| 489 |
os.unlink(temp_path)
|
| 490 |
|
| 491 |
-
@router.post("/tools/generate-speech", summary="Générer de la parole à partir de texte")
|
| 492 |
async def mcp_generate_speech(request: GenerateSpeechRequest):
|
| 493 |
-
"""Convertit du texte en fichier audio (téléchargeable)"""
|
| 494 |
try:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 495 |
result = await mcp_server.call_tool("generate_speech", {
|
| 496 |
"text": request.text,
|
| 497 |
"voice": request.voice,
|
|
@@ -520,7 +540,7 @@ async def mcp_generate_speech(request: GenerateSpeechRequest):
|
|
| 520 |
# Nettoyer le chemin si c'est une représentation string d'objet
|
| 521 |
if audio_path and isinstance(audio_path, str):
|
| 522 |
# Si c'est une représentation d'objet TextContent, extraire le chemin
|
| 523 |
-
if "text='" in audio_path
|
| 524 |
import re
|
| 525 |
match = re.search(r"text='([^']+)'", audio_path)
|
| 526 |
if match:
|
|
@@ -533,8 +553,8 @@ async def mcp_generate_speech(request: GenerateSpeechRequest):
|
|
| 533 |
if not Path(audio_path).exists():
|
| 534 |
raise HTTPException(status_code=500, detail=f"Audio file not found: {audio_path}")
|
| 535 |
|
| 536 |
-
# Déterminer le type MIME
|
| 537 |
-
media_type = "audio/
|
| 538 |
|
| 539 |
# Retourner le fichier pour téléchargement
|
| 540 |
logger.info(f"Speech generated: {audio_path}")
|
|
|
|
| 65 |
|
| 66 |
|
| 67 |
class GenerateSpeechRequest(BaseModel):
|
| 68 |
+
"""Request pour générer de la parole (TTS gTTS, MP3 seulement)"""
|
| 69 |
text: str = Field(..., description="Texte à convertir en parole")
|
| 70 |
+
# gTTS attend un code langue (ex: 'en', 'fr')
|
| 71 |
+
voice: str = Field(default="en", description="Code langue à utiliser (ex: 'en', 'fr')")
|
| 72 |
+
# Le backend gratuit ne supporte que le format MP3
|
| 73 |
+
format: str = Field(default="mp3", description="Format audio (uniquement 'mp3')")
|
| 74 |
+
|
| 75 |
class Config:
|
| 76 |
json_schema_extra = {
|
| 77 |
"example": {
|
| 78 |
"text": "Hello, this is a test",
|
| 79 |
+
"voice": "en",
|
| 80 |
+
"format": "mp3",
|
| 81 |
}
|
| 82 |
}
|
| 83 |
|
|
|
|
| 176 |
),
|
| 177 |
ToolInfo(
|
| 178 |
name="generate_speech",
|
| 179 |
+
description="Convertit du texte en fichier audio (TTS gTTS, MP3 seulement)",
|
| 180 |
input_schema={
|
| 181 |
"type": "object",
|
| 182 |
"properties": {
|
| 183 |
+
"text": {
|
| 184 |
+
"type": "string",
|
| 185 |
+
"description": "Texte à convertir en parole",
|
| 186 |
+
},
|
| 187 |
+
"voice": {
|
| 188 |
+
"type": "string",
|
| 189 |
+
"description": "Code langue à utiliser (ex: 'en', 'fr')",
|
| 190 |
+
"default": "en",
|
| 191 |
+
},
|
| 192 |
+
"format": {
|
| 193 |
+
"type": "string",
|
| 194 |
+
"description": "Format audio (uniquement 'mp3')",
|
| 195 |
+
"default": "mp3",
|
| 196 |
+
},
|
| 197 |
},
|
| 198 |
+
"required": ["text"],
|
| 199 |
+
},
|
| 200 |
),
|
| 201 |
ToolInfo(
|
| 202 |
name="generate_argument",
|
|
|
|
| 299 |
}
|
| 300 |
```
|
| 301 |
|
| 302 |
+
5. **generate_speech** - Générer de la parole (MP3 seulement):
|
| 303 |
```json
|
| 304 |
{
|
| 305 |
"tool_name": "generate_speech",
|
| 306 |
"arguments": {
|
| 307 |
"text": "Hello, this is a test",
|
| 308 |
+
"voice": "en",
|
| 309 |
+
"format": "mp3"
|
| 310 |
}
|
| 311 |
}
|
| 312 |
```
|
|
|
|
| 501 |
if os.path.exists(temp_path):
|
| 502 |
os.unlink(temp_path)
|
| 503 |
|
| 504 |
+
@router.post("/tools/generate-speech", summary="Générer de la parole à partir de texte (MP3 uniquement)")
|
| 505 |
async def mcp_generate_speech(request: GenerateSpeechRequest):
|
| 506 |
+
"""Convertit du texte en fichier audio MP3 (téléchargeable) via l'outil MCP generate_speech"""
|
| 507 |
try:
|
| 508 |
+
# Validation explicite pour être cohérent avec le backend gTTS
|
| 509 |
+
if request.format != "mp3":
|
| 510 |
+
raise HTTPException(
|
| 511 |
+
status_code=400,
|
| 512 |
+
detail="Only 'mp3' format is supported by the TTS backend",
|
| 513 |
+
)
|
| 514 |
+
|
| 515 |
result = await mcp_server.call_tool("generate_speech", {
|
| 516 |
"text": request.text,
|
| 517 |
"voice": request.voice,
|
|
|
|
| 540 |
# Nettoyer le chemin si c'est une représentation string d'objet
|
| 541 |
if audio_path and isinstance(audio_path, str):
|
| 542 |
# Si c'est une représentation d'objet TextContent, extraire le chemin
|
| 543 |
+
if "text='" in audio_path:
|
| 544 |
import re
|
| 545 |
match = re.search(r"text='([^']+)'", audio_path)
|
| 546 |
if match:
|
|
|
|
| 553 |
if not Path(audio_path).exists():
|
| 554 |
raise HTTPException(status_code=500, detail=f"Audio file not found: {audio_path}")
|
| 555 |
|
| 556 |
+
# Déterminer le type MIME (MP3 uniquement)
|
| 557 |
+
media_type = "audio/mpeg"
|
| 558 |
|
| 559 |
# Retourner le fichier pour téléchargement
|
| 560 |
logger.info(f"Speech generated: {audio_path}")
|