import gradio as gr
import os
import sys
import json
import time
from typing import List, Tuple, Optional
import numpy as np
import librosa
from pathlib import Path

# Add src to path for imports
sys.path.append(os.path.join(os.path.dirname(__file__), '..'))

from models.speech_to_text import FreeIndianSpeechToText
from utils.config import load_config
from utils.audio_utils import AudioProcessor

class GradioSpeechToTextApp:
    """
    Gradio web interface for Indian Speech-to-Text models.
    Provides an intuitive UI for testing different models and languages.
    """
    
    def __init__(self):
        self.config = load_config()
        self.current_model = None
        self.audio_processor = AudioProcessor()
        self.supported_languages = {
            "Hindi": "hi",
            "Tamil": "ta", 
            "Bengali": "bn",
            "Telugu": "te",
            "Marathi": "mr",
            "Gujarati": "gu",
            "Kannada": "kn",
            "Malayalam": "ml",
            "Punjabi": "pa",
            "Odia": "or",
            "Assamese": "as",
            "Urdu": "ur",
            "English": "en"
        }
        
        # Initialize with default model
        self.initialize_model()
    
    def initialize_model(self):
        """Initialize the default model."""
        try:
            default_model = self.config.get("DEFAULT_MODEL", "distil-whisper")
            self.current_model = FreeIndianSpeechToText(
                model_type=default_model,
                cache_dir=self.config.get("MODEL_CACHE_DIR", "./models")
            )
            return f"✅ Initialized with {default_model} model"
        except Exception as e:
            return f"❌ Error initializing model: {str(e)}"
    
    def transcribe_audio(self, audio_input, model_choice: str, language_choice: str, 
                        enable_preprocessing: bool = True) -> Tuple[str, str, str]:
        """
        Main transcription function for Gradio interface.
        
        Returns:
            Tuple of (transcription_text, model_info, processing_stats)
        """
        if audio_input is None:
            return "❌ No audio provided", "", ""
        
        try:
            # Switch model if needed
            if not self.current_model or self.current_model.current_model_name != model_choice:
                status = self.switch_model(model_choice)
                if not status.startswith("✅"):
                    return f"❌ Model loading failed: {status}", "", ""
            
            # Get language code
            language_code = self.supported_languages.get(language_choice, "hi")
            
            # Preprocess audio if enabled
            if enable_preprocessing:
                try:
                    audio_data = self.audio_processor.preprocess_audio(audio_input)
                except Exception as e:
                    # Fallback to original audio if preprocessing fails
                    audio_data = audio_input
                    print(f"Preprocessing failed, using original: {e}")
            else:
                audio_data = audio_input
            
            # Perform transcription
            start_time = time.time()
            result = self.current_model.transcribe(audio_data, language_code)
            end_time = time.time()
            
            if result["success"]:
                # Format results
                transcription = result["text"]
                
                # Model information
                model_info = self.format_model_info(result)
                
                # Processing statistics
                processing_stats = self.format_processing_stats(result, end_time - start_time)
                
                return transcription, model_info, processing_stats
            else:
                return f"❌ Transcription failed: {result.get('error', 'Unknown error')}", "", ""
                
        except Exception as e:
            return f"❌ Error during transcription: {str(e)}", "", ""
    
    def switch_model(self, model_name: str) -> str:
        """Switch to a different model."""
        try:
            if self.current_model:
                success = self.current_model.switch_model(model_name)
                if success:
                    return f"✅ Switched to {model_name}"
                else:
                    return f"❌ Failed to switch to {model_name}"
            else:
                self.current_model = FreeIndianSpeechToText(
                    model_type=model_name,
                    cache_dir=self.config.get("MODEL_CACHE_DIR", "./models")
                )
                return f"✅ Loaded {model_name}"
        except Exception as e:
            return f"❌ Error switching model: {str(e)}"
    
    def batch_transcribe(self, files: List, model_choice: str, language_choice: str) -> str:
        """Batch transcription for multiple files."""
        if not files:
            return "❌ No files provided"
        
        try:
            # Switch model if needed
            if not self.current_model or self.current_model.current_model_name != model_choice:
                status = self.switch_model(model_choice)
                if not status.startswith("✅"):
                    return f"❌ Model loading failed: {status}"
            
            language_code = self.supported_languages.get(language_choice, "hi")
            
            # Process files
            file_paths = [file.name for file in files]
            results = self.current_model.batch_transcribe(file_paths, language_code)
            
            # Format results
            output = "# Batch Transcription Results\n\n"
            for i, result in enumerate(results, 1):
                if result["success"]:
                    output += f"## File {i}: {Path(result['file']).name}\n"
                    output += f"**Transcription:** {result['text']}\n"
                    output += f"**Processing Time:** {result.get('processing_time', 0):.2f}s\n\n"
                else:
                    output += f"## File {i}: {Path(result['file']).name}\n"
                    output += f"**Error:** {result.get('error', 'Unknown error')}\n\n"
            
            return output
            
        except Exception as e:
            return f"❌ Batch processing error: {str(e)}"
    
    def get_model_comparison(self) -> str:
        """Generate model comparison table."""
        if not self.current_model:
            return "❌ No model loaded"
        
        models = self.current_model.get_available_models()
        
        comparison = "# Available Models Comparison\n\n"
        comparison += "| Model | Type | Size | Languages | Description |\n"
        comparison += "|-------|------|------|-----------|-------------|\n"
        
        for name, config in models.items():
            comparison += f"| {name} | {config['type']} | {config['size']} | {config['languages']} | {config['description']} |\n"
        
        return comparison
    
    def format_model_info(self, result: dict) -> str:
        """Format model information for display."""
        model_info = f"""
**Model:** {result['model']}
**Device:** {result['device']}
**Language:** {result['language']}
"""
        return model_info.strip()
    
    def format_processing_stats(self, result: dict, total_time: float) -> str:
        """Format processing statistics."""
        stats = f"""
**Total Processing Time:** {total_time:.2f}s
**Model Processing Time:** {result.get('processing_time', 0):.2f}s
**Status:** {'✅ Success' if result['success'] else '❌ Failed'}
"""
        return stats.strip()
    
    def create_interface(self) -> gr.Blocks:
        """Create the Gradio interface."""
        
        # Custom CSS for better styling
        css = """
        .gradio-container {
            font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
        }
        .model-info {
            background-color: #f0f8ff;
            padding: 10px;
            border-radius: 5px;
            border-left: 4px solid #4CAF50;
        }
        .stats-info {
            background-color: #fff8f0;
            padding: 10px;
            border-radius: 5px;
            border-left: 4px solid #ff9800;
        }
        """
        
        with gr.Blocks(css=css, title="Indian Speech-to-Text Models", theme=gr.themes.Soft()) as interface:
            
            gr.Markdown("""
            # 🎤 Complete Guide to Free Open-Source Speech-to-Text Models for Indian Languages
            
            This application provides access to multiple free, open-source speech-to-text models optimized for Indian languages.
            All models are completely free to use and can be deployed commercially.
            """)
            
            with gr.Tab("🎯 Single Audio Transcription"):
                with gr.Row():
                    with gr.Column(scale=2):
                        # Audio input
                        audio_input = gr.Audio(
                            label="Upload Audio File or Record",
                            type="filepath",
                            sources=["upload", "microphone"]
                        )
                        
                        # Model selection
                        model_choice = gr.Dropdown(
                            choices=[
                                "distil-whisper", "whisper-free", "whisper-small",
                                "wav2vec2-hindi", "wav2vec2-improved", "wav2vec2-multilang",
                                "seamless", "speecht5"
                            ],
                            value="distil-whisper",
                            label="Select Model",
                            info="Choose the speech-to-text model"
                        )
                        
                        # Language selection
                        language_choice = gr.Dropdown(
                            choices=list(self.supported_languages.keys()),
                            value="Hindi",
                            label="Select Language",
                            info="Choose the audio language"
                        )
                        
                        # Preprocessing option
                        enable_preprocessing = gr.Checkbox(
                            value=True,
                            label="Enable Audio Preprocessing",
                            info="Normalize and clean audio for better results"
                        )
                        
                        # Transcribe button
                        transcribe_btn = gr.Button("🎯 Transcribe Audio", variant="primary", size="lg")
                    
                    with gr.Column(scale=3):
                        # Results
                        transcription_output = gr.Textbox(
                            label="Transcription Result",
                            lines=6,
                            placeholder="Transcription will appear here..."
                        )
                        
                        with gr.Row():
                            model_info_output = gr.Markdown(
                                label="Model Information",
                                elem_classes=["model-info"]
                            )
                            
                            processing_stats = gr.Markdown(
                                label="Processing Statistics", 
                                elem_classes=["stats-info"]
                            )
            
            with gr.Tab("📁 Batch Processing"):
                with gr.Row():
                    with gr.Column():
                        # File upload for batch processing
                        batch_files = gr.File(
                            label="Upload Multiple Audio Files",
                            file_count="multiple",
                            file_types=["audio"]
                        )
                        
                        # Model and language for batch
                        batch_model = gr.Dropdown(
                            choices=[
                                "distil-whisper", "whisper-free", "whisper-small",
                                "wav2vec2-hindi", "wav2vec2-improved"
                            ],
                            value="distil-whisper",
                            label="Select Model for Batch Processing"
                        )
                        
                        batch_language = gr.Dropdown(
                            choices=list(self.supported_languages.keys()),
                            value="Hindi",
                            label="Select Language for All Files"
                        )
                        
                        batch_btn = gr.Button("🚀 Process Batch", variant="primary")
                    
                    with gr.Column():
                        batch_results = gr.Markdown(
                            label="Batch Results",
                            value="Upload files and click 'Process Batch' to see results here."
                        )
            
            with gr.Tab("📊 Model Comparison"):
                gr.Markdown("## Model Performance Comparison")
                
                comparison_btn = gr.Button("📊 Generate Comparison Table")
                comparison_output = gr.Markdown()
                
                gr.Markdown("""
                ### Model Recommendations:
                
                - **Distil-Whisper Large-v3**: Best overall choice - 6x faster, 49% smaller, <1% WER difference
                - **OpenAI Whisper Large-v3**: Best accuracy for complex audio
                - **Wav2Vec2 Hindi Models**: Specialized for Hindi language
                - **Whisper Small**: Good balance for CPU-only deployment
                - **SeamlessM4T**: Best for multilingual scenarios (101 languages)
                """)
            
            with gr.Tab("ℹ️ About & Setup"):
                gr.Markdown("""
                ## About This Application
                
                This application showcases free, open-source speech-to-text models specifically optimized for Indian languages.
                All models are available under permissive licenses (MIT, Apache 2.0) and can be used commercially.
                
                ### Supported Languages:
                - Hindi, Tamil, Bengali, Telugu, Marathi
                - Gujarati, Kannada, Malayalam, Punjabi, Odia
                - Assamese, Urdu, English
                
                ### Key Features:
                - ✅ Multiple free model architectures
                - ✅ Real-time and batch processing
                - ✅ Audio preprocessing and optimization
                - ✅ Performance metrics and comparison
                - ✅ Commercial use allowed
                
                ### Technical Stack:
                - **Models**: Transformers, PyTorch, TensorFlow
                - **Interface**: Gradio
                - **Audio Processing**: Librosa, SoundFile
                - **Optimization**: CUDA support, Mixed precision
                
                ### Setup Instructions:
                1. Install dependencies: `pip install -r requirements.txt`
                2. Set environment: `export APP_ENV=local`
                3. Run application: `python app.py`
                """)
            
            # Event handlers
            transcribe_btn.click(
                fn=self.transcribe_audio,
                inputs=[audio_input, model_choice, language_choice, enable_preprocessing],
                outputs=[transcription_output, model_info_output, processing_stats]
            )
            
            batch_btn.click(
                fn=self.batch_transcribe,
                inputs=[batch_files, batch_model, batch_language],
                outputs=[batch_results]
            )
            
            comparison_btn.click(
                fn=self.get_model_comparison,
                outputs=[comparison_output]
            )
        
        return interface
    
    def launch(self, share: bool = None, server_name: str = None, server_port: int = None):
        """Launch the Gradio application."""
        interface = self.create_interface()
        
        # Use config values or defaults
        share = share if share is not None else self.config.get("GRADIO_SHARE", False)
        server_name = server_name or self.config.get("GRADIO_SERVER_NAME", "127.0.0.1")
        server_port = server_port or int(self.config.get("GRADIO_SERVER_PORT", 7860))
        
        print(f"🚀 Launching Speech-to-Text Application...")
        print(f"📍 Server: http://{server_name}:{server_port}")
        print(f"🌐 Share: {share}")
        
        interface.launch(
            share=share,
            server_name=server_name,
            server_port=server_port,
            show_error=True,
            quiet=False
        )

def main():
    """Main function to run the application."""
    app = GradioSpeechToTextApp()
    app.launch()

if __name__ == "__main__":
    main()