import os
from pathlib import Path

import gradio as gr
from huggingface_hub import InferenceClient

# PDF extraction libraries
try:
    from pypdf import PdfReader
    PYPDF_AVAILABLE = True
except ImportError:
    PYPDF_AVAILABLE = False

try:
    import docx
    DOCX_AVAILABLE = True
except ImportError:
    DOCX_AVAILABLE = False

try:
    import pandas as pd
    PANDAS_AVAILABLE = True
except ImportError:
    PANDAS_AVAILABLE = False

# Model configurations
MODELS = [
    "Qwen/Qwen2.5-72B-Instruct",
    "deepseek-ai/DeepSeek-V3",
    "meta-llama/Meta-Llama-3-8B-Instruct",
    "moonshotai/Kimi-K2-Thinking",
    "google/gemma-2-9b-it"
]

SYSTEM_PROMPTS = {
    "Default": "You are a helpful, respectful and honest assistant.",
    "Document Analyzer": "You are an expert at analyzing documents. Provide detailed insights, summaries, and answer questions based on the provided document content.",
    "Code Expert": "You are an expert programmer. Analyze code, provide explanations, and suggest improvements.",
    "Data Scientist": "You are a data science expert. Analyze data files and provide insights with statistical analysis.",
}


def extract_text_from_pdf(file_path):
    """Extract text from PDF"""
    if not PYPDF_AVAILABLE:
        return "āŒ PDF extraction unavailable."
    try:
        reader = PdfReader(file_path)
        text = f"šŸ“„ PDF: {len(reader.pages)} pages\n\n"
        for page_num, page in enumerate(reader.pages, 1):
            page_text = page.extract_text()
            text += f"--- Page {page_num} ---\n{page_text}\n\n"
        return text
    except Exception as e:
        return f"āŒ Error reading PDF: {str(e)}"


def extract_text_from_docx(file_path):
    """Extract text from DOCX"""
    if not DOCX_AVAILABLE:
        return "āŒ DOCX extraction unavailable."
    try:
        doc = docx.Document(file_path)
        return "\n\n".join([p.text for p in doc.paragraphs if p.text.strip()])
    except Exception as e:
        return f"āŒ Error reading DOCX: {str(e)}"


def extract_text_from_txt(file_path):
    """Extract text from TXT"""
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            return f.read()
    except UnicodeDecodeError:
        with open(file_path, 'r', encoding='latin-1') as f:
            return f.read()
    except Exception as e:
        return f"āŒ Error: {str(e)}"
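
# A minimal sketch, not wired into the app: the extracted text above is
# inlined into the prompt verbatim, so a long PDF can exceed the model's
# context window. One simple guard is a character budget; the 12_000-character
# cap below is an assumed, illustrative value, not a limit the app enforces.
def truncate_for_context(text, max_chars=12_000):
    """Trim extracted text to a rough character budget, keeping the head."""
    if len(text) <= max_chars:
        return text
    return text[:max_chars] + "\n\n[... truncated to fit the context window ...]"
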

def extract_text_from_csv(file_path):
    """Extract text from CSV"""
    if not PANDAS_AVAILABLE:
        return "āŒ CSV extraction unavailable."
    try:
        df = pd.read_csv(file_path)
        text = f"šŸ“Š CSV: {len(df)} rows, {len(df.columns)} columns\n\n"
        text += f"Columns: {', '.join(df.columns)}\n\n"
        text += f"Preview (first 10 rows):\n{df.head(10).to_string()}\n\n"
        text += f"Statistics:\n{df.describe().to_string()}"
        return text
    except Exception as e:
        return f"āŒ Error: {str(e)}"


def process_files(files):
    """Process uploaded files into a single text block for the prompt"""
    if not files:
        return ""
    content = "\n\n" + "=" * 50 + "\nšŸ“Ž UPLOADED DOCUMENTS\n" + "=" * 50 + "\n\n"
    for file_obj in files:
        file_path = file_obj if isinstance(file_obj, str) else file_obj.name
        file_name = Path(file_path).name
        file_ext = Path(file_path).suffix.lower()
        content += f"\nšŸ“„ **{file_name}**\n\n"
        if file_ext == '.pdf':
            text = extract_text_from_pdf(file_path)
        elif file_ext in ['.docx', '.doc']:
            # Note: python-docx cannot read legacy .doc files; those surface
            # as the error message from extract_text_from_docx.
            text = extract_text_from_docx(file_path)
        elif file_ext in ['.txt', '.md', '.py', '.json']:
            text = extract_text_from_txt(file_path)
        elif file_ext == '.csv':
            text = extract_text_from_csv(file_path)
        else:
            text = f"āš ļø Unsupported format: {file_ext}"
        content += text + "\n\n" + "-" * 50 + "\n"
    return content


def respond(
    message,
    history,
    system_message,
    max_tokens,
    temperature,
    top_p,
    model_id,
    uploaded_files,
):
    """Main chat function - No OAuth required"""
    # Get token from environment variable
    token = os.getenv("HF_TOKEN")
    if not token:
        yield "āš ļø HF_TOKEN not configured. Please set it in Space Settings → Repository secrets."
        return

    try:
        client = InferenceClient(token=token, model=model_id)

        # Build messages
        messages = [{"role": "system", "content": system_message}]

        # Add history, keeping only the keys the chat API expects
        # (Gradio message dicts can carry extra fields such as metadata)
        for msg in history:
            messages.append({"role": msg["role"], "content": msg["content"]})

        # Process uploaded files
        file_content = ""
        if uploaded_files:
            file_content = process_files(uploaded_files)

        # Combine user message with file content
        full_message = message + file_content
        messages.append({"role": "user", "content": full_message})

        # Stream response
        response = ""
        for chunk in client.chat_completion(
            messages,
            max_tokens=max_tokens,
            stream=True,
            temperature=temperature,
            top_p=top_p,
        ):
            if chunk.choices and chunk.choices[0].delta.content:
                response += chunk.choices[0].delta.content
                yield response
    except Exception as e:
        yield f"āŒ Error: {str(e)}\n\nTry a different model or check HF_TOKEN configuration."
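
# A minimal sketch of driving `respond` outside the UI, e.g. for a quick
# smoke test from a REPL. It assumes HF_TOKEN is set in the environment and
# is not called anywhere by the app; the parameter values mirror the slider
# defaults in the interface below and are otherwise arbitrary.
def cli_ask(question, model_id=MODELS[0]):
    """Stream `respond` to completion and return the final accumulated text."""
    final = ""
    for partial in respond(
        message=question,
        history=[],                    # no prior turns
        system_message=SYSTEM_PROMPTS["Default"],
        max_tokens=2048,
        temperature=0.7,
        top_p=0.95,
        model_id=model_id,
        uploaded_files=None,           # no documents attached
    ):
        final = partial                # each yield is the full response so far
    return final
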

def update_system_prompt(preset):
    return SYSTEM_PROMPTS.get(preset, SYSTEM_PROMPTS["Default"])


# Gradio 6 Interface
with gr.Blocks(title="Jarvis - AI Document Assistant") as demo:
    gr.Markdown(
        """
        # šŸ’¬ Jarvis - AI Document Assistant

        Upload documents (PDF, DOCX, TXT, CSV) and chat with powerful AI models
        """
    )

    with gr.Row():
        with gr.Column(scale=3):
            # File upload
            file_upload = gr.File(
                label="šŸ“ Upload Documents",
                file_count="multiple",
                file_types=[".pdf", ".docx", ".txt", ".csv", ".md", ".py", ".json"],
            )

            # ChatInterface
            chat = gr.ChatInterface(
                fn=respond,
                chatbot=gr.Chatbot(
                    height=500,
                    show_label=False,
                    avatar_images=(
                        None,
                        "https://huggingface.co/datasets/huggingface/brand-assets/resolve/main/hf-logo.png",
                    ),
                ),
                textbox=gr.Textbox(
                    placeholder="šŸ’¬ Ask a question about your documents...",
                    show_label=False,
                ),
                additional_inputs=[
                    gr.Textbox(
                        value=SYSTEM_PROMPTS["Document Analyzer"],
                        label="System Prompt",
                        visible=False,
                    ),
                    gr.Slider(128, 4096, 2048, step=128, visible=False),
                    gr.Slider(0.1, 2.0, 0.7, step=0.1, visible=False),
                    gr.Slider(0.1, 1.0, 0.95, step=0.05, visible=False),
                    gr.Dropdown(choices=MODELS, value=MODELS[0], visible=False),
                    file_upload,
                ],
                submit_btn="Send",
                stop_btn="Stop",
            )

        with gr.Column(scale=1):
            gr.Markdown("### āš™ļø Settings")
            model_dropdown = gr.Dropdown(
                choices=MODELS,
                value=MODELS[0],
                label="šŸ¤– AI Model",
            )
            preset_dropdown = gr.Dropdown(
                choices=list(SYSTEM_PROMPTS.keys()),
                value="Document Analyzer",
                label="šŸ“‹ Behavior Preset",
            )
            system_prompt = gr.Textbox(
                value=SYSTEM_PROMPTS["Document Analyzer"],
                label="šŸ’¬ System Prompt",
                lines=4,
            )

            gr.Markdown("### šŸŽ›ļø Generation")
            max_tokens = gr.Slider(
                128, 4096, 2048, step=128,
                label="Max Tokens",
                info="Maximum response length",
            )
            temperature = gr.Slider(
                0.1, 2.0, 0.7, step=0.1,
                label="Temperature",
                info="Creativity (higher = more random)",
            )
            top_p = gr.Slider(
                0.1, 1.0, 0.95, step=0.05,
                label="Top-p",
                info="Nucleus sampling",
            )

    # Connect settings to ChatInterface
    model_dropdown.change(
        lambda x: x,
        inputs=[model_dropdown],
        outputs=[chat.additional_inputs[4]],
    )
    preset_dropdown.change(
        update_system_prompt,
        inputs=[preset_dropdown],
        outputs=[system_prompt],
    )
    system_prompt.change(
        lambda x: x,
        inputs=[system_prompt],
        outputs=[chat.additional_inputs[0]],
    )
    max_tokens.change(
        lambda x: x,
        inputs=[max_tokens],
        outputs=[chat.additional_inputs[1]],
    )
    temperature.change(
        lambda x: x,
        inputs=[temperature],
        outputs=[chat.additional_inputs[2]],
    )
    top_p.change(
        lambda x: x,
        inputs=[top_p],
        outputs=[chat.additional_inputs[3]],
    )

    gr.Markdown(
        """
        ---
        ### šŸ’” How to Use
        1. **Upload documents** - PDF, DOCX, TXT, CSV supported
        2. **Ask questions** about the content
        3. **Adjust settings** for different response styles

        ### šŸ“Š Supported Formats
        - **PDF**: Text extraction from all pages
        - **DOCX**: Microsoft Word documents
        - **TXT/MD**: Plain text and Markdown
        - **CSV**: Data files with statistics
        - **Code**: Python, Markdown, JSON

        ### šŸŽÆ Tips
        - Lower temperature (0.1-0.5) = Focused, deterministic
        - Higher temperature (0.8-2.0) = Creative, varied
        - Try different models for different tasks
        """
    )


if __name__ == "__main__":
    demo.queue()
    demo.launch()
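
# Deployment notes (illustrative, not enforced by this file): the imports
# above imply roughly the following requirements.txt for a Hugging Face
# Space; version pins are deliberately omitted and left as an assumption.
#
#     gradio
#     huggingface_hub
#     pypdf
#     python-docx
#     pandas
#
# To run locally, export an inference token first, e.g.:
#
#     HF_TOKEN=hf_xxx python app.py
#
# ("app.py" is the conventional Space entry-point name; adjust if this file
# is saved under another name.)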