Spaces:

black279
/

jarvis

Running

File size: 10,397 Bytes

8586755
9c6092b
db43cff
 
 
9c6092b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
299cb3d
ca2e19e
54b6f0f
 
 
 
299cb3d
 
ca2e19e
 
9c6092b
 
 
ca2e19e
 
9c6092b
 
 
8794032
9c6092b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a8e9fed
 
9c6092b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c57ca6b
 
 
 
 
 
 
 
 
 
9c31210
c57ca6b
9c31210
 
c57ca6b
 
9c31210
a8e9fed
9c6092b
8586755
ca2e19e
 
9c31210
ca2e19e
9c6092b
9c31210
c57ca6b
 
9c6092b
a8e9fed
 
 
 
9c6092b
a8e9fed
 
9c6092b
ca2e19e
 
82bd9cb
ca2e19e
8586755
 
 
 
 
 
ca2e19e
82bd9cb
a8e9fed
ca2e19e
 
9c31210
ca2e19e
 
 
db43cff
c57ca6b
 
82bd9cb
9c6092b
 
9c31210
c57ca6b
9c6092b
 
ca2e19e
 
 
c57ca6b
a8e9fed
 
ca2e19e
9c6092b
ca2e19e
 
9c31210
a8e9fed
 
c57ca6b
 
 
 
 
 
 
 
a8e9fed
 
 
 
 
 
 
c57ca6b
 
a8e9fed
c57ca6b
 
 
 
 
a8e9fed
9c31210
 
a8e9fed
ca2e19e
 
 
 
 
 
 
c57ca6b
ca2e19e
 
 
 
9c6092b
c57ca6b
ca2e19e
 
 
9c6092b
ca2e19e
a8e9fed
ca2e19e
 
c57ca6b
8794032
c57ca6b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a8e9fed
c57ca6b
a8e9fed
 
 
c57ca6b
a8e9fed
 
 
 
 
 
 
 
 
 
 
c57ca6b
a8e9fed
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9c6092b
 
 
a8e9fed
c57ca6b
 
9c31210
 
 
c57ca6b
 
 
 
 
 
 
8794032
c57ca6b
 
 
 
9c6092b
ca2e19e
db43cff
 
ca2e19e
82bd9cb

import os
from pathlib import Path
import gradio as gr
from huggingface_hub import InferenceClient

# Optional document-parsing libraries (pypdf, docx, pandas); each *_AVAILABLE flag records whether its import succeeded
try:
    from pypdf import PdfReader
    PYPDF_AVAILABLE = True
except ImportError:
    PYPDF_AVAILABLE = False

try:
    import docx
    DOCX_AVAILABLE = True
except ImportError:
    DOCX_AVAILABLE = False

try:
    import pandas as pd
    PANDAS_AVAILABLE = True
except ImportError:
    PANDAS_AVAILABLE = False

# Model configurations
# Model ids offered in the model dropdowns; the selected id is passed to
# InferenceClient(model=...) in respond(). MODELS[0] is the default selection.
MODELS = [
    "Qwen/Qwen2.5-72B-Instruct",
    "deepseek-ai/DeepSeek-V3",
    "meta-llama/Meta-Llama-3-8B-Instruct",
    "moonshotai/Kimi-K2-Thinking",
    "google/gemma-2-9b-it"
]

# Preset name -> system prompt text. The keys populate the "Behavior Preset"
# dropdown; "Default" is also the fallback used by update_system_prompt().
SYSTEM_PROMPTS = {
    "Default": "You are a helpful, respectful and honest assistant.",
    "Document Analyzer": "You are an expert at analyzing documents. Provide detailed insights, summaries, and answer questions based on the provided document content.",
    "Code Expert": "You are an expert programmer. Analyze code, provide explanations, and suggest improvements.",
    "Data Scientist": "You are a data science expert. Analyze data files and provide insights with statistical analysis.",
}

def extract_text_from_pdf(file_path):
    """Return the text of every page of a PDF, or an error message.

    The result starts with a page-count header, followed by one
    "--- Page N ---" section per page (1-based). If the PDF reader could not
    be imported, or reading fails, a "❌ ..." message string is returned
    instead of raising.
    """
    if not PYPDF_AVAILABLE:
        return "❌ PDF extraction unavailable."

    try:
        pdf = PdfReader(file_path)
        sections = [f"📄 PDF: {len(pdf.pages)} pages\n\n"]
        for number, page in enumerate(pdf.pages, start=1):
            extracted = page.extract_text()
            sections.append(f"--- Page {number} ---\n{extracted}\n\n")
        return "".join(sections)
    except Exception as exc:
        return f"❌ Error reading PDF: {str(exc)}"

def extract_text_from_docx(file_path):
    """Return the non-blank paragraphs of a DOCX file joined by blank lines.

    Paragraphs whose text is empty or whitespace-only are skipped. If the
    docx module could not be imported, or reading fails, a "❌ ..." message
    string is returned instead of raising.
    """
    if DOCX_AVAILABLE:
        try:
            document = docx.Document(file_path)
            kept = []
            for paragraph in document.paragraphs:
                if paragraph.text.strip():
                    kept.append(paragraph.text)
            return "\n\n".join(kept)
        except Exception as exc:
            return f"❌ Error reading DOCX: {str(exc)}"

    return "❌ DOCX extraction unavailable."

def extract_text_from_txt(file_path):
    """Read a plain-text file and return its contents as a string.

    Decoding is attempted as UTF-8 first; a leading byte-order mark, if
    present, is dropped so it does not leak into the extracted text. If the
    bytes are not valid UTF-8 the file is re-read as Latin-1, which accepts
    any byte sequence.

    Args:
        file_path: Path of the file to read.

    Returns:
        The file contents, or a "❌ Error: ..." message string if the file
        cannot be opened or read (in either decoding attempt).
    """
    # "utf-8-sig" decodes ordinary UTF-8 and additionally strips a BOM.
    for encoding in ("utf-8-sig", "latin-1"):
        try:
            with open(file_path, 'r', encoding=encoding) as f:
                return f.read()
        except UnicodeDecodeError:
            # Wrong guess: fall through to the next, more permissive encoding.
            continue
        except Exception as e:
            # Covers failures in the fallback read too, so callers always
            # get a string back rather than an exception.
            return f"❌ Error: {str(e)}"

    # Only reachable if every candidate encoding rejected the bytes.
    return "❌ Error: unable to decode file"

def extract_text_from_csv(file_path):
    """Summarise a CSV file as text, or return an error message.

    The summary contains the row/column counts, the column names, a preview
    of the first 10 rows and the output of DataFrame.describe(). If pandas
    could not be imported, or loading/summarising fails, a "❌ ..." message
    string is returned instead of raising.
    """
    if not PANDAS_AVAILABLE:
        return "❌ CSV extraction unavailable."

    try:
        frame = pd.read_csv(file_path)
        row_count = len(frame)
        column_count = len(frame.columns)
        column_names = ', '.join(frame.columns)
        preview = frame.head(10).to_string()
        statistics = frame.describe().to_string()
    except Exception as exc:
        return f"❌ Error: {str(exc)}"

    return (
        f"📊 CSV: {row_count} rows, {column_count} columns\n\n"
        f"Columns: {column_names}\n\n"
        f"Preview (first 10 rows):\n{preview}\n\n"
        f"Statistics:\n{statistics}"
    )

def process_files(files):
    """Turn uploaded files into one text block to append to the user prompt.

    Each entry of ``files`` is either a path string or an object exposing the
    path as ``.name``. The extractor is chosen from the lower-cased file
    extension; unknown extensions produce an "Unsupported format" notice.

    Returns:
        "" when ``files`` is empty/None; otherwise a banner followed by one
        titled, rule-terminated section per file.
    """
    if not files:
        return ""

    banner = "=" * 50
    rule = "-" * 50
    pieces = ["\n\n" + banner + "\n📎 UPLOADED DOCUMENTS\n" + banner + "\n\n"]

    for entry in files:
        path = entry if isinstance(entry, str) else entry.name
        location = Path(path)
        suffix = location.suffix.lower()

        pieces.append(f"\n📄 **{location.name}**\n\n")

        # Extractors are looked up lazily, one branch per supported family.
        if suffix == '.pdf':
            body = extract_text_from_pdf(path)
        elif suffix in ('.docx', '.doc'):
            body = extract_text_from_docx(path)
        elif suffix in ('.txt', '.md', '.py', '.json'):
            body = extract_text_from_txt(path)
        elif suffix == '.csv':
            body = extract_text_from_csv(path)
        else:
            body = f"⚠️ Unsupported format: {suffix}"

        pieces.append(body + "\n\n" + rule + "\n")

    return "".join(pieces)

def respond(
    message,
    history,
    system_message,
    max_tokens,
    temperature,
    top_p,
    model_id,
    uploaded_files,
):
    """Stream a chat reply for the ChatInterface (token comes from the env).

    Generator: yields the reply accumulated so far each time a streamed chunk
    adds text, so every yielded string supersedes the previous one.

    Args:
        message: Text the user just submitted.
        history: Earlier conversation entries, forwarded to the model as-is.
        system_message: Content of the leading "system" message.
        max_tokens, temperature, top_p: Generation settings forwarded to
            ``chat_completion``.
        model_id: Model passed to ``InferenceClient``.
        uploaded_files: Optional files whose extracted text is appended to
            ``message``.

    Yields:
        The growing reply text; a single warning if HF_TOKEN is not set; or a
        single "❌ Error: ..." message if anything in the request fails.
    """
    # Authentication relies solely on the HF_TOKEN environment variable.
    hf_token = os.getenv("HF_TOKEN")
    if not hf_token:
        yield "⚠️ HF_TOKEN not configured. Please set it in Space Settings → Repository secrets."
        return

    try:
        inference = InferenceClient(token=hf_token, model=model_id)

        # System prompt first, then the prior turns in their original order.
        conversation = [{"role": "system", "content": system_message}]
        conversation.extend(history)

        # Extracted document text rides along with the newest user turn.
        attachments = process_files(uploaded_files) if uploaded_files else ""
        conversation.append({"role": "user", "content": message + attachments})

        stream = inference.chat_completion(
            conversation,
            max_tokens=max_tokens,
            stream=True,
            temperature=temperature,
            top_p=top_p,
        )

        reply_so_far = ""
        for event in stream:
            # Skip chunks that carry no choices or no new text.
            if not event.choices:
                continue
            piece = event.choices[0].delta.content
            if piece:
                reply_so_far += piece
                yield reply_so_far

    except Exception as exc:
        yield f"❌ Error: {str(exc)}\n\nTry a different model or check HF_TOKEN configuration."

def update_system_prompt(preset):
    """Return the system prompt for ``preset``, or the "Default" prompt if unknown."""
    fallback = SYSTEM_PROMPTS["Default"]
    return SYSTEM_PROMPTS.get(preset, fallback)

# Gradio 6 Interface
# Page layout: a wide left column (file upload + chat) and a narrow right
# column (model / prompt / generation settings), followed by the change-event
# wiring and a static help section.
with gr.Blocks(title="Jarvis - AI Document Assistant") as demo:
    
    gr.Markdown(
        """
        # 💬 Jarvis - AI Document Assistant
        Upload documents (PDF, DOCX, TXT, CSV) and chat with powerful AI models
        """
    )
    
    with gr.Row():
        with gr.Column(scale=3):
            # File upload
            # NOTE(review): process_files() also routes ".doc" to the DOCX
            # extractor, but ".doc" is not in file_types here; likewise the
            # help text below mentions JavaScript while ".js" is not accepted.
            file_upload = gr.File(
                label="📁 Upload Documents",
                file_count="multiple",
                file_types=[".pdf", ".docx", ".txt", ".csv", ".md", ".py", ".json"],
            )
            
            # ChatInterface
            chat = gr.ChatInterface(
                fn=respond,
                chatbot=gr.Chatbot(
                    height=500,
                    show_label=False,
                    avatar_images=(
                        None,
                        "https://huggingface.co/datasets/huggingface/brand-assets/resolve/main/hf-logo.png"
                    ),
                ),
                textbox=gr.Textbox(
                    placeholder="💬 Ask a question about your documents...",
                    show_label=False,
                ),
                # Order matches respond()'s parameters after (message, history):
                # [0] system_message, [1] max_tokens, [2] temperature,
                # [3] top_p, [4] model_id, [5] uploaded_files.
                # Entries 0-4 are hidden mirrors of the visible settings
                # controls; the .change handlers below copy values into them.
                additional_inputs=[
                    gr.Textbox(
                        value=SYSTEM_PROMPTS["Document Analyzer"],
                        label="System Prompt",
                        visible=False,
                    ),
                    gr.Slider(128, 4096, 2048, step=128, visible=False),
                    gr.Slider(0.1, 2.0, 0.7, step=0.1, visible=False),
                    gr.Slider(0.1, 1.0, 0.95, step=0.05, visible=False),
                    gr.Dropdown(choices=MODELS, value=MODELS[0], visible=False),
                    file_upload,
                ],
                submit_btn="Send",
                stop_btn="Stop",
            )
        
        with gr.Column(scale=1):
            gr.Markdown("### ⚙️ Settings")
            
            # Visible controls; their initial values duplicate the defaults of
            # the hidden additional_inputs above so both start in sync.
            model_dropdown = gr.Dropdown(
                choices=MODELS,
                value=MODELS[0],
                label="🤖 AI Model",
            )
            
            preset_dropdown = gr.Dropdown(
                choices=list(SYSTEM_PROMPTS.keys()),
                value="Document Analyzer",
                label="📋 Behavior Preset",
            )
            
            system_prompt = gr.Textbox(
                value=SYSTEM_PROMPTS["Document Analyzer"],
                label="💬 System Prompt",
                lines=4,
            )
            
            gr.Markdown("### 🎛️ Generation")
            
            max_tokens = gr.Slider(
                128, 4096, 2048, 
                step=128, 
                label="Max Tokens",
                info="Maximum response length"
            )
            
            temperature = gr.Slider(
                0.1, 2.0, 0.7, 
                step=0.1, 
                label="Temperature",
                info="Creativity (higher = more random)"
            )
            
            top_p = gr.Slider(
                0.1, 1.0, 0.95, 
                step=0.05, 
                label="Top-p",
                info="Nucleus sampling"
            )
    
    # Connect settings to ChatInterface
    # Each identity lambda forwards a visible control's new value into the
    # hidden additional input at the matching index (see the list above).
    model_dropdown.change(
        lambda x: x,
        inputs=[model_dropdown],
        outputs=[chat.additional_inputs[4]],
    )
    
    # Choosing a preset rewrites the visible system-prompt textbox.
    # NOTE(review): presumably that update then fires system_prompt.change
    # below and so reaches the hidden input as well; confirm in the running app.
    preset_dropdown.change(
        update_system_prompt,
        inputs=[preset_dropdown],
        outputs=[system_prompt],
    )
    
    system_prompt.change(
        lambda x: x,
        inputs=[system_prompt],
        outputs=[chat.additional_inputs[0]],
    )
    
    max_tokens.change(
        lambda x: x,
        inputs=[max_tokens],
        outputs=[chat.additional_inputs[1]],
    )
    
    temperature.change(
        lambda x: x,
        inputs=[temperature],
        outputs=[chat.additional_inputs[2]],
    )
    
    top_p.change(
        lambda x: x,
        inputs=[top_p],
        outputs=[chat.additional_inputs[3]],
    )
    
    # Static usage notes shown below the two columns.
    gr.Markdown(
        """
        ---
        ### 💡 How to Use
        
        1. **Upload documents** - PDF, DOCX, TXT, CSV supported
        2. **Ask questions** about the content
        3. **Adjust settings** for different response styles
        
        ### 📊 Supported Formats
        - **PDF**: Text extraction from all pages
        - **DOCX**: Microsoft Word documents
        - **TXT/MD**: Plain text and Markdown
        - **CSV**: Data files with statistics
        - **Code**: Python, JavaScript, JSON, etc.
        
        ### 🎯 Tips
        - Lower temperature (0.1-0.5) = Focused, deterministic
        - Higher temperature (0.8-2.0) = Creative, varied
        - Try different models for different tasks
        """
    )

# Script entry point: runs only when this file is executed directly, not when
# it is imported.
if __name__ == "__main__":
    # Set up the app's queue, then start the server.
    demo.queue()
    demo.launch()