""" OSINT Investigation Assistant - Gradio App A RAG-powered assistant that helps investigators develop methodologies for OSINT investigations using a database of 344+ OSINT tools. """ import os import gradio as gr from dotenv import load_dotenv from src.rag_pipeline import create_pipeline # Load environment variables load_dotenv() # Initialize the RAG pipeline print("Initializing OSINT Investigation Pipeline...") try: pipeline = create_pipeline( retrieval_k=5, model=os.getenv("LLM_MODEL", "meta-llama/Llama-3.1-8B-Instruct"), temperature=float(os.getenv("LLM_TEMPERATURE", "0.7")) ) print("✓ Pipeline initialized successfully") except Exception as e: print(f"✗ Error initializing pipeline: {e}") raise def investigate(message: str, history: list) -> str: """ Main chat function for investigation queries Args: message: User's investigation query history: Chat history (list of [user_msg, bot_msg] pairs) Returns: Generated investigation methodology """ try: # Generate response (non-streaming for simplicity) response = pipeline.generate_methodology(message, stream=False) return response except Exception as e: return f"Error generating response: {str(e)}\n\nPlease check your environment variables (HF_TOKEN, SUPABASE_CONNECTION_STRING) and try again." def investigate_stream(message: str, history: list): """ Streaming version of investigation function Args: message: User's investigation query history: Chat history Yields: Response chunks """ try: response_stream = pipeline.generate_methodology(message, stream=True) full_response = "" for chunk in response_stream: full_response += chunk yield full_response except Exception as e: yield f"Error generating response: {str(e)}\n\nPlease check your environment variables (HF_TOKEN, SUPABASE_CONNECTION_STRING) and try again." 
def get_tool_recommendations(query: str, k: int = 5) -> str:
    """
    Get tool recommendations for a query

    Args:
        query: Investigation query
        k: Number of tools to recommend. Coerced to ``int`` because UI
            sliders and JSON API calls may deliver it as a float.

    Returns:
        Markdown-formatted tool recommendations, a short notice when the
        query is blank or nothing matches, or an error message if retrieval
        fails
    """
    # Skip the retrieval round-trip for blank input.
    if not query or not query.strip():
        return "Please enter a search query."

    try:
        tools = pipeline.get_tool_recommendations(query, k=int(k))

        if not tools:
            return "No relevant tools found."

        parts = [f"## Top {len(tools)} Recommended Tools\n\n"]
        for i, tool in enumerate(tools, 1):
            parts.append(f"### {i}. {tool['name']}\n")
            parts.append(f"- **Category**: {tool['category']}\n")
            parts.append(f"- **Cost**: {tool['cost']}\n")
            parts.append(f"- **URL**: {tool['url']}\n")
            parts.append(f"- **Description**: {tool['description']}\n")
            # 'N/A' is treated the same as an empty details field.
            if tool['details'] and tool['details'] != 'N/A':
                parts.append(f"- **Details**: {tool['details']}\n")
            parts.append("\n")

        return "".join(parts)
    except Exception as e:
        # Returned as text so the failure is rendered in the output panel.
        return f"Error retrieving tools: {e}"


# Custom CSS for better appearance
custom_css = """
.gradio-container {
    max-width: 900px !important;
}
#component-0 {
    max-width: 900px;
}
"""

# Create Gradio interface
with gr.Blocks(
    title="OSINT Investigation Assistant",
    theme=gr.themes.Soft(),
    css=custom_css
) as demo:
    gr.Markdown("""
    # 🔍 OSINT Investigation Assistant

    Ask me how to investigate anything using open-source intelligence methods.
    I'll provide you with a structured methodology and recommend specific OSINT tools
    from a database of 344+ tools.

    **Examples:**
    - "How do I investigate a suspicious domain?"
    - "What tools can I use to verify an image's authenticity?"
    - "How can I trace the origin of a social media account?"
    """)

    # Main chat interface
    chatbot = gr.ChatInterface(
        fn=investigate_stream,
        type="messages",
        examples=[
            "How do I investigate a suspicious domain?",
            "What tools can I use to verify an image's authenticity?",
            "How can I trace the origin of a social media account?",
            "What's the best way to archive web content for investigation?",
            "How do I geolocate an image from social media?"
        ],
        cache_examples=False,
        title="Chat Interface",
        description="Ask your investigation questions here",
        api_name="investigate"  # This creates the /call/investigate API endpoint
    )

    # Additional tab for direct tool search
    with gr.Tab("Tool Search"):
        gr.Markdown("### Search for OSINT Tools")
        with gr.Row():
            tool_query = gr.Textbox(
                label="Search Query",
                placeholder="e.g., social media analysis, image verification, domain investigation",
                lines=2
            )
            tool_count = gr.Slider(
                minimum=1,
                maximum=20,
                value=5,
                step=1,
                label="Number of Tools"
            )

        tool_search_btn = gr.Button("Search Tools", variant="primary")
        tool_output = gr.Markdown(label="Recommended Tools")

        tool_search_btn.click(
            fn=get_tool_recommendations,
            inputs=[tool_query, tool_count],
            outputs=tool_output,
            api_name="search_tools"  # This creates the /call/search_tools API endpoint
        )

    # Information tab
    with gr.Tab("About"):
        gr.Markdown("""
        ## About This Assistant

        This OSINT Investigation Assistant helps researchers and investigators develop
        structured methodologies for open-source intelligence investigations.

        ### Features

        - 🎯 **Structured Methodologies**: Get step-by-step investigation plans
        - 🛠️ **Tool Recommendations**: Access a database of 344+ OSINT tools
        - 🔍 **Context-Aware**: Tools are recommended based on your specific needs
        - 🚀 **API Access**: Use this app via API for integration with other tools

        ### Technology Stack

        - **Vector Database**: Supabase with PGVector (344 OSINT tools)
        - **LLM**: Hugging Face Inference Providers (Llama 3.1)
        - **RAG Framework**: LangChain for retrieval-augmented generation
        - **UI/API**: Gradio with automatic API generation

        ### API Usage

        This app automatically exposes API endpoints. You can access them using:

        **Python Client:**
        ```python
        from gradio_client import Client

        client = Client("your-space-url")
        result = client.predict("How do I investigate a domain?", api_name="/investigate")
        print(result)
        ```

        **cURL:**
        ```bash
        curl -X POST "https://your-space.hf.space/call/investigate" \\
            -H "Content-Type: application/json" \\
            -d '{"data": ["How do I investigate a domain?"]}'
        ```

        View the full API documentation at the bottom of this page (click "Use via API").

        ### Environment Variables Required

        - `SUPABASE_CONNECTION_STRING`: PostgreSQL connection string for Supabase
        - `HF_TOKEN`: Hugging Face API token for Inference Providers
        - `LLM_MODEL` (optional): Model to use (default: meta-llama/Llama-3.1-8B-Instruct)
        - `LLM_TEMPERATURE` (optional): Temperature for generation (default: 0.7)

        ### Data Source

        The tool recommendations are based on the Bellingcat OSINT Toolkit and other
        curated sources, with 344+ tools across categories including:
        - Social Media Investigation
        - Image and Video Analysis
        - Domain and Network Investigation
        - Geolocation
        - Archiving and Preservation
        - And more...

        ---

        Built with ❤️ for the OSINT community
        """)

# Launch configuration
if __name__ == "__main__":
    # Check for required environment variables; this only warns and does not
    # stop the launch.
    required_vars = ["SUPABASE_CONNECTION_STRING", "HF_TOKEN"]
    missing_vars = [var for var in required_vars if not os.getenv(var)]

    if missing_vars:
        print(f"⚠️  Warning: Missing environment variables: {', '.join(missing_vars)}")
        print("Please set these in your .env file or as environment variables")

    # Launch the app
    # Set mcp_server=True to enable MCP protocol for agent integration
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=False,
        show_api=True  # Show API documentation
    )