| """ | |
| OSINT Investigation Assistant - Gradio App | |
| A RAG-powered assistant that helps investigators develop methodologies | |
| for OSINT investigations using a database of 344+ OSINT tools. | |
| """ | |
| import os | |
| import gradio as gr | |
| from dotenv import load_dotenv | |
| from src.rag_pipeline import create_pipeline | |
| # Load environment variables | |
| load_dotenv() | |
| # Initialize the RAG pipeline | |
| print("Initializing OSINT Investigation Pipeline...") | |
| try: | |
| pipeline = create_pipeline( | |
| retrieval_k=5, | |
| model=os.getenv("LLM_MODEL", "meta-llama/Llama-3.1-8B-Instruct"), | |
| temperature=float(os.getenv("LLM_TEMPERATURE", "0.7")) | |
| ) | |
| print("β Pipeline initialized successfully") | |
| except Exception as e: | |
| print(f"β Error initializing pipeline: {e}") | |
| raise | |
def investigate(message: str, history: list) -> str:
    """
    Main chat function for investigation queries

    Args:
        message: User's investigation query
        history: Chat history (list of message dicts with "role" and "content" keys)

    Returns:
        Generated investigation methodology
    """
    try:
        # Generate response (non-streaming for simplicity)
        response = pipeline.generate_methodology(message, stream=False)
        return response
    except Exception as e:
        return f"Error generating response: {str(e)}\n\nPlease check your environment variables (HF_TOKEN, SUPABASE_CONNECTION_STRING) and try again."
def investigate_stream(message: str, history: list):
    """
    Streaming version of the investigation function

    Args:
        message: User's investigation query
        history: Chat history (list of message dicts with "role" and "content" keys)

    Yields:
        The response accumulated so far; Gradio replaces the displayed message
        with each yielded value, which produces the streaming effect.
    """
    try:
        response_stream = pipeline.generate_methodology(message, stream=True)
        full_response = ""
        for chunk in response_stream:
            full_response += chunk
            yield full_response
    except Exception as e:
        yield f"Error generating response: {str(e)}\n\nPlease check your environment variables (HF_TOKEN, SUPABASE_CONNECTION_STRING) and try again."
def get_tool_recommendations(query: str, k: int = 5) -> str:
    """
    Get tool recommendations for a query

    Args:
        query: Investigation query
        k: Number of tools to recommend

    Returns:
        Formatted tool recommendations (Markdown)
    """
    try:
        tools = pipeline.get_tool_recommendations(query, k=k)
        if not tools:
            return "No relevant tools found."

        output = f"## Top {len(tools)} Recommended Tools\n\n"
        for i, tool in enumerate(tools, 1):
            output += f"### {i}. {tool['name']}\n"
            output += f"- **Category**: {tool['category']}\n"
            output += f"- **Cost**: {tool['cost']}\n"
            output += f"- **URL**: {tool['url']}\n"
            output += f"- **Description**: {tool['description']}\n"
            if tool['details'] and tool['details'] != 'N/A':
                output += f"- **Details**: {tool['details']}\n"
            output += "\n"
        return output
    except Exception as e:
        return f"Error retrieving tools: {str(e)}"
# Custom CSS for better appearance
custom_css = """
.gradio-container {
    max-width: 900px !important;
}
#component-0 {
    max-width: 900px;
}
"""

# Create Gradio interface
with gr.Blocks(
    title="OSINT Investigation Assistant",
    theme=gr.themes.Soft(),
    css=custom_css
) as demo:
| gr.Markdown(""" | |
| # π OSINT Investigation Assistant | |
| Ask me how to investigate anything using open-source intelligence methods. | |
| I'll provide you with a structured methodology and recommend specific OSINT tools | |
| from a database of 344+ tools. | |
| **Examples:** | |
| - "How do I investigate a suspicious domain?" | |
| - "What tools can I use to verify an image's authenticity?" | |
| - "How can I trace the origin of a social media account?" | |
| """) | |
    # Main chat interface
    chatbot = gr.ChatInterface(
        fn=investigate_stream,
        type="messages",
        examples=[
            "How do I investigate a suspicious domain?",
            "What tools can I use to verify an image's authenticity?",
            "How can I trace the origin of a social media account?",
            "What's the best way to archive web content for investigation?",
            "How do I geolocate an image from social media?"
        ],
        cache_examples=False,
        title="Chat Interface",
        description="Ask your investigation questions here",
        api_name="investigate"  # Exposes the /call/investigate API endpoint
    )
    # Additional tab for direct tool search
    with gr.Tab("Tool Search"):
        gr.Markdown("### Search for OSINT Tools")
        with gr.Row():
            tool_query = gr.Textbox(
                label="Search Query",
                placeholder="e.g., social media analysis, image verification, domain investigation",
                lines=2
            )
            tool_count = gr.Slider(
                minimum=1,
                maximum=20,
                value=5,
                step=1,
                label="Number of Tools"
            )
        tool_search_btn = gr.Button("Search Tools", variant="primary")
        tool_output = gr.Markdown(label="Recommended Tools")

        tool_search_btn.click(
            fn=get_tool_recommendations,
            inputs=[tool_query, tool_count],
            outputs=tool_output,
            api_name="search_tools"  # Exposes the /call/search_tools API endpoint
        )
    # Information tab
    with gr.Tab("About"):
        gr.Markdown("""
        ## About This Assistant

        This OSINT Investigation Assistant helps researchers and investigators develop
        structured methodologies for open-source intelligence investigations.

        ### Features
        - 🎯 **Structured Methodologies**: Get step-by-step investigation plans
        - 🛠️ **Tool Recommendations**: Access a database of 344+ OSINT tools
        - 🔍 **Context-Aware**: Tools are recommended based on your specific needs
        - 🔌 **API Access**: Use this app via API for integration with other tools

        ### Technology Stack
        - **Vector Database**: Supabase with PGVector (344 OSINT tools)
        - **LLM**: Hugging Face Inference Providers (Llama 3.1)
        - **RAG Framework**: LangChain for retrieval-augmented generation
        - **UI/API**: Gradio with automatic API generation

        ### API Usage
        This app automatically exposes API endpoints. You can access them using:

        **Python Client:**
        ```python
        from gradio_client import Client

        client = Client("your-space-url")
        result = client.predict("How do I investigate a domain?", api_name="/investigate")
        print(result)
        ```

        **cURL:**
        ```bash
        curl -X POST "https://your-space.hf.space/call/investigate" \\
            -H "Content-Type: application/json" \\
            -d '{"data": ["How do I investigate a domain?"]}'
        ```
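
        Note that in recent Gradio versions the `/call/...` POST returns an event ID rather than
        the answer itself; the result is then fetched with a follow-up GET. A sketch (assuming the
        response shape `{"event_id": "..."}` and that `jq` is installed):

        ```bash
        # 1. Submit the request and capture the event ID (jq is an assumption, any JSON parser works)
        EVENT_ID=$(curl -s -X POST "https://your-space.hf.space/call/investigate" \\
            -H "Content-Type: application/json" \\
            -d '{"data": ["How do I investigate a domain?"]}' | jq -r .event_id)

        # 2. Stream the generated methodology for that event
        curl -N "https://your-space.hf.space/call/investigate/$EVENT_ID"
        ```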
        View the full API documentation at the bottom of this page (click "Use via API").

        ### Environment Variables Required
        - `SUPABASE_CONNECTION_STRING`: PostgreSQL connection string for Supabase
        - `HF_TOKEN`: Hugging Face API token for Inference Providers
        - `LLM_MODEL` (optional): Model to use (default: `meta-llama/Llama-3.1-8B-Instruct`)
        - `LLM_TEMPERATURE` (optional): Temperature for generation (default: 0.7)
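
        For local development these can live in a `.env` file next to `app.py` (loaded via
        `python-dotenv`); a minimal example with placeholder values might look like:

        ```bash
        # .env - placeholder values, do not commit real credentials
        SUPABASE_CONNECTION_STRING=postgresql://user:password@db.example.supabase.co:5432/postgres
        HF_TOKEN=hf_xxxxxxxxxxxxxxxxxxxx
        LLM_MODEL=meta-llama/Llama-3.1-8B-Instruct
        LLM_TEMPERATURE=0.7
        ```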
        ### Data Source
        The tool recommendations are based on the Bellingcat OSINT Toolkit and other
        curated sources, with 344+ tools across categories including:
        - Social Media Investigation
        - Image and Video Analysis
        - Domain and Network Investigation
        - Geolocation
        - Archiving and Preservation
        - And more...

        ---
        Built with ❤️ for the OSINT community
        """)
# Launch configuration
if __name__ == "__main__":
    # Check for required environment variables
    required_vars = ["SUPABASE_CONNECTION_STRING", "HF_TOKEN"]
    missing_vars = [var for var in required_vars if not os.getenv(var)]

    if missing_vars:
        print(f"⚠️ Warning: Missing environment variables: {', '.join(missing_vars)}")
        print("Please set these in your .env file or as environment variables")

    # Launch the app
    # Set mcp_server=True to enable MCP protocol for agent integration
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=False,
        show_api=True  # Show API documentation
    )