# osint-llm / app.py
# Author: Tom
# Commit 6466c00: "Add complete RAG-powered OSINT investigation assistant"
# (The lines "raw", "history blame", "8.58 kB" were Hugging Face Hub page
# chrome captured during extraction, not source code — preserved here as
# comments so the module remains valid Python.)
"""
OSINT Investigation Assistant - Gradio App
A RAG-powered assistant that helps investigators develop methodologies
for OSINT investigations using a database of 344+ OSINT tools.
"""
import os
import gradio as gr
from dotenv import load_dotenv
from src.rag_pipeline import create_pipeline
# Load environment variables (HF_TOKEN, SUPABASE_CONNECTION_STRING, and the
# optional LLM_MODEL / LLM_TEMPERATURE overrides) from a local .env file.
load_dotenv()
# Initialize the RAG pipeline once at import time so every Gradio handler
# below shares a single instance. If initialization fails, the error is
# printed and re-raised so the app fails fast instead of serving a broken UI.
print("Initializing OSINT Investigation Pipeline...")
try:
    pipeline = create_pipeline(
        retrieval_k=5,  # number of tool documents retrieved per query
        model=os.getenv("LLM_MODEL", "meta-llama/Llama-3.1-8B-Instruct"),
        temperature=float(os.getenv("LLM_TEMPERATURE", "0.7"))
    )
    print("βœ“ Pipeline initialized successfully")
except Exception as e:
    print(f"βœ— Error initializing pipeline: {e}")
    raise  # fail fast: the app is unusable without the pipeline
def investigate(message: str, history: list) -> str:
    """
    Answer a single investigation query without streaming.

    Args:
        message: User's investigation query
        history: Chat history (list of [user_msg, bot_msg] pairs); not read
            here, but required by the Gradio chat callback signature.

    Returns:
        The generated investigation methodology, or a human-readable error
        message if generation fails.
    """
    try:
        # Delegate straight to the shared pipeline; non-streaming mode.
        return pipeline.generate_methodology(message, stream=False)
    except Exception as e:
        # Surface the failure in-chat rather than crashing the UI.
        return f"Error generating response: {str(e)}\n\nPlease check your environment variables (HF_TOKEN, SUPABASE_CONNECTION_STRING) and try again."
def investigate_stream(message: str, history: list):
    """
    Streaming counterpart of investigate().

    Args:
        message: User's investigation query
        history: Chat history; not read here, required by the Gradio
            callback signature.

    Yields:
        The accumulated response text so far — Gradio replaces the displayed
        message with each successive yield.
    """
    try:
        pieces = []
        for chunk in pipeline.generate_methodology(message, stream=True):
            pieces.append(chunk)
            # Yield the full text-so-far, not just the new chunk.
            yield "".join(pieces)
    except Exception as e:
        # Surface the failure in-chat rather than crashing the UI.
        yield f"Error generating response: {str(e)}\n\nPlease check your environment variables (HF_TOKEN, SUPABASE_CONNECTION_STRING) and try again."
def get_tool_recommendations(query: str, k: int = 5) -> str:
    """
    Get tool recommendations for a query.

    Args:
        query: Investigation query
        k: Number of tools to recommend. Gradio Slider callbacks may pass
            this as a float, so it is coerced to int before use.

    Returns:
        Markdown-formatted tool recommendations, or an error message if
        retrieval fails.
    """
    try:
        # int(k): Gradio sliders can deliver floats; the retriever expects int.
        tools = pipeline.get_tool_recommendations(query, k=int(k))
        if not tools:
            return "No relevant tools found."

        # Build the markdown in a list and join once — avoids quadratic
        # string concatenation and keeps each field on its own line.
        parts = [f"## Top {len(tools)} Recommended Tools\n\n"]
        for i, tool in enumerate(tools, 1):
            # .get() with defaults: a record missing a field should degrade
            # gracefully instead of raising KeyError.
            parts.append(f"### {i}. {tool.get('name', 'Unknown')}\n")
            parts.append(f"- **Category**: {tool.get('category', 'N/A')}\n")
            parts.append(f"- **Cost**: {tool.get('cost', 'N/A')}\n")
            parts.append(f"- **URL**: {tool.get('url', 'N/A')}\n")
            parts.append(f"- **Description**: {tool.get('description', 'N/A')}\n")
            # 'details' is optional and sometimes stored as the literal 'N/A'.
            details = tool.get('details')
            if details and details != 'N/A':
                parts.append(f"- **Details**: {details}\n")
            parts.append("\n")
        return "".join(parts)
    except Exception as e:
        return f"Error retrieving tools: {str(e)}"
# Custom CSS for better appearance
custom_css = """
.gradio-container {
max-width: 900px !important;
}
#component-0 {
max-width: 900px;
}
"""
# Create Gradio interface: a streaming chat front-and-center, plus a direct
# tool-search tab and an informational "About" tab. The api_name arguments
# additionally expose each callback as an HTTP endpoint (/call/<name>).
with gr.Blocks(
    title="OSINT Investigation Assistant",
    theme=gr.themes.Soft(),
    css=custom_css
) as demo:
    # Header shown above the chat interface.
    gr.Markdown("""
    # πŸ” OSINT Investigation Assistant
    Ask me how to investigate anything using open-source intelligence methods.
    I'll provide you with a structured methodology and recommend specific OSINT tools
    from a database of 344+ tools.
    **Examples:**
    - "How do I investigate a suspicious domain?"
    - "What tools can I use to verify an image's authenticity?"
    - "How can I trace the origin of a social media account?"
    """)
    # Main chat interface — streams responses via investigate_stream.
    chatbot = gr.ChatInterface(
        fn=investigate_stream,
        type="messages",
        examples=[
            "How do I investigate a suspicious domain?",
            "What tools can I use to verify an image's authenticity?",
            "How can I trace the origin of a social media account?",
            "What's the best way to archive web content for investigation?",
            "How do I geolocate an image from social media?"
        ],
        cache_examples=False,  # examples hit the live pipeline; don't pre-run them
        title="Chat Interface",
        description="Ask your investigation questions here",
        api_name="investigate"  # This creates the /call/investigate API endpoint
    )
    # Additional tab for direct tool search (bypasses the LLM, retrieval only).
    with gr.Tab("Tool Search"):
        gr.Markdown("### Search for OSINT Tools")
        with gr.Row():
            tool_query = gr.Textbox(
                label="Search Query",
                placeholder="e.g., social media analysis, image verification, domain investigation",
                lines=2
            )
            tool_count = gr.Slider(
                minimum=1,
                maximum=20,
                value=5,
                step=1,
                label="Number of Tools"
            )
        tool_search_btn = gr.Button("Search Tools", variant="primary")
        tool_output = gr.Markdown(label="Recommended Tools")
        # Wire the button to the retrieval helper.
        tool_search_btn.click(
            fn=get_tool_recommendations,
            inputs=[tool_query, tool_count],
            outputs=tool_output,
            api_name="search_tools"  # This creates the /call/search_tools API endpoint
        )
    # Information tab — static documentation, no callbacks.
    with gr.Tab("About"):
        gr.Markdown("""
        ## About This Assistant
        This OSINT Investigation Assistant helps researchers and investigators develop
        structured methodologies for open-source intelligence investigations.
        ### Features
        - 🎯 **Structured Methodologies**: Get step-by-step investigation plans
        - πŸ› οΈ **Tool Recommendations**: Access a database of 344+ OSINT tools
        - πŸ” **Context-Aware**: Tools are recommended based on your specific needs
        - πŸš€ **API Access**: Use this app via API for integration with other tools
        ### Technology Stack
        - **Vector Database**: Supabase with PGVector (344 OSINT tools)
        - **LLM**: Hugging Face Inference Providers (Llama 3.1)
        - **RAG Framework**: LangChain for retrieval-augmented generation
        - **UI/API**: Gradio with automatic API generation
        ### API Usage
        This app automatically exposes API endpoints. You can access them using:
        **Python Client:**
        ```python
        from gradio_client import Client
        client = Client("your-space-url")
        result = client.predict("How do I investigate a domain?", api_name="/investigate")
        print(result)
        ```
        **cURL:**
        ```bash
        curl -X POST "https://your-space.hf.space/call/investigate" \\
        -H "Content-Type: application/json" \\
        -d '{"data": ["How do I investigate a domain?"]}'
        ```
        View the full API documentation at the bottom of this page (click "Use via API").
        ### Environment Variables Required
        - `SUPABASE_CONNECTION_STRING`: PostgreSQL connection string for Supabase
        - `HF_TOKEN`: Hugging Face API token for Inference Providers
        - `LLM_MODEL` (optional): Model to use (default: meta-llama/Llama-3.1-8B-Instruct)
        - `LLM_TEMPERATURE` (optional): Temperature for generation (default: 0.7)
        ### Data Source
        The tool recommendations are based on the Bellingcat OSINT Toolkit and other
        curated sources, with 344+ tools across categories including:
        - Social Media Investigation
        - Image and Video Analysis
        - Domain and Network Investigation
        - Geolocation
        - Archiving and Preservation
        - And more...
        ---
        Built with ❀️ for the OSINT community
        """)
# Launch configuration
if __name__ == "__main__":
    # Warn (but do not abort) when required configuration is absent — the
    # handlers surface a descriptive error at request time instead.
    required_vars = ["SUPABASE_CONNECTION_STRING", "HF_TOKEN"]
    missing_vars = []
    for var in required_vars:
        if not os.getenv(var):
            missing_vars.append(var)
    if missing_vars:
        print(f"⚠️ Warning: Missing environment variables: {', '.join(missing_vars)}")
        print("Please set these in your .env file or as environment variables")
    # Launch the app
    # Set mcp_server=True to enable MCP protocol for agent integration
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        show_api=True,  # Show API documentation
        share=False
    )