Yassine Mhirsi committed on
Commit
2380f6f
·
1 Parent(s): 682062a

feat: Add topic-related schemas and API routes for topic management, along with LangChain dependencies in requirements.

Browse files
models/__init__.py CHANGED
@@ -28,6 +28,14 @@ from .generate import (
28
  GenerateResponse,
29
  )
30
 
 
 
 
 
 
 
 
 
31
  # Import MCP-related schemas
32
  from .mcp_models import (
33
  ToolCallRequest,
@@ -60,6 +68,11 @@ __all__ = [
60
  # Generate schemas
61
  "GenerateRequest",
62
  "GenerateResponse",
 
 
 
 
 
63
  # MCP schemas
64
  "ToolCallRequest",
65
  "ToolCallResponse",
 
28
  GenerateResponse,
29
  )
30
 
31
+ # Import topic-related schemas
32
+ from .topic import (
33
+ TopicRequest,
34
+ TopicResponse,
35
+ BatchTopicRequest,
36
+ BatchTopicResponse,
37
+ )
38
+
39
  # Import MCP-related schemas
40
  from .mcp_models import (
41
  ToolCallRequest,
 
68
  # Generate schemas
69
  "GenerateRequest",
70
  "GenerateResponse",
71
+ # Topic schemas
72
+ "TopicRequest",
73
+ "TopicResponse",
74
+ "BatchTopicRequest",
75
+ "BatchTopicResponse",
76
  # MCP schemas
77
  "ToolCallRequest",
78
  "ToolCallResponse",
models/topic.py ADDED
@@ -0,0 +1,86 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Pydantic models for topic extraction endpoints"""
2
+
3
+ from pydantic import BaseModel, Field, ConfigDict
4
+ from typing import List, Optional
5
+
6
+
7
class TopicRequest(BaseModel):
    """Input payload for single-text topic extraction."""

    model_config = ConfigDict(
        json_schema_extra={
            "example": {
                "text": "Social media companies must NOT be allowed to track people across websites."
            }
        }
    )

    # Length bounds keep prompts non-trivial while staying within model limits.
    text: str = Field(
        ...,
        min_length=5,
        max_length=5000,
        description="The text/argument to extract topic from",
    )
21
+
22
+
23
class TopicResponse(BaseModel):
    """Result of extracting a topic from a single input text."""

    model_config = ConfigDict(
        json_schema_extra={
            "example": {
                "text": "Social media companies must NOT be allowed to track people across websites.",
                "topic": "social media tracking and cross-website user privacy",
                "timestamp": "2024-01-01T12:00:00Z",
            }
        }
    )

    # Echo of the input, the extracted topic, and when extraction happened.
    text: str = Field(..., description="The original input text")
    topic: str = Field(..., description="The extracted topic")
    timestamp: str = Field(..., description="Timestamp of the extraction")
38
+
39
+
40
class BatchTopicRequest(BaseModel):
    """Input payload for extracting topics from several texts at once."""

    model_config = ConfigDict(
        json_schema_extra={
            "example": {
                "texts": [
                    "Social media companies must NOT be allowed to track people across websites.",
                    "I don't think universal basic income is a good idea — it'll disincentivize work.",
                    "We must invest in renewable energy to combat climate change.",
                ]
            }
        }
    )

    # Between 1 and 50 texts per request to bound LLM usage per call.
    texts: List[str] = Field(
        ...,
        min_length=1,
        max_length=50,
        description="List of texts to extract topics from (max 50)",
    )
58
+
59
+
60
class BatchTopicResponse(BaseModel):
    """Aggregated results for a batch topic-extraction request."""

    model_config = ConfigDict(
        json_schema_extra={
            "example": {
                "results": [
                    {
                        "text": "Social media companies must NOT be allowed to track people across websites.",
                        "topic": "social media tracking and cross-website user privacy",
                        "timestamp": "2024-01-01T12:00:00Z",
                    },
                    {
                        "text": "I don't think universal basic income is a good idea — it'll disincentivize work.",
                        "topic": "universal basic income and its impact on work incentives",
                        "timestamp": "2024-01-01T12:00:00Z",
                    },
                ],
                "total_processed": 2,
                "timestamp": "2024-01-01T12:00:00Z",
            }
        }
    )

    # One TopicResponse per successfully processed text; failures are omitted,
    # so total_processed may be smaller than the number of submitted texts.
    results: List[TopicResponse] = Field(..., description="List of topic extraction results")
    total_processed: int = Field(..., description="Number of texts processed")
    timestamp: str = Field(..., description="Timestamp of the batch extraction")
86
+
requirements.txt CHANGED
@@ -8,6 +8,12 @@ pydantic>=2.5.0
8
  requests>=2.31.0
9
  groq>=0.9.0
10
 
 
 
 
 
 
 
11
  # Audio processing (optionnel si vous avez besoin de traitement local)
12
  soundfile>=0.12.1
13
 
 
8
  requests>=2.31.0
9
  groq>=0.9.0
10
 
11
+ # LangChain
12
+ langchain>=0.1.0
13
+ langchain-core>=0.1.0
14
+ langchain-groq>=0.1.0
15
+ langsmith>=0.1.0
16
+
17
  # Audio processing (optionnel si vous avez besoin de traitement local)
18
  soundfile>=0.12.1
19
 
routes/__init__.py CHANGED
@@ -1,7 +1,7 @@
1
  """API route handlers"""
2
 
3
  from fastapi import APIRouter
4
- from . import root, health, stance, label, generate
5
  from routes.tts_routes import router as audio_router
6
  # Create main router
7
  api_router = APIRouter()
@@ -12,6 +12,7 @@ api_router.include_router(health.router)
12
  api_router.include_router(stance.router, prefix="/stance")
13
  api_router.include_router(label.router, prefix="/label")
14
  api_router.include_router(generate.router, prefix="/generate")
 
15
  api_router.include_router(audio_router)
16
 
17
  __all__ = ["api_router"]
 
1
  """API route handlers"""
2
 
3
  from fastapi import APIRouter
4
+ from . import root, health, stance, label, generate, topic
5
  from routes.tts_routes import router as audio_router
6
  # Create main router
7
  api_router = APIRouter()
 
12
  api_router.include_router(stance.router, prefix="/stance")
13
  api_router.include_router(label.router, prefix="/label")
14
  api_router.include_router(generate.router, prefix="/generate")
15
+ api_router.include_router(topic.router, prefix="/topic")
16
  api_router.include_router(audio_router)
17
 
18
  __all__ = ["api_router"]
routes/topic.py ADDED
@@ -0,0 +1,94 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Topic extraction endpoints"""
2
+
3
+ from fastapi import APIRouter, HTTPException
4
+ from datetime import datetime
5
+ import logging
6
+
7
+ from services.topic_service import topic_service
8
+ from models.topic import (
9
+ TopicRequest,
10
+ TopicResponse,
11
+ BatchTopicRequest,
12
+ BatchTopicResponse,
13
+ )
14
+
15
+ router = APIRouter()
16
+ logger = logging.getLogger(__name__)
17
+
18
+
19
@router.post("/extract", response_model=TopicResponse, tags=["Topic Extraction"])
async def extract_topic(request: TopicRequest):
    """
    Extract a topic from a given text/argument

    - **text**: The input text or argument to extract topic from (5-5000 chars)

    Returns the extracted topic description

    Raises HTTP 400 on validation failures from the service layer and
    HTTP 500 on any other extraction error.
    """
    # Local import keeps this fix self-contained; `datetime` itself is
    # already imported at the top of this module.
    from datetime import timezone

    try:
        # Delegate the actual LLM call to the service layer.
        topic = topic_service.extract_topic(request.text)

        # Timezone-aware UTC timestamp: the documented examples use a UTC
        # ("Z"-style) time, so a naive local time would be inconsistent.
        response = TopicResponse(
            text=request.text,
            topic=topic,
            timestamp=datetime.now(timezone.utc).isoformat(),
        )

        # Lazy %-formatting avoids building the message when INFO is disabled.
        logger.info("Topic extracted: %s...", topic[:50])
        return response

    except ValueError as e:
        # Bad input rejected by the service -> client error, with the cause chained.
        logger.error("Validation error: %s", e)
        raise HTTPException(status_code=400, detail=str(e)) from e
    except Exception as e:
        # Anything else (LLM/network failure) -> server error; chain the
        # original exception so tracebacks stay debuggable.
        logger.error("Topic extraction error: %s", e)
        raise HTTPException(status_code=500, detail=f"Topic extraction failed: {str(e)}") from e
48
+
49
+
50
@router.post("/batch-extract", response_model=BatchTopicResponse, tags=["Topic Extraction"])
async def batch_extract_topics(request: BatchTopicRequest):
    """
    Extract topics from multiple texts/arguments

    - **texts**: List of texts to extract topics from (max 50)

    Returns extracted topics for all texts; texts whose extraction failed
    are omitted from the results, so `total_processed` may be smaller than
    the number of submitted texts.

    Raises HTTP 400 on validation failures and HTTP 500 on other errors.
    """
    # Local import keeps this fix self-contained; `datetime` itself is
    # already imported at the top of this module.
    from datetime import timezone

    try:
        # The service returns one entry per input text; failed extractions
        # come back as None.
        topics = topic_service.batch_extract_topics(request.texts)

        # Single timezone-aware UTC timestamp shared by the whole batch,
        # matching the documented "Z"-style examples.
        timestamp = datetime.now(timezone.utc).isoformat()

        # Pair inputs with outputs and keep only the successful ones.
        results = [
            TopicResponse(text=text, topic=topic, timestamp=timestamp)
            for text, topic in zip(request.texts, topics)
            if topic is not None
        ]

        failed = len(request.texts) - len(results)
        if failed:
            logger.warning("Failed to extract topics for %d of %d texts", failed, len(request.texts))

        logger.info(
            "Batch topic extraction completed: %d/%d successful",
            len(results),
            len(request.texts),
        )

        return BatchTopicResponse(
            results=results,
            total_processed=len(results),
            timestamp=timestamp,
        )

    except ValueError as e:
        # Bad input rejected by the service -> client error, cause chained.
        logger.error("Validation error: %s", e)
        raise HTTPException(status_code=400, detail=str(e)) from e
    except Exception as e:
        # Unexpected failure -> server error; chain the original exception.
        logger.error("Batch topic extraction error: %s", e)
        raise HTTPException(status_code=500, detail=f"Batch topic extraction failed: {str(e)}") from e
94
+
services/__init__.py CHANGED
@@ -7,6 +7,7 @@ from .generate_model_manager import GenerateModelManager, generate_model_manager
7
  # NEW imports
8
  from .stt_service import speech_to_text
9
  from .tts_service import text_to_speech
 
10
 
11
  __all__ = [
12
  "StanceModelManager",
@@ -15,6 +16,8 @@ __all__ = [
15
  "kpa_model_manager",
16
  "GenerateModelManager",
17
  "generate_model_manager",
 
 
18
 
19
  # NEW exports
20
  "speech_to_text",
 
7
  # NEW imports
8
  from .stt_service import speech_to_text
9
  from .tts_service import text_to_speech
10
+ from .topic_service import TopicService, topic_service
11
 
12
  __all__ = [
13
  "StanceModelManager",
 
16
  "kpa_model_manager",
17
  "GenerateModelManager",
18
  "generate_model_manager",
19
+ "TopicService",
20
+ "topic_service",
21
 
22
  # NEW exports
23
  "speech_to_text",
services/topic_service.py ADDED
@@ -0,0 +1,136 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Service for topic extraction from text using LangChain Groq"""
2
+
3
+ import logging
4
+ from typing import Optional, List
5
+ from langchain_core.messages import HumanMessage, SystemMessage
6
+ from langchain_groq import ChatGroq
7
+ from pydantic import BaseModel, Field
8
+ from langsmith import traceable
9
+
10
+ from config import GROQ_API_KEY
11
+
12
+ logger = logging.getLogger(__name__)
13
+
14
+
15
class TopicOutput(BaseModel):
    """Structured-output schema the LLM is asked to produce."""

    # Single string field: the topic distilled from the input argument.
    topic: str = Field(..., description="A specific, detailed topic description")
18
+
19
+
20
class TopicService:
    """Service for extracting topics from text arguments.

    The Groq LLM client is created lazily on first use so that importing
    this module does not require a configured GROQ_API_KEY.
    """

    def __init__(self):
        # Structured-output LLM; populated by initialize().
        self.llm = None
        self.model_name = "openai/gpt-oss-safeguard-20b"  # Default model
        self.initialized = False

    def initialize(self, model_name: Optional[str] = None):
        """Initialize the Groq LLM with structured output.

        Args:
            model_name: Optional override for the default Groq model.

        Raises:
            ValueError: If GROQ_API_KEY is not configured.
            RuntimeError: If the LLM client cannot be created.
        """
        if self.initialized:
            logger.info("Topic service already initialized")
            return

        if not GROQ_API_KEY:
            raise ValueError("GROQ_API_KEY not found in environment variables")

        if model_name:
            self.model_name = model_name

        try:
            logger.info("Initializing topic extraction service with model: %s", self.model_name)

            llm = ChatGroq(
                model=self.model_name,
                api_key=GROQ_API_KEY,
                temperature=0.0,
                max_tokens=512,
            )

            # Bind structured output directly to the model so responses are
            # parsed into TopicOutput instances.
            self.llm = llm.with_structured_output(TopicOutput)
            self.initialized = True

            logger.info("✓ Topic extraction service initialized successfully")

        except Exception as e:
            logger.error("Error initializing topic service: %s", e)
            # Chain the cause so the original traceback is preserved.
            raise RuntimeError(f"Failed to initialize topic service: {str(e)}") from e

    @traceable(name="extract_topic")
    def extract_topic(self, text: str) -> str:
        """
        Extract a topic from the given text/argument

        Args:
            text: The input text/argument to extract topic from

        Returns:
            The extracted topic string

        Raises:
            ValueError: If `text` is empty or not a string.
            RuntimeError: If the LLM call fails.
        """
        if not self.initialized:
            self.initialize()

        if not text or not isinstance(text, str):
            raise ValueError("Text must be a non-empty string")

        text = text.strip()
        if len(text) == 0:
            raise ValueError("Text cannot be empty")

        system_message = """You are an information extraction model.
Extract a detailed topic from the user text.

Examples:
- Text: "Governments should subsidize electric cars to encourage adoption."
  Output: topic="government subsidies for electric vehicle adoption"

- Text: "Raising the minimum wage will hurt small businesses and cost jobs."
  Output: topic="raising the minimum wage and its economic impact on small businesses"
"""

        try:
            result = self.llm.invoke(
                [
                    SystemMessage(content=system_message),
                    HumanMessage(content=text),
                ]
            )

            return result.topic

        except Exception as e:
            logger.error("Error extracting topic: %s", e)
            # Chain the cause so callers can inspect the underlying failure.
            raise RuntimeError(f"Topic extraction failed: {str(e)}") from e

    def batch_extract_topics(self, texts: List[str]) -> List[Optional[str]]:
        """
        Extract topics from multiple texts

        Args:
            texts: List of input texts/arguments

        Returns:
            List of extracted topics, one entry per input text; entries are
            None for texts whose extraction failed. (The annotation is
            Optional[str] because failures are recorded as None rather than
            raised.)

        Raises:
            ValueError: If `texts` is empty or not a list.
        """
        if not self.initialized:
            self.initialize()

        if not texts or not isinstance(texts, list):
            raise ValueError("Texts must be a non-empty list")

        results: List[Optional[str]] = []
        for text in texts:
            try:
                results.append(self.extract_topic(text))
            except Exception as e:
                # Best-effort batch semantics: log and record the failure,
                # keep processing the remaining texts.
                logger.error("Error extracting topic for text '%s...': %s", text[:50], e)
                results.append(None)

        return results


# Initialize singleton instance shared by the route handlers
topic_service = TopicService()
136
+