Yassine Mhirsi committed on
Commit
7218dd0
·
1 Parent(s): 77f9f6f

feat: Introduce predefined topics list in TopicService and enhance topic extraction logic to validate against this list, ensuring accurate topic matching and improved error handling.

Browse files
Files changed (1) hide show
  1. services/topic_service.py +105 -15
services/topic_service.py CHANGED
@@ -11,19 +11,65 @@ from config import GROQ_API_KEY
11
 
12
  logger = logging.getLogger(__name__)
13
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
 
15
  class TopicOutput(BaseModel):
16
  """Pydantic schema for topic extraction output"""
17
- topic: str = Field(..., description="A specific, detailed topic description")
18
 
19
 
20
  class TopicService:
21
- """Service for extracting topics from text arguments"""
22
 
23
  def __init__(self):
24
  self.llm = None
25
  self.model_name = "openai/gpt-oss-safeguard-20b" # Default model
26
  self.initialized = False
 
27
 
28
  def initialize(self, model_name: Optional[str] = None):
29
  """Initialize the Groq LLM with structured output"""
@@ -57,16 +103,44 @@ class TopicService:
57
  logger.error(f"Error initializing topic service: {str(e)}")
58
  raise RuntimeError(f"Failed to initialize topic service: {str(e)}")
59
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
60
  @traceable(name="extract_topic")
61
  def extract_topic(self, text: str) -> str:
62
  """
63
- Extract a topic from the given text/argument
64
 
65
  Args:
66
  text: The input text/argument to extract topic from
67
 
68
  Returns:
69
- The extracted topic string
70
  """
71
  if not self.initialized:
72
  self.initialize()
@@ -78,16 +152,7 @@ class TopicService:
78
  if len(text) == 0:
79
  raise ValueError("Text cannot be empty")
80
 
81
- system_message = """You are an information extraction model.
82
- Extract a topic from the user text. The topic should be a single sentence that captures the main idea of the text in simple english.
83
-
84
- Examples:
85
- - Text: "Governments should subsidize electric cars to encourage adoption."
86
- Output: topic="government subsidies for electric vehicle adoption"
87
-
88
- - Text: "Raising the minimum wage will hurt small businesses and cost jobs."
89
- Output: topic="raising the minimum wage and its economic impact on small businesses"
90
- """
91
 
92
  try:
93
  result = self.llm.invoke(
@@ -97,7 +162,32 @@ Examples:
97
  ]
98
  )
99
 
100
- return result.topic
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
101
 
102
  except Exception as e:
103
  logger.error(f"Error extracting topic: {str(e)}")
 
11
 
12
  logger = logging.getLogger(__name__)
13
 
14
# Closed set of debate motions that an input text can be classified into.
# TopicService embeds these strings, numbered in list order, in its system
# prompt and checks the model's answer against them (exact match first, then a
# case-insensitive comparison), so the wording here is what callers receive.
PREDEFINED_TOPICS = [
    "Assisted suicide should be a criminal offence",
    "We should abolish intellectual property rights",
    "Homeschooling should be banned",
    "The vow of celibacy should be abandoned",
    "We should legalize prostitution",
    "We should ban private military companies",
    "We should abolish capital punishment",
    "Foster care brings more harm than good",
    "Routine child vaccinations should be mandatory",
    "We should abolish the three-strikes laws",
    "We should subsidize student loans",
    "We should end the use of economic sanctions",
    "We should end mandatory retirement",
    "We should close Guantanamo Bay detention camp",
    "We should subsidize space exploration",
    "We should abandon the use of school uniform",
    "The use of public defenders should be mandatory",
    "We should adopt an austerity regime",
    "Social media platforms should be regulated by the government",
    "We should ban human cloning",
    "We should adopt atheism",
    "We should introduce compulsory voting",
    "We should adopt libertarianism",
    "We should abolish the right to keep and bear arms",
    "We should legalize sex selection",
    "We should abandon marriage",
    "Entrapment should be legalized",
    "We should end affirmative action",
    "We should prohibit women in combat",
    "We should adopt a zero-tolerance policy in schools",
    "We should subsidize vocational education",
    "We should ban the use of child actors",
    "We should legalize cannabis",
    "We should ban cosmetic surgery",
    "We should end racial profiling",
    "We should prohibit flag burning",
    "The USA is a good country to live in",
    "We should ban algorithmic trading",
    "We should fight for the abolition of nuclear weapons",
    "We should fight urbanization",
    "We should subsidize journalism",
]
58
+
59
 
60
class TopicOutput(BaseModel):
    """Pydantic schema for topic extraction output.

    Holds the single topic string chosen by the model. The field is a plain
    ``str``, so this schema does not by itself restrict the value to the
    predefined topics; that check is done by the caller after the LLM
    responds.
    """
    # Required field (``...`` means no default). The description text is part
    # of the schema -- presumably surfaced to the LLM as structured-output
    # guidance; confirm against TopicService.initialize.
    topic: str = Field(..., description="The selected topic from the predefined list that most closely matches the input text")
63
 
64
 
65
  class TopicService:
66
+ """Service for extracting topics from text arguments by matching to predefined topics"""
67
 
68
  def __init__(self):
69
  self.llm = None
70
  self.model_name = "openai/gpt-oss-safeguard-20b" # Default model
71
  self.initialized = False
72
+ self.predefined_topics = PREDEFINED_TOPICS
73
 
74
  def initialize(self, model_name: Optional[str] = None):
75
  """Initialize the Groq LLM with structured output"""
 
103
  logger.error(f"Error initializing topic service: {str(e)}")
104
  raise RuntimeError(f"Failed to initialize topic service: {str(e)}")
105
 
106
+ def _get_system_message(self) -> str:
107
+ """Generate system message with predefined topics list"""
108
+ topics_list = "\n".join([f"{i+1}. {topic}" for i, topic in enumerate(self.predefined_topics)])
109
+
110
+ return f"""You are a topic classification model. Your task is to select the MOST SIMILAR topic from the predefined list below that best matches the user's input text.
111
+
112
+ IMPORTANT: You MUST return EXACTLY one of the predefined topics below. Do not create new topics or modify the wording.
113
+
114
+ Predefined Topics:
115
+ {topics_list}
116
+
117
+ Instructions:
118
+ 1. Analyze the user's input text carefully
119
+ 2. Identify the main theme, subject, or argument being discussed
120
+ 3. Find the topic from the predefined list that is MOST SIMILAR to the input text
121
+ 4. Return the EXACT topic text as it appears in the list above
122
+
123
+ Examples:
124
+ - Input: "I think we need to make assisted suicide illegal and punishable by law."
125
+ Output: "Assisted suicide should be a criminal offence"
126
+
127
+ - Input: "Student debt is crushing young people. The government should help pay for college."
128
+ Output: "We should subsidize student loans"
129
+
130
+ - Input: "Marijuana should be legal for adults to use recreationally."
131
+ Output: "We should legalize cannabis"
132
+ """
133
+
134
  @traceable(name="extract_topic")
135
  def extract_topic(self, text: str) -> str:
136
  """
137
+ Extract a topic from the given text/argument by matching to predefined topics
138
 
139
  Args:
140
  text: The input text/argument to extract topic from
141
 
142
  Returns:
143
+ The extracted topic string (must be one of the predefined topics)
144
  """
145
  if not self.initialized:
146
  self.initialize()
 
152
  if len(text) == 0:
153
  raise ValueError("Text cannot be empty")
154
 
155
+ system_message = self._get_system_message()
 
 
 
 
 
 
 
 
 
156
 
157
  try:
158
  result = self.llm.invoke(
 
162
  ]
163
  )
164
 
165
+ selected_topic = result.topic.strip()
166
+
167
+ # Validate that the returned topic is in the predefined list
168
+ if selected_topic not in self.predefined_topics:
169
+ logger.warning(
170
+ f"LLM returned topic not in predefined list: '{selected_topic}'. "
171
+ f"Attempting to find closest match..."
172
+ )
173
+ # Try to find the closest match (case-insensitive)
174
+ selected_topic_lower = selected_topic.lower()
175
+ for predefined_topic in self.predefined_topics:
176
+ if predefined_topic.lower() == selected_topic_lower:
177
+ selected_topic = predefined_topic
178
+ logger.info(f"Found case-insensitive match: '{selected_topic}'")
179
+ break
180
+ else:
181
+ # If still no match, log error and raise
182
+ logger.error(
183
+ f"Could not match returned topic '{selected_topic}' to any predefined topic. "
184
+ f"Available topics: {self.predefined_topics[:3]}..."
185
+ )
186
+ raise ValueError(
187
+ f"Returned topic '{selected_topic}' is not in the predefined topics list"
188
+ )
189
+
190
+ return selected_topic
191
 
192
  except Exception as e:
193
  logger.error(f"Error extracting topic: {str(e)}")