AIEcosystem committed on
Commit
36da601
·
verified ·
1 Parent(s): 7e3667d

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +46 -117
src/streamlit_app.py CHANGED
@@ -13,6 +13,7 @@ from typing import Optional
13
  from gliner import GLiNER
14
  from comet_ml import Experiment
15
 
 
16
  st.markdown(
17
  """
18
  <style>
@@ -21,31 +22,37 @@ st.markdown(
21
  background-color: #E8F5E9; /* A very light green */
22
  color: #1B5E20; /* Dark green for the text */
23
  }
24
- /* Sidebar background color */
 
25
  .css-1d36184 {
26
  background-color: #A5D6A7; /* A medium light green */
27
  secondary-background-color: #A5D6A7;
28
  }
29
- /* Expander background color and header */
 
30
  .streamlit-expanderContent, .streamlit-expanderHeader {
31
  background-color: #E8F5E9;
32
  }
33
- /* Text Area background and text color */
 
34
  .stTextArea textarea {
35
  background-color: #81C784; /* A slightly darker medium green */
36
  color: #1B5E20; /* Dark green for text */
37
  }
38
- /* Button background and text color */
 
39
  .stButton > button {
40
  background-color: #81C784;
41
  color: #1B5E20;
42
  }
43
- /* Warning box background and text color */
 
44
  .stAlert.st-warning {
45
  background-color: #66BB6A; /* A medium-dark green for the warning box */
46
  color: #1B5E20;
47
  }
48
- /* Success box background and text color */
 
49
  .stAlert.st-success {
50
  background-color: #66BB6A; /* A medium-dark green for the success box */
51
  color: #1B5E20;
@@ -102,123 +109,47 @@ if not comet_initialized:
102
 
103
  # --- Label Definitions ---
104
  labels = [
105
- "person",
106
- "organization",
107
- "social_media_handle",
108
- "username",
109
- "insurance_company",
110
- "phone_number",
111
- "email",
112
- "email_address",
113
- "mobile_phone_number",
114
- "landline_phone_number",
115
- "fax_number",
116
- "credit_card_number",
117
- "credit_card_expiration_date",
118
- "credit_card_brand",
119
- "cvv",
120
- "cvc",
121
- "bank_account_number",
122
- "iban",
123
- "transaction_number",
124
- "cpf",
125
- "cnpj",
126
- "passport_number",
127
- "passport_expiration_date",
128
- "driver's_license_number",
129
- "tax_identification_number",
130
- "identity_card_number",
131
- "national_id_number",
132
- "identity_document_number",
133
- "birth_certificate_number",
134
- "social_security_number",
135
- "health_insurance_id_number",
136
- "health_insurance_number",
137
- "national_health_insurance_number",
138
- "student_id_number",
139
- "registration_number",
140
- "insurance_number",
141
- "serial_number",
142
- "visa_number",
143
- "reservation_number",
144
- "train_ticket_number",
145
- "medication",
146
- "medical_condition",
147
- "blood_type",
148
- "date_of_birth",
149
- "address",
150
- "ip_address",
151
- "postal_code",
152
- "flight_number",
153
- "license_plate_number",
154
- "vehicle_registration_number",
155
- "digital_signature"
156
  ]
 
157
  # Corrected mapping dictionary
158
  category_mapping = {
159
  "People_and_Groups": [
160
- "person",
161
- "organization",
162
- "social_media_handle",
163
- "username",
164
- "insurance_company"
165
  ],
166
  "Contact_Information": [
167
- "phone_number",
168
- "email",
169
- "email_address",
170
- "mobile_phone_number",
171
- "landline_phone_number",
172
- "fax_number"
173
  ],
174
  "Financial_and_Transactions": [
175
- "credit_card_number",
176
- "credit_card_expiration_date",
177
- "credit_card_brand",
178
- "cvv",
179
- "cvc",
180
- "bank_account_number",
181
- "iban",
182
- "transaction_number",
183
- "cpf",
184
- "cnpj"
185
  ],
186
  "Identification_and_Documents": [
187
- "passport_number",
188
- "passport_expiration_date",
189
- "driver's_license_number",
190
- "tax_identification_number",
191
- "identity_card_number",
192
- "national_id_number",
193
- "identity_document_number",
194
- "birth_certificate_number",
195
- "social_security_number",
196
- "health_insurance_id_number",
197
- "health_insurance_number",
198
- "national_health_insurance_number",
199
- "student_id_number",
200
- "registration_number",
201
- "insurance_number",
202
- "serial_number",
203
- "visa_number",
204
- "reservation_number",
205
- "train_ticket_number"
206
  ],
207
  "Health_and_Personal": [
208
- "medication",
209
- "medical_condition",
210
- "blood_type",
211
- "date_of_birth"
212
  ],
213
  "Locations_and_Addresses": [
214
- "address",
215
- "ip_address",
216
- "postal_code"
217
  ],
218
  "Transportation_and_Logistics": [
219
- "flight_number",
220
- "license_plate_number",
221
- "vehicle_registration_number"
222
  ],
223
  "Digital_and_Security": [
224
  "digital_signature"
@@ -273,7 +204,6 @@ if st.button("Results"):
273
  st.warning(f"Your text exceeds the {word_limit} word limit. Please shorten it to continue.")
274
  st.session_state.show_results = False
275
  else:
276
- # Check if the text is different from the last time
277
  if text != st.session_state.last_text:
278
  st.session_state.show_results = True
279
  st.session_state.last_text = text
@@ -282,6 +212,8 @@ if st.button("Results"):
282
  entities = model.predict_entities(text, labels)
283
  df = pd.DataFrame(entities)
284
  st.session_state.results_df = df
 
 
285
  if not df.empty:
286
  df['category'] = df['label'].map(reverse_category_mapping)
287
  if comet_initialized:
@@ -293,15 +225,16 @@ if st.button("Results"):
293
  experiment.log_parameter("input_text", text)
294
  experiment.log_table("predicted_entities", df)
295
  experiment.end()
 
296
  end_time = time.time()
297
  st.session_state.elapsed_time = end_time - start_time
298
  else:
299
- # If the text is the same, just show the cached results without re-running
300
  st.session_state.show_results = True
301
 
302
  # Display results if the state variable is True
303
  if st.session_state.show_results:
304
  df = st.session_state.results_df
 
305
  if not df.empty:
306
  df['category'] = df['label'].map(reverse_category_mapping)
307
  st.subheader("Grouped Entities by Category", divider="violet")
@@ -395,7 +328,7 @@ if st.session_state.show_results:
395
 
396
  with stylable_container(
397
  key="download_button",
398
- css_styles="""button { background-color: red; border: 1px solid black; padding: 5px; color: white; }""",
399
  ):
400
  st.download_button(
401
  label="Download results and glossary (zip)",
@@ -403,14 +336,10 @@ if st.session_state.show_results:
403
  file_name="nlpblogs_results.zip",
404
  mime="application/zip",
405
  )
406
-
407
- if comet_initialized:
408
- experiment.log_figure(figure=fig_treemap, figure_name="entity_treemap_categories")
409
- experiment.end()
410
  else: # If df is empty
411
  st.warning("No entities were found in the provided text.")
412
- end_time = time.time()
413
- elapsed_time = end_time - start_time
414
  st.text("")
415
  st.text("")
416
- st.info(f"Results processed in **{elapsed_time:.2f} seconds**.")
 
13
  from gliner import GLiNER
14
  from comet_ml import Experiment
15
 
16
+ # --- CSS Styling for the App ---
17
  st.markdown(
18
  """
19
  <style>
 
22
  background-color: #E8F5E9; /* A very light green */
23
  color: #1B5E20; /* Dark green for the text */
24
  }
25
+
26
+ /* Sidebar background color */
27
  .css-1d36184 {
28
  background-color: #A5D6A7; /* A medium light green */
29
  secondary-background-color: #A5D6A7;
30
  }
31
+
32
+ /* Expander background color and header */
33
  .streamlit-expanderContent, .streamlit-expanderHeader {
34
  background-color: #E8F5E9;
35
  }
36
+
37
+ /* Text Area background and text color */
38
  .stTextArea textarea {
39
  background-color: #81C784; /* A slightly darker medium green */
40
  color: #1B5E20; /* Dark green for text */
41
  }
42
+
43
+ /* Button background and text color */
44
  .stButton > button {
45
  background-color: #81C784;
46
  color: #1B5E20;
47
  }
48
+
49
+ /* Warning box background and text color */
50
  .stAlert.st-warning {
51
  background-color: #66BB6A; /* A medium-dark green for the warning box */
52
  color: #1B5E20;
53
  }
54
+
55
+ /* Success box background and text color */
56
  .stAlert.st-success {
57
  background-color: #66BB6A; /* A medium-dark green for the success box */
58
  color: #1B5E20;
 
109
 
110
  # --- Label Definitions ---
111
  labels = [
112
+ "person", "organization", "social_media_handle", "username", "insurance_company",
113
+ "phone_number", "email", "email_address", "mobile_phone_number", "landline_phone_number", "fax_number",
114
+ "credit_card_number", "credit_card_expiration_date", "credit_card_brand", "cvv", "cvc",
115
+ "bank_account_number", "iban", "transaction_number", "cpf", "cnpj",
116
+ "passport_number", "passport_expiration_date", "driver's_license_number", "tax_identification_number", "identity_card_number",
117
+ "national_id_number", "identity_document_number", "birth_certificate_number", "social_security_number",
118
+ "health_insurance_id_number", "health_insurance_number", "national_health_insurance_number",
119
+ "student_id_number", "registration_number", "insurance_number", "serial_number",
120
+ "visa_number", "reservation_number", "train_ticket_number",
121
+ "medication", "medical_condition", "blood_type", "date_of_birth",
122
+ "address", "ip_address", "postal_code",
123
+ "flight_number", "license_plate_number", "vehicle_registration_number", "digital_signature"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
124
  ]
125
+
126
  # Corrected mapping dictionary
127
  category_mapping = {
128
  "People_and_Groups": [
129
+ "person", "organization", "social_media_handle", "username", "insurance_company"
 
 
 
 
130
  ],
131
  "Contact_Information": [
132
+ "phone_number", "email", "email_address", "mobile_phone_number", "landline_phone_number", "fax_number"
 
 
 
 
 
133
  ],
134
  "Financial_and_Transactions": [
135
+ "credit_card_number", "credit_card_expiration_date", "credit_card_brand", "cvv", "cvc",
136
+ "bank_account_number", "iban", "transaction_number", "cpf", "cnpj"
 
 
 
 
 
 
 
 
137
  ],
138
  "Identification_and_Documents": [
139
+ "passport_number", "passport_expiration_date", "driver's_license_number", "tax_identification_number",
140
+ "identity_card_number", "national_id_number", "identity_document_number", "birth_certificate_number",
141
+ "social_security_number", "health_insurance_id_number", "health_insurance_number",
142
+ "national_health_insurance_number", "student_id_number", "registration_number",
143
+ "insurance_number", "serial_number", "visa_number", "reservation_number", "train_ticket_number"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
144
  ],
145
  "Health_and_Personal": [
146
+ "medication", "medical_condition", "blood_type", "date_of_birth"
 
 
 
147
  ],
148
  "Locations_and_Addresses": [
149
+ "address", "ip_address", "postal_code"
 
 
150
  ],
151
  "Transportation_and_Logistics": [
152
+ "flight_number", "license_plate_number", "vehicle_registration_number"
 
 
153
  ],
154
  "Digital_and_Security": [
155
  "digital_signature"
 
204
  st.warning(f"Your text exceeds the {word_limit} word limit. Please shorten it to continue.")
205
  st.session_state.show_results = False
206
  else:
 
207
  if text != st.session_state.last_text:
208
  st.session_state.show_results = True
209
  st.session_state.last_text = text
 
212
  entities = model.predict_entities(text, labels)
213
  df = pd.DataFrame(entities)
214
  st.session_state.results_df = df
215
+
216
+ # Move the Comet ML logging and termination here
217
  if not df.empty:
218
  df['category'] = df['label'].map(reverse_category_mapping)
219
  if comet_initialized:
 
225
  experiment.log_parameter("input_text", text)
226
  experiment.log_table("predicted_entities", df)
227
  experiment.end()
228
+
229
  end_time = time.time()
230
  st.session_state.elapsed_time = end_time - start_time
231
  else:
 
232
  st.session_state.show_results = True
233
 
234
  # Display results if the state variable is True
235
  if st.session_state.show_results:
236
  df = st.session_state.results_df
237
+
238
  if not df.empty:
239
  df['category'] = df['label'].map(reverse_category_mapping)
240
  st.subheader("Grouped Entities by Category", divider="violet")
 
328
 
329
  with stylable_container(
330
  key="download_button",
331
+ css_styles="""button { background-color: #81C784; border: 1px solid black; padding: 5px; color: #1B5E20; }""",
332
  ):
333
  st.download_button(
334
  label="Download results and glossary (zip)",
 
336
  file_name="nlpblogs_results.zip",
337
  mime="application/zip",
338
  )
339
+
 
 
 
340
  else: # If df is empty
341
  st.warning("No entities were found in the provided text.")
342
+
 
343
  st.text("")
344
  st.text("")
345
+ st.info(f"Results processed in **{st.session_state.elapsed_time:.2f} seconds**.")