# sentiment / app_finetuned.py
# Rakib Hossain
# typo fix
# 888e3dd
"""
Bangla News Sentiment Analysis - Fine-tuned Model
MSc IT Thesis Project
Uses the fine-tuned model trained on Bangla news dataset
"""
import os
import gradio as gr
from transformers import pipeline
import pandas as pd
import torch
# Path of the fine-tuned model that the pipeline loads
FINE_TUNED_MODEL_PATH = "models/bangla-sentiment-finetuned"

# Device index handed to the pipeline: 0 when CUDA is available, otherwise -1
if torch.cuda.is_available():
    device = 0
else:
    device = -1
print(f"Using device: {'GPU' if device == 0 else 'CPU'}")

# Build the sentiment pipeline once at import time; the handlers below reuse it
print("Loading fine-tuned model...")
try:
    sentiment_analyzer = pipeline("sentiment-analysis", model=FINE_TUNED_MODEL_PATH, device=device)
    print("✅ Fine-tuned model loaded successfully!")
except Exception as load_error:
    # Report where the model was expected, then let the failure propagate
    print(f"❌ Error loading fine-tuned model: {load_error}")
    print("Make sure the model is trained and saved at:", FINE_TUNED_MODEL_PATH)
    raise
def map_label_to_sentiment(label):
    """
    Translate a raw model label into one of the three sentiment classes.

    Both the generic ``LABEL_<id>`` names and plain class names are accepted.
    Matching ignores letter case and surrounding whitespace, so variants such
    as ``'Positive'`` or ``'label_0'`` resolve to their class instead of
    silently falling back to neutral.
    Id order is based on fine_tune.py: negative=0, neutral=1, positive=2.

    Args:
        label: Label reported by the model (normally a string).

    Returns:
        'negative', 'neutral' or 'positive'; 'neutral' when the label is
        not recognised.
    """
    label_map = {
        'label_0': 'negative',
        'label_1': 'neutral',
        'label_2': 'positive',
        'negative': 'negative',
        'neutral': 'neutral',
        'positive': 'positive',
    }
    # str() keeps non-string inputs (e.g. None) on the 'neutral' fallback path
    normalized = str(label).strip().lower()
    return label_map.get(normalized, 'neutral')
def analyze_single_text(text):
    """
    Analyze the sentiment of a single text using the fine-tuned model.

    Args:
        text: Text entered by the user; may be None or empty.

    Returns:
        A one-entry dict mapping a display label (English + Bengali) to the
        model's confidence as a float, suitable for gr.Label.
        ``{"Error": 1.0}`` is returned for blank input or when inference fails.
    """
    if not text or not text.strip():
        return {"Error": 1.0}

    # Display labels shown to the user for each sentiment class
    display_map = {
        'positive': '✅ Positive (ইতিবাচক)',
        'negative': '❌ Negative (নেতিবাচক)',
        'neutral': '➖ Neutral (নিরপেক্ষ)'
    }
    try:
        # The 512-character slice bounds the request size, but characters are
        # not tokens: sub-word splitting plus special tokens can still exceed
        # the model's input length, so truncate at the tokenizer level as well.
        result = sentiment_analyzer(text[:512], truncation=True, max_length=512)[0]
        sentiment = map_label_to_sentiment(result['label'])
        confidence = float(result['score'])
        # Return only float values for gr.Label
        return {display_map.get(sentiment, sentiment): confidence}
    except Exception as e:
        # UI boundary: report the failure and show an error entry instead of crashing
        print(f"Error in analyze_single_text: {e}")
        return {"Error": 1.0}
def analyze_batch_file(file):
    """
    Analyze the sentiment of an uploaded CSV file using the fine-tuned model.

    The CSV must contain a column named 'text'. Only the first 100 rows are
    processed (demo limit).

    Args:
        file: Uploaded file, either an object exposing the path as ``.name``
            or a plain path string; None when nothing was uploaded.

    Returns:
        A DataFrame with columns ID, Text Preview, Sentiment and Confidence
        (one row per analyzed text), or a single-column 'Error' DataFrame
        describing why the file could not be processed.
    """
    if file is None:
        return pd.DataFrame({"Error": ["Please upload a file"]})
    try:
        # Accept both an upload object with .name and a bare path string
        csv_path = getattr(file, "name", file)
        df = pd.read_csv(csv_path)
        if 'text' not in df.columns:
            return pd.DataFrame({"Error": ["CSV must have 'text' column"]})

        results = []
        for idx, raw_value in enumerate(df['text'][:100]):  # Limit to 100 for demo
            text = str(raw_value)
            # Same preview for success and error rows: ellipsis only when cut
            preview = text[:80] + '...' if len(text) > 80 else text
            try:
                # Token-level truncation guards against inputs whose first 512
                # characters still tokenize to more than the model accepts.
                result = sentiment_analyzer(text[:512], truncation=True, max_length=512)[0]
                sentiment = map_label_to_sentiment(result['label'])
                row = {
                    'ID': idx + 1,
                    'Text Preview': preview,
                    'Sentiment': sentiment.capitalize(),
                    'Confidence': f"{result['score']:.2%}"
                }
            except Exception as e:
                # Keep processing the remaining rows, but do not hide the cause
                print(f"Error analyzing row {idx + 1}: {e}")
                row = {
                    'ID': idx + 1,
                    'Text Preview': preview,
                    'Sentiment': 'ERROR',
                    'Confidence': 'N/A'
                }
            results.append(row)
        return pd.DataFrame(results)
    except Exception as e:
        # UI boundary: surface read/parse failures as a table instead of raising
        print(f"Error in analyze_batch_file: {e}")
        return pd.DataFrame({"Error": [f"File processing failed: {str(e)}"]})
# Create Gradio Interface
# One Blocks app with three tabs: single-text analysis, CSV batch analysis and
# a static About page. Components are declared inside nested layout contexts
# (Tab / Row / Column), so statement order and nesting are significant here.
with gr.Blocks(title="Bangla Sentiment Analysis - Fine-tuned Model") as demo:
    # Page header declared before the tabs
    gr.Markdown("""
# 🇧🇩 Bangla News Sentiment Analysis System
### Analyze sentiment of Bangla text using Fine-tuned AI Model
**MSc IT Thesis Project** | Model: Fine-tuned on Bangla News Dataset
""")
    # --- Tab 1: analyze one piece of text typed by the user ---
    with gr.Tab("📝 Single Text Analysis"):
        gr.Markdown("### Analyze sentiment of individual Bangla text")
        with gr.Row():
            # Left column: input textbox and its action buttons
            with gr.Column():
                text_input = gr.Textbox(
                    lines=8,
                    placeholder="এখানে বাংলা টেক্সট লিখুন... (Enter Bangla text here)",
                    label="Input Text"
                )
                with gr.Row():
                    clear_btn = gr.Button("Clear")
                    analyze_btn = gr.Button("🔍 Analyze Sentiment")
            # Right column: result label. analyze_single_text returns a
            # single-entry dict, so one class is shown per request.
            with gr.Column():
                output_label = gr.Label(
                    label="📊 Sentiment Result",
                    num_top_classes=3
                )
        gr.Markdown("### 💡 Try these examples:")
        # Sample sentences wired to the input textbox
        gr.Examples(
            examples=[
                ["বাংলাদেশ ক্রিকেট দল দুর্দান্ত পারফরম্যান্স করেছে আজকের ম্যাচে!"],
                ["সরকারের এই সিদ্ধান্তে জনগণ অসন্তুষ্ট এবং ক্ষুব্ধ"],
                ["আজকের আবহাওয়া মোটামুটি ভালো থাকবে সারাদিন"],
                ["শিক্ষা ব্যবস্থায় উন্নতি প্রয়োজন কিন্তু পদক্ষেপ নেওয়া হচ্ছে"],
                ["এই রেস্তোরাঁর খাবারের মান অত্যন্ত খারাপ ছিল"],
                ["বাজারে সাধারণ শেয়ারধারীদের স্বার্थ রক্ষার সুযোগ নেই"]
            ],
            inputs=text_input
        )
        # Clear writes an empty string back into the textbox
        clear_btn.click(lambda: "", outputs=text_input)
        # Analyze sends the textbox content to analyze_single_text and shows
        # the returned dict in the result label
        analyze_btn.click(
            fn=analyze_single_text,
            inputs=text_input,
            outputs=output_label
        )
    # --- Tab 2: analyze many texts from an uploaded CSV ---
    with gr.Tab("📊 Batch Analysis"):
        gr.Markdown("""
### Analyze multiple texts at once
Upload a CSV file with a column named **'text'** containing Bangla text
""")
        with gr.Row():
            # Left column: upload widget, trigger button and format help
            with gr.Column():
                file_input = gr.File(
                    label="📁 Upload CSV File",
                    file_types=[".csv"]
                )
                batch_btn = gr.Button("🚀 Process Batch")
                gr.Markdown("""
**CSV Format Example:**
```
text
প্রথম বাংলা টেক্সট
দ্বিতীয় বাংলা টেক্সট
তৃতীয় বাংলা টেক্সট
```
""")
            # Right column: table filled with the DataFrame returned by
            # analyze_batch_file
            with gr.Column():
                batch_output = gr.Dataframe(
                    label="Results"
                )
        # Process Batch sends the uploaded file to analyze_batch_file
        batch_btn.click(
            fn=analyze_batch_file,
            inputs=file_input,
            outputs=batch_output
        )
    # --- Tab 3: static project description ---
    # NOTE(review): the contact email in the text below reads "[email protected]",
    # which looks like an obfuscated placeholder rather than a real address;
    # confirm the intended value.
    with gr.Tab("ℹ️ About"):
        gr.Markdown("""
## About This Project
### 🎯 Objective
Develop an automated sentiment analysis system for Bangla news and social media text using a fine-tuned model.
### 🤖 Model Information
- **Model**: Fine-tuned `DistilBERT` model
- **Base Model**: `tabularisai/multilingual-sentiment-analysis`
- **Fine-tuned On**: Bangla news dataset
- **Architecture**: DistilBERT (Transformer-based)
- **Sentiment Classes**: Positive, Negative, Neutral
- **Model Location**: `models/bangla-sentiment-finetuned/`
### 📚 Dataset
- Bangla news articles from major Bangladeshi newspapers
- Social media comments and reviews
- Manually validated samples
- Fine-tuned for better Bangla text understanding
### 🛠️ Technology Stack
- Python 3.12
- Hugging Face Transformers
- Gradio (Web Interface)
- PyTorch
### 🎓 Academic Context
**MSc in Information Technology - Thesis Project**
### 👨‍💻 Developer
**Rakib**
MSc in Information Technology
Jahangirnagar University
Year: 2025
### 📧 Contact
Email: [[email protected]](mailto:[email protected])
GitHub: [rakibhoossain](https://github.com/rakibhoossain)
---
*Last Updated: December 2025*
""")
# Launch the app
if __name__ == "__main__":
    print("\n🚀 Starting Gradio interface with fine-tuned model...")
    print("=" * 50)
    # Honor an explicitly configured PORT (previously parsed but never used).
    # When PORT is unset, pass None so Gradio keeps choosing the port itself,
    # exactly as it did before.
    port_setting = os.environ.get("PORT")
    port = int(port_setting) if port_setting else None
    demo.launch(
        share=True,  # also requests a public share link
        server_port=port,
        show_error=True
    )