|
|
""" |
|
|
Bangla News Sentiment Analysis - Fine-tuned Model |
|
|
MSc IT Thesis Project |
|
|
Uses the fine-tuned model trained on Bangla news dataset |
|
|
""" |
|
|
import os |
|
|
|
|
|
import gradio as gr |
|
|
from transformers import pipeline |
|
|
import pandas as pd |
|
|
import torch |
|
|
|
|
|
|
|
|
# Directory of the fine-tuned checkpoint handed to the pipeline loader.
FINE_TUNED_MODEL_PATH = "models/bangla-sentiment-finetuned"

# Device index for the pipeline: 0 when CUDA is available, otherwise -1,
# which the message below reports as CPU.
if torch.cuda.is_available():
    device = 0
else:
    device = -1

print(f"Using device: {'GPU' if device == 0 else 'CPU'}")
|
|
|
|
|
|
|
|
# Build the classification pipeline once at import time; the analysis
# functions in this module reuse this single instance.
print("Loading fine-tuned model...")

_pipeline_options = {
    "model": FINE_TUNED_MODEL_PATH,
    "device": device,
}

try:
    sentiment_analyzer = pipeline("sentiment-analysis", **_pipeline_options)
    print("✅ Fine-tuned model loaded successfully!")
except Exception as e:
    # Say where the checkpoint was expected, then re-raise so the app stops
    # at startup rather than running without a model.
    print(f"❌ Error loading fine-tuned model: {e}")
    print("Make sure the model is trained and saved at:", FINE_TUNED_MODEL_PATH)
    raise
|
|
|
|
|
|
|
|
# Lower-cased model label -> canonical sentiment name.
# The index order (negative=0, neutral=1, positive=2) is the one the original
# note attributes to fine_tune.py.
_LABEL_TO_SENTIMENT = {
    'label_0': 'negative',
    'label_1': 'neutral',
    'label_2': 'positive',
    'negative': 'negative',
    'neutral': 'neutral',
    'positive': 'positive',
}


def map_label_to_sentiment(label):
    """Translate a model output label into a sentiment class name.

    Accepts both the generic ``LABEL_0`` / ``LABEL_1`` / ``LABEL_2`` names
    and the plain class names. Matching ignores letter case and surrounding
    whitespace, so ``"Positive"`` or ``" NEGATIVE "`` resolve to their class
    instead of falling through to the default.

    Args:
        label: Label string taken from a prediction result.

    Returns:
        ``'negative'``, ``'neutral'`` or ``'positive'``. Unknown or
        non-string labels map to ``'neutral'``.
    """
    if not isinstance(label, str):
        return 'neutral'
    return _LABEL_TO_SENTIMENT.get(label.strip().lower(), 'neutral')
|
|
|
|
|
|
|
|
def analyze_single_text(text):
    """Classify one piece of text with the fine-tuned model.

    Args:
        text: Input string (the value of the UI textbox).

    Returns:
        A one-entry dict mapping a display label to the prediction score as
        a float, or ``{"Error": 1.0}`` when the input is not a non-blank
        string or the prediction raises.
    """
    # Reject None, non-strings, and empty / whitespace-only input up front.
    if not isinstance(text, str) or not text.strip():
        return {"Error": 1.0}

    display_map = {
        'positive': '✅ Positive (ইতিবাচক)',
        'negative': '❌ Negative (নেতিবাচক)',
        'neutral': '➖ Neutral (নিরপেক্ষ)',
    }

    try:
        # Truncate in the tokenizer instead of slicing characters: a
        # character slice can both drop usable text and still produce more
        # tokens than the limit.
        # NOTE(review): 512 is presumably the model's maximum sequence
        # length - confirm against the checkpoint config.
        result = sentiment_analyzer(text, truncation=True, max_length=512)[0]
        sentiment = map_label_to_sentiment(result['label'])
        confidence = float(result['score'])
    except Exception as e:
        # UI boundary: report the failure and show an error entry rather
        # than letting the exception reach the interface.
        print(f"Error in analyze_single_text: {e}")
        return {"Error": 1.0}

    return {display_map.get(sentiment, sentiment): confidence}
|
|
|
|
|
|
|
|
# Column order of the result table; passed explicitly so that a CSV with no
# rows still yields a frame with headers.
_BATCH_COLUMNS = ['ID', 'Text Preview', 'Sentiment', 'Confidence']
_MAX_BATCH_ROWS = 100  # only the first rows of an upload are analysed
_PREVIEW_CHARS = 80    # length of the excerpt shown for each row


def _preview(text):
    """Return *text* cut to the preview length, with '...' appended if cut."""
    if len(text) > _PREVIEW_CHARS:
        return text[:_PREVIEW_CHARS] + '...'
    return text


def analyze_batch_file(file):
    """Classify the ``text`` column of an uploaded CSV file.

    Args:
        file: ``None``, a path (``str`` / ``os.PathLike``), or an object
            whose ``.name`` attribute holds the path of the uploaded file.

    Returns:
        A ``pandas.DataFrame``. On success it has the columns ``ID``,
        ``Text Preview``, ``Sentiment`` and ``Confidence`` for at most the
        first 100 rows; rows whose cell is missing or blank, or whose
        prediction raises, carry ``ERROR`` / ``N/A``. On failure it has a
        single ``Error`` column describing the problem.
    """
    if file is None:
        return pd.DataFrame({"Error": ["Please upload a file"]})

    try:
        # Uploads may arrive as a plain path or as a wrapper exposing .name.
        path = file if isinstance(file, (str, os.PathLike)) else file.name
        df = pd.read_csv(path)

        if 'text' not in df.columns:
            return pd.DataFrame({"Error": ["CSV must have 'text' column"]})

        results = []
        cells = df['text'].iloc[:_MAX_BATCH_ROWS]
        for idx, cell in enumerate(cells, start=1):
            # Missing cells are NaN; str(NaN) would be classified as the
            # literal word "nan", so treat them as blank instead.
            text = '' if pd.isna(cell) else str(cell)
            row = {
                'ID': idx,
                'Text Preview': _preview(text),
                'Sentiment': 'ERROR',
                'Confidence': 'N/A',
            }

            if text.strip():
                try:
                    # Tokenizer-level truncation instead of a character slice.
                    result = sentiment_analyzer(
                        text, truncation=True, max_length=512
                    )[0]
                    sentiment = map_label_to_sentiment(result['label'])
                    confidence = f"{result['score']:.2%}"
                except Exception as e:
                    # One bad row must not abort the batch; keep the ERROR
                    # placeholder but say why it failed.
                    print(f"Error in analyze_batch_file (row {idx}): {e}")
                else:
                    row['Sentiment'] = sentiment.capitalize()
                    row['Confidence'] = confidence

            results.append(row)

        return pd.DataFrame(results, columns=_BATCH_COLUMNS)

    except Exception as e:
        print(f"Error in analyze_batch_file: {e}")
        return pd.DataFrame({"Error": [f"File processing failed: {str(e)}"]})
|
|
|
|
|
|
|
|
|
|
|
# Gradio layout with three tabs: single-text analysis, batch CSV analysis,
# and a static "About" page. Components are created in display order.
with gr.Blocks(title="Bangla Sentiment Analysis - Fine-tuned Model") as demo:
    gr.Markdown("""
    # 🇧🇩 Bangla News Sentiment Analysis System
    ### Analyze sentiment of Bangla text using Fine-tuned AI Model
    **MSc IT Thesis Project** | Model: Fine-tuned on Bangla News Dataset
    """)

    # --- Tab 1: classify one piece of text -------------------------------
    with gr.Tab("📝 Single Text Analysis"):
        gr.Markdown("### Analyze sentiment of individual Bangla text")

        with gr.Row():
            # Left column: input box and action buttons.
            with gr.Column():
                text_input = gr.Textbox(
                    lines=8,
                    placeholder="এখানে বাংলা টেক্সট লিখুন... (Enter Bangla text here)",
                    label="Input Text"
                )

                with gr.Row():
                    clear_btn = gr.Button("Clear")
                    analyze_btn = gr.Button("🔍 Analyze Sentiment")

            # Right column: prediction display.
            with gr.Column():
                output_label = gr.Label(
                    label="📊 Sentiment Result",
                    num_top_classes=3
                )

        gr.Markdown("### 💡 Try these examples:")
        gr.Examples(
            examples=[
                ["বাংলাদেশ ক্রিকেট দল দুর্দান্ত পারফরম্যান্স করেছে আজকের ম্যাচে!"],
                ["সরকারের এই সিদ্ধান্তে জনগণ অসন্তুষ্ট এবং ক্ষুব্ধ"],
                ["আজকের আবহাওয়া মোটামুটি ভালো থাকবে সারাদিন"],
                ["শিক্ষা ব্যবস্থায় উন্নতি প্রয়োজন কিন্তু পদক্ষেপ নেওয়া হচ্ছে"],
                ["এই রেস্তোরাঁর খাবারের মান অত্যন্ত খারাপ ছিল"],
                ["বাজারে সাধারণ শেয়ারধারীদের স্বার্থ রক্ষার সুযোগ নেই"]
            ],
            inputs=text_input
        )

        # Clear the textbox AND the previous prediction, so a stale result
        # is not left next to an empty input.
        clear_btn.click(lambda: ("", None), outputs=[text_input, output_label])
        analyze_btn.click(
            fn=analyze_single_text,
            inputs=text_input,
            outputs=output_label
        )

    # --- Tab 2: classify the rows of an uploaded CSV ---------------------
    with gr.Tab("📊 Batch Analysis"):
        gr.Markdown("""
        ### Analyze multiple texts at once
        Upload a CSV file with a column named **'text'** containing Bangla text
        """)

        with gr.Row():
            # Left column: upload control, run button, format hint.
            with gr.Column():
                file_input = gr.File(
                    label="📁 Upload CSV File",
                    file_types=[".csv"]
                )
                batch_btn = gr.Button("🚀 Process Batch")

                gr.Markdown("""
                **CSV Format Example:**
                ```
                text
                প্রথম বাংলা টেক্সট
                দ্বিতীয় বাংলা টেক্সট
                তৃতীয় বাংলা টেক্সট
                ```
                """)

            # Right column: result table.
            with gr.Column():
                batch_output = gr.Dataframe(
                    label="Results"
                )

        batch_btn.click(
            fn=analyze_batch_file,
            inputs=file_input,
            outputs=batch_output
        )

    # --- Tab 3: static project information -------------------------------
    with gr.Tab("ℹ️ About"):
        gr.Markdown("""
        ## About This Project

        ### 🎯 Objective
        Develop an automated sentiment analysis system for Bangla news and social media text using a fine-tuned model.

        ### 🤖 Model Information
        - **Model**: Fine-tuned `DistilBERT` model
        - **Base Model**: `tabularisai/multilingual-sentiment-analysis`
        - **Fine-tuned On**: Bangla news dataset
        - **Architecture**: DistilBERT (Transformer-based)
        - **Sentiment Classes**: Positive, Negative, Neutral
        - **Model Location**: `models/bangla-sentiment-finetuned/`

        ### 📚 Dataset
        - Bangla news articles from major Bangladeshi newspapers
        - Social media comments and reviews
        - Manually validated samples
        - Fine-tuned for better Bangla text understanding

        ### 🛠️ Technology Stack
        - Python 3.12
        - Hugging Face Transformers
        - Gradio (Web Interface)
        - PyTorch

        ### 🎓 Academic Context
        **MSc in Information Technology - Thesis Project**

        ### 👨💻 Developer
        **Rakib**
        MSc in Information Technology
        Jahangirnagar University
        Year: 2025

        ### 📧 Contact
        Email: [[email protected]](mailto:[email protected])
        GitHub: [rakibhoossain](https://github.com/rakibhoossain)

        ---
        *Last Updated: December 2025*
        """)
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    print("\n🚀 Starting Gradio interface with fine-tuned model...")
    print("=" * 50)

    # share=True and show_error=True are kept exactly as before.
    launch_options = {"share": True, "show_error": True}

    # PORT used to be read but never handed to launch(), so it had no effect.
    # Forward it only when it is actually set; otherwise no port is forced
    # and launch() behaves as it did before.
    port_setting = os.environ.get("PORT")
    if port_setting:
        port = int(port_setting)  # raises ValueError on a non-numeric PORT
        launch_options["server_port"] = port

    demo.launch(**launch_options)
|
|
|
|
|
|