Spaces:

rakib7
/

sentiment

Sleeping

File size: 8,983 Bytes

"""
Bangla News Sentiment Analysis - Fine-tuned Model
MSc IT Thesis Project
Uses the fine-tuned model trained on Bangla news dataset
"""
import os

import gradio as gr
from transformers import pipeline
import pandas as pd
import torch

# Directory the fine-tuned checkpoint is loaded from.
FINE_TUNED_MODEL_PATH = "models/bangla-sentiment-finetuned"

# Device index handed to pipeline(): 0 when CUDA is available (reported as
# GPU below), otherwise -1 (reported as CPU).
if torch.cuda.is_available():
    device = 0
else:
    device = -1
print(f"Using device: {'GPU' if device == 0 else 'CPU'}")

# Build the sentiment pipeline once at import time; the handlers defined
# further down reuse this module-level object.
print("Loading fine-tuned model...")
try:
    sentiment_analyzer = pipeline(
        "sentiment-analysis", model=FINE_TUNED_MODEL_PATH, device=device
    )
    print("✅ Fine-tuned model loaded successfully!")
except Exception as load_error:
    # Say where the model was expected, then re-raise so startup aborts.
    print(f"❌ Error loading fine-tuned model: {load_error}")
    print("Make sure the model is trained and saved at:", FINE_TUNED_MODEL_PATH)
    raise


def map_label_to_sentiment(label):
    """
    Map a model output label to 'negative', 'neutral' or 'positive'.

    Accepts the generic ``LABEL_<id>`` names (negative=0, neutral=1,
    positive=2, per fine_tune.py) as well as the plain class names.
    Matching ignores letter case and surrounding whitespace, so variants
    such as ``'POSITIVE'`` or ``'label_2'`` resolve to the right class
    instead of silently falling back to neutral.

    Args:
        label: Label string as reported by the classifier.

    Returns:
        The lowercase sentiment name; 'neutral' when the label is not a
        string or is not recognised.
    """
    label_map = {
        'label_0': 'negative',
        'label_1': 'neutral',
        'label_2': 'positive',
        'negative': 'negative',
        'neutral': 'neutral',
        'positive': 'positive',
    }
    if not isinstance(label, str):
        return 'neutral'
    # Keys are stored lowercase; normalise the incoming label the same way.
    return label_map.get(label.strip().lower(), 'neutral')


def analyze_single_text(text):
    """
    Analyze sentiment of single text using fine-tuned model.

    Args:
        text: Raw input text from the UI textbox.

    Returns:
        A single-entry dict mapping the display label (English plus
        Bengali) to the model's confidence as a float, suitable for
        gr.Label. ``{"Error": 1.0}`` is returned for empty or
        whitespace-only input and when inference raises.
    """
    if not text or not text.strip():
        return {"Error": 1.0}

    try:
        # The 512-character slice only caps how much text is handed over; it
        # does not bound the number of tokens. truncation/max_length ask the
        # tokenizer itself to cut the sequence at 512 tokens.
        result = sentiment_analyzer(
            text[:512], truncation=True, max_length=512
        )[0]

        # Map model label to sentiment class
        sentiment = map_label_to_sentiment(result['label'])
        confidence = float(result['score'])

        # Map labels to Bengali/English for display
        display_map = {
            'positive': '✅ Positive (ইতিবাচক)',
            'negative': '❌ Negative (নেতিবাচক)',
            'neutral': '➖ Neutral (নিরপেক্ষ)'
        }

        # Return only float values for gr.Label
        return {display_map.get(sentiment, sentiment): confidence}
    except Exception as e:
        # UI boundary: log the failure and show a generic error label.
        print(f"Error in analyze_single_text: {e}")
        return {"Error": 1.0}


def _preview_text(text, limit=80):
    """Return text cut to `limit` characters, adding '...' only when it was cut."""
    return text[:limit] + '...' if len(text) > limit else text


def analyze_batch_file(file):
    """
    Analyze sentiment of uploaded CSV file using fine-tuned model.

    Only the first 100 rows of the 'text' column are processed.

    Args:
        file: The uploaded file, either as a filesystem path
            (``str`` / ``os.PathLike``) or as an object whose ``name``
            attribute holds the path. ``None`` means nothing was uploaded.

    Returns:
        A DataFrame with columns 'ID', 'Text Preview', 'Sentiment' and
        'Confidence', one row per processed text. Rows whose text is
        missing or blank are reported with Sentiment 'EMPTY'; rows whose
        analysis raised are reported with Sentiment 'ERROR'. On a missing
        upload, a missing 'text' column or a file-level failure, a
        one-column 'Error' DataFrame carrying the message is returned.
    """
    if file is None:
        return pd.DataFrame({"Error": ["Please upload a file"]})

    try:
        # Accept a plain path as well as an upload object exposing `.name`.
        if isinstance(file, (str, os.PathLike)):
            csv_path = file
        else:
            csv_path = file.name
        df = pd.read_csv(csv_path)

        if 'text' not in df.columns:
            return pd.DataFrame({"Error": ["CSV must have 'text' column"]})

        # Analyze each text
        results = []
        for row_id, raw_text in enumerate(df['text'].head(100), start=1):  # Limit to 100 for demo
            # Empty CSV cells are read as NaN; str(NaN) would otherwise be
            # classified as the literal text "nan".
            if pd.isna(raw_text) or not str(raw_text).strip():
                results.append({
                    'ID': row_id,
                    'Text Preview': '',
                    'Sentiment': 'EMPTY',
                    'Confidence': 'N/A'
                })
                continue

            text = str(raw_text)
            try:
                # Character slice caps the text handed over; truncation and
                # max_length make the tokenizer enforce the token limit.
                result = sentiment_analyzer(
                    text[:512], truncation=True, max_length=512
                )[0]

                # Map model label to sentiment
                sentiment = map_label_to_sentiment(result['label'])

                results.append({
                    'ID': row_id,
                    'Text Preview': _preview_text(text),
                    'Sentiment': sentiment.capitalize(),
                    'Confidence': f"{result['score']:.2%}"
                })
            except Exception as e:
                # Keep going with the remaining rows, but leave a trace of
                # what went wrong for this one.
                print(f"Error analyzing row {row_id}: {e}")
                results.append({
                    'ID': row_id,
                    'Text Preview': _preview_text(text),
                    'Sentiment': 'ERROR',
                    'Confidence': 'N/A'
                })

        # Explicit columns keep the table header stable even with zero rows.
        return pd.DataFrame(
            results,
            columns=['ID', 'Text Preview', 'Sentiment', 'Confidence']
        )

    except Exception as e:
        print(f"Error in analyze_batch_file: {e}")
        return pd.DataFrame({"Error": [f"File processing failed: {str(e)}"]})


# Create Gradio Interface
# Three tabs: single-text analysis, batch CSV analysis, and an About page.
with gr.Blocks(title="Bangla Sentiment Analysis - Fine-tuned Model") as demo:
    gr.Markdown("""
    # 🇧🇩 Bangla News Sentiment Analysis System
    ### Analyze sentiment of Bangla text using Fine-tuned AI Model
    **MSc IT Thesis Project** | Model: Fine-tuned on Bangla News Dataset
    """)

    with gr.Tab("📝 Single Text Analysis"):
        gr.Markdown("### Analyze sentiment of individual Bangla text")

        with gr.Row():
            # Left column: input textbox and action buttons
            with gr.Column():
                text_input = gr.Textbox(
                    lines=8,
                    placeholder="এখানে বাংলা টেক্সট লিখুন... (Enter Bangla text here)",
                    label="Input Text"
                )

                with gr.Row():
                    clear_btn = gr.Button("Clear")
                    analyze_btn = gr.Button("🔍 Analyze Sentiment")

            # Right column: predicted label with its confidence
            with gr.Column():
                output_label = gr.Label(
                    label="📊 Sentiment Result",
                    num_top_classes=3
                )

        gr.Markdown("### 💡 Try these examples:")
        gr.Examples(
            examples=[
                ["বাংলাদেশ ক্রিকেট দল দুর্দান্ত পারফরম্যান্স করেছে আজকের ম্যাচে!"],
                ["সরকারের এই সিদ্ধান্তে জনগণ অসন্তুষ্ট এবং ক্ষুব্ধ"],
                ["আজকের আবহাওয়া মোটামুটি ভালো থাকবে সারাদিন"],
                ["শিক্ষা ব্যবস্থায় উন্নতি প্রয়োজন কিন্তু পদক্ষেপ নেওয়া হচ্ছে"],
                ["এই রেস্তোরাঁর খাবারের মান অত্যন্ত খারাপ ছিল"],
                ["বাজারে সাধারণ শেয়ারধারীদের স্বার্থ রক্ষার সুযোগ নেই"]
            ],
            inputs=text_input
        )

        # "Clear" empties the textbox; "Analyze" runs the single-text handler.
        clear_btn.click(lambda: "", outputs=text_input)
        analyze_btn.click(
            fn=analyze_single_text,
            inputs=text_input,
            outputs=output_label
        )

    with gr.Tab("📊 Batch Analysis"):
        gr.Markdown("""
        ### Analyze multiple texts at once
        Upload a CSV file with a column named **'text'** containing Bangla text
        """)

        with gr.Row():
            # Left column: CSV upload, trigger button and a format hint
            with gr.Column():
                file_input = gr.File(
                    label="📁 Upload CSV File",
                    file_types=[".csv"]
                )
                batch_btn = gr.Button("🚀 Process Batch")

                # Every line of this literal, including the code fence, shares
                # the same indentation. Gradio strips only the common leading
                # indent, so a fence at column 0 would leave the heading
                # indented and rendered as a code block.
                gr.Markdown("""
                **CSV Format Example:**

                ```
                text
                প্রথম বাংলা টেক্সট
                দ্বিতীয় বাংলা টেক্সট
                তৃতীয় বাংলা টেক্সট
                ```
                """)

            # Right column: per-row results table
            with gr.Column():
                batch_output = gr.Dataframe(
                    label="Results"
                )

        batch_btn.click(
            fn=analyze_batch_file,
            inputs=file_input,
            outputs=batch_output
        )

    with gr.Tab("ℹ️ About"):
        gr.Markdown("""
        ## About This Project

        ### 🎯 Objective
        Develop an automated sentiment analysis system for Bangla news and social media text using a fine-tuned model.

        ### 🤖 Model Information
        - **Model**: Fine-tuned `DistilBERT` model
        - **Base Model**: `tabularisai/multilingual-sentiment-analysis`
        - **Fine-tuned On**: Bangla news dataset
        - **Architecture**: DistilBERT (Transformer-based)
        - **Sentiment Classes**: Positive, Negative, Neutral
        - **Model Location**: `models/bangla-sentiment-finetuned/`

        ### 📚 Dataset
        - Bangla news articles from major Bangladeshi newspapers
        - Social media comments and reviews
        - Manually validated samples
        - Fine-tuned for better Bangla text understanding

        ### 🛠️ Technology Stack
        - Python 3.12
        - Hugging Face Transformers
        - Gradio (Web Interface)
        - PyTorch

        ### 🎓 Academic Context
        **MSc in Information Technology - Thesis Project**

        ### 👨‍💻 Developer
        **Rakib**  
        MSc in Information Technology
        Jahangirnagar University
        Year: 2025

        ### 📧 Contact
        Email: [[email protected]](mailto:[email protected])
        GitHub: [rakibhoossain](https://github.com/rakibhoossain)

        ---
        *Last Updated: December 2025*
        """)

# Launch the app
if __name__ == "__main__":
    print("\n🚀 Starting Gradio interface with fine-tuned model...")
    print("=" * 50)

    # Honour an explicit PORT from the environment. When it is unset, pass
    # None so Gradio applies its own default port selection.
    port_env = os.environ.get("PORT")
    port = int(port_env) if port_env else None

    demo.launch(
        share=True,
        server_port=port,
        show_error=True
    )