File size: 8,983 Bytes
49c214c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f9fe3f9
888e3dd
49c214c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f9fe3f9
 
49c214c
 
 
f9fe3f9
 
49c214c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
"""
Bangla News Sentiment Analysis - Fine-tuned Model
MSc IT Thesis Project
Uses the fine-tuned model trained on Bangla news dataset
"""
import os

import gradio as gr
from transformers import pipeline
import pandas as pd
import torch

# Location of the fine-tuned model that the pipeline loads.
FINE_TUNED_MODEL_PATH = "models/bangla-sentiment-finetuned"

# Device index handed to the pipeline: 0 when CUDA is available (reported as
# GPU below), otherwise -1 (reported as CPU).
if torch.cuda.is_available():
    device = 0
else:
    device = -1
print(f"Using device: {'GPU' if device == 0 else 'CPU'}")

# Build the sentiment pipeline once at import time; the analysis functions
# below reuse this module-level object.
print("Loading fine-tuned model...")
try:
    sentiment_analyzer = pipeline(
        "sentiment-analysis", model=FINE_TUNED_MODEL_PATH, device=device
    )
    print("✅ Fine-tuned model loaded successfully!")
except Exception as load_error:
    # Report where the model was expected, then re-raise so startup fails
    # instead of continuing without a model.
    print(f"❌ Error loading fine-tuned model: {load_error}")
    print("Make sure the model is trained and saved at:", FINE_TUNED_MODEL_PATH)
    raise


def map_label_to_sentiment(label):
    """
    Map a model output label to 'negative', 'neutral' or 'positive'.

    Accepts both the generic ids (LABEL_0, LABEL_1, LABEL_2) and the plain
    class names. Based on fine_tune.py: negative=0, neutral=1, positive=2.
    Matching ignores letter case and surrounding whitespace, so variants such
    as 'Positive' or 'label_2' resolve to their class instead of silently
    falling through to the default.

    Args:
        label: Label string as reported by the classifier.

    Returns:
        'negative', 'neutral' or 'positive'. Anything unrecognised,
        including non-string values, falls back to 'neutral'.
    """
    if not isinstance(label, str):
        return 'neutral'

    # Keys are stored lower-case; the lookup key is normalised to match.
    label_map = {
        'label_0': 'negative',
        'label_1': 'neutral',
        'label_2': 'positive',
        'negative': 'negative',
        'neutral': 'neutral',
        'positive': 'positive',
    }
    return label_map.get(label.strip().lower(), 'neutral')


def analyze_single_text(text):
    """Analyze sentiment of single text using fine-tuned model.

    Args:
        text: Raw text entered by the user.

    Returns:
        A one-entry dict mapping a display label (English plus Bengali) to
        the model's confidence as a float, suitable for gr.Label. Blank
        input, or any failure during inference, returns {"Error": 1.0}.
    """
    if not text or not text.strip():
        return {"Error": 1.0}

    # Map labels to Bengali/English for display
    display_map = {
        'positive': '✅ Positive (ইতিবাচক)',
        'negative': '❌ Negative (নেতিবাচক)',
        'neutral': '➖ Neutral (নিরপেক্ষ)'
    }

    try:
        # Truncate by tokens rather than characters: a 512-character slice
        # may still exceed 512 tokens once special tokens are added, and it
        # can also cut off text that would have fit within the limit.
        result = sentiment_analyzer(text, truncation=True, max_length=512)[0]

        # Map model label to sentiment class
        sentiment = map_label_to_sentiment(result['label'])
        confidence = float(result['score'])

        # Return only float values for gr.Label
        return {display_map.get(sentiment, sentiment): confidence}
    except Exception as e:
        # UI boundary: log the failure and show an error entry instead of
        # letting the exception propagate.
        print(f"Error in analyze_single_text: {e}")
        return {"Error": 1.0}


def analyze_batch_file(file):
    """Analyze sentiment of uploaded CSV file using fine-tuned model.

    Only the first 100 rows of the 'text' column are analysed (demo limit).

    Args:
        file: The uploaded CSV, given either as a filesystem path
            (str / os.PathLike) or as an object exposing the path through
            its ``name`` attribute. ``None`` means nothing was uploaded.

    Returns:
        pandas.DataFrame with the columns ID, Text Preview, Sentiment and
        Confidence, one row per analysed text. Blank cells are reported as
        'EMPTY' and rows whose inference failed as 'ERROR'. When no file was
        given, the 'text' column is missing, or the file cannot be read, a
        single-column "Error" frame describing the problem is returned.
    """
    if file is None:
        return pd.DataFrame({"Error": ["Please upload a file"]})

    columns = ['ID', 'Text Preview', 'Sentiment', 'Confidence']

    try:
        # Accept a plain path as well as an object carrying the path in .name
        path = file if isinstance(file, (str, os.PathLike)) else file.name
        df = pd.read_csv(path)

        if 'text' not in df.columns:
            return pd.DataFrame({"Error": ["CSV must have 'text' column"]})

        results = []
        for idx, raw in enumerate(df['text'].iloc[:100]):  # Limit to 100 for demo
            # Missing cells arrive as NaN; do not classify the string 'nan'.
            is_blank = bool(pd.isna(raw)) or not str(raw).strip()
            text = '' if is_blank else str(raw)
            row = {'ID': idx + 1, 'Text Preview': _make_text_preview(text)}

            if is_blank:
                row.update(Sentiment='EMPTY', Confidence='N/A')
            else:
                try:
                    # Token-level truncation: a character slice does not
                    # guarantee the input fits the 512-token limit.
                    result = sentiment_analyzer(
                        text, truncation=True, max_length=512
                    )[0]
                    sentiment = map_label_to_sentiment(result['label'])
                    row.update(
                        Sentiment=sentiment.capitalize(),
                        Confidence=f"{result['score']:.2%}",
                    )
                except Exception as e:
                    # Keep processing the remaining rows, but log the cause
                    # instead of dropping it silently.
                    print(f"Error analyzing row {idx + 1}: {e}")
                    row.update(Sentiment='ERROR', Confidence='N/A')

            results.append(row)

        # Passing the columns keeps the headers even when there are no rows.
        return pd.DataFrame(results, columns=columns)

    except Exception as e:
        print(f"Error in analyze_batch_file: {e}")
        return pd.DataFrame({"Error": [f"File processing failed: {str(e)}"]})


def _make_text_preview(text, limit=80):
    """Return *text* cut to *limit* characters, with '...' only when cut."""
    return text[:limit] + '...' if len(text) > limit else text


# Create Gradio Interface
# The layout is declarative: components appear in the order they are created
# inside the nested `with` blocks, so statement order here is significant.
# Three tabs are defined: single-text analysis, batch CSV analysis, and About.
with gr.Blocks(title="Bangla Sentiment Analysis - Fine-tuned Model") as demo:
    # Page header shown above all tabs.
    gr.Markdown("""
    # 🇧🇩 Bangla News Sentiment Analysis System
    ### Analyze sentiment of Bangla text using Fine-tuned AI Model
    **MSc IT Thesis Project** | Model: Fine-tuned on Bangla News Dataset
    """)

    # Tab 1: free-text input on the left, sentiment label on the right.
    with gr.Tab("📝 Single Text Analysis"):
        gr.Markdown("### Analyze sentiment of individual Bangla text")

        with gr.Row():
            with gr.Column():
                text_input = gr.Textbox(
                    lines=8,
                    placeholder="এখানে বাংলা টেক্সট লিখুন... (Enter Bangla text here)",
                    label="Input Text"
                )

                with gr.Row():
                    clear_btn = gr.Button("Clear")
                    analyze_btn = gr.Button("🔍 Analyze Sentiment")

            with gr.Column():
                # NOTE(review): analyze_single_text returns a single-entry
                # dict, so only one class is shown despite num_top_classes=3.
                output_label = gr.Label(
                    label="📊 Sentiment Result",
                    num_top_classes=3
                )

        # Sample sentences offered as examples for the text box.
        gr.Markdown("### 💡 Try these examples:")
        gr.Examples(
            examples=[
                ["বাংলাদেশ ক্রিকেট দল দুর্দান্ত পারফরম্যান্স করেছে আজকের ম্যাচে!"],
                ["সরকারের এই সিদ্ধান্তে জনগণ অসন্তুষ্ট এবং ক্ষুব্ধ"],
                ["আজকের আবহাওয়া মোটামুটি ভালো থাকবে সারাদিন"],
                ["শিক্ষা ব্যবস্থায় উন্নতি প্রয়োজন কিন্তু পদক্ষেপ নেওয়া হচ্ছে"],
                ["এই রেস্তোরাঁর খাবারের মান অত্যন্ত খারাপ ছিল"],
                ["বাজারে সাধারণ শেয়ারধারীদের স্বার্থ রক্ষার সুযোগ নেই"]
            ],
            inputs=text_input
        )

        # "Clear" resets the text box to an empty string; "Analyze" sends the
        # text box content to analyze_single_text and shows the result.
        clear_btn.click(lambda: "", outputs=text_input)
        analyze_btn.click(
            fn=analyze_single_text,
            inputs=text_input,
            outputs=output_label
        )

    # Tab 2: CSV upload on the left, results table on the right.
    with gr.Tab("📊 Batch Analysis"):
        gr.Markdown("""
        ### Analyze multiple texts at once
        Upload a CSV file with a column named **'text'** containing Bangla text
        """)

        with gr.Row():
            with gr.Column():
                file_input = gr.File(
                    label="📁 Upload CSV File",
                    file_types=[".csv"]
                )
                batch_btn = gr.Button("🚀 Process Batch")

                # The fenced example below sits at column 0 inside the string
                # literal; it is part of the rendered text, not of the code.
                gr.Markdown("""
                **CSV Format Example:**
```
text
প্রথম বাংলা টেক্সট
দ্বিতীয় বাংলা টেক্সট
তৃতীয় বাংলা টেক্সট
```
                """)

            with gr.Column():
                batch_output = gr.Dataframe(
                    label="Results"
                )

        # The uploaded file is passed to analyze_batch_file; the returned
        # DataFrame is shown in the results table.
        batch_btn.click(
            fn=analyze_batch_file,
            inputs=file_input,
            outputs=batch_output
        )

    # Tab 3: static project description (no callbacks).
    with gr.Tab("ℹ️ About"):
        gr.Markdown("""
        ## About This Project

        ### 🎯 Objective
        Develop an automated sentiment analysis system for Bangla news and social media text using a fine-tuned model.

        ### 🤖 Model Information
        - **Model**: Fine-tuned `DistilBERT` model
        - **Base Model**: `tabularisai/multilingual-sentiment-analysis`
        - **Fine-tuned On**: Bangla news dataset
        - **Architecture**: DistilBERT (Transformer-based)
        - **Sentiment Classes**: Positive, Negative, Neutral
        - **Model Location**: `models/bangla-sentiment-finetuned/`

        ### 📚 Dataset
        - Bangla news articles from major Bangladeshi newspapers
        - Social media comments and reviews
        - Manually validated samples
        - Fine-tuned for better Bangla text understanding

        ### 🛠️ Technology Stack
        - Python 3.12
        - Hugging Face Transformers
        - Gradio (Web Interface)
        - PyTorch

        ### 🎓 Academic Context
        **MSc in Information Technology - Thesis Project**

        ### 👨‍💻 Developer
        **Rakib**  
        MSc in Information Technology
        Jahangirnagar University
        Year: 2025

        ### 📧 Contact
        Email: [[email protected]](mailto:[email protected])
        GitHub: [rakibhoossain](https://github.com/rakibhoossain)

        ---
        *Last Updated: December 2025*
        """)

# Script entry point: start the web interface only when run directly.
if __name__ == "__main__":
    print("\n🚀 Starting Gradio interface with fine-tuned model...")
    print("=" * 50)

    # NOTE(review): PORT is parsed here (default 7860) but is not passed to
    # launch(); the explicit server_name / server_port settings are disabled.
    port = int(os.getenv("PORT", 7860))

    # share=True requests a public share link; show_error=True surfaces
    # errors in the UI.
    launch_options = {
        "share": True,
        "show_error": True,
    }
    demo.launch(**launch_options)