# Cantonese sentiment & emotion analysis demo (Gradio app).
import gradio as gr
import torch
import torch.nn as nn
from transformers import AutoTokenizer, AutoConfig, BertModel, BertPreTrainedModel
from safetensors.torch import load_file
import gc
# Release any leftover Python objects before loading the model.
gc.collect()
# Only touch the CUDA allocator when CUDA is actually usable; this keeps
# the script safe on CPU-only PyTorch builds (the model runs on CPU below).
if torch.cuda.is_available():
    torch.cuda.empty_cache()
# Hugging Face model id of the pretrained Chinese RoBERTa backbone.
model_name = "hfl/chinese-roberta-wwm-ext"
class MultiTaskRoBert(BertPreTrainedModel):
    """RoBERTa backbone with two task heads.

    Heads:
        classifier -- 3-way sentiment logits.
        regressor  -- 5 emotion scores.
    """

    def __init__(self, config, model_name):
        super().__init__(config)
        # Backbone is initialised from pretrained weights; fine-tuned
        # weights can still be loaded over it via load_state_dict later.
        self.bert = BertModel.from_pretrained(model_name, config=config)
        # Both heads read the pooled sentence representation.
        self.classifier = nn.Linear(config.hidden_size, 3)  # sentiment logits
        self.regressor = nn.Linear(config.hidden_size, 5)   # emotion scores

    def forward(self, input_ids, attention_mask=None, token_type_ids=None):
        """Encode the input once and run both heads on the pooled output."""
        encoded = self.bert(
            input_ids,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
        )
        sentence_repr = encoded.pooler_output
        return {
            "logits": self.classifier(sentence_repr),
            "regression_outputs": self.regressor(sentence_repr),
        }
# Inference device; hard-coded to CPU for this deployment.
device = "cpu"
print(f"Device: {device}")
# Fine-tuned multitask weights stored in safetensors format.
model_path = "model1.safetensors"
# Load tokenizer and config for the same backbone the model was trained on.
tokenizer = AutoTokenizer.from_pretrained(model_name)
config = AutoConfig.from_pretrained(model_name)
model = MultiTaskRoBert(config, model_name).to(device)
# Overwrite the pretrained backbone with the fine-tuned weights.
state_dict = load_file(model_path, device="cpu")
model.load_state_dict(state_dict)
model.eval()
# Use half precision to reduce memory usage (disabled: half() only helps on CUDA).
# if device.type == 'cuda':
#     model.half()
def predict(text: str):
    """Analyse one Cantonese text.

    Args:
        text: Raw input text from the UI.

    Returns:
        A dict with the sentiment label (正面/負面/中立) and five emotion
        scores clamped to [0, 5] and rounded to 2 decimals, or an error
        dict if anything fails.
    """
    try:
        inputs = tokenizer(
            text,
            return_tensors="pt",
            truncation=True,
            padding="max_length",
            max_length=128
        )
        inputs = {k: v.to(device) for k, v in inputs.items()}
        with torch.no_grad():
            out = model(**inputs)
        pred_class = torch.argmax(out["logits"], dim=-1).item()
        sentiment_map = {0: "正面", 1: "負面", 2: "中立"}
        # Clamp the regression head's outputs to the valid [0, 5] score
        # range in one vectorized call (replaces the element-wise loop),
        # and convert to native Python floats via tolist() so the result
        # is JSON-serializable (numpy float32 values are not).
        reg_results = out["regression_outputs"][0].clamp(0, 5).cpu().tolist()
        rating, delight, anger, sorrow, happiness = reg_results
        return {
            "情感": sentiment_map[pred_class],
            "強度": round(rating, 2),
            "喜": round(delight, 2),
            "怒": round(anger, 2),
            "哀": round(sorrow, 2),
            "樂": round(happiness, 2),
        }
    except Exception as e:
        # UI boundary: surface the failure as a result instead of crashing.
        return {"错误": f"处理失败: {str(e)}"}
# Footer credit rendered below the interface.
article = "Author: Lu Yuk Tong, [Github Link](https://github.com/LutherYTT/Cantonese-Sentiment-Analysis-System-Multitasking-Learning-on-Scarce-Data)"
# Create Gradio interface: one textbox in, JSON analysis result out.
iface = gr.Interface(
    fn=predict,
    inputs=gr.Textbox(lines=3, placeholder="請輸入粵語文本...", label="粵語文本"),
    outputs=gr.JSON(label="分析結果"),
    title="粵語情感與情緒分析",
    description="輸入粵語文本,分析情感(正面/負面/中立)和五種情緒評分",
    # Clickable example inputs (code-mixed Cantonese/English sentences).
    examples=[
        ["呢個plan聽落唔錯,我哋試下先啦。"],
        ["份proposal 你send 咗俾client未?Deadline 係EOD呀。"],
        ["返工返到好攰,但係見到同事就feel better啲。"],
        ["你今次嘅presentation做得唔錯,我好 impressed!"],
        ["夜晚聽到嗰啲聲,我唔敢出房門。"],
        ["個client 真係好 difficult 囉,改咗n 次 requirements,仲要urgent,chur 到痴線!"],
        ["我尋日冇乜特別事做,就係喺屋企睇電視。"],
        ["Weekend 去staycation,間酒店個view 正到爆!"],
        ["做乜嘢都冇意義。"],
        ["今朝遲到咗,差啲miss咗個重要meeting"],
    ],
    article=article,
)
if __name__ == "__main__":
    # share=True exposes a temporary public link; show_error=True surfaces
    # server-side tracebacks in the UI. (Removed a stray trailing "|"
    # extraction artifact that made this line a syntax error.)
    iface.launch(share=True, show_error=True)