csebuetnlp/xlsum
Updated • 5.22k • 152
How to use Anjaan-Khadka/summarization_nepali with Transformers:
# Use a pipeline as a high-level helper
# Warning: Pipeline type "summarization" is no longer supported in transformers v5.
# You must load the model directly (see below) or downgrade to v4.x with:
# 'pip install "transformers<5.0.0'
from transformers import pipeline
pipe = pipeline("summarization", model="Anjaan-Khadka/summarization_nepali") # Load model directly
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
tokenizer = AutoTokenizer.from_pretrained("Anjaan-Khadka/summarization_nepali")
model = AutoModelForSeq2SeqLM.from_pretrained("Anjaan-Khadka/summarization_nepali")This repository contains adapted version of mT5-multilinguag-XLSum for Single Language (Nepali). View original mT5-multilinguag-XLSum model
transformers (tested on 4.11.0.dev0)
import re
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
article_text = " तीन नगरपालिकालाई समेटेर भेरी किनारमा बन्न थालेको आधुनिक नमुना सहरको काम तीव्र गतिमा अघि बढेको छ । भेरीगंगा, गुर्भाकोट र लेकबेंसी नगरपालिकामा बन्न थालेको भेरीगंगा उपत्यका नमुना आधुनिक सहर निर्माण हुन लागेको हो । यसले नदी वारि र पारिको ४ सय ६० वर्ग किलोमिटर क्षेत्रलाई समेट्नेछ ।"
model_name = "Anjaan-Khadka/summarization_nepali"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
input_ids = tokenizer(
(article_text),
return_tensors="pt",
padding="max_length",
truncation=True,
max_length=512
)["input_ids"]
output_ids = model.generate(
input_ids=input_ids,
max_length=84,
no_repeat_ngram_size=2,
num_beams=4
)[0]
summary = tokenizer.decode(
output_ids,
skip_special_tokens=True,
clean_up_tokenization_spaces=False
)
print(summary)