Spaces:
Sleeping
Sleeping
| from transformers import AutoTokenizer, MT5ForConditionalGeneration | |
| from transformers import T5Tokenizer | |
| import streamlit as st | |
| import pandas as pd | |
| from datasets import Dataset | |
| import torch | |
| from datasets import Dataset, DatasetDict | |
| from transformers import Trainer, TrainingArguments | |
# Load the pretrained mT5 tokenizer and model from the same checkpoint.
MODEL_CHECKPOINT = "google/mt5-base"
tokenizer = T5Tokenizer.from_pretrained(MODEL_CHECKPOINT)
model = MT5ForConditionalGeneration.from_pretrained(MODEL_CHECKPOINT)
#st.write(model)

# Read the proverbs CSV into a DataFrame.
df = pd.read_csv("proverbs.csv")
# Bare expression kept on purpose: Streamlit renders values that stand alone
# on a line, so this displays the DataFrame in the app.
df

# Wrap the DataFrame as a `datasets.Dataset` for the tokenization step.
dataset = Dataset.from_pandas(df)
def preprocess_function(examples):
    """Tokenize proverbs and their meanings for seq2seq fine-tuning.

    Args:
        examples: A mapping with a 'Proverb' entry (model inputs) and a
            'Meaning' entry (targets). With ``batched=True`` each entry is
            a list of strings; a single pair of strings is also accepted.

    Returns:
        The tokenizer output for the inputs (padded/truncated to 128
        tokens) with an added "labels" entry holding the target token ids.
        Padding positions in the labels are replaced with -100 so they are
        ignored by the loss.
    """
    inputs = examples['Proverb']
    targets = examples['Meaning']
    model_inputs = tokenizer(inputs, max_length=128, truncation=True, padding="max_length")
    # `text_target=` replaces the deprecated `as_target_tokenizer()` context
    # manager for tokenizing targets.
    labels = tokenizer(text_target=targets, max_length=128, truncation=True, padding="max_length")

    pad_id = tokenizer.pad_token_id

    def _mask_padding(sequence):
        # -100 is the label index the model's loss skips; without this the
        # model is also trained to predict the padding tokens.
        return [-100 if token_id == pad_id else token_id for token_id in sequence]

    label_ids = labels["input_ids"]
    # Batched calls yield a list of sequences; a single example yields one
    # flat sequence of ids.
    is_batch = bool(label_ids) and isinstance(label_ids[0], (list, tuple))
    if is_batch:
        model_inputs["labels"] = [_mask_padding(sequence) for sequence in label_ids]
    else:
        model_inputs["labels"] = _mask_padding(label_ids)
    return model_inputs
# Tokenize the whole dataset in batches.
tokenized_dataset = dataset.map(
    preprocess_function,
    batched=True,
)

# Hold out 20% of the rows as a test split.
dataset_split = tokenized_dataset.train_test_split(test_size=0.2)
train_dataset, test_dataset = dataset_split['train'], dataset_split['test']

# Report the size of each split.
print(f"Training dataset size: {len(train_dataset)}")
print(f"Testing dataset size: {len(test_dataset)}")
# Fine-tuning hyperparameters, collected in one place and handed to
# TrainingArguments as keyword arguments.
_training_settings = {
    "output_dir": "./results",
    "evaluation_strategy": "epoch",
    "learning_rate": 2e-5,
    "per_device_train_batch_size": 4,
    "per_device_eval_batch_size": 4,
    "num_train_epochs": 3,
    "weight_decay": 0.01,
    "save_total_limit": 2,
    "save_steps": 500,
}
training_args = TrainingArguments(**_training_settings)
# Initialize Trainer.
# Train on the training split and evaluate on the held-out test split, so the
# per-epoch evaluation is not measured on the data the model was trained on.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
)

# Fine-tune the model
trainer.train()

# Save the fine-tuned model and its tokenizer side by side so the directory
# can be reloaded with `from_pretrained`.
model.save_pretrained("./fine-tuned-mt5-marathi-proverbs")
tokenizer.save_pretrained("./fine-tuned-mt5-marathi-proverbs")