from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments
from datasets import load_dataset
from trl import SFTTrainer
from peft import LoraConfig
import random
import torch

# Allow TF32 matmuls on Ampere+ GPUs (faster, slightly lower precision).
torch.backends.cuda.matmul.allow_tf32 = True

# Anomaly detection surfaces NaN/inf gradients early, at a noticeable speed cost;
# consider disabling it for long production runs.
torch.autograd.set_detect_anomaly(True)

# Seed Python and PyTorch RNGs for reproducibility.
random_seed = 42
torch.manual_seed(random_seed)
random.seed(random_seed)

# Kept for reference; the model itself is placed with device_map="auto" below.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
|
|
|
|
|
# Defaults for the thought/talk rollout (the naming suggests a Quiet-STaR-style
# setup): n_ahead_global is the number of hidden "thought" tokens per position,
# n_ahead_talk_global the number of "talk" tokens produced from them, and
# n_passes_global the number of parallel rollouts.
n_ahead_talk_global = 4
n_passes_global = 2
n_ahead_global = 8
n_examples = 0
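# model_init appears to follow the transformers hyperparameter-search convention:
# it takes an optional trial-like object exposing a .params dict and builds a
# fresh model (and, here, the tokenizer as well). Called with None, it falls back
# to the global defaults above.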
|
|
def model_init(params):
    original = False
    if params is None:
        params = {}
    else:
        # A hyperparameter-search trial object carries its values in .params.
        params = params.params

    n_ahead = params.get("n_ahead", n_ahead_global if not original else 1)
    n_ahead_talk = params.get("n_ahead_talk", n_ahead_talk_global if not original else 1)
    n_passes = params.get("n_passes", n_passes_global if not original else 1)
    gumbel_temperature = params.get("gumbel_temperature", 1)
    use_start_thought_token = params.get("use_start_thought_token", True)
    use_end_thought_token = params.get("use_end_thought_token", True)
    include_policy_loss = params.get("include_policy_loss", True)
    gumbel_detach = params.get("gumbel_detach", True)
    merged_talk_heads = params.get("merged_talk_heads", True)
    residual_think_head = params.get("residual_think_head", False)
    optimize_lm_head_only_at_start = params.get("optimize_lm_head_only_at_start", False)

    model_id = "LeroyDyer/SpydazWeb_AGI_MistralStar"
    tokenizer_id = model_id
    print("Loading model")

    # The thought/talk keyword arguments below are custom config options handled
    # by the model's remote code (hence trust_remote_code=True); they are not
    # standard transformers parameters.
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        torch_dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32,
        max_thoughts=n_ahead + n_ahead_talk + 1,
        merged_talk_heads=merged_talk_heads,
        merged_lm_and_talk_heads=False,
        merged_lm_and_think_heads=True,
        use_concat_talk_head=True,
        use_shallow_think=True,
        use_shallow_talk=False,
        use_complex_think_head=False,
        use_complex_talk_head=True,
        use_weighted_talk_head=True,
        trust_remote_code=True,
        device_map="auto",
    )
    print("Loaded model")

    tokenizer = AutoTokenizer.from_pretrained(tokenizer_id, truncation=True, padding_side="right")
    tokenizer.pad_token_id = tokenizer.eos_token_id

    # Push the chosen hyperparameters onto the model instance, where the remote
    # modeling code reads them.
    model.gumbel_detach = gumbel_detach
    model.include_policy_loss = include_policy_loss
    model.use_end_thought_token = use_end_thought_token
    model.use_start_thought_token = use_start_thought_token
    model.n_ahead = n_ahead
    model.n_ahead_talk = n_ahead_talk
    model.n_passes = n_passes
    model.residual_think_head = residual_think_head
    model.optimize_lm_head_only_at_start = optimize_lm_head_only_at_start
    model.gumbel_temperature = gumbel_temperature
    model.original_mode = original
    model.config_params = params
    # Return the tokenizer too: the caller unpacks both values.
    return model, tokenizer


model, tokenizer = model_init(None)
|
|
|
|
|
|
peft_config = LoraConfig(
    r=128,
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj",
                    "gate_proj", "up_proj", "down_proj", "lm_head", "embed_tokens"],
    lora_alpha=32,
    lora_dropout=0,
    bias="none",
    use_dora=True,
)
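# Note: including lm_head and embed_tokens in target_modules trains the output
# head and embeddings alongside the adapters, which grows the saved adapter
# considerably but lets any newly added special tokens (e.g. thought markers) be
# learned. use_dora=True enables DoRA (weight-decomposed low-rank adaptation).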
|
|
alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
{}

### Input:
{}

### Response:
{}"""

EOS_TOKEN = tokenizer.eos_token  # appended so the model learns where to stop

def formatting_prompts_func(examples):
    """Render each (instruction, input, output) triple into the Alpaca template."""
    instructions = examples["instruction"]
    inputs = examples["input"]
    outputs = examples["output"]
    texts = []
    for instruction, input_text, output in zip(instructions, inputs, outputs):
        text = alpaca_prompt.format(instruction, input_text, output) + EOS_TOKEN
        texts.append(text)
    return {"text": texts}

dataset = load_dataset("gate369/Alpaca-Star", split="train[:2000]")
dataset = dataset.shuffle(seed=3704)
dataset = dataset.map(formatting_prompts_func, batched=True)
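# Optional sanity check: inspect one formatted example to confirm the template and
# EOS token render as expected before training.
print(dataset[0]["text"][:300])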
|
|
max_seq_length = 1024
training_args = TrainingArguments(
    output_dir="./out",
    num_train_epochs=3,             # max_steps below takes precedence when both are set
    per_device_train_batch_size=1,
    gradient_checkpointing=False,
    gradient_accumulation_steps=8,  # effective batch size of 8 per device
    optim="lion_32bit",
    logging_steps=1,
    save_strategy="steps",          # checkpoints every save_steps (default 500)
    max_steps=1000,
    bf16=True,
    tf32=False,  # overrides the allow_tf32 flag set at the top of the script
    learning_rate=6e-05,
    max_grad_norm=0.3,
    warmup_ratio=0.06,
    lr_scheduler_type="cosine",
    push_to_hub=False,
)
# Note: passing tokenizer, max_seq_length, and dataset_text_field directly matches
# the older trl SFTTrainer API; recent trl versions move the latter two into SFTConfig.
trainer = SFTTrainer(
    args=training_args,
    train_dataset=dataset,
    model=model,
    tokenizer=tokenizer,
    max_seq_length=max_seq_length,
    dataset_text_field="text",
    peft_config=peft_config,
)
trainer.train()
|
|
# Save locally; trainer.save_model("SFTTrainerModel") is an alternative that goes
# through the trainer's own saving logic.
tokenizer.save_pretrained("SFTTrainerModel")
model.save_pretrained("SFTTrainerModel")
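# Optional: a minimal generation sanity check before uploading. This is a sketch;
# it assumes the fine-tuned model generates sensibly with default settings, and the
# prompt contents are illustrative only.
sample = alpaca_prompt.format("Name three primary colors.", "", "")
enc = tokenizer(sample, return_tensors="pt").to(model.device)
with torch.no_grad():
    out = model.generate(**enc, max_new_tokens=64)
print(tokenizer.decode(out[0], skip_special_tokens=True))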
|
|
|
|
# Only the login helper and HfApi are actually used below.
import huggingface_hub
from huggingface_hub import HfApi
|
|
MODEL_NAME = "_Spydaz_Web_AI_MistralStar"
folder_input = "SFTTrainerModel"
WRITE_TOKEN = ""  # paste a Hugging Face token with write access; never commit it
username = "LeroyDyer"
huggingface_hub.login(WRITE_TOKEN)
api = HfApi(token=WRITE_TOKEN)

api.create_repo(
    repo_id=f"{username}/{MODEL_NAME}",
    repo_type="model",
    exist_ok=True,
)
|
|
api.upload_folder(
    repo_id=f"{username}/{MODEL_NAME}",
    folder_path=folder_input,
)
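# Once the upload finishes, the checkpoint is browsable at
# https://huggingface.co/LeroyDyer/_Spydaz_Web_AI_MistralStar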