"""Run the three-stage IF text-to-image cascade one model at a time.

The T5 text encoder is loaded first, used to embed the prompt and the empty
negative prompt, and deleted before any diffusion model is loaded.  Each
stage's pipeline is then loaded, run, its first output image saved under
``~/images``, and released before the next stage is loaded, so that only one
large model is resident on the GPU at any time.
"""

import gc
import os
from pathlib import Path

import torch
from diffusers import IFBasePipeline, IFSuperResolutionPipeline, UNet2DConditionModel
from transformers import T5EncoderModel, T5Tokenizer


def _release_cuda_memory():
    """Drop unreachable Python objects, then empty the CUDA caching allocator.

    The garbage collector runs first so that tensors kept alive only by
    reference cycles are freed before the cache is emptied; in the opposite
    order their memory would stay cached.
    """
    gc.collect()
    torch.cuda.empty_cache()


def _embed(tokenizer, encoder, text, max_length=77):
    """Return the encoder's last hidden state for ``text``.

    ``text`` is tokenized with padding and truncation to exactly
    ``max_length`` tokens, moved to the GPU and run through ``encoder``
    without gradient tracking.
    """
    with torch.no_grad():
        inputs = tokenizer(
            text,
            max_length=max_length,
            return_tensors="pt",
            truncation=True,
            padding="max_length",
        ).to("cuda")
        return encoder(**inputs).last_hidden_state


def _save_first_image(pipe, image, path):
    """Decode the pipeline's tensor output and save the first image to ``path``."""
    pil_image = pipe.numpy_to_pil(pipe.decode_latents(image))[0]
    pil_image.save(path)


prompt = 'a photo of a kangaroo wearing an orange hoodie and blue sunglasses standing in front of the eiffel tower holding a sign that says "very deep learning"'

# Local checkout of the IF weights.  A Hub repo id (e.g. "diffusers/if") can be
# used here instead to download the weights.
model_id = "/home/patrick/if"

# Make sure the output directory exists; saving into a missing directory fails.
output_dir = os.path.join(Path.home(), "images")
os.makedirs(output_dir, exist_ok=True)

# --- Text encoding -----------------------------------------------------------
# The tokenizer has no weights, so dtype/variant options are not passed to it.
t5_tok = T5Tokenizer.from_pretrained(model_id, subfolder="tokenizer")

t5 = T5EncoderModel.from_pretrained(
    model_id,
    subfolder="text_encoder",
    torch_dtype=torch.float16,
    variant="fp16",
    low_cpu_mem_usage=True,
)
t5.cuda()

prompt = prompt.lower().strip()
prompt_embeds = _embed(t5_tok, t5, prompt)
# The empty string provides the negative (unconditional) embedding.
neg_prompt_embeds = _embed(t5_tok, t5, "")

# The embeddings are all that later stages need; free the encoder.
del t5
_release_cuda_memory()

# One generator is shared by all three stages, so its state carries over
# from stage to stage.
generator = torch.Generator("cuda").manual_seed(0)

# --- Stage I: base pipeline --------------------------------------------------
pipe = IFBasePipeline.from_pretrained(model_id, text_encoder=None, torch_dtype=torch.float16, variant="fp16")
pipe.to("cuda")

image = pipe(
    prompt_embeds=prompt_embeds,
    negative_prompt_embeds=neg_prompt_embeds,
    output_type="pt",
    num_inference_steps=100,
    generator=generator,
).images

_save_first_image(pipe, image, os.path.join(output_dir, "if_I_0.png"))

del pipe
_release_cuda_memory()

# --- Stage II: first super-resolution pipeline -------------------------------
# NOTE(review): this UNet is loaded without variant="fp16", unlike the other
# components -- confirm that is intended.
unet = UNet2DConditionModel.from_pretrained(model_id, subfolder="super_res_1_unet", torch_dtype=torch.float16)
pipe = IFSuperResolutionPipeline.from_pretrained(
    model_id, unet=unet, text_encoder=None, torch_dtype=torch.float16, variant="fp16"
)
pipe.to("cuda")

image = pipe(
    image=image,
    prompt_embeds=prompt_embeds,
    negative_prompt_embeds=neg_prompt_embeds,
    num_inference_steps=50,
    noise_level=250,
    output_type="pt",
    generator=generator,
).images

_save_first_image(pipe, image, os.path.join(output_dir, "if_II_0.png"))

# Drop the module-level `unet` reference too; otherwise the UNet outlives the
# pipeline and is still resident while the next one is being loaded.
del pipe, unet
_release_cuda_memory()

# --- Stage III: second super-resolution pipeline -----------------------------
# NOTE(review): same variant question as for the stage II UNet.
unet = UNet2DConditionModel.from_pretrained(model_id, subfolder="super_res_2_unet", torch_dtype=torch.float16)
pipe = IFSuperResolutionPipeline.from_pretrained(
    model_id, unet=unet, text_encoder=None, torch_dtype=torch.float16, variant="fp16"
)
pipe.to("cuda")

image = pipe(
    image=image,
    prompt_embeds=prompt_embeds,
    negative_prompt_embeds=neg_prompt_embeds,
    num_inference_steps=40,
    noise_level=0,
    output_type="pt",
    generator=generator,
).images

_save_first_image(pipe, image, os.path.join(output_dir, "if_III_0.png"))

del pipe, unet
_release_cuda_memory()