# Source path: DiffBIR/configs/train/train_stage2.yaml
# Hosting-page residue kept for provenance: "MultiMatrix's picture",
# "Upload 7 files", "b708a95 verified".
# Model components used for training.
# NOTE(review): nesting was reconstructed from a copy whose indentation had
# been flattened. Each component appears to pair a dotted `target` path with
# the `params` for it; confirm against the code that loads this file.
model:
  cldm:
    target: model.cldm.ControlLDM
    params:
      latent_scale_factor: 0.18215

      unet_cfg:
        use_checkpoint: true
        image_size: 32  # unused
        in_channels: 4
        out_channels: 4
        model_channels: 320
        attention_resolutions: [4, 2, 1]
        num_res_blocks: 2
        channel_mult: [1, 2, 4, 4]
        num_head_channels: 64  # need to fix for flash-attn
        use_spatial_transformer: true
        use_linear_in_transformer: true
        transformer_depth: 1
        context_dim: 1024
        legacy: false

      vae_cfg:
        embed_dim: 4
        ddconfig:
          double_z: true
          z_channels: 4
          resolution: 256
          in_channels: 3
          out_ch: 3
          ch: 128
          ch_mult: [1, 2, 4, 4]
          num_res_blocks: 2
          attn_resolutions: []
          dropout: 0.0

      clip_cfg:
        embed_dim: 1024
        vision_cfg:
          image_size: 224
          layers: 32
          width: 1280
          head_width: 80
          patch_size: 14
        text_cfg:
          context_length: 77
          vocab_size: 49408
          width: 1024
          heads: 16
          layers: 24
        # NOTE(review): `layer` (singular) is placed beside `text_cfg`, not
        # inside it, which already has `layers`; confirm against the consumer.
        layer: "penultimate"

      # Mirrors `unet_cfg` except that it has `hint_channels` and no
      # `out_channels`.
      controlnet_cfg:
        use_checkpoint: true
        image_size: 32  # unused
        in_channels: 4
        hint_channels: 4
        model_channels: 320
        attention_resolutions: [4, 2, 1]
        num_res_blocks: 2
        channel_mult: [1, 2, 4, 4]
        num_head_channels: 64  # need to fix for flash-attn
        use_spatial_transformer: true
        use_linear_in_transformer: true
        transformer_depth: 1
        context_dim: 1024
        legacy: false

  swinir:
    target: model.swinir.SwinIR
    params:
      img_size: 64
      patch_size: 1
      in_chans: 3
      embed_dim: 180
      # `depths` and `num_heads` both list eight entries.
      depths: [6, 6, 6, 6, 6, 6, 6, 6]
      num_heads: [6, 6, 6, 6, 6, 6, 6, 6]
      window_size: 8
      mlp_ratio: 2
      sf: 8
      img_range: 1.0
      upsampler: "nearest+conv"
      resi_connection: "1conv"
      unshuffle: true
      unshuffle_scale: 8

  diffusion:
    target: model.gaussian_diffusion.Diffusion
    params:
      linear_start: 0.00085
      linear_end: 0.0120
      timesteps: 1000
# Training dataset definition.
# NOTE(review): nesting was reconstructed from a copy whose indentation had
# been flattened; confirm that the keys after `file_backend_cfg` belong to
# `params` rather than to the backend config.
dataset:
  train:
    target: dataset.codeformer.CodeformerDataset
    params:
      # Training file list path (unset in this template; fill in before use).
      file_list: null
      file_backend_cfg:
        target: dataset.file_backend.HardDiskBackend
      out_size: 512
      crop_type: center
      blur_kernel_size: 41
      # `kernel_list` and `kernel_prob` have the same length (two entries).
      kernel_list: ['iso', 'aniso']
      kernel_prob: [0.5, 0.5]
      blur_sigma: [0.1, 12]
      downsample_range: [1, 12]
      noise_range: [0, 15]
      jpeg_range: [30, 100]
# Training run settings.
# Values written as `null` are left unset in this template.
train:
  # Pretrained SD v2.1 path.
  sd_path: null
  # Experiment directory path.
  exp_dir: null
  # Stage 1 SwinIR path.
  swinir_path: null

  # Written with a decimal point so that YAML 1.1 loaders (e.g. PyYAML) parse
  # it as a float; the dot-less form `1e-4` is read as a string by them.
  learning_rate: 1.0e-4

  # Reference settings noted by the authors:
  #   ImageNet 1k (1.3M images):
  #     batch size = 192, lr = 1e-4, total training steps = 25k
  #   Our filtered laion2b-en (15M images):
  #     batch size = 256, lr = 1e-4 (first 30k), 1e-5 (next 50k),
  #     total training steps = 80k
  batch_size: 256
  num_workers: null
  train_steps: 30000

  log_every: 50
  ckpt_every: 10000
  image_every: 1000

  # NOTE(review): presumably a checkpoint path to resume from; null here.
  resume: null