{ "decoder_config": { "image_dim": 4, "patch_size": 2, "decoder_style": "dit", "num_layers": 28, "hidden_size": 1152, "intermediate_size": 3456, "num_heads": 16, "decode_mode": "diffusion", "qk_norm": true, "attn_bias": false, "use_final_norm": false }, "encoder_hidden_size": 768, "latent_h": 32, "latent_w": 32, "latent_mean": [ -0.69, -0.48, -0.6, 0.28 ], "latent_std": [ 12.38, 11.22, 7.93, 21.22 ], "pretrain_mode": "imagenet", "source_ckpt": "ttvidt-dit-pretrain/lndt54kx/checkpoints/epoch=19-step=100000.ckpt" }