This commit is contained in:
2025-09-26 20:55:52 +02:00
5 changed files with 1051 additions and 1 deletion

View File

@ -9,7 +9,7 @@ Use this file src\lib\image-generator-face.ts
## Image converting
Use this file src\lib\image-converter.ts
## Vide generation with static image
## Video generation with static image
Use this file src\lib\video-generator.ts
Whenever a generator needs to use an existing image, copy it to the server's input folder first and reference it by filename only (see the sketch below).
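
A minimal sketch of that convention, assuming the server's ComfyUI `input` directory is reachable at a path you supply (the helper below is illustrative and not part of this repo):

```ts
import * as fs from 'fs/promises';
import * as path from 'path';

// Copy a local image into the ComfyUI input folder and return just its
// filename, which is what the workflow's LoadImage nodes expect.
async function stageInputImage(localPath: string, comfyInputDir: string): Promise<string> {
  const filename = path.basename(localPath);
  await fs.copyFile(localPath, path.join(comfyInputDir, filename));
  return filename; // reference only this name inside the workflow JSON
}
```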

View File

@ -0,0 +1,558 @@
{
"1": {
"inputs": {
"unet_name": "qwen_image_edit_2509_fp8_e4m3fn.safetensors",
"weight_dtype": "default"
},
"class_type": "UNETLoader",
"_meta": {
"title": "Load Diffusion Model"
}
},
"2": {
"inputs": {
"clip_name": "qwen_2.5_vl_7b_fp8_scaled.safetensors",
"type": "qwen_image",
"device": "default"
},
"class_type": "CLIPLoader",
"_meta": {
"title": "Load CLIP"
}
},
"3": {
"inputs": {
"vae_name": "qwen_image_vae.safetensors"
},
"class_type": "VAELoader",
"_meta": {
"title": "Load VAE"
}
},
"4": {
"inputs": {
"lora_name": "Qwen-Image-Lightning-8steps-V2.0.safetensors",
"strength_model": 1,
"model": [
"1",
0
]
},
"class_type": "LoraLoaderModelOnly",
"_meta": {
"title": "LoraLoaderModelOnly"
}
},
"5": {
"inputs": {
"conditioning": [
"11",
0
]
},
"class_type": "ConditioningZeroOut",
"_meta": {
"title": "ConditioningZeroOut"
}
},
"7": {
"inputs": {
"seed": 1058883705232539,
"steps": 8,
"cfg": 1,
"sampler_name": "euler",
"scheduler": "beta",
"denoise": 1,
"model": [
"66",
0
],
"positive": [
"11",
0
],
"negative": [
"5",
0
],
"latent_image": [
"11",
6
]
},
"class_type": "KSampler",
"_meta": {
"title": "KSampler"
}
},
"8": {
"inputs": {
"samples": [
"7",
0
],
"vae": [
"3",
0
]
},
"class_type": "VAEDecode",
"_meta": {
"title": "VAE Decode"
}
},
"11": {
"inputs": {
"prompt": [
"21",
0
],
"enable_resize": false,
"enable_vl_resize": false,
"upscale_method": "lanczos",
"crop": "disabled",
"instruction": "<|im_start|>system\nDescribe the key features of the input image (color, shape, size, texture, objects, background), then explain how the user's text instruction should alter or modify the image. Generate a new image that meets the user's requirements while maintaining consistency with the original input where appropriate.<|im_end|>\n<|im_start|>user\n{}<|im_end|>\n<|im_start|>assistant\n",
"clip": [
"2",
0
],
"vae": [
"3",
0
],
"image1": [
"84",
0
],
"image2": [
"82",
0
]
},
"class_type": "TextEncodeQwenImageEditPlus_lrzjason",
"_meta": {
"title": "TextEncodeQwenImageEditPlus 小志Jason(xiaozhijason)"
}
},
"15": {
"inputs": {
"image": "cloth_0001.png"
},
"class_type": "LoadImage",
"_meta": {
"title": "Load Image"
}
},
"21": {
"inputs": {
"value": "change clothes of image1 with image2"
},
"class_type": "PrimitiveStringMultiline",
"_meta": {
"title": "String (Multiline)"
}
},
"64": {
"inputs": {
"image": "Lauren_body.png"
},
"class_type": "LoadImage",
"_meta": {
"title": "Load Image"
}
},
"66": {
"inputs": {
"lora_name": "extract-outfit_v3.safetensors",
"strength_model": 1,
"model": [
"4",
0
]
},
"class_type": "LoraLoaderModelOnly",
"_meta": {
"title": "LoraLoaderModelOnly"
}
},
"67": {
"inputs": {
"detect_hand": "enable",
"detect_body": "enable",
"detect_face": "enable",
"resolution": 512,
"bbox_detector": "yolox_l.onnx",
"pose_estimator": "dw-ll_ucoco_384_bs5.torchscript.pt",
"scale_stick_for_xinsr_cn": "disable",
"image": [
"68",
0
]
},
"class_type": "DWPreprocessor",
"_meta": {
"title": "DWPose Estimator"
}
},
"68": {
"inputs": {
"image": "281543721672978_1758880135639_0.png"
},
"class_type": "LoadImage",
"_meta": {
"title": "Load Image"
}
},
"69": {
"inputs": {
"images": [
"81",
0
]
},
"class_type": "PreviewImage",
"_meta": {
"title": "Preview Image"
}
},
"76": {
"inputs": {
"number": 720
},
"class_type": "StaticNumberInt",
"_meta": {
"title": "Static Number Int"
}
},
"77": {
"inputs": {
"number": 1280
},
"class_type": "StaticNumberInt",
"_meta": {
"title": "Static Number Int"
}
},
"78": {
"inputs": {
"width": [
"76",
0
],
"height": [
"77",
0
],
"batch_size": 1
},
"class_type": "EmptyLatentImage",
"_meta": {
"title": "Empty Latent Image"
}
},
"81": {
"inputs": {
"width": 480,
"height": 962,
"upscale_method": "nearest-exact",
"keep_proportion": "pad",
"pad_color": "0, 0, 0",
"crop_position": "center",
"divisible_by": 2,
"device": "cpu",
"image": [
"67",
0
]
},
"class_type": "ImageResizeKJv2",
"_meta": {
"title": "Resize Image v2"
}
},
"82": {
"inputs": {
"width": [
"76",
0
],
"height": [
"77",
0
],
"upscale_method": "nearest-exact",
"keep_proportion": "crop",
"pad_color": "255,255,255",
"crop_position": "center",
"divisible_by": 2,
"device": "cpu",
"image": [
"15",
0
]
},
"class_type": "ImageResizeKJv2",
"_meta": {
"title": "Resize Image v2"
}
},
"83": {
"inputs": {
"images": [
"82",
0
]
},
"class_type": "PreviewImage",
"_meta": {
"title": "Preview Image"
}
},
"84": {
"inputs": {
"width": [
"76",
0
],
"height": [
"77",
0
],
"upscale_method": "nearest-exact",
"keep_proportion": "pad",
"pad_color": "0, 0, 0",
"crop_position": "center",
"divisible_by": 2,
"device": "cpu",
"image": [
"64",
0
]
},
"class_type": "ImageResizeKJv2",
"_meta": {
"title": "Resize Image v2"
}
},
"85": {
"inputs": {
"images": [
"84",
0
]
},
"class_type": "PreviewImage",
"_meta": {
"title": "Preview Image"
}
},
"86": {
"inputs": {
"clip_name": "qwen_2.5_vl_7b_fp8_scaled.safetensors",
"type": "qwen_image",
"device": "default"
},
"class_type": "CLIPLoader",
"_meta": {
"title": "Load CLIP"
}
},
"87": {
"inputs": {
"unet_name": "qwen_image_edit_2509_fp8_e4m3fn.safetensors",
"weight_dtype": "default"
},
"class_type": "UNETLoader",
"_meta": {
"title": "Load Diffusion Model"
}
},
"88": {
"inputs": {
"lora_name": "Qwen-Image-Lightning-8steps-V2.0.safetensors",
"strength_model": 1,
"model": [
"87",
0
]
},
"class_type": "LoraLoaderModelOnly",
"_meta": {
"title": "LoraLoaderModelOnly"
}
},
"89": {
"inputs": {
"conditioning": [
"95",
0
]
},
"class_type": "ConditioningZeroOut",
"_meta": {
"title": "ConditioningZeroOut"
}
},
"90": {
"inputs": {
"lora_name": "extract-outfit_v3.safetensors",
"strength_model": 1,
"model": [
"88",
0
]
},
"class_type": "LoraLoaderModelOnly",
"_meta": {
"title": "LoraLoaderModelOnly"
}
},
"91": {
"inputs": {
"seed": 416948400785889,
"steps": 8,
"cfg": 1,
"sampler_name": "euler",
"scheduler": "beta",
"denoise": 1,
"model": [
"90",
0
],
"positive": [
"95",
0
],
"negative": [
"89",
0
],
"latent_image": [
"95",
6
]
},
"class_type": "KSampler",
"_meta": {
"title": "KSampler"
}
},
"92": {
"inputs": {
"samples": [
"91",
0
],
"vae": [
"94",
0
]
},
"class_type": "VAEDecode",
"_meta": {
"title": "VAE Decode"
}
},
"93": {
"inputs": {
"filename_prefix": "qwenedit",
"images": [
"92",
0
]
},
"class_type": "SaveImage",
"_meta": {
"title": "Save Image"
}
},
"94": {
"inputs": {
"vae_name": "qwen_image_vae.safetensors"
},
"class_type": "VAELoader",
"_meta": {
"title": "Load VAE"
}
},
"95": {
"inputs": {
"prompt": [
"96",
0
],
"enable_resize": false,
"enable_vl_resize": false,
"upscale_method": "lanczos",
"crop": "disabled",
"instruction": "<|im_start|>system\nDescribe the key features of the input image (color, shape, size, texture, objects, background), then explain how the user's text instruction should alter or modify the image. Generate a new image that meets the user's requirements while maintaining consistency with the original input where appropriate.<|im_end|>\n<|im_start|>user\n{}<|im_end|>\n<|im_start|>assistant\n",
"clip": [
"86",
0
],
"vae": [
"94",
0
],
"image1": [
"8",
0
],
"image2": [
"81",
0
]
},
"class_type": "TextEncodeQwenImageEditPlus_lrzjason",
"_meta": {
"title": "TextEncodeQwenImageEditPlus 小志Jason(xiaozhijason)"
}
},
"96": {
"inputs": {
"value": "change pose of image1 with image2, keep background same as image1"
},
"class_type": "PrimitiveStringMultiline",
"_meta": {
"title": "String (Multiline)"
}
},
"132": {
"inputs": {
"images": [
"8",
0
]
},
"class_type": "PreviewImage",
"_meta": {
"title": "Preview Image"
}
},
"133": {
"inputs": {
"filename_prefix": "qwenimtermediate",
"images": [
"8",
0
]
},
"class_type": "SaveImage",
"_meta": {
"title": "Save Image"
}
},
"134": {
"inputs": {
"filename_prefix": "qwenpose",
"images": [
"81",
0
]
},
"class_type": "SaveImage",
"_meta": {
"title": "Save Image"
}
}
}

View File

@ -0,0 +1,314 @@
{
"1": {
"inputs": {
"unet_name": "qwen_image_edit_2509_fp8_e4m3fn.safetensors",
"weight_dtype": "default"
},
"class_type": "UNETLoader",
"_meta": {
"title": "Load Diffusion Model"
}
},
"2": {
"inputs": {
"clip_name": "qwen_2.5_vl_7b_fp8_scaled.safetensors",
"type": "qwen_image",
"device": "default"
},
"class_type": "CLIPLoader",
"_meta": {
"title": "Load CLIP"
}
},
"3": {
"inputs": {
"vae_name": "qwen_image_vae.safetensors"
},
"class_type": "VAELoader",
"_meta": {
"title": "Load VAE"
}
},
"4": {
"inputs": {
"lora_name": "Qwen-Image-Lightning-8steps-V2.0.safetensors",
"strength_model": 1,
"model": [
"1",
0
]
},
"class_type": "LoraLoaderModelOnly",
"_meta": {
"title": "LoraLoaderModelOnly"
}
},
"5": {
"inputs": {
"conditioning": [
"11",
0
]
},
"class_type": "ConditioningZeroOut",
"_meta": {
"title": "ConditioningZeroOut"
}
},
"7": {
"inputs": {
"seed": 799784211855929,
"steps": 8,
"cfg": 1,
"sampler_name": "euler",
"scheduler": "beta",
"denoise": 1,
"model": [
"66",
0
],
"positive": [
"11",
0
],
"negative": [
"5",
0
],
"latent_image": [
"11",
6
]
},
"class_type": "KSampler",
"_meta": {
"title": "KSampler"
}
},
"8": {
"inputs": {
"samples": [
"7",
0
],
"vae": [
"3",
0
]
},
"class_type": "VAEDecode",
"_meta": {
"title": "VAE Decode"
}
},
"11": {
"inputs": {
"prompt": [
"21",
0
],
"enable_resize": false,
"enable_vl_resize": false,
"upscale_method": "lanczos",
"crop": "disabled",
"instruction": "<|im_start|>system\nDescribe the key features of the input image (color, shape, size, texture, objects, background), then explain how the user's text instruction should alter or modify the image. Generate a new image that meets the user's requirements while maintaining consistency with the original input where appropriate.<|im_end|>\n<|im_start|>user\n{}<|im_end|>\n<|im_start|>assistant\n",
"clip": [
"2",
0
],
"vae": [
"3",
0
],
"image1": [
"84",
0
],
"image2": [
"82",
0
]
},
"class_type": "TextEncodeQwenImageEditPlus_lrzjason",
"_meta": {
"title": "TextEncodeQwenImageEditPlus 小志Jason(xiaozhijason)"
}
},
"15": {
"inputs": {
"image": "cloth_0001.png"
},
"class_type": "LoadImage",
"_meta": {
"title": "Load cloth"
}
},
"21": {
"inputs": {
"value": "change clothes of image1 with image2"
},
"class_type": "PrimitiveStringMultiline",
"_meta": {
"title": "String (Multiline)"
}
},
"64": {
"inputs": {
"image": "Lauren_body.png"
},
"class_type": "LoadImage",
"_meta": {
"title": "Load model"
}
},
"66": {
"inputs": {
"lora_name": "extract-outfit_v3.safetensors",
"strength_model": 1,
"model": [
"4",
0
]
},
"class_type": "LoraLoaderModelOnly",
"_meta": {
"title": "LoraLoaderModelOnly"
}
},
"76": {
"inputs": {
"number": 720
},
"class_type": "StaticNumberInt",
"_meta": {
"title": "Static Number Int"
}
},
"77": {
"inputs": {
"number": 1280
},
"class_type": "StaticNumberInt",
"_meta": {
"title": "Static Number Int"
}
},
"78": {
"inputs": {
"width": [
"76",
0
],
"height": [
"77",
0
],
"batch_size": 1
},
"class_type": "EmptyLatentImage",
"_meta": {
"title": "Empty Latent Image"
}
},
"82": {
"inputs": {
"width": [
"76",
0
],
"height": [
"77",
0
],
"upscale_method": "nearest-exact",
"keep_proportion": "crop",
"pad_color": "255,255,255",
"crop_position": "center",
"divisible_by": 2,
"device": "cpu",
"image": [
"15",
0
]
},
"class_type": "ImageResizeKJv2",
"_meta": {
"title": "Resize Image v2"
}
},
"83": {
"inputs": {
"images": [
"82",
0
]
},
"class_type": "PreviewImage",
"_meta": {
"title": "Preview Image"
}
},
"84": {
"inputs": {
"width": [
"76",
0
],
"height": [
"77",
0
],
"upscale_method": "nearest-exact",
"keep_proportion": "pad",
"pad_color": "0, 0, 0",
"crop_position": "center",
"divisible_by": 2,
"device": "cpu",
"image": [
"64",
0
]
},
"class_type": "ImageResizeKJv2",
"_meta": {
"title": "Resize Image v2"
}
},
"85": {
"inputs": {
"images": [
"84",
0
]
},
"class_type": "PreviewImage",
"_meta": {
"title": "Preview Image"
}
},
"93": {
"inputs": {
"filename_prefix": "qwenedit",
"images": [
"8",
0
]
},
"class_type": "SaveImage",
"_meta": {
"title": "Save Image"
}
},
"132": {
"inputs": {
"images": [
"8",
0
]
},
"class_type": "PreviewImage",
"_meta": {
"title": "Preview Image"
}
}
}

View File

@ -183,4 +183,131 @@ export async function extractCloth(
}
export async function convertImageWithMultipleFile(
prompt: string,
srcFiles: string[],
outputFile: string,
comfyBaseUrl: string,
comfyOutputDir: string,
size: ImageSize = { width: 720, height: 1280 }
): Promise<string> {
const COMFY_BASE_URL = comfyBaseUrl.replace(/\/$/, '');
const COMFY_OUTPUT_DIR = comfyOutputDir;
const workflow = JSON.parse(await fs.readFile('src/comfyworkflows/edit_image_multiple_qwen.json', 'utf-8'));
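// Node 21 carries the edit prompt, 76/77 the target width/height, and 64/15/68 the model, cloth and pose input images.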
workflow['21']['inputs']['value'] = prompt;
workflow['76']['inputs']['number'] = size.width;
workflow['77']['inputs']['number'] = size.height;
if (srcFiles[0])
workflow['64']['inputs']['image'] = srcFiles[0];
if (srcFiles[1])
workflow['15']['inputs']['image'] = srcFiles[1];
if (srcFiles[2])
workflow['68']['inputs']['image'] = srcFiles[2];
const response = await axios.post(`${COMFY_BASE_URL}/prompt`, { prompt: workflow });
const promptId = response.data.prompt_id;
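// Poll the ComfyUI history endpoint once per second until the queued prompt reports its outputs.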
let history;
do {
await new Promise(resolve => setTimeout(resolve, 1000));
const historyResponse = await axios.get(`${COMFY_BASE_URL}/history/${promptId}`);
history = historyResponse.data[promptId];
} while (!history || Object.keys(history.outputs).length === 0);
const files = await fs.readdir(COMFY_OUTPUT_DIR!);
const generatedFiles = files.filter(file => file.startsWith('qwenedit'));
const fileStats = await Promise.all(
generatedFiles.map(async (file) => {
const stat = await fs.stat(path.join(COMFY_OUTPUT_DIR!, file));
return { file, mtime: stat.mtime };
})
);
if (fileStats.length === 0) {
throw new Error('No qwenedit output files were found in the ComfyUI output directory.');
}
fileStats.sort((a, b) => b.mtime.getTime() - a.mtime.getTime());
const latestFile = fileStats[0].file;
const newFilePath = path.resolve('./generated', outputFile);
await fs.mkdir('./generated', { recursive: true });
const sourcePath = path.join(COMFY_OUTPUT_DIR!, latestFile);
try {
await fs.unlink(newFilePath);
} catch (err) {
// ignore if not exists
}
await fs.copyFile(sourcePath, newFilePath);
return newFilePath;
}
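// Illustrative usage (not part of this module): the filenames, URL and output
// directory below are assumptions. srcFiles maps to the workflow's LoadImage
// nodes in the order [model (node 64), cloth (node 15), pose (node 68)], and
// the images must already exist in the ComfyUI input folder.
//
// const out = await convertImageWithMultipleFile(
//   'change clothes of image1 with image2',
//   ['Lauren_body.png', 'cloth_0001.png', 'pose_0001.png'],
//   'model_out.png',
//   'http://127.0.0.1:8188',
//   'C:/ComfyUI/output',
//   { width: 720, height: 1280 }
// );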
export async function convertImageVton(
srcFiles: string[],
outputFile: string,
comfyBaseUrl: string,
comfyOutputDir: string,
size: ImageSize = { width: 720, height: 1280 }
): Promise<string> {
const COMFY_BASE_URL = comfyBaseUrl.replace(/\/$/, '');
const COMFY_OUTPUT_DIR = comfyOutputDir;
const workflow = JSON.parse(await fs.readFile('src/comfyworkflows/edit_image_vton.json', 'utf-8'));
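// Nodes 76/77 set the target width/height; 64 and 15 are the model and cloth LoadImage nodes.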
workflow['76']['inputs']['number'] = size.width;
workflow['77']['inputs']['number'] = size.height;
if (srcFiles[0])
workflow['64']['inputs']['image'] = srcFiles[0];
if (srcFiles[1])
workflow['15']['inputs']['image'] = srcFiles[1];
const response = await axios.post(`${COMFY_BASE_URL}/prompt`, { prompt: workflow });
const promptId = response.data.prompt_id;
let history;
do {
await new Promise(resolve => setTimeout(resolve, 1000));
const historyResponse = await axios.get(`${COMFY_BASE_URL}/history/${promptId}`);
history = historyResponse.data[promptId];
} while (!history || Object.keys(history.outputs).length === 0);
const files = await fs.readdir(COMFY_OUTPUT_DIR!);
const generatedFiles = files.filter(file => file.startsWith('qwenedit'));
const fileStats = await Promise.all(
generatedFiles.map(async (file) => {
const stat = await fs.stat(path.join(COMFY_OUTPUT_DIR!, file));
return { file, mtime: stat.mtime };
})
);
if (fileStats.length === 0) {
throw new Error('No qwenedit output files were found in the ComfyUI output directory.');
}
fileStats.sort((a, b) => b.mtime.getTime() - a.mtime.getTime());
const latestFile = fileStats[0].file;
const newFilePath = path.resolve('./generated', outputFile);
await fs.mkdir('./generated', { recursive: true });
const sourcePath = path.join(COMFY_OUTPUT_DIR!, latestFile);
try {
await fs.unlink(newFilePath);
} catch (err) {
// ignore if not exists
}
await fs.copyFile(sourcePath, newFilePath);
return newFilePath;
}
export { convertImage, convertImageWithFile };

View File

@ -0,0 +1,51 @@
import * as fs from 'fs';
import * as path from 'path';
import { convertImageVton } from '../lib/image-converter';
import * as dotenv from 'dotenv';
dotenv.config();
const clothesDir = 'C:\\Users\\ken\\Desktop\\VTON\\clothes';
const modelPath = 'C:\\Users\\ken\\Desktop\\VTON\\models\\Jessica_body.png';
const posesDir = 'C:\\Users\\ken\\Desktop\\VTON\\poses';
const outputDir = 'generated';
const comfyBaseUrl = process.env.SERVER1_COMFY_BASE_URL;
const comfyOutputDir = process.env.SERVER1_COMFY_OUTPUT_DIR;
async function generateVtonImages() {
if (!comfyBaseUrl || !comfyOutputDir) {
throw new Error("ComfyUI URL or Output Directory is not set in environment variables.");
}
const clothesFiles = fs.readdirSync(clothesDir).filter(file => /\.(jpg|png|jpeg)$/i.test(file));
const poseFiles = fs.readdirSync(posesDir).filter(file => /\.(jpg|png|jpeg)$/i.test(file));
if (!fs.existsSync(outputDir)) {
fs.mkdirSync(outputDir);
}
for (let i = 0; i < clothesFiles.length; i++) {
const clothFile = clothesFiles[i];
const clothPath = path.join(clothesDir, clothFile);
const randomPoseFile = poseFiles[Math.floor(Math.random() * poseFiles.length)];
const posePath = path.join(posesDir, randomPoseFile);
console.log(`Processing cloth: ${clothFile} with pose: ${randomPoseFile}`);
// The VTON workflow only loads the first two entries (model and cloth); the pose image is not used by convertImageVton.
const files = [modelPath, clothPath, posePath];
const outputFilename = `model_${i}.png`;
const generatedImagePath = await convertImageVton(files, outputFilename, comfyBaseUrl, comfyOutputDir, { width: 720, height: 1280 });
if (generatedImagePath) {
console.log(`Generated image saved to ${generatedImagePath}`);
} else {
console.error(`Failed to generate image for ${clothFile}`);
}
}
}
generateVtonImages().catch(console.error);