diff --git a/.clinerules/generators.md b/.clinerules/generators.md index 7bbba3c..a411cd4 100644 --- a/.clinerules/generators.md +++ b/.clinerules/generators.md @@ -9,7 +9,7 @@ Use this file src\lib\image-generator-face.ts ## Image converting Use this file src\lib\image-converter.ts -## Vide generation with static image +## Video generation with static image Use this file src\lib\video-generator.ts Whenever a generator needs to use an existing image, you have to copy it to the server's input folder and reference it by filename only. diff --git a/src/comfyworkflows/edit_image_multiple_qwen.json b/src/comfyworkflows/edit_image_multiple_qwen.json new file mode 100644 index 0000000..5b1462f --- /dev/null +++ b/src/comfyworkflows/edit_image_multiple_qwen.json @@ -0,0 +1,558 @@ +{ + "1": { + "inputs": { + "unet_name": "qwen_image_edit_2509_fp8_e4m3fn.safetensors", + "weight_dtype": "default" + }, + "class_type": "UNETLoader", + "_meta": { + "title": "Load Diffusion Model" + } + }, + "2": { + "inputs": { + "clip_name": "qwen_2.5_vl_7b_fp8_scaled.safetensors", + "type": "qwen_image", + "device": "default" + }, + "class_type": "CLIPLoader", + "_meta": { + "title": "Load CLIP" + } + }, + "3": { + "inputs": { + "vae_name": "qwen_image_vae.safetensors" + }, + "class_type": "VAELoader", + "_meta": { + "title": "Load VAE" + } + }, + "4": { + "inputs": { + "lora_name": "Qwen-Image-Lightning-8steps-V2.0.safetensors", + "strength_model": 1, + "model": [ + "1", + 0 + ] + }, + "class_type": "LoraLoaderModelOnly", + "_meta": { + "title": "LoraLoaderModelOnly" + } + }, + "5": { + "inputs": { + "conditioning": [ + "11", + 0 + ] + }, + "class_type": "ConditioningZeroOut", + "_meta": { + "title": "ConditioningZeroOut" + } + }, + "7": { + "inputs": { + "seed": 1058883705232539, + "steps": 8, + "cfg": 1, + "sampler_name": "euler", + "scheduler": "beta", + "denoise": 1, + "model": [ + "66", + 0 + ], + "positive": [ + "11", + 0 + ], + "negative": [ + "5", + 0 + ], + "latent_image": [ + "11", + 6 + ] + }, + "class_type": "KSampler", + "_meta": { + "title": "KSampler" + } + }, + "8": { + "inputs": { + "samples": [ + "7", + 0 + ], + "vae": [ + "3", + 0 + ] + }, + "class_type": "VAEDecode", + "_meta": { + "title": "VAE Decode" + } + }, + "11": { + "inputs": { + "prompt": [ + "21", + 0 + ], + "enable_resize": false, + "enable_vl_resize": false, + "upscale_method": "lanczos", + "crop": "disabled", + "instruction": "<|im_start|>system\nDescribe the key features of the input image (color, shape, size, texture, objects, background), then explain how the user's text instruction should alter or modify the image. 
Generate a new image that meets the user's requirements while maintaining consistency with the original input where appropriate.<|im_end|>\n<|im_start|>user\n{}<|im_end|>\n<|im_start|>assistant\n", + "clip": [ + "2", + 0 + ], + "vae": [ + "3", + 0 + ], + "image1": [ + "84", + 0 + ], + "image2": [ + "82", + 0 + ] + }, + "class_type": "TextEncodeQwenImageEditPlus_lrzjason", + "_meta": { + "title": "TextEncodeQwenImageEditPlus 小志Jason(xiaozhijason)" + } + }, + "15": { + "inputs": { + "image": "cloth_0001.png" + }, + "class_type": "LoadImage", + "_meta": { + "title": "Load Image" + } + }, + "21": { + "inputs": { + "value": "change clothes of image1 with image2" + }, + "class_type": "PrimitiveStringMultiline", + "_meta": { + "title": "String (Multiline)" + } + }, + "64": { + "inputs": { + "image": "Lauren_body.png" + }, + "class_type": "LoadImage", + "_meta": { + "title": "Load Image" + } + }, + "66": { + "inputs": { + "lora_name": "extract-outfit_v3.safetensors", + "strength_model": 1, + "model": [ + "4", + 0 + ] + }, + "class_type": "LoraLoaderModelOnly", + "_meta": { + "title": "LoraLoaderModelOnly" + } + }, + "67": { + "inputs": { + "detect_hand": "enable", + "detect_body": "enable", + "detect_face": "enable", + "resolution": 512, + "bbox_detector": "yolox_l.onnx", + "pose_estimator": "dw-ll_ucoco_384_bs5.torchscript.pt", + "scale_stick_for_xinsr_cn": "disable", + "image": [ + "68", + 0 + ] + }, + "class_type": "DWPreprocessor", + "_meta": { + "title": "DWPose Estimator" + } + }, + "68": { + "inputs": { + "image": "281543721672978_1758880135639_0.png" + }, + "class_type": "LoadImage", + "_meta": { + "title": "Load Image" + } + }, + "69": { + "inputs": { + "images": [ + "81", + 0 + ] + }, + "class_type": "PreviewImage", + "_meta": { + "title": "Preview Image" + } + }, + "76": { + "inputs": { + "number": 720 + }, + "class_type": "StaticNumberInt", + "_meta": { + "title": "Static Number Int" + } + }, + "77": { + "inputs": { + "number": 1280 + }, + "class_type": "StaticNumberInt", + "_meta": { + "title": "Static Number Int" + } + }, + "78": { + "inputs": { + "width": [ + "76", + 0 + ], + "height": [ + "77", + 0 + ], + "batch_size": 1 + }, + "class_type": "EmptyLatentImage", + "_meta": { + "title": "Empty Latent Image" + } + }, + "81": { + "inputs": { + "width": 480, + "height": 962, + "upscale_method": "nearest-exact", + "keep_proportion": "pad", + "pad_color": "0, 0, 0", + "crop_position": "center", + "divisible_by": 2, + "device": "cpu", + "image": [ + "67", + 0 + ] + }, + "class_type": "ImageResizeKJv2", + "_meta": { + "title": "Resize Image v2" + } + }, + "82": { + "inputs": { + "width": [ + "76", + 0 + ], + "height": [ + "77", + 0 + ], + "upscale_method": "nearest-exact", + "keep_proportion": "crop", + "pad_color": "255,255,255", + "crop_position": "center", + "divisible_by": 2, + "device": "cpu", + "image": [ + "15", + 0 + ] + }, + "class_type": "ImageResizeKJv2", + "_meta": { + "title": "Resize Image v2" + } + }, + "83": { + "inputs": { + "images": [ + "82", + 0 + ] + }, + "class_type": "PreviewImage", + "_meta": { + "title": "Preview Image" + } + }, + "84": { + "inputs": { + "width": [ + "76", + 0 + ], + "height": [ + "77", + 0 + ], + "upscale_method": "nearest-exact", + "keep_proportion": "pad", + "pad_color": "0, 0, 0", + "crop_position": "center", + "divisible_by": 2, + "device": "cpu", + "image": [ + "64", + 0 + ] + }, + "class_type": "ImageResizeKJv2", + "_meta": { + "title": "Resize Image v2" + } + }, + "85": { + "inputs": { + "images": [ + "84", + 0 + ] + }, + "class_type": 
"PreviewImage", + "_meta": { + "title": "Preview Image" + } + }, + "86": { + "inputs": { + "clip_name": "qwen_2.5_vl_7b_fp8_scaled.safetensors", + "type": "qwen_image", + "device": "default" + }, + "class_type": "CLIPLoader", + "_meta": { + "title": "Load CLIP" + } + }, + "87": { + "inputs": { + "unet_name": "qwen_image_edit_2509_fp8_e4m3fn.safetensors", + "weight_dtype": "default" + }, + "class_type": "UNETLoader", + "_meta": { + "title": "Load Diffusion Model" + } + }, + "88": { + "inputs": { + "lora_name": "Qwen-Image-Lightning-8steps-V2.0.safetensors", + "strength_model": 1, + "model": [ + "87", + 0 + ] + }, + "class_type": "LoraLoaderModelOnly", + "_meta": { + "title": "LoraLoaderModelOnly" + } + }, + "89": { + "inputs": { + "conditioning": [ + "95", + 0 + ] + }, + "class_type": "ConditioningZeroOut", + "_meta": { + "title": "ConditioningZeroOut" + } + }, + "90": { + "inputs": { + "lora_name": "extract-outfit_v3.safetensors", + "strength_model": 1, + "model": [ + "88", + 0 + ] + }, + "class_type": "LoraLoaderModelOnly", + "_meta": { + "title": "LoraLoaderModelOnly" + } + }, + "91": { + "inputs": { + "seed": 416948400785889, + "steps": 8, + "cfg": 1, + "sampler_name": "euler", + "scheduler": "beta", + "denoise": 1, + "model": [ + "90", + 0 + ], + "positive": [ + "95", + 0 + ], + "negative": [ + "89", + 0 + ], + "latent_image": [ + "95", + 6 + ] + }, + "class_type": "KSampler", + "_meta": { + "title": "KSampler" + } + }, + "92": { + "inputs": { + "samples": [ + "91", + 0 + ], + "vae": [ + "94", + 0 + ] + }, + "class_type": "VAEDecode", + "_meta": { + "title": "VAE Decode" + } + }, + "93": { + "inputs": { + "filename_prefix": "qwenedit", + "images": [ + "92", + 0 + ] + }, + "class_type": "SaveImage", + "_meta": { + "title": "Save Image" + } + }, + "94": { + "inputs": { + "vae_name": "qwen_image_vae.safetensors" + }, + "class_type": "VAELoader", + "_meta": { + "title": "Load VAE" + } + }, + "95": { + "inputs": { + "prompt": [ + "96", + 0 + ], + "enable_resize": false, + "enable_vl_resize": false, + "upscale_method": "lanczos", + "crop": "disabled", + "instruction": "<|im_start|>system\nDescribe the key features of the input image (color, shape, size, texture, objects, background), then explain how the user's text instruction should alter or modify the image. 
Generate a new image that meets the user's requirements while maintaining consistency with the original input where appropriate.<|im_end|>\n<|im_start|>user\n{}<|im_end|>\n<|im_start|>assistant\n", + "clip": [ + "86", + 0 + ], + "vae": [ + "94", + 0 + ], + "image1": [ + "8", + 0 + ], + "image2": [ + "81", + 0 + ] + }, + "class_type": "TextEncodeQwenImageEditPlus_lrzjason", + "_meta": { + "title": "TextEncodeQwenImageEditPlus 小志Jason(xiaozhijason)" + } + }, + "96": { + "inputs": { + "value": "change pose of image1 with image2, keep background same as image1" + }, + "class_type": "PrimitiveStringMultiline", + "_meta": { + "title": "String (Multiline)" + } + }, + "132": { + "inputs": { + "images": [ + "8", + 0 + ] + }, + "class_type": "PreviewImage", + "_meta": { + "title": "Preview Image" + } + }, + "133": { + "inputs": { + "filename_prefix": "qwenimtermediate", + "images": [ + "8", + 0 + ] + }, + "class_type": "SaveImage", + "_meta": { + "title": "Save Image" + } + }, + "134": { + "inputs": { + "filename_prefix": "qwenpose", + "images": [ + "81", + 0 + ] + }, + "class_type": "SaveImage", + "_meta": { + "title": "Save Image" + } + } +} \ No newline at end of file diff --git a/src/comfyworkflows/edit_image_vton.json b/src/comfyworkflows/edit_image_vton.json new file mode 100644 index 0000000..550e8c3 --- /dev/null +++ b/src/comfyworkflows/edit_image_vton.json @@ -0,0 +1,314 @@ +{ + "1": { + "inputs": { + "unet_name": "qwen_image_edit_2509_fp8_e4m3fn.safetensors", + "weight_dtype": "default" + }, + "class_type": "UNETLoader", + "_meta": { + "title": "Load Diffusion Model" + } + }, + "2": { + "inputs": { + "clip_name": "qwen_2.5_vl_7b_fp8_scaled.safetensors", + "type": "qwen_image", + "device": "default" + }, + "class_type": "CLIPLoader", + "_meta": { + "title": "Load CLIP" + } + }, + "3": { + "inputs": { + "vae_name": "qwen_image_vae.safetensors" + }, + "class_type": "VAELoader", + "_meta": { + "title": "Load VAE" + } + }, + "4": { + "inputs": { + "lora_name": "Qwen-Image-Lightning-8steps-V2.0.safetensors", + "strength_model": 1, + "model": [ + "1", + 0 + ] + }, + "class_type": "LoraLoaderModelOnly", + "_meta": { + "title": "LoraLoaderModelOnly" + } + }, + "5": { + "inputs": { + "conditioning": [ + "11", + 0 + ] + }, + "class_type": "ConditioningZeroOut", + "_meta": { + "title": "ConditioningZeroOut" + } + }, + "7": { + "inputs": { + "seed": 799784211855929, + "steps": 8, + "cfg": 1, + "sampler_name": "euler", + "scheduler": "beta", + "denoise": 1, + "model": [ + "66", + 0 + ], + "positive": [ + "11", + 0 + ], + "negative": [ + "5", + 0 + ], + "latent_image": [ + "11", + 6 + ] + }, + "class_type": "KSampler", + "_meta": { + "title": "KSampler" + } + }, + "8": { + "inputs": { + "samples": [ + "7", + 0 + ], + "vae": [ + "3", + 0 + ] + }, + "class_type": "VAEDecode", + "_meta": { + "title": "VAE Decode" + } + }, + "11": { + "inputs": { + "prompt": [ + "21", + 0 + ], + "enable_resize": false, + "enable_vl_resize": false, + "upscale_method": "lanczos", + "crop": "disabled", + "instruction": "<|im_start|>system\nDescribe the key features of the input image (color, shape, size, texture, objects, background), then explain how the user's text instruction should alter or modify the image. 
Generate a new image that meets the user's requirements while maintaining consistency with the original input where appropriate.<|im_end|>\n<|im_start|>user\n{}<|im_end|>\n<|im_start|>assistant\n", + "clip": [ + "2", + 0 + ], + "vae": [ + "3", + 0 + ], + "image1": [ + "84", + 0 + ], + "image2": [ + "82", + 0 + ] + }, + "class_type": "TextEncodeQwenImageEditPlus_lrzjason", + "_meta": { + "title": "TextEncodeQwenImageEditPlus 小志Jason(xiaozhijason)" + } + }, + "15": { + "inputs": { + "image": "cloth_0001.png" + }, + "class_type": "LoadImage", + "_meta": { + "title": "Load cloth" + } + }, + "21": { + "inputs": { + "value": "change clothes of image1 with image2" + }, + "class_type": "PrimitiveStringMultiline", + "_meta": { + "title": "String (Multiline)" + } + }, + "64": { + "inputs": { + "image": "Lauren_body.png" + }, + "class_type": "LoadImage", + "_meta": { + "title": "Load model" + } + }, + "66": { + "inputs": { + "lora_name": "extract-outfit_v3.safetensors", + "strength_model": 1, + "model": [ + "4", + 0 + ] + }, + "class_type": "LoraLoaderModelOnly", + "_meta": { + "title": "LoraLoaderModelOnly" + } + }, + "76": { + "inputs": { + "number": 720 + }, + "class_type": "StaticNumberInt", + "_meta": { + "title": "Static Number Int" + } + }, + "77": { + "inputs": { + "number": 1280 + }, + "class_type": "StaticNumberInt", + "_meta": { + "title": "Static Number Int" + } + }, + "78": { + "inputs": { + "width": [ + "76", + 0 + ], + "height": [ + "77", + 0 + ], + "batch_size": 1 + }, + "class_type": "EmptyLatentImage", + "_meta": { + "title": "Empty Latent Image" + } + }, + "82": { + "inputs": { + "width": [ + "76", + 0 + ], + "height": [ + "77", + 0 + ], + "upscale_method": "nearest-exact", + "keep_proportion": "crop", + "pad_color": "255,255,255", + "crop_position": "center", + "divisible_by": 2, + "device": "cpu", + "image": [ + "15", + 0 + ] + }, + "class_type": "ImageResizeKJv2", + "_meta": { + "title": "Resize Image v2" + } + }, + "83": { + "inputs": { + "images": [ + "82", + 0 + ] + }, + "class_type": "PreviewImage", + "_meta": { + "title": "Preview Image" + } + }, + "84": { + "inputs": { + "width": [ + "76", + 0 + ], + "height": [ + "77", + 0 + ], + "upscale_method": "nearest-exact", + "keep_proportion": "pad", + "pad_color": "0, 0, 0", + "crop_position": "center", + "divisible_by": 2, + "device": "cpu", + "image": [ + "64", + 0 + ] + }, + "class_type": "ImageResizeKJv2", + "_meta": { + "title": "Resize Image v2" + } + }, + "85": { + "inputs": { + "images": [ + "84", + 0 + ] + }, + "class_type": "PreviewImage", + "_meta": { + "title": "Preview Image" + } + }, + "93": { + "inputs": { + "filename_prefix": "qwenedit", + "images": [ + "8", + 0 + ] + }, + "class_type": "SaveImage", + "_meta": { + "title": "Save Image" + } + }, + "132": { + "inputs": { + "images": [ + "8", + 0 + ] + }, + "class_type": "PreviewImage", + "_meta": { + "title": "Preview Image" + } + } +} \ No newline at end of file diff --git a/src/lib/image-converter.ts b/src/lib/image-converter.ts index 7f0e37a..39654e0 100644 --- a/src/lib/image-converter.ts +++ b/src/lib/image-converter.ts @@ -183,4 +183,131 @@ export async function extractCloth( } + +export async function convertImageWithMultipleFile( + prompt: string, + srcFiles: string[], + outputFile: string, + comfyBaseUrl: string, + comfyOutputDir: string, + size: ImageSize = { width: 720, height: 1280 } +): Promise<string> { + const COMFY_BASE_URL = comfyBaseUrl.replace(/\/$/, ''); + const COMFY_OUTPUT_DIR = comfyOutputDir; + + const workflow = JSON.parse(await fs.readFile('src/comfyworkflows/edit_image_multiple_qwen.json', 'utf-8')); + workflow['21']['inputs']['value'] = prompt; + workflow['76']['inputs']['number'] = size.width; + workflow['77']['inputs']['number'] = size.height; + + // Source files must already exist in the ComfyUI input folder (filenames only). + if (srcFiles[0]) + workflow['64']['inputs']['image'] = srcFiles[0]; // model/body image + + if (srcFiles[1]) + workflow['15']['inputs']['image'] = srcFiles[1]; // cloth image + + if (srcFiles[2]) + workflow['68']['inputs']['image'] = srcFiles[2]; // pose reference image + + // Queue the workflow, then poll /history until the job reports outputs. + const response = await axios.post(`${COMFY_BASE_URL}/prompt`, { prompt: workflow }); + const promptId = response.data.prompt_id; + + let history; + do { + await new Promise(resolve => setTimeout(resolve, 1000)); + const historyResponse = await axios.get(`${COMFY_BASE_URL}/history/${promptId}`); + history = historyResponse.data[promptId]; + } while (!history || Object.keys(history.outputs).length === 0); + + // Pick up the most recently written "qwenedit" image from the ComfyUI output dir. + const files = await fs.readdir(COMFY_OUTPUT_DIR); + const generatedFiles = files.filter(file => file.startsWith('qwenedit')); + if (generatedFiles.length === 0) { + throw new Error(`No "qwenedit" output found in ${COMFY_OUTPUT_DIR}`); + } + + const fileStats = await Promise.all( + generatedFiles.map(async (file) => { + const stat = await fs.stat(path.join(COMFY_OUTPUT_DIR, file)); + return { file, mtime: stat.mtime }; + }) + ); + + fileStats.sort((a, b) => b.mtime.getTime() - a.mtime.getTime()); + + const latestFile = fileStats[0].file; + const newFilePath = path.resolve('./generated', outputFile); + + await fs.mkdir('./generated', { recursive: true }); + + const sourcePath = path.join(COMFY_OUTPUT_DIR, latestFile); + try { + await fs.unlink(newFilePath); + } catch (err) { + // ignore if the target does not exist yet + } + + await fs.copyFile(sourcePath, newFilePath); + + return newFilePath; +} + + +export async function convertImageVton( + srcFiles: string[], + outputFile: string, + comfyBaseUrl: string, + comfyOutputDir: string, + size: ImageSize = { width: 720, height: 1280 } +): Promise<string> { + const COMFY_BASE_URL = comfyBaseUrl.replace(/\/$/, ''); + const COMFY_OUTPUT_DIR = comfyOutputDir; + + // No prompt parameter: the edit prompt is hard-coded in node 21 of edit_image_vton.json. + const workflow = JSON.parse(await fs.readFile('src/comfyworkflows/edit_image_vton.json', 'utf-8')); + workflow['76']['inputs']['number'] = size.width; + workflow['77']['inputs']['number'] = size.height; + + if (srcFiles[0]) + workflow['64']['inputs']['image'] = srcFiles[0]; // model/body image + + if (srcFiles[1]) + workflow['15']['inputs']['image'] = srcFiles[1]; // cloth image + + const response = await axios.post(`${COMFY_BASE_URL}/prompt`, { prompt: workflow }); + const promptId = response.data.prompt_id; + + let history; + do { + await new Promise(resolve => setTimeout(resolve, 1000)); + const historyResponse = await axios.get(`${COMFY_BASE_URL}/history/${promptId}`); + history = historyResponse.data[promptId]; + } while (!history || Object.keys(history.outputs).length === 0); + + const files = await fs.readdir(COMFY_OUTPUT_DIR); + const generatedFiles = files.filter(file => file.startsWith('qwenedit')); + if (generatedFiles.length === 0) { + throw new Error(`No "qwenedit" output found in ${COMFY_OUTPUT_DIR}`); + } + + const fileStats = await Promise.all( + generatedFiles.map(async (file) => { + const stat = await fs.stat(path.join(COMFY_OUTPUT_DIR, file)); + return { file, mtime: stat.mtime }; + }) + ); + + fileStats.sort((a, b) => b.mtime.getTime() - a.mtime.getTime()); + + const latestFile = fileStats[0].file; + const newFilePath = path.resolve('./generated', outputFile); + + await fs.mkdir('./generated', { recursive: true }); + + const sourcePath = path.join(COMFY_OUTPUT_DIR, latestFile); + try { + await fs.unlink(newFilePath); + } catch (err) { + // ignore if the target does not exist yet + } + + await fs.copyFile(sourcePath, newFilePath); + + return newFilePath; +} + export { convertImage, convertImageWithFile }; diff --git 
a/src/tools/vton_generator.ts b/src/tools/vton_generator.ts new file mode 100644 index 0000000..e7051b6 --- /dev/null +++ b/src/tools/vton_generator.ts @@ -0,0 +1,51 @@ +import * as fs from 'fs'; +import * as path from 'path'; +import { convertImageVton } from '../lib/image-converter'; +import * as dotenv from 'dotenv'; + +dotenv.config(); + +// Note: per .clinerules/generators.md, workflow images must be available in the ComfyUI +// server's input folder; these absolute paths assume the server can read them directly. +const clothesDir = 'C:\\Users\\ken\\Desktop\\VTON\\clothes'; +const modelPath = 'C:\\Users\\ken\\Desktop\\VTON\\models\\Jessica_body.png'; +const posesDir = 'C:\\Users\\ken\\Desktop\\VTON\\poses'; +const outputDir = 'generated'; + +const comfyBaseUrl = process.env.SERVER1_COMFY_BASE_URL; +const comfyOutputDir = process.env.SERVER1_COMFY_OUTPUT_DIR; + +async function generateVtonImages() { + if (!comfyBaseUrl || !comfyOutputDir) { + throw new Error("ComfyUI URL or Output Directory is not set in environment variables."); + } + + const clothesFiles = fs.readdirSync(clothesDir).filter(file => /\.(jpg|png|jpeg)$/i.test(file)); + const poseFiles = fs.readdirSync(posesDir).filter(file => /\.(jpg|png|jpeg)$/i.test(file)); + + if (!fs.existsSync(outputDir)) { + fs.mkdirSync(outputDir); + } + + for (let i = 0; i < clothesFiles.length; i++) { + const clothFile = clothesFiles[i]; + const clothPath = path.join(clothesDir, clothFile); + + const randomPoseFile = poseFiles[Math.floor(Math.random() * poseFiles.length)]; + const posePath = path.join(posesDir, randomPoseFile); + + console.log(`Processing cloth: ${clothFile} with pose: ${randomPoseFile}`); + + // The edit prompt is hard-coded in edit_image_vton.json, and convertImageVton only + // wires up the first two entries (model, cloth); the pose image is not yet consumed + // by that workflow. + const files = [modelPath, clothPath, posePath]; + const outputFilename = `model_${i}.png`; + + try { + const generatedImagePath = await convertImageVton(files, outputFilename, comfyBaseUrl, comfyOutputDir, { width: 720, height: 1280 }); + console.log(`Generated image saved to ${generatedImagePath}`); + } catch (err) { + // Keep processing the remaining clothes if a single generation fails. + console.error(`Failed to generate image for ${clothFile}:`, err); + } + } +} + +generateVtonImages().catch(console.error);
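
Reviewer note (not part of the diff): convertImageWithMultipleFile and convertImageVton duplicate their queue-then-poll-then-copy logic, and both poll GET /history indefinitely if the ComfyUI job fails without producing outputs. A minimal sketch of a shared helper with a timeout that both functions could call — the helper name, timeout, and poll interval are illustrative assumptions, not existing code:

import axios from 'axios';

// Polls ComfyUI's /history endpoint until the prompt reports outputs, or throws
// once the deadline passes so callers are not stuck in an infinite loop.
async function waitForComfyOutputs(
  comfyBaseUrl: string,
  promptId: string,
  timeoutMs = 10 * 60 * 1000, // assumed ceiling; an 8-step Lightning edit usually finishes far sooner
  pollIntervalMs = 1000
): Promise<any> {
  const deadline = Date.now() + timeoutMs;
  while (Date.now() < deadline) {
    await new Promise(resolve => setTimeout(resolve, pollIntervalMs));
    const { data } = await axios.get(`${comfyBaseUrl}/history/${promptId}`);
    const history = data[promptId];
    if (history && Object.keys(history.outputs ?? {}).length > 0) {
      return history;
    }
  }
  throw new Error(`ComfyUI prompt ${promptId} produced no outputs within ${timeoutMs} ms`);
}

Usage of the new three-image entry point would then look like the following; 'pose_0001.png' is a hypothetical filename, and per .clinerules/generators.md all three images must already sit in the ComfyUI input folder:

const out = await convertImageWithMultipleFile(
  'change clothes of image1 with image2',
  ['Lauren_body.png', 'cloth_0001.png', 'pose_0001.png'], // model, cloth, pose
  'result.png',
  process.env.SERVER1_COMFY_BASE_URL!,
  process.env.SERVER1_COMFY_OUTPUT_DIR!
);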