From 1697523000b5d42388d30041e0c3fab95ee7ee67 Mon Sep 17 00:00:00 2001 From: Ken Yasue Date: Sun, 5 Oct 2025 15:01:06 +0200 Subject: [PATCH] save changes --- package-lock.json | 102 ++++++++++++ package.json | 6 + src/comfyworkflows/edit_image_qwen.json | 30 +++- src/lib/util.ts | 99 ++++++----- src/product/clean_background.ts | 75 +++++++++ src/product/generate_prompt.ts | 208 ++++++++++++++++++++++++ 6 files changed, 471 insertions(+), 49 deletions(-) create mode 100644 src/product/clean_background.ts create mode 100644 src/product/generate_prompt.ts diff --git a/package-lock.json b/package-lock.json index 6d3b074..659addc 100644 --- a/package-lock.json +++ b/package-lock.json @@ -10,13 +10,17 @@ "license": "ISC", "dependencies": { "@types/axios": "^0.14.4", + "@types/fs-extra": "^11.0.4", "@types/pngjs": "^6.0.5", "@types/sharp": "^0.32.0", "axios": "^1.11.0", "dotenv": "^17.2.1", + "fs-extra": "^11.3.2", "mysql2": "^3.14.3", "open": "^10.2.0", "png-chunk-text": "^1.0.0", + "png-chunks-encode": "^1.0.0", + "png-chunks-extract": "^1.0.0", "pngjs": "^7.0.0", "puppeteer": "^24.16.2", "sharp": "^0.34.4", @@ -25,6 +29,8 @@ "devDependencies": { "@types/node": "^20.19.19", "@types/png-chunk-text": "^1.0.3", + "@types/png-chunks-encode": "^1.0.2", + "@types/png-chunks-extract": "^1.0.2", "ts-node": "^10.9.2", "typescript": "^5.0.0" } @@ -558,6 +564,23 @@ "axios": "*" } }, + "node_modules/@types/fs-extra": { + "version": "11.0.4", + "resolved": "https://registry.npmjs.org/@types/fs-extra/-/fs-extra-11.0.4.tgz", + "integrity": "sha512-yTbItCNreRooED33qjunPthRcSjERP1r4MqCZc7wv0u2sUkzTFp45tgUfS5+r7FrZPdmCCNflLhVSP/o+SemsQ==", + "dependencies": { + "@types/jsonfile": "*", + "@types/node": "*" + } + }, + "node_modules/@types/jsonfile": { + "version": "6.1.4", + "resolved": "https://registry.npmjs.org/@types/jsonfile/-/jsonfile-6.1.4.tgz", + "integrity": "sha512-D5qGUYwjvnNNextdU59/+fI+spnwtTFmyQP0h+PfIOSkNfpU6AOICUOkm4i0OnSk+NyjdPJrxCDro0sJsWlRpQ==", + "dependencies": { + "@types/node": "*" + } + }, "node_modules/@types/node": { "version": "20.19.19", "resolved": "https://registry.npmjs.org/@types/node/-/node-20.19.19.tgz", @@ -572,6 +595,18 @@ "integrity": "sha512-7keEFz73uNJ9Ar1XMCNnHEXT9pICJnouMQCCYgBEmHMgdkXaQzSTmSvr6tUDSqgdEgmlRAxZd97wprgliyZoCg==", "dev": true }, + "node_modules/@types/png-chunks-encode": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/@types/png-chunks-encode/-/png-chunks-encode-1.0.2.tgz", + "integrity": "sha512-Dxn0aXEcSg1wVeHjvNlygm/+fKBDzWMCdxJYhjGUTeefFW/jYxWcrg+W7ppLBfH44iJMqeVBHtHBwtYQUeYvgw==", + "dev": true + }, + "node_modules/@types/png-chunks-extract": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/@types/png-chunks-extract/-/png-chunks-extract-1.0.2.tgz", + "integrity": "sha512-z6djfFIbrrddtunoMJBOPlyZrnmeuG1kkvHUNi2QfpOb+JMMLuLliHHTmMyRi7k7LiTAut0HbdGCF6ibDtQAHQ==", + "dev": true + }, "node_modules/@types/pngjs": { "version": "6.0.5", "resolved": "https://registry.npmjs.org/@types/pngjs/-/pngjs-6.0.5.tgz", @@ -896,6 +931,14 @@ } } }, + "node_modules/crc-32": { + "version": "0.3.0", + "resolved": "https://registry.npmjs.org/crc-32/-/crc-32-0.3.0.tgz", + "integrity": "sha512-kucVIjOmMc1f0tv53BJ/5WIX+MGLcKuoBhnGqQrgKJNqLByb/sVMWfW/Aw6hw0jgcqjJ2pi9E5y32zOIpaUlsA==", + "engines": { + "node": ">=0.8" + } + }, "node_modules/create-require": { "version": "1.1.1", "resolved": "https://registry.npmjs.org/create-require/-/create-require-1.1.1.tgz", @@ -1230,6 +1273,19 @@ "node": ">= 6" } }, + "node_modules/fs-extra": { + "version": "11.3.2", + "resolved": "https://registry.npmjs.org/fs-extra/-/fs-extra-11.3.2.tgz", + "integrity": "sha512-Xr9F6z6up6Ws+NjzMCZc6WXg2YFRlrLP9NQDO3VQrWrfiojdhS56TzueT88ze0uBdCTwEIhQ3ptnmKeWGFAe0A==", + "dependencies": { + "graceful-fs": "^4.2.0", + "jsonfile": "^6.0.1", + "universalify": "^2.0.0" + }, + "engines": { + "node": ">=14.14" + } + }, "node_modules/function-bind": { "version": "1.1.2", "resolved": "https://registry.npmjs.org/function-bind/-/function-bind-1.1.2.tgz", @@ -1327,6 +1383,11 @@ "url": "https://github.com/sponsors/ljharb" } }, + "node_modules/graceful-fs": { + "version": "4.2.11", + "resolved": "https://registry.npmjs.org/graceful-fs/-/graceful-fs-4.2.11.tgz", + "integrity": "sha512-RbJ5/jmFcNNCcDV5o9eTnBLJ/HszWV0P73bc+Ff4nS/rJj+YaS6IGyiOL0VoBYX+l1Wrl3k63h/KrH+nhJ0XvQ==" + }, "node_modules/has-symbols": { "version": "1.1.0", "resolved": "https://registry.npmjs.org/has-symbols/-/has-symbols-1.1.0.tgz", @@ -1505,6 +1566,17 @@ "resolved": "https://registry.npmjs.org/json-parse-even-better-errors/-/json-parse-even-better-errors-2.3.1.tgz", "integrity": "sha512-xyFwyhro/JEof6Ghe2iz2NcXoj2sloNsWr/XsERDK/oiPCfaNhl5ONfp+jQdAZRQQ0IJWNzH9zIZF7li91kh2w==" }, + "node_modules/jsonfile": { + "version": "6.2.0", + "resolved": "https://registry.npmjs.org/jsonfile/-/jsonfile-6.2.0.tgz", + "integrity": "sha512-FGuPw30AdOIUTRMC2OMRtQV+jkVj2cfPqSeWXv1NEAJ1qZ5zb1X6z1mFhbfOB/iy3ssJCD+3KuZ8r8C3uVFlAg==", + "dependencies": { + "universalify": "^2.0.0" + }, + "optionalDependencies": { + "graceful-fs": "^4.1.6" + } + }, "node_modules/lines-and-columns": { "version": "1.2.4", "resolved": "https://registry.npmjs.org/lines-and-columns/-/lines-and-columns-1.2.4.tgz", @@ -1716,6 +1788,23 @@ "resolved": "https://registry.npmjs.org/png-chunk-text/-/png-chunk-text-1.0.0.tgz", "integrity": "sha512-DEROKU3SkkLGWNMzru3xPVgxyd48UGuMSZvioErCure6yhOc/pRH2ZV+SEn7nmaf7WNf3NdIpH+UTrRdKyq9Lw==" }, + "node_modules/png-chunks-encode": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/png-chunks-encode/-/png-chunks-encode-1.0.0.tgz", + "integrity": "sha512-J1jcHgbQRsIIgx5wxW9UmCymV3wwn4qCCJl6KYgEU/yHCh/L2Mwq/nMOkRPtmV79TLxRZj5w3tH69pvygFkDqA==", + "dependencies": { + "crc-32": "^0.3.0", + "sliced": "^1.0.1" + } + }, + "node_modules/png-chunks-extract": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/png-chunks-extract/-/png-chunks-extract-1.0.0.tgz", + "integrity": "sha512-ZiVwF5EJ0DNZyzAqld8BP1qyJBaGOFaq9zl579qfbkcmOwWLLO4I9L8i2O4j3HkI6/35i0nKG2n+dZplxiT89Q==", + "dependencies": { + "crc-32": "^0.3.0" + } + }, "node_modules/pngjs": { "version": "7.0.0", "resolved": "https://registry.npmjs.org/pngjs/-/pngjs-7.0.0.tgz", @@ -1889,6 +1978,11 @@ "@img/sharp-win32-x64": "0.34.4" } }, + "node_modules/sliced": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/sliced/-/sliced-1.0.1.tgz", + "integrity": "sha512-VZBmZP8WU3sMOZm1bdgTadsQbcscK0UM8oKxKVBs4XAhUo2Xxzm/OFMGBkPusxw9xL3Uy8LrzEqGqJhclsr0yA==" + }, "node_modules/smart-buffer": { "version": "4.2.0", "resolved": "https://registry.npmjs.org/smart-buffer/-/smart-buffer-4.2.0.tgz", @@ -2079,6 +2173,14 @@ "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-6.21.0.tgz", "integrity": "sha512-iwDZqg0QAGrg9Rav5H4n0M64c3mkR59cJ6wQp+7C4nI0gsmExaedaYLNO44eT4AtBBwjbTiGPMlt2Md0T9H9JQ==" }, + "node_modules/universalify": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/universalify/-/universalify-2.0.1.tgz", + "integrity": "sha512-gptHNQghINnc/vTGIk0SOFGFNXw7JVrlRUtConJRlvaw6DuX0wO5Jeko9sWrMBhh+PsYAZ7oXAiOnf/UKogyiw==", + "engines": { + "node": ">= 10.0.0" + } + }, "node_modules/uuid": { "version": "11.1.0", "resolved": "https://registry.npmjs.org/uuid/-/uuid-11.1.0.tgz", diff --git a/package.json b/package.json index ab4e497..c86f484 100644 --- a/package.json +++ b/package.json @@ -19,18 +19,24 @@ "devDependencies": { "@types/node": "^20.19.19", "@types/png-chunk-text": "^1.0.3", + "@types/png-chunks-encode": "^1.0.2", + "@types/png-chunks-extract": "^1.0.2", "ts-node": "^10.9.2", "typescript": "^5.0.0" }, "dependencies": { "@types/axios": "^0.14.4", + "@types/fs-extra": "^11.0.4", "@types/pngjs": "^6.0.5", "@types/sharp": "^0.32.0", "axios": "^1.11.0", "dotenv": "^17.2.1", + "fs-extra": "^11.3.2", "mysql2": "^3.14.3", "open": "^10.2.0", "png-chunk-text": "^1.0.0", + "png-chunks-encode": "^1.0.0", + "png-chunks-extract": "^1.0.0", "pngjs": "^7.0.0", "puppeteer": "^24.16.2", "sharp": "^0.34.4", diff --git a/src/comfyworkflows/edit_image_qwen.json b/src/comfyworkflows/edit_image_qwen.json index 3259408..2f8e407 100644 --- a/src/comfyworkflows/edit_image_qwen.json +++ b/src/comfyworkflows/edit_image_qwen.json @@ -57,7 +57,7 @@ }, "7": { "inputs": { - "seed": 229610050211520, + "seed": 639545413023960, "steps": 8, "cfg": 1, "sampler_name": "euler", @@ -76,8 +76,8 @@ 0 ], "latent_image": [ - "11", - 6 + "28", + 0 ] }, "class_type": "KSampler", @@ -174,7 +174,7 @@ }, "14": { "inputs": { - "image": "70437490094806_1759383647641_0.png" + "image": "7318418139276581_1759654853736_18 - コピー.png" }, "class_type": "LoadImage", "_meta": { @@ -188,12 +188,12 @@ { "name": "A", "selected": true, - "url": "/api/view?filename=rgthree.compare._temp_camuo_00003_.png&type=temp&subfolder=&rand=0.23138406992361238" + "url": "/api/view?filename=rgthree.compare._temp_niitk_00003_.png&type=temp&subfolder=&rand=0.9166876008508786" }, { "name": "B", "selected": true, - "url": "/api/view?filename=rgthree.compare._temp_camuo_00004_.png&type=temp&subfolder=&rand=0.5709114887760696" + "url": "/api/view?filename=rgthree.compare._temp_niitk_00004_.png&type=temp&subfolder=&rand=0.06689875639286158" } ] }, @@ -226,7 +226,7 @@ }, "21": { "inputs": { - "value": "把图1中的衣服和配饰提取出来,并将背景改为浅灰色。" + "value": "请从图1中提取主要主体,把背景设置为浅灰色,并让主体正面朝向,制作成产品照片。" }, "class_type": "PrimitiveStringMultiline", "_meta": { @@ -313,5 +313,21 @@ "_meta": { "title": "Resize Image v2" } + }, + "28": { + "inputs": { + "pixels": [ + "27", + 0 + ], + "vae": [ + "3", + 0 + ] + }, + "class_type": "VAEEncode", + "_meta": { + "title": "VAE Encode" + } } } \ No newline at end of file diff --git a/src/lib/util.ts b/src/lib/util.ts index fa5e826..a5db247 100644 --- a/src/lib/util.ts +++ b/src/lib/util.ts @@ -1,52 +1,67 @@ -import * as fs from 'fs'; -import { PNG } from 'pngjs'; -import { encode, decode } from 'png-chunk-text'; +// png-json-metadata.ts +import * as fs from "fs"; +import extract from "png-chunks-extract"; +import encodeChunks from "png-chunks-encode"; +import * as textChunk from "png-chunk-text"; -export async function embedJsonToPng(path: string, obj: any): Promise { - return new Promise((resolve, reject) => { - const jsonString = JSON.stringify(obj); - const chunk = { name: 'tEXt', data: `json:${jsonString}` }; +type PngChunk = { name: string; data: Uint8Array }; - fs.createReadStream(path) - .pipe(new PNG()) - .on('parsed', function (this: PNG & { chunks?: any[] }) { - if (!this.chunks) { - return reject(new Error('PNG chunks not found.')); - } - const newChunks = this.chunks.slice(); - newChunks.splice(-1, 0, chunk); - this.chunks = newChunks; +/** + * PNG へ JSON を Base64 で埋め込む(tEXt / keyword: "json-b64") + * - JSON は UTF-8 → Base64 にして ASCII 化(tEXt の Latin-1 制限を回避) + * - 既存の "json-b64" tEXt があれば置き換え(重複回避) + */ +export async function embedJsonToPng(path: string, obj: unknown): Promise { + const input = fs.readFileSync(path); + const chunks = extract(input) as PngChunk[]; - this.pack() - .pipe(fs.createWriteStream(path)) - .on('finish', () => resolve()) - .on('error', (err: Error) => reject(err)); - }) - .on('error', (err: Error) => reject(err)); + // 既存の "json-b64" tEXt を除外 + const filtered: PngChunk[] = chunks.filter((c) => { + if (c.name !== "tEXt") return true; + try { + const decoded = textChunk.decode(c.data); // { keyword, text } + return decoded.keyword !== "json-b64"; + } catch { + // decode 失敗(別の形式など)は残す + return true; + } }); + + const json = JSON.stringify(obj); + const b64 = Buffer.from(json, "utf8").toString("base64"); // ASCII のみ + + // encode() は { name:'tEXt', data: Uint8Array } を返す + const newChunk = textChunk.encode("json-b64", b64) as PngChunk; + + // IEND の直前に挿入(PNG の正しい順序を維持) + const iendIndex = filtered.findIndex((c) => c.name === "IEND"); + if (iendIndex < 0) { + throw new Error("Invalid PNG: missing IEND chunk."); + } + filtered.splice(iendIndex, 0, newChunk); + + const out = Buffer.from(encodeChunks(filtered)); + fs.writeFileSync(path, out); } +/** + * PNG から Base64 JSON(tEXt / keyword: "json-b64")を読み出す + */ export async function readJsonToPng(path: string): Promise { - return new Promise((resolve, reject) => { - fs.readFile(path, (err, data) => { - if (err) { - return reject(err); - } + const input = fs.readFileSync(path); + const chunks = extract(input) as PngChunk[]; - const chunks = decode(data); - const textChunk = chunks.find((chunk: { name: string; data: string }) => chunk.name === 'tEXt' && chunk.data.startsWith('json:')); - - if (textChunk) { - const jsonString = textChunk.data.slice(5); - try { - const jsonObj = JSON.parse(jsonString); - resolve(jsonObj); - } catch (e) { - reject(new Error('Failed to parse JSON from PNG.')); - } - } else { - reject(new Error('No JSON data found in PNG.')); + for (const c of chunks) { + if (c.name !== "tEXt") continue; + try { + const { keyword, text } = textChunk.decode(c.data); + if (keyword === "json-b64") { + const json = Buffer.from(text, "base64").toString("utf8"); + return JSON.parse(json); } - }); - }); + } catch { + // 他の tEXt / 壊れたエントリは無視 + } + } + throw new Error("No base64 JSON found in PNG (tEXt keyword 'json-b64')."); } diff --git a/src/product/clean_background.ts b/src/product/clean_background.ts new file mode 100644 index 0000000..f4ed254 --- /dev/null +++ b/src/product/clean_background.ts @@ -0,0 +1,75 @@ +import { convertImage } from '../lib/image-converter'; +import * as fs from 'fs-extra'; +import * as path from 'path'; +import dotenv from 'dotenv'; + +dotenv.config(); + +const inputDir = path.join(__dirname, '../../input'); +const outputDir = path.join(__dirname, '../../generated/clearned'); + +const comfyUrl = process.env.SERVER1_COMFY_BASE_URL; +const comfyOutputDir = process.env.SERVER1_COMFY_OUTPUT_DIR; + +if (!comfyUrl || !comfyOutputDir) { + console.error("ComfyUI URL or Output Directory is not set in environment variables."); + process.exit(1); +} + +const comfyInputDir = comfyOutputDir.replace("output", "input"); + +async function processImages() { + await fs.ensureDir(outputDir); + + const files = await fs.readdir(inputDir); + let index = 1; + + for (const file of files) { + const sourceFilePath = path.join(inputDir, file); + const stats = await fs.stat(sourceFilePath); + + if (stats.isFile()) { + console.log(`Processing ${file}...`); + + const comfyInputPath = path.join(comfyInputDir, file); + + try { + // 1. Copy file to ComfyUI input directory + await fs.copy(sourceFilePath, comfyInputPath); + console.log(`Copied ${file} to ComfyUI input.`); + + const prompt = "请从图1中提取主要主体,把背景设置为浅灰色,并让主体正面朝向,制作成产品照片。"; + + // 2. Call convertImage with correct parameters + const generatedFilePath = await convertImage(prompt, file, comfyUrl!, comfyOutputDir!); + + if (generatedFilePath && await fs.pathExists(generatedFilePath)) { + const outputFilename = `clearned_${index}.png`; + const finalOutputPath = path.join(outputDir, outputFilename); + + // 3. Move the generated file to the final destination + await fs.move(generatedFilePath, finalOutputPath, { overwrite: true }); + console.log(`Saved cleaned image to ${finalOutputPath}`); + index++; + + // 4. Delete the original file from the script's input directory + await fs.unlink(sourceFilePath); + console.log(`Deleted original file: ${file}`); + } + + // 5. Clean up the file from ComfyUI input directory + await fs.unlink(comfyInputPath); + console.log(`Cleaned up ${file} from ComfyUI input.`); + + } catch (error) { + console.error(`Failed to process ${file}:`, error); + // If something fails, make sure to clean up the copied file if it exists + if (await fs.pathExists(comfyInputPath)) { + await fs.unlink(comfyInputPath); + } + } + } + } +} + +processImages().catch(console.error); diff --git a/src/product/generate_prompt.ts b/src/product/generate_prompt.ts new file mode 100644 index 0000000..b258df2 --- /dev/null +++ b/src/product/generate_prompt.ts @@ -0,0 +1,208 @@ +import * as fs from 'fs'; +import * as path from 'path'; +import { callLMStudioAPIWithFile, callLmstudio } from '../lib/lmstudio'; +import { embedJsonToPng, readJsonToPng } from '../lib/util'; + +const INPUT_DIR = path.join(process.cwd(), 'input'); +const OUTPUT_DIR = path.join(process.cwd(), 'generated', 'prompts'); + +if (!fs.existsSync(OUTPUT_DIR)) { + fs.mkdirSync(OUTPUT_DIR, { recursive: true }); +} + +async function generatePromptsForImage(imagePath: string) { + const outputFilePath = path.join(OUTPUT_DIR, path.basename(imagePath)); + + // Check if the output file already exists and has valid metadata + if (fs.existsSync(outputFilePath)) { + try { + const existingMetadata = await readJsonToPng(outputFilePath); + if (existingMetadata && existingMetadata.imagePrompts && existingMetadata.videoPrompt) { + console.log(`Skipping already processed image: ${path.basename(imagePath)}`); + return; + } + } catch (error) { + // File exists but is invalid or has no metadata, so we'll overwrite it. + console.log(`Output file for ${path.basename(imagePath)} exists but is invalid. Regenerating...`); + } + } + + console.log(`Processing image: ${imagePath}`); + + // Step 1: Get main subject and sub-objects + const firstPrompt = ` +You are a creative director for unique product video generation. + +Read the given photo carefully. + +Identify and write the main subject (the most important object in the photo). +Propose 20 possible sub-objects that could appear around the main subject in a video scene. +Sub-objects are only suggestions. +They should be stylish, cool, or complementary items that enhance the main subject. +Keep each sub-object as a short noun phrase (no long explanations). +Do not repeat similar items. +Output strictly in this JSON format: + +{result:{ +"main-subject": "the identified main object", +"sub-object": [ +"first proposal", +"second proposal", +... +"twentieth proposal" +] +}} +`; + + try { + const firstApiResponse = await callLMStudioAPIWithFile(imagePath, firstPrompt); + const firstApiResult = firstApiResponse.result; + const mainSubject = firstApiResult['main-subject']; + const subObjects = firstApiResult['sub-object']; + + if (!mainSubject || !Array.isArray(subObjects) || subObjects.length < 3) { + console.error('Invalid response from the first API call for image:', imagePath); + return; + } + + // Step 2: Pick 3 random sub-objects + const selectedSubObjects = subObjects.sort(() => 0.5 - Math.random()).slice(0, 3); + + // Step 3: Generate background proposals + const secondPrompt = ` +You are a senior creative director for product photography and video. +Follow the instructions carefully. + +Task: +1. Extract the main subject from Figure 1. +2. Use the three selected sub-objects provided. +3. Generate exactly five background prompt suggestions. + +SUB1: ${selectedSubObjects[0]} +SUB2: ${selectedSubObjects[1]} +SUB3: ${selectedSubObjects[2]} + +Requirements for background prompts: +- All five suggestions must be written in English. +- Every suggestion must begin with the phrase: "Extract the object from Figure 1 and generate a new image." +- After that phrase, always instruct to place the three sub-objects in the scene. + Example: "and include Pink silk scarf, Pearl necklace, Pink lipstick in the scene." +- Each suggestion must also describe: + - Background color (must always include pink) + - Lighting (direction, mood, intensity) + - Style or design elements (minimal, futuristic, luxury, natural, abstract, etc.) +- Try to describe detail for each sugegstion. > 50 words. +- Suggestions must be visually distinct. +- Each suggestion must use a completely different background color palette while still incorporating pink. +- Do not mention brand names or logos. + +Special condition: +- In the new image, always place a pink silk scarf. +- The background color must always be pink. + +Output strictly in JSON format: + +{result:{ + "main-subject": "${mainSubject}", + "selected-sub-objects": ["${selectedSubObjects[0]}","${selectedSubObjects[1]}","${selectedSubObjects[2]}"], + "background-proposals": [ + "Extract the object from Figure1 and generate a new image,{be creative and generate scene with ${selectedSubObjects[0]},${selectedSubObjects[1]},${selectedSubObjects[2]} }", + "Extract the object from Figure1 and generate a new image,{be creative and generate scene with ${selectedSubObjects[0]},${selectedSubObjects[1]},${selectedSubObjects[2]} }", + "Extract the object from Figure1 and generate a new image,{be creative and generate scene with ${selectedSubObjects[0]},${selectedSubObjects[1]},${selectedSubObjects[2]} }", + "Extract the object from Figure1 and generate a new image,{be creative and generate scene with ${selectedSubObjects[0]},${selectedSubObjects[1]},${selectedSubObjects[2]} }", + "Extract the object from Figure1 and generate a new image,{be creative and generate scene with ${selectedSubObjects[0]},${selectedSubObjects[1]},${selectedSubObjects[2]} }" + ] + }} +`; + + const secondApiResponse = await callLMStudioAPIWithFile(imagePath, secondPrompt); + const secondApiResult = secondApiResponse.result; + const backgroundProposals = secondApiResult['background-proposals']; + + if (!Array.isArray(backgroundProposals) || backgroundProposals.length !== 5) { + console.error('Invalid response from the second API call for image:', imagePath); + return; + } + + // Step 4: Translate proposals to Chinese + const translatedProposals: string[] = []; + for (const proposal of backgroundProposals) { + const translationPrompt = `Translate the following English text to Chinese. Return only the translated text. + +Text: "${proposal}" + +Return the result in this format: +{"result":""} +`; + const translationResponse = await callLmstudio(translationPrompt); + const translatedResult = translationResponse.result; + translatedProposals.push(translationResponse.result); + } + + // Step 5: Generate video prompt + const videoPromptRequest = ` +You are a creative director for a short, stylish video ad. +Based on the provided image and the following scene description, generate an attractive video prompt. + +Main Subject: ${mainSubject} +Sub-Objects: ${selectedSubObjects.join(', ')} +Scene Description: ${backgroundProposals[0]} + +The video prompt should: +- Be in English. +- Be approximately 50 words. +- Describe one clear action involving the main subject and sub-objects. +- Include one specific camera movement (e.g., slow zoom in, orbiting shot, push-in, pull-out). +- Be dynamic and visually appealing. + +Output strictly in this JSON format: +{ + "result": "your generated video prompt here" +} +`; + const videoPromptResponse = await callLMStudioAPIWithFile(imagePath, videoPromptRequest); + const videoPrompt = videoPromptResponse.result; + + if (!videoPrompt) { + console.error('Failed to generate video prompt for image:', imagePath); + return; + } + + // Step 6: Embed all prompts into PNG metadata + const metadata = { + imagePrompts: translatedProposals, + videoPrompt: videoPrompt + }; + + fs.copyFileSync(imagePath, outputFilePath); + await embedJsonToPng(outputFilePath, metadata); + + console.log(`Successfully generated prompts and saved to ${outputFilePath}`); + + } catch (error) { + console.error(`Failed to process image ${imagePath}:`, error); + } +} + +async function main() { + try { + const files = fs.readdirSync(INPUT_DIR); + const imageFiles = files.filter(file => /\.(png|jpg|jpeg)$/i.test(file)); + + if (imageFiles.length === 0) { + console.log('No images found in the input directory.'); + return; + } + + for (const imageFile of imageFiles) { + const imagePath = path.join(INPUT_DIR, imageFile); + await generatePromptsForImage(imagePath); + } + + console.log('All images processed.'); + } catch (error) { + console.error('An error occurred in the main process:', error); + } +} + +main();