save changes

2025-10-05 15:01:06 +02:00
parent eee4e6523e
commit 1697523000
6 changed files with 471 additions and 49 deletions
--- a/src/product/generate_prompt.ts
+++ b/src/product/generate_prompt.ts
@ -0,0 +1,208 @@
+import * as fs from 'fs';
+import * as path from 'path';
+import { callLMStudioAPIWithFile, callLmstudio } from '../lib/lmstudio';
+import { embedJsonToPng, readJsonToPng } from '../lib/util';
+
+// Both working directories are resolved against the directory the script is launched from.
+const WORKING_DIR = process.cwd();
+const INPUT_DIR = path.join(WORKING_DIR, 'input');
+const OUTPUT_DIR = path.join(WORKING_DIR, 'generated', 'prompts');
+
+// Create the output directory (and any missing parent directories) when it is absent.
+const outputDirMissing = !fs.existsSync(OUTPUT_DIR);
+if (outputDirMissing) {
+    fs.mkdirSync(OUTPUT_DIR, { recursive: true });
+}
+
+/**
+ * Picks up to `count` distinct elements uniformly at random from `items`.
+ * Works on a copy, so the caller's array is never reordered.
+ */
+function pickRandomItems<T>(items: readonly T[], count: number): T[] {
+    const pool = [...items];
+    const picked: T[] = [];
+    while (picked.length < count && pool.length > 0) {
+        const index = Math.floor(Math.random() * pool.length);
+        picked.push(...pool.splice(index, 1));
+    }
+    return picked;
+}
+
+/**
+ * Generates image and video prompts for one input image and stores them as
+ * metadata inside a copy of that image under OUTPUT_DIR.
+ *
+ * Pipeline:
+ *  1. Ask the model for the main subject and 20 candidate sub-objects.
+ *  2. Pick 3 sub-objects at random.
+ *  3. Ask the model for exactly five background proposals.
+ *  4. Translate every proposal to Chinese.
+ *  5. Ask the model for a video prompt based on the first proposal.
+ *  6. Copy the image to OUTPUT_DIR and embed `{ imagePrompts, videoPrompt }`.
+ *
+ * Images whose output file already carries both metadata fields are skipped.
+ * Failures are logged and the image is abandoned; errors raised inside the
+ * pipeline are caught here and not rethrown.
+ *
+ * @param imagePath Path of the source image to process.
+ */
+async function generatePromptsForImage(imagePath: string): Promise<void> {
+    const imageName = path.basename(imagePath);
+    const outputFilePath = path.join(OUTPUT_DIR, imageName);
+
+    // Check if the output file already exists and has valid metadata
+    if (fs.existsSync(outputFilePath)) {
+        try {
+            const existingMetadata = await readJsonToPng(outputFilePath);
+            if (existingMetadata && existingMetadata.imagePrompts && existingMetadata.videoPrompt) {
+                console.log(`Skipping already processed image: ${imageName}`);
+                return;
+            }
+        } catch (error) {
+            // File exists but is invalid or has no metadata, so we'll overwrite it.
+            console.log(`Output file for ${imageName} exists but is invalid. Regenerating...`);
+        }
+    }
+
+    console.log(`Processing image: ${imagePath}`);
+
+    // Step 1: Get main subject and sub-objects
+    const firstPrompt = `
+You are a creative director for unique product video generation.
+
+Read the given photo carefully.
+
+Identify and write the main subject (the most important object in the photo).
+Propose 20 possible sub-objects that could appear around the main subject in a video scene.
+Sub-objects are only suggestions.
+They should be stylish, cool, or complementary items that enhance the main subject.
+Keep each sub-object as a short noun phrase (no long explanations).
+Do not repeat similar items.
+Output strictly in this JSON format:
+
+{result:{
+"main-subject": "the identified main object",
+"sub-object": [
+"first proposal",
+"second proposal",
+...
+"twentieth proposal"
+]
+}}
+`;
+
+    try {
+        const firstApiResponse = await callLMStudioAPIWithFile(imagePath, firstPrompt);
+        // Optional chaining: a response without `result` is reported as an
+        // invalid response below instead of surfacing as a TypeError.
+        const firstApiResult = firstApiResponse?.result;
+        const mainSubject = firstApiResult?.['main-subject'];
+        const subObjects = firstApiResult?.['sub-object'];
+
+        if (!mainSubject || !Array.isArray(subObjects) || subObjects.length < 3) {
+            console.error('Invalid response from the first API call for image:', imagePath);
+            return;
+        }
+
+        // Step 2: Pick 3 random sub-objects (uniformly, without mutating the response array)
+        const selectedSubObjects = pickRandomItems(subObjects, 3);
+
+        // Step 3: Generate background proposals
+        const secondPrompt = `
+You are a senior creative director for product photography and video.
+Follow the instructions carefully.
+
+Task:
+1. Extract the main subject from Figure 1.
+2. Use the three selected sub-objects provided.
+3. Generate exactly five background prompt suggestions.
+
+SUB1: ${selectedSubObjects[0]}
+SUB2: ${selectedSubObjects[1]}
+SUB3: ${selectedSubObjects[2]}
+
+Requirements for background prompts:
+- All five suggestions must be written in English.
+- Every suggestion must begin with the phrase: "Extract the object from Figure 1 and generate a new image."
+- After that phrase, always instruct to place the three sub-objects in the scene.
+  Example: "and include Pink silk scarf, Pearl necklace, Pink lipstick in the scene."
+- Each suggestion must also describe:
+  - Background color (must always include pink)
+  - Lighting (direction, mood, intensity)
+  - Style or design elements (minimal, futuristic, luxury, natural, abstract, etc.)
+- Try to describe detail for each sugegstion. > 50 words.
+- Suggestions must be visually distinct.
+- Each suggestion must use a completely different background color palette while still incorporating pink.
+- Do not mention brand names or logos.
+
+Special condition:
+- In the new image, always place a pink silk scarf.
+- The background color must always be pink.
+
+Output strictly in JSON format:
+
+{result:{
+  "main-subject": "${mainSubject}",
+  "selected-sub-objects": ["${selectedSubObjects[0]}","${selectedSubObjects[1]}","${selectedSubObjects[2]}"],
+  "background-proposals": [
+    "Extract the object from Figure1 and generate a new image,{be creative and generate scene with ${selectedSubObjects[0]},${selectedSubObjects[1]},${selectedSubObjects[2]} }",
+    "Extract the object from Figure1 and generate a new image,{be creative and generate scene with ${selectedSubObjects[0]},${selectedSubObjects[1]},${selectedSubObjects[2]} }",
+    "Extract the object from Figure1 and generate a new image,{be creative and generate scene with ${selectedSubObjects[0]},${selectedSubObjects[1]},${selectedSubObjects[2]} }",
+    "Extract the object from Figure1 and generate a new image,{be creative and generate scene with ${selectedSubObjects[0]},${selectedSubObjects[1]},${selectedSubObjects[2]} }",
+    "Extract the object from Figure1 and generate a new image,{be creative and generate scene with ${selectedSubObjects[0]},${selectedSubObjects[1]},${selectedSubObjects[2]} }"
+  ]
+    }}
+`;
+
+        const secondApiResponse = await callLMStudioAPIWithFile(imagePath, secondPrompt);
+        const secondApiResult = secondApiResponse?.result;
+        const backgroundProposals = secondApiResult?.['background-proposals'];
+
+        if (!Array.isArray(backgroundProposals) || backgroundProposals.length !== 5) {
+            console.error('Invalid response from the second API call for image:', imagePath);
+            return;
+        }
+
+        // Step 4: Translate proposals to Chinese
+        const translatedProposals: string[] = [];
+        for (const proposal of backgroundProposals) {
+            const translationPrompt = `Translate the following English text to Chinese. Return only the translated text.
+            
+Text: "${proposal}"
+
+Return the result in this format:
+{"result":""}
+`;
+            const translationResponse = await callLmstudio(translationPrompt);
+            const translatedText = translationResponse?.result;
+
+            // A missing translation must not reach the metadata: the skip check
+            // above only tests that the fields are present, so a bad entry would
+            // never be regenerated on later runs.
+            if (typeof translatedText !== 'string' || translatedText.trim() === '') {
+                console.error('Invalid translation response for image:', imagePath);
+                return;
+            }
+            translatedProposals.push(translatedText);
+        }
+
+        // Step 5: Generate video prompt
+        const videoPromptRequest = `
+You are a creative director for a short, stylish video ad.
+Based on the provided image and the following scene description, generate an attractive video prompt.
+
+Main Subject: ${mainSubject}
+Sub-Objects: ${selectedSubObjects.join(', ')}
+Scene Description: ${backgroundProposals[0]}
+
+The video prompt should:
+- Be in English.
+- Be approximately 50 words.
+- Describe one clear action involving the main subject and sub-objects.
+- Include one specific camera movement (e.g., slow zoom in, orbiting shot, push-in, pull-out).
+- Be dynamic and visually appealing.
+
+Output strictly in this JSON format:
+{
+  "result": "your generated video prompt here"
+}
+`;
+        const videoPromptResponse = await callLMStudioAPIWithFile(imagePath, videoPromptRequest);
+        const videoPrompt = videoPromptResponse?.result;
+
+        if (!videoPrompt) {
+            console.error('Failed to generate video prompt for image:', imagePath);
+            return;
+        }
+
+        // Step 6: Embed all prompts into PNG metadata
+        const metadata = {
+            imagePrompts: translatedProposals,
+            videoPrompt: videoPrompt
+        };
+
+        // NOTE(review): main() also passes .jpg/.jpeg files, and the copy keeps the
+        // source extension and bytes — confirm embedJsonToPng accepts non-PNG data.
+        fs.copyFileSync(imagePath, outputFilePath);
+        await embedJsonToPng(outputFilePath, metadata);
+
+        console.log(`Successfully generated prompts and saved to ${outputFilePath}`);
+
+    } catch (error) {
+        console.error(`Failed to process image ${imagePath}:`, error);
+    }
+}
+
+/**
+ * Entry point: runs generatePromptsForImage over every .png/.jpg/.jpeg file in
+ * INPUT_DIR, one image at a time. Any error raised here is logged, not rethrown.
+ */
+async function main() {
+    try {
+        const imageExtension = /\.(png|jpg|jpeg)$/i;
+        const imageNames = fs.readdirSync(INPUT_DIR).filter(name => imageExtension.test(name));
+
+        if (imageNames.length > 0) {
+            // Images are handled sequentially: each one is awaited before the next starts.
+            for (const name of imageNames) {
+                await generatePromptsForImage(path.join(INPUT_DIR, name));
+            }
+
+            console.log('All images processed.');
+        } else {
+            console.log('No images found in the input directory.');
+        }
+    } catch (error) {
+        console.error('An error occurred in the main process:', error);
+    }
+}
+
+main();