save changes

2025-10-06 00:02:50 +02:00
parent 1697523000
commit 4452508dd4
4 changed files with 334 additions and 136 deletions
--- a/src/lib/pinterest.ts
+++ b/src/lib/pinterest.ts
@ -36,6 +36,81 @@ export async function getPinUrlFromPinterest(keyword: string): Promise<string |
    }
 }
 /** Longest keyword-derived file-name stem that is kept, to stay clear of path-length limits. */
 const PINTEREST_FILE_STEM_MAX_LENGTH = 100;

 /**
  * Turns a free-text search keyword into a string that is safe inside a file name.
  * Whitespace runs become `_` (as before); path separators, characters that are
  * illegal on common filesystems and control characters are also replaced with `_`
  * so the keyword can never redirect the output path.
  */
 function toSafePinterestFileStem(keyword: string): string {
    return keyword
        .replace(/\s+/g, '_')
        .replace(/[\\/:*?"<>|\u0000-\u001f]/g, '_')
        .slice(0, PINTEREST_FILE_STEM_MAX_LENGTH);
 }

 /** Returns a uniformly shuffled copy of `items` (Fisher-Yates); the input is not mutated. */
 function shuffledCopy<T>(items: readonly T[]): T[] {
    const copy = items.slice();
    for (let i = copy.length - 1; i > 0; i--) {
        const j = Math.floor(Math.random() * (i + 1));
        const tmp = copy[i] as T;
        copy[i] = copy[j] as T;
        copy[j] = tmp;
    }
    return copy;
 }

 /**
  * Searches Pinterest for `keyword`, collects high-resolution image URLs from the
  * result page and saves up to `count` randomly chosen images into `<cwd>/download`.
  *
  * @param keyword Search text; also used (sanitised) as the prefix of the saved file names.
  * @param count Maximum number of images to save. Values below 1 (or NaN) yield an empty result.
  * @returns Paths of the files that were written. Empty when nothing was found or an error
  *          occurred; errors are logged, not thrown (except a failure to launch the browser).
  */
 export async function downloadImagesFromPinterestSearch(keyword: string, count: number): Promise<string[]> {
    // A negative count would make slice(0, count) drop items from the END instead of
    // returning nothing, and there is no point launching a browser for zero images.
    const wanted = Math.floor(count);
    if (!(wanted >= 1)) {
        return [];
    }
    const browser = await puppeteer.launch({ headless: false });
    try {
        // Page setup lives inside the try so the browser is closed even if it fails.
        const page = await browser.newPage();
        await page.setUserAgent('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36');
        await page.setViewport({ width: 1920, height: 1080 });
        const searchUrl = `https://www.pinterest.com/search/pins/?q=${encodeURIComponent(keyword)}`;
        await page.goto(searchUrl, { waitUntil: 'networkidle2' });
        logger.info(`Scrolling 3 times...`);
        for (let i = 0; i < 3; i++) {
            await page.evaluate('window.scrollTo(0, document.body.scrollHeight)');
            // Randomised 1-2 s pause between scrolls so lazy-loaded results can arrive.
            await new Promise(resolve => setTimeout(resolve, Math.random() * 1000 + 1000));
        }
        // NOTE: this callback runs in the browser page, so it must stay self-contained.
        const imageUrls = await page.$$eval('img', (imgs) => {
            const urls: string[] = imgs.map(img => {
                const srcset = img.getAttribute('srcset') || '';
                if (!srcset) return '';
                // Prefer the srcset candidate tagged "4x" (the largest rendition listed).
                const parts = srcset.split(',').map(p => p.trim());
                for (const part of parts) {
                    const m = part.match(/^(\S+)\s+4x$/);
                    if (m && m[1]) return m[1];
                }
                // Otherwise accept the plain src only when it already points at an original.
                const src = img.src || '';
                if (src.includes('/originals/')) return src;
                return '';
            }).filter(s => !!s && s.includes('pinimg'));
            // Remove duplicates
            return [...new Set(urls)];
        });
        if (imageUrls.length === 0) {
            logger.warn(`No 4x image URLs found for keyword "${keyword}"`);
            return [];
        }
        // Unbiased shuffle, then pick up to `wanted` unique images.
        const chosen = shuffledCopy(imageUrls).slice(0, wanted);
        const outDir = path.join(process.cwd(), 'download');
        await fs.mkdir(outDir, { recursive: true });
        const fileStem = toSafePinterestFileStem(keyword);
        const results: string[] = [];
        for (const [i, src] of chosen.entries()) {
            try {
                const imgPage = await browser.newPage();
                try {
                    const resp = await imgPage.goto(src, { timeout: 30000, waitUntil: 'networkidle2' });
                    // A non-2xx answer carries an error page, not image bytes - do not save it.
                    if (!resp || !resp.ok()) {
                        logger.warn(`Failed to fetch image ${src}`);
                        continue;
                    }
                    const buffer = await resp.buffer();
                    const timestamp = Date.now();
                    // NOTE(review): the bytes are written unconverted under a .png name even if the
                    // server returned another format - confirm downstream readers sniff the content.
                    const outPath = path.join(outDir, `${fileStem}_${timestamp}_${i}.png`);
                    await fs.writeFile(outPath, buffer);
                    results.push(outPath);
                } finally {
                    // Always release the tab, including on navigation or write failures.
                    await imgPage.close();
                }
            } catch (err) {
                logger.error(`Failed to download image ${src}:`, err);
            }
        }
        return results;
    } catch (error) {
        logger.error(`Error while downloading images for keyword "${keyword}":`, error);
        return [];
    } finally {
        await browser.close();
    }
 }
 // Download up to `count` images from a pin URL by opening the pin page and scrolling up to 5 times to trigger lazy loading
 // Returns an array of saved image paths (may be empty)
--- a/src/product/generate_image.ts
+++ b/src/product/generate_image.ts
@ -0,0 +1,88 @@
 import * as fs from 'fs/promises';
 import * as path from 'path';
 import dotenv from 'dotenv';
 import { readJsonToPng, embedJsonToPng } from '../lib/util';
 import { convertImage } from '../lib/image-converter';
 dotenv.config();
 // Directory scanned by main() for source PNGs that carry prompt metadata.
 const inputDir = './generated/prompts';
 // Directory where each generated image is moved to under its final name.
 const outputDir = './generated/image';
 // ComfyUI connection settings read from the environment (loaded by dotenv above).
 // The `!` only silences the compiler: both values are `undefined` at runtime
 // when the variables are not set.
 const COMFY_BASE_URL = process.env.SERVER2_COMFY_BASE_URL!;
 // Passed to convertImage; main() also derives the ComfyUI input folder from it
 // by replacing "output" with "input".
 const COMFY_OUTPUT_DIR = process.env.SERVER2_COMFY_OUTPUT_DIR!;
 /**
  * Shape main() expects from the JSON embedded in each input PNG:
  * a list of image/video prompt pairs, one generated image per pair.
  */
 interface PngMetadata {
    prompts: {
        // Prompt handed to convertImage to produce the image.
        imagePrompt: string;
        // Not used for generation here; copied into the metadata of the generated image.
        videoPrompt: string;
    }[];
 }
 /**
  * Generates one image per prompt pair embedded in every PNG found in `inputDir`.
  *
  * For each PNG: reads its embedded metadata, copies the file into the ComfyUI
  * input folder, then for every prompt pair calls convertImage, moves the result
  * to `outputDir/cleaned_prompt_generated_<n>.png` and embeds that pair as the
  * new file's metadata. Outputs that already exist are skipped.
  *
  * `<n>` is a single counter shared across all input files, so it depends on the
  * order in which the directory listing returns them.
  *
  * @throws Error when the ComfyUI environment variables are not set.
  */
 async function main() {
    // Fail fast with a clear message instead of a TypeError on `.replace` mid-run.
    if (!COMFY_BASE_URL || !COMFY_OUTPUT_DIR) {
        throw new Error('SERVER2_COMFY_BASE_URL and SERVER2_COMFY_OUTPUT_DIR must be set in the environment.');
    }
    await fs.mkdir(outputDir, { recursive: true });
    const files = await fs.readdir(inputDir);
    // Loop-invariant: the ComfyUI input folder is derived from the output folder path.
    // NOTE(review): this replaces the FIRST "output" substring anywhere in the path -
    // confirm the configured path contains it only as the final folder name.
    const inputfolderFullpath = COMFY_OUTPUT_DIR.replace("output", "input");
    let generatedImageIndex = 0;
    for (const file of files) {
        if (path.extname(file).toLowerCase() !== '.png') {
            continue;
        }
        const inputFile = path.join(inputDir, file);
        // One unreadable or corrupt PNG must not abort the whole batch.
        let metadata: PngMetadata | undefined;
        try {
            metadata = await readJsonToPng(inputFile) as PngMetadata;
        } catch (error) {
            console.error(`Skipping ${file}, failed to read PNG metadata:`, error);
            continue;
        }
        if (!metadata || !metadata.prompts || !Array.isArray(metadata.prompts)) {
            console.log(`Skipping ${file}, no valid prompts metadata found.`);
            continue;
        }
        console.log(`Processing ${file} with ${metadata.prompts.length} prompt pairs.`);
        // convertImage is given only the bare file name, so the source image is
        // first copied into the ComfyUI input folder.
        await fs.copyFile(inputFile, path.join(inputfolderFullpath, file));
        for (const promptPair of metadata.prompts) {
            const { imagePrompt, videoPrompt } = promptPair;
            const newFileName = `cleaned_prompt_generated_${generatedImageIndex}.png`;
            // Advance the counter even when this output is skipped so numbering stays stable.
            generatedImageIndex++;
            const outputPath = path.join(outputDir, newFileName);
            try {
                await fs.access(outputPath);
                console.log(`File ${newFileName} already exists, skipping.`);
                continue;
            } catch (error) {
                // File does not exist, proceed with generation
            }
            console.log(`Generating image for prompt: "${imagePrompt}"`);
            try {
                const generatedFilePath = await convertImage(
                    imagePrompt,
                    file, // Using the same image for both inputs as per interpretation
                    COMFY_BASE_URL,
                    COMFY_OUTPUT_DIR
                );
                // The convertImage function saves the file in a generic location.
                // We need to move it to the correct location with the correct name.
                await fs.rename(generatedFilePath, outputPath);
                const newMetadata = {
                    imagePrompt: imagePrompt,
                    videoPrompt: videoPrompt
                };
                await embedJsonToPng(outputPath, newMetadata);
                console.log(`Successfully generated and saved ${newFileName} with metadata.`);
            } catch (error) {
                // A failed prompt is logged and the loop moves on to the next pair.
                console.error(`Error generating image for prompt "${imagePrompt}":`, error);
            }
        }
    }
 }
 main().catch(console.error);
--- a/src/product/generate_prompt.ts
+++ b/src/product/generate_prompt.ts
@ -1,7 +1,10 @@
 import * as fs from 'fs';
 import * as path from 'path';
 import { callLMStudioAPIWithFile, callLmstudio } from '../lib/lmstudio';
-import { embedJsonToPng, readJsonToPng } from '../lib/util';
+import { embedJsonToPng } from '../lib/util';
 import { downloadImagesFromPinterestSearch } from '../lib/pinterest';
 import { logger } from '../lib/logger';
 import sharp from 'sharp';
 const INPUT_DIR = path.join(process.cwd(), 'input');
 const OUTPUT_DIR = path.join(process.cwd(), 'generated', 'prompts');
@ -10,148 +13,91 @@ if (!fs.existsSync(OUTPUT_DIR)) {
    fs.mkdirSync(OUTPUT_DIR, { recursive: true });
 }
-async function generatePromptsForImage(imagePath: string) {
+async function generatePromptsForImage(imagePath: string, index: number) {
-    const outputFilePath = path.join(OUTPUT_DIR, path.basename(imagePath));
+    const outputFilePath = path.join(OUTPUT_DIR, `cleaned_prompt_${index}.png`);
    logger.info(`Processing image: ${path.basename(imagePath)} -> ${path.basename(outputFilePath)}`);
    // Check if the output file already exists and has valid metadata
    if (fs.existsSync(outputFilePath)) {
    try {
-            const existingMetadata = await readJsonToPng(outputFilePath);
+        // Step 1: Detect main object and generate colors from the input image
-            if (existingMetadata && existingMetadata.imagePrompts && existingMetadata.videoPrompt) {
+        const colorGenerationPrompt = `
-                console.log(`Skipping already processed image: ${path.basename(imagePath)}`);
+You are a creative assistant. Analyze the provided image.
-                return;
+Identify the main subject product ( not a product name).
-            }
+Then, list exactly five colors related to this subject:
-        } catch (error) {
+- Two colors that are common for this object.
-            // File exists but is invalid or has no metadata, so we'll overwrite it.
+- Two colors that are uncommon but plausible.
-            console.log(`Output file for ${path.basename(imagePath)} exists but is invalid. Regenerating...`);
+- One color that is completely crazy or surreal for this object.
        }
    }
    console.log(`Processing image: ${imagePath}`);
    // Step 1: Get main subject and sub-objects
    const firstPrompt = `
 You are a creative director for unique product video generation.
 Read the given photo carefully.
 Identify and write the main subject (the most important object in the photo).
 Propose 20 possible sub-objects that could appear around the main subject in a video scene.
 Sub-objects are only suggestions.
 They should be stylish, cool, or complementary items that enhance the main subject.
 Keep each sub-object as a short noun phrase (no long explanations).
 Do not repeat similar items.
 Output strictly in this JSON format:
-
+{
-{result:{
+  "result": {
-"main-subject": "the identified main object",
+    "main_object": "the identified noun",
-"sub-object": [
+    "colors": [
-"first proposal",
+      "color1",
-"second proposal",
+      "color2",
-...
+      "color3",
-"twentieth proposal"
+      "color4",
-]
+      "color5"
 }}
 `;
    try {
        const firstApiResponse = await callLMStudioAPIWithFile(imagePath, firstPrompt);
        const firstApiResult = firstApiResponse.result;
        const mainSubject = firstApiResult['main-subject'];
        const subObjects = firstApiResult['sub-object'];
        if (!mainSubject || !Array.isArray(subObjects) || subObjects.length < 3) {
            console.error('Invalid response from the first API call for image:', imagePath);
            return;
        }
        // Step 2: Pick 3 random sub-objects
        const selectedSubObjects = subObjects.sort(() => 0.5 - Math.random()).slice(0, 3);
        // Step 3: Generate background proposals
        const secondPrompt = `
 You are a senior creative director for product photography and video.
 Follow the instructions carefully.
 Task:
 1. Extract the main subject from Figure 1.
 2. Use the three selected sub-objects provided.
 3. Generate exactly five background prompt suggestions.
 SUB1: ${selectedSubObjects[0]}
 SUB2: ${selectedSubObjects[1]}
 SUB3: ${selectedSubObjects[2]}
 Requirements for background prompts:
 - All five suggestions must be written in English.
 - Every suggestion must begin with the phrase: "Extract the object from Figure 1 and generate a new image."
 - After that phrase, always instruct to place the three sub-objects in the scene.
  Example: "and include Pink silk scarf, Pearl necklace, Pink lipstick in the scene."
 - Each suggestion must also describe:
  - Background color (must always include pink)
  - Lighting (direction, mood, intensity)
  - Style or design elements (minimal, futuristic, luxury, natural, abstract, etc.)
 - Try to describe detail for each sugegstion. > 50 words.
 - Suggestions must be visually distinct.
 - Each suggestion must use a completely different background color palette while still incorporating pink.
 - Do not mention brand names or logos.
 Special condition:
 - In the new image, always place a pink silk scarf.
 - The background color must always be pink.
 Output strictly in JSON format:
 {result:{
  "main-subject": "${mainSubject}",
  "selected-sub-objects": ["${selectedSubObjects[0]}","${selectedSubObjects[1]}","${selectedSubObjects[2]}"],
  "background-proposals": [
    "Extract the object from Figure1 and generate a new image,{be creative and generate scene with ${selectedSubObjects[0]},${selectedSubObjects[1]},${selectedSubObjects[2]} }",
    "Extract the object from Figure1 and generate a new image,{be creative and generate scene with ${selectedSubObjects[0]},${selectedSubObjects[1]},${selectedSubObjects[2]} }",
    "Extract the object from Figure1 and generate a new image,{be creative and generate scene with ${selectedSubObjects[0]},${selectedSubObjects[1]},${selectedSubObjects[2]} }",
    "Extract the object from Figure1 and generate a new image,{be creative and generate scene with ${selectedSubObjects[0]},${selectedSubObjects[1]},${selectedSubObjects[2]} }",
    "Extract the object from Figure1 and generate a new image,{be creative and generate scene with ${selectedSubObjects[0]},${selectedSubObjects[1]},${selectedSubObjects[2]} }"
    ]
-    }}
+  }
 }
 `;
        const colorResponse = await callLMStudioAPIWithFile(imagePath, colorGenerationPrompt);
        const { main_object, colors } = colorResponse.result;
-        const secondApiResponse = await callLMStudioAPIWithFile(imagePath, secondPrompt);
+        if (!main_object || !Array.isArray(colors) || colors.length !== 5) {
-        const secondApiResult = secondApiResponse.result;
+            logger.error(`Failed to get a valid main object and color list for ${imagePath}.`);
        const backgroundProposals = secondApiResult['background-proposals'];
        if (!Array.isArray(backgroundProposals) || backgroundProposals.length !== 5) {
            console.error('Invalid response from the second API call for image:', imagePath);
            return;
        }
-        // Step 4: Translate proposals to Chinese
+        logger.info(`Main object: "${main_object}", Colors: ${colors.join(', ')}`);
        const translatedProposals: string[] = [];
        for (const proposal of backgroundProposals) {
            const translationPrompt = `Translate the following English text to Chinese. Return only the translated text.
-Text: "${proposal}"
+        const prompts: { imagePrompt: string, videoPrompt: string }[] = [];
        const themes = ["special", "unique", "beautiful", "crazy", "funny"];
-Return the result in this format:
+        // Step 2: Iterate through each color
-{"result":""}
+        for (const color of colors) {
-`;
+            const randomTheme = themes[Math.floor(Math.random() * themes.length)];
-            const translationResponse = await callLmstudio(translationPrompt);
+            const pinterestQuery = `${main_object} product photo ${color} background ${randomTheme}`;
-            const translatedResult = translationResponse.result;
+            logger.info(`Searching Pinterest for: "${pinterestQuery}"`);
-            translatedProposals.push(translationResponse.result);
+
            // Step 3: Get an image from Pinterest
            const downloadedImages = await downloadImagesFromPinterestSearch(pinterestQuery, 1);
            if (downloadedImages.length === 0) {
                logger.warn(`Could not find an image on Pinterest for query: "${pinterestQuery}"`);
                continue;
            }
            const pinterestImagePath = downloadedImages[0];
            logger.info(`Downloaded Pinterest image: ${pinterestImagePath}`);
-        // Step 5: Generate video prompt
+            // Step 4: Generate a detailed prompt from the Pinterest image
            const imagePromptRequest = `
 You are an expert in generating descriptive prompts for image generation models.
 Analyze the provided image and describe it in a single, detailed paragraph.
 Focus on style, mood, lighting, color palette, sub-objects, and composition.
 Do not mention the main object itself. The prompt should be about the scene.
 Output strictly in this JSON format:
 {
  "result": "your generated prompt here"
 }
 `;
            const imagePromptResponse = await callLMStudioAPIWithFile(pinterestImagePath, imagePromptRequest);
            const imagePrompt = imagePromptResponse.result;
            if (imagePrompt) {
                logger.info(`Generated image prompt for color ${color}: "${imagePrompt}"`);
                // Step 5: Generate a matching video prompt
                const videoPromptRequest = `
 You are a creative director for a short, stylish video ad.
 Based on the provided image and the following scene description, generate an attractive video prompt.
-Main Subject: ${mainSubject}
+Main Subject: ${main_object}
-Sub-Objects: ${selectedSubObjects.join(', ')}
+Scene Description: ${imagePrompt}
 Scene Description: ${backgroundProposals[0]}
 The video prompt should:
- Be in English.
+- Be in English and approximately 50 words.
- Be approximately 50 words.
+- Describe one clear action involving the main subject.
 - Describe one clear action involving the main subject and sub-objects.
 - Include one specific camera movement (e.g., slow zoom in, orbiting shot, push-in, pull-out).
 - Be dynamic and visually appealing.
@ -160,27 +106,41 @@ Output strictly in this JSON format:
  "result": "your generated video prompt here"
 }
 `;
-        const videoPromptResponse = await callLMStudioAPIWithFile(imagePath, videoPromptRequest);
+                const videoPromptResponse = await callLMStudioAPIWithFile(pinterestImagePath, videoPromptRequest);
                const videoPrompt = videoPromptResponse.result;
-        if (!videoPrompt) {
+                if (videoPrompt) {
-            console.error('Failed to generate video prompt for image:', imagePath);
+                    logger.info(`Generated video prompt for color ${color}: "${videoPrompt}"`);
                    prompts.push({ imagePrompt, videoPrompt });
                } else {
                    logger.warn(`Failed to generate a video prompt for ${pinterestImagePath}`);
                }
            } else {
                logger.warn(`Failed to generate an image prompt for ${pinterestImagePath}`);
            }
        }
        if (prompts.length === 0) {
            logger.error(`No prompt pairs were generated for ${imagePath}. Aborting.`);
            return;
        }
-        // Step 6: Embed all prompts into PNG metadata
+        // Step 6: Embed all prompts into the original image and save to the new location
        const metadata = {
-            imagePrompts: translatedProposals,
+            prompts: prompts
            videoPrompt: videoPrompt
        };
-        fs.copyFileSync(imagePath, outputFilePath);
+        // Convert original image to a valid PNG at the output path before embedding
        await sharp(imagePath)
            .toFormat('png')
            .toFile(outputFilePath);
        await embedJsonToPng(outputFilePath, metadata);
-        console.log(`Successfully generated prompts and saved to ${outputFilePath}`);
+        logger.info(`Successfully generated prompts and saved metadata to ${outputFilePath}`);
    } catch (error) {
-        console.error(`Failed to process image ${imagePath}:`, error);
+        logger.error(`An error occurred while processing ${imagePath}:`, error);
    }
 }
@ -194,9 +154,10 @@ async function main() {
            return;
        }
-        for (const imageFile of imageFiles) {
+        for (let i = 0; i < imageFiles.length; i++) {
            const imageFile = imageFiles[i];
            const imagePath = path.join(INPUT_DIR, imageFile);
-            await generatePromptsForImage(imagePath);
+            await generatePromptsForImage(imagePath, i);
        }
        console.log('All images processed.');
--- a/src/product/generate_video.ts
+++ b/src/product/generate_video.ts
@ -0,0 +1,74 @@
 import * as fs from 'fs/promises';
 import * as path from 'path';
 import dotenv from 'dotenv';
 import { readJsonToPng } from '../lib/util';
 import { generateVideo } from '../lib/video-generator';
 dotenv.config();
 // Directory scanned by main() for PNGs that carry a videoPrompt in their metadata.
 const inputDir = './input';
 // Directory checked for already-generated videos; created by main() if missing.
 const outputDir = './generated/video';
 // ComfyUI connection settings read from the environment (loaded by dotenv above).
 // The `!` only silences the compiler: both values are `undefined` at runtime
 // when the variables are not set.
 const COMFY_BASE_URL = process.env.SERVER2_COMFY_BASE_URL!;
 // Passed to generateVideo; main() also derives the ComfyUI input folder from it
 // by replacing "output" with "input".
 const COMFY_OUTPUT_DIR = process.env.SERVER2_COMFY_OUTPUT_DIR!;
 /**
  * Shape main() expects from the JSON embedded in each input PNG.
  * Only `videoPrompt` is read in this file.
  */
 interface PngMetadata {
    imagePrompt: string;
    // Prompt handed to generateVideo; files without it are skipped.
    videoPrompt: string;
 }
 /**
  * Generates one video for every PNG in `inputDir` whose embedded metadata
  * contains a `videoPrompt`.
  *
  * The output name is `product_<i>_<suffix>.mp4`, where `<i>` is the file's
  * position among the PNGs in the directory listing and `<suffix>` is the last
  * `_`-separated part of the input file name. Videos that already exist in
  * `outputDir` are skipped. A failure on one file is logged and does not stop
  * the remaining files.
  *
  * @throws Error when the ComfyUI environment variables are not set.
  */
 async function main() {
    // Fail fast with a clear message instead of a TypeError on `.replace` mid-run.
    if (!COMFY_BASE_URL || !COMFY_OUTPUT_DIR) {
        throw new Error('SERVER2_COMFY_BASE_URL and SERVER2_COMFY_OUTPUT_DIR must be set in the environment.');
    }
    await fs.mkdir(outputDir, { recursive: true });
    const files = await fs.readdir(inputDir);
    const pngFiles = files.filter(file => path.extname(file).toLowerCase() === '.png');
    // Loop-invariant: the ComfyUI input folder is derived from the output folder path.
    // NOTE(review): this replaces the FIRST "output" substring anywhere in the path -
    // confirm the configured path contains it only as the final folder name.
    const inputfolderFullpath = COMFY_OUTPUT_DIR.replace("output", "input");
    for (let i = 0; i < pngFiles.length; i++) {
        const file = pngFiles[i];
        const inputFile = path.join(inputDir, file);
        // One unreadable or corrupt PNG must not abort the whole batch.
        let metadata: PngMetadata | undefined;
        try {
            metadata = await readJsonToPng(inputFile) as PngMetadata;
        } catch (error) {
            console.error(`Skipping ${file}, failed to read PNG metadata:`, error);
            continue;
        }
        if (!metadata || !metadata.videoPrompt) {
            console.log(`Skipping ${file}, no valid videoPrompt metadata found.`);
            continue;
        }
        console.log(`Processing ${file} for video generation.`);
        // The trailing "_<suffix>" of the input name is carried into the output name.
        const originalFileName = path.parse(file).name;
        const nameParts = originalFileName.split('_');
        const promptIndex = nameParts[nameParts.length - 1];
        const newFileName = `product_${i}_${promptIndex}.mp4`;
        const outputPath = path.join(outputDir, newFileName);
        try {
            await fs.access(outputPath);
            console.log(`File ${newFileName} already exists, skipping.`);
            continue;
        } catch (error) {
            // File does not exist, proceed with generation
        }
        console.log(`Generating video for prompt: "${metadata.videoPrompt}"`);
        try {
            // generateVideo is given only the bare file name, so the source image is
            // first copied into the ComfyUI input folder. The copy sits inside the try
            // so a failed copy is reported per file instead of aborting the run.
            await fs.copyFile(inputFile, path.join(inputfolderFullpath, file));
            await generateVideo(
                metadata.videoPrompt,
                file,
                newFileName,
                COMFY_BASE_URL,
                COMFY_OUTPUT_DIR
            );
            console.log(`Successfully generated and saved ${newFileName}`);
        } catch (error) {
            console.error(`Error generating video for ${file}:`, error);
        }
    }
 }
 main().catch(console.error);