diff --git a/src/lib/pinterest.ts b/src/lib/pinterest.ts index 5c0b016..1b3821a 100644 --- a/src/lib/pinterest.ts +++ b/src/lib/pinterest.ts @@ -36,6 +36,81 @@ export async function getPinUrlFromPinterest(keyword: string): Promise { + const browser = await puppeteer.launch({ headless: false }); + const page = await browser.newPage(); + await page.setUserAgent('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36'); + await page.setViewport({ width: 1920, height: 1080 }); + try { + const searchUrl = `https://www.pinterest.com/search/pins/?q=${encodeURIComponent(keyword)}`; + await page.goto(searchUrl, { waitUntil: 'networkidle2' }); + + logger.info(`Scrolling 3 times...`); + for (let i = 0; i < 3; i++) { + await page.evaluate('window.scrollTo(0, document.body.scrollHeight)'); + await new Promise(resolve => setTimeout(resolve, Math.random() * 1000 + 1000)); + } + + const imageUrls = await page.$$eval('img', (imgs) => { + const urls: string[] = imgs.map(img => { + const srcset = img.getAttribute('srcset') || ''; + if (!srcset) return ''; + const parts = srcset.split(',').map(p => p.trim()); + for (const part of parts) { + const m = part.match(/^(\S+)\s+4x$/); + if (m && m[1]) return m[1]; + } + const src = img.src || ''; + if (src.includes('/originals/')) return src; + return ''; + }).filter(s => !!s && s.includes('pinimg')); + // Remove duplicates + return [...new Set(urls)]; + }); + + if (imageUrls.length === 0) { + logger.warn(`No 4x image URLs found for keyword "${keyword}"`); + return []; + } + + // shuffle and pick up to `count` unique images + const shuffled = imageUrls.slice().sort(() => 0.5 - Math.random()); + const chosen = shuffled.slice(0, Math.min(count, shuffled.length)); + + const outDir = path.join(process.cwd(), 'download'); + await fs.mkdir(outDir, { recursive: true }); + + const results: string[] = []; + for (let i = 0; i < chosen.length; i++) { + const src = chosen[i]; + try { + const imgPage = await browser.newPage(); + const resp = await imgPage.goto(src, { timeout: 30000, waitUntil: 'networkidle2' }); + if (!resp) { + logger.warn(`Failed to fetch image ${src}`); + await imgPage.close(); + continue; + } + const buffer = await resp.buffer(); + const timestamp = Date.now(); + const outPath = path.join(outDir, `${keyword.replace(/\s+/g, '_')}_${timestamp}_${i}.png`); + await fs.writeFile(outPath, buffer); + results.push(outPath); + await imgPage.close(); + } catch (err) { + logger.error(`Failed to download image ${src}:`, err); + } + } + return results; + + } catch (error) { + logger.error(`Error while downloading images for keyword "${keyword}":`, error); + return []; + } finally { + await browser.close(); + } +} + // Download up to `count` images from a pin URL by opening the pin page and scro lling up to 5 times to trigger lazy loading // Returns an array of saved image paths (may be empty) diff --git a/src/product/generate_image.ts b/src/product/generate_image.ts new file mode 100644 index 0000000..4bef5a7 --- /dev/null +++ b/src/product/generate_image.ts @@ -0,0 +1,88 @@ +import * as fs from 'fs/promises'; +import * as path from 'path'; +import dotenv from 'dotenv'; +import { readJsonToPng, embedJsonToPng } from '../lib/util'; +import { convertImage } from '../lib/image-converter'; + +dotenv.config(); + +const inputDir = './generated/prompts'; +const outputDir = './generated/image'; + +const COMFY_BASE_URL = process.env.SERVER2_COMFY_BASE_URL!; +const COMFY_OUTPUT_DIR = process.env.SERVER2_COMFY_OUTPUT_DIR!; + +interface PngMetadata { + prompts: { + imagePrompt: string; + videoPrompt: string; + }[]; +} + +async function main() { + await fs.mkdir(outputDir, { recursive: true }); + + const files = await fs.readdir(inputDir); + let generatedImageIndex = 0; + + for (const file of files) { + if (path.extname(file).toLowerCase() !== '.png') { + continue; + } + + const inputFile = path.join(inputDir, file); + const metadata = await readJsonToPng(inputFile) as PngMetadata; + + if (metadata && metadata.prompts && Array.isArray(metadata.prompts)) { + console.log(`Processing ${file} with ${metadata.prompts.length} prompt pairs.`); + + const inputfolderFullpath = COMFY_OUTPUT_DIR.replace("output", "input"); + await fs.copyFile(inputFile, path.join(inputfolderFullpath, file)); + + for (const promptPair of metadata.prompts) { + const { imagePrompt, videoPrompt } = promptPair; + const newFileName = `cleaned_prompt_generated_${generatedImageIndex}.png`; + generatedImageIndex++; + const outputPath = path.join(outputDir, newFileName); + + try { + await fs.access(outputPath); + console.log(`File ${newFileName} already exists, skipping.`); + continue; + } catch (error) { + // File does not exist, proceed with generation + } + + console.log(`Generating image for prompt: "${imagePrompt}"`); + + try { + const generatedFilePath = await convertImage( + imagePrompt, + file, // Using the same image for both inputs as per interpretation + COMFY_BASE_URL, + COMFY_OUTPUT_DIR + ); + + // The convertImage function saves the file in a generic location. + // We need to move it to the correct location with the correct name. + await fs.rename(generatedFilePath, outputPath); + + const newMetadata = { + imagePrompt: imagePrompt, + videoPrompt: videoPrompt + }; + + await embedJsonToPng(outputPath, newMetadata); + + console.log(`Successfully generated and saved ${newFileName} with metadata.`); + } catch (error) { + console.error(`Error generating image for prompt "${imagePrompt}":`, error); + } + } + } else { + console.log(`Skipping ${file}, no valid prompts metadata found.`); + } + } +} + +main().catch(console.error); diff --git a/src/product/generate_prompt.ts b/src/product/generate_prompt.ts index b258df2..bb3e24b 100644 --- a/src/product/generate_prompt.ts +++ b/src/product/generate_prompt.ts @@ -1,7 +1,10 @@ import * as fs from 'fs'; import * as path from 'path'; import { callLMStudioAPIWithFile, callLmstudio } from '../lib/lmstudio'; -import { embedJsonToPng, readJsonToPng } from '../lib/util'; +import { embedJsonToPng } from '../lib/util'; +import { downloadImagesFromPinterestSearch } from '../lib/pinterest'; +import { logger } from '../lib/logger'; +import sharp from 'sharp'; const INPUT_DIR = path.join(process.cwd(), 'input'); const OUTPUT_DIR = path.join(process.cwd(), 'generated', 'prompts'); @@ -10,148 +13,91 @@ if (!fs.existsSync(OUTPUT_DIR)) { fs.mkdirSync(OUTPUT_DIR, { recursive: true }); } -async function generatePromptsForImage(imagePath: string) { - const outputFilePath = path.join(OUTPUT_DIR, path.basename(imagePath)); - - // Check if the output file already exists and has valid metadata - if (fs.existsSync(outputFilePath)) { - try { - const existingMetadata = await readJsonToPng(outputFilePath); - if (existingMetadata && existingMetadata.imagePrompts && existingMetadata.videoPrompt) { - console.log(`Skipping already processed image: ${path.basename(imagePath)}`); - return; - } - } catch (error) { - // File exists but is invalid or has no metadata, so we'll overwrite it. - console.log(`Output file for ${path.basename(imagePath)} exists but is invalid. Regenerating...`); - } - } - - console.log(`Processing image: ${imagePath}`); - - // Step 1: Get main subject and sub-objects - const firstPrompt = ` -You are a creative director for unique product video generation. - -Read the given photo carefully. - -Identify and write the main subject (the most important object in the photo). -Propose 20 possible sub-objects that could appear around the main subject in a video scene. -Sub-objects are only suggestions. -They should be stylish, cool, or complementary items that enhance the main subject. -Keep each sub-object as a short noun phrase (no long explanations). -Do not repeat similar items. -Output strictly in this JSON format: - -{result:{ -"main-subject": "the identified main object", -"sub-object": [ -"first proposal", -"second proposal", -... -"twentieth proposal" -] -}} -`; +async function generatePromptsForImage(imagePath: string, index: number) { + const outputFilePath = path.join(OUTPUT_DIR, `cleaned_prompt_${index}.png`); + logger.info(`Processing image: ${path.basename(imagePath)} -> ${path.basename(outputFilePath)}`); try { - const firstApiResponse = await callLMStudioAPIWithFile(imagePath, firstPrompt); - const firstApiResult = firstApiResponse.result; - const mainSubject = firstApiResult['main-subject']; - const subObjects = firstApiResult['sub-object']; + // Step 1: Detect main object and generate colors from the input image + const colorGenerationPrompt = ` +You are a creative assistant. Analyze the provided image. +Identify the main subject product ( not a product name). +Then, list exactly five colors related to this subject: +- Two colors that are common for this object. +- Two colors that are uncommon but plausible. +- One color that is completely crazy or surreal for this object. - if (!mainSubject || !Array.isArray(subObjects) || subObjects.length < 3) { - console.error('Invalid response from the first API call for image:', imagePath); +Output strictly in this JSON format: +{ + "result": { + "main_object": "the identified noun", + "colors": [ + "color1", + "color2", + "color3", + "color4", + "color5" + ] + } +} +`; + const colorResponse = await callLMStudioAPIWithFile(imagePath, colorGenerationPrompt); + const { main_object, colors } = colorResponse.result; + + if (!main_object || !Array.isArray(colors) || colors.length !== 5) { + logger.error(`Failed to get a valid main object and color list for ${imagePath}.`); return; } - // Step 2: Pick 3 random sub-objects - const selectedSubObjects = subObjects.sort(() => 0.5 - Math.random()).slice(0, 3); + logger.info(`Main object: "${main_object}", Colors: ${colors.join(', ')}`); - // Step 3: Generate background proposals - const secondPrompt = ` -You are a senior creative director for product photography and video. -Follow the instructions carefully. + const prompts: { imagePrompt: string, videoPrompt: string }[] = []; + const themes = ["special", "unique", "beautiful", "crazy", "funny"]; -Task: -1. Extract the main subject from Figure 1. -2. Use the three selected sub-objects provided. -3. Generate exactly five background prompt suggestions. + // Step 2: Iterate through each color + for (const color of colors) { + const randomTheme = themes[Math.floor(Math.random() * themes.length)]; + const pinterestQuery = `${main_object} product photo ${color} background ${randomTheme}`; + logger.info(`Searching Pinterest for: "${pinterestQuery}"`); -SUB1: ${selectedSubObjects[0]} -SUB2: ${selectedSubObjects[1]} -SUB3: ${selectedSubObjects[2]} + // Step 3: Get an image from Pinterest + const downloadedImages = await downloadImagesFromPinterestSearch(pinterestQuery, 1); + if (downloadedImages.length === 0) { + logger.warn(`Could not find an image on Pinterest for query: "${pinterestQuery}"`); + continue; + } + const pinterestImagePath = downloadedImages[0]; + logger.info(`Downloaded Pinterest image: ${pinterestImagePath}`); -Requirements for background prompts: -- All five suggestions must be written in English. -- Every suggestion must begin with the phrase: "Extract the object from Figure 1 and generate a new image." -- After that phrase, always instruct to place the three sub-objects in the scene. - Example: "and include Pink silk scarf, Pearl necklace, Pink lipstick in the scene." -- Each suggestion must also describe: - - Background color (must always include pink) - - Lighting (direction, mood, intensity) - - Style or design elements (minimal, futuristic, luxury, natural, abstract, etc.) -- Try to describe detail for each sugegstion. > 50 words. -- Suggestions must be visually distinct. -- Each suggestion must use a completely different background color palette while still incorporating pink. -- Do not mention brand names or logos. + // Step 4: Generate a detailed prompt from the Pinterest image + const imagePromptRequest = ` +You are an expert in generating descriptive prompts for image generation models. +Analyze the provided image and describe it in a single, detailed paragraph. +Focus on style, mood, lighting, color palette, sub-objects, and composition. +Do not mention the main object itself. The prompt should be about the scene. -Special condition: -- In the new image, always place a pink silk scarf. -- The background color must always be pink. - -Output strictly in JSON format: - -{result:{ - "main-subject": "${mainSubject}", - "selected-sub-objects": ["${selectedSubObjects[0]}","${selectedSubObjects[1]}","${selectedSubObjects[2]}"], - "background-proposals": [ - "Extract the object from Figure1 and generate a new image,{be creative and generate scene with ${selectedSubObjects[0]},${selectedSubObjects[1]},${selectedSubObjects[2]} }", - "Extract the object from Figure1 and generate a new image,{be creative and generate scene with ${selectedSubObjects[0]},${selectedSubObjects[1]},${selectedSubObjects[2]} }", - "Extract the object from Figure1 and generate a new image,{be creative and generate scene with ${selectedSubObjects[0]},${selectedSubObjects[1]},${selectedSubObjects[2]} }", - "Extract the object from Figure1 and generate a new image,{be creative and generate scene with ${selectedSubObjects[0]},${selectedSubObjects[1]},${selectedSubObjects[2]} }", - "Extract the object from Figure1 and generate a new image,{be creative and generate scene with ${selectedSubObjects[0]},${selectedSubObjects[1]},${selectedSubObjects[2]} }" - ] - }} +Output strictly in this JSON format: +{ + "result": "your generated prompt here" +} `; + const imagePromptResponse = await callLMStudioAPIWithFile(pinterestImagePath, imagePromptRequest); + const imagePrompt = imagePromptResponse.result; - const secondApiResponse = await callLMStudioAPIWithFile(imagePath, secondPrompt); - const secondApiResult = secondApiResponse.result; - const backgroundProposals = secondApiResult['background-proposals']; + if (imagePrompt) { + logger.info(`Generated image prompt for color ${color}: "${imagePrompt}"`); - if (!Array.isArray(backgroundProposals) || backgroundProposals.length !== 5) { - console.error('Invalid response from the second API call for image:', imagePath); - return; - } - - // Step 4: Translate proposals to Chinese - const translatedProposals: string[] = []; - for (const proposal of backgroundProposals) { - const translationPrompt = `Translate the following English text to Chinese. Return only the translated text. - -Text: "${proposal}" - -Return the result in this format: -{"result":""} -`; - const translationResponse = await callLmstudio(translationPrompt); - const translatedResult = translationResponse.result; - translatedProposals.push(translationResponse.result); - } - - // Step 5: Generate video prompt - const videoPromptRequest = ` + // Step 5: Generate a matching video prompt + const videoPromptRequest = ` You are a creative director for a short, stylish video ad. Based on the provided image and the following scene description, generate an attractive video prompt. -Main Subject: ${mainSubject} -Sub-Objects: ${selectedSubObjects.join(', ')} -Scene Description: ${backgroundProposals[0]} +Main Subject: ${main_object} +Scene Description: ${imagePrompt} The video prompt should: -- Be in English. -- Be approximately 50 words. -- Describe one clear action involving the main subject and sub-objects. +- Be in English and approximately 50 words. +- Describe one clear action involving the main subject. - Include one specific camera movement (e.g., slow zoom in, orbiting shot, push-in, pull-out). - Be dynamic and visually appealing. @@ -160,27 +106,41 @@ Output strictly in this JSON format: "result": "your generated video prompt here" } `; - const videoPromptResponse = await callLMStudioAPIWithFile(imagePath, videoPromptRequest); - const videoPrompt = videoPromptResponse.result; + const videoPromptResponse = await callLMStudioAPIWithFile(pinterestImagePath, videoPromptRequest); + const videoPrompt = videoPromptResponse.result; - if (!videoPrompt) { - console.error('Failed to generate video prompt for image:', imagePath); + if (videoPrompt) { + logger.info(`Generated video prompt for color ${color}: "${videoPrompt}"`); + prompts.push({ imagePrompt, videoPrompt }); + } else { + logger.warn(`Failed to generate a video prompt for ${pinterestImagePath}`); + } + } else { + logger.warn(`Failed to generate an image prompt for ${pinterestImagePath}`); + } + } + + if (prompts.length === 0) { + logger.error(`No prompt pairs were generated for ${imagePath}. Aborting.`); return; } - // Step 6: Embed all prompts into PNG metadata + // Step 6: Embed all prompts into the original image and save to the new location const metadata = { - imagePrompts: translatedProposals, - videoPrompt: videoPrompt + prompts: prompts }; - fs.copyFileSync(imagePath, outputFilePath); + // Convert original image to a valid PNG at the output path before embedding + await sharp(imagePath) + .toFormat('png') + .toFile(outputFilePath); + await embedJsonToPng(outputFilePath, metadata); - console.log(`Successfully generated prompts and saved to ${outputFilePath}`); + logger.info(`Successfully generated prompts and saved metadata to ${outputFilePath}`); } catch (error) { - console.error(`Failed to process image ${imagePath}:`, error); + logger.error(`An error occurred while processing ${imagePath}:`, error); } } @@ -194,9 +154,10 @@ async function main() { return; } - for (const imageFile of imageFiles) { + for (let i = 0; i < imageFiles.length; i++) { + const imageFile = imageFiles[i]; const imagePath = path.join(INPUT_DIR, imageFile); - await generatePromptsForImage(imagePath); + await generatePromptsForImage(imagePath, i); } console.log('All images processed.'); diff --git a/src/product/generate_video.ts b/src/product/generate_video.ts new file mode 100644 index 0000000..e2f591a --- /dev/null +++ b/src/product/generate_video.ts @@ -0,0 +1,74 @@ +import * as fs from 'fs/promises'; +import * as path from 'path'; +import dotenv from 'dotenv'; +import { readJsonToPng } from '../lib/util'; +import { generateVideo } from '../lib/video-generator'; + +dotenv.config(); + +const inputDir = './input'; +const outputDir = './generated/video'; + +const COMFY_BASE_URL = process.env.SERVER2_COMFY_BASE_URL!; +const COMFY_OUTPUT_DIR = process.env.SERVER2_COMFY_OUTPUT_DIR!; + + +interface PngMetadata { + imagePrompt: string; + videoPrompt: string; +} + +async function main() { + await fs.mkdir(outputDir, { recursive: true }); + + const files = await fs.readdir(inputDir); + const pngFiles = files.filter(file => path.extname(file).toLowerCase() === '.png'); + + for (let i = 0; i < pngFiles.length; i++) { + const file = pngFiles[i]; + + const inputFile = path.join(inputDir, file); + const metadata = await readJsonToPng(inputFile) as PngMetadata; + + if (metadata && metadata.videoPrompt) { + console.log(`Processing ${file} for video generation.`); + + const originalFileName = path.parse(file).name; + const nameParts = originalFileName.split('_'); + const promptIndex = nameParts[nameParts.length - 1]; + const newFileName = `product_${i}_${promptIndex}.mp4`; + const outputPath = path.join(outputDir, newFileName); + + try { + await fs.access(outputPath); + console.log(`File ${newFileName} already exists, skipping.`); + continue; + } catch (error) { + // File does not exist, proceed with generation + } + + console.log(`Generating video for prompt: "${metadata.videoPrompt}"`); + + const inputfolderFullpath = COMFY_OUTPUT_DIR.replace("output", "input"); + await fs.copyFile(inputFile, path.join(inputfolderFullpath, file)); + + try { + await generateVideo( + metadata.videoPrompt, + file, + newFileName, + COMFY_BASE_URL, + COMFY_OUTPUT_DIR + ); + + console.log(`Successfully generated and saved ${newFileName}`); + } catch (error) { + console.error(`Error generating video for ${file}:`, error); + } + } else { + console.log(`Skipping ${file}, no valid videoPrompt metadata found.`); + } + } +} + +main().catch(console.error);