save changes

This commit is contained in:
2025-10-06 00:02:50 +02:00
parent 1697523000
commit 4452508dd4
4 changed files with 334 additions and 136 deletions

View File

@ -36,6 +36,81 @@ export async function getPinUrlFromPinterest(keyword: string): Promise<string |
}
}
/**
 * Searches Pinterest for `keyword` and saves up to `count` randomly chosen
 * result images into `<cwd>/download`.
 *
 * Image URLs are taken from the `4x` candidate of each `<img srcset>` on the
 * search results page (falling back to an `/originals/` `src` when the srcset
 * has no `4x` entry), limited to URLs containing `pinimg`, and de-duplicated.
 *
 * @param keyword Search phrase; a sanitised form is used as the file-name prefix.
 * @param count Maximum number of images to save. Values `<= 0` (or `NaN`) yield `[]`
 *   without starting a browser.
 * @returns Paths of the files that were written. Empty when nothing was found or
 *   the search itself failed; individual download failures are logged and skipped.
 */
export async function downloadImagesFromPinterestSearch(keyword: string, count: number): Promise<string[]> {
  // Nothing to do. This also protects the `slice` below: a negative end index
  // would mean "all but the last N" and return images nobody asked for.
  if (!(count > 0)) {
    return [];
  }

  const browser = await puppeteer.launch({ headless: false });
  try {
    // Page setup lives inside the try so the browser is closed even if it fails.
    const page = await browser.newPage();
    await page.setUserAgent('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36');
    await page.setViewport({ width: 1920, height: 1080 });

    const searchUrl = `https://www.pinterest.com/search/pins/?q=${encodeURIComponent(keyword)}`;
    await page.goto(searchUrl, { waitUntil: 'networkidle2' });

    // Scroll to the bottom a few times, pausing 1-2 s each time, so more results load.
    logger.info(`Scrolling 3 times...`);
    for (let i = 0; i < 3; i++) {
      await page.evaluate('window.scrollTo(0, document.body.scrollHeight)');
      await new Promise(resolve => setTimeout(resolve, Math.random() * 1000 + 1000));
    }

    // NOTE: this callback is executed in the page, so it must stay self-contained.
    const imageUrls = await page.$$eval('img', (imgs) => {
      const urls: string[] = imgs.map(img => {
        const srcset = img.getAttribute('srcset') || '';
        if (!srcset) return '';
        const parts = srcset.split(',').map(p => p.trim());
        for (const part of parts) {
          // A srcset candidate looks like "<url> 4x"; keep only the 4x variant.
          const m = part.match(/^(\S+)\s+4x$/);
          if (m && m[1]) return m[1];
        }
        const src = img.src || '';
        if (src.includes('/originals/')) return src;
        return '';
      }).filter(s => !!s && s.includes('pinimg'));
      // Remove duplicates
      return [...new Set(urls)];
    });

    if (imageUrls.length === 0) {
      logger.warn(`No 4x image URLs found for keyword "${keyword}"`);
      return [];
    }

    // Fisher-Yates shuffle on a copy: unlike sorting with a random comparator,
    // every permutation is equally likely.
    const shuffled = imageUrls.slice();
    for (let i = shuffled.length - 1; i > 0; i--) {
      const j = Math.floor(Math.random() * (i + 1));
      [shuffled[i], shuffled[j]] = [shuffled[j], shuffled[i]];
    }
    const chosen = shuffled.slice(0, Math.min(count, shuffled.length));

    const outDir = path.join(process.cwd(), 'download');
    await fs.mkdir(outDir, { recursive: true });

    // Whitespace becomes "_" as before; characters that are path separators or
    // invalid in file names are replaced too, so the keyword cannot escape outDir.
    const safeKeyword = keyword.replace(/\s+/g, '_').replace(/[\\/:*?"<>|]/g, '_');

    const results: string[] = [];
    for (let i = 0; i < chosen.length; i++) {
      const src = chosen[i];
      try {
        const imgPage = await browser.newPage();
        try {
          const resp = await imgPage.goto(src, { timeout: 30000, waitUntil: 'networkidle2' });
          if (!resp) {
            logger.warn(`Failed to fetch image ${src}`);
            continue;
          }
          const buffer = await resp.buffer();
          const timestamp = Date.now();
          // NOTE(review): the bytes are written as fetched; the ".png" suffix does
          // not convert the image, so the file may actually hold another format.
          const outPath = path.join(outDir, `${safeKeyword}_${timestamp}_${i}.png`);
          await fs.writeFile(outPath, buffer);
          results.push(outPath);
        } finally {
          // Always release the tab, including on errors and on `continue`.
          await imgPage.close();
        }
      } catch (err) {
        logger.error(`Failed to download image ${src}:`, err);
      }
    }
    return results;
  } catch (error) {
    logger.error(`Error while downloading images for keyword "${keyword}":`, error);
    return [];
  } finally {
    await browser.close();
  }
}
// Download up to `count` images from a pin URL by opening the pin page and scrolling up to 5 times to trigger lazy loading
// Returns an array of saved image paths (may be empty)

View File

@ -0,0 +1,88 @@
import * as fs from 'fs/promises';
import * as path from 'path';
import dotenv from 'dotenv';
import { readJsonToPng, embedJsonToPng } from '../lib/util';
import { convertImage } from '../lib/image-converter';
// Load variables from a .env file into process.env before they are read below.
dotenv.config();
// Directory scanned for source PNG files that carry embedded prompt metadata.
const inputDir = './generated/prompts';
// Directory that receives the generated images.
const outputDir = './generated/image';
// Comfy server settings read from the environment. The `!` only silences the
// compiler: both values are `undefined` at runtime when the variables are unset.
const COMFY_BASE_URL = process.env.SERVER2_COMFY_BASE_URL!;
const COMFY_OUTPUT_DIR = process.env.SERVER2_COMFY_OUTPUT_DIR!;
/** Shape of the JSON metadata this script expects to read from each input PNG. */
interface PngMetadata {
/** Prompt pairs; the script generates one image per entry. */
prompts: {
/** Prompt passed to `convertImage` to generate the image. */
imagePrompt: string;
/** Prompt copied into the metadata of the generated image. */
videoPrompt: string;
}[];
}
/**
 * Moves a file, falling back to copy + delete when source and destination are
 * on different filesystems (where `rename` fails with `EXDEV`).
 */
async function moveFile(from: string, to: string): Promise<void> {
  try {
    await fs.rename(from, to);
  } catch (error) {
    const code = (error as { code?: unknown } | null)?.code;
    if (code !== 'EXDEV') {
      throw error;
    }
    await fs.copyFile(from, to);
    await fs.unlink(from);
  }
}

/**
 * Generates one image per prompt pair found in the metadata of every PNG in
 * `inputDir` and stores it in `outputDir` as
 * `cleaned_prompt_generated_<n>.png`, with the prompt pair embedded as metadata.
 *
 * `<n>` is a counter that runs across all input files, so an output that already
 * exists is skipped on a re-run. A file or prompt that fails is logged and
 * skipped; the remaining work continues.
 *
 * @throws Error when the required Comfy environment variables are not set.
 */
async function main() {
  // The `!` on the constants is compile-time only; fail early with a clear message
  // instead of a TypeError deep inside the loop.
  if (!COMFY_BASE_URL || !COMFY_OUTPUT_DIR) {
    throw new Error('SERVER2_COMFY_BASE_URL and SERVER2_COMFY_OUTPUT_DIR must be set.');
  }

  await fs.mkdir(outputDir, { recursive: true });

  // Loop-invariant: the Comfy input folder is derived from the output folder.
  // NOTE(review): this replaces the FIRST "output" substring of the path; confirm
  // that the configured path contains it only as the final folder name.
  const inputfolderFullpath = COMFY_OUTPUT_DIR.replace("output", "input");

  const files = await fs.readdir(inputDir);
  let generatedImageIndex = 0;

  for (const file of files) {
    if (path.extname(file).toLowerCase() !== '.png') {
      continue;
    }

    const inputFile = path.join(inputDir, file);

    // A PNG without readable metadata must not abort the whole batch.
    let metadata: PngMetadata;
    try {
      metadata = await readJsonToPng(inputFile) as PngMetadata;
    } catch (error) {
      console.error(`Skipping ${file}, failed to read metadata:`, error);
      continue;
    }

    if (!metadata || !Array.isArray(metadata.prompts)) {
      console.log(`Skipping ${file}, no valid prompts metadata found.`);
      continue;
    }

    console.log(`Processing ${file} with ${metadata.prompts.length} prompt pairs.`);

    try {
      await fs.copyFile(inputFile, path.join(inputfolderFullpath, file));
    } catch (error) {
      console.error(`Skipping ${file}, could not copy it to ${inputfolderFullpath}:`, error);
      // Keep the numbering of later files identical to a fully successful run.
      generatedImageIndex += metadata.prompts.length;
      continue;
    }

    for (const promptPair of metadata.prompts) {
      // The index advances for every entry, including skipped ones, so output names stay stable.
      const newFileName = `cleaned_prompt_generated_${generatedImageIndex}.png`;
      generatedImageIndex++;

      const imagePrompt: unknown = promptPair?.imagePrompt;
      const videoPrompt = promptPair?.videoPrompt;
      if (typeof imagePrompt !== 'string' || imagePrompt.trim() === '') {
        console.error(`Skipping ${newFileName}, prompt entry in ${file} has no imagePrompt.`);
        continue;
      }

      const outputPath = path.join(outputDir, newFileName);
      try {
        await fs.access(outputPath);
        console.log(`File ${newFileName} already exists, skipping.`);
        continue;
      } catch {
        // File does not exist, proceed with generation
      }

      console.log(`Generating image for prompt: "${imagePrompt}"`);
      try {
        const generatedFilePath = await convertImage(
          imagePrompt,
          file, // Using the same image for both inputs as per interpretation
          COMFY_BASE_URL,
          COMFY_OUTPUT_DIR
        );

        // The convertImage function saves the file in a generic location.
        // We need to move it to the correct location with the correct name.
        await moveFile(generatedFilePath, outputPath);

        const newMetadata = {
          imagePrompt: imagePrompt,
          videoPrompt: videoPrompt
        };
        await embedJsonToPng(outputPath, newMetadata);
        console.log(`Successfully generated and saved ${newFileName} with metadata.`);
      } catch (error) {
        console.error(`Error generating image for prompt "${imagePrompt}":`, error);
      }
    }
  }
}

main().catch((error) => {
  console.error(error);
  // Signal the failure to the calling shell instead of exiting with 0.
  process.exitCode = 1;
});

View File

@ -1,7 +1,10 @@
import * as fs from 'fs';
import * as path from 'path';
import { callLMStudioAPIWithFile, callLmstudio } from '../lib/lmstudio';
import { embedJsonToPng, readJsonToPng } from '../lib/util';
import { embedJsonToPng } from '../lib/util';
import { downloadImagesFromPinterestSearch } from '../lib/pinterest';
import { logger } from '../lib/logger';
import sharp from 'sharp';
const INPUT_DIR = path.join(process.cwd(), 'input');
const OUTPUT_DIR = path.join(process.cwd(), 'generated', 'prompts');
@ -10,148 +13,91 @@ if (!fs.existsSync(OUTPUT_DIR)) {
fs.mkdirSync(OUTPUT_DIR, { recursive: true });
}
async function generatePromptsForImage(imagePath: string) {
const outputFilePath = path.join(OUTPUT_DIR, path.basename(imagePath));
async function generatePromptsForImage(imagePath: string, index: number) {
const outputFilePath = path.join(OUTPUT_DIR, `cleaned_prompt_${index}.png`);
logger.info(`Processing image: ${path.basename(imagePath)} -> ${path.basename(outputFilePath)}`);
// Check if the output file already exists and has valid metadata
if (fs.existsSync(outputFilePath)) {
try {
const existingMetadata = await readJsonToPng(outputFilePath);
if (existingMetadata && existingMetadata.imagePrompts && existingMetadata.videoPrompt) {
console.log(`Skipping already processed image: ${path.basename(imagePath)}`);
return;
}
} catch (error) {
// File exists but is invalid or has no metadata, so we'll overwrite it.
console.log(`Output file for ${path.basename(imagePath)} exists but is invalid. Regenerating...`);
}
}
// Step 1: Detect main object and generate colors from the input image
const colorGenerationPrompt = `
You are a creative assistant. Analyze the provided image.
Identify the main subject product ( not a product name).
Then, list exactly five colors related to this subject:
- Two colors that are common for this object.
- Two colors that are uncommon but plausible.
- One color that is completely crazy or surreal for this object.
console.log(`Processing image: ${imagePath}`);
// Step 1: Get main subject and sub-objects
const firstPrompt = `
You are a creative director for unique product video generation.
Read the given photo carefully.
Identify and write the main subject (the most important object in the photo).
Propose 20 possible sub-objects that could appear around the main subject in a video scene.
Sub-objects are only suggestions.
They should be stylish, cool, or complementary items that enhance the main subject.
Keep each sub-object as a short noun phrase (no long explanations).
Do not repeat similar items.
Output strictly in this JSON format:
{result:{
"main-subject": "the identified main object",
"sub-object": [
"first proposal",
"second proposal",
...
"twentieth proposal"
{
"result": {
"main_object": "the identified noun",
"colors": [
"color1",
"color2",
"color3",
"color4",
"color5"
]
}}
}
}
`;
const colorResponse = await callLMStudioAPIWithFile(imagePath, colorGenerationPrompt);
const { main_object, colors } = colorResponse.result;
try {
const firstApiResponse = await callLMStudioAPIWithFile(imagePath, firstPrompt);
const firstApiResult = firstApiResponse.result;
const mainSubject = firstApiResult['main-subject'];
const subObjects = firstApiResult['sub-object'];
if (!mainSubject || !Array.isArray(subObjects) || subObjects.length < 3) {
console.error('Invalid response from the first API call for image:', imagePath);
if (!main_object || !Array.isArray(colors) || colors.length !== 5) {
logger.error(`Failed to get a valid main object and color list for ${imagePath}.`);
return;
}
// Step 2: Pick 3 random sub-objects
const selectedSubObjects = subObjects.sort(() => 0.5 - Math.random()).slice(0, 3);
logger.info(`Main object: "${main_object}", Colors: ${colors.join(', ')}`);
// Step 3: Generate background proposals
const secondPrompt = `
You are a senior creative director for product photography and video.
Follow the instructions carefully.
const prompts: { imagePrompt: string, videoPrompt: string }[] = [];
const themes = ["special", "unique", "beautiful", "crazy", "funny"];
Task:
1. Extract the main subject from Figure 1.
2. Use the three selected sub-objects provided.
3. Generate exactly five background prompt suggestions.
// Step 2: Iterate through each color
for (const color of colors) {
const randomTheme = themes[Math.floor(Math.random() * themes.length)];
const pinterestQuery = `${main_object} product photo ${color} background ${randomTheme}`;
logger.info(`Searching Pinterest for: "${pinterestQuery}"`);
SUB1: ${selectedSubObjects[0]}
SUB2: ${selectedSubObjects[1]}
SUB3: ${selectedSubObjects[2]}
Requirements for background prompts:
- All five suggestions must be written in English.
- Every suggestion must begin with the phrase: "Extract the object from Figure 1 and generate a new image."
- After that phrase, always instruct to place the three sub-objects in the scene.
Example: "and include Pink silk scarf, Pearl necklace, Pink lipstick in the scene."
- Each suggestion must also describe:
- Background color (must always include pink)
- Lighting (direction, mood, intensity)
- Style or design elements (minimal, futuristic, luxury, natural, abstract, etc.)
- Try to describe detail for each sugegstion. > 50 words.
- Suggestions must be visually distinct.
- Each suggestion must use a completely different background color palette while still incorporating pink.
- Do not mention brand names or logos.
Special condition:
- In the new image, always place a pink silk scarf.
- The background color must always be pink.
Output strictly in JSON format:
{result:{
"main-subject": "${mainSubject}",
"selected-sub-objects": ["${selectedSubObjects[0]}","${selectedSubObjects[1]}","${selectedSubObjects[2]}"],
"background-proposals": [
"Extract the object from Figure1 and generate a new image,{be creative and generate scene with ${selectedSubObjects[0]},${selectedSubObjects[1]},${selectedSubObjects[2]} }",
"Extract the object from Figure1 and generate a new image,{be creative and generate scene with ${selectedSubObjects[0]},${selectedSubObjects[1]},${selectedSubObjects[2]} }",
"Extract the object from Figure1 and generate a new image,{be creative and generate scene with ${selectedSubObjects[0]},${selectedSubObjects[1]},${selectedSubObjects[2]} }",
"Extract the object from Figure1 and generate a new image,{be creative and generate scene with ${selectedSubObjects[0]},${selectedSubObjects[1]},${selectedSubObjects[2]} }",
"Extract the object from Figure1 and generate a new image,{be creative and generate scene with ${selectedSubObjects[0]},${selectedSubObjects[1]},${selectedSubObjects[2]} }"
]
}}
`;
const secondApiResponse = await callLMStudioAPIWithFile(imagePath, secondPrompt);
const secondApiResult = secondApiResponse.result;
const backgroundProposals = secondApiResult['background-proposals'];
if (!Array.isArray(backgroundProposals) || backgroundProposals.length !== 5) {
console.error('Invalid response from the second API call for image:', imagePath);
return;
// Step 3: Get an image from Pinterest
const downloadedImages = await downloadImagesFromPinterestSearch(pinterestQuery, 1);
if (downloadedImages.length === 0) {
logger.warn(`Could not find an image on Pinterest for query: "${pinterestQuery}"`);
continue;
}
const pinterestImagePath = downloadedImages[0];
logger.info(`Downloaded Pinterest image: ${pinterestImagePath}`);
// Step 4: Translate proposals to Chinese
const translatedProposals: string[] = [];
for (const proposal of backgroundProposals) {
const translationPrompt = `Translate the following English text to Chinese. Return only the translated text.
// Step 4: Generate a detailed prompt from the Pinterest image
const imagePromptRequest = `
You are an expert in generating descriptive prompts for image generation models.
Analyze the provided image and describe it in a single, detailed paragraph.
Focus on style, mood, lighting, color palette, sub-objects, and composition.
Do not mention the main object itself. The prompt should be about the scene.
Text: "${proposal}"
Return the result in this format:
{"result":""}
`;
const translationResponse = await callLmstudio(translationPrompt);
const translatedResult = translationResponse.result;
translatedProposals.push(translationResponse.result);
Output strictly in this JSON format:
{
"result": "your generated prompt here"
}
`;
const imagePromptResponse = await callLMStudioAPIWithFile(pinterestImagePath, imagePromptRequest);
const imagePrompt = imagePromptResponse.result;
// Step 5: Generate video prompt
if (imagePrompt) {
logger.info(`Generated image prompt for color ${color}: "${imagePrompt}"`);
// Step 5: Generate a matching video prompt
const videoPromptRequest = `
You are a creative director for a short, stylish video ad.
Based on the provided image and the following scene description, generate an attractive video prompt.
Main Subject: ${mainSubject}
Sub-Objects: ${selectedSubObjects.join(', ')}
Scene Description: ${backgroundProposals[0]}
Main Subject: ${main_object}
Scene Description: ${imagePrompt}
The video prompt should:
- Be in English.
- Be approximately 50 words.
- Describe one clear action involving the main subject and sub-objects.
- Be in English and approximately 50 words.
- Describe one clear action involving the main subject.
- Include one specific camera movement (e.g., slow zoom in, orbiting shot, push-in, pull-out).
- Be dynamic and visually appealing.
@ -160,27 +106,41 @@ Output strictly in this JSON format:
"result": "your generated video prompt here"
}
`;
const videoPromptResponse = await callLMStudioAPIWithFile(imagePath, videoPromptRequest);
const videoPromptResponse = await callLMStudioAPIWithFile(pinterestImagePath, videoPromptRequest);
const videoPrompt = videoPromptResponse.result;
if (!videoPrompt) {
console.error('Failed to generate video prompt for image:', imagePath);
if (videoPrompt) {
logger.info(`Generated video prompt for color ${color}: "${videoPrompt}"`);
prompts.push({ imagePrompt, videoPrompt });
} else {
logger.warn(`Failed to generate a video prompt for ${pinterestImagePath}`);
}
} else {
logger.warn(`Failed to generate an image prompt for ${pinterestImagePath}`);
}
}
if (prompts.length === 0) {
logger.error(`No prompt pairs were generated for ${imagePath}. Aborting.`);
return;
}
// Step 6: Embed all prompts into PNG metadata
// Step 6: Embed all prompts into the original image and save to the new location
const metadata = {
imagePrompts: translatedProposals,
videoPrompt: videoPrompt
prompts: prompts
};
fs.copyFileSync(imagePath, outputFilePath);
// Convert original image to a valid PNG at the output path before embedding
await sharp(imagePath)
.toFormat('png')
.toFile(outputFilePath);
await embedJsonToPng(outputFilePath, metadata);
console.log(`Successfully generated prompts and saved to ${outputFilePath}`);
logger.info(`Successfully generated prompts and saved metadata to ${outputFilePath}`);
} catch (error) {
console.error(`Failed to process image ${imagePath}:`, error);
logger.error(`An error occurred while processing ${imagePath}:`, error);
}
}
@ -194,9 +154,10 @@ async function main() {
return;
}
for (const imageFile of imageFiles) {
for (let i = 0; i < imageFiles.length; i++) {
const imageFile = imageFiles[i];
const imagePath = path.join(INPUT_DIR, imageFile);
await generatePromptsForImage(imagePath);
await generatePromptsForImage(imagePath, i);
}
console.log('All images processed.');

View File

@ -0,0 +1,74 @@
import * as fs from 'fs/promises';
import * as path from 'path';
import dotenv from 'dotenv';
import { readJsonToPng } from '../lib/util';
import { generateVideo } from '../lib/video-generator';
// Load variables from a .env file into process.env before they are read below.
dotenv.config();
// Directory scanned for PNG files that carry an embedded video prompt.
const inputDir = './input';
// Directory created at startup for the generated videos.
const outputDir = './generated/video';
// Comfy server settings read from the environment. The `!` only silences the
// compiler: both values are `undefined` at runtime when the variables are unset.
const COMFY_BASE_URL = process.env.SERVER2_COMFY_BASE_URL!;
const COMFY_OUTPUT_DIR = process.env.SERVER2_COMFY_OUTPUT_DIR!;
/** Shape of the JSON metadata this script expects to read from each input PNG. */
interface PngMetadata {
/** Declared for completeness; this script does not read it. */
imagePrompt: string;
/** Prompt passed to `generateVideo`; files without it are skipped. */
videoPrompt: string;
}
/**
 * Generates one video for every PNG in `inputDir` whose embedded metadata
 * contains a `videoPrompt`.
 *
 * The output name is `product_<i>_<suffix>.mp4`, where `<i>` is the position of
 * the file in the PNG listing and `<suffix>` is the text after the last `_` of
 * the input file name. An output that already exists in `outputDir` is skipped.
 * A file that fails is logged and skipped; the remaining files are processed.
 *
 * @throws Error when the required Comfy environment variables are not set.
 */
async function main() {
  // The `!` on the constants is compile-time only; fail early with a clear message
  // instead of a TypeError deep inside the loop.
  if (!COMFY_BASE_URL || !COMFY_OUTPUT_DIR) {
    throw new Error('SERVER2_COMFY_BASE_URL and SERVER2_COMFY_OUTPUT_DIR must be set.');
  }

  await fs.mkdir(outputDir, { recursive: true });

  // Loop-invariant: the Comfy input folder is derived from the output folder.
  // NOTE(review): this replaces the FIRST "output" substring of the path; confirm
  // that the configured path contains it only as the final folder name.
  const inputfolderFullpath = COMFY_OUTPUT_DIR.replace("output", "input");

  const files = await fs.readdir(inputDir);
  const pngFiles = files.filter(file => path.extname(file).toLowerCase() === '.png');

  for (let i = 0; i < pngFiles.length; i++) {
    const file = pngFiles[i];
    const inputFile = path.join(inputDir, file);

    // A PNG without readable metadata must not abort the whole batch.
    let metadata: PngMetadata;
    try {
      metadata = await readJsonToPng(inputFile) as PngMetadata;
    } catch (error) {
      console.error(`Skipping ${file}, failed to read metadata:`, error);
      continue;
    }

    if (!metadata || !metadata.videoPrompt) {
      console.log(`Skipping ${file}, no valid videoPrompt metadata found.`);
      continue;
    }

    console.log(`Processing ${file} for video generation.`);

    // The text after the last "_" of the input name becomes part of the output name.
    const originalFileName = path.parse(file).name;
    const nameParts = originalFileName.split('_');
    const promptIndex = nameParts[nameParts.length - 1];
    const newFileName = `product_${i}_${promptIndex}.mp4`;
    const outputPath = path.join(outputDir, newFileName);

    try {
      await fs.access(outputPath);
      console.log(`File ${newFileName} already exists, skipping.`);
      continue;
    } catch {
      // File does not exist, proceed with generation
    }

    console.log(`Generating video for prompt: "${metadata.videoPrompt}"`);

    try {
      // The copy is part of the guarded work: one unreadable or uncopyable file
      // should only fail this item, not every file after it.
      await fs.copyFile(inputFile, path.join(inputfolderFullpath, file));
      await generateVideo(
        metadata.videoPrompt,
        file,
        newFileName,
        COMFY_BASE_URL,
        COMFY_OUTPUT_DIR
      );
      console.log(`Successfully generated and saved ${newFileName}`);
    } catch (error) {
      console.error(`Error generating video for ${file}:`, error);
    }
  }
}

main().catch((error) => {
  console.error(error);
  // Signal the failure to the calling shell instead of exiting with 0.
  process.exitCode = 1;
});