save changes

This commit is contained in:
2025-10-05 15:01:06 +02:00
parent eee4e6523e
commit 1697523000
6 changed files with 471 additions and 49 deletions

View File

@@ -0,0 +1,208 @@
import * as fs from 'fs';
import * as path from 'path';
import { callLMStudioAPIWithFile, callLmstudio } from '../lib/lmstudio';
import { embedJsonToPng, readJsonToPng } from '../lib/util';
// Source images are read from the "input" folder under the current working directory.
const INPUT_DIR = path.resolve('input');
// Processed copies are written to "generated/prompts" under the current working directory.
const OUTPUT_DIR = path.resolve('generated', 'prompts');

// Create the output folder (including missing parents) when nothing exists at that path yet.
if (fs.existsSync(OUTPUT_DIR) === false) {
  fs.mkdirSync(OUTPUT_DIR, { recursive: true });
}
/**
 * Returns up to `count` distinct items chosen uniformly at random.
 * Works on a copy, so the caller's array is never reordered.
 */
function pickRandomItems<T>(items: readonly T[], count: number): T[] {
  const pool = [...items];
  const picked: T[] = [];
  while (picked.length < count && pool.length > 0) {
    const index = Math.floor(Math.random() * pool.length);
    // splice removes the chosen element, so it cannot be drawn twice.
    picked.push(...pool.splice(index, 1));
  }
  return picked;
}

/**
 * Generates image and video prompts for one input image and stores them in a
 * copy of that image under OUTPUT_DIR (same base file name).
 *
 * Pipeline:
 *  1. Ask the vision model for the main subject and candidate sub-objects.
 *  2. Pick three sub-objects at random.
 *  3. Ask the model for five background proposals using those sub-objects.
 *  4. Translate each proposal to Chinese.
 *  5. Ask the model for a video prompt based on the first proposal.
 *  6. Copy the image to OUTPUT_DIR and embed `{ imagePrompts, videoPrompt }`.
 *
 * Images whose output file already carries both `imagePrompts` and
 * `videoPrompt` are skipped. Failures are logged and never thrown, so a
 * caller iterating over many images keeps going.
 *
 * @param imagePath Path of the source image to process.
 * @returns A promise that settles once the image was processed, skipped, or
 *          abandoned after a logged error.
 */
async function generatePromptsForImage(imagePath: string): Promise<void> {
  const imageName = path.basename(imagePath);
  const outputFilePath = path.join(OUTPUT_DIR, imageName);

  // Skip work when a previous run already produced a complete output file.
  if (fs.existsSync(outputFilePath)) {
    try {
      const existingMetadata = await readJsonToPng(outputFilePath);
      if (existingMetadata?.imagePrompts && existingMetadata?.videoPrompt) {
        console.log(`Skipping already processed image: ${imageName}`);
        return;
      }
    } catch {
      // File exists but is invalid or has no metadata, so it is overwritten below.
      console.log(`Output file for ${imageName} exists but is invalid. Regenerating...`);
    }
  }

  console.log(`Processing image: ${imagePath}`);

  // Step 1: Get main subject and sub-objects
  const firstPrompt = `
You are a creative director for unique product video generation.
Read the given photo carefully.
Identify and write the main subject (the most important object in the photo).
Propose 20 possible sub-objects that could appear around the main subject in a video scene.
Sub-objects are only suggestions.
They should be stylish, cool, or complementary items that enhance the main subject.
Keep each sub-object as a short noun phrase (no long explanations).
Do not repeat similar items.
Output strictly in this JSON format:
{result:{
"main-subject": "the identified main object",
"sub-object": [
"first proposal",
"second proposal",
...
"twentieth proposal"
]
}}
`;

  try {
    const firstApiResponse = await callLMStudioAPIWithFile(imagePath, firstPrompt);
    // Optional chaining routes a response without `result` to the explicit
    // validation error below instead of a TypeError.
    const firstApiResult = firstApiResponse?.result;
    const mainSubject = firstApiResult?.['main-subject'];
    const rawSubObjects = firstApiResult?.['sub-object'];

    // Only non-empty strings can be interpolated meaningfully into the next prompt.
    const subObjects: string[] = Array.isArray(rawSubObjects)
      ? rawSubObjects.filter(
          (item: unknown): item is string => typeof item === 'string' && item.trim() !== ''
        )
      : [];

    if (!mainSubject || subObjects.length < 3) {
      console.error('Invalid response from the first API call for image:', imagePath);
      return;
    }

    // Step 2: Pick 3 random sub-objects (uniform sample, source array untouched)
    const selectedSubObjects = pickRandomItems(subObjects, 3);

    // Step 3: Generate background proposals
    const secondPrompt = `
You are a senior creative director for product photography and video.
Follow the instructions carefully.
Task:
1. Extract the main subject from Figure 1.
2. Use the three selected sub-objects provided.
3. Generate exactly five background prompt suggestions.
SUB1: ${selectedSubObjects[0]}
SUB2: ${selectedSubObjects[1]}
SUB3: ${selectedSubObjects[2]}
Requirements for background prompts:
- All five suggestions must be written in English.
- Every suggestion must begin with the phrase: "Extract the object from Figure 1 and generate a new image."
- After that phrase, always instruct to place the three sub-objects in the scene.
Example: "and include Pink silk scarf, Pearl necklace, Pink lipstick in the scene."
- Each suggestion must also describe:
- Background color (must always include pink)
- Lighting (direction, mood, intensity)
- Style or design elements (minimal, futuristic, luxury, natural, abstract, etc.)
- Try to describe detail for each sugegstion. > 50 words.
- Suggestions must be visually distinct.
- Each suggestion must use a completely different background color palette while still incorporating pink.
- Do not mention brand names or logos.
Special condition:
- In the new image, always place a pink silk scarf.
- The background color must always be pink.
Output strictly in JSON format:
{result:{
"main-subject": "${mainSubject}",
"selected-sub-objects": ["${selectedSubObjects[0]}","${selectedSubObjects[1]}","${selectedSubObjects[2]}"],
"background-proposals": [
"Extract the object from Figure1 and generate a new image,{be creative and generate scene with ${selectedSubObjects[0]},${selectedSubObjects[1]},${selectedSubObjects[2]} }",
"Extract the object from Figure1 and generate a new image,{be creative and generate scene with ${selectedSubObjects[0]},${selectedSubObjects[1]},${selectedSubObjects[2]} }",
"Extract the object from Figure1 and generate a new image,{be creative and generate scene with ${selectedSubObjects[0]},${selectedSubObjects[1]},${selectedSubObjects[2]} }",
"Extract the object from Figure1 and generate a new image,{be creative and generate scene with ${selectedSubObjects[0]},${selectedSubObjects[1]},${selectedSubObjects[2]} }",
"Extract the object from Figure1 and generate a new image,{be creative and generate scene with ${selectedSubObjects[0]},${selectedSubObjects[1]},${selectedSubObjects[2]} }"
]
}}
`;

    const secondApiResponse = await callLMStudioAPIWithFile(imagePath, secondPrompt);
    const backgroundProposals = secondApiResponse?.result?.['background-proposals'];
    if (!Array.isArray(backgroundProposals) || backgroundProposals.length !== 5) {
      console.error('Invalid response from the second API call for image:', imagePath);
      return;
    }

    // Step 4: Translate proposals to Chinese.
    // Requests are issued one at a time, preserving the original call order.
    const translatedProposals: string[] = [];
    for (const proposal of backgroundProposals) {
      const translationPrompt = `Translate the following English text to Chinese. Return only the translated text.
Text: "${proposal}"
Return the result in this format:
{"result":""}
`;
      const translationResponse = await callLmstudio(translationPrompt);
      const translatedResult = translationResponse?.result;
      // A missing translation must not be embedded: the skip check at the top
      // would treat the image as fully processed on every later run.
      if (typeof translatedResult !== 'string' || translatedResult.trim() === '') {
        console.error('Failed to translate a background proposal for image:', imagePath);
        return;
      }
      translatedProposals.push(translatedResult);
    }

    // Step 5: Generate video prompt
    const videoPromptRequest = `
You are a creative director for a short, stylish video ad.
Based on the provided image and the following scene description, generate an attractive video prompt.
Main Subject: ${mainSubject}
Sub-Objects: ${selectedSubObjects.join(', ')}
Scene Description: ${backgroundProposals[0]}
The video prompt should:
- Be in English.
- Be approximately 50 words.
- Describe one clear action involving the main subject and sub-objects.
- Include one specific camera movement (e.g., slow zoom in, orbiting shot, push-in, pull-out).
- Be dynamic and visually appealing.
Output strictly in this JSON format:
{
"result": "your generated video prompt here"
}
`;
    const videoPromptResponse = await callLMStudioAPIWithFile(imagePath, videoPromptRequest);
    const videoPrompt = videoPromptResponse?.result;
    if (!videoPrompt) {
      console.error('Failed to generate video prompt for image:', imagePath);
      return;
    }

    // Step 6: Embed all prompts into PNG metadata
    const metadata = {
      imagePrompts: translatedProposals,
      videoPrompt: videoPrompt
    };
    // NOTE(review): the copy keeps the source extension, so a .jpg/.jpeg input is
    // handed to embedJsonToPng as-is. Confirm that helper accepts non-PNG files.
    fs.copyFileSync(imagePath, outputFilePath);
    await embedJsonToPng(outputFilePath, metadata);
    console.log(`Successfully generated prompts and saved to ${outputFilePath}`);
  } catch (error) {
    console.error(`Failed to process image ${imagePath}:`, error);
  }
}
/**
 * Script entry point: lists INPUT_DIR, keeps the .png/.jpg/.jpeg files
 * (case-insensitive), and runs generatePromptsForImage on each of them one
 * after another. Any error raised here is logged rather than rethrown.
 */
async function main() {
  const imageExtension = /\.(png|jpg|jpeg)$/i;
  try {
    const imageFiles = fs
      .readdirSync(INPUT_DIR)
      .filter((entry) => imageExtension.test(entry));

    if (!imageFiles.length) {
      console.log('No images found in the input directory.');
      return;
    }

    // Each image is awaited before the next one starts.
    for (const fileName of imageFiles) {
      await generatePromptsForImage(path.join(INPUT_DIR, fileName));
    }

    console.log('All images processed.');
  } catch (error) {
    console.error('An error occurred in the main process:', error);
  }
}

// main() handles its own errors, so the returned promise is intentionally not awaited.
void main();