save current changes

This commit is contained in:
2025-08-30 23:22:11 +02:00
parent 62e22e4965
commit 9e056b752d
4 changed files with 562 additions and 237 deletions

View File

@ -11,6 +11,60 @@ import { VideoModel } from './lib/db/video';
dotenv.config();
/**
 * Returns the index of the `}` that closes the `{` located at `start`,
 * or -1 when the braces never balance.
 *
 * Braces that appear inside double-quoted JSON strings are ignored, and
 * backslash escapes inside strings are honoured, so a value such as "}" does
 * not terminate the object early.
 */
function findBalancedObjectEnd(text: string, start: number): number {
  let depth = 0;
  let inString = false;
  let escaped = false;
  for (let i = start; i < text.length; i++) {
    const ch = text[i];
    if (inString) {
      if (escaped) {
        escaped = false;
      } else if (ch === '\\') {
        escaped = true;
      } else if (ch === '"') {
        inString = false;
      }
      continue;
    }
    if (ch === '"') {
      inString = true;
    } else if (ch === '{') {
      depth++;
    } else if (ch === '}') {
      depth--;
      if (depth === 0) return i;
    }
  }
  return -1;
}

/**
 * Extracts a JSON value embedded in free-form text.
 *
 * Strategy:
 *  1. If the text contains a fenced code block (``` or ```json), try to parse
 *     the content of the first one.
 *  2. Otherwise (or if that fails), scan for the first brace-balanced `{...}`
 *     span that parses as JSON. Each `{` is tried in order, so stray braces
 *     before or after the real object no longer make extraction fail (the
 *     previous greedy first-`{`-to-last-`}` match did).
 *
 * @param text Raw text that may contain JSON.
 * @returns The parsed value, or `null` when the input is empty, not a string,
 *          or contains no parseable JSON.
 */
function extractJsonFromText(text: string): any | null {
  if (!text || typeof text !== 'string') return null;

  // JSON.parse never yields `undefined`, so it is a safe "failed" sentinel.
  const parseOrUndefined = (candidate: string): any => {
    try {
      return JSON.parse(candidate);
    } catch (_err) {
      return undefined;
    }
  };

  // Try fenced code block with optional json language
  const fenced = text.match(/```(?:json)?\s*([\s\S]*?)\s*```/i);
  if (fenced && fenced[1]) {
    const fromFence = parseOrUndefined(fenced[1].trim());
    if (fromFence !== undefined) return fromFence;
    // fall through to brace extraction
  }

  // Try every '{' as a potential object start until one parses.
  let start = text.indexOf('{');
  while (start !== -1) {
    const end = findBalancedObjectEnd(text, start);
    if (end !== -1) {
      const parsed = parseOrUndefined(text.slice(start, end + 1));
      if (parsed !== undefined) return parsed;
    }
    start = text.indexOf('{', start + 1);
  }
  return null;
}
/**
 * Sends an image plus prompt through callOpenAIWithFile and returns a JSON value.
 *
 * - A non-null object response is returned as-is.
 * - A string response is run through extractJsonFromText; a truthy result is returned.
 * - Anything else, or a thrown error, is logged as a warning and the call is retried.
 *
 * @param imagePath  Path of the image passed to callOpenAIWithFile.
 * @param prompt     Prompt text passed to callOpenAIWithFile.
 * @param maxRetries Maximum number of attempts (default 5).
 * @returns The object / parsed JSON, or `null` once all attempts are used up.
 */
async function callOpenAIWithFileAndExtract(imagePath: string, prompt: string, maxRetries = 5): Promise<any | null> {
  let attempt = 1;
  while (attempt <= maxRetries) {
    try {
      const response = await callOpenAIWithFile(imagePath, prompt);

      // Already structured: hand it straight back.
      if (typeof response === 'object' && response !== null) {
        return response;
      }

      // Text reply: attempt to dig the JSON payload out of it.
      const extracted = typeof response === 'string' ? extractJsonFromText(response) : null;
      if (extracted) {
        return extracted;
      }

      // Neither an object nor parseable text -> try again.
      logger.warn(`callOpenAIWithFileAndExtract: attempt ${attempt} returned unexpected result. Retrying...`);
    } catch (err) {
      logger.warn(`callOpenAIWithFileAndExtract: attempt ${attempt} failed: ${err}`);
    }
    attempt += 1;
  }

  logger.error(`callOpenAIWithFileAndExtract: failed to get valid JSON after ${maxRetries} attempts`);
  return null;
}
const servers = [
{
baseUrl: process.env.SERVER1_COMFY_BASE_URL,
@ -36,9 +90,10 @@ interface GenerationTask {
scene: string;
action: string;
camera: string;
videoInstructions?: string[];
}
async function getPromptsForImage(imagePaths: string[], pinUrl: string, genre: string, subGenre: string): Promise<GenerationTask | null> {
async function getPromptsForImage(imagePaths: string[], pinUrl: string, genre: string, subGenre: string, videoInstructions: string[] = []): Promise<GenerationTask | null> {
const pinId = pinUrl.split('/').filter(Boolean).pop() || `pin_${Date.now()}`;
const timestamp = new Date().getTime();
const imageFileName = `${pinId}_${timestamp}.png`;
@ -54,26 +109,121 @@ async function getPromptsForImage(imagePaths: string[], pinUrl: string, genre: s
const imageForPrompt = renamedImagePaths[Math.floor(Math.random() * renamedImagePaths.length)];
try {
const promptResponse = await callOpenAIWithFile(imageForPrompt,
`Analyze the provided image and generate the following:
1. 'scene': A description of the image's environment.
2. 'action': A description of the main action occurring in the image.
3. 'camera': A description of the camera shot (e.g., 'close-up', 'wide-angle').
4. 'image_prompt': A short and detailed prompt to generate this photo.
5. 'video_prompt': A prompt describing a creative and subtle movement of the main object. The camera should be slight panning or static.
// Step 1: Detect main object
const step1Prompt = `
Return exactly one JSON object and nothing else: { "mainobject": "..." }.
Look at the provided image and determine the single most prominent/main object or subject in the scene.
Answer with a short noun or short phrase (no extra commentary).
If unsure, give the best concise guess.
`;
const step1Res = await callOpenAIWithFileAndExtract(imageForPrompt, step1Prompt, 5);
const mainobject = (step1Res && (step1Res.mainobject || step1Res.mainObject || step1Res.object)) ? String(step1Res.mainobject || step1Res.mainObject || step1Res.object).trim() : '';
if (!mainobject) {
throw new Error('Could not detect main object');
}
logger.info(`Detected main object for ${imageForPrompt}: ${mainobject}`);
// Step 2: Determine best action for this scene
const step2Prompt = `
You have access to the image and the detected main object: "${mainobject}".
Decide which single action type best fits this scene from the list:
- no action
- micro animation (animate object but small movement)
- big movement
- impossible movement
Return exactly one JSON object and nothing else: { "actiontype": "..." }.
Do not add commentary. Choose the single best option from the list above.
`;
const step2Res = await callOpenAIWithFileAndExtract(imageForPrompt, step2Prompt, 5);
const actiontype = (step2Res && (step2Res.actiontype || step2Res.actionType)) ? String(step2Res.actiontype || step2Res.actionType).trim() : '';
if (!actiontype) {
throw new Error('Could not determine action type');
}
logger.info(`Decided action type for ${imageForPrompt}: ${actiontype}`);
// Step 3: Ask OpenAI what is the best camera work for the scene
const step3Prompt = `
Given the image and the following information:
- main object: "${mainobject}"
- chosen action type: "${actiontype}"
From the options below pick the single best camera approach for this scene:
- static camera
- pan
- rotation
- follow the moving object
- zoom to the object
- impossible camera work
Return exactly one JSON object and nothing else: { "cameraworkType": "..." }.
Choose one of the listed options and do not add commentary.
`;
const step3Res = await callOpenAIWithFileAndExtract(imageForPrompt, step3Prompt, 5);
const cameraworkType = (step3Res && (step3Res.cameraworkType || step3Res.cameraWorkType || step3Res.camera)) ? String(step3Res.cameraworkType || step3Res.cameraWorkType || step3Res.camera).trim() : '';
if (!cameraworkType) {
throw new Error('Could not determine camera work');
}
logger.info(`Decided camera work for ${imageForPrompt}: ${cameraworkType}`);
// Optional step: when the caller supplied candidate video instructions, ask the
// model to pick one of them for this image. The step is best-effort: if no usable
// answer comes back, videoInstruction stays empty and the flow continues.
let videoInstruction = "";
if (videoInstructions && videoInstructions.length > 0) {
  // NOTE(review): the prompt wording below still says "camera approach" although the
  // listed options are video instructions — looks copied from step 3; confirm intent.
  const videoInstructionPrompt = `
Given the image and the following information:
- main object: "${mainobject}"
From the options below pick the single best camera approach for this scene:
${videoInstructions.join(",\r\n")}
Return exactly one JSON object and nothing else: { "videoInstruction": "..." }.
Choose one of the listed options and do not add commentary.
`;
  const videoInstructionRes = await callOpenAIWithFileAndExtract(imageForPrompt, videoInstructionPrompt, 5);
  // Guard on THIS call's result: callOpenAIWithFileAndExtract returns null when all
  // attempts fail, and the previous guard checked step3Res instead, so a null here
  // raised a TypeError on property access.
  const rawInstruction = videoInstructionRes
    ? (videoInstructionRes.videoInstruction || videoInstructionRes.camera)
    : '';
  const videoInstructionFinalRes = rawInstruction ? String(rawInstruction).trim() : '';
  if (videoInstructionFinalRes) {
    videoInstruction = videoInstructionFinalRes;
  }
}
// Step 4: Generate final video prompt (and image prompt) using all gathered info
// Final prompt: asks for one JSON object carrying scene/action/camera plus the image
// and video prompts, built from the values gathered in the previous steps.
// The word-count range reads "100-150" (it was garbled to "100150").
const finalPrompt = `
Return exactly one JSON object: { "scene": "...", "action":"...", "camera":"...", "image_prompt":"...", "videoPrompt":"..." } and nothing else.
Write "videoPrompt" in 100-150 words, present tense, plain concrete language.
Write "image_prompt" as a concise, detailed prompt suitable for generating a similar image.
HARD RULES (must comply for videoPrompt):
- One continuous shot. Real-time 8 seconds. No edits.
- Fixed location and vantage. Do not change background or angle.
- Lens and focal length locked. No zooms, no close-ups that imply a lens change.
- Camera motion: at most subtle pan/tilt/dolly within 1 meter while staying in the same spot.
- Keep framing consistent. No “another shot/meanwhile.”
- Use clear simple sentences. No metaphors or poetic language.
Here is information of the scene, please generate fields accordingly:
Detected Main Object: ${mainobject}
Suggested Action Type: ${actiontype}
Suggested Camera Work: ${cameraworkType}
Genre: ${genre}
Sub-Genre: ${subGenre}
${videoInstruction ? 'video instruction:' + videoInstruction : ""}
`;
// Run the final prompt and pull the expected fields out of the reply.
const finalRes = await callOpenAIWithFileAndExtract(imageForPrompt, finalPrompt, 5);

// Returns the first truthy value among the given keys as a string, or '' when the
// reply is null or none of the keys is set. The earlier code tested both spellings
// (e.g. scene / Scene) but stringified only the first, so a reply using the
// alternate key produced the literal text "undefined".
const pickField = (...keys: string[]): string => {
  if (!finalRes) return '';
  for (const key of keys) {
    const value = finalRes[key];
    if (value) return String(value);
  }
  return '';
};

const scene = pickField('scene', 'Scene');
const action = pickField('action', 'Action');
const camera = pickField('camera', 'Camera');
const imagePrompt = pickField('image_prompt', 'imagePrompt');
const videoPrompt = pickField('videoPrompt', 'video_prompt');

// Both prompts are mandatory for the rest of the flow; scene/action/camera may be empty.
if (!imagePrompt || !videoPrompt) {
  throw new Error('Final LM output did not include image_prompt or videoPrompt');
}
Output should be in this JSON format:
---
{
"scene": "{result comes here}",
"action": "{result comes here}",
"camera": "{result comes here}",
"image_prompt": "{result comes here}",
"video_prompt": "{result comes here}"
}
---
`);
const { scene, action, camera, image_prompt: imagePrompt, video_prompt: videoPrompt } = promptResponse;
logger.info(`Image prompt for ${imageForPrompt}:`, imagePrompt);
logger.info(`Video prompt for ${imageForPrompt}:`, videoPrompt);
@ -179,7 +329,7 @@ async function getPinUrlFromPinterest(keyword: string): Promise<string | null> {
(async () => {
// Load pinterest keywords JSON, pick up to 20 subGenres and choose 1 pinId per subGenre
const keywordsFilePath = path.resolve(process.cwd(), 'src', 'pinterest_keywords.json');
let allKeywords: { genre: string; subGenre: string; pinIds?: string[]; pinId?: string[] }[] = [];
let allKeywords: { genre: string; subGenre: string; pinIds?: string[]; pinId?: string[], videoInstructions?: string[] }[] = [];
try {
const raw = await fs.readFile(keywordsFilePath, 'utf-8');
allKeywords = JSON.parse(raw);
@ -189,7 +339,7 @@ async function getPinUrlFromPinterest(keyword: string): Promise<string | null> {
}
allKeywords = allKeywords.filter(a => {
return (a.genre == "food" && a.subGenre == "imagination")
return (a.genre == "sports" && a.subGenre == "Motocross")
});
function shuffle<T>(arr: T[]): T[] {
@ -205,24 +355,39 @@ async function getPinUrlFromPinterest(keyword: string): Promise<string | null> {
// Download up to `count` images from a pin URL by opening the pin page and scrolling up to 5 times to trigger lazy loading
// Returns an array of saved image paths (may be empty)
async function downloadOneImageFromPin(pinUrl: string, count: number = 1): Promise<string[]> {
const browser = await puppeteer.launch({ headless: true });
const browser = await puppeteer.launch({ headless: false });
const page = await browser.newPage();
await page.setUserAgent('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36');
await page.setViewport({ width: 1920, height: 1080 });
try {
await page.goto(pinUrl, { waitUntil: 'networkidle2', timeout: 30000 });
for (let i = 0; i < 5; i++) {
for (let i = 0; i < 3; i++) {
await page.evaluate('window.scrollTo(0, document.body.scrollHeight)');
await new Promise((r) => setTimeout(r, 700 + Math.random() * 800));
}
const imgs: string[] = await page.$$eval('img', imgs =>
imgs.map(img => (img as HTMLImageElement).src)
.filter(src => !!src && (src.includes('pinimg') || /\.(jpe?g|png|webp)$/i.test(src)))
);
const imgs: string[] = await page.$$eval('img', imgs => {
// For each <img> try to extract the 4x (original) URL from srcset.
// srcset example:
// "https://i.pinimg.com/236x/...jpg 1x, https://i.pinimg.com/474x/...jpg 2x, https://i.pinimg.com/736x/...jpg 3x, https://i.pinimg.com/originals/...jpg 4x"
const urls: string[] = imgs.map(img => {
const srcset = (img as HTMLImageElement).getAttribute('srcset') || '';
if (!srcset) return '';
const parts = srcset.split(',').map(p => p.trim());
for (const part of parts) {
const m = part.match(/^(\S+)\s+4x$/);
if (m && m[1]) return m[1];
}
// fallback: if src contains "originals" return src
const src = (img as HTMLImageElement).src || '';
if (src.includes('/originals/')) return src;
return '';
}).filter(s => !!s && s.includes('pinimg'));
return urls;
});
if (!imgs || imgs.length === 0) {
logger.warn(`No image src found on pin page ${pinUrl}`);
logger.warn(`No image src (4x) found on pin page ${pinUrl}`);
return [];
}
@ -265,13 +430,16 @@ async function getPinUrlFromPinterest(keyword: string): Promise<string | null> {
}
}
const numberOfPinIds = 20;
// Build keywords list with single chosen pinId per selected subGenre
const keywords: { genre: string; subGenre: string; pinId: string[] }[] = [];
const keywords: {
genre: string; subGenre: string; pinIds: string[], videoInstructions?: string[]
}[] = [];
for (const entry of selectedEntries) {
const pinIds = (entry.pinIds || entry.pinId) as string[] | undefined;
if (!Array.isArray(pinIds) || pinIds.length === 0) continue;
const chosenPinId = pinIds[Math.floor(Math.random() * pinIds.length)];
keywords.push({ genre: entry.genre, subGenre: entry.subGenre, pinId: [chosenPinId] });
const chosenPinId = pinIds.splice(0, numberOfPinIds);
keywords.push({ genre: entry.genre, subGenre: entry.subGenre, pinIds: chosenPinId, videoInstructions: entry.videoInstructions });
}
if (keywords.length === 0) {
@ -291,24 +459,9 @@ async function getPinUrlFromPinterest(keyword: string): Promise<string | null> {
const { genre, subGenre } = genreSubGenre;
for (let i = 0; i < 10; i++) {
// pinId is now an array with a single chosen id. Pick the first element.
const pinIdField = (genreSubGenre as any).pinId;
let selectedPinId: string | undefined;
if (Array.isArray(pinIdField) && pinIdField.length > 0) {
selectedPinId = pinIdField[0];
logger.info(`Selected chosen pinId ${selectedPinId} for ${genre} / ${subGenre}`);
} else if (typeof pinIdField === 'string' && pinIdField) {
selectedPinId = pinIdField;
logger.info(`Using single pinId ${selectedPinId} for ${genre} / ${subGenre}`);
}
for (const pinId of genreSubGenre.pinIds) {
if (!selectedPinId) {
logger.warn(`No pinId available for ${genre}/${subGenre}. Skipping.`);
continue;
}
const pin = `https://www.pinterest.com/pin/${selectedPinId}/`;
const pin = `https://www.pinterest.com/pin/${pinId}/`;
logger.info(`--- Starting processing for pin: ${pin} ---`);
// download images from the pin page (pass desired count as second arg)
@ -323,8 +476,9 @@ async function getPinUrlFromPinterest(keyword: string): Promise<string | null> {
// proceed if we have at least one image
if (selectedImages.length >= 1) {
const task = await getPromptsForImage(selectedImages, pin, genre, subGenre);
const task = await getPromptsForImage(selectedImages, pin, genre, subGenre, genreSubGenre.videoInstructions);
if (task) {
task.videoInstructions = genreSubGenre.videoInstructions;
generationTasks.push(task);
}
} else {