import { downloadImagesFromPinterestPin } from './lib/downloader';
import { callOpenAIWithFile } from './lib/openai';
import { generateVideo } from './lib/video-generator';
import { generateImage as generateImageMixStyle } from './lib/image-generator-mix-style';
import { generateImage as generateImage } from './lib/image-generator';
import { logger } from './lib/logger';
import * as fs from 'fs/promises';
import dotenv from 'dotenv';
import path from 'path';
import puppeteer from 'puppeteer';
import { VideoModel } from './lib/db/video';

dotenv.config();

// RUN_ONCE defaults to false: the main loop keeps iterating unless RUN_ONCE=true.
const RUN_ONCE = (process.env.RUN_ONCE || 'false').toLowerCase() === 'true';
// USE_REFERENCE_IMAGE defaults to true: source images are passed to the mix-style generator.
const USE_REFERENCE_IMAGE = (process.env.USE_REFERENCE_IMAGE || 'true').toLowerCase() === 'true';

/**
 * Scans `text` for the first brace-balanced `{...}` span that parses as JSON.
 *
 * Braces inside double-quoted strings (including escaped quotes) are ignored
 * while balancing. If the span starting at one `{` does not parse, the scan
 * restarts at the next `{`.
 *
 * @param text Arbitrary text that may embed a JSON object.
 * @returns The parsed value of the first parseable span, or `null` if none parses.
 */
function parseFirstBalancedObject(text: string): any | null {
  for (let start = text.indexOf('{'); start !== -1; start = text.indexOf('{', start + 1)) {
    let depth = 0;
    let inString = false;
    let escaped = false;

    for (let i = start; i < text.length; i++) {
      const ch = text[i];

      if (inString) {
        if (escaped) {
          escaped = false;
        } else if (ch === '\\') {
          escaped = true;
        } else if (ch === '"') {
          inString = false;
        }
        continue;
      }

      if (ch === '"') {
        inString = true;
      } else if (ch === '{') {
        depth++;
      } else if (ch === '}') {
        depth--;
        if (depth === 0) {
          try {
            return JSON.parse(text.slice(start, i + 1));
          } catch {
            // This candidate is not valid JSON; try the next opening brace.
            break;
          }
        }
      }
    }
  }
  return null;
}

/**
 * Utility: extract and parse a JSON value embedded in free-form text.
 *
 * Attempts, in order:
 *  1. the contents of the first fenced code block (``` or ```json);
 *  2. the span from the first `{` to the last `}` in the text;
 *  3. the first brace-balanced `{...}` span that parses, which covers replies
 *     that contain several objects or stray closing braces after the JSON.
 *
 * @param text Raw text, e.g. a model reply.
 * @returns The parsed JSON value, or `null` when the input is empty, not a
 *          string, contains no `{...}` span, or nothing could be parsed.
 */
function extractJsonFromText(text: string): any | null {
  if (!text || typeof text !== 'string') return null;

  // Try fenced code block with optional json language
  const fenced = text.match(/```(?:json)?\s*([\s\S]*?)\s*```/i);
  if (fenced && fenced[1]) {
    try {
      return JSON.parse(fenced[1].trim());
    } catch {
      // fall through to brace extraction
    }
  }

  // Try the widest {...} span (first "{" to last "}")
  const widest = text.match(/\{[\s\S]*\}/);
  if (!widest) return null;
  try {
    return JSON.parse(widest[0]);
  } catch {
    // The widest span can include trailing text or a second object;
    // fall back to a balanced scan for the first complete object.
  }

  return parseFirstBalancedObject(text);
}

// Wrapper to call OpenAI with an image and prompt, and extract JSON reliably.
// - Uses callOpenAIWithFile to pass the image.
// - Tries to parse JSON from response if needed.
// - Retries up to maxRetries times (default 5) when parsing fails or an error occurs.
/**
 * Calls `callOpenAIWithFile` and returns a JSON payload from its reply.
 *
 * An object reply is returned as-is; a string reply is run through
 * `extractJsonFromText`. Any other outcome (including a thrown error) is
 * logged as a warning and retried.
 *
 * @param imagePath Path of the image passed to `callOpenAIWithFile`.
 * @param prompt Prompt text passed to `callOpenAIWithFile`.
 * @param maxRetries Maximum number of attempts (default 5).
 * @returns The object/parsed JSON, or `null` after `maxRetries` failed attempts.
 */
async function callOpenAIWithFileAndExtract(imagePath: string, prompt: string, maxRetries = 5): Promise<any | null> {
  for (let attempt = 1; attempt <= maxRetries; attempt++) {
    try {
      const res = await callOpenAIWithFile(imagePath, prompt);

      // callOpenAIWithFile may return an object or parsed JSON already
      if (res && typeof res === 'object') {
        return res;
      }

      if (typeof res === 'string') {
        const parsed = extractJsonFromText(res);
        if (parsed) return parsed;
      }

      // unexpected shape -> retry
      logger.warn(`callOpenAIWithFileAndExtract: attempt ${attempt} returned unexpected result. Retrying...`);
    } catch (err) {
      logger.warn(`callOpenAIWithFileAndExtract: attempt ${attempt} failed: ${err}`);
    }
  }

  logger.error(`callOpenAIWithFileAndExtract: failed to get valid JSON after ${maxRetries} attempts`);
  return null;
}

/** A generation server: its base URL and the local path of its output directory. */
type ComfyServer = { baseUrl: string; outputDir: string };

// Only servers with both environment variables set are kept.
const servers = [
  {
    baseUrl: process.env.SERVER1_COMFY_BASE_URL,
    outputDir: process.env.SERVER1_COMFY_OUTPUT_DIR,
  },
  {
    baseUrl: process.env.SERVER2_COMFY_BASE_URL,
    outputDir: process.env.SERVER2_COMFY_OUTPUT_DIR,
  },
].filter((s): s is ComfyServer => !!s.baseUrl && !!s.outputDir);

/** Everything collected for one pin between prompt creation and video generation. */
interface GenerationTask {
  pinUrl: string;
  imagePrompt: string;
  videoPrompt: string;
  imageFileName: string;
  renamedImagePaths: string[];
  generatedImagePath?: string;
  genre: string;
  subGenre: string;
  scene: string;
  action: string;
  camera: string;
  videoInstructions?: string[];
}

/**
 * Returns the first truthy property of `res` among `keys`, converted with `String()`.
 *
 * @param res A model reply (may be `null` or a non-object).
 * @param keys Property names to try, in priority order.
 * @returns The stringified value, or `''` when `res` is falsy or no key holds a truthy value.
 */
function firstTruthyField(res: any, keys: readonly string[]): string {
  if (!res) return '';
  for (const key of keys) {
    const value = res[key];
    if (value) return String(value);
  }
  return '';
}

/**
 * Builds the image and video prompts for one pin by querying OpenAI in several steps.
 *
 * The source images are first renamed to `<pinId>_<timestamp>_<index>.png` in their
 * own directory. One of them is picked at random and sent with every prompt:
 * main object -> action type -> camera work -> (optional) video instruction ->
 * final scene/action/camera/image_prompt/videoPrompt.
 *
 * @param imagePaths Paths of the downloaded source images (callers pass at least one).
 * @param pinUrl URL of the pin; its last path segment is used as the pin id.
 * @param genre Genre inserted into the final prompt and copied to the task.
 * @param subGenre Sub-genre inserted into the final prompt and copied to the task.
 * @param videoInstructions Optional list of instructions; when non-empty the model is asked to pick one.
 * @returns The populated task, or `null` if any required step fails. On failure
 *          the renamed source images are deleted.
 */
async function getPromptsForImage(
  imagePaths: string[],
  pinUrl: string,
  genre: string,
  subGenre: string,
  videoInstructions: string[] = []
): Promise<GenerationTask | null> {
  const pinId = pinUrl.split('/').filter(Boolean).pop() || `pin_${Date.now()}`;
  const timestamp = Date.now();
  const imageFileName = `${pinId}_${timestamp}.png`;

  const renamedImagePaths: string[] = [];
  for (let i = 0; i < imagePaths.length; i++) {
    const renamedPath = path.join(path.dirname(imagePaths[i]), `${pinId}_${timestamp}_${i}.png`);
    await fs.rename(imagePaths[i], renamedPath);
    renamedImagePaths.push(renamedPath);
  }
  logger.debug(`Renamed source images to: ${renamedImagePaths.join(', ')}`);

  const imageForPrompt = renamedImagePaths[Math.floor(Math.random() * renamedImagePaths.length)];

  try {
    // Step 1: Detect main object
    const step1Prompt = [
      'Return exactly one JSON object and nothing else: { "mainobject": "..." }.',
      'Look at the provided image and determine the single most prominent/main object or subject in the scene.',
      'Answer with a short noun or short phrase (no extra commentary). If unsure, give the best concise guess.',
    ].join('\n');
    const step1Res = await callOpenAIWithFileAndExtract(imageForPrompt, step1Prompt, 5);
    const mainobject = firstTruthyField(step1Res, ['mainobject', 'mainObject', 'object']).trim();
    if (!mainobject) {
      throw new Error('Could not detect main object');
    }
    logger.info(`Detected main object for ${imageForPrompt}: ${mainobject}`);

    // Step 2: Determine best action for this scene
    const step2Prompt = [
      `You have access to the image and the detected main object: "${mainobject}".`,
      'Decide which single action type best fits this scene from the list:',
      '- no action',
      '- micro animation (animate object but small movement)',
      '- big movement',
      '- impossible movement',
      '- Dance ( if its woman portrait )',
      'Return exactly one JSON object and nothing else: { "actiontype": "..." }.',
      'Do not add commentary. Choose the single best option from the list above.',
    ].join('\n');
    const step2Res = await callOpenAIWithFileAndExtract(imageForPrompt, step2Prompt, 5);
    const actiontype = firstTruthyField(step2Res, ['actiontype', 'actionType']).trim();
    if (!actiontype) {
      throw new Error('Could not determine action type');
    }
    logger.info(`Decided action type for ${imageForPrompt}: ${actiontype}`);

    // Step 3: Ask OpenAI what is the best camera work for the scene
    const step3Prompt = [
      'Given the image and the following information:',
      `- main object: "${mainobject}"`,
      `- chosen action type: "${actiontype}"`,
      'From the options below pick the single best camera approach for this scene:',
      '- static camera',
      '- pan',
      '- rotation',
      '- follow the moving object',
      '- zoom to the object',
      '- impossible camera work',
      'Return exactly one JSON object and nothing else: { "cameraworkType": "..." }.',
      'Choose one of the listed options and do not add commentary.',
    ].join('\n');
    const step3Res = await callOpenAIWithFileAndExtract(imageForPrompt, step3Prompt, 5);
    const cameraworkType = firstTruthyField(step3Res, ['cameraworkType', 'cameraWorkType', 'camera']).trim();
    if (!cameraworkType) {
      throw new Error('Could not determine camera work');
    }
    logger.info(`Decided camera work for ${imageForPrompt}: ${cameraworkType}`);

    // Optional step: let the model pick one of the caller-supplied video instructions.
    let videoInstruction = '';
    if (videoInstructions && videoInstructions.length > 0) {
      const videoInstructionPrompt = [
        'Given the image and the following information:',
        `- main object: "${mainobject}"`,
        'From the options below pick the single best camera approach for this scene:',
        videoInstructions.join(',\r\n'),
        'Return exactly one JSON object and nothing else: { "videoInstruction": "..." }.',
        'Choose one of the listed options and do not add commentary.',
      ].join('\n');
      const videoInstructionRes = await callOpenAIWithFileAndExtract(imageForPrompt, videoInstructionPrompt, 5);
      // Guard on this call's own result: it is null when every retry failed, and
      // this step is optional, so a missing answer must not abort the task.
      const pickedInstruction = firstTruthyField(videoInstructionRes, ['videoInstruction', 'camera']).trim();
      if (pickedInstruction) videoInstruction = pickedInstruction;
    }

    // Step 4: Generate final video prompt (and image prompt) using all gathered info
    const finalPrompt = [
      'Return exactly one JSON object: { "scene": "...", "action":"...", "camera":"...", "image_prompt":"...", "videoPrompt":"..." } and nothing else.',
      'Write "videoPrompt" in 100–150 words, present tense, plain concrete language.',
      'Write "image_prompt" as a concise, detailed prompt suitable for generating a similar image.',
      'HARD RULES (must comply for videoPrompt):',
      '- One continuous shot. Real-time 8 seconds. No edits.',
      '- Fixed location and vantage. Do not change background or angle.',
      '- Lens and focal length locked. No zooms, no close-ups that imply a lens change.',
      '- Camera motion: at most subtle pan/tilt/dolly within 1 meter while staying in the same spot.',
      '- Keep framing consistent. No “another shot/meanwhile.”',
      '- Use clear simple sentences. No metaphors or poetic language.',
      'Here is information of the scene, please generate fields accordingly:',
      `Detected Main Object: ${mainobject}`,
      `Suggested Action Type: ${actiontype}`,
      `Suggested Camera Work: ${cameraworkType}`,
      `Genre: ${genre}`,
      `Sub-Genre: ${subGenre}`,
      `${videoInstruction ? 'video instruction:' + videoInstruction : ''}`,
    ].join('\n');
    const finalRes = await callOpenAIWithFileAndExtract(imageForPrompt, finalPrompt, 5);

    // Each field accepts an alternate key spelling; the value that is stringified
    // is the same one that passed the truthiness check.
    const scene = firstTruthyField(finalRes, ['scene', 'Scene']);
    const action = firstTruthyField(finalRes, ['action', 'Action']);
    const camera = firstTruthyField(finalRes, ['camera', 'Camera']);
    const imagePrompt = firstTruthyField(finalRes, ['image_prompt', 'imagePrompt']);
    const videoPrompt = firstTruthyField(finalRes, ['videoPrompt', 'video_prompt']);

    if (!imagePrompt || !videoPrompt) {
      throw new Error('Final LM output did not include image_prompt or videoPrompt');
    }

    logger.info(`Image prompt for ${imageForPrompt}:`, imagePrompt);
    logger.info(`Video prompt for ${imageForPrompt}:`, videoPrompt);

    return { pinUrl, imagePrompt, videoPrompt, imageFileName, renamedImagePaths, genre, subGenre, scene, action, camera };
  } catch (error) {
    logger.error(`Failed to get prompts for ${imageForPrompt}:`, error);
    // Best-effort cleanup of the renamed source images; failures here are deliberately ignored.
    for (const p of renamedImagePaths) {
      try {
        await fs.unlink(p);
      } catch (cleanupError) {
        // ignore
      }
    }
    return null;
  }
}

/**
 * Generates the image for a task on the given server.
 *
 * With USE_REFERENCE_IMAGE enabled, the task's source images are copied into the
 * server's input directory and passed to `generateImageMixStyle`; otherwise
 * `generateImage` is called with the prompt only. In every case the local source
 * images and any copies placed in the input directory are deleted afterwards.
 *
 * @param task Task holding the prompt, output file name and source image paths.
 * @param server Server to generate on. The input directory is derived by replacing
 *               the first occurrence of "output" in `outputDir` with "input".
 * @returns The value returned by the generator (assumed to be the generated image
 *          path — TODO confirm against the generator signatures), or `null` on error.
 */
async function generateImageForTask(task: GenerationTask, server: ComfyServer): Promise<string | null> {
  const { imagePrompt, imageFileName, renamedImagePaths } = task;
  const { baseUrl, outputDir } = server;
  const inputDir = outputDir.replace('output', 'input');
  const sourceFileNames: string[] = [];

  try {
    if (!USE_REFERENCE_IMAGE) {
      // Use Pinterest images only to create the prompt; generate final image using the single-image generator
      return await generateImage(imagePrompt, imageFileName, baseUrl, outputDir, 'qwen', { width: 1280, height: 720 });
    }

    // Copy renamed source images to the server input directory
    for (const sourcePath of renamedImagePaths) {
      const fileName = path.basename(sourcePath);
      const destPath = path.join(inputDir, fileName);
      await fs.copyFile(sourcePath, destPath);
      sourceFileNames.push(fileName);
      logger.info(`Copied ${sourcePath} to ${destPath}`);
    }

    // generateImageMixStyle expects two source files; if we only have one, pass the same one twice
    const srcA = sourceFileNames[0];
    const srcB = sourceFileNames[1] || sourceFileNames[0];

    return await generateImageMixStyle(imagePrompt, srcA, srcB, imageFileName, baseUrl, outputDir, {
      width: 1280,
      height: 720,
    });
  } catch (error) {
    logger.error(`Failed to generate image for ${imageFileName} on server ${baseUrl}:`, error);
    return null;
  } finally {
    // cleanup local renamed images and any files copied to the server input dir
    for (const sourcePath of renamedImagePaths) {
      try {
        await fs.unlink(sourcePath);
        logger.debug(`Deleted source image: ${sourcePath}`);
      } catch (error) {
        logger.error(`Failed to delete source image ${sourcePath}:`, error);
      }
    }
    for (const fileName of sourceFileNames) {
      try {
        const serverPath = path.join(inputDir, fileName);
        await fs.unlink(serverPath);
        logger.debug(`Deleted server image: ${serverPath}`);
      } catch (error) {
        logger.error(`Failed to delete server image ${fileName}:`, error);
      }
    }
  }
}

/**
 * Searches Pinterest for `keyword` and returns the URL of a randomly chosen pin.
 *
 * Opens the search page in headless Puppeteer, scrolls to the bottom 1-5 times
 * with a 1-2 s pause after each scroll, then collects every anchor whose href
 * contains "/pin/".
 *
 * @param keyword Search term (URL-encoded before use).
 * @returns A random pin URL, or `null` when none is found or an error occurs.
 */
async function getPinUrlFromPinterest(keyword: string): Promise<string | null> {
  const browser = await puppeteer.launch({ headless: true });

  try {
    // Page setup lives inside the try so the browser is closed even if it fails.
    const page = await browser.newPage();
    await page.setUserAgent('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36');
    await page.setViewport({ width: 1920, height: 1080 });

    const searchUrl = `https://www.pinterest.com/search/pins/?q=${encodeURIComponent(keyword)}`;
    await page.goto(searchUrl, { waitUntil: 'networkidle2' });

    const scrollCount = Math.floor(Math.random() * 5) + 1;
    logger.info(`Scrolling ${scrollCount} times...`);

    for (let i = 0; i < scrollCount; i++) {
      await page.evaluate('window.scrollTo(0, document.body.scrollHeight)');
      await new Promise((resolve) => setTimeout(resolve, Math.random() * 1000 + 1000));
    }

    const pinLinks = await page.$$eval('a', (anchors) =>
      anchors.map((a) => a.href).filter((href) => href.includes('/pin/'))
    );

    if (pinLinks.length === 0) return null;
    return pinLinks[Math.floor(Math.random() * pinLinks.length)];
  } catch (error) {
    logger.error('Error while getting pin URL from Pinterest:', error);
    return null;
  } finally {
    await browser.close();
  }
}

// Entry point: read the keyword file, then repeatedly download pin images,
// build prompts, generate images and videos, and store the results.
(async () => {
  /** One entry of pinterest_keywords.json; pin ids may be stored under `pinIds` or `pinId`. */
  type KeywordFileEntry = {
    genre: string;
    subGenre: string;
    pinIds?: string[];
    pinId?: string[];
    videoInstructions?: string[];
  };

  // Load pinterest keywords JSON
  const keywordsFilePath = path.resolve(process.cwd(), 'src', 'pinterest_keywords.json');
  let allKeywords: KeywordFileEntry[] = [];
  try {
    const raw = await fs.readFile(keywordsFilePath, 'utf-8');
    const parsed: unknown = JSON.parse(raw);
    if (!Array.isArray(parsed)) {
      throw new Error('expected a JSON array at the top level');
    }
    allKeywords = parsed;
  } catch (err) {
    logger.error('Failed to read pinterest keywords JSON:', err);
    return;
  }

  /*
  allKeywords = allKeywords.filter(a => {
    return (a.genre == "city" && a.subGenre == "Bridges") ||
      (a.genre == "city" && a.subGenre == "Castles") ||
      (a.genre == "city" && a.subGenre == "Cathedrals") ||
      (a.genre == "city" && a.subGenre == "Factories") ||
      (a.genre == "city" && a.subGenre == "Futuristic Cities") ||
      (a.genre == "city" && a.subGenre == "Historic Towns") ||
      (a.genre == "city" && a.subGenre == "Libraries") ||
      (a.genre == "city" && a.subGenre == "Markets") ||
      (a.genre == "city" && a.subGenre == "Modern Plazas") ||
      (a.genre == "city" && a.subGenre == "Museums") ||
      (a.genre == "city" && a.subGenre == "Palaces") ||
      (a.genre == "city" && a.subGenre == "Residential Blocks") ||
      (a.genre == "city" && a.subGenre == "Skylines") ||
      (a.genre == "city" && a.subGenre == "Stadiums") ||
      (a.genre == "city" && a.subGenre == "Street Cafes") ||
      (a.genre == "city" && a.subGenre == "Urban Parks") ||
      (a.genre == "city" && a.subGenre == "Skyscrapers") ||
      (a.genre == "city" && a.subGenre == "Slums")
  });
  */

  // Only the "epic" genre is processed.
  allKeywords = allKeywords.filter((a) => a.genre === 'epic');

  /** In-place Fisher-Yates shuffle; returns the same array instance. */
  function shuffle<T>(arr: T[]): T[] {
    for (let i = arr.length - 1; i > 0; i--) {
      const j = Math.floor(Math.random() * (i + 1));
      [arr[i], arr[j]] = [arr[j], arr[i]];
    }
    return arr;
  }

  //const selectedEntries = shuffle(allKeywords.slice()).slice(0, Math.min(20, allKeywords.length));
  const selectedEntries = allKeywords;

  /**
   * Downloads up to `count` images from a pin page.
   *
   * Opens the pin page, scrolls to the bottom 3 times to trigger lazy loading,
   * collects the "4x" srcset URL of every <img> that has a srcset (falling back
   * to `src` when it contains "/originals/"), keeps only pinimg URLs, and saves
   * a random selection into `<cwd>/download`.
   *
   * @param pinUrl URL of the pin page.
   * @param count Maximum number of images to download (default 1).
   * @returns Paths of the saved images (may be empty).
   */
  async function downloadOneImageFromPin(pinUrl: string, count: number = 1): Promise<string[]> {
    // NOTE(review): this launches a visible (non-headless) browser, unlike
    // getPinUrlFromPinterest — confirm that is intentional.
    const browser = await puppeteer.launch({ headless: false });

    try {
      const page = await browser.newPage();
      await page.setUserAgent('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36');
      await page.setViewport({ width: 1920, height: 1080 });

      await page.goto(pinUrl, { waitUntil: 'networkidle2', timeout: 30000 });

      for (let i = 0; i < 3; i++) {
        await page.evaluate('window.scrollTo(0, document.body.scrollHeight)');
        await new Promise((r) => setTimeout(r, 700 + Math.random() * 800));
      }

      const imgs: string[] = await page.$$eval('img', (elements) => {
        // For each <img> try to extract the 4x (original) URL from srcset.
        // srcset example:
        // "https://i.pinimg.com/236x/...jpg 1x, https://i.pinimg.com/474x/...jpg 2x, https://i.pinimg.com/736x/...jpg 3x, https://i.pinimg.com/originals/...jpg 4x"
        const urls: string[] = elements
          .map((img) => {
            const srcset = (img as HTMLImageElement).getAttribute('srcset') || '';
            if (!srcset) return '';
            const parts = srcset.split(',').map((p) => p.trim());
            for (const part of parts) {
              const m = part.match(/^(\S+)\s+4x$/);
              if (m && m[1]) return m[1];
            }
            // fallback: if src contains "originals" return src
            const src = (img as HTMLImageElement).src || '';
            if (src.includes('/originals/')) return src;
            return '';
          })
          .filter((s) => !!s && s.includes('pinimg'));
        return urls;
      });

      if (!imgs || imgs.length === 0) {
        logger.warn(`No image src (4x) found on pin page ${pinUrl}`);
        return [];
      }

      // De-duplicate, shuffle and pick up to `count` unique images
      const uniqueUrls = Array.from(new Set(imgs));
      const chosen = shuffle(uniqueUrls).slice(0, Math.min(count, uniqueUrls.length));

      const outDir = path.join(process.cwd(), 'download');
      await fs.mkdir(outDir, { recursive: true });

      const results: string[] = [];
      for (let i = 0; i < chosen.length; i++) {
        const src = chosen[i];
        const imgPage = await browser.newPage().catch((err) => {
          logger.error(`Failed to download image ${src} from ${pinUrl}:`, err);
          return null;
        });
        if (!imgPage) continue;

        try {
          const resp = await imgPage.goto(src, { timeout: 30000, waitUntil: 'networkidle2' });
          if (!resp) {
            logger.warn(`Failed to fetch image ${src} from ${pinUrl}`);
            continue;
          }
          const buffer = await resp.buffer();
          const pinId = pinUrl.split('/').filter(Boolean).pop() || `pin_${Date.now()}`;
          const timestamp = Date.now();
          const outPath = path.join(outDir, `${pinId}_${timestamp}_${i}.png`);
          await fs.writeFile(outPath, buffer);
          results.push(outPath);
        } catch (err) {
          logger.error(`Failed to download image ${src} from ${pinUrl}:`, err);
        } finally {
          // Close the tab on every path so failed downloads do not accumulate open pages.
          await imgPage.close().catch(() => undefined);
        }
      }

      return results;
    } catch (err) {
      logger.error(`Failed to download images from ${pinUrl}:`, err);
      return [];
    } finally {
      await browser.close();
    }
  }

  const numberOfPinIds = Number(process.env.NUMBER_OF_PINIDS) || 20;

  // Build keywords list keeping at most `numberOfPinIds` pin ids per selected subGenre
  const keywords: { genre: string; subGenre: string; pinIds: string[]; videoInstructions?: string[] }[] = [];
  for (const entry of selectedEntries) {
    const pinIds = entry.pinIds || entry.pinId;
    if (!Array.isArray(pinIds) || pinIds.length === 0) continue;
    keywords.push({
      genre: entry.genre,
      subGenre: entry.subGenre,
      // slice (not splice) so the parsed entry is left untouched
      pinIds: pinIds.slice(0, numberOfPinIds),
      videoInstructions: entry.videoInstructions,
    });
  }

  if (keywords.length === 0) {
    logger.error("No keywords/pinIds available from pinterest_keywords.json. Exiting.");
    return;
  }

  if (servers.length === 0) {
    logger.error("No servers configured. Please check your .env file.");
    return;
  }

  /** A single pin id together with the keyword entry it came from. */
  type PinIdEntry = {
    pinId: string;
    genreSubGenre: { genre: string; subGenre: string; pinIds: string[]; videoInstructions: string[] };
  };

  while (true) {
    const generationTasks: GenerationTask[] = [];

    // Flatten keywords into one row per pin id.
    const allPinIds: PinIdEntry[] = [];
    for (const keyword of keywords) {
      const videoInstructions = keyword.videoInstructions ?? [];
      for (const id of keyword.pinIds ?? []) {
        allPinIds.push({
          pinId: id,
          genreSubGenre: {
            genre: keyword.genre,
            subGenre: keyword.subGenre,
            pinIds: keyword.pinIds,
            videoInstructions,
          },
        });
      }
    }

    // Process at most 30 randomly chosen pins per iteration.
    const pickedUpPinIds = shuffle(allPinIds).slice(0, 30);

    for (const { pinId, genreSubGenre } of pickedUpPinIds) {
      const { genre, subGenre, videoInstructions } = genreSubGenre;
      const pin = `https://www.pinterest.com/pin/${pinId}/`;
      logger.info(`--- Starting processing for pin: ${pin} ---`);

      // download images from the pin page (pass desired count as second arg)
      const downloadedImagePaths = await downloadOneImageFromPin(pin, 20);

      if (!downloadedImagePaths || downloadedImagePaths.length === 0) {
        logger.warn(`No images were downloaded for pin ${pin}. Skipping.`);
        continue;
      }

      // Keep two random images; the others are not used anywhere later, so
      // remove them now instead of letting them pile up in the download directory.
      const shuffledDownloads = shuffle(downloadedImagePaths.slice());
      const selectedImages = shuffledDownloads.slice(0, 2);
      for (const unusedPath of shuffledDownloads.slice(2)) {
        try {
          await fs.unlink(unusedPath);
        } catch (error) {
          logger.error(`Failed to delete image ${unusedPath}:`, error);
        }
      }

      logger.info(`--- Downloaded ${selectedImages.length} image(s) for processing ---`);

      const task = await getPromptsForImage(selectedImages, pin, genre, subGenre, videoInstructions);
      if (task) {
        task.videoInstructions = videoInstructions;
        generationTasks.push(task);
      }
    }

    // --- Image Generation Phase ---
    logger.info(`--- Starting image generation for ${generationTasks.length} tasks ---`);
    for (const task of generationTasks) {
      const server = servers[Math.floor(Math.random() * servers.length)];
      const imagePath = await generateImageForTask(task, server);
      if (imagePath) {
        task.generatedImagePath = imagePath;
      }
    }
    logger.info("--- Finished image generation ---");

    // --- Video Generation Phase ---
    logger.info(`--- Starting video generation for ${generationTasks.length} tasks ---`);
    for (const task of generationTasks) {
      if (!task.generatedImagePath) {
        logger.warn(`Skipping video generation for task ${task.imageFileName} as it has no generated image.`);
        continue;
      }

      const server = servers[Math.floor(Math.random() * servers.length)];
      const inputDir = server.outputDir.replace("output", "input");
      const generatedImageName = path.basename(task.generatedImagePath);
      const serverImagePath = path.join(inputDir, generatedImageName);

      try {
        // The generated image is copied into the chosen server's input directory
        // and referenced by file name in the generateVideo call.
        await fs.copyFile(task.generatedImagePath, serverImagePath);
        logger.info(`Copied ${task.generatedImagePath} to ${serverImagePath}`);

        const videoFileName = task.imageFileName.replace('.png', '.mp4');
        const videoPath = await generateVideo(
          task.videoPrompt,
          generatedImageName,
          videoFileName,
          server.baseUrl,
          server.outputDir,
          { width: 1280, height: 720 }
        );

        if (videoPath) {
          const videoData = {
            genre: task.genre,
            sub_genre: task.subGenre,
            scene: task.scene,
            action: task.action,
            camera: task.camera,
            image_prompt: task.imagePrompt,
            video_prompt: task.videoPrompt,
            image_path: task.generatedImagePath,
            video_path: videoPath,
          };
          const videoId = await VideoModel.create(videoData);
          logger.info(`Successfully saved video record to database with ID: ${videoId}`);

          // Rename both files to "<videoId>_<genre>_<subGenre>.<ext>" and store the new paths.
          const newImageName = `${videoId}_${task.genre}_${task.subGenre}${path.extname(task.generatedImagePath)}`;
          const newVideoName = `${videoId}_${task.genre}_${task.subGenre}${path.extname(videoPath)}`;
          const newImagePath = path.join(path.dirname(task.generatedImagePath), newImageName);
          const newVideoPath = path.join(path.dirname(videoPath), newVideoName);

          await fs.rename(task.generatedImagePath, newImagePath);
          await fs.rename(videoPath, newVideoPath);

          await VideoModel.update(videoId, {
            image_path: newImagePath,
            video_path: newVideoPath,
          });
          logger.info(`Renamed files and updated database record for video ID: ${videoId}`);
        }
      } catch (error) {
        logger.error('An error occurred during video generation or database operations:', error);
      } finally {
        try {
          await fs.unlink(serverImagePath);
          logger.debug(`Deleted server image: ${serverImagePath}`);
        } catch (error) {
          logger.error(`Failed to delete server image ${serverImagePath}:`, error);
        }
      }
    }
    logger.info("--- Finished video generation ---");

    if (RUN_ONCE) {
      logger.info('RUN_ONCE=true - exiting after a single iteration of generation.');
      return;
    }
  }
})().catch((err) => {
  // Anything that escapes the loop (e.g. a failed rename or browser launch) is
  // logged here instead of surfacing as an unhandled promise rejection.
  logger.error('Unhandled error in generation loop:', err);
  process.exitCode = 1;
});