import { callOpenAI, callOpenAIWithFile } from './lib/openai';
import { generateVideo } from './lib/video-generator';
import { generateImage as generateImageMixStyle } from './lib/image-generator-mix-style';
import { generateImage } from './lib/image-generator';
import { logger } from './lib/logger';
import * as fs from 'fs/promises';
import dotenv from 'dotenv';
import path from 'path';
import puppeteer from 'puppeteer';
import { VideoModel } from './lib/db/video';

dotenv.config();

// ---------------------------------------------------------------------------
// Configuration (all values can be overridden through environment variables)
// ---------------------------------------------------------------------------

/** When true, the pipeline exits after a single iteration instead of looping. */
const RUN_ONCE = (process.env.RUN_ONCE || 'false').toLowerCase() === 'true';
/** Number of search keywords requested from the language model per iteration. */
const NUMBER_OF_KEYWORDS = Number(process.env.NUMBER_OF_KEYWORDS) || 20;
const SCROLL_SEARCH = Number(process.env.SCROLL_SEARCH) || 5; // scroll times on search results
const SCROLL_PIN = Number(process.env.SCROLL_PIN) || 3; // scroll times on pin page
/** When true, the downloaded pin image is handed to the mix-style image generator as reference. */
const USE_REFERENCE_IMAGE = (process.env.USE_REFERENCE_IMAGE || 'true').toLowerCase() === 'true';

// Hard-coded user prompt. It is passed to generateKeywordsFromPrompt() as the
// instruction from which the Pinterest search keywords are derived.
// You can change this string here or override it via HARDCODED_USER_PROMPT.
const HARDCODED_USER_PROMPT =
  process.env.HARDCODED_USER_PROMPT ||
  "Generate 20 dance keywords more something like street dance. So I can search pinterest.";

/** User agent string presented to Pinterest by the headless browser. */
const BROWSER_USER_AGENT =
  'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36';

/** Pause before the next iteration when an iteration produced no pipeline items. */
const EMPTY_ITERATION_DELAY_MS = 5000;

/** A generation server: its HTTP base URL and the directory it writes results to. */
interface ComfyServer {
  baseUrl: string;
  outputDir: string;
}

// Only fully configured servers (both values present) are kept.
const servers = [
  /*{
    baseUrl: process.env.SERVER1_COMFY_BASE_URL,
    outputDir: process.env.SERVER1_COMFY_OUTPUT_DIR,
  },*/
  {
    baseUrl: process.env.SERVER2_COMFY_BASE_URL,
    outputDir: process.env.SERVER2_COMFY_OUTPUT_DIR,
  },
].filter((s): s is ComfyServer => !!s.baseUrl && !!s.outputDir);

/** One unit of work flowing through the pipeline (keyword -> pin -> image -> video). */
interface PipelineItem {
  keyword: string;
  pinUrl: string;
  imagePrompt: string;
  videoPrompt: string;
  baseImagePath: string; // downloaded from pin
  generatedImagePath?: string; // generated on server
}

// ---------------------------------------------------------------------------
// Small utilities
// ---------------------------------------------------------------------------

/** Resolves after `ms` milliseconds. */
function sleep(ms: number): Promise<void> {
  return new Promise((resolve) => setTimeout(resolve, ms));
}

/**
 * Derives a server's input directory from its output directory.
 * NOTE: this replaces the FIRST occurrence of "output" in the path, exactly as
 * the pipeline has always done.
 */
function toInputDir(outputDir: string): string {
  return outputDir.replace("output", "input");
}

/**
 * Makes a free-form keyword safe to embed in a file name by replacing path
 * separators, characters reserved on common file systems and control
 * characters with "_". Spaces and non-ASCII letters are kept.
 */
function toSafeFileNamePart(value: string): string {
  const cleaned = value.replace(/[\\/:*?"<>|\u0000-\u001f]+/g, '_').trim();
  return cleaned || 'untitled';
}

/**
 * Removes a leading list marker ("1. ", "2) ", "3 - ", "- ", "* ") from a line.
 * A delimiter is required after the digits, so keywords that merely start with
 * a number (e.g. "80s disco") are left intact.
 */
function stripListMarker(line: string): string {
  return line
    .trim()
    .replace(/^(?:[-*•]\s+|\d+[.)]\s*|\d+\s*[-:]\s+)/, '')
    .trim();
}

/**
 * Returns the first truthy property among `keys` on `source`, converted with
 * String(). Returns '' when `source` is falsy or none of the keys is truthy.
 */
function pickField(source: any, keys: string[]): string {
  if (!source) return '';
  for (const key of keys) {
    const value = source[key];
    if (value) return String(value);
  }
  return '';
}

/** Splits `items` into `bucketCount` lists, assigning item i to bucket i % bucketCount. */
function assignRoundRobin<T>(items: T[], bucketCount: number): T[][] {
  const buckets: T[][] = [];
  for (let b = 0; b < bucketCount; b++) buckets.push([]);
  items.forEach((item, idx) => {
    buckets[idx % bucketCount].push(item);
  });
  return buckets;
}

// ---------------------------------------------------------------------------
// Language-model helpers
// ---------------------------------------------------------------------------

/**
 * Re-usable helper to extract JSON embedded in text.
 *
 * Tries, in order: a ``` fenced block, the widest {...} or [...] span, and
 * finally (only when no brace/bracket span exists) the non-empty lines of the
 * text as an array when there is more than one line.
 *
 * @returns the parsed value, an array of lines, or null when nothing usable is found.
 */
function extractJsonFromText(text: string): any | null {
  if (!text || typeof text !== 'string') return null;

  const fenced = text.match(/```(?:json)?\s*([\s\S]*?)\s*```/i);
  if (fenced && fenced[1]) {
    try {
      return JSON.parse(fenced[1].trim());
    } catch (e) {
      /* fall through */
    }
  }

  const brace = text.match(/\{[\s\S]*\}|\[[\s\S]*\]/);
  if (brace && brace[0]) {
    try {
      return JSON.parse(brace[0]);
    } catch (e) {
      return null;
    }
  }

  // Attempt line-separated keywords fallback
  const lines = text.split(/\r?\n/).map((l) => l.trim()).filter(Boolean);
  if (lines.length > 1) return lines;
  return null;
}

/**
 * Calls OpenAI with an image and a prompt and returns the JSON object found in
 * the answer, retrying up to `maxRetries` times.
 *
 * An object response is returned as is. A string response is only accepted if
 * a non-array JSON object can be extracted from it: every caller reads named
 * fields, so a bare list of text lines counts as a failed attempt and is retried.
 *
 * @returns the response object, or null after all attempts failed.
 */
async function callOpenAIWithFileAndExtract(
  imagePath: string,
  prompt: string,
  maxRetries = 5
): Promise<any | null> {
  for (let attempt = 1; attempt <= maxRetries; attempt++) {
    try {
      const res = await callOpenAIWithFile(imagePath, prompt);
      if (!res) {
        logger.warn(`callOpenAIWithFileAndExtract attempt ${attempt} returned empty response`);
        continue;
      }
      if (typeof res === 'object') return res;
      if (typeof res === 'string') {
        const parsed = extractJsonFromText(res);
        if (parsed && typeof parsed === 'object' && !Array.isArray(parsed)) return parsed;
      }
      logger.warn(`callOpenAIWithFileAndExtract: attempt ${attempt} unexpected shape`);
    } catch (err) {
      logger.warn(`callOpenAIWithFileAndExtract: attempt ${attempt} failed: ${err}`);
    }
  }
  logger.error(`callOpenAIWithFileAndExtract: failed after ${maxRetries} attempts`);
  return null;
}

/**
 * Asks ChatGPT to produce search keywords from a single high-level prompt.
 *
 * Accepted answer shapes, in order of preference: a JSON array, an object with
 * a `keywords` / `list` / `items` / `keywords_list` array (either parsed out of
 * the text or returned directly), and finally plain text with one keyword per line.
 * List numbering is stripped and empty entries are dropped.
 *
 * @returns at most `count` keywords; an empty array when nothing could be extracted.
 */
async function generateKeywordsFromPrompt(prompt: string, count = NUMBER_OF_KEYWORDS): Promise<string[]> {
  const instruction = `You are given a short instruction describing the type of short 8-second cinematic videos to create. Return exactly a JSON array of ${count} short keyword phrases (each 1-3 words) suitable for searching Pinterest. Example output: ["sunset beach","city skyline",...]. Do not include commentary.`;
  const res = await callOpenAI(`${instruction}\n\nInstruction: ${prompt}`);

  const normalize = (items: unknown[]): string[] =>
    items
      .map((item) => stripListMarker(String(item)))
      .filter(Boolean)
      .slice(0, count);

  const parsed = extractJsonFromText(
    typeof res === 'string' ? res : (res && (res.text || JSON.stringify(res)))
  );
  if (Array.isArray(parsed)) return normalize(parsed);

  // fallback: try to parse common fields, on the parsed object first and then on the raw response
  for (const candidate of [parsed, res]) {
    if (candidate && typeof candidate === 'object') {
      const maybe = candidate.keywords || candidate.list || candidate.items || candidate.keywords_list;
      if (Array.isArray(maybe)) return normalize(maybe);
    }
  }

  // last fallback: split lines (JSON.stringify(undefined) yields undefined, hence the `|| ''`)
  const text = typeof res === 'string' ? res : JSON.stringify(res) || '';
  return normalize(text.split(/\r?\n/));
}

// ---------------------------------------------------------------------------
// Pinterest scraping
// ---------------------------------------------------------------------------

/**
 * Searches Pinterest for `keyword`, scrolls the result page `scrollCount`
 * times and returns the URL of one randomly chosen pin link.
 *
 * @returns a pin URL, or null when no pin link was found or an error occurred.
 */
async function getPinUrlFromPinterest(keyword: string, scrollCount = SCROLL_SEARCH): Promise<string | null> {
  const browser = await puppeteer.launch({ headless: true });
  try {
    // Page setup lives inside the try so the browser is closed even if it fails.
    const page = await browser.newPage();
    await page.setUserAgent(BROWSER_USER_AGENT);
    await page.setViewport({ width: 1920, height: 1080 });

    const searchUrl = `https://www.pinterest.com/search/pins/?q=${encodeURIComponent(keyword)}`;
    await page.goto(searchUrl, { waitUntil: 'networkidle2' });

    for (let i = 0; i < scrollCount; i++) {
      await page.evaluate('window.scrollTo(0, document.body.scrollHeight)');
      // randomized pause between scrolls
      await sleep(500 + Math.random() * 1000);
    }

    const pinLinks = await page.$$eval('a', (anchors) =>
      anchors.map((a) => a.href).filter((href) => href.includes('/pin/'))
    );
    if (pinLinks.length > 0) return pinLinks[Math.floor(Math.random() * pinLinks.length)];
    return null;
  } catch (error) {
    logger.error('Error while getting pin URL from Pinterest:', error);
    return null;
  } finally {
    await browser.close();
  }
}

/**
 * Downloads up to `count` high-quality images from a pin page into
 * `<cwd>/download`.
 *
 * Only <img> elements that have a `srcset` are considered: the "4x" srcset
 * entry is preferred, otherwise a `src` containing "/originals/". Candidates
 * must contain "pinimg" in their URL. Files are written with a ".png" suffix
 * regardless of the downloaded content.
 *
 * @returns the paths of the written files; empty when nothing was downloaded.
 */
async function downloadOneImageFromPin(
  pinUrl: string,
  count: number = 1,
  scrollTimes = SCROLL_PIN
): Promise<string[]> {
  const browser = await puppeteer.launch({ headless: true });
  try {
    // Page setup lives inside the try so the browser is closed even if it fails.
    const page = await browser.newPage();
    await page.setUserAgent(BROWSER_USER_AGENT);
    await page.setViewport({ width: 1920, height: 1080 });

    await page.goto(pinUrl, { waitUntil: 'networkidle2', timeout: 30000 });
    for (let i = 0; i < scrollTimes; i++) {
      await page.evaluate('window.scrollTo(0, document.body.scrollHeight)');
      await sleep(700 + Math.random() * 800);
    }

    const imgs: string[] = await page.$$eval('img', (imgs) => {
      const urls: string[] = imgs
        .map((img) => {
          const srcset = (img as HTMLImageElement).getAttribute('srcset') || '';
          if (!srcset) return '';
          const parts = srcset.split(',').map((p) => p.trim());
          for (const part of parts) {
            const m = part.match(/^(\S+)\s+4x$/);
            if (m && m[1]) return m[1];
          }
          const src = (img as HTMLImageElement).src || '';
          if (src.includes('/originals/')) return src;
          return '';
        })
        .filter((s) => !!s && s.includes('pinimg'));
      return urls;
    });

    if (!imgs || imgs.length === 0) {
      logger.warn(`No high-res images found on pin ${pinUrl}`);
      return [];
    }

    const shuffled = imgs.slice().sort(() => 0.5 - Math.random());
    const chosen = shuffled.slice(0, Math.min(count, shuffled.length));

    const outDir = path.join(process.cwd(), 'download');
    await fs.mkdir(outDir, { recursive: true });

    const results: string[] = [];
    for (let i = 0; i < chosen.length; i++) {
      const src = chosen[i];
      try {
        const imgPage = await browser.newPage();
        try {
          const resp = await imgPage.goto(src, { timeout: 30000, waitUntil: 'networkidle2' });
          if (!resp) continue;
          const buffer = await resp.buffer();
          const pinId = pinUrl.split('/').filter(Boolean).pop() || `pin_${Date.now()}`;
          const timestamp = Date.now();
          const outPath = path.join(outDir, `${pinId}_${timestamp}_${i}.png`);
          await fs.writeFile(outPath, buffer);
          results.push(outPath);
        } finally {
          // close the tab on success, on a missing response and on failure alike
          await imgPage.close();
        }
      } catch (err) {
        logger.error(`Failed to download image ${src} from ${pinUrl}:`, err);
      }
    }
    return results;
  } catch (err) {
    logger.error(`Failed to download images from ${pinUrl}:`, err);
    return [];
  } finally {
    await browser.close();
  }
}

// ---------------------------------------------------------------------------
// Prompt generation
// ---------------------------------------------------------------------------

/**
 * Builds the image and video prompts for a downloaded pin image by querying
 * OpenAI in four steps (main object, action type, camera work, final prompts).
 *
 * The given files are first renamed to `<pinId>_<timestamp>_<index>.png` in
 * their current directory; one of them is picked at random for all queries.
 * On failure every renamed file is deleted and null is returned.
 *
 * @param imagePaths   local image files downloaded from the pin
 * @param pinUrl       pin URL; its last path segment is used as the pin id
 * @param genrePrompt  text inserted as "Genre instruction" in the final prompt
 */
async function getPromptsForImage(
  imagePaths: string[],
  pinUrl: string,
  genrePrompt: string
): Promise<{ imagePrompt: string; videoPrompt: string; baseImagePath: string } | null> {
  const pinId = pinUrl.split('/').filter(Boolean).pop() || `pin_${Date.now()}`;
  const timestamp = Date.now();

  const renamedImagePaths: string[] = [];
  for (let i = 0; i < imagePaths.length; i++) {
    const renamedPath = path.join(path.dirname(imagePaths[i]), `${pinId}_${timestamp}_${i}.png`);
    await fs.rename(imagePaths[i], renamedPath);
    renamedImagePaths.push(renamedPath);
  }

  const imageForPrompt = renamedImagePaths[Math.floor(Math.random() * renamedImagePaths.length)];

  try {
    const step1Prompt = `Return exactly one JSON object: { "mainobject": "..." }. Look at the provided image and determine the single most prominent/main object or subject in the scene. Answer with a short noun or short phrase.`;
    const step1Res = await callOpenAIWithFileAndExtract(imageForPrompt, step1Prompt, 5);
    const mainobject = pickField(step1Res, ['mainobject', 'mainObject', 'object']).trim();
    if (!mainobject) throw new Error('Could not detect main object');

    const step2Prompt = `You have access to the image and the detected main object: "${mainobject}". Decide which single action type best fits this scene from the list: - no action - micro animation - big movement - impossible movement - Dance (if portrait). Return exactly one JSON object: { "actiontype": "..." }.`;
    const step2Res = await callOpenAIWithFileAndExtract(imageForPrompt, step2Prompt, 5);
    // An empty action type is tolerated; it is passed on as an empty string.
    const actiontype = pickField(step2Res, ['actiontype', 'actionType']).trim();

    const step3Prompt = `Given the image and the following information: - main object: "${mainobject}" - chosen action type: "${actiontype}" From the options pick the single best camera approach: - static camera - pan - rotation - follow the moving object - zoom to the object - impossible camera work. Return exactly one JSON object: { "cameraworkType": "..." }.`;
    const step3Res = await callOpenAIWithFileAndExtract(imageForPrompt, step3Prompt, 5);
    // An empty camera work type is tolerated as well.
    const cameraworkType = pickField(step3Res, ['cameraworkType', 'cameraWorkType', 'camera']).trim();

    const finalPrompt = `Return exactly one JSON object: { "scene": "...", "action":"...", "camera":"...", "image_prompt":"...", "videoPrompt":"..." } and nothing else. Write "videoPrompt" in 100–150 words, present tense, plain concrete language. Write "image_prompt" as a concise, detailed prompt suitable for generating a similar image. Here is information of the scene: Detected Main Object: ${mainobject} Suggested Action Type: ${actiontype} Suggested Camera Work: ${cameraworkType} Genre instruction: ${genrePrompt}`;
    const finalRes = await callOpenAIWithFileAndExtract(imageForPrompt, finalPrompt, 5);

    const imagePrompt = pickField(finalRes, ['image_prompt', 'imagePrompt']);
    const videoPrompt = pickField(finalRes, ['videoPrompt', 'video_prompt']);
    if (!imagePrompt || !videoPrompt) throw new Error('Final LM output missing prompts');

    return { imagePrompt, videoPrompt, baseImagePath: imageForPrompt };
  } catch (error) {
    logger.error('Failed to get prompts for image:', error);
    for (const p of renamedImagePaths) {
      try {
        await fs.unlink(p);
      } catch (e) {
        /* ignore */
      }
    }
    return null;
  }
}

// ---------------------------------------------------------------------------
// Image and video generation
// ---------------------------------------------------------------------------

/**
 * Generates an image for a pipeline item on the given server.
 *
 * With USE_REFERENCE_IMAGE the base image is copied into the server's input
 * directory and used as both style sources of the mix-style generator; the
 * copy is removed again afterwards. Otherwise the plain generator is used.
 *
 * @returns the generated image path, or null when generation failed.
 */
async function generateImageForItem(
  item: PipelineItem,
  server: { baseUrl: string; outputDir: string }
): Promise<string | null> {
  const { imagePrompt, baseImagePath } = item;
  const { baseUrl, outputDir } = server;
  const inputDir = toInputDir(outputDir);
  const sourceFileNames: string[] = [];

  try {
    if (USE_REFERENCE_IMAGE) {
      const fileName = path.basename(baseImagePath);
      const destPath = path.join(inputDir, fileName);
      await fs.copyFile(baseImagePath, destPath);
      sourceFileNames.push(fileName);
      logger.info(`Copied ${baseImagePath} to ${destPath}`);

      // Only one reference image exists, so it serves as both sources.
      const srcA = sourceFileNames[0];
      const srcB = sourceFileNames[1] || sourceFileNames[0];

      const generatedImagePath = await generateImageMixStyle(
        imagePrompt,
        srcA,
        srcB,
        `${path.basename(baseImagePath)}`,
        baseUrl,
        outputDir,
        { width: 1280, height: 720 }
      );
      return generatedImagePath;
    } else {
      const generatedImagePath = await generateImage(
        imagePrompt,
        `${path.basename(baseImagePath)}`,
        baseUrl,
        outputDir,
        'qwen',
        { width: 1280, height: 720 }
      );
      return generatedImagePath;
    }
  } catch (error) {
    logger.error(`Failed to generate image on server ${baseUrl}:`, error);
    return null;
  } finally {
    // cleanup base image copied to server input
    for (const fileName of sourceFileNames) {
      try {
        const serverPath = path.join(inputDir, fileName);
        await fs.unlink(serverPath);
      } catch (error) {
        logger.error(`Failed to delete server image ${fileName}:`, error);
      }
    }
    // local base image cleanup is left to caller if desired
  }
}

/**
 * Generates the video for one item on the given server, stores a DB record and
 * renames image and video to `<videoId>_<keyword>.<ext>`.
 *
 * The keyword is sanitized before it is used in the file names. Errors are
 * logged, never thrown; the image copy placed in the server's input directory
 * is always removed.
 */
async function generateVideoForItem(task: PipelineItem, server: ComfyServer): Promise<void> {
  if (!task.generatedImagePath) {
    logger.warn(`Skipping a task on ${server.baseUrl} - missing generatedImagePath`);
    return;
  }
  const generatedImagePath = task.generatedImagePath;

  const inputDir = toInputDir(server.outputDir);
  const generatedImageName = path.basename(generatedImagePath);
  const serverImagePath = path.join(inputDir, generatedImageName);

  try {
    await fs.copyFile(generatedImagePath, serverImagePath);
    logger.info(`Copied ${generatedImagePath} to ${serverImagePath}`);

    const videoFileName = `${path.basename(generatedImagePath, path.extname(generatedImagePath))}.mp4`;

    const videoPath = await generateVideo(
      task.videoPrompt,
      generatedImageName,
      videoFileName,
      server.baseUrl,
      server.outputDir,
      { width: 1280, height: 720 },
      false,
      false
    );

    if (videoPath) {
      const videoData = {
        genre: task.keyword,
        sub_genre: task.keyword,
        scene: '',
        action: '',
        camera: '',
        image_prompt: task.imagePrompt,
        video_prompt: task.videoPrompt,
        image_path: generatedImagePath,
        video_path: videoPath,
      };

      const videoId = await VideoModel.create(videoData);
      logger.info(`Saved video record ID: ${videoId}`);

      // Keywords come from the language model and may contain characters that
      // are not valid in file names (e.g. "/"), so sanitize before renaming.
      const safeKeyword = toSafeFileNamePart(task.keyword);
      const newImageName = `${videoId}_${safeKeyword}${path.extname(generatedImagePath)}`;
      const newVideoName = `${videoId}_${safeKeyword}${path.extname(videoPath)}`;
      const newImagePath = path.join(path.dirname(generatedImagePath), newImageName);
      const newVideoPath = path.join(path.dirname(videoPath), newVideoName);

      await fs.rename(generatedImagePath, newImagePath);
      await fs.rename(videoPath, newVideoPath);

      await VideoModel.update(videoId, {
        image_path: newImagePath,
        video_path: newVideoPath,
      });
      logger.info(`Renamed and updated DB for video ID: ${videoId}`);
    } else {
      logger.warn(`Video generation returned no path for ${generatedImagePath} on ${server.baseUrl}`);
    }
  } catch (err) {
    logger.error('Error during video generation pipeline step:', err);
  } finally {
    try {
      await fs.unlink(serverImagePath);
    } catch (e) {
      /* ignore */
    }
  }
}

// ---------------------------------------------------------------------------
// Entry point
// ---------------------------------------------------------------------------

(async () => {
  // Entry prompt: use hard-coded prompt defined at the top of the file
  const userPrompt = HARDCODED_USER_PROMPT;

  if (servers.length === 0) {
    logger.error("No servers configured. Please set SERVER2_COMFY_BASE_URL and SERVER2_COMFY_OUTPUT_DIR in .env");
    return;
  }

  while (true) {
    logger.info(`Starting pipeline iteration for prompt: ${userPrompt}`);

    // 1) Ask OpenAI to generate keywords
    const keywords = await generateKeywordsFromPrompt(userPrompt, NUMBER_OF_KEYWORDS);
    logger.info(`Generated ${keywords.length} keywords: ${keywords.join(', ')}`);

    // 2) For each keyword: search pinterest, pick a pin, open the pin page,
    //    pick one photo and generate prompts for it
    const pipelineItems: PipelineItem[] = [];
    for (const kw of keywords) {
      try {
        const pinUrl = await getPinUrlFromPinterest(kw, SCROLL_SEARCH);
        if (!pinUrl) {
          logger.warn(`No pin found for keyword "${kw}"`);
          continue;
        }

        const downloaded = await downloadOneImageFromPin(pinUrl, 1, SCROLL_PIN);
        if (!downloaded || downloaded.length === 0) {
          logger.warn(`No photo downloaded for pin ${pinUrl}`);
          continue;
        }

        const prompts = await getPromptsForImage(downloaded, pinUrl, kw);
        if (!prompts) {
          logger.warn(`Failed to produce prompts for image from pin ${pinUrl}`);
          // cleanup downloaded file
          for (const f of downloaded) {
            try {
              await fs.unlink(f);
            } catch (e) {
              /* ignore */
            }
          }
          continue;
        }

        const item: PipelineItem = {
          keyword: kw,
          pinUrl,
          imagePrompt: prompts.imagePrompt,
          videoPrompt: prompts.videoPrompt,
          baseImagePath: prompts.baseImagePath,
        };
        pipelineItems.push(item);
        logger.info(`Prepared pipeline item for keyword "${kw}"`);
      } catch (err) {
        logger.error(`Error processing keyword ${kw}:`, err);
      }
    }

    // 3) Generate images for all pipeline items, distributed across servers concurrently
    logger.info(`Starting image generation for ${pipelineItems.length} items`);
    if (pipelineItems.length > 0) {
      const tasksByServer = assignRoundRobin(pipelineItems, servers.length);

      await Promise.all(
        servers.map(async (server, si) => {
          const tasks = tasksByServer[si];
          if (!tasks || tasks.length === 0) return;
          logger.info(`Server ${server.baseUrl} generating ${tasks.length} images`);
          const results = await Promise.all(tasks.map((t) => generateImageForItem(t, server)));
          for (let i = 0; i < tasks.length; i++) {
            const res = results[i];
            if (res) tasks[i].generatedImagePath = res;
          }
          logger.info(`Server ${server.baseUrl} finished image generation`);
        })
      );
    }

    // 4) Collect successful items and generate videos (distributed across servers concurrently)
    const readyItems = pipelineItems.filter((i) => i.generatedImagePath);
    logger.info(`Starting video generation for ${readyItems.length} items`);
    if (readyItems.length > 0) {
      const tasksByServer = assignRoundRobin(readyItems, servers.length);

      await Promise.all(
        servers.map(async (server, si) => {
          const tasks = tasksByServer[si];
          if (!tasks || tasks.length === 0) return;
          logger.info(`Server ${server.baseUrl} starting ${tasks.length} video task(s)`);
          await Promise.allSettled(tasks.map((task) => generateVideoForItem(task, server)));
          logger.info(`Server ${server.baseUrl} finished video tasks`);
        })
      );
    }

    logger.info('Pipeline iteration finished.');

    // Cleanup base images downloaded from pins.
    for (const item of pipelineItems) {
      try {
        await fs.unlink(item.baseImagePath);
      } catch (e) {
        /* ignore */
      }
    }

    if (RUN_ONCE) {
      logger.info('RUN_ONCE=true - exiting after one iteration');
      return;
    }

    // Avoid a tight loop of OpenAI/Pinterest requests when nothing could be prepared.
    if (pipelineItems.length === 0) {
      await sleep(EMPTY_ITERATION_DELAY_MS);
    }
  }
})().catch((err) => {
  // Without this handler a failure (e.g. in keyword generation) would surface
  // only as an unhandled promise rejection.
  logger.error('Pipeline terminated with an unhandled error:', err);
  process.exitCode = 1;
});