import dotenv from 'dotenv';
import path from 'path';
import fs from 'fs/promises';
import { logger } from '../lib/logger';
import { callOpenAI } from '../lib/openai';
import { generateVideo } from '../lib/video-generator';

dotenv.config();

/** Pixel dimensions passed to the video generator. */
type Size = { width: number; height: number };

/** Character attributes interpolated into every video prompt. */
interface MusicSpotCharacter {
  bodyType: string;
  hairStyle: string;
}

/** One cut inside a scene; each camera variant yields its own image/video pair. */
interface MusicSpotCut {
  cutId: number;
  pose: string;
  action: string;
  camera?: string[]; // list of camera variants per cut
}

/** A scene groups cuts that share time, location and outfit. */
interface MusicSpotScene {
  sceneId: number;
  time: string;
  location: string;
  outfit: string;
  cuts: MusicSpotCut[];
}

/** Expected shape of `scenes.json`. */
interface MusicSpotConfig {
  character: MusicSpotCharacter;
  scenes: MusicSpotScene[];
}

/** A render server as described by the `SERVER{N}_COMFY_*` environment variables. */
interface Server {
  baseUrl?: string;
  outputDir?: string;
  inputDir?: string;
  name: string;
}

const DEFAULT_SIZE: Size = { width: 720, height: 1280 };

// Project folder: first CLI argument, then MUSICSPOT_FOLDER, then a hard-coded default.
const FOLDER = process.argv[2] || process.env.MUSICSPOT_FOLDER || 'oputstise';
// Same name with path separators and other filename-hostile characters replaced by "_".
const FOLDER_SAFE = FOLDER.replace(/[/\\?%*:|"<>]/g, '_');
const GENERATED_DIR = path.resolve('generated');

// Camera variants used when a cut does not list any of its own.
const DEFAULT_CAMERA_VARIANTS: readonly string[] = [
  'eye-level medium shot',
  'slight left 30°',
  'slight right 30°',
  'slight high angle',
  'slight low angle',
];

/**
 * Builds the list of usable servers from the environment.
 *
 * A server is kept only when both its base URL and its output directory are
 * set. Its input directory is derived from the output directory by replacing
 * the first case-insensitive occurrence of "output" with "input".
 *
 * @returns The configured servers; empty (with a warning logged) when none are usable.
 */
function loadServers(): Server[] {
  const candidates: Server[] = [
    {
      name: 'SERVER1',
      baseUrl: process.env.SERVER1_COMFY_BASE_URL,
      outputDir: process.env.SERVER1_COMFY_OUTPUT_DIR,
    },
    /* {
      name: 'SERVER2',
      baseUrl: process.env.SERVER2_COMFY_BASE_URL,
      outputDir: process.env.SERVER2_COMFY_OUTPUT_DIR,
    },*/
  ];

  const servers: Server[] = [];
  for (const candidate of candidates) {
    const { baseUrl, outputDir } = candidate;
    if (!baseUrl || !outputDir) continue;
    // NOTE(review): only the FIRST "output" in the path is rewritten, so a path
    // containing "output" in an earlier segment yields an unexpected input
    // directory — confirm against the real server layout.
    servers.push({ ...candidate, inputDir: outputDir.replace(/output/i, 'input') });
  }

  if (servers.length === 0) {
    logger.warn('No servers configured. Please set SERVER{N}_COMFY_BASE_URL and SERVER{N}_COMFY_OUTPUT_DIR in .env');
  } else {
    for (const s of servers) {
      logger.info(`Configured ${s.name}: baseUrl=${s.baseUrl}, outputDir=${s.outputDir}, inputDir=${s.inputDir}`);
    }
  }
  return servers;
}

/** Creates the local `generated` directory if it does not exist yet. */
async function ensureDirs(): Promise<void> {
  await fs.mkdir(GENERATED_DIR, { recursive: true });
}

/**
 * Builds the instruction text sent to OpenAI to obtain a single-shot video prompt.
 *
 * @param character - Character attributes woven into the style line.
 * @param scene - Scene supplying time, location and outfit.
 * @param cut - Cut supplying the action and pose.
 * @param cameraIntent - Requested camera variant, quoted verbatim in the request.
 * @returns The request text with no leading or trailing whitespace.
 */
function buildVideoPromptRequest(
  character: MusicSpotCharacter,
  scene: MusicSpotScene,
  cut: MusicSpotCut,
  cameraIntent: string
): string {
  // NOTE(review): segments are joined with single spaces, which reproduces the
  // request text exactly as it exists in this file. If line breaks between the
  // rules were intended, change the separator to '\n'.
  return [
    'Return exactly one JSON object and nothing else: { "videoPrompt": "..." }.',
    'Write "videoPrompt" in 100–140 words. Present tense. Concrete, simple sentences.',
    'HARD RULES:',
    '- One continuous 8-second shot (oner). No edits.',
    '- Fixed location and general vantage; maintain spatial continuity.',
    '- No zooms, no rack zoom, no smash/push-in, no cuts, no transitions, no "meanwhile".',
    '- Camera motion: at most a slight pan/tilt or subtle dolly within 1 meter.',
    '- Keep framing consistent (vertical 720x1280). Avoid technical brand names or lens jargon.',
    `Incorporate the following camera intention: "${cameraIntent}".`,
    'If it conflicts with HARD RULES (e.g., zoom, push-in, extreme moves), reinterpret it into a subtle, compliant motion (e.g., gentle glide, slight pan/tilt) while preserving the creative intent.',
    'Describe:',
    `1) Main action: ${cut.action}`,
    `2) Pose/composition: ${cut.pose}`,
    `3) Scene/time/location/outfit: ${scene.time}; ${scene.location}; outfit: ${scene.outfit}`,
    `4) Lighting/mood/style coherent with the character: ${character.bodyType}; hair: ${character.hairStyle}`,
    'Prohibited (case-insensitive): cut, cuts, cutting, quick cut, insert, close-up, extreme close-up, zoom, zooming, push-in, pull-out, whip, switch angle, change angle, montage, cross-cut, smash cut, transition, meanwhile, later.',
    'Only respond with JSON.',
  ].join(' ');
}

/** Resolves after `ms` milliseconds. */
const sleep = (ms: number): Promise<void> => new Promise<void>((resolve) => setTimeout(resolve, ms));

/**
 * Sends the request to OpenAI and extracts the video prompt from the reply.
 *
 * Accepts the prompt under `videoPrompt`, `video_prompt` or `prompt`, in that order.
 *
 * @param req - Request text, typically from `buildVideoPromptRequest`.
 * @returns The trimmed prompt string.
 * @throws Error when the reply carries no non-empty string under any accepted key.
 */
async function getVideoPromptFromOpenAI(req: string): Promise<string> {
  const res = await callOpenAI(req);
  const prompt = res?.videoPrompt || res?.video_prompt || res?.prompt;
  if (!prompt || typeof prompt !== 'string') {
    throw new Error('OpenAI failed to return videoPrompt JSON.');
  }
  return prompt.trim();
}

/**
 * Picks a server round-robin.
 *
 * @param servers - Available servers.
 * @param idx - Monotonic task counter; wrapped modulo `servers.length`.
 * @throws Error when `servers` is empty.
 */
function pickServer(servers: Server[], idx: number): Server {
  if (servers.length === 0) {
    throw new Error('No servers configured.');
  }
  return servers[idx % servers.length];
}

/**
 * Copies a locally generated image into the input directory of every server.
 *
 * Copy failures are logged as warnings and do not abort the remaining copies.
 *
 * @param servers - Servers whose `inputDir` should receive the file.
 * @param localGeneratedImagePath - Path of the source image on this machine.
 * @returns The bare file name, which is what the Comfy workflow refers to.
 */
async function copyImageToAllServerInputs(servers: Server[], localGeneratedImagePath: string): Promise<string> {
  const fileName = path.basename(localGeneratedImagePath);
  for (const s of servers) {
    if (!s.inputDir) continue;
    const dest = path.join(s.inputDir, fileName);
    try {
      await fs.copyFile(localGeneratedImagePath, dest);
      logger.debug(`Copied ${fileName} to ${s.name} input: ${dest}`);
    } catch (err) {
      logger.warn(`Failed to copy ${fileName} to ${s.name} input: ${err}`);
    }
  }
  return fileName; // return the name used for Comfy workflows
}

/** Returns true when `p` is accessible via `fs.access`, false otherwise. */
async function fileExists(p: string): Promise<boolean> {
  try {
    await fs.access(p);
    return true;
  } catch {
    return false;
  }
}

/**
 * Entry point: for every scene/cut/camera variant whose source image already
 * exists in `generated/` and whose video does not, asks OpenAI for a video
 * prompt and renders the video on a round-robin-selected server.
 *
 * Per-item failures (prompt or render) are logged and skipped. Any other
 * failure is logged as fatal and marks the process as failed.
 */
async function main(): Promise<void> {
  try {
    await ensureDirs();

    // Load scenes.json
    const configRaw = await fs.readFile(path.resolve(`src/musicspot_generator/${FOLDER}/scenes.json`), 'utf-8');
    const cfg: MusicSpotConfig = JSON.parse(configRaw);
    // JSON.parse is unchecked; fail with a clear message instead of a
    // "not iterable" TypeError further down.
    if (!cfg || !Array.isArray(cfg.scenes)) {
      throw new Error(`scenes.json for folder "${FOLDER}" must contain a "scenes" array.`);
    }

    const servers = loadServers();
    if (servers.length === 0) {
      return;
    }

    // Generate videos only, based on images already present in ./generated
    let videoTaskIndex = 0;

    for (const scene of cfg.scenes) {
      logger.info(`=== Scene ${scene.sceneId}: Video generation start ===`);

      for (const cut of scene.cuts) {
        const cameraVariants =
          Array.isArray(cut.camera) && cut.camera.length > 0 ? cut.camera : DEFAULT_CAMERA_VARIANTS;

        for (let camIdx = 0; camIdx < cameraVariants.length; camIdx++) {
          const cameraIntent = cameraVariants[camIdx];
          const variantIndex = camIdx + 1; // file names use 1-based variant numbers

          const imgFileName = `${FOLDER_SAFE}_musicspot_s${scene.sceneId}_c${cut.cutId}_v${variantIndex}.png`;
          const imgPath = path.join(GENERATED_DIR, imgFileName);

          // Only proceed if image exists
          if (!(await fileExists(imgPath))) {
            logger.warn(`Skipping video: source image not found: ${imgPath}`);
            continue;
          }

          const videoFileName = imgFileName.replace(/\.png$/i, '.mp4');
          const videoOutPath = path.join(GENERATED_DIR, videoFileName);

          // Skip if the video already exists
          if (await fileExists(videoOutPath)) {
            logger.info(`Video already exists, skipping: ${videoOutPath}`);
            continue;
          }

          // 1) Generate video prompt for this camera
          logger.info(
            `Scene ${scene.sceneId} - Cut ${cut.cutId} - Cam${variantIndex}: generating video prompt from image ${imgFileName}...`
          );
          const vidPromptReq = buildVideoPromptRequest(cfg.character, scene, cut, cameraIntent);

          let videoPrompt: string;
          try {
            videoPrompt = await getVideoPromptFromOpenAI(vidPromptReq);
          } catch (err) {
            logger.error(`OpenAI video prompt failed for ${imgFileName}: ${err}`);
            continue;
          }

          // 2) Copy the base image to every server's input folder
          const imageFileNameForComfy = await copyImageToAllServerInputs(servers, imgPath);

          // 3) Generate video on a chosen server (round-robin)
          const serverForVideo = pickServer(servers, videoTaskIndex++);
          logger.info(`Generating video (${videoFileName}) on ${serverForVideo.name} using ${imageFileNameForComfy}...`);

          try {
            // loadServers only returns servers with baseUrl and outputDir set,
            // hence the non-null assertions. The meaning of the two trailing
            // boolean flags is defined by generateVideo (not visible here).
            const videoPath = await generateVideo(
              videoPrompt,
              imageFileNameForComfy,
              videoFileName,
              serverForVideo.baseUrl!,
              serverForVideo.outputDir!,
              DEFAULT_SIZE,
              true,
              true
            );
            await sleep(10000); // wait a bit for file system to settle
            logger.info(`Video generated: ${videoPath}`);
          } catch (err) {
            logger.error(`Video generation failed (${videoFileName}) on ${serverForVideo.name}: ${err}`);
          }
        }
      }

      logger.info(`=== Scene ${scene.sceneId}: Video generation complete ===`);
    }

    logger.info('Video generation for all scenes completed.');
  } catch (err) {
    logger.error('Fatal error in music spot video generator:', err);
    // Signal failure to the calling shell instead of exiting with status 0.
    process.exitCode = 1;
  }
}

main().catch((err) => {
  logger.error('Unhandled error:', err);
  process.exitCode = 1;
});