260 lines
9.3 KiB
TypeScript
260 lines
9.3 KiB
TypeScript
import dotenv from 'dotenv';
|
||
import path from 'path';
|
||
import fs from 'fs/promises';
|
||
|
||
import { logger } from '../lib/logger';
|
||
import { callOpenAI } from '../lib/openai';
|
||
import { generateVideo } from '../lib/video-generator';
|
||
|
||
dotenv.config();
|
||
|
||
/** Width/height pair; the file uses it for DEFAULT_SIZE, which is handed to generateVideo. */
type Size = {
  width: number;
  height: number;
};
|
||
|
||
/**
 * Character description from scenes.json. Both fields are interpolated as
 * free text into the video-prompt request.
 */
interface MusicSpotCharacter {
  /** Inserted after "coherent with the character:" in the prompt request. */
  bodyType: string;
  /** Inserted after "hair:" in the prompt request. */
  hairStyle: string;
}
|
||
|
||
/** One cut inside a scene; each camera variant of a cut maps to one image/video pair. */
interface MusicSpotCut {
  /** Used in generated file names (`..._c<cutId>_v<n>`) and in log lines. */
  cutId: number;
  /** Pose/composition text inserted into the prompt request. */
  pose: string;
  /** Main-action text inserted into the prompt request. */
  action: string;
  /** List of camera variants per cut; when absent or empty, a built-in default list is used. */
  camera?: string[];
}
|
||
|
||
/** One scene from scenes.json together with the cuts rendered for it. */
interface MusicSpotScene {
  /** Used in generated file names (`..._s<sceneId>_...`) and in log lines. */
  sceneId: number;
  /** Time description, interpolated into the prompt request. */
  time: string;
  /** Location description, interpolated into the prompt request. */
  location: string;
  /** Outfit description, interpolated into the prompt request. */
  outfit: string;
  /** Cuts processed in order for this scene. */
  cuts: MusicSpotCut[];
}
|
||
|
||
/** Shape the script expects after parsing `scenes.json`. */
interface MusicSpotConfig {
  /** Character description reused for every scene and cut. */
  character: MusicSpotCharacter;
  /** Scenes, processed in array order. */
  scenes: MusicSpotScene[];
}
|
||
|
||
/** A video-generation server as configured through environment variables. */
interface Server {
  /** Label used in log messages (e.g. `SERVER1`). */
  name: string;
  /** Read from `SERVER{N}_COMFY_BASE_URL`; passed to generateVideo. */
  baseUrl?: string;
  /** Read from `SERVER{N}_COMFY_OUTPUT_DIR`; passed to generateVideo. */
  outputDir?: string;
  /** Directory that source images are copied into; derived from `outputDir` by loadServers. */
  inputDir?: string;
}
|
||
|
||
/** Size passed to generateVideo for every clip. */
const DEFAULT_SIZE: Size = { width: 720, height: 1280 };

/**
 * Project folder name: first CLI argument, then MUSICSPOT_FOLDER, then the
 * built-in default. Empty strings fall through to the next source.
 */
const FOLDER = [process.argv[2], process.env.MUSICSPOT_FOLDER].find(Boolean) ?? 'oputstise';

/** FOLDER with path separators and other file-name-hostile characters replaced by `_`. */
const FOLDER_SAFE = FOLDER.replace(/[/\\?%*:|"<>]/g, '_');

/** Absolute path of the `generated` directory under the current working directory. */
const GENERATED_DIR = path.resolve(process.cwd(), 'generated');
|
||
|
||
/**
 * Builds the list of usable servers from environment variables.
 *
 * A server is kept only when both its base URL and its output directory are
 * set. For each kept server, `inputDir` is derived from `outputDir` by
 * swapping "output" for "input" (case-insensitive).
 *
 * Logs a warning when nothing is configured, otherwise one info line per server.
 *
 * @returns The configured servers; an empty array when none are usable.
 */
function loadServers(): Server[] {
  const candidates: Server[] = [
    {
      name: 'SERVER1',
      baseUrl: process.env.SERVER1_COMFY_BASE_URL,
      outputDir: process.env.SERVER1_COMFY_OUTPUT_DIR,
    },
    /*
    {
      name: 'SERVER2',
      baseUrl: process.env.SERVER2_COMFY_BASE_URL,
      outputDir: process.env.SERVER2_COMFY_OUTPUT_DIR,
    },*/
  ];

  const servers: Server[] = [];
  for (const candidate of candidates) {
    const { baseUrl, outputDir } = candidate;
    if (!baseUrl || !outputDir) continue;

    servers.push({
      ...candidate,
      // Replace only the LAST "output" in the path. Replacing the first one
      // would rewrite an unrelated parent directory (e.g. ".../outputs/ComfyUI/output")
      // and leave the actual output folder untouched.
      inputDir: outputDir.replace(/output(?![\s\S]*output)/i, 'input'),
    });
  }

  if (servers.length === 0) {
    logger.warn('No servers configured. Please set SERVER{N}_COMFY_BASE_URL and SERVER{N}_COMFY_OUTPUT_DIR in .env');
  } else {
    for (const s of servers) {
      logger.info(`Configured ${s.name}: baseUrl=${s.baseUrl}, outputDir=${s.outputDir}, inputDir=${s.inputDir}`);
    }
  }

  return servers;
}
|
||
|
||
/** Creates GENERATED_DIR, including missing parents; does nothing if it already exists. */
async function ensureDirs() {
  const mkdirOptions = { recursive: true };
  await fs.mkdir(GENERATED_DIR, mkdirOptions);
}
|
||
|
||
/**
 * Assembles the instruction text that asks the language model for a single
 * `{ "videoPrompt": "..." }` JSON object describing one continuous shot.
 *
 * @param character    Supplies the body-type and hair text for item 4.
 * @param scene        Supplies time, location and outfit for item 3.
 * @param cut          Supplies the main action (item 1) and pose (item 2).
 * @param cameraIntent Camera intention quoted verbatim in the request.
 * @returns The request text with leading/trailing whitespace trimmed.
 */
function buildVideoPromptRequest(
  character: MusicSpotCharacter,
  scene: MusicSpotScene,
  cut: MusicSpotCut,
  cameraIntent: string
): string {
  const outputContract = [
    'Return exactly one JSON object and nothing else: { "videoPrompt": "..." }.',
    '',
    'Write "videoPrompt" in 100–140 words. Present tense. Concrete, simple sentences.',
  ];

  const hardRules = [
    'HARD RULES:',
    '- One continuous 8-second shot (oner). No edits.',
    '- Fixed location and general vantage; maintain spatial continuity.',
    '- No zooms, no rack zoom, no smash/push-in, no cuts, no transitions, no "meanwhile".',
    '- Camera motion: at most a slight pan/tilt or subtle dolly within 1 meter.',
    '- Keep framing consistent (vertical 720x1280). Avoid technical brand names or lens jargon.',
  ];

  const cameraSection = [
    `Incorporate the following camera intention: "${cameraIntent}".`,
    'If it conflicts with HARD RULES (e.g., zoom, push-in, extreme moves), reinterpret it into a subtle, compliant motion (e.g., gentle glide, slight pan/tilt) while preserving the creative intent.',
  ];

  const describeSection = [
    'Describe:',
    `1) Main action: ${cut.action}`,
    `2) Pose/composition: ${cut.pose}`,
    `3) Scene/time/location/outfit: ${scene.time}; ${scene.location}; outfit: ${scene.outfit}`,
    `4) Lighting/mood/style coherent with the character: ${character.bodyType}; hair: ${character.hairStyle}`,
  ];

  const closing = [
    'Prohibited (case-insensitive): cut, cuts, cutting, quick cut, insert, close-up, extreme close-up, zoom, zooming, push-in, pull-out, whip, switch angle, change angle, montage, cross-cut, smash cut, transition, meanwhile, later.',
    '',
    'Only respond with JSON.',
  ];

  // Sections are separated by one blank line, matching the layout of the request text.
  const sections = [outputContract, hardRules, cameraSection, describeSection, closing];
  return sections
    .map((section) => section.join('\n'))
    .join('\n\n')
    .trim();
}
|
||
|
||
/** Resolves (with no value) after roughly `ms` milliseconds. */
const sleep = (ms: number) =>
  new Promise((wake) => {
    setTimeout(wake, ms);
  });
|
||
|
||
/**
 * Sends the request text to `callOpenAI` and extracts the video prompt from
 * the response.
 *
 * The response keys `videoPrompt`, `video_prompt` and `prompt` are checked in
 * that order. The first one holding a non-blank string wins; non-string or
 * whitespace-only values are skipped, so a malformed primary key no longer
 * hides a usable fallback or yields an empty prompt.
 *
 * @param req Request text sent to the model.
 * @returns The trimmed prompt text.
 * @throws Error when none of the keys holds a non-blank string.
 */
async function getVideoPromptFromOpenAI(req: string): Promise<string> {
  const res = await callOpenAI(req);
  const candidates: unknown[] = [res?.videoPrompt, res?.video_prompt, res?.prompt];

  for (const candidate of candidates) {
    if (typeof candidate !== 'string') continue;
    const prompt = candidate.trim();
    if (prompt) return prompt;
  }

  throw new Error('OpenAI failed to return videoPrompt JSON.');
}
|
||
|
||
/**
 * Selects a server round-robin style.
 *
 * The index is truncated to an integer and wrapped into `[0, servers.length)`,
 * so negative or fractional values still map to a real entry instead of
 * returning `undefined`.
 *
 * @param servers Candidate servers.
 * @param idx     Running task counter.
 * @returns The server at the wrapped index.
 * @throws Error when `servers` is empty or `idx` is not a finite number.
 */
function pickServer(servers: Server[], idx: number): Server {
  const count = servers.length;
  if (count === 0) {
    throw new Error('No servers configured.');
  }
  if (!Number.isFinite(idx)) {
    throw new Error(`Invalid server index: ${idx}`);
  }

  // JavaScript's % keeps the sign of the dividend; add `count` to wrap negatives.
  const slot = ((Math.trunc(idx) % count) + count) % count;
  return servers[slot];
}
|
||
|
||
/**
 * Copies a locally generated image into the input directory of every server
 * that has one, one server at a time.
 *
 * Copy failures are logged as warnings and do not stop the remaining copies.
 *
 * @param servers                 Servers to receive the image.
 * @param localGeneratedImagePath Path of the image to copy.
 * @returns The image's base file name (the name used for Comfy workflows).
 */
async function copyImageToAllServerInputs(servers: Server[], localGeneratedImagePath: string): Promise<string> {
  const fileName = path.basename(localGeneratedImagePath);

  for (const { name, inputDir } of servers) {
    if (inputDir) {
      const target = path.join(inputDir, fileName);
      try {
        await fs.copyFile(localGeneratedImagePath, target);
        logger.debug(`Copied ${fileName} to ${name} input: ${target}`);
      } catch (copyError) {
        // Best effort: a server whose input folder is unreachable must not block the others.
        logger.warn(`Failed to copy ${fileName} to ${name} input: ${copyError}`);
      }
    }
  }

  return fileName;
}
|
||
|
||
/**
 * Reports whether `p` can be accessed.
 *
 * @param p Path to probe.
 * @returns `true` when `fs.access` succeeds, `false` when it rejects for any reason.
 */
async function fileExists(p: string): Promise<boolean> {
  return fs.access(p).then(
    () => true,
    () => false
  );
}
|
||
|
||
/**
 * Parses the raw text of scenes.json and verifies the parts of its shape that
 * the generation loop iterates over, so a malformed file fails with a clear
 * message before any work starts.
 *
 * @param raw    File contents.
 * @param source Path used in error messages.
 * @throws SyntaxError when `raw` is not valid JSON.
 * @throws Error when `scenes` or any scene's `cuts` is not an array.
 */
function parseMusicSpotConfig(raw: string, source: string): MusicSpotConfig {
  const parsed: unknown = JSON.parse(raw);
  const scenes = (parsed as Partial<MusicSpotConfig> | null)?.scenes;
  if (!Array.isArray(scenes)) {
    throw new Error(`${source}: "scenes" must be an array.`);
  }
  scenes.forEach((scene, position) => {
    if (!Array.isArray(scene?.cuts)) {
      throw new Error(`${source}: scenes[${position}].cuts must be an array.`);
    }
  });
  return parsed as MusicSpotConfig;
}

/**
 * Script entry point: for every scene / cut / camera variant whose source
 * image already exists in GENERATED_DIR and whose video does not, asks the
 * model for a video prompt, copies the image to the servers' input folders
 * and runs video generation on a round-robin-selected server.
 *
 * Per-item failures (prompt or generation) are logged and skipped. Any other
 * error is logged as fatal and marks the process as failed via
 * `process.exitCode`, so shells and CI can detect it; the returned promise
 * still resolves.
 */
async function main() {
  // Camera intents used when a cut lists none; built once instead of per cut.
  const defaultCameraVariants = [
    'eye-level medium shot',
    'slight left 30°',
    'slight right 30°',
    'slight high angle',
    'slight low angle',
  ];

  try {
    await ensureDirs();

    // Load scenes.json
    const configPath = path.resolve(`src/musicspot_generator/${FOLDER}/scenes.json`);
    const configRaw = await fs.readFile(configPath, 'utf-8');
    const cfg = parseMusicSpotConfig(configRaw, configPath);

    const servers = loadServers();
    if (servers.length === 0) {
      // loadServers has already logged the warning.
      return;
    }

    // Generate videos only, based on images already present in ./generated
    let videoTaskIndex = 0;

    for (const scene of cfg.scenes) {
      logger.info(`=== Scene ${scene.sceneId}: Video generation start ===`);
      for (const cut of scene.cuts) {
        const cameraVariants =
          Array.isArray(cut.camera) && cut.camera.length > 0 ? cut.camera : defaultCameraVariants;

        for (let camIdx = 0; camIdx < cameraVariants.length; camIdx++) {
          const cameraIntent = cameraVariants[camIdx];
          const variantIndex = camIdx + 1; // file names use 1-based variant numbers

          const imgFileName = `${FOLDER_SAFE}_musicspot_s${scene.sceneId}_c${cut.cutId}_v${variantIndex}.png`;
          const imgPath = path.join(GENERATED_DIR, imgFileName);

          // Only proceed if image exists
          const hasImage = await fileExists(imgPath);
          if (!hasImage) {
            logger.warn(`Skipping video: source image not found: ${imgPath}`);
            continue;
          }

          const videoFileName = imgFileName.replace(/\.png$/i, '.mp4');
          const videoOutPath = path.join(GENERATED_DIR, videoFileName);

          // Skip if the video already exists, so re-runs only fill in what is missing
          const hasVideo = await fileExists(videoOutPath);
          if (hasVideo) {
            logger.info(`Video already exists, skipping: ${videoOutPath}`);
            continue;
          }

          // 1) Generate video prompt for this camera
          logger.info(
            `Scene ${scene.sceneId} - Cut ${cut.cutId} - Cam${variantIndex}: generating video prompt from image ${imgFileName}...`
          );
          const vidPromptReq = buildVideoPromptRequest(cfg.character, scene, cut, cameraIntent);

          let videoPrompt: string;
          try {
            videoPrompt = await getVideoPromptFromOpenAI(vidPromptReq);
          } catch (err) {
            logger.error(`OpenAI video prompt failed for ${imgFileName}: ${err}`);
            continue;
          }

          // 2) Copy the base image to every server's input folder
          const imageFileNameForComfy = await copyImageToAllServerInputs(servers, imgPath);

          // 3) Generate video on a chosen server (round-robin)
          const serverForVideo = pickServer(servers, videoTaskIndex++);
          logger.info(`Generating video (${videoFileName}) on ${serverForVideo.name} using ${imageFileNameForComfy}...`);

          try {
            // NOTE(review): the meaning of the two trailing `true` flags is defined by
            // generateVideo and is not visible here — confirm against its signature.
            const videoPath = await generateVideo(
              videoPrompt,
              imageFileNameForComfy,
              videoFileName,
              serverForVideo.baseUrl!,
              serverForVideo.outputDir!,
              DEFAULT_SIZE,
              true,
              true
            );

            await sleep(10000); // wait a bit for file system to settle
            logger.info(`Video generated: ${videoPath}`);
          } catch (err) {
            logger.error(`Video generation failed (${videoFileName}) on ${serverForVideo.name}: ${err}`);
          }
        }
      }
      logger.info(`=== Scene ${scene.sceneId}: Video generation complete ===`);
    }

    logger.info('Video generation for all scenes completed.');
  } catch (err) {
    logger.error('Fatal error in music spot video generator:', err);
    // Without this the process exits 0 and a fatal failure looks like success.
    process.exitCode = 1;
  }
}
|
||
|
||
// Start the script; anything that escapes main() is logged here rather than
// surfacing as an unhandled rejection.
void main().catch((unhandled) => {
  logger.error('Unhandled error:', unhandled);
});
|