Files
2025-09-15 07:39:50 +02:00

260 lines
9.3 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import dotenv from 'dotenv';
import path from 'path';
import fs from 'fs/promises';
import { logger } from '../lib/logger';
import { callOpenAI } from '../lib/openai';
import { generateVideo } from '../lib/video-generator';
dotenv.config();
/** Width/height pair describing a frame size (presumably in pixels). */
interface Size {
  width: number;
  height: number;
}
/** Appearance traits of the character; both fields are interpolated into the video prompt. */
interface MusicSpotCharacter {
  bodyType: string;
  hairStyle: string;
}

/** A single cut within a scene. */
interface MusicSpotCut {
  /** Identifier of the cut; becomes part of the generated file names. */
  cutId: number;
  /** Pose/composition text inserted into the prompt. */
  pose: string;
  /** Main action text inserted into the prompt. */
  action: string;
  /** List of camera variants per cut; when missing or empty a built-in default list is used. */
  camera?: Array<string>;
}

/** A scene: shared time/location/outfit plus its cuts. */
interface MusicSpotScene {
  /** Identifier of the scene; becomes part of the generated file names. */
  sceneId: number;
  time: string;
  location: string;
  outfit: string;
  cuts: Array<MusicSpotCut>;
}

/** Root shape of the parsed scenes.json file. */
interface MusicSpotConfig {
  character: MusicSpotCharacter;
  scenes: Array<MusicSpotScene>;
}
/** One render server as configured through environment variables. */
interface Server {
  /** Label used in log messages, e.g. "SERVER1". */
  name: string;
  /** Base URL of the server, read from SERVER{N}_COMFY_BASE_URL. */
  baseUrl?: string;
  /** Output directory of the server, read from SERVER{N}_COMFY_OUTPUT_DIR. */
  outputDir?: string;
  /** Input directory of the server; source images are copied here before a video is requested. */
  inputDir?: string;
}
/** Frame size handed to the video generator (vertical 720x1280). */
const DEFAULT_SIZE: Size = {
  width: 720,
  height: 1280,
};

/**
 * Name of the music-spot folder to process: first CLI argument, then the
 * MUSICSPOT_FOLDER environment variable, then a built-in default.
 */
const FOLDER =
  process.argv[2] ||
  process.env.MUSICSPOT_FOLDER ||
  'oputstise';

/** FOLDER with characters that are unsafe in file names replaced by underscores. */
const FOLDER_SAFE = FOLDER.replace(/[/\\?%*:|"<>]/g, '_');

/** Absolute path of the local "generated" directory, resolved against the current working directory. */
const GENERATED_DIR = path.resolve(process.cwd(), 'generated');
/**
 * Builds the list of usable servers from environment variables.
 *
 * A server is kept only when both SERVER{N}_COMFY_BASE_URL and
 * SERVER{N}_COMFY_OUTPUT_DIR are set. Its input directory is derived from the
 * output directory by swapping the LAST case-insensitive occurrence of
 * "output" for "input", so a parent directory that merely contains the word
 * (e.g. "/srv/output-disk/ComfyUI/output") is left intact.
 *
 * @returns The configured servers. The list may be empty, in which case a
 *          warning has been logged.
 */
function loadServers(): Server[] {
  const candidates: Server[] = [
    {
      name: 'SERVER1',
      baseUrl: process.env.SERVER1_COMFY_BASE_URL,
      outputDir: process.env.SERVER1_COMFY_OUTPUT_DIR,
    },
    /*
    {
      name: 'SERVER2',
      baseUrl: process.env.SERVER2_COMFY_BASE_URL,
      outputDir: process.env.SERVER2_COMFY_OUTPUT_DIR,
    },*/
  ];

  // "output" that is not followed by another "output" anywhere later in the path.
  const lastOutputOccurrence = /output(?![\s\S]*output)/i;

  const servers: Server[] = [];
  for (const candidate of candidates) {
    const { baseUrl, outputDir } = candidate;
    if (!baseUrl || !outputDir) {
      continue; // incompletely configured servers are ignored
    }
    const inputDir = outputDir.replace(lastOutputOccurrence, 'input');
    if (inputDir === outputDir) {
      // Nothing was replaced: images would be copied into the output directory itself.
      logger.warn(
        `${candidate.name}: outputDir "${outputDir}" does not contain "output"; inputDir falls back to the same directory`
      );
    }
    servers.push({ ...candidate, inputDir });
  }

  if (servers.length === 0) {
    logger.warn('No servers configured. Please set SERVER{N}_COMFY_BASE_URL and SERVER{N}_COMFY_OUTPUT_DIR in .env');
  } else {
    for (const s of servers) {
      logger.info(`Configured ${s.name}: baseUrl=${s.baseUrl}, outputDir=${s.outputDir}, inputDir=${s.inputDir}`);
    }
  }
  return servers;
}
/** Creates the local generated directory, including missing parents; a no-op when it already exists. */
async function ensureDirs(): Promise<void> {
  const mkdirOptions = { recursive: true } as const;
  await fs.mkdir(GENERATED_DIR, mkdirOptions);
}
/**
 * Builds the instruction text sent to the language model to obtain a video
 * prompt for one camera variant of one cut.
 *
 * The text asks for a single JSON object `{ "videoPrompt": "..." }` of
 * 100-140 words and embeds the cut's action and pose, the scene's
 * time/location/outfit, the character traits and the requested camera
 * intention.
 *
 * @param character    Character traits (body type, hair style).
 * @param scene        Scene providing time, location and outfit.
 * @param cut          Cut providing the main action and pose.
 * @param cameraIntent Free-text camera intention for this variant.
 * @returns The request text, without leading or trailing whitespace.
 */
function buildVideoPromptRequest(
  character: MusicSpotCharacter,
  scene: MusicSpotScene,
  cut: MusicSpotCut,
  cameraIntent: string
): string {
  const lines = [
    'Return exactly one JSON object and nothing else: { "videoPrompt": "..." }.',
    // Word range: the separator was missing, which asked the model for "100140 words".
    'Write "videoPrompt" in 100-140 words. Present tense. Concrete, simple sentences.',
    'HARD RULES:',
    '- One continuous 8-second shot (oner). No edits.',
    '- Fixed location and general vantage; maintain spatial continuity.',
    '- No zooms, no rack zoom, no smash/push-in, no cuts, no transitions, no "meanwhile".',
    '- Camera motion: at most a slight pan/tilt or subtle dolly within 1 meter.',
    '- Keep framing consistent (vertical 720x1280). Avoid technical brand names or lens jargon.',
    `Incorporate the following camera intention: "${cameraIntent}".`,
    'If it conflicts with HARD RULES (e.g., zoom, push-in, extreme moves), reinterpret it into a subtle, compliant motion (e.g., gentle glide, slight pan/tilt) while preserving the creative intent.',
    'Describe:',
    `1) Main action: ${cut.action}`,
    `2) Pose/composition: ${cut.pose}`,
    `3) Scene/time/location/outfit: ${scene.time}; ${scene.location}; outfit: ${scene.outfit}`,
    `4) Lighting/mood/style coherent with the character: ${character.bodyType}; hair: ${character.hairStyle}`,
    'Prohibited (case-insensitive): cut, cuts, cutting, quick cut, insert, close-up, extreme close-up, zoom, zooming, push-in, pull-out, whip, switch angle, change angle, montage, cross-cut, smash cut, transition, meanwhile, later.',
    'Only respond with JSON.',
  ];
  return lines.join('\n').trim();
}
/** Returns a promise that resolves once a `setTimeout` of `ms` milliseconds fires. */
const sleep = (ms: number) =>
  new Promise((done) => {
    setTimeout(done, ms);
  });
/**
 * Sends the request text through `callOpenAI` and extracts the video prompt
 * from the reply.
 *
 * The reply keys `videoPrompt`, `video_prompt` and `prompt` are checked in
 * that order, and the first one holding a non-blank string wins. A
 * whitespace-only or non-string value under an earlier key therefore no
 * longer hides a valid value under a later key, and a blank prompt is never
 * returned.
 *
 * @param req Request text for the model.
 * @returns The trimmed video prompt.
 * @throws Error when none of the keys holds a non-blank string.
 */
async function getVideoPromptFromOpenAI(req: string): Promise<string> {
  const res = await callOpenAI(req);
  const candidates: unknown[] = [res?.videoPrompt, res?.video_prompt, res?.prompt];
  for (const candidate of candidates) {
    if (typeof candidate !== 'string') {
      continue;
    }
    const trimmed = candidate.trim();
    if (trimmed.length > 0) {
      return trimmed;
    }
  }
  throw new Error('OpenAI failed to return videoPrompt JSON.');
}
/**
 * Selects a server by index, wrapping around the end of the list.
 *
 * @param servers Available servers.
 * @param idx     Task counter; reduced modulo the number of servers.
 * @returns The server at position `idx % servers.length`.
 * @throws Error when the list is empty.
 */
function pickServer(servers: Server[], idx: number): Server {
  const count = servers.length;
  if (count > 0) {
    return servers[idx % count];
  }
  throw new Error('No servers configured.');
}
/**
 * Copies a locally generated image into the input directory of every server
 * that has one, one server after another.
 *
 * A failed copy is logged as a warning and does not stop the remaining copies.
 *
 * @param servers                 Servers to receive the image.
 * @param localGeneratedImagePath Path of the image on the local machine.
 * @returns The image's base file name, i.e. the name to use in Comfy workflows.
 */
async function copyImageToAllServerInputs(servers: Server[], localGeneratedImagePath: string): Promise<string> {
  const imageName = path.basename(localGeneratedImagePath);

  for (const { name, inputDir } of servers) {
    if (inputDir) {
      const destination = path.join(inputDir, imageName);
      try {
        await fs.copyFile(localGeneratedImagePath, destination);
        logger.debug(`Copied ${imageName} to ${name} input: ${destination}`);
      } catch (copyError) {
        logger.warn(`Failed to copy ${imageName} to ${name} input: ${copyError}`);
      }
    }
  }

  return imageName;
}
/**
 * Reports whether `fs.access` succeeds for the given path.
 *
 * @param p Path to probe.
 * @returns `true` when the access check succeeds, `false` when it fails for any reason.
 */
async function fileExists(p: string): Promise<boolean> {
  return fs.access(p).then(
    () => true,
    () => false
  );
}
/**
 * Generates one video per scene / cut / camera variant from images that
 * already exist in the local generated directory.
 *
 * Steps:
 *  1. Read and parse `src/musicspot_generator/<FOLDER>/scenes.json`.
 *  2. Load the configured servers; return early when there are none.
 *  3. For every camera variant whose source image exists and whose video does
 *     not yet exist: obtain a video prompt, copy the image to every server's
 *     input directory, then request the video from a server chosen in
 *     round-robin order.
 *
 * A failure for a single variant is logged and the run continues. Any other
 * error is logged as fatal and sets a non-zero process exit code so that
 * calling scripts can detect the failure.
 */
async function main(): Promise<void> {
  try {
    await ensureDirs();

    // Load scenes.json
    const configPath = path.resolve(`src/musicspot_generator/${FOLDER}/scenes.json`);
    const configRaw = await fs.readFile(configPath, 'utf-8');
    const cfg: MusicSpotConfig = JSON.parse(configRaw);
    if (!cfg || !Array.isArray(cfg.scenes)) {
      // Fail with a clear message instead of an opaque "not iterable" TypeError below.
      throw new Error(`Invalid scenes.json (expected a "scenes" array): ${configPath}`);
    }

    const servers = loadServers();
    if (servers.length === 0) {
      return;
    }

    // Generate videos only, based on images already present in ./generated
    let videoTaskIndex = 0;
    for (const scene of cfg.scenes) {
      logger.info(`=== Scene ${scene.sceneId}: Video generation start ===`);

      if (!Array.isArray(scene.cuts)) {
        // One malformed scene must not abort the remaining scenes.
        logger.warn(`Scene ${scene.sceneId} has no "cuts" array, skipping.`);
        continue;
      }

      for (const cut of scene.cuts) {
        // Fall back to a default set of camera variants when the cut defines none.
        const cameraVariants =
          Array.isArray(cut.camera) && cut.camera.length > 0
            ? cut.camera
            : ['eye-level medium shot', 'slight left 30°', 'slight right 30°', 'slight high angle', 'slight low angle'];

        for (let camIdx = 0; camIdx < cameraVariants.length; camIdx++) {
          const cameraIntent = cameraVariants[camIdx];
          const variantIndex = camIdx + 1; // file names use 1-based variant numbers
          const imgFileName = `${FOLDER_SAFE}_musicspot_s${scene.sceneId}_c${cut.cutId}_v${variantIndex}.png`;
          const imgPath = path.join(GENERATED_DIR, imgFileName);

          // Only proceed if image exists
          const hasImage = await fileExists(imgPath);
          if (!hasImage) {
            logger.warn(`Skipping video: source image not found: ${imgPath}`);
            continue;
          }

          const videoFileName = imgFileName.replace(/\.png$/i, '.mp4');
          const videoOutPath = path.join(GENERATED_DIR, videoFileName);

          // Skip if the video already exists
          const hasVideo = await fileExists(videoOutPath);
          if (hasVideo) {
            logger.info(`Video already exists, skipping: ${videoOutPath}`);
            continue;
          }

          // 1) Generate video prompt for this camera
          logger.info(
            `Scene ${scene.sceneId} - Cut ${cut.cutId} - Cam${variantIndex}: generating video prompt from image ${imgFileName}...`
          );
          const vidPromptReq = buildVideoPromptRequest(cfg.character, scene, cut, cameraIntent);
          let videoPrompt: string;
          try {
            videoPrompt = await getVideoPromptFromOpenAI(vidPromptReq);
          } catch (err) {
            logger.error(`OpenAI video prompt failed for ${imgFileName}: ${err}`);
            continue;
          }

          // 2) Copy the base image to every server's input folder
          const imageFileNameForComfy = await copyImageToAllServerInputs(servers, imgPath);

          // 3) Generate video on a chosen server (round-robin)
          const serverForVideo = pickServer(servers, videoTaskIndex++);
          logger.info(`Generating video (${videoFileName}) on ${serverForVideo.name} using ${imageFileNameForComfy}...`);
          try {
            // NOTE(review): the meaning of the two trailing boolean flags is defined by
            // generateVideo in ../lib/video-generator and is not visible here.
            const videoPath = await generateVideo(
              videoPrompt,
              imageFileNameForComfy,
              videoFileName,
              serverForVideo.baseUrl!,
              serverForVideo.outputDir!,
              DEFAULT_SIZE,
              true,
              true
            );
            await sleep(10000); // wait a bit for file system to settle
            logger.info(`Video generated: ${videoPath}`);
          } catch (err) {
            logger.error(`Video generation failed (${videoFileName}) on ${serverForVideo.name}: ${err}`);
          }
        }
      }
      logger.info(`=== Scene ${scene.sceneId}: Video generation complete ===`);
    }
    logger.info('Video generation for all scenes completed.');
  } catch (err) {
    logger.error('Fatal error in music spot video generator:', err);
    // Signal the failure to the caller without cutting off pending log output.
    process.exitCode = 1;
  }
}
// Start the run. This handler is a last-resort safety net for a rejection that
// escapes main(); it also marks the process as failed so callers see a
// non-zero exit status.
main().catch((err) => {
  logger.error('Unhandled error:', err);
  process.exitCode = 1;
});