From 888b27e27558197a660708645e63977f8752b745 Mon Sep 17 00:00:00 2001
From: Ken Yasue
Date: Fri, 22 Aug 2025 21:50:40 +0200
Subject: [PATCH] save work

---
Review notes (this section is not part of the commit message):
- src/generatePromptVideo.ts was revised by hand during review, so its hunk
  header and the diffstat totals were updated; the blob id on its index line
  still names the originally committed file.
- Line breaks and indentation of the hunks for existing files were restored
  on a best-effort basis from a whitespace-collapsed copy of this patch.

 src/generatePrompt.ts      |  15 +++-
 src/generatePromptVideo.ts | 196 +++++++++++++++++++++++++++++++++++++
 src/generateVideo.ts       |   4 +-
 src/index.ts               |  10 +++
 4 files changed, 222 insertions(+), 3 deletions(-)
 create mode 100644 src/generatePromptVideo.ts

diff --git a/src/generatePrompt.ts b/src/generatePrompt.ts
index 15d12b6..245ef2b 100644
--- a/src/generatePrompt.ts
+++ b/src/generatePrompt.ts
@@ -210,8 +210,21 @@ async function processItem(genre: string, item: any) {
   const lmInput = `
     Generate two prompts in JSON: { "imagePrompt": "...", "videoPrompt": "..." }.
     Image prompt: ~125 words describing a still image representing the scene. Include lighting, mood, style and any accents provided.
-    Video prompt: ~125 words describing an 8-second piece starting from the scene, describing the action and camera movement. Include lighting, mood, style and any accents provided.
+    Video prompt:
+    The "videoPrompt" should be 100–150 words.
+    Describe a single 8-second video that stays in one fixed scene.
+    - Do NOT change the background or location.
+    - No scene cuts. Keep spatial continuity.
+    - Camera may be static or move slightly (slow pan, gentle dolly, tilt).
 
+    Focus on:
+    1. Main action in the scene
+    2. Camera framing and motion (if any)
+    3. Lighting and mood
+    4. Visual style and small accents
+
+    Use clear and simple sentences, avoid poetic or abstract language.
+
     Use the following details:
     - Genre: ${genre}
     - Sub-Genre: ${subGenre}
diff --git a/src/generatePromptVideo.ts b/src/generatePromptVideo.ts
new file mode 100644
index 0000000..2552364
--- /dev/null
+++ b/src/generatePromptVideo.ts
@@ -0,0 +1,196 @@
+import fs from 'fs';
+import path from 'path';
+import { query } from './lib/mysql';
+import { logger } from './lib/logger';
+import { callLMStudio } from './lib/lmstudio';
+
+/**
+ * Words and phrases a regenerated video prompt must not contain.
+ * Covers every entry of the PROHIBITED list that buildLMInputFromRecord sends
+ * to the model, including "zooms".
+ */
+const BANNED_PHRASES =
+  /\b(cut|cuts|cutting|quick cut|insert|macro insert|close-?up|extreme close-?up|zoom|zooms|zooming|push-?in|pull-?out|whip|switch angle|change angle|montage|cross-?cut|smash cut|transition|meanwhile|later)\b/i;
+
+/** Maximum number of LM Studio calls made for one record before giving up. */
+const MAX_ATTEMPTS = 10;
+
+/**
+ * Script entry point: runs the sweep once, then terminates the process.
+ */
+async function main(): Promise<void> {
+  await updatePromptsFromDB();
+  process.exit();
+}
+
+/**
+ * Find DB records whose video_prompt contains 'cut' or 'zoom' (case-insensitive),
+ * regenerate the video_prompt using LMStudio, and update the record.
+ *
+ * If the newly generated prompt still contains any banned words/phrases, regenerate
+ * again (up to MAX_ATTEMPTS). If after attempts the prompt is still invalid, skip update.
+ * Query, LM Studio and update failures are caught and logged here, not rethrown.
+ */
+async function updatePromptsFromDB(): Promise<void> {
+  logger.info("Starting DB sweep for video_prompt containing 'cut' or 'zoom'...");
+
+  let rows: any[] = [];
+  try {
+    // Case-insensitive substring search for 'cut' or 'zoom' anywhere in video_prompt.
+    // The statement is built from two literals so no string literal spans a line break.
+    rows = (await query(
+      'SELECT id, genre, sub_genre, scene, action, camera, video_prompt FROM video ' +
+        "WHERE LOWER(COALESCE(video_prompt,'')) LIKE ? OR LOWER(COALESCE(video_prompt,'')) LIKE ?",
+      ['%cut%', '%zoom%']
+    )) as any[];
+  } catch (err) {
+    logger.error('DB query failed while searching for problematic prompts:', err);
+    return;
+  }
+
+  if (!rows || rows.length === 0) {
+    logger.info("No records found with 'cut' or 'zoom' in video_prompt.");
+    return;
+  }
+
+  logger.info(`Found ${rows.length} record(s) to process.`);
+
+  for (const row of rows) {
+    const id = row.id;
+    const genre = row.genre || '';
+    const subGenre = row.sub_genre || '';
+    const scene = row.scene || '';
+    const action = row.action || '';
+    const camera = row.camera || '';
+
+    if (!genre || !subGenre || !scene) {
+      logger.info(`Skipping id=${id} due to missing identification fields: genre='${genre}', sub_genre='${subGenre}', scene='${scene}'`);
+      continue;
+    }
+
+    const lmInput = buildLMInputFromRecord(genre, subGenre, scene, action, camera, row.video_prompt);
+    const finalPrompt = await regenerateVideoPrompt(id, lmInput);
+
+    if (!finalPrompt) {
+      logger.warn(`Could not generate a clean prompt for id=${id} after ${MAX_ATTEMPTS} attempts. Skipping update.`);
+      continue;
+    }
+
+    try {
+      await query('UPDATE video SET video_prompt = ? WHERE id = ?', [finalPrompt, id]);
+      logger.info(`Updated video_prompt for id=${id}`);
+    } catch (err) {
+      logger.error(`Failed to update video_prompt for id=${id}: ${err}`);
+    }
+  }
+
+  logger.info('Finished DB sweep for problematic prompts.');
+}
+
+/**
+ * Ask LM Studio for a replacement prompt until one passes the banned-phrase check.
+ *
+ * @param id - Record id, used only in log messages.
+ * @param lmInput - Instruction text sent to the model on every attempt.
+ * @returns The first prompt free of banned phrases, or null when every one of the
+ *   MAX_ATTEMPTS attempts failed, returned no string, or was rejected.
+ */
+async function regenerateVideoPrompt(id: number | string, lmInput: string): Promise<string | null> {
+  for (let attempt = 1; attempt <= MAX_ATTEMPTS; attempt++) {
+    let lmResponse: any = null;
+    try {
+      lmResponse = await callLMStudio(lmInput);
+    } catch (err) {
+      // A failed call consumes one attempt; the next iteration retries.
+      logger.warn(`LMStudio call failed for id=${id} (attempt ${attempt}): ${err}`);
+      continue;
+    }
+
+    if (!lmResponse) {
+      logger.warn(`LMStudio returned empty response for id=${id} (attempt ${attempt}).`);
+      continue;
+    }
+
+    // The prompt text is read from the first non-empty of these three keys.
+    const videoPrompt = lmResponse.videoPrompt || lmResponse.video_prompt || lmResponse.prompt || null;
+    if (!videoPrompt || typeof videoPrompt !== 'string') {
+      logger.warn(`LMStudio did not return a valid videoPrompt for id=${id} (attempt ${attempt}).`);
+      continue;
+    }
+
+    if (BANNED_PHRASES.test(videoPrompt)) {
+      logger.info(`Generated prompt for id=${id} (attempt ${attempt}) still contains banned phrases - retrying.`);
+      logger.info(videoPrompt);
+      continue;
+    }
+
+    return videoPrompt;
+  }
+
+  return null;
+}
+
+/**
+ * Build the LM Studio instruction text for a single DB record.
+ *
+ * @param genre - Value for the "Genre" line.
+ * @param subGenre - Value for the "Sub-Genre" line.
+ * @param finalScene - Value for the "Scene" line.
+ * @param chosenAction - Value for the "Action" line; 'n/a' is sent when empty.
+ * @param camera - Value for the "Camera" line; a static/subtle default is sent when empty.
+ * @param existingPrompt - Current prompt of the record; accepted but not used in the text.
+ * @returns The complete prompt, including the hard rules and the prohibited word list.
+ */
+function buildLMInputFromRecord(
+  genre: string,
+  subGenre: string,
+  finalScene: string,
+  chosenAction: string,
+  camera: string,
+  existingPrompt: string | undefined
+): string {
+  // Accents, mood, lighting and style are not read from the record; fixed placeholders are sent.
+  const accents = 'none';
+  const mood = 'n/a';
+  const lighting = 'n/a';
+  const style = 'n/a';
+
+  return `
+Return exactly one JSON object: { "videoPrompt": "..." } and nothing else.
+
+Write "videoPrompt" in 100–150 words, present tense, plain concrete language.
+
+HARD RULES (must comply):
+- One continuous shot ("one take", "oner"). Real-time 8 seconds. No edits.
+- Fixed location and vantage. Do not change background or angle.
+- Lens and focal length locked. No zooms, no close-ups that imply a lens change, no rack zoom.
+- Camera motion: at most subtle pan/tilt/dolly within 1 meter while staying in the same spot.
+- Keep framing consistent (e.g., medium-wide two-shot). No “another shot/meanwhile.”
+- Describe: (1) main action, (2) framing & motion, (3) lighting & mood, (4) style & small accents.
+- Use clear simple sentences. No metaphors or poetic language.
+
+PROHIBITED WORDS/PHRASES (case-insensitive):
+cut, cuts, cutting, quick cut, insert, macro insert, close-up, extreme close-up,
+zoom, zooms, zooming, push-in, pull-out, whip, switch angle, change angle,
+montage, cross-cut, smash cut, transition, meanwhile, later.
+
+If proximity is needed, say: "the camera glides slightly closer while staying in the same position."
+
+Here is information of the scene, please generate prompt for the video based on these information for key "videoPrompt":
+Genre: ${genre}
+Sub-Genre: ${subGenre}
+Scene: ${finalScene}
+Action: ${chosenAction || 'n/a'}
+Camera: ${camera || 'static or subtle movement (stay within scene)'}
+Accents: ${accents}
+Mood: ${mood}
+Lighting: ${lighting}
+Style: ${style}
+`;
+}
+
+// Log any unexpected rejection and exit non-zero instead of leaving it unhandled.
+main().catch((err) => {
+  logger.error('Prompt regeneration sweep failed:', err);
+  process.exit(1);
+});
diff --git a/src/generateVideo.ts b/src/generateVideo.ts
index 1fb7122..c2c4292 100644
--- a/src/generateVideo.ts
+++ b/src/generateVideo.ts
@@ -16,10 +16,10 @@ interface VideoRecord {
 }
 
 const servers = [
-  /*{
+  {
     baseUrl: process.env.SERVER1_COMFY_BASE_URL,
     outputDir: process.env.SERVER1_COMFY_OUTPUT_DIR,
-  },*/
+  },
   {
     baseUrl: process.env.SERVER2_COMFY_BASE_URL,
     outputDir: process.env.SERVER2_COMFY_OUTPUT_DIR,
diff --git a/src/index.ts b/src/index.ts
index 1ee1948..3b44952 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -4,6 +4,12 @@ import { generateImage } from './lib/image-generator';
 import { generateVideo } from './lib/video-generator';
 import { logger } from './lib/logger';
 import * as fs from 'fs/promises';
+import dotenv from 'dotenv';
+
+dotenv.config();
+
+const COMFY_BASE_URL = process.env.COMFY_BASE_URL1 || "";
+const COMFY_OUTPUT_DIR = process.env.COMFY_OUTPUT_DIR1 || "";
 
 async function prepareImageForKeyword(keyword: string): Promise<{ keyword: string; generatedImagePath: string } | null> {
   const numberOfPages = 1;
@@ -49,6 +55,8 @@ async function prepareImageForKeyword(keyword: string): Promise<{ keyword: strin
       const generatedImagePath = await generateImage(
         imagePrompt,
         imageFileName,
+        COMFY_BASE_URL,
+        COMFY_OUTPUT_DIR,
         'flux',
         { width: 720, height: 1280 }
       );
@@ -81,6 +89,8 @@ async function generateVideoFromImagePath(keyword: string, generatedImagePath: s
       videoPrompt,
       generatedImagePath,
       videoFileName,
+      COMFY_BASE_URL,
+      COMFY_OUTPUT_DIR,
       { width: 720, height: 1280 }
     );
     logger.debug(`Generated video from prompt, saved to: ${generatedVideoPath}`);