Merge branch 'master' of https://git.yasue.org/ken/RandomVideoMaker

save work
2025-08-22 21:50:45 +02:00 · 2025-08-22 21:50:40 +02:00
4 changed files with 193 additions and 3 deletions
--- a/src/generatePrompt.ts
+++ b/src/generatePrompt.ts
@ -210,7 +210,20 @@ async function processItem(genre: string, item: any) {
    const lmInput = `
      Generate two prompts in JSON: { "imagePrompt": "...", "videoPrompt": "..." }.
      Image prompt: ~125 words describing a still image representing the scene. Include lighting, mood, style and any accents provided.
-      Video prompt: ~125 words describing an 8-second piece starting from the scene, describing the action and camera movement. Include lighting, mood, style and any accents provided.
+      Video prompt: 
        The "videoPrompt" should be 100–150 words.  
  Describe a single 8-second video that stays in one fixed scene.  
  - Do NOT change the background or location.  
  - No scene cuts. Keep spatial continuity.  
  - Camera may be static or move slightly (slow pan, gentle dolly, tilt).  
  Focus on:  
  1. Main action in the scene  
  2. Camera framing and motion (if any)  
  3. Lighting and mood  
  4. Visual style and small accents  
  Use clear and simple sentences, avoid poetic or abstract language.  
      Use the following details:
      - Genre: ${genre}
--- a/src/generatePromptVideo.ts
+++ b/src/generatePromptVideo.ts
@ -0,0 +1,167 @@
 import fs from 'fs';
 import path from 'path';
 import { query } from './lib/mysql';
 import { logger } from './lib/logger';
 import { callLMStudio } from './lib/lmstudio';
 /**
 * Script entry point: runs the DB sweep once and then terminates the process.
 *
 * The explicit process.exit() is kept from the original implementation so the
 * script ends as soon as the sweep is done. If the sweep throws unexpectedly,
 * the failure is logged and the process exits with code 1 instead of surfacing
 * as an unhandled promise rejection at the call site.
 */
 async function main() {
    try {
        await updatePromptsFromDB();
    } catch (err) {
        logger.error(`Prompt sweep aborted by an unexpected error: ${err}`);
        process.exit(1);
    }
    process.exit();
 }
 /**
 * Sweep the `video` table for rows whose video_prompt contains 'cut' or 'zoom'
 * (case-insensitive substring match), regenerate each video_prompt through
 * LMStudio, and write the accepted result back to the row.
 *
 * A regenerated prompt is accepted only when it contains none of the banned
 * words/phrases; otherwise generation is retried, up to maxAttempts times per
 * row. Rows that never yield a clean prompt, and rows missing genre, sub_genre
 * or scene, are skipped and left unchanged.
 *
 * Query, LMStudio and update failures are logged rather than rethrown: a failed
 * SELECT ends the sweep, a failed LMStudio call consumes one attempt, and a
 * failed UPDATE moves on to the next row.
 */
 async function updatePromptsFromDB() {
    logger.info("Starting DB sweep for video_prompt containing 'cut' or 'zoom'...");
    // Banned words/phrases, kept in sync with the PROHIBITED list sent to the model.
    // 'zooms' must be listed explicitly: because of the \b anchors, 'zoom' alone does
    // not match inside 'zooms', so such a prompt would otherwise be accepted and saved.
    const banned = /\b(cut|cuts|cutting|quick cut|insert|macro insert|close-?up|extreme close-?up|zoom|zooms|zooming|push-?in|pull-?out|whip|switch angle|change angle|montage|cross-?cut|smash cut|transition|meanwhile|later)\b/i;
    let rows: any[] = [];
    try {
        // Case-insensitive substring search for 'cut' or 'zoom' anywhere in video_prompt.
        // Being a substring match, it can also select prompts where those letters occur
        // inside a longer word; such rows are regenerated as well.
        rows = (await query(
            "SELECT id, genre, sub_genre, scene, action, camera, video_prompt FROM video WHERE LOWER(COALESCE(video_prompt,'')) LIKE ? OR LOWER(COALESCE(video_prompt,'')) LIKE ?",
            ['%cut%', '%zoom%']
        )) as any[];
    } catch (err) {
        logger.error('DB query failed while searching for problematic prompts:', err);
        return;
    }
    if (!rows || rows.length === 0) {
        logger.info("No records found with 'cut' or 'zoom' in video_prompt.");
        return;
    }
    logger.info(`Found ${rows.length} record(s) to process.`);
    for (const row of rows) {
        const id = row.id;
        // Null/undefined columns are normalised to '' so the checks below are uniform.
        const genre = row.genre || '';
        const subGenre = row.sub_genre || '';
        const scene = row.scene || '';
        const action = row.action || '';
        const camera = row.camera || '';
        if (!genre || !subGenre || !scene) {
            logger.info(`Skipping id=${id} due to missing identification fields: genre='${genre}', sub_genre='${subGenre}', scene='${scene}'`);
            continue;
        }
        // The LM input depends only on the record, so it is built once and reused
        // for every attempt.
        const lmInput = buildLMInputFromRecord(genre, subGenre, scene, action, camera, row.video_prompt);
        let finalPrompt: string | null = null;
        const maxAttempts = 10;
        for (let attempt = 1; attempt <= maxAttempts; attempt++) {
            let lmResponse: any = null;
            try {
                lmResponse = await callLMStudio(lmInput);
            } catch (err) {
                logger.warn(`LMStudio call failed for id=${id} (attempt ${attempt}): ${err}`);
                // A failed call counts as an attempt; retry on the next iteration.
                continue;
            }
            if (!lmResponse) {
                logger.warn(`LMStudio returned empty response for id=${id} (attempt ${attempt}).`);
                continue;
            }
            // Accept the prompt under any of the key spellings the model may return.
            const videoPrompt = lmResponse.videoPrompt || lmResponse.video_prompt || lmResponse.prompt || null;
            if (!videoPrompt || typeof videoPrompt !== 'string') {
                logger.warn(`LMStudio did not return a valid videoPrompt for id=${id} (attempt ${attempt}).`);
                continue;
            }
            // Reject prompts that still contain a banned word/phrase.
            if (banned.test(videoPrompt)) {
                logger.info(`Generated prompt for id=${id} (attempt ${attempt}) still contains banned phrases - retrying.`);
                logger.info(videoPrompt);
                // On the last attempt the loop simply ends and the row is skipped below.
                continue;
            }
            // Passed the banned check: keep it and stop retrying.
            finalPrompt = videoPrompt;
            break;
        }
        if (!finalPrompt) {
            logger.warn(`Could not generate a clean prompt for id=${id} after ${maxAttempts} attempts. Skipping update.`);
            continue;
        }
        // Persist the clean prompt; a failure here is logged and the sweep continues.
        try {
            await query('UPDATE video SET video_prompt = ? WHERE id = ?', [finalPrompt, id]);
            logger.info(`Updated video_prompt for id=${id}`);
        } catch (err) {
            logger.error(`Failed to update video_prompt for id=${id}: ${err}`);
        }
    }
    logger.info('Finished DB sweep for problematic prompts.');
 }
 /**
 * Assemble the LMStudio instruction text for a single DB record.
 *
 * The text asks for one JSON object with a "videoPrompt" key, states the HARD
 * RULES and the prohibited words/phrases, and then lists the record's details.
 * An empty action becomes 'n/a' and an empty camera becomes a neutral default.
 * Accents, mood, lighting and style are always sent as fixed placeholders.
 *
 * @param genre Value printed on the "Genre" line.
 * @param subGenre Value printed on the "Sub-Genre" line.
 * @param finalScene Value printed on the "Scene" line.
 * @param chosenAction Value printed on the "Action" line; '' falls back to 'n/a'.
 * @param camera Value printed on the "Camera" line; '' falls back to a default.
 * @param existingPrompt Accepted for the caller's convenience; not used in the text.
 * @returns The complete instruction string to send to LMStudio.
 */
 function buildLMInputFromRecord(
    genre: string,
    subGenre: string,
    finalScene: string,
    chosenAction: string,
    camera: string,
    existingPrompt: string | undefined
 ) {
    // Details with no source value for this input: always the same placeholders.
    const placeholders = { accents: 'none', mood: 'n/a', lighting: 'n/a', style: 'n/a' };
    // Resolve the optional fields up front so the template below stays declarative.
    const actionText = chosenAction || 'n/a';
    const cameraText = camera || 'static or subtle movement (stay within scene)';
    return `
 Return exactly one JSON object: { "videoPrompt": "..." } and nothing else.
 Write "videoPrompt" in 100–150 words, present tense, plain concrete language.
 HARD RULES (must comply):
 - One continuous shot ("one take", "oner"). Real-time 8 seconds. No edits.
 - Fixed location and vantage. Do not change background or angle.
 - Lens and focal length locked. No zooms, no close-ups that imply a lens change, no rack zoom.
 - Camera motion: at most subtle pan/tilt/dolly within 1 meter while staying in the same spot.
 - Keep framing consistent (e.g., medium-wide two-shot). No “another shot/meanwhile.”
 - Describe: (1) main action, (2) framing & motion, (3) lighting & mood, (4) style & small accents.
 - Use clear simple sentences. No metaphors or poetic language.
 PROHIBITED WORDS/PHRASES (case-insensitive): 
 cut, cuts, cutting, quick cut, insert, macro insert, close-up, extreme close-up,
 zoom, zooms, zooming, push-in, pull-out, whip, switch angle, change angle,
 montage, cross-cut, smash cut, transition, meanwhile, later.
 If proximity is needed, say: "the camera glides slightly closer while staying in the same position."
 Here is information of the scene, please generate prompt for the video based on these information for key "videoPrompt":
 Genre: ${genre}
 Sub-Genre: ${subGenre}
 Scene: ${finalScene}
 Action: ${actionText}
 Camera: ${cameraText}
 Accents: ${placeholders.accents}
 Mood: ${placeholders.mood}
 Lighting: ${placeholders.lighting}
 Style: ${placeholders.style}
 `;
 }
 // Start the sweep when the script is executed; `void` marks the returned
 // promise as intentionally not awaited at module level.
 void main();
--- a/src/generateVideo.ts
+++ b/src/generateVideo.ts
@ -16,10 +16,10 @@ interface VideoRecord {
 }
 const servers = [
-    /*{
+    {
        baseUrl: process.env.SERVER1_COMFY_BASE_URL,
        outputDir: process.env.SERVER1_COMFY_OUTPUT_DIR,
-    },*/
+    },
    {
        baseUrl: process.env.SERVER2_COMFY_BASE_URL,
        outputDir: process.env.SERVER2_COMFY_OUTPUT_DIR,
--- a/src/index.ts
+++ b/src/index.ts
@ -4,6 +4,12 @@ import { generateImage } from './lib/image-generator';
 import { generateVideo } from './lib/video-generator';
 import { logger } from './lib/logger';
 import * as fs from 'fs/promises';
 import dotenv from 'dotenv';
 // Run dotenv's config step before process.env is read below
 // (presumably this loads values from a .env file — confirm against the project setup).
 dotenv.config();
 // Base URL and output directory passed to generateImage / generateVideo.
 // An unset variable resolves to an empty string.
 // NOTE(review): generateVideo.ts reads SERVER1_COMFY_BASE_URL / SERVER1_COMFY_OUTPUT_DIR;
 // confirm that the "...1"-suffixed names used here are the intended variables.
 const COMFY_BASE_URL = process.env.COMFY_BASE_URL1 ?? "";
 const COMFY_OUTPUT_DIR = process.env.COMFY_OUTPUT_DIR1 ?? "";
 async function prepareImageForKeyword(keyword: string): Promise<{ keyword: string; generatedImagePath: string } | null> {
    const numberOfPages = 1;
@ -49,6 +55,8 @@ async function prepareImageForKeyword(keyword: string): Promise<{ keyword: strin
        const generatedImagePath = await generateImage(
            imagePrompt,
            imageFileName,
            COMFY_BASE_URL,
            COMFY_OUTPUT_DIR,
            'flux',
            { width: 720, height: 1280 }
        );
@ -81,6 +89,8 @@ async function generateVideoFromImagePath(keyword: string, generatedImagePath: s
            videoPrompt,
            generatedImagePath,
            videoFileName,
            COMFY_BASE_URL,
            COMFY_OUTPUT_DIR,
            { width: 720, height: 1280 }
        );
        logger.debug(`Generated video from prompt, saved to: ${generatedVideoPath}`);
Author	SHA1	Message	Date
Ken Yasue	3a66ecc808	Merge branch 'master' of https://git.yasue.org/ken/RandomVideoMaker	2025-08-22 21:50:45 +02:00
Ken Yasue	888b27e275	save work	2025-08-22 21:50:40 +02:00