import { callLmstudio } from '../../lib/lmstudio';
import { logger } from '../../lib/logger';
import * as fs from 'fs/promises';
import dotenv from 'dotenv';
import path from 'path';
import puppeteer from 'puppeteer';
import type { Browser, Page } from 'puppeteer';

dotenv.config();

const SCROLL_SEARCH = 3; // scroll times on search results
const SCROLL_PIN = 3; // scroll times on pin page
const PINS_TO_COLLECT = 5;
// Number of keywords requested from the model by the main routine.
const KEYWORD_COUNT = 20;

// Browser identity shared by every page this script opens.
const USER_AGENT =
  'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36';
const VIEWPORT = { width: 1920, height: 1080 };

// Leading list decoration ("1. ", "2) ", "3 - ", "- ", "* ") on a keyword line.
// A delimiter followed by whitespace is required, so keywords such as
// "3D monster" or "2-headed ghost" are left untouched.
const LIST_MARKER = /^\s*(?:\d+\s*[.):-]\s+|[-*•]\s+)/;

// Hard-coded user prompt (can be overridden with the HARDCODED_USER_PROMPT env variable;
// an empty value falls back to the default, hence `||`).
// NOTE(review): the default text contains typos ("ghotst", "genearated", "haloween") that
// the model may copy into the generated search keywords. Left unchanged because the text
// is runtime input to the model — confirm before correcting.
const HARDCODED_USER_PROMPT =
  process.env.HARDCODED_USER_PROMPT ||
  (' Generate 20 keywords for photos of a ghotst or monster from all over the world. ' +
    '"Cute Japanese yokai" is mandatory, also add "Realistic photo cute" keyword to all genearated keywords first. ' +
    'Example output : ["Cute Japanese yokai","Realistic photo Cute ghost","Realistic photo cute monster",' +
    '"Realistic photo cute haloween monster","Realistic photo cute haloween ghost"... and 20 items in array] ');

/** Waits `baseMs` plus a random extra of up to `jitterMs` milliseconds. */
function randomDelay(baseMs: number, jitterMs: number): Promise<void> {
  return new Promise((resolve) => setTimeout(resolve, baseMs + Math.random() * jitterMs));
}

/**
 * Launches a headless browser, opens one page configured with the shared user agent
 * and viewport, runs `task`, and always closes the browser afterwards — including
 * when page set-up or the task itself throws.
 */
async function withBrowserPage<T>(task: (page: Page, browser: Browser) => Promise<T>): Promise<T> {
  const browser = await puppeteer.launch({ headless: true });
  try {
    const page = await browser.newPage();
    await page.setUserAgent(USER_AGENT);
    await page.setViewport(VIEWPORT);
    return await task(page, browser);
  } finally {
    await browser.close();
  }
}

/**
 * Searches Pinterest for `keyword` and collects links whose href contains "/pin/".
 *
 * Links are harvested once after the initial load and once after every scroll, so
 * `scrollCount = 0` still returns what the first screen shows and the content loaded
 * by the last scroll is not discarded.
 *
 * @param keyword     Search phrase (URL-encoded into the query string).
 * @param scrollCount Maximum number of scroll-to-bottom steps.
 * @param limit       Maximum number of links to return.
 * @returns Up to `limit` unique link URLs; an empty array when anything fails
 *          (the error is logged).
 */
async function getPinUrlsFromPinterest(
  keyword: string,
  scrollCount = SCROLL_SEARCH,
  limit = PINS_TO_COLLECT,
): Promise<string[]> {
  try {
    return await withBrowserPage(async (page) => {
      const searchUrl = `https://www.pinterest.com/search/pins/?q=${encodeURIComponent(keyword)}`;
      await page.goto(searchUrl, { waitUntil: 'networkidle2' });

      const pinLinks = new Set<string>();
      for (let scrollsDone = 0; ; scrollsDone++) {
        const sizeBefore = pinLinks.size;
        const hrefs = await page.$$eval('a', (anchors) =>
          anchors.map((a) => a.href).filter((href) => href.includes('/pin/')),
        );
        for (const href of hrefs) {
          pinLinks.add(href);
        }

        if (pinLinks.size >= limit || scrollsDone >= scrollCount) {
          break;
        }
        if (pinLinks.size === sizeBefore) {
          // Checked before scrolling so no scroll + delay is wasted once the page stops yielding pins.
          logger.info(`No new pins loaded for "${keyword}", stopping scroll.`);
          break;
        }

        await page.evaluate('window.scrollTo(0, document.body.scrollHeight)');
        await randomDelay(500, 1000);
      }

      return Array.from(pinLinks).slice(0, limit);
    });
  } catch (error) {
    logger.error(`Error while getting pin URLs from Pinterest for keyword "${keyword}":`, error);
    return [];
  }
}

/**
 * Derives a file-name prefix from the last path segment of `pinUrl`.
 * The URL is parsed first so a query string or fragment never ends up in the file
 * name; input that is not an absolute URL is split as a plain path instead.
 */
function pinIdFromUrl(pinUrl: string): string {
  let pathname = pinUrl;
  try {
    pathname = new URL(pinUrl).pathname;
  } catch {
    // Not an absolute URL — fall back to splitting the raw string.
  }
  return pathname.split('/').filter(Boolean).pop() || `pin_${Date.now()}`;
}

/**
 * Opens a pin page, scrolls it, and saves every image that advertises a "4x" srcset
 * candidate hosted on a "pinimg" URL into `<cwd>/download`.
 *
 * @param pinUrl      URL of the pin page.
 * @param scrollTimes Number of scroll-to-bottom steps performed before collecting images.
 * @returns Paths of the files written; an empty array when nothing was found or the
 *          page could not be processed (the error is logged). A failure on a single
 *          image is logged and does not abort the remaining downloads.
 */
async function downloadImagesFromPin(pinUrl: string, scrollTimes = SCROLL_PIN): Promise<string[]> {
  try {
    return await withBrowserPage(async (page, browser) => {
      await page.goto(pinUrl, { waitUntil: 'networkidle2', timeout: 30000 });

      for (let i = 0; i < scrollTimes; i++) {
        await page.evaluate('window.scrollTo(0, document.body.scrollHeight)');
        await randomDelay(700, 800);
      }

      // Runs inside the browser: must not reference anything from the Node scope.
      const imageUrls: string[] = await page.$$eval('img', (imgs) => {
        const urls = imgs
          .map((img) => {
            const srcset = img.getAttribute('srcset') || '';
            if (!srcset) {
              return ''; // Ignore images without srcset
            }
            for (const candidate of srcset.split(',').map((p) => p.trim())) {
              const match = candidate.match(/^(\S+)\s+4x$/);
              if (match && match[1]) {
                return match[1]; // Found the 4x version
              }
            }
            return ''; // No 4x version for this image
          })
          .filter((s) => !!s && s.includes('pinimg')); // Drop empties and non-Pinterest images
        return [...new Set(urls)];
      });

      if (imageUrls.length === 0) {
        logger.warn(`No high-res images found on pin ${pinUrl}`);
        return [];
      }

      const outDir = path.join(process.cwd(), 'download');
      await fs.mkdir(outDir, { recursive: true });

      const pinId = pinIdFromUrl(pinUrl);
      const savedPaths: string[] = [];

      for (const [index, src] of imageUrls.entries()) {
        let imgPage: Page | undefined;
        try {
          imgPage = await browser.newPage();
          const resp = await imgPage.goto(src, { timeout: 30000, waitUntil: 'load' });
          if (!resp || !resp.ok()) {
            // Do not persist an error page as if it were image data.
            logger.warn(`Skipping image ${src} from ${pinUrl}: ${resp ? `HTTP ${resp.status()}` : 'no response'}`);
            continue;
          }
          const buffer = await resp.buffer();
          // NOTE(review): the extension is always ".png" regardless of the real image
          // format — confirm what consumers of the download folder expect before changing.
          const outPath = path.join(outDir, `${pinId}_${Date.now()}_${index}.png`);
          await fs.writeFile(outPath, buffer);
          savedPaths.push(outPath);
        } catch (err) {
          logger.error(`Failed to download image ${src} from ${pinUrl}:`, err);
        } finally {
          // Close the tab on every path (success, skip, failure) so tabs do not pile up.
          if (imgPage) {
            await imgPage.close().catch((closeErr: unknown) => {
              logger.warn(`Failed to close image tab for ${src}:`, closeErr);
            });
          }
        }
      }

      return savedPaths;
    });
  } catch (err) {
    logger.error(`Failed to download images from ${pinUrl}:`, err);
    return [];
  }
}

/**
 * Re-usable helper to extract JSON embedded in text.
 *
 * Tries, in order: the contents of the first ``` fenced block, then the first
 * `{...}` / `[...]` span. If such a span exists but is not valid JSON the result is
 * `null`. If no span exists at all and the text has more than one non-empty line,
 * the trimmed lines are returned as an array.
 *
 * @returns The parsed value, an array of lines, or `null` when nothing usable is found.
 */
function extractJsonFromText(text: string): unknown {
  if (!text || typeof text !== 'string') return null;

  const fenced = text.match(/```(?:json)?\s*([\s\S]*?)\s*```/i);
  if (fenced && fenced[1]) {
    try {
      return JSON.parse(fenced[1].trim());
    } catch {
      /* fall through to the brace/bracket search */
    }
  }

  const brace = text.match(/\{[\s\S]*\}|\[[\s\S]*\]/);
  if (brace && brace[0]) {
    try {
      return JSON.parse(brace[0]);
    } catch {
      return null;
    }
  }

  // Attempt line-separated keywords fallback
  const lines = text
    .split(/\r?\n/)
    .map((l) => l.trim())
    .filter(Boolean);
  return lines.length > 1 ? lines : null;
}

/**
 * Turns raw candidates into clean keyword strings: keeps strings and numbers only,
 * strips leading list numbering/bullets, trims, drops empties, and caps at `count`.
 */
function normalizeKeywords(items: readonly unknown[], count: number): string[] {
  return items
    .filter((item): item is string | number => typeof item === 'string' || typeof item === 'number')
    .map((item) => String(item).replace(LIST_MARKER, '').trim())
    .filter(Boolean)
    .slice(0, count);
}

/**
 * Asks LM Studio for search keywords derived from `prompt`.
 *
 * The response may be a string or an object; when the object has a truthy `text`
 * property that text is parsed, otherwise the object itself is inspected. Accepted
 * shapes are a JSON array, or an object holding an array under `keywords`, `list`,
 * `items` or `keywords_list`. If none applies, the raw text is split into lines.
 * Every keyword goes through `normalizeKeywords`, so list numbering never leaks
 * into a search phrase regardless of which path produced it.
 *
 * @param prompt Free-form description of the desired content.
 * @param count  Number of keywords requested from the model and maximum returned.
 * @returns At most `count` keywords; an empty array on an empty response or error
 *          (logged).
 */
async function extractKeywordsFromPromptWithLmstudio(prompt: string, count = 5): Promise<string[]> {
  const instruction =
    'You are given a short instruction describing the type of content to search for. ' +
    `Return exactly a JSON array of ${count} short keyword phrases suitable for searching Pinterest. \n`;

  try {
    const res: unknown = await callLmstudio(`${instruction}\n\nInstruction: ${prompt}`);
    if (!res) {
      logger.warn('callLmstudio returned empty response for keyword extraction.');
      return [];
    }

    const textField = typeof res === 'object' ? (res as { text?: unknown }).text : undefined;

    let parsed: unknown;
    if (typeof res === 'string') {
      parsed = extractJsonFromText(res);
    } else if (typeof res === 'object') {
      if (textField) {
        parsed = typeof textField === 'string' ? extractJsonFromText(textField) : null;
      } else {
        parsed = res;
      }
    }

    if (Array.isArray(parsed)) {
      return normalizeKeywords(parsed, count);
    }
    if (typeof parsed === 'object' && parsed !== null) {
      const bag = parsed as Record<string, unknown>;
      const maybe = bag.keywords || bag.list || bag.items || bag.keywords_list;
      if (Array.isArray(maybe)) {
        return normalizeKeywords(maybe, count);
      }
    }

    // Last resort: treat the raw response text as one keyword per line.
    let rawText: string;
    if (typeof res === 'string') {
      rawText = res;
    } else if (typeof textField === 'string' && textField) {
      rawText = textField;
    } else {
      rawText = JSON.stringify(res) ?? '';
    }
    const lines = normalizeKeywords(rawText.split(/\r?\n/), count);
    if (lines.length >= 1) {
      return lines;
    }

    logger.warn(`Could not parse keywords from LM Studio response: ${JSON.stringify(res)}`);
    return [];
  } catch (error) {
    logger.error('Error during keyword extraction with callLmstudio:', error);
    return [];
  }
}

/**
 * Script entry point: derives keywords from the prompt, collects pin URLs for each
 * keyword, then downloads the images of every unique pin.
 */
async function main(): Promise<void> {
  logger.info(`Starting photo download process with prompt: "${HARDCODED_USER_PROMPT}"`);

  // 1. Extract keywords from the hardcoded prompt
  const keywords = await extractKeywordsFromPromptWithLmstudio(HARDCODED_USER_PROMPT, KEYWORD_COUNT);
  if (!keywords || keywords.length === 0) {
    logger.error("Could not extract keywords from prompt. Exiting.");
    return;
  }
  logger.info(`Extracted keywords: ${keywords.join(', ')}`);

  // 2. Search Pinterest for each keyword and collect pin URLs
  const allPinUrls = new Set<string>();
  for (const keyword of keywords) {
    logger.info(`Searching Pinterest for keyword: "${keyword}"`);
    const pinUrls = await getPinUrlsFromPinterest(keyword, SCROLL_SEARCH, PINS_TO_COLLECT);
    for (const url of pinUrls) {
      allPinUrls.add(url);
    }
  }

  const finalPinUrls = Array.from(allPinUrls);
  logger.info(`Collected ${finalPinUrls.length} unique pin URLs to process.`);

  // 3. Go through each pin URL, scroll, and download all photos
  let totalDownloads = 0;
  for (const pinUrl of finalPinUrls) {
    try {
      logger.info(`Processing pin: ${pinUrl}`);
      const downloadedPaths = await downloadImagesFromPin(pinUrl, SCROLL_PIN);
      if (downloadedPaths.length > 0) {
        logger.info(`Successfully downloaded ${downloadedPaths.length} images from ${pinUrl}`);
        totalDownloads += downloadedPaths.length;
      } else {
        logger.warn(`No images were downloaded from ${pinUrl}`);
      }
    } catch (error) {
      logger.error(`An error occurred while processing pin ${pinUrl}:`, error);
    }
  }

  logger.info(`Photo download process finished. Total images downloaded: ${totalDownloads}`);
}

// Surface unexpected failures instead of leaving an unhandled promise rejection.
main().catch((error: unknown) => {
  logger.error('Photo download process failed:', error);
  process.exitCode = 1;
});