save current changes
This commit is contained in:
3
.gitignore
vendored
3
.gitignore
vendored
@ -17,3 +17,6 @@
|
||||
npm-debug.log*
|
||||
yarn-debug.log*
|
||||
yarn-error.log*
|
||||
|
||||
# Downloaded images
|
||||
/download/
|
||||
|
||||
981
package-lock.json
generated
981
package-lock.json
generated
File diff suppressed because it is too large
Load Diff
10
package.json
10
package.json
@ -12,7 +12,11 @@
|
||||
"author": "",
|
||||
"license": "ISC",
|
||||
"devDependencies": {
|
||||
"typescript": "^5.0.0",
|
||||
"@types/node": "^20.0.0"
|
||||
"@types/node": "^20.0.0",
|
||||
"typescript": "^5.0.0"
|
||||
},
|
||||
"dependencies": {
|
||||
"dotenv": "^17.2.1",
|
||||
"puppeteer": "^24.16.2"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
30
src/index.ts
30
src/index.ts
@ -1 +1,29 @@
|
||||
console.log("Hello, TypeScript! 2");
|
||||
import { downloadPinterestImages } from './lib/downloader';
|
||||
import { describeImage } from './lib/image-describer';
|
||||
import { logger, setLogLevel, LogLevel } from './lib/logger';
|
||||
|
||||
/**
 * Entry point: downloads Pinterest images for a fixed keyword, then asks the
 * LLM to describe every downloaded image as an image-generation prompt and
 * logs each description.
 *
 * A failure to describe one image is logged and the loop moves on to the next
 * image. A rejection from the download step is handled by the trailing
 * `.catch`, which logs it and sets a non-zero exit code instead of letting it
 * surface as an unhandled promise rejection.
 */
(async () => {
  const keyword = 'cyberpunk city';
  const numberOfPages = 1;
  const imagePaths = await downloadPinterestImages(keyword, numberOfPages);
  logger.info('Downloaded images:', imagePaths);

  for (const imagePath of imagePaths) {
    try {
      // The template literal is kept flush-left so no indentation leaks into
      // the text that is sent to the model.
      const llmResponseJSON = await describeImage(imagePath,
`Describe this image as a prompt for an image generation model.
Prompt should be in 200 words.
Output should be in this format
---
{
"prompt":""
}
---
`);
      const prompt = llmResponseJSON.prompt;
      logger.info(`Description for ${imagePath}:`, prompt);
    } catch (error) {
      logger.error(`Failed to describe ${imagePath}:`, error);
    }
  }
})().catch((error: unknown) => {
  logger.error('Image pipeline failed:', error);
  process.exitCode = 1;
});
|
||||
|
||||
85
src/lib/downloader.ts
Normal file
85
src/lib/downloader.ts
Normal file
@ -0,0 +1,85 @@
|
||||
import puppeteer from 'puppeteer';
|
||||
import fs from 'fs';
|
||||
import path from 'path';
|
||||
import { logger } from './logger';
|
||||
|
||||
// Absolute path of the folder that receives downloaded images, resolved two
// directory levels above this module.
const downloadPath = path.resolve(__dirname, '../../download');

// Create the folder (and any missing parents) once, when the module is loaded.
if (fs.existsSync(downloadPath) === false) {
  fs.mkdirSync(downloadPath, { recursive: true });
}
|
||||
|
||||
/**
 * Fetches `url` and writes the response body to `filepath`.
 *
 * @param url - URL of the resource to fetch.
 * @param filepath - Destination file; an existing file is overwritten.
 * @throws Error when the server answers with a non-2xx status, so that an
 *   error page is never stored on disk as if it were an image. Network and
 *   file-system errors propagate unchanged.
 */
async function downloadImage(url: string, filepath: string): Promise<void> {
  const response = await fetch(url);
  if (!response.ok) {
    throw new Error(`Failed to download ${url}: HTTP ${response.status} ${response.statusText}`);
  }

  const buffer = await response.arrayBuffer();
  // Asynchronous write: keeps the event loop free while the file is flushed.
  await fs.promises.writeFile(filepath, Buffer.from(buffer));
}
|
||||
|
||||
/**
 * Searches Pinterest for `keyword` in a headless browser and downloads the
 * `4x` srcset variant of every matching `i.pinimg.com` image into the
 * download directory.
 *
 * One "page" is one scroll step: the images currently in the DOM are
 * collected, then the window is scrolled to the bottom and the function waits
 * (up to 10 s) for the document to grow before the next iteration.
 *
 * @param keyword - Search term; also used (sanitised) as the file-name prefix.
 * @param numberOfPages - Maximum number of scroll steps to scrape.
 * @returns Paths of the files that were written successfully, in download order.
 * @throws Whatever the browser launch or the initial navigation throws; the
 *   browser is closed before the error propagates.
 */
export async function downloadPinterestImages(keyword: string, numberOfPages: number): Promise<string[]> {
  const browser = await puppeteer.launch({ headless: true });

  const downloadedUrls = new Set<string>();
  const downloadedImagePaths: string[] = [];
  // The keyword becomes part of a file name: replace spaces as before, plus
  // path separators and characters that are invalid in file names, so a
  // keyword such as "a/b" cannot point outside the download directory.
  const safeKeyword = keyword.replace(/[ \\/:*?"<>|]/g, '_');

  try {
    const page = await browser.newPage();
    await page.setUserAgent('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36');
    await page.setViewport({ width: 1280, height: 800 });

    const encodedKeyword = encodeURIComponent(keyword);
    await page.goto(`https://www.pinterest.com/search/pins/?q=${encodedKeyword}`, { waitUntil: 'networkidle2' });

    logger.debug('Searching for:', keyword);

    for (let i = 0; i < numberOfPages; i++) {
      logger.debug(`Scraping page ${i + 1}...`);
      try {
        // Runs inside the browser page: keep only images that expose a 4x source.
        const imageUrls = await page.evaluate(() => {
          const images = Array.from(document.querySelectorAll('img[src*="i.pinimg.com"]'));
          const urls = images.map(img => {
            const srcset = (img as HTMLImageElement).srcset;
            if (srcset) {
              const sources = srcset.split(',').map(s => s.trim());
              const source4x = sources.find(s => s.endsWith(' 4x'));
              if (source4x) {
                return source4x.split(' ')[0];
              }
            }
            return null;
          });
          return urls.filter((url): url is string => url !== null);
        });

        for (const url of imageUrls) {
          if (downloadedUrls.has(url)) {
            continue;
          }
          // Marked as seen before the attempt, so a failing URL is not retried
          // on the next scroll step.
          downloadedUrls.add(url);

          try {
            const extension = path.extname(new URL(url).pathname) || '.jpg';
            // Index by the number of successful downloads so names stay gap-free.
            const filename = `${safeKeyword}_${downloadedImagePaths.length}${extension}`;
            const filepath = path.join(downloadPath, filename);
            logger.debug(`Downloading ${url} to ${filepath}`);
            await downloadImage(url, filepath);
            downloadedImagePaths.push(filepath);
          } catch (error) {
            // A single broken image must not abort the whole scrape.
            logger.error(`Failed to download ${url}:`, error);
          }
        }

        const previousHeight = await page.evaluate('document.body.scrollHeight');
        await page.evaluate('window.scrollTo(0, document.body.scrollHeight)');

        try {
          await page.waitForFunction(`document.body.scrollHeight > ${previousHeight}`, { timeout: 10000 });
        } catch (e) {
          logger.debug('No more content to load.');
          break;
        }

        await new Promise(resolve => setTimeout(resolve, 1000)); // Small delay for images to render
      } catch (error) {
        logger.error(`An error occurred while scraping page ${i + 1}:`, error);
        logger.debug('Stopping the scraping process due to an error.');
        break;
      }
    }
  } finally {
    // Always release the browser process, including when navigation fails.
    await browser.close();
  }

  logger.debug('Done.');
  return downloadedImagePaths;
}
|
||||
66
src/lib/image-describer.ts
Normal file
66
src/lib/image-describer.ts
Normal file
@ -0,0 +1,66 @@
|
||||
import fs from 'fs';
|
||||
import path from 'path';
|
||||
import dotenv from 'dotenv';
|
||||
import { logger } from './logger';
|
||||
|
||||
dotenv.config();
|
||||
|
||||
const LLM_BASE_URL = process.env.LLM_BASE_URL;
|
||||
|
||||
/**
 * Sends an image together with a text prompt to the chat-completions endpoint
 * under `LLM_BASE_URL` and returns the first JSON object found in the reply.
 *
 * The request is attempted up to 10 times. An attempt is retried when the
 * request fails, the server answers with a non-2xx status, the reply has no
 * choices, or the reply contains no parseable JSON object.
 *
 * @param imagePath - Path of the image file; it is read and sent as a base64 data URL.
 * @param prompt - Instruction text sent alongside the image.
 * @returns The parsed JSON object extracted from the model's reply.
 * @throws Error when `LLM_BASE_URL` is unset, when it cannot be combined into
 *   a valid request URL, when the image cannot be read, or when every attempt
 *   fails.
 */
// eslint-disable-next-line @typescript-eslint/no-explicit-any -- shape of the reply is dictated by the caller's prompt
async function describeImage(imagePath: string, prompt: string): Promise<any> {
  if (!LLM_BASE_URL) {
    throw new Error('LLM_BASE_URL is not defined in the .env file');
  }

  const maxAttempts = 10;

  // Label the data URL with the real image type instead of always claiming
  // JPEG; unknown extensions keep the previous JPEG default.
  const mimeTypesByExtension: Record<string, string> = {
    '.jpg': 'image/jpeg',
    '.jpeg': 'image/jpeg',
    '.png': 'image/png',
    '.gif': 'image/gif',
    '.webp': 'image/webp',
  };
  const mimeType = mimeTypesByExtension[path.extname(imagePath).toLowerCase()] ?? 'image/jpeg';

  const imageBuffer = fs.readFileSync(imagePath);
  const base64Image = imageBuffer.toString('base64');

  // Loop-invariant request parts are built once. An invalid LLM_BASE_URL
  // therefore fails immediately rather than after ten identical retries.
  const requestUrl = new URL('v1/chat/completions', LLM_BASE_URL);
  const requestBody = JSON.stringify({
    model: 'local-model',
    messages: [
      {
        role: 'user',
        content: [
          { type: 'image_url', image_url: { url: `data:${mimeType};base64,${base64Image}` } },
          { type: 'text', text: prompt },
        ],
      },
    ],
    temperature: 0.7,
  });

  for (let attempt = 1; attempt <= maxAttempts; attempt++) {
    let llmResponse = "";

    try {
      const response = await fetch(requestUrl, {
        method: 'POST',
        headers: {
          'Content-Type': 'application/json',
        },
        body: requestBody,
      });

      if (!response.ok) {
        // Surface the status and body instead of failing later on JSON parsing.
        const detail = await response.text();
        throw new Error(`LLM request failed with HTTP ${response.status}: ${detail}`);
      }

      const data = await response.json();
      if (data.choices && data.choices.length > 0) {
        const content = data.choices[0].message.content;
        llmResponse = content;
        // Greedy match: from the first "{" to the last "}" in the reply.
        const jsonMatch = content.match(/\{[\s\S]*\}/);
        if (jsonMatch) {
          return JSON.parse(jsonMatch[0]);
        }
        // Previously this case was retried without leaving any trace in the log.
        logger.warn(`Attempt ${attempt}: no JSON object found in the LLM response.`);
        logger.debug(`LLM response: ${llmResponse}`);
      } else {
        logger.error('Unexpected API response:', data);
      }
    } catch (error) {
      logger.error(`Attempt ${attempt} failed:`, error);
      if (error instanceof TypeError && error.message.includes('fetch failed')) {
        logger.error('Could not connect to the LM Studio server. Please ensure the server is running and accessible at the specified LLM_BASE_URL.');
      }
      logger.debug(`LLM response: ${llmResponse}`)
    }
  }

  throw new Error(`Failed to describe image after ${maxAttempts} attempts`);
}
|
||||
|
||||
export { describeImage };
|
||||
60
src/lib/logger.ts
Normal file
60
src/lib/logger.ts
Normal file
@ -0,0 +1,60 @@
|
||||
import dotenv from 'dotenv';
|
||||
|
||||
dotenv.config();
|
||||
|
||||
/**
 * Logging verbosity thresholds, ordered from silent to most verbose so that
 * two levels can be compared numerically.
 *
 * Values are assigned by auto-increment starting at 0:
 * NONE = 0, ERROR = 1, WARN = 2, INFO = 3, DEBUG = 4.
 */
export enum LogLevel {
  NONE,
  ERROR,
  WARN,
  INFO,
  DEBUG,
}
|
||||
|
||||
/**
 * Maps a textual level name (case-insensitive) to its `LogLevel`.
 *
 * @param level - Level name such as "debug" or "WARN"; may be undefined.
 * @returns The matching level, or `LogLevel.INFO` when `level` is missing,
 *   empty, or not one of NONE / ERROR / WARN / INFO / DEBUG.
 */
const getLogLevelFromString = (level: string | undefined): LogLevel => {
  if (!level) {
    return LogLevel.INFO;
  }

  const levelsByName = new Map<string, LogLevel>([
    ['NONE', LogLevel.NONE],
    ['ERROR', LogLevel.ERROR],
    ['WARN', LogLevel.WARN],
    ['INFO', LogLevel.INFO],
    ['DEBUG', LogLevel.DEBUG],
  ]);

  // `??` rather than `||`: LogLevel.NONE is 0 and must not fall through to INFO.
  return levelsByName.get(level.toUpperCase()) ?? LogLevel.INFO;
};
|
||||
|
||||
// Active verbosity threshold; initialised once at module load from the
// LOG_LEVEL environment variable.
let currentLogLevel = getLogLevelFromString(process.env['LOG_LEVEL']);

/**
 * Replaces the active verbosity threshold used by `logger`.
 *
 * @param level - New threshold; messages above it are suppressed.
 */
export function setLogLevel(level: LogLevel): void {
  currentLogLevel = level;
}
|
||||
|
||||
/**
 * Builds one logging method: it forwards its arguments to the given `console`
 * method only while the active level is at least `threshold`. The console
 * method is looked up on every call, not captured once.
 */
const emitAt = (threshold: LogLevel, method: 'error' | 'warn' | 'info' | 'debug') =>
  (...args: any[]): void => {
    if (currentLogLevel < threshold) {
      return;
    }
    console[method](...args);
  };

/**
 * Level-gated wrapper around `console`. Each method prints only when the
 * active level (see `setLogLevel`) is at or above the method's own level.
 */
export const logger = {
  error: emitAt(LogLevel.ERROR, 'error'),
  warn: emitAt(LogLevel.WARN, 'warn'),
  info: emitAt(LogLevel.INFO, 'info'),
  debug: emitAt(LogLevel.DEBUG, 'debug'),
};
|
||||
Reference in New Issue
Block a user