Merge branch 'main' of https://git.yasue.org/ken/tripadviser_scraper
This commit is contained in:
15
src/index.ts
15
src/index.ts
@ -9,7 +9,7 @@ import chrome, { ServiceBuilder } from 'selenium-webdriver/chrome';
|
|||||||
import * as fs from 'fs';
|
import * as fs from 'fs';
|
||||||
import * as path from 'path';
|
import * as path from 'path';
|
||||||
import { getCities } from './lib/cities';
|
import { getCities } from './lib/cities';
|
||||||
import { WebDriverUtils, saveContactInfoToCSV } from './lib/utils';
|
import { WebDriverUtils, saveContactInfoToCSV, useExistingChrome, disableCookiesInChrome } from './lib/utils';
|
||||||
import * as UIActions from './lib/UIActions';
|
import * as UIActions from './lib/UIActions';
|
||||||
import { randomUUID } from 'crypto';
|
import { randomUUID } from 'crypto';
|
||||||
|
|
||||||
@ -23,17 +23,8 @@ async function visitCityPages(): Promise<void> {
|
|||||||
|
|
||||||
console.log('Connecting to existing Chrome browser...');
|
console.log('Connecting to existing Chrome browser...');
|
||||||
|
|
||||||
// Connect to an existing Chrome browser running in debug mode on port 9222
|
const driver = await useExistingChrome();
|
||||||
const options = new chrome.Options();
|
if (!driver) return;
|
||||||
|
|
||||||
// Set the debugger address to connect to the existing Chrome instance
|
|
||||||
options.debuggerAddress('localhost:9222');
|
|
||||||
|
|
||||||
// Create WebDriver instance that connects to the existing browser
|
|
||||||
const driver: WebDriver = await new Builder()
|
|
||||||
.forBrowser('chrome')
|
|
||||||
.setChromeOptions(options)
|
|
||||||
.build();
|
|
||||||
|
|
||||||
// Visit each city's TripAdvisor page
|
// Visit each city's TripAdvisor page
|
||||||
for (let i = 0; i < cities.length; i++) {
|
for (let i = 0; i < cities.length; i++) {
|
||||||
|
|||||||
@ -2,7 +2,8 @@
|
|||||||
* Utility class for common WebDriver operations
|
* Utility class for common WebDriver operations
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import { WebDriver, By, until } from 'selenium-webdriver';
|
import { Builder, By, until, WebDriver } from 'selenium-webdriver';
|
||||||
|
import chrome from 'selenium-webdriver/chrome';
|
||||||
import { writeFileSync, existsSync, appendFileSync } from 'fs';
|
import { writeFileSync, existsSync, appendFileSync } from 'fs';
|
||||||
import * as path from 'path';
|
import * as path from 'path';
|
||||||
import { ContactInfo } from './types';
|
import { ContactInfo } from './types';
|
||||||
@ -44,3 +45,58 @@ export function saveContactInfoToCSV(city: string, contactInfo: ContactInfo, fil
|
|||||||
|
|
||||||
console.log(`Contact info saved to ${filePath}`);
|
console.log(`Contact info saved to ${filePath}`);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Launches a new Chrome session in incognito mode with a maximized window
 * and navigates it to the TripAdvisor home page.
 *
 * The preference-based cookie blocking is currently commented out, so
 * cookies are NOT blocked by this function; only `--incognito` is applied.
 * After navigation a probe cookie is written and the cookie jar is logged so
 * the effective cookie behavior can be inspected. The probe cookie is removed
 * again so it does not stay in the session handed to the caller.
 *
 * @returns The WebDriver for the launched browser. If building the session
 *   fails, `null` is returned; if a later step fails, the already-built
 *   driver is still returned and the error is logged.
 */
export async function disableCookiesInChrome(): Promise<WebDriver | null> {
  const options = new chrome.Options();

  // 1. Block all cookies (currently disabled; kept for reference)
  //options.setUserPreferences({
  //  'profile.default_content_setting_values.cookies': 2, // 2 = Block all
  //  'profile.block_third_party_cookies': true
  //});

  // 2. Launch in incognito for extra privacy, with a maximized window
  options.addArguments('--incognito');
  options.addArguments('--start-maximized');

  let driver: WebDriver | null = null;

  try {
    driver = await new Builder()
      .forBrowser('chrome')
      .setChromeOptions(options)
      .build();

    await driver.get('https://www.tripadvisor.com/');

    // The blocking preferences above are not applied, so do not claim that
    // cookies are disabled.
    console.log('Chrome launched in incognito mode.');
  } catch (error: unknown) {
    console.error('Error:', error);
    // Same contract as before: hand back whatever was built (null if the
    // build itself failed).
    return driver;
  }

  // Probe the cookie behavior separately so that a failure here is not
  // confused with a launch/navigation failure.
  const probeCookieName = 'test';
  try {
    await driver.manage().addCookie({ name: probeCookieName, value: '123' });
    const cookies = await driver.manage().getCookies();
    console.log('Cookies after trying to add:', cookies); // Should be empty or restricted
  } catch (error: unknown) {
    console.error('Error:', error);
  } finally {
    try {
      // Remove the probe so it is not left in the session the caller uses.
      await driver.manage().deleteCookie(probeCookieName);
    } catch (cleanupError: unknown) {
      // Best-effort cleanup: report it, but still return the usable driver.
      console.error('Error:', cleanupError);
    }
  }

  return driver;
}
|
||||||
|
|
||||||
|
/**
 * Creates a WebDriver session attached to an already-running Chrome instance
 * through its remote-debugging endpoint, instead of launching a new browser.
 *
 * There is no error handling here: if the session cannot be built, the
 * rejection from `build()` propagates to the caller.
 *
 * @param debuggerAddress - `host:port` of the Chrome debugger endpoint to
 *   attach to. Defaults to `localhost:9222`, the address that was previously
 *   hard-coded.
 * @returns The WebDriver connected to the existing browser.
 */
export async function useExistingChrome(
  debuggerAddress = 'localhost:9222'
): Promise<WebDriver> {
  const options = new chrome.Options();

  // Point ChromeDriver at the existing Chrome instance rather than a new one
  options.debuggerAddress(debuggerAddress);

  // Create the WebDriver instance that connects to the existing browser
  const driver: WebDriver = await new Builder()
    .forBrowser('chrome')
    .setChromeOptions(options)
    .build();

  return driver;
}
|
||||||
Reference in New Issue
Block a user