1 Commits
main ... edge

Author SHA1 Message Date
ad18256217 trying to use edge 2025-03-23 22:37:07 +01:00
9 changed files with 133 additions and 37047 deletions

4
.vscode/launch.json vendored
View File

@ -8,7 +8,7 @@
"skipFiles": [ "skipFiles": [
"<node_internals>/**" "<node_internals>/**"
], ],
"program": "${workspaceFolder}/src/index_getcontact", "program": "${workspaceFolder}/src/index.ts",
"preLaunchTask": "tsc: build - tsconfig.json", "preLaunchTask": "tsc: build - tsconfig.json",
"outFiles": [ "outFiles": [
"${workspaceFolder}/dist/**/*.js" "${workspaceFolder}/dist/**/*.js"
@ -26,7 +26,7 @@
"runtimeArgs": [ "runtimeArgs": [
"--loader", "--loader",
"ts-node/esm", "ts-node/esm",
"${workspaceFolder}/src/index_getcontact" "${workspaceFolder}/src/index.ts"
], ],
"sourceMaps": true "sourceMaps": true
} }

File diff suppressed because it is too large Load Diff

View File

@ -9,105 +9,173 @@ import chrome, { ServiceBuilder } from 'selenium-webdriver/chrome';
import * as fs from 'fs'; import * as fs from 'fs';
import * as path from 'path'; import * as path from 'path';
import { getCities } from './lib/cities'; import { getCities } from './lib/cities';
import { WebDriverUtils, saveContactInfoToCSV, useExistingChrome, disableCookiesInChrome, useChrome, saveMuseumnameToCSV } from './lib/utils'; import { WebDriverUtils, saveContactInfoToCSV } from './lib/utils';
import * as UIActions from './lib/UIActions'; import * as UIActions from './lib/UIActions';
import { randomUUID } from 'crypto'; import { randomUUID } from 'crypto';
import os from 'os';
import edge from 'selenium-webdriver/edge';
/** /**
* Function to visit TripAdvisor pages for each city * Function to visit TripAdvisor pages for each city
*/ */
async function visitCityPages(): Promise<void> { async function visitCityPages(): Promise<void> {
const userHomeDir = os.homedir(); // gets C:\Users\<YourName>
const driverPath = path.join(userHomeDir, 'Documents', 'edgedriver_win64', 'msedgedriver.exe');
// Configure Edge service to use your custom driver path
const service = new edge.ServiceBuilder(driverPath);
const options = new edge.Options();
options.addArguments('--inprivate');
options.addArguments('--start-maximized');
let driver: WebDriver;
driver = await new Builder()
.forBrowser('MicrosoftEdge')
.setEdgeOptions(options)
.setEdgeService(service)
.build();
await driver.get('https://www.tripadvisor.com');
await WebDriverUtils.wait(5);
/*
// Connect to an existing Chrome browser running in debug mode on port 9222
const options = new chrome.Options();
// Set the debugger address to connect to the existing Chrome instance
options.debuggerAddress('localhost:9222');
// Create WebDriver instance that connects to the existing browser
const driver: WebDriver = await new Builder()
.forBrowser('chrome')
.setChromeOptions(options)
.build();
*/
const cities = getCities(path.join(__dirname, '../data/cities.csv')); const cities = getCities(path.join(__dirname, '../data/cities.csv'));
console.log('Connecting to existing Chrome browser...'); console.log('Connecting to existing Chrome browser...');
const driver = await useExistingChrome();
if (!driver) return;
// Visit each city's TripAdvisor page // Visit each city's TripAdvisor page
for (let i = 0; i < cities.length; i++) { for (let i = 0; i < cities.length; i++) {
const city = cities[i]; const city = cities[i];
console.log(`[${i + 1}/${cities.length}] Visiting TripAdvisor page for ${city}...`); console.log(`[${i + 1}/${cities.length}] Visiting TripAdvisor page for ${city}...`);
let originalWindow;
let cityTopWindow;
let attactionsWindow;
let museumWindow;
try { try {
const originalWindow = await driver.getWindowHandle();
console.log("Logo click") console.log("Logo click")
if (!await UIActions.gotoHome(driver)) throw `${city} failed`; if (!await UIActions.gotoHome(driver)) throw `${city} failed`;
await WebDriverUtils.wait(driver); await WebDriverUtils.wait(5);
console.log("Exec Search") console.log("Exec Search")
if (!await UIActions.execSearch(driver, `"${city}" museums`)) throw `${city} failed`; if (!await UIActions.execSearch(driver, city)) throw `${city} failed`;
await WebDriverUtils.wait(driver); await WebDriverUtils.wait(5);
console.log("Click See all")
if (!await UIActions.clickSeeAll(driver)) {
if (!await UIActions.clickTourismLink(driver)) throw `${city} failed`;
if (!await UIActions.clickSeeAll(driver)) throw `${city} failed`;
}
await WebDriverUtils.wait(5);
console.log("Switch tab")
let windows = await driver.getAllWindowHandles();
// Switch to the newly opened window/tab
for (const handle of windows) {
if (handle !== originalWindow) {
cityTopWindow = handle;
await driver.switchTo().window(handle);
}
}
console.log("Click See all attractions")
if (!await UIActions.clickSeeAllAttractions(driver)) throw `${city} failed`;
await WebDriverUtils.wait(5);
console.log("Switch tab to Attraction")
windows = await driver.getAllWindowHandles();
// Switch to the newly opened window/tab
for (const handle of windows) {
if (handle !== originalWindow && handle !== cityTopWindow) {
attactionsWindow = handle;
await driver.switchTo().window(attactionsWindow);
}
}
// click museum
console.log("Click Museum link");
if (!await UIActions.clickMuseumsLink(driver)) throw `${city} failed`;
await WebDriverUtils.wait(5);
let page = 1; let page = 1;
while (1) { while (1) {
// get list of museums // get list of museums
console.log("Get list of museums"); console.log("Get list of museums");
const museumsInList = await UIActions.getAttractionNames(driver); const museumElms = await UIActions.getMusiums(driver);
await WebDriverUtils.wait(1);
museumsInList.forEach((museumName) => {
saveMuseumnameToCSV(city, museumName, path.join(__dirname, '../data/museums.csv'));
})
UIActions.clickPagination(driver, page); for (const listItem of museumElms) {
await WebDriverUtils.wait(driver,5);
if (museumsInList.length < 30) await listItem.click();
break; await WebDriverUtils.wait(3);
windows = await driver.getAllWindowHandles();
for (const handle of windows) {
if (handle !== originalWindow && handle !== cityTopWindow && handle !== attactionsWindow) {
museumWindow = handle;
await driver.switchTo().window(museumWindow);
}
}
const { websiteUrl, email } = await UIActions.getWebsiteAndEmail(driver);
console.log(`${websiteUrl} / ${email}`);
saveContactInfoToCSV(city, { websiteUrl: websiteUrl, email: email }, path.join(__dirname, '../data/contact_info.csv'));
museumWindow && await driver.switchTo().window(museumWindow);
await driver.close();
await WebDriverUtils.wait(1);
attactionsWindow && await driver.switchTo().window(attactionsWindow);
await WebDriverUtils.wait(1);
}
page++; page++;
if (page > 10) break; if (page > 10) break;
UIActions.clickPagination(driver, page);
await WebDriverUtils.wait(5);
} }
/*
for (const museumIndex in museumNames) {
const museumName = museumNames[museumIndex]; await UIActions.closeAllTabsExceptFirst(driver);
try{
console.log(`reading museum ${museumIndex}/${museumNames.length}`);
if (!await UIActions.gotoHome(driver)) throw `${city} failed`;
await WebDriverUtils.wait(driver);
if (!await UIActions.typeSearch(driver, `${city} ${museumName}`)) throw `${city} failed`;
await WebDriverUtils.wait(driver);
await UIActions.clickFirstAttractionLinkInForm(driver)
await WebDriverUtils.wait(driver);
const { websiteUrl, email } = await UIActions.getWebsiteAndEmail(driver);
console.log(`${websiteUrl} / ${email}`);
saveContactInfoToCSV(city, { name: museumName, websiteUrl: websiteUrl, email: email }, path.join(__dirname, '../data/contact_info.csv'));
await UIActions.simulateClickAt(driver,100,100);
await WebDriverUtils.wait(driver);
}catch(e){
console.error(`failed ${museumName}`)
}
}
*/
if (i < cities.length - 1) { if (i < cities.length - 1) {
console.log(`Waiting for 5000 seconds before next city...`); console.log(`Waiting for 5000 seconds before next city...`);
await WebDriverUtils.wait(); // Wait 5000 seconds before next city await WebDriverUtils.wait(5); // Wait 5000 seconds before next city
} }
} catch (error) { } catch (error) {
await UIActions.closeAllTabsExceptFirst(driver);
// If the button is not found within the timeout, log and continue to the next city // If the button is not found within the timeout, log and continue to the next city
console.log(`No Museums button found for ${city}. Moving to next city after 5 seconds...`); console.log(`No Museums button found for ${city}. Moving to next city after 5 seconds...`);
await WebDriverUtils.wait(); // Wait 5 seconds before next city await WebDriverUtils.wait(5); // Wait 5 seconds before next city
} }
} }

View File

@ -1,112 +0,0 @@
/**
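* Selenium WebDriver script that looks up every museum listed in data/museums.csv on TripAdvisor and records its contact details.
* It attaches to a Chrome instance that is already running with remote debugging enabled; start one first, e.g. from PowerShell:
* & 'C:\Program Files\Google\Chrome\Application\chrome.exe' --remote-debugging-port=9222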
*/
import { Builder, By, until, WebDriver } from 'selenium-webdriver';
import * as chromedriver from 'chromedriver';
import chrome, { ServiceBuilder } from 'selenium-webdriver/chrome';
import * as fs from 'fs';
import * as path from 'path';
import { getMuseums } from './lib/museums';
import { WebDriverUtils, saveContactInfoToCSV, useExistingChrome, disableCookiesInChrome, useChrome, saveMuseumnameToCSV } from './lib/utils';
import * as UIActions from './lib/UIActions';
import { randomUUID } from 'crypto';
/**
 * Visits the TripAdvisor page of every museum listed in `../data/museums.csv`
 * and appends the website / e-mail found there to `../data/contact_info.csv`.
 *
 * The browser session comes from `useExistingChrome()`. A failure while
 * processing one museum is logged and the loop continues with the next row.
 */
async function visitMuseumPages(): Promise<void> {
  const museums = getMuseums(path.join(__dirname, '../data/museums.csv'));
  const contactInfoPath = path.join(__dirname, '../data/contact_info.csv');

  console.log('Connecting to existing Chrome browser...');
  const driver = await useExistingChrome();
  if (!driver) return;

  for (let i = 0; i < museums.length; i++) {
    const row = museums[i];
    const city = row.city;
    const museum = row.name;
    console.log(`[${i + 1}/${museums.length}] Visiting TripAdvisor page for ${museum}...`);

    try {
      if (!(await UIActions.gotoHome(driver))) {
        throw new Error('could not return to the home page');
      }
      await WebDriverUtils.wait(driver);

      if (!(await UIActions.typeSearch(driver, `"${city}" ${museum}`))) {
        throw new Error('could not type into the search box');
      }
      await WebDriverUtils.wait(driver);

      // If the click fails the browser is still on the previous page, so
      // stop here instead of scraping contact details from the wrong page.
      if (!(await UIActions.clickFirstAttractionLinkInForm(driver))) {
        throw new Error('no attraction link to click in the search form');
      }
      await WebDriverUtils.wait(driver);

      const { websiteUrl, email } = await UIActions.getWebsiteAndEmail(driver);
      console.log(`${websiteUrl} / ${email}`);
      saveContactInfoToCSV(city, { name: museum, websiteUrl, email }, contactInfoPath);

      await UIActions.simulateClickAt(driver, 100, 100);
      await WebDriverUtils.wait(driver);
    } catch (error) {
      // Report the real cause; one bad museum must not abort the whole run.
      console.error(`Failed to process ${museum} (${city}):`, error);
    }

    // Pause between museums (success or failure), but not after the last one.
    if (i < museums.length - 1) {
      console.log('Waiting a few seconds before next museum...');
      await WebDriverUtils.wait();
    }
  }

  console.log('Finished visiting all museums!');
}
// Entry point: start the scrape and report any rejection that escapes it.
const reportFatal = (reason: unknown): void => {
  console.error('Error in main function:', reason);
};
visitMuseumPages().catch(reportFatal);

View File

@ -1,4 +1,4 @@
import { Builder, By, until, WebDriver, WebElement, Actions } from 'selenium-webdriver'; import { Builder, By, until, WebDriver, WebElement } from 'selenium-webdriver';
import * as chromedriver from 'chromedriver'; import * as chromedriver from 'chromedriver';
import chrome, { ServiceBuilder } from 'selenium-webdriver/chrome'; import chrome, { ServiceBuilder } from 'selenium-webdriver/chrome';
import * as fs from 'fs'; import * as fs from 'fs';
@ -8,7 +8,7 @@ import { ContactInfo } from './types';
export async function execSearch(driver: WebDriver, searchTerm: string): Promise<boolean> { export async function execSearch(driver: WebDriver, city: string): Promise<boolean> {
try { try {
// Find the search input field // Find the search input field
const searchSelector = 'input[name="q"][placeholder="Places to go, things to do, hotels..."]'; const searchSelector = 'input[name="q"][placeholder="Places to go, things to do, hotels..."]';
@ -18,12 +18,12 @@ export async function execSearch(driver: WebDriver, searchTerm: string): Promise
// Clear any existing text and enter the city name // Clear any existing text and enter the city name
await searchInput.clear(); await searchInput.clear();
await searchInput.sendKeys(searchTerm); await searchInput.sendKeys(city);
// Submit the search (press Enter) // Submit the search (press Enter)
await WebDriverUtils.wait(driver); await WebDriverUtils.wait(2);
await searchInput.sendKeys('\uE007'); // Unicode for Enter key await searchInput.sendKeys('\uE007'); // Unicode for Enter key
await WebDriverUtils.wait(driver); // Wait 5 seconds before next city await WebDriverUtils.wait(5); // Wait 5 seconds before next city
return true; return true;
} catch (e) { } catch (e) {
@ -31,25 +31,6 @@ export async function execSearch(driver: WebDriver, searchTerm: string): Promise
} }
} }
/**
 * Types `searchTerm` into the TripAdvisor search box without submitting it.
 *
 * @param driver WebDriver session to act on.
 * @param searchTerm Text to enter after clearing the box.
 * @returns `true` once the text has been entered, `false` if any step threw.
 */
export async function typeSearch(driver: WebDriver, searchTerm: string): Promise<boolean> {
  const searchSelector = 'input[name="q"][placeholder="Places to go, things to do, hotels..."]';

  try {
    // Wait (10000 passed as the timeout) for the box before touching it.
    await WebDriverUtils.waitForElement(driver, searchSelector, 10000);
    console.log("Search box found");

    const box = await driver.findElement(By.css(searchSelector));
    await box.clear();
    await box.sendKeys(searchTerm);
  } catch {
    return false;
  }

  return true;
}
export async function clickSeeAll(driver: WebDriver): Promise<boolean> { export async function clickSeeAll(driver: WebDriver): Promise<boolean> {
try { try {
const seeAllElement = await driver.wait( const seeAllElement = await driver.wait(
@ -68,21 +49,6 @@ export async function clickSeeAll(driver: WebDriver): Promise<boolean> {
} }
} }
/**
 * Reads the `href` of the first `/Attractions…` link located under the
 * "Things to do" heading.
 *
 * @returns The link's `href`, or an empty string when the link is not located
 *          within 5000 ms or reading the attribute fails.
 */
export async function getSeeAllUrl(driver: WebDriver): Promise<string> {
  const locator = By.xpath(
    `//h3[normalize-space(.)='Things to do']/ancestor::div[1]//a[starts-with(@href, '/Attractions')]`
  );

  try {
    const link = await driver.wait(until.elementLocated(locator), 5000);
    const target = await link.getAttribute('href');
    console.log('Found Attractions URL:', target);
    return target;
  } catch (err) {
    console.warn('Could not find the Attractions link:', err);
    return "";
  }
}
export async function gotoHome(driver: WebDriver): Promise<boolean> { export async function gotoHome(driver: WebDriver): Promise<boolean> {
try { try {
// Click on the Tripadvisor logo before searching for the city // Click on the Tripadvisor logo before searching for the city
@ -99,7 +65,7 @@ export async function gotoHome(driver: WebDriver): Promise<boolean> {
} }
export async function clickSeeAllAttractions(driver: WebDriver): Promise<boolean> { export async function clickSeeAllAttractions(driver: WebDriver): Promise<boolean> {
const xpath = `//h3[starts-with(normalize-space(.), 'Things to do')]/parent::*[1]//a[starts-with(@href, '/Attractions') and .//span[normalize-space(.)='See all']]`; const xpath = `//h2[starts-with(normalize-space(.), 'Top Attractions in')]/parent::*[1]//a[starts-with(@href, '/Attractions') and .//span[normalize-space(.)='See all']]`;
try { try {
const anchorElement = await driver.wait(until.elementLocated(By.xpath(xpath)), 5000); const anchorElement = await driver.wait(until.elementLocated(By.xpath(xpath)), 5000);
@ -116,22 +82,6 @@ export async function clickSeeAllAttractions(driver: WebDriver): Promise<boolean
} }
} }
/**
 * Reads the `href` of the "See all" link that sits next to the
 * "Top Attractions in …" heading.
 *
 * @returns The link's `href`, or `null` when the link is not located or not
 *          visible within 5000 ms each, or the attribute cannot be read.
 */
export async function getSeeAllAttractionsUrl(driver: WebDriver): Promise<string | null> {
  const locator = By.xpath(
    `//h3[starts-with(normalize-space(.), 'Top Attractions in')]/parent::*[1]//a[starts-with(@href, '/Attractions') and .//span[normalize-space(.)='See all']]`
  );

  try {
    const seeAllLink = await driver.wait(until.elementLocated(locator), 5000);
    await driver.wait(until.elementIsVisible(seeAllLink), 5000);
    // `await` here so that a rejection is handled by the catch below.
    return await seeAllLink.getAttribute('href');
  } catch (error) {
    console.warn('Element not found or href not retrievable.', error);
    return null;
  }
}
export async function clickMuseumsLink(driver: WebDriver): Promise<boolean> { export async function clickMuseumsLink(driver: WebDriver): Promise<boolean> {
const xpath = `//a[.//*[normalize-space(.)='Museums']]`; const xpath = `//a[.//*[normalize-space(.)='Museums']]`;
@ -180,51 +130,8 @@ export async function getMusiums(driver: WebDriver): Promise<WebElement[]> {
} }
} }
/**
 * Collects the `href` of every image-bearing `/Attraction…` link found inside
 * a section that also contains an `<h3>`.
 *
 * @returns The non-empty hrefs in document order, or `[]` if the lookup throws.
 */
export async function getMuseumsLinks(driver: WebDriver): Promise<string[]> {
  const locator = By.xpath(
    `//div//section[.//a[starts-with(@href, '/Attraction')] and .//h3]//a[starts-with(@href, '/Attraction') and .//img]`
  );

  try {
    const collected: string[] = [];
    const anchors = await driver.findElements(locator);

    // Attributes are read one element at a time, in order.
    for (const anchor of anchors) {
      const target = await anchor.getAttribute('href');
      if (!target) continue;
      collected.push(target);
    }

    return collected;
  } catch (error) {
    console.warn('Error getting attraction URLs:', error);
    return [];
  }
}
/**
 * Reads the text of every `<h3>` that is a direct child of a link whose href
 * starts with `/Attraction_Review`.
 *
 * Each text is trimmed and a leading "<digits>." prefix (plus following
 * whitespace) is removed.
 *
 * @returns The cleaned texts in document order, or `[]` if the lookup throws.
 */
export async function getAttractionNames(driver: WebDriver): Promise<string[]> {
  const locator = By.xpath(`//a[starts-with(@href, '/Attraction_Review')]/h3`);

  try {
    const headings = await driver.findElements(locator);
    const titles: string[] = [];

    for (const heading of headings) {
      const raw = await heading.getText();
      if (!raw) continue; // skip headings with no text
      const cleaned = raw.trim().replace(/^\d+\.\s*/, '');
      titles.push(cleaned);
    }

    return titles;
  } catch (error) {
    console.warn('Error getting attraction names:', error);
    return [];
  }
}
export async function getWebsiteAndEmail(driver: WebDriver): Promise<ContactInfo> { export async function getWebsiteAndEmail(driver: WebDriver): Promise<ContactInfo> {
const result: ContactInfo = { name: null, websiteUrl: null, email: null }; const result: ContactInfo = { websiteUrl: null, email: null };
// XPath to find URL (starting with 'http' but not containing 'tripadvisor') // XPath to find URL (starting with 'http' but not containing 'tripadvisor')
const urlXPath = `//a[starts-with(@href, 'http') and not(contains(@href, 'tripadvisor'))]`; const urlXPath = `//a[starts-with(@href, 'http') and not(contains(@href, 'tripadvisor'))]`;
@ -286,42 +193,4 @@ export async function closeAllTabsExceptFirst(driver: WebDriver): Promise<void>
await driver.switchTo().window(originalHandle); await driver.switchTo().window(originalHandle);
console.log(`Switched back to original tab: ${originalHandle}`); console.log(`Switched back to original tab: ${originalHandle}`);
} }
/**
 * Clicks the first `/Attraction…` link located inside a `<form>`.
 *
 * The link must be located, visible and enabled (5000 ms allowed for each
 * wait) before it is clicked.
 *
 * @returns `true` after the click, `false` if any wait or the click threw.
 */
export async function clickFirstAttractionLinkInForm(driver: WebDriver): Promise<boolean> {
  const locator = By.xpath(`//form//a[starts-with(@href, '/Attraction')]`);

  try {
    const firstHit = await driver.wait(until.elementLocated(locator), 5000);
    await driver.wait(until.elementIsVisible(firstHit), 5000);
    await driver.wait(until.elementIsEnabled(firstHit), 5000);
    await firstHit.click();
  } catch (error) {
    console.warn('Attraction link not found or not clickable.', error);
    return false;
  }

  console.log('Clicked the first attraction link in the form.');
  return true;
}
/**
 * Moves the pointer to offset (`x`, `y`) using the page `<body>` as the move
 * origin and performs a click there.
 *
 * Errors are logged and swallowed; the function never rejects.
 *
 * NOTE(review): the offset is passed with `origin: <body element>`, so it is
 * presumably measured from that element rather than from the viewport corner
 * - confirm against the selenium-webdriver `Actions.move` documentation.
 * NOTE(review): confirm the installed selenium-webdriver version still accepts
 * the `bridge` option.
 */
export async function simulateClickAt(driver: WebDriver, x: number, y: number): Promise<void> {
  try {
    const pageBody = await driver.findElement(By.css('body'));

    const pointerClick = driver
      .actions({ bridge: true })
      .move({ x, y, origin: pageBody })
      .click();
    await pointerClick.perform();

    console.log(`Clicked at (${x}, ${y})`);
  } catch (error) {
    console.error('An error occurred:', error);
  }
}

View File

@ -1,26 +0,0 @@
/**
* Loads city/name records from a contacts CSV file
*/
import fs from 'fs';
import path from 'path';
import { parse } from 'csv-parse/sync';
/**
 * Reads the CSV at `csvFilePath` (first row used as column names, empty lines
 * skipped) and maps each row to a `{ city, name }` pair.
 *
 * `city` comes from the "City" column and `name` from the "Name of City"
 * column.
 *
 * @param csvFilePath Path of the CSV file, read synchronously as UTF-8.
 * @returns One entry per CSV row, in file order.
 */
export function getContacts(csvFilePath: string) {
  const rows = parse(fs.readFileSync(csvFilePath, 'utf-8'), {
    columns: true,
    skip_empty_lines: true,
  });

  const entries: { city: string; name: string }[] = rows.map((row: any) => ({
    city: row['City'],
    name: row['Name of City'],
  }));

  return entries;
}

View File

@ -1,26 +0,0 @@
/**
* Loads city/name records from the museums CSV file
*/
import fs from 'fs';
import path from 'path';
import { parse } from 'csv-parse/sync';
/**
 * Reads the CSV at `csvFilePath` (first row used as column names, empty lines
 * skipped) and maps each row to a `{ city, name }` pair.
 *
 * NOTE(review): `name` is taken from a column titled "Name of City" - confirm
 * that this is the header the museums CSV is actually written with.
 *
 * @param csvFilePath Path of the CSV file, read synchronously as UTF-8.
 * @returns One entry per CSV row, in file order.
 */
export function getMuseums(csvFilePath: string) {
  const csvText = fs.readFileSync(csvFilePath, 'utf-8');
  const parseOptions = { columns: true, skip_empty_lines: true };
  const rows = parse(csvText, parseOptions);

  const museums: { city: string; name: string }[] = rows.map((row: any) => {
    const city = row['City'];
    const name = row['Name of City'];
    return { city, name };
  });

  return museums;
}

View File

@ -1,5 +1,4 @@
export interface ContactInfo { export interface ContactInfo {
name: string | null,
websiteUrl: string | null; websiteUrl: string | null;
email: string | null; email: string | null;
} }

View File

@ -2,8 +2,7 @@
* Utility class for common WebDriver operations * Utility class for common WebDriver operations
*/ */
import { Builder, By, until, WebDriver } from 'selenium-webdriver'; import { WebDriver, By, until } from 'selenium-webdriver';
import chrome from 'selenium-webdriver/chrome';
import { writeFileSync, existsSync, appendFileSync } from 'fs'; import { writeFileSync, existsSync, appendFileSync } from 'fs';
import * as path from 'path'; import * as path from 'path';
import { ContactInfo } from './types'; import { ContactInfo } from './types';
@ -14,33 +13,9 @@ export class WebDriverUtils {
* @param seconds Number of seconds to wait * @param seconds Number of seconds to wait
* @returns Promise that resolves after the specified time * @returns Promise that resolves after the specified time
*/ */
static async wait(driver?: WebDriver,baseTime: number = 3): Promise<void> { static async wait(seconds: number): Promise<void> {
const seconds = Math.floor(Math.random() * 1000) % 3 + baseTime; console.log(`Waiting for ${seconds} seconds...`);
console.log(`Scrolling to bottom for ${seconds} seconds...`); return new Promise(resolve => setTimeout(resolve, seconds * 1000));
const endTime = Date.now() + seconds * 1000;
let scrollCounter = 0;
while (Date.now() < endTime) {
/*
try {
if (driver) {
await driver.executeScript(`
window.scrollBy(0, 10);
`);
scrollCounter++;
}
} catch (error) {
console.warn('Scroll failed:', error);
}
*/
// Wait a little between scrolls
await new Promise(resolve => setTimeout(resolve, 500));
}
} }
/** /**
@ -58,7 +33,7 @@ export class WebDriverUtils {
export function saveContactInfoToCSV(city: string, contactInfo: ContactInfo, filePath: string): void { export function saveContactInfoToCSV(city: string, contactInfo: ContactInfo, filePath: string): void {
const headers = 'City,Website URL,Email\n'; const headers = 'City,Website URL,Email\n';
const line = `"${city}","${contactInfo.name}","${contactInfo.websiteUrl}","${contactInfo.email}"\n`; const line = `"${city},"${contactInfo.websiteUrl}","${contactInfo.email}"\n`;
if (!existsSync(filePath)) { if (!existsSync(filePath)) {
writeFileSync(filePath, headers + line); writeFileSync(filePath, headers + line);
@ -68,111 +43,3 @@ export function saveContactInfoToCSV(city: string, contactInfo: ContactInfo, fil
console.log(`Contact info saved to ${filePath}`); console.log(`Contact info saved to ${filePath}`);
} }
/**
 * Appends one `city, museum name` row to the CSV at `filePath`, writing a
 * header row first when the file does not exist yet.
 *
 * Both fields are wrapped in double quotes and any embedded double quote is
 * doubled, so names containing commas or quotes stay in a single column.
 *
 * @param city City the museum belongs to.
 * @param museumName Name of the museum.
 * @param filePath CSV file to create or append to.
 */
export function saveMuseumnameToCSV(city: string, museumName: string, filePath: string): void {
  // Two columns, titled the way the `getMuseums` loader looks them up
  // ("City" / "Name of City") when this file is read back.
  const headers = 'City,Name of City\n';
  const quote = (value: string): string => `"${value.replace(/"/g, '""')}"`;
  const line = `${quote(city)},${quote(museumName)}\n`;

  if (!existsSync(filePath)) {
    writeFileSync(filePath, headers + line);
  } else {
    appendFileSync(filePath, line);
  }

  console.log(`Museum name saved to ${filePath}`);
}
/**
 * Launches a new incognito, maximised Chrome window on the TripAdvisor home
 * page.
 *
 * Kept for existing callers; it is exactly `useChrome()` with the TripAdvisor
 * URL. Despite the name, cookie blocking is NOT active: the preferences that
 * would block cookies are commented out in `useChrome`.
 *
 * @returns See `useChrome`.
 */
export async function disableCookiesInChrome(): Promise<WebDriver | null> {
  return useChrome('https://www.tripadvisor.com/');
}

/**
 * Launches a new incognito, maximised Chrome window and opens `initialUrl`.
 *
 * After loading the page it adds a throw-away `test` cookie and logs the
 * cookie jar so the effective cookie behaviour can be inspected in the output.
 *
 * @param initialUrl Page to open once the browser is up.
 * @returns The driver. If building the browser fails the result is `null`; if
 *          a later step fails, the error is logged and the already-created
 *          driver is still returned.
 */
export async function useChrome(initialUrl: string = "https://www.tripadvisor.com/"): Promise<WebDriver | null> {
  const options = new chrome.Options();

  // Cookie blocking is currently switched off. To block all cookies, restore:
  //options.setUserPreferences({
  //  'profile.default_content_setting_values.cookies': 2, // 2 = Block all
  //  'profile.block_third_party_cookies': true
  //});

  // Incognito for extra privacy.
  options.addArguments('--incognito');
  options.addArguments('--start-maximized');

  let driver: WebDriver | null = null;
  try {
    driver = await new Builder()
      .forBrowser('chrome')
      .setChromeOptions(options)
      .build();

    await driver.get(initialUrl);
    // The old message claimed cookies were disabled, which the options above do not do.
    console.log('Chrome launched in incognito mode.');

    // Probe: try to set a cookie and show what the browser kept.
    await driver.manage().addCookie({ name: 'test', value: '123' });
    const cookies = await driver.manage().getCookies();
    console.log('Cookies after trying to add:', cookies);

    return driver;
  } catch (error) {
    console.error('Error:', error);
    return driver;
  }
}
/**
 * Builds a WebDriver whose Chrome options point at the debugger address
 * `localhost:9222`, i.e. a Chrome instance that was started with remote
 * debugging on that port.
 *
 * @returns The connected driver; rejects if the session cannot be built.
 */
export async function useExistingChrome(): Promise<WebDriver> {
  const attachOptions = new chrome.Options();
  // Attach to the running browser instead of launching a new one.
  attachOptions.debuggerAddress('localhost:9222');

  const session = new Builder().forBrowser('chrome').setChromeOptions(attachOptions);
  return await session.build();
}