Compare commits
14 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 7dbafd064e | |||
| 661dd24550 | |||
| 6d4ade340c | |||
| af7e55882d | |||
| 3d566a408e | |||
| 6153b70c1e | |||
| 47134525b8 | |||
| fe177abd85 | |||
| 79ea65c74b | |||
| 1ffea0f61f | |||
| b6cad2a241 | |||
| 6f36809932 | |||
| 0e6df98a50 | |||
| d0bfe15fa4 |
4
.vscode/launch.json
vendored
4
.vscode/launch.json
vendored
@ -8,7 +8,7 @@
|
|||||||
"skipFiles": [
|
"skipFiles": [
|
||||||
"<node_internals>/**"
|
"<node_internals>/**"
|
||||||
],
|
],
|
||||||
"program": "${workspaceFolder}/src/index.ts",
|
"program": "${workspaceFolder}/src/index_getcontact",
|
||||||
"preLaunchTask": "tsc: build - tsconfig.json",
|
"preLaunchTask": "tsc: build - tsconfig.json",
|
||||||
"outFiles": [
|
"outFiles": [
|
||||||
"${workspaceFolder}/dist/**/*.js"
|
"${workspaceFolder}/dist/**/*.js"
|
||||||
@ -26,7 +26,7 @@
|
|||||||
"runtimeArgs": [
|
"runtimeArgs": [
|
||||||
"--loader",
|
"--loader",
|
||||||
"ts-node/esm",
|
"ts-node/esm",
|
||||||
"${workspaceFolder}/src/index.ts"
|
"${workspaceFolder}/src/index_getcontact"
|
||||||
],
|
],
|
||||||
"sourceMaps": true
|
"sourceMaps": true
|
||||||
}
|
}
|
||||||
|
|||||||
36553
data/museums.csv
Normal file
36553
data/museums.csv
Normal file
File diff suppressed because it is too large
Load Diff
139
src/index.ts
139
src/index.ts
@ -9,7 +9,7 @@ import chrome, { ServiceBuilder } from 'selenium-webdriver/chrome';
|
|||||||
import * as fs from 'fs';
|
import * as fs from 'fs';
|
||||||
import * as path from 'path';
|
import * as path from 'path';
|
||||||
import { getCities } from './lib/cities';
|
import { getCities } from './lib/cities';
|
||||||
import { WebDriverUtils, saveContactInfoToCSV } from './lib/utils';
|
import { WebDriverUtils, saveContactInfoToCSV, useExistingChrome, disableCookiesInChrome, useChrome, saveMuseumnameToCSV } from './lib/utils';
|
||||||
import * as UIActions from './lib/UIActions';
|
import * as UIActions from './lib/UIActions';
|
||||||
import { randomUUID } from 'crypto';
|
import { randomUUID } from 'crypto';
|
||||||
|
|
||||||
@ -23,134 +23,91 @@ async function visitCityPages(): Promise<void> {
|
|||||||
|
|
||||||
console.log('Connecting to existing Chrome browser...');
|
console.log('Connecting to existing Chrome browser...');
|
||||||
|
|
||||||
// Connect to an existing Chrome browser running in debug mode on port 9222
|
const driver = await useExistingChrome();
|
||||||
const options = new chrome.Options();
|
if (!driver) return;
|
||||||
|
|
||||||
// Set the debugger address to connect to the existing Chrome instance
|
|
||||||
options.debuggerAddress('localhost:9222');
|
|
||||||
|
|
||||||
// Create WebDriver instance that connects to the existing browser
|
|
||||||
const driver: WebDriver = await new Builder()
|
|
||||||
.forBrowser('chrome')
|
|
||||||
.setChromeOptions(options)
|
|
||||||
.build();
|
|
||||||
|
|
||||||
// Visit each city's TripAdvisor page
|
// Visit each city's TripAdvisor page
|
||||||
for (let i = 0; i < cities.length; i++) {
|
for (let i = 0; i < cities.length; i++) {
|
||||||
|
|
||||||
const city = cities[i];
|
const city = cities[i];
|
||||||
console.log(`[${i + 1}/${cities.length}] Visiting TripAdvisor page for ${city}...`);
|
console.log(`[${i + 1}/${cities.length}] Visiting TripAdvisor page for ${city}...`);
|
||||||
let originalWindow;
|
|
||||||
let cityTopWindow;
|
|
||||||
let attactionsWindow;
|
|
||||||
let museumWindow;
|
|
||||||
|
|
||||||
try {
|
try {
|
||||||
|
|
||||||
const originalWindow = await driver.getWindowHandle();
|
|
||||||
|
|
||||||
console.log("Logo click")
|
console.log("Logo click")
|
||||||
if (!await UIActions.gotoHome(driver)) throw `${city} failed`;
|
if (!await UIActions.gotoHome(driver)) throw `${city} failed`;
|
||||||
await WebDriverUtils.wait(5);
|
await WebDriverUtils.wait(driver);
|
||||||
|
|
||||||
console.log("Exec Search")
|
console.log("Exec Search")
|
||||||
if (!await UIActions.execSearch(driver, city)) throw `${city} failed`;
|
if (!await UIActions.execSearch(driver, `"${city}" museums`)) throw `${city} failed`;
|
||||||
await WebDriverUtils.wait(5);
|
await WebDriverUtils.wait(driver);
|
||||||
|
|
||||||
console.log("Click See all")
|
|
||||||
if (!await UIActions.clickSeeAll(driver)) {
|
|
||||||
if (!await UIActions.clickTourismLink(driver)) throw `${city} failed`;
|
|
||||||
if (!await UIActions.clickSeeAll(driver)) throw `${city} failed`;
|
|
||||||
}
|
|
||||||
await WebDriverUtils.wait(5);
|
|
||||||
|
|
||||||
console.log("Switch tab")
|
|
||||||
let windows = await driver.getAllWindowHandles();
|
|
||||||
// Switch to the newly opened window/tab
|
|
||||||
for (const handle of windows) {
|
|
||||||
if (handle !== originalWindow) {
|
|
||||||
cityTopWindow = handle;
|
|
||||||
await driver.switchTo().window(handle);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
console.log("Click See all attractions")
|
|
||||||
if (!await UIActions.clickSeeAllAttractions(driver)) throw `${city} failed`;
|
|
||||||
await WebDriverUtils.wait(5);
|
|
||||||
|
|
||||||
console.log("Switch tab to Attraction")
|
|
||||||
windows = await driver.getAllWindowHandles();
|
|
||||||
// Switch to the newly opened window/tab
|
|
||||||
for (const handle of windows) {
|
|
||||||
if (handle !== originalWindow && handle !== cityTopWindow) {
|
|
||||||
attactionsWindow = handle;
|
|
||||||
await driver.switchTo().window(attactionsWindow);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// click museum
|
|
||||||
console.log("Click Museum link");
|
|
||||||
if (!await UIActions.clickMuseumsLink(driver)) throw `${city} failed`;
|
|
||||||
await WebDriverUtils.wait(5);
|
|
||||||
|
|
||||||
let page = 1;
|
let page = 1;
|
||||||
|
|
||||||
while (1) {
|
while (1) {
|
||||||
|
|
||||||
// get list of museums
|
// get list of museums
|
||||||
console.log("Get list of museums");
|
console.log("Get list of museums");
|
||||||
const museumElms = await UIActions.getMusiums(driver);
|
const museumsInList = await UIActions.getAttractionNames(driver);
|
||||||
await WebDriverUtils.wait(1);
|
|
||||||
|
museumsInList.forEach((museumName) => {
|
||||||
|
saveMuseumnameToCSV(city, museumName, path.join(__dirname, '../data/museums.csv'));
|
||||||
|
})
|
||||||
|
|
||||||
for (const listItem of museumElms) {
|
UIActions.clickPagination(driver, page);
|
||||||
|
await WebDriverUtils.wait(driver,5);
|
||||||
|
|
||||||
await listItem.click();
|
if (museumsInList.length < 30)
|
||||||
await WebDriverUtils.wait(3);
|
break;
|
||||||
|
|
||||||
windows = await driver.getAllWindowHandles();
|
|
||||||
for (const handle of windows) {
|
|
||||||
if (handle !== originalWindow && handle !== cityTopWindow && handle !== attactionsWindow) {
|
|
||||||
museumWindow = handle;
|
|
||||||
await driver.switchTo().window(museumWindow);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
const { websiteUrl, email } = await UIActions.getWebsiteAndEmail(driver);
|
|
||||||
|
|
||||||
console.log(`${websiteUrl} / ${email}`);
|
|
||||||
saveContactInfoToCSV(city, { websiteUrl: websiteUrl, email: email }, path.join(__dirname, '../data/contact_info.csv'));
|
|
||||||
|
|
||||||
museumWindow && await driver.switchTo().window(museumWindow);
|
|
||||||
await driver.close();
|
|
||||||
await WebDriverUtils.wait(1);
|
|
||||||
|
|
||||||
attactionsWindow && await driver.switchTo().window(attactionsWindow);
|
|
||||||
await WebDriverUtils.wait(1);
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
page++;
|
page++;
|
||||||
|
|
||||||
if (page > 10) break;
|
if (page > 10) break;
|
||||||
|
|
||||||
UIActions.clickPagination(driver, page);
|
|
||||||
await WebDriverUtils.wait(5);
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
for (const museumIndex in museumNames) {
|
||||||
|
|
||||||
await UIActions.closeAllTabsExceptFirst(driver);
|
const museumName = museumNames[museumIndex];
|
||||||
|
|
||||||
|
try{
|
||||||
|
console.log(`reading museum ${museumIndex}/${museumNames.length}`);
|
||||||
|
|
||||||
|
if (!await UIActions.gotoHome(driver)) throw `${city} failed`;
|
||||||
|
await WebDriverUtils.wait(driver);
|
||||||
|
|
||||||
|
if (!await UIActions.typeSearch(driver, `${city} ${museumName}`)) throw `${city} failed`;
|
||||||
|
await WebDriverUtils.wait(driver);
|
||||||
|
|
||||||
|
await UIActions.clickFirstAttractionLinkInForm(driver)
|
||||||
|
await WebDriverUtils.wait(driver);
|
||||||
|
|
||||||
|
const { websiteUrl, email } = await UIActions.getWebsiteAndEmail(driver);
|
||||||
|
|
||||||
|
console.log(`${websiteUrl} / ${email}`);
|
||||||
|
saveContactInfoToCSV(city, { name: museumName, websiteUrl: websiteUrl, email: email }, path.join(__dirname, '../data/contact_info.csv'));
|
||||||
|
|
||||||
|
await UIActions.simulateClickAt(driver,100,100);
|
||||||
|
await WebDriverUtils.wait(driver);
|
||||||
|
}catch(e){
|
||||||
|
console.error(`failed ${museumName}`)
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
*/
|
||||||
|
|
||||||
if (i < cities.length - 1) {
|
if (i < cities.length - 1) {
|
||||||
console.log(`Waiting for 5000 seconds before next city...`);
|
console.log(`Waiting for 5000 seconds before next city...`);
|
||||||
await WebDriverUtils.wait(5); // Wait 5000 seconds before next city
|
await WebDriverUtils.wait(); // Wait 5000 seconds before next city
|
||||||
}
|
}
|
||||||
} catch (error) {
|
|
||||||
|
|
||||||
await UIActions.closeAllTabsExceptFirst(driver);
|
} catch (error) {
|
||||||
|
|
||||||
// If the button is not found within the timeout, log and continue to the next city
|
// If the button is not found within the timeout, log and continue to the next city
|
||||||
console.log(`No Museums button found for ${city}. Moving to next city after 5 seconds...`);
|
console.log(`No Museums button found for ${city}. Moving to next city after 5 seconds...`);
|
||||||
await WebDriverUtils.wait(5); // Wait 5 seconds before next city
|
await WebDriverUtils.wait(); // Wait 5 seconds before next city
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
112
src/index_getcontact.ts
Normal file
112
src/index_getcontact.ts
Normal file
@ -0,0 +1,112 @@
|
|||||||
|
/**
|
||||||
|
* Selenium WebDriver script that looks up each museum listed in data/museums.csv on TripAdvisor and records its website/e-mail. Start Chrome in debug mode first:
|
||||||
|
* & 'C:\Program Files\Google\Chrome\Application\chrome.exe' --remote-debugging-port=9222
|
||||||
|
*/
|
||||||
|
|
||||||
|
import { Builder, By, until, WebDriver } from 'selenium-webdriver';
|
||||||
|
import * as chromedriver from 'chromedriver';
|
||||||
|
import chrome, { ServiceBuilder } from 'selenium-webdriver/chrome';
|
||||||
|
import * as fs from 'fs';
|
||||||
|
import * as path from 'path';
|
||||||
|
import { getMuseums } from './lib/museums';
|
||||||
|
import { WebDriverUtils, saveContactInfoToCSV, useExistingChrome, disableCookiesInChrome, useChrome, saveMuseumnameToCSV } from './lib/utils';
|
||||||
|
import * as UIActions from './lib/UIActions';
|
||||||
|
import { randomUUID } from 'crypto';
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * Looks up every museum listed in `data/museums.csv` on TripAdvisor and
 * appends the website URL / e-mail found on its page to
 * `data/contact_info.csv`.
 *
 * The browser is obtained through `useExistingChrome()`. Museums are
 * processed one after another; a failure for one museum is logged and the
 * loop continues with the next one.
 *
 * @returns A promise that resolves once every museum has been attempted.
 */
async function visitMuseumPages(): Promise<void> {
  const museums = getMuseums(path.join(__dirname, '../data/museums.csv'));
  const contactInfoPath = path.join(__dirname, '../data/contact_info.csv');

  console.log('Connecting to existing Chrome browser...');

  const driver = await useExistingChrome();
  if (!driver) return;

  for (const [i, { city, name: museum }] of museums.entries()) {
    console.log(`[${i + 1}/${museums.length}] Visiting TripAdvisor page for ${museum}...`);

    try {
      if (!await UIActions.gotoHome(driver)) throw new Error(`${city} failed`);
      await WebDriverUtils.wait(driver);

      // Quote the city so it is searched as one phrase.
      if (!await UIActions.typeSearch(driver, `"${city}" ${museum}`)) throw new Error(`${city} failed`);
      await WebDriverUtils.wait(driver);

      // If no attraction link could be opened, the page currently shown is
      // not the museum's page, so scraping it would store wrong contact data.
      if (!await UIActions.clickFirstAttractionLinkInForm(driver)) {
        throw new Error(`${city} / ${museum}: no attraction link found`);
      }
      await WebDriverUtils.wait(driver);

      const { websiteUrl, email } = await UIActions.getWebsiteAndEmail(driver);

      console.log(`${websiteUrl} / ${email}`);
      saveContactInfoToCSV(city, { name: museum, websiteUrl, email }, contactInfoPath);

      // NOTE(review): presumably clicks a neutral spot to dismiss an overlay
      // before the next search - confirm the intent.
      await UIActions.simulateClickAt(driver, 100, 100);
      await WebDriverUtils.wait(driver);

      // No extra pause is needed after the last museum.
      if (i < museums.length - 1) {
        console.log('Pausing before next museum...');
        await WebDriverUtils.wait();
      }
    } catch (error) {
      // Keep going: one unreachable museum must not abort the whole run.
      console.error(`Could not read contact info for ${museum} (${city}). Moving to next museum...`, error);
      await WebDriverUtils.wait();
    }
  }

  console.log('Finished visiting all cities!');
}
|
||||||
|
|
||||||
|
// Entry point: start the run and report any error that escapes visitMuseumPages
visitMuseumPages().catch((error) => console.error('Error in main function:', error));
|
||||||
@ -1,4 +1,4 @@
|
|||||||
import { Builder, By, until, WebDriver, WebElement } from 'selenium-webdriver';
|
import { Builder, By, until, WebDriver, WebElement, Actions } from 'selenium-webdriver';
|
||||||
import * as chromedriver from 'chromedriver';
|
import * as chromedriver from 'chromedriver';
|
||||||
import chrome, { ServiceBuilder } from 'selenium-webdriver/chrome';
|
import chrome, { ServiceBuilder } from 'selenium-webdriver/chrome';
|
||||||
import * as fs from 'fs';
|
import * as fs from 'fs';
|
||||||
@ -8,7 +8,7 @@ import { ContactInfo } from './types';
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
export async function execSearch(driver: WebDriver, city: string): Promise<boolean> {
|
export async function execSearch(driver: WebDriver, searchTerm: string): Promise<boolean> {
|
||||||
try {
|
try {
|
||||||
// Find the search input field
|
// Find the search input field
|
||||||
const searchSelector = 'input[name="q"][placeholder="Places to go, things to do, hotels..."]';
|
const searchSelector = 'input[name="q"][placeholder="Places to go, things to do, hotels..."]';
|
||||||
@ -18,12 +18,12 @@ export async function execSearch(driver: WebDriver, city: string): Promise<boole
|
|||||||
|
|
||||||
// Clear any existing text and enter the city name
|
// Clear any existing text and enter the city name
|
||||||
await searchInput.clear();
|
await searchInput.clear();
|
||||||
await searchInput.sendKeys(city);
|
await searchInput.sendKeys(searchTerm);
|
||||||
|
|
||||||
// Submit the search (press Enter)
|
// Submit the search (press Enter)
|
||||||
await WebDriverUtils.wait(2);
|
await WebDriverUtils.wait(driver);
|
||||||
await searchInput.sendKeys('\uE007'); // Unicode for Enter key
|
await searchInput.sendKeys('\uE007'); // Unicode for Enter key
|
||||||
await WebDriverUtils.wait(5); // Wait 5 seconds before next city
|
await WebDriverUtils.wait(driver); // Wait 5 seconds before next city
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
@ -31,6 +31,25 @@ export async function execSearch(driver: WebDriver, city: string): Promise<boole
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Types `searchTerm` into the TripAdvisor search box without submitting it
 * (no Enter key is sent).
 *
 * @param driver WebDriver session to operate on.
 * @param searchTerm Text to enter into the search box.
 * @returns `true` once the text has been entered, `false` if the search box
 *          could not be found or filled (the cause is logged as a warning).
 */
export async function typeSearch(driver: WebDriver, searchTerm: string): Promise<boolean> {
  const searchSelector = 'input[name="q"][placeholder="Places to go, things to do, hotels..."]';

  try {
    // Wait for the search input field before looking it up
    await WebDriverUtils.waitForElement(driver, searchSelector, 10000);
    console.log("Search box found");
    const searchInput = await driver.findElement(By.css(searchSelector));

    // Replace any existing text with the search term
    await searchInput.clear();
    await searchInput.sendKeys(searchTerm);

    return true;
  } catch (error) {
    // Log the cause instead of swallowing it, like the other helpers do.
    console.warn('Search box not found or not editable.', error);
    return false;
  }
}
|
||||||
|
|
||||||
|
|
||||||
export async function clickSeeAll(driver: WebDriver): Promise<boolean> {
|
export async function clickSeeAll(driver: WebDriver): Promise<boolean> {
|
||||||
try {
|
try {
|
||||||
const seeAllElement = await driver.wait(
|
const seeAllElement = await driver.wait(
|
||||||
@ -49,6 +68,21 @@ export async function clickSeeAll(driver: WebDriver): Promise<boolean> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Reads the `href` of the first `/Attractions...` link located inside the
 * nearest `div` ancestor of the "Things to do" heading.
 *
 * @param driver WebDriver session to operate on.
 * @returns The link's `href`, or an empty string when the link is not found
 *          within 5 seconds or carries no `href`.
 */
export async function getSeeAllUrl(driver: WebDriver): Promise<string> {
  const xpath = `//h3[normalize-space(.)='Things to do']/ancestor::div[1]//a[starts-with(@href, '/Attractions')]`;

  try {
    const anchor = await driver.wait(until.elementLocated(By.xpath(xpath)), 5000);
    const url = await anchor.getAttribute('href');
    console.log('Found Attractions URL:', url);
    // getAttribute can resolve to null at runtime; keep the promised string type.
    return url ?? "";
  } catch (err) {
    console.warn('Could not find the Attractions link:', err);
  }

  return "";
}
|
||||||
|
|
||||||
export async function gotoHome(driver: WebDriver): Promise<boolean> {
|
export async function gotoHome(driver: WebDriver): Promise<boolean> {
|
||||||
try {
|
try {
|
||||||
// Click on the Tripadvisor logo before searching for the city
|
// Click on the Tripadvisor logo before searching for the city
|
||||||
@ -65,7 +99,7 @@ export async function gotoHome(driver: WebDriver): Promise<boolean> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
export async function clickSeeAllAttractions(driver: WebDriver): Promise<boolean> {
|
export async function clickSeeAllAttractions(driver: WebDriver): Promise<boolean> {
|
||||||
const xpath = `//h2[starts-with(normalize-space(.), 'Top Attractions in')]/parent::*[1]//a[starts-with(@href, '/Attractions') and .//span[normalize-space(.)='See all']]`;
|
const xpath = `//h3[starts-with(normalize-space(.), 'Things to do')]/parent::*[1]//a[starts-with(@href, '/Attractions') and .//span[normalize-space(.)='See all']]`;
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const anchorElement = await driver.wait(until.elementLocated(By.xpath(xpath)), 5000);
|
const anchorElement = await driver.wait(until.elementLocated(By.xpath(xpath)), 5000);
|
||||||
@ -82,6 +116,22 @@ export async function clickSeeAllAttractions(driver: WebDriver): Promise<boolean
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Returns the `href` of the "See all" link that sits next to the heading
 * starting with "Top Attractions in".
 *
 * Waits up to 5 seconds for the link to be located and up to 5 more for it
 * to become visible.
 *
 * @param driver WebDriver session to operate on.
 * @returns The link's `href`, or `null` when it cannot be retrieved.
 */
export async function getSeeAllAttractionsUrl(driver: WebDriver): Promise<string | null> {
  const seeAllLocator = By.xpath(
    `//h3[starts-with(normalize-space(.), 'Top Attractions in')]/parent::*[1]//a[starts-with(@href, '/Attractions') and .//span[normalize-space(.)='See all']]`
  );

  try {
    const seeAllLink = await driver.wait(until.elementLocated(seeAllLocator), 5000);
    await driver.wait(until.elementIsVisible(seeAllLink), 5000);
    return await seeAllLink.getAttribute('href');
  } catch (error) {
    console.warn('Element not found or href not retrievable.', error);
    return null;
  }
}
|
||||||
|
|
||||||
export async function clickMuseumsLink(driver: WebDriver): Promise<boolean> {
|
export async function clickMuseumsLink(driver: WebDriver): Promise<boolean> {
|
||||||
const xpath = `//a[.//*[normalize-space(.)='Museums']]`;
|
const xpath = `//a[.//*[normalize-space(.)='Museums']]`;
|
||||||
|
|
||||||
@ -130,8 +180,51 @@ export async function getMusiums(driver: WebDriver): Promise<WebElement[]> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Collects the `href` of every image link to an `/Attraction...` page found
 * in the result sections of the current page.
 *
 * @param driver WebDriver session to operate on.
 * @returns The non-empty `href` values in document order, or an empty array
 *          if the lookup fails.
 */
export async function getMuseumsLinks(driver: WebDriver): Promise<string[]> {
  const xpath = `//div//section[.//a[starts-with(@href, '/Attraction')] and .//h3]//a[starts-with(@href, '/Attraction') and .//img]`;

  try {
    const anchors = await driver.findElements(By.xpath(xpath));
    const hrefs: string[] = [];

    // Read the attributes one at a time, in document order
    for (const anchor of anchors) {
      const target = await anchor.getAttribute('href');
      if (!target) continue;
      hrefs.push(target);
    }

    return hrefs;
  } catch (error) {
    console.warn('Error getting attraction URLs:', error);
    return [];
  }
}
|
||||||
|
|
||||||
|
/**
 * Reads the attraction names shown on the current page.
 *
 * Names are taken from `<h3>` elements that are direct children of links
 * whose `href` starts with `/Attraction_Review`. Each name is trimmed and a
 * leading "12. " style list number is removed.
 *
 * @param driver WebDriver session to operate on.
 * @returns The names in document order, or an empty array if the lookup fails.
 */
export async function getAttractionNames(driver: WebDriver): Promise<string[]> {
  const xpath = `//a[starts-with(@href, '/Attraction_Review')]/h3`;

  try {
    const headings = await driver.findElements(By.xpath(xpath));
    const collected: string[] = [];

    for (const heading of headings) {
      const label = await heading.getText();
      if (!label) continue;
      // Drop surrounding whitespace and the list number prefix
      collected.push(label.trim().replace(/^\d+\.\s*/, ''));
    }

    return collected;
  } catch (error) {
    console.warn('Error getting attraction names:', error);
    return [];
  }
}
|
||||||
|
|
||||||
export async function getWebsiteAndEmail(driver: WebDriver): Promise<ContactInfo> {
|
export async function getWebsiteAndEmail(driver: WebDriver): Promise<ContactInfo> {
|
||||||
const result: ContactInfo = { websiteUrl: null, email: null };
|
const result: ContactInfo = { name: null, websiteUrl: null, email: null };
|
||||||
|
|
||||||
// XPath to find URL (starting with 'http' but not containing 'tripadvisor')
|
// XPath to find URL (starting with 'http' but not containing 'tripadvisor')
|
||||||
const urlXPath = `//a[starts-with(@href, 'http') and not(contains(@href, 'tripadvisor'))]`;
|
const urlXPath = `//a[starts-with(@href, 'http') and not(contains(@href, 'tripadvisor'))]`;
|
||||||
@ -193,4 +286,42 @@ export async function closeAllTabsExceptFirst(driver: WebDriver): Promise<void>
|
|||||||
|
|
||||||
await driver.switchTo().window(originalHandle);
|
await driver.switchTo().window(originalHandle);
|
||||||
console.log(`Switched back to original tab: ${originalHandle}`);
|
console.log(`Switched back to original tab: ${originalHandle}`);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Clicks the first link inside any `<form>` whose `href` starts with
 * `/Attraction`.
 *
 * Waits up to 5 seconds each for the link to be located, to become visible
 * and to become enabled before clicking it.
 *
 * @param driver WebDriver session to operate on.
 * @returns `true` after the click, `false` if the link was not found or not
 *          clickable.
 */
export async function clickFirstAttractionLinkInForm(driver: WebDriver): Promise<boolean> {
  const attractionLocator = By.xpath(`//form//a[starts-with(@href, '/Attraction')]`);

  try {
    const attractionLink = await driver.wait(until.elementLocated(attractionLocator), 5000);

    // Make sure the link can actually receive the click
    await driver.wait(until.elementIsVisible(attractionLink), 5000);
    await driver.wait(until.elementIsEnabled(attractionLink), 5000);

    await attractionLink.click();
    console.log('Clicked the first attraction link in the form.');
    return true;
  } catch (error) {
    console.warn('Attraction link not found or not clickable.', error);
    return false;
  }
}
|
||||||
|
|
||||||
|
/**
 * Moves the pointer by (`x`, `y`) from the page's `<body>` element and
 * performs a click there.
 *
 * NOTE(review): the move origin is the `<body>` element, so the offset is
 * measured from that element, not from the viewport - confirm which one is
 * intended.
 *
 * Errors are logged and not rethrown.
 *
 * @param driver WebDriver session to operate on.
 * @param x Horizontal offset passed to the pointer move.
 * @param y Vertical offset passed to the pointer move.
 */
export async function simulateClickAt(driver: WebDriver, x: number, y: number): Promise<void> {
  try {
    const origin = await driver.findElement(By.css('body'));

    // `bridge` is optional; required in some environments
    const pointer = driver.actions({ bridge: true });
    await pointer.move({ x, y, origin }).click().perform();

    console.log(`Clicked at (${x}, ${y})`);
  } catch (error) {
    console.error('An error occurred:', error);
  }
}
|
||||||
|
|
||||||
|
|||||||
26
src/lib/contacts.ts
Normal file
26
src/lib/contacts.ts
Normal file
@ -0,0 +1,26 @@
|
|||||||
|
/**
|
||||||
|
* Reads contact rows (city and name) from a CSV file
|
||||||
|
*/
|
||||||
|
import fs from 'fs';
|
||||||
|
import path from 'path';
|
||||||
|
import { parse } from 'csv-parse/sync';
|
||||||
|
|
||||||
|
/**
 * Reads a CSV file with a header row and returns one `{ city, name }` entry
 * per data row.
 *
 * `city` is taken from the `City` column and `name` from the
 * `Name of City` column; empty lines are skipped.
 *
 * @param csvFilePath Path of the CSV file to read (UTF-8).
 * @returns The parsed rows in file order.
 */
export function getContacts(csvFilePath: string) {
  const rows = parse(fs.readFileSync(csvFilePath, 'utf-8'), {
    columns: true,
    skip_empty_lines: true
  });

  const contacts: {city:string,name:string}[] = rows.map((row: any) => ({
    city: row['City'],
    name: row['Name of City'],
  }));

  return contacts;
}
|
||||||
26
src/lib/museums.ts
Normal file
26
src/lib/museums.ts
Normal file
@ -0,0 +1,26 @@
|
|||||||
|
/**
|
||||||
|
* Reads museum rows (city and museum name) from a CSV file
|
||||||
|
*/
|
||||||
|
import fs from 'fs';
|
||||||
|
import path from 'path';
|
||||||
|
import { parse } from 'csv-parse/sync';
|
||||||
|
|
||||||
|
/**
 * Reads a CSV file with a header row and returns one `{ city, name }` entry
 * per data row.
 *
 * `city` is taken from the `City` column and `name` from the
 * `Name of City` column; empty lines are skipped.
 *
 * @param csvFilePath Path of the CSV file to read (UTF-8).
 * @returns The parsed rows in file order.
 */
export function getMuseums(csvFilePath: string) {
  const rows = parse(fs.readFileSync(csvFilePath, 'utf-8'), {
    columns: true,
    skip_empty_lines: true
  });

  const museums: {city:string,name:string}[] = rows.map((row: any) => ({
    city: row['City'],
    name: row['Name of City'],
  }));

  return museums;
}
|
||||||
@ -1,4 +1,5 @@
|
|||||||
export interface ContactInfo {
|
export interface ContactInfo {
|
||||||
|
name: string | null,
|
||||||
websiteUrl: string | null;
|
websiteUrl: string | null;
|
||||||
email: string | null;
|
email: string | null;
|
||||||
}
|
}
|
||||||
|
|||||||
143
src/lib/utils.ts
143
src/lib/utils.ts
@ -2,7 +2,8 @@
|
|||||||
* Utility class for common WebDriver operations
|
* Utility class for common WebDriver operations
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import { WebDriver, By, until } from 'selenium-webdriver';
|
import { Builder, By, until, WebDriver } from 'selenium-webdriver';
|
||||||
|
import chrome from 'selenium-webdriver/chrome';
|
||||||
import { writeFileSync, existsSync, appendFileSync } from 'fs';
|
import { writeFileSync, existsSync, appendFileSync } from 'fs';
|
||||||
import * as path from 'path';
|
import * as path from 'path';
|
||||||
import { ContactInfo } from './types';
|
import { ContactInfo } from './types';
|
||||||
@ -13,9 +14,33 @@ export class WebDriverUtils {
|
|||||||
* @param seconds Number of seconds to wait
|
* @param seconds Number of seconds to wait
|
||||||
* @returns Promise that resolves after the specified time
|
* @returns Promise that resolves after the specified time
|
||||||
*/
|
*/
|
||||||
static async wait(seconds: number): Promise<void> {
|
static async wait(driver?: WebDriver,baseTime: number = 3): Promise<void> {
|
||||||
console.log(`Waiting for ${seconds} seconds...`);
|
const seconds = Math.floor(Math.random() * 1000) % 3 + baseTime;
|
||||||
return new Promise(resolve => setTimeout(resolve, seconds * 1000));
|
console.log(`Scrolling to bottom for ${seconds} seconds...`);
|
||||||
|
|
||||||
|
const endTime = Date.now() + seconds * 1000;
|
||||||
|
|
||||||
|
let scrollCounter = 0;
|
||||||
|
|
||||||
|
while (Date.now() < endTime) {
|
||||||
|
|
||||||
|
/*
|
||||||
|
try {
|
||||||
|
if (driver) {
|
||||||
|
await driver.executeScript(`
|
||||||
|
window.scrollBy(0, 10);
|
||||||
|
`);
|
||||||
|
|
||||||
|
scrollCounter++;
|
||||||
|
}
|
||||||
|
} catch (error) {
|
||||||
|
console.warn('Scroll failed:', error);
|
||||||
|
}
|
||||||
|
*/
|
||||||
|
|
||||||
|
// Wait a little between scrolls
|
||||||
|
await new Promise(resolve => setTimeout(resolve, 500));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -33,7 +58,7 @@ export class WebDriverUtils {
|
|||||||
|
|
||||||
export function saveContactInfoToCSV(city: string, contactInfo: ContactInfo, filePath: string): void {
|
export function saveContactInfoToCSV(city: string, contactInfo: ContactInfo, filePath: string): void {
|
||||||
const headers = 'City,Website URL,Email\n';
|
const headers = 'City,Website URL,Email\n';
|
||||||
const line = `"${city},"${contactInfo.websiteUrl}","${contactInfo.email}"\n`;
|
const line = `"${city}","${contactInfo.name}","${contactInfo.websiteUrl}","${contactInfo.email}"\n`;
|
||||||
|
|
||||||
if (!existsSync(filePath)) {
|
if (!existsSync(filePath)) {
|
||||||
writeFileSync(filePath, headers + line);
|
writeFileSync(filePath, headers + line);
|
||||||
@ -43,3 +68,111 @@ export function saveContactInfoToCSV(city: string, contactInfo: ContactInfo, fil
|
|||||||
|
|
||||||
console.log(`Contact info saved to ${filePath}`);
|
console.log(`Contact info saved to ${filePath}`);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Wraps a value in double quotes for CSV, doubling any embedded quotes. */
function quoteCsvField(value: string): string {
  return `"${value.replace(/"/g, '""')}"`;
}

/**
 * Appends one `city,museumName` row to the CSV file at `filePath`, creating
 * the file with a header row first if it does not exist yet.
 *
 * The header uses the `City` / `Name of City` column names that
 * `getMuseums` looks up when the file is read back.
 *
 * @param city City the museum belongs to.
 * @param museumName Name of the museum.
 * @param filePath Path of the CSV file to create or append to.
 */
export function saveMuseumnameToCSV(city: string, museumName: string, filePath: string): void {
  const headers = 'City,Name of City\n';
  // Both fields are fully quoted so commas or quotes in a name stay in one column.
  const line = `${quoteCsvField(city)},${quoteCsvField(museumName)}\n`;

  if (!existsSync(filePath)) {
    writeFileSync(filePath, headers + line);
  } else {
    appendFileSync(filePath, line);
  }

  console.log(`Museum name saved to ${filePath}`);
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * Launches a new incognito, maximized Chrome window on the TripAdvisor home
 * page.
 *
 * This is `useChrome` with the TripAdvisor home page as the start URL; the
 * previously duplicated launch code now lives only there.
 *
 * NOTE(review): despite the name, the cookie-blocking preferences are
 * commented out in the launch code, so cookies are not actually blocked.
 *
 * @returns The driver, or `null` if the browser could not be built.
 */
export async function disableCookiesInChrome(): Promise<WebDriver | null> {
  return useChrome('https://www.tripadvisor.com/');
}
|
||||||
|
|
||||||
|
/**
 * Launches a new incognito, maximized Chrome window and opens `initialUrl`.
 *
 * After loading the page it tries to add a test cookie and logs the cookies
 * the browser reports. Any error is logged and whatever driver exists at
 * that point is still returned.
 *
 * @param initialUrl Page to open after launch.
 * @returns The driver, or `null` if the browser could not be built.
 */
export async function useChrome(initialUrl: string = "https://www.tripadvisor.com/"): Promise<WebDriver | null> {
  const chromeOptions = new chrome.Options();

  // Cookie blocking is currently switched off. To block all cookies:
  //   chromeOptions.setUserPreferences({
  //     'profile.default_content_setting_values.cookies': 2, // 2 = Block all
  //     'profile.block_third_party_cookies': true
  //   });

  // Incognito for extra privacy, maximized window
  chromeOptions.addArguments('--incognito', '--start-maximized');

  let browser: WebDriver | null = null;

  try {
    browser = await new Builder()
      .forBrowser('chrome')
      .setChromeOptions(chromeOptions)
      .build();

    await browser.get(initialUrl);
    console.log('Chrome launched with cookies disabled.');

    // Try to set a cookie and show what the browser kept
    await browser.manage().addCookie({ name: 'test', value: '123' });
    console.log('Cookies after trying to add:', await browser.manage().getCookies()); // Should be empty or restricted
  } catch (error) {
    console.error('Error:', error);
  }

  return browser;
}
|
||||||
|
|
||||||
|
/**
 * Attaches to a Chrome browser that is already running in debug mode and
 * listening on `localhost:9222`.
 *
 * @returns A WebDriver session connected to that browser.
 */
export async function useExistingChrome(): Promise<WebDriver> {
  const attachOptions = new chrome.Options();
  // Point the driver at the running instance's debugger address
  attachOptions.debuggerAddress('localhost:9222');

  const session: WebDriver = await new Builder()
    .forBrowser('chrome')
    .setChromeOptions(attachOptions)
    .build();
  return session;
}
|
||||||
Reference in New Issue
Block a user