7 Commits

Author SHA1 Message Date
7dbafd064e save work 2025-03-26 17:02:32 +01:00
661dd24550 Merge branch 'main' of https://git.yasue.org/ken/tripadviser_scraper 2025-03-25 14:48:32 +01:00
6d4ade340c changed to save museum name 2025-03-25 14:47:38 +01:00
af7e55882d wip 2025-03-25 11:24:17 +01:00
3d566a408e add name to contact info 2025-03-25 06:31:19 +01:00
6153b70c1e new way of scraping 2025-03-24 20:24:30 +01:00
47134525b8 save work 2025-03-24 19:00:26 +01:00
11 changed files with 36922 additions and 2920 deletions

4
.vscode/launch.json vendored
View File

@ -8,7 +8,7 @@
"skipFiles": [ "skipFiles": [
"<node_internals>/**" "<node_internals>/**"
], ],
"program": "${workspaceFolder}/src/index.ts", "program": "${workspaceFolder}/src/index_getcontact",
"preLaunchTask": "tsc: build - tsconfig.json", "preLaunchTask": "tsc: build - tsconfig.json",
"outFiles": [ "outFiles": [
"${workspaceFolder}/dist/**/*.js" "${workspaceFolder}/dist/**/*.js"
@ -26,7 +26,7 @@
"runtimeArgs": [ "runtimeArgs": [
"--loader", "--loader",
"ts-node/esm", "ts-node/esm",
"${workspaceFolder}/src/index.ts" "${workspaceFolder}/src/index_getcontact"
], ],
"sourceMaps": true "sourceMaps": true
} }

View File

@ -1,4 +1,27 @@
rank,Latitude,Longitude,Name of City,Country,2021 Population,2020 Population,Growth,Population Difference,Population Change rank,Latitude,Longitude,Name of City,Country,2021 Population,2020 Population,Growth,Population Difference,Population Change
1,35.6828387,139.7594549,Tokyo,Japan,37339804,37393128,-0.0014,53324,declined
2,28.6517178,77.2219388,Delhi,India,31181376,30290936,0.0294,890440,grew
3,31.2322758,121.4692071,Shanghai,China,27795702,27058480,0.0272,737222,grew
4,-23.5506507,-46.6333824,Sao Paulo,Brazil,22237472,22043028,0.0088,194444,grew
5,19.4326296,-99.1331785,Mexico City,Mexico,21918936,21782378,0.0063,136558,grew
6,23.7861979,90.4026151,Dhaka,Bangladesh,21741090,21005860,0.035,735230,grew
7,30.0443879,31.2357257,Cairo,Egypt,21322750,20900604,0.0202,422146,grew
8,39.906217,116.3912757,Beijing,China,20896820,20462610,0.0212,434210,grew
9,19.0759899,72.8773928,Mumbai,India,20667656,20411274,0.0126,256382,grew
10,34.6198813,135.490357,Osaka,Japan,19110616,19165340,-0.0029,54724,declined
11,24.8546842,67.0207055,Karachi,Pakistan,16459472,16093786,0.0227,365686,grew
12,29.5647398,106.5478767,Chongqing,China,16382376,15872179,0.0321,510197,grew
13,41.0096334,28.9651646,Istanbul,Turkey,15415197,15190336,0.0148,224861,grew
14,-34.6075682,-58.4370894,Buenos Aires,Argentina,15257673,15153729,0.0069,103944,grew
15,22.5414185,88.3576912,Kolkata,India,14974073,14850066,0.0084,124007,grew
16,-4.3217055,15.3125974,Kinshasa,DR Congo,14970460,14342439,0.0438,628021,grew
17,6.4550575,3.3941795,Lagos,Nigeria,14862111,14368332,0.0344,493779,grew
18,14.5907332,120.9809674,Manila,Philippines,14158573,13923452,0.0169,235121,grew
19,39.0856735,117.1951073,Tianjin,China,13794450,13589078,0.0151,205372,grew
20,23.1301964,113.2592945,Guangzhou,China,13635397,13301532,0.0251,333865,grew
21,-22.9110137,-43.2093727,Rio de Janeiro,Brazil,13544462,13458075,0.0064,86387,grew
22,31.5656822,74.3141829,Lahore,Pakistan,13095166,12642423,0.0358,452743,grew
23,12.9767936,77.590082,Bangalore,India,12764935,12326532,0.0356,438403,grew
24,55.7504461,37.6174943,Moscow,Russia,12593252,12537954,0.0044,55298,grew 24,55.7504461,37.6174943,Moscow,Russia,12593252,12537954,0.0044,55298,grew
25,22.555454,114.0543297,Shenzhen,China,12591696,12356820,0.019,234876,grew 25,22.555454,114.0543297,Shenzhen,China,12591696,12356820,0.019,234876,grew
26,13.0836939,80.270186,Chennai,India,11235018,10971108,0.0241,263910,grew 26,13.0836939,80.270186,Chennai,India,11235018,10971108,0.0241,263910,grew

1 rank Latitude Longitude Name of City Country 2021 Population 2020 Population Growth Population Difference Population Change
2 1 35.6828387 139.7594549 Tokyo Japan 37339804 37393128 -0.0014 53324 declined
3 2 28.6517178 77.2219388 Delhi India 31181376 30290936 0.0294 890440 grew
4 3 31.2322758 121.4692071 Shanghai China 27795702 27058480 0.0272 737222 grew
5 4 -23.5506507 -46.6333824 Sao Paulo Brazil 22237472 22043028 0.0088 194444 grew
6 5 19.4326296 -99.1331785 Mexico City Mexico 21918936 21782378 0.0063 136558 grew
7 6 23.7861979 90.4026151 Dhaka Bangladesh 21741090 21005860 0.035 735230 grew
8 7 30.0443879 31.2357257 Cairo Egypt 21322750 20900604 0.0202 422146 grew
9 8 39.906217 116.3912757 Beijing China 20896820 20462610 0.0212 434210 grew
10 9 19.0759899 72.8773928 Mumbai India 20667656 20411274 0.0126 256382 grew
11 10 34.6198813 135.490357 Osaka Japan 19110616 19165340 -0.0029 54724 declined
12 11 24.8546842 67.0207055 Karachi Pakistan 16459472 16093786 0.0227 365686 grew
13 12 29.5647398 106.5478767 Chongqing China 16382376 15872179 0.0321 510197 grew
14 13 41.0096334 28.9651646 Istanbul Turkey 15415197 15190336 0.0148 224861 grew
15 14 -34.6075682 -58.4370894 Buenos Aires Argentina 15257673 15153729 0.0069 103944 grew
16 15 22.5414185 88.3576912 Kolkata India 14974073 14850066 0.0084 124007 grew
17 16 -4.3217055 15.3125974 Kinshasa DR Congo 14970460 14342439 0.0438 628021 grew
18 17 6.4550575 3.3941795 Lagos Nigeria 14862111 14368332 0.0344 493779 grew
19 18 14.5907332 120.9809674 Manila Philippines 14158573 13923452 0.0169 235121 grew
20 19 39.0856735 117.1951073 Tianjin China 13794450 13589078 0.0151 205372 grew
21 20 23.1301964 113.2592945 Guangzhou China 13635397 13301532 0.0251 333865 grew
22 21 -22.9110137 -43.2093727 Rio de Janeiro Brazil 13544462 13458075 0.0064 86387 grew
23 22 31.5656822 74.3141829 Lahore Pakistan 13095166 12642423 0.0358 452743 grew
24 23 12.9767936 77.590082 Bangalore India 12764935 12326532 0.0356 438403 grew
25 24 55.7504461 37.6174943 Moscow Russia 12593252 12537954 0.0044 55298 grew
26 25 22.555454 114.0543297 Shenzhen China 12591696 12356820 0.019 234876 grew
27 26 13.0836939 80.270186 Chennai India 11235018 10971108 0.0241 263910 grew

File diff suppressed because it is too large Load Diff

36553
data/museums.csv Normal file

File diff suppressed because it is too large Load Diff

View File

@ -9,7 +9,7 @@ import chrome, { ServiceBuilder } from 'selenium-webdriver/chrome';
import * as fs from 'fs'; import * as fs from 'fs';
import * as path from 'path'; import * as path from 'path';
import { getCities } from './lib/cities'; import { getCities } from './lib/cities';
import { WebDriverUtils, saveContactInfoToCSV, useExistingChrome, disableCookiesInChrome, useChrome } from './lib/utils'; import { WebDriverUtils, saveContactInfoToCSV, useExistingChrome, disableCookiesInChrome, useChrome, saveMuseumnameToCSV } from './lib/utils';
import * as UIActions from './lib/UIActions'; import * as UIActions from './lib/UIActions';
import { randomUUID } from 'crypto'; import { randomUUID } from 'crypto';
@ -23,136 +23,87 @@ async function visitCityPages(): Promise<void> {
console.log('Connecting to existing Chrome browser...'); console.log('Connecting to existing Chrome browser...');
const driver = await useChrome(); const driver = await useExistingChrome();
if (!driver) return; if (!driver) return;
// Visit each city's TripAdvisor page // Visit each city's TripAdvisor page
for (let i = 0; i < cities.length; i++) { for (let i = 0; i < cities.length; i++) {
const city = cities[i]; const city = cities[i];
console.log(`[${i + 1}/${cities.length}] Visiting TripAdvisor page for ${city}...`); console.log(`[${i + 1}/${cities.length}] Visiting TripAdvisor page for ${city}...`);
let originalWindow;
let cityTopWindow;
let attactionsWindow;
let museumWindow;
try { try {
const originalWindow = await driver.getWindowHandle();
console.log("Logo click") console.log("Logo click")
if (!await UIActions.gotoHome(driver)) throw `${city} failed`; if (!await UIActions.gotoHome(driver)) throw `${city} failed`;
await WebDriverUtils.wait(driver); await WebDriverUtils.wait(driver);
console.log("Exec Search") console.log("Exec Search")
if (!await UIActions.execSearch(driver, city)) throw `${city} failed`; if (!await UIActions.execSearch(driver, `"${city}" museums`)) throw `${city} failed`;
await WebDriverUtils.wait(driver);
console.log("Click See all")
let seeAllUrl = await UIActions.getSeeAllUrl(driver);
if (seeAllUrl.length == 0) {
if (!await UIActions.clickTourismLink(driver)) throw `${city} failed`;
seeAllUrl = await UIActions.getSeeAllUrl(driver);
}
if (seeAllUrl.length == 0) throw `${city} failed`;
await WebDriverUtils.wait();
// open new incognito window
const driver2 = await useChrome();
if (!driver2) throw `${city} failed`;
await WebDriverUtils.wait();
await driver2.get(seeAllUrl);
await WebDriverUtils.wait();
console.log("Switch tab")
let windows = await driver.getAllWindowHandles();
// Switch to the newly opened window/tab
for (const handle of windows) {
if (handle !== originalWindow) {
cityTopWindow = handle;
await driver.switchTo().window(handle);
}
}
console.log("Click See all attractions")
if (!await UIActions.getSeeAllAttractionsUrl(driver)) throw `${city} failed`;
await WebDriverUtils.wait();
console.log("Switch tab to Attraction")
windows = await driver.getAllWindowHandles();
// Switch to the newly opened window/tab
for (const handle of windows) {
if (handle !== originalWindow && handle !== cityTopWindow) {
attactionsWindow = handle;
await driver.switchTo().window(attactionsWindow);
}
}
// click museum
console.log("Click Museum link");
if (!await UIActions.clickMuseumsLink(driver)) throw `${city} failed`;
await WebDriverUtils.wait(driver); await WebDriverUtils.wait(driver);
let page = 1; let page = 1;
while (1) { while (1) {
// get list of museums // get list of museums
console.log("Get list of museums"); console.log("Get list of museums");
const museumElms = await UIActions.getMusiums(driver); const museumsInList = await UIActions.getAttractionNames(driver);
await WebDriverUtils.wait(driver);
for (const listItem of museumElms) { museumsInList.forEach((museumName) => {
saveMuseumnameToCSV(city, museumName, path.join(__dirname, '../data/museums.csv'));
})
await listItem.click(); UIActions.clickPagination(driver, page);
await WebDriverUtils.wait(driver); await WebDriverUtils.wait(driver,5);
windows = await driver.getAllWindowHandles(); if (museumsInList.length < 30)
for (const handle of windows) { break;
if (handle !== originalWindow && handle !== cityTopWindow && handle !== attactionsWindow) {
museumWindow = handle;
await driver.switchTo().window(museumWindow);
}
}
const { websiteUrl, email } = await UIActions.getWebsiteAndEmail(driver);
console.log(`${websiteUrl} / ${email}`);
saveContactInfoToCSV(city, { websiteUrl: websiteUrl, email: email }, path.join(__dirname, '../data/contact_info.csv'));
museumWindow && await driver.switchTo().window(museumWindow);
await driver.close();
await WebDriverUtils.wait(driver);
attactionsWindow && await driver.switchTo().window(attactionsWindow);
await WebDriverUtils.wait(driver);
}
page++; page++;
if (page > 10) break; if (page > 10) break;
UIActions.clickPagination(driver, page);
await WebDriverUtils.wait(driver);
} }
/*
for (const museumIndex in museumNames) {
await UIActions.closeAllTabsExceptFirst(driver); const museumName = museumNames[museumIndex];
try{
console.log(`reading museum ${museumIndex}/${museumNames.length}`);
if (!await UIActions.gotoHome(driver)) throw `${city} failed`;
await WebDriverUtils.wait(driver);
if (!await UIActions.typeSearch(driver, `${city} ${museumName}`)) throw `${city} failed`;
await WebDriverUtils.wait(driver);
await UIActions.clickFirstAttractionLinkInForm(driver)
await WebDriverUtils.wait(driver);
const { websiteUrl, email } = await UIActions.getWebsiteAndEmail(driver);
console.log(`${websiteUrl} / ${email}`);
saveContactInfoToCSV(city, { name: museumName, websiteUrl: websiteUrl, email: email }, path.join(__dirname, '../data/contact_info.csv'));
await UIActions.simulateClickAt(driver,100,100);
await WebDriverUtils.wait(driver);
}catch(e){
console.error(`failed ${museumName}`)
}
}
*/
if (i < cities.length - 1) { if (i < cities.length - 1) {
console.log(`Waiting for 5000 seconds before next city...`); console.log(`Waiting for 5000 seconds before next city...`);
await WebDriverUtils.wait(); // Wait 5000 seconds before next city await WebDriverUtils.wait(); // Wait 5000 seconds before next city
} }
} catch (error) {
await UIActions.closeAllTabsExceptFirst(driver); } catch (error) {
// If the button is not found within the timeout, log and continue to the next city // If the button is not found within the timeout, log and continue to the next city
console.log(`No Museums button found for ${city}. Moving to next city after 5 seconds...`); console.log(`No Museums button found for ${city}. Moving to next city after 5 seconds...`);

112
src/index_getcontact.ts Normal file
View File

@ -0,0 +1,112 @@
/**
* Selenium WebDriver script to visit TripAdvisor pages for random cities
* & 'C:\Program Files\Google\Chrome\Application\chrome.exe' --remote-debugging-port=9222
*/
import { Builder, By, until, WebDriver } from 'selenium-webdriver';
import * as chromedriver from 'chromedriver';
import chrome, { ServiceBuilder } from 'selenium-webdriver/chrome';
import * as fs from 'fs';
import * as path from 'path';
import { getMuseums } from './lib/museums';
import { WebDriverUtils, saveContactInfoToCSV, useExistingChrome, disableCookiesInChrome, useChrome, saveMuseumnameToCSV } from './lib/utils';
import * as UIActions from './lib/UIActions';
import { randomUUID } from 'crypto';
/**
 * Visit the TripAdvisor page of every museum listed in data/museums.csv and
 * append the contact details found there to data/contact_info.csv.
 *
 * For each row: return to the home page, type `"<city>" <museum>` into the
 * search box, click the first attraction link that is offered, read the
 * website URL / e-mail from the page and save them.
 * A failure on one museum is logged and the loop continues with the next row.
 */
async function visitMuseumPages(): Promise<void> {
  const museums = getMuseums(path.join(__dirname, '../data/museums.csv'));
  const contactInfoPath = path.join(__dirname, '../data/contact_info.csv');

  console.log('Connecting to existing Chrome browser...');
  const driver = await useExistingChrome();
  if (!driver) return;

  for (let i = 0; i < museums.length; i++) {
    const row = museums[i];
    const city = row["city"];
    const museum = row["name"];
    console.log(`[${i + 1}/${museums.length}] Visiting TripAdvisor page for ${museum}...`);

    try {
      if (!await UIActions.gotoHome(driver)) {
        throw new Error(`could not open the home page for ${museum} (${city})`);
      }
      await WebDriverUtils.wait(driver);

      if (!await UIActions.typeSearch(driver, `"${city}" ${museum}`)) {
        throw new Error(`could not type the search for ${museum} (${city})`);
      }
      await WebDriverUtils.wait(driver);

      // Without a successful click the browser is still on the previous page,
      // so scraping it would store contact details that belong to something else.
      if (!await UIActions.clickFirstAttractionLinkInForm(driver)) {
        throw new Error(`no attraction link offered for ${museum} (${city})`);
      }
      await WebDriverUtils.wait(driver);

      const { websiteUrl, email } = await UIActions.getWebsiteAndEmail(driver);
      console.log(`${websiteUrl} / ${email}`);
      saveContactInfoToCSV(city, { name: museum, websiteUrl: websiteUrl, email: email }, contactInfoPath);

      // Presumably dismisses whatever overlay is still open before the next
      // search — TODO confirm.
      await UIActions.simulateClickAt(driver, 100, 100);
      await WebDriverUtils.wait(driver);

      // Extra pause between museums; not needed after the last one.
      if (i < museums.length - 1) {
        console.log('Waiting before next museum...');
        await WebDriverUtils.wait();
      }
    } catch (error) {
      console.error(`Failed to read contact info for ${museum} (${city}). Moving to next museum...`, error);
      await WebDriverUtils.wait();
    }
  }

  console.log('Finished visiting all museums!');
}
// Script entry point: start the scrape and report any rejection that escapes it.
visitMuseumPages().catch((reason) => {
  console.error('Error in main function:', reason);
});

View File

@ -1,4 +1,4 @@
import { Builder, By, until, WebDriver, WebElement } from 'selenium-webdriver'; import { Builder, By, until, WebDriver, WebElement, Actions } from 'selenium-webdriver';
import * as chromedriver from 'chromedriver'; import * as chromedriver from 'chromedriver';
import chrome, { ServiceBuilder } from 'selenium-webdriver/chrome'; import chrome, { ServiceBuilder } from 'selenium-webdriver/chrome';
import * as fs from 'fs'; import * as fs from 'fs';
@ -8,7 +8,7 @@ import { ContactInfo } from './types';
export async function execSearch(driver: WebDriver, city: string): Promise<boolean> { export async function execSearch(driver: WebDriver, searchTerm: string): Promise<boolean> {
try { try {
// Find the search input field // Find the search input field
const searchSelector = 'input[name="q"][placeholder="Places to go, things to do, hotels..."]'; const searchSelector = 'input[name="q"][placeholder="Places to go, things to do, hotels..."]';
@ -18,7 +18,7 @@ export async function execSearch(driver: WebDriver, city: string): Promise<boole
// Clear any existing text and enter the city name // Clear any existing text and enter the city name
await searchInput.clear(); await searchInput.clear();
await searchInput.sendKeys(city); await searchInput.sendKeys(searchTerm);
// Submit the search (press Enter) // Submit the search (press Enter)
await WebDriverUtils.wait(driver); await WebDriverUtils.wait(driver);
@ -31,6 +31,25 @@ export async function execSearch(driver: WebDriver, city: string): Promise<boole
} }
} }
/**
 * Type a search term into the TripAdvisor search box without submitting it.
 *
 * @param driver     WebDriver session whose current page contains the search box.
 * @param searchTerm Text to enter; any text already in the box is cleared first.
 * @returns `true` once the text has been entered, `false` if the search box
 *          could not be found or edited (the cause is logged as a warning).
 */
export async function typeSearch(driver: WebDriver, searchTerm: string): Promise<boolean> {
  const searchSelector = 'input[name="q"][placeholder="Places to go, things to do, hotels..."]';

  try {
    // Presumably a 10 s timeout in milliseconds — confirm against WebDriverUtils.waitForElement.
    await WebDriverUtils.waitForElement(driver, searchSelector, 10000);
    console.log("Search box found");

    const searchInput = await driver.findElement(By.css(searchSelector));

    // Replace whatever is in the box with the new term; no Enter key is sent.
    await searchInput.clear();
    await searchInput.sendKeys(searchTerm);

    return true;
  } catch (e) {
    // Log the cause so a failed search can be diagnosed, as the sibling helpers do.
    console.warn(`Search box not found or not editable for "${searchTerm}".`, e);
    return false;
  }
}
export async function clickSeeAll(driver: WebDriver): Promise<boolean> { export async function clickSeeAll(driver: WebDriver): Promise<boolean> {
try { try {
const seeAllElement = await driver.wait( const seeAllElement = await driver.wait(
@ -80,7 +99,7 @@ export async function gotoHome(driver: WebDriver): Promise<boolean> {
} }
export async function clickSeeAllAttractions(driver: WebDriver): Promise<boolean> { export async function clickSeeAllAttractions(driver: WebDriver): Promise<boolean> {
const xpath = `//h2[starts-with(normalize-space(.), 'Top Attractions in')]/parent::*[1]//a[starts-with(@href, '/Attractions') and .//span[normalize-space(.)='See all']]`; const xpath = `//h3[starts-with(normalize-space(.), 'Things to do')]/parent::*[1]//a[starts-with(@href, '/Attractions') and .//span[normalize-space(.)='See all']]`;
try { try {
const anchorElement = await driver.wait(until.elementLocated(By.xpath(xpath)), 5000); const anchorElement = await driver.wait(until.elementLocated(By.xpath(xpath)), 5000);
@ -98,7 +117,7 @@ export async function clickSeeAllAttractions(driver: WebDriver): Promise<boolean
} }
export async function getSeeAllAttractionsUrl(driver: WebDriver): Promise<string | null> { export async function getSeeAllAttractionsUrl(driver: WebDriver): Promise<string | null> {
const xpath = `//h2[starts-with(normalize-space(.), 'Top Attractions in')]/parent::*[1]//a[starts-with(@href, '/Attractions') and .//span[normalize-space(.)='See all']]`; const xpath = `//h3[starts-with(normalize-space(.), 'Top Attractions in')]/parent::*[1]//a[starts-with(@href, '/Attractions') and .//span[normalize-space(.)='See all']]`;
try { try {
const anchorElement = await driver.wait(until.elementLocated(By.xpath(xpath)), 5000); const anchorElement = await driver.wait(until.elementLocated(By.xpath(xpath)), 5000);
@ -161,8 +180,51 @@ export async function getMusiums(driver: WebDriver): Promise<WebElement[]> {
} }
} }
/**
 * Collect the `href` of every image-bearing attraction link on the current page.
 *
 * Only anchors whose href starts with `/Attraction`, that contain an `<img>`,
 * and that sit in a `<section>` which also contains an `<h3>` are matched.
 *
 * @param driver WebDriver session positioned on the listing page.
 * @returns The non-empty href values in document order, or `[]` if the lookup fails.
 */
export async function getMuseumsLinks(driver: WebDriver): Promise<string[]> {
  const attractionLinkXPath = `//div//section[.//a[starts-with(@href, '/Attraction')] and .//h3]//a[starts-with(@href, '/Attraction') and .//img]`;

  try {
    const anchors = await driver.findElements(By.xpath(attractionLinkXPath));
    const hrefs: string[] = [];

    for (const anchor of anchors) {
      const target = await anchor.getAttribute('href');
      // Anchors without a usable href are skipped.
      if (!target) continue;
      hrefs.push(target);
    }

    return hrefs;
  } catch (error) {
    console.warn('Error getting attraction URLs:', error);
    return [];
  }
}
/**
 * Read the attraction names shown on the current page.
 *
 * Names are taken from `<h3>` elements that are direct children of an `<a>`
 * whose href starts with `/Attraction_Review`. A leading "<number>. " prefix
 * is stripped from each name.
 *
 * @param driver WebDriver session positioned on the listing page.
 * @returns The cleaned names in document order, or `[]` if the lookup fails.
 */
export async function getAttractionNames(driver: WebDriver): Promise<string[]> {
  const headingXPath = `//a[starts-with(@href, '/Attraction_Review')]/h3`;
  const rankPrefix = /^\d+\.\s*/;

  try {
    const headings = await driver.findElements(By.xpath(headingXPath));
    const collected: string[] = [];

    for (const heading of headings) {
      const label = await heading.getText();
      // Headings with no text are skipped.
      if (!label) continue;
      collected.push(label.trim().replace(rankPrefix, ''));
    }

    return collected;
  } catch (error) {
    console.warn('Error getting attraction names:', error);
    return [];
  }
}
export async function getWebsiteAndEmail(driver: WebDriver): Promise<ContactInfo> { export async function getWebsiteAndEmail(driver: WebDriver): Promise<ContactInfo> {
const result: ContactInfo = { websiteUrl: null, email: null }; const result: ContactInfo = { name: null, websiteUrl: null, email: null };
// XPath to find URL (starting with 'http' but not containing 'tripadvisor') // XPath to find URL (starting with 'http' but not containing 'tripadvisor')
const urlXPath = `//a[starts-with(@href, 'http') and not(contains(@href, 'tripadvisor'))]`; const urlXPath = `//a[starts-with(@href, 'http') and not(contains(@href, 'tripadvisor'))]`;
@ -225,3 +287,41 @@ export async function closeAllTabsExceptFirst(driver: WebDriver): Promise<void>
await driver.switchTo().window(originalHandle); await driver.switchTo().window(originalHandle);
console.log(`Switched back to original tab: ${originalHandle}`); console.log(`Switched back to original tab: ${originalHandle}`);
} }
/**
 * Click the first link inside a `<form>` whose href starts with `/Attraction`.
 *
 * Each step (located, visible, enabled) is given up to 5000 ms.
 *
 * @param driver WebDriver session positioned on a page showing the link.
 * @returns `true` after the click, `false` if the link never became clickable
 *          (the cause is logged as a warning).
 */
export async function clickFirstAttractionLinkInForm(driver: WebDriver): Promise<boolean> {
  const timeoutMs = 5000;
  const formLinkXPath = `//form//a[starts-with(@href, '/Attraction')]`;

  try {
    const firstLink = await driver.wait(until.elementLocated(By.xpath(formLinkXPath)), timeoutMs);

    // The link must be both displayed and enabled before it is clicked.
    await driver.wait(until.elementIsVisible(firstLink), timeoutMs);
    await driver.wait(until.elementIsEnabled(firstLink), timeoutMs);

    await firstLink.click();
    console.log('Clicked the first attraction link in the form.');
    return true;
  } catch (error) {
    console.warn('Attraction link not found or not clickable.', error);
    return false;
  }
}
/**
 * Move the pointer by (x, y) from the page's `<body>` element and click there.
 *
 * Errors are logged and swallowed; the function never rejects.
 *
 * NOTE(review): the offsets use the `<body>` element as origin. Selenium
 * documents element-origin offsets as measured from the element's in-view
 * centre, not from the viewport's top-left corner — confirm that this is the
 * intended click position.
 *
 * @param driver WebDriver session to act on.
 * @param x      Horizontal offset passed to the pointer move.
 * @param y      Vertical offset passed to the pointer move.
 */
export async function simulateClickAt(driver: WebDriver, x: number, y: number): Promise<void> {
  try {
    const body = await driver.findElement(By.css('body'));

    // `bridge` is kept as-is from the original; it is optional and only needed in some environments.
    const pointer = driver.actions({ bridge: true });
    await pointer.move({ x, y, origin: body }).click().perform();

    console.log(`Clicked at (${x}, ${y})`);
  } catch (error) {
    console.error('An error occurred:', error);
  }
}

26
src/lib/contacts.ts Normal file
View File

@ -0,0 +1,26 @@
/**
* Loads city / name rows from a CSV file
*/
import fs from 'fs';
import path from 'path';
import { parse } from 'csv-parse/sync';
/**
 * Load `{ city, name }` rows from a CSV file that has a header line.
 *
 * `city` is taken from the `City` column and `name` from the `Name of City`
 * column; empty lines are ignored. The file is read synchronously.
 *
 * @param csvFilePath Path of the CSV file to read.
 * @returns One entry per data row, in file order.
 */
export function getContacts(csvFilePath: string) {
  const rows = parse(fs.readFileSync(csvFilePath, 'utf-8'), {
    columns: true,
    skip_empty_lines: true
  });

  const entries: {city:string,name:string}[] = rows.map((row: any) => ({
    city: row['City'],
    name: row['Name of City'],
  }));

  return entries;
}

26
src/lib/museums.ts Normal file
View File

@ -0,0 +1,26 @@
/**
* Loads the museums (city / name rows) to visit on TripAdvisor from a CSV file
*/
import fs from 'fs';
import path from 'path';
import { parse } from 'csv-parse/sync';
/**
 * Load the museum list from a CSV file that has a header line.
 *
 * `city` is taken from the `City` column and `name` from the `Name of City`
 * column; empty lines are ignored. The file is read synchronously.
 *
 * @param csvFilePath Path of the CSV file to read.
 * @returns One `{ city, name }` entry per data row, in file order.
 */
export function getMuseums(csvFilePath: string) {
  const rows = parse(fs.readFileSync(csvFilePath, 'utf-8'), {
    columns: true,
    skip_empty_lines: true
  });

  const entries: {city:string,name:string}[] = rows.map((row: any) => ({
    city: row['City'],
    name: row['Name of City'],
  }));

  return entries;
}

View File

@ -1,4 +1,5 @@
export interface ContactInfo { export interface ContactInfo {
name: string | null,
websiteUrl: string | null; websiteUrl: string | null;
email: string | null; email: string | null;
} }

View File

@ -14,8 +14,8 @@ export class WebDriverUtils {
* @param seconds Number of seconds to wait * @param seconds Number of seconds to wait
* @returns Promise that resolves after the specified time * @returns Promise that resolves after the specified time
*/ */
static async wait(driver?: WebDriver): Promise<void> { static async wait(driver?: WebDriver,baseTime: number = 3): Promise<void> {
const seconds = Math.floor(Math.random() * 1000) % 3 + 3; const seconds = Math.floor(Math.random() * 1000) % 3 + baseTime;
console.log(`Scrolling to bottom for ${seconds} seconds...`); console.log(`Scrolling to bottom for ${seconds} seconds...`);
const endTime = Date.now() + seconds * 1000; const endTime = Date.now() + seconds * 1000;
@ -23,23 +23,20 @@ export class WebDriverUtils {
let scrollCounter = 0; let scrollCounter = 0;
while (Date.now() < endTime) { while (Date.now() < endTime) {
/*
try { try {
if (driver) { if (driver) {
if(scrollCounter < 4){
await driver.executeScript(` await driver.executeScript(`
window.scrollBy(0, window.innerHeight); window.scrollBy(0, 10);
`); `);
}else{
await driver.executeScript(`
window.scrollTo(0, 0);
`);
}
scrollCounter++; scrollCounter++;
} }
} catch (error) { } catch (error) {
console.warn('Scroll failed:', error); console.warn('Scroll failed:', error);
} }
*/
// Wait a little between scrolls // Wait a little between scrolls
await new Promise(resolve => setTimeout(resolve, 500)); await new Promise(resolve => setTimeout(resolve, 500));
@ -61,7 +58,7 @@ export class WebDriverUtils {
export function saveContactInfoToCSV(city: string, contactInfo: ContactInfo, filePath: string): void { export function saveContactInfoToCSV(city: string, contactInfo: ContactInfo, filePath: string): void {
const headers = 'City,Website URL,Email\n'; const headers = 'City,Website URL,Email\n';
const line = `"${city},"${contactInfo.websiteUrl}","${contactInfo.email}"\n`; const line = `"${city}","${contactInfo.name}","${contactInfo.websiteUrl}","${contactInfo.email}"\n`;
if (!existsSync(filePath)) { if (!existsSync(filePath)) {
writeFileSync(filePath, headers + line); writeFileSync(filePath, headers + line);
@ -72,6 +69,20 @@ export function saveContactInfoToCSV(city: string, contactInfo: ContactInfo, fil
console.log(`Contact info saved to ${filePath}`); console.log(`Contact info saved to ${filePath}`);
} }
/**
 * Append one `city, museum name` row to a CSV file, creating the file with a
 * header line first if it does not exist yet.
 *
 * @param city       City the museum belongs to.
 * @param museumName Name of the museum.
 * @param filePath   CSV file to create or append to.
 */
export function saveMuseumnameToCSV(city: string, museumName: string, filePath: string): void {
  // Column names match what getMuseums() looks up when it reads this file back.
  const headers = 'City,Name of City\n';

  // Quote every field and double any embedded quote so names containing
  // commas or quotes still parse as a single field.
  const quote = (value: string): string => `"${value.replace(/"/g, '""')}"`;
  const line = `${quote(city)},${quote(museumName)}\n`;

  if (!existsSync(filePath)) {
    writeFileSync(filePath, headers + line);
  } else {
    appendFileSync(filePath, line);
  }

  console.log(`Museum name saved to ${filePath}`);
}
export async function disableCookiesInChrome(): Promise<WebDriver | null> { export async function disableCookiesInChrome(): Promise<WebDriver | null> {
// Set Chrome options // Set Chrome options
const options = new chrome.Options(); const options = new chrome.Options();
@ -111,7 +122,7 @@ export async function disableCookiesInChrome(): Promise<WebDriver | null> {
} }
export async function useChrome(): Promise<WebDriver | null> { export async function useChrome(initialUrl: string = "https://www.tripadvisor.com/"): Promise<WebDriver | null> {
// Set Chrome options // Set Chrome options
const options = new chrome.Options(); const options = new chrome.Options();
@ -133,7 +144,7 @@ export async function useChrome(): Promise<WebDriver | null> {
.setChromeOptions(options) .setChromeOptions(options)
.build(); .build();
await driver.get('https://www.tripadvisor.com/'); await driver.get(initialUrl);
console.log('Chrome launched with cookies disabled.'); console.log('Chrome launched with cookies disabled.');