Compare commits
8 Commits
edge
...
open_brows
| Author | SHA1 | Date | |
|---|---|---|---|
| 607bb07520 | |||
| fe177abd85 | |||
| 79ea65c74b | |||
| 1ffea0f61f | |||
| b6cad2a241 | |||
| 6f36809932 | |||
| 0e6df98a50 | |||
| d0bfe15fa4 |
@ -1,27 +1,4 @@
|
||||
rank,Latitude,Longitude,Name of City,Country,2021 Population,2020 Population,Growth,Population Difference,Population Change
|
||||
1,35.6828387,139.7594549,Tokyo,Japan,37339804,37393128,-0.0014,53324,declined
|
||||
2,28.6517178,77.2219388,Delhi,India,31181376,30290936,0.0294,890440,grew
|
||||
3,31.2322758,121.4692071,Shanghai,China,27795702,27058480,0.0272,737222,grew
|
||||
4,-23.5506507,-46.6333824,Sao Paulo,Brazil,22237472,22043028,0.0088,194444,grew
|
||||
5,19.4326296,-99.1331785,Mexico City,Mexico,21918936,21782378,0.0063,136558,grew
|
||||
6,23.7861979,90.4026151,Dhaka,Bangladesh,21741090,21005860,0.035,735230,grew
|
||||
7,30.0443879,31.2357257,Cairo,Egypt,21322750,20900604,0.0202,422146,grew
|
||||
8,39.906217,116.3912757,Beijing,China,20896820,20462610,0.0212,434210,grew
|
||||
9,19.0759899,72.8773928,Mumbai,India,20667656,20411274,0.0126,256382,grew
|
||||
10,34.6198813,135.490357,Osaka,Japan,19110616,19165340,-0.0029,54724,declined
|
||||
11,24.8546842,67.0207055,Karachi,Pakistan,16459472,16093786,0.0227,365686,grew
|
||||
12,29.5647398,106.5478767,Chongqing,China,16382376,15872179,0.0321,510197,grew
|
||||
13,41.0096334,28.9651646,Istanbul,Turkey,15415197,15190336,0.0148,224861,grew
|
||||
14,-34.6075682,-58.4370894,Buenos Aires,Argentina,15257673,15153729,0.0069,103944,grew
|
||||
15,22.5414185,88.3576912,Kolkata,India,14974073,14850066,0.0084,124007,grew
|
||||
16,-4.3217055,15.3125974,Kinshasa,DR Congo,14970460,14342439,0.0438,628021,grew
|
||||
17,6.4550575,3.3941795,Lagos,Nigeria,14862111,14368332,0.0344,493779,grew
|
||||
18,14.5907332,120.9809674,Manila,Philippines,14158573,13923452,0.0169,235121,grew
|
||||
19,39.0856735,117.1951073,Tianjin,China,13794450,13589078,0.0151,205372,grew
|
||||
20,23.1301964,113.2592945,Guangzhou,China,13635397,13301532,0.0251,333865,grew
|
||||
21,-22.9110137,-43.2093727,Rio de Janeiro,Brazil,13544462,13458075,0.0064,86387,grew
|
||||
22,31.5656822,74.3141829,Lahore,Pakistan,13095166,12642423,0.0358,452743,grew
|
||||
23,12.9767936,77.590082,Bangalore,India,12764935,12326532,0.0356,438403,grew
|
||||
24,55.7504461,37.6174943,Moscow,Russia,12593252,12537954,0.0044,55298,grew
|
||||
25,22.555454,114.0543297,Shenzhen,China,12591696,12356820,0.019,234876,grew
|
||||
26,13.0836939,80.270186,Chennai,India,11235018,10971108,0.0241,263910,grew
|
||||
|
||||
|
2801
data/contact_info.csv
Normal file
2801
data/contact_info.csv
Normal file
File diff suppressed because it is too large
Load Diff
89
src/index.ts
89
src/index.ts
@ -9,57 +9,23 @@ import chrome, { ServiceBuilder } from 'selenium-webdriver/chrome';
|
||||
import * as fs from 'fs';
|
||||
import * as path from 'path';
|
||||
import { getCities } from './lib/cities';
|
||||
import { WebDriverUtils, saveContactInfoToCSV } from './lib/utils';
|
||||
import { WebDriverUtils, saveContactInfoToCSV, useExistingChrome, disableCookiesInChrome, useChrome } from './lib/utils';
|
||||
import * as UIActions from './lib/UIActions';
|
||||
import { randomUUID } from 'crypto';
|
||||
import os from 'os';
|
||||
import edge from 'selenium-webdriver/edge';
|
||||
|
||||
|
||||
/**
|
||||
* Function to visit TripAdvisor pages for each city
|
||||
*/
|
||||
async function visitCityPages(): Promise<void> {
|
||||
|
||||
const userHomeDir = os.homedir(); // gets C:\Users\<YourName>
|
||||
const driverPath = path.join(userHomeDir, 'Documents', 'edgedriver_win64', 'msedgedriver.exe');
|
||||
|
||||
// Configure Edge service to use your custom driver path
|
||||
const service = new edge.ServiceBuilder(driverPath);
|
||||
|
||||
const options = new edge.Options();
|
||||
options.addArguments('--inprivate');
|
||||
options.addArguments('--start-maximized');
|
||||
|
||||
let driver: WebDriver;
|
||||
driver = await new Builder()
|
||||
.forBrowser('MicrosoftEdge')
|
||||
.setEdgeOptions(options)
|
||||
.setEdgeService(service)
|
||||
.build();
|
||||
|
||||
await driver.get('https://www.tripadvisor.com');
|
||||
await WebDriverUtils.wait(5);
|
||||
|
||||
/*
|
||||
|
||||
|
||||
// Connect to an existing Chrome browser running in debug mode on port 9222
|
||||
const options = new chrome.Options();
|
||||
|
||||
// Set the debugger address to connect to the existing Chrome instance
|
||||
options.debuggerAddress('localhost:9222');
|
||||
|
||||
// Create WebDriver instance that connects to the existing browser
|
||||
const driver: WebDriver = await new Builder()
|
||||
.forBrowser('chrome')
|
||||
.setChromeOptions(options)
|
||||
.build();
|
||||
*/
|
||||
|
||||
const cities = getCities(path.join(__dirname, '../data/cities.csv'));
|
||||
|
||||
console.log('Connecting to existing Chrome browser...');
|
||||
|
||||
const driver = await useChrome();
|
||||
if (!driver) return;
|
||||
|
||||
// Visit each city's TripAdvisor page
|
||||
for (let i = 0; i < cities.length; i++) {
|
||||
const city = cities[i];
|
||||
@ -75,18 +41,33 @@ async function visitCityPages(): Promise<void> {
|
||||
|
||||
console.log("Logo click")
|
||||
if (!await UIActions.gotoHome(driver)) throw `${city} failed`;
|
||||
await WebDriverUtils.wait(5);
|
||||
await WebDriverUtils.wait(driver);
|
||||
|
||||
console.log("Exec Search")
|
||||
if (!await UIActions.execSearch(driver, city)) throw `${city} failed`;
|
||||
await WebDriverUtils.wait(5);
|
||||
await WebDriverUtils.wait(driver);
|
||||
|
||||
console.log("Click See all")
|
||||
if (!await UIActions.clickSeeAll(driver)) {
|
||||
let seeAllUrl = await UIActions.getSeeAllUrl(driver);
|
||||
|
||||
if (seeAllUrl.length == 0) {
|
||||
if (!await UIActions.clickTourismLink(driver)) throw `${city} failed`;
|
||||
if (!await UIActions.clickSeeAll(driver)) throw `${city} failed`;
|
||||
seeAllUrl = await UIActions.getSeeAllUrl(driver);
|
||||
}
|
||||
await WebDriverUtils.wait(5);
|
||||
|
||||
if (seeAllUrl.length == 0) throw `${city} failed`;
|
||||
await WebDriverUtils.wait();
|
||||
|
||||
|
||||
// open new incognito window
|
||||
const driver2 = await useChrome();
|
||||
if (!driver2) throw `${city} failed`;
|
||||
|
||||
await WebDriverUtils.wait();
|
||||
await driver2.get(seeAllUrl);
|
||||
await WebDriverUtils.wait();
|
||||
|
||||
|
||||
|
||||
console.log("Switch tab")
|
||||
let windows = await driver.getAllWindowHandles();
|
||||
@ -99,8 +80,8 @@ async function visitCityPages(): Promise<void> {
|
||||
}
|
||||
|
||||
console.log("Click See all attractions")
|
||||
if (!await UIActions.clickSeeAllAttractions(driver)) throw `${city} failed`;
|
||||
await WebDriverUtils.wait(5);
|
||||
if (!await UIActions.getSeeAllAttractionsUrl(driver)) throw `${city} failed`;
|
||||
await WebDriverUtils.wait();
|
||||
|
||||
console.log("Switch tab to Attraction")
|
||||
windows = await driver.getAllWindowHandles();
|
||||
@ -115,7 +96,7 @@ async function visitCityPages(): Promise<void> {
|
||||
// click museum
|
||||
console.log("Click Museum link");
|
||||
if (!await UIActions.clickMuseumsLink(driver)) throw `${city} failed`;
|
||||
await WebDriverUtils.wait(5);
|
||||
await WebDriverUtils.wait(driver);
|
||||
|
||||
let page = 1;
|
||||
while (1) {
|
||||
@ -123,12 +104,12 @@ async function visitCityPages(): Promise<void> {
|
||||
// get list of museums
|
||||
console.log("Get list of museums");
|
||||
const museumElms = await UIActions.getMusiums(driver);
|
||||
await WebDriverUtils.wait(1);
|
||||
await WebDriverUtils.wait(driver);
|
||||
|
||||
for (const listItem of museumElms) {
|
||||
|
||||
await listItem.click();
|
||||
await WebDriverUtils.wait(3);
|
||||
await WebDriverUtils.wait(driver);
|
||||
|
||||
windows = await driver.getAllWindowHandles();
|
||||
for (const handle of windows) {
|
||||
@ -145,10 +126,10 @@ async function visitCityPages(): Promise<void> {
|
||||
|
||||
museumWindow && await driver.switchTo().window(museumWindow);
|
||||
await driver.close();
|
||||
await WebDriverUtils.wait(1);
|
||||
await WebDriverUtils.wait(driver);
|
||||
|
||||
attactionsWindow && await driver.switchTo().window(attactionsWindow);
|
||||
await WebDriverUtils.wait(1);
|
||||
await WebDriverUtils.wait(driver);
|
||||
|
||||
}
|
||||
|
||||
@ -157,7 +138,7 @@ async function visitCityPages(): Promise<void> {
|
||||
if (page > 10) break;
|
||||
|
||||
UIActions.clickPagination(driver, page);
|
||||
await WebDriverUtils.wait(5);
|
||||
await WebDriverUtils.wait(driver);
|
||||
|
||||
}
|
||||
|
||||
@ -167,7 +148,7 @@ async function visitCityPages(): Promise<void> {
|
||||
|
||||
if (i < cities.length - 1) {
|
||||
console.log(`Waiting for 5000 seconds before next city...`);
|
||||
await WebDriverUtils.wait(5); // Wait 5000 seconds before next city
|
||||
await WebDriverUtils.wait(); // Wait 5000 seconds before next city
|
||||
}
|
||||
} catch (error) {
|
||||
|
||||
@ -175,7 +156,7 @@ async function visitCityPages(): Promise<void> {
|
||||
|
||||
// If the button is not found within the timeout, log and continue to the next city
|
||||
console.log(`No Museums button found for ${city}. Moving to next city after 5 seconds...`);
|
||||
await WebDriverUtils.wait(5); // Wait 5 seconds before next city
|
||||
await WebDriverUtils.wait(); // Wait 5 seconds before next city
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -21,9 +21,9 @@ export async function execSearch(driver: WebDriver, city: string): Promise<boole
|
||||
await searchInput.sendKeys(city);
|
||||
|
||||
// Submit the search (press Enter)
|
||||
await WebDriverUtils.wait(2);
|
||||
await WebDriverUtils.wait(driver);
|
||||
await searchInput.sendKeys('\uE007'); // Unicode for Enter key
|
||||
await WebDriverUtils.wait(5); // Wait 5 seconds before next city
|
||||
await WebDriverUtils.wait(driver); // Wait 5 seconds before next city
|
||||
|
||||
return true;
|
||||
} catch (e) {
|
||||
@ -49,6 +49,21 @@ export async function clickSeeAll(driver: WebDriver): Promise<boolean> {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Reads the href of the attractions link found under the "Things to do"
 * heading of the page currently loaded in `driver`.
 *
 * The lookup waits up to 5 seconds for an anchor whose `href` starts with
 * `/Attractions` inside the nearest `div` ancestor of that heading.
 *
 * @param driver WebDriver session showing the page to inspect.
 * @returns The anchor's href, or an empty string when the lookup fails
 *          (the failure is logged as a warning, not rethrown).
 */
export async function getSeeAllUrl(driver: WebDriver): Promise<string> {
  const attractionsLink = By.xpath(
    `//h3[normalize-space(.)='Things to do']/ancestor::div[1]//a[starts-with(@href, '/Attractions')]`
  );

  try {
    const link = await driver.wait(until.elementLocated(attractionsLink), 5000);
    const href = await link.getAttribute('href');
    console.log('Found Attractions URL:', href);
    return href;
  } catch (err) {
    // Best-effort lookup: callers treat "" as "no link available".
    console.warn('Could not find the Attractions link:', err);
    return "";
  }
}
|
||||
|
||||
export async function gotoHome(driver: WebDriver): Promise<boolean> {
|
||||
try {
|
||||
// Click on the Tripadvisor logo before searching for the city
|
||||
@ -82,6 +97,22 @@ export async function clickSeeAllAttractions(driver: WebDriver): Promise<boolean
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Reads the href of the "See all" link that belongs to the
 * "Top Attractions in ..." section of the page loaded in `driver`.
 *
 * Waits up to 5 seconds for the anchor to be located and up to another
 * 5 seconds for it to become visible before reading its `href`.
 *
 * @param driver WebDriver session showing the page to inspect.
 * @returns The anchor's href, or `null` when the element cannot be located,
 *          does not become visible, or its href cannot be read (the failure
 *          is logged as a warning, not rethrown).
 */
export async function getSeeAllAttractionsUrl(driver: WebDriver): Promise<string | null> {
  const seeAllLink = By.xpath(
    `//h2[starts-with(normalize-space(.), 'Top Attractions in')]/parent::*[1]//a[starts-with(@href, '/Attractions') and .//span[normalize-space(.)='See all']]`
  );

  try {
    const link = await driver.wait(until.elementLocated(seeAllLink), 5000);
    await driver.wait(until.elementIsVisible(link), 5000);

    // `return await` keeps a rejected getAttribute inside this try/catch.
    return await link.getAttribute('href');
  } catch (error) {
    console.warn('Element not found or href not retrievable.', error);
    return null;
  }
}
|
||||
|
||||
export async function clickMuseumsLink(driver: WebDriver): Promise<boolean> {
|
||||
const xpath = `//a[.//*[normalize-space(.)='Museums']]`;
|
||||
|
||||
|
||||
130
src/lib/utils.ts
130
src/lib/utils.ts
@ -2,7 +2,8 @@
|
||||
* Utility class for common WebDriver operations
|
||||
*/
|
||||
|
||||
import { WebDriver, By, until } from 'selenium-webdriver';
|
||||
import { Builder, By, until, WebDriver } from 'selenium-webdriver';
|
||||
import chrome from 'selenium-webdriver/chrome';
|
||||
import { writeFileSync, existsSync, appendFileSync } from 'fs';
|
||||
import * as path from 'path';
|
||||
import { ContactInfo } from './types';
|
||||
@ -13,9 +14,36 @@ export class WebDriverUtils {
|
||||
* @param seconds Number of seconds to wait
|
||||
* @returns Promise that resolves after the specified time
|
||||
*/
|
||||
static async wait(seconds: number): Promise<void> {
|
||||
console.log(`Waiting for ${seconds} seconds...`);
|
||||
return new Promise(resolve => setTimeout(resolve, seconds * 1000));
|
||||
static async wait(driver?: WebDriver): Promise<void> {
  // Pauses for a randomly chosen 3, 4 or 5 seconds. When a driver is given,
  // the page is scrolled while waiting: the first four successful scrolls
  // move down by one viewport height, every later one jumps back to the top.
  // Without a driver this is a plain delay. Scroll failures are logged and
  // never abort the wait.
  const seconds = Math.floor(Math.random() * 1000) % 3 + 3;
  console.log(`Scrolling to bottom for ${seconds} seconds...`);

  const scrollDownScript = `
    window.scrollBy(0, window.innerHeight);
  `;
  const scrollTopScript = `
    window.scrollTo(0, 0);
  `;

  const deadline = Date.now() + seconds * 1000;
  let completedScrolls = 0;

  while (Date.now() < deadline) {
    if (driver) {
      try {
        await driver.executeScript(completedScrolls < 4 ? scrollDownScript : scrollTopScript);
        // Only successful scrolls count towards the four downward steps.
        completedScrolls += 1;
      } catch (error) {
        console.warn('Scroll failed:', error);
      }
    }

    // Wait a little between scrolls
    await new Promise(resolve => setTimeout(resolve, 500));
  }
}
|
||||
|
||||
/**
|
||||
@ -43,3 +71,97 @@ export function saveContactInfoToCSV(city: string, contactInfo: ContactInfo, fil
|
||||
|
||||
console.log(`Contact info saved to ${filePath}`);
|
||||
}
|
||||
|
||||
/**
 * Launches a new maximized, incognito Chrome session and opens the
 * TripAdvisor home page.
 *
 * NOTE: despite the function name, cookies are NOT blocked at the moment,
 * because the cookie-blocking preferences below are commented out. The log
 * output says so, rather than claiming that cookies are disabled.
 *
 * After the page loads, a probe cookie is added and the session's cookies are
 * logged as a diagnostic. The probe cookie is then removed so it does not
 * remain in the session that is used afterwards.
 *
 * @returns The WebDriver session. If a step fails, the error is logged and
 *          whatever was created so far is returned: `null` when the browser
 *          could not be started, otherwise the already-started session.
 */
export async function disableCookiesInChrome(): Promise<WebDriver | null> {
  // Set Chrome options
  const options = new chrome.Options();

  // 1. Block all cookies (currently commented out, so cookies are allowed)
  //options.setUserPreferences({
  //  'profile.default_content_setting_values.cookies': 2, // 2 = Block all
  //  'profile.block_third_party_cookies': true
  //});

  // 2. Optional: Launch in incognito for extra privacy
  options.addArguments('--incognito');
  options.addArguments('--start-maximized');

  let driver: WebDriver | null = null;

  try {
    driver = await new Builder()
      .forBrowser('chrome')
      .setChromeOptions(options)
      .build();

    await driver.get('https://www.tripadvisor.com/');

    console.log('Chrome launched in incognito mode (cookie blocking is not enabled).');

    // Diagnostic probe: try to set a cookie and log what the session holds.
    await driver.manage().addCookie({ name: 'test', value: '123' });
    const cookies = await driver.manage().getCookies();
    console.log('Cookies after trying to add:', cookies); // Empty or restricted only when cookies are blocked

    // Remove the probe so it is not carried along in the working session.
    await driver.manage().deleteCookie('test');

    return driver;
  } catch (error) {
    console.error('Error:', error);
    // Best effort: hand back the session if the browser itself started.
    return driver;
  }
}
|
||||
|
||||
/**
 * Launches a new maximized, incognito Chrome session and opens the
 * TripAdvisor home page.
 *
 * Cookies are not blocked: the cookie-blocking preferences below are
 * commented out, so the log output reports an incognito launch rather than
 * claiming that cookies are disabled.
 *
 * After the page loads, a probe cookie is added and the session's cookies are
 * logged as a diagnostic. The probe cookie is then removed so it does not
 * remain in the session that is used afterwards.
 *
 * @returns The WebDriver session. If a step fails, the error is logged and
 *          whatever was created so far is returned: `null` when the browser
 *          could not be started, otherwise the already-started session.
 */
export async function useChrome(): Promise<WebDriver | null> {
  // Set Chrome options
  const options = new chrome.Options();

  // 1. Block all cookies (currently commented out, so cookies are allowed)
  //options.setUserPreferences({
  //  'profile.default_content_setting_values.cookies': 2, // 2 = Block all
  //  'profile.block_third_party_cookies': true
  //});

  // 2. Optional: Launch in incognito for extra privacy
  options.addArguments('--incognito');
  options.addArguments('--start-maximized');

  let driver: WebDriver | null = null;

  try {
    driver = await new Builder()
      .forBrowser('chrome')
      .setChromeOptions(options)
      .build();

    await driver.get('https://www.tripadvisor.com/');

    console.log('Chrome launched in incognito mode.');

    // Diagnostic probe: try to set a cookie and log what the session holds.
    await driver.manage().addCookie({ name: 'test', value: '123' });
    const cookies = await driver.manage().getCookies();
    console.log('Cookies after trying to add:', cookies); // Empty or restricted only when cookies are blocked

    // Remove the probe so it is not carried along in the working session.
    await driver.manage().deleteCookie('test');

    return driver;
  } catch (error) {
    console.error('Error:', error);
    // Best effort: hand back the session if the browser itself started.
    return driver;
  }
}
|
||||
|
||||
/**
 * Attaches to a Chrome instance that is already running, instead of
 * launching a new browser.
 *
 * The session is pointed at the debugger address `localhost:9222`, so a
 * Chrome instance must be listening there for the build to succeed.
 *
 * @returns A WebDriver session bound to the existing browser. Any failure
 *          while building the session rejects the returned promise.
 */
export async function useExistingChrome(): Promise<WebDriver> {
  const attachOptions = new chrome.Options();

  // Target the running browser's debugger endpoint rather than spawning one.
  attachOptions.debuggerAddress('localhost:9222');

  const builder = new Builder().forBrowser('chrome').setChromeOptions(attachOptions);
  return await builder.build();
}
|
||||
Reference in New Issue
Block a user