This code uses Selenium to scrape location names and descriptions from Google Maps search results, handling pagination to retrieve all available data. It then prepares the data for further processing or storage, likely in JSON format.
npm run import -- "google maps data list"
var importer = require('../Core');
var runSeleniumCell = importer.import("selenium cell");
var fs = require('fs');
var path = require('path');
// Home directory of the current user: the first non-empty value among the
// HOME, HOMEPATH and USERPROFILE environment variables.
var PROFILE_PATH = process.env.HOME
    || process.env.HOMEPATH
    || process.env.USERPROFILE;
// Directory of the "flagstaff-happy" collection under the home directory.
var project = `${PROFILE_PATH}/Collections/flagstaff-happy`;
/**
 * Read the search results shown on the current Google Maps page.
 *
 * Queries every element whose class contains "section-result-text-content"
 * and, inside each one, the text nodes of the title and of the
 * details/location spans.
 *
 * @returns {Promise<Array<{name: string, description: (string|string[])}>>}
 *     One entry per result. A value that arrives as a string is passed
 *     through untouched; a value that arrives as a list of text nodes is
 *     joined with newlines and trimmed, and for `description` additionally
 *     split into an array of lines.
 */
function getResultsPage() {
    const containerXPath = '//*[contains(@class, "section-result-text-content")]';
    const fieldXPaths = {
        name: './/h3[contains(@class, "section-result-title")]//text()',
        description: './/span[contains(@class, "section-result-details")]//text()|.//span[contains(@class, "section-result-location")]//text()'
    };

    // Normalize one raw XPath match into the {name, description} shape.
    const toLocation = (entry) => {
        let name = entry.name;
        if (typeof name !== 'string') {
            name = name.join('\n').trim();
        }
        let description = entry.description;
        if (typeof description !== 'string') {
            description = description.join('\n').trim().split(/\s*\n\s*/ig);
        }
        return {name: name, description: description};
    };

    return client
        .then(() => getAllXPath([containerXPath, fieldXPaths]))
        .then(entries => entries.map(toLocation));
}
/**
 * Collect the locations from the current results page and from every page
 * after it.
 *
 * After reading the current page, checks for an enabled "Next page" button.
 * If there is one, clicks it, pauses 3000 ms, recurses, and appends the
 * later pages' locations to this page's locations.
 *
 * @returns {Promise<Array>} Locations from this page onward, in page order.
 */
function getAllResults() {
    const nextPageButton = '//*[contains(@class, "section-pagination-right")]//button[contains(@aria-label, "Next page") and not(@disabled)]';

    return getResultsPage().then(pageLocations =>
        client.isExisting(nextPageButton).then(hasNextPage => {
            // Last page: nothing more to fetch.
            if (!hasNextPage) {
                return pageLocations;
            }
            return client.click(nextPageButton)
                .pause(3000)
                .then(() => getAllResults())
                .then(laterLocations => pageLocations.concat(laterLocations));
        }));
}
/**
 * Open a Google Maps search, collect every page of results, and save them
 * as JSON (4-space indent) inside the project directory.
 *
 * The file is named `locations-<year>-<month>-<day>.json` from the local
 * date; month and day are written without zero padding.
 *
 * @param {string} [place] - Text appended to the Google Maps search URL.
 *     When undefined, 'bars+near+Flagstaff,+AZ' is used.
 * @returns {Promise<Array>} The locations produced by getAllResults, after
 *     they have been written to disk.
 */
function getNearbyJSON(place) {
    const query = typeof place === 'undefined' ? 'bars+near+Flagstaff,+AZ' : place;

    // Write the scraped locations to today's file and hand them back.
    const saveLocations = (locations) => {
        const now = new Date();
        const stamp = [now.getFullYear(), now.getMonth() + 1, now.getDate()].join('-');
        const target = path.join(project, 'locations-' + stamp + '.json');
        fs.writeFileSync(target, JSON.stringify(locations, null, 4));
        return locations;
    };

    return client.url('https://www.google.com/maps/search/' + query)
        .then(() => getAllResults())
        .then(saveLocations);
}
module.exports = getNearbyJSON;
// Entry point for interactive execution: runs only when a global `$` exists.
// NOTE(review): `$` is presumably the cell context injected by the notebook
// runner — confirm against the code that evaluates this cell.
if(typeof $ !== 'undefined') {
// Called before the asynchronous work starts; presumably tells the runner to
// wait for sendResult/sendError — TODO confirm.
$.async();
// Resolve the 'google maps data list' cell through the selenium cell helper
// (the meaning of the `false` argument is not visible here), call the
// function it resolves to with no arguments, then report the outcome: the
// resolved value goes to $.sendResult, any rejection to $.sendError.
runSeleniumCell('google maps data list', false)
.then(func => func())
.then(r => $.sendResult(r))
.catch(e => $.sendError(e))
}
// Import required modules
const fs = require('fs');
const path = require('path');
const importer = require('../Core');
const runSeleniumCell = importer.import('selenium cell');
const moment = require('moment'); // for date formatting
// Search used when the caller does not supply a location.
const DEFAULT_LOCATION = 'bars+near+Flagstaff,+AZ';
// Home directory of the current user: the first non-empty value among the
// HOME, HOMEPATH and USERPROFILE environment variables.
const PROFILE_PATH = process.env.HOME
    || process.env.HOMEPATH
    || process.env.USERPROFILE;
// Directory of the "flagstaff-happy" collection under the home directory.
const project = PROFILE_PATH + '/Collections/flagstaff-happy';
// Selenium client (assuming already initialized)
const client = require('./seleniumClient');
/**
 * Extract the search results visible on the current Google Maps page.
 *
 * Each element whose class contains "section-result-text-content" yields one
 * entry built from the text nodes of its title and of its details/location
 * spans.
 *
 * @returns {Promise<Array<{name: string, description: (string|string[])}>>}
 *     String values are returned unchanged. Lists of text nodes are joined
 *     with newlines and trimmed; for `description` the joined text is then
 *     split into an array of lines.
 */
function getResultsPage() {
    const resultQuery = [
        '//*[contains(@class, "section-result-text-content")]',
        {
            name: './/h3[contains(@class, "section-result-title")]//text()',
            description: './/span[contains(@class, "section-result-details")]//text()|.//span[contains(@class, "section-result-location")]//text()'
        }
    ];

    // Join a list of text nodes into one trimmed, newline-separated string.
    const collapse = nodes => nodes.join('\n').trim();

    const normalize = ({name, description}) => ({
        name: typeof name === 'string' ? name : collapse(name),
        description: typeof description === 'string'
            ? description
            : collapse(description).split(/\s*\n\s*/ig),
    });

    return client
        .then(() => getAllXPath(resultQuery))
        .then(rawResults => rawResults.map(normalize));
}
/**
 * Collect the locations from the current results page and from every page
 * after it.
 *
 * After reading the current page, checks for an enabled "Next page" button.
 * If there is one, clicks it, pauses 3000 ms, recurses, and merges the later
 * pages' locations after this page's locations.
 *
 * @returns {Promise<Array>} Locations from this page onward, in page order.
 */
function getAllResults() {
    const nextPageButton = '//*[contains(@class, "section-pagination-right")]//button[contains(@aria-label, "Next page") and not(@disabled)]';

    return getResultsPage()
        .then(pageLocations => client
            .isExisting(nextPageButton)
            .then(hasNextPage => {
                // Last page: this page's locations are the whole remainder.
                if (!hasNextPage) {
                    return pageLocations;
                }
                return client
                    .click(nextPageButton)
                    .pause(3000)
                    .then(() => getAllResults())
                    // The recursive call only knows about the pages after this
                    // one, so this page's locations must be merged in here;
                    // returning the recursive result alone would drop them.
                    .then(laterLocations => pageLocations.concat(laterLocations));
            }));
}
/**
 * Open a Google Maps search, collect every page of results, and save them
 * as JSON (4-space indent) to `locations-<YYYY-MM-DD>.json` inside the
 * project directory.
 *
 * Failures are best-effort: any error from navigation, scraping or writing
 * the file is logged with console.error and the returned promise resolves to
 * an empty array instead of rejecting.
 *
 * @param {string} [location=DEFAULT_LOCATION] - Text appended to the Google
 *     Maps search URL.
 * @returns {Promise<Array>} The scraped locations, or [] when an error occurred.
 */
function getNearbyJSON(location = DEFAULT_LOCATION) {
    // Write the scraped locations to today's file and hand them back.
    const saveResults = (results) => {
        const stamp = moment().format('YYYY-MM-DD');
        const target = path.join(project, `locations-${stamp}.json`);
        fs.writeFileSync(target, JSON.stringify(results, null, 4));
        return results;
    };

    // Log the failure and fall back to an empty result list.
    const reportFailure = (err) => {
        console.error(err);
        return [];
    };

    return client.url(`https://www.google.com/maps/search/${location}`)
        .then(() => getAllResults())
        .then(saveResults)
        .catch(reportFailure);
}
module.exports = getNearbyJSON;
This code snippet uses Selenium to scrape location data from Google Maps search results.
Here's a breakdown:
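Setup:
- Imports the required modules: `importer` (custom module), `selenium cell`, `fs`, and `path`.
- Defines `PROFILE_PATH`, the user's home directory, as the base for storing project files.
- Sets the `project` path.

`getResultsPage` Function:
- Uses `getAllXPath` to extract location names and descriptions from the page elements.
- Maps each result to an object with `name` and `description` properties.

`getAllResults` Function:
- Calls `getResultsPage` to get the initial set of results.
- If an enabled "Next page" button exists, clicks it, recursively calls `getAllResults` to fetch additional pages, and combines all results.

`getNearbyJSON` Function:
- Takes a `place` parameter (defaults to "bars+near+Flagstaff,+AZ").
- Opens the Google Maps search URL for that place and calls `getAllResults` to fetch the location data.
- Writes the results to a dated `locations-<date>.json` file in the project directory and returns them.

Purpose: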
This code automates the process of scraping location data from Google Maps search results for a given location. It fetches both the initial page and subsequent pages of results, extracts relevant information, and prepares it for further processing or storage.