scraping | Cell 0 | Cell 2 | Search

This code uses Selenium to scrape location names and descriptions from Google Maps search results, following the pagination to collect every page of results. It then writes the collected results to a dated JSON file in the project directory and returns them.

Run example

npm run import -- "google maps data list"

google maps data list

var importer = require('../Core');
var runSeleniumCell = importer.import("selenium cell");
var fs = require('fs');
var path = require('path');

// Home directory of the current user: HOME is tried first, then HOMEPATH,
// then USERPROFILE (the latter two presumably cover Windows).
var PROFILE_PATH = process.env.HOME
    || process.env.HOMEPATH
    || process.env.USERPROFILE;
// Directory that receives the scraped location files.
var project = `${PROFILE_PATH}/Collections/flagstaff-happy`;

/**
 * Reads the search results shown on the current page.
 *
 * Chains onto the global `client` and hands the query below to the global
 * `getAllXPath` helper (presumably it evaluates the container XPath and,
 * for each match, the relative `name` / `description` XPaths — confirm
 * against that helper).
 *
 * @returns {Promise<Array<{name: string, description: (string|string[])}>>}
 *     One entry per result. `description` stays a string when the helper
 *     returned a string; otherwise it is a list of non-blank-separated lines.
 */
function getResultsPage() {
    var resultQuery = [
        '//*[contains(@class, "section-result-text-content")]',
        {
            name: './/h3[contains(@class, "section-result-title")]//text()',
            description: './/span[contains(@class, "section-result-details")]//text()|.//span[contains(@class, "section-result-location")]//text()'
        }
    ];

    // Glue a list of text nodes into one trimmed, newline-separated string.
    function joinText(parts) {
        return parts.join('\n').trim();
    }

    // Normalise one raw row; values that are already strings pass through as-is.
    function toLocation(row) {
        var name = row.name;
        if (typeof name !== 'string') {
            name = joinText(name);
        }

        var description = row.description;
        if (typeof description !== 'string') {
            // Split on newlines, swallowing the whitespace around them.
            description = joinText(description).split(/\s*\n\s*/ig);
        }

        return { name: name, description: description };
    }

    return client
        .then(() => getAllXPath(resultQuery))
        .then(rows => rows.map(row => toLocation(row)));
}

/**
 * Gathers the results of the current page and of every following page.
 *
 * After reading the current page it checks for an enabled "Next page"
 * button; if one exists it clicks it, pauses 3000 ms (presumably to let the
 * next page render) and recurses, appending the later pages' results.
 *
 * @returns {Promise<Array>} Results of all pages, in page order.
 */
function getAllResults() {
    var nextButton = '//*[contains(@class, "section-pagination-right")]//button[contains(@aria-label, "Next page") and not(@disabled)]';

    // Move to the next page and append everything found from there on.
    function advanceAndCollect(pageLocations) {
        return client.click(nextButton)
            .pause(3000)
            .then(() => getAllResults())
            .then(laterLocations => pageLocations.concat(laterLocations));
    }

    return getResultsPage().then(pageLocations => {
        return client.isExisting(nextButton).then(hasNext => {
            return hasNext ? advanceAndCollect(pageLocations) : pageLocations;
        });
    });
}

/**
 * Opens a Google Maps search, collects every result and saves them as JSON.
 *
 * The results are written to `locations-<year>-<month>-<day>.json` (month
 * and day are not zero-padded) inside the `project` directory.
 *
 * @param {string} [place] Search path segment appended to the Maps search
 *     URL; when undefined, 'bars+near+Flagstaff,+AZ' is used.
 * @returns {Promise<Array>} The collected results, after they were written.
 */
function getNearbyJSON(place) {
    const query = place === undefined ? 'bars+near+Flagstaff,+AZ' : place;
    const searchUrl = 'https://www.google.com/maps/search/' + query;

    // Write the results to today's file and pass them through unchanged.
    const saveLocations = (locations) => {
        const now = new Date();
        // getMonth() is zero-based, hence the + 1.
        const stamp = [now.getFullYear(), now.getMonth() + 1, now.getDate()].join('-');
        const outFile = path.join(project, 'locations-' + stamp + '.json');
        fs.writeFileSync(outFile, JSON.stringify(locations, null, 4));
        return locations;
    };

    return client.url(searchUrl)
        .then(() => getAllResults())
        .then(locations => saveLocations(locations));
}
// The scraper entry point is this module's only export.
module.exports = getNearbyJSON;

// When a global `$` is available (presumably the notebook kernel's cell
// context — confirm), run the cell asynchronously and report the outcome.
if(typeof $ !== 'undefined') {
    $.async();
    runSeleniumCell('google maps data list', false)
        // The runner resolves with a function; calling it performs the scrape.
        .then(scrape => scrape())
        .then(locations => $.sendResult(locations))
        .catch(error => $.sendError(error));
}

What the code could have been:

// Import required modules
const fs = require('fs');
const path = require('path');
const importer = require('../Core');
const runSeleniumCell = importer.import('selenium cell');
const moment = require('moment'); // for date formatting

// Home directory of the current user: HOME is tried first, then HOMEPATH,
// then USERPROFILE.
const PROFILE_PATH = process.env.HOME
    || process.env.HOMEPATH
    || process.env.USERPROFILE;

// Directory that receives the scraped location files.
const project = PROFILE_PATH + '/Collections/flagstaff-happy';

// Search query used when getNearbyJSON is called without a location.
const DEFAULT_LOCATION = 'bars+near+Flagstaff,+AZ';

// Selenium client (assuming already initialized)
const client = require('./seleniumClient');

/**
 * Reads the search results shown on the current page.
 *
 * Passes a container XPath plus relative `name` / `description` XPaths to
 * the `getAllXPath` helper (its exact contract is not visible here —
 * presumably one object per matched container) and normalises each row.
 *
 * @returns {Promise<Array<{name: string, description: (string|string[])}>>}
 *     `name` is a single string; `description` is left untouched when it is
 *     already a string, otherwise it becomes a list of lines.
 */
function getResultsPage() {
    const containerXPath = '//*[contains(@class, "section-result-text-content")]';
    const fieldXPaths = {
        name: './/h3[contains(@class, "section-result-title")]//text()',
        description: './/span[contains(@class, "section-result-details")]//text()|.//span[contains(@class, "section-result-location")]//text()'
    };

    // Strings pass through; lists of text nodes are joined and trimmed.
    const flatten = (value) => (
        typeof value === 'string' ? value : value.join('\n').trim()
    );

    const normalise = ({ name, description }) => ({
        name: flatten(name),
        // Only list-valued descriptions are split into separate lines.
        description: typeof description === 'string'
            ? description
            : flatten(description).split(/\s*\n\s*/ig),
    });

    return client
        .then(() => getAllXPath([containerXPath, fieldXPaths]))
        .then(rows => rows.map(row => normalise(row)));
}

/**
 * Gathers the results of the current page and of every following page.
 *
 * After reading the current page it checks for an enabled "Next page"
 * button. If one exists it clicks it, pauses 3000 ms (presumably to let the
 * next page render) and recurses; the current page's results are then
 * prepended to whatever the later pages returned, so no page is lost.
 *
 * @returns {Promise<Array>} Results of all pages, in page order.
 */
function getAllResults() {
    const nextPageSelector = '//*[contains(@class, "section-pagination-right")]//button[contains(@aria-label, "Next page") and not(@disabled)]';

    return getResultsPage()
        .then(pageLocations => client
            .isExisting(nextPageSelector)
            .then(hasNextPage => {
                if (!hasNextPage) {
                    // Last page: nothing further to append.
                    return pageLocations;
                }
                return client
                    .click(nextPageSelector)
                    .pause(3000)
                    .then(() => getAllResults())
                    // Keep this page's results in front of the later pages'.
                    .then(laterLocations => pageLocations.concat(laterLocations));
            }));
}

/**
 * Opens a Google Maps search, collects every result and saves them as JSON.
 *
 * The results are written to `locations-YYYY-MM-DD.json` inside the
 * `project` directory. Any failure along the way is logged with
 * `console.error` and turned into an empty result list, so the returned
 * promise does not reject for those failures.
 *
 * @param {string} [location=DEFAULT_LOCATION] Search path segment appended
 *     to the Maps search URL.
 * @returns {Promise<Array>} The collected results, or `[]` after an error.
 */
function getNearbyJSON(location = DEFAULT_LOCATION) {
    const searchUrl = `https://www.google.com/maps/search/${location}`;

    // Write the results to today's file and pass them through unchanged.
    const persist = (results) => {
        const stamp = moment().format('YYYY-MM-DD');
        const target = path.join(project, `locations-${stamp}.json`);
        fs.writeFileSync(target, JSON.stringify(results, null, 4));
        return results;
    };

    // Best-effort fallback: report the problem, then yield no results.
    const recover = (err) => {
        console.error(err);
        return [];
    };

    return client.url(searchUrl)
        .then(() => getAllResults())
        .then(results => persist(results))
        .catch(err => recover(err));
}

// Expose getNearbyJSON as this module's sole export.
module.exports = getNearbyJSON;

This code snippet uses Selenium to scrape location data from Google Maps search results.

Here's a breakdown:

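  1. Setup: Imports the project's `importer`, the `selenium cell` runner, `fs`, and `path`, then builds the output directory path under the user's home directory.

  2. getResultsPage Function: Queries the result entries on the current page and maps each one to an object with a `name` and a `description`.

  3. getAllResults Function: Collects the current page's results and, while an enabled "Next page" button exists, clicks it, waits three seconds, and appends the results of the following pages.

  4. getNearbyJSON Function: Opens the Google Maps search URL for the given query (default `bars+near+Flagstaff,+AZ`), gathers all results, writes them to a dated `locations-<date>.json` file in the project directory, and returns them.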

Purpose:

This code automates the process of scraping location data from Google Maps search results for a given location. It fetches both the initial page and subsequent pages of results, extracts relevant information, and prepares it for further processing or storage.