scraping | google maps data list | Cell 3 | Search

The code imports the modules and helper functions it needs, derives the project directory from environment variables, and defines two main functions: loadLocations() and getAllLocationsData(). These functions use the file system, Google Maps lookups, and a Selenium cell to load and enrich the locations data, which is then written to a JSON file.

Cell 2

var importer = require('../Core');
var fs = require('fs');
var path = require('path');
var glob = require('glob');
var {
    placesNearby,
    placeDetails,
    runSeleniumCell,
    levDist,
    levSort
} = importer.import("use places nearby",
"place details google maps",
"run selenium cell",
"find levenshtien distance",
"sort levenshtien");

// Home directory of the current user: the first of HOME, HOMEPATH or
// USERPROFILE that holds a truthy value.
var PROFILE_PATH = process.env.HOME
    || process.env.HOMEPATH
    || process.env.USERPROFILE;
// Directory in which the location JSON files are read and written.
var project = `${PROFILE_PATH}/Collections/flagstaff-happy`;

/**
 * Load today's list of locations.
 *
 * Looks for `locations-<year>-<month>-<day>.json` (month and day are not
 * zero-padded) inside `project`. When that file exists its parsed JSON is
 * returned; otherwise the 'google maps data list' Selenium cell is run and
 * the function it yields is invoked to produce the list.
 *
 * @returns {Promise<*>} resolves to the parsed cache file, or to whatever the
 *     function obtained from `runSeleniumCell` returns.
 */
function loadLocations() {
    const day = new Date();
    const date = day.getFullYear() + '-' + (day.getMonth() + 1) + '-' + day.getDate();
    const filename = path.join(project, 'locations-' + date + '.json');
    if(fs.existsSync(filename)) {
        // Read the cache file that was just checked for existence.
        const locations = JSON.parse(fs.readFileSync(filename).toString());
        return Promise.resolve(locations);
    }
    return runSeleniumCell('google maps data list', false)
        .then(func => func());
}

/**
 * Load today's locations and enrich each one with Google Places data.
 *
 * The enriched list is cached in `locations-<year>-<month>-<day>-full.json`
 * inside `project`; when that file already exists it is parsed and returned
 * without any lookups. Otherwise every location from `loadLocations()` is
 * searched with `placesNearby`, the first hit is passed to `placeDetails`,
 * and both results are merged into the location object.
 *
 * A location whose search returns no hit is kept unchanged. If any lookup
 * fails, the returned promise rejects with the first error and nothing is
 * written to the cache, so a partial result is never stored for the day.
 *
 * @returns {Promise<Array<Object>>} the enriched locations.
 */
function getAllLocationsData() {
    const day = new Date();
    const date = day.getFullYear() + '-' + (day.getMonth() + 1) + '-' + day.getDate();
    const filename = path.join(project, 'locations-' + date + '-full.json');
    if(fs.existsSync(filename)) {
        return Promise.resolve(JSON.parse(fs.readFileSync(filename).toString()));
    }
    // First lookup failure, re-thrown once every task has settled.
    var firstError = null;
    return loadLocations()
        .then(locations => importer.runAllPromises(locations.map(l => resolve => {
            // pop() removes the last description entry from the location and
            // uses it as the "near" part of the search query.
            placesNearby(l.name + ' near ' + l.description.pop() + ', Flagstaff')
                .then(found => {
                    const result = found && found[0];
                    if(!result) {
                        // No search hit: keep the location as it is.
                        return resolve(l);
                    }
                    return placeDetails(result.place_id)
                        .then(details => resolve(Object.assign(l, result, details)));
                })
                .catch(e => {
                    // Always call resolve so the task runner is never left
                    // waiting; the error is surfaced after the run.
                    if(firstError === null) {
                        firstError = e;
                    }
                    resolve(l);
                });
        })))
        .then(r => {
            if(firstError !== null) {
                throw firstError;
            }
            fs.writeFileSync(filename, JSON.stringify(r, null, 4));
            return r;
        });
}

// Notebook entry point: runs only when the `$` context object is defined.
// Loads the enriched locations, prints coverage statistics, and reports the
// result (or the error) through `$`.
if(typeof $ !== 'undefined') {
    $.async();
    getAllLocationsData()
        .then(r => {
            // Share of locations that carry a `website` value.
            const hasSites = r.filter(l => l.website);
            console.log('websites: ' + hasSites.length + '/' + r.length + ' - ' + Math.round(1.0 * hasSites.length / r.length * 100) + '%')
            // Every entry under `project` whose name does not start with "locations".
            const existing = glob.sync('**/!(locations)*', {cwd: project});
            // Turn each file name into a plain name and keep those whose closest
            // location name is within half the name's length in edit distance.
            const existingMatch = existing
                .map(l => l.split(/-|\.json/ig).join(' ').trim())
                .map(l => {
                    const levMatch = levSort(r, l, r => r.name)[0].name;
                    if(levDist(l, levMatch) < l.length / 2) {
                        return l + ' - ' + levMatch;
                    }
                    return l + ' - no match';
                })
                .filter(l => !l.includes('no match'))
            console.log('existing: ' + existingMatch.length + '/' + existing.length + ' - ' + Math.round(1.0 * existingMatch.length / existing.length * 100) + '%')
            console.log('variance: ');
            // Tally, across all existing files, how many entries have a `dotw`
            // and a `time` value in the expected format, and collect the rest.
            const variations = existing.map(l => path.join(project, l))
                .reduce((acc, l) => {
                    const loaded = JSON.parse(fs.readFileSync(l));
                    if(typeof acc.dotw === 'undefined') {
                        acc.dotw = {};
                    }
                    loaded.forEach(d => {
                        if(d.dotw.match(/Monday|Tuesday|Wednesday|Thursday|Friday|Saturday|Sunday/ig)) {
                            if(typeof acc['dotw']['matches'] === 'undefined') {
                                acc['dotw']['matches'] = 0;
                            }
                            acc['dotw']['matches']++;
                        } else {
                            if(typeof acc['dotw']['unmatched'] === 'undefined') {
                                acc['dotw']['unmatched'] = [];
                            }
                            acc['dotw']['unmatched'].push(l + d.dotw);
                        }
                    });
                    if(typeof acc.time === 'undefined') {
                        acc.time = {};
                    }
                    loaded.forEach(d => {
                        console.log(l + ' - ' + d.time)
                        // NOTE(review): the end time is matched without a trailing
                        // ".m." (e.g. "4p.m. - 7p"), so "4p.m. - 7p.m." counts as
                        // unmatched. Confirm this is the intended format.
                        if(d.time.match(/^([0-9]+(p|a)\.m\.\s+-\s+([0-9]+(p|a)|close)|all day)\s*$/ig)) {
                            if(typeof acc['time']['matches'] === 'undefined') {
                                acc['time']['matches'] = 0;
                            }
                            acc['time']['matches']++;
                        } else {
                            if(typeof acc['time']['unmatched'] === 'undefined') {
                                acc['time']['unmatched'] = [];
                            }
                            // Record the offending time value, not the weekday.
                            acc['time']['unmatched'].push(l + d.time);
                        }
                    });
                    return acc;
                }, {});
            console.log(variations)
            return r;
        })
        .then(r => $.sendResult(r))
        .catch(e => $.sendError(e))
}

What the code could have been:

const { PlacesNearby, PlaceDetails, RunSeleniumCell, LevenshteinDistance, LevenshteinSort } = require('../Core');

// Home directory of the current user: the first of HOME, HOMEPATH or
// USERPROFILE that holds a truthy value.
const PROFILE_PATH =
  process.env.HOME ||
  process.env.HOMEPATH ||
  process.env.USERPROFILE;
// Directory in which the location JSON files are read and written.
const project = [String(PROFILE_PATH), 'Collections', 'flagstaff-happy'].join('/');

/**
 * Loads the day's locations and enriches them with Google Places data,
 * caching both stages as JSON files inside `project`.
 */
const LocationsService = {
  /**
   * Load today's list of locations.
   *
   * Returns the parsed contents of `locations-<year>-<month>-<day>.json` when
   * that file exists and is readable; otherwise runs the
   * 'google maps data list' Selenium cell and invokes the function it yields.
   *
   * @returns {Promise<*>} the parsed cache file, or the Selenium cell's result.
   * @throws rethrows any error raised while running the Selenium cell.
   */
  async loadLocations() {
    const fs = require('fs');
    const day = new Date();
    const date = `${day.getFullYear()}-${day.getMonth() + 1}-${day.getDate()}`;
    const filename = `${project}/locations-${date}.json`;
    if (fs.existsSync(filename)) {
      try {
        // Parse the cache so callers get data, not the raw file text.
        return JSON.parse(fs.readFileSync(filename, 'utf8'));
      } catch (e) {
        // An unreadable cache is not fatal: fall through and load afresh.
        console.error(`Error reading locations file: ${e}`);
      }
    }
    try {
      const locations = await RunSeleniumCell('google maps data list', false);
      // Await here so a failure inside the cell is logged by this handler.
      return await locations();
    } catch (e) {
      console.error(`Error loading locations: ${e}`);
      throw e;
    }
  },

  /**
   * Load today's locations and enrich each one via `getLocationData`.
   *
   * The result is cached in `locations-<year>-<month>-<day>-full.json`; when
   * that file exists and is readable it is returned without any lookups.
   *
   * @returns {Promise<Array<Object>>} the enriched locations.
   * @throws rethrows any error raised while loading or enriching locations;
   *     nothing is written to the cache in that case.
   */
  async getAllLocationsData() {
    const fs = require('fs');
    const day = new Date();
    const date = `${day.getFullYear()}-${day.getMonth() + 1}-${day.getDate()}`;
    const filename = `${project}/locations-${date}-full.json`;
    if (fs.existsSync(filename)) {
      try {
        return JSON.parse(fs.readFileSync(filename, 'utf8'));
      } catch (e) {
        // An unreadable cache is not fatal: fall through and rebuild it.
        console.error(`Error reading locations file: ${e}`);
      }
    }

    try {
      const locations = await this.loadLocations();
      const data = await Promise.all(locations.map(loc => this.getLocationData(loc)));
      fs.writeFileSync(filename, JSON.stringify(data, null, 4));
      return data;
    } catch (e) {
      console.error(`Error getting locations data: ${e}`);
      throw e;
    }
  },

  /**
   * Enrich a single location with its first Places search hit and that
   * place's details. The location object is modified in place: its last
   * `description` entry is removed (it forms the "near" part of the query)
   * and the search hit and details are merged into it.
   *
   * @param {Object} location - object with a `name` string and a
   *     `description` array.
   * @returns {Promise<Object>} the same location object; unchanged apart from
   *     the removed description entry when the search yields no hit.
   */
  async getLocationData(location) {
    const query = `${location.name} near ${location.description.pop()}, Flagstaff`;
    const results = await PlacesNearby(query);
    const best = results && results[0];
    if (!best) {
      return location;
    }
    const details = await PlaceDetails(best.place_id);
    return Object.assign(location, best, details);
  }
};

// Notebook entry point: runs only when the `$` context object is defined.
// Loads the enriched locations, prints coverage statistics, and reports the
// result (or the error) through `$`.
if (typeof $ !== 'undefined') {
  // Bring the file-system and glob modules into scope for this block.
  const fs = require('fs');
  const glob = require('glob');

  // `$` is an object exposing async/sendResult/sendError; it is not callable.
  $.async();
  LocationsService.getAllLocationsData()
    .then(r => {
      // Share of locations that carry a `website` value.
      const hasSites = r.filter(loc => loc.website);
      console.log(`Websites: ${hasSites.length}/${r.length} - ${Math.round(1.0 * hasSites.length / r.length * 100)}%`);

      // Every entry under `project` whose name does not start with "locations".
      const existing = glob.sync('**/!(locations)*', { cwd: project });
      // Turn each file name into a plain name and keep those whose closest
      // location name is within half the name's length in edit distance.
      const existingMatch = existing
        .map(loc => loc.split(/-|\.json/ig).join(' ').trim())
        .map(loc => {
          const levMatch = LevenshteinSort(r, loc, r => r.name)[0].name;
          if (LevenshteinDistance(loc, levMatch) < loc.length / 2) {
            return loc +'-'+ levMatch;
          }
          return loc +'- no match';
        })
        .filter(loc => !loc.includes('no match'));
      console.log(`Existing matches: ${existingMatch.length}/${existing.length} - ${Math.round(1.0 * existingMatch.length / existing.length * 100)}%`);

      console.log('Variance:');
      // Tally, across all existing files, how many entries have a `dotw` and
      // a `time` value in the expected format, and collect the rest.
      const variations = existing.map(loc => `${project}/${loc}`)
        .reduce((acc, loc) => {
          const loaded = JSON.parse(fs.readFileSync(loc, 'utf8'));
          if (!acc.dotw) {
            acc.dotw = {};
          }
          loaded.forEach(doc => {
            const dayOfWeek = doc.dotw.match(/Monday|Tuesday|Wednesday|Thursday|Friday|Saturday|Sunday/ig);
            if (dayOfWeek) {
              if (!acc.dotw.matches) {
                acc.dotw.matches = 0;
              }
              acc.dotw.matches++;
            } else {
              if (!acc.dotw.unmatched) {
                acc.dotw.unmatched = [];
              }
              acc.dotw.unmatched.push(loc + doc.dotw);
            }
          });
          if (!acc.time) {
            acc.time = {};
          }
          loaded.forEach(doc => {
            console.log(loc +'-'+ doc.time);
            const timeMatch = doc.time.match(/^([0-9]+(p|a)\.m\.\s+-\s+([0-9]+(p|a)|close)|all day)\s*$/ig);
            if (timeMatch) {
              if (!acc.time.matches) {
                acc.time.matches = 0;
              }
              acc.time.matches++;
            } else {
              if (!acc.time.unmatched) {
                acc.time.unmatched = [];
              }
              // Record the offending time value, not the weekday.
              acc.time.unmatched.push(loc + doc.time);
            }
          });
          return acc;
        }, {});
      console.log(variations);
      return r;
    })
    .then(r => $.sendResult(r))
    .catch(e => $.sendError(e));
}

Code Breakdown

Importing Modules and Functions

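The code starts by importing the modules and functions it needs: the project's `importer` (from `../Core`), Node's `fs` and `path` modules, the `glob` package, and, through `importer.import`, the helpers `placesNearby`, `placeDetails`, `runSeleniumCell`, `levDist`, and `levSort`.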

Setting Environment Variables and Project Path

The code sets the PROFILE_PATH variable to the user's home directory (or equivalent on different operating systems). It also sets the project path to a specific directory (Collections/flagstaff-happy) within the user's home directory.

Functions

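The code defines two main functions: `loadLocations()`, which returns today's cached `locations-<date>.json` if it exists and otherwise runs the `google maps data list` Selenium cell; and `getAllLocationsData()`, which looks up each location with `placesNearby` and `placeDetails`, merges the results into the location, and caches the full list in `locations-<date>-full.json`.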

Async Execution

The code checks if the $ object is defined, and if so, executes the async() method on it. It then calls the getAllLocationsData() function, which returns a promise that resolves to the updated locations data.