// This module defines scrapeAlert(ID), which fetches a Maricopa County Attorney civic alert by its ID, saves the extracted data as a JSON file, and exports the function for use elsewhere.
// Usage: npm run import -- "Crime reports"
var fs = require('fs');
var path = require('path');
// Home directory of the current user: the first non-empty value among the
// HOME, HOMEPATH and USERPROFILE environment variables, or '' when none is set.
var PROFILE_PATH = [
    process.env.HOME,
    process.env.HOMEPATH,
    process.env.USERPROFILE
].find(Boolean) || '';

// Directory the scraped alert JSON files are read from and written to.
var project = path.join(
    PROFILE_PATH,
    'Collections/crimes'
);
/**
 * Scrapes one Maricopa County Attorney civic alert and caches it on disk as
 * `maricopa-alert-<ID>.json` inside the `project` directory.
 *
 * The page is loaded through the ambient `client` object, which is not defined
 * in this file (presumably a WebDriver-style browser client supplied by the
 * host environment; confirm against the caller).
 *
 * @param {number|string} ID - Value of the `AID` query parameter of the alert page.
 * @returns {Promise<Object|undefined>|undefined} `undefined`, synchronously, when
 *   the alert file already exists. Otherwise the client's promise chain, which
 *   resolves to whatever `getAllXPath` produced for the requested `time`,
 *   `title` and `content` keys, or to `undefined` when scraping or writing
 *   failed (the error is logged with `console.log`, not rethrown).
 */
function scrapeAlert(ID) {
    const alertFile = path.join(project, 'maricopa-alert-' + ID + '.json');

    // Already scraped on an earlier run: skip the page load entirely.
    if (fs.existsSync(alertFile)) {
        return;
    }

    return client
        .url('https://www.maricopacountyattorney.org/CivicAlerts.aspx?AID=' + ID)
        .getAllXPath({
            time: '//*[@class = "single"]//*[@class = "date"]//text()',
            title: '//*[contains(@class, "single")]//h3//text()',
            content: '//*[@class = "single"]//*[@class = "content"]//text()'
        })
        .then(r => {
            // The output directory may not exist yet (e.g. first run on a new
            // machine); without this the write fails with ENOENT and the
            // scraped data is lost. Done inside the chain so that a failure
            // here is still handled by the catch below.
            fs.mkdirSync(project, { recursive: true });
            fs.writeFileSync(alertFile, JSON.stringify(r, null, 4));
            return r;
        })
        // Best effort: one failed alert is logged and must not abort a bulk crawl.
        .catch(e => console.log(e));
}
// Make the scraper available to other modules through require().
module.exports = scrapeAlert;

// `$` is not defined in this file; the block below only runs when the host
// environment provides it (presumably a notebook-style runner; confirm).
if (typeof $ !== 'undefined') {
    $.async();

    // Candidate alert IDs 0 through 499.
    var IDs = Array.from({ length: 500 }, (_, index) => index);

    // `multiCrawl` is also supplied from outside this file.
    multiCrawl(IDs, 'crime reports')
        .then(result => $.sendResult(result))
        .catch(error => $.sendError(error));
}
/**
 * Scrapes Maricopa County crime alerts and saves them as JSON files.
 *
 * @param {string} ID - The ID of the alert to scrape.
 * @param {object} client - The client used for web scraping (read from the enclosing scope, not passed as an argument).
 * @param {string} project - The path to the project directory (read from the enclosing scope, not passed as an argument).
 * @returns {Promise<object>} The scraped alert data; nothing is returned when the alert file already exists.
 */

/*
 * Code Breakdown
 *
 * Imports and constants
 * - fs (File System): used for file operations.
 * - path: used for path manipulation.
 * - PROFILE_PATH: set to the user's home directory (or its environment variable equivalents).
 * - project: set to a directory path within the user's home directory ('Collections/crimes').
 *
 * scrapeAlert Function
 * - Takes an ID as an argument.
 * - Checks whether a file named maricopa-alert-<ID>.json exists in the specified project directory. If it does, the function returns immediately.
 * - Navigates to https://www.maricopacountyattorney.org/CivicAlerts.aspx?AID=<ID> using an unknown client object (likely a web scraping client).
 * - Extracts time, title, and content from the webpage using XPath expressions.
 * - Saves the extracted data to maricopa-alert-<ID>.json in the project directory.
 *
 * Export
 * - Exports the scrapeAlert function.
 *
 * Execution
 * - Checks whether the $ object is defined. If it is, it calls the async method on it and passes an array of 500 IDs to the multiCrawl function.
 * - Calls multiCrawl with 'crime reports' as an argument and awaits the result.
 * - If $ is defined, it catches any errors and sends the result or error using the $ object.
 */