This code defines a function, `scrapeAlert`, that fetches data from a website based on a given ID, saves it to a JSON file, and is exported for use elsewhere.
npm run import -- "Crime reports"
var fs = require('fs');
var path = require('path');
// Home directory of the current user: the first non-empty value among the
// HOME, HOMEPATH and USERPROFILE environment variables, or '' when none is set.
var PROFILE_PATH = ['HOME', 'HOMEPATH', 'USERPROFILE']
    .map(function (variableName) { return process.env[variableName]; })
    .find(Boolean) || '';
// Directory in which the scraped alert files are stored.
var project = path.join(PROFILE_PATH, 'Collections/crimes');
/**
 * Scrapes one Maricopa County Attorney civic alert page and caches the result
 * as `maricopa-alert-<ID>.json` inside the module-level `project` directory.
 *
 * If that file already exists, the page is not fetched again.
 *
 * Relies on a `client` object that is not defined in the visible part of this
 * file; it must expose `url()` and `getAllXPath()` and is presumably a
 * browser-automation client supplied by the environment — TODO confirm.
 *
 * @param {number|string} ID - Alert id; used as the `AID` query parameter and
 *     as part of the output file name.
 * @returns {Promise<*>|undefined} `undefined` (returned synchronously) when
 *     the alert is already cached. Otherwise the promise chain started by
 *     `client`, which settles with whatever `getAllXPath` produced for the
 *     `time`, `title` and `content` queries, or with `undefined` when fetching
 *     or saving failed (the error is logged, not rethrown).
 */
function scrapeAlert(ID) {
    // Build the output path once so the cache check and the write cannot drift apart.
    const alertFile = path.join(project, 'maricopa-alert-' + ID + '.json');
    if (fs.existsSync(alertFile)) {
        return;
    }
    return client
        .url('https://www.maricopacountyattorney.org/CivicAlerts.aspx?AID=' + ID)
        .getAllXPath({
            time: '//*[@class = "single"]//*[@class = "date"]//text()',
            title: '//*[contains(@class, "single")]//h3//text()',
            content: '//*[@class = "single"]//*[@class = "content"]//text()'
        })
        .then(r => {
            // The collection directory may not exist yet; without this the
            // write fails with ENOENT and the scraped data is lost, because
            // the handler below only logs the error.
            fs.mkdirSync(project, {recursive: true});
            fs.writeFileSync(alertFile, JSON.stringify(r, null, 4));
            return r;
        })
        // Deliberately best-effort: a failed alert is logged and does not reject.
        .catch(e => console.log(e));
}
// Make the scraper available to other modules.
module.exports = scrapeAlert;

// Only runs when a `$` helper object exists in scope (presumably injected by
// a notebook-style runner — TODO confirm). `multiCrawl` is not defined in the
// visible part of this file and must be provided by the same environment.
if (typeof $ !== 'undefined') {
    // Tell the runner the result will be delivered asynchronously.
    $.async();
    // Candidate alert ids 0 through 499.
    var IDs = Array.from({length: 500}, function (_, index) { return index; });
    multiCrawl(IDs, 'crime reports')
        .then(function (results) { return $.sendResult(results); })
        .catch(function (error) { return $.sendError(error); });
}
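/**
 * Scrapes a Maricopa County crime alert and saves it as a JSON file.
 *
 * `scrapeAlert` takes only the alert ID as a parameter. It also uses two names
 * from the enclosing scope, which are not parameters:
 *   - `client`  - the client used for web scraping.
 *   - `project` - the path to the project directory.
 *
 * @param {number|string} ID - The ID of the alert to scrape.
 * @returns {Promise|undefined} A promise for the scraped data, or `undefined`
 *     when the alert file already exists.
 */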
Code Breakdown

- `fs` (File System): used for file operations.
- `path`: used for path manipulation.
- `PROFILE_PATH`: set to the user's home directory (or its environment variable equivalents).
- `project`: set to a directory path within the user's home directory (`'Collections/crimes'`).

`scrapeAlert` Function

- Takes an `ID` as an argument.
- Checks whether a file named `maricopa-alert-<ID>.json` exists in the specified project directory. If it does, the function returns immediately.
- Otherwise, it loads `https://www.maricopacountyattorney.org/CivicAlerts.aspx?AID=<ID>` using an unknown `client` object (likely a web scraping client).
- Extracts `time`, `title`, and `content` from the webpage using XPath expressions.
- Saves the extracted data as `maricopa-alert-<ID>.json` in the project directory.

- Exports the `scrapeAlert` function.
- Checks whether the `$` object is defined. If it is, it calls the `async` method on it and passes an array of 500 IDs to the `multiCrawl` function.
- Calls `multiCrawl` with `crime reports` as an argument and awaits the result.
- If `$` is defined, it catches any errors and sends the result or error using the `$` object.