The `searchResultsToJson(url)` function loads the page at the given URL, extracts its search results, and resolves to a plain (JSON-serializable) object containing the URL, the search query, and the results. It logs the URL and session ID, navigates to the page, extracts the search query and results, maps them to the desired format, and catches any errors that occur during the process, returning an empty result set in that case.
npm run import -- "search results as json"
/**
 * Opens a search-engine results page and scrapes the search query and the
 * result list from it.
 *
 * Uses a `client` object that must already be in scope where this function
 * lives (it is not declared here); `url`, `pause` and `getAllXPath` are
 * called on it in that order.
 *
 * @param {string} url - Address of the results page to load.
 * @returns {Promise<{url: string, query: (string|null), results: Array<{name: string, summary: string}>}>}
 *   Whatever thenable the client chain yields, resolving to the scraped data.
 *   A failure anywhere in the chain is logged and converted into
 *   `{url, query: null, results: []}` rather than a rejection.
 */
function searchResultsToJson(url) {
    console.log(url + ' - ' + client.requestHandler.sessionID);

    // A string is used as-is and a list of text nodes is joined with newlines.
    // Any other value (for example a field that is absent) becomes '', the
    // same text an empty list would produce, so one incomplete result no
    // longer throws and wipes out the whole page.
    const toText = value => {
        if (typeof value === 'string') {
            return value;
        }
        return Array.isArray(value) ? value.join('\n') : '';
    };

    // First match wins when several inputs qualify; no match yields null,
    // the same "no query" value the error path reports.
    const toQuery = value => {
        if (typeof value === 'string') {
            return value;
        }
        return Array.isArray(value) && value.length > 0 ? value[0] : null;
    };

    // Union of the search-box locators used by the supported engines.
    const queryXPath = [
        '//input[contains(@aria-label, "Search")]/@value',
        '//input[contains(@aria-label, "search")]/@value',
        // yahoo
        '//label[contains(., "Search")]/following::*//input[@type="text"]/@value',
        '//input[contains(@class, "Search")]/@value',
        // wolfram
        '//input[contains(@name, "query")]/@value',
        // duckduckgo
        '//input[contains(@id, "search")]/@value',
        // yandex
        '//input[contains(@aria-label, "Request")]/@value'
    ].join('|');

    // Every alternative is anchored with `//`. The div alternative used to be
    // a bare `div[...]`, which is resolved relative to the context node and
    // therefore did not search the document like its siblings do.
    const resultsXPath = [
        '//h3',
        '//h2',
        '//div[contains(@class, "title")]',
        // ask
        '//*[contains(@class, "item-title")]'
    ].join('|');

    return client
        .url(url)
        .pause(2000)
        .getAllXPath({
            query: queryXPath,
            results: [
                resultsXPath,
                {
                    name: './/text()',
                    summary: './/following-sibling::div//text()'
                }
            ]
        })
        .then(r => {
            const rows = Array.isArray(r.results) ? r.results : [];
            return {
                url: url,
                query: toQuery(r.query),
                results: rows.map(s => ({
                    name: toText(s.name),
                    summary: toText(s.summary)
                }))
            };
        })
        .catch(e => {
            console.log(e);
            return {
                url: url,
                query: null,
                results: []
            };
        });
}
// Expose searchResultsToJson as the value of this CommonJS module.
module.exports = searchResultsToJson;
```javascript
/**
 * Extracts search results from a given URL and returns them as a plain object.
 *
 * Depends on a `client` object that is expected to be in scope (it is not
 * declared in this snippet); the page is opened with `client.url`, given a
 * fixed 2000 ms pause, and then queried with `client.getAllXPath`.
 *
 * @param {string} url - The URL to extract search results from.
 * @returns {Promise<Object>} A promise resolving to `{ url, query, results }`,
 *   where `results` is an array of `{ name, summary }` strings. If the chain
 *   fails, the error is logged and `{ url, query: null, results: [] }` is
 *   returned instead of a rejection.
 */
function searchResultsToJson(url) {
    // Log the URL and client session ID for debugging purposes
    console.log(`Searching ${url} - Session ID: ${client.requestHandler.sessionID}`);

    // Locators for the search box across the supported engines, combined
    // into a single XPath union.
    const query = [
        '//input[contains(@aria-label, "Search")]/@value',
        '//input[contains(@aria-label, "search")]/@value',
        '//label[contains(., "Search")]/following::*//input[@type="text"]/@value',
        '//input[contains(@class, "Search")]/@value',
        '//input[contains(@name, "query")]/@value',
        '//input[contains(@id, "search")]/@value',
        '//input[contains(@aria-label, "Request")]/@value'
    ].join(' | ');

    // Locators for result headings. The div alternative must start with `//`
    // like the others; a bare `div[...]` is resolved relative to the context
    // node instead of searching the whole document.
    const resultsQuery = [
        '//h3',
        '//h2',
        '//div[contains(@class, "title")]',
        '//*[contains(@class, "item-title")]'
    ].join(' | ');

    // Sub-queries evaluated for each matched result node.
    const resultFields = {
        name: './/text()',
        summary: './/following-sibling::div//text()'
    };

    // Collapse a scraped field to text: strings pass through, arrays of text
    // nodes are newline-joined, and anything else becomes '' (what an empty
    // array would give) so one incomplete result cannot fail the whole page.
    const asText = (field) => {
        if (typeof field === 'string') {
            return field;
        }
        return Array.isArray(field) ? field.join('\n') : '';
    };

    // Execute the XPath query and extract the results
    return client
        .url(url)
        .pause(2000)
        .getAllXPath({
            query,
            results: [resultsQuery, resultFields]
        })
        .then((r) => {
            const { query: queryValue, results: resultsArray } = r;

            // Take the first matching input value; fall back to null (the
            // same "no query" marker the error path uses) when none matched.
            let queryResult = null;
            if (typeof queryValue === 'string') {
                queryResult = queryValue;
            } else if (Array.isArray(queryValue) && queryValue.length > 0) {
                queryResult = queryValue[0];
            }

            const rows = Array.isArray(resultsArray) ? resultsArray : [];
            const results = rows.map((s) => ({
                name: asText(s.name),
                summary: asText(s.summary)
            }));

            return {
                url,
                query: queryResult,
                results
            };
        })
        .catch((e) => {
            // Log any errors and return an empty response
            console.error(e);
            return { url, query: null, results: [] };
        });
}
// Expose searchResultsToJson as the value of this CommonJS module.
module.exports = searchResultsToJson;
```
searchResultsToJson(url)
Extracts search results from a given URL and returns them in JSON format.
Parameters:
url: The URL of the page to extract search results from.

Returns:
A promise that resolves to an object containing the URL, query, and results of the search. The results are an array of objects with name and summary properties.