The `evaluateDom` function evaluates a given XPath query or CSS selector on a DOM document, catching errors and handling various query formats and types, including those with wildcards. It returns an array of node values or a single value (number, string, or boolean) depending on the query type.
npm run import -- "select tree"
var importer = require('../Core')
var { walkTree } = importer.import("walk tree")
var { XPathResult, JSDOM } = require('jsdom')
//var cheerio = require('cheerio')
//var assert = require('assert')
var wgxpath = require('wgxpath')
// Module-level document whose `evaluate` method evaluateDom calls.
// selectDom and queryDom assign it whenever they are given a context, and
// fall back to it as the context when they are given none.
var document
/**
 * Evaluate an XPath expression against a context node using the
 * module-level `document`.
 *
 * The expression is first evaluated as an ordered node iterator so matches
 * come back in order. If the engine reports that the expression does not
 * produce a node-set, it is evaluated again with ANY_TYPE and the primitive
 * result is returned instead.
 *
 * @param {string} select - XPath expression to evaluate.
 * @param {Node} ctx - context node the expression is evaluated against.
 * @param {*} [query] - unused; kept so existing callers keep working.
 * @returns {Array|number|string|boolean} for node-set results, an array
 *   holding each node's `nodeValue`, or the node itself when `nodeValue` is
 *   null/undefined; otherwise the number, string or boolean value of the
 *   expression.
 * @throws rethrows, unchanged, any evaluation error that is not one of the
 *   "not a node-set" errors.
 */
function evaluateDom(select, ctx, query) {
    // XPathResult may be undefined here; the numeric fallbacks below are the
    // standard XPathResult constant values.
    const types = (typeof XPathResult !== 'undefined' && XPathResult) || {}

    try {
        if (select.includes('//*')) {
            console.warn(`Possible slow query evaluation due to wildcard: ${select}`)
        }
        // defaults to trying for iterator type
        // so it can automatically be ordered
        const iterator = document.evaluate(select, ctx, null,
            types.ORDERED_NODE_ITERATOR_TYPE || 5, null)
        // TODO: create a pattern recognizer for bodyless while
        const co = []
        let m
        while ((m = iterator.iterateNext())) {
            // Only fall back to the node when it has no value at all, so an
            // empty attribute or text node yields '' rather than the node.
            co.push(m.nodeValue == null ? m : m.nodeValue)
        }
        return co
    } catch (e) {
        // Thrown values without a message must be rethrown as-is instead of
        // failing on `.includes` and masking the original error.
        const message = String((e && e.message) || '')
        const notNodeSet = message.includes('Value should be a node-set')
            || message.includes('You should have asked')
        if (!notNodeSet) {
            throw e
        }
    }

    // The expression yields a primitive: let the engine choose the type.
    const result = document.evaluate(select, ctx, null, types.ANY_TYPE || 0, null)
    switch (result.resultType) {
        case types.NUMBER_TYPE || 1:
            return result.numberValue
        case types.STRING_TYPE || 2:
            return result.stringValue
        case types.BOOLEAN_TYPE || 3:
            return result.booleanValue
        default:
            // NOTE(review): `resultValue` is not part of the standard
            // XPathResult interface, so this is presumably undefined; kept
            // for backward compatibility.
            return result.resultValue
    }
}
// parse as html if it's string,
// if there is no context convert the tree to html
/**
 * Run an XPath selection through walkTree, evaluating each selector with
 * evaluateDom.
 *
 * - A string context is parsed with JSDOM (when JSDOM is available), wgxpath
 *   is installed on the new window, and the parsed document becomes both
 *   the context and the module-level `document`.
 * - A node context keeps itself as the context; the module-level `document`
 *   becomes the node's owner document, or the node itself when it is a
 *   Document.
 * - With no context, the current module-level `document` is the context.
 *
 * @param {*} select - selector(s) handed to walkTree.
 * @param {string|Node} [ctx] - HTML source, a DOM node, or nothing.
 * @returns {*} whatever walkTree returns.
 */
function selectDom(select, ctx) {
    let root = ctx
    if (typeof root === 'string' && typeof JSDOM != 'undefined') {
        const dom = new JSDOM(root)
        wgxpath.install(dom.window, true)
        root = dom.window.document
        document = root
    } else if (root) {
        // A Document (nodeType 9) has ownerDocument === null; using that
        // would leave evaluateDom without a document to evaluate against.
        document = root.nodeType === 9 ? root : root.ownerDocument
    } else {
        root = document
    }
    return walkTree(select, root, (expression, node) => evaluateDom(expression, node))
}
/**
 * Run a CSS selection through walkTree, using `querySelectorAll` on each
 * context node (and on its `shadowRoot`, when it has one).
 *
 * The context is resolved the same way selectDom does it: a string is parsed
 * with JSDOM (when available) and wgxpath is installed on the new window; a
 * node is used as-is; with no context the module-level `document` is used.
 * The module-level `document` is updated to the document of the context.
 *
 * @param {*} select - selector(s) handed to walkTree.
 * @param {string|Node} [ctx] - HTML source, a DOM node, or nothing.
 * @returns {*} whatever walkTree returns; each evaluation yields the single
 *   match when exactly one node matched, otherwise an array of matches.
 */
function queryDom(select, ctx) {
    let root = ctx
    if (typeof root === 'string' && typeof JSDOM != 'undefined') {
        const dom = new JSDOM(root)
        wgxpath.install(dom.window, true)
        root = dom.window.document
        document = root
    } else if (root) {
        // A Document (nodeType 9) has ownerDocument === null; storing that
        // would break later calls that rely on the shared document.
        document = root.nodeType === 9 ? root : root.ownerDocument
    } else {
        root = document
    }
    return walkTree(select, root, (selector, node) => {
        const co = []
        for (const m of node.querySelectorAll(selector)) {
            if (m) {
                co.push(m)
            }
        }
        if (node.shadowRoot) {
            for (const m of node.shadowRoot.querySelectorAll(selector)) {
                if (m) {
                    co.push(m)
                }
            }
        }
        // A lone match is unwrapped; callers rely on this shape.
        return co.length === 1 ? co[0] : co
    })
}
// TODO: try catch with esquery, vm.runInThisContext, convert and select DOM, and jsel
// from least nuanced to most nuanced, CSS -> XPath -> custom ASTQ
// Most xpath like //Element will not work on CSS, might have a problem with *
function evaluateQuery(select, ctx) {
try {
var esquery = require('esquery');
// we might have to help out the CSS parser here
if (!select.match(/^\/\/|\*\/|\.\//ig)) // probably XPath, fall through
return esquery(ctx, select);
} catch (e) {
if (!e.name.includes('SyntaxError')
&& !e.message.includes('Cannot find module')) {
throw e;
}
}
try {
var jsel = require('jsel');
return jsel(ctx).selectAll(select);
} catch (e) {
if (!e.message.includes('XPath parse error')
&& !e.message.includes('Unexpected character')
&& !e.message.includes('Cannot find module')) {
throw e;
}
}
try {
var ASTQ = require("astq");
var astq = new ASTQ();
return astq.query(ctx, select);
} catch (e) {
if (!e.message.includes('query parsing failed')) {
throw e;
}
}
throw new Error(`Could not parse select query ${JSON.stringify(select)} using XPath, CSS, or ASTQ`);
}
/**
 * Select from an object tree: hands the selector(s) and the tree to
 * walkTree with evaluateQuery as the per-selector evaluator.
 *
 * @param {*} select - selector(s) handed to walkTree.
 * @param {*} ctx - tree to select from.
 * @returns {*} whatever walkTree returns.
 */
function selectTree(select, ctx) {
    // TODO: when converting to html, return only the matching child
    //   objects, not their attribute containers
    // TODO: decide what to do when a string is received -- try to parse it
    //   with all the different selectors?
    const evaluator = evaluateQuery
    const selection = walkTree(select, ctx, evaluator)
    return selection
}
// CommonJS exports: the XPath evaluator, the multi-engine tree query, and the
// three walkTree-based entry points defined above.
module.exports = {
evaluateDom,
evaluateQuery,
selectTree,
selectDom,
queryDom,
}
import { walkTree } from '../Core/importer.js';
import { JSDOM, XPathResult, ORDERED_NODE_ITERATOR_TYPE, ANY_TYPE } from 'jsdom';
import wgxpath from 'wgxpath';
import esquery from 'esquery';
import jsel from 'jsel';
import ASTQ from 'astq';
// Readable names for the XPathResult type codes this module works with.
// NOTE(review): jsdom's documented exports do not appear to include
// ORDERED_NODE_ITERATOR_TYPE / ANY_TYPE -- confirm. When they are not
// numbers, the standard XPathResult values (5 and 0) are used so the two
// entries cannot collapse into a single "undefined" key.
const supportedResultTypes = {
  [typeof ORDERED_NODE_ITERATOR_TYPE === 'number' ? ORDERED_NODE_ITERATOR_TYPE : 5]: 'orderedNodeIterator',
  [typeof ANY_TYPE === 'number' ? ANY_TYPE : 0]: 'anyType',
  1: 'numberType',
  2: 'stringType',
  3: 'booleanType',
};
/**
 * Bundles the XPath (`evaluateDom`/`selectDom`), CSS (`queryDom`) and
 * object-tree (`evaluateQuery`/`selectTree`) selection helpers around one
 * "current document" held on the instance.
 */
class DOMEvaluator {
  /**
   * @param {Document} document - document whose `evaluate` method
   *   evaluateDom uses until selectDom/queryDom is given another context.
   * @param {object} [options] - stored on the instance; not read by this
   *   class.
   */
  constructor(document, options = {}) {
    this.document = document;
    this.options = options;
    this.wgxpath = wgxpath;
  }

  /**
   * Internal: turn the `ctx` argument of selectDom/queryDom into a context
   * node and record the matching document on `this.document`.
   *
   * @param {string|Node} [ctx] - HTML source, a DOM node, or nothing.
   * @returns {*} the parsed document for a string, `ctx` itself for a node,
   *   or the current `this.document` when `ctx` is missing.
   */
  resolveContext(ctx) {
    if (typeof ctx === 'string' && JSDOM) {
      const dom = new JSDOM(ctx);
      this.wgxpath.install(dom.window, true);
      this.document = dom.window.document;
      return this.document;
    }
    if (ctx) {
      // A Document (nodeType 9) has ownerDocument === null, so it must be
      // used as its own document.
      this.document = ctx.nodeType === 9 ? ctx : ctx.ownerDocument;
      return ctx;
    }
    return this.document;
  }

  /**
   * Evaluate an XPath expression against `ctx` using `this.document`.
   *
   * The expression is evaluated as an ordered node iterator first; when the
   * engine reports it is not a node-set, it is evaluated again with ANY_TYPE
   * and the primitive value is returned.
   *
   * @param {string} select - XPath expression.
   * @param {Node} ctx - context node.
   * @returns {Array|number|string|boolean} an array holding each node's
   *   `nodeValue` (or the node itself when it has none), or the primitive
   *   value of the expression.
   * @throws rethrows any evaluation error other than the "not a node-set"
   *   ones.
   */
  evaluateDom(select, ctx) {
    // The imported constants may be undefined; 5 and 0 are the standard
    // XPathResult values for these types.
    const orderedIteratorType =
      typeof ORDERED_NODE_ITERATOR_TYPE === 'number' ? ORDERED_NODE_ITERATOR_TYPE : 5;
    const anyType = typeof ANY_TYPE === 'number' ? ANY_TYPE : 0;

    try {
      if (select.includes('//*')) {
        console.warn(`Possible slow query evaluation due to wildcard: ${select}`);
      }
      const iterator = this.document.evaluate(select, ctx, null, orderedIteratorType, null);
      const values = [];
      let node;
      while ((node = iterator.iterateNext())) {
        // Fall back to the node only when it has no value at all, so empty
        // attributes/text yield '' rather than the node object.
        values.push(node.nodeValue == null ? node : node.nodeValue);
      }
      return values;
    } catch (error) {
      const message = String((error && error.message) || '');
      const notNodeSet =
        message.includes('Value should be a node-set') ||
        message.includes('You should have asked');
      if (!notNodeSet) {
        throw error;
      }
    }

    const primitive = this.document.evaluate(select, ctx, null, anyType, null);
    switch (primitive.resultType) {
      case 1:
        return primitive.numberValue;
      case 2:
        return primitive.stringValue;
      case 3:
        return primitive.booleanValue;
      default:
        // NOTE(review): `resultValue` is not part of the standard
        // XPathResult interface; kept for backward compatibility.
        return primitive.resultValue;
    }
  }

  /**
   * Evaluate a selector against an object tree with the first engine that
   * accepts it: esquery, then jsel, then ASTQ. Parse failures of one engine
   * fall through to the next; other errors are rethrown.
   *
   * @param {string} select - selector to evaluate.
   * @param {*} ctx - tree handed to each engine.
   * @returns {*} the result of the first engine that succeeds.
   * @throws {Error} when no engine could parse the selector.
   */
  evaluateQuery(select, ctx) {
    try {
      return esquery(ctx, select);
    } catch (error) {
      if (
        !error.name.includes('SyntaxError') &&
        !error.message.includes('Cannot find module')
      ) {
        throw error;
      }
    }
    try {
      return jsel(ctx).selectAll(select);
    } catch (error) {
      if (
        !error.message.includes('XPath parse error') &&
        !error.message.includes('Unexpected character') &&
        !error.message.includes('Cannot find module')
      ) {
        throw error;
      }
    }
    try {
      const astq = new ASTQ();
      return astq.query(ctx, select);
    } catch (error) {
      if (!error.message.includes('query parsing failed')) {
        throw error;
      }
    }
    throw new Error(`Could not parse select query ${JSON.stringify(select)} using XPath, CSS, or ASTQ`);
  }

  /**
   * Run an XPath selection through walkTree, evaluating each selector with
   * evaluateDom against the resolved context node.
   *
   * @param {*} select - selector(s) handed to walkTree.
   * @param {string|Node} [ctx] - HTML source, a DOM node, or nothing.
   * @returns {*} whatever walkTree returns.
   */
  selectDom(select, ctx) {
    const root = this.resolveContext(ctx);
    return walkTree(select, root, (expression, node) => this.evaluateDom(expression, node));
  }

  /**
   * Run a CSS selection through walkTree, using `querySelectorAll` on each
   * context node and on its `shadowRoot` when it has one.
   *
   * @param {*} select - selector(s) handed to walkTree.
   * @param {string|Node} [ctx] - HTML source, a DOM node, or nothing.
   * @returns {*} whatever walkTree returns; each evaluation yields the
   *   single match when exactly one node matched, otherwise an array.
   */
  queryDom(select, ctx) {
    const root = this.resolveContext(ctx);
    return walkTree(select, root, (selector, node) => {
      // querySelectorAll returns a NodeList, which has no filter(); copy it
      // into a real array first.
      const matches = Array.from(node.querySelectorAll(selector)).filter(Boolean);
      if (node.shadowRoot) {
        const shadowMatches = Array.from(node.shadowRoot.querySelectorAll(selector));
        matches.push(...shadowMatches.filter(Boolean));
      }
      return matches.length === 1 ? matches[0] : matches;
    });
  }

  /**
   * Select from a tree with evaluateQuery via walkTree. A string context is
   * first parsed with JSDOM (wgxpath is installed on the new window) and the
   * parsed document is used as the tree. Errors propagate unchanged.
   *
   * @param {*} select - selector(s) handed to walkTree.
   * @param {*} ctx - tree to select from, or HTML source.
   * @returns {*} whatever walkTree returns.
   */
  selectTree(select, ctx) {
    let root = ctx;
    if (typeof ctx === 'string') {
      const dom = new JSDOM(ctx);
      this.wgxpath.install(dom.window, true);
      root = dom.window.document;
    }
    return walkTree(select, root, (query, node) => this.evaluateQuery(query, node));
  }
}
// Module-level singleton, constructed with an empty placeholder object as
// its document and empty options.
const domEvaluator = new DOMEvaluator({}, {});
// Exports: each function forwards exactly (select, ctx) to the singleton so
// the method runs with `this` bound to it; supportedResultTypes is exported
// as-is.
// NOTE(review): this listing uses ESM `import` statements together with
// CommonJS `module.exports` -- confirm the intended module system.
module.exports = {
selectDom: (select, ctx) => domEvaluator.selectDom(select, ctx),
queryDom: (select, ctx) => domEvaluator.queryDom(select, ctx),
selectTree: (select, ctx) => domEvaluator.selectTree(select, ctx),
evaluateDom: (select, ctx) => domEvaluator.evaluateDom(select, ctx),
evaluateQuery: (select, ctx) => domEvaluator.evaluateQuery(select, ctx),
supportedResultTypes,
};
## Function Breakdown: `evaluateDom`

The `evaluateDom` function evaluates a given XPath query on a DOM document. It attempts to handle various query formats and types, including XPath and CSS selectors.
Parameters:

- `select`: The XPath query or CSS selector to evaluate.
- `ctx`: The context node in the DOM document.
- `query`: Not used in the implementation.

The function consists of two main sections:
1. Error handling: if an error is caught and it is a `SyntaxError`, it is ignored and the function proceeds. Otherwise, the error is re-thrown.
2. XPath evaluation: the query is evaluated using the `document.evaluate` method. If the query contains a wildcard (`//*`), a warning is logged. The function then iterates over the resulting nodes and returns an array of node values.

The function supports the following XPath evaluation types:
- `ORDERED_NODE_ITERATOR_TYPE` (type 5): Returns an ordered iterator over the nodes that match the query.
- `ANY_TYPE` (type 0): Returns the value of the query, which can be a node-set, number, string, or boolean.

The function returns an array of node values or a single value (number, string, or boolean) depending on the query type.