patterns | Cell 1 | Cell 3 | Search

The evaluateDom function evaluates an XPath query against a DOM context node, logging a warning for potentially slow wildcard (`//*`) queries; CSS selectors are handled separately by queryDom. It returns an array of node values for node-set queries, or a single value (number, string, or boolean) when the query produces a scalar.

Run example

npm run import -- "select tree"

select tree

var importer = require('../Core')
var { walkTree } = importer.import("walk tree")
var { XPathResult, JSDOM } = require('jsdom')
//var cheerio = require('cheerio')
//var assert = require('assert')
var wgxpath = require('wgxpath')
var document

/**
 * Evaluate an XPath expression against a DOM context node.
 *
 * The expression is first evaluated as an ordered node iterator so matches
 * come back in document order. Each match contributes its `nodeValue`, or the
 * node itself when `nodeValue` is null/undefined. When the XPath engine
 * reports that the expression does not yield a node-set, the expression is
 * re-evaluated with ANY_TYPE and the scalar result is returned instead.
 *
 * Uses the module-level `document`, which selectDom/queryDom assign.
 *
 * @param {string} select - XPath expression to evaluate
 * @param {Node} ctx - context node the expression is evaluated against
 * @param {Function} [query] - unused; kept so existing callers keep working
 * @returns {Array|number|string|boolean} matched values/nodes, or a scalar
 * @throws rethrows any evaluation error other than the "not a node-set" reports
 */
function evaluateDom(select, ctx, query) {
    // Numeric fallbacks are the result-type codes used when the XPathResult
    // constructor is not available in this environment.
    const types = XPathResult || {}
    const ORDERED_ITERATOR = types.ORDERED_NODE_ITERATOR_TYPE || 5
    const ANY = types.ANY_TYPE || 0
    const NUMBER = types.NUMBER_TYPE || 1
    const STRING = types.STRING_TYPE || 2
    const BOOLEAN = types.BOOLEAN_TYPE || 3

    try {
        if (select.includes('//*')) {
            console.warn(`Possible slow query evaluation due to wildcard: ${select}`)
        }
        // defaults to trying for iterator type
        //   so it can automatically be ordered
        const iterator = document.evaluate(select, ctx, null, ORDERED_ITERATOR, null)
        // TODO: create a pattern recognizer for bodyless while
        const matches = []
        let node
        while ((node = iterator.iterateNext())) {
            // Null-check instead of truthiness: an empty-string value (empty
            // text node or attribute) is still a value and must not be
            // replaced by the node object.
            matches.push(node.nodeValue != null ? node.nodeValue : node)
        }
        return matches
    } catch (e) {
        // Tolerate thrown values that are not Error objects so the original
        // failure is rethrown rather than masked by a TypeError here.
        const message = String((e && e.message) || '')
        const isScalarQuery = message.includes('Value should be a node-set')
            || message.includes('You should have asked')
        if (!isScalarQuery) {
            throw e
        }
        const result = document.evaluate(select, ctx, null, ANY, null)
        switch (result.resultType) {
            case NUMBER:
                return result.numberValue
            case STRING:
                return result.stringValue
            case BOOLEAN:
                return result.booleanValue
            default:
                return result.resultValue
        }
    }
}

// parse as html if it's string,
//   if there is no context convert the tree to html
/**
 * Run one or more XPath selections against a DOM context via walkTree.
 *
 * - A string `ctx` is parsed with JSDOM, wgxpath is installed on the new
 *   window, and the parsed document becomes both the context and the
 *   module-level `document`.
 * - A node `ctx` is used as the context; its owner document (or the node
 *   itself when it is a Document) becomes the module-level `document`.
 * - With no `ctx`, the previously remembered `document` is the context.
 *
 * @param {*} select - selection passed through to walkTree
 * @param {string|Node} [ctx] - HTML source or DOM node to select from
 * @returns {*} whatever walkTree returns for the evaluated selection(s)
 */
function selectDom(select, ctx) {
    if (typeof ctx === 'string' && typeof JSDOM !== 'undefined') {
        const dom = new JSDOM(ctx)
        wgxpath.install(dom.window, true)
        ctx = dom.window.document
        document = ctx
    } else if (ctx) {
        // A Document's ownerDocument is null; fall back to the node itself so
        // the shared `document` is never overwritten with null.
        document = ctx.ownerDocument || ctx
    } else {
        ctx = document
    }
    return walkTree(select, ctx, (select, ctx) => evaluateDom(select, ctx))
}

/**
 * Run one or more CSS selections against a DOM context via walkTree.
 *
 * Context resolution mirrors selectDom: an HTML string is parsed with JSDOM
 * (and wgxpath installed on its window), a node is used directly, and a
 * missing context falls back to the module-level `document`.
 *
 * Each selector is run with `querySelectorAll` on the context and, when the
 * context exposes a `shadowRoot`, on that shadow root as well.
 *
 * @param {*} select - selection passed through to walkTree
 * @param {string|Node} [ctx] - HTML source or DOM node to query
 * @returns {*} per selector: the single match, or an array of matches
 */
function queryDom(select, ctx) {
    if (typeof ctx === 'string' && typeof JSDOM !== 'undefined') {
        const dom = new JSDOM(ctx)
        wgxpath.install(dom.window, true)
        ctx = dom.window.document
        document = ctx
    } else if (ctx) {
        // A Document's ownerDocument is null; fall back to the node itself so
        // the shared `document` is never overwritten with null.
        document = ctx.ownerDocument || ctx
    } else {
        ctx = document
    }
    return walkTree(select, ctx, (select, ctx) => {
        const matches = Array.from(ctx.querySelectorAll(select)).filter(Boolean)
        if (ctx.shadowRoot) {
            matches.push(
                ...Array.from(ctx.shadowRoot.querySelectorAll(select)).filter(Boolean))
        }
        // a lone match is unwrapped for caller convenience
        return matches.length === 1 ? matches[0] : matches
    })
}

// TODO: try catch with esquery, vm.runInThisContext, convert and select DOM, and jsel

// from least nuanced to most nuanced, CSS -> XPath -> custom ASTQ
//   Most xpath like //Element will not work on CSS, might have a problem with *
function evaluateQuery(select, ctx) {
    try {
        var esquery = require('esquery');
        // we might have to help out the CSS parser here
        if (!select.match(/^\/\/|\*\/|\.\//ig)) // probably XPath, fall through
            return esquery(ctx, select);
    } catch (e) {
        if (!e.name.includes('SyntaxError')
            && !e.message.includes('Cannot find module')) {
            throw e;
        }
    }

    try {
        var jsel = require('jsel');
        return jsel(ctx).selectAll(select);
    } catch (e) {
        if (!e.message.includes('XPath parse error')
            && !e.message.includes('Unexpected character')
            && !e.message.includes('Cannot find module')) {
            throw e;
        }
    }
    try {
        var ASTQ = require("astq");
        var astq = new ASTQ();
        return astq.query(ctx, select);
    } catch (e) {
        if (!e.message.includes('query parsing failed')) {
            throw e;
        }
    }

    throw new Error(`Could not parse select query ${JSON.stringify(select)} using XPath, CSS, or ASTQ`);
}

/**
 * Select from an arbitrary object tree: hands `select` and `ctx` to walkTree
 * with evaluateQuery as the evaluator.
 *
 * TODO: when converting to html, make sure to only return
 *   matching child objects not their attributes containers
 * TODO: something when we receive a string?
 *   Try to parse with all different selectors?
 *
 * @param {*} select - selection passed through to walkTree
 * @param {*} ctx - tree to select from
 * @returns {*} whatever walkTree returns
 */
function selectTree(select, ctx) {
    const evaluator = evaluateQuery
    return walkTree(select, ctx, evaluator)
}


// Public API of this module: the XPath/CSS DOM helpers (evaluateDom,
// selectDom, queryDom) and the object-tree helpers (evaluateQuery, selectTree).
module.exports = {
    evaluateDom,
    evaluateQuery,
    selectTree,
    selectDom,
    queryDom,
}

What the code could have been:

import { walkTree } from '../Core/importer.js';
import { JSDOM, XPathResult, ORDERED_NODE_ITERATOR_TYPE, ANY_TYPE } from 'jsdom';
import wgxpath from 'wgxpath';
import esquery from 'esquery';
import jsel from 'jsel';
import ASTQ from 'astq';

// XPathResult result-type codes mapped to descriptive names.
// The codes are written as numeric literals (0 = ANY_TYPE, 1 = NUMBER_TYPE,
// 2 = STRING_TYPE, 3 = BOOLEAN_TYPE, 5 = ORDERED_NODE_ITERATOR_TYPE) so the
// table does not depend on constants being importable: if the imported names
// were undefined, the computed keys would both collapse to "undefined".
// NOTE(review): jsdom's documented exports do not appear to include these
// constants — confirm before relying on the import.
const supportedResultTypes = {
  0: 'anyType',
  1: 'numberType',
  2: 'stringType',
  3: 'booleanType',
  5: 'orderedNodeIterator',
};

/**
 * Bundles the DOM/XPath/CSS/tree selection helpers around a "current
 * document" that is remembered between calls.
 *
 * `this.document` is the document used by evaluateDom; selectDom and queryDom
 * update it whenever they are given an HTML string or a DOM node.
 */
class DOMEvaluator {
  /**
   * @param {Document} document - initial document used for XPath evaluation
   * @param {object} [options] - stored on the instance; not read by this class
   */
  constructor(document, options = {}) {
    this.document = document;
    this.options = options;
    this.wgxpath = wgxpath;
  }

  /**
   * Turn the caller-supplied context into a DOM node and remember which
   * document XPath evaluation must run against.
   *
   * @param {string|Node} [ctx] - HTML source, DOM node, or nothing
   * @returns {Node} the node to use as selection context
   */
  resolveContext(ctx) {
    if (typeof ctx === 'string' && JSDOM) {
      const dom = new JSDOM(ctx);
      this.wgxpath.install(dom.window, true);
      this.document = dom.window.document;
      return this.document;
    }
    if (ctx) {
      // A Document's ownerDocument is null; use the node itself in that case.
      this.document = ctx.ownerDocument || ctx;
      return ctx;
    }
    return this.document;
  }

  /**
   * Evaluate an XPath expression against `ctx` using `this.document`.
   *
   * @param {string} select - XPath expression
   * @param {Node} ctx - context node
   * @returns {Array|number|string|boolean} node values/nodes, or a scalar
   *   when the expression does not produce a node-set
   * @throws rethrows evaluation errors other than the "not a node-set" reports
   */
  evaluateDom(select, ctx) {
    // XPathResult result-type codes, written out so evaluation does not
    // depend on these constants being importable.
    const ANY = 0;
    const NUMBER = 1;
    const STRING = 2;
    const BOOLEAN = 3;
    const ORDERED_NODE_ITERATOR = 5;

    try {
      if (select.includes('//*')) {
        console.warn(`Possible slow query evaluation due to wildcard: ${select}`);
      }
      const iterator = this.document.evaluate(select, ctx, null, ORDERED_NODE_ITERATOR, null);
      const values = [];
      let node;
      while ((node = iterator.iterateNext())) {
        // Null-check rather than truthiness so empty-string values survive.
        values.push(node.nodeValue != null ? node.nodeValue : node);
      }
      return values;
    } catch (error) {
      // Tolerate non-Error throws so the original failure is what propagates.
      const message = String((error && error.message) || '');
      const isScalarQuery = message.includes('Value should be a node-set')
        || message.includes('You should have asked');
      if (!isScalarQuery) {
        throw error;
      }
      const scalar = this.document.evaluate(select, ctx, null, ANY, null);
      switch (scalar.resultType) {
        case NUMBER:
          return scalar.numberValue;
        case STRING:
          return scalar.stringValue;
        case BOOLEAN:
          return scalar.booleanValue;
        default:
          return scalar.resultValue;
      }
    }
  }

  /**
   * Evaluate a query against an object tree, trying esquery (CSS-like), then
   * jsel (XPath), then ASTQ. Known parse failures move on to the next engine;
   * any other error is rethrown.
   *
   * @param {string} select - query expression
   * @param {*} ctx - tree to query
   * @returns {*} result of the first engine that handles the query
   * @throws {Error} when no engine could parse the query
   */
  evaluateQuery(select, ctx) {
    try {
      // Queries that look like XPath are left to the XPath engine.
      if (!/^\/\/|\*\/|\.\//i.test(select)) {
        return esquery(ctx, select);
      }
    } catch (error) {
      if (!error.name.includes('SyntaxError') && !error.message.includes('Cannot find module')) {
        throw error;
      }
    }

    try {
      return jsel(ctx).selectAll(select);
    } catch (error) {
      if (!error.message.includes('XPath parse error')
        && !error.message.includes('Unexpected character')
        && !error.message.includes('Cannot find module')) {
        throw error;
      }
    }

    try {
      const astq = new ASTQ();
      return astq.query(ctx, select);
    } catch (error) {
      if (!error.message.includes('query parsing failed')) {
        throw error;
      }
    }

    throw new Error(`Could not parse select query ${JSON.stringify(select)} using XPath, CSS, or ASTQ`);
  }

  /**
   * Run XPath selection(s) against a context via walkTree.
   *
   * @param {*} select - selection passed through to walkTree
   * @param {string|Node} [ctx] - HTML source or DOM node
   * @returns {*} whatever walkTree returns
   */
  selectDom(select, ctx) {
    const context = this.resolveContext(ctx);
    return walkTree(select, context, (query, node) => this.evaluateDom(query, node));
  }

  /**
   * Run CSS selection(s) against a context via walkTree. Each selector is
   * applied with querySelectorAll to the context node and, when present, to
   * its shadowRoot.
   *
   * @param {*} select - selection passed through to walkTree
   * @param {string|Node} [ctx] - HTML source or DOM node
   * @returns {*} per selector: the single match, or an array of matches
   */
  queryDom(select, ctx) {
    const context = this.resolveContext(ctx);
    return walkTree(select, context, (query, node) => {
      // querySelectorAll results are iterable but not arrays, so copy first.
      const matches = Array.from(node.querySelectorAll(query)).filter(Boolean);
      if (node.shadowRoot) {
        matches.push(...Array.from(node.shadowRoot.querySelectorAll(query)).filter(Boolean));
      }
      return matches.length === 1 ? matches[0] : matches;
    });
  }

  /**
   * Select from an object tree with evaluateQuery. An HTML string is parsed
   * with JSDOM first and its document is used as the tree. Errors propagate
   * unchanged.
   *
   * @param {*} select - selection passed through to walkTree
   * @param {*} ctx - tree, or HTML source string
   * @returns {*} whatever walkTree returns
   */
  selectTree(select, ctx) {
    let root = ctx;
    if (typeof ctx === 'string') {
      const dom = new JSDOM(ctx);
      this.wgxpath.install(dom.window, true);
      root = dom.window.document;
    }
    return walkTree(select, root, (query, node) => this.evaluateQuery(query, node));
  }
}

// Module-level evaluator shared by every exported wrapper below. It is
// constructed with an empty placeholder object as its document.
const domEvaluator = new DOMEvaluator({}, {});

// Each exported function forwards (select, ctx) to the shared evaluator.
// NOTE(review): this snippet brings its dependencies in with ES `import`
// statements but exports through CommonJS `module.exports` — confirm which
// module system is intended.
module.exports = {
  selectDom: (select, ctx) => domEvaluator.selectDom(select, ctx),
  queryDom: (select, ctx) => domEvaluator.queryDom(select, ctx),
  selectTree: (select, ctx) => domEvaluator.selectTree(select, ctx),
  evaluateDom: (select, ctx) => domEvaluator.evaluateDom(select, ctx),
  evaluateQuery: (select, ctx) => domEvaluator.evaluateQuery(select, ctx),
  supportedResultTypes,
};

Function Breakdown: evaluateDom

Overview

The evaluateDom function evaluates a given XPath query on a DOM document, relative to a context node. It handles both node-set queries and queries that produce a scalar value; CSS selectors are not handled here (see queryDom).

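Parameters

  * select: the XPath expression to evaluate.
  * ctx: the DOM node used as the evaluation context.
  * query: an optional query function; it is currently unused.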

Implementation

The function consists of two main sections:

  1. Error handling: The function catches any errors that occur during execution. If the error is a SyntaxError, it is ignored and the function proceeds. Otherwise, the error is re-thrown.
  2. XPath evaluation: The function attempts to evaluate the XPath query using the document.evaluate method. If the query contains a wildcard (//*), a warning is logged. The function then iterates over the resulting nodes and returns an array of node values.

XPath Evaluation Types

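The function supports the following XPath evaluation types:

  * ORDERED_NODE_ITERATOR_TYPE (5): the default; matching nodes are iterated in order and collected into an array.
  * ANY_TYPE (0): the fallback when the query does not produce a node-set; the result is then read according to its actual type.
  * NUMBER_TYPE (1), STRING_TYPE (2), BOOLEAN_TYPE (3): scalar results, returned as a number, string, or boolean respectively.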

Return Value

The function returns an array of node values or a single value (number, string, or boolean) depending on the query type.