convert spreadsheet | find known routes to sheets | collect google sheets resources | Search

The code is a part of a web application or static site generator written in JavaScript, which transforms HTML templates into a format usable by a static site generator. It does this by selecting and processing image and URL elements, replacing text nodes with Markdown, adding IDs to headings and paragraphs, and using several dependencies such as stream and remarkable.

Run example

npm run import -- "collect external content and resources"

collect external content and resources

var {Readable} = require('stream');
var importer = require('../Core');
var {Remarkable} = require('remarkable');
var md = new Remarkable({html: true, xhtmlOut: true, breaks: true});
var importer = require('../Core');
var {selectDom} = importer.import("select tree");

// Matches the pieces that are ignored when comparing rendered Markdown with
// the plain text it came from: <p> / </p> tags, <br> / <br/> tags and any run
// of whitespace. Stripping these from both sides shows whether Markdown did
// anything beyond wrapping the text in a paragraph or adding line breaks.
var TRIM_ENTITIES = /<\/?\s*p\s*>|[\s\n\r]+|<\s*br\s*\/?\s*>/ig

/**
 * Turn arbitrary text into a token that is safe to use as an element id.
 *
 * Every character other than an ASCII letter, a digit or "-" is replaced with
 * "-", and the result is truncated to 40 characters.
 *
 * @param {string} name - Source text (heading text, paragraph text, image URL).
 *   null and undefined are treated as the empty string instead of throwing.
 * @returns {string} The sanitised name, at most 40 characters long.
 */
function safeName(name) {
    var text = name == null ? '' : String(name);
    // slice() replaces the deprecated String.prototype.substr()
    return text.replace(/[^a-z0-9\-]/ig, '-').slice(0, 40);
}

/**
 * Post-process a page's HTML and record the external content it references.
 *
 * In order, the function:
 *  1. replaces text nodes with their rendered Markdown when rendering changed
 *     more than paragraph / line-break wrapping;
 *  2. gives h1-h4 elements that have no id (and are not inside a <nav>) an id
 *     derived from their text;
 *  3. gives every <p> a class listing its descendant tag names and an id, and
 *     inserts a <style> rule with a `:before` background image in front of
 *     each <p> that contains an <img>;
 *  4. appends every link target to `routes` and every image, stylesheet,
 *     script and CSS url() reference to `rendered`;
 *  5. rewrites image sources and url() references to end in "-final.jpg".
 *
 * @param {string} page - HTML of the page. Presumably a string: it is handed
 *   to selectDom and also scanned with a regular expression - confirm with caller.
 * @param {Array<string>} rendered - Mutated: resource URLs found are appended.
 * @param {Array<string>} routes - Mutated: link targets found are appended.
 * @returns {Promise<Readable>} Resolves to a stream that emits the rewritten
 *   HTML as a single chunk.
 */
function collectExternalResources(page, rendered, routes) {
    // Matches a trailing file extension plus any "-final" suffix already in
    // front of it, so rewriting a URL twice does not stack suffixes.
    const FINAL_EXTENSION = /(-final)*\.[^\.]*$/ig;

    // node that every later query is evaluated against
    const body = selectDom('*', page);

    // replace all leaf Text nodes with markdown
    const textObjs = selectDom([`//*[not(self::STYLE) and contains(., /text())]`], body);
    textObjs.forEach(parent => {
        // Walk a snapshot: replaceWith() edits the live childNodes list, and
        // iterating that list directly can skip the sibling of a removed node.
        Array.from(parent.childNodes).forEach(string => {
            if(string.nodeType !== 3) return; // Text nodes only
            const mdHtml = md.render(string.textContent);
            // if all markdown did was insert a paragraph and line break, use value instead
            if(mdHtml.replace(TRIM_ENTITIES, '').trim()
               !== string.textContent.replace(TRIM_ENTITIES, '').trim()) {
                string.replaceWith(...selectDom(['//BODY/*'], `<html><body>${mdHtml}</body></html>`));
            }
        });
    });

    // add IDs to h1, h2, h3, etc elements that match their text contents
    const headingsObjs = selectDom(['(//h1|//h2|//h3|//h4)[not(@id) and not(./ancestor::nav)]'], body);
    headingsObjs.forEach(h => h.setAttribute('id', safeName(h.textContent)));

    const paragraphs = selectDom(['//p'], body);
    paragraphs.forEach(p => {
        p.setAttribute('class', selectDom(['.//*'], p)
                       .map(e => e.tagName).join(' '));
        const img = selectDom(['.//img'], p)[0];
        // Fall back to the image source only when there is an image: an empty
        // paragraph without one previously threw on `img.src`.
        const id = 'id' + safeName(p.textContent || (img && img.src) || '');
        p.setAttribute('id', id);
        if(img) {
            const style = img.ownerDocument.createElement('style');
            const src = decodeURIComponent(img.getAttribute('src'))
                .replace(FINAL_EXTENSION, '');
            style.appendChild(img.ownerDocument.createTextNode(`
#${id}:before {background-image: url("${src}-final.jpg");}`));
            p.parentNode.insertBefore(style, p);
        }
    });

    const linksObjs = selectDom(['//a[@href]'], body);
    const links = linksObjs.map(l => decodeURIComponent(l.getAttribute('href')));

    // TODO: convert images and add timestamps, add svg
    const imgObjs = selectDom(['//img[@src]'], body);
    // collect the original sources before they are rewritten below
    const imgs = imgObjs.map(l => decodeURIComponent(l.getAttribute('src')));
    imgObjs.forEach(img => {
        const src = decodeURIComponent(img.getAttribute('src'))
            .replace(FINAL_EXTENSION, '-final.jpg');
        img.setAttribute('src', src);
    });

    // TODO: scan for urls and inline
    const stylesObjs = selectDom(['//link[@href]'], body);
    const styles = stylesObjs.map(l => l.getAttribute('href'));

    // TODO: add timestamps and inline
    const scriptsObjs = selectDom(['//script[@src]'], body);
    const scripts = scriptsObjs.map(l => l.getAttribute('src'));

    // TODO: add CSS imports
    // url(...) references are taken from the input `page`, not the modified DOM
    const backgrounds = importer.regexToArray(/url\(['"]?(.*?)['"]?\)/ig, page, 1);

    const searches = [].concat(imgs, styles, backgrounds, scripts);

    links.forEach(s => routes.push(s));
    searches.forEach(s => rendered.push(s));

    // TODO: copy resource images to output directory
    let newPage = body.ownerDocument.documentElement.outerHTML;
    backgrounds.forEach(b => {
        const finalUrl = b.replace(FINAL_EXTENSION, '-final.jpg');
        // Each entry rewrites the first remaining occurrence of its URL. The
        // function replacer inserts the URL literally even if it contains "$".
        newPage = newPage.replace(b, () => finalUrl);
    });

    const stream = new Readable();
    stream.push(newPage);
    stream.push(null); // end of stream
    return Promise.resolve(stream);
}

module.exports = collectExternalResources;

What the code could have been:

const { Readable } = require('stream');
const importer = require('../Core');
const { Remarkable } = require('remarkable');

// selectDom is resolved through the Core importer ("select tree"), the same
// way the original listing obtains it; it is not read off ../Core directly.
const { selectDom } = importer.import('select tree');

// Remarkable is a named export that has to be constructed with `new`;
// calling the module object as a function does not create a renderer.
const md = new Remarkable({ html: true, xhtmlOut: true, breaks: true });

// Matches <p> / </p> tags, <br> / <br/> tags and runs of whitespace, which are
// ignored when comparing rendered Markdown with the text it came from.
const TRIM_ENTITIES = /<\/?\s*p\s*>|[\s\n\r]+|<\s*br\s*\/?\s*>/ig;

/**
 * Build an id-safe token from arbitrary text.
 *
 * Characters other than ASCII letters, digits and "-" become "-"; the result
 * is cut to at most 40 characters.
 *
 * @param {string} name - Text to sanitise; null/undefined yield ''.
 * @returns {string} Sanitised token, at most 40 characters long.
 */
function safeName(name) {
  if (name == null) return '';
  // slice() instead of the deprecated substr()
  return String(name).replace(/[^a-z0-9\-]/ig, '-').slice(0, 40);
}

/**
 * Rewrite a page's HTML and collect the links and resources it refers to.
 *
 * Text nodes are replaced by their rendered Markdown when rendering changed
 * more than paragraph / line-break wrapping. Headings (h1-h4 without an id,
 * outside <nav>) and paragraphs receive ids. Paragraphs containing an image
 * get a preceding <style> rule with a `:before` background image. Image
 * sources and CSS url() references are rewritten to end in "-final.jpg".
 *
 * @param {string} page - HTML of the page. Presumably a string: it is passed
 *   to selectDom and also scanned with a regular expression - confirm with caller.
 * @param {Array<string>} rendered - Mutated: image, stylesheet, url() and
 *   script references are appended.
 * @param {Array<string>} routes - Mutated: link targets are appended.
 * @returns {Promise<Readable>} Resolves to a stream emitting the rewritten HTML.
 */
function collectExternalResources(page, rendered, routes) {
  // Trailing file extension plus any "-final" suffix already preceding it.
  const finalExtension = /(-final)*\.[^\.]*$/ig;

  const body = selectDom('*', page);
  const textObjs = selectDom([`//*[not(self::STYLE) and contains(., /text())]`], body);

  textObjs.forEach((parent) => {
    // Snapshot the children: replaceWith() mutates the live childNodes list.
    Array.from(parent.childNodes).forEach((string) => {
      if (string.nodeType !== 3) return; // Text nodes only
      const mdHtml = md.render(string.textContent);
      const trimmedText = string.textContent.replace(TRIM_ENTITIES, '').trim();
      const trimmedHtml = mdHtml.replace(TRIM_ENTITIES, '').trim();

      if (trimmedHtml !== trimmedText) {
        // Insert every rendered element. Taking only [0] dropped the rest and
        // inserted the text "undefined" when nothing was rendered.
        const renderedNodes = selectDom(['//BODY/*'], `<html><body>${mdHtml}</body></html>`);
        string.replaceWith(...renderedNodes);
      }
    });
  });

  const headingsObjs = selectDom(['(//h1|//h2|//h3|//h4)[not(@id) and not(./ancestor::nav)]'], body);
  headingsObjs.forEach((h) => h.setAttribute('id', safeName(h.textContent)));

  const paragraphs = selectDom(['//p'], body);
  paragraphs.forEach((p) => {
    p.setAttribute('class', selectDom(['.//*'], p).map((e) => e.tagName).join(' '));

    // Look the image up once, before it is needed for the id fallback; an
    // empty paragraph without an image used to throw on `null.src`.
    const img = p.querySelector('img');
    const id = `id${safeName(p.textContent || (img && img.src) || '')}`;
    p.setAttribute('id', id);

    if (img) {
      // Raw attribute values are used throughout, matching the original
      // listing; the `.src` / `.href` properties may return resolved URLs.
      const src = decodeURIComponent(img.getAttribute('src'))
        .replace(finalExtension, '');
      const style = p.ownerDocument.createElement('style');
      style.textContent = `#${id}:before {background-image: url("${src}-final.jpg");}`;
      p.parentNode.insertBefore(style, p);
    }
  });

  const linksObjs = selectDom(['//a[@href]'], body);
  const links = linksObjs.map((l) => decodeURIComponent(l.getAttribute('href')));

  const imgObjs = selectDom(['//img[@src]'], body);
  // collect the original sources before they are rewritten below
  const imgs = imgObjs.map((img) => decodeURIComponent(img.getAttribute('src')));
  imgObjs.forEach((img) => {
    const src = decodeURIComponent(img.getAttribute('src'))
      .replace(finalExtension, '-final.jpg');
    img.setAttribute('src', src);
  });

  const stylesObjs = selectDom(['//link[@href]'], body);
  const styles = stylesObjs.map((l) => l.getAttribute('href'));

  const scriptsObjs = selectDom(['//script[@src]'], body);
  const scripts = scriptsObjs.map((l) => l.getAttribute('src'));

  // url(...) references are taken from the input `page`, not the modified DOM
  const backgrounds = require('../Core').regexToArray(/url\(['"]?(.*?)['"]?\)/ig, page, 1);

  const searches = [].concat(imgs, styles, backgrounds, scripts);

  links.forEach((s) => routes.push(s));
  searches.forEach((s) => rendered.push(s));

  // `let`, not `const`: the page is reassigned once per background URL.
  let newPage = body.ownerDocument.documentElement.outerHTML;
  backgrounds.forEach((b) => {
    const finalUrl = b.replace(finalExtension, '-final.jpg');
    // The function replacer inserts the URL literally even if it contains "$".
    newPage = newPage.replace(b, () => finalUrl);
  });

  const stream = new Readable();
  stream.push(newPage);
  stream.push(null); // end of stream
  return Promise.resolve(stream);
}

module.exports = collectExternalResources;

Code Breakdown

This code appears to be a part of a web application or a static site generator, written in JavaScript. It transforms HTML templates into a format that can be used by a static site generator. Here's a high-level overview of the code:

Dependencies

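The code requires several dependencies: `stream` (for `Readable`), the project's `../Core` importer (used to load `selectDom` from "select tree" and to call `regexToArray`), and `remarkable` (the `Remarkable` Markdown renderer).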

Functions

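The code defines the following functions: `safeName`, which replaces every character other than letters, digits and hyphens with a hyphen and truncates the result to 40 characters, and `collectExternalResources`, which is described below.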

collectExternalResources Function

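The collectExternalResources function takes three arguments: `page` (the HTML to process), `rendered` (an array to which image, stylesheet, script and CSS `url()` references are appended) and `routes` (an array to which link targets are appended). It returns a Promise that resolves to a readable stream containing the rewritten HTML.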

The function performs the following tasks:

  1. Loads the template: calls the selectDom function as `selectDom('*', page)` and uses the resulting node as the context for every later query (images, links, stylesheets and scripts are selected from it further down).
  2. Replaces text nodes with Markdown: uses the Remarkable library to render Markdown text in the HTML template. If the rendered Markdown text is different from the original text, it replaces the original text with the rendered Markdown text.
  3. Adds IDs to headings: uses the selectDom function to select headings (h1, h2, h3, etc.) in the HTML template that do not have an ID attribute. It sets the ID attribute of each heading to a safe name generated by the safeName function.
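  4. Adds IDs to paragraphs: uses the selectDom function to select paragraphs in the HTML template. It sets each paragraph's class attribute to the tag names of its descendant elements and its ID attribute to "id" followed by a safe name generated by the safeName function from the paragraph text or, when the paragraph has no text, from the source of its image. If the paragraph contains an image, a style element is also inserted before it that sets the image (with a "-final.jpg" suffix) as the background of the paragraph's `:before` pseudo-element.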

Note

The code appears to be incomplete, as it references several variables and functions that are not defined in the code snippet. Additionally, the code seems to be part of a larger application, so some of the functions and variables may be defined elsewhere in the codebase.