collect external content and resources

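The code is part of a web application or static site generator written in JavaScript, and it transforms HTML templates into a format usable by that generator. It does this by replacing text nodes that contain Markdown with their rendered HTML, adding IDs to headings and paragraphs, and selecting and processing image and URL elements: image sources are rewritten, and the links, images, stylesheets, scripts, and CSS background URLs referenced by the page are collected. It uses several dependencies, such as Node's stream module, remarkable, and the project's own Core importer.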

Run example

var {Readable} = require('stream');
var {Remarkable} = require('remarkable');
var importer = require('../Core');
var {selectDom} = importer.import("select tree");

var md = new Remarkable({html: true, xhtmlOut: true, breaks: true});

// Paragraph tags, <br> tags and whitespace. These are stripped from both the
// rendered markdown and its source text before the two are compared, so that
// purely structural differences do not count as "markdown changed something".
var TRIM_ENTITIES = /<\/?\s*p\s*>|[\s\n\r]+|<\s*br\s*\/?\s*>/ig

/**
 * Turn arbitrary text into an identifier-friendly string: every character
 * other than a letter, digit or hyphen becomes '-', and the result is cut
 * to at most 40 characters.
 *
 * @param {string} name - text to sanitize
 * @returns {string} sanitized text, at most 40 characters long
 */
function safeName(name) {
    return name.replace(/[^a-z0-9\-]/ig, '-').substr(0, 40);
}

/**
 * Post-process a page and record the external resources it references.
 *
 * Text nodes whose markdown rendering differs from their source are replaced
 * by the rendered elements, headings and paragraphs receive ids, image
 * sources are rewritten to their "-final.jpg" variant, link targets are
 * appended to `routes`, and image / stylesheet / script / CSS background
 * URLs are appended to `rendered`.
 *
 * @param {string} page - page markup (presumably an HTML string; it is handed
 *   to selectDom and also scanned with a regular expression)
 * @param {Array} rendered - mutated: receives the referenced resource URLs
 * @param {Array} routes - mutated: receives the decoded href of every link
 * @returns {Promise<Readable>} resolves to a stream containing the new markup
 */
function collectExternalResources(page, rendered, routes) {
    // get all images and urls from template
    var body = selectDom('*', page);

    // replace all leaf Text nodes with markdown
    var textObjs = selectDom([`//*[not(self::STYLE) and contains(., /text())]`], body);
    textObjs.forEach(parent => {
        parent.childNodes.forEach(string => {
            if(string.nodeType !== 3) return;
            var mdHtml = md.render(string.textContent);
            // if all markdown did was insert a paragraph and line break, use value instead
            if(mdHtml.replace(TRIM_ENTITIES, '').trim()
                != string.textContent.replace(TRIM_ENTITIES, '').trim()) {
                string.replaceWith.apply(string,
                    selectDom(['//BODY/*'], `<html><body>${mdHtml}</body></html>`));
            }
        })
    })

    // add IDs to h1, h2, h3, etc elements that match their text contents
    var headingsObjs = selectDom(['(//h1|//h2|//h3|//h4)[not(@id) and not(./ancestor::nav)]'], body);
    headingsObjs.forEach(h => h.setAttribute('id', safeName(h.textContent)));

    var paragraphs = selectDom(['//p'], body)
    paragraphs.forEach(p => {
        p.setAttribute('class', selectDom(['.//*'], p)
            .map(e => e.tagName).join(' '))
        var img = selectDom(['.//img'], p)[0]
        // a paragraph can have neither text nor an image; guard the lookup so
        // such a paragraph gets the bare 'id' prefix instead of a TypeError
        var id = 'id' + safeName(p.textContent || (img && img.src) || '')
        p.setAttribute('id', id)
        if(img) {
            var style = img.ownerDocument.createElement('style')
            var src = decodeURIComponent(img.getAttribute('src'))
                .replace(/(-final)*\.[^\.]*$/ig, '')
            style.appendChild(img.ownerDocument.createTextNode(` #${id}:before {background-image: url("${src}-final.jpg");}`));
            p.parentNode.insertBefore(style, p)
        }
    })

    var linksObjs = selectDom(['//a[@href]'], body);
    var links = linksObjs.map(l => decodeURIComponent(l.getAttribute('href')));

    // TODO: convert images and add timestamps, add svg
    var imgObjs = selectDom(['//img[@src]'], body);
    var imgs = imgObjs.map(l => decodeURIComponent(l.getAttribute('src')));
    imgObjs.forEach(img => {
        var src = decodeURIComponent(img.getAttribute('src'))
            .replace(/(-final)*\.[^\.]*$/ig, '-final.jpg')
        img.setAttribute('src', src)
    })

    // TODO: scan for urls and inline
    var stylesObjs = selectDom(['//link[@href]'], body);
    var styles = stylesObjs.map(l => l.getAttribute('href'));

    // TODO: add timestamps and inline
    var scriptsObjs = selectDom(['//script[@src]'], body);
    var scripts = scriptsObjs.map(l => l.getAttribute('src'));

    // TODO: add CSS imports
    // the parentheses of url(...) must be escaped to be matched literally
    var backgrounds = importer.regexToArray(/url\(['"]?(.*?)['"]?\)/ig, page, 1);

    var searches = [].concat.apply([], [
        imgs,
        styles,
        backgrounds,
        scripts
    ])

    links.forEach(s => routes[routes.length] = s)
    searches.forEach(s => rendered[rendered.length] = s)

    // TODO: copy resource images to output directory

    var newPage = body.ownerDocument.documentElement.outerHTML
    backgrounds.forEach(b => newPage = newPage.replace(b,
        b.replace(/(-final)*\.[^\.]*$/ig, '-final.jpg')))

    var stream = new Readable();
    stream.push(newPage);
    stream.push(null);
    return Promise.resolve(stream);
}

module.exports = collectExternalResources;

What the code could have been:

const { Readable } = require('stream');
const { Remarkable } = require('remarkable');
const importer = require('../Core');

// The original module obtains selectDom through the importer's "select tree"
// lookup, not as a direct property of ../Core, so do the same here.
const { selectDom } = importer.import("select tree");

// Remarkable is a named class export and has to be constructed with `new`.
const md = new Remarkable({ html: true, xhtmlOut: true, breaks: true });

// Paragraph tags, <br> tags and whitespace. Stripped from both the rendered
// markdown and its source text before the two are compared.
const TRIM_ENTITIES = /<\/?\s*p\s*>|[\s\n\r]+|<\s*br\s*\/?\s*>/ig;

/**
 * Build an identifier-friendly version of a piece of text.
 *
 * Any character that is not an ASCII letter, a digit or a hyphen is swapped
 * for '-', then the result is truncated to 40 characters.
 *
 * @param {string} name - text to sanitize
 * @returns {string} the sanitized text, never longer than 40 characters
 */
function safeName(name) {
  const MAX_LENGTH = 40;
  const hyphenated = name.replace(/[^a-z0-9\-]/ig, '-');
  return hyphenated.slice(0, MAX_LENGTH);
}

/**
 * Post-process a page and record the external resources it references.
 *
 * In order, the function:
 *  1. replaces text nodes whose markdown rendering differs from their source
 *     with the rendered elements;
 *  2. gives h1-h4 headings outside <nav> that lack an id one derived from
 *     their text;
 *  3. gives every <p> a class listing its descendants' tag names and an id,
 *     and, when it contains an image, inserts a <style> rule before it that
 *     sets the image's "-final.jpg" variant as the :before background;
 *  4. rewrites every <img src> to its "-final.jpg" variant;
 *  5. appends link targets to `routes` and image / stylesheet / script /
 *     CSS background URLs to `rendered`.
 *
 * @param {string} page - page markup (presumably an HTML string; it is handed
 *   to selectDom and also scanned with a regular expression)
 * @param {Array} rendered - mutated: receives the referenced resource URLs
 * @param {Array} routes - mutated: receives the decoded href of every link
 * @returns {Promise<Readable>} resolves to a stream containing the new markup
 */
function collectExternalResources(page, rendered, routes) {
  const FINAL_SUFFIX = /(-final)*\.[^\.]*$/ig;
  const body = selectDom('*', page);

  // replace all leaf Text nodes with markdown
  const textObjs = selectDom([`//*[not(self::STYLE) and contains(., /text())]`], body);
  textObjs.forEach((parent) => {
    // Snapshot the children: replaceWith() below mutates the live list.
    Array.from(parent.childNodes).forEach((node) => {
      if (node.nodeType !== 3) return;
      const mdHtml = md.render(node.textContent);
      const trimmedText = node.textContent.replace(TRIM_ENTITIES, '').trim();
      const trimmedHtml = mdHtml.replace(TRIM_ENTITIES, '').trim();

      // If all markdown did was add a paragraph or line break, keep the text.
      if (trimmedHtml !== trimmedText) {
        // Markdown may render to several sibling elements; insert all of them.
        const renderedNodes = selectDom(['//BODY/*'], `<html><body>${mdHtml}</body></html>`);
        node.replaceWith(...renderedNodes);
      }
    });
  });

  // add IDs to h1, h2, h3, etc elements that match their text contents
  const headingsObjs = selectDom(['(//h1|//h2|//h3|//h4)[not(@id) and not(./ancestor::nav)]'], body);
  headingsObjs.forEach((h) => h.setAttribute('id', safeName(h.textContent)));

  const paragraphs = selectDom(['//p'], body);
  paragraphs.forEach((p) => {
    p.setAttribute('class', selectDom(['.//*'], p).map((e) => e.tagName).join(' '));

    // Look the image up once; a paragraph may have neither text nor an image,
    // in which case the id is just the bare 'id' prefix.
    const img = p.querySelector('img');
    const imgSrc = img ? img.getAttribute('src') : '';
    const id = `id${safeName(p.textContent || imgSrc || '')}`;
    p.setAttribute('id', id);

    if (img) {
      const src = decodeURIComponent(img.getAttribute('src')).replace(FINAL_SUFFIX, '');
      const style = p.ownerDocument.createElement('style');
      style.textContent = `#${id}:before {background-image: url("${src}-final.jpg");}`;
      p.parentNode.insertBefore(style, p);
    }
  });

  // URLs are read with getAttribute() so the values recorded are the ones
  // written in the markup; the .src/.href properties may return a resolved
  // URL depending on the DOM implementation.
  const linksObjs = selectDom(['//a[@href]'], body);
  const links = linksObjs.map((l) => decodeURIComponent(l.getAttribute('href')));

  // TODO: convert images and add timestamps, add svg
  const imgObjs = selectDom(['//img[@src]'], body);
  const imgs = imgObjs.map((img) => decodeURIComponent(img.getAttribute('src')));
  imgObjs.forEach((img) => {
    const src = decodeURIComponent(img.getAttribute('src')).replace(FINAL_SUFFIX, '-final.jpg');
    img.setAttribute('src', src);
  });

  // TODO: scan for urls and inline
  const stylesObjs = selectDom(['//link[@href]'], body);
  const styles = stylesObjs.map((l) => l.getAttribute('href'));

  // TODO: add timestamps and inline
  const scriptsObjs = selectDom(['//script[@src]'], body);
  const scripts = scriptsObjs.map((l) => l.getAttribute('src'));

  // TODO: add CSS imports
  const backgrounds = require('../Core').regexToArray(/url\(['"]?(.*?)['"]?\)/ig, page, 1);

  const searches = [].concat(imgs, styles, backgrounds, scripts);

  links.forEach((s) => routes.push(s));
  searches.forEach((s) => rendered.push(s));

  // TODO: copy resource images to output directory

  // `let`, not `const`: each background URL found is rewritten in turn.
  let newPage = body.ownerDocument.documentElement.outerHTML;
  backgrounds.forEach((b) => {
    newPage = newPage.replace(b, b.replace(FINAL_SUFFIX, '-final.jpg'));
  });

  const stream = new Readable();
  stream.push(newPage);
  stream.push(null);
  return Promise.resolve(stream);
}

// The module's export is the collectExternalResources function itself.
module.exports = collectExternalResources;

This code appears to be a part of a web application or a static site generator, written in JavaScript. It transforms HTML templates into a format that can be used by a static site generator. Here's a high-level overview of the code:

Dependencies

Functions

collectExternalResources Function

Note

The code appears to be incomplete, as it references several variables and functions that are not defined in the code snippet. Additionally, the code seems to be part of a larger application, so some of the functions and variables may be defined elsewhere in the codebase.

Run example

collect external content and resources

What the code could have been:

Dependencies

Functions

collectExternalResources Function

Note

`collectExternalResources` Function