This code is part of a web application or static site generator written in JavaScript. It transforms an HTML template into a form the static site generator can publish: it collects image and URL references, replaces text nodes with their rendered Markdown, and adds IDs to headings and paragraphs. It relies on several dependencies, such as `stream` and `remarkable`.
npm run import -- "collect external content and resources"
// Readable is used to hand the finished page back to the caller as a stream.
var {Readable} = require('stream');
// Project module loader; this file uses its import() and regexToArray() members.
var importer = require('../Core');
var {Remarkable} = require('remarkable');
// Markdown renderer configured with the html, xhtmlOut and breaks options enabled.
var md = new Remarkable({html: true, xhtmlOut: true, breaks: true});
// NOTE(review): second require of '../Core' — looks redundant with the one above; confirm before removing.
var importer = require('../Core');
var {selectDom} = importer.import("select tree");
// Matches <p> / </p> tags, runs of whitespace, and <br> tags (case-insensitive, global).
// Used to strip that markup from both sides before comparing rendered markdown with its source text.
var TRIM_ENTITIES = /<\/?\s*p\s*>|[\s\n\r]+|<\s*br\s*\/?\s*>/ig
/**
 * Build an identifier-friendly version of a string.
 *
 * Every character other than an ASCII letter, digit or hyphen becomes a
 * hyphen, and the result is cut off after 40 characters.
 *
 * @param {string} name - text to convert.
 * @returns {string} the hyphenated text, at most 40 characters long.
 */
function safeName(name) {
  const hyphenated = name.replace(/[^a-z0-9\-]/ig, '-');
  return hyphenated.slice(0, 40);
}
/**
 * Prepare an HTML page for static output and record what it references.
 *
 * The page is parsed with selectDom, then:
 *  - text nodes whose markdown rendering differs from the plain text are
 *    replaced by the rendered elements;
 *  - h1-h4 elements without an id (and not inside a nav) get an id derived
 *    from their text;
 *  - every p gets a class listing its descendant tag names and an id; when it
 *    contains an image with a src, a style rule using that image's
 *    "-final.jpg" variant as the :before background is inserted before it;
 *  - every img src is rewritten to its "-final.jpg" variant.
 *
 * @param {string} page - HTML source (parsed by selectDom and scanned for CSS url(...) values).
 * @param {Array<string>} rendered - appended with the img src, link href,
 *   CSS url(...) and script src values found, in that order.
 * @param {Array<string>} routes - appended with the decoded href of every anchor.
 * @returns {Promise<Readable>} resolves to a stream holding the rewritten HTML.
 */
function collectExternalResources(page, rendered, routes) {
  // A malformed escape (e.g. a bare "%") in one attribute should not abort
  // the whole page, so fall back to the raw value in that case only.
  function safeDecode(value) {
    try {
      return decodeURIComponent(value);
    } catch (err) {
      if (err instanceof URIError) return value;
      throw err;
    }
  }

  // Drop the file extension together with any existing "-final" suffixes.
  function stripExtension(url) {
    return url.replace(/(-final)*\.[^\.]*$/ig, '');
  }

  // Point a URL at its "-final.jpg" variant.
  function toFinalJpg(url) {
    return url.replace(/(-final)*\.[^\.]*$/ig, '-final.jpg');
  }

  // get all images and urls from template
  const body = selectDom('*', page);

  // replace all leaf Text nodes with markdown
  const textObjs = selectDom([`//*[not(self::STYLE) and contains(., /text())]`], body);
  textObjs.forEach(parent => {
    // Iterate over a snapshot: replaceWith() below mutates the live child list.
    Array.from(parent.childNodes).forEach(node => {
      if (node.nodeType !== 3) return;
      const mdHtml = md.render(node.textContent);
      // if all markdown did was insert a paragraph and line break, use value instead
      if (mdHtml.replace(TRIM_ENTITIES, '').trim()
          === node.textContent.replace(TRIM_ENTITIES, '').trim()) {
        return;
      }
      const renderedNodes = selectDom(['//BODY/*'], `<html><body>${mdHtml}</body></html>`);
      node.replaceWith(...renderedNodes);
    });
  });

  // add IDs to h1, h2, h3, etc elements that match their text contents
  const headingsObjs = selectDom(['(//h1|//h2|//h3|//h4)[not(@id) and not(./ancestor::nav)]'], body);
  headingsObjs.forEach(h => h.setAttribute('id', safeName(h.textContent)));

  const paragraphs = selectDom(['//p'], body);
  paragraphs.forEach(p => {
    p.setAttribute('class', selectDom(['.//*'], p)
      .map(e => e.tagName).join(' '));
    const img = selectDom(['.//img'], p)[0];
    // A paragraph may have neither text nor an image; fall back to an empty
    // name instead of dereferencing a missing image.
    const idSource = p.textContent || (img && img.src) || '';
    const id = 'id' + safeName(idSource);
    p.setAttribute('id', id);

    // Only emit a background rule when there is an actual src to point at.
    const rawSrc = img ? img.getAttribute('src') : null;
    if (rawSrc) {
      const src = stripExtension(safeDecode(rawSrc));
      const style = img.ownerDocument.createElement('style');
      style.appendChild(img.ownerDocument.createTextNode(
        `\n#${id}:before {background-image: url("${src}-final.jpg");}`));
      p.parentNode.insertBefore(style, p);
    }
  });

  const linksObjs = selectDom(['//a[@href]'], body);
  const links = linksObjs.map(l => safeDecode(l.getAttribute('href')));

  // TODO: convert images and add timestamps, add svg
  const imgObjs = selectDom(['//img[@src]'], body);
  // Collected before the rewrite below, so `rendered` receives the original sources.
  const imgs = imgObjs.map(l => safeDecode(l.getAttribute('src')));
  imgObjs.forEach(img => {
    img.setAttribute('src', toFinalJpg(safeDecode(img.getAttribute('src'))));
  });

  // TODO: scan for urls and inline
  const stylesObjs = selectDom(['//link[@href]'], body);
  const styles = stylesObjs.map(l => l.getAttribute('href'));

  // TODO: add timestamps and inline
  const scriptsObjs = selectDom(['//script[@src]'], body);
  const scripts = scriptsObjs.map(l => l.getAttribute('src'));

  // TODO: add CSS imports
  const backgrounds = importer.regexToArray(/url\(['"]?(.*?)['"]?\)/ig, page, 1);

  const searches = [].concat(imgs, styles, backgrounds, scripts);
  links.forEach(s => routes.push(s));
  searches.forEach(s => rendered.push(s));

  // TODO: copy resource images to output directory
  let newPage = body.ownerDocument.documentElement.outerHTML;
  backgrounds.forEach(b => {
    // A replacer function keeps "$" sequences in the URL from being read as
    // replacement patterns; the string pattern still replaces one occurrence per entry.
    newPage = newPage.replace(b, () => toFinalJpg(b));
  });

  const stream = new Readable();
  stream.push(newPage);
  stream.push(null);
  return Promise.resolve(stream);
}
module.exports = collectExternalResources;
const { Readable } = require('stream');
// remarkable exposes its renderer as the named `Remarkable` class, which is constructed with `new`.
const { Remarkable } = require('remarkable');
// selectDom comes from the "select tree" module, loaded through the project importer.
const { selectDom } = require('../Core').import('select tree');
// Markdown renderer configured with the html, xhtmlOut and breaks options enabled.
const md = new Remarkable({ html: true, xhtmlOut: true, breaks: true });
// Matches <p> / </p> tags, runs of whitespace, and <br> tags (case-insensitive, global).
// Used to strip that markup from both sides before comparing rendered markdown with its source text.
const TRIM_ENTITIES = /<\/?\s*p\s*>|[\s\n\r]+|<\s*br\s*\/?\s*>/ig;
/**
 * Convert text into a short, id-safe token.
 *
 * Anything that is not an ASCII letter, digit or hyphen is swapped for a
 * hyphen; the outcome is limited to its first 40 characters.
 *
 * @param {string} name - source text.
 * @returns {string} sanitized text of at most 40 characters.
 */
function safeName(name) {
  const maxLength = 40;
  const sanitized = name.replace(/[^a-z0-9\-]/ig, '-');
  return sanitized.substring(0, maxLength);
}
/**
 * Prepare an HTML page for static output and record what it references.
 *
 * Steps, in order: render markdown found in text nodes, give h1-h4 elements
 * and paragraphs ids, insert a :before background rule for paragraphs that
 * contain an image, rewrite every img src to its "-final.jpg" variant, and
 * collect the links and resources the page points at.
 *
 * @param {string} page - HTML source (parsed by selectDom and scanned for CSS url(...) values).
 * @param {Array<string>} rendered - appended with the img src, link href,
 *   CSS url(...) and script src values found, in that order.
 * @param {Array<string>} routes - appended with the decoded href of every anchor.
 * @returns {Promise<Readable>} resolves to a stream holding the rewritten HTML.
 */
function collectExternalResources(page, rendered, routes) {
  // Point a URL at its "-final.jpg" variant (any extension and existing "-final" suffixes are replaced).
  const toFinalJpg = (url) => url.replace(/(-final)*\.[^\.]*$/ig, '-final.jpg');

  const body = selectDom('*', page);

  const textObjs = selectDom([`//*[not(self::STYLE) and contains(., /text())]`], body);
  textObjs.forEach((parent) => {
    // Iterate over a snapshot: replaceWith() below mutates the live child list.
    Array.from(parent.childNodes).forEach((node) => {
      if (node.nodeType !== 3) return;
      const mdHtml = md.render(node.textContent);
      const trimmedText = node.textContent.replace(TRIM_ENTITIES, '').trim();
      const trimmedHtml = mdHtml.replace(TRIM_ENTITIES, '').trim();
      // Markdown that only added paragraph/line-break markup is left as plain text.
      if (trimmedHtml === trimmedText) return;
      // Insert every rendered element, not just the first; markdown can
      // produce several top-level elements from one text node.
      const renderedNodes = selectDom(['//BODY/*'], `<html><body>${mdHtml}</body></html>`);
      node.replaceWith(...renderedNodes);
    });
  });

  const headingsObjs = selectDom(['(//h1|//h2|//h3|//h4)[not(@id) and not(./ancestor::nav)]'], body);
  headingsObjs.forEach((h) => h.setAttribute('id', safeName(h.textContent)));

  const paragraphs = selectDom(['//p'], body);
  paragraphs.forEach((p) => {
    p.setAttribute('class', selectDom(['.//*'], p).map((e) => e.tagName).join(' '));
    const img = p.querySelector('img');
    // A paragraph may have neither text nor an image; fall back to an empty
    // name instead of dereferencing a missing image.
    const idSource = p.textContent || (img && img.src) || '';
    const id = `id${safeName(idSource)}`;
    p.setAttribute('id', id);

    // Only emit a background rule when there is an actual src to point at.
    const rawSrc = img ? img.getAttribute('src') : null;
    if (rawSrc) {
      const src = decodeURIComponent(rawSrc)
        .replace(/(-final)*\.[^\.]*$/ig, '');
      const style = p.ownerDocument.createElement('style');
      style.textContent = `#${id}:before {background-image: url("${src}-final.jpg");}`;
      p.parentNode.insertBefore(style, p);
    }
  });

  // Attributes are read with getAttribute so the values are the literal ones
  // matched by the XPath predicates, not URLs resolved by the DOM.
  const linksObjs = selectDom(['//a[@href]'], body);
  const links = linksObjs.map((l) => decodeURIComponent(l.getAttribute('href')));

  const imgObjs = selectDom(['//img[@src]'], body);
  // Collected before the rewrite below, so `rendered` receives the original sources.
  const imgs = imgObjs.map((img) => decodeURIComponent(img.getAttribute('src')));
  imgObjs.forEach((img) => {
    img.setAttribute('src', toFinalJpg(decodeURIComponent(img.getAttribute('src'))));
  });

  const stylesObjs = selectDom(['//link[@href]'], body);
  const styles = stylesObjs.map((l) => l.getAttribute('href'));

  const scriptsObjs = selectDom(['//script[@src]'], body);
  const scripts = scriptsObjs.map((l) => l.getAttribute('src'));

  const backgrounds = require('../Core').regexToArray(/url\(['"]?(.*?)['"]?\)/ig, page, 1);

  const searches = [].concat(imgs, styles, backgrounds, scripts);
  links.forEach((s) => routes.push(s));
  searches.forEach((s) => rendered.push(s));

  // `let`, because each background entry below rewrites the serialized page.
  let newPage = body.ownerDocument.documentElement.outerHTML;
  backgrounds.forEach((b) => {
    // A replacer function keeps "$" sequences in the URL from being read as
    // replacement patterns; the string pattern still replaces one occurrence per entry.
    newPage = newPage.replace(b, () => toFinalJpg(b));
  });

  const stream = new Readable();
  stream.push(newPage);
  stream.push(null);
  return Promise.resolve(stream);
}
module.exports = collectExternalResources;
Code Breakdown
This code appears to be a part of a web application or a static site generator, written in JavaScript. It transforms HTML templates into a format that can be used by a static site generator. Here's a high-level overview of the code:
The code requires several dependencies:

- `stream`: for working with streams of data
- `remarkable`: a Markdown parser for rendering Markdown text into HTML
- `select tree`: a library for selecting elements in an HTML document

The code defines the following functions:
- `safeName(name)`: takes a string `name` and returns a safe name by replacing every character that is not a letter, digit, or hyphen with a hyphen and truncating the result to 40 characters.
- `collectExternalResources(page, rendered, routes)`: the main function that transforms an HTML template into a format that can be used by a static site generator.

The `collectExternalResources` Function

The `collectExternalResources` function takes three arguments:
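- `page`: the HTML template to transform
- `rendered`: an array to which the function appends the image, stylesheet, script, and CSS `url(...)` references it finds
- `routes`: an array to which the function appends the decoded `href` of every link it finds

The function performs the following tasks: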
1. Uses the `selectDom` function to parse the HTML template into a DOM tree, from which the images and URLs are later collected.
2. Uses the `Remarkable` library to render the Markdown text in the HTML template. If the rendered Markdown differs from the original text (ignoring paragraph tags, line breaks, and whitespace), it replaces the original text with the rendered Markdown.
3. Uses the `selectDom` function to select headings (h1 through h4) in the HTML template that do not have an ID attribute and are not inside a `nav` element. It sets the ID attribute of each heading to a safe name generated by the `safeName` function.
4. Uses the `selectDom` function to select paragraphs in the HTML template. It sets the ID attribute of each paragraph to `id` followed by a safe name generated by the `safeName` function from the paragraph text, or from the image source when the paragraph has no text. If the paragraph contains an image, it also inserts a `style` rule that uses the image's `-final.jpg` variant as the paragraph's `:before` background.

The code appears to be incomplete on its own, as it references several variables and functions that are not defined in the code snippet. Additionally, the code seems to be part of a larger application, so some of the functions and variables may be defined elsewhere in the codebase.