The `summerizeAll` function extracts and summarizes all links from a provided `startPage` or `links` array by selecting a link-scraping tool, scraping links, extracting article content, summarizing each article, and persisting the summaries. It relies on several imported helpers — `getClient`, `extractArticle`, `summerizeArticle`, `defaultCollector`, and `persistSummaries` — and is exported as a module for use elsewhere.
npm run import -- "summarize all articles"
const getClient = importer.import("selenium client")
const extractArticle = importer.import("extract llm article")
const summerizeArticle = importer.import("summarize llm article")
const {
defaultCollector, persistSummaries
} = importer.import("default link collector")
// Select a link-scraping tool.
//   - selector falsy + reddit start page -> reddit-specific collector
//   - selector falsy otherwise           -> defaultCollector
//   - selector is a string               -> defaultCollector bound to that CSS selector
// Returns a function of the shape (driver, startPage) => links.
function selectScaper(selector, startPage) {
  // Guard on startPage: callers may pass a links array with no start page,
  // in which case startPage is undefined and .includes() would throw.
  if(!selector && startPage && startPage.includes('reddit.com')) {
    selector = importer.import("reddit month of links")
  } else if(!selector) {
    selector = defaultCollector
  } else if(typeof selector == 'string') {
    // A string selector is treated as a CSS selector for the default collector.
    selector = defaultCollector.bind(null, startPage, selector)
  }
  return selector
}
// Extract and summarize every article linked from startPage (or from the
// given links array), persisting each summary as it completes so an
// interrupted run can resume without re-summarizing.
//
// @param {Array<{link: string}>} [links] - pre-collected links; scraped from startPage when omitted
// @param {Function|string} [selector] - scraper function or CSS selector string (see selectScaper)
// @param {string} [startPage] - page to scrape links from when links is omitted
// @param {*} funny - flag passed through to persistSummaries and summerizeArticle
// @returns {Object|undefined} map of link URL -> summary, or undefined when no input was given
async function summerizeAll(links, selector, startPage, funny) {
  if(!startPage && !links) {
    console.error('No start page or links to summerize.')
    return
  }
  let selectorFunction = selectScaper(selector, startPage)
  let driver = await getClient()
  let summaries = persistSummaries(funny)
  try {
    if(!links && startPage) {
      links = await selectorFunction(driver, startPage)
    }
    console.log(links)
    for (let i = 0; i < links.length; i++) {
      if(typeof summaries[links[i].link] != 'undefined')
        continue // already loaded in a previous run
      let article = await extractArticle(driver, links[i].link)
      let summary = await summerizeArticle(article, funny)
      summaries[links[i].link] = summary
      // Persist after every article so progress survives a crash mid-run.
      persistSummaries(funny, summaries)
    }
    return summaries
  } finally {
    // Always quit the browser: the original only quit on success,
    // leaking a Selenium session whenever scraping or summarizing threw.
    driver.quit()
  }
}
module.exports = summerizeAll
const Client = require('selenium-client');
const { ExtractLLMArticle, SummarizeLLMArticle } = require('./extract-llm-article');
const { DefaultLinkCollector, persistSummaries } = require('./default-link-collector');
const RedditLinkCollector = require('./reddit-link-collector');
// Chooses a link collector for the given selector/startPage combination.
class LinkScraper {
  // Returns the reddit-specific collector for reddit start pages, the
  // default collector otherwise, or a factory bound to a CSS selector string.
  static selectScraper(selector, startPage) {
    // Guard startPage: it is undefined when callers supply links directly,
    // and calling .includes() on undefined would throw a TypeError.
    if (!selector && startPage && startPage.includes('reddit.com')) {
      selector = new RedditLinkCollector();
    } else if (!selector) {
      selector = new DefaultLinkCollector();
    } else if (typeof selector === 'string') {
      // NOTE(review): this branch returns a zero-arg factory function while
      // the other branches return collector instances, yet the caller invokes
      // the result as collector(client, startPage) — confirm the intended
      // collector API and make the branches consistent.
      selector = () => new DefaultLinkCollector(startPage, selector);
    }
    return selector;
  }
}
/**
 * Extracts and summarizes every article linked from startPage (or from the
 * given links array), persisting summaries incrementally so reruns resume
 * where they left off.
 *
 * @param {Array<{link: string}>} [links] - pre-collected links; scraped when omitted
 * @param {Function|string} [selector] - collector or CSS selector string (see LinkScraper)
 * @param {string} [startPage] - page to scrape links from when links is omitted
 * @param {*} funny - flag passed to SummarizeLLMArticle and persistSummaries
 * @returns {Object} map of link URL -> summary
 * @throws {Error} when neither startPage nor links is provided
 */
async function summarizeAll(links, selector, startPage, funny) {
  if (!startPage && !links) {
    throw new Error('No start page or links to summarize.');
  }
  const client = new Client();
  const extractor = new ExtractLLMArticle();
  const summarizer = new SummarizeLLMArticle(funny);
  const collector = LinkScraper.selectScraper(selector, startPage);
  const summaries = persistSummaries(funny);
  try {
    if (!links && startPage) {
      links = await collector(client, startPage);
    }
    console.log(links);
    for (const link of links) {
      if (summaries[link.link]) {
        continue; // already summarized in a previous run
      }
      // NOTE(review): extractor.client(...) looks like it should be an
      // extraction method (e.g. extractor.extract) — confirm against the
      // ExtractLLMArticle API.
      const article = await extractor.client(client, link.link);
      const summary = await summarizer(article);
      summaries[link.link] = summary;
      // Persist after each article so progress survives a crash mid-run.
      persistSummaries(funny, summaries);
    }
    return summaries;
  } finally {
    // Always release the Selenium session; the original skipped quit() on
    // error, leaking the browser process.
    await client.quit();
  }
}
module.exports = summarizeAll;
Code Breakdown

The code imports several modules using the `importer.import` function:

- `getClient`: imports the Selenium client module
- `extractArticle`: imports the article extraction module
- `summerizeArticle`: imports the article summarization module
- `defaultCollector` and `persistSummaries`: import the default link collector and the summary persistence helpers, respectively

The `selectScaper` function determines which link-scraping tool to use based on the provided `selector` and `startPage` parameters. It returns a function that can be used to scrape links:
- If `selector` is falsy and `startPage` includes `reddit.com`, it uses the `reddit month of links` selector.
- If `selector` is falsy otherwise, it defaults to the `defaultCollector` selector.
- If `selector` is a string, it binds `defaultCollector` to the provided `startPage` and `selector`.
The `summerizeAll` function extracts and summarizes all links from a provided `startPage` or `links` array.

Parameters:
- `links`: an array of links to summarize
- `selector`: the link-scraping tool to use
- `startPage`: the starting page to scrape links from
- `funny`: a flag passed through to the `persistSummaries` function

Behavior:
- Logs an error and returns if both `startPage` and `links` are falsy.
- Selects a scraping tool via the `selectScaper` function.
- Obtains a Selenium driver via the `getClient` function.
- Loads previously saved summaries via the `persistSummaries` function.
- If `startPage` is provided but `links` is falsy, it scrapes links from the `startPage` using the selected tool.
- Extracts each article with the `extractArticle` function.
- Summarizes each article with the `summerizeArticle` function.
- Persists summaries with the `persistSummaries` function after each article.

The `summerizeAll` function is exported as a module.