The summerizeAll function extracts and summarizes all links from a provided startPage or links array by selecting a link scraping tool, scraping links, extracting article content, summarizing articles, and persisting summaries. The function uses various modules and functions, including getClient, extractArticle, summerizeArticle, defaultCollector, and persistSummaries, and is exported as a module for use elsewhere.
npm run import -- "summarize all articles"

const getClient = importer.import("selenium client")
const extractArticle = importer.import("extract llm article")
const summerizeArticle = importer.import("summarize llm article")
const {
defaultCollector, persistSummaries
} = importer.import("default link collector")
// select link scraping tool
// Dispatch rules:
//  - no selector + reddit startPage -> reddit month-of-links scraper
//  - no selector                    -> defaultCollector
//  - string selector                -> defaultCollector pre-bound to (startPage, selector)
//  - anything else (a function)     -> returned unchanged
function selectScaper(selector, startPage) {
  // Guard startPage: callers may supply only `links` and leave startPage
  // undefined, and `undefined.includes(...)` would throw a TypeError.
  if(!selector && startPage && startPage.includes('reddit.com')) {
    selector = importer.import("reddit month of links")
  } else if(!selector) {
    selector = defaultCollector
  } else if(typeof selector == 'string') {
    selector = defaultCollector.bind(null, startPage, selector)
  }
  return selector
}
// extract persist, extract persist
/**
 * Scrapes links (when not supplied), extracts each article, summarizes it,
 * and persists the summary map after every article so partial progress
 * survives a crash.
 *
 * @param {Array<{link: string}>} [links] - links to summarize; scraped from startPage when omitted
 * @param {Function|string} [selector] - scraper function, CSS selector string, or falsy for the default
 * @param {string} [startPage] - page to scrape links from when `links` is not given
 * @param {*} funny - persistence key/flag forwarded to persistSummaries
 * @returns {Promise<Object|undefined>} map of link URL -> summary, or undefined when no input was given
 */
async function summerizeAll(links, selector, startPage, funny) {
  if(!startPage && !links) {
    console.error('No start page or links to summerize.')
    return
  }
  let selectorFunction = selectScaper(selector, startPage)
  let driver = await getClient()
  let summaries = persistSummaries(funny)
  try {
    if(!links && startPage) {
      links = await selectorFunction(driver, startPage)
    }
    console.log(links)
    for (let i = 0; i < links.length; i++) {
      if(typeof summaries[links[i].link] != 'undefined')
        continue // already loaded
      let article = await extractArticle(driver, links[i].link)
      let summary = await summerizeArticle(article, funny)
      summaries[links[i].link] = summary
      // persist after each article so progress is kept even if a later one fails
      persistSummaries(funny, summaries)
    }
    return summaries
  } finally {
    // Always release the browser session. The original only quit on success
    // (quit() was commented out in the catch), leaking the driver on error.
    driver.quit()
  }
}
module.exports = summerizeAll
const Client = require('selenium-client');
const { ExtractLLMArticle, SummarizeLLMArticle } = require('./extract-llm-article');
const { DefaultLinkCollector, persistSummaries } = require('./default-link-collector');
const RedditLinkCollector = require('./reddit-link-collector');
class LinkScraper {
  /**
   * Chooses the link collector for a run.
   *  - no selector + reddit startPage -> RedditLinkCollector
   *  - no selector                    -> DefaultLinkCollector
   *  - string selector                -> factory for a DefaultLinkCollector bound to (startPage, selector)
   *  - anything else (a function)     -> returned unchanged
   *
   * @param {Function|string|undefined} selector
   * @param {string|undefined} startPage
   * @returns {Function|Object} collector or collector factory
   */
  static selectScraper(selector, startPage) {
    // Guard startPage: callers may supply only `links`, leaving startPage
    // undefined; `undefined.includes(...)` would throw a TypeError.
    if (!selector && startPage && startPage.includes('reddit.com')) {
      selector = new RedditLinkCollector();
    } else if (!selector) {
      selector = new DefaultLinkCollector();
    } else if (typeof selector === 'string') {
      // NOTE(review): invoking this arrow as collector(client, startPage)
      // yields a DefaultLinkCollector instance, not scraped links — confirm
      // this is the intended API of DefaultLinkCollector.
      selector = () => new DefaultLinkCollector(startPage, selector);
    }
    return selector;
  }
}
/**
 * Scrapes links (when not supplied), extracts each article, summarizes it,
 * and persists the summary map after every article.
 *
 * @param {Array<{link: string}>} [links] - links to summarize; scraped from startPage when omitted
 * @param {Function|string} [selector] - collector, selector string, or falsy for the default
 * @param {string} [startPage] - page to scrape links from when `links` is not given
 * @param {*} funny - persistence key/flag forwarded to persistSummaries
 * @returns {Promise<Object>} map of link URL -> summary
 * @throws {Error} when neither startPage nor links is provided
 */
async function summarizeAll(links, selector, startPage, funny) {
  if (!startPage && !links) {
    throw new Error('No start page or links to summarize.');
  }
  const client = new Client();
  const extractor = new ExtractLLMArticle();
  const summarizer = new SummarizeLLMArticle(funny);
  const collector = LinkScraper.selectScraper(selector, startPage);
  const summaries = persistSummaries(funny);
  try {
    if (!links && startPage) {
      links = await collector(client, startPage);
    }
    console.log(links);
    for (const link of links) {
      // Explicit undefined check: a previously persisted falsy summary
      // ('' / null / 0) still counts as "already loaded" and is skipped,
      // matching the first implementation's behavior.
      if (typeof summaries[link.link] !== 'undefined') {
        continue; // already loaded
      }
      // NOTE(review): extractor.client(...) looks suspicious — presumably the
      // extraction method of ExtractLLMArticle should be called; confirm API.
      const article = await extractor.client(client, link.link);
      // NOTE(review): summarizer is a class instance invoked as a function —
      // verify SummarizeLLMArticle actually produces a callable.
      const summary = await summarizer(article);
      summaries[link.link] = summary;
      persistSummaries(funny, summaries); // persist progress after every article
    }
    await client.quit();
    return summaries;
  } catch (error) {
    // Deliberately leave the client open on error (e.g. to inspect the page).
    throw error;
  }
}
module.exports = summarizeAll;

Code Breakdown
The code imports several modules using the importer.import function:
- getClient: imports the Selenium client module
- extractArticle: imports the article extraction module
- summerizeArticle: imports the article summarization module
- defaultCollector and persistSummaries: import the default link collector and summary persistence modules, respectively

The selectScaper function determines which link scraping tool to use based on the provided selector and startPage parameters. It returns a function that can be used to scrape links.
- If selector is falsy and startPage includes reddit.com, it uses the reddit month of links selector.
- If selector is falsy, it defaults to the defaultCollector selector.
- If selector is a string, it binds the defaultCollector selector to the provided startPage and selector.

The summerizeAll function extracts and summarizes all links from a provided startPage or links array.
Its parameters are:

- links: an array of links to summarize
- selector: the link scraping tool to use
- startPage: the starting page to scrape links from
- funny: a parameter used by the persistSummaries function

The function checks whether both startPage and links are falsy and logs an error message if so. It selects the scraping tool via the selectScaper function, obtains a Selenium driver via the getClient function, and loads existing summaries via the persistSummaries function. If startPage is provided but links is falsy, it scrapes links from the startPage using the selected tool. Each link's article is extracted with the extractArticle function, summarized with the summerizeArticle function, and persisted with the persistSummaries function. The summerizeAll function is exported as a module.