The redditWeekly function scrapes top links and posts from Reddit, extracts summaries, generates alternative summaries, and posts them to a specified page, with TODO comments indicating areas for further development.
npm run import -- "reddit weekly"
const redditLinks = importer.import("reddit scraper")
const getClient = importer.import("selenium client")
const redditList = importer.import("reddit month of links")
const redditPost = importer.import("reddit post")
const summerizeArticle = importer.import("summarize llm article")
const {alternativeSummary, CONVERSION_PROMPTS} = importer.import("convert summaries")
const { persistSummaries } = importer.import("default link collector")
const extractArticle = importer.import("extract llm article")
// TODO: send an email or post updates on reddit.com/r/collapseGently?
async function redditWeekly(
  startPage = 'CollapseSupport+climatechange+collapse+economicCollapse',
  postPage = 'CollapseGently'
) {
  if(!startPage.includes('://')) {
    startPage = 'https://www.reddit.com/r/' + startPage
  }
  let driver = await getClient()
  // TODO: get top
  let top = await redditLinks(driver, startPage + '/top/')
  let topLinks = top.map(post => post.link)
  // TODO: sort by most comments
  let posts = await redditList(driver, startPage)
  let topCommented = posts.sort((a, b) => {
    let aInt = parseInt(a.comment.replace(/comments*/gi, '').trim())
    let bInt = parseInt(b.comment.replace(/comments*/gi, '').trim())
    return bInt - aInt
  }).filter(post => !topLinks.includes(post.link))
  let freshPosts = topCommented.slice(0, 25).concat(top.slice(0, 25))
  let summaries = persistSummaries()
  // TODO: loop through top 20 (10 of each) and repost
  for(let i = 0; i < freshPosts.length; i++) {
    let summary = summaries[freshPosts[i].link]
    if(!summary) {
      let article = await extractArticle(driver, freshPosts[i].link)
      summary = await summerizeArticle(article)
    }
    // TODO: extract funny summary instead
    // pick a random conversion prompt, skipping index 0 and staying within bounds
    let rand = Math.floor(Math.random() * (CONVERSION_PROMPTS.length - 1)) + 1
    let alternative = await alternativeSummary(summary, CONVERSION_PROMPTS[rand])
    await redditPost(driver,
      'TLDR: ' + alternative[1]
      + '\n\n' + alternative[0]
      + '\n\n' + '[' + freshPosts[i].link + '](' + freshPosts[i].link + ')\n',
      postPage)
  }
}
module.exports = redditWeekly
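The exported function can be run on its own. A minimal usage sketch, assuming this cell is imported by its name "reddit weekly" from the same importer environment used above:

const redditWeekly = importer.import("reddit weekly")

// scrape the default collapse-related subreddits and repost summaries to r/CollapseGently
redditWeekly()
  .then(() => console.log('weekly repost finished'))
  .catch(err => console.error('weekly repost failed', err))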
// Class-based variant of redditWeekly, built on the same helper modules required below.
const redditLinks = require('./reddit scraper');
const getClient = require('./selenium client');
const redditList = require('./reddit month of links');
const redditPost = require('./reddit post');
const summerizeArticle = require('./summarize llm article');
const { alternativeSummary, CONVERSION_PROMPTS } = require('./convert summaries');
const { persistSummaries } = require('./default link collector');
const extractArticle = require('./extract llm article');
class RedditWeekly {
  async run(startPage = 'CollapseSupport+climatechange+collapse+economicCollapse', postPage = 'CollapseGently') {
    // prefix bare subreddit names with the full reddit.com URL, mirroring redditWeekly above
    this.startPage = startPage.includes('://') ? startPage : `https://www.reddit.com/r/${startPage}`;
    this.postPage = postPage;
    this.driver = await this.getClient();
    this.summaries = persistSummaries();
    await this.scrapeAndPost(this.startPage, this.postPage);
  }

  async getClient() {
    // delegate to the imported selenium client helper
    return getClient();
  }

  async scrapeAndPost(startPage, postPage) {
    const top = await redditLinks(this.driver, startPage + '/top/');
    const topLinks = top.map(post => post.link);
    const topCommented = await this.getTopCommentedPosts(startPage, topLinks);
    const freshPosts = topCommented.slice(0, 25).concat(top.slice(0, 25));
    for (const post of freshPosts) {
      const summary = await this.getSummary(post);
      const alternative = await this.getAlternativeSummary(summary);
      await redditPost(this.driver, this.getRedditPostBody(post, alternative), postPage);
    }
  }

  async getTopCommentedPosts(startPage, topLinks) {
    const posts = await redditList(this.driver, startPage);
    return posts.sort((a, b) => {
      const aInt = parseInt(a.comment.replace(/comments*/gi, '').trim());
      const bInt = parseInt(b.comment.replace(/comments*/gi, '').trim());
      return bInt - aInt;
    }).filter(post => !topLinks.includes(post.link));
  }

  async getSummary(post) {
    let summary = this.summaries[post.link];
    if (!summary) {
      const article = await extractArticle(this.driver, post.link);
      summary = await summerizeArticle(article);
      this.summaries[post.link] = summary;
    }
    return summary;
  }

  async getAlternativeSummary(summary) {
    // pick a random conversion prompt, skipping index 0 and staying within bounds
    const rand = Math.floor(Math.random() * (CONVERSION_PROMPTS.length - 1)) + 1;
    return alternativeSummary(summary, CONVERSION_PROMPTS[rand]);
  }

  getRedditPostBody(post, alternative) {
    return `TLDR: ${alternative[1]}\n\n${alternative[0]}\n\n[${post.link}](${post.link})`;
  }
}
module.exports = RedditWeekly;
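A usage sketch for the class-based variant; the require path is hypothetical and depends on where the module is saved:

const RedditWeekly = require('./reddit weekly')

new RedditWeekly()
  .run('CollapseSupport+climatechange+collapse+economicCollapse', 'CollapseGently')
  .then(() => console.log('weekly repost finished'))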
Code Breakdown
The code begins by importing various modules using the importer.import() function:

redditLinks: a module for scraping Reddit links
getClient: a module for getting a Selenium client
redditList: a module for getting a list of Reddit posts
redditPost: a module for posting on Reddit
summerizeArticle: a module for summarizing articles
alternativeSummary: a module for generating alternative summaries (along with the CONVERSION_PROMPTS list)
persistSummaries: a module for persisting summaries
extractArticle: a module for extracting articles from a link
The redditWeekly Function

The redditWeekly function is defined with two optional parameters:

startPage: the starting page on Reddit (defaults to 'CollapseSupport+climatechange+collapse+economicCollapse')
postPage: the page to post to on Reddit (defaults to 'CollapseGently')

The function initializes the Selenium client using getClient() and scrapes the top links and posts from the specified startPage.
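For example, a bare multireddit name is expanded into a full listing URL before scraping (values taken from the defaults above):

let startPage = 'CollapseSupport+climatechange+collapse+economicCollapse'
if (!startPage.includes('://')) {
  startPage = 'https://www.reddit.com/r/' + startPage
}
// top posts are scraped from startPage + '/top/'
// => 'https://www.reddit.com/r/CollapseSupport+climatechange+collapse+economicCollapse/top/'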
The function filters the posts to exclude the top links and sorts them by the number of comments in descending order.
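For illustration, a small sketch of the comment-count parsing used by that sort, assuming each post's comment field is a string such as "412 comments" (the format implied by the regex above); the NaN fallback is an addition:

const commentCount = post =>
  parseInt(post.comment.replace(/comments*/gi, '').trim()) || 0  // fall back to 0 when no number is found

commentCount({ comment: '412 comments' })  // 412
commentCount({ comment: '1 comment' })     // 1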
For each post, the function looks up a cached summary loaded from persistSummaries(). If no summary is found, it extracts the article using extractArticle() and then summarizes it using summerizeArticle().
The function then generates an alternative summary using alternativeSummary() with a randomly selected prompt from CONVERSION_PROMPTS, and posts the result to the specified postPage on Reddit.
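An illustrative sketch of the post body that gets submitted, assuming alternativeSummary() resolves to a pair where index 1 is a short TLDR line and index 0 is the longer rewritten summary (hypothetical values):

let alternative = [
  'A longer, rewritten version of the summary in the chosen prompt style…',
  'One-line TLDR of the article.'
]
let link = 'https://example.com/article'
let body = 'TLDR: ' + alternative[1]
  + '\n\n' + alternative[0]
  + '\n\n' + '[' + link + '](' + link + ')\n'
// body renders on Reddit as the TLDR line, the longer summary, and a markdown link back to the source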
These steps are repeated for the combined list of fresh posts: up to 25 of the most-commented posts plus the top 25 posts.
The code is marked with TODO comments, indicating that certain sections are incomplete or require further development.