reddit | test reddit post | reddit month of links | Search

The redditWeekly function scrapes top links and posts from Reddit, extracts summaries, generates alternative summaries, and posts them to a specified page, with TODO comments indicating areas for further development.

Run example

npm run import -- "reddit weekly"

reddit weekly

const redditLinks = importer.import("reddit scraper")
const getClient = importer.import("selenium client")
const redditList = importer.import("reddit month of links")
const redditPost = importer.import("reddit post")
const summerizeArticle = importer.import("summarize llm article")
const {alternativeSummary, CONVERSION_PROMPTS} = importer.import("convert summaries")
const { persistSummaries } = importer.import("default link collector")
const extractArticle = importer.import("extract llm article")

// TODO: send an email or post updates on reddit.com/r/collapseGently?

/**
 * Scrapes top-voted and most-commented posts from a subreddit (or multireddit),
 * produces an "alternative" summary for each, and reposts them to `postPage`.
 *
 * @param {string} startPage - subreddit/multireddit name, or a full URL.
 * @param {string} postPage  - subreddit name to post the summaries to.
 */
async function redditWeekly(
  startPage = 'CollapseSupport+climatechange+collapse+economicCollapse',
  postPage = 'CollapseGently'
) {

  // Accept either a bare subreddit name or an already-complete URL.
  if(!startPage.includes('://')) {
    startPage = 'https://www.reddit.com/r/' + startPage
  }

  let driver = await getClient()

  // Top-voted posts for the period.
  let top = await redditLinks(driver, startPage + '/top/')
  // Use a Set for O(1) membership checks in the filter below
  // (Array.includes inside a filter is O(n^2)).
  let topLinks = new Set(top.map(post => post.link))

  // Most-commented posts, excluding anything already in the top-voted list.
  let posts = await redditList(driver, startPage)
  let topCommented = posts.sort((a, b) => {
    // "123 comments" -> 123; `|| 0` pushes unparseable counts (NaN) to the end.
    let aInt = parseInt(a.comment.replace(/comments*/gi, '').trim(), 10) || 0
    let bInt = parseInt(b.comment.replace(/comments*/gi, '').trim(), 10) || 0
    return bInt - aInt
  }).filter(post => !topLinks.has(post.link))

  // Up to 50 candidates: 25 most-commented plus 25 top-voted.
  let freshPosts = topCommented.slice(0, 25).concat(top.slice(0, 25))
  let summaries = persistSummaries()

  for(let i = 0; i < freshPosts.length; i++) {
    let summary = summaries[freshPosts[i].link]
    if(!summary) {
      let article = await extractArticle(driver, freshPosts[i].link)
      summary = await summerizeArticle(article)
      // Remember the fresh summary so a re-run of this loop doesn't
      // re-extract and re-summarize the same link.
      summaries[freshPosts[i].link] = summary
    }

    // Pick a conversion prompt uniformly from [0, length).
    // The original `Math.round(rand * (len - 1)) + 1` skipped index 0 and
    // could produce `len`, indexing past the end of CONVERSION_PROMPTS.
    let rand = Math.floor(Math.random() * CONVERSION_PROMPTS.length)
    let alternative = await alternativeSummary(summary, CONVERSION_PROMPTS[rand])

    // alternative[1] is assumed to be the short TLDR line and alternative[0]
    // the long form — TODO confirm against "convert summaries".
    await redditPost(driver, 
      'TLDR: ' + alternative[1] 
      + '\n\n' + alternative[0] 
      + '\n\n' + '[' + freshPosts[i].link + '](' + freshPosts[i].link + ')\n', 
      postPage)
  }

}

module.exports = redditWeekly

What the code could have been:

const { Client } = require('selenium-webdriver');
const { Builder } = require('selenium-webdriver');
const { By } = require('selenium-webdriver');
const { URL } = require('url');

const redditLinks = require('./reddit scraper');
const getClient = require('./selenium client');
const redditList = require('./reddit month of links');
const redditPost = require('./reddit post');
const summerizeArticle = require('./summarize llm article');
const { alternativeSummary, CONVERSION_PROMPTS } = require('./convert summaries');
const { persistSummaries } = require('./default link collector');
const extractArticle = require('./extract llm article');

class RedditWeekly {
  /**
   * Scrapes top-voted and most-commented posts, summarizes them, and reposts
   * alternative summaries to `postPage`.
   *
   * @param {string} startPage - subreddit/multireddit name, or a full URL.
   * @param {string} postPage  - subreddit name to post summaries to.
   */
  async run(startPage = 'CollapseSupport+climatechange+collapse+economicCollapse', postPage = 'CollapseGently') {
    // `new URL('subredditName')` throws a TypeError on bare names, so test
    // for a scheme separator instead of constructing a URL.
    this.startPage = startPage.includes('://') ? startPage : `https://www.reddit.com/r/${startPage}`;
    this.postPage = postPage;
    this.driver = await this.getClient();
    this.summaries = persistSummaries();
    await this.scrapeAndPost(this.startPage, this.postPage);
  }

  // Delegates to the shared selenium client factory; `new Client()` from
  // selenium-webdriver is not a usable driver instance.
  async getClient() {
    return getClient();
  }

  async scrapeAndPost(startPage, postPage) {
    const top = await redditLinks(this.driver, startPage + '/top/');
    // Set gives O(1) membership checks when filtering duplicates below.
    const topLinks = new Set(top.map(post => post.link));
    const topCommented = await this.getTopCommentedPosts(startPage, topLinks);
    // Up to 50 candidates: 25 most-commented plus 25 top-voted.
    const freshPosts = topCommented.slice(0, 25).concat(top.slice(0, 25));
    for (const post of freshPosts) {
      const summary = await this.getSummary(post);
      const alternative = await this.getAlternativeSummary(summary);
      await redditPost(this.driver, this.getRedditPostBody(post, alternative), postPage);
    }
  }

  // `topLinks` must be passed in explicitly — the previous version referenced
  // it out of scope, which raised a ReferenceError at runtime.
  async getTopCommentedPosts(startPage, topLinks = new Set()) {
    const posts = await redditList(this.driver, startPage);
    return posts.sort((a, b) => {
      // "123 comments" -> 123; radix 10 and `|| 0` keep the sort stable
      // when a count fails to parse.
      const aInt = parseInt(a.comment.replace(/comments*/gi, '').trim(), 10) || 0;
      const bInt = parseInt(b.comment.replace(/comments*/gi, '').trim(), 10) || 0;
      return bInt - aInt;
    }).filter(post => !topLinks.has(post.link));
  }

  async getSummary(post) {
    // `let`, not `const`: the previous `const summary` was reassigned below,
    // which threw a TypeError on every cache miss.
    let summary = this.summaries[post.link];
    if (!summary) {
      const article = await extractArticle(this.driver, post.link);
      summary = await summerizeArticle(article);
      // Cache so a re-run doesn't re-summarize the same link.
      this.summaries[post.link] = summary;
    }
    return summary;
  }

  async getAlternativeSummary(summary) {
    // Uniform index in [0, length): the old `round(...) + 1` skipped index 0
    // and could index one past the end of CONVERSION_PROMPTS.
    const rand = Math.floor(Math.random() * CONVERSION_PROMPTS.length);
    return alternativeSummary(summary, CONVERSION_PROMPTS[rand]);
  }

  getRedditPostBody(post, alternative) {
    return `TLDR: ${alternative[1]}\n\n${alternative[0]}\n\n[${post.link}](${post.link})`;
  }
}

module.exports = RedditWeekly;

Code Breakdown

Importing Modules

The code begins by importing various modules using the importer.import() function:

Defining the redditWeekly Function

The redditWeekly function is defined with two optional parameters:

Initializing the Selenium Client and Scraping Reddit

The function initializes the Selenium client using getClient() and scrapes the top links and posts from the specified startPage.

Filtering and Sorting Posts

The function filters the posts to exclude the top links and sorts them by the number of comments in descending order.

Extracting Summaries

The function loads previously cached summaries via persistSummaries() and looks up each post by its link. If no cached summary exists for a link, it extracts the article using extractArticle() and then summarizes it using summerizeArticle().

Generating Alternative Summaries and Posting

The function generates alternative summaries using alternativeSummary() and posts the summaries to the specified postPage on Reddit.

Main Logic

The function loops through up to 50 fresh posts (25 most-commented and 25 top-voted) and performs the following steps:

  1. Extracts a summary for each post
  2. Generates an alternative summary
  3. Posts the alternative summary to Reddit

The code is marked with TODO comments, indicating that certain sections are incomplete or require further development.