facebook data | Scrape facebook friends | Cell 8 | Search

This code tracks changes in a Facebook user's friend list: it scrapes the current friend list, saves it as a dated snapshot, compares it with the previous snapshot, and returns the profile URLs that appear in only one of the two (friends added or removed). The function is exported for use in other parts of an application.

Run example

npm run import -- "Automatically diff facebook friends"

Automatically diff facebook friends

var glob = require('glob');
var fs = require('fs');
var path = require('path');
var importer = require('../Core');
var runSeleniumCell = importer.import("selenium cell");

// Home directory of the current user: the first of these environment
// variables that holds a truthy value.
var PROFILE_PATH = process.env.HOME
    || process.env.HOMEPATH
    || process.env.USERPROFILE;
// Folder in which the friend-list snapshots are written and searched.
var project = `${PROFILE_PATH}/Conversations`;

/**
 * Extracts the unique friend profile URLs from a saved friend-list HTML file.
 *
 * Every `href="..."` value is collected; only those containing `friends_tab`
 * and containing neither `.js` nor `.css` are kept. The query string is then
 * stripped and duplicates are removed, preserving first-seen order.
 *
 * @param {string} file - Path of the HTML snapshot to read.
 * @returns {string[]} De-duplicated hrefs with their query strings removed.
 */
function getFriends(file) {
    const html = fs.readFileSync(file).toString();
    // Assumes regexToArray returns capture group 1 of every match as a string.
    const hrefs = importer.regexToArray(/href="([^"]*)/ig, html, 1);
    const friendUrls = hrefs
        .filter(href => !href.includes('.js') && !href.includes('.css')
            && href.includes('friends_tab'))
        .map(href => {
            // Greedy match: drops everything from the last "?" onwards.
            const queryMatch = /(.*)\?/.exec(href);
            // An href without a query string has no match; keep it unchanged
            // instead of dereferencing null.
            return queryMatch ? queryMatch[1] : href;
        });
    // A Set keeps insertion order, so the first occurrence of each URL wins.
    return [...new Set(friendUrls)];
}

/**
 * Scrapes the current Facebook friend list, stores it as a dated snapshot in
 * the project folder and compares it with the previous snapshot.
 *
 * @returns {Promise<string[]|undefined>} Resolves to the profile URLs found in
 *     only one of the two most recently modified friend files (URLs unique to
 *     the newest file first, then URLs unique to the previous one). Resolves
 *     to an empty array when fewer than two friend files exist, and to
 *     `undefined` when any step fails (the error is logged, not rethrown).
 */
function getFriendsDiff() {
    return runSeleniumCell([
        'log in facebook',
        'scrape facebook profile',
        'scrape facebook friends'
    ])
        .then(r => r.scrapeFacebookFriends())
        .then(doc => {
            const date = new Date();
            // Snapshot name: friendlist-<year>-<month>-<day>.htm (not zero-padded).
            fs.writeFileSync(project + '/friendlist-' + date.getFullYear()
                + '-' + (date.getMonth() + 1)
                + '-' + date.getDate() + '.htm', '<html>' + doc + '</html>');

            // Stat every candidate once, then order oldest -> newest by mtime.
            const snapshots = glob.sync('**/*friend*', {cwd: project, nodir: true})
                .map(name => {
                    const file = path.join(project, name);
                    return {file, modified: fs.statSync(file).mtime.getTime()};
                })
                .sort((a, b) => a.modified - b.modified);

            // First run: only the snapshot written above exists, so there is
            // nothing to compare against.
            if (snapshots.length < 2) {
                return [];
            }

            const latest = getFriends(snapshots[snapshots.length - 1].file);
            const previous = getFriends(snapshots[snapshots.length - 2].file);
            const latestSet = new Set(latest);
            const previousSet = new Set(previous);
            return latest.filter(u => !previousSet.has(u))
                .concat(previous.filter(u => !latestSet.has(u)));
        })
        .catch(e => console.log(e));
}
module.exports = getFriendsDiff;

What the code could have been:

const glob = require('glob');
const fs = require('fs');
const path = require('path');
const { regexToArray, regex } = require('../Core');
const { runSeleniumCell } = require('../Core/selenium cell');

// Home directory of the current user: the first of these environment
// variables that holds a truthy value.
const PROFILE_PATH = process.env.HOME
    || process.env.HOMEPATH
    || process.env.USERPROFILE;
// Folder in which the friend-list snapshots are written and searched.
const project = path.join(PROFILE_PATH, 'Conversations');

/**
 * Extracts the unique friend profile URLs from a saved friend-list HTML file.
 *
 * Every `href="..."` value is collected; only those containing `friends_tab`
 * and containing neither `.js` nor `.css` are kept. The query string is then
 * stripped and duplicates are removed, preserving first-seen order.
 *
 * @param {string} file - Path to the HTML file.
 * @returns {string[]} De-duplicated hrefs with their query strings removed,
 *     or an empty array when the file cannot be processed (the error is logged).
 */
function getFriends(file) {
    try {
        const html = fs.readFileSync(file, 'utf8');
        // Global flag so every href in the document can be visited.
        // NOTE(review): assumes regexToArray steps through matches with the
        // regex it is given and returns capture group 1 — confirm in ../Core.
        const hrefs = regexToArray(/href="([^"]*)/ig, html, 1);
        const friendUrls = hrefs
            .filter((href) => !href.includes('.js') && !href.includes('.css')
                && href.includes('friends_tab'))
            .map((href) => {
                // Greedy match: drops everything from the last "?" onwards.
                const queryMatch = href.match(/(.*)\?/);
                // Keep hrefs that have no query string instead of emitting null.
                return queryMatch ? queryMatch[1] : href;
            });
        // A Set keeps insertion order, so the first occurrence of each URL wins.
        return [...new Set(friendUrls)];
    } catch (error) {
        console.error(`Error reading file: ${error}`);
        return [];
    }
}

/**
 * Scrapes the current Facebook friend list, stores it as a dated snapshot in
 * the project folder and compares it with the previous snapshot.
 *
 * @returns {Promise<string[]|undefined>} A promise resolving to the profile
 *     URLs found in only one of the two most recently modified friend files
 *     (URLs unique to the newest file first, then URLs unique to the previous
 *     one). Resolves to an empty array when fewer than two friend files exist,
 *     and to `undefined` when any step fails (the error is logged, not
 *     rethrown).
 */
function getFriendsDiff() {
    return runSeleniumCell([
        'log in facebook',
        'scrape facebook profile',
        'scrape facebook friends'
    ])
        .then((seleniumResult) => seleniumResult.scrapeFacebookFriends())
        .then((doc) => {
            const date = new Date();
            // Snapshot name: friendlist-<year>-<month>-<day>.htm (not zero-padded).
            fs.writeFileSync(
                path.join(project, `friendlist-${date.getFullYear()}-${date.getMonth() + 1}-${date.getDate()}.htm`),
                `<html>${doc}</html>`
            );

            // Stat every candidate once, then order oldest -> newest by mtime.
            const snapshots = glob.sync('**/*friend*', { cwd: project, nodir: true })
                .map((name) => {
                    const file = path.join(project, name);
                    return { file, modified: fs.statSync(file).mtime.getTime() };
                })
                .sort((a, b) => a.modified - b.modified);

            // First run: only the snapshot written above exists, so there is
            // nothing to compare against.
            if (snapshots.length < 2) {
                return [];
            }

            const latest = getFriends(snapshots[snapshots.length - 1].file);
            const previous = getFriends(snapshots[snapshots.length - 2].file);
            const latestSet = new Set(latest);
            const previousSet = new Set(previous);
            // Each side is de-duplicated, as the earlier Set wrappers did.
            return [
                ...new Set(latest.filter((u) => !previousSet.has(u))),
                ...new Set(previous.filter((u) => !latestSet.has(u))),
            ];
        })
        .catch((error) => console.error(error));
}

module.exports = getFriendsDiff;

This code snippet is designed to track changes in a Facebook user's friend list over time.

Here's a breakdown:

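  1. Setup: It imports modules for file globbing (`glob`), file system operations (`fs`) and path manipulation (`path`), plus a custom importer module that loads the Selenium cell. It also derives the `Conversations` folder inside the user's home directory, where snapshots are stored.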

  2. getFriends Function: This function takes a file path as input and extracts the `href` URLs from the HTML content. It keeps only the URLs that contain `friends_tab` (the links to friend profiles), filters out CSS and JavaScript files, strips the query string from each URL, and removes duplicates.

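  3. getFriendsDiff Function: This function orchestrates the entire process. It runs the Selenium cells that log in to Facebook and scrape the friend list, saves the result as a dated `friendlist-<year>-<month>-<day>.htm` file in the `Conversations` folder, picks the two most recently modified friend files, extracts the profile URLs from each with `getFriends`, and returns the URLs that appear in only one of the two files. Errors are logged rather than rethrown.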

  4. Export: The getFriendsDiff function is exported, making it available for use in other parts of the application.