linkedin connections | scrape entire linkedin profile | Cell 3 | Search

This code returns a list of LinkedIn connection profile links. If a cached `connections.json` file exists, was modified within the last 24 hours, and `force` is not set, the list is loaded from that file; otherwise the links are scraped from LinkedIn and saved to the local file. It uses Selenium to automate the web scraping process.

Run example

npm run import -- "scrape linkedin contacts"

scrape linkedin contacts

var importer = require('../Core');
var fs = require('fs');

// Base directory: the first defined of the HOME, HOMEPATH and USERPROFILE
// environment variables.
var PROFILE_PATH = process.env.HOME
    || process.env.HOMEPATH
    || process.env.USERPROFILE;
// Folder in which the connections.json cache file is read and written.
var project = `${PROFILE_PATH}/Conversations`;

/**
 * Returns the list of LinkedIn connection links, preferring the on-disk cache.
 *
 * The cache file (`<project>/connections.json`) is used when it exists,
 * `force` is falsy and the file was modified less than 24 hours ago.
 * Otherwise the connections page is opened through the externally provided
 * `client`, links are collected with the externally provided `getAllUntil`,
 * duplicates are removed and the result is written back to the cache file.
 *
 * @param {boolean} [force=false] - skip the cache and always scrape.
 * @returns {Promise<Array|undefined>} the links; when the scraping chain
 *     rejects, the error is logged and the promise resolves to undefined.
 */
function listAllConnections(force = false) {
    const cacheFile = project + '/connections.json';
    const maxCacheAgeMs = 1000 * 60 * 60 * 24;

    // Only evaluated after existsSync() succeeded and force is falsy.
    const cacheIsRecent = () =>
        Date.now() - fs.statSync(cacheFile).mtime.getTime() < maxCacheAgeMs;

    if (fs.existsSync(cacheFile) && !force && cacheIsRecent()) {
        const cachedJson = fs.readFileSync(cacheFile);
        return Promise.resolve(JSON.parse(cachedJson));
    }

    console.log('fetching new linkedin contacts');

    // Accumulator array handed to getAllUntil.
    const collected = [];

    // NOTE(review): the meaning of the leading `false` and of the two
    // callbacks is defined by getAllUntil, which is not visible here.
    const scrapeProfileLinks = () => getAllUntil(
        false,
        '//a[contains(@href, "/in/")]/@href',
        collected,
        (left, right) => left === right,
        (n) => n < 10
    );

    // Keep only the first occurrence of every link.
    const dropDuplicates = (links) =>
        links.filter((link, index, all) => all.indexOf(link) === index);

    const saveToCache = (links) => {
        fs.writeFileSync(cacheFile, JSON.stringify(links, null, 4));
        return links;
    };

    return client
        .getUrl()
        .url('https://www.linkedin.com/mynetwork/invite-connect/connections/')
        .pause(3000)
        .then(scrapeProfileLinks)
        .then(dropDuplicates)
        .then(saveToCache)
        .catch((error) => console.log(error));
}
module.exports = listAllConnections;

What the code could have been:

const fs = require('fs').promises; // Import fs module with promises
const path = require('path');
const { URL } = require('url');

// Environment variables
// os.homedir() is the last fallback so that path.join() below is never handed
// undefined (it throws a TypeError for non-string arguments) when none of the
// three variables is set.
const PROFILE_PATH = process.env.HOME
  || process.env.HOMEPATH
  || process.env.USERPROFILE
  || require('os').homedir();
// Directory that holds the cached connections.json file.
const PROJECT_PATH = path.join(PROFILE_PATH, 'Conversations');

// Import dependencies
const importer = require('../Core');

/**
 * Returns the list of LinkedIn connection links, preferring the on-disk cache.
 *
 * The cache file (`<PROJECT_PATH>/connections.json`) is returned when `force`
 * is falsy and the file was modified less than 24 hours ago. Otherwise the
 * connections page is downloaded, links are extracted with `getAllUntil`,
 * de-duplicated, written to the cache file and returned.
 *
 * @param {boolean} [force=false] - skip the cache and always fetch.
 * @returns {Promise<string[]>} the unique connection links.
 * @throws rethrows (after logging) any file-system, network or parse error.
 */
async function listAllConnections(force = false) {
  const CACHE_MAX_AGE_MS = 24 * 60 * 60 * 1000;

  try {
    const connectionsFile = path.join(PROJECT_PATH, 'connections.json');

    if (!force) {
      // `fs` is the promise API here, which has no existsSync(); a missing
      // file is detected through the ENOENT error of stat() instead.
      let cacheAgeMs = Infinity;
      try {
        const { mtimeMs } = await fs.stat(connectionsFile);
        cacheAgeMs = Date.now() - mtimeMs;
      } catch (statError) {
        if (statError.code !== 'ENOENT') {
          throw statError;
        }
      }

      // Fresh means modified within the last 24 hours.
      if (cacheAgeMs < CACHE_MAX_AGE_MS) {
        return JSON.parse(await fs.readFile(connectionsFile, 'utf8'));
      }
    }

    // Fetch new LinkedIn contacts
    console.log('Fetching new LinkedIn contacts');
    const client = importer.getClient(); // Assume importer has getClient method
    // Awaited so that getUrl() may return either a string or a promise of one.
    const currentUrl = await client.getUrl();
    const url = new URL(currentUrl).origin + '/mynetwork/invite-connect/connections/';
    const response = await fetch(url);
    if (!response.ok) {
      // Do not parse (and cache for a day) the body of an error response.
      throw new Error(`Failed to fetch ${url}: HTTP ${response.status}`);
    }
    const html = await response.text();
    const friends = await getAllUntil(html, '//a[contains(@href, "/in/")]/@href');

    // Remove duplicates and write to file
    const uniqueFriends = [...new Set(friends)];
    await fs.writeFile(connectionsFile, JSON.stringify(uniqueFriends, null, 4));
    return uniqueFriends;
  } catch (e) {
    console.error(e);
    throw e;
  }
}

/**
 * Returns the last-modified time of a file.
 *
 * @param {string} filePath - path of the file to inspect.
 * @returns {Promise<number>} modification time in milliseconds since the epoch.
 * @throws rejects with the error from fs.stat() (e.g. code ENOENT) when the
 *     file cannot be inspected.
 */
async function getModifiedTime(filePath) {
  // fs.stat() resolves to a Stats object; it must be awaited before mtimeMs
  // can be read, otherwise the property lookup happens on the Promise itself.
  const stats = await fs.stat(filePath);
  return stats.mtimeMs;
}

/**
 * Collects links parsed from `html` until a stop condition is reached.
 *
 * Elements returned by `parseHtml(html)` are appended in order. Collection
 * stops as soon as `condition(results.length)` is falsy, or right after an
 * element for which `equals(current, element)` is truthy has been appended.
 *
 * NOTE(review): `selector` is accepted for signature compatibility but is not
 * used; every element returned by parseHtml() is considered.
 *
 * @param {string} html - markup handed to parseHtml().
 * @param {string} selector - currently unused.
 * @param {*} [current] - sentinel value; collection stops after it is seen.
 * @param {function(*, *): boolean} [equals] - compares `current` to an element.
 * @param {function(number): boolean} [condition] - receives the number of
 *     results collected so far; collection continues while it returns truthy.
 * @returns {Promise<Array>} the collected elements.
 */
async function getAllUntil(html, selector, current, equals = (a, b) => a === b, condition = (i) => i < 10) {
  // Awaited so that parseHtml may be swapped for an asynchronous parser.
  const elements = await parseHtml(html);
  const results = [];

  for (const element of elements) {
    // results only grows inside this loop, so once the limit is hit there is
    // nothing left to collect; stop instead of scanning the remaining elements.
    if (!condition(results.length)) {
      break;
    }
    results.push(element);
    if (equals(current, element)) {
      break;
    }
  }

  return results;
}

/**
 * Extracts the href values of the anchor tags found in an HTML string.
 *
 * This is a deliberately simple regex-based scan; a real HTML parser
 * (e.g. cheerio) can be substituted if stricter parsing is needed.
 *
 * @param {string} html - markup to scan.
 * @returns {string[]} quoted, non-empty href values of `<a ...>` tags in
 *     document order; an empty array when there are none or when `html` is
 *     not a string.
 */
function parseHtml(html) {
  if (typeof html !== 'string') {
    return [];
  }

  // match() returns null (not an empty array) when nothing matches.
  const anchorTags = html.match(/<a\s[^>]*>/gi) || [];
  const hrefs = [];

  for (const tag of anchorTags) {
    const hrefMatch = tag.match(/\shref\s*=\s*["']([^"']+)["']/i);
    // Anchors without an href (e.g. named anchors) are skipped.
    if (hrefMatch) {
      hrefs.push(hrefMatch[1]);
    }
  }

  return hrefs;
}

module.exports = listAllConnections;

This code snippet fetches and stores a list of LinkedIn connections.

Here's a breakdown:

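  1. Dependencies: `fs` is used to read and write the cache file, and the project's `../Core` importer module is loaded.

  2. Configuration: `PROFILE_PATH` is the first defined of the `HOME`, `HOMEPATH` and `USERPROFILE` environment variables; the cache file is `connections.json` inside the `Conversations` folder under that path.

  3. listAllConnections Function: returns the cached list when the cache file exists, `force` is not set and the file was modified within the last 24 hours. Otherwise it opens the LinkedIn connections page, collects profile links with `getAllUntil`, removes duplicates, writes the result to the cache file and returns it.

  4. Module Export: `listAllConnections` is the module's only export.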