linkedin connections | | scrape entire linkedin profile | Search

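This code snippet synchronizes LinkedIn contact data with a local storage directory. It reads the `linkedin-*.json` files already saved locally, uses Selenium to log in to LinkedIn and list all connections, and identifies the connections that have not been saved yet. A random batch of up to 30 of those new profiles is then scraped, merged with the matching Google contact (if any), and written to disk as JSON.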

Run example

npm run import -- "sync linkedin contacts with google contacts"

sync linkedin contacts with google contacts

var importer = require('../Core');
var fs = require('fs');
var glob = require('glob');
var path = require('path');
var {
    getContacts,
    runSeleniumCell
} = importer.import("list google contacts",
"selenium cell");
var listAllConnections, scrapeEntireLinkedInProfile;

// Home directory of the current user: the first of HOME, HOMEPATH or
// USERPROFILE that is set in the environment.
var PROFILE_PATH = process.env.HOME
    || process.env.HOMEPATH
    || process.env.USERPROFILE;
// Folder that holds the saved linkedin-*.json files.
var project = `${PROFILE_PATH}/Conversations`;
/**
 * Make a string safe to use as part of a file name.
 *
 * Every character that is not an ASCII letter, a digit, a hyphen or
 * whitespace is replaced with an underscore.
 *
 * @param {string} f - text to sanitize.
 * @returns {string} the sanitized text.
 */
function escapeFilename(f) {
    const disallowed = /[^a-z0-9-\s]/ig;
    const sanitized = f.replace(disallowed, '_');
    return sanitized;
}

/**
 * Scrape LinkedIn connections that are not stored locally yet and save them.
 *
 * Steps:
 *  1. Collect the profile identifiers already present in `linkedin-*.json`
 *     files under `project` (from both the file name and the stored `url`).
 *  2. Load the Selenium cells, log in, and list all connections.
 *  3. Keep only connections without a local file and log how many are
 *     already saved.
 *  4. Scrape one randomly chosen batch of up to 30 unsaved profiles, merge
 *     each into the first contact returned by `getContacts` (or an empty
 *     object when there is none) and write it to
 *     `<project>/linkedin-<name>.json`.
 *
 * Side effect: assigns the module-level `listAllConnections` and
 * `scrapeEntireLinkedInProfile` from the Selenium cell result.
 *
 * @returns {Promise} resolves with whatever `importer.runAllPromises`
 *     resolves to for the scraped batch; a failed profile is logged and
 *     resolved with `undefined` instead of rejecting the batch.
 */
function syncLinkedInContacts() {
    // A Set keeps the per-connection membership checks below constant-time.
    var loaded = new Set();
    glob.sync('**/linkedin-*.json', {cwd: project}).forEach(l => {
        try {
            // The file-name identifier is added first so it is kept even when
            // the JSON content below cannot be read or parsed.
            loaded.add(escapeFilename(path.basename(l).replace('.json', '').replace('linkedin-', '')));
            loaded.add(escapeFilename(JSON.parse(fs.readFileSync(path.join(project, l)))[0]
                .url.replace(/^.*?\/*in\/|\/$/ig, '')));
        } catch (e) {
            // Best effort: report the unreadable file and carry on.
            console.log(l);
        }
    });
    var fresh;
    return runSeleniumCell([
        'log in linkedin',
        'scrape linkedin profile',
        'scrape linkedin contacts',
        'scrape entire linkedin profile'
    ])
        // The destructuring assignment evaluates to `r`, so this both stores
        // the two scraping functions and calls `r.loginLinkedIn()`.
        .then(r => ({
            listAllConnections, scrapeEntireLinkedInProfile
        } = r).loginLinkedIn())
        .then(() => listAllConnections())
        .then((connections) => {
            fresh = connections.filter(c => {
                const fname = escapeFilename(c.replace(/^.*?\/*in\/|\/$/ig, ''));
                return !loaded.has(fname) && !loaded.has(c);
            });
            const saved = connections.length - fresh.length;
            // Guard the division so an empty connection list logs 0% rather than NaN%.
            const percent = connections.length === 0
                ? 0
                : Math.round(saved / connections.length * 100);
            console.log(saved
                + ' / ' + connections.length + ' : '
                + percent
                + '%');
        })
        .then(() => {
            // Pick the start of a random 30-item page within the unsaved list.
            const rand = Math.floor(fresh.length / 30 * Math.random()) * 30;
            const batch = fresh.slice(rand, rand + 30);
            console.log(batch);
            return importer.runAllPromises(batch.map(c => resolve => {
                var linkedIn;
                const fname = c.replace(/^\/*in\/|\/$/ig, '');
                return scrapeEntireLinkedInProfile('https://linkedin.com' + c)
                    .then(r => {
                        linkedIn = r;
                        // Match the Google contact by name, allowing anything
                        // between the whitespace-separated name parts.
                        return getContacts({
                            displayName: r.name.split(/\s/).join('.*')
                        });
                    })
                    .then(r => {
                        console.log(linkedIn.name);
                        if (r.length === 0) {
                            r.push({});
                        }
                        Object.assign(r[0], linkedIn);
                        fs.writeFileSync(
                            project + '/linkedin-' + escapeFilename(fname) + '.json',
                            JSON.stringify(r, null, 4));
                        return resolve(r);
                    })
                    .catch(e => {
                        // One failed profile must not abort the rest of the batch.
                        console.log(e);
                        resolve();
                    });
            }));
        });
}

module.exports = syncLinkedInContacts;

What the code could have been:

const { basename, resolve } = require('path');
// `util.promisify` only accepts a function, so the promise-based API is taken
// from `fs.promises` instead of promisifying the whole module.
const fs = require('fs').promises;
const glob = require('glob');
const importer = require('../Core');
const { getContacts, runSeleniumCell } = importer.import('list google contacts', 'selenium cell');

const PROFILE_PATH = process.env.HOME || process.env.HOMEPATH || process.env.USERPROFILE;
const project = resolve(PROFILE_PATH, 'Conversations');
// Number of new profiles scraped per run.
const BATCH_SIZE = 30;

/** Replace every character other than letters, digits, hyphens and whitespace with `_`. */
const escapeFilename = (f) => f.replace(/[^a-z0-9-\s]/ig, '_');

/** Strip everything up to and including `in/` and a trailing slash from a profile URL or path. */
const profileSlug = (url) => url.replace(/^.*?\/*in\/|\/$/ig, '');

/**
 * Collect the identifiers of profiles already saved under `project`.
 *
 * For each `linkedin-*.json` file both the name taken from the file name and
 * the slug taken from the stored `url` are recorded, each passed through
 * `escapeFilename`.
 *
 * @returns {Promise<Set<string>>} escaped identifiers of saved profiles.
 */
async function loadKnownProfiles() {
  const known = new Set();
  const fileNames = glob.sync('**/linkedin-*.json', { cwd: project });
  await Promise.all(fileNames.map(async (fileName) => {
    try {
      // The glob can match nested folders, so only the base name is used. It is
      // added first so it is kept even when the JSON below is unreadable.
      known.add(escapeFilename(basename(fileName).replace('.json', '').replace('linkedin-', '')));
      const [contact] = JSON.parse(await fs.readFile(resolve(project, fileName), 'utf8'));
      known.add(escapeFilename(profileSlug(contact.url)));
    } catch (error) {
      // Best effort: report the unreadable file and keep going.
      console.log(fileName);
    }
  }));
  return known;
}

/**
 * Scrape LinkedIn connections that are not stored locally yet and save them.
 *
 * Logs in through the Selenium cells, lists all connections, logs how many are
 * already saved, then scrapes one randomly chosen batch of up to `BATCH_SIZE`
 * unsaved profiles. Each profile is merged into the first contact returned by
 * `getContacts` (or an empty object) and written to
 * `<project>/linkedin-<slug>.json`.
 *
 * @returns {Promise<Array>} one entry per profile in the batch: the saved
 *     contact array, or `undefined` when that profile failed.
 * @throws when loading the Selenium cells, logging in or listing connections
 *     fails; per-profile failures are logged and do not reject.
 */
async function syncLinkedInContacts() {
  const known = await loadKnownProfiles();

  const cells = await runSeleniumCell([
    'log in linkedin',
    'scrape linkedin profile',
    'scrape linkedin contacts',
    'scrape entire linkedin profile',
  ]);
  const { listAllConnections, scrapeEntireLinkedInProfile } = cells;
  // Log in before scraping; called on `cells` in case it relies on `this`.
  await cells.loginLinkedIn();

  const connections = await listAllConnections();

  const newConnections = connections.filter((connection) => (
    !known.has(escapeFilename(profileSlug(connection))) && !known.has(connection)
  ));
  const alreadySaved = connections.length - newConnections.length;
  // Guard the division so an empty connection list logs 0% rather than NaN%.
  const percentage = connections.length === 0
    ? 0
    : Math.round(alreadySaved / connections.length * 100);

  console.log(`${alreadySaved} / ${connections.length} : ${percentage}%`);

  // Pick the start of a random page of BATCH_SIZE items within the unsaved list.
  const rand = Math.floor(newConnections.length / BATCH_SIZE * Math.random()) * BATCH_SIZE;
  const batch = newConnections.slice(rand, rand + BATCH_SIZE);
  console.log(batch);

  // NOTE(review): profiles are scraped one at a time on the assumption that
  // the Selenium cells share a single browser session — confirm before
  // parallelising.
  const results = [];
  for (const connection of batch) {
    try {
      const linkedIn = await scrapeEntireLinkedInProfile(`https://linkedin.com${connection}`);
      // Match the Google contact by name, allowing anything between name parts.
      const contacts = await getContacts({ displayName: linkedIn.name.split(/\s/).join('.*') });
      if (contacts.length === 0) {
        contacts.push({});
      }

      Object.assign(contacts[0], linkedIn);
      await fs.writeFile(
        resolve(project, `linkedin-${escapeFilename(profileSlug(connection))}.json`),
        JSON.stringify(contacts, null, 4));
      results.push(contacts);
    } catch (error) {
      // One failed profile must not abort the rest of the batch.
      console.log(error);
      results.push(undefined);
    }
  }
  return results;
}

module.exports = syncLinkedInContacts;

This code snippet appears to be part of a larger script designed to synchronize LinkedIn contact data with a local storage directory.

Here's a breakdown:

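  1. Dependencies: the script requires Node's `fs` and `path` modules, the `glob` package, and the project's own `importer` from `../Core`.

  2. Importing Functions: `getContacts` and `runSeleniumCell` are loaded through `importer.import("list google contacts", "selenium cell")`. `listAllConnections` and `scrapeEntireLinkedInProfile` are declared up front and assigned later from the result of `runSeleniumCell`.

  3. Configuration: `PROFILE_PATH` is the user's home directory, taken from the `HOME`, `HOMEPATH` or `USERPROFILE` environment variable, and `project` is the `Conversations` folder inside it.

  4. escapeFilename Function: replaces every character that is not a letter, digit, hyphen or whitespace with an underscore so the result can be used in a file name.

  5. syncLinkedInContacts Function: collects the identifiers of profiles already saved in `linkedin-*.json` files, logs in to LinkedIn, lists all connections, logs how many are already saved, and then scrapes a random batch of up to 30 unsaved profiles, merging each with its matching Google contact and writing the result to `linkedin-<name>.json`.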