This code snippet synchronizes LinkedIn contact data with a local storage directory by scraping new contacts from LinkedIn and comparing them to existing data. It uses Selenium to automate the web scraping process and identifies new contacts for storage.
npm run import -- "sync linkedin contacts with google contacts"
var importer = require('../Core');
var fs = require('fs');
var glob = require('glob');
var path = require('path');
var {
getContacts,
runSeleniumCell
} = importer.import("list google contacts",
"selenium cell");
// Assigned inside syncLinkedInContacts from the object runSeleniumCell resolves with.
var listAllConnections;
var scrapeEntireLinkedInProfile;
// First home-directory variable that is set: HOME, then HOMEPATH, then USERPROFILE.
var PROFILE_PATH = process.env.HOME
    || process.env.HOMEPATH
    || process.env.USERPROFILE;
// Directory that is searched for, and receives, the linkedin-*.json files.
var project = `${PROFILE_PATH}/Conversations`;
/**
 * Make a string usable as a file name.
 *
 * @param {string} f - text to sanitize
 * @returns {string} `f` with every character that is not an ASCII letter,
 *     digit, hyphen or whitespace replaced by an underscore
 */
function escapeFilename(f) {
    const disallowed = /[^a-z0-9-\s]/ig;
    const sanitized = f.replace(disallowed, '_');
    return sanitized;
}
/**
 * Scrape LinkedIn connections that have no saved file yet and write each one
 * to `linkedin-<slug>.json` in the `project` directory.
 *
 * Steps:
 *  1. Collect the slugs already on disk, from both the file names and the
 *     `url` of the first entry in each file.
 *  2. Load the Selenium cells, log in and list every connection.
 *  3. Log how many connections are already saved.
 *  4. Pick one random batch of up to 30 unsaved connections, scrape each
 *     profile, merge it into the first `getContacts` match for the profile
 *     name (or into an empty record when there is no match) and save it.
 *
 * Side effect: assigns the module-level `listAllConnections` and
 * `scrapeEntireLinkedInProfile` variables from the Selenium result.
 *
 * @returns {Promise} resolves with the result of `importer.runAllPromises`
 *     for the scraped batch; a failed scrape is logged and does not reject.
 */
function syncLinkedInContacts() {
    const BATCH_SIZE = 30;

    // One slug rule for loading, filtering and writing, so a file written by
    // this function is always recognised as "already saved" on the next run.
    const toSlug = url => escapeFilename(url.replace(/^.*?\/*in\/|\/$/ig, ''));

    // A Set keeps the membership test in the filter below constant-time.
    const loaded = new Set();
    glob.sync('**/linkedin-*.json', {cwd: project}).forEach(l => {
        try {
            loaded.add(escapeFilename(path.basename(l)
                .replace('.json', '')
                .replace('linkedin-', '')));
            loaded.add(toSlug(JSON.parse(fs.readFileSync(path.join(project, l)))[0].url));
        } catch (e) {
            // Best effort: an unreadable file still counts by its file name,
            // but report why its contents could not be used.
            console.log(l, e.message);
        }
    });

    return runSeleniumCell([
        'log in linkedin',
        'scrape linkedin profile',
        'scrape linkedin contacts',
        'scrape entire linkedin profile'
    ])
        .then(r => {
            ({listAllConnections, scrapeEntireLinkedInProfile} = r);
            return r.loginLinkedIn();
        })
        .then(() => listAllConnections())
        .then(connections => {
            const fresh = connections.filter(c => !loaded.has(toSlug(c)) && !loaded.has(c));
            const saved = connections.length - fresh.length;
            // Avoid printing "NaN%" when no connections were returned.
            const percent = connections.length === 0
                ? 0
                : Math.round(saved / connections.length * 100);
            console.log(saved + ' / ' + connections.length + ' : ' + percent + '%');

            // Random batch start, aligned to a multiple of BATCH_SIZE.
            const rand = Math.floor(fresh.length / BATCH_SIZE * Math.random()) * BATCH_SIZE;
            const batch = fresh.slice(rand, rand + BATCH_SIZE);
            console.log(batch);

            // Each task is a function taking a `resolve` callback, which is
            // the shape handed to importer.runAllPromises.
            return importer.runAllPromises(batch.map(c => resolve => {
                var linkedIn;
                return scrapeEntireLinkedInProfile('https://linkedin.com' + c)
                    .then(profile => {
                        linkedIn = profile;
                        return getContacts({
                            displayName: profile.name.split(/\s/).join('.*')
                        });
                    })
                    .then(contacts => {
                        console.log(linkedIn.name);
                        if (contacts.length === 0) {
                            contacts.push({});
                        }
                        Object.assign(contacts[0], linkedIn);
                        fs.writeFileSync(
                            project + '/linkedin-' + toSlug(c) + '.json',
                            JSON.stringify(contacts, null, 4));
                        return resolve(contacts);
                    })
                    .catch(e => {
                        // One failed profile must not stop the rest of the batch.
                        console.log(e);
                        resolve();
                    });
            }));
        });
}
module.exports = syncLinkedInContacts;
const { resolve } = require('path');
const { promisify } = require('util');
// util.promisify accepts a single callback-style function and throws when it
// is handed the whole fs module; fs.promises is the built-in promise API and
// provides the readFile / writeFile calls used below.
const fs = require('fs').promises;
// NOTE(review): promisify only works if the installed glob package exports a
// callback-style function — confirm against the glob version in use.
const glob = promisify(require('glob'));
// NOTE(review): confirm that '../Core' exports these three names directly.
const { getContacts, runSeleniumCell, scrapeEntireLinkedInProfile } = require('../Core');
// First home-directory variable that is set: HOME, then HOMEPATH, then USERPROFILE.
const PROFILE_PATH = process.env.HOME || process.env.HOMEPATH || process.env.USERPROFILE;
// Directory that is searched for, and receives, the linkedin-*.json files.
const project = resolve(PROFILE_PATH, 'Conversations');
/**
 * Make a string usable as a file name.
 *
 * @param {string} f - text to sanitize
 * @returns {string} `f` with every character that is not an ASCII letter,
 *     digit, hyphen or whitespace replaced by an underscore
 */
const escapeFilename = (f) => {
  const unsafeCharacters = /[^a-z0-9-\s]/ig;
  return f.replace(unsafeCharacters, '_');
};
/**
 * Scrape LinkedIn connections that have no saved file yet and write each one
 * to `linkedin-<slug>.json` in the `project` directory.
 *
 * Steps:
 *  1. Collect the slugs already on disk, from both the file names and the
 *     `url` of the first entry in each file.
 *  2. Load the Selenium cells, log in and list every connection.
 *  3. Log how many connections are already saved.
 *  4. Pick one random batch of up to 30 unsaved connections, scrape each
 *     profile, merge it into the first `getContacts` match for the profile
 *     name (or into an empty record when there is no match) and save it.
 *
 * Errors are logged rather than thrown, so the returned promise always
 * resolves.
 *
 * @returns {Promise<void>}
 */
async function syncLinkedInContacts() {
  const BATCH_SIZE = 30;
  // Required locally so this function does not depend on how the
  // module-level `fs` binding was constructed.
  const { readFile, writeFile } = require('fs').promises;
  const { basename } = require('path');

  // One slug rule for loading, filtering and writing, so a file written by
  // this function is always recognised as "already saved" on the next run.
  const toSlug = (url) => escapeFilename(url.replace(/^.*?\/*in\/|\/$/ig, ''));

  try {
    // Load the slugs of contacts that already have a file.
    const fileNames = await glob('**/linkedin-*.json', { cwd: project });
    const savedSlugs = new Set();
    for (const fileName of fileNames) {
      // The ** pattern can return nested paths; only the base name holds the slug.
      savedSlugs.add(escapeFilename(
        basename(fileName).replace('linkedin-', '').replace('.json', '')));
      try {
        const [contact] = JSON.parse(await readFile(resolve(project, fileName), 'utf8'));
        savedSlugs.add(toSlug(contact.url));
      } catch (error) {
        // Best effort: an unreadable file still counts by its file name.
        console.log(fileName, error.message);
      }
    }

    const session = await runSeleniumCell([
      'log in linkedin',
      'scrape linkedin profile',
      'scrape linkedin contacts',
      'scrape entire linkedin profile',
    ]);
    // Log in before listing connections.
    await session.loginLinkedIn();
    // Prefer the scraper loaded with the Selenium cells; fall back to the
    // module-level import when the session does not provide one.
    const scrapeProfile = session.scrapeEntireLinkedInProfile || scrapeEntireLinkedInProfile;

    const connections = await session.listAllConnections();

    // Report how many connections are already saved.
    const newConnections = connections.filter(
      (connection) => !savedSlugs.has(toSlug(connection)) && !savedSlugs.has(connection));
    const savedCount = connections.length - newConnections.length;
    // Avoid printing "NaN%" when no connections were returned.
    const percentage = connections.length === 0
      ? 0
      : Math.round(savedCount / connections.length * 100);
    console.log(`${savedCount} / ${connections.length} : ${percentage}%`);

    // Random batch start, aligned to a multiple of BATCH_SIZE.
    const rand = Math.floor(newConnections.length / BATCH_SIZE * Math.random()) * BATCH_SIZE;
    const batch = newConnections.slice(rand, rand + BATCH_SIZE);
    console.log(batch);

    // NOTE(review): the batch is scraped concurrently; if the scraper drives a
    // single browser session this may need to run one profile at a time — confirm.
    await Promise.all(batch.map(async (connection) => {
      try {
        const linkedIn = await scrapeProfile(`https://linkedin.com${connection}`);
        const contacts = await getContacts({ displayName: linkedIn.name.split(/\s/).join('.*') });
        if (contacts.length === 0) contacts.push({});
        Object.assign(contacts[0], linkedIn);
        await writeFile(
          resolve(project, `linkedin-${toSlug(connection)}.json`),
          JSON.stringify(contacts, null, 4));
      } catch (error) {
        // One failed profile must not stop the rest of the batch.
        console.log(error);
      }
    }));
  } catch (error) {
    console.error(error);
  }
}
module.exports = syncLinkedInContacts;
This code snippet appears to be part of a larger script designed to synchronize LinkedIn contact data with a local storage directory.
Here's a breakdown:
Dependencies:
- importer: A custom module likely containing functions for interacting with various data sources and tools.
- fs: Node.js built-in module for file system operations.
- glob: npm package for finding files matching a pattern.
- path: Node.js built-in module for working with file paths.
Importing Functions:
- Imports getContacts and runSeleniumCell from importer.
Configuration:
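- Sets PROFILE_PATH to the user's home directory.
- Defines project as a subdirectory within the user's home directory for storing LinkedIn data.
escapeFilename Function:
- Replaces every character other than letters, digits, hyphens, and whitespace with an underscore so the result can be used as a filename.
syncLinkedInContacts Function:
- Loads the contacts already saved as linkedin-*.json files in the project directory.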