This code fetches a list of LinkedIn connections, prioritizing loading from a cached file if it's recent, otherwise scraping the data from LinkedIn and saving it to a local file. It uses Selenium to automate the web scraping process.
npm run import -- "scrape linkedin contacts"
var importer = require('../Core');
var fs = require('fs');
// Home directory of the current user: HOME first, then the Windows-style fallbacks.
var PROFILE_PATH = process.env.HOME || process.env.HOMEPATH || process.env.USERPROFILE;
// Folder under the home directory where connections.json is read from and written to.
var project = `${PROFILE_PATH}/Conversations`;
/**
 * Resolve the list of LinkedIn connection profile links.
 *
 * When `<project>/connections.json` exists, `force` is falsy, and the file was
 * modified less than 24 hours ago, the parsed contents of that file are
 * returned (wrapped in a resolved promise). Otherwise the connections page is
 * opened through `client`, links are collected with `getAllUntil`, duplicates
 * are dropped, and the result is written back to connections.json.
 *
 * NOTE(review): `client` and `getAllUntil` are not defined in this section of
 * the file; they are presumably provided elsewhere (e.g. via `importer`).
 *
 * @param {boolean} [force=false] - Skip the cached file and always re-fetch.
 * @returns {Promise<Array|undefined>} The de-duplicated links. If the fetch
 *   chain rejects, the error is logged and the promise resolves to undefined.
 */
function listAllConnections(force = false) {
    const cachePath = project + '/connections.json';
    const maxCacheAgeMs = 1000 * 60 * 60 * 24;
    const collected = [];

    // Only evaluated after the existence and `force` checks pass, as statSync
    // would throw on a missing file.
    const cacheIsRecent = () => {
        const ageMs = Date.now() - fs.statSync(cachePath).mtime.getTime();
        return ageMs < maxCacheAgeMs;
    };

    if (fs.existsSync(cachePath) && !force && cacheIsRecent()) {
        const cachedJson = fs.readFileSync(cachePath);
        return Promise.resolve(JSON.parse(cachedJson));
    }

    console.log('fetching new linkedin contacts');

    // Keep only the first occurrence of each link.
    const isFirstOccurrence = (link, index, allLinks) => allLinks.indexOf(link) === index;

    // Persist the links and pass them through to the caller.
    const saveToCache = (links) => {
        fs.writeFileSync(cachePath, JSON.stringify(links, null, 4));
        return links;
    };

    return client
        .getUrl()
        .url('https://www.linkedin.com/mynetwork/invite-connect/connections/')
        .pause(3000)
        .then(() => getAllUntil(
            false,
            '//a[contains(@href, "/in/")]/@href',
            collected,
            (left, right) => left === right,
            (count) => count < 10
        ))
        .then((links) => links.filter(isFirstOccurrence))
        .then((links) => saveToCache(links))
        .catch((err) => console.log(err));
}
module.exports = listAllConnections;
const fs = require('fs').promises; // Import fs module with promises
const path = require('path');
const { URL } = require('url');
// Environment variables
// Home directory of the current user: HOME first, then HOMEPATH, then USERPROFILE.
const PROFILE_PATH = process.env.HOME || process.env.HOMEPATH || process.env.USERPROFILE;
// "Conversations" folder under the home directory; connections.json lives here.
const PROJECT_PATH = path.join(PROFILE_PATH, 'Conversations');
// Import dependencies
const importer = require('../Core');
/**
 * List LinkedIn connection links, preferring a recent on-disk cache.
 *
 * If `force` is false and `<PROJECT_PATH>/connections.json` was modified
 * within the last 24 hours, its parsed contents are returned. Otherwise the
 * connections page is downloaded, links are extracted with `getAllUntil`,
 * de-duplicated, written to connections.json, and returned.
 *
 * @param {boolean} [force=false] - Ignore the cache and always re-fetch.
 * @returns {Promise<Array>} The unique connection links.
 * @throws Re-throws any error (after logging it) from reading the cache,
 *   fetching the page, or writing the file.
 */
async function listAllConnections(force = false) {
    try {
        const connectionsFile = path.join(PROJECT_PATH, 'connections.json');
        const maxCacheAgeMs = 24 * 60 * 60 * 1000;

        if (!force) {
            // `fs` is the promise-based API, which has no existsSync(); a single
            // stat() both proves the file exists and yields its modification time.
            let cacheModifiedMs = null;
            try {
                cacheModifiedMs = (await fs.stat(connectionsFile)).mtimeMs;
            } catch (statError) {
                // A missing cache file just means "fetch"; anything else is a real error.
                if (statError.code !== 'ENOENT') {
                    throw statError;
                }
            }

            // Fresh means the file is YOUNGER than the maximum age.
            if (cacheModifiedMs !== null && Date.now() - cacheModifiedMs < maxCacheAgeMs) {
                return JSON.parse(await fs.readFile(connectionsFile, 'utf8'));
            }
        }

        // Fetch new LinkedIn contacts
        console.log('Fetching new LinkedIn contacts');
        const client = importer.getClient(); // Assume importer has getClient method
        const url = new URL(client.getUrl()).origin + '/mynetwork/invite-connect/connections/';
        // NOTE(review): a plain fetch() carries no browser session — confirm this
        // actually returns the logged-in connections page.
        const response = await fetch(url);
        const html = await response.text();
        const friends = await getAllUntil(html, '//a[contains(@href, "/in/")]/@href');

        // Remove duplicates and write to file
        const uniqueFriends = [...new Set(friends)];
        await fs.writeFile(connectionsFile, JSON.stringify(uniqueFriends, null, 4));
        return uniqueFriends;
    } catch (e) {
        console.error(e);
        throw e;
    }
}
/**
 * Get the last-modified time of a file.
 *
 * @param {string} filePath - Path of the file to inspect.
 * @returns {Promise<number>} Modification time in milliseconds since the epoch.
 * @throws Rejects with the underlying fs error (e.g. ENOENT) if the file
 *   cannot be stat'ed.
 */
async function getModifiedTime(filePath) {
    // fs.stat() returns a Promise; it must be awaited before reading mtimeMs,
    // otherwise the property is read off the Promise object and is undefined.
    const stats = await fs.stat(filePath);
    return stats.mtimeMs;
}
/**
 * Collect items parsed from `html` until a stop item is seen.
 *
 * Items come from `parseHtml(html)`. An item is collected only while
 * `condition(<number collected so far>)` is truthy; once an item that
 * `equals(current, item)` has been collected, collection stops.
 *
 * @param {string} html - Markup handed to `parseHtml`.
 * @param {string} selector - Currently unused by this implementation.
 * @param {*} current - Value that marks where collection should stop.
 * @param {Function} [equals] - Comparator `(current, item) => boolean`.
 * @param {Function} [condition] - Predicate on the collected count; defaults
 *   to allowing fewer than 10 items.
 * @returns {Promise<Array>} The collected items, in document order.
 */
async function getAllUntil(html, selector, current, equals = (a, b) => a === b, condition = (i) => i < 10) {
    const candidates = await parseHtml(html);
    const collected = [];

    for (const candidate of candidates) {
        // Skip (but keep scanning) while the condition rejects the current count.
        if (!condition(collected.length)) {
            continue;
        }

        collected.push(candidate);

        // The stop item itself is included in the result.
        if (equals(current, candidate)) {
            break;
        }
    }

    return collected;
}
/**
 * Extract the href value of every anchor tag in an HTML string.
 *
 * This is a lightweight regex scan, not a real HTML parser; a library such as
 * cheerio would be more robust for irregular markup.
 *
 * @param {string} html - Raw HTML markup.
 * @returns {string[]} The non-empty href values of `<a ...>` tags, in document
 *   order. Anchors without an href are skipped; non-string or empty input
 *   yields an empty array.
 */
function parseHtml(html) {
    if (typeof html !== 'string' || html.length === 0) {
        return [];
    }

    // Opening <a ...> tags only; the required whitespace keeps <abbr>, <area>, etc. out.
    // match() returns null when nothing is found, hence the fallback.
    const anchorTags = html.match(/<a\s[^>]*>/gi) || [];

    // Accept either quote style, and require the closing quote to match the opening one.
    const hrefPattern = /\shref\s*=\s*(?:"([^"]+)"|'([^']+)')/i;

    const links = [];
    for (const tag of anchorTags) {
        const found = hrefPattern.exec(tag);
        if (found) {
            links.push(found[1] !== undefined ? found[1] : found[2]);
        }
    }
    return links;
}
module.exports = listAllConnections;
This code snippet fetches and stores a list of LinkedIn connections.
Here's a breakdown:
Dependencies:
- importer: A custom module likely containing functions for interacting with various data sources and tools.
- fs: Node.js built-in module for file system operations.
Configuration:
- Sets PROFILE_PATH to the user's home directory.
- Defines project as a subdirectory within the user's home directory for storing LinkedIn data.
listAllConnections Function:
- Accepts an optional force parameter (defaulting to false).
- Checks if a connections.json file exists in the project directory and if it's relatively recent (within the last 24 hours). If so, and force is not set, it returns the cached contents of that file.
- Otherwise, it fetches the connections from LinkedIn, removes duplicates, and saves the result to connections.json.
Module Export:
- Exports the listAllConnections function as the main module export.