This code snippet uses Selenium to scrape comprehensive data from a LinkedIn profile, including basic information and details from various sections like experience, skills, and recommendations. It employs techniques to load all content, including hidden sections, to ensure complete data extraction.
npm run import -- "scrape entire linkedin profile"
var importer = require('../Core');
/**
 * Expands the currently open LinkedIn profile page so that all of its
 * sections are present in the DOM.
 *
 * Works on the module-level `client` chain: first waits on `getAllUntil` for
 * the "pv-deferred-area" elements (loaded by scrolling), pauses, scrolls back
 * to the top and then clicks every "show more" style button that is visible.
 *
 * @returns {Promise} The `client` chain. Failures are logged with
 *     `console.log` instead of being propagated to the caller.
 */
function loadEntirePage() {
    const showMoreButtons = [
        '//button[contains(., "See more")]',
        '//button[contains(., "more skills")]',
        '//button[contains(., "more education")]',
        '//button[contains(., "more recommendations")]'
    ];

    // Builds one task per button for importer.runAllPromises (presumably it
    // invokes each task with a `resolve` callback - confirm against ../Core).
    // The task always resolves, so a missing or unclickable button is logged
    // and skipped instead of aborting the remaining buttons.
    const expandIfVisible = selector => resolve => {
        return client
            .isVisible(selector)
            .then(is => is
                ? client.click(selector).pause(1000)
                : [])
            .then(() => resolve())
            .catch(e => {
                // Previously `console.log;` - a no-op that hid the error.
                console.log(e);
                return resolve();
            });
    };

    return client
        // load all deferred elements by scrolling
        .then(() => getAllUntil(
            false,
            '//*[contains(@class, "pv-deferred-area")]/@id',
            [],
            (a, b) => a === b,
            (i) => i < 30
        ))
        .pause(2000)
        // show more all sections
        .scroll(0, 0)
        .then(() => importer.runAllPromises(showMoreButtons.map(expandIfVisible)))
        .catch(e => console.log(e));
}
/**
 * Scrapes one LinkedIn profile: the basic contact info plus the summary,
 * experience, skills, recommendations and interests sections.
 *
 * Uses the module-level `client` chain. The steps run in order: open the
 * profile with `clickSpa`, read the basic info with
 * `readLinkedInProfileInfo`, expand the page with `loadEntirePage`, then
 * query the section texts with `getAllXPath`.
 *
 * @param {*} profile - Passed straight to `clickSpa`; presumably a profile
 *     link or identifier (confirm against `clickSpa`).
 * @returns {Promise<Object>} Resolves with the `getAllXPath` result object,
 *     mutated in place to hold the normalised sections and then the basic
 *     info fields (which win on key clashes). On failure the error is logged
 *     and the promise resolves with `undefined`.
 */
function scrapeEntireLinkedInProfile(profile) {
    let contact;

    // Normalises one matched element. A plain string in slot 0 is returned
    // untouched; an array of text nodes is joined, trimmed and split into
    // its non-empty lines.
    const toLines = entry => typeof entry[0] === 'string'
        ? entry[0]
        : entry[0].join('').trim().split(/\s*\n+\s*/);

    return client
        .then(() => clickSpa(profile))
        .then(() => readLinkedInProfileInfo())
        .then(r => {
            // Kept aside so it can be merged over the section data at the end.
            contact = r;
            return loadEntirePage();
        })
        // get extra profile info
        .then(() => {
            return getAllXPath({
                summary: '//p[contains(@class, "section__summary")]//text()',
                experience: [
                    '//*[contains(@class, "profile-section__card")]|//*[contains(@class, "profile-section__section-info-item")]',
                    ['.//text()']
                ],
                skills: [
                    '//*[contains(@class, "skill-entity--featured")]|//*[contains(@class, "skill-entity__pill")]',
                    ['.//text()']
                ],
                recommendations: [
                    '//*[contains(@class, "recommendation-entity")]',
                    ['.//text()']
                ],
                interests: [
                    '//*[contains(@class, "interest-entity")]',
                    ['.//text()']
                ]
            });
        })
        .then(r => {
            return Object.assign(r, {
                summary: r.summary.join(''),
                experience: r.experience.map(e => toLines(e)),
                skills: r.skills.map(e => toLines(e)),
                recommendations: r.recommendations.map(e => toLines(e)),
                interests: r.interests.map(e => toLines(e))
            }, contact);
        })
        .catch(e => console.log(e));
}
// The profile scraper is the only thing this module exports.
module.exports = scrapeEntireLinkedInProfile;
const importer = require('../Core');
/**
* Loads an entire LinkedIn page.
*
* @param {Object} client - The client object.
* @returns {Promise} A promise that resolves when the page is loaded.
*/
function loadEntirePage(client) {
    // XPath selectors of the buttons that reveal collapsed content.
    const expandSelectors = [
        '//button[contains(., "See more")]',
        '//button[contains(., "more skills")]',
        '//button[contains(., "more education")]',
        '//button[contains(., "more recommendations")]'
    ];

    // One task per button, handed to importer.runAllPromises (presumably it
    // calls each task with a `resolve` callback - confirm against ../Core).
    const clickWhenVisible = selector => resolve => {
        // Check if the button is visible, and if so, click it
        return client
            .isVisible(selector)
            .then(is => is ? client.click(selector).pause(1000) : [])
            .then(() => resolve())
            .catch(e => {
                // console.log returns undefined, so `console.log(e) && resolve()`
                // never resolved and the whole load hung on the first failure.
                console.log(e);
                resolve();
            });
    };

    // Load all deferred elements by scrolling
    return client
        .then(() => getAllUntil(
            false,
            '//*[contains(@class, "pv-deferred-area")]/@id',
            [],
            (a, b) => a === b,
            (i) => i < 30
        ))
        .pause(2000)
        // Show more all sections
        .scroll(0, 0)
        .then(() => importer.runAllPromises(expandSelectors.map(clickWhenVisible)))
        // Best effort: a failure is logged and the chain still settles.
        .catch(e => console.log(e));
}
/**
* Scrapes an entire LinkedIn profile.
*
* @param {Object} profile - The LinkedIn profile object.
* @returns {Promise} A promise that resolves with the scraped profile info.
*/
function scrapeEntireLinkedInProfile(client, profile) {
    // Sections whose matches are lists of elements, in processing order.
    const listSections = ['experience', 'skills', 'recommendations', 'interests'];

    // XPath queries handed to getAllXPath, keyed by the output field name.
    const buildQueries = () => ({
        summary: '//p[contains(@class, "section__summary")]//text()',
        experience: [
            '//*[contains(@class, "profile-section__card")]|//*[contains(@class, "profile-section__section-info-item")]',
            ['.//text()']
        ],
        skills: [
            '//*[contains(@class, "skill-entity--featured")]|//*[contains(@class, "skill-entity__pill")]',
            ['.//text()']
        ],
        recommendations: [
            '//*[contains(@class, "recommendation-entity")]',
            ['.//text()']
        ],
        interests: [
            '//*[contains(@class, "interest-entity")]',
            ['.//text()']
        ]
    });

    // Basic profile info, captured early and merged in last.
    let basicInfo;

    return client
        .then(() => clickSpa(profile))
        .then(() => readLinkedInProfileInfo())
        .then(info => {
            basicInfo = info;
            // Expand the page before querying the sections.
            return loadEntirePage(client);
        })
        .then(() => getAllXPath(buildQueries()))
        .then(found => {
            const cleaned = { summary: found.summary.join('') };
            for (const section of listSections) {
                cleaned[section] = found[section].map(item => {
                    const first = item[0];
                    if (typeof first === 'string') {
                        // Already a single string: passed through as is.
                        return first;
                    }
                    // Text nodes: glue together, trim, one entry per line.
                    return first.join('').trim().split(/\s*\n+\s*/);
                });
            }
            // Mutates and returns the getAllXPath result; basic info wins
            // over section data when keys clash.
            return Object.assign(found, cleaned, basicInfo);
        })
        // Errors are logged, so the returned promise resolves with undefined.
        .catch(err => console.log(err));
}
// The profile scraper is the only thing this module exports.
module.exports = scrapeEntireLinkedInProfile;
This code snippet appears to be part of a larger script designed to scrape data from a LinkedIn profile using Selenium.
Here's a breakdown:
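Dependencies:
- `importer`: A custom module likely containing functions for interacting with various data sources and tools.

`loadEntirePage` Function:
- Scrolls to load the deferred sections, then clicks each visible "See more"-style button so that hidden content is expanded.

`scrapeEntireLinkedInProfile` Function:
- Opens the profile and reads its basic information (using `readLinkedInProfileInfo`, not shown in the snippet).
- Calls `loadEntirePage` to ensure all content is loaded.
- Extracts the summary, experience, skills, recommendations, and interests, and merges them with the basic information stored in the `contact` variable.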