linkedin connections | sync linkedin contacts with google contacts | scrape linkedin contacts | Search

This code snippet uses Selenium to scrape comprehensive data from a LinkedIn profile, including basic information and details from various sections like experience, skills, and recommendations. It employs techniques to load all content, including hidden sections, to ensure complete data extraction.

Run example

npm run import -- "scrape entire linkedin profile"

scrape entire linkedin profile

var importer = require('../Core');

/**
 * Prepares the currently open profile page for scraping: waits for the
 * deferred areas, scrolls back to the top, then clicks every visible
 * "show more" style button so collapsed sections are expanded.
 *
 * Relies on `client`, `getAllUntil` and `importer` being in scope; `client`
 * and `getAllUntil` are not defined in this snippet.
 * NOTE(review): `client` is presumably a chainable browser client whose
 * `then()` result still exposes `pause`/`scroll` - confirm against its setup.
 *
 * @returns {Promise} Chain that settles once every step has run. Failures are
 *     logged with `console.log` instead of being propagated to the caller.
 */
function loadEntirePage() {
    // XPath selectors of the buttons that reveal collapsed content.
    const expandButtons = [
        '//button[contains(., "See more")]',
        '//button[contains(., "more skills")]',
        '//button[contains(., "more education")]',
        '//button[contains(., "more recommendations")]'
    ];

    // Builds one `resolve => ...` task (the shape handed to
    // importer.runAllPromises) that clicks `selector` only when it is visible.
    const clickIfVisible = selector => resolve => client
        .isVisible(selector)
        .then(is => is
            ? client.click(selector).pause(1000)
            : [])
        .then(() => resolve())
        .catch(e => {
            // Best effort: a button that cannot be checked or clicked must not
            // block the remaining ones, but the failure is still reported.
            console.log(e);
            return resolve();
        });

    return client
        // load all deferred elements (original note: done by scrolling;
        // the mechanics live in getAllUntil, which is defined elsewhere)
        .then(() => getAllUntil(
            false,
            '//*[contains(@class, "pv-deferred-area")]/@id',
            [],
            (a, b) => a === b,
            (i) => i < 30
        ))
        .pause(2000)
        // back to the top before expanding every section
        .scroll(0, 0)
        .then(() => importer.runAllPromises(expandButtons.map(clickIfVisible)))
        .catch(e => console.log(e));
}

/**
 * Opens a profile, reads its basic info, expands the page and collects the
 * summary, experience, skills, recommendations and interests sections.
 *
 * Relies on `client`, `clickSpa`, `readLinkedInProfileInfo`, `loadEntirePage`
 * and `getAllXPath` being in scope.
 *
 * @param {*} profile - Forwarded unchanged to `clickSpa`.
 *     NOTE(review): presumably a profile URL or link - confirm with clickSpa.
 * @returns {Promise<Object|undefined>} The XPath results with normalised
 *     sections, overlaid with the fields returned by
 *     `readLinkedInProfileInfo` (those win on key clashes). Errors are logged
 *     and the chain then resolves with `undefined`.
 */
function scrapeEntireLinkedInProfile(profile) {
    // Sections that getAllXPath returns as one entry per matched element.
    const listSections = ['experience', 'skills', 'recommendations', 'interests'];

    // Normalises a single entry: a plain string is returned untouched, an
    // array of text fragments is joined, trimmed and split into lines.
    // NOTE(review): the result is therefore either a string or an array;
    // kept as in the original because callers may depend on that shape.
    const toLines = e => typeof e[0] === 'string'
        ? e[0]
        : e[0].join('')
            .trim().split(/\s*\n+\s*/);

    let contact;
    return client
        .then(() => clickSpa(profile))
        .then(() => readLinkedInProfileInfo())
        .then(r => {
            contact = r;
            return loadEntirePage();
        })
        // get extra profile info
        .then(() => getAllXPath({
            summary: '//p[contains(@class, "section__summary")]//text()',
            experience: [
                '//*[contains(@class, "profile-section__card")]|//*[contains(@class, "profile-section__section-info-item")]',
                ['.//text()']
            ],
            skills: [
                '//*[contains(@class, "skill-entity--featured")]|//*[contains(@class, "skill-entity__pill")]',
                ['.//text()']
            ],
            recommendations: [
                '//*[contains(@class, "recommendation-entity")]',
                ['.//text()']
            ],
            interests: [
                '//*[contains(@class, "interest-entity")]',
                ['.//text()']
            ]
        }))
        .then(r => {
            const normalized = { summary: r.summary.join('') };
            for (const section of listSections) {
                normalized[section] = r[section].map(toLines);
            }
            // Basic info read before expanding the page is applied last.
            return Object.assign(r, normalized, contact);
        })
        .catch(e => console.log(e));
}
module.exports = scrapeEntireLinkedInProfile;

What the code could have been:

const importer = require('../Core');

/**
 * Prepares the currently open profile page for scraping: waits for the
 * deferred areas, scrolls back to the top, then clicks every visible
 * "show more" style button so collapsed sections are expanded.
 *
 * Relies on `getAllUntil` and `importer` being in scope; `getAllUntil` is not
 * defined in this snippet.
 *
 * @param {Object} client - Chainable browser client providing `then`,
 *     `pause`, `scroll`, `isVisible` and `click`.
 * @returns {Promise} Chain that settles once every step has run. Failures are
 *     logged with `console.log` instead of being propagated to the caller.
 */
function loadEntirePage(client) {
    // XPath selectors of the buttons that reveal collapsed content.
    const expandButtons = [
        '//button[contains(., "See more")]',
        '//button[contains(., "more skills")]',
        '//button[contains(., "more education")]',
        '//button[contains(., "more recommendations")]'
    ];

    // Builds one `resolve => ...` task (the shape handed to
    // importer.runAllPromises) that clicks `selector` only when it is visible.
    const clickIfVisible = selector => resolve => client
        .isVisible(selector)
        .then(is => is ? client.click(selector).pause(1000) : [])
        .then(() => resolve())
        .catch(e => {
            // console.log returns undefined, so `console.log(e) && resolve()`
            // would never resolve; log and resolve as two separate statements
            // so one failing button cannot stall the remaining tasks.
            console.log(e);
            return resolve();
        });

    return client
        // Load all deferred elements (the mechanics live in getAllUntil)
        .then(() => getAllUntil(
            false,
            '//*[contains(@class, "pv-deferred-area")]/@id',
            [],
            (a, b) => a === b,
            (i) => i < 30
        ))
        .pause(2000)
        // Back to the top before expanding every section
        .scroll(0, 0)
        .then(() => importer.runAllPromises(expandButtons.map(clickIfVisible)))
        .catch(e => console.log(e));
}

/**
 * Opens a profile, reads its basic info, expands the page and collects the
 * summary, experience, skills, recommendations and interests sections.
 *
 * Relies on `clickSpa`, `readLinkedInProfileInfo`, `loadEntirePage` and
 * `getAllXPath` being in scope.
 *
 * @param {Object} client - Thenable browser client the chain starts from; it
 *     is also handed to `loadEntirePage`.
 * @param {*} profile - Forwarded unchanged to `clickSpa`.
 *     NOTE(review): presumably a profile URL or link - confirm with clickSpa.
 * @returns {Promise<Object|undefined>} The XPath results with normalised
 *     sections, overlaid with the fields returned by
 *     `readLinkedInProfileInfo` (those win on key clashes). Errors are logged
 *     and the chain then resolves with `undefined`.
 */
function scrapeEntireLinkedInProfile(client, profile) {
    // Sections that getAllXPath returns as one entry per matched element.
    const listSections = ['experience', 'skills', 'recommendations', 'interests'];

    // Normalises a single entry: a plain string is returned untouched, an
    // array of text fragments is joined, trimmed and split into lines.
    // NOTE(review): the result is therefore either a string or an array;
    // kept as-is because callers may depend on that shape.
    const toLines = e => typeof e[0] === 'string'
        ? e[0]
        : e[0].join('')
            .trim().split(/\s*\n+\s*/);

    // Basic profile info, captured before the page is expanded.
    let contact;

    return client
        .then(() => clickSpa(profile))
        .then(() => readLinkedInProfileInfo())
        .then(r => {
            contact = r;
            return loadEntirePage(client);
        })
        // Get extra profile info
        .then(() => getAllXPath({
            summary: '//p[contains(@class, "section__summary")]//text()',
            experience: [
                '//*[contains(@class, "profile-section__card")]|//*[contains(@class, "profile-section__section-info-item")]',
                ['.//text()']
            ],
            skills: [
                '//*[contains(@class, "skill-entity--featured")]|//*[contains(@class, "skill-entity__pill")]',
                ['.//text()']
            ],
            recommendations: [
                '//*[contains(@class, "recommendation-entity")]',
                ['.//text()']
            ],
            interests: [
                '//*[contains(@class, "interest-entity")]',
                ['.//text()']
            ]
        }))
        .then(r => {
            const normalized = { summary: r.summary.join('') };
            for (const section of listSections) {
                normalized[section] = r[section].map(toLines);
            }
            // Basic info is applied last so it overrides clashing keys.
            return Object.assign(r, normalized, contact);
        })
        .catch(e => console.log(e));
}

module.exports = scrapeEntireLinkedInProfile;

This code snippet appears to be part of a larger script designed to scrape data from a LinkedIn profile using Selenium.

Here's a breakdown:

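  1. Dependencies: the snippet requires the project's `../Core` module as `importer` and uses its `runAllPromises` helper. It also relies on `getAllUntil`, `clickSpa`, `readLinkedInProfileInfo`, and `getAllXPath`, which are not defined in this snippet, and on a browser `client` (a global in the original version, a parameter in the rewritten one).

  2. loadEntirePage Function: calls `getAllUntil` for the `pv-deferred-area` elements, pauses for two seconds, scrolls back to the top, and then clicks each "See more", "more skills", "more education", and "more recommendations" button that is visible.

  3. scrapeEntireLinkedInProfile Function: opens the profile with `clickSpa`, reads the basic info with `readLinkedInProfileInfo`, calls `loadEntirePage`, extracts the summary, experience, skills, recommendations, and interests via XPath, normalises the extracted text, and merges it with the basic info.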