This code automates the process of extracting a LinkedIn user's profile information, including name, title, URL, phone number, and email address.
npm run import -- "Scrape LinkedIn profile"
/**
 * Open the signed-in user's own profile through the settings dropdown.
 *
 * Drives the `client` object from the enclosing scope: waits, clicks the
 * dropdown trigger, waits for the menu, clicks the "view profile" link and
 * waits once more.
 *
 * @returns {*} Whatever the final `pause` call on the command chain returns.
 */
function visitMyProfile() {
  // TODO: add check for needing to go to LinkedIn
  // TODO: add check for needing to login
  const dropdownTrigger = '#nav-settings__dropdown-trigger';
  const viewProfileLink = '.nav-settings__view-profile-link';

  const menuOpened = client.pause(2000).click(dropdownTrigger);
  const profileClicked = menuOpened.pause(500).click(viewProfileLink);
  return profileClicked.pause(2000);
}
/**
 * Scrape name, headline, vanity URL, phone and email from a LinkedIn profile.
 *
 * Uses `client`, `visitMyProfile` and `getAllXPath` from the enclosing scope.
 * If the current URL does not contain "/in/", `visitMyProfile()` is called
 * first. The contact section is expanded when its toggle exists, then the
 * text nodes matched by each XPath are joined and trimmed.
 *
 * @returns {Promise<{name: string, title: string, url: string, phone: string, email: string}>}
 *   Resolves with one trimmed string per field; a field with no match is ''.
 */
function readLinkedInProfileInfo() {
  // A field from getAllXPath may be a single string, an array of text
  // fragments, or missing; normalise all three to one trimmed string.
  const joinText = value =>
    (typeof value === 'string' ? [value] : (value || [])).join('').trim();

  return client
    .getUrl().then(currentUrl => currentUrl.includes('/in/')
      ? Promise.resolve([])
      : visitMyProfile())
    .isExisting('.contact-see-more-less')
    .then(is => is ? client.click('.contact-see-more-less') : client)
    .pause(500)
    .then(() => getAllXPath({
      name: '//*[contains(@class, "pv-top-card-section__name")]//text()',
      title: '//*[contains(@class, "pv-top-card-section__headline")]//text()',
      url: '//*[contains(@class, "ci-vanity-url")]//*[contains(@class, "pv-contact-info__contact-item")]//text()',
      phone: '//*[contains(@class, "ci-phone")]//*[contains(@class, "pv-contact-info__contact-item")]//text()',
      email: '//*[contains(@class, "ci-email")]//*[contains(@class, "pv-contact-info__contact-item")]//text()'
    }))
    .then(r => ({
      name: joinText(r.name),
      title: joinText(r.title),
      url: joinText(r.url),
      phone: joinText(r.phone),
      email: joinText(r.email)
    }));
}
// CommonJS export: the scraper function is the module's entire export value.
module.exports = readLinkedInProfileInfo;
/**
 * Navigate the browser to the signed-in user's own profile page.
 *
 * First makes sure the session is on LinkedIn and logged in (via the
 * `isOnLinkedin` / `navigateToLinkedIn` / `isLoggedIn` / `login` helpers),
 * then opens the settings dropdown and follows its "view profile" link.
 *
 * @param {object} client - The browser client object.
 * @return {Promise} A promise that settles after the precondition helpers
 *   and the click/pause sequence have run.
 */
function visitMyProfile(client) {
  // Promise.resolve() lets each precondition helper answer either
  // synchronously or with a promise.
  return Promise.resolve(isOnLinkedin(client))
    .then(onLinkedIn => (onLinkedIn ? undefined : navigateToLinkedIn(client)))
    .then(() => isLoggedIn(client))
    .then(loggedIn => (loggedIn ? undefined : login(client)))
    // Always continue to the profile: returning right after navigating or
    // logging in would leave the browser short of the profile page and hand
    // the caller the helper's return value instead of a promise.
    .then(() => client
      .pause(2000)
      .click('#nav-settings__dropdown-trigger')
      .pause(500)
      .click('.nav-settings__view-profile-link')
      .pause(2000));
}
/**
 * Read name, headline, vanity URL, phone and email from a LinkedIn profile.
 *
 * If the current URL does not contain "/in/", `visitMyProfile(client)` is
 * run first and scraping continues afterwards. The contact section is
 * expanded when its toggle exists, then `getAllXPath` collects the text for
 * each field and `extractText` collapses it to a trimmed string.
 *
 * @param {object} client - The browser client object.
 * @return {Promise<{name: string, title: string, url: string, phone: string, email: string}>}
 *   A promise that resolves with an object containing the user's profile info.
 */
function readLinkedInProfileInfo(client) {
  const contactToggle = '.contact-see-more-less';

  // getUrl() is wrapped so the URL is read from the resolved value; calling
  // string methods on the un-awaited return value fails for an async client.
  return Promise.resolve(client.getUrl())
    .then(url => (url.includes('/in/') ? undefined : visitMyProfile(client)))
    .then(() => client.isExisting(contactToggle))
    .then(hasToggle => (hasToggle ? client.click(contactToggle) : undefined))
    // Short wait before reading the contact fields.
    .then(() => client.pause(500))
    .then(() => getAllXPath({
      name: '//*[contains(@class, "pv-top-card-section__name")]//text()',
      title: '//*[contains(@class, "pv-top-card-section__headline")]//text()',
      url: '//*[contains(@class, "ci-vanity-url")]//*[contains(@class, "pv-contact-info__contact-item")]//text()',
      phone: '//*[contains(@class, "ci-phone")]//*[contains(@class, "pv-contact-info__contact-item")]//text()',
      email: '//*[contains(@class, "ci-email")]//*[contains(@class, "pv-contact-info__contact-item")]//text()'
    }))
    .then(r => ({
      name: extractText(r.name),
      title: extractText(r.title),
      url: extractText(r.url),
      phone: extractText(r.phone),
      email: extractText(r.email)
    }));
}
/**
 * Helper meant to report whether the browser is currently on LinkedIn.
 *
 * NOTE: unimplemented placeholder. The body is empty, so every call returns
 * undefined, which any truthiness test reads as "not on LinkedIn".
 */
function isOnLinkedin() {
  // TODO: implement the actual check.
}
/**
 * Helper meant to navigate the browser to LinkedIn.
 *
 * NOTE: unimplemented placeholder. The body is empty, so `client` is unused
 * and the call returns undefined rather than a promise.
 *
 * @param {object} client - The browser client object (currently unused).
 */
function navigateToLinkedIn(client) {
  // TODO: implement the navigation.
}
/**
 * Helper meant to report whether the current session is logged in.
 *
 * NOTE: unimplemented placeholder. The body is empty, so every call returns
 * undefined, which any truthiness test reads as "not logged in".
 */
function isLoggedIn() {
  // TODO: implement the actual check.
}
/**
 * Helper meant to log the user in.
 *
 * NOTE: unimplemented placeholder. The body is empty, so `client` is unused
 * and the call returns undefined rather than a promise.
 *
 * @param {object} client - The browser client object (currently unused).
 */
function login(client) {
  // TODO: implement the login flow.
}
/**
 * Collapse a scraped value into a single trimmed string.
 *
 * @param {string|string[]|null|undefined} text - A lone string, a list of
 *   text fragments, or a falsy value when nothing was found.
 * @returns {string} The fragments concatenated without a separator and
 *   trimmed; '' for a falsy input.
 */
function extractText(text) {
  let fragments;
  if (typeof text === 'string') {
    fragments = [text];
  } else if (text) {
    fragments = text;
  } else {
    fragments = [];
  }
  const combined = fragments.join('');
  return combined.trim();
}
// CommonJS export: the scraper function is the module's entire export value.
module.exports = readLinkedInProfileInfo;
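This code defines a function, readLinkedInProfileInfo, that automates the process of extracting information from a LinkedIn profile.
Here's a breakdown:
visitMyProfile function:
- Opens the settings dropdown and clicks the "view profile" link to reach the signed-in user's own profile page, pausing between steps.
readLinkedInProfileInfo function:
- Checks whether the current URL contains "/in/" (a profile page); if it does not, it calls visitMyProfile to navigate there.
- Expands the contact section when the ".contact-see-more-less" toggle is present.
- Uses getAllXPath (a custom function not shown here) to extract the name, title, URL, phone number, and email address from the profile page using XPath selectors.
Module export:
- The readLinkedInProfileInfo function is exported as the main function of this module.
Let me know if you have any more questions!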