This code automates the process of collecting URLs of LinkedIn threads by scrolling through the messages page and extracting thread links until no new threads are found.
npm run import -- "List LinkedIn threads"
/**
 * Scrolls the LinkedIn conversation list down to trigger loading of more
 * threads, waits briefly, then re-collects the thread links.
 *
 * @param {Array<string>} threads - Thread URLs collected so far.
 * @returns {Promise<Array<string>>} Whatever `getLinkedInThreads(threads)` yields.
 */
function scrollLinkedInThreads(threads) {
  // TODO: add check for needing to go to LinkedIn
  // TODO: add check for needing to login

  // This callback is handed to `client.execute`, i.e. it is meant to run in
  // the page, so it only touches page globals.
  const scrollConversationList = () => {
    const lists = document.getElementsByClassName(
      'msg-conversations-container__conversations-list');
    lists[0].scrollTop += 10000;
  };

  // Scroll to the bottom of the messages, then pause before looking again.
  const afterScroll = client.execute(scrollConversationList).pause(2000);

  return afterScroll.then(() => getLinkedInThreads(threads));
}
/**
 * Reads the thread links currently present in the page, appends the ones not
 * yet known to `threads`, and keeps scrolling for more while new links appear.
 *
 * Each URL is added at most once, even if the page reports the same href
 * several times in a single pass.
 *
 * @param {Array<string>} [threads=[]] - Thread URLs collected so far (not mutated).
 * @returns {Promise<Array<string>>} The known URLs followed by newly found ones.
 */
function getLinkedInThreads(threads = []) {
  // Handed to `client.execute`, so it must be self-contained and rely only on
  // page globals (`document`, `XPathResult`).
  const collectThreadHrefs = () => {
    const iterator = document.evaluate(
      '//*[@data-control-name="view_message"]/@href',
      document, null,
      XPathResult.UNORDERED_NODE_ITERATOR_TYPE, null);
    const hrefs = [];
    let node = iterator.iterateNext();
    while (node) {
      hrefs.push(node.value);
      node = iterator.iterateNext();
    }
    return hrefs;
  };

  return client
    .execute(collectThreadHrefs)
    .then((els) => {
      // A Set gives O(1) membership checks and also catches duplicates that
      // occur within the same batch of hrefs.
      const seen = new Set(threads);
      const newThreads = [];
      for (const href of els.value) {
        if (!seen.has(href)) {
          seen.add(href);
          newThreads.push(href);
        }
      }

      const merged = threads.concat(newThreads);

      // Keep scrolling only while the previous pass surfaced something new;
      // otherwise the list is considered exhausted.
      return newThreads.length > 0
        ? scrollLinkedInThreads(merged)
        : merged;
    });
}
/**
 * Entry point: makes sure the messaging page is open, waits briefly, then
 * starts collecting thread URLs.
 *
 * @param {Array<string>} [threads] - Thread URLs already known; a falsy value
 *   is treated as an empty list.
 * @returns {Promise<Array<string>>} Whatever `getLinkedInThreads` yields.
 */
function listLinkedInThreads(threads) {
  // Click through to messaging only when the current URL is not already there.
  const openMessagingIfNeeded = (currentUrl) => {
    if (!currentUrl.includes('/messaging')) {
      return client.click('a[href*="/messaging/"]');
    }
    return Promise.resolve([]);
  };

  return client
    .getUrl()
    .then(openMessagingIfNeeded)
    .pause(2000)
    .then(() => getLinkedInThreads(threads || []));
}
// Export the function-based entry point.
// NOTE(review): a later `module.exports = ...` assignment in this file replaces
// this value — confirm which export is the intended one.
module.exports = listLinkedInThreads;
const { Client } = require('playwright'); // Import playwright client
/**
 * Collects LinkedIn message-thread URLs by repeatedly scrolling the
 * conversation list and reading the thread links until a pass finds nothing new.
 */
class LinkedInScraper {
  /**
   * @param {Client} client - Browser automation client. This class calls
   *   `getUrl`, `goto`, `$` and `$x` on it.
   * @param {{scrollPauseMs?: number}} [options] - Optional settings.
   *   `scrollPauseMs` is how long to wait after each scroll so lazily loaded
   *   conversations can appear before links are read (default 2000; 0 disables).
   */
  constructor(client, { scrollPauseMs = 2000 } = {}) {
    this.client = client;
    this.scrollPauseMs = scrollPauseMs;
  }

  /**
   * Ensures the messaging page is open and the user is logged in, scrolls the
   * conversation list, waits, then collects thread URLs.
   * @param {Array<string>} [threads=[]] - Thread URLs collected so far
   * @returns {Promise<Array<string>>} Updated array of threads
   */
  async scrollLinkedInThreads(threads = []) {
    if (!(await this.isLinkedInUrl())) {
      await this.navigateToLinkedIn();
    }
    if (!(await this.isLoggedIn())) {
      await this.loginLinkedIn();
    }
    await this.scrollToBottom();
    // Without a pause the links are read before newly loaded conversations
    // are rendered, which ends the collection prematurely.
    if (this.scrollPauseMs > 0) {
      await new Promise((resolve) => setTimeout(resolve, this.scrollPauseMs));
    }
    return this.getLinkedInThreads(threads);
  }

  /**
   * Checks whether the current URL contains `/messaging`.
   * @returns {Promise<boolean>} Whether the messaging page appears to be open
   */
  async isLinkedInUrl() {
    const url = await this.client.getUrl();
    return url.includes('/messaging');
  }

  /**
   * Checks if the user is logged in to LinkedIn
   * @returns {Promise<boolean>} Currently always `true` (placeholder)
   */
  async isLoggedIn() {
    // TODO: implement actual login check
    return true;
  }

  /**
   * Navigates to the LinkedIn messaging page
   * @returns {Promise<void>}
   */
  async navigateToLinkedIn() {
    await this.client.goto('https://www.linkedin.com/messaging/');
  }

  /**
   * Logs in to LinkedIn
   * @returns {Promise<void>}
   */
  async loginLinkedIn() {
    // TODO: implement actual login functionality
  }

  /**
   * Scrolls the conversation list down by 10000 pixels
   * @returns {Promise<void>}
   */
  async scrollToBottom() {
    const list = await this.client.$('.msg-conversations-container__conversations-list');
    await list.evaluate((el) => {
      el.scrollTop += 10000;
    });
  }

  /**
   * Merges thread URLs found on the page into `threads` and keeps scrolling
   * while new ones turn up. Each URL is added at most once.
   * @param {Array<string>} [threads=[]] - Thread URLs collected so far (not mutated)
   * @returns {Promise<Array<string>>} Updated array of threads
   */
  async getLinkedInThreads(threads = []) {
    const { value: found } = await this.extractThreadUrls();

    // The Set also catches duplicates inside a single batch of links.
    const seen = new Set(threads);
    const newThreads = [];
    for (const url of found) {
      if (!seen.has(url)) {
        seen.add(url);
        newThreads.push(url);
      }
    }

    const merged = threads.concat(newThreads);
    return newThreads.length > 0
      ? this.scrollLinkedInThreads(merged)
      : merged;
  }

  /**
   * Extracts thread URLs from the messaging page
   * @returns {Promise<{value: Array<string>}>} Object whose `value` holds the URLs
   */
  async extractThreadUrls() {
    // NOTE(review): `$x(...)` returning an object with `nodes()` is kept as
    // written; verify it against the actual client API in use.
    const element = await this.client.$x('//*[@data-control-name="view_message"]/@href');
    const nodes = element.nodes();
    // `evaluate` is asynchronous: wait for every result so `value` contains
    // the resolved strings rather than pending promises.
    const value = await Promise.all(
      Array.from(nodes, (node) => node.evaluate((attr) => attr.value)),
    );
    return { value };
  }

  /**
   * Lists LinkedIn threads
   * @param {Array<string>} [threads=[]] - Thread URLs already known (optional)
   * @returns {Promise<Array<string>>} Known URLs followed by newly found ones
   */
  async listLinkedInThreads(threads = []) {
    // Return the collected result; the input array itself is never mutated.
    return this.scrollLinkedInThreads(threads);
  }
}
// Export a factory that wraps the given client in a new LinkedInScraper.
// NOTE(review): this assignment overwrites the earlier
// `module.exports = listLinkedInThreads` in this file — confirm that is intended.
module.exports = (client) => new LinkedInScraper(client);
This code defines a function listLinkedInThreads
that automates the process of retrieving a list of LinkedIn thread URLs.
Here's a breakdown:
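scrollLinkedInThreads function:
- Uses client.execute to run JavaScript in the page that scrolls the conversation list down by 10,000 pixels.
- Pauses for 2 seconds, then calls getLinkedInThreads to fetch the new threads.

getLinkedInThreads function:
- Uses document.evaluate with an XPath expression to find all elements with the attribute data-control-name="view_message" and extract their href values.
- Filters out the URLs that are already in the threads array and adds the new threads to it.
- If any new threads were found, calls scrollLinkedInThreads to load more; otherwise it returns the collected threads.

listLinkedInThreads function:
- Opens the messaging page if the current URL is not already on it, pauses for 2 seconds, then calls getLinkedInThreads to fetch the initial list of threads.

Let me know if you have any more questions!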