linkedin messages | List LinkedIn threads | scrape linkedin threads | Search

This code snippet is designed to interact with LinkedIn, extracting information about participants in a given thread and attempting to read messages within that thread. It uses a client object to control interactions with the LinkedIn website, but the specifics of this object and how it works are not provided.

Run example

npm run import -- "Read messages LinkedIn thread"

Read messages LinkedIn thread

var importer = require('../Core');
var moment = require('moment');
var chrono = require('chrono-node');

/**
 * Collects profile details for every participant shown on a thread's
 * "topcard" page.
 *
 * Depends on names defined outside this function: `client` (a chainable
 * browser-automation client), `importer.runAllPromises` and
 * `readLinkedInProfileInfo`.
 *
 * @param {string} thread - Thread path that is appended to
 *     https://www.linkedin.com and followed by "topcard/".
 * @returns {Promise} Resolves with whatever `importer.runAllPromises`
 *     produces for the per-profile tasks. Each task settles with the value of
 *     `readLinkedInProfileInfo(link)`, or with `[]` when that profile could
 *     not be read.
 */
function getThreadParticipants(thread) {
    const topcardPath = thread + 'topcard/';
    // check for thread url and go there
    return client
        .getUrl()
        .then(url => {
            return url.indexOf(topcardPath) === -1
                ? client.url('https://www.linkedin.com' + topcardPath)
                    .alertText()
                    .then(t => t.indexOf('leave') > -1 ? client.alertAccept() : '')
                    // Best effort only: a failure while checking for the
                    // "leave" dialog must not abort the scrape.
                    .catch(e => {
                    })
                : Promise.resolve([]);
        })
        // fixed wait before querying the page (presumably lets it render)
        .pause(3000)
        .elements('.view-profile')
        .then(els => Promise.all(els.value.map(a => {
            return client.elementIdAttribute(a.ELEMENT, 'href').then(h => h.value);
        })))
        .then(links => {
            // NOTE(review): runAllPromises is assumed to hand each task a
            // `resolve` callback - confirm against ../Core.
            return importer.runAllPromises(links.map(link => (resolve => {
                let results = [];
                return client
                    .click('a[href*="' + link.replace('https://www.linkedin.com', '') + '"]')
                    .pause(3000)
                    .then(() => readLinkedInProfileInfo(link))
                    .then(r => results = r)
                    .back()
                    .pause(1000)
                    .then(() => resolve(results))
                    .catch(e => {
                        console.log(e);
                        // Always settle the task; otherwise one failing
                        // profile leaves it pending forever.
                        resolve(results);
                    });
            })));
        });
}

/**
 * Scrolls the message list and then re-reads the thread.
 *
 * The page script lowers the list's `scrollTop` by 10000, i.e. it scrolls
 * towards the top of the list (presumably so older messages get loaded).
 * After a two second pause the work is handed back to
 * `readLinkedInMessages`.
 *
 * @param {Array} messages - Messages collected so far; passed through
 *     unchanged to `readLinkedInMessages`.
 * @returns {Promise} Whatever `readLinkedInMessages(messages)` resolves to.
 */
function scrollLinkedInMessages(messages) {
    // TODO: add check for needing to go to LinkedIn
    // TODO: add check for needing to login
    const scrollListUp = function () {
        var list = document.getElementsByClassName('msg-s-message-list')[0];
        list.scrollTop = list.scrollTop - 10000;
    };
    const afterScroll = client.execute(scrollListUp);
    const afterWait = afterScroll.pause(2000);
    return afterWait.then(function () {
        return readLinkedInMessages(messages);
    });
}

/**
 * Reads the messages currently rendered in the open thread, merges the ones
 * not seen before in front of `messages`, and keeps scrolling (via
 * `scrollLinkedInMessages`) for as long as a pass yields new messages.
 *
 * Depends on names defined outside this function: `client`, `getAllXPath`,
 * `chrono` and `scrollLinkedInMessages`.
 *
 * @param {Array<{message: string}>} messages - Messages already collected;
 *     a scraped entry counts as new when no collected entry has the same
 *     `message` text.
 * @returns {Promise<Array|undefined>} The merged list (newly found messages
 *     first). Any error is logged and the promise then resolves with
 *     `undefined`.
 */
function readLinkedInMessages(messages) {
    // TODO: check for thread id in url?
    var lastTime;
    return client
        .alertText()
        .then(t => t.indexOf('leave') > -1 ? client.alertAccept() : '')
        // Best effort only: failing to inspect the "leave" dialog must not
        // stop the read.
        .catch(e => {
        })
        .then(() => getAllXPath([
            '//li[contains(@class, "msg-s-message-list__event")]',
            {
                from: './/img/@title',
                time: './/time//text()',
                message: './/*[contains(@class, "msg-s-message-listitem__message-bubble")]//text()'
            }
        ]))
        .then(r => {
            return r.map(m => {
                var newTime = chrono.parseDate(m.time + '');
                if (newTime !== null) {
                    // Shift by the full offset in minutes so zones with a
                    // fractional-hour offset (e.g. +05:30) are not truncated.
                    newTime.setMinutes(newTime.getMinutes() - (new Date()).getTimezoneOffset());
                    lastTime = newTime;
                }
                return Object.assign(m, {
                    // entries without a parsable time inherit the last one seen
                    time: lastTime,
                    from: m.from + '',
                    // Keep every fragment that has a non-whitespace character.
                    // A multiline /^\s*$/ test would also reject fragments
                    // that merely begin or end with a blank line.
                    message: [].concat(m.message || [])
                        .filter(s => /\S/.test(s))
                        .join('\n')
                });
            });
        })
        .then(r => {
            // one Set lookup per scraped entry instead of rescanning `messages`
            var known = new Set(messages.map(m => m.message));
            var newMessages = r.filter(e => !known.has(e.message));
            messages = newMessages.concat(messages);
            return newMessages.length > 0
                ? scrollLinkedInMessages(messages)
                : Promise.resolve(messages);
        })
        .catch(e => console.log(e));
}

/**
 * Opens a LinkedIn thread, gathers its participants from the topcard view,
 * steps back and reads the thread's messages.
 *
 * Depends on names defined outside this function: `client`,
 * `getThreadParticipants` and `readLinkedInMessages`.
 *
 * @param {string} thread - Thread path appended to https://www.linkedin.com.
 * @param {Array} [messages] - Messages collected earlier; an empty list is
 *     used when omitted.
 * @returns {Promise<{thread, participants, messages}|undefined>} The thread
 *     summary. Any error is logged and the promise then resolves with
 *     `undefined`.
 */
function readLinkedInThread(thread, messages) {
    const topcardLink = 'a[href*="topcard"]';
    let people;

    // Click through to the topcard only when a link to it is on the page.
    const openTopcard = found => found
        ? client.click(topcardLink)
        : Promise.resolve([]);

    const goToTopcard = currentUrl => {
        if (currentUrl.indexOf(thread) === -1) {
            // Not on this thread yet: load it, deal with a possible "leave"
            // dialog, then look for the topcard link.
            return client.url('https://www.linkedin.com' + thread)
                .alertText()
                .then(text => text.indexOf('leave') > -1 ? client.alertAccept() : '')
                .catch(() => {
                })
                .pause(1000)
                .isExisting(topcardLink)
                .then(openTopcard);
        }
        if (currentUrl.indexOf('topcard') === -1) {
            return client.isExisting(topcardLink).then(openTopcard);
        }
        return Promise.resolve([]);
    };

    return client
        .getUrl()
        .then(goToTopcard)
        // get participants from topcard
        .pause(3000)
        .then(() => getThreadParticipants(thread))
        .then(found => {
            people = found;
            return found;
        })
        .back()
        // TODO: save to contacts
        .then(() => readLinkedInMessages(messages || []))
        .then(collected => ({
            thread: thread,
            participants: people,
            messages: collected
        }))
        .catch(err => console.log(err));
}
// The module's export is the thread reader itself: readLinkedInThread(thread, messages).
module.exports = readLinkedInThread;

What the code could have been:

const importer = require('../Core');
const moment = require('moment');
const chrono = require('chrono-node');
const puppeteer = require('puppeteer');

/**
 * Collects profile details for every participant shown on a thread's
 * "topcard" page.
 *
 * Depends on names defined outside this function:
 * `importer.runAllPromises` and `readLinkedInProfileInfo`.
 *
 * @param {string} thread - Thread path that is appended to
 *     https://www.linkedin.com and followed by "topcard/".
 * @param {object} client - Browser-automation client exposing `getUrl`,
 *     `url`, `alertText`, `alertAccept`, `pause`, `elements`,
 *     `elementIdAttribute`, `click` and `back`.
 * @returns {Promise<Array>} Whatever `importer.runAllPromises` produces for
 *     the per-profile tasks; each task yields the value of
 *     `readLinkedInProfileInfo(link)`, or `[]` when that profile could not
 *     be read. Resolves with `[]` when the lookup as a whole fails.
 */
async function getThreadParticipants(thread, client) {
    try {
        const topcardPath = thread + 'topcard/';
        const url = await client.getUrl();
        if (url.indexOf(topcardPath) === -1) {
            // navigate to thread topcard url
            await client.url('https://www.linkedin.com' + topcardPath);
            try {
                const alertText = await client.alertText();
                if (alertText.indexOf('leave') > -1) {
                    await client.alertAccept();
                }
            } catch (alertError) {
                // Best effort only: failing to inspect the "leave" dialog
                // must not abort the participant lookup.
            }
        }
        // fixed wait before querying the page (presumably lets it render)
        await client.pause(3000);
        // extract participant links
        const anchors = await client.elements('.view-profile');
        const links = await Promise.all(anchors.value.map(a => {
            return client.elementIdAttribute(a.ELEMENT, 'href').then(h => h.value);
        }));
        // fetch participant profiles
        // NOTE(review): each task both calls the `resolve` callback (when one
        // is supplied) and returns the profile, so it works whichever of the
        // two conventions runAllPromises uses - confirm against ../Core.
        return await importer.runAllPromises(links.map(link => async resolve => {
            let profile = [];
            try {
                await client.click('a[href*="' + link.replace('https://www.linkedin.com', '') + '"]');
                await client.pause(3000);
                profile = await readLinkedInProfileInfo(link);
                await client.back();
                await client.pause(1000);
            } catch (error) {
                console.log(error);
            }
            if (typeof resolve === 'function') {
                resolve(profile);
            }
            return profile;
        }));
    } catch (error) {
        console.log(error);
        return [];
    }
}

/**
 * Scrolls the message list and then re-reads the thread.
 *
 * The page script lowers the list's `scrollTop` by 10000, i.e. it scrolls
 * towards the top of the list (presumably so older messages get loaded).
 *
 * @param {Array} messages - Messages collected so far; passed through to
 *     `readLinkedInMessages`.
 * @param {object} client - Browser-automation client exposing `execute` and
 *     `pause`; forwarded to `readLinkedInMessages`.
 * @returns {Promise<Array|undefined>} Whatever `readLinkedInMessages`
 *     resolves to. A failure while scrolling is logged and yields
 *     `undefined`.
 */
async function scrollLinkedInMessages(messages, client) {
    try {
        await client.execute(() => {
            document.getElementsByClassName('msg-s-message-list')[0]
                .scrollTop -= 10000;
        });
        await client.pause(2000);
        // The reader needs the same client; without it every command in
        // readLinkedInMessages would be invoked on `undefined`.
        return readLinkedInMessages(messages, client);
    } catch (error) {
        console.log(error);
    }
}

/**
 * Reads the messages currently rendered in the open thread, merges the ones
 * not seen before in front of `messages`, and keeps scrolling (via
 * `scrollLinkedInMessages`) for as long as a pass yields new messages.
 *
 * Depends on names defined outside this function: `getAllXPath`, `chrono`
 * and `scrollLinkedInMessages`.
 *
 * @param {Array<{message: string}>} messages - Messages already collected;
 *     a scraped entry counts as new when no collected entry has the same
 *     `message` text.
 * @param {object} client - Browser-automation client exposing `alertText`
 *     and `alertAccept`; forwarded to `scrollLinkedInMessages`.
 * @returns {Promise<Array|undefined>} The merged list (newly found messages
 *     first). Any error is logged and yields `undefined`.
 */
async function readLinkedInMessages(messages, client) {
    try {
        try {
            const alertText = await client.alertText();
            if (alertText.indexOf('leave') > -1) {
                await client.alertAccept();
            }
        } catch (alertError) {
            // Best effort only: failing to inspect the "leave" dialog must
            // not stop the read.
        }
        // fetch all messages
        const messagesList = await getAllXPath([
            '//li[contains(@class, "msg-s-message-list__event")]',
            {
                from: './/img/@title',
                time: './/time//text()',
                message: './/*[contains(@class, "msg-s-message-listitem__message-bubble")]//text()'
            }
        ]);
        // Entries without a parsable time inherit the last one seen; it
        // stays null until the first timestamp is found.
        let lastTime = null;
        const processedMessages = messagesList.map(m => {
            const newTime = chrono.parseDate(m.time + '');
            if (newTime !== null) {
                // Shift by the full offset in minutes so zones with a
                // fractional-hour offset (e.g. +05:30) are not truncated.
                newTime.setMinutes(newTime.getMinutes() - (new Date()).getTimezoneOffset());
                lastTime = newTime;
            }
            return Object.assign(m, {
                time: lastTime,
                from: m.from + '',
                // Keep every fragment that has a non-whitespace character.
                // A multiline /^\s*$/ test would also reject fragments that
                // merely begin or end with a blank line.
                message: [].concat(m.message || [])
                    .filter(s => /\S/.test(s))
                    .join('\n')
            });
        });
        // filter new messages with one Set lookup per scraped entry
        const collected = messages || [];
        const known = new Set(collected.map(m => m.message));
        const newMessages = processedMessages.filter(e => !known.has(e.message));
        const merged = newMessages.concat(collected);
        // keep scrolling while a pass still turns up new messages
        if (newMessages.length > 0) {
            return scrollLinkedInMessages(merged, client);
        }
        return merged;
    } catch (error) {
        console.log(error);
    }
}

/**
 * Opens a LinkedIn thread, gathers its participants from the topcard view,
 * steps back and reads the thread's messages.
 *
 * Depends on names defined outside this function: `getThreadParticipants`
 * and `readLinkedInMessages`.
 *
 * @param {string} thread - Thread path appended to https://www.linkedin.com.
 * @param {object} client - Browser-automation client exposing `getUrl`,
 *     `url`, `alertText`, `alertAccept`, `pause`, `isExisting`, `click` and
 *     `back`; forwarded to both helpers.
 * @param {Array} [messages=[]] - Messages collected earlier.
 * @returns {Promise<{thread, participants, messages}|undefined>} The thread
 *     summary. Any error is logged and yields `undefined`.
 */
async function readLinkedInThread(thread, client, messages = []) {
    const topcardLink = 'a[href*="topcard"]';
    try {
        const url = await client.getUrl();
        const onThread = url.indexOf(thread) !== -1;
        if (!onThread) {
            // navigate to thread url
            await client.url('https://www.linkedin.com' + thread);
            try {
                const alertText = await client.alertText();
                if (alertText.indexOf('leave') > -1) {
                    await client.alertAccept();
                }
            } catch (alertError) {
                // Best effort only: failing to inspect the "leave" dialog
                // must not abort the whole read.
            }
            await client.pause(1000);
        }
        // Open the topcard unless the current page already is one.
        if (!onThread || url.indexOf('topcard') === -1) {
            const isTopcardLinkExisting = await client.isExisting(topcardLink);
            if (isTopcardLinkExisting) {
                await client.click(topcardLink);
            }
        }
        // get participants from topcard
        await client.pause(3000);
        const participants = await getThreadParticipants(thread, client);
        // Leave the topcard again so the messages are read from the thread.
        await client.back();
        // TODO: save to contacts
        const result = await readLinkedInMessages(messages, client);
        return {
            thread: thread,
            participants: participants,
            messages: result
        };
    } catch (error) {
        console.log(error);
    }
}

module.exports = async (thread, client) => {
    return readLinkedInThread(thread, client);
};

This code defines functions to interact with LinkedIn messages and extract information about thread participants.

Here's a breakdown:

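getThreadParticipants(thread):

  1. Navigation: If the browser is not already on the thread's `topcard/` page, it opens `https://www.linkedin.com` + thread + `topcard/`, accepts an alert whose text contains "leave", and then pauses for three seconds.

  2. Participant Extraction: It collects the `href` of every `.view-profile` element and, for each link, clicks it, calls `readLinkedInProfileInfo(link)`, and navigates back before moving on to the next one.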

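scrollLinkedInMessages(messages):

  1. Scrolling: It runs a script in the page that decreases the `scrollTop` of the first `msg-s-message-list` element by 10000, then pauses for two seconds.

  2. Reading Messages: It then calls `readLinkedInMessages(messages)` and returns its result.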

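readLinkedInMessages(messages):

  1. Alert Handling: It reads the text of any open alert and accepts it when the text contains "leave"; errors from this check are ignored.

  2. TODOs: The code notes that it does not yet check whether the current URL contains the thread id.

  3. Message Processing: It extracts the sender, time, and text of each `msg-s-message-list__event` item via `getAllXPath`, parses the time with `chrono`, drops blank text fragments, and keeps only entries whose text is not already in `messages`. If any new messages were found, it scrolls and reads again; otherwise it returns the accumulated list.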

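Overall:

The code snippet appears to be part of a larger script that interacts with LinkedIn to open a message thread, collect profile information about its participants, and read the thread's messages.

Missing Information: The definitions of `client`, `getAllXPath`, `readLinkedInProfileInfo`, and `importer.runAllPromises` are not shown in this snippet.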

Let me know if you have any other questions.