The readFacebookProfileInfo function automates the process of extracting a Facebook profile's name, description, and URL from a given profile page. It uses web scraping techniques to locate and interact with elements on the page, and ultimately returns a structured object containing the extracted data.
npm run import -- "Scrape facebook profile"
var importer = require('../Core');
// TODO: pass profile path in as parameter becomes readFacebookProfileInfo
/**
 * Opens the profile linked from the "Profile" section of the current page and
 * reads its name, description and URL.
 *
 * Depends on two names from the surrounding scope: `client` (the browser
 * automation object) and `importer` (which provides `runAllPromises`).
 *
 * @returns {Promise<Object>} Promise-like value resolving to
 *   `{name, description, url}`. Resolves to an empty object when no profile
 *   link is found, or when any step fails (the error is logged first), so
 *   callers can always read properties off the result.
 */
function readFacebookProfileInfo() {
    // href of the link(s) under the element that holds an <h4> containing
    // "Profile", inside an element whose class contains "scrollable".
    const profileLinkXPath = '//*[contains(@class, "scrollable")]//h4[contains(., "Profile")]/parent::*//a/@href';
    const nameSelector = '[role="main"] h1';
    const introSelector = '#intro_container_id li';
    let profile = {};

    // The XPath result is checked as text: an empty list, an empty string and
    // a list holding only an empty string all mean "no link". A missing
    // result (null/undefined) is treated the same way.
    const hasProfileLink = links =>
        links !== null && links !== undefined && String(links) !== '';

    // The intro list is optional; fall back to an empty description.
    const readDescription = () => client
        .isExisting(introSelector)
        .then(found => (found ? client.getText(introSelector) : ''));

    const scrapeProfile = links => client
        .clickSpa(links)
        // Fixed delay after the click (presumably milliseconds, giving the
        // page time to render) before reading from it.
        .pause(2000)
        .then(() => importer.runAllPromises([
            client.getText(nameSelector),
            readDescription(),
            client.getUrl()
        ]))
        .then(fields => {
            profile = {
                name: fields[0],
                description: fields[1],
                url: fields[2]
            };
        });

    return client
        .getAllXPath(profileLinkXPath)
        .then(links => (hasProfileLink(links) ? scrapeProfile(links) : client))
        .then(() => profile)
        .catch(e => {
            // Best effort: report the failure but still hand back an object.
            console.log(e);
            return {};
        });
}
module.exports = readFacebookProfileInfo;
const { Client } = require('../Core');
/**
 * Follows the first link found under the page's "Profile" heading and reads
 * the profile's name, description and URL from the browser.
 *
 * @param {*} profilePath - Must be truthy; when it is not, an error is logged
 *   and an empty object is returned.
 *   NOTE(review): the value is only checked for presence and is not otherwise
 *   used by this function - confirm the intended use with the caller.
 * @returns {Promise<Object>} Resolves to `{name, description, url}`, or to an
 *   empty object when the path is missing, no link is found, or a step fails
 *   (failures are logged).
 */
async function readFacebookProfileInfo(profilePath) {
    const linkXPath = '//*[contains(@class, "scrollable")]//h4[contains(., "Profile")]/parent::*//a/@href';
    const headingSelector = '[role="main"] h1';
    const introSelector = '#intro_container_id li';

    // Guard clause: a missing path is reported the same way as any other failure.
    if (!profilePath) {
        console.log(new Error('Profile path is required'));
        return {};
    }

    try {
        const hrefs = await Client.getAllXPath(linkXPath);
        if (hrefs.length === 0) {
            // Nothing to follow, so there is no profile to read.
            return {};
        }

        // Only the first match is opened; then wait a fixed 2000 before reading.
        await Client.clickSpa(hrefs[0]);
        await Client.pause(2000);

        // Start all three reads before waiting on any of them.
        const pendingName = Client.getText(headingSelector);
        const pendingDescription = Client.isExisting(introSelector).then(found => {
            if (found) {
                return Client.getText(introSelector);
            }
            // The intro list is optional; use an empty description instead.
            return '';
        });
        const pendingUrl = Client.getUrl();

        const fields = await Client.runAllPromises([pendingName, pendingDescription, pendingUrl]);
        return {
            name: fields[0],
            description: fields[1],
            url: fields[2]
        };
    } catch (failure) {
        console.log(failure);
        return {};
    }
}
module.exports = readFacebookProfileInfo;
This code snippet defines a function called readFacebookProfileInfo that aims to extract information from a Facebook profile.
Here's a breakdown:
1. Initialization: Declares an empty object, profile, to store the extracted data.
2. Finding the Profile Link: Calls client.getAllXPath to locate the link to the profile page. The XPath expression targets elements containing "Profile" within a scrollable container and extracts the href attribute of the link.
3. Navigating to the Profile: If a link is found, opens it with client.clickSpa and waits for 2 seconds using pause(2000).
4. Extracting Profile Data: Uses importer.runAllPromises to execute multiple asynchronous operations:
   - client.getText('[role="main"] h1'): Extracts the profile name from the main heading.
   - client.isExisting('#intro_container_id li').then(is => is ? client.getText('#intro_container_id li') : Promise.resolve('')): Checks if an element with the ID intro_container_id exists and contains list items. If it does, it extracts the text content of those list items (likely the profile description). Otherwise, it resolves with an empty string.
   - client.getUrl(): Gets the current URL of the profile page.
5. Storing Profile Data: Assigns the results to the profile object, which includes name, description, and url properties.
6. Returning Profile Data: Returns the profile object containing the extracted information.