This code provides a function, scrapeFacebookFriends, that automates logging into Facebook, extracting unique friend URLs from the user's friends page, and storing them in an array. It uses Selenium for browser automation and XPath for element selection.
npm run import -- "Scrape facebook friends"
var importer = require('../Core');
/**
 * Log in, open the current user's friends page, run the link collector over
 * it, and finally request the HTML of the page body.
 *
 * `loginFacebook` and `getAllUntil` are not defined in this block; they are
 * expected to be in scope already (presumably provided through the Core
 * importer — confirm against the surrounding setup).
 *
 * @returns {Promise} Settles with whatever the final `getHTML('body')` step
 *     yields. Any failure in the chain is printed with `console.log` and the
 *     promise then resolves with `undefined`.
 */
function scrapeFacebookFriends() {
    // Accumulator handed to getAllUntil; it is not part of the return value.
    const friends = [];

    // Two links are considered the same when they are strictly equal.
    const isSameLink = (a, b) => a === b;
    // Predicate handed to getAllUntil: true for values below 30.
    const withinLimit = (i) => i < 30;

    const collectFriendLinks = () => getAllUntil(
        false,
        '//a[contains(@href, "friends_tab")]/@href',
        friends,
        isSameLink,
        withinLimit
    );
    const logFailure = (e) => console.log(e);

    return loginFacebook()
        .url('https://www.facebook.com/me/friends')
        // Fixed 4 second wait before the page is queried.
        .pause(4000)
        .then(collectFriendLinks)
        .getHTML('body')
        .catch(logFailure);
}
module.exports = scrapeFacebookFriends;
// Import the core module
const core = require('../Core');
/**
 * Log in to Facebook, open the current user's friends page, collect the
 * friend links, and return the HTML of the page body.
 *
 * Assumes `core.loginFacebook()` resolves with a browser client exposing
 * `url`, `pause` and `getHTML` — TODO confirm against ../Core.
 *
 * NOTE(review): `friends` is filled by `scrapeFriendsList` but is not what the
 * promise resolves with; the chain ends with `getHTML('body')`. Confirm which
 * of the two results callers actually want.
 *
 * @return {Promise} Resolves with whatever `client.getHTML('body')` yields.
 *     If any step fails the error is written with `console.error` and the
 *     promise resolves with `undefined` (it never rejects).
 */
function scrapeFacebookFriends() {
    // Accumulator handed to scrapeFriendsList.
    const friends = [];

    return core.loginFacebook()
        .then((client) => {
            // Every later step needs the client, so the remaining chain is
            // built inside this callback where `client` is in scope.
            return Promise.resolve(client.url('https://www.facebook.com/me/friends'))
                // Fixed 4 second wait before the page is queried.
                .then(() => client.pause(4000))
                .then(() => scrapeFriendsList(
                    false,
                    '//a[contains(@href, "friends_tab")]/@href',
                    friends,
                    (a, b) => a === b,
                    (i) => i < 30
                ))
                .then(() => client.getHTML('body'));
        })
        .catch((error) => {
            // Best-effort: report the failure and resolve with undefined.
            console.error(error);
        });
}
/**
 * Thin wrapper around `getAllUntil`: every argument is forwarded unchanged,
 * in the same order, and the result of `getAllUntil` is returned as is.
 *
 * @param {boolean} useCache Forwarded as the first argument.
 * @param {string} selector Forwarded as the second argument.
 * @param {array} list Forwarded as the third argument.
 * @param {function} equalityComparer Forwarded as the fourth argument.
 * @param {function} limitCondition Forwarded as the fifth argument.
 * @return {Promise} Whatever `getAllUntil` returns for these arguments.
 */
function scrapeFriendsList(useCache, selector, list, equalityComparer, limitCondition) {
    const forwarded = [useCache, selector, list, equalityComparer, limitCondition];
    return getAllUntil(...forwarded);
}
/**
 * Repeatedly fetch the entries matching `selector` and accumulate the ones
 * not yet present in `list`.
 *
 * Each round calls `core.getHTML(selector, {})`, runs the result through
 * `parseData`, and appends every entry for which `equalityComparer` finds no
 * match already in `list`. Another round is started only while
 * `limitCondition(list.length)` returns true AND the last round added at
 * least one new entry; the second check keeps the recursion from running
 * forever when the page yields nothing new.
 *
 * Assumes `parseData` yields an array of entries — TODO confirm. A single
 * non-array value is treated as one entry, `null`/`undefined` as none.
 *
 * @param {boolean} useCache Currently unused (see TODO below); only passed on
 *     to the recursive call.
 * @param {string} selector Selector handed to `core.getHTML`.
 * @param {array} list Accumulator; mutated in place and also returned.
 * @param {function} equalityComparer `(known, candidate) => boolean`; falls
 *     back to strict equality when not a function.
 * @param {function} limitCondition Called with the current `list.length`;
 *     collection continues while it returns true.
 * @return {Promise<array>} Resolves with `list`. On failure the error is
 *     written with `console.error` and the promise still resolves with the
 *     entries collected so far (it never rejects).
 */
function getAllUntil(useCache, selector, list, equalityComparer, limitCondition) {
    // TODO: Implement useCache functionality
    const isSame = typeof equalityComparer === 'function'
        ? equalityComparer
        : (a, b) => a === b;

    return Promise.resolve(core.getHTML(selector, {}))
        .then((data) => {
            const parsed = parseData(data);

            // Normalize so a missing or scalar result cannot break the loop.
            let candidates;
            if (parsed == null) {
                candidates = [];
            } else if (Array.isArray(parsed)) {
                candidates = parsed;
            } else {
                candidates = [parsed];
            }

            // Append only entries the comparer has not seen yet.
            let added = 0;
            for (const candidate of candidates) {
                if (!list.some((known) => isSame(known, candidate))) {
                    list.push(candidate);
                    added += 1;
                }
            }

            // Stop when the caller's condition no longer holds or when this
            // round made no progress.
            if (added === 0 || !limitCondition(list.length)) {
                return list;
            }
            return getAllUntil(useCache, selector, list, equalityComparer, limitCondition);
        })
        .catch((error) => {
            // Best-effort: report the failure and keep what was collected.
            console.error(error);
            return list;
        });
}
/**
 * Turn a raw scrape result into list entries.
 *
 * Currently a placeholder: the input is handed back untouched (same
 * reference, no copy).
 *
 * @param {*} data Raw result; presumably an array of friend entries —
 *     TODO confirm once real parsing is implemented.
 * @return {*} The very same `data` value.
 */
function parseData(data) {
    // TODO: Implement data parsing logic
    const entries = data;
    return entries;
}
module.exports = scrapeFacebookFriends;
This code defines a function, scrapeFacebookFriends, that automates the process of scraping Facebook friend URLs.
Here's a breakdown:

Imports:
- Imports the importer module from ../Core, which likely contains helper functions for interacting with Selenium.

scrapeFacebookFriends function:
- Initializes an empty array, friends, to store the extracted URLs.
- Calls loginFacebook (presumably from the imported importer module) to log in to Facebook.
- Navigates to the user's friends page (https://www.facebook.com/me/friends).
- Calls getAllUntil (likely from the imported importer module) to extract all unique friend URLs from the page:
  - false indicates that it should stop when a duplicate URL is found.
  - '//a[contains(@href, "friends_tab")]/@href' is the XPath expression to target friend links.
  - friends is the array to store the extracted URLs.
  - (a, b) => a === b is a comparison function to check for duplicates.
  - (i) => i < 30 is a condition to limit the number of extracted URLs to 30.

Export:
- Exports the scrapeFacebookFriends function, making it available for use in other parts of the application.