This code snippet extracts structured timeline data (dates, titles, durations, and locations) from a single Google Timeline page loaded in a browser-automation client. It uses XPath expressions to target specific elements and chrono-node
to parse dates from the extracted text.
npm run import -- "Read single google timeline page"
var chrono = require('chrono-node');
var months = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'];
/**
 * Scrape the timeline page currently loaded in `client` and turn every
 * timeline entry into a plain event record.
 *
 * Relies on names from the enclosing module: `client` (must provide
 * `getAllXPath`), `chrono` (chrono-node) and the `months` abbreviation table.
 *
 * @returns {Promise<Array<Object>>} Resolves to one record per scraped entry
 *   with the keys `traveling`, `type`, `timeline`, `name`, `location`, `time`
 *   and `length` (milliseconds). Resolves to `[]` when the page has no
 *   parsable day heading or when scraping/processing fails.
 */
function readTimelinePage() {
    return client.getAllXPath({
        day: ['//*[contains(@class,"timeline-subtitle")]//text()|//*[contains(@class,"timeline-title")]//text()'],
        items: [
            '//*[contains(@class,"timeline-item")]/parent::*/*[@jsinstance]',
            {
                duration: './/*[contains(@class, "duration-text")]//text()',
                data: './/*[contains(@class, "timeline-item")]/@data-segment-key',
                title: './/*[contains(@class, "timeline-item-title-content")][.//i]/*[not(self::i)]//text()|.//*[contains(@class, "timeline-item-title-content")][not(.//i)]//text()',
                location: './/*[contains(@class, "timeline-item-text")][not(contains(@class,"add-child"))][.//a]/a//*[not(self::i)]//text()|.//*[contains(@class, "timeline-item-text")][not(contains(@class,"add-child"))][not(.//a)]/text()'
            }
        ]
    })
        .then(r => {
            if (!r || !r.day || r.day.length === 0) {
                return [];
            }
            // The last heading text is tried first; the first one is the fallback.
            const currDate = chrono.parseDate(r.day[r.day.length - 1] + '')
                || chrono.parseDate(r.day[0] + '');
            if (!currDate) {
                return [];
            }
            // Key shaped like "5Jan16": day of month, month abbreviation, 2-digit year.
            const newKey = currDate.getDate()
                + months[currDate.getMonth()]
                + String(currDate.getFullYear()).slice(2, 4);
            // Prefix that anchors a bare time-of-day text to the page's day.
            const dayPrefix = currDate.getFullYear() + '/'
                + (currDate.getMonth() + 1) + '/'
                + currDate.getDate() + ' ';
            return (r.items || []).map(i => {
                const timelineData = (i.data + '').split(':');
                let start;
                let length = 0;
                if (timelineData.length >= 3) {
                    // Fields 1 and 2 of the segment key are treated as
                    // epoch-millisecond timestamps (text before the first comma).
                    start = new Date(parseFloat(timelineData[1].split(',')[0]));
                    const end = new Date(parseFloat(timelineData[2].split(',')[0]));
                    length = end.getTime() - start.getTime();
                } else {
                    // No usable segment key: derive the start from the visible
                    // "<from> - <to>" text. `duration` may be missing or a
                    // single string, so normalise it to an array first.
                    const durationText = [].concat(i.duration == null ? [] : i.duration)
                        .join('').trim();
                    // chrono returns null for text it cannot parse; fall back to
                    // midnight of the page's day instead of crashing the page.
                    start = chrono.parseDate(dayPrefix + durationText.split('-')[0])
                        || new Date(currDate.getFullYear(), currDate.getMonth(), currDate.getDate());
                }
                // Diagnostic only: report entries whose start lands in a
                // different year than the page heading, or that end before they start.
                if (start.getFullYear() !== currDate.getFullYear() || length < 0) {
                    console.log(start);
                }
                const traveling = (/(Driving|Walking|Traveling|Flying|Moving).*\s+-\s+(.*),/ig)
                    .exec(i.title + ', ' + i.location);
                return {
                    traveling: traveling ? traveling[0] : false,
                    type: 'timeline',
                    timeline: newKey,
                    name: i.title,
                    location: i.location,
                    time: start,
                    // Non-numeric timestamps produce NaN; report them as zero length.
                    length: Number.isNaN(length) ? 0 : length
                };
            });
        })
        .catch(e => {
            // Keep the "always resolves to an array" contract on failure.
            console.log(e);
            return [];
        });
}
// Attach the scraper to `client` as a custom command, but only when no
// property of that name is present yet.
if (client.readTimelinePage === undefined) {
    client.addCommand('readTimelinePage', readTimelinePage);
}

// The function is also the module's sole export.
module.exports = readTimelinePage;
const chrono = require('chrono-node');
const months = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'];
/**
 * Read the timeline page currently loaded in `client` and convert each
 * scraped entry into an event record.
 *
 * Uses `client.getAllXPath`, `chrono` (chrono-node) and the `months`
 * abbreviation table from the enclosing module.
 *
 * @return {Promise<Array<Object>>} A promise that resolves to an array of
 *   timeline events (`traveling`, `type`, `timeline`, `name`, `location`,
 *   `time`, `length` in milliseconds). Resolves to an empty array when the
 *   page has no parsable day heading or when an error occurs.
 */
async function readTimelinePage() {
    try {
        const response = await client.getAllXPath({
            day: ['//*[contains(@class,"timeline-subtitle")]//text()|//*[contains(@class,"timeline-title")]//text()'],
            items: [
                '//*[contains(@class,"timeline-item")]/parent::*/*[@jsinstance]',
                {
                    duration: './/*[contains(@class, "duration-text")]//text()',
                    data: './/*[contains(@class, "timeline-item")]/@data-segment-key',
                    title: './/*[contains(@class, "timeline-item-title-content")][.//i]/*[not(self::i)]//text()|.//*[contains(@class, "timeline-item-title-content")][not(.//i)]//text()',
                    location: './/*[contains(@class, "timeline-item-text")][not(contains(@class,"add-child"))][.//a]/a//*[not(self::i)]//text()|.//*[contains(@class, "timeline-item-text")][not(contains(@class,"add-child"))][not(.//a)]/text()'
                }
            ]
        });

        // Nothing scraped, or no day heading to anchor the entries to.
        if (!response || !response.day || response.day.length === 0) {
            return [];
        }

        // Prefer the last heading text; fall back to the first one.
        const currDate = chrono.parseDate(response.day[response.day.length - 1] + '')
            || chrono.parseDate(response.day[0] + '');
        if (!currDate) {
            return [];
        }

        const year = currDate.getFullYear();
        const monthIndex = currDate.getMonth();
        const dayOfMonth = currDate.getDate();

        // Locale-independent key such as "5Jan16"; toLocaleDateString() would
        // yield a different key depending on the machine's locale settings.
        const newKey = `${dayOfMonth}${months[monthIndex]}${String(year).slice(2, 4)}`;

        /**
         * Start time for an entry without a usable segment key: parse the
         * "<from>" half of the visible "<from> - <to>" text against the
         * page's day, defaulting to midnight when chrono cannot parse it.
         */
        const startFromDurationText = (duration) => {
            const pieces = duration == null ? [] : [].concat(duration);
            const fromText = pieces.join('').trim().split('-')[0];
            const parsed = chrono.parseDate(`${year}/${monthIndex + 1}/${dayOfMonth} ${fromText}`);
            return parsed || new Date(year, monthIndex, dayOfMonth, 0, 0, 0);
        };

        return (response.items || []).map((i) => {
            const timelineData = (i.data + '').split(':');
            let start;
            let length = 0;

            if (timelineData.length >= 3) {
                // Fields 1 and 2 of the segment key are treated as
                // epoch-millisecond timestamps (text before the first comma).
                start = new Date(parseFloat(timelineData[1].split(',')[0]));
                const end = new Date(parseFloat(timelineData[2].split(',')[0]));
                length = end.getTime() - start.getTime();
            } else {
                start = startFromDurationText(i.duration);
            }

            // Diagnostic only: flag entries that start in a different year
            // than the page heading or that end before they start.
            if (start.getFullYear() !== year || length < 0) {
                console.log(start);
            }

            const traveling = (/(Driving|Walking|Traveling|Flying|Moving).*\s+-\s+(.*),/ig)
                .exec(i.title + ', ' + i.location);

            return {
                traveling: traveling ? traveling[0] : false,
                type: 'timeline',
                timeline: newKey,
                name: i.title,
                location: i.location,
                time: start,
                // Non-numeric timestamps produce NaN; report them as zero length.
                length: Number.isNaN(length) ? 0 : length
            };
        });
    } catch (error) {
        console.error(error);
        return [];
    }
}
// Register the scraper as a custom `client` command unless a property with
// that name is already present.
if (client.readTimelinePage === undefined) {
    client.addCommand('readTimelinePage', readTimelinePage);
}

// Expose the function to modules that require this file.
module.exports = readTimelinePage;
This code snippet appears to be part of a larger script designed to extract and process timeline data from a web page (the import command above names it a Google timeline page).
Here's a breakdown:
Dependencies:
- chrono-node: A library for parsing natural language dates and times.
- months array: Three-letter month abbreviations ('Jan' … 'Dec'), used to format the timeline key.

readTimelinePage function:
- Uses a client object (likely selenium-webdriver or a similar browser automation tool) to interact with a web page and extract data.
- client.getAllXPath: This function likely uses XPath expressions to select specific elements on the page.
  - day: Extracts text content from elements containing the class "timeline-subtitle" or "timeline-title".
  - items: Extracts data from elements containing the class "timeline-item".
    - duration: Extracts text content from elements containing the class "duration-text".
    - data: Extracts the value of the data-segment-key attribute from elements containing the class "timeline-item".
    - title: Extracts text content from elements containing the class "timeline-item-title-content".
    - location: Extracts text content from elements containing the class "timeline-item-text".

Data Processing:
- Parses the page's date from the day array using chrono.parseDate.
- Iterates over the items array, extracting data from each timeline item and potentially calculating durations.

Overall, this code snippet focuses on extracting structured timeline data from a web page, likely for further analysis or visualization.
array, extracting data from each timeline item and potentially calculating durations.Overall, this code snippet focuses on extracting structured timeline data from a web page, likely for further analysis or visualization.