google timeline | Scrape google timeline | Find the average latitude and longitude at each destination | Search

This code snippet extracts structured timeline data — the day, plus each entry's title, duration, and location — from a Google timeline page. It uses XPath expressions to target specific elements and chrono-node to parse dates from the extracted text.

Run example

npm run import -- "Read single google timeline page"

Read single google timeline page

var chrono = require('chrono-node');

// Three-letter English month abbreviations, ordered so that the index matches
// the zero-based month number returned by Date#getMonth().
var months = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'];
/**
 * Scrape the Google timeline page currently loaded in `client`.
 *
 * Reads the day heading and every timeline item (duration text,
 * `data-segment-key`, title and location) through `client.getAllXPath`,
 * then turns each item into a plain record.
 *
 * @returns {Promise<Array<Object>>} One record per timeline item with the
 *     keys `traveling`, `type`, `timeline`, `name`, `location`, `time` and
 *     `length` (milliseconds). Resolves to an empty array when no day heading
 *     can be parsed or when the scrape fails.
 */
function readTimelinePage() {
    return client.getAllXPath({
        day: ['//*[contains(@class,"timeline-subtitle")]//text()|//*[contains(@class,"timeline-title")]//text()'],
        items: [
            '//*[contains(@class,"timeline-item")]/parent::*/*[@jsinstance]',
            {
                duration: './/*[contains(@class, "duration-text")]//text()',
                data: './/*[contains(@class, "timeline-item")]/@data-segment-key',
                title: './/*[contains(@class, "timeline-item-title-content")][.//i]/*[not(self::i)]//text()|.//*[contains(@class, "timeline-item-title-content")][not(.//i)]//text()',
                location: './/*[contains(@class, "timeline-item-text")][not(contains(@class,"add-child"))][.//a]/a//*[not(self::i)]//text()|.//*[contains(@class, "timeline-item-text")][not(contains(@class,"add-child"))][not(.//a)]/text()'
            }
        ]
    })
        .then(r => {
            if (!r || !r.day || r.day.length === 0) {
                return [];
            }
            // Prefer the last heading text node; fall back to the first one.
            const currDate = chrono.parseDate(r.day[r.day.length - 1] + '')
                || chrono.parseDate(r.day[0] + '');
            if (!currDate) {
                return [];
            }
            // Key such as "5Mar16": day of month, month abbreviation, two-digit year.
            const newKey = currDate.getDate()
                + months[currDate.getMonth()]
                + String(currDate.getFullYear()).slice(-2);
            // "<year>/<month>/<day> " prefix that anchors a bare clock time to this day.
            const dayPrefix = currDate.getFullYear() + '/'
                + (currDate.getMonth() + 1) + '/'
                + currDate.getDate() + ' ';
            return (r.items || []).map(i => {
                const timelineData = (i.data + '').split(':');
                let start;
                let length = 0;
                if (timelineData.length >= 3) {
                    // The second and third ':'-separated fields begin with the
                    // start and end timestamps, which are fed to Date as epoch ms.
                    start = new Date(parseFloat(timelineData[1].split(',')[0]));
                    const end = new Date(parseFloat(timelineData[2].split(',')[0]));
                    length = end.getTime() - start.getTime();
                } else {
                    // No usable segment key: take the start from the text before
                    // the '-' in the duration. `duration` may be missing or a
                    // single string, so normalise it to an array first.
                    const startText = [].concat(i.duration || []).join('').trim().split('-')[0];
                    // chrono returns null for text it cannot parse; fall back to
                    // the page's day instead of crashing on a null date.
                    start = chrono.parseDate(dayPrefix + startText)
                        || new Date(currDate.getTime());
                }
                // Diagnostic only: surface entries whose year differs from the
                // page's day or whose end precedes their start.
                if (start.getFullYear() !== currDate.getFullYear() || length < 0) {
                    console.log(start);
                }
                const traveling = (/(Driving|Walking|Traveling|Flying|Moving).*\s+-\s+(.*),/ig).exec(i.title + ', ' + i.location);
                return {
                    traveling: traveling ? traveling[0] : false,
                    type: 'timeline',
                    timeline: newKey,
                    name: i.title,
                    location: i.location,
                    time: start,
                    length: Number.isNaN(length) ? 0 : length
                };
            });
        })
        .catch(e => {
            // Best effort: report the failure and hand back an empty result so
            // callers always receive an array.
            console.log(e);
            return [];
        });
}
// Attach the scraper to `client` as a custom command, but only when no
// `readTimelinePage` property is defined on it yet; then export the function.
if (client.readTimelinePage === undefined) {
    client.addCommand('readTimelinePage', readTimelinePage);
}
module.exports = readTimelinePage;

What the code could have been:

const chrono = require('chrono-node');
// Three-letter English month abbreviations, ordered so that the index matches
// the zero-based month number returned by Date#getMonth().
const months = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'];

/**
 * Scrape the Google timeline page currently loaded in `client`.
 *
 * Reads the day heading and every timeline item (duration text,
 * `data-segment-key`, title and location) through `client.getAllXPath`,
 * then turns each item into a plain record.
 *
 * @returns {Promise<Array<Object>>} One record per timeline item with the
 *     keys `traveling`, `type`, `timeline`, `name`, `location`, `time` and
 *     `length` (milliseconds). Resolves to an empty array when no day heading
 *     can be parsed or when the scrape fails.
 */
async function readTimelinePage() {
    try {
        const response = await client.getAllXPath({
            day: ['//*[contains(@class,"timeline-subtitle")]//text()|//*[contains(@class,"timeline-title")]//text()'],
            items: [
                '//*[contains(@class,"timeline-item")]/parent::*/*[@jsinstance]',
                {
                    duration: './/*[contains(@class, "duration-text")]//text()',
                    data: './/*[contains(@class, "timeline-item")]/@data-segment-key',
                    title: './/*[contains(@class, "timeline-item-title-content")][.//i]/*[not(self::i)]//text()|.//*[contains(@class, "timeline-item-title-content")][not(.//i)]//text()',
                    location: './/*[contains(@class, "timeline-item-text")][not(contains(@class,"add-child"))][.//a]/a//*[not(self::i)]//text()|.//*[contains(@class, "timeline-item-text")][not(contains(@class,"add-child"))][not(.//a)]/text()'
                }
            ]
        });

        // Nothing scraped, or no day heading to anchor the entries to.
        if (!response || !response.day || response.day.length === 0) {
            return [];
        }

        // Prefer the last heading text node; fall back to the first one.
        const currDate = chrono.parseDate(response.day[response.day.length - 1] + '')
            || chrono.parseDate(response.day[0] + '');
        if (!currDate) {
            return [];
        }

        // Locale-independent key such as "5Mar16": day of month, month
        // abbreviation, two-digit year. toLocaleDateString() would make the
        // key depend on the host's locale settings.
        const newKey = `${currDate.getDate()}${months[currDate.getMonth()]}${String(currDate.getFullYear()).slice(-2)}`;

        // "<year>/<month>/<day> " prefix that anchors a bare clock time to this day.
        const dayPrefix = `${currDate.getFullYear()}/${currDate.getMonth() + 1}/${currDate.getDate()} `;

        return (response.items || []).map((i) => {
            const timelineData = (i.data + '').split(':');
            let start;
            let length = 0;

            if (timelineData.length >= 3) {
                // The second and third ':'-separated fields begin with the
                // start and end timestamps, which are fed to Date as epoch ms.
                start = new Date(parseFloat(timelineData[1].split(',')[0]));
                const end = new Date(parseFloat(timelineData[2].split(',')[0]));
                length = end.getTime() - start.getTime();
            } else {
                // No usable segment key: take the start from the text before
                // the '-' in the duration. `duration` may be missing or a
                // single string, so normalise it to an array first.
                const startText = [].concat(i.duration || []).join('').trim().split('-')[0];
                // chrono returns null for text it cannot parse; fall back to
                // midnight of the page's day.
                start = chrono.parseDate(`${dayPrefix}${startText}`)
                    || new Date(currDate.getFullYear(), currDate.getMonth(), currDate.getDate(), 0, 0, 0);
            }

            // Diagnostic only: surface entries whose year differs from the
            // page's day or whose end precedes their start.
            if (start.getFullYear() !== currDate.getFullYear() || length < 0) {
                console.log(start);
            }

            const traveling = (/(Driving|Walking|Traveling|Flying|Moving).*\s+-\s+(.*),/ig).exec(i.title + ', ' + i.location);
            return {
                traveling: traveling ? traveling[0] : false,
                type: 'timeline',
                timeline: newKey,
                name: i.title,
                location: i.location,
                time: start,
                length: Number.isNaN(length) ? 0 : length
            };
        });
    } catch (error) {
        // Best effort: report the failure and hand back an empty result so
        // callers always receive an array.
        console.error(error);
        return [];
    }
}

// Attach the scraper to `client` as a custom command, but only when no
// `readTimelinePage` property is defined on it yet.
if (client.readTimelinePage === undefined) {
    client.addCommand('readTimelinePage', readTimelinePage);
}

// Also make the function available to modules that require this file.
module.exports = readTimelinePage;

This code snippet appears to be part of a larger script designed to extract and process timeline data from a Google timeline page.

Here's a breakdown:

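  1. Dependencies: `chrono-node` is loaded to parse dates out of the scraped text.

  2. months Array: Three-letter month abbreviations, indexed by `Date#getMonth()`, used to build the per-day timeline key.

  3. readTimelinePage Function: Calls `client.getAllXPath` with XPath queries for the day heading and for each timeline item's duration, segment key, title, and location.

  4. Data Processing: Each item is converted into a record holding its start time, length, name, location, and a `traveling` match taken from the title and location text.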

Overall, this code snippet focuses on extracting structured timeline data from a web page, likely for further analysis or visualization.