google calendar data | convert chrome date to calendar date | get bookmarks from local chrome database | Search

This code extracts structured bookmark data from Chrome's exported HTML files found in Google Takeout, enabling programmatic access and manipulation of the bookmarks. It uses a custom DOM parsing function to navigate the HTML structure and retrieve relevant information about each bookmark.

Run example

npm run import -- "Parse bookmarks file"

Parse bookmarks file

var path = require('path');
var fs = require('fs');
var glob = require('glob');
var importer = require('../Core');
var {selectDom} = importer.import("select tree");
var chromeDtToDate = importer.import("convert chrome date to calendar date")

// TODO: remove this and use ENV transpiling
// Home directory: the first of HOME, HOMEPATH, USERPROFILE that is set and non-empty.
var PROFILE_PATH = process.env.HOME
    || process.env.HOMEPATH
    || process.env.USERPROFILE;
// Downloads folder inside that home directory.
var PROJECT_PATH = `${PROFILE_PATH}/Downloads`;

/**
 * Selector tree handed to `selectDom` to turn an exported bookmarks HTML
 * document into nested folder objects.
 *
 * The first entry is the path expression matching every `dt` that has an `h3`
 * heading (one per bookmark folder). The second entry is called with a context
 * and asks `selectDom` for, within that context:
 *   - folder:   the heading text
 *   - links:    the anchors directly under the heading's list, normalised to
 *               `{url, title, time_usec, date}`
 *   - children: nested folders, found by applying this same tree again
 *
 * NOTE(review): how `selectDom` interprets arrays, objects and functions is
 * defined in the "select tree" module, not here - confirm against it.
 */
var bookmarkTree = [
    '*/*/dl/dt[./h3]', // select all the headings
    ctx => selectDom({
        folder: './h3/text()', // get heading text
        links: [ // all the links under that heading
            './dl/dt/a',
            {
                url: './@href',
                time: './@add_date',
                title: './text()'
            },
            (obj) => { // a bit of parsing
                // Parse the timestamp once, always as decimal, and reuse the
                // number for both the raw field and the converted date.
                var timeUsec = parseInt(obj.time + '', 10);
                return {
                    // `+ ''` coerces whatever selectDom returned to a plain string
                    url: obj.url + '',
                    title: obj.title + '',
                    time_usec: timeUsec,
                    date: chromeDtToDate(timeUsec).getTime()
                };
            }
        ],
        // get children from same context as each heading
        children: (childCtx) => selectDom(bookmarkTree, childCtx)
    }, ctx)
];

/**
 * Parse the most recently changed Chrome bookmarks export found in a
 * `Takeout*` folder under PROJECT_PATH.
 *
 * @returns {*} whatever `selectDom` produces for `bookmarkTree` applied to
 *     the file's HTML.
 * @throws {Error} when no `Takeout*\/Chrome/Bookmarks.html` file is found.
 */
function getBookmarksFromTakeout() {
    var pattern = 'Takeout*/Chrome/Bookmarks.html';
    var files = glob.sync(pattern, {cwd: PROJECT_PATH})
        .map(f => path.join(PROJECT_PATH, f));
    if (files.length === 0) {
        // Fail with a message that names the problem instead of letting
        // readFileSync(undefined) raise an unrelated-looking TypeError.
        throw new Error(`No ${pattern} export found under ${PROJECT_PATH}`);
    }
    // Stat every candidate exactly once, then keep the one with the latest
    // change time (`>=` keeps the later entry on ties, like sort + pop did).
    var newest = files
        .map(file => ({file: file, changedAt: fs.statSync(file).ctimeMs}))
        .reduce((latest, candidate) =>
            candidate.changedAt >= latest.changedAt ? candidate : latest);
    // parse bookmark html
    var html = fs.readFileSync(newest.file, 'utf8');
    return selectDom(bookmarkTree, html);
}

module.exports = getBookmarksFromTakeout;

What the code could have been:

const { selectDom, importFunctions } = require('../Core');
const { chromeDtToDate } = importFunctions('convert chrome date to calendar date');
const path = require('path');
const glob = require('glob');
const fs = require('fs');

// Home directory: the first of HOME, HOMEPATH, USERPROFILE that is set and non-empty.
const PROFILE_PATH = process.env.HOME
  || process.env.HOMEPATH
  || process.env.USERPROFILE;
// Downloads folder inside that home directory.
const PROJECT_PATH = path.join(PROFILE_PATH, 'Downloads');

// Define bookmark tree logic
/**
 * Description of how to read one bookmark folder out of the exported HTML.
 *
 * `selector` is the path expression matching every `dt` that has an `h3`
 * heading. `item(ctx)` builds the folder object for one such context: the
 * heading text, the normalised links under it, and a `children` function that
 * applies this same tree to a context.
 *
 * NOTE(review): whether `selectDom` understands a `{selector, item}` object,
 * and whether it invokes the `children` function, is not visible here -
 * confirm against the `selectDom` implementation.
 */
const bookmarkTree = {
  selector: '*/*/dl/dt[./h3]',
  // Select heading and links, and recursively get children
  item: (ctx) => ({
    folder: selectDom('./h3/text()', ctx),
    links: selectDom(
      [
        './dl/dt/a',
        {
          url: './@href',
          time: './@add_date',
          title: './text()'
        }
      ],
      ctx
    ).map((obj) => {
      // Parse the timestamp once, always as decimal, and reuse the number
      // for both the raw field and the converted date.
      const timeUsec = parseInt(obj.time + '', 10);
      return {
        // `+ ''` coerces whatever selectDom returned to a plain string
        url: obj.url + '',
        title: obj.title + '',
        time_usec: timeUsec,
        date: chromeDtToDate(timeUsec).getTime()
      };
    }),
    // Recursively get children from same context as each heading
    children: (childCtx) => selectDom(bookmarkTree, childCtx)
  })
};

// Define function to get bookmarks from Takeout
/**
 * Parse the most recently changed Chrome bookmarks export found in a
 * `Takeout*` folder under PROJECT_PATH.
 *
 * Best-effort: problems are logged with `console.error` and reported to the
 * caller as an empty array rather than an exception.
 *
 * @returns {*} the result of `selectDom(bookmarkTree, html)`, or `[]` when no
 *     export exists or reading/parsing it fails.
 */
function getBookmarksFromTakeout() {
  const pattern = 'Takeout*/Chrome/Bookmarks.html';
  const files = glob.sync(pattern, { cwd: PROJECT_PATH })
    .map((f) => path.join(PROJECT_PATH, f));

  if (files.length === 0) {
    // Say what is actually wrong instead of logging the TypeError that
    // readFileSync(undefined) would produce as a "parsing" error.
    console.error(`No ${pattern} export found under ${PROJECT_PATH}`);
    return [];
  }

  try {
    // Stat every candidate exactly once, then keep the one with the latest
    // change time (`>=` keeps the later entry on ties, like sort + pop did).
    const latestFile = files
      .map((file) => ({ file, changedAt: fs.statSync(file).ctimeMs }))
      .reduce((latest, candidate) => (
        candidate.changedAt >= latest.changedAt ? candidate : latest
      ))
      .file;

    // Parse bookmark HTML and return result
    const html = fs.readFileSync(latestFile, 'utf8');
    return selectDom(bookmarkTree, html);
  } catch (error) {
    console.error('Error parsing bookmarks:', error);
    return [];
  }
}

module.exports = getBookmarksFromTakeout;

This code parses Chrome bookmarks exported from Google Takeout and extracts a structured representation of the bookmarks.

Here's a breakdown:

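  1. Imports: `path`, `fs` and `glob` for locating and reading files, plus the project's `../Core` importer, which supplies `selectDom` (from "select tree") and `chromeDtToDate` (from "convert chrome date to calendar date").

  2. Constants: `PROFILE_PATH` is the user's home directory, taken from the `HOME`, `HOMEPATH` or `USERPROFILE` environment variable; `PROJECT_PATH` is the `Downloads` folder inside it.

  3. bookmarkTree: the selector tree passed to `selectDom`. It matches every `dt` element that has an `h3` heading and, for each one, collects the folder name, the links beneath it (URL, title, raw timestamp and converted date) and, recursively, its child folders.

  4. getBookmarksFromTakeout Function: globs for `Takeout*/Chrome/Bookmarks.html` under `PROJECT_PATH`, picks the file with the latest change time, reads it, and returns the result of `selectDom(bookmarkTree, html)`.

  5. Export: `getBookmarksFromTakeout` is assigned to `module.exports`, so it is the module's only export.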

In essence, this code provides a way to programmatically access and process Chrome bookmarks exported from Google Takeout, allowing for further analysis, manipulation, or integration with other systems.