Parse bookmarks file

This code extracts structured bookmark data from the `Bookmarks.html` file that Chrome includes in a Google Takeout export, enabling programmatic access to and manipulation of the bookmarks. It uses a custom DOM selection helper (`selectDom`) to walk the HTML structure and collect each folder's name together with the URL, title, and add date of every bookmark in it.

Run example

What the code could have been:

const { selectDom, importFunctions } = require('../Core');
const { chromeDtToDate } = importFunctions('convert chrome date to calendar date');
const path = require('path');
const os = require('os');
const glob = require('glob');
const fs = require('fs');

// Home directory of the current user. The environment variables are consulted
// first (HOME, then HOMEPATH, then USERPROFILE); os.homedir() is the last
// resort so that loading this module does not crash inside path.join when none
// of them is set.
const PROFILE_PATH =
  process.env.HOME ||
  process.env.HOMEPATH ||
  process.env.USERPROFILE ||
  os.homedir();
// Directory that is searched for `Takeout*` export folders.
const PROJECT_PATH = path.join(PROFILE_PATH, 'Downloads');

// Extraction spec passed to selectDom: yields one record per bookmark folder.
const bookmarkTree = {
  // Matches each <dt> that has an <h3> child; the <h3> text is read as the folder name below.
  selector: '*/*/dl/dt[./h3]',
  /**
   * Builds the record for a single matched folder node.
   *
   * @param {*} ctx - Node matched by `selector`; passed back to selectDom for
   *   the relative queries below.
   * @returns {{folder: *, links: Array<{url: string, title: string, time_usec: number, date: number}>, children: Function}}
   */
  item: (ctx) => ({
    folder: selectDom('./h3/text()', ctx),
    links: selectDom(
      [
        './dl/dt/a',
        {
          url: './@href',
          time: './@add_date',
          title: './text()'
        }
      ],
      ctx
    ).map((obj) => {
      // Parse the add_date attribute once, explicitly as base 10, so the value
      // can never be interpreted as hexadecimal. The `+ ''` coerces whatever
      // selectDom returned for the field into a plain string first.
      const timestamp = Number.parseInt(obj.time + '', 10);
      return {
        url: obj.url + '',
        title: obj.title + '',
        time_usec: timestamp,
        date: chromeDtToDate(timestamp).getTime()
      };
    }),
    // Function value that applies this same spec to the context it is given.
    // NOTE(review): presumably selectDom invokes it to collect nested folders —
    // confirm against selectDom's handling of function-valued fields.
    children: (ctx) => selectDom(bookmarkTree, ctx)
  })
};

/**
 * Loads the bookmark tree from the most recently changed Chrome export among
 * the `Takeout*` folders inside PROJECT_PATH.
 *
 * @returns {*} Whatever selectDom produces for bookmarkTree, or an empty array
 *   when no export is found or the file cannot be read or parsed.
 */
function getBookmarksFromTakeout() {
  // Stat every candidate exactly once instead of on each sort comparison.
  const candidates = glob
    .sync('Takeout*/Chrome/Bookmarks.html', { cwd: PROJECT_PATH })
    .map((relativePath) => {
      const file = path.join(PROJECT_PATH, relativePath);
      return { file, changedAt: fs.statSync(file).ctimeMs };
    });

  // Nothing to parse: report that explicitly rather than letting the read
  // below fail on an undefined path.
  if (candidates.length === 0) {
    console.error(`No Takeout*/Chrome/Bookmarks.html export found in ${PROJECT_PATH}`);
    return [];
  }

  // Keep the entry with the greatest ctime; on a tie the later match wins.
  const latest = candidates.reduce((best, candidate) =>
    (candidate.changedAt >= best.changedAt ? candidate : best));

  // Parse bookmark HTML and return result
  try {
    const html = fs.readFileSync(latest.file, 'utf8');
    return selectDom(bookmarkTree, html);
  } catch (error) {
    console.error('Error parsing bookmarks:', error);
    return [];
  }
}

// The module's sole export is the Takeout bookmark loader function itself.
module.exports = getBookmarksFromTakeout;

This code parses the Chrome bookmarks file from a Google Takeout export and returns the bookmarks as a structured tree of folders, each with its links and nested child folders.

In essence, this code provides a way to programmatically access and process Chrome bookmarks exported from Google Takeout, allowing for further analysis, manipulation, or integration with other systems.