This code extracts structured bookmark data from Chrome's exported HTML files found in Google Takeout, enabling programmatic access and manipulation of the bookmarks. It uses a custom DOM parsing function to navigate the HTML structure and retrieve relevant information about each bookmark.
npm run import -- "Parse bookmarks file"
var path = require('path');
var fs = require('fs');
var glob = require('glob');
var importer = require('../Core');
var {selectDom} = importer.import("select tree");
var chromeDtToDate = importer.import("convert chrome date to calendar date")
// TODO: remove this and use ENV transpiling
// User profile directory: the first truthy value among the HOME, HOMEPATH
// and USERPROFILE environment variables.
var PROFILE_PATH = process.env.HOME
    || process.env.HOMEPATH
    || process.env.USERPROFILE;
// Directory that is searched for Takeout exports.
var PROJECT_PATH = `${PROFILE_PATH}/Downloads`;
/**
 * Selector specification handed to `selectDom` to extract bookmark folders.
 *
 * Element 0 is the query that picks every `dt` containing an `h3` heading;
 * element 1 is a callback that receives each match and runs a nested
 * `selectDom` query on it, producing `{folder, links, children}`.
 * NOTE(review): the exact query syntax and the way `selectDom` evaluates
 * nested arrays/functions are defined in the "select tree" module — confirm
 * there before changing the shape of this structure.
 */
var bookmarkTree = [
    '*/*/dl/dt[./h3]', // select all the headings
    ctx => selectDom({
        folder: './h3/text()', // get heading text
        links: [ // all the links under that heading
            './dl/dt/a',
            {
                url: './@href',
                time: './@add_date',
                title: './text()'
            },
            (obj) => { // a bit of parsing
                // Parse the add_date attribute once, always as base 10, so a
                // value such as "0x10" is never read as hexadecimal.
                var timeUsec = parseInt(obj.time + '', 10);
                return {
                    url: obj.url + '',
                    title: obj.title + '',
                    time_usec: timeUsec,
                    date: chromeDtToDate(timeUsec).getTime()
                };
            }
        ],
        // get children from same context as each heading; the reference to
        // bookmarkTree is resolved lazily, which is what makes this recursive
        children: (ctx) => selectDom(bookmarkTree, ctx)
    }, ctx)
];
/**
 * Parse the most recently changed Chrome bookmarks export found under
 * PROJECT_PATH.
 *
 * Looks for files matching `Takeout*` + `/Chrome/Bookmarks.html`, picks the
 * one with the greatest `ctime`, reads it and hands the HTML to `selectDom`
 * together with `bookmarkTree`.
 *
 * @returns {*} whatever `selectDom` produces for `bookmarkTree`
 * @throws {Error} when no matching Bookmarks.html exists under PROJECT_PATH
 */
function getBookmarksFromTakeout() {
    // Stat every candidate exactly once instead of on each sort comparison.
    var candidates = glob.sync('Takeout*/Chrome/Bookmarks.html', {cwd: PROJECT_PATH})
        .map(f => {
            var file = path.join(PROJECT_PATH, f);
            return {file: file, changed: fs.statSync(file).ctime.getTime()};
        });
    // Fail with a readable message rather than letting readFileSync(undefined)
    // throw an argument-type error.
    if (candidates.length === 0) {
        throw new Error('No Chrome bookmarks export found matching '
            + 'Takeout*/Chrome/Bookmarks.html in ' + PROJECT_PATH);
    }
    // Ascending by change time, so the newest export ends up last.
    candidates.sort((a, b) => a.changed - b.changed);
    var latest = candidates[candidates.length - 1].file;
    // parse bookmark html
    var html = fs.readFileSync(latest).toString();
    return selectDom(bookmarkTree, html);
}
module.exports = getBookmarksFromTakeout;
const { selectDom, importFunctions } = require('../Core');
const { chromeDtToDate } = importFunctions('convert chrome date to calendar date');
const path = require('path');
const glob = require('glob');
const fs = require('fs');
// User profile directory: the first truthy value among the HOME, HOMEPATH
// and USERPROFILE environment variables.
const PROFILE_PATH =
    process.env.HOME ||
    process.env.HOMEPATH ||
    process.env.USERPROFILE;
// Folder searched for Takeout exports: "Downloads" inside the profile directory.
const PROJECT_PATH = path.join(PROFILE_PATH, 'Downloads');
// Define bookmark tree logic
/**
 * Description of how one bookmark folder is extracted.
 *
 * `selector` is the query for every `dt` that contains an `h3` heading;
 * `item` builds `{folder, links, children}` for one such match by issuing
 * further `selectDom` queries against it.
 * NOTE(review): whether `selectDom` accepts an object with `selector`/`item`
 * keys is not visible here — confirm against the Core module.
 */
const bookmarkTree = {
    selector: '*/*/dl/dt[./h3]',
    // Select heading and links, and recursively get children
    item: (ctx) => ({
        folder: selectDom('./h3/text()', ctx),
        links: selectDom(
            [
                './dl/dt/a',
                {
                    url: './@href',
                    time: './@add_date',
                    title: './text()'
                }
            ],
            ctx
        ).map((obj) => {
            // Parse the add_date attribute once, always as base 10, so a
            // value such as "0x10" is never read as hexadecimal.
            const timeUsec = Number.parseInt(obj.time + '', 10);
            return {
                url: obj.url + '',
                title: obj.title + '',
                time_usec: timeUsec,
                date: chromeDtToDate(timeUsec).getTime()
            };
        }),
        // Recursively get children; takes its own context argument and
        // resolves bookmarkTree lazily, after the constant is initialised.
        children: (childCtx) => selectDom(bookmarkTree, childCtx)
    })
};
// Define function to get bookmarks from Takeout
/**
 * Parse the most recently changed Chrome bookmarks export found under
 * PROJECT_PATH.
 *
 * Looks for files matching `Takeout*` + `/Chrome/Bookmarks.html`, picks the
 * one with the greatest `ctime`, reads it and hands the HTML to `selectDom`
 * together with `bookmarkTree`.
 *
 * @returns {*} the `selectDom` result, or an empty array when no export is
 *   found or when reading/parsing the file throws (the problem is logged).
 */
function getBookmarksFromTakeout() {
    // Stat every candidate exactly once instead of on each sort comparison.
    const candidates = glob.sync('Takeout*/Chrome/Bookmarks.html', { cwd: PROJECT_PATH })
        .map((f) => {
            const file = path.join(PROJECT_PATH, f);
            return { file, changedAt: fs.statSync(file).ctime.getTime() };
        });

    // Report a missing export as such, instead of letting
    // readFileSync(undefined) surface as a misleading "parsing" error.
    if (candidates.length === 0) {
        console.error(
            `No Chrome bookmarks export found matching Takeout*/Chrome/Bookmarks.html in ${PROJECT_PATH}`
        );
        return [];
    }

    // Ascending by change time, so the newest export ends up last.
    candidates.sort((a, b) => a.changedAt - b.changedAt);
    const latestFile = candidates[candidates.length - 1].file;

    // Parse bookmark HTML and return result
    try {
        const html = fs.readFileSync(latestFile).toString();
        return selectDom(bookmarkTree, html);
    } catch (error) {
        console.error('Error parsing bookmarks:', error);
        return [];
    }
}
module.exports = getBookmarksFromTakeout;
This code parses Chrome bookmarks exported from Google Takeout and extracts a structured representation of the bookmarks.
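Here's a breakdown:
Imports: `path`, `fs` and `glob` for locating and reading files, plus `selectDom` and `chromeDtToDate`, which are loaded through the project's `Core` module.
Constants: `PROFILE_PATH` (taken from the `HOME`, `HOMEPATH` or `USERPROFILE` environment variable) and `PROJECT_PATH` (the `Downloads` folder inside it).
`bookmarkTree`: the selector specification passed to `selectDom`. It matches each bookmark folder heading, collects the links under that heading (URL, title and add date), and recursively applies itself to gather child folders.
`getBookmarksFromTakeout` Function: finds the most recently changed `Takeout*/Chrome/Bookmarks.html` file under `PROJECT_PATH`, reads it, and uses the `selectDom` function to parse the HTML and extract bookmark data according to the `bookmarkTree` structure.
Export: the module exports the `getBookmarksFromTakeout` function for use in other parts of the application.
In essence, this code provides a way to programmatically access and process Chrome bookmarks exported from Google Takeout, allowing for further analysis, manipulation, or integration with other systems.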