The getCells function extracts notebook cells from a JSON-formatted notebook file based on the specified cell types and returns them as an array, with additional metadata attached to each cell. It resolves the notebook path, reads and parses the file, extracts the kernel specification and cells, filters the cells by type, adds the metadata, and returns the modified cells array.
npm run import -- "parse cells from a notebook"
var path = require('path')
var fs = require('fs')
/**
 * Reads a notebook file (JSON) and returns the cells whose `cell_type` is
 * listed in `types`, each extended with `language`, `filename` and `id`.
 *
 * Language resolution order, first non-empty value wins:
 *   1. the cell's own `metadata.vscode.languageId`
 *   2. the notebook's `metadata.kernelspec.language`
 *   3. the notebook's `metadata.language_info.name`
 *   4. the empty string
 *
 * @param {string} notebook - Path to the notebook file; resolved to an absolute path.
 * @param {string[]} [types=['*', 'code']] - Cell types to keep. Entries are
 *   compared literally against `cell_type` with `includes`, so `'*'` is not
 *   expanded as a wildcard by this function.
 * @returns {Object[]} The matching cell objects with the extra fields assigned.
 * @throws {Error} If the file cannot be read or is not valid JSON.
 */
function getCells(notebook, types = ['*', 'code']) {
    const resolvedPath = path.resolve(notebook);
    const json = JSON.parse(fs.readFileSync(resolvedPath, 'utf8'));

    // Tolerate notebooks without a metadata section instead of throwing.
    const metadata = json.metadata || {};

    // Fall through to language_info when kernelspec exists but carries no
    // language, rather than reporting `undefined`.
    const notebookLanguage =
        (metadata.kernelspec && metadata.kernelspec.language) ||
        (metadata.language_info && metadata.language_info.name) ||
        '';

    const baseName = path.basename(resolvedPath);

    return json.cells
        .filter((cell) => types.includes(cell.cell_type))
        // `index` is the position within the filtered list, not within the
        // notebook's full cell list.
        .map((cell, index) => Object.assign(cell, {
            language:
                (cell.metadata &&
                    cell.metadata.vscode &&
                    cell.metadata.vscode.languageId) ||
                notebookLanguage,
            filename: resolvedPath,
            id: `${baseName}[${index}]`,
        }));
}
// Attach getCells to this module's exports object.
module.exports.getCells = getCells;
const path = require('path');
const fs = require('fs');
/**
 * Retrieves the cells of a Jupyter Notebook file that match the given cell types.
 *
 * Each returned cell is a shallow copy of the notebook cell extended with:
 *   - `language`: the cell's `metadata.vscode.languageId` if set, otherwise the
 *     notebook's `metadata.kernelspec.language`, otherwise
 *     `metadata.language_info.name`, otherwise `''`.
 *   - `filename`: the absolute path of the notebook.
 *   - `id`: `<notebook basename>[<index>]`, where the index is the cell's
 *     position within the filtered result.
 *
 * This function is best-effort: any failure while reading, parsing or
 * processing the notebook yields an empty array instead of an exception.
 *
 * @param {string} notebook - Path to the Jupyter Notebook file.
 * @param {string[]} [types=['*', 'code']] - Cell types to retrieve. Entries are
 *   matched literally against `cell_type`; `'*'` is not expanded as a wildcard here.
 * @returns {Object[]} Array of cell objects, or `[]` on any error.
 */
function getCells(notebook, types = ['*', 'code']) {
    // Resolve up front so `filename` and `id` are derived from the absolute path.
    const resolvedNotebook = path.resolve(notebook);
    try {
        const json = JSON.parse(fs.readFileSync(resolvedNotebook, 'utf8'));
        const metadata = json.metadata || {};

        // Fall through to language_info when kernelspec is present but has no
        // language, instead of propagating `undefined`.
        const notebookLanguage =
            (metadata.kernelspec && metadata.kernelspec.language) ||
            (metadata.language_info && metadata.language_info.name) ||
            '';

        const baseName = path.basename(resolvedNotebook);

        return json.cells
            .filter((cell) => types.includes(cell.cell_type))
            .map((cell, index) => ({
                ...cell,
                // A per-cell VS Code language id overrides the notebook language.
                language:
                    (cell.metadata &&
                        cell.metadata.vscode &&
                        cell.metadata.vscode.languageId) ||
                    notebookLanguage,
                filename: resolvedNotebook,
                id: `${baseName}[${index}]`,
            }));
    } catch (error) {
        // Deliberate best-effort contract: callers receive an empty list when
        // the notebook is missing, unreadable or malformed.
        return [];
    }
}
// Replace the module's exports with an object exposing only getCells.
module.exports = { getCells };
Code Breakdown
Imports:
- `var path = require('path')`: Imports the Node.js `path` module for working with file paths.
- `var fs = require('fs')`: Imports the Node.js `fs` (File System) module for interacting with the file system.

`getCells` parameters:
- `notebook`: The path to the notebook file.
- `types`: An array of cell types to include (default: `['*', 'code']`).

`getCells` body:
- `notebook = path.resolve(notebook)` ensures the notebook path is absolute.
- `var json = JSON.parse(fs.readFileSync(notebook))` reads the notebook file and parses its contents as JSON.
- `var kernel = json.metadata.kernelspec;` and `var cells = json.cells` extract the kernel specification and notebook cells, respectively.
- `cells.filter(c => types.includes(c.cell_type))` filters the cells to include only those matching the specified types.
- `.map((c, i) => Object.assign(c, {... }))` adds additional metadata to each cell, including:
  - `language`: The language ID or name from the kernel specification or cell metadata.
  - `filename`: The notebook file path.
  - `id`: A unique identifier constructed from the notebook file name and cell index.
- `return cells` returns the modified cells array.

Export:
- `module.exports.getCells = getCells`: Exports the `getCells` function from the module.