import | run all promises sequentially | Cell 13 | Search

The getCells function extracts notebook cells from a JSON-formatted notebook file based on specified cell types, and returns an array of notebook cells with additional metadata. It resolves the notebook path, reads the file, extracts kernel and cells, filters by type, adds metadata, and returns the modified cells array.

Run example

npm run import -- "parse cells from a notebook"

parse cells from a notebook

var path = require('path')
var fs = require('fs')

/**
 * Read a notebook JSON file and return its cells of the requested types,
 * each annotated with `language`, `filename` and `id`.
 *
 * @param {string} notebook - Path to the notebook file; resolved to an
 *     absolute path before reading.
 * @param {string[]} [types=['*', 'code']] - Cell types to keep. Entries are
 *     compared literally against each cell's `cell_type`; no wildcard
 *     expansion is performed here.
 * @returns {Object[]} The matching cell objects (the parsed cells themselves,
 *     extended in place with the extra properties).
 * @throws {Error} If the file cannot be read or is not valid JSON.
 */
function getCells(notebook, types = ['*', 'code']) {
    const notebookPath = path.resolve(notebook);
    const json = JSON.parse(fs.readFileSync(notebookPath, 'utf8'));

    // Tolerate notebooks without a top-level `metadata` object instead of
    // throwing on the first property access.
    const metadata = json.metadata || {};

    // Notebook-wide language: kernelspec.language, then language_info.name,
    // then ''. Each step is skipped when the value is missing so a kernelspec
    // without `language` still falls through to language_info.
    const notebookLanguage = (metadata.kernelspec && metadata.kernelspec.language)
        || (metadata.language_info && metadata.language_info.name)
        || '';
    const baseName = path.basename(notebookPath);

    return json.cells
        .filter(cell => types.includes(cell.cell_type))
        // `index` is the position within the filtered list, not the notebook.
        .map((cell, index) => Object.assign(cell, {
            // A per-cell VS Code language id wins over the notebook language.
            language: (cell.metadata && cell.metadata.vscode && cell.metadata.vscode.languageId)
                || notebookLanguage,
            filename: notebookPath,
            id: `${baseName}[${index}]`
        }));
}

// Expose getCells as a named property on this module's CommonJS exports.
module.exports.getCells = getCells;

What the code could have been:

const path = require('path');
const fs = require('fs');

/**
 * Reads a notebook JSON file and returns its cells of the requested types,
 * each copied and annotated with `language`, `filename` and `id`.
 *
 * This is a best-effort reader: any error raised while reading, parsing or
 * transforming the notebook is caught and an empty array is returned.
 *
 * @param {string} notebook - Path to the notebook file; resolved to an
 *     absolute path before reading.
 * @param {string[]} [types=['*', 'code']] - Cell types to keep. Entries are
 *     compared literally against each cell's `cell_type`; no wildcard
 *     expansion is performed here.
 * @returns {Object[]} Shallow copies of the matching cells with the extra
 *     properties added, or `[]` when the notebook could not be processed.
 */
function getCells(notebook, types = ['*', 'code']) {
    const resolvedNotebook = path.resolve(notebook);

    try {
        const json = JSON.parse(fs.readFileSync(resolvedNotebook, 'utf8'));
        const metadata = json.metadata || {};

        // Notebook-wide language: kernelspec.language, then
        // language_info.name, then ''. A kernelspec without `language` falls
        // through to language_info instead of producing undefined.
        const notebookLanguage = (metadata.kernelspec && metadata.kernelspec.language)
            || (metadata.language_info && metadata.language_info.name)
            || '';
        const baseName = path.basename(resolvedNotebook);

        return json.cells
            .filter((cell) => types.includes(cell.cell_type))
            // `index` is the position within the filtered list, not the notebook.
            .map((cell, index) => ({
                ...cell,
                // A per-cell VS Code language id wins; if it is absent the
                // notebook-wide language is used.
                language: (cell.metadata && cell.metadata.vscode && cell.metadata.vscode.languageId)
                    || notebookLanguage,
                filename: resolvedNotebook,
                id: `${baseName}[${index}]`
            }));
    } catch (error) {
        // Deliberate best effort: callers receive an empty list rather than
        // an exception when the notebook cannot be read or parsed.
        return [];
    }
}

// Replace this module's CommonJS exports with an object exposing getCells.
module.exports = { getCells };

Code Breakdown

Imports

Function: getCells

Function Body

  1. Resolve the notebook path: notebook = path.resolve(notebook) ensures the notebook path is absolute.
  2. Read the notebook file: var json = JSON.parse(fs.readFileSync(notebook)) reads the notebook file and parses its contents as JSON.
  3. Extract kernel and cells: var kernel = json.metadata.kernelspec; and var cells = json.cells extract the kernel specification and notebook cells, respectively.
  4. Filter cells by type: cells.filter(c => types.includes(c.cell_type)) filters the cells to include only those matching the specified types.
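  5. Add metadata: map((c, i) => Object.assign(c, {... })) adds extra properties to each cell: language (the cell's VS Code languageId if present, otherwise the notebook's kernelspec language or language_info name), filename (the resolved notebook path), and id (the notebook's base name followed by the cell's index in the filtered list, e.g. notebook.ipynb[0]).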
  6. Return the modified cells array: return cells

Export