pyimport | Cell 4 | run python cells | Search

The get_cells function imports a Jupyter Notebook file, extracts cell metadata, and formats it into a list of dictionaries with language, filename, and unique ID information. The function takes two parameters: the notebook path and a list of cell types to extract, and returns the formatted list of cells.

Cell 5

import json
import os

def get_cells(notebook_path, types=('*', 'code')):
    """Extract cells from a Jupyter Notebook with additional metadata.

    Args:
        notebook_path: Path to the notebook file. It is converted to an
            absolute path before the file is opened.
        types: Container of ``cell_type`` values to keep. When it contains
            ``'*'`` every cell is kept, which is what the default does.

    Returns:
        A list with one dict per selected cell. Each dict carries the
        cell's original fields plus three keys that replace any same-named
        keys already on the cell:

        * ``language`` - the cell's ``metadata.vscode.languageId`` if set,
          otherwise the notebook's ``kernelspec.language``, otherwise the
          notebook's ``language_info.name``, otherwise ``""``.
        * ``filename`` - the absolute notebook path.
        * ``id`` - ``"<notebook basename>[<index>]"``, where the index is
          the cell's position among all cells, not only the selected ones.
    """
    notebook_path = os.path.abspath(notebook_path)

    with open(notebook_path, 'r', encoding='utf-8') as f:
        notebook = json.load(f)

    notebook_metadata = notebook.get('metadata', {})
    # The notebook-wide fallback is the same for every cell, so resolve it
    # once instead of re-walking the metadata inside the comprehension.
    default_language = (
        notebook_metadata.get('kernelspec', {}).get('language')
        or notebook_metadata.get('language_info', {}).get('name', '')
    )
    notebook_name = os.path.basename(notebook_path)
    keep_all = '*' in types

    return [
        {
            **cell,
            "language": (cell.get("metadata", {}).get("vscode", {}).get("languageId")
                         or default_language),
            "filename": notebook_path,
            "id": f"{notebook_name}[{i}]",
        }
        for i, cell in enumerate(notebook.get("cells", []))
        if keep_all or cell.get("cell_type") in types
    ]

# Export table: maps each public name to the callable it exposes.
# NOTE(review): this is a dict rather than the conventional list of name
# strings - presumably it is read by the notebook import tooling; confirm
# before relying on ``from module import *`` with it.
__all__ = dict(get_cells=get_cells)

What the code could have been:

import json
import os

def load_notebook(notebook_path: str) -> dict:
    """Load a Jupyter Notebook from a file.

    The file's absolute path is recorded under ``metadata['path']`` of the
    returned dict (an existing entry is left untouched), because
    ``extract_cells`` reads that key to build each cell's ``filename`` and
    ``id``; the notebook JSON itself is not shown to carry it.

    Args:
        notebook_path (str): Path to the Jupyter Notebook.

    Returns:
        dict: Loaded Jupyter Notebook content, or an empty dict when the
        file does not contain valid JSON (the error is printed).

    Raises:
        OSError: If the file cannot be opened; only JSON decoding errors
            are handled here.
    """
    notebook_path = os.path.abspath(notebook_path)
    try:
        with open(notebook_path, 'r', encoding='utf-8') as f:
            notebook = json.load(f)
    except json.JSONDecodeError as e:
        print(f"Error loading notebook: {e}")
        return {}

    # Only annotate well-formed notebooks; anything else is returned as parsed.
    if isinstance(notebook, dict):
        metadata = notebook.setdefault('metadata', {})
        if isinstance(metadata, dict):
            metadata.setdefault('path', notebook_path)
    return notebook

def get_kernel_metadata(notebook: dict) -> dict:
    """Return the ``kernelspec`` entry of a notebook's metadata.

    Args:
        notebook (dict): Loaded Jupyter Notebook content.

    Returns:
        dict: The value stored at ``metadata.kernelspec``, or an empty dict
        when either level is absent.
    """
    notebook_metadata = notebook.get('metadata', {})
    kernelspec = notebook_metadata.get('kernelspec', {})
    return kernelspec

def get_cell_metadata(cell: dict) -> dict:
    """Pick the descriptive fields out of a notebook cell.

    Args:
        cell (dict): Cell content.

    Returns:
        dict: A new dict holding the cell's ``cell_type`` entry and every
        entry whose key starts with ``metadata``, in their original order.
    """
    selected = {}
    for name, content in cell.items():
        if name == 'cell_type' or name.startswith('metadata'):
            selected[name] = content
    return selected

def get_cell_language(cell: dict, notebook: "dict | None" = None) -> str:
    """Determine the language of a Jupyter Notebook cell.

    Lookup order, first truthy value wins:

    1. the cell's ``metadata.vscode.languageId``;
    2. the notebook's ``metadata.kernelspec.language``;
    3. the notebook's ``metadata.language_info.name``;
    4. the cell's own ``metadata.language_info.name`` (kept so calls that
       pass only ``cell`` resolve as before).

    Args:
        cell (dict): Cell content. A cell without ``metadata`` is accepted.
        notebook (dict, optional): Notebook the cell belongs to. The kernel
            and language information live on the notebook, not on the
            cell, so steps 2 and 3 are skipped when this is omitted.

    Returns:
        str: Language of the cell, or ``""`` when nothing is recorded.
    """
    cell_metadata = cell.get("metadata", {})
    notebook_metadata = (notebook or {}).get("metadata", {})
    return (
        cell_metadata.get("vscode", {}).get("languageId")
        or notebook_metadata.get("kernelspec", {}).get("language")
        or notebook_metadata.get("language_info", {}).get("name")
        or cell_metadata.get("language_info", {}).get("name", "")
    )


def extract_cells(notebook: dict, types: "list | tuple" = ('*', 'code'),
                  notebook_path: "str | None" = None) -> list:
    """Extract cells from a Jupyter Notebook with additional metadata.

    Args:
        notebook (dict): Loaded Jupyter Notebook content.
        types (list, optional): Cell types to extract. A container holding
            ``'*'`` selects every cell. Defaults to ``('*', 'code')``.
        notebook_path (str, optional): Path of the file the notebook came
            from. When omitted, ``notebook['metadata']['path']`` is used if
            present.

    Returns:
        list: One dict per selected cell: the cell's own fields plus
        ``language``, ``filename`` (absolute path) and ``id``
        (``"<basename>[<index>]"``, indexed over all cells). When no path
        is known, ``filename`` is ``""`` and ``id`` is ``"[<index>]"``
        instead of raising ``KeyError``.
    """
    source_path = notebook_path or notebook.get("metadata", {}).get("path")
    # Resolve the path-derived values once; they are identical for all cells.
    filename = os.path.abspath(source_path) if source_path else ""
    basename = os.path.basename(source_path) if source_path else ""
    keep_all = '*' in types

    return [
        {
            **cell,
            "language": get_cell_language(cell, notebook),
            "filename": filename,
            "id": f"{basename}[{i}]",
        }
        for i, cell in enumerate(notebook.get("cells", []))
        if keep_all or cell.get("cell_type") in types
    ]

# Demo entry point: load one notebook and print the cells extracted from it.
if __name__ == "__main__":
    notebook_path = 'path/to/notebook.ipynb'
    notebook = load_notebook(notebook_path)
    cells = extract_cells(notebook)
    print(cells)

Code Breakdown

Importing Libraries

import json
import os

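This code imports two Python standard-library modules: `json`, which parses the notebook file, and `os`, which handles the path operations (`os.path.abspath` and `os.path.basename`).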

Function: get_cells

def get_cells(notebook_path, types=['*', 'code']):

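This function takes two parameters: `notebook_path`, the path to the notebook file, and `types`, the collection of cell types to include. If `types` contains `'*'`, every cell is included regardless of its type.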

Function Implementation

notebook_path = os.path.abspath(notebook_path)

with open(notebook_path, 'r', encoding='utf-8') as f:
    notebook = json.load(f)

This code:

  1. Converts the notebook_path to its absolute path using os.path.abspath.
  2. Opens the Jupyter Notebook file at the specified path, reads its contents, and loads it as a JSON object using json.load.
kernel = notebook.get('metadata', {}).get('kernelspec', {})

This line retrieves the kernel metadata from the notebook, stored under `metadata.kernelspec` in the JSON object. If either key is missing, it falls back to an empty dictionary.

cells = [
    {
        **cell,
        "language": (cell.get("metadata", {}).get("vscode", {}).get("languageId") or
                     kernel.get("language") or
                     notebook.get("metadata", {}).get("language_info", {}).get("name", "")),
        "filename": notebook_path,
        "id": f"{os.path.basename(notebook_path)}[{i}]"
    }
    for i, cell in enumerate(notebook.get("cells", []))
    if '*' in types or cell.get("cell_type") in types
]

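This code defines a list cells, which contains dictionaries representing individual cells in the notebook. Each dictionary includes all of the cell's original fields, plus `language` (the cell's VS Code `languageId`, falling back to the kernel's language and then to the notebook's `language_info` name), `filename` (the absolute path of the notebook), and `id` (the notebook's base filename followed by the cell's index in square brackets).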

The list comprehension iterates over the cells in the notebook, filtering them based on the specified types.

return cells

Finally, the function returns the list of cell dictionaries.

Exporting the Function

__all__ = {
  "get_cells": get_cells
}

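This assignment declares get_cells as the module's exported function. Note that `__all__` is defined here as a dictionary mapping the name to the function, whereas Python itself expects `__all__` to be a sequence of name strings and uses it only for `from module import *`; a direct `from module import get_cells` works regardless of `__all__`.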