The get_cells function imports a Jupyter Notebook file, extracts cell metadata, and formats it into a list of dictionaries with language, filename, and unique ID information. The function takes two parameters: the notebook path and a list of cell types to extract, and returns the formatted list of cells.
import json
import os
def get_cells(notebook_path, types=('*', 'code')):
    """Extract cells from a Jupyter Notebook with additional metadata.

    Args:
        notebook_path: Path to the notebook file. It is resolved to an
            absolute path before the file is opened.
        types: Container of cell types to keep. If it contains '*', every
            cell is kept regardless of its type. The default contains '*',
            so all cells are returned.

    Returns:
        list: One dict per selected cell. Each dict holds the cell's own
        fields plus "language", "filename" and "id"; these three keys
        replace any same-named keys already present on the cell.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    notebook_path = os.path.abspath(notebook_path)
    with open(notebook_path, 'r', encoding='utf-8') as f:
        notebook = json.load(f)

    # The notebook-wide fallback language and the file's base name are the
    # same for every cell, so work them out once instead of per cell.
    metadata = notebook.get('metadata', {})
    notebook_language = (metadata.get('kernelspec', {}).get('language') or
                         metadata.get('language_info', {}).get('name', ''))
    basename = os.path.basename(notebook_path)

    return [
        {
            **cell,
            # A per-cell VS Code language id wins over the notebook default.
            "language": (cell.get("metadata", {}).get("vscode", {}).get("languageId") or
                         notebook_language),
            "filename": notebook_path,
            # The index is the cell's position in the whole notebook, so ids
            # stay stable no matter which types are filtered out.
            "id": f"{basename}[{i}]",
        }
        for i, cell in enumerate(notebook.get("cells", []))
        if '*' in types or cell.get("cell_type") in types
    ]
# Export table: maps the public name to the function object it refers to.
# NOTE(review): __all__ is conventionally a list of name strings; the dict form
# is kept as-is because code outside this file may index it by name - confirm
# before converting it to a list.
__all__ = dict(get_cells=get_cells)
import json
import os
def load_notebook(notebook_path: str) -> dict:
    """Read a Jupyter Notebook file and return its parsed JSON content.

    Args:
    notebook_path (str): Path to the notebook; resolved to an absolute path
        before the file is opened.
    Returns:
    dict: The parsed notebook, or an empty dict when the file does not
        contain valid JSON (the parse error is printed). Errors raised while
        opening or reading the file are not caught here.
    """
    absolute_path = os.path.abspath(notebook_path)
    try:
        with open(absolute_path, mode='r', encoding='utf-8') as handle:
            content = json.loads(handle.read())
    except json.JSONDecodeError as e:
        # Best effort: report the parse failure and hand back an empty notebook.
        print(f"Error loading notebook: {e}")
        return {}
    return content
def get_kernel_metadata(notebook: dict) -> dict:
    """Return the notebook's kernelspec entry.

    Args:
    notebook (dict): Loaded Jupyter Notebook content.
    Returns:
    dict: The value stored under metadata -> kernelspec, or an empty dict
        when either key is absent.
    """
    notebook_metadata = notebook.get('metadata', {})
    kernelspec = notebook_metadata.get('kernelspec', {})
    return kernelspec
def get_cell_metadata(cell: dict) -> dict:
    """Pick the descriptive entries out of a notebook cell.

    Args:
    cell (dict): Cell content.
    Returns:
    dict: The entries of the cell whose key is 'cell_type' or starts with
        'metadata', in their original order.
    """
    selected = {}
    for name, content in cell.items():
        if name.startswith('metadata') or name == 'cell_type':
            selected[name] = content
    return selected
def get_cell_language(cell: dict, notebook: "dict | None" = None) -> str:
    """Determines the language of a Jupyter Notebook cell.

    The first non-empty value from these sources is returned, in order:
    1. the cell's own metadata -> vscode -> languageId;
    2. when ``notebook`` is given, the notebook's kernelspec language and
       then its metadata -> language_info -> name;
    3. the same kernelspec / language_info keys looked up inside the cell's
       metadata (the lookup performed before ``notebook`` was accepted,
       kept so existing single-argument calls return what they always did).

    Args:
    cell (dict): Cell content. A cell without a 'metadata' key is accepted.
    notebook (dict, optional): Loaded notebook the cell belongs to, used for
        the notebook-level fallbacks. Defaults to None.
    Returns:
    str: Language of the cell, or '' when no source provides one.
    """
    # Use .get so a cell that lacks 'metadata' does not raise KeyError.
    cell_metadata = cell.get("metadata", {})

    language = cell_metadata.get("vscode", {}).get("languageId")
    if language:
        return language

    if notebook is not None:
        notebook_language = (
            get_kernel_metadata(notebook).get('language') or
            notebook.get('metadata', {}).get('language_info', {}).get('name')
        )
        if notebook_language:
            return notebook_language

    # Last resort: the cell-level lookups used by single-argument callers.
    kernel = get_kernel_metadata(cell_metadata)
    return (kernel.get('language') or
            cell_metadata.get('language_info', {}).get('name', ''))
def extract_cells(notebook: dict, types: list = ['*', 'code'],
                  notebook_path: "str | None" = None) -> list:
    """Extracts cells from a Jupyter Notebook with additional metadata.

    Args:
    notebook (dict): Loaded Jupyter Notebook content.
    types (list, optional): Types of cells to extract. If it contains '*',
        every cell is kept. Defaults to ['*', 'code']. The list is only
        read, never modified.
    notebook_path (str, optional): Location of the notebook file, used to
        build each cell's "filename" and "id". When omitted, the path is
        read from notebook['metadata']['path'].
    Returns:
    list: One dict per selected cell holding the cell's own fields plus
        "language", "filename" and "id"; these three keys replace any
        same-named keys already present on the cell.
    Raises:
    KeyError: If at least one cell is selected, notebook_path is omitted and
        the notebook has no metadata -> path entry.
    """
    selected = [
        (index, cell)
        for index, cell in enumerate(notebook.get("cells", []))
        if '*' in types or cell.get("cell_type") in types
    ]
    # Nothing to describe: return before requiring a path, so an empty
    # notebook dict still yields an empty list.
    if not selected:
        return []

    if notebook_path is None:
        notebook_path = notebook['metadata']['path']
    filename = os.path.abspath(notebook_path)
    basename = os.path.basename(notebook_path)

    # Notebook-wide language, used when a cell does not name one itself.
    default_language = (
        get_kernel_metadata(notebook).get('language') or
        notebook.get('metadata', {}).get('language_info', {}).get('name', '')
    )

    return [
        {
            **cell,
            "language": get_cell_language(cell) or default_language,
            "filename": filename,
            # index is the cell's position in the whole notebook, not in the
            # filtered selection.
            "id": f"{basename}[{index}]",
        }
        for index, cell in selected
    ]
# Example usage:
if __name__ == "__main__":
    notebook_path = 'path/to/notebook.ipynb'
    notebook = load_notebook(notebook_path)
    # extract_cells reads the notebook's location from
    # notebook['metadata']['path'], which load_notebook does not populate,
    # so record it here (without overwriting an existing value).
    notebook.setdefault('metadata', {}).setdefault('path', notebook_path)
    cells = extract_cells(notebook)
    print(cells)
import json
import os
This code imports two Python libraries:
json: for working with JSON data
os: for interacting with the operating system (e.g., getting absolute paths)
get_cells
def get_cells(notebook_path, types=['*', 'code']):
This function takes two parameters:
notebook_path: the path to a Jupyter Notebook file
types: a list of cell types to extract; default is ['*', 'code']
notebook_path = os.path.abspath(notebook_path)
with open(notebook_path, 'r', encoding='utf-8') as f:
    notebook = json.load(f)
This code:
Converts notebook_path to its absolute path using os.path.abspath.
Opens the file as UTF-8 text and parses its JSON content using json.load.
kernel = notebook.get('metadata', {}).get('kernelspec', {})
This line retrieves the kernel metadata from the notebook: the kernelspec entry nested inside the top-level metadata object. If either key is missing, an empty dictionary is used instead.
cells = [
    {
        **cell,
        "language": (cell.get("metadata", {}).get("vscode", {}).get("languageId") or
                     kernel.get("language") or
                     notebook.get("metadata", {}).get("language_info", {}).get("name", "")),
        "filename": notebook_path,
        "id": f"{os.path.basename(notebook_path)}[{i}]"
    }
    for i, cell in enumerate(notebook.get("cells", []))
    if '*' in types or cell.get("cell_type") in types
]
This code defines a list cells, which contains dictionaries representing individual cells in the notebook. Each dictionary includes:
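language: taken from the cell's VS Code languageId if present, otherwise from the kernel's language, otherwise from the notebook's language_info name
filename: the absolute path to the notebook file
id: a unique identifier for the cell, built from the notebook's file name and the cell's index
The list comprehension iterates over the cells in the notebook, filtering them based on the specified types.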
return cells
Finally, the function returns the list of cell dictionaries.
__all__ = {
  "get_cells": get_cells
}
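This assignment declares get_cells as the module's public export. Note that __all__ only controls which names from module import * exposes; get_cells can be imported by name with from module import get_cells whether or not __all__ is defined.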