This code defines a lexer for XPath expressions using the ply
module, specifying tokens and their corresponding regular expression patterns to recognize XPath operators, separators, and literal values. The lexer can be used as a foundation for building a parser that constructs an abstract syntax tree (AST) for further processing, making it a crucial component for XPath expression analysis.
"""XPath lexing rules.
To understand how this module works, it is valuable to have a strong
understanding of the `ply <http://www.dabeaz.com/ply/>`_ module.
"""
from __future__ import unicode_literals
# Operator keywords mapped to the token type name used for each one.
# The values are also appended to ``tokens`` below, so insertion order
# here determines their order in that list.
operator_names = dict((
    ('or', 'OR_OP'),
    ('and', 'AND_OP'),
    ('div', 'DIV_OP'),
    ('mod', 'MOD_OP'),
))
# Every token type name this module declares. The fixed names come first,
# followed by one token type per keyword operator in ``operator_names``.
tokens = [
    # Separators and abbreviations.
    'PATH_SEP', 'ABBREV_PATH_SEP',
    'ABBREV_STEP_SELF', 'ABBREV_STEP_PARENT',
    'AXIS_SEP', 'ABBREV_AXIS_AT',
    # Parentheses and brackets.
    'OPEN_PAREN', 'CLOSE_PAREN',
    'OPEN_BRACKET', 'CLOSE_BRACKET',
    # Symbolic operators.
    'UNION_OP', 'EQUAL_OP', 'REL_OP',
    'PLUS_OP', 'MINUS_OP', 'MULT_OP', 'STAR_OP',
    'COMMA',
    # Literals and numbers.
    'LITERAL', 'FLOAT', 'INTEGER',
    # Names.
    'NCNAME', 'NODETYPE', 'FUNCNAME', 'AXISNAME',
    'COLON', 'DOLLAR',
]
# Keyword operators ('or', 'and', 'div', 'mod') each get their own token type.
tokens.extend(operator_names.values())
# Simple token rules: each ``t_<NAME>`` string is the regular expression
# for the token type ``<NAME>``.
#
# NOTE: some patterns are prefixes of others (``/`` vs ``//``, ``\.`` vs
# ``\.\.``, ``:`` vs ``::``). Per the ply documentation, string-defined
# rules are tried in order of decreasing pattern length, so the longer
# pattern wins regardless of the order the rules are written in.

# Path separators and abbreviated steps.
t_ABBREV_PATH_SEP = r'//'
t_PATH_SEP = r'/'
t_ABBREV_STEP_PARENT = r'\.\.'
t_ABBREV_STEP_SELF = r'\.'

# Axis separator and abbreviated attribute axis.
t_AXIS_SEP = r'::'
t_ABBREV_AXIS_AT = r'@'

# Parentheses and brackets.
t_OPEN_PAREN = r'\('
t_CLOSE_PAREN = r'\)'
t_OPEN_BRACKET = r'\['
t_CLOSE_BRACKET = r'\]'

# Symbolic operators.
t_UNION_OP = r'\|'
t_EQUAL_OP = r'!?='    # matches "=" and "!="
t_REL_OP = r'[<>]=?'   # matches "<", ">", "<=" and ">="
t_PLUS_OP = r'\+'
t_MINUS_OP = r'-'

# Remaining single-character punctuation.
t_COMMA = r','
t_COLON = r':'
t_DOLLAR = r'\