oga/lib/oga/xpath/parser.rll

298 lines
5.4 KiB
Plaintext

%header
{
##
# AST parser for XPath expressions. The AST is built using `AST::Node`
# instances.
#
# Unlike {Oga::XML::Parser} this parser only takes String instances as input.
#
}
# Fully qualified name of the parser class described by this grammar.
%name Oga::XPath::Parser;
# Token types referenced by the rules below. Presumably these match the token
# types produced by the lexer that `each_token` reads from.
%terminals T_AXIS T_COLON T_COMMA T_FLOAT T_INT T_IDENT T_TYPE_TEST;
%terminals T_LBRACK T_RBRACK T_LPAREN T_RPAREN T_SLASH T_STRING;
%terminals T_PIPE T_AND T_OR T_ADD T_DIV T_MOD T_EQ T_NEQ T_LT T_GT T_LTE T_GTE;
%terminals T_SUB T_MUL T_VAR;
# Top-level rule (the first rule of the grammar). The expression is optional,
# so input without any expression tokens is accepted as well.
xpath
= expression?
;
# Expressions And Operators
#
# Operators are handled by using a mixture of iteration (in the form of the *
# operator), recursion and priorities. Priorities are handled by recursing into
# certain rules before processing others.
#
# These rules are largely based on the following resources:
#
# * http://www.w3.org/TR/xquery-xpath-parsing/#XPath-EBNF
# * http://blog.jwbroek.com/2010/07/antlr-grammar-for-parsing-xpath-10.html
#
# "or" level: one and_expr followed by zero or more "or" operands. The follow
# pairs are folded onto the first operand, left to right, by combine_operators.
expression
= and_expr expression_follow* { combine_operators(val) }
;
# A single "or <operand>" step, returned as an [operator, operand] pair.
expression_follow
= T_OR and_expr { [:or, val[1]] }
;
# "and" level: one equality_expr followed by zero or more "and" operands,
# folded left to right by combine_operators.
and_expr
= equality_expr and_expr_follow* { combine_operators(val) }
;
# A single "and <operand>" step, returned as an [operator, operand] pair.
and_expr_follow
= T_AND equality_expr { [:and, val[1]] }
;
# Equality level (T_EQ / T_NEQ): one relational_expr followed by zero or more
# equality operands, folded left to right by combine_operators.
equality_expr
= relational_expr equality_expr_follow* { combine_operators(val) }
;
# A single equality step, returned as [:eq, operand] or [:neq, operand].
equality_expr_follow
= T_EQ relational_expr { [:eq, val[1]] }
| T_NEQ relational_expr { [:neq, val[1]] }
;
# Relational level (T_LT, T_GT, T_LTE, T_GTE): one additive_expr followed by
# zero or more relational operands, folded left to right by combine_operators.
relational_expr
= additive_expr relational_expr_follow* { combine_operators(val) }
;
# A single relational step, returned as an [operator, operand] pair where the
# operator is one of :lt, :gt, :lte or :gte.
relational_expr_follow
= T_LT additive_expr { [:lt, val[1]] }
| T_GT additive_expr { [:gt, val[1]] }
| T_LTE additive_expr { [:lte, val[1]] }
| T_GTE additive_expr { [:gte, val[1]] }
;
# Additive level (T_ADD / T_SUB): one mult_expr followed by zero or more
# additive operands, folded left to right by combine_operators.
additive_expr
= mult_expr additive_expr_follow* { combine_operators(val) }
;
# A single additive step, returned as [:add, operand] or [:sub, operand].
additive_expr_follow
= T_ADD mult_expr { [:add, val[1]] }
| T_SUB mult_expr { [:sub, val[1]] }
;
# Multiplicative level (T_DIV, T_MOD, T_MUL).
#
# XPath 1.0 defines these operators as left-associative, so "8 div 4 div 2"
# must group as "(8 div 4) div 2". To get that grouping the operands are
# collected by iteration and folded left to right by combine_operators, the
# same way the additive level is handled. Recursing into mult_expr from the
# follow rule would instead nest the right-hand side first.
mult_expr
  = union_expr mult_expr_follow* { combine_operators(val) }
  ;

# A single multiplicative step, returned as an [operator, operand] pair where
# the operator is one of :div, :mod or :mul.
mult_expr_follow
  = T_DIV union_expr { [:div, val[1]] }
  | T_MOD union_expr { [:mod, val[1]] }
  | T_MUL union_expr { [:mul, val[1]] }
  ;
# Union level (T_PIPE): one expression_member followed by zero or more union
# operands, folded left to right by combine_operators into (pipe ...) nodes.
union_expr
= expression_member union_expr_follow* { combine_operators(val) }
;
# A single union step, returned as a [:pipe, operand] pair.
union_expr_follow
= T_PIPE expression_member { [:pipe, val[1]] }
;
# Operand of the operator rules: a relative or absolute path, a string or
# number literal, a variable, or an expression wrapped in parentheses. For the
# parenthesised form only the inner expression is returned.
expression_member
= relative_path
| absolute_path
| string
| number
| variable
| T_LPAREN expression T_RPAREN { val[1] }
;
# Relative paths such as A or A/B.
#
# A single step is returned as-is; two or more steps are wrapped in a (path)
# node.
relative_path
  = path_steps
    {
      steps = val[0]

      steps.length > 1 ? s(:path, *steps) : steps[0]
    }
  ;
# One step followed by zero or more "/ step" pairs, returned as a flat Array
# of step nodes.
path_steps
= path_step_or_axis path_steps_follow* { [val[0], *val[1]] }
;
# A single "/ step" pair; the slash is dropped and only the step is returned.
path_steps_follow
= T_SLASH path_step_or_axis { val[1] }
;
# /A, /A/B, etc
#
# The steps after the leading slash are optional. They become the children of
# the (absolute_path) node; when no steps are given val[1] is presumably nil,
# which splats to no children at all.
absolute_path
= T_SLASH path_steps? { s(:absolute_path, *val[1]) }
;
# A single path segment: either an implicit step (path_step) or a step that
# starts with an explicit axis token (axis).
path_step_or_axis
= path_step
| axis
;
# Steps without an explicit axis: A, A:B, A[...], A(), A(X), or a type test.
#
# path_step_follow returns [kind, args, predicate]; the kind decides how the
# leading identifier is interpreted.
path_step
  = T_IDENT path_step_follow
    {
      name   = val[0]
      follow = val[1]
      kind   = follow[0]
      args   = follow[1]
      filter = follow[2]

      step =
        if !kind.equal?(:test)
          # Anything that is not a test (i.e. a call): the identifier is the
          # name and the arguments become the remaining children.
          s(kind, name, *args)
        elsif args
          # "A:B": the identifier before the colon is the first child of the
          # test, the one after it the second. A bare test implies the child
          # axis, which is easiest to add at parser level.
          s(:axis, 'child', s(:test, name, args))
        else
          # Plain "A", which means "child::A".
          s(:axis, 'child', s(:test, nil, name))
        end

      filter ? s(:predicate, step, filter) : step
    }
  | type_test { s(:axis, 'child', val[0]) }
  ;
# Whatever follows the leading identifier of a path_step. Every alternative
# returns an Array that path_step reads as [kind, args, predicate]:
#
# * "(...)"      => [:call, call_args] (no third element)
# * ":B [pred]"  => [:test, second identifier, optional predicate]
# * "[pred]"     => [:test, nil, optional predicate]; this alternative also
#                   matches when nothing follows the identifier
path_step_follow
= T_LPAREN call_args T_RPAREN { [:call, val[1]] }
| T_COLON T_IDENT predicate? { [:test, val[1], val[2]] }
| predicate? { [:test, nil, val[0]] }
;
# A bracketed expression such as [X]; only the inner expression is returned.
predicate
= T_LBRACK expression T_RBRACK { val[1] }
;
# Wraps the value of a T_TYPE_TEST token in a (type_test) node.
type_test
= T_TYPE_TEST { s(:type_test, val[0]) }
;
# Regular test (e.g. tests used as axis values): either "A" or "A:B".
#
# The resulting (test) node always has two children. For "A:B" these are A
# and B, for a plain "A" they are nil and A.
test
  = T_IDENT test_follow?
    {
      if val[1]
        s(:test, val[0], val[1])
      else
        s(:test, nil, val[0])
      end
    }
  ;
# The ":B" part of a test written as "A:B"; returns the second identifier.
test_follow
= T_COLON T_IDENT { val[1] }
;
# Arguments of a call: either a comma separated list of expressions, returned
# as a flat Array, or nothing at all (the `_` alternative matches no tokens).
call_args
= expression call_args_follow* { [val[0], *val[1]] }
| _
;
# A single ", expression" pair; only the expression is returned.
call_args_follow
= T_COMMA expression { val[1] }
;
# Steps with an explicit axis: child::foo, descendant-or-self::foo, etc
#
# Produces an (axis) node holding the axis token value and the axis value
# node. When a predicate follows, the axis node is wrapped in a (predicate)
# node.
axis
  = T_AXIS axis_value predicate?
    {
      step = s(:axis, val[0], val[1])

      val[2] ? s(:predicate, step, val[2]) : step
    }
  ;
# The part of an axis step that follows the axis token: a regular test or a
# type test.
axis_value
= test
| type_test
;
# Wraps the value of a T_STRING token in a (string) node.
string
= T_STRING { s(:string, val[0]) };
# Numeric literals: T_INT becomes an (int) node, T_FLOAT a (float) node.
number
= T_INT { s(:int, val[0]) }
| T_FLOAT { s(:float, val[0]) }
;
# Wraps the value of a T_VAR token in a (var) node.
variable
= T_VAR { s(:var, val[0]) }
;
%inner
{
##
# Cache used by {parse_with_cache}; entries are keyed by the input String and
# hold the result of parsing it.
#
# @return [Oga::LRU]
#
CACHE = LRU.new
##
# Parses the given XPath expression, going through {CACHE} so the parser is
# only constructed and run when the cache invokes the block.
#
# @param [String] data The XPath expression to parse.
# @return [AST::Node]
#
def self.parse_with_cache(data)
  CACHE.get_or_set(data) do
    parser = new(data)
    parser.parse
  end
end
##
# Wraps the input in a Lexer; {each_token} later pulls the tokens from it.
#
# @param [String] data The input to parse.
#
def initialize(data)
@lexer = Lexer.new(data)
end
##
# Shorthand used by the grammar actions for building AST nodes.
#
# @param [Symbol] type The node type.
# @param [Array] children The child nodes/values, passed on as a single Array.
# @return [AST::Node]
#
def s(type, *children)
  AST::Node.new(type, children)
end
##
# Feeds the tokens of the lexer to the given block, one `[type, value]` pair
# at a time, followed by a final `[-1, -1]` pair once the lexer is done
# (presumably the end-of-input marker expected by the generated parser).
#
# The most recent line number reported by the lexer is remembered in `@line`.
#
# @yieldparam [Array] token A `[type, value]` pair.
#
def each_token
  @lexer.advance do |token_type, token_value, line_number|
    # Tokens without a line number leave the last known line untouched.
    @line = line_number if line_number

    yield [token_type, token_value]
  end

  yield [-1, -1]
end
##
# Folds a chain of binary operators into nested nodes, from left to right.
#
# `val[0]` is the first operand and `val[1]` an Array of `[operator, operand]`
# pairs. Each pair wraps the node built so far as its left-hand side, so
# `[a, [[:add, b], [:sub, c]]]` becomes `(sub (add a b) c)`.
#
# @param [Array] val
# @return [Object] The first operand itself when there are no pairs.
#
def combine_operators(val)
  first, pairs = val

  pairs.reduce(first) { |left, (operator, right)| s(operator, left, right) }
end
##
# Combines an operand with an optional `[operator, operand]` pair.
#
# `val[0]` is the left-hand side. When `val[1]` is present it is turned into a
# node of the form `(operator left right)`, otherwise the left-hand side is
# returned unchanged.
#
# @param [Array] val
#
def combine_optional_operator(val)
  left, follow = val

  return left unless follow

  s(follow[0], left, follow[1])
end
}