298 lines
5.4 KiB
Plaintext
298 lines
5.4 KiB
Plaintext
%header
|
|
{
|
|
##
|
|
# AST parser for XPath expressions. The AST is built using `AST::Node`
|
|
# instances.
|
|
#
|
|
# Unlike {Oga::XML::Parser} this parser only takes String instances as input.
|
|
#
|
|
}
|
|
|
|
%name Oga::XPath::Parser;
|
|
|
|
%terminals T_AXIS T_COLON T_COMMA T_FLOAT T_INT T_IDENT T_TYPE_TEST;
|
|
%terminals T_LBRACK T_RBRACK T_LPAREN T_RPAREN T_SLASH T_STRING;
|
|
%terminals T_PIPE T_AND T_OR T_ADD T_DIV T_MOD T_EQ T_NEQ T_LT T_GT T_LTE T_GTE;
|
|
%terminals T_SUB T_MUL T_VAR;
|
|
|
|
# Entry rule. The expression is optional, so empty input is accepted.
xpath
  = expression?
  ;
|
|
|
|
# Expressions And Operators
|
|
#
|
|
# Operators are handled by using a mixture of iteration (in the form of the *
|
|
# operator), recursion and priorities. Priorities are handled by recursing into
|
|
# certain rules before processing others.
|
|
#
|
|
# These rules are largely based on the following resources:
|
|
#
|
|
# * http://www.w3.org/TR/xquery-xpath-parsing/#XPath-EBNF
|
|
# * http://blog.jwbroek.com/2010/07/antlr-grammar-for-parsing-xpath-10.html
|
|
#
|
|
|
|
# Lowest-priority level: an and_expr followed by any number of "or" clauses.
# The [operator, operand] pairs are folded into nodes by combine_operators.
expression
  = and_expr expression_follow* { combine_operators(val) }
  ;
|
|
|
|
# A single "or <operand>" clause, returned as an [operator, operand] pair.
expression_follow
  = T_OR and_expr { [:or, val[1]] }
  ;
|
|
|
|
# An equality_expr followed by any number of "and" clauses.
and_expr
  = equality_expr and_expr_follow* { combine_operators(val) }
  ;
|
|
|
|
# A single "and <operand>" clause, returned as an [operator, operand] pair.
and_expr_follow
  = T_AND equality_expr { [:and, val[1]] }
  ;
|
|
|
|
# A relational_expr followed by any number of equality/inequality clauses.
equality_expr
  = relational_expr equality_expr_follow* { combine_operators(val) }
  ;
|
|
|
|
# A single T_EQ / T_NEQ clause, returned as an [operator, operand] pair.
equality_expr_follow
  = T_EQ  relational_expr { [:eq, val[1]] }
  | T_NEQ relational_expr { [:neq, val[1]] }
  ;
|
|
|
|
# An additive_expr followed by any number of comparison clauses.
relational_expr
  = additive_expr relational_expr_follow* { combine_operators(val) }
  ;
|
|
|
|
# A single comparison clause (T_LT, T_GT, T_LTE or T_GTE), returned as an
# [operator, operand] pair.
relational_expr_follow
  = T_LT  additive_expr { [:lt, val[1]] }
  | T_GT  additive_expr { [:gt, val[1]] }
  | T_LTE additive_expr { [:lte, val[1]] }
  | T_GTE additive_expr { [:gte, val[1]] }
  ;
|
|
|
|
# A mult_expr followed by any number of addition/subtraction clauses.
additive_expr
  = mult_expr additive_expr_follow* { combine_operators(val) }
  ;
|
|
|
|
# A single T_ADD / T_SUB clause, returned as an [operator, operand] pair.
additive_expr_follow
  = T_ADD mult_expr { [:add, val[1]] }
  | T_SUB mult_expr { [:sub, val[1]] }
  ;
|
|
|
|
# Multiplicative operators (T_DIV, T_MOD and T_MUL).
#
# XPath 1.0 defines these operators as left-associative: "8 div 4 div 2" must
# be parsed as "(8 div 4) div 2". To get that shape the clauses are collected
# with the * operator and folded left-to-right by combine_operators, the same
# way every other binary operator in this grammar is handled. Recursing into
# mult_expr from the follow rule would instead nest the right-hand side and
# produce "8 div (4 div 2)".
mult_expr
  = union_expr mult_expr_follow* { combine_operators(val) }
  ;

# A single multiplicative clause, returned as an [operator, operand] pair. The
# operand is a union_expr (the next priority level), not a mult_expr, so that
# chaining is handled by iteration in mult_expr.
mult_expr_follow
  = T_DIV union_expr { [:div, val[1]] }
  | T_MOD union_expr { [:mod, val[1]] }
  | T_MUL union_expr { [:mul, val[1]] }
  ;
|
|
|
|
# An expression_member followed by any number of T_PIPE clauses.
union_expr
  = expression_member union_expr_follow* { combine_operators(val) }
  ;
|
|
|
|
# A single "| <operand>" clause, returned as an [operator, operand] pair.
union_expr_follow
  = T_PIPE expression_member { [:pipe, val[1]] }
  ;
|
|
|
|
# Operands of the operator rules: paths, literals, variables, or a
# parenthesised expression (the parentheses themselves produce no node).
expression_member
  = relative_path
  | absolute_path
  | string
  | number
  | variable
  | T_LPAREN expression T_RPAREN { val[1] }
  ;
|
|
|
|
# Relative paths such as A or A/B.
#
# A single step is returned as-is; two or more steps are wrapped in a (path)
# node.
relative_path
  = path_steps
    {
      steps = val[0]

      steps.length > 1 ? s(:path, *steps) : steps[0]
    }
  ;
|
|
|
|
# One step followed by any number of "/step" parts, returned as a flat Array
# of step nodes.
path_steps
  = path_step_or_axis path_steps_follow* { [val[0], *val[1]] }
  ;
|
|
|
|
# A "/step" continuation; only the step node is returned.
path_steps_follow
  = T_SLASH path_step_or_axis { val[1] }
  ;
|
|
|
|
# Absolute paths such as /A or /A/B.
#
# The steps are optional: when none follow the slash, val[1] is nil and the
# splat adds no children to the (absolute_path) node.
absolute_path
  = T_SLASH path_steps? { s(:absolute_path, *val[1]) }
  ;
|
|
|
|
# A path segment: either a plain step or an explicit axis.
path_step_or_axis
  = path_step
  | axis
  ;
|
|
|
|
# Steps such as A, A(), A(X), A:B and A[...].
#
# path_step_follow returns [type, args, predicate]:
#
# * type is :call or :test
# * args holds the call arguments for :call, or the identifier that followed
#   the colon for :test (nil when there was none)
# * predicate is the optional predicate expression (never set for :call)
path_step
  = T_IDENT path_step_follow
    {
      name = val[0]
      kind, args, pred_expr = val[1]

      node =
        if kind == :test
          # A bare test (e.g. just "A") actually means "child::A". Handling
          # this at the parser level is the easiest.
          node_test = args ? s(:test, name, args) : s(:test, nil, name)

          s(:axis, 'child', node_test)
        else
          s(kind, name, *args)
        end

      pred_expr ? s(:predicate, node, pred_expr) : node
    }
  | type_test { s(:axis, 'child', val[0]) }
  ;
|
|
|
|
# Whatever follows the leading identifier of a path_step. Returns an Array of
# [type, args, predicate] (the :call form has no predicate slot):
#
# * "(...)"       => [:call, arguments]
# * ":B" "[...]"? => [:test, "B", predicate or nil]
# * "[...]"?      => [:test, nil, predicate or nil]
path_step_follow
  = T_LPAREN call_args T_RPAREN { [:call, val[1]] }
  | T_COLON T_IDENT predicate?  { [:test, val[1], val[2]] }
  | predicate?                  { [:test, nil, val[0]] }
  ;
|
|
|
|
# A bracketed predicate; only the inner expression is returned.
predicate
  = T_LBRACK expression T_RBRACK { val[1] }
  ;
|
|
|
|
# A T_TYPE_TEST token wrapped in a (type_test) node.
type_test
  = T_TYPE_TEST { s(:type_test, val[0]) }
  ;
|
|
|
|
# Regular test (e.g. tests used as axis values).
#
# "A:B" produces (test "A" "B"), a lone "A" produces (test nil "A").
test
  = T_IDENT test_follow?
    {
      first, second = val

      second ? s(:test, first, second) : s(:test, nil, first)
    }
  ;
|
|
|
|
# The ":B" part of a test; only the identifier is returned.
test_follow
  = T_COLON T_IDENT { val[1] }
  ;
|
|
|
|
# Arguments of a function call: a comma separated list of expressions returned
# as a flat Array, or nothing at all (the `_` alternative matches no input).
call_args
  = expression call_args_follow* { [val[0], *val[1]] }
  | _
  ;
|
|
|
|
# A ", <expression>" continuation; only the expression is returned.
call_args_follow
  = T_COMMA expression { val[1] }
  ;
|
|
|
|
# Explicit axes such as child::foo and descendant-or-self::foo.
#
# Produces an (axis) node which is wrapped in a (predicate) node when a
# predicate follows the axis value.
axis
  = T_AXIS axis_value predicate?
    {
      axis_node = s(:axis, val[0], val[1])

      val[2] ? s(:predicate, axis_node, val[2]) : axis_node
    }
  ;
|
|
|
|
# The value of an axis: a regular test or a type test.
axis_value
  = test
  | type_test
  ;
|
|
|
|
# A T_STRING token wrapped in a (string) node.
string
  = T_STRING { s(:string, val[0]) }
  ;
|
|
|
|
# Numeric literals: T_INT becomes an (int) node, T_FLOAT a (float) node.
number
  = T_INT   { s(:int, val[0]) }
  | T_FLOAT { s(:float, val[0]) }
  ;
|
|
|
|
# A T_VAR token wrapped in a (var) node.
variable
  = T_VAR { s(:var, val[0]) }
  ;
|
|
|
|
%inner
|
|
{
|
|
##
# Cache used by `parse_with_cache`, which looks entries up by the raw XPath
# input String.
#
# @return [Oga::LRU]
#
CACHE = LRU.new
|
|
|
|
##
# Parses `data` through `CACHE`, using the input String as the cache key.
#
# The block handed to `CACHE.get_or_set` builds a fresh parser and parses the
# input. Presumably the block is only invoked when no entry exists for `data`
# yet (confirm against `Oga::LRU#get_or_set`).
#
# @param [String] data The XPath expression to parse.
# @return [AST::Node]
#
def self.parse_with_cache(data)
  CACHE.get_or_set(data) { new(data).parse }
end
|
|
|
|
##
# Sets up the parser by wrapping the input in a new `Lexer`, which is stored
# in `@lexer` for `each_token` to read from.
#
# @param [String] data The input to parse.
#
def initialize(data)
  @lexer = Lexer.new(data)
end
|
|
|
|
##
# Creates a new XPath node.
#
# All arguments after `type` are collected into a single Array that is passed
# to `AST::Node.new` as the node's children.
#
# @param [Symbol] type
# @param [Array] children
# @return [AST::Node]
#
def s(type, *children)
  AST::Node.new(type, children)
end
|
|
|
|
##
# Yields every token produced by the lexer as a `[type, value]` pair, followed
# by a final `[-1, -1]` pair once the lexer is done.
#
# Whenever the lexer reports a line number it is remembered in `@line`.
#
# When called without a block an Enumerator over the same pairs is returned
# instead of raising a LocalJumpError.
#
# @yieldparam [Array] token The `[type, value]` pair.
# @return [Enumerator] Only when no block is given.
#
def each_token
  return to_enum(:each_token) unless block_given?

  @lexer.advance do |type, value, line|
    @line = line if line

    yield [type, value]
  end

  # Emitted after the last real token; presumably the end-of-input marker
  # expected by the parser driver (confirm against the ruby-ll driver).
  yield [-1, -1]
end
|
|
|
|
##
# Folds a first operand and a list of `[operator, operand]` pairs into nested
# nodes, working left to right. For example `[a, [[:add, b], [:sub, c]]]`
# becomes `(sub (add a b) c)`. With an empty list the first operand is
# returned untouched.
#
# @param [Array] val The first operand followed by an Array of
#  `[operator, operand]` pairs.
# @return [AST::Node]
#
def combine_operators(val)
  first, pairs = val

  pairs.inject(first) { |left, (operator, right)| s(operator, left, right) }
end
|
|
|
|
##
# Combines an operand with an optional `[operator, operand]` pair. Without a
# pair the first operand is returned as-is.
#
# @param [Array] val The first operand followed by either nil or an
#  `[operator, operand]` pair.
#
def combine_optional_operator(val)
  left   = val[0]
  follow = val[1]

  return left unless follow

  s(follow[0], left, follow[1])
end
|
|
}
|