# Listing metadata: 405 lines, 11 KiB, Ruby
module Oga
|
|
module XPath
|
|
##
# Compiling of XPath ASTs into Ruby code.
#
# The Compiler class can be used to turn an XPath AST into Ruby source code
# that can be executed to match XML nodes in a given input document/element.
# Compiled source code is cached per expression, removing the need for
# recompiling the same expression over and over again.
#
# @private
#
|
|
class Compiler
|
|
# Cache used by {.compile_with_cache}; entries are keyed by the XPath AST and
# hold whatever {#compile} returned for it.
#
# @return [Oga::LRU]
CACHE = LRU.new
|
|
|
|
# Wildcard for node names/namespace prefixes. Frozen so the shared constant
# cannot be mutated in place.
STAR = '*'.freeze
|
|
|
|
# Node types that require a NodeSet to push nodes into. Frozen so the list
# cannot be modified at runtime.
RETURN_NODESET = [:path, :absolute_path, :axis, :predicate].freeze
|
|
|
|
##
# Compiles an AST, storing the result in {CACHE} under the AST itself so the
# compilation block only has to run when the cache has no entry for it.
#
# @see #compile
# @param [AST::Node] ast
# @return [Proc] presumably the value produced by {#compile} (depends on what
#   `CACHE.get_or_set` returns)
#
def self.compile_with_cache(ast)
  CACHE.get_or_set(ast) { new.compile(ast) }
end
|
|
|
|
##
# Compiles an XPath AST into a Ruby Proc.
#
# The AST is first turned into a Ruby::Node tree wrapped in a `lambda` block
# taking the input node and an optional `variables` argument (defaulting to
# nil). For node types listed in RETURN_NODESET the body assigns a new
# XML::NodeSet to `matched`, runs the generated code (which pushes into
# `matched`) and ends with `matched`. The tree is then converted to source by
# Ruby::Generator and evaluated.
#
# @param [AST::Node] ast
# @return [Proc] the result of evaluating the generated source
#
def compile(ast)
  document = node_literal
  matched  = matched_literal

  # Decide once whether the expression yields a node set; the answer is needed
  # both when processing the AST and when building the lambda body.
  returns_nodeset = return_nodeset?(ast)

  ruby_ast =
    if returns_nodeset
      process(ast, document) { |node| matched.push(node) }
    else
      process(ast, document)
    end

  # `variables = nil` as the second (optional) block argument.
  vars = variables_literal.assign(literal('nil'))

  proc_ast = literal('lambda').add_block(document, vars) do
    if returns_nodeset
      matched.assign(literal(XML::NodeSet).new)
        .followed_by(ruby_ast)
        .followed_by(matched)
    else
      ruby_ast
    end
  end

  generator = Ruby::Generator.new
  source    = generator.process(proc_ast)

  # NOTE: the source is produced from the AST by this compiler, not taken
  # verbatim from user input. eval runs with this method's binding, so the
  # generated code can see the locals defined above.
  eval(source)
end
|
|
|
|
##
# Processes a single XPath AST node by dispatching to the handler method
# named `on_TYPE`, where TYPE is the node's type. Any given block is passed
# on to the handler.
#
# @param [AST::Node] ast
# @param [Oga::Ruby::Node] input
# @return [Oga::Ruby::Node]
#
def process(ast, input, &block)
  send(:"on_#{ast.type}", ast, input, &block)
end
|
|
|
|
##
# Processes a relative path.
#
# The path segments are processed from last to first so that the code for
# each segment can be nested inside the code of the segment before it. An
# AST without children produces nil.
#
# @param [AST::Node] ast
# @param [Oga::Ruby::Node] input
# @return [Oga::Ruby::Node]
#
def on_path(ast, input, &block)
  ruby_ast   = nil
  var_name   = node_literal
  last_index = ast.children.length - 1

  ast.children.reverse_each.with_index do |child, index|
    # The first segment of the path (visited last here) operates on the
    # variable passed as "input"; every other segment operates on the block
    # variable ("node") introduced by the segment enclosing it.
    input_var = index == last_index ? input : var_name

    ruby_ast =
      if index.zero?
        # The last segment of the path receives the caller's block, which
        # adds the code that pushes the matched node into the node set.
        process(child, input_var, &block)
      else
        # Earlier segments wrap the code generated so far.
        process(child, input_var) { ruby_ast }
      end
  end

  ruby_ast
end
|
|
|
|
# Processes an absolute path. Without any path segments the root node of the
# input is pushed into the set of matched nodes directly; otherwise the
# segments are processed as a regular path starting at the root node.
#
# @param [AST::Node] ast
# @param [Oga::Ruby::Node] input
# @return [Oga::Ruby::Node]
def on_absolute_path(ast, input, &block)
  root = input.root_node

  return matched_literal.push(root) if ast.children.empty?

  on_path(ast, root, &block)
end
|
|
|
|
##
# Dispatches the processing of axes to dedicated methods. This works
# similar to {#process} except the handler names are "on_axis_X" with "X"
# being the axis name, dashes replaced by underscores (e.g.
# "ancestor-or-self" is handled by `on_axis_ancestor_or_self`).
#
# @param [AST::Node] ast
# @param [Oga::Ruby::Node] input
# @return [Oga::Ruby::Node]
#
def on_axis(ast, input, &block)
  name, test = *ast.children

  # Character-for-character replacement, so `tr` suffices.
  handler = name.tr('-', '_')

  send(:"on_axis_#{handler}", test, input, &block)
end
|
|
|
|
# Processes the "child" axis: generates code that iterates over the children
# of the input and, for every child matching the node test, runs the code
# returned by the given block.
#
# @param [AST::Node] ast The node test to apply to every child.
# @param [Oga::Ruby::Node] input
# @yieldparam [Oga::Ruby::Node] child The block variable holding the child.
# @return [Oga::Ruby::Node]
def on_axis_child(ast, input, &block)
  child     = node_literal
  condition = process(ast, child, &block)

  input.children.each.add_block(child) do
    condition.if_true { yield child }
  end
end
|
|
|
|
# Processes the "attribute" axis: generates code that, when the input is an
# XML::Element, iterates over its attributes and runs the code returned by
# the given block for every attribute matching the name/namespace test. When
# the test imposes no name or namespace condition every attribute matches.
#
# @param [AST::Node] ast
# @param [Oga::Ruby::Node] input
# @yieldparam [Oga::Ruby::Node] attribute The block variable holding the
#   attribute.
# @return [Oga::Ruby::Node]
def on_axis_attribute(ast, input)
  input.is_a?(XML::Element).if_true do
    attribute = literal('attribute')

    input.attributes.each.add_block(attribute) do
      name_match = match_name_and_namespace(ast, attribute)

      # A nil match means there is nothing to test (wildcard name and no
      # specific namespace), so the attribute is yielded unconditionally.
      if name_match
        name_match.if_true { yield attribute }
      else
        yield attribute
      end
    end
  end
end
|
|
|
|
# Processes the "ancestor-or-self" axis: generates code that first applies
# the node test to the input itself and then to each of its ancestors,
# running the code returned by the given block for every match.
#
# @param [AST::Node] ast
# @param [Oga::Ruby::Node] input
# @yieldparam [Oga::Ruby::Node] node The input or the "parent" block variable.
# @return [Oga::Ruby::Node]
def on_axis_ancestor_or_self(ast, input, &block)
  parent = literal('parent')

  self_test = process(ast, input, &block).if_true { yield input }

  ancestors_test = input.each_ancestor.add_block(parent) do
    process(ast, parent, &block).if_true { yield parent }
  end

  self_test.followed_by(ancestors_test)
end
|
|
|
|
# Processes the "ancestor" axis: generates code that applies the node test to
# each ancestor of the input, running the code returned by the given block
# for every match.
#
# @param [AST::Node] ast
# @param [Oga::Ruby::Node] input
# @yieldparam [Oga::Ruby::Node] parent The block variable holding the
#   ancestor.
# @return [Oga::Ruby::Node]
def on_axis_ancestor(ast, input, &block)
  parent = literal('parent')

  input.each_ancestor.add_block(parent) do
    process(ast, parent, &block).if_true { yield parent }
  end
end
|
|
|
|
# Processes a predicate. Numeric predicates (see {#number?}) are handled as
# index predicates, everything else as an expression predicate.
#
# @param [AST::Node] ast Node whose elements are the test and the predicate.
# @param [Oga::Ruby::Node] input
# @return [Oga::Ruby::Node]
def on_predicate(ast, input, &block)
  test, predicate = *ast

  if number?(predicate)
    on_index_predicate(test, predicate, input, &block)
  else
    on_expression_predicate(test, predicate, input, &block)
  end
end
|
|
|
|
##
# Processes an index predicate such as `foo[10]`.
#
# The generated code sets an `index` variable to 1, then for every node
# matching the test compares `index` with the requested position (running
# the code returned by the given block on a match) and increments `index`.
#
# NOTE(review): the predicate is converted with {#to_int}, so a float
# predicate such as `foo[1.5]` is compiled as index 1 - confirm this is
# intended.
#
# @param [AST::Node] test
# @param [AST::Node] predicate
# @param [Oga::Ruby::Node] input
# @yieldparam [Oga::Ruby::Node] matched_test_node
# @return [Oga::Ruby::Node]
#
def on_index_predicate(test, predicate, input)
  int1      = literal('1')
  index     = to_int(predicate)
  index_var = literal('index')

  inner = process(test, input) do |matched_test_node|
    index_var.eq(index).if_true { yield matched_test_node }
      .followed_by(index_var.assign(index_var + int1))
  end

  index_var.assign(int1).followed_by(inner)
end
|
|
|
|
##
# Processes a predicate using an expression.
#
# This method generates Ruby code that roughly looks like the following:
#
#     if catch :predicate_matched do
#          node.children.each do |node|
#            if some_condition_that_matches_a_predicate
#              throw :predicate_matched, true
#            end
#          end
#
#          nil
#        end
#
#       matched.push(node)
#     end
#
# @param [AST::Node] test
# @param [AST::Node] predicate
# @param [Oga::Ruby::Node] input
# @yieldparam [Oga::Ruby::Node] matched_test_node
# @return [Oga::Ruby::Node]
#
def on_expression_predicate(test, predicate, input)
  catch_arg = symbol(:predicate_matched)

  process(test, input) do |matched_test_node|
    catch_block = send_message('catch', catch_arg).add_block do
      inner = process(predicate, matched_test_node) do
        send_message('throw', catch_arg, literal('true'))
      end

      # Ensure that the "catch" only returns a value when "throw" is
      # actually invoked.
      inner.followed_by(literal('nil'))
    end

    catch_block.if_true { yield matched_test_node }
  end
end
|
|
|
|
# Processes a node test: the input has to be an element or attribute and,
# when the test names a specific name and/or namespace, has to match those
# as well.
#
# @param [AST::Node] ast
# @param [Oga::Ruby::Node] input
# @return [Oga::Ruby::Node]
def on_test(ast, input)
  condition  = element_or_attribute(input)
  name_match = match_name_and_namespace(ast, input)

  # name_match is nil when there is no name/namespace condition to add.
  name_match ? condition.and(name_match) : condition
end
|
|
|
|
# Processes an equality comparison by processing both operands against the
# same input and comparing the results using `eq`.
#
# @param [AST::Node] ast Node whose elements are the left and right operand.
# @param [Oga::Ruby::Node] input
# @return [Oga::Ruby::Node]
def on_eq(ast, input)
  left, right = *ast

  process(left, input).eq(process(right, input))
end
|
|
|
|
# Processes a string literal. Any arguments besides the AST node are
# accepted for compatibility with {#process} and ignored.
#
# @param [AST::Node] ast
# @return [Oga::Ruby::Node]
def on_string(ast, *)
  string(ast.children[0])
end
|
|
|
|
# Processes an integer literal. The value is emitted as a float literal
# (e.g. 10 becomes "10.0"). Any arguments besides the AST node are ignored.
#
# @param [AST::Node] ast
# @return [Oga::Ruby::Node]
def on_int(ast, *)
  literal(ast.children[0].to_f.to_s)
end
|
|
|
|
# Processes a float literal, emitting its string representation as-is. Any
# arguments besides the AST node are ignored.
#
# @param [AST::Node] ast
# @return [Oga::Ruby::Node]
def on_float(ast, *)
  literal(ast.children[0].to_s)
end
|
|
|
|
# Processes a variable reference. The generated code looks the name up in
# `variables` and raises "Undefined XPath variable: NAME" when `variables`
# or the looked-up value is falsy.
#
# @param [AST::Node] ast
# @param [Oga::Ruby::Node] input Unused; accepted for compatibility with
#   {#process}.
# @return [Oga::Ruby::Node]
def on_var(ast, input)
  vars = variables_literal
  name = ast.children[0]

  vars.and(vars[string(name)])
    .or(send_message('raise', string("Undefined XPath variable: #{name}")))
end
|
|
|
|
private
|
|
|
|
# Builds a :lit node containing the string form of the given value.
#
# @param [#to_s] value
# @return [Oga::Ruby::Node]
def literal(value)
  Ruby::Node.new(:lit, [value.to_s])
end
|
|
|
|
# Builds a :string node containing the string form of the given value.
#
# @param [#to_s] value
# @return [Oga::Ruby::Node]
def string(value)
  Ruby::Node.new(:string, [value.to_s])
end
|
|
|
|
# Builds a :symbol node containing the given value converted to a Symbol.
#
# @param [#to_sym] value
# @return [Oga::Ruby::Node]
def symbol(value)
  Ruby::Node.new(:symbol, [value.to_sym])
end
|
|
|
|
# Builds a :send node for calling the named method without an explicit
# receiver (the receiver slot is nil), passing along the given arguments.
#
# @param [String] name
# @param [Array] args
# @return [Oga::Ruby::Node]
def send_message(name, *args)
  Ruby::Node.new(:send, [nil, name, *args])
end
|
|
|
|
# Builds the condition checking that the given node is either an
# XML::Attribute or an XML::Element.
#
# @param [Oga::Ruby::Node] node
# @return [Oga::Ruby::Node]
def element_or_attribute(node)
  node.is_a?(XML::Attribute).or(node.is_a?(XML::Element))
end
|
|
|
|
# Builds the condition matching the name and/or namespace of a node test
# against the input. A name or namespace equal to the wildcard (STAR), or a
# missing namespace, adds no condition.
#
# @param [AST::Node] ast Node whose elements are the namespace and the name.
# @param [Oga::Ruby::Node] input
# @return [Oga::Ruby::Node, nil] nil when there is nothing to match on.
def match_name_and_namespace(ast, input)
  ns, name = *ast

  condition = nil
  condition = input.name.eq(string(name)) if name != STAR

  if ns && ns != STAR
    ns_match  = input.namespace_name.eq(string(ns))
    condition = condition ? condition.and(ns_match) : ns_match
  end

  condition
end
|
|
|
|
# Literal referring to the `matched` variable in the generated code.
#
# @return [Oga::Ruby::Node]
def matched_literal
  literal('matched')
end
|
|
|
|
# Literal referring to the `node` variable in the generated code.
#
# @return [Oga::Ruby::Node]
def node_literal
  literal('node')
end
|
|
|
|
# Literal referring to the `variables` variable in the generated code.
#
# @return [Oga::Ruby::Node]
def variables_literal
  literal('variables')
end
|
|
|
|
# Builds an integer literal from the first child of the given AST node. The
# value is converted using `to_i`, so any fractional part is dropped.
#
# @param [AST::Node] ast
# @return [Oga::Ruby::Node]
def to_int(ast)
  literal(ast.children[0].to_i.to_s)
end
|
|
|
|
# Returns true when the AST node is a numeric literal (:int or :float).
#
# @param [AST::Node] ast
# @return [TrueClass|FalseClass]
def number?(ast)
  type = ast.type

  type == :int || type == :float
end
|
|
|
|
# Returns true when the type of the AST node is listed in RETURN_NODESET,
# meaning the compiled code has to collect nodes into a node set.
#
# @param [AST::Node] ast
# @return [TrueClass|FalseClass]
def return_nodeset?(ast)
  RETURN_NODESET.include?(ast.type)
end
|
|
end # Compiler
|
|
end # XPath
|
|
end # Oga
|