diff --git a/lib/oga/blacklist.rb b/lib/oga/blacklist.rb
index 4e28141..85e3f30 100644
--- a/lib/oga/blacklist.rb
+++ b/lib/oga/blacklist.rb
@@ -1,38 +1,28 @@
module Oga
- ##
# @api private
- #
class Blacklist
# @return [Set]
attr_reader :names
- ##
# @param [Array] names
- #
def initialize(names)
  # Every name is stored alongside its upper-case variant.
  upcased = names.map(&:upcase)
  @names  = Set.new(names + upcased)
end
- ##
# @yieldparam [String]
- #
def each
  # Hand each stored name to the caller's block.
  names.each { |entry| yield entry }
end
- ##
# @return [TrueClass|FalseClass]
- #
def allow?(name)
  # A name is allowed only when it is absent from the list.
  names.include?(name) ? false : true
end
- ##
# @param [Oga::Blacklist] other
# @return [Oga::Blacklist]
- #
def +(other)
  # Merge both name collections and wrap them in a new instance of this class.
  combined = names + other.names
  self.class.new(combined)
end
diff --git a/lib/oga/css/lexer.rl b/lib/oga/css/lexer.rl
index 4a9bd5c..fc8c50d 100644
--- a/lib/oga/css/lexer.rl
+++ b/lib/oga/css/lexer.rl
@@ -2,7 +2,6 @@
module Oga
module CSS
- ##
# Lexer for turning CSS expressions into a sequence of tokens. Tokens are
# returned as arrays with every array having two values:
#
@@ -17,25 +16,20 @@ module Oga
# instances in threads just fine.
#
# @api private
- #
class Lexer
%% write data;
# % fix highlight
- ##
# @param [String] data The data to lex.
- #
def initialize(data)
# Kept so that slice_input can cut token values out of it by byte offset.
@data = data
end
- ##
# Gathers all the tokens for the input and returns them as an Array.
#
# @see [#advance]
# @return [Array]
- #
def lex
tokens = []
@@ -46,7 +40,6 @@ module Oga
return tokens
end
- ##
# Advances through the input and generates the corresponding tokens. Each
# token is yielded to the supplied block.
#
@@ -54,7 +47,6 @@ module Oga
# the lexer loop has finished.
#
# @see [#add_token]
- #
def advance(&block)
@block = block
@@ -88,7 +80,6 @@ module Oga
private
- ##
# Emits a token of which the value is based on the supplied start/stop
# position.
#
@@ -98,25 +89,21 @@ module Oga
#
# @see [#text]
# @see [#add_token]
- #
def emit(type, start, stop)
  # Slice the token text out of the input and emit it in one step.
  add_token(type, slice_input(start, stop))
end
- ##
# Returns the text between the specified start and stop position.
#
# @param [Fixnum] start
# @param [Fixnum] stop
# @return [String]
- #
def slice_input(start, stop)
  # Positions are byte offsets, so the length is expressed in bytes as well.
  length = stop - start
  @data.byteslice(start, length)
end
- ##
# Yields a new token to the supplied block.
#
# @param [Symbol] type The token type.
@@ -124,7 +111,6 @@ module Oga
#
# @yieldparam [Symbol] type
# @yieldparam [String|NilClass] value
- #
def add_token(type, value = nil)
  # @block is the callback stored by #advance.
  @block.(type, value)
end
@@ -202,7 +188,6 @@ module Oga
#
# Strings can be single or double quoted. They are mainly used for
# attribute values.
- #
dquote = '"';
squote = "'";
diff --git a/lib/oga/css/parser.rll b/lib/oga/css/parser.rll
index 2a46f02..ccffba8 100644
--- a/lib/oga/css/parser.rll
+++ b/lib/oga/css/parser.rll
@@ -1,6 +1,5 @@
%header
{
-##
# AST parser for CSS expressions.
#
# This parser does _not_ build a CSS specific AST, instead it directly produces
@@ -327,47 +326,35 @@ even
%inner
{
- ##
# @return [Oga::LRU]
- #
CACHE = LRU.new
- ##
# @param [String] data
# @return [AST::Node]
- #
def self.parse_with_cache(data)
  # The block only runs when CACHE has no entry for this expression.
  CACHE.get_or_set(data) do
    parser = new(data)
    parser.parse
  end
end
- ##
# @param [String] data The input to parse.
- #
def initialize(data)
# The lexer is created up front; each_token later drives it through #advance.
@lexer = Lexer.new(data)
end
- ##
# Resets the internal state of the parser.
- #
def reset
# Drop the memoized node test so current_element builds a new wildcard test.
@current_element = nil
end
- ##
# @param [Symbol] type
# @param [Array] children
# @return [AST::Node]
- #
def s(type, *children)
# The splat collects the children, which are passed to the node as one Array.
AST::Node.new(type, children)
end
- ##
# Yields the next token from the lexer.
#
# @yieldparam [Array]
- #
def each_token
@lexer.advance do |*args|
yield args
@@ -376,16 +363,13 @@ even
yield [-1, -1]
end
- ##
# Returns the node test for the current element.
#
# @return [AST::Node]
- #
def current_element
  # Fall back to a wildcard node test when no element has been recorded yet.
  @current_element = s(:test, nil, '*') unless @current_element
  @current_element
end
- ##
# Parses the input and returns the corresponding AST.
#
# @example
@@ -393,175 +377,140 @@ even
# ast = parser.parse
#
# @return [AST::Node]
- #
def parse
# Clear per-parse state first, then defer to the implementation this method overrides.
reset
super
end
- ##
# Generates the AST for a node test.
#
# @param [String] namespace
# @param [String] name
# @return [AST::Node]
- #
def on_test(namespace, name)
  # Remember the node test so the *-of-type pseudo classes can reuse it.
  node = s(:test, namespace, name)
  @current_element = node
end
- ##
# @param [String] name
# @param [AST::Node] arg
# @return [AST::Node]
- #
def on_pseudo_class(name, arg = nil)
  # "nth-child" dispatches to on_pseudo_class_nth_child, and so on.
  handler = "on_pseudo_class_#{name.tr('-', '_')}"

  return send(handler) unless arg

  send(handler, arg)
end
- ##
# Generates the AST for the `root` pseudo class.
#
# @return [AST::Node]
- #
def on_pseudo_class_root
  # A root element is one for which the parent axis matches nothing.
  any_parent = s(:axis, 'parent', s(:test, nil, '*'))
  s(:call, 'not', any_parent)
end
- ##
# Generates the AST for the `nth-child` pseudo class.
#
# @param [AST::Node] arg
# @return [AST::Node]
- #
def on_pseudo_class_nth_child(arg)
  # Position is counted over the preceding siblings.
  axis = 'preceding-sibling'
  generate_nth_child(axis, arg)
end
- ##
# Generates the AST for the `nth-last-child` pseudo class.
#
# @param [AST::Node] arg
# @return [AST::Node]
- #
def on_pseudo_class_nth_last_child(arg)
  # Position is counted over the following siblings.
  axis = 'following-sibling'
  generate_nth_child(axis, arg)
end
- ##
# Generates the AST for the `nth-of-type` pseudo class.
#
# @param [AST::Node] arg
# @return [AST::Node]
- #
def on_pseudo_class_nth_of_type(arg)
  # Only preceding siblings matching the current element's test are counted.
  axis = 'preceding-sibling'
  generate_nth_child(axis, arg, current_element)
end
- ##
# Generates the AST for the `nth-last-of-type` pseudo class.
#
# @param [AST::Node] arg
# @return [AST::Node]
- #
def on_pseudo_class_nth_last_of_type(arg)
  # Only following siblings matching the current element's test are counted.
  axis = 'following-sibling'
  generate_nth_child(axis, arg, current_element)
end
- ##
# Generates the AST for the `nth` pseudo class.
#
# @param [AST::Node] arg
# @return [AST::Node]
- #
def on_pseudo_class_nth(arg)
  # Compares the position() call against the supplied argument.
  position = s(:call, 'position')
  s(:eq, position, arg)
end
- ##
# Generates the AST for the `:first-child` selector.
#
# @return [AST::Node]
- #
def on_pseudo_class_first_child
  # A first child has no preceding siblings.
  axis = 'preceding-sibling'
  generate_no_siblings(axis)
end
- ##
# Generates the AST for the `:last-child` selector.
#
# @return [AST::Node]
- #
def on_pseudo_class_last_child
  # A last child has no following siblings.
  axis = 'following-sibling'
  generate_no_siblings(axis)
end
- ##
# Generates the AST for the `:first-of-type` selector.
#
# @return [AST::Node]
- #
def on_pseudo_class_first_of_type
  # No preceding sibling may match the current element's test.
  axis = 'preceding-sibling'
  generate_no_siblings(axis, current_element)
end
- ##
# Generates the AST for the `:last-of-type` selector.
#
# @return [AST::Node]
- #
def on_pseudo_class_last_of_type
  # No following sibling may match the current element's test.
  axis = 'following-sibling'
  generate_no_siblings(axis, current_element)
end
- ##
# Generates the AST for the `:only-child` selector.
#
# @return [AST::Node]
- #
def on_pseudo_class_only_child
  # An only child is both the first and the last child.
  first = on_pseudo_class_first_child
  last  = on_pseudo_class_last_child
  s(:and, first, last)
end
- ##
# Generates the AST for the `:only-of-type` selector.
#
# @return [AST::Node]
- #
def on_pseudo_class_only_of_type
  # The only element of its type is both the first and the last of that type.
  first = on_pseudo_class_first_of_type
  last  = on_pseudo_class_last_of_type
  s(:and, first, last)
end
- ##
# Generates the AST for the `:empty` selector.
#
# @return [AST::Node]
- #
def on_pseudo_class_empty
  # An empty element has no child nodes of any kind.
  any_child = s(:axis, 'child', s(:type_test, 'node'))
  s(:call, 'not', any_child)
end
- ##
# Generates the AST for the `=` operator.
#
# @param [AST::Node] attr
# @param [AST::Node] value
# @return [AST::Node]
- #
def on_op_eq(attr, value)
# Emits an :eq node with the attribute on the left and the value on the right.
s(:eq, attr, value)
end
- ##
# Generates the AST for the `~=` operator.
#
# @param [AST::Node] attr
# @param [AST::Node] value
# @return [AST::Node]
- #
def on_op_space_in(attr, value)
s(
:call,
@@ -571,24 +520,20 @@ even
)
end
- ##
# Generates the AST for the `^=` operator.
#
# @param [AST::Node] attr
# @param [AST::Node] value
# @return [AST::Node]
- #
def on_op_starts_with(attr, value)
# Emits a call node for the 'starts-with' function with attr and value as arguments.
s(:call, 'starts-with', attr, value)
end
- ##
# Generates the AST for the `$=` operator.
#
# @param [AST::Node] attr
# @param [AST::Node] value
# @return [AST::Node]
- #
def on_op_ends_with(attr, value)
s(
:eq,
@@ -611,24 +556,20 @@ even
)
end
- ##
# Generates the AST for the `*=` operator.
#
# @param [AST::Node] attr
# @param [AST::Node] value
# @return [AST::Node]
- #
def on_op_in(attr, value)
# Emits a call node for the 'contains' function with attr and value as arguments.
s(:call, 'contains', attr, value)
end
- ##
# Generates the AST for the `|=` operator.
#
# @param [AST::Node] attr
# @param [AST::Node] value
# @return [AST::Node]
- #
def on_op_hyphen_in(attr, value)
s(
:or,
@@ -644,12 +585,10 @@ even
private
- ##
# @param [String] count_axis
# @param [AST::Node] arg
# @param [AST::Node] count_test
# @return [AST::Node]
- #
def generate_nth_child(count_axis, arg, count_test = s(:test, nil, '*'))
count_call = s(:call, 'count', s(:axis, count_axis, count_test))
@@ -679,43 +618,33 @@ even
node
end
- ##
# @param [String] axis
# @param [AST::Node] test
# @return [AST::Node]
- #
def generate_no_siblings(axis, test = s(:test, nil, '*'))
s(:eq, s(:call, 'count', s(:axis, axis, test)), s(:int, 0))
end
- ##
# @param [AST::Node] node
# @return [TrueClass|FalseClass]
- #
def int_node?(node)
  # Identity comparison against the :int symbol.
  kind = node.type
  kind.equal?(:int)
end
- ##
# @param [AST::Node] node
# @return [TrueClass|FalseClass]
- #
def non_positive_number?(node)
  # The numeric value is the node's first child.
  number = node.children[0]
  number <= 0
end
- ##
# @param [AST::Node] node
# @return [Symbol]
- #
def step_comparison(node)
  # Non-negative steps compare with :gte, negative steps with :lte.
  if node.children[0] >= 0
    :gte
  else
    :lte
  end
end
- ##
# @param [AST::Node] step
# @return [AST::Node]
- #
def step_modulo_value(step)
# -2n
if step and non_positive_number?(step)
diff --git a/lib/oga/entity_decoder.rb b/lib/oga/entity_decoder.rb
index 18d01be..adee636 100644
--- a/lib/oga/entity_decoder.rb
+++ b/lib/oga/entity_decoder.rb
@@ -1,17 +1,13 @@
module Oga
module EntityDecoder
- ##
# @see [decode]
- #
def self.try_decode(input, html = false)
  # Input that is nil (or false) is passed over and nil is returned.
  decode(input, html) if input
end
- ##
# @param [String] input
# @param [TrueClass|FalseClass] html
# @return [String]
- #
def self.decode(input, html = false)
decoder = html ? HTML::Entities : XML::Entities
diff --git a/lib/oga/html/entities.rb b/lib/oga/html/entities.rb
index b67641a..a5f2483 100644
--- a/lib/oga/html/entities.rb
+++ b/lib/oga/html/entities.rb
@@ -1,14 +1,12 @@
module Oga
module HTML
module Entities
- ##
# Hash mapping HTML entities to their Unicode character replacements.
#
# Based on the JSON output as listed at
# http://www.w3.org/TR/html5/syntax.html#named-character-references
#
# @return [Hash]
- #
DECODE_MAPPING = {
'Á' => [193].pack('U*'),
'á' => [225].pack('U*'),
@@ -2137,11 +2135,9 @@ module Oga
'' => [8204].pack('U*'),
}
- ##
# Decodes HTML entities.
#
# @see [decode]
- #
def self.decode(input)
# Delegates to the XML decoder, supplying this module's DECODE_MAPPING.
XML::Entities.decode(input, DECODE_MAPPING)
end
diff --git a/lib/oga/html/parser.rb b/lib/oga/html/parser.rb
index 221436d..471a0c9 100644
--- a/lib/oga/html/parser.rb
+++ b/lib/oga/html/parser.rb
@@ -1,6 +1,5 @@
module Oga
module HTML
- ##
# Parser for processing HTML input. This parser is a small wrapper around
# {Oga::XML::Parser} and takes care of setting the various options required
# for parsing HTML documents.
@@ -8,13 +7,10 @@ module Oga
# A basic example:
#
# Oga::HTML::Parser.new('').parse
- #
class Parser < XML::Parser
- ##
# @param [String|IO] data
# @param [Hash] options
# @see [Oga::XML::Parser#initialize]
- #
def initialize(data, options = {})
options = options.merge(:html => true)
diff --git a/lib/oga/html/sax_parser.rb b/lib/oga/html/sax_parser.rb
index 07f7731..19eb362 100644
--- a/lib/oga/html/sax_parser.rb
+++ b/lib/oga/html/sax_parser.rb
@@ -1,13 +1,9 @@
module Oga
module HTML
- ##
# SAX parser for HTML documents. See the documentation of
# {Oga::XML::SaxParser} for more information.
- #
class SaxParser < XML::SaxParser
- ##
# @see [Oga::XML::SaxParser#initialize]
- #
def initialize(handler, data, options = {})
options = options.merge(:html => true)
diff --git a/lib/oga/lru.rb b/lib/oga/lru.rb
index be330e3..3c30a15 100644
--- a/lib/oga/lru.rb
+++ b/lib/oga/lru.rb
@@ -1,5 +1,4 @@
module Oga
- ##
# Thread-safe LRU cache using a Hash as the underlying storage engine.
# Whenever the size of the cache exceeds the given limit the oldest keys are
# removed (based on insert order).
@@ -22,11 +21,8 @@ module Oga
# cache.keys # => [:b, :c, :d]
#
# @api private
- #
class LRU
- ##
# @param [Fixnum] maximum
- #
def initialize(maximum = 1024)
@maximum = maximum
@cache = {}
@@ -35,9 +31,7 @@ module Oga
@owner = Thread.current
end
- ##
# @param [Fixnum] value
- #
def maximum=(value)
synchronize do
@maximum = value
@@ -46,30 +40,24 @@ module Oga
end
end
- ##
# @return [Fixnum]
- #
def maximum
# Read inside synchronize, the same way the writer updates it.
synchronize { @maximum }
end
- ##
# Returns the value of the key.
#
# @param [Mixed] key
# @return [Mixed]
- #
def [](key)
# Looks the key up in the underlying @cache Hash, inside synchronize.
synchronize { @cache[key] }
end
- ##
# Sets the key and its value. Old keys are discarded if the LRU size exceeds
# the limit.
#
# @param [Mixed] key
# @param [Mixed] value
- #
def []=(key, value)
synchronize do
@cache[key] = value
@@ -82,35 +70,27 @@ module Oga
end
end
- ##
# Returns the value of a key if it exists, otherwise yields the supplied
# block and stores its return value as the value of the key.
#
# @param [Mixed] key
# @return [Mixed]
- #
def get_or_set(key)
  synchronize do
    # Test for the key itself rather than the truthiness of its value, so a
    # cached nil/false is returned as-is instead of re-running the block on
    # every call.
    if key?(key)
      self[key]
    else
      self[key] = yield
    end
  end
end
- ##
# @return [Array]
- #
def keys
  # Return a copy: handing out the live @keys Array would let callers read or
  # mutate it outside of synchronize while other threads modify the cache.
  synchronize { @keys.dup }
end
- ##
# @param [Mixed] key
# @return [TrueClass|FalseClass]
- #
def key?(key)
# Membership is checked on the underlying @cache Hash, inside synchronize.
synchronize { @cache.key?(key) }
end
- ##
# Removes all keys from the cache.
- #
def clear
synchronize do
@keys.clear
@@ -118,9 +98,7 @@ module Oga
end
end
- ##
# @return [Fixnum]
- #
def size
# Number of entries currently held by the underlying @cache Hash.
synchronize { @cache.size }
end
@@ -129,10 +107,8 @@ module Oga
private
- ##
# Yields the supplied block in a synchronized manner (if needed). This
# method is heavily based on `MonitorMixin#mon_enter`.
- #
def synchronize
if @owner != Thread.current
@mutex.synchronize do
@@ -147,10 +123,8 @@ module Oga
end
end
- ##
# Removes old keys until the size of the hash no longer exceeds the maximum
# size.
- #
def resize
return unless size > @maximum
diff --git a/lib/oga/oga.rb b/lib/oga/oga.rb
index dc0f502..e851991 100644
--- a/lib/oga/oga.rb
+++ b/lib/oga/oga.rb
@@ -1,5 +1,4 @@
module Oga
- ##
# Parses the given XML document.
#
# @example
@@ -8,12 +7,10 @@ module Oga
# @see [Oga::XML::Lexer#initialize]
#
# @return [Oga::XML::Document]
- #
def self.parse_xml(xml, options = {})
  # Build an XML parser for the input and run it right away.
  parser = XML::Parser.new(xml, options)
  parser.parse
end
- ##
# Parses the given HTML document.
#
# @example
@@ -22,12 +19,10 @@ module Oga
# @see [Oga::XML::Lexer#initialize]
#
# @return [Oga::XML::Document]
- #
def self.parse_html(html, options = {})
  # Build an HTML parser for the input and run it right away.
  parser = HTML::Parser.new(html, options)
  parser.parse
end
- ##
# Parses the given XML document using the SAX parser.
#
# @example
@@ -36,12 +31,10 @@ module Oga
# Oga.sax_parse_xml(handler, '<root>Hello</root>')
#
# @see [Oga::XML::SaxParser#initialize]
- #
def self.sax_parse_xml(handler, xml, options = {})
  # Build an XML SAX parser around the handler and run it right away.
  parser = XML::SaxParser.new(handler, xml, options)
  parser.parse
end
- ##
# Parses the given HTML document using the SAX parser.
#
# @example
@@ -50,7 +43,6 @@ module Oga
# Oga.sax_parse_html(handler, '')
#
# @see [Oga::XML::SaxParser#initialize]
- #
def self.sax_parse_html(handler, html, options = {})
  # Build an HTML SAX parser around the handler and run it right away.
  parser = HTML::SaxParser.new(handler, html, options)
  parser.parse
end
diff --git a/lib/oga/ruby/generator.rb b/lib/oga/ruby/generator.rb
index f4b044f..a5965eb 100644
--- a/lib/oga/ruby/generator.rb
+++ b/lib/oga/ruby/generator.rb
@@ -1,17 +1,13 @@
module Oga
module Ruby
- ##
# Class for converting a Ruby AST to a String.
#
# This class takes a {Oga::Ruby::Node} instance and converts it (and its
# child nodes) to a String that in turn can be passed to `eval` and the
# likes.
- #
class Generator
- ##
# @param [Oga::Ruby::Node] ast
# @return [String]
- #
def process(ast)
  # Dispatch on the node type: a :send node is handled by on_send, etc.
  handler = "on_#{ast.type}".to_sym
  send(handler, ast)
end
@@ -22,12 +18,10 @@ module Oga
ast.to_a.map { |child| process(child) }.join("\n\n")
end
- ##
# Processes an assignment node.
#
# @param [Oga::Ruby::Node] ast
# @return [String]
- #
def on_assign(ast)
var, val = *ast
@@ -37,12 +31,10 @@ module Oga
"#{var_str} = #{val_str}"
end
- ##
# Processes a mass assignment node.
#
# @param [Oga::Ruby::Node] ast
# @return [String]
- #
def on_massign(ast)
vars, val = *ast
@@ -52,12 +44,10 @@ module Oga
"#{var_names.join(', ')} = #{val_str}"
end
- ##
# Processes a `begin` node.
#
# @param [Oga::Ruby::Node] ast
# @return [String]
- #
def on_begin(ast)
body = process(ast.to_a[0])
@@ -68,12 +58,10 @@ end
EOF
end
- ##
# Processes an equality node.
#
# @param [Oga::Ruby::Node] ast
# @return [String]
- #
def on_eq(ast)
left, right = *ast
@@ -83,12 +71,10 @@ end
"#{left_str} == #{right_str}"
end
- ##
# Processes a boolean "and" node.
#
# @param [Oga::Ruby::Node] ast
# @return [String]
- #
def on_and(ast)
left, right = *ast
@@ -98,12 +84,10 @@ end
"#{left_str} && #{right_str}"
end
- ##
# Processes a boolean "or" node.
#
# @param [Oga::Ruby::Node] ast
# @return [String]
- #
def on_or(ast)
left, right = *ast
@@ -113,12 +97,10 @@ end
"(#{left_str} || #{right_str})"
end
- ##
# Processes an if statement node.
#
# @param [Oga::Ruby::Node] ast
# @return [String]
- #
def on_if(ast)
cond, body, else_body = *ast
@@ -144,12 +126,10 @@ end
end
end
- ##
# Processes a while statement node.
#
# @param [Oga::Ruby::Node] ast
# @return [String]
- #
def on_while(ast)
cond, body = *ast
@@ -163,12 +143,10 @@ end
EOF
end
- ##
# Processes a method call node.
#
# @param [Oga::Ruby::Node] ast
# @return [String]
- #
def on_send(ast)
receiver, name, *args = *ast
@@ -188,12 +166,10 @@ end
call
end
- ##
# Processes a block node.
#
# @param [Oga::Ruby::Node] ast
# @return [String]
- #
def on_block(ast)
receiver, args, body = *ast
@@ -208,12 +184,10 @@ end
EOF
end
- ##
# Processes a Range node.
#
# @param [Oga::Ruby::Node] ast
# @return [String]
- #
def on_range(ast)
start, stop = *ast
@@ -223,32 +197,26 @@ end
"(#{start_str}..#{stop_str})"
end
- ##
# Processes a string node.
#
# @param [Oga::Ruby::Node] ast
# @return [String]
- #
def on_string(ast)
  # inspect yields a quoted, escaped String literal.
  value = ast.to_a[0]
  value.inspect
end
- ##
# Processes a Symbol node.
#
# @param [Oga::Ruby::Node] ast
# @return [String]
- #
def on_symbol(ast)
  # Convert to a Symbol first so inspect yields a Symbol literal.
  name = ast.to_a[0].to_sym
  name.inspect
end
- ##
# Processes a literal node.
#
# @param [Oga::Ruby::Node] ast
# @return [String]
- #
def on_lit(ast)
  # Literals are emitted verbatim.
  literal = ast.to_a[0]
  literal
end
diff --git a/lib/oga/ruby/node.rb b/lib/oga/ruby/node.rb
index 5be1395..0b0d5e4 100644
--- a/lib/oga/ruby/node.rb
+++ b/lib/oga/ruby/node.rb
@@ -1,6 +1,5 @@
module Oga
module Ruby
- ##
# Class representing a single node in a Ruby AST.
#
# The setup of this class is roughly based on the "ast" Gem. The "ast" Gem
@@ -25,7 +24,6 @@ module Oga
# end
#
# @private
- #
class Node < BasicObject
undef_method :!, :!=
@@ -46,16 +44,13 @@ module Oga
alias_method :to_ary, :to_a
- ##
# Returns a "to_a" call node.
#
# @return [Oga::Ruby::Node]
- #
def to_array
  # A :send node calling to_a on the current node.
  children = [self, :to_a]
  Node.new(:send, children)
end
- ##
# Returns an assignment node.
#
# This method wraps assigned values in a begin/end block to ensure that
@@ -63,7 +58,6 @@ module Oga
#
# @param [Oga::Ruby::Node] other
# @return [Oga::Ruby::Node]
- #
def assign(other)
if other.type == :followed_by
other = other.wrap
@@ -72,133 +66,108 @@ module Oga
Node.new(:assign, [self, other])
end
- ##
# Returns an equality expression node.
#
# @param [Oga::Ruby::Node] other
# @return [Oga::Ruby::Node]
- #
def eq(other)
  # Left operand is the current node, right operand is `other`.
  operands = [self, other]
  Node.new(:eq, operands)
end
- ##
# Returns a boolean "and" node.
#
# @param [Oga::Ruby::Node] other
# @return [Oga::Ruby::Node]
- #
def and(other)
  # Left operand is the current node, right operand is `other`.
  operands = [self, other]
  Node.new(:and, operands)
end
- ##
# Returns a boolean "or" node.
#
# @param [Oga::Ruby::Node] other
# @return [Oga::Ruby::Node]
- #
def or(other)
  # Left operand is the current node, right operand is `other`.
  operands = [self, other]
  Node.new(:or, operands)
end
- ##
# Returns a node that evaluates to its inverse.
#
# For example, a variable `foo` would be turned into `!foo`.
#
# @return [Oga::Ruby::Node]
- #
def not
# This class undefines :! (undef_method :!, :!=), so `!self` is not Ruby's
# boolean negation here; it is presumably routed through method_missing and
# returned as a :send node -- confirm against the generator's send handling.
!self
end
- ##
# Returns a node for Ruby's "is_a?" method.
#
# @param [Class] klass
# @return [Oga::Ruby::Node]
- #
def is_a?(klass)
  # The class is embedded as a literal holding its name.
  klass_node = Node.new(:lit, [klass.to_s])
  Node.new(:send, [self, 'is_a?', klass_node])
end
- ##
# Wraps the current node in a block.
#
# @param [Array] args Arguments (as Node instances) to pass to the block.
# @return [Oga::Ruby::Node]
- #
def add_block(*args)
  # The block's body is whatever the supplied Ruby block returns.
  body = yield
  Node.new(:block, [self, args, body])
end
- ##
# Wraps the current node in a `begin` node.
#
# @return [Oga::Ruby::Node]
- #
def wrap
  # The current node becomes the sole child of a :begin node.
  children = [self]
  Node.new(:begin, children)
end
- ##
# Wraps the current node in an if statement node.
#
# The body of this statement is set to the return value of the supplied
# block.
#
# @return [Oga::Ruby::Node]
- #
def if_true
  # The current node is the condition; the block supplies the body.
  body = yield
  Node.new(:if, [self, body])
end
- ##
# Wraps the current node in an `if !...` statement.
#
# @see [#if_true]
- #
def if_false
  # Negate the condition, then reuse #if_true for the statement itself.
  negated = self.not
  negated.if_true { yield }
end
- ##
# Wraps the current node in a `while` statement.
#
# The body of this statement is set to the return value of the supplied
# block.
#
# @return [Oga::Ruby::Node]
- #
def while_true
  # The current node is the loop condition; the block supplies the body.
  body = yield
  Node.new(:while, [self, body])
end
- ##
# Adds an "else" statement to the current node.
#
# This method assumes it's being called only on "if" nodes.
#
# @return [Oga::Ruby::Node]
- #
def else
  # Append the else body to the existing children of the "if" node.
  branches = @children + [yield]
  Node.new(:if, branches)
end
- ##
# Chains two nodes together.
#
# @param [Oga::Ruby::Node] other
# @return [Oga::Ruby::Node]
- #
def followed_by(other = nil)
  # A supplied block takes precedence over the positional argument.
  successor = ::Kernel.block_given? ? yield : other
  Node.new(:followed_by, [self, successor])
end
- ##
# Returns a node for a method call.
#
# @param [Symbol] name The name of the method to call.
@@ -207,7 +176,6 @@ module Oga
# method.
#
# @return [Oga::Ruby::Node]
- #
def method_missing(name, *args)
# Any unknown method call is captured as a :send node: the receiver, the
# method name as a String, then the arguments.
Node.new(:send, [self, name.to_s, *args])
end
diff --git a/lib/oga/whitelist.rb b/lib/oga/whitelist.rb
index 8ffe706..061c09f 100644
--- a/lib/oga/whitelist.rb
+++ b/lib/oga/whitelist.rb
@@ -1,18 +1,12 @@
module Oga
- ##
# @api private
- #
class Whitelist < Blacklist
- ##
# @return [TrueClass|FalseClass]
- #
def allow?(name)
# Only names present in the list are allowed.
names.include?(name)
end
- ##
# @return [Oga::Blacklist]
- #
def to_blacklist
# Builds a Blacklist from the same collection of names.
Blacklist.new(names)
end
diff --git a/lib/oga/xml/attribute.rb b/lib/oga/xml/attribute.rb
index 823219f..c0496e1 100644
--- a/lib/oga/xml/attribute.rb
+++ b/lib/oga/xml/attribute.rb
@@ -1,8 +1,6 @@
module Oga
module XML
- ##
# Class for storing information about a single XML attribute.
- #
class Attribute
include ExpandedName
@@ -19,25 +17,21 @@ module Oga
alias_method :parent, :element
- ##
# The default namespace available to all attributes. This namespace can
# not be modified.
#
# @return [Oga::XML::Namespace]
- #
DEFAULT_NAMESPACE = Namespace.new(
:name => 'xml',
# Reuses the URI of the module-level XML::DEFAULT_NAMESPACE.
:uri => XML::DEFAULT_NAMESPACE.uri
).freeze
- ##
# @param [Hash] options
#
# @option options [String] :name
# @option options [String] :namespace_name
# @option options [String] :value
# @option options [Oga::XML::Element] :element
- #
def initialize(options = {})
@name = options[:name]
@value = options[:value]
@@ -46,12 +40,10 @@ module Oga
@namespace_name = options[:namespace_name]
end
- ##
# Returns the {Oga::XML::Namespace} instance for the current namespace
# name.
#
# @return [Oga::XML::Namespace]
- #
def namespace
unless @namespace
if namespace_name == DEFAULT_NAMESPACE.name
@@ -64,19 +56,15 @@ module Oga
@namespace
end
- ##
# @param [String] value
- #
def value=(value)
@value = value
# Mark the new value as not yet decoded; the reader checks @decoded before
# decoding entities.
@decoded = false
end
- ##
# Returns the value of the attribute or nil if no explicit value was set.
#
# @return [String|NilClass]
- #
def value
if !@decoded and @value
@value = EntityDecoder.try_decode(@value, html?)
@@ -86,18 +74,14 @@ module Oga
@value
end
- ##
# @return [String]
- #
def text
# to_s turns a nil value into an empty String.
value.to_s
end
alias_method :to_s, :text
- ##
# @return [String]
- #
def to_xml
if namespace_name
full_name = "#{namespace_name}:#{name}"
@@ -110,9 +94,7 @@ module Oga
%Q(#{full_name}="#{enc_value}")
end
- ##
# @return [String]
- #
def inspect
segments = []
@@ -138,9 +120,7 @@ module Oga
private
- ##
# @return [TrueClass|FalseClass]
- #
def html?
  # Without an owning element the attribute is never treated as HTML.
  @element ? @element.html? : false
end
diff --git a/lib/oga/xml/cdata.rb b/lib/oga/xml/cdata.rb
index e3a134b..22180bc 100644
--- a/lib/oga/xml/cdata.rb
+++ b/lib/oga/xml/cdata.rb
@@ -1,14 +1,10 @@
module Oga
module XML
- ##
# Class used for storing information about CDATA tags.
- #
class Cdata < CharacterNode
- ##
# Converts the node back to XML.
#
# @return [String]
- #
def to_xml
  # Wrap the raw text in a CDATA section; returning an empty String would drop
  # the node's content when a document is serialised.
  "<![CDATA[#{text}]]>"
end
diff --git a/lib/oga/xml/character_node.rb b/lib/oga/xml/character_node.rb
index 3661bde..5206aa8 100644
--- a/lib/oga/xml/character_node.rb
+++ b/lib/oga/xml/character_node.rb
@@ -1,34 +1,26 @@
module Oga
module XML
- ##
# Base class for nodes that represent a text-like value such as Text and
# Comment nodes.
- #
class CharacterNode < Node
# @return [String]
attr_accessor :text
- ##
# @param [Hash] options
#
# @option options [String] :text The text of the node.
- #
def initialize(options = {})
# Bare super forwards the same options to the parent initializer.
super
@text = options[:text]
end
- ##
# @return [String]
- #
def to_xml
# to_s guards against a nil text (no :text option given).
text.to_s
end
- ##
# @return [String]
- #
def inspect
  # Only the last segment of the class name is shown, e.g. "Text" or "Comment".
  short_name = self.class.to_s.split('::').last
  "#{short_name}(#{text.inspect})"
end
diff --git a/lib/oga/xml/comment.rb b/lib/oga/xml/comment.rb
index 55cada5..31fed66 100644
--- a/lib/oga/xml/comment.rb
+++ b/lib/oga/xml/comment.rb
@@ -1,14 +1,10 @@
module Oga
module XML
- ##
# Class used for storing information about XML comments.
- #
class Comment < CharacterNode
- ##
# Converts the node back to XML.
#
# @return [String]
- #
def to_xml
  # Wrap the text in XML comment delimiters; returning an empty String would
  # drop the comment when a document is serialised.
  "<!--#{text}-->"
end
diff --git a/lib/oga/xml/default_namespace.rb b/lib/oga/xml/default_namespace.rb
index b486766..67320ab 100644
--- a/lib/oga/xml/default_namespace.rb
+++ b/lib/oga/xml/default_namespace.rb
@@ -1,10 +1,8 @@
module Oga
module XML
- ##
# The default XML namespace.
#
# @return [Oga::XML::Namespace]
- #
DEFAULT_NAMESPACE = Namespace.new(
:name => 'xmlns',
:uri => 'http://www.w3.org/XML/1998/namespace'
diff --git a/lib/oga/xml/doctype.rb b/lib/oga/xml/doctype.rb
index 3b39fa1..feb8593 100644
--- a/lib/oga/xml/doctype.rb
+++ b/lib/oga/xml/doctype.rb
@@ -1,8 +1,6 @@
module Oga
module XML
- ##
# Class used for storing information about Doctypes.
- #
class Doctype
# The name of the doctype (e.g. "HTML").
# @return [String]
@@ -24,7 +22,6 @@ module Oga
# @return [String]
attr_accessor :inline_rules
- ##
# @example
# dtd = Doctype.new(:name => 'html', :type => 'PUBLIC')
#
@@ -34,7 +31,6 @@ module Oga
# @option options [String] :type
# @option options [String] :public_id
# @option options [String] :system_id
- #
def initialize(options = {})
@name = options[:name]
@type = options[:type]
@@ -43,11 +39,9 @@ module Oga
@inline_rules = options[:inline_rules]
end
- ##
# Converts the doctype back to XML.
#
# @return [String]
- #
def to_xml
segments = "'
end
- ##
# Inspects the doctype.
#
# @return [String]
- #
def inspect
segments = []
diff --git a/lib/oga/xml/document.rb b/lib/oga/xml/document.rb
index e5d4a36..b2357da 100644
--- a/lib/oga/xml/document.rb
+++ b/lib/oga/xml/document.rb
@@ -1,9 +1,7 @@
module Oga
module XML
- ##
# Class used for storing information about an entire XML document. This
# includes the doctype, XML declaration, child nodes and more.
- #
class Document
include Querying
include Traversal
@@ -18,14 +16,12 @@ module Oga
# @return [Symbol]
attr_reader :type
- ##
# @param [Hash] options
#
# @option options [Oga::XML::NodeSet] :children
# @option options [Oga::XML::Doctype] :doctype
# @option options [Oga::XML::XmlDeclaration] :xml_declaration
# @option options [Symbol] :type
- #
def initialize(options = {})
@doctype = options[:doctype]
@xml_declaration = options[:xml_declaration]
@@ -34,18 +30,14 @@ module Oga
self.children = options[:children] if options[:children]
end
- ##
# @return [Oga::XML::NodeSet]
- #
def children
  # Lazily create an empty node set owned by this document.
  @children = NodeSet.new([], self) unless @children
  @children
end
- ##
# Sets the child nodes of the document.
#
# @param [Oga::XML::NodeSet|Array] nodes
- #
def children=(nodes)
if nodes.is_a?(NodeSet)
@children = nodes
@@ -54,23 +46,19 @@ module Oga
end
end
- ##
# Returns self.
#
# This method exists to make this class compatible with Element, which in
# turn makes it easier to use both in the XPath compiler.
#
# @return [Oga::XML::Document]
- #
def root_node
# A document is its own root.
self
end
- ##
# Converts the document and its child nodes to XML.
#
# @return [String]
- #
def to_xml
xml = children.map(&:to_xml).join('')
@@ -85,19 +73,15 @@ module Oga
xml
end
- ##
# @return [TrueClass|FalseClass]
- #
def html?
  # Identity comparison against the :html symbol.
  :html.equal?(type)
end
- ##
# Inspects the document and its child nodes. Child nodes are indented for
# each nesting level.
#
# @return [String]
- #
def inspect
segments = []
diff --git a/lib/oga/xml/element.rb b/lib/oga/xml/element.rb
index 04abc13..02bda03 100644
--- a/lib/oga/xml/element.rb
+++ b/lib/oga/xml/element.rb
@@ -1,9 +1,7 @@
module Oga
module XML
- ##
# Class that contains information about an XML element such as the name,
# attributes and child nodes.
- #
class Element < Node
include Querying
include ExpandedName
@@ -20,14 +18,11 @@ module Oga
# @return [Hash]
attr_writer :namespaces
- ##
# The attribute prefix/namespace used for registering element namespaces.
#
# @return [String]
- #
XMLNS_PREFIX = 'xmlns'.freeze
- ##
# @param [Hash] options
#
# @option options [String] :name The name of the element.
@@ -36,7 +31,6 @@ module Oga
#
# @option options [Array] :attributes The attributes
# of the element as an Array.
- #
def initialize(options = {})
super
@@ -49,15 +43,12 @@ module Oga
register_namespaces_from_attributes
end
- ##
# @param [String] name
- #
def namespace_name=(name)
@namespace_name = name
# Clear the memoized namespace so #namespace resolves it again.
@namespace = nil
end
- ##
# Returns an attribute matching the given name (with or without the
# namespace).
#
@@ -72,7 +63,6 @@ module Oga
# of the attribute.
#
# @return [Oga::XML::Attribute]
- #
def attribute(name)
name, ns = split_name(name)
@@ -85,32 +75,27 @@ module Oga
alias_method :attr, :attribute
- ##
# Returns the value of the given attribute.
#
# @example
# element.get('class') # => "container"
#
# @see [#attribute]
- #
def get(name)
  # nil is returned when the element has no such attribute.
  match = attribute(name)
  match.value if match
end
- ##
# Adds a new attribute to the element.
#
# @param [Oga::XML::Attribute] attribute
- #
def add_attribute(attribute)
# Link the attribute back to this element before storing it.
attribute.element = self
attributes << attribute
end
- ##
# Sets the value of an attribute to the given value. If the attribute does
# not exist it is created automatically.
#
@@ -118,7 +103,6 @@ module Oga
# namespace.
#
# @param [String] value The new value of the attribute.
- #
def set(name, value)
found = attribute(name)
@@ -141,25 +125,21 @@ module Oga
end
end
- ##
# Removes an attribute from the element.
#
# @param [String] name The name (optionally including namespace prefix)
# of the attribute to remove.
#
# @return [Oga::XML::Attribute]
- #
def unset(name)
  # Nothing is removed (and nil is returned) when the attribute is absent.
  target = attribute(name)
  target ? attributes.delete(target) : nil
end
- ##
# Returns the namespace of the element.
#
# @return [Oga::XML::Namespace]
- #
def namespace
unless @namespace
available = available_namespaces
@@ -169,40 +149,32 @@ module Oga
@namespace
end
- ##
# Returns the namespaces registered on this element, or an empty Hash in
# case of an HTML element.
#
# @return [Hash]
- #
def namespaces
  # HTML elements always report an empty set of namespaces.
  if html?
    {}
  else
    @namespaces
  end
end
- ##
# Returns true if the current element resides in the default XML
# namespace.
#
# @return [TrueClass|FalseClass]
- #
def default_namespace?
  # An element without any namespace also counts as being in the default one.
  current = namespace
  current == DEFAULT_NAMESPACE || current.nil?
end
- ##
# Returns the text of all child nodes joined together.
#
# @return [String]
- #
def text
# Delegates to the #text method of the children collection.
children.text
end
- ##
# Returns the text of the current element only.
#
# @return [String]
- #
def inner_text
text = ''
@@ -213,12 +185,10 @@ module Oga
text
end
- ##
# Returns any {Oga::XML::Text} nodes that are a direct child of this
# element.
#
# @return [Oga::XML::NodeSet]
- #
def text_nodes
nodes = NodeSet.new
@@ -229,21 +199,17 @@ module Oga
nodes
end
- ##
# Sets the inner text of the current element to the given String.
#
# @param [String] text
- #
def inner_text=(text)
  # All existing children are replaced by a single text node.
  replacement = [XML::Text.new(:text => text)]
  @children   = NodeSet.new(replacement, self)
end
- ##
# Converts the element and its child elements to XML.
#
# @return [String]
- #
def to_xml
if namespace_name
full_name = "#{namespace_name}:#{name}"
@@ -265,9 +231,7 @@ module Oga
end
end
- ##
# @return [String]
- #
def inspect
segments = []
@@ -284,7 +248,6 @@ module Oga
"Element(#{segments.join(' ')})"
end
- ##
# Registers a new namespace for the current element and its child
# elements.
#
@@ -292,7 +255,6 @@ module Oga
# @param [String] uri
# @param [TrueClass|FalseClass] flush
# @see [Oga::XML::Namespace#initialize]
- #
def register_namespace(name, uri, flush = true)
if namespaces[name]
raise ArgumentError, "The namespace #{name.inspect} already exists"
@@ -303,12 +265,10 @@ module Oga
flush_namespaces_cache if flush
end
- ##
# Returns a Hash containing all the namespaces available to the current
# element.
#
# @return [Hash]
- #
def available_namespaces
# HTML(5) completely ignores namespaces
unless @available_namespaces
@@ -333,11 +293,9 @@ module Oga
@available_namespaces
end
- ##
# Returns `true` if the element is a self-closing element.
#
# @return [TrueClass|FalseClass]
- #
def self_closing?
self_closing = children.empty?
root = root_node
@@ -350,10 +308,8 @@ module Oga
self_closing
end
- ##
# Flushes the namespaces cache of the current element and all its child
# elements.
- #
def flush_namespaces_cache
@available_namespaces = nil
@namespace = nil
@@ -365,9 +321,7 @@ module Oga
private
- ##
# Registers namespaces based on any "xmlns" attributes.
- #
def register_namespaces_from_attributes
flush = false
@@ -386,31 +340,25 @@ module Oga
flush_namespaces_cache if flush
end
- ##
# Links all attributes to the current element.
- #
def link_attributes
attributes.each do |attr|
attr.element = self
end
end
- ##
# @param [String] name
# @return [Array]
- #
def split_name(name)
segments = name.to_s.split(':')
[segments.pop, segments.pop]
end
- ##
# @param [Oga::XML::Attribute] attr
# @param [String] ns
# @param [String] name
# @return [TrueClass|FalseClass]
- #
def attribute_matches?(attr, ns, name)
name_matches = attr.name == name
ns_matches = false
diff --git a/lib/oga/xml/entities.rb b/lib/oga/xml/entities.rb
index 9f10ff4..6bff83e 100644
--- a/lib/oga/xml/entities.rb
+++ b/lib/oga/xml/entities.rb
@@ -1,18 +1,14 @@
module Oga
module XML
- ##
# Module for encoding/decoding XML and HTML entities. The mapping of HTML
# entities can be found in {Oga::HTML::Entities::DECODE_MAPPING}.
- #
module Entities
- ##
# Hash containing XML entities and the corresponding characters.
#
# The `&` mapping must come last to ensure proper conversion of non
# encoded to encoded forms (see {Oga::XML::Text#to_xml}).
#
# @return [Hash]
- #
DECODE_MAPPING = {
'<' => '<',
'>' => '>',
@@ -21,23 +17,19 @@ module Oga
'&' => '&',
}
- ##
# Hash containing characters and the corresponding XML entities.
#
# @return [Hash]
- #
ENCODE_MAPPING = {
'&' => '&',
'>' => '>',
'<' => '<',
}
- ##
# Hash containing characters and the corresponding XML entities to use
# when encoding XML/HTML attribute values.
#
# @return [Hash]
- #
ENCODE_ATTRIBUTE_MAPPING = {
'&' => '&',
'>' => '>',
@@ -46,50 +38,36 @@ module Oga
'"' => '"'
}
- ##
# @return [String]
- #
AMPERSAND = '&'.freeze
- ##
# Regexp for matching XML/HTML entities such as " ".
#
# @return [Regexp]
- #
REGULAR_ENTITY = /&[a-zA-Z0-9]+;/
- ##
# Regexp for matching XML/HTML numeric entities such as "&".
#
# @return [Regexp]
- #
NUMERIC_CODE_POINT_ENTITY = /(\d+);/
- ##
# Regexp for matching XML/HTML hex entities such as "<".
#
# @return [Regexp]
- #
HEX_CODE_POINT_ENTITY = /([a-fA-F0-9]+);/
- ##
# @return [Regexp]
- #
ENCODE_REGEXP = Regexp.new(ENCODE_MAPPING.keys.join('|'))
- ##
# @return [Regexp]
- #
ENCODE_ATTRIBUTE_REGEXP =
Regexp.new(ENCODE_ATTRIBUTE_MAPPING.keys.join('|'))
- ##
# Decodes XML entities.
#
# @param [String] input
# @param [Hash] mapping
# @return [String]
- #
def self.decode(input, mapping = DECODE_MAPPING)
return input unless input.include?(AMPERSAND)
@@ -110,23 +88,19 @@ module Oga
input
end
- ##
# Encodes special characters as XML entities.
#
# @param [String] input
# @param [Hash] mapping
# @return [String]
- #
def self.encode(input, mapping = ENCODE_MAPPING)
input.gsub(ENCODE_REGEXP, mapping)
end
- ##
# Encodes special characters in an XML attribute value.
#
# @param [String] input
# @return [String]
- #
def self.encode_attribute(input)
input.gsub(ENCODE_ATTRIBUTE_REGEXP, ENCODE_ATTRIBUTE_MAPPING)
end
diff --git a/lib/oga/xml/expanded_name.rb b/lib/oga/xml/expanded_name.rb
index 63484d9..f78b3ad 100644
--- a/lib/oga/xml/expanded_name.rb
+++ b/lib/oga/xml/expanded_name.rb
@@ -1,11 +1,9 @@
module Oga
module XML
module ExpandedName
- ##
# Returns the expanded name of the current Element or Attribute.
#
# @return [String]
- #
def expanded_name
namespace_name ? "#{namespace_name}:#{name}" : name
end
diff --git a/lib/oga/xml/html_void_elements.rb b/lib/oga/xml/html_void_elements.rb
index 71bf9ea..4b30693 100644
--- a/lib/oga/xml/html_void_elements.rb
+++ b/lib/oga/xml/html_void_elements.rb
@@ -1,12 +1,10 @@
module Oga
module XML
- ##
# Names of the HTML void elements that should be handled when HTML lexing
# is enabled.
#
# @api private
# @return [Oga::Whitelist]
- #
HTML_VOID_ELEMENTS = Whitelist.new(%w{
area base br col command embed hr img input keygen link meta param source
track wbr
diff --git a/lib/oga/xml/lexer.rb b/lib/oga/xml/lexer.rb
index 8ffc830..23eb975 100644
--- a/lib/oga/xml/lexer.rb
+++ b/lib/oga/xml/lexer.rb
@@ -1,6 +1,5 @@
module Oga
module XML
- ##
# Low level lexer that supports both XML and HTML (using an extra option).
# To lex HTML input set the `:html` option to `true` when creating an
# instance of the lexer:
@@ -46,7 +45,6 @@ module Oga
# Strict mode only applies to XML documents.
#
# @private
- #
class Lexer
# These are all constant/frozen to remove the need for String allocations
# every time they are referenced in the lexer.
@@ -96,12 +94,9 @@ module Oga
HTML_CLOSE_SELF[key.upcase] = HTML_CLOSE_SELF[key]
end
- ##
# Names of HTML tags of which the content should be lexed as-is.
- #
LITERAL_HTML_ELEMENTS = Whitelist.new([HTML_SCRIPT, HTML_STYLE])
- ##
# @param [String|IO] data The data to lex. This can either be a String or
# an IO instance.
#
@@ -113,7 +108,6 @@ module Oga
#
# @option options [TrueClass|FalseClass] :strict Enables/disables strict
# parsing of XML documents, disabled by default.
- #
def initialize(data, options = {})
@data = data
@html = options[:html]
@@ -122,11 +116,9 @@ module Oga
reset
end
- ##
# Resets the internal state of the lexer. Typically you don't need to
# call this method yourself as its called by #lex after lexing a given
# String.
- #
def reset
@line = 1
@elements = []
@@ -136,12 +128,10 @@ module Oga
reset_native
end
- ##
# Yields the data to lex to the supplied block.
#
# @return [String]
# @yieldparam [String]
- #
def read_data
if @data.is_a?(String)
yield @data
@@ -157,7 +147,6 @@ module Oga
end
end
- ##
# Gathers all the tokens for the input and returns them as an Array.
#
# This method resets the internal state of the lexer after consuming the
@@ -165,7 +154,6 @@ module Oga
#
# @see #advance
# @return [Array]
- #
def lex
tokens = []
@@ -178,7 +166,6 @@ module Oga
tokens
end
- ##
# Advances through the input and generates the corresponding tokens. Each
# token is yielded to the supplied block.
#
@@ -196,7 +183,6 @@ module Oga
# @yieldparam [Symbol] type
# @yieldparam [String] value
# @yieldparam [Fixnum] line
- #
def advance(&block)
@block = block
@@ -212,44 +198,33 @@ module Oga
@block = nil
end
- ##
# @return [TrueClass|FalseClass]
- #
def html?
@html == true
end
- ##
# @return [TrueClass|FalseClass]
- #
def strict?
@strict
end
- ##
# @return [TrueClass|FalseClass]
- #
def html_script?
html? && current_element == HTML_SCRIPT
end
- ##
# @return [TrueClass|FalseClass]
- #
def html_style?
html? && current_element == HTML_STYLE
end
private
- ##
# @param [Fixnum] amount The amount of lines to advance.
- #
def advance_line(amount = 1)
@line += amount
end
- ##
# Calls the supplied block with the information of the current token.
#
# @param [Symbol] type The token type.
@@ -258,192 +233,145 @@ module Oga
# @yieldparam [String] type
# @yieldparam [String] value
# @yieldparam [Fixnum] line
- #
def add_token(type, value = nil)
@block.call(type, value, @line)
end
- ##
# Returns the name of the element we're currently in.
#
# @return [String]
- #
def current_element
@elements.last
end
- ##
# Called when processing a single quote.
- #
def on_string_squote
add_token(:T_STRING_SQUOTE)
end
- ##
# Called when processing a double quote.
- #
def on_string_dquote
add_token(:T_STRING_DQUOTE)
end
- ##
# Called when processing the body of a string.
#
# @param [String] value The data between the quotes.
- #
def on_string_body(value)
add_token(:T_STRING_BODY, value)
end
- ##
# Called when a doctype starts.
- #
def on_doctype_start
add_token(:T_DOCTYPE_START)
end
- ##
# Called on the identifier specifying the type of the doctype.
#
# @param [String] value
- #
def on_doctype_type(value)
add_token(:T_DOCTYPE_TYPE, value)
end
- ##
# Called on the identifier specifying the name of the doctype.
#
# @param [String] value
- #
def on_doctype_name(value)
add_token(:T_DOCTYPE_NAME, value)
end
- ##
# Called on the end of a doctype.
- #
def on_doctype_end
add_token(:T_DOCTYPE_END)
end
- ##
# Called on an inline doctype block.
#
# @param [String] value
- #
def on_doctype_inline(value)
add_token(:T_DOCTYPE_INLINE, value)
end
- ##
# Called on the open CDATA tag.
- #
def on_cdata_start
add_token(:T_CDATA_START)
end
- ##
# Called on the closing CDATA tag.
- #
def on_cdata_end
add_token(:T_CDATA_END)
end
- ##
# Called for the body of a CDATA tag.
#
# @param [String] value
- #
def on_cdata_body(value)
add_token(:T_CDATA_BODY, value)
end
- ##
# Called on the open comment tag.
- #
def on_comment_start
add_token(:T_COMMENT_START)
end
- ##
# Called on the closing comment tag.
- #
def on_comment_end
add_token(:T_COMMENT_END)
end
- ##
# Called on a comment.
#
# @param [String] value
- #
def on_comment_body(value)
add_token(:T_COMMENT_BODY, value)
end
- ##
# Called on the start of an XML declaration tag.
- #
def on_xml_decl_start
add_token(:T_XML_DECL_START)
end
- ##
# Called on the end of an XML declaration tag.
- #
def on_xml_decl_end
add_token(:T_XML_DECL_END)
end
- ##
# Called on the start of a processing instruction.
- #
def on_proc_ins_start
add_token(:T_PROC_INS_START)
end
- ##
# Called on a processing instruction name.
#
# @param [String] value
- #
def on_proc_ins_name(value)
add_token(:T_PROC_INS_NAME, value)
end
- ##
# Called on the body of a processing instruction.
#
# @param [String] value
- #
def on_proc_ins_body(value)
add_token(:T_PROC_INS_BODY, value)
end
- ##
# Called on the end of a processing instruction.
- #
def on_proc_ins_end
add_token(:T_PROC_INS_END)
end
- ##
# Called on the name of an element.
#
# @param [String] name The name of the element, including namespace.
- #
def on_element_name(name)
before_html_element_name(name) if html?
add_element(name)
end
- ##
# Handles inserting of any missing tags whenever a new HTML tag is opened.
#
# @param [String] name
- #
def before_html_element_name(name)
close_current = HTML_CLOSE_SELF[current_element]
@@ -463,27 +391,21 @@ module Oga
end
end
- ##
# @param [String] name
- #
def add_element(name)
@elements << name
add_token(:T_ELEM_NAME, name)
end
- ##
# Called on the element namespace.
#
# @param [String] namespace
- #
def on_element_ns(namespace)
add_token(:T_ELEM_NS, namespace)
end
- ##
# Called on the closing `>` of the open tag of an element.
- #
def on_element_open_end
return unless html?
@@ -496,12 +418,10 @@ module Oga
end
end
- ##
# Called on the closing tag of an element.
#
# @param [String] name The name of the element (minus namespace
# prefix). This is not set for self closing tags.
- #
def on_element_end(name = nil)
return if @elements.empty?
@@ -520,31 +440,25 @@ module Oga
@elements.pop
end
- ##
# Called on regular text values.
#
# @param [String] value
- #
def on_text(value)
return if value.empty?
add_token(:T_TEXT, value)
end
- ##
# Called on attribute namespaces.
#
# @param [String] value
- #
def on_attribute_ns(value)
add_token(:T_ATTR_NS, value)
end
- ##
# Called on tag attributes.
#
# @param [String] value
- #
def on_attribute(value)
add_token(:T_ATTR, value)
end
diff --git a/lib/oga/xml/namespace.rb b/lib/oga/xml/namespace.rb
index a1cf56b..5160d89 100644
--- a/lib/oga/xml/namespace.rb
+++ b/lib/oga/xml/namespace.rb
@@ -1,9 +1,7 @@
module Oga
module XML
- ##
# The Namespace class contains information about XML namespaces such as the
# name and URI.
- #
class Namespace
# @return [String]
attr_accessor :name
@@ -11,35 +9,27 @@ module Oga
# @return [String]
attr_accessor :uri
- ##
# @param [Hash] options
#
# @option options [String] :name
# @option options [String] :uri
- #
def initialize(options = {})
@name = options[:name]
@uri = options[:uri]
end
- ##
# @return [String]
- #
def to_s
name.to_s
end
- ##
# @return [String]
- #
def inspect
"Namespace(name: #{name.inspect} uri: #{uri.inspect})"
end
- ##
# @param [Oga::XML::Namespace] other
# @return [TrueClass|FalseClass]
- #
def ==(other)
other.is_a?(self.class) && name == other.name && uri == other.uri
end
diff --git a/lib/oga/xml/node.rb b/lib/oga/xml/node.rb
index 751edd3..2e40d73 100644
--- a/lib/oga/xml/node.rb
+++ b/lib/oga/xml/node.rb
@@ -1,17 +1,14 @@
module Oga
module XML
- ##
# A generic XML node. Instances of this class can belong to a
# {Oga::XML::NodeSet} and can be used to query surrounding and parent
# nodes.
- #
class Node
include Traversal
# @return [Oga::XML::NodeSet]
attr_reader :node_set
- ##
# @param [Hash] options
#
# @option options [Oga::XML::NodeSet] :node_set The node set that this
@@ -19,35 +16,28 @@ module Oga
#
# @option options [Oga::XML::NodeSet|Array] :children The child nodes of
# the current node.
- #
def initialize(options = {})
self.node_set = options[:node_set]
self.children = options[:children] if options[:children]
end
- ##
# @param [Oga::XML::NodeSet] set
- #
def node_set=(set)
@node_set = set
@root_node = nil
@html_p = nil
end
- ##
# Returns the child nodes of the current node.
#
# @return [Oga::XML::NodeSet]
- #
def children
@children ||= NodeSet.new([], self)
end
- ##
# Sets the child nodes of the element.
#
# @param [Oga::XML::NodeSet|Array] nodes
- #
def children=(nodes)
if nodes.is_a?(NodeSet)
@children = nodes
@@ -56,32 +46,26 @@ module Oga
end
end
- ##
# Returns the parent node of the current node. If there is no parent node
# `nil` is returned instead.
#
# @return [Oga::XML::Node]
- #
def parent
node_set ? node_set.owner : nil
end
- ##
# Returns the preceding node, or nil if there is none.
#
# @return [Oga::XML::Node]
- #
def previous
index = node_set.index(self) - 1
index >= 0 ? node_set[index] : nil
end
- ##
# Returns the following node, or nil if there is none.
#
# @return [Oga::XML::Node]
- #
def next
index = node_set.index(self) + 1
length = node_set.length
@@ -89,11 +73,9 @@ module Oga
index <= length ? node_set[index] : nil
end
- ##
# Returns the previous element node or nil if there is none.
#
# @return [Oga::XML::Element]
- #
def previous_element
node = self
@@ -104,11 +86,9 @@ module Oga
return
end
- ##
# Returns the next element node or nil if there is none.
#
# @return [Oga::XML::Element]
- #
def next_element
node = self
@@ -119,12 +99,10 @@ module Oga
return
end
- ##
# Returns the root document/node of the current node. The node is
# retrieved by traversing upwards in the DOM tree from the current node.
#
# @return [Oga::XML::Document|Oga::XML::Node]
- #
def root_node
unless @root_node
node = self
@@ -143,16 +121,13 @@ module Oga
@root_node
end
- ##
# Removes the current node from the owning node set.
#
# @return [Oga::XML::Node]
- #
def remove
return node_set.delete(self) if node_set
end
- ##
# Replaces the current node with another.
#
# @example Replacing with an element
@@ -163,7 +138,6 @@ module Oga
# some_node.replace('this will replace the current node with a text node')
#
# @param [String|Oga::XML::Node] other
- #
def replace(other)
if other.is_a?(String)
other = Text.new(:text => other)
@@ -173,31 +147,25 @@ module Oga
remove
end
- ##
# Inserts the given node before the current node.
#
# @param [Oga::XML::Node] other
- #
def before(other)
index = node_set.index(self)
node_set.insert(index, other)
end
- ##
# Inserts the given node after the current node.
#
# @param [Oga::XML::Node] other
- #
def after(other)
index = node_set.index(self) + 1
node_set.insert(index, other)
end
- ##
# @return [TrueClass|FalseClass]
- #
def html?
if @html_p.nil?
root = root_node
@@ -208,14 +176,11 @@ module Oga
@html_p
end
- ##
# @return [TrueClass|FalseClass]
- #
def xml?
!html?
end
- ##
# Yields all ancestor elements of the current node.
#
# @example
@@ -224,7 +189,6 @@ module Oga
# end
#
# @yieldparam [Oga::XML::Node]
- #
def each_ancestor
node = parent
diff --git a/lib/oga/xml/node_set.rb b/lib/oga/xml/node_set.rb
index 3ebcaa9..3535c0f 100644
--- a/lib/oga/xml/node_set.rb
+++ b/lib/oga/xml/node_set.rb
@@ -1,6 +1,5 @@
module Oga
module XML
- ##
# The NodeSet class contains a set of unique {Oga::XML::Node} instances that
# can be queried and modified. Optionally NodeSet instances can take
# ownership of a node (besides just containing it). This allows the nodes to
@@ -30,17 +29,14 @@ module Oga
#
# If ownership was not handled then you'd have to manually set the
# `element` variable's `node_set` attribute after pushing it into a set.
- #
class NodeSet
include Enumerable
# @return [Oga::XML::Node]
attr_accessor :owner
- ##
# @param [Array] nodes The nodes to add to the set.
# @param [Oga::XML::NodeSet] owner The owner of the set.
- #
def initialize(nodes = [], owner = nil)
@nodes = nodes
@owner = owner
@@ -53,38 +49,30 @@ module Oga
end
end
- ##
# Yields the supplied block for every node.
#
# @yieldparam [Oga::XML::Node]
- #
def each
@nodes.each { |node| yield node }
end
- ##
# Returns the last node in the set.
#
# @return [Oga::XML::Node]
- #
def last
@nodes[-1]
end
- ##
# Returns `true` if the set is empty.
#
# @return [TrueClass|FalseClass]
- #
def empty?
@nodes.empty?
end
- ##
# Returns the amount of nodes in the set.
#
# @return [Fixnum]
- #
def length
@nodes.length
end
@@ -92,21 +80,17 @@ module Oga
alias_method :count, :length
alias_method :size, :length
- ##
# Returns the index of the given node.
#
# @param [Oga::XML::Node] node
# @return [Fixnum]
- #
def index(node)
@nodes.index(node)
end
- ##
# Pushes the node at the end of the set.
#
# @param [Oga::XML::Node] node
- #
def push(node)
return if exists?(node)
@@ -119,11 +103,9 @@ module Oga
alias_method :<<, :push
- ##
# Pushes the node at the start of the set.
#
# @param [Oga::XML::Node] node
- #
def unshift(node)
return if exists?(node)
@@ -134,11 +116,9 @@ module Oga
take_ownership(node) if @owner
end
- ##
# Shifts a node from the start of the set.
#
# @return [Oga::XML::Node]
- #
def shift
node = @nodes.shift
@@ -151,11 +131,9 @@ module Oga
node
end
- ##
# Pops a node from the end of the set.
#
# @return [Oga::XML::Node]
- #
def pop
node = @nodes.pop
@@ -168,12 +146,10 @@ module Oga
node
end
- ##
# Inserts a node into the set at the given index.
#
# @param [Fixnum] index The index to insert the node at.
# @param [Oga::XML::Node] node
- #
def insert(index, node)
return if exists?(node)
@@ -184,73 +160,59 @@ module Oga
take_ownership(node) if @owner
end
- ##
# Returns the node for the given index.
#
# @param [Fixnum] index
# @return [Oga::XML::Node]
- #
def [](index)
@nodes[index]
end
- ##
# Converts the current set to an Array.
#
# @return [Array]
- #
def to_a
@nodes
end
- ##
# Creates a new set based on the current and the specified set. The newly
# created set does not inherit ownership rules of the current set.
#
# @param [Oga::XML::NodeSet] other
# @return [Oga::XML::NodeSet]
- #
def +(other)
self.class.new(to_a | other.to_a)
end
- ##
# Returns `true` if the current node set and the one given in `other` are
# equal to each other.
#
# @param [Oga::XML::NodeSet] other
- #
def ==(other)
other.is_a?(NodeSet) && other.equal_nodes?(@nodes)
end
- ##
# Returns `true` if the nodes given in `nodes` are equal to those
# specified in the current `@nodes` variable. This method allows two
# NodeSet instances to compare each other without the need of exposing
# `@nodes` to the public.
#
# @param [Array] nodes
- #
def equal_nodes?(nodes)
@nodes == nodes
end
- ##
# Adds the nodes of the given node set to the current node set.
#
# @param [Oga::XML::NodeSet] other
- #
def concat(other)
other.each { |node| push(node) }
end
- ##
# Removes the current nodes from their owning set. The nodes are *not*
# removed from the current set.
#
# This method is intended to remove nodes from an XML document/node.
- #
def remove
sets = []
@@ -270,9 +232,7 @@ module Oga
end
end
- ##
# Removes a node from the current set only.
- #
def delete(node)
removed = @nodes.delete(node)
@@ -285,12 +245,10 @@ module Oga
removed
end
- ##
# Returns the values of the given attribute.
#
# @param [String|Symbol] name The name of the attribute.
# @return [Array]
- #
def attribute(name)
values = []
@@ -305,11 +263,9 @@ module Oga
alias_method :attr, :attribute
- ##
# Returns the text of all nodes in the set, ignoring comment nodes.
#
# @return [String]
- #
def text
text = ''
@@ -322,9 +278,7 @@ module Oga
text
end
- ##
# @return [String]
- #
def inspect
values = @nodes.map(&:inspect).join(', ')
@@ -333,21 +287,17 @@ module Oga
private
- ##
# Takes ownership of the given node. This only occurs when the current
# set has an owner.
#
# @param [Oga::XML::Node] node
- #
def take_ownership(node)
node.node_set = self
end
- ##
# Removes ownership of the node if it belongs to the current set.
#
# @param [Oga::XML::Node] node
- #
def remove_ownership(node)
node.node_set = nil if node.node_set == self
end
diff --git a/lib/oga/xml/parser.rll b/lib/oga/xml/parser.rll
index c1d33e5..506fac1 100644
--- a/lib/oga/xml/parser.rll
+++ b/lib/oga/xml/parser.rll
@@ -1,6 +1,5 @@
%header
{
-##
# DOM parser for both XML and HTML.
#
# This parser does not produce a dedicated AST, instead it emits XML nodes
@@ -205,11 +204,9 @@ string_body
%inner
{
- ##
# Hash mapping token types and dedicated error labels.
#
# @return [Hash]
- #
TOKEN_ERROR_MAPPING = {
:T_STRING => 'string',
:T_TEXT => 'text',
@@ -234,11 +231,9 @@ string_body
-1 => 'end of input'
}
- ##
# @param [String|IO] data The input to parse.
# @param [Hash] options
# @see [Oga::XML::Lexer#initialize]
- #
def initialize(data, options = {})
@data = data
@lexer = Lexer.new(data, options)
@@ -246,20 +241,16 @@ string_body
reset
end
- ##
# Resets the internal state of the parser.
- #
def reset
@line = 1
@lexer.reset
end
- ##
# Yields the next token from the lexer.
#
# @yieldparam [Array]
- #
def each_token
@lexer.advance do |type, value, line|
@line = line if line
@@ -270,12 +261,10 @@ string_body
yield [-1, -1]
end
- ##
# @param [Fixnum] stack_type
# @param [Fixnum] stack_value
# @param [Symbol] token_type
# @param [String] token_value
- #
def parser_error(stack_type, stack_value, token_type, token_value)
case id_to_type(stack_type)
when :rule
@@ -294,9 +283,7 @@ string_body
raise LL::ParserError, message
end
- ##
# @see [LL::Driver#parse]
- #
def parse
retval = super
@@ -305,10 +292,8 @@ string_body
retval
end
- ##
# @param [Array] children
# @return [Oga::XML::Document]
- #
def on_document(children = [])
document = Document.new(:type => @lexer.html? ? :html : :xml)
@@ -327,42 +312,32 @@ string_body
document
end
- ##
# @param [Hash] options
- #
def on_doctype(options = {})
Doctype.new(options)
end
- ##
# @param [String] text
# @return [Oga::XML::Cdata]
- #
def on_cdata(text = nil)
Cdata.new(:text => text)
end
- ##
# @param [String] text
# @return [Oga::XML::Comment]
- #
def on_comment(text = nil)
Comment.new(:text => text)
end
- ##
# @param [String] name
# @param [String] text
# @return [Oga::XML::ProcessingInstruction]
- #
def on_proc_ins(name, text = nil)
ProcessingInstruction.new(:name => name, :text => text)
end
- ##
# @param [Array] attributes
# @return [Oga::XML::XmlDeclaration]
- #
def on_xml_decl(attributes = [])
options = {}
@@ -373,20 +348,16 @@ string_body
XmlDeclaration.new(options)
end
- ##
# @param [String] text
# @return [Oga::XML::Text]
- #
def on_text(text)
Text.new(:text => text)
end
- ##
# @param [String] namespace
# @param [String] name
# @param [Hash] attributes
# @return [Oga::XML::Element]
- #
def on_element(namespace, name, attributes = {})
element = Element.new(
:namespace_name => namespace,
@@ -397,31 +368,25 @@ string_body
element
end
- ##
# @param [Oga::XML::Element] element
# @param [Array] children
# @return [Oga::XML::Element]
- #
def on_element_children(element, children = [])
element.children = children
element
end
- ##
# @param [Oga::XML::Element] element
# @return [Oga::XML::Element]
- #
def after_element(element)
element
end
- ##
# @param [String] name
# @param [String] ns_name
# @param [String] value
# @return [Oga::XML::Attribute]
- #
def on_attribute(name, ns_name = nil, value = nil)
Attribute.new(
:namespace_name => ns_name,
@@ -430,9 +395,7 @@ string_body
)
end
- ##
# @param [Array] attrs
- #
def on_attributes(attrs)
attrs
end
diff --git a/lib/oga/xml/processing_instruction.rb b/lib/oga/xml/processing_instruction.rb
index 925d4c9..be05af7 100644
--- a/lib/oga/xml/processing_instruction.rb
+++ b/lib/oga/xml/processing_instruction.rb
@@ -1,34 +1,26 @@
module Oga
module XML
- ##
# Class used for storing information about a single processing instruction.
- #
class ProcessingInstruction < CharacterNode
# @return [String]
attr_accessor :name
- ##
# @param [Hash] options
#
# @option options [String] :name The name of the instruction.
# @see [Oga::XML::CharacterNode#initialize]
- #
def initialize(options = {})
super
@name = options[:name]
end
- ##
# @return [String]
- #
def to_xml
"#{name}#{text}?>"
end
- ##
# @return [String]
- #
def inspect
"ProcessingInstruction(name: #{name.inspect} text: #{text.inspect})"
end
diff --git a/lib/oga/xml/pull_parser.rb b/lib/oga/xml/pull_parser.rb
index 3bf3709..37b5cf7 100644
--- a/lib/oga/xml/pull_parser.rb
+++ b/lib/oga/xml/pull_parser.rb
@@ -1,6 +1,5 @@
module Oga
module XML
- ##
# The PullParser class can be used to parse an XML document incrementally
# instead of parsing it as a whole. This results in lower memory usage and
# potentially faster parsing times. The downside is that pull parsers are
@@ -18,7 +17,6 @@ module Oga
#
# This parses yields proper XML instances such as {Oga::XML::Element}.
# Doctypes and XML declarations are ignored by this parser.
- #
class PullParser < Parser
# @return [Oga::XML::Node]
attr_reader :node
@@ -27,9 +25,7 @@ module Oga
# @return [Array]
attr_reader :nesting
- ##
# @return [Array]
- #
DISABLED_CALLBACKS = [
:on_document,
:on_doctype,
@@ -37,9 +33,7 @@ module Oga
:on_element_children
]
- ##
# @return [Array]
- #
BLOCK_CALLBACKS = [
:on_cdata,
:on_comment,
@@ -47,11 +41,9 @@ module Oga
:on_proc_ins
]
- ##
# Returns the shorthands that can be used for various node classes.
#
# @return [Hash]
- #
NODE_SHORTHANDS = {
:text => XML::Text,
:node => XML::Node,
@@ -62,9 +54,7 @@ module Oga
:xml_declaration => XML::XmlDeclaration
}
- ##
# @see Oga::XML::Parser#reset
- #
def reset
super
@@ -73,11 +63,9 @@ module Oga
@node = nil
end
- ##
# Parses the input and yields every node to the supplied block.
#
# @yieldparam [Oga::XML::Node]
- #
def parse(&block)
@block = block
@@ -86,7 +74,6 @@ module Oga
return
end
- ##
# Calls the supplied block if the current node type and optionally the
# nesting match. This method allows you to write this:
#
@@ -120,7 +107,6 @@ module Oga
# returned by {Oga::XML::Node#node_type}.
#
# @param [Array] nesting The element name nesting to act upon.
- #
def on(type, nesting = [])
if node.is_a?(NODE_SHORTHANDS[type])
if nesting.empty? or nesting == self.nesting
@@ -149,9 +135,7 @@ module Oga
EOF
end
- ##
# @see Oga::XML::Parser#on_element
- #
def on_element(*args)
@node = super
@@ -162,9 +146,7 @@ module Oga
return
end
- ##
# @see Oga::XML::Parser#on_element_children
- #
def after_element(*args)
nesting.pop
diff --git a/lib/oga/xml/querying.rb b/lib/oga/xml/querying.rb
index d834d61..1eee81c 100644
--- a/lib/oga/xml/querying.rb
+++ b/lib/oga/xml/querying.rb
@@ -1,11 +1,8 @@
module Oga
module XML
- ##
# The Querying module provides methods that make it easy to run XPath/CSS
# queries on XML documents/elements.
- #
module Querying
- ##
# Evaluates the given XPath expression.
#
# Querying a document:
@@ -34,7 +31,6 @@ module Oga
# be String values.
#
# @return [Oga::XML::NodeSet]
- #
def xpath(expression, variables = {})
ast = XPath::Parser.parse_with_cache(expression)
block = XPath::Compiler.compile_with_cache(ast)
@@ -42,7 +38,6 @@ module Oga
block.call(self, variables)
end
- ##
# Evaluates the XPath expression and returns the first matched node.
#
# Querying a document:
@@ -59,14 +54,12 @@ module Oga
#
# @see [#xpath]
# @return [Oga::XML::Node|Oga::XML::Attribute]
- #
def at_xpath(*args)
result = xpath(*args)
result.is_a?(XML::NodeSet) ? result.first : result
end
- ##
# Evaluates the given CSS expression.
#
# Querying a document:
@@ -81,7 +74,6 @@ module Oga
#
# @param [String] expression The CSS expression to run.
# @return [Oga::XML::NodeSet]
- #
def css(expression)
ast = CSS::Parser.parse_with_cache(expression)
block = XPath::Compiler.compile_with_cache(ast)
@@ -89,12 +81,10 @@ module Oga
block.call(self)
end
- ##
# Evaluates the CSS expression and returns the first matched node.
#
# @see [#css]
# @return [Oga::XML::Node|Oga::XML::Attribute]
- #
def at_css(*args)
result = css(*args)
diff --git a/lib/oga/xml/sax_parser.rb b/lib/oga/xml/sax_parser.rb
index 16e97d8..189e334 100644
--- a/lib/oga/xml/sax_parser.rb
+++ b/lib/oga/xml/sax_parser.rb
@@ -1,6 +1,5 @@
module Oga
module XML
- ##
# The SaxParser class provides the basic interface for writing custom SAX
# parsers. All callback methods defined in {Oga::XML::Parser} are delegated
# to a dedicated handler class.
@@ -66,12 +65,9 @@ module Oga
# attribute names (optionally prefixed by their namespace) and their values.
# You can overwrite `on_attribute` to control individual attributes and
# `on_attributes` to control the final set.
- #
class SaxParser < Parser
- ##
# @param [Object] handler The SAX handler to delegate callbacks to.
# @see [Oga::XML::Parser#initialize]
- #
def initialize(handler, *args)
@handler = handler
@@ -89,38 +85,32 @@ module Oga
EOF
end
- ##
# Manually overwrite `on_element` so we can ensure that `after_element`
# always receives the namespace and name.
#
# @see [Oga::XML::Parser#on_element]
# @return [Array]
- #
def on_element(namespace, name, attrs = [])
run_callback(:on_element, namespace, name, attrs)
[namespace, name]
end
- ##
# Manually overwrite `after_element` so it can take a namespace and name.
# This differs a bit from the regular `after_element` which only takes an
# {Oga::XML::Element} instance.
#
# @param [Array] namespace_with_name
- #
def after_element(namespace_with_name)
run_callback(:after_element, *namespace_with_name)
return
end
- ##
# Manually overwrite this method since for this one we _do_ want the
# return value so it can be passed to `on_element`.
#
# @see [Oga::XML::Parser#on_attribute]
- #
def on_attribute(name, ns = nil, value = nil)
if @handler.respond_to?(:on_attribute)
return run_callback(:on_attribute, name, ns, value)
@@ -135,12 +125,10 @@ module Oga
{key => value}
end
- ##
# Merges the attributes together into a Hash.
#
# @param [Array] attrs
# @return [Hash]
- #
def on_attributes(attrs)
if @handler.respond_to?(:on_attributes)
return run_callback(:on_attributes, attrs)
@@ -156,9 +144,7 @@ module Oga
merged
end
- ##
# @param [String] text
- #
def on_text(text)
if @handler.respond_to?(:on_text)
unless inside_literal_html?
@@ -173,17 +159,13 @@ module Oga
private
- ##
# @return [TrueClass|FalseClass]
- #
def inside_literal_html?
@lexer.html_script? || @lexer.html_style?
end
- ##
# @param [Symbol] method
# @param [Array] args
- #
def run_callback(method, *args)
@handler.send(method, *args) if @handler.respond_to?(method)
end
diff --git a/lib/oga/xml/text.rb b/lib/oga/xml/text.rb
index 9c5b9be..2ee1734 100644
--- a/lib/oga/xml/text.rb
+++ b/lib/oga/xml/text.rb
@@ -1,9 +1,7 @@
module Oga
module XML
- ##
# Class containing information about a single text node. Text nodes don't
# have any children, attributes and the likes; just text.
- #
class Text < CharacterNode
def initialize(*args)
super
@@ -11,20 +9,16 @@ module Oga
@decoded = false
end
- ##
# @param [String] value
- #
def text=(value)
@decoded = false
@text = value
end
- ##
# Returns the text as a String. Upon the first call any XML/HTML entities
# are decoded.
#
# @return [String]
- #
def text
if decode_entities?
@text = EntityDecoder.try_decode(@text, html?)
@@ -34,9 +28,7 @@ module Oga
@text
end
- ##
# @see [Oga::XML::CharacterNode#to_xml]
- #
def to_xml
return super if inside_literal_html?
@@ -45,16 +37,12 @@ module Oga
private
- ##
# @return [TrueClass|FalseClass]
- #
def decode_entities?
!@decoded && !inside_literal_html?
end
- ##
# @return [TrueClass|FalseClass]
- #
def inside_literal_html?
node = parent
diff --git a/lib/oga/xml/traversal.rb b/lib/oga/xml/traversal.rb
index b8f0707..9c334a2 100644
--- a/lib/oga/xml/traversal.rb
+++ b/lib/oga/xml/traversal.rb
@@ -1,10 +1,7 @@
module Oga
module XML
- ##
# Module that provides methods to traverse DOM trees.
- #
module Traversal
- ##
# Traverses through the node and yields every child node to the supplied
# block.
#
@@ -29,7 +26,6 @@ module Oga
# end
#
# @yieldparam [Oga::XML::Node] The current node.
- #
def each_node
visit = children.to_a.reverse
diff --git a/lib/oga/xml/xml_declaration.rb b/lib/oga/xml/xml_declaration.rb
index b112cff..2ad6e16 100644
--- a/lib/oga/xml/xml_declaration.rb
+++ b/lib/oga/xml/xml_declaration.rb
@@ -1,8 +1,6 @@
module Oga
module XML
- ##
# Class containing information about an XML declaration tag.
- #
class XmlDeclaration
# @return [String]
attr_accessor :version
@@ -14,24 +12,20 @@ module Oga
# @return [String]
attr_accessor :standalone
- ##
# @param [Hash] options
#
# @option options [String] :version
# @option options [String] :encoding
# @option options [String] :standalone
- #
def initialize(options = {})
@version = options[:version] || '1.0'
@encoding = options[:encoding] || 'UTF-8'
@standalone = options[:standalone]
end
- ##
# Converts the declaration tag to XML.
#
# @return [String]
- #
def to_xml
pairs = []
@@ -44,9 +38,7 @@ module Oga
""
end
- ##
# @return [String]
- #
def inspect
segments = []
diff --git a/lib/oga/xpath/compiler.rb b/lib/oga/xpath/compiler.rb
index 940a093..f02abda 100644
--- a/lib/oga/xpath/compiler.rb
+++ b/lib/oga/xpath/compiler.rb
@@ -1,6 +1,5 @@
module Oga
module XPath
- ##
# Compiling of XPath ASTs into Ruby code.
#
# The Compiler class can be used to turn an XPath AST into Ruby source code
@@ -9,7 +8,6 @@ module Oga
# recompiling the same expression over and over again.
#
# @private
- #
class Compiler
# @return [Oga::LRU]
CACHE = LRU.new
@@ -36,11 +34,9 @@ module Oga
:on_or => [:to_boolean, :or]
}
- ##
# Compiles and caches an AST.
#
# @see [#compile]
- #
def self.compile_with_cache(ast)
CACHE.get_or_set(ast) { new.compile(ast) }
end
@@ -57,12 +53,10 @@ module Oga
@predicate_indexes = []
end
- ##
# Compiles an XPath AST into a Ruby Proc.
#
# @param [AST::Node] ast
# @return [Proc]
- #
def compile(ast)
document = literal(:node)
matched = matched_literal
@@ -97,13 +91,11 @@ module Oga
reset
end
- ##
# Processes a single XPath AST node.
#
# @param [AST::Node] ast
# @param [Oga::Ruby::Node] input
# @return [Oga::Ruby::Node]
- #
def process(ast, input, &block)
send("on_#{ast.type}", ast, input, &block)
end
@@ -119,7 +111,6 @@ module Oga
end
end
- ##
# Dispatches the processing of axes to dedicated methods. This works
# similar to {#process} except the handler names are "on_axis_X" with "X"
# being the axis name.
@@ -127,7 +118,6 @@ module Oga
# @param [AST::Node] ast
# @param [Oga::Ruby::Node] input
# @return [Oga::Ruby::Node]
- #
def on_axis(ast, input, &block)
name, test, following = *ast
@@ -425,14 +415,12 @@ module Oga
ast
end
- ##
# Processes a predicate that requires a temporary NodeSet.
#
# @param [Oga::Ruby::Node] input
# @param [AST::Node] test
# @param [AST::Node] predicate
# @return [Oga::Ruby::Node]
- #
def on_predicate_temporary(input, test, predicate)
temp_set = unique_literal(:temp_set)
pred_node = unique_literal(:pred_node)
@@ -472,14 +460,12 @@ module Oga
ast
end
- ##
# Processes a predicate that doesn't require temporary NodeSet.
#
# @param [Oga::Ruby::Node] input
# @param [AST::Node] test
# @param [AST::Node] predicate
# @return [Oga::Ruby::Node]
- #
def on_predicate_direct(input, test, predicate)
pred_var = unique_literal(:pred_var)
index_var = predicate_index
@@ -514,14 +500,12 @@ module Oga
end
end
- ##
# Processes a predicate that uses a literal index.
#
# @param [Oga::Ruby::Node] input
# @param [AST::Node] test
# @param [AST::Node] predicate
# @return [Oga::Ruby::Node]
- #
def on_predicate_index(input, test, predicate)
index_var = predicate_index
index_step = literal(1)
@@ -549,11 +533,9 @@ module Oga
name_match ? condition.and(name_match) : condition
end
- ##
# Processes the `=` operator.
#
# @see [#operator]
- #
def on_eq(ast, input, &block)
conv = literal(Conversion)
@@ -567,11 +549,9 @@ module Oga
end
end
- ##
# Processes the `!=` operator.
#
# @see [#operator]
- #
def on_neq(ast, input, &block)
conv = literal(Conversion)
@@ -599,11 +579,9 @@ module Oga
end
end
- ##
# Processes the `|` operator.
#
# @see [#operator]
- #
def on_pipe(ast, input, &block)
left, right = *ast
@@ -649,13 +627,11 @@ module Oga
.or(send_message(:raise, string("Undefined XPath variable: #{name}")))
end
- ##
# Delegates function calls to specific handlers.
#
# @param [AST::Node] ast
# @param [Oga::Ruby::Node] input
# @return [Oga::Ruby::Node]
- #
def on_call(ast, input, &block)
name, *args = *ast
@@ -808,7 +784,6 @@ module Oga
end
end
- ##
# Processes the `id()` function call.
#
# The XPath specification states that this function's behaviour should be
@@ -825,7 +800,6 @@ module Oga
# @param [Oga::Ruby::Node] input
# @param [AST::Node] arg
# @return [Oga::Ruby::Node]
- #
def on_call_id(input, arg)
orig_input = original_input_literal
node = unique_literal(:node)
@@ -1270,13 +1244,11 @@ module Oga
index.to_f
end
- ##
# Delegates type tests to specific handlers.
#
# @param [AST::Node] ast
# @param [Oga::Ruby::Node] input
# @return [Oga::Ruby::Node]
- #
def on_type_test(ast, input, &block)
name, following = *ast
@@ -1414,13 +1386,11 @@ module Oga
condition
end
- ##
# Returns an AST matching the first node of a node set.
#
# @param [Oga::Ruby::Node] ast
# @param [Oga::Ruby::Node] input
# @return [Oga::Ruby::Node]
- #
def match_first_node(ast, input)
catch_message(:value) do
process(ast, input) do |node|
@@ -1429,11 +1399,9 @@ module Oga
end
end
- ##
# Tries to match the first node in a set, otherwise processes it as usual.
#
# @see [#match_first_node]
- #
def try_match_first_node(ast, input, optimize_first = true)
if return_nodeset?(ast) and optimize_first
match_first_node(ast, input)
@@ -1460,7 +1428,6 @@ module Oga
arg_var.assign(arg_ast).followed_by { yield arg_var }
end
- ##
# Generates the code for an operator.
#
# The generated code is optimized so that expressions such as `a/b = c`
@@ -1479,7 +1446,6 @@ module Oga
# @param [Oga::Ruby::Node] input
# @param [TrueClass|FalseClass] optimize_first
# @return [Oga::Ruby::Node]
- #
def operator(ast, input, optimize_first = true)
left, right = *ast
diff --git a/lib/oga/xpath/conversion.rb b/lib/oga/xpath/conversion.rb
index 87987aa..30bd938 100644
--- a/lib/oga/xpath/conversion.rb
+++ b/lib/oga/xpath/conversion.rb
@@ -1,16 +1,12 @@
module Oga
module XPath
- ##
# Module for converting XPath objects such as NodeSets.
#
# @private
- #
module Conversion
- ##
# Converts both arguments to a type that can be compared using ==.
#
# @return [Array]
- #
def self.to_compatible_types(left, right)
if left.is_a?(XML::NodeSet) or left.respond_to?(:text)
left = to_string(left)
diff --git a/lib/oga/xpath/lexer.rl b/lib/oga/xpath/lexer.rl
index 898ca34..821b398 100644
--- a/lib/oga/xpath/lexer.rl
+++ b/lib/oga/xpath/lexer.rl
@@ -2,7 +2,6 @@
module Oga
module XPath
- ##
# Lexer for turning XPath expressions into a set of tokens. Tokens are
# returned as arrays with every array having two values:
#
@@ -33,18 +32,15 @@ module Oga
# shared state.
#
# @api private
- #
class Lexer
%% write data;
# % fix highlight
- ##
# Maps certain XPath axes written in their short form to their long form
# equivalents.
#
# @return [Hash]
- #
AXIS_MAPPING = {
'@' => 'attribute',
'//' => 'descendant-or-self',
@@ -52,33 +48,25 @@ module Oga
'.' => 'self'
}
- ##
# Axes that require a separate `node()` call to be emitted.
#
# @return [Array]
- #
AXIS_EMIT_NODE = %w{descendant-or-self parent self}
- ##
# Axes that require an extra T_SLASH token to be emitted.
#
# @return [Array]
- #
AXIS_EMIT_EXTRA_SLASH = %w{descendant-or-self}
- ##
# @param [String] data The data to lex.
- #
def initialize(data)
@data = data
end
- ##
# Gathers all the tokens for the input and returns them as an Array.
#
# @see [#advance]
# @return [Array]
- #
def lex
tokens = []
@@ -89,7 +77,6 @@ module Oga
return tokens
end
- ##
# Advances through the input and generates the corresponding tokens. Each
# token is yielded to the supplied block.
#
@@ -103,7 +90,6 @@ module Oga
# the lexer loop has finished.
#
# @see [#add_token]
- #
def advance(&block)
@block = block
@@ -137,7 +123,6 @@ module Oga
private
- ##
# Emits a token of which the value is based on the supplied start/stop
# position.
#
@@ -147,25 +132,21 @@ module Oga
#
# @see [#text]
# @see [#add_token]
- #
def emit(type, start, stop)
value = slice_input(start, stop)
add_token(type, value)
end
- ##
# Returns the text between the specified start and stop position.
#
# @param [Fixnum] start
# @param [Fixnum] stop
# @return [String]
- #
def slice_input(start, stop)
return @data.byteslice(start, stop - start)
end
- ##
# Yields a new token to the supplied block.
#
# @param [Symbol] type The token type.
@@ -173,7 +154,6 @@ module Oga
#
# @yieldparam [Symbol] type
# @yieldparam [String|NilClass] value
- #
def add_token(type, value = nil)
@block.call(type, value)
end
@@ -228,7 +208,6 @@ module Oga
#
# Strings can be single or double quoted. They are mainly used for
# attribute values.
- #
dquote = '"';
squote = "'";
@@ -244,7 +223,6 @@ module Oga
# Full Axes
#
# XPath axes in their full syntax.
- #
axis_full = ('ancestor'
| 'ancestor-or-self'
| 'attribute'
@@ -268,7 +246,6 @@ module Oga
# XPath axes in their abbreviated form. When lexing these are mapped to
# their full forms so that the parser doesn't have to take care of
# this.
- #
axis_short = '@' | '//' | '..' | '.';
action emit_axis_short {
@@ -358,7 +335,6 @@ module Oga
# can not assign variables in an expression, you can only refer to them.
# This means that libraries themselves have to expose an interface for
# setting variables.
- #
var = '$' identifier;
action emit_variable {
diff --git a/lib/oga/xpath/parser.rll b/lib/oga/xpath/parser.rll
index 8af5252..b92e51b 100644
--- a/lib/oga/xpath/parser.rll
+++ b/lib/oga/xpath/parser.rll
@@ -1,6 +1,5 @@
%header
{
-##
# AST parser for XPath expressions. The AST is built using `AST::Node`
# instances.
#
@@ -248,42 +247,32 @@ variable
%inner
{
- ##
# @return [Oga::LRU]
- #
CACHE = LRU.new
- ##
# @param [String] data
# @return [AST::Node]
- #
def self.parse_with_cache(data)
CACHE.get_or_set(data) { new(data).parse }
end
- ##
# @param [String] data The input to parse.
- #
def initialize(data)
@lexer = Lexer.new(data)
end
- ##
# Creates a new XPath node.
#
# @param [Symbol] type
# @param [Array] children
# @return [AST::Node]
- #
def s(type, *children)
AST::Node.new(type, children)
end
- ##
# Yields the next token from the lexer.
#
# @yieldparam [Array]
- #
def each_token
@lexer.advance do |type, value, line|
@line = line if line
@@ -294,9 +283,7 @@ variable
yield [-1, -1]
end
- ##
# @param [Array] val
- #
def combine_operators(val)
ret = val[0]
@@ -307,9 +294,7 @@ variable
ret
end
- ##
# @param [Array] val
- #
def combine_optional_operator(val)
ret = val[0]