diff --git a/ext/ragel/base_lexer.rl b/ext/ragel/base_lexer.rl
index e41b40e..d5f8070 100644
--- a/ext/ragel/base_lexer.rl
+++ b/ext/ragel/base_lexer.rl
@@ -53,6 +53,9 @@
ident_char = unicode | [a-zA-Z0-9\-_\.];
identifier = ident_char+;
+ # Characters allowed in HTML identifiers: everything in ident_char plus ':'.
+ # html_element_head uses html_identifier to match attribute names, so a
+ # name such as "bar:baz" is matched as one identifier.
+ html_ident_char = unicode | [a-zA-Z0-9\-_\.:];
+ html_identifier = html_ident_char+;
+
whitespace_or_newline = whitespace | newline;
action count_newlines {
@@ -390,12 +393,23 @@
# Machine used for lexing the name/namespace of an element.
element_name := |*
+ # When html_p is set the "prefix:" part is still matched, but
+ # on_element_ns is not called for it.
identifier ':' => {
- callback(id_on_element_ns, data, encoding, ts, te - 1);
+ if ( !html_p )
+ {
+ callback(id_on_element_ns, data, encoding, ts, te - 1);
+ }
};
+ # Reports the element name, then continues in html_element_head when
+ # html_p is set and in element_head otherwise.
identifier => {
callback(id_on_element_name, data, encoding, ts, te);
- fnext element_head;
+
+ if ( html_p )
+ {
+ fnext html_element_head;
+ }
+ else
+ {
+ fnext element_head;
+ }
};
*|;
@@ -508,8 +522,33 @@
any => hold_and_return;
*|;
- # Machine used for processing the contents of an element's starting tag.
- # This includes the name, namespace and attributes.
+ # Shared action for '=': calls into attribute_pre using fcall.
+ action start_attribute_pre {
+ fcall attribute_pre;
+ }
+
+ # Fires on_element_open_end and selects the next machine: html_script when
+ # html_script_p() is true, html_style when html_style_p() is true, and main
+ # otherwise.
+ action close_open_element {
+ callback_simple(id_on_element_open_end);
+
+ if ( html_script_p() )
+ {
+ fnext html_script;
+ }
+ else if ( html_style_p() )
+ {
+ fnext html_style;
+ }
+ else
+ {
+ fnext main;
+ }
+ }
+
+ # Shared action for '/>': fires on_element_end and continues in main.
+ action close_self_closing_element {
+ callback_simple(id_on_element_end);
+ fnext main;
+ }
+
+ # Machine used for processing the contents of an XML element's starting tag.
element_head := |*
newline => advance_newline;
@@ -522,12 +561,30 @@
callback(id_on_attribute, data, encoding, ts, te);
};
- # Attribute values.
- '=' => {
- fcall attribute_pre;
+ '=' => start_attribute_pre;
+
+ '>' => {
+ callback_simple(id_on_element_open_end);
+
+ fnext main;
};
- # We're done with the open tag of the element.
+ '/>' => close_self_closing_element;
+
+ any;
+ *|;
+
+ # Machine used for processing the contents of an HTML element's starting
+ # tag.
+ html_element_head := |*
+ newline => advance_newline;
+
+ html_identifier => {
+ callback(id_on_attribute, data, encoding, ts, te);
+ };
+
+ '=' => start_attribute_pre;
+
'>' => {
callback_simple(id_on_element_open_end);
@@ -545,11 +602,7 @@
}
};
- # Self closing tags.
- '/>' => {
- callback_simple(id_on_element_end);
- fnext main;
- };
+ '/>' => close_self_closing_element;
any;
*|;
diff --git a/lib/oga/xml/element.rb b/lib/oga/xml/element.rb
index 02bda03..c513f3c 100644
--- a/lib/oga/xml/element.rb
+++ b/lib/oga/xml/element.rb
@@ -64,7 +64,11 @@ module Oga
#
# @return [Oga::XML::Attribute]
def attribute(name)
- name, ns = split_name(name)
+ if html?
+ ns = nil
+ else
+ name, ns = split_name(name)
+ end
attributes.each do |attr|
return attr if attribute_matches?(attr, ns, name)
diff --git a/spec/oga/html/lexer/attributes_spec.rb b/spec/oga/html/lexer/attributes_spec.rb
index e31c525..7773dd6 100644
--- a/spec/oga/html/lexer/attributes_spec.rb
+++ b/spec/oga/html/lexer/attributes_spec.rb
@@ -138,5 +138,18 @@ describe Oga::XML::Lexer do
[:T_ELEM_END, nil, 2]
]
end
+
+ # The ':' stays part of the attribute name: a single T_ATTR token carries
+ # 'bar:baz'.
+ it 'lexes an element containing a namespaced attribute' do
+ lex_html('<foo bar:baz="10" />').should == [
+ [:T_ELEM_NAME, 'foo', 1],
+ [:T_ATTR, 'bar:baz', 1],
+ [:T_STRING_DQUOTE, nil, 1],
+ [:T_STRING_BODY, '10', 1],
+ [:T_STRING_DQUOTE, nil, 1],
+ [:T_ELEM_END, nil, 1]
+ ]
+ end
end
end
diff --git a/spec/oga/html/lexer/elements_spec.rb b/spec/oga/html/lexer/elements_spec.rb
new file mode 100644
index 0000000..3988e8c
--- /dev/null
+++ b/spec/oga/html/lexer/elements_spec.rb
@@ -0,0 +1,14 @@
+require 'spec_helper'
+
+describe Oga::XML::Lexer do
+ describe 'HTML elements' do
+ # The "foo:" prefix produces no token of its own when lexing HTML; only
+ # the element name 'bar' is emitted.
+ it 'lexes an element containing an element namespace' do
+ lex_html('<foo:bar />').should == [
+ [:T_ELEM_NAME, 'bar', 1],
+ [:T_ELEM_END, nil, 1]
+ ]
+ end
+ end
+end
diff --git a/spec/oga/xml/element_spec.rb b/spec/oga/xml/element_spec.rb
index 286a1d4..d43c6e5 100644
--- a/spec/oga/xml/element_spec.rb
+++ b/spec/oga/xml/element_spec.rb
@@ -105,6 +105,16 @@ describe Oga::XML::Element do
it 'returns nil if an attribute has a namespace that is not given' do
@instance.attribute('bar').nil?.should == true
end
+
+ describe 'using an HTML document' do
+ it 'returns an attribute containing a namespace separator' do
+ namespaced = Oga::XML::Attribute.new(:name => 'foo:bar', :value => 'foo')
+ element = described_class.new(:name => 'foo', :attributes => [namespaced])
+
+ # Built only for its effect on element; the return value is not needed.
+ # NOTE(review): presumably attaching the element to a :html document is
+ # what makes the lookup treat 'foo:bar' as a plain name - confirm.
+ Oga::XML::Document.new(:children => [element], :type => :html)
+
+ element.attribute('foo:bar').should == namespaced
+ end
+ end
end
describe '#get' do