diff --git a/ext/ragel/base_lexer.rl b/ext/ragel/base_lexer.rl index 8a6e469..1546f2b 100644 --- a/ext/ragel/base_lexer.rl +++ b/ext/ragel/base_lexer.rl @@ -363,16 +363,15 @@ # body of an element is lexed using the `main` machine. # - element_start = '<' ident_char; - element_end = ''; - action start_element { fhold; fnext element_name; } action close_element { - callback_simple(id_on_element_end); + callback(id_on_element_end, data, encoding, mark, te - 1); + + mark = 0; } action close_element_fnext_main { @@ -381,6 +380,12 @@ fnext main; } + element_start = '<' ident_char; + + element_end = '' + | '' + ; + # Machine used for lexing the name/namespace of an element. element_name := |* identifier ':' => { diff --git a/lib/oga/xml/lexer.rb b/lib/oga/xml/lexer.rb index 460b167..e2c724d 100644 --- a/lib/oga/xml/lexer.rb +++ b/lib/oga/xml/lexer.rb @@ -476,9 +476,19 @@ module Oga ## # Called on the closing tag of an element. # - def on_element_end + # @param [String] name The name of the element (minus namespace + # prefix). This is not set for self closing tags. + # + def on_element_end(name = nil) return if @elements.empty? + if html? and name and @elements.include?(name) + while current_element != name + add_token(:T_ELEM_END) + @elements.pop + end + end + add_token(:T_ELEM_END) @elements.pop diff --git a/spec/oga/html/lexer/closing_mismatch_spec.rb b/spec/oga/html/lexer/closing_mismatch_spec.rb new file mode 100644 index 0000000..f177745 --- /dev/null +++ b/spec/oga/html/lexer/closing_mismatch_spec.rb @@ -0,0 +1,13 @@ +require 'spec_helper' + +describe Oga::XML::Lexer do + describe 'closing HTML elements with mismatched closing tags' do + it 'lexes a

element closed using a element' do + lex_html('

foo').should == [ + [:T_ELEM_NAME, 'p', 1], + [:T_TEXT, 'foo', 1], + [:T_ELEM_END, nil, 1] + ] + end + end +end diff --git a/spec/oga/html/lexer/closing_rules/ul_spec.rb b/spec/oga/html/lexer/closing_rules/ul_spec.rb new file mode 100644 index 0000000..39ae1a8 --- /dev/null +++ b/spec/oga/html/lexer/closing_rules/ul_spec.rb @@ -0,0 +1,49 @@ +require 'spec_helper' + +describe Oga::XML::Lexer do + describe 'using HTML