diff --git a/ext/ragel/base_lexer.rl b/ext/ragel/base_lexer.rl index 0b47ad3..739c15c 100644 --- a/ext/ragel/base_lexer.rl +++ b/ext/ragel/base_lexer.rl @@ -330,8 +330,6 @@ if ( literal_html_element_p() ) { - mark = ts + 1; - fnext literal_html_element; } else @@ -404,10 +402,25 @@ # Certain tags in HTML can contain basically anything except for the literal # closing tag. Two examples are script and style tags. As a result of this # we can't use the regular text machine. - literal_html_element := |* - '</script>' | '</style>' => { - callback(id_on_text, data, encoding, mark, ts); + literal_html_closing_tags = '</script>' | '</style>'; + literal_html_allowed = (any* -- literal_html_closing_tags) $count_newlines; + literal_html_element := |* + literal_html_allowed => { + callback(id_on_text, data, encoding, ts, te); + + if ( lines > 0 ) + { + advance_line(lines); + + lines = 0; + } + }; + + literal_html_allowed %{ mark = p; } literal_html_closing_tags => { + callback(id_on_text, data, encoding, ts, mark); + + p = mark - 1; mark = 0; if ( lines > 0 ) @@ -417,12 +430,8 @@ lines = 0; } - callback_simple(id_on_element_end); - fnext main; }; - - any $count_newlines; *|; # The main machine aka the entry point of Ragel. 
diff --git a/spec/oga/xml/lexer/html_style_spec.rb b/spec/oga/xml/lexer/html_style_spec.rb index cfa3907..6ac7353 100644 --- a/spec/oga/xml/lexer/html_style_spec.rb +++ b/spec/oga/xml/lexer/html_style_spec.rb @@ -2,6 +2,14 @@ require 'spec_helper' describe Oga::XML::Lexer do describe 'HTML style elements' do + it 'lexes an empty <style> tag' do + lex('<style></style>', :html => true).should == [ + [:T_ELEM_START, nil, 1], + [:T_ELEM_NAME, 'style', 1], + [:T_ELEM_END, nil, 1] + ] + end + it 'treats the content of a style tag as plain text' do lex('', :html => true).should == [ [:T_ELEM_START, nil, 1], @@ -10,5 +18,26 @@ describe Oga::XML::Lexer do [:T_ELEM_END, nil, 1] ] end + + it 'lexes a multi-line <style> tag using a String as the input' do + lex("<style>foo\nbar</style>", :html => true).should == [ + [:T_ELEM_START, nil, 1], + [:T_ELEM_NAME, 'style', 1], + [:T_TEXT, "foo\nbar", 1], + [:T_ELEM_END, nil, 2] + ] + end + + it 'lexes a multi-line <style> tag using an IO as the input' do + io = StringIO.new("<style>foo\nbar</style>") + + lex(io, :html => true).should == [ + [:T_ELEM_START, nil, 1], + [:T_ELEM_NAME, 'style', 1], + [:T_TEXT, "foo\n", 1], + [:T_TEXT, 'bar', 2], + [:T_ELEM_END, nil, 2] + ] + end end end