diff --git a/ext/ragel/base_lexer.rl b/ext/ragel/base_lexer.rl index 7f06ca9..6c0c333 100644 --- a/ext/ragel/base_lexer.rl +++ b/ext/ragel/base_lexer.rl @@ -149,21 +149,6 @@ comment_start = ''; - action start_comment { - callback_simple("on_comment_start"); - fcall comment; - } - - # Machine used for processing the contents of a comment. Everything - # inside a comment is treated as plain text (similar to CDATA tags). - comment := |* - any* comment_end => { - callback("on_text", data, encoding, ts, te - 3); - callback_simple("on_comment_end"); - fret; - }; - *|; - # XML declaration tags # # http://www.w3.org/TR/REC-xml/#sec-prolog-dtd @@ -257,9 +242,12 @@ '<' => start_element; doctype_start => start_doctype; cdata_start => start_cdata; - comment_start => start_comment; xml_decl_start => start_xml_decl; + comment_start any* comment_end => { + callback("on_comment", data, encoding, ts + 4, te - 3); + }; + # Enter the body of the tag. If HTML mode is enabled and the current # element is a void element we'll close it and bail out. '>' => { diff --git a/lib/oga/xml/lexer.rb b/lib/oga/xml/lexer.rb index acb85d4..bf15f4b 100644 --- a/lib/oga/xml/lexer.rb +++ b/lib/oga/xml/lexer.rb @@ -217,17 +217,12 @@ module Oga end ## - # Called on the start of a comment. + # Called on a comment. # - def on_comment_start - add_token(:T_COMMENT_START) - end - - ## - # Called on the end of a comment. + # @param [String] value # - def on_comment_end - add_token(:T_COMMENT_END) + def on_comment(value) + add_token(:T_COMMENT, value) end ## diff --git a/lib/oga/xml/parser.y b/lib/oga/xml/parser.y index 4832d99..d444512 100644 --- a/lib/oga/xml/parser.y +++ b/lib/oga/xml/parser.y @@ -12,8 +12,7 @@ class Oga::XML::Parser token T_STRING T_TEXT token T_DOCTYPE_START T_DOCTYPE_END T_DOCTYPE_TYPE T_DOCTYPE_NAME token T_DOCTYPE_INLINE -token T_CDATA_START T_CDATA_END -token T_COMMENT_START T_COMMENT_END +token T_CDATA_START T_CDATA_END T_COMMENT token T_ELEM_START T_ELEM_NAME T_ELEM_NS T_ELEM_END T_ATTR token T_XML_DECL_START T_XML_DECL_END @@ -92,11 +91,8 @@ rule # Comments comment - # - : T_COMMENT_START T_COMMENT_END { on_comment } - # - | T_COMMENT_START T_TEXT T_COMMENT_END { on_comment(val[1]) } + | T_COMMENT { on_comment(val[0]) } ; # Elements diff --git a/spec/oga/xml/lexer/comments_spec.rb b/spec/oga/xml/lexer/comments_spec.rb index 25b0bd5..0c3176e 100644 --- a/spec/oga/xml/lexer/comments_spec.rb +++ b/spec/oga/xml/lexer/comments_spec.rb @@ -3,33 +3,20 @@ require 'spec_helper' describe Oga::XML::Lexer do context 'comments' do example 'lex a comment' do - lex('').should == [ - [:T_COMMENT_START, nil, 1], - [:T_TEXT, ' foo ', 1], - [:T_COMMENT_END, nil, 1] - ] + lex('').should == [[:T_COMMENT, ' foo ', 1]] end example 'lex a comment containing --' do - lex('').should == [ - [:T_COMMENT_START, nil, 1], - [:T_TEXT, ' -- ', 1], - [:T_COMMENT_END, nil, 1] - ] + lex('').should == [[:T_COMMENT, ' -- ', 1]] end example 'lex a comment containing ->' do - lex('').should == [ - [:T_COMMENT_START, nil, 1], - [:T_TEXT, ' -> ', 1], - [:T_COMMENT_END, nil, 1] - ] + lex('').should == [[:T_COMMENT, ' -> ', 1]] end example 'lex a comment followed by text' do lex('foo').should == [ - [:T_COMMENT_START, nil, 1], - [:T_COMMENT_END, nil, 1], + [:T_COMMENT, '', 1], [:T_TEXT, 'foo', 1] ] end @@ -37,8 +24,7 @@ describe Oga::XML::Lexer do example 'lex text followed by a comment' do lex('foo').should == [ [:T_TEXT, 'foo', 1], - [:T_COMMENT_START, nil, 1], - [:T_COMMENT_END, nil, 1] + [:T_COMMENT, '', 1] ] end @@ -47,8 +33,7 @@ describe Oga::XML::Lexer do [:T_ELEM_START, nil, 1], [:T_ELEM_NAME, 'p', 1], [:T_ELEM_END, nil, 1], - [:T_COMMENT_START, nil, 1], - [:T_COMMENT_END, nil, 1] + [:T_COMMENT, '', 1] ] end end