diff --git a/lib/oga/lexer.rl b/lib/oga/lexer.rl index 15cf7a4..db3a73d 100644 --- a/lib/oga/lexer.rl +++ b/lib/oga/lexer.rl @@ -78,12 +78,14 @@ module Oga slash = '/'; exclamation = '!'; equals = '='; + colon = ':'; s_quote = "'"; d_quote = '"'; + # FIXME: there really should be a better way of doing this. text = (any - s_quote - d_quote - equals - exclamation - slash - - greater - smaller - whitespace - newline)+; + greater - smaller - whitespace - newline - colon)+; # Unicode characters, taken from whitequark's wonderful parser library. # (I honestly need to buy that dude a beer or 100). Basically this @@ -99,6 +101,7 @@ module Oga slash => { t(:T_SLASH) }; d_quote => { t(:T_DQUOTE) }; s_quote => { t(:T_SQUOTE) }; + colon => { t(:T_COLON) }; exclamation => { t(:T_EXCLAMATION) }; equals => { t(:T_EQUALS) }; text => { t(:T_TEXT) }; diff --git a/spec/oga/lexer_spec.rb b/spec/oga/lexer_spec.rb index 0c1620a..2593c65 100644 --- a/spec/oga/lexer_spec.rb +++ b/spec/oga/lexer_spec.rb @@ -53,5 +53,48 @@ describe Oga::Lexer do [:T_GREATER, '>', 1, 15] ] end + + example 'lex a tag with text inside it' do + lex('

<p>Foo</p>
').should == [ + [:T_SMALLER, '<', 1, 1], + [:T_TEXT, 'p', 1, 2], + [:T_GREATER, '>', 1, 3], + [:T_TEXT, 'Foo', 1, 4], + [:T_SMALLER, '<', 1, 7], + [:T_SLASH, '/', 1, 8], + [:T_TEXT, 'p', 1, 9], + [:T_GREATER, '>', 1, 10] + ] + end + + example 'lex a tag with an attribute with a dash in it' do + lex('
<p foo-bar="baz">
').should == [ + [:T_SMALLER, '<', 1, 1], + [:T_TEXT, 'p', 1, 2], + [:T_SPACE, ' ', 1, 3], + [:T_TEXT, 'foo-bar', 1, 4], + [:T_EQUALS, '=', 1, 11], + [:T_DQUOTE, '"', 1, 12], + [:T_TEXT, 'baz', 1, 13], + [:T_DQUOTE, '"', 1, 16], + [:T_GREATER, '>', 1, 17] + ] + end + end + + context 'tags with namespaces' do + example 'lex a tag with a dummy namespace' do + lex('
<foo:p></p>
').should == [ + [:T_SMALLER, '<', 1, 1], + [:T_TEXT, 'foo', 1, 2], + [:T_COLON, ':', 1, 5], + [:T_TEXT, 'p', 1, 6], + [:T_GREATER, '>', 1, 7], + [:T_SMALLER, '<', 1, 8], + [:T_SLASH, '/', 1, 9], + [:T_TEXT, 'p', 1, 10], + [:T_GREATER, '>', 1, 11] + ] + end end end