diff --git a/lib/oga/lexer.rl b/lib/oga/lexer.rl index 15cf7a4..db3a73d 100644 --- a/lib/oga/lexer.rl +++ b/lib/oga/lexer.rl @@ -78,12 +78,14 @@ module Oga slash = '/'; exclamation = '!'; equals = '='; + colon = ':'; s_quote = "'"; d_quote = '"'; + # FIXME: there really should be a better way of doing this. text = (any - s_quote - d_quote - equals - exclamation - slash - - greater - smaller - whitespace - newline)+; + greater - smaller - whitespace - newline - colon)+; # Unicode characters, taken from whitequark's wonderful parser library. # (I honestly need to buy that dude a beer or 100). Basically this @@ -99,6 +101,7 @@ module Oga slash => { t(:T_SLASH) }; d_quote => { t(:T_DQUOTE) }; s_quote => { t(:T_SQUOTE) }; + colon => { t(:T_COLON) }; exclamation => { t(:T_EXCLAMATION) }; equals => { t(:T_EQUALS) }; text => { t(:T_TEXT) }; diff --git a/spec/oga/lexer_spec.rb b/spec/oga/lexer_spec.rb index 0c1620a..2593c65 100644 --- a/spec/oga/lexer_spec.rb +++ b/spec/oga/lexer_spec.rb @@ -53,5 +53,48 @@ describe Oga::Lexer do [:T_GREATER, '>', 1, 15] ] end + + example 'lex a tag with text inside it' do + lex('<p>Foo</p>').should == [ + [:T_SMALLER, '<', 1, 1], + [:T_TEXT, 'p', 1, 2], + [:T_GREATER, '>', 1, 3], + [:T_TEXT, 'Foo', 1, 4], + [:T_SMALLER, '<', 1, 7], + [:T_SLASH, '/', 1, 8], + [:T_TEXT, 'p', 1, 9], + [:T_GREATER, '>', 1, 10] + ] + end + + example 'lex a tag with an attribute with a dash in it' do + lex('<p foo-bar="baz">').should == [
+ [:T_SMALLER, '<', 1, 1],
+ [:T_TEXT, 'p', 1, 2],
+ [:T_SPACE, ' ', 1, 3],
+ [:T_TEXT, 'foo-bar', 1, 4],
+ [:T_EQUALS, '=', 1, 11],
+ [:T_DQUOTE, '"', 1, 12],
+ [:T_TEXT, 'baz', 1, 13],
+ [:T_DQUOTE, '"', 1, 16],
+ [:T_GREATER, '>', 1, 17]
+ ]
+ end
+ end
+
+ context 'tags with namespaces' do
+ example 'lex a tag with a dummy namespace' do
+ lex('