diff --git a/lib/oga/lexer.rl b/lib/oga/lexer.rl index b761170..a74ab94 100644 --- a/lib/oga/lexer.rl +++ b/lib/oga/lexer.rl @@ -230,19 +230,35 @@ module Oga # # http://www.w3.org/TR/html-markup/syntax.html#syntax-elements # - element_name = [a-zA-Z0-9\-_]+; - element_start = '<' element_name; - # First emit the token, then advance the column. This way the column - # number points to the < and not the "p" in <p>
. + # Action that creates the tokens for the opening tag, name and namespace + # (if any). Remaining work is delegated to a dedicated machine. action open_element { - t(:T_ELEM_OPEN, @ts + 1) - + add_token(:T_ELEM_OPEN, nil) advance_column + # Add the element name. If the name includes a namespace we'll break + # the name up into two separate tokens. + name = text(@ts + 1) + + if name.include?(':') + ns, name = name.split(':') + + add_token(:T_ELEM_NS, ns) + + # Advance the column for the colon (:) that separates the namespace + # and element name. + advance_column + end + + add_token(:T_ELEM_NAME, name) + fcall element; } + element_name = [a-zA-Z0-9\-_:]+; + element_start = '<' element_name; + element_text := |* ^'<' => buffer_text; @@ -275,12 +291,13 @@ module Oga # Non self-closing elements. ' open_element; - - #dquote => { t(:T_DQUOTE) }; - #squote => { t(:T_SQUOTE) }; *|; }%% end # Lexer diff --git a/spec/oga/lexer/elements_spec.rb b/spec/oga/lexer/elements_spec.rb index 77e6b2e..467cda1 100644 --- a/spec/oga/lexer/elements_spec.rb +++ b/spec/oga/lexer/elements_spec.rb @@ -4,32 +4,36 @@ describe Oga::Lexer do context 'elements' do example 'lex an opening element' do lex('<p>
').should == [ - [:T_ELEM_OPEN, 'p', 1, 1] + [:T_ELEM_OPEN, nil, 1, 1], + [:T_ELEM_NAME, 'p', 1, 2] ] end example 'lex an opening an closing element' do lex('<p></p>
').should == [ - [:T_ELEM_OPEN, 'p', 1, 1], - [:T_ELEM_CLOSE, 'p', 1, 4] + [:T_ELEM_OPEN, nil, 1, 1], + [:T_ELEM_NAME, 'p', 1, 2], + [:T_ELEM_CLOSE, nil, 1, 4] ] end example 'lex a paragraph element with text inside it' do lex('<p>Hello</p>
').should == [ - [:T_ELEM_OPEN, 'p', 1, 1], + [:T_ELEM_OPEN, nil, 1, 1], + [:T_ELEM_NAME, 'p', 1, 2], [:T_TEXT, 'Hello', 1, 4], - [:T_ELEM_CLOSE, 'p', 1, 9] + [:T_ELEM_CLOSE, nil, 1, 9] ] end example 'lex a paragraph element with attributes' do lex('<p class="foo">Hello</p>
').should == [ - [:T_ELEM_OPEN, 'p', 1, 1], + [:T_ELEM_OPEN, nil, 1, 1], + [:T_ELEM_NAME, 'p', 1, 2], [:T_ATTR, 'class', 1, 4], [:T_STRING, 'foo', 1, 10], [:T_TEXT, 'Hello', 1, 16], - [:T_ELEM_CLOSE, 'p', 1, 21] + [:T_ELEM_CLOSE, nil, 1, 21] ] end end @@ -37,22 +41,26 @@ describe Oga::Lexer do context 'nested elements' do example 'lex a nested element' do lex('<p><a></a></p>
').should == [ - [:T_ELEM_OPEN, 'p', 1, 1], - [:T_ELEM_OPEN, 'a', 1, 4], - [:T_ELEM_CLOSE, 'a', 1, 7], - [:T_ELEM_CLOSE, 'p', 1, 11] + [:T_ELEM_OPEN, nil, 1, 1], + [:T_ELEM_NAME, 'p', 1, 2], + [:T_ELEM_OPEN, nil, 1, 4], + [:T_ELEM_NAME, 'a', 1, 5], + [:T_ELEM_CLOSE, nil, 1, 7], + [:T_ELEM_CLOSE, nil, 1, 11] ] end example 'lex nested elements and text nodes' do lex('<p>Foo<a>bar</a>baz</p>
').should == [ - [:T_ELEM_OPEN, 'p', 1, 1], + [:T_ELEM_OPEN, nil, 1, 1], + [:T_ELEM_NAME, 'p', 1, 2], [:T_TEXT, 'Foo', 1, 4], - [:T_ELEM_OPEN, 'a', 1, 7], + [:T_ELEM_OPEN, nil, 1, 7], + [:T_ELEM_NAME, 'a', 1, 8], [:T_TEXT, 'bar', 1, 10], - [:T_ELEM_CLOSE, 'a', 1, 13], + [:T_ELEM_CLOSE, nil, 1, 13], [:T_TEXT, 'baz', 1, 17], - [:T_ELEM_CLOSE, 'p', 1, 20] + [:T_ELEM_CLOSE, nil, 1, 20] ] end end @@ -60,18 +68,31 @@ describe Oga::Lexer do context 'void elements' do example 'lex a void element' do lex('<br />
').should == [ - [:T_ELEM_OPEN, 'br', 1, 1], + [:T_ELEM_OPEN, nil, 1, 1], + [:T_ELEM_NAME, 'br', 1, 2], [:T_ELEM_CLOSE, nil, 1, 6] ] end example 'lex a void element with an attribute' do lex('<br class="foo" />
').should == [ - [:T_ELEM_OPEN, 'br', 1, 1], + [:T_ELEM_OPEN, nil, 1, 1], + [:T_ELEM_NAME, 'br', 1, 2], [:T_ATTR, 'class', 1, 5], [:T_STRING, 'foo', 1, 11], [:T_ELEM_CLOSE, nil, 1, 18] ] end end + + context 'elements with namespaces' do + example 'lex an element with namespaces' do + lex('<foo:p></p>
').should == [ + [:T_ELEM_OPEN, nil, 1, 1], + [:T_ELEM_NS, 'foo', 1, 2], + [:T_ELEM_NAME, 'p', 1, 6], + [:T_ELEM_CLOSE, nil, 1, 8] + ] + end + end end