From fd307a0fcc3616ded272432ba27f972a9113953a Mon Sep 17 00:00:00 2001 From: Yorick Peterse Date: Mon, 8 Jun 2015 06:46:08 +0200 Subject: [PATCH] Support HTML attributes without starting quotes This allows the lexer to process input such as `<a href=foo">`, where the attribute value has a closing quote but no opening quote. For XML input the lexer still expects properly opened/closed attribute values. Fixes #109 --- ext/ragel/base_lexer.rl | 6 +++--- spec/oga/html/lexer/attributes_spec.rb | 22 ++++++++++++++++++++++ 2 files changed, 25 insertions(+), 3 deletions(-) diff --git a/ext/ragel/base_lexer.rl b/ext/ragel/base_lexer.rl index d1e3270..87a99b4 100644 --- a/ext/ragel/base_lexer.rl +++ b/ext/ragel/base_lexer.rl @@ -422,9 +422,9 @@ # Characters that can be used for unquoted HTML attribute values. # See https://html.spec.whatwg.org/multipage/introduction.html#intro-early-example # for more info. - html_unquoted_value = ^( - squote | dquote | '`' | '=' | '<' | '>' | whitespace_or_newline - )+; + html_unquoted_value = + ^(squote | dquote | whitespace_or_newline) + ^('`' | '=' | '<' | '>' | whitespace_or_newline)+; # Machine used after matching the "=" of an attribute and just before moving # into the actual attribute value. 
diff --git a/spec/oga/html/lexer/attributes_spec.rb b/spec/oga/html/lexer/attributes_spec.rb index e5a5739..61d8b63 100644 --- a/spec/oga/html/lexer/attributes_spec.rb +++ b/spec/oga/html/lexer/attributes_spec.rb @@ -58,6 +58,28 @@ describe Oga::XML::Lexer do ] end + it 'lexes an attribute with a value without a starting double quote' do + lex_html('').should == [ + [:T_ELEM_NAME, 'a', 1], + [:T_ATTR, 'href', 1], + [:T_STRING_SQUOTE, nil, 1], + [:T_STRING_BODY, 'foo"', 1], + [:T_STRING_SQUOTE, nil, 1], + [:T_ELEM_END, nil, 1] + ] + end + + it 'lexes an attribute with a value without a starting single quote' do + lex_html("").should == [ + [:T_ELEM_NAME, 'a', 1], + [:T_ATTR, 'href', 1], + [:T_STRING_SQUOTE, nil, 1], + [:T_STRING_BODY, "foo'", 1], + [:T_STRING_SQUOTE, nil, 1], + [:T_ELEM_END, nil, 1] + ] + end + it 'lexes an element with spaces around the attribute equal sign' do lex_html('

').should == [ [:T_ELEM_NAME, 'p', 1],