diff --git a/lib/oga/xml/lexer.rb b/lib/oga/xml/lexer.rb
index e2c724d..465fd2a 100644
--- a/lib/oga/xml/lexer.rb
+++ b/lib/oga/xml/lexer.rb
@@ -50,6 +50,10 @@ module Oga
%w{thead tbody tfoot tr caption colgroup col}
)
+ HTML_SCRIPT_ELEMENTS = Whitelist.new(%w{script template})
+
+ HTML_TABLE_ROW_ELEMENTS = Whitelist.new(%w{tr}) + HTML_SCRIPT_ELEMENTS
+
# Elements that should be closed automatically before a new opening tag is
# processed.
HTML_CLOSE_SELF = {
@@ -71,11 +75,11 @@ module Oga
'option' => Blacklist.new(%w{optgroup option}),
'colgroup' => Whitelist.new(%w{col template}),
'caption' => HTML_TABLE_ALLOWED.to_blacklist,
- 'table' => HTML_TABLE_ALLOWED,
- 'thead' => Whitelist.new(%w{tr}),
- 'tbody' => Whitelist.new(%w{tr}),
- 'tfoot' => Whitelist.new(%w{tr}),
- 'tr' => Whitelist.new(%w{td th}),
+ 'table' => HTML_TABLE_ALLOWED + HTML_SCRIPT_ELEMENTS,
+ 'thead' => HTML_TABLE_ROW_ELEMENTS,
+ 'tbody' => HTML_TABLE_ROW_ELEMENTS,
+ 'tfoot' => HTML_TABLE_ROW_ELEMENTS,
+ 'tr' => Whitelist.new(%w{td th}) + HTML_SCRIPT_ELEMENTS,
'td' => Blacklist.new(%w{td th}) + HTML_TABLE_ALLOWED,
'th' => Blacklist.new(%w{td th}) + HTML_TABLE_ALLOWED
}
diff --git a/spec/oga/html/lexer/closing_rules/table_spec.rb b/spec/oga/html/lexer/closing_rules/table_spec.rb
index 5a7f437..160f80c 100644
--- a/spec/oga/html/lexer/closing_rules/table_spec.rb
+++ b/spec/oga/html/lexer/closing_rules/table_spec.rb
@@ -1,30 +1,46 @@
require 'spec_helper'
describe Oga::XML::Lexer do
- describe 'HTML tables' do
- describe 'with unclosed <tr> tags' do
- it 'lexes a <tr> tag followed by a <tbody> tag' do
- lex_html('<tr>foo<tbody></tbody>').should == [
- [:T_ELEM_NAME, 'tr', 1],
- [:T_TEXT, 'foo', 1],
- [:T_ELEM_END, nil, 1],
- [:T_ELEM_NAME, 'tbody', 1],
- [:T_ELEM_END, nil, 1]
- ]
- end
+ describe 'using HTML <table> elements' do
+ it 'lexes two unclosed <table> elements following each other as separate elements' do
+ lex_html('<table>foo<table>bar').should == [
+ [:T_ELEM_NAME, 'table', 1],
+ [:T_TEXT, 'foo', 1],
+ [:T_ELEM_END, nil, 1],
+ [:T_ELEM_NAME, 'table', 1],
+ [:T_TEXT, 'bar', 1],
+ [:T_ELEM_END, nil, 1]
+ ]
+ end
- it 'lexes an unclosed <th> tag followed by a <tbody> tag' do
- lex_html('<tr><th>foo<tbody>bar</tbody>').should == [
- [:T_ELEM_NAME, 'tr', 1],
- [:T_ELEM_NAME, 'th', 1],
- [:T_TEXT, 'foo', 1],
- [:T_ELEM_END, nil, 1],
- [:T_ELEM_END, nil, 1],
- [:T_ELEM_NAME, 'tbody', 1],
- [:T_TEXT, 'bar', 1],
- [:T_ELEM_END, nil, 1]
- ]
- end
+ it 'lexes a <table> element containing a <thead> element' do
+ lex_html('<table><thead>foo</thead></table>').should == [
+ [:T_ELEM_NAME, 'table', 1],
+ [:T_ELEM_NAME, 'thead', 1],
+ [:T_TEXT, 'foo', 1],
+ [:T_ELEM_END, nil, 1],
+ [:T_ELEM_END, nil, 1]
+ ]
+ end
+
+ it 'lexes a <table> element containing a <script> element' do
+ lex_html('<table><script>foo</script></table>').should == [
+ [:T_ELEM_NAME, 'table', 1],
+ [:T_ELEM_NAME, 'script', 1],
+ [:T_TEXT, 'foo', 1],
+ [:T_ELEM_END, nil, 1],
+ [:T_ELEM_END, nil, 1]
+ ]
+ end
+
+ it 'lexes a <table> element containing a <template> element' do
+ lex_html('<table><template>foo</template></table>').should == [
+ [:T_ELEM_NAME, 'table', 1],
+ [:T_ELEM_NAME, 'template', 1],
+ [:T_TEXT, 'foo', 1],
+ [:T_ELEM_END, nil, 1],
+ [:T_ELEM_END, nil, 1]
+ ]
end
end
end
diff --git a/spec/oga/html/lexer/closing_rules/tbody_spec.rb b/spec/oga/html/lexer/closing_rules/tbody_spec.rb
index f024f1c..4864dae 100644
--- a/spec/oga/html/lexer/closing_rules/tbody_spec.rb
+++ b/spec/oga/html/lexer/closing_rules/tbody_spec.rb
@@ -33,5 +33,25 @@ describe Oga::XML::Lexer do
[:T_ELEM_END, nil, 1]
]
end
+
+ it 'lexes a <tbody> element containing a <script> element' do
+ lex_html('<tbody><script>foo</script></tbody>').should == [
+ [:T_ELEM_NAME, 'tbody', 1],
+ [:T_ELEM_NAME, 'script', 1],
+ [:T_TEXT, 'foo', 1],
+ [:T_ELEM_END, nil, 1],
+ [:T_ELEM_END, nil, 1]
+ ]
+ end
+
+ it 'lexes a <tbody> element containing a <template> element' do
+ lex_html('<tbody><template>foo</template></tbody>').should == [
+ [:T_ELEM_NAME, 'tbody', 1],
+ [:T_ELEM_NAME, 'template', 1],
+ [:T_TEXT, 'foo', 1],
+ [:T_ELEM_END, nil, 1],
+ [:T_ELEM_END, nil, 1]
+ ]
+ end
end
end
diff --git a/spec/oga/html/lexer/closing_rules/tfoot_spec.rb b/spec/oga/html/lexer/closing_rules/tfoot_spec.rb
index 4acb170..2bf2c90 100644
--- a/spec/oga/html/lexer/closing_rules/tfoot_spec.rb
+++ b/spec/oga/html/lexer/closing_rules/tfoot_spec.rb
@@ -33,5 +33,25 @@ describe Oga::XML::Lexer do
[:T_ELEM_END, nil, 1]
]
end
+
+ it 'lexes a <tfoot> element containing a <script> element' do
+ lex_html('<tfoot><script>foo</script></tfoot>').should == [
+ [:T_ELEM_NAME, 'tfoot', 1],
+ [:T_ELEM_NAME, 'script', 1],
+ [:T_TEXT, 'foo', 1],
+ [:T_ELEM_END, nil, 1],
+ [:T_ELEM_END, nil, 1]
+ ]
+ end
+
+ it 'lexes a <tfoot> element containing a <template> element' do
+ lex_html('<tfoot><template>foo</template></tfoot>').should == [
+ [:T_ELEM_NAME, 'tfoot', 1],
+ [:T_ELEM_NAME, 'template', 1],
+ [:T_TEXT, 'foo', 1],
+ [:T_ELEM_END, nil, 1],
+ [:T_ELEM_END, nil, 1]
+ ]
+ end
end
end
diff --git a/spec/oga/html/lexer/closing_rules/thead_spec.rb b/spec/oga/html/lexer/closing_rules/thead_spec.rb
index efcaf27..f9b1d71 100644
--- a/spec/oga/html/lexer/closing_rules/thead_spec.rb
+++ b/spec/oga/html/lexer/closing_rules/thead_spec.rb
@@ -33,5 +33,25 @@ describe Oga::XML::Lexer do
[:T_ELEM_END, nil, 1]
]
end
+
+ it 'lexes a <thead> element containing a <script> element' do
+ lex_html('<thead><script>foo</script></thead>').should == [
+ [:T_ELEM_NAME, 'thead', 1],
+ [:T_ELEM_NAME, 'script', 1],
+ [:T_TEXT, 'foo', 1],
+ [:T_ELEM_END, nil, 1],
+ [:T_ELEM_END, nil, 1]
+ ]
+ end
+
+ it 'lexes a <thead> element containing a <template> element' do
+ lex_html('<thead><template>foo</template></thead>').should == [
+ [:T_ELEM_NAME, 'thead', 1],
+ [:T_ELEM_NAME, 'template', 1],
+ [:T_TEXT, 'foo', 1],
+ [:T_ELEM_END, nil, 1],
+ [:T_ELEM_END, nil, 1]
+ ]
+ end
end
end
diff --git a/spec/oga/html/lexer/closing_rules/tr_spec.rb b/spec/oga/html/lexer/closing_rules/tr_spec.rb
index 5dd1f56..4952b17 100644
--- a/spec/oga/html/lexer/closing_rules/tr_spec.rb
+++ b/spec/oga/html/lexer/closing_rules/tr_spec.rb
@@ -32,5 +32,25 @@ describe Oga::XML::Lexer do
[:T_ELEM_END, nil, 1]
]
end
+
+ it 'lexes a <tr> element containing a <script> element' do
+ lex_html('<tr><script>foo</script></tr>').should == [
+ [:T_ELEM_NAME, 'tr', 1],
+ [:T_ELEM_NAME, 'script', 1],
+ [:T_TEXT, 'foo', 1],
+ [:T_ELEM_END, nil, 1],
+ [:T_ELEM_END, nil, 1]
+ ]
+ end
+
+ it 'lexes a <tr> element containing a <template> element' do
+ lex_html('<tr><template>foo</template></tr>').should == [
+ [:T_ELEM_NAME, 'tr', 1],
+ [:T_ELEM_NAME, 'template', 1],
+ [:T_TEXT, 'foo', 1],
+ [:T_ELEM_END, nil, 1],
+ [:T_ELEM_END, nil, 1]
+ ]
+ end
end
end