From 6ad51704766446227f4d63f4ff89f970aa4e54ad Mon Sep 17 00:00:00 2001 From: Yorick Peterse Date: Thu, 14 Aug 2014 21:51:58 +0200 Subject: [PATCH] Support for lexing/parsing XPath type tests. Unlike what I thought before, syntax such as "node()" is not a function call. Instead, it is a special node test that tests the *types* of nodes, not their names. --- lib/oga/xpath/lexer.rl | 19 ++++++++++++++++--- lib/oga/xpath/parser.y | 7 ++++++- spec/oga/xpath/lexer/axes_spec.rb | 16 ++++------------ spec/oga/xpath/lexer/general_spec.rb | 8 ++------ spec/oga/xpath/lexer/node_type_spec.rb | 23 +++++++++++++++++++++++ spec/oga/xpath/parser/axes_spec.rb | 6 +++--- spec/oga/xpath/parser/node_type_spec.rb | 23 +++++++++++++++++++++++ 7 files changed, 77 insertions(+), 25 deletions(-) create mode 100644 spec/oga/xpath/lexer/node_type_spec.rb create mode 100644 spec/oga/xpath/parser/node_type_spec.rb diff --git a/lib/oga/xpath/lexer.rl b/lib/oga/xpath/lexer.rl index a594cce..9c66e93 100644 --- a/lib/oga/xpath/lexer.rl +++ b/lib/oga/xpath/lexer.rl @@ -249,9 +249,7 @@ module Oga # added on lexer level to make it easier to handle these cases on # parser/evaluator level. if AXIS_EMIT_NODE.include?(value) - add_token(:T_IDENT, 'node') - add_token(:T_LPAREN) - add_token(:T_RPAREN) + add_token(:T_NODE_TYPE, 'node') if AXIS_EMIT_EXTRA_SLASH.include?(value) and te != eof add_token(:T_SLASH) @@ -305,10 +303,25 @@ module Oga | op_sub ; + # Node types + # + # While these look like functions, they are actually node tests. For + # example, comment() matches all comment nodes. + # + # See http://www.w3.org/TR/xpath/#NT-NodeType for more information. 
+ + node_type = 'comment' | 'text' | 'processing-instruction' | 'node'; + + action emit_node_type { + emit(:T_NODE_TYPE, ts, te - 2) + } + main := |* operator; whitespace | slash | lparen | rparen | comma | colon; + node_type '()' => emit_node_type; + '[' => { add_token(:T_LBRACK) }; ']' => { add_token(:T_RBRACK) }; diff --git a/lib/oga/xpath/parser.y b/lib/oga/xpath/parser.y index 316086e..118d49d 100644 --- a/lib/oga/xpath/parser.y +++ b/lib/oga/xpath/parser.y @@ -3,7 +3,7 @@ # class Oga::XPath::Parser -token T_AXIS T_COLON T_COMMA T_FLOAT T_INT T_IDENT +token T_AXIS T_COLON T_COMMA T_FLOAT T_INT T_IDENT T_NODE_TYPE token T_LBRACK T_RBRACK T_LPAREN T_RPAREN T_SLASH T_STRING token T_PIPE T_AND T_OR T_ADD T_DIV T_MOD T_EQ T_NEQ T_LT T_GT T_LTE T_GTE token T_SUB T_MUL @@ -76,6 +76,11 @@ rule node_test : node_name { s(:test, *val[0]) } | node_name predicate { s(:test, *val[0], val[1]) } + | node_type { val[0] } + ; + + node_type + : T_NODE_TYPE { s(:node_type, val[0]) } ; node_name diff --git a/spec/oga/xpath/lexer/axes_spec.rb b/spec/oga/xpath/lexer/axes_spec.rb index d804cdc..aab6e8c 100644 --- a/spec/oga/xpath/lexer/axes_spec.rb +++ b/spec/oga/xpath/lexer/axes_spec.rb @@ -120,9 +120,7 @@ describe Oga::XPath::Lexer do lex_xpath('//A').should == [ [:T_SLASH, nil], [:T_AXIS, 'descendant-or-self'], - [:T_IDENT, 'node'], - [:T_LPAREN, nil], - [:T_RPAREN, nil], + [:T_NODE_TYPE, 'node'], [:T_SLASH, nil], [:T_IDENT, 'A'] ] @@ -132,9 +130,7 @@ describe Oga::XPath::Lexer do lex_xpath('/..').should == [ [:T_SLASH, nil], [:T_AXIS, 'parent'], - [:T_IDENT, 'node'], - [:T_LPAREN, nil], - [:T_RPAREN, nil], + [:T_NODE_TYPE, 'node'] ] end @@ -142,18 +138,14 @@ describe Oga::XPath::Lexer do lex_xpath('/.').should == [ [:T_SLASH, nil], [:T_AXIS, 'self'], - [:T_IDENT, 'node'], - [:T_LPAREN, nil], - [:T_RPAREN, nil], + [:T_NODE_TYPE, 'node'] ] end example 'lex the . 
axis followed by a path' do lex_xpath('./foo').should == [ [:T_AXIS, 'self'], - [:T_IDENT, 'node'], - [:T_LPAREN, nil], - [:T_RPAREN, nil], + [:T_NODE_TYPE, 'node'], [:T_SLASH, nil], [:T_IDENT, 'foo'] ] diff --git a/spec/oga/xpath/lexer/general_spec.rb b/spec/oga/xpath/lexer/general_spec.rb index f027a48..05dbe38 100644 --- a/spec/oga/xpath/lexer/general_spec.rb +++ b/spec/oga/xpath/lexer/general_spec.rb @@ -37,9 +37,7 @@ describe Oga::XPath::Lexer do [:T_IDENT, 'wikimedia'], [:T_SLASH, nil], [:T_AXIS, 'descendant-or-self'], - [:T_IDENT, 'node'], - [:T_LPAREN, nil], - [:T_RPAREN, nil], + [:T_NODE_TYPE, 'node'], [:T_SLASH, nil], [:T_IDENT, 'editions'] ] @@ -66,9 +64,7 @@ describe Oga::XPath::Lexer do [:T_SLASH, nil], [:T_IDENT, 'edition'], [:T_SLASH, nil], - [:T_IDENT, 'text'], - [:T_LPAREN, nil], - [:T_RPAREN, nil] + [:T_NODE_TYPE, 'text'] ] end end diff --git a/spec/oga/xpath/lexer/node_type_spec.rb b/spec/oga/xpath/lexer/node_type_spec.rb new file mode 100644 index 0000000..5e052b7 --- /dev/null +++ b/spec/oga/xpath/lexer/node_type_spec.rb @@ -0,0 +1,23 @@ +require 'spec_helper' + +describe Oga::XPath::Lexer do + context 'node types' do + example 'lex the "node" type' do + lex_xpath('node()').should == [[:T_NODE_TYPE, 'node']] + end + + example 'lex the "comment" type' do + lex_xpath('comment()').should == [[:T_NODE_TYPE, 'comment']] + end + + example 'lex the "text" type' do + lex_xpath('text()').should == [[:T_NODE_TYPE, 'text']] + end + + example 'lex the "processing-instruction" type' do + lex_xpath('processing-instruction()').should == [ + [:T_NODE_TYPE, 'processing-instruction'] + ] + end + end +end diff --git a/spec/oga/xpath/parser/axes_spec.rb b/spec/oga/xpath/parser/axes_spec.rb index 1951016..b60438c 100644 --- a/spec/oga/xpath/parser/axes_spec.rb +++ b/spec/oga/xpath/parser/axes_spec.rb @@ -105,7 +105,7 @@ describe Oga::XPath::Parser do example 'parse the // axis' do parse_xpath('//A').should == s( :absolute_path, - s(:axis, 'descendant-or-self', 
s(:call, 'node')), + s(:axis, 'descendant-or-self', s(:node_type, 'node')), s(:axis, 'child', s(:test, nil, 'A')) ) end @@ -113,14 +113,14 @@ describe Oga::XPath::Parser do example 'parse the .. axis' do parse_xpath('/..').should == s( :absolute_path, - s(:axis, 'parent', s(:call, 'node')) + s(:axis, 'parent', s(:node_type, 'node')) ) end example 'parse the . axis' do parse_xpath('/.').should == s( :absolute_path, - s(:axis, 'self', s(:call, 'node')) + s(:axis, 'self', s(:node_type, 'node')) ) end end diff --git a/spec/oga/xpath/parser/node_type_spec.rb b/spec/oga/xpath/parser/node_type_spec.rb new file mode 100644 index 0000000..cfabc82 --- /dev/null +++ b/spec/oga/xpath/parser/node_type_spec.rb @@ -0,0 +1,23 @@ +require 'spec_helper' + +describe Oga::XPath::Parser do + context 'node types' do + example 'parse the "node" type' do + parse_xpath('node()').should == s(:axis, 'child', s(:node_type, 'node')) + end + + example 'parse the "comment" type' do + parse_xpath('comment()') + .should == s(:axis, 'child', s(:node_type, 'comment')) + end + + example 'parse the "text" type' do + parse_xpath('text()').should == s(:axis, 'child', s(:node_type, 'text')) + end + + example 'parse the "processing-instruction" type' do + parse_xpath('processing-instruction()') + .should == s(:axis, 'child', s(:node_type, 'processing-instruction')) + end + end +end