diff --git a/Rakefile b/Rakefile index dad27a3..e47bda3 100644 --- a/Rakefile +++ b/Rakefile @@ -8,10 +8,9 @@ GEMSPEC = Gem::Specification.load('oga.gemspec') LEXER_INPUT = 'lib/oga/lexer.rl' LEXER_OUTPUT = 'lib/oga/lexer.rb' -#PARSER_INPUT = 'lib/oga/parser.y' -#PARSER_OUTPUT = 'lib/oga/parser.rb' +HTML_PARSER = 'lib/oga/parser/html.rb' -GENERATED_FILES = ['coverage', 'yardoc', LEXER_OUTPUT] +GENERATED_FILES = ['coverage', 'yardoc', LEXER_OUTPUT, HTML_PARSER] GENERATED_FILES.each do |file| CLEAN << file if File.exist?(file) diff --git a/lib/oga.rb b/lib/oga.rb index f8be7b3..997e7fa 100644 --- a/lib/oga.rb +++ b/lib/oga.rb @@ -2,3 +2,4 @@ require 'ast' require_relative 'oga/ast/node' require_relative 'oga/lexer' +require_relative 'oga/parser/html' diff --git a/lib/oga/ast/node.rb b/lib/oga/ast/node.rb index 0accb9a..5b49908 100644 --- a/lib/oga/ast/node.rb +++ b/lib/oga/ast/node.rb @@ -3,7 +3,7 @@ module Oga ## # class Node < ::AST::Node - + attr_reader :line, :column end # Node end # AST end # Oga diff --git a/lib/oga/parser/html.rb b/lib/oga/parser/html.rb new file mode 100644 index 0000000..d0cb126 --- /dev/null +++ b/lib/oga/parser/html.rb @@ -0,0 +1,223 @@ +# +# DO NOT MODIFY!!!! +# This file is automatically generated by Racc 1.4.11 +# from Racc grammer file "". +# + +require 'racc/parser.rb' +module Oga + module Parser + class HTML < Racc::Parser + + + def initialize + @lexer = Lexer.new + end + + def reset + @lines = [] + @line = 1 + @column = 1 + end + + def s(type, *children) + return AST::Node.new( + type, + children.flatten, + :line => @line, + :column => @column + ) + end + + def next_token + type, value, line, column = @tokens.shift + + @line = line if line + @column = column if column + + return type ? [type, value] : [false, false] + end + + def on_error(type, value, stack) + name = token_to_str(type) + line_str = @lines[@line - 1] + indicator = '~' * (@column - 1) + '^' + + raise Racc::ParseError, <<-EOF.strip +Failed to parse the supplied input. + +Reason: unexpected #{name} with value #{value.inspect} +Location: line #{@line}, column #{@column} + +Offending code: + +#{line_str} +#{indicator} + +Current stack: + +#{stack.inspect} + EOF + end + + def parse(string) + @lines = string.lines + @tokens = @lexer.lex(string) + ast = do_parse + + reset + + return ast + end + +# vim: set ft=racc: +##### State transition tables begin ### + +racc_action_table = [ + 5, 7, 5, 9, 11, 12, 13, 14, 15, 16, + 17 ] + +racc_action_check = [ + 0, 1, 2, 5, 6, 7, 9, 10, 14, 15, + 16 ] + +racc_action_pointer = [ + -4, 1, -2, nil, nil, -12, -11, 5, nil, 1, + 3, nil, nil, nil, 2, -6, 5, nil ] + +racc_action_default = [ + -2, -9, -1, -4, -5, -9, -9, -9, -3, -9, + -9, -8, 18, -6, -9, -9, -9, -7 ] + +racc_goto_table = [ + 3, 1, 8, 2, 10 ] + +racc_goto_check = [ + 3, 1, 3, 2, 6 ] + +racc_goto_pointer = [ + nil, 1, 3, 0, nil, nil, -2 ] + +racc_goto_default = [ + nil, nil, nil, nil, 4, 6, nil ] + +racc_reduce_table = [ + 0, 0, :racc_error, + 1, 17, :_reduce_1, + 0, 17, :_reduce_2, + 2, 18, :_reduce_3, + 1, 18, :_reduce_4, + 1, 19, :_reduce_none, + 3, 21, :_reduce_6, + 6, 20, :_reduce_7, + 1, 22, :_reduce_none ] + +racc_reduce_n = 9 + +racc_shift_n = 18 + +racc_token_table = { + false => 0, + :error => 1, + :T_SPACE => 2, + :T_NEWLINE => 3, + :T_SMALLER => 4, + :T_GREATER => 5, + :T_SLASH => 6, + :T_DQUOTE => 7, + :T_SQUOTE => 8, + :T_DASH => 9, + :T_RBRACKET => 10, + :T_LBRACKET => 11, + :T_COLON => 12, + :T_BANG => 13, + :T_EQUALS => 14, + :T_TEXT => 15 } + +racc_nt_base = 16 + +racc_use_result_var = false + +Racc_arg = [ + racc_action_table, + racc_action_check, + racc_action_default, + racc_action_pointer, + racc_goto_table, + racc_goto_check, + racc_goto_default, + racc_goto_pointer, + racc_nt_base, + racc_reduce_table, + racc_token_table, + racc_shift_n, + racc_reduce_n, + racc_use_result_var ] + +Racc_token_to_s_table = [ + "$end", + "error", + "T_SPACE", + "T_NEWLINE", + "T_SMALLER", + "T_GREATER", + "T_SLASH", + "T_DQUOTE", + "T_SQUOTE", + "T_DASH", + "T_RBRACKET", + "T_LBRACKET", + "T_COLON", + "T_BANG", + "T_EQUALS", + "T_TEXT", + "$start", + "document", + "expressions", + "expression", + "tag", + "tag_start", + "tag_body" ] + +Racc_debug_parser = false + +##### State transition tables end ##### + +# reduce 0 omitted + +def _reduce_1(val, _values) + s(:document, val[0]) +end + +def _reduce_2(val, _values) + s(:document) +end + +def _reduce_3(val, _values) + val.compact +end + +def _reduce_4(val, _values) + val[0] +end + +# reduce 5 omitted + +def _reduce_6(val, _values) + val[1] +end + +def _reduce_7(val, _values) + s(:element, val[0], val[1]) + +end + +# reduce 8 omitted + +def _reduce_none(val, _values) + val[0] +end + + end # class HTML + end # module Parser + end # module Oga diff --git a/lib/oga/parser/html.y b/lib/oga/parser/html.y new file mode 100644 index 0000000..e1d1d38 --- /dev/null +++ b/lib/oga/parser/html.y @@ -0,0 +1,104 @@ +class Oga::Parser::HTML + +token T_SPACE T_NEWLINE T_SMALLER T_GREATER T_SLASH +token T_DQUOTE T_SQUOTE T_DASH T_RBRACKET T_LBRACKET +token T_COLON T_BANG T_EQUALS T_TEXT + +options no_result_var + +rule + document + : expressions { s(:document, val[0]) } + | /* none */ { s(:document) } + ; + + expressions + : expressions expression { val.compact } + | expression { val[0] } + ; + + expression + : tag + ; + + tag_start + #
+ : T_SMALLER T_TEXT T_GREATER { val[1] } + ; + + tag + #
foo
+ : tag_start tag_body T_SMALLER T_SLASH T_TEXT T_GREATER + { + s(:element, val[0], val[1]) + } + ; + + tag_body + : T_TEXT + ; +end + +---- inner + + def initialize + @lexer = Lexer.new + end + + def reset + @lines = [] + @line = 1 + @column = 1 + end + + def s(type, *children) + return AST::Node.new( + type, + children.flatten, + :line => @line, + :column => @column + ) + end + + def next_token + type, value, line, column = @tokens.shift + + @line = line if line + @column = column if column + + return type ? [type, value] : [false, false] + end + + def on_error(type, value, stack) + name = token_to_str(type) + line_str = @lines[@line - 1] + indicator = '~' * (@column - 1) + '^' + + raise Racc::ParseError, <<-EOF.strip +Failed to parse the supplied input. + +Reason: unexpected #{name} with value #{value.inspect} +Location: line #{@line}, column #{@column} + +Offending code: + +#{line_str} +#{indicator} + +Current stack: + +#{stack.inspect} + EOF + end + + def parse(string) + @lines = string.lines + @tokens = @lexer.lex(string) + ast = do_parse + + reset + + return ast + end + +# vim: set ft=racc: diff --git a/task/generate.rake b/task/generate.rake index cb10232..ab6ede3 100644 --- a/task/generate.rake +++ b/task/generate.rake @@ -1,5 +1,5 @@ desc 'Generates auto-generated files' -task :generate => [:lexer] +task :generate => [:lexer, :parser] desc 'Regenerates auto-generated files' task :regenerate => [:clean, :generate] diff --git a/task/parser.rake b/task/parser.rake index 18dfcc9..2a84ff4 100644 --- a/task/parser.rake +++ b/task/parser.rake @@ -1,4 +1,3 @@ -=begin rule '.rb' => '.y' do |task| Cliver.assert('racc', '~> 1.4') @@ -6,5 +5,4 @@ rule '.rb' => '.y' do |task| end desc 'Generates the parser' -task :parser => [PARSER_OUTPUT] -=end +task :parser => [HTML_PARSER]