oga/lib/oga/parser.y

187 lines
3.3 KiB
Plaintext

# Racc grammar for Oga::Parser. It consumes the token stream handed out by
# `next_token` (see the inner section) and builds an AST using the `s` helper.
class Oga::Parser
# Token types this grammar accepts.
token T_STRING T_TEXT
token T_DOCTYPE_START T_DOCTYPE_END T_DOCTYPE_TYPE
token T_CDATA_START T_CDATA_END
token T_COMMENT_START T_COMMENT_END
token T_ELEM_OPEN T_ELEM_NAME T_ELEM_NS T_ELEM_CLOSE T_ATTR
# With no_result_var the value of each action block is used as the value of
# the rule, instead of assigning to a `result` variable.
options no_result_var
rule
# Top-level rule: wraps everything in a (document) node.
document
: expressions { s(:document, val[0]) }
| /* none */ { s(:document) }
;
# One or more expressions. The recursive alternative returns a nested Array
# ([previous expressions, expression]); `s` flattens its children so the
# nesting does not end up in the resulting node. `compact` drops a nil coming
# from the empty alternative.
#
# NOTE(review): both `document` and `expressions` have an empty alternative,
# which looks ambiguous for empty input and for the recursive alternative;
# confirm whether Racc reports conflicts for this grammar.
expressions
: expressions expression { val.compact }
| expression { val[0] }
| /* none */ { nil }
;
# A single node of any supported kind. No action is given, so the value of the
# matched rule is passed through.
expression
: doctype
| cdata
| comment
| element
| text
;
# Doctypes
doctype
# <!DOCTYPE html>
: T_DOCTYPE_START T_DOCTYPE_END { s(:doctype) }
# <!DOCTYPE html PUBLIC>
| T_DOCTYPE_START T_DOCTYPE_TYPE T_DOCTYPE_END
{
s(:doctype, val[1])
}
# <!DOCTYPE html PUBLIC "foo">
| T_DOCTYPE_START T_DOCTYPE_TYPE T_STRING T_DOCTYPE_END
{
s(:doctype, val[1], val[2])
}
# <!DOCTYPE html PUBLIC "foo" "bar">
| T_DOCTYPE_START T_DOCTYPE_TYPE T_STRING T_STRING T_DOCTYPE_END
{
s(:doctype, val[1], val[2], val[3])
}
;
# CDATA tags
cdata
# <![CDATA[]]>
: T_CDATA_START T_CDATA_END { s(:cdata) }
# <![CDATA[foo]]>
| T_CDATA_START T_TEXT T_CDATA_END { s(:cdata, val[1]) }
;
# Comments
comment
# <!---->
: T_COMMENT_START T_COMMENT_END { s(:comment) }
# <!-- foo -->
| T_COMMENT_START T_TEXT T_COMMENT_END { s(:comment, val[1]) }
;
# Elements
#
# The children passed to `s` are: the [namespace, name] pair from
# `element_open`, the (attributes) node or nil, and the nested expressions or
# nil. The pair is spread into the node's children because `s` flattens them.
element
: element_open attributes expressions T_ELEM_CLOSE
{
s(:element, val[0], val[1], val[2])
}
;
# Opening tag of an element; yields [namespace, name] with a nil namespace
# when none was given.
element_open
# <p>
: T_ELEM_OPEN T_ELEM_NAME { [nil, val[1]] }
# <foo:p>
| T_ELEM_OPEN T_ELEM_NS T_ELEM_NAME { [val[1], val[2]] }
;
# Attributes
#
# Yields an (attributes) node wrapping every attribute, or nil when the
# element has no attributes.
attributes
: attributes_ { s(:attributes, val[0]) }
| /* none */ { nil }
;
# One or more attributes, collected as (possibly nested) Arrays that `s`
# flattens later on.
attributes_
: attributes_ attribute { val }
| attribute { val }
;
attribute
# foo
: T_ATTR { s(:attribute, val[0]) }
# foo="bar"
| T_ATTR T_STRING { s(:attribute, val[0], val[1]) }
;
# Plain text
text
: T_TEXT { s(:text, val[0]) }
;
end
---- inner
##
# Creates a parser together with the lexer it reads tokens from.
#
# The line bookkeeping is initialised right away (via `reset`) so that nodes
# created during the first parse carry the same default line number as nodes
# created during any later parse, instead of nil.
#
# @param [Hash] options Options that are passed on to `Lexer.new`.
#
def initialize(options = {})
  @lexer = Lexer.new(options)

  reset
end
##
# Puts the parser's line bookkeeping back into its initial state: no source
# lines stored and the current line set to 1.
#
def reset
  @lines = []
  @line  = 1
end
##
# Builds an AST node of the given type, tagged with the current line number.
#
# Nested Arrays in `children` are flattened so grammar actions can pass the
# raw (nested) values they collected.
#
# @param [Symbol] type The type of the node.
# @param [Array] children The child values of the node.
# @return [AST::Node]
#
def s(type, *children)
  flat_children = children.flatten

  AST::Node.new(type, flat_children, :line => @line)
end
##
# Hands the next token to the generated parser.
#
# Tokens are taken from the front of @tokens as [type, value, line] Arrays.
# When a line number is present it becomes the current line. Once there is no
# token left (or the token has no type) `[false, false]` is returned to signal
# the end of the input.
#
# @return [Array] A [type, value] pair or [false, false].
#
def next_token
  token_type, token_value, token_line = @tokens.shift

  @line = token_line if token_line

  return [false, false] unless token_type

  [token_type, token_value]
end
##
# Error hook of the generated parser: raises a Racc::ParseError describing
# the unexpected token together with the surrounding source lines.
#
# @param [Fixnum] type The internal ID of the offending token type.
# @param [Mixed] value The value of the offending token.
# @param [Array] stack The current value stack (unused).
# @raise [Racc::ParseError]
#
def on_error(type, value, stack)
  name    = token_to_str(type)
  index   = @line - 1
  context = []

  # Show up to 5 lines before and after the offending line (if they exist).
  (-5..5).each do |offset|
    position = index + offset

    # A negative position would make Array#[] count from the end of the
    # document, showing unrelated trailing lines with bogus line numbers.
    next if position < 0

    line = @lines[position]

    next unless line

    prefix = offset == 0 ? '=> ' : ' '

    context << "#{prefix}#{@line + offset}: #{line}"
  end

  lines = context.join

  raise Racc::ParseError, <<-EOF
Unexpected #{name} with value #{value.inspect} on line #{@line}:
#{lines}
  EOF
end
##
# Parses the given String and returns the value produced by the generated
# parser (`do_parse`).
#
# The source lines are kept around while parsing so `on_error` can show them.
# The bookkeeping is reset afterwards, also when parsing raises, so a parser
# instance that is used again never starts from stale line information.
#
# @param [String] string The input to parse.
# @return [Mixed] The result of `do_parse`.
#
def parse(string)
  @lines  = string.lines
  @tokens = @lexer.lex(string)

  do_parse
ensure
  reset
end
# vim: set ft=racc: