# FIXME: email addresses get munged right now, because the "@name" short form
# also matches the "@domain" part of an address. Hard to fix without regexp
# lookbehind.
require 'cgi'
module Iowa
# Parse a template for dynamic elements.
class TemplateParser
  # Tags treated as having no body: their element is closed right after it
  # is opened, even without a trailing "/".
  Bodyless = %w(p input li hr option)

  # Name of the attribute that marks a tag as dynamic and carries its id.
  OID = "oid"

  # Regexp source for dynamic content. It has two alternatives:
  # * a tag carrying a quoted OID attribute (capture groups 1-7), or
  # * the "@name" short form (capture groups 8-9).
  # Note that the short form also matches the "@domain" part of an email
  # address appearing in the template text.
  PatternString = "(<\\s*([^>]+?)\\s+([^>]*?" + OID +
                  "\\s*=('|\")\\s*(.*?)\\s*(\\4)[^>]*?(/)?\\s*)>)" +
                  "|" + "(@([\\w.]+))"

  # Regexp constant that defines the regexp to use to parse the template
  # for dynamic content tags.
  Pattern = Regexp.new(PatternString)

  # Capture group positions within Pattern.
  DynamicTag = 1 # the complete dynamic tag, "<" through ">"
  Tag = 2        # the tag name
  Attributes = 3 # everything after the tag name, up to (not including) ">"
  ID = 5         # the value of the OID attribute
  Close = 7      # "/" when the tag is written self-closing, otherwise nil
  Shortform = 9  # the name following "@" in the short form

  # Capture group positions that exist only in the regexps built by
  # reForTag, which appends two more groups after those of PatternString.
  OpenTag = Shortform + 1
  CloseTag = Shortform + 2

  # Take the data to parse and the bindings for the template.
  # Store each in object variables, and then parse the template
  # data for dynamic tags.
  #
  # data::     the template text (must respond to #length and #[]).
  # bindings:: looked up with bindings[id] for every dynamic element; a
  #            missing entry is treated as an empty Hash.
  #
  # Raises RuntimeError, with the text of the most recent top-level match
  # appended, when any StandardError occurs while parsing (for example a
  # dynamic tag that is never closed).
  def initialize(data, bindings)
    @data = data
    @nodestack = [Element.new("templateRoot", {}, {})]
    @bindings = bindings
    # @pos is the offset of the first unparsed character, @len the number
    # of characters still unparsed.
    @pos = 0
    @length = @data.length
    @len = @length

    match = nil
    begin
      while (match = Pattern.match(@data[@pos, @len]))
        # The match offsets are relative to the unparsed remainder.
        @pos += match.end(0)
        @len = @length - @pos
        textToken(match.pre_match)
        parseTag(match)
      end
    rescue StandardError => error
      # Only StandardError is wrapped so that Interrupt, SystemExit and
      # similar are not converted into parse errors.
      raise "#{error}: parsed error around #{match && match[0]}"
    end

    # Whatever follows the last dynamic element is plain text.
    textToken(@data[@pos, @len])
  end

  # Return the last element of the nodestack.
  def root
    @nodestack.last
  end

  # Determine if a given tag is part of the set of bodyless tags.
  # The comparison ignores case.
  def bodyless?(tag)
    Bodyless.include? tag.downcase
  end

  # Parse the contents of a single tag.
  #
  # match:: MatchData produced by Pattern or by a regexp from reForTag.
  #
  # A short form match becomes an empty "String" element. A tag that is
  # self-closing or bodyless becomes an empty element. Any other tag has
  # its body parsed by parseTagBody.
  def parseTag(match)
    if match[Shortform]
      startToken("String", match[Shortform], "")
      endToken
      return
    end

    startToken(match[Tag], match[ID], match[Attributes])
    if match[Close] || bodyless?(match[Tag])
      endToken
      return
    end

    parseTagBody match[Tag]
  end

  # Traverse a tag's body, looking for other dynamic elements embedded
  # within it. Consumes input up to and including the closing tag that
  # balances the already-opened +tag+, then closes the current element.
  #
  # Raises RuntimeError ("Trying to match <tag>") when the input ends
  # before the balancing closing tag is found.
  def parseTagBody(tag)
    tag_re = reForTag(tag)
    depth = 1 # number of currently open, not yet closed, +tag+ tags

    while depth > 0
      tag_match = tag_re.match(@data[@pos, @len])
      raise "Trying to match #{tag}" unless tag_match

      @pos += tag_match.end(0)
      @len = @length - @pos

      if tag_match[DynamicTag] || tag_match[Shortform]
        textToken(tag_match.pre_match)
        parseTag(tag_match)
      elsif tag_match[OpenTag]
        # A plain nested tag of the same name is kept as text. It only
        # deepens the nesting when it is not self-closing ("<tag ... />"),
        # because a self-closing tag has no closing tag to balance it.
        depth += 1 unless tag_match[OpenTag] =~ %r{/\s*>\z}
        textToken(tag_match.pre_match + tag_match[0])
      elsif tag_match[CloseTag]
        depth -= 1
        textToken(tag_match.pre_match)
        # The closing tag of a nested plain tag is text; the one that
        # closes the dynamic element itself is dropped.
        textToken(tag_match[0]) unless depth == 0
      end
    end

    endToken
  end

  # Returns a regular expression object that matches either a dynamic
  # element as defined in the global template parsing Pattern, or an
  # opening element of the given tag (argument passed to the method)
  # or a closing element of the given tag.
  #
  # The tag name is escaped so that it is matched literally, and the
  # opening alternative requires the name to end there, so that e.g. "b"
  # does not match "<br>".
  def reForTag(tag)
    name = Regexp.escape(tag)
    Regexp.new(
      PatternString +
      "|" +
      "(<\\s*" + name + "(?![\\w:.\\-]).*?>)" +
      "|" +
      "(<\\s*/" + name + "\\s*>)"
    )
  end

  # Create the element for a dynamic tag, add it as a child of the current
  # element and make it the current element.
  #
  # tag::             tag name; used as the element class name unless the
  #                   bindings for +id+ supply a "class" entry.
  # id::              value of the OID attribute (or the short form name).
  # attributeString:: raw attribute text of the tag.
  def startToken(tag, id, attributeString)
    bindings = @bindings[id] || {}
    klass = bindings["class"] || tag

    attributes = {}
    attributeString.scan(/(\w+)\s*=\s*(["'])(.*?)(\2)/) do |key, _quote, value|
      # The OID attribute identifies the element; it is not passed on.
      attributes[key] = CGI::unescapeHTML(value) unless key == OID
    end
    # Kludge. I want a regexp that can match both key=value attributes and
    # key only attributes.
    attributes['checked'] = nil if attributeString =~ /\bchecked/i

    node = Element.newElement(klass, id, bindings, attributes)
    @nodestack.last.addChild node
    @nodestack.push node
  end

  # Close the current element by removing it from the nodestack.
  def endToken
    @nodestack.pop
  end

  # Add +text+ as a TextElement child of the current element.
  def textToken(text)
    @nodestack.last.addChild TextElement.new(text)
  end
end
end