<?php
/* Class to verify that HTML output conforms to strict HTML v4.01 spec.
*
* Written by: Chris Studholme
* Copyright: GPL (http://www.fsf.org/copyleft/gpl.html)
* $Id: html_verifier.php,v 1.1 2003/12/23 06:07:15 cstudhol Exp $
*/
/**
 * Describes one HTML element known to the verifier: whether it is a
 * singleton (has no end tag), which attributes it accepts, how deeply it
 * may be nested inside itself, and how deeply it is nested right now.
 */
class Tag {
    var $singleton;   // (boolean) true when the element has no end tag
    var $attributes;  // (array) attribute name => true for every attribute the element accepts
    var $max_nesting; // (int) maximum nesting level (0=infinite)
    // var $requires; // (string[])
    // var $requires_immediate;
    // var $allows;
    var $nesting;     // (int) current nesting level

    /**
     * Builds the description of one element.
     *
     * @param array   $attributes  list of attribute names valid for the element
     * @param boolean $singleton   true when the element has no end tag
     * @param int     $max_nesting maximum self-nesting level (0=infinite)
     */
    function __construct($attributes,
                         $singleton=false,
                         $max_nesting=1) {
        $this->singleton = $singleton;
        $this->max_nesting = $max_nesting;
        $this->nesting = 0;
        $this->attributes = array();
        // Store the names as keys so that a validity check is a single
        // array lookup.  foreach replaces each(), which no longer exists
        // in PHP 8.
        foreach ((array) $attributes as $name) {
            $this->attributes[$name] = true;
        }
    }

    /**
     * Legacy PHP 4 style constructor.  PHP 8 no longer runs a method named
     * after the class on "new", so the real work lives in __construct();
     * this alias is kept for code that still calls Tag() explicitly.
     */
    function Tag($attributes,
                 $singleton=false,
                 $max_nesting=1) {
        $this->__construct($attributes, $singleton, $max_nesting);
    }
}
/**
 * Checks a stream of HTML tag events (begin tag, end tag, singleton tag,
 * complete tag) against a table of known elements and attributes, and
 * collects human-readable error messages in $errors.
 */
class HTMLVerifier {
    // master tag list (tag name => Tag)
    var $tag_list = array();
    // attributes valid in all tags (attribute name => true)
    var $global_attributes = array();
    // stack to keep track of html elements; slots above $tag_stack_top are
    // never cleared, so count($tag_stack) is the deepest level reached
    var $tag_stack = array();
    var $tag_stack_top = -1;
    // error messages collected so far
    var $errors = array();

    /**
     * Fills in the table of known elements and the globally valid attributes.
     */
    function __construct() {
        $this->global_attributes =
            array("style"=>true,
                  "id"=>true);
        $this->tag_list =
            array("a" => new Tag(array("href","id","name","onMouseOver")),
                  "b" => new Tag(array()),
                  "blockquote" => new Tag(array()),
                  "body" => new Tag(array()),
                  "div" => new Tag(array()),
                  "form" => new Tag(array("action","method","name")),
                  "h1" => new Tag(array()),
                  "h2" => new Tag(array()),
                  "h3" => new Tag(array()),
                  "head" => new Tag(array()),
                  "html" => new Tag(array()),
                  "i" => new Tag(array()),
                  "li" => new Tag(array()),
                  "nobr" => new Tag(array()),
                  "ol" => new Tag(array()),
                  "option" => new Tag(array("value","selected")),
                  "p" => new Tag(array()),
                  "script" => new Tag(array("type")),
                  "select" => new Tag(array("multiple","name","onChange","size")),
                  "span" => new Tag(array()),
                  "strong" => new Tag(array()),
                  "table" => new Tag(array("border","cellspacing","cellpadding","width"),false,0),
                  "td" => new Tag(array("align","colspan","valign")),
                  "textarea" => new Tag(array("cols","name","rows")),
                  "th" => new Tag(array("align","colspan","valign")),
                  "title" => new Tag(array()),
                  "tr" => new Tag(array()),
                  "ul" => new Tag(array()),
                  // singleton tags
                  "br" => new Tag(array(),true),
                  "hr" => new Tag(array(),true),
                  "img" => new Tag(array("alt","height","src","width"),true),
                  "input" => new Tag(array("checked","maxlength","name","onClick","src","size","type","value"),true),
                  "link" => new Tag(array("href","rel","type"),true),
                  );
    }

    /**
     * Legacy PHP 4 style constructor.  PHP 8 no longer runs a method named
     * after the class on "new", so the setup lives in __construct(); this
     * alias is kept for code that still calls HTMLVerifier() explicitly.
     */
    function HTMLVerifier() {
        $this->__construct();
    }

    /**
     * Validates one tag and its attributes, appending a message to $errors
     * for every problem found.
     *
     * @param string  $tag        element name
     * @param mixed   $attributes array of attribute name => value; anything
     *                            that is not an array is ignored
     * @param boolean $singleton  true when the tag was written without an end tag
     * @param string  $result     text of the tag, quoted in the error messages
     */
    function tag_check($tag,$attributes,$singleton=false,$result="") {
        // The messages are echoed into an HTML page by report(), so the
        // angle brackets of the quoted tag must be shown as entities.
        $result = str_replace(array("<", ">"), array("&lt;", "&gt;"), (string) $result);

        // check tag
        if (!isset($this->tag_list[$tag])) {
            $this->errors[]="Invalid tag ".$result;
            return;
        }
        $tagobj = $this->tag_list[$tag];
        if ($tagobj->singleton!=$singleton) {
            $this->errors[]="Tag is incorrect type ".$result;
        }

        // check nesting level; >= rather than == because the counter keeps
        // growing after the first violation and every further level must
        // be reported too
        if ($tagobj->max_nesting&&($tagobj->nesting>=$tagobj->max_nesting)) {
            $this->errors[]="Tag nested too deep ".$result;
        }

        // check attributes
        if (!is_array($attributes)) {
            return;
        }
        foreach ($attributes as $key => $value) {
            $text = (string) $value;
            if (empty($tagobj->attributes[$key])&&empty($this->global_attributes[$key])) {
                $this->errors[]="Invalid attribute '".$key."=".$value.
                    "' for tag ".$result;
            }
            // angle brackets and backslashes are not accepted in a value
            else if (preg_match('/[<>\\\\]/',$text)) {
                $this->errors[]="Invalid attribute '".$key."=".$value.
                    "' for tag ".$result;
            }
            else if (strpos($text,"\"")!==false) {
                if (strpos($text,"'")!==false)
                    $this->errors[]="Too many quote types in value '".$value.
                        "' for tag ".$result;
                else
                    $this->errors[]="WARNING: double quote used in value '".$value.
                        "' for tag ".$result;
            }
        }
    }

    /**
     * Checks a complete (begin + content + end) tag.  $content is accepted
     * but not inspected.
     */
    function check_tag($tag,$attributes,$content,$result="") {
        $this->tag_check($tag,$attributes,false,$result);
    }

    /**
     * Checks a singleton tag (one written without an end tag).
     */
    function check_stag($tag,$attributes,$result="") {
        $this->tag_check($tag,$attributes,true,$result);
    }

    /**
     * Checks a begin tag and pushes it on the element stack.  Unknown tags
     * are pushed as well, so that their end tag still finds a match.
     */
    function check_btag($tag,$attributes,$result="") {
        $this->tag_check($tag,$attributes,false,$result);
        $this->tag_stack[++$this->tag_stack_top]=$tag;
        $this->_adjust_nesting($tag, 1);
    }

    /**
     * Checks an end tag against the element stack.
     *
     * When the tag on top of the stack is not $tag, an error listing the
     * current stack is recorded and tags are discarded until $tag has been
     * removed or the stack is empty.  The nesting counter of every tag that
     * is removed is decremented, and the stack pointer never drops below -1.
     * An end tag seen while the stack is empty only records an error.
     *
     * @param string $tag    element name of the end tag
     * @param string $result unused; kept for a uniform signature
     */
    function check_etag($tag,$result="") {
        if ($this->tag_stack_top<0) {
            $this->errors[]="Tag stack is empty";
            return;
        }
        if ($this->tag_stack[$this->tag_stack_top]!==$tag) {
            $message = "Overlapping or missing tags: expected '".$tag.
                "', current stack:";
            for ($i=$this->tag_stack_top; $i>=0; --$i)
                $message .= " ".$this->tag_stack[$i];
            $this->errors[]=$message;
        }
        // remove tags until the wanted one has been removed; emptiness is
        // tested before each read so the stack pointer cannot underflow
        while ($this->tag_stack_top>=0) {
            $popped = $this->tag_stack[$this->tag_stack_top--];
            $this->_adjust_nesting($popped, -1);
            if ($popped===$tag)
                break;
        }
    }

    /**
     * Prints the deepest stack level reached and every collected error.
     * Tags still open are first added to $errors as one more message.
     * The markup is produced by the helper functions tag() and stag(),
     * which are defined outside this file.
     */
    function report() {
        echo "Maximum stack depth: ".count($this->tag_stack).stag("br")."\n";

        // check for non-empty html stack
        if ($this->tag_stack_top>=0) {
            $err="Tags left on stack:";
            for ($i=$this->tag_stack_top; $i>=0; --$i)
                $err.=" ".$this->tag_stack[$i];
            $this->errors[] = $err;
        }

        // display html errors
        foreach ($this->errors as $message) {
            echo tag("strong",false,$message).stag("br")."\n";
        }
    }

    /**
     * Internal helper: adds $delta to the nesting counter of a known tag.
     * Tags that are not in the master list have no counter and are ignored.
     * The counter is updated through $tag_list directly so the change does
     * not land on a copy of the Tag object.
     */
    function _adjust_nesting($tag,$delta) {
        if (isset($this->tag_list[$tag]))
            $this->tag_list[$tag]->nesting += $delta;
    }
}
?>