/*
Wotonomy: OpenStep design patterns for pure Java applications.
Copyright (C) 2000 Blacksmith, Inc.
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, see http://www.gnu.org
*/
package net.wotonomy.util;
import java.util.*; //collections
import java.io.*;
/**
* PropertyListParser can parse a property list (plist) file or string, and
* return the top-level object represented by the plist. <p>
*
* A property list is a hierarchical data structure containing only Maps,
* Lists, and Strings -- nothing else. In other words, a property list is
* either a Map, List, or String instance, with the restrictions that the
* collections may only contain Map, List, or String instances. <p>
*
* This class can read a particularly-formatted string or file, and create
* the property list structure described. It provides a convenient means
* for having a structured data file, letting programs simply deal with the
* structure rather than having to do a lot of string parsing work as well.
* The concept is similar to Properties files, except that the values can
* be nested Maps or Lists instead of only Strings. <p>
*
* A Map is specified in a file by key/value pairs surrounded by brace
* characters. An equal sign (=) must be between the key and value, and
* there must be a semicolon (;) following the value.
*
* <pre>
* {
* key1 = value1;
* key2 = value2;
* etc...
* }
* </pre>
*
* A List is specified by a comma-separated list of values surrounded by parentheses, like:
* <pre>
* ( value1, value2, value3, etc... )
* </pre>
*
* A String can either be quoted in the manner of a constant string in
* Java, or unquoted. If unquoted, the string can only contain
* alphanumerics, underscores (_), periods (.), dollar signs ($), colons
* (:), or forward slashes (/). If any other character appears in the
* string, it must be quoted (i.e., surrounded by " characters).
* Quoted strings may also contain \n, \t, \r, \f, \v, \b, and \a escapes,
* octal escapes of the form \000, and unicode escapes of the form \U
* followed by four hexadecimal digits. Any other character escaped
* by a backslash will be treated as that character, and the escaping
* backslash character will be omitted. Thus, to represent an actual
* backslash, it must appear as \\ in the quoted string. <p>
*
* All whitespace between elements is ignored, and both //-style and
* /*-style comments are allowed to appear anywhere between elements. <p>
*
* If there are any syntax errors encountered while parsing,
* RuntimeExceptions are thrown with the line number and column of the
* problem. <p>
*
* Currently, HashMaps and ArrayLists are the actual Map and List classes
* used when creating the property list. <p>
*
* Examples: <p><blockquote>
<pre>
// This plist file represents a Map, since it starts with a '{'.
{
Map1 = { subkey1 = "foo"; };
Map2 =
{
"key1" = "This is a quoted string.";
"key 2" = "bar\nbaz"; // the value has a newline in it
key3 = ("a", b, c, "quux quux"); // a List of four Strings
}; // We need a semicolon here, since it's following the value of the "Map2" key
List1 = (foobar,foobaz,"foo,baz", (aa, ab, ac)); // a List of 3 Strings and a List
// And now a List of two Maps
List2 = (
{
key1 = value1;
key2 = "value 2";
key3 = (a,b,c,d);
key4 = ();
}, // We need the comma here
{
key1 = {}; // an empty Map
key2 = "another String value";
}
);
}
</pre>
</blockquote>
* For those wondering, this is essentially a re-implementation of
* NeXT/Apple's property lists, except that data values are not supported.
*
* @author clindberg@blacksmith.com
* @version $Revision: 1.1.1.1 $
*/
public class PropertyListParser
{
    /** Characters, besides ASCII letters and digits, that may appear in an unquoted string. */
    private static final String unquotedStringChars = "._$:/";

    /** Characters that are skipped as whitespace between elements. */
    private static final String whitespaceChars = " \t\n\r\f";

    /** The complete text being parsed. */
    private final char buffer[];

    /** Index into buffer of the next character to examine. */
    private int currIndex;

    /** 1-based number of the line containing currIndex (used for error messages). */
    private int lineNumber;

    /** 0-based index into buffer of the first character of the current line. */
    private int currLineStartIndex;

    /**
     * Reads an object (String, List, or Map) from plistString and returns it.
     * RuntimeExceptions are raised if there are parse problems.
     *
     * @param plistString the text of the property list
     * @return the parsed String, List, or Map, or null if the text contains
     *         nothing but whitespace and comments
     */
    public static Object propertyListFromString(String plistString)
    {
        PropertyListParser parser = new PropertyListParser(plistString);
        return parser.readTopLevelObject();
    }

    /**
     * Reads all remaining characters from the Reader, and returns the
     * result of propertyListFromString(). RuntimeExceptions are raised if
     * there are parse problems. The reader is not closed by this method.
     *
     * @param reader the source of the property list text
     * @return the parsed top-level object
     * @throws IOException if reading from the reader fails
     */
    public static Object propertyListFromReader(Reader reader) throws IOException
    {
        char charBuffer[] = new char[2048];
        StringBuffer stringBuffer = new StringBuffer();
        int numRead = reader.read(charBuffer);
        while (numRead >= 0)
        {
            if (numRead > 0)
                stringBuffer.append(charBuffer, 0, numRead);
            numRead = reader.read(charBuffer);
        }
        return propertyListFromString(stringBuffer.toString());
    }

    /**
     * Reads the contents of the specified file, and parses the contents.
     * If any error occurs, prints out a message using System.out.println()
     * and returns null. The file is decoded with the platform default
     * character encoding, and is always closed before this method returns.
     *
     * @param filename path of the file to parse
     * @return the parsed top-level object, or null if reading or parsing failed
     */
    public static Object propertyListFromFile(String filename)
    {
        FileInputStream stream = null;
        try {
            stream = new FileInputStream(filename);
            return propertyListFromReader(new InputStreamReader(stream));
        } catch (Exception exception) {
            String errorMessage = exception.getMessage();
            System.out.println("Error parsing property list from "+filename+": "+errorMessage);
        } finally {
            // Release the file handle whether or not parsing succeeded.
            if (stream != null)
            {
                try {
                    stream.close();
                } catch (IOException ignored) {
                    // Best effort: nothing useful can be done if close fails.
                }
            }
        }
        return null;
    }

    /**
     * Creates a new PropertyListParser to parse the contents of the
     * specified String.
     *
     * @param plistString the text to parse
     */
    public PropertyListParser(String plistString)
    {
        this(plistString.toCharArray());
    }

    /**
     * Creates a new PropertyListParser to parse the specified char array.
     * The array is used directly, not copied.
     *
     * @param charArray the characters to parse
     */
    public PropertyListParser(char[] charArray)
    {
        buffer = charArray;
        lineNumber = 1;
        // The first line starts at index 0, matching the 0-based start
        // indexes recorded for every later line, so columns are 1-based
        // on all lines.
        currLineStartIndex = 0;
        currIndex = 0;
    }

    /**
     * Parses one object from the current position and verifies that only
     * whitespace and comments follow it.
     *
     * @return the parsed String, List, or Map, or null if no object was found
     *         before the end of the input
     * @throws RuntimeException on any syntax error, including trailing content
     */
    public Object readTopLevelObject()
    {
        Object plist = readObject();
        skipCommentWhitespace();
        if (!isAtEnd())
        {
            throwParseException("Extra characters in plist string after parsing object. A plist should only contain one top-level object.");
        }
        return plist;
    }

    /**
     * Always throws a RuntimeException made of errorMessage plus the current
     * line number and 1-based column.
     */
    private void throwParseException(String errorMessage)
    {
        int column = currIndex - currLineStartIndex + 1;
        throw new RuntimeException(errorMessage + " (Line " + lineNumber + ", column " + column + ")");
    }

    /** Records that a new line begins at the given buffer index. */
    private void updateLineNumberWithIndex(int lineStartIndex)
    {
        lineNumber++;
        currLineStartIndex = lineStartIndex;
    }

    /** Returns true once every character of the buffer has been consumed. */
    private boolean isAtEnd()
    {
        return currIndex >= buffer.length;
    }

    /**
     * Advances to the newline that ends a double-slash comment (or to the
     * end of input). The newline itself is left for skipWhitespace() so the
     * line count is updated there.
     */
    private void skipDoubleslashComment()
    {
        while (!isAtEnd() && buffer[currIndex] != '\n') {
            currIndex++;
        }
    }

    /**
     * Skips a slash-star comment. On entry currIndex is at the opening
     * slash and the caller has verified that a star follows it.
     */
    private void skipStandardCComment()
    {
        // Skip both opening characters, so the opening star can never be
        // mistaken for the start of the closing star-slash.
        currIndex += 2;
        while (!isAtEnd())
        {
            char c = buffer[currIndex];
            if (c == '\n')
            {
                updateLineNumberWithIndex(currIndex+1);
            }
            else if (c == '*' && (currIndex+1) < buffer.length && buffer[currIndex+1] == '/')
            {
                currIndex += 2; // step past the closing star-slash
                return;
            }
            currIndex++;
        }
        throwParseException("Input exhausted while parsing comment");
    }

    /** Skips whitespace characters, counting newlines as it goes. */
    private void skipWhitespace()
    {
        while (!isAtEnd() && isWhitespace(buffer[currIndex]))
        {
            if (buffer[currIndex] == '\n')
                updateLineNumberWithIndex(currIndex+1);
            currIndex++;
        }
    }

    /** Skips any run of whitespace and comments, in any order. */
    private void skipCommentWhitespace()
    {
        while (true)
        {
            skipWhitespace();
            // A comment opener needs two characters; a lone slash is not one.
            if ((buffer.length - currIndex) <= 1 || buffer[currIndex] != '/')
                return;
            char next = buffer[currIndex+1];
            if (next == '/')
                skipDoubleslashComment();
            else if (next == '*')
                skipStandardCComment();
            else
                return;
        }
    }

    /**
     * Reads the next object, choosing the kind from its first character.
     *
     * @return a String, List, or Map, or null if the input is exhausted
     */
    private Object readObject()
    {
        skipCommentWhitespace();
        if (isAtEnd()) return null;
        // Data (i.e. byte[]) not supported
        switch (buffer[currIndex])
        {
            case '"': return readQuotedString();
            case '(': return readList();
            case '{': return readMap();
            default:  return readUnquotedString();
        }
    }

    /** Returns the numeric value (0-15) of a hex digit, or 0 for any other character. */
    private static final byte valueForHexDigit(char c)
    {
        if(c >= '0' && c <= '9') return (byte)(c - '0');
        if(c >= 'a' && c <= 'f') return (byte)((c - 'a') + 10);
        if(c >= 'A' && c <= 'F') return (byte)((c - 'A') + 10);
        return 0;
    }

    /** Returns true for the characters '0' through '7'. */
    private static final boolean isOctalDigit(char c)
    {
        return c >= '0' && c <= '7';
    }

    /** Returns true for 0-9, a-f, and A-F. */
    private static final boolean isHexDigit(char c)
    {
        return (c >= '0' && c <= '9') ||
               (c >= 'a' && c <= 'f') ||
               (c >= 'A' && c <= 'F');
    }

    /** Returns true if c is one of the whitespace characters skipped between elements. */
    private static final boolean isWhitespace(char c)
    {
        return whitespaceChars.indexOf(c) >= 0;
    }

    /** Returns true if c is an ASCII letter or digit, or one of unquotedStringChars. */
    private static final boolean isValidUnquotedStringChar(char c)
    {
        return ((c >= 'a' && c <= 'z') ||
                (c >= 'A' && c <= 'Z') ||
                (c >= '0' && c <= '9') ||
                unquotedStringChars.indexOf(c) >= 0);
    }

    /**
     * Reads the longest run of characters allowed in an unquoted string.
     *
     * @throws RuntimeException if the current character is not allowed
     */
    private String readUnquotedString()
    {
        int startIndex = currIndex;
        while (!isAtEnd() && isValidUnquotedStringChar(buffer[currIndex]))
            currIndex++;
        if (startIndex == currIndex)
            throwParseException("No allowable characters found to parse unquoted string");
        return new String(buffer, startIndex, currIndex - startIndex);
    }

    /**
     * Reads a double-quoted string, translating backslash escapes. On entry
     * currIndex is at the opening quote; on return it is just past the
     * closing quote.
     *
     * @throws RuntimeException if the input ends before the closing quote or
     *         an escape sequence is malformed
     */
    private String readQuotedString()
    {
        currIndex++; //skip over '"'
        StringBuffer stringBuffer = new StringBuffer();
        // Start of the pending run of ordinary characters not yet appended.
        int startIndex = currIndex;
        while (!isAtEnd() && buffer[currIndex] != '"')
        {
            if (buffer[currIndex] != '\\')
            {
                if (buffer[currIndex] == '\n')
                    updateLineNumberWithIndex(currIndex+1);
                /*
                 * Just increment the index -- all these characters will be
                 * appended in chunks, either before an escape sequence or
                 * at the end.
                 */
                currIndex++;
                continue;
            }

            /* It's an escape: append anything scanned past before the '\\' */
            if (startIndex < currIndex)
                stringBuffer.append(buffer, startIndex, currIndex - startIndex);
            currIndex++; // skip over '\\'
            if (isAtEnd())
                throwParseException("Input exhausted while parsing escape sequence");
            switch (buffer[currIndex])
            {
                case 't': stringBuffer.append('\t'); currIndex++; break; // tab
                case 'n': stringBuffer.append('\n'); currIndex++; break; // newline
                case 'r': stringBuffer.append('\r'); currIndex++; break; // carriage return
                case 'f': stringBuffer.append('\f'); currIndex++; break; // form feed
                case 'b': stringBuffer.append('\b'); currIndex++; break; // backspace
                case 'a': stringBuffer.append('\007'); currIndex++; break; // bell
                case 'v': stringBuffer.append('\013'); currIndex++; break; // vertical tab
                case 'U':
                case 'u':
                    stringBuffer.append(readUnicodeEscape());
                    break;
                case '0': case '1': case '2': case '3':
                case '4': case '5': case '6': case '7':
                    stringBuffer.append(readOctalEscape());
                    break;
                /* I guess plists can't have the \x{HEX}{HEX} escapes */
                default:
                    // Unknown escape sequence, just add the character.
                    // GCC warns if this isn't a '"', '\'', or '\\'...
                    stringBuffer.append(buffer[currIndex]);
                    if (buffer[currIndex] == '\n')
                        updateLineNumberWithIndex(currIndex+1);
                    currIndex++;
                    break;
            }
            /* Reset startIndex, so a verbatim copy will now start from this index */
            startIndex = currIndex;
        }
        if (isAtEnd())
            throwParseException("Input exhausted while parsing quoted string");
        if (startIndex < currIndex)
            stringBuffer.append(buffer, startIndex, currIndex - startIndex);
        currIndex++; //skip past '"'
        return stringBuffer.toString();
    }

    /**
     * Reads a Unicode escape. On entry currIndex is at the 'U' (either case)
     * following the backslash; on return it is past the four hex digits.
     */
    private char readUnicodeEscape()
    {
        currIndex++; // skip past the 'U'
        if ((currIndex+4) > buffer.length)
            throwParseException("Not enough chars to parse \\U sequence");
        int value = 0;
        for (int offset = 0; offset < 4; offset++)
        {
            char digit = buffer[currIndex+offset];
            if (!isHexDigit(digit))
                throwParseException("Four hex digits not found for \\U sequence");
            value = (value << 4) + valueForHexDigit(digit);
        }
        currIndex += 4;
        return (char)value;
    }

    /**
     * Reads an octal escape of one, two, or three digits. On entry currIndex
     * is at the first octal digit; on return it is past the last digit used.
     */
    private char readOctalEscape()
    {
        int digits = 0;
        int value = 0;
        do {
            value = (value * 8) + (buffer[currIndex] - '0');
            currIndex++;
            digits++;
            // Stop after three digits; a fourth digit is ordinary text.
        } while (digits < 3 && !isAtEnd() && isOctalDigit(buffer[currIndex]));
        if (value > 255)
            throwParseException("Value too large in octal escape sequence (> 0377)");
        // This assumes value is in ISO Latin 1 encoding
        return (char)value;
    }

    /**
     * Reads a parenthesized, comma-separated list. On entry currIndex is at
     * the opening parenthesis. A comma directly before the closing
     * parenthesis is tolerated.
     *
     * @return an ArrayList of the contained objects, possibly empty
     */
    private List readList()
    {
        List newList = new ArrayList();
        currIndex++; //skip over '('
        skipCommentWhitespace();
        while (!isAtEnd() && buffer[currIndex] != ')')
        {
            /* A comma is required between list elements */
            if (newList.size() > 0)
            {
                if (buffer[currIndex] != ',')
                    throwParseException("List parsing failed: expecting ','");
                currIndex++;
                skipCommentWhitespace();
                if (isAtEnd())
                    throwParseException("Input exhausted while parsing list");
            }
            if (buffer[currIndex] != ')')
            {
                Object plistObject = readObject();
                if (plistObject == null)
                    throwParseException("List parsing failed: could not read contained object.");
                newList.add(plistObject);
                skipCommentWhitespace();
            }
        }
        if (isAtEnd())
            throwParseException("Input exhausted while parsing list");
        currIndex++; //skip past ')'
        return newList;
    }

    /**
     * Reads a brace-delimited map of "key = value;" entries. On entry
     * currIndex is at the opening brace. Keys must be Strings.
     *
     * @return a HashMap of the entries, possibly empty
     */
    private Map readMap()
    {
        HashMap newMap = new HashMap();
        currIndex++; // skip over open brace
        skipCommentWhitespace();
        while (!isAtEnd() && buffer[currIndex] != '}')
        {
            Object key = readObject();
            if (key == null || !(key instanceof String))
                throwParseException("Map parsing failed: could not parse key or key is not a String");
            skipCommentWhitespace();
            if (isAtEnd() || buffer[currIndex] != '=')
                throwParseException("Map parsing failed: expecting '='");
            currIndex++; //skip over '='
            skipCommentWhitespace();
            if (isAtEnd())
                throwParseException("Input exhausted while parsing map");
            Object value = readObject();
            if (value == null)
                throwParseException("Map parsing failed: could not parse value object");
            skipCommentWhitespace();
            if (isAtEnd() || buffer[currIndex] != ';')
                throwParseException("Map parsing failed: expecting ';'");
            currIndex++; //skip over ';'
            skipCommentWhitespace();
            newMap.put(key, value);
        }
        if (isAtEnd())
            throwParseException("Input exhausted while parsing map");
        currIndex++; //skip past '}'
        return newMap;
    }

    /**
     * Parses the file named by the first argument and prints the result.
     *
     * @param args command-line arguments; args[0] is the plist file name
     */
    public static void main(String[] args)
    {
        if (args.length < 1)
        {
            System.err.println("Usage: PropertyListParser <plist-file>");
            return;
        }
        String filename = args[0];
        Object plist = PropertyListParser.propertyListFromFile(filename);
        System.out.println(plist);
    }
}