/*
* Enhydra Java Application Server Project
*
* The contents of this file are subject to the Enhydra Public License
* Version 1.1 (the "License"); you may not use this file except in
* compliance with the License. You may obtain a copy of the License on
* the Enhydra web site ( http://www.enhydra.org/ ).
*
* Software distributed under the License is distributed on an "AS IS"
* basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
* the License for the specific terms governing rights and limitations
* under the License.
*
* The Initial Developer of the Enhydra Application Server is Lutris
* Technologies, Inc. The Enhydra Application Server and portions created
* by Lutris Technologies, Inc. are Copyright Lutris Technologies, Inc.
* All Rights Reserved.
*
* Contributor(s):
*
* $Id: BMByteSearchStream.java,v 1.15 2005/06/13 09:26:06 draganr Exp $
*/
package com.lutris.util;
import java.io.*;
/**
* Implements the Boyer-Moore pattern matching algorithm for a given
* byte pattern. This object implements searches on byte-oriented input
* streams.
* <P>
* The algorithm was obtained from "Computer Algorithms - Introduction to
* Design and Analysis, Second Edition" by Sara Baase.
*/
public class BMByteSearchStream extends FilterInputStream
{
/**
* EOF value. Traditionally -1.
*/
public static final int EOF = -1;
/**
* "At Pattern" value. Indicates that the stream position is
* currently at the beginning of a detected pattern occurence.
*/
public static final int AT_PATTERN = -2;
/**
* Boyer-Moore byte string searcher for scanning the scan buffer.
*/
private BMByteSearch searcher;
/**
* Scan buffer -- Should be at least ten times the length of the
* search pattern.
*/
private byte[] scanBuf;
/**
* Scan buffer position.
*/
private int scanBufPos;
/**
* Number of bytes currently in the scan buffer.
*/
private int scanBufCount;
/**
* Total size of the scan buffer.
*/
private int scanBufSize;
/**
* The length of the search pattern.
*/
private int patternLength;
/**
* Number of bytes to overlap buffer reads by to ensure that
* the pattern gets matched even if it crosses a read boundary.
* This should normally be the length of the pattern.
*/
private int patternKeep;
/**
* The current position of the pattern within the current
* buffer contents. This may be less than the current buffer
* position if the pattern has already been skipped.
*/
private int patternPos;
/**
* Buffer to hold a single character in order to implement the
* single character read() method.
*/
private byte[] readByte = new byte[1];
/**
* Creates a Boyer-Moore byte stream scanner for a given pattern.
* Creates a buffer of length <code>buflen</code> for the scanning
* buffer.
*
* @param inputSource The input source stream to scan.
* @param pattern The pattern to scan for. Characters
* outside the Latin-1 encoding are truncated
* to signed bytes.
* @param buflen The length to use for the scanning buffer.
*/
public
BMByteSearchStream(InputStream inputSource, String pattern, int buflen)
{
super(inputSource); // Pass input source stream to parent class.
scanBuf = new byte[buflen];
scanBufSize = buflen;
scanBufPos = 0;
scanBufCount = 0;
setPattern(pattern);
}
/**
* Reads the next byte of data from this input stream. The value byte
* is returned as an <code>int</code> in the range 0 to 255. If no
* byte is available because the end of the stream has been reached,
* the value -1 is returned. This method blocks until input data is
* available, the end of the stream is detected, or an exception is
* thrown
*
* @return The next byte of data, or -1 if the end
* of stream is reached.
* @exception IOException If an I/O error occurs.
*/
public
int read()
throws IOException
{
int len = read(readByte, 0, 1);
if (len != 1) return -1;
return ((int)readByte[0] + 0x100) & 0xff;
}
/**
* Reads up to <code>buffer.length</code> bytes of data from this input
* stream into an array of bytes. This method blocks until some input
* is available
*
* @param buffer The buffer into which data are read.
* @return The number of bytes actually read, or -1
* if there are no more bytes because the
* end of stream has been reached.
* @exception IOException If an I/O error occurs.
*/
public
int read(byte[] buffer)
throws IOException
{
return(read(buffer, 0, buffer.length));
}
/**
* Reads <code>length</code> bytes of data from this input stream into
* an array of bytes. This method blocks until some input is available.
*
* @param buffer The buffer into which data are read.
* @param offset The start offset of the data.
* @param length The maximum number of bytes read.
* @return The total number of bytes read into the
* buffer, or -1 if there are no more bytes
* because the end of stream has been reached.
* @exception IOException If an I/O error occurs.
*/
public
int read(byte[] buffer, int offset, int length)
throws IOException
{
int count, dst, i;
while ((count = min(length, scanBufCount - patternKeep)) <= 0) {
if (loadBuf() < 0) {
//
// Underlying stream reached EOF. return remainder of
// buffer or EOF.
//
count = min(length, scanBufCount);
if (count <= 0) {
patternPos = -1;
return EOF;
}
break;
}
}
for (dst=offset, i=count; i > 0; i--) {
buffer[dst++] = scanBuf[scanBufPos++];
}
scanBufCount -= count;
if ((scanBufPos > patternPos) && (patternPos >= 0)) {
patternPos = searcher.search(scanBuf, scanBufPos, scanBufCount);
}
return count;
}
/**
* Skips over and discards n bytes of data from the input stream.
* The <code>skip</code> method may, for a variety of reasons,
* end up skipping over some smaller number of bytes, possibly 0.
* The actual number of bytes skipped is returned.
*
* @param n The number of bytes to be skipped.
* @return The actual number of bytes skipped.
* @exception IOException If an I/O error occurs.
*/
public
long skip(long n)
throws IOException
{
byte[] skipBuf = new byte[1024];
long bytesLeft = n;
long skipped=0;
long r, count;
while (bytesLeft > 0) {
if (bytesLeft < 1024) count = bytesLeft;
else count = 1024;
if ((r = this.read(skipBuf, 0, (int)count))<0) {
patternPos = -1;
return(skipped); // At EOF.
}
skipped += r;
bytesLeft -= r;
}
if ((scanBufPos > patternPos) && (patternPos >= 0)) {
patternPos = searcher.search(scanBuf, scanBufPos, scanBufCount);
}
return skipped;
}
/**
* Returns the number of bytes that can be read from this input stream
* without blocking. This consists of any bytes left in the buffer
* plus the result of the underlying stream's <code>available</code>
* method.
*
* @return The number of bytes that can be read
* without blocking.
* @exception IOException If an I/O error occurs.
*/
public int
available()
throws IOException
{
return(scanBufCount + super.available());
}
/**
* Returns the number of bytes that can be read from this input stream
* without blocking or encountering the search pattern. If the search
* pattern has been found in the buffer, this is the number of bytes
* in the search buffer prior to the search pattern; otherwise, while
* there may be additional data in the input stream buffer that can be
* brought into the search buffer without blocking, this data may or
* may not contain the search pattern (or complete a partial search
* pattern located at the end of the buffer), so this is calculated as
* the number of bytes left in the search buffer less the length of
* the search pattern.
*
* @return The number of bytes that can be read
* for certain without blocking or encountering
* the search pattern.
* @exception IOException If an I/O error occurs.
*/
public int
availableTo()
throws IOException
{
return (patternPos < 0) ? scanBufCount - patternLength : patternPos - scanBufPos;
}
/**
* Reads data into a buffer until the search pattern or the
* end of file is detected. Returns <code>-1</code> if at the
* search pattern or end-of-file.
*
* @param buffer Buffer to read into.
* @param offset Offset in buffer to read into.
* @param length Number of bytes to try to read.
* @return The number of bytes actually read, or
* <code>-1</code> if at the pattern or eof.
* @exception IOException Thrown if an I/O exception occurs.
*/
public
int readTo(byte[] buffer, int offset, int length)
throws IOException
{
int count, dst, i;
if (patternPos == scanBufPos) return AT_PATTERN;
if (patternPos > scanBufPos) {
count = min(length, patternPos - scanBufPos);
for (dst=offset, i=count; i > 0; i--) {
buffer[dst++] = scanBuf[scanBufPos++];
}
scanBufCount -= count;
return count;
}
while ((count = min(length, scanBufCount - patternKeep)) <= 0) {
if (loadBuf() < 0) {
//
// Underlying stream reached EOF. return remainder of
// buffer or EOF.
//
count = min(length, scanBufCount);
if (count <= 0) return EOF;
break;
}
// Bug fixed - must recheck for being at pattern.
if (patternPos == scanBufPos) return AT_PATTERN;
if (patternPos > scanBufPos) {
count = min(length, patternPos - scanBufPos);
for (dst=offset, i=count; i > 0; i--) {
buffer[dst++] = scanBuf[scanBufPos++];
}
scanBufCount -= count;
return count;
}
}
for (dst=offset, i=count; i > 0; i--) {
buffer[dst++] = scanBuf[scanBufPos++];
}
scanBufCount -= count;
return count;
}
/**
* Skips all bytes up to but not including the search pattern or EOF.
* Returns the number of bytes skipped. Repeated calls to
* this method will leave the stream at the same postion until
* another call explicitly reads or skips the pattern data.
*
* @return The number of bytes skipped.
*
* @exception IOException Thrown if an I/O exception occurs.
*/
public
int skipTo()
throws IOException
{
int count=0, skipCount = 0;
while (patternPos < scanBufPos) {
count = min(0, scanBufCount - patternKeep);
if (count >= 0) {
skipCount += count;
scanBufCount -= count;
scanBufPos += count;
}
int nread = loadBuf();
if (nread < 0) {
boolean notFound = (patternPos < scanBufPos);
skipCount += scanBufCount;
scanBufCount = 0;
scanBufPos = 0;
patternPos = -1;
if (notFound) return -1;
return (skipCount <= 0) ? 0 : skipCount;
}
if (nread == 0) {
// Special case. Buffer needs to be discarded.
if (scanBufCount == scanBuf.length) {
int skip = scanBuf.length - patternKeep;
scanBufPos = skip;
scanBufCount = patternKeep;
skipCount += skip;
patternPos = -1; // Pattern not avail.
}
}
}
count = patternPos - scanBufPos;
skipCount += count;
scanBufPos += count;
scanBufCount -= count;
return skipCount;
}
/**
* Skips all bytes up to and including the search pattern or EOF.
*
* @return The number of bytes skipped.
* @exception IOException Thrown if an I/O exception occurs.
*/
public
int skipPattern()
throws IOException
{
int skipCount = skipTo();
if (skipCount < 0) return -1; // Pattern not found.
skipCount += patternLength;
scanBufCount -= patternLength;
scanBufPos += patternLength;
if (scanBufCount > 0) {
patternPos = searcher.search(scanBuf, scanBufPos, scanBufCount);
}
return skipCount;
}
/**
* Set the search pattern. After this call, all new scans will be
* for the new pattern. Characters outside the values 0-255 are
* truncated into signed byte values in the Latin-1 encoding.
*
* @param pattern The new pattern to search for.
*/
public
void setPattern(String pattern)
{
searcher = new BMByteSearch(pattern);
patternLength = searcher.getPatternLength();
patternKeep = patternLength + 2; // Potential CRLF parsing. *ugh*
patternPos = searcher.search(scanBuf, scanBufPos, scanBufCount);
}
/**
* Set the search pattern. After this call, all new scans will be
* for the new pattern. Characters outside the values 0-255 are
* truncated into signed byte values in the Latin-1 encoding.
*
* @param search The precomputed Boyer-Moore searcher.
*/
public
void setPattern(BMByteSearch search)
{
searcher = search;
patternLength = searcher.getPatternLength();
patternKeep = patternLength + 2; // Potential CRLF parsing. *ugh*
patternPos = searcher.search(scanBuf, scanBufPos, scanBufCount);
}
/**
* Returns the next <code>length</code> bytes of the input buffer
* as a string. If fewer than <code>length</code> bytes remain on
* the input stream, then only the remaining bytes are returned. If
* the input stream is at EOF and there are no more bytes in the buffer
* then an empty string is returned.
*
* @param length The number of bytes to look ahead.
* @return A string containing the lookahead bytes as 8 bit characters.
* @exception IOException Thrown if an I/O exception occurs.
*/
public
String peekAheadString(int length)
throws IOException
{
while (scanBufCount < length) {
if (loadBuf() < 0) break;
}
int count = min(length, scanBufCount);
if (count <= 0) return "";
StringBuffer sb = new StringBuffer();
for (int i=scanBufPos; count > 0; i++, count--) {
sb.append((char)(scanBuf[i] & 0xff));
}
return sb.toString();
}
/**
* Reads more data from the underlying input source, keeping
* track of buffering and pattern matching. Before reading
* new bytes, any remaining bytes in the buffer are copied
* to position 0. The <code>scanBufPos</code>,
* <code>scanBufCount</code>, and <code>patternPos</code>
* fields are reset to appropriate values.
*
* @return The number of new bytes added to the
* buffer, or <code>-1</code> if the input
* stream has reached end of file.
* @exception IOException Thrown if an I/O exception occurs.
*/
private
int loadBuf()
throws IOException
{
//
// First, copy remainder of buffer to beginning.
//
if (scanBufPos > 0) {
for (int i=0; i<scanBufCount; i++) {
scanBuf[i] = scanBuf[scanBufPos + i];
}
scanBufPos = 0;
}
//
// Determine how many bytes are available to read.
// If no bytes are available, then read one byte and block.
// Then, recheck available.
//
int n, avail = super.available();
if (avail == 0) {
// Wait for at least one char.
n = super.read(scanBuf, scanBufCount, 1);
if (n <= 0) {
patternPos = searcher.search(scanBuf, 0, scanBufCount);
return EOF;
}
scanBufCount += n;
avail = super.available();
if (avail == 0) {
patternPos = searcher.search(scanBuf, 0, scanBufCount);
return n;
}
}
//
// Only read currently available bytes. Don't block until
// all bytes are ready. That could cause a hang.
//
int nread = scanBufSize - scanBufCount;
if ((avail >= 0) && (nread > avail)) nread = avail;
//
// Second, read additional bytes using superclass's read.
//
n = super.read(scanBuf, scanBufCount, nread);
if (n < 0) {
patternPos = searcher.search(scanBuf, 0, scanBufCount);
return EOF;
}
scanBufCount += n;
//
// Search for first occurence of pattern.
//
patternPos = searcher.search(scanBuf, 0, scanBufCount);
return n;
}
/**
* Compares two integers and returns the lesser value.
*
* @param i1 First integer to compare.
* @param i2 Second integer to compare.
* @return The lesser of <code>i1</code> or <code>i2</code>.
*/
private static final
int min(int i1, int i2)
{
return (i1 < i2) ? i1 : i2;
}
}