download SpanishStemFilter.java
Language: Java
License: ASL
Copyright: (c) 2001 The Apache Software Foundation. All rights reserved.
LOC: 46
Project Info
depmark
Server: java.net
Type: cvs
...e\lucene\analysis\snowball\
   .nbattrs
   porter.in
   porter.out
   SnowballAnalyzerTest.java
   spanish.in
   spanish.out
   spanish.stw
   SpanishAnalyzer.java
   SpanishStemFilter.java

package src.java.org.apache.lucene.analysis.snowball;

/* ====================================================================
 * The Apache Software License, Version 1.1
 *
 * Copyright (c) 2001 The Apache Software Foundation.  All rights
 * reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * 3. The end-user documentation included with the redistribution,
 *    if any, must include the following acknowledgment:
 *       "This product includes software developed by the
 *        Apache Software Foundation (http://www.apache.org/)."
 *    Alternately, this acknowledgment may appear in the software itself,
 *    if and wherever such third-party acknowledgments normally appear.
 *
 * 4. The names "Apache" and "Apache Software Foundation" and
 *    "Apache Lucene" must not be used to endorse or promote products
 *    derived from this software without prior written permission. For
 *    written permission, please contact apache@apache.org.
 *
 * 5. Products derived from this software may not be called "Apache",
 *    "Apache Lucene", nor may "Apache" appear in their name, without
 *    prior written permission of the Apache Software Foundation.
 *
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 * ====================================================================
 *
 * This software consists of voluntary contributions made by many
 * individuals on behalf of the Apache Software Foundation.  For more
 * information on the Apache Software Foundation, please see
 * <http://www.apache.org/>.
 */

import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import net.sf.snowball.ext.spanishStemmer;
import java.io.IOException;
import java.util.Hashtable;

/**
 * Transforms the token stream as per the SNOWBALL (Martin Porter)
 * Spanish stemming algorithm.
 *
 * <p>Each token pulled from the wrapped stream is either passed through
 * untouched (when its term text is a key of the exclusion table, or when
 * stemming leaves the text unchanged) or replaced by a new token that carries
 * the stemmed text together with the original offsets and type.
 *
 * @author    Alex Murzaku (alex at lissus.com)
 */
public final class SpanishStemFilter extends TokenFilter {

    /** Stemmer applied to every non-excluded term; never null. */
    private spanishStemmer stemmer;

    /** Terms (stored as keys) that must not be stemmed; null disables the check. */
    private Hashtable exclusions = null;

    /**
     * Builds a SpanishStemFilter with a default stemmer and no exclusion table.
     *
     * @param in the token stream whose tokens are to be stemmed
     */
    public SpanishStemFilter(TokenStream in) {
        stemmer = new spanishStemmer();
        input = in;
    }

    /**
     * Builds a SpanishStemFilter that uses an exclusion table.
     *
     * @param in             the token stream whose tokens are to be stemmed
     * @param exclusiontable table whose keys are the terms to leave unstemmed;
     *                       may be null, in which case nothing is excluded
     */
    public SpanishStemFilter(TokenStream in, Hashtable exclusiontable) {
        this(in);
        exclusions = exclusiontable;
    }

    /**
     * Returns the next input token after stemming.
     *
     * @return the original token if it is excluded or already equals its stem,
     *         a new token holding the stem otherwise, or null once the
     *         wrapped stream is exhausted
     * @throws IOException if the wrapped stream fails to produce a token
     */
    public final Token next() throws IOException {
        Token token = input.next();
        if (token == null) {
            return null;
        }

        String term = token.termText();

        // Look the term up by key: Hashtable.contains() would test the table's
        // values instead and has to scan every entry for each token.
        if (exclusions != null && exclusions.containsKey(term)) {
            return token;
        }

        stemmer.setCurrent(term);
        stemmer.stem();
        String stemmed = stemmer.getCurrent();

        // Reuse the incoming token when stemming changed nothing.
        if (stemmed.equals(term)) {
            return token;
        }
        return new Token(stemmed, token.startOffset(),
                token.endOffset(), token.type());
    }

    /**
     * Sets an alternative/custom stemmer for this filter.
     *
     * @param stemmer the stemmer to use from now on; a null argument is
     *                ignored and the current stemmer is kept
     */
    public void setStemmer(spanishStemmer stemmer) {
        if (stemmer != null) {
            this.stemmer = stemmer;
        }
    }

    /**
     * Sets an alternative exclusion table for this filter.
     *
     * @param exclusiontable table whose keys are the terms to leave unstemmed;
     *                       null removes the exclusion check entirely
     */
    public void setExclusionTable(Hashtable exclusiontable) {
        exclusions = exclusiontable;
    }
}

About Koders | Resources | Downloads | Support | Black Duck | Submit Project | Terms of Service | DMCA | Privacy Policy | Site Map| Contact Us