/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/*
* TestInstances.java
* Copyright (C) 2006 University of Waikato, Hamilton, New Zealand
*/
package weka.core;
import weka.core.Capabilities.Capability;
import java.io.Serializable;
import java.util.Enumeration;
import java.util.Random;
import java.util.StringTokenizer;
import java.util.Vector;
/**
* Generates artificial datasets for testing. In case of Multi-Instance data
* the settings for the number of attributes apply to the data inside
* the bag. Originally based on code from the CheckClassifier.<p/>
*
<!-- options-start -->
* Valid options are: <p/>
*
* <pre> -relation <name>
* The name of the data set.</pre>
*
* <pre> -seed <num>
* The seed value.</pre>
*
* <pre> -num-instances <num>
* The number of instances in the datasets (default 20).</pre>
*
* <pre> -class-type <num>
* The class type, see constants in weka.core.Attribute
* (default 1=nominal).</pre>
*
* <pre> -class-values <num>
* The number of classes to generate (for nominal classes only)
* (default 2).</pre>
*
* <pre> -class-index <num>
* The class index, with -1=last, (default -1).</pre>
*
* <pre> -no-class
* Doesn't include a class attribute in the output.</pre>
*
* <pre> -nominal <num>
* The number of nominal attributes (default 1).</pre>
*
* <pre> -nominal-values <num>
* The number of values for nominal attributes (default 2).</pre>
*
* <pre> -numeric <num>
* The number of numeric attributes (default 0).</pre>
*
* <pre> -string <num>
* The number of string attributes (default 0).</pre>
*
* <pre> -words <comma-separated-list>
* The words to use in string attributes.</pre>
*
* <pre> -word-separators <chars>
* The word separators to use in string attributes.</pre>
*
* <pre> -date <num>
* The number of date attributes (default 0).</pre>
*
* <pre> -relational <num>
* The number of relational attributes (default 0).</pre>
*
* <pre> -num-instances-relational <num>
* The number of instances in relational/bag attributes (default 10).</pre>
*
* <pre> -multi-instance
* Generates multi-instance data.</pre>
*
* <pre> -W <classname>
* The Capabilities handler to base the dataset on.
* The other parameters can be used to override the ones
* determined from the handler. Additional parameters for
* handler can be passed on after the '--'.</pre>
*
<!-- options-end -->
*
* @author FracPete (fracpete at waikato dot ac dot nz)
* @version $Revision: 1.7 $
* @see weka.classifiers.CheckClassifier
*/
public class TestInstances
implements Cloneable, Serializable, OptionHandler {
/** for serialization */
private static final long serialVersionUID = -6263968936330390469L;
/** can be used for setting the class attribute index to last
* @see #setClassIndex(int) */
public final static int CLASS_IS_LAST = -1;
/** can be used to avoid generating a class attribute
* @see #setClassIndex(int) */
public final static int NO_CLASS = -2;
/** the default list of words used in strings */
public final static String[] DEFAULT_WORDS = {"The", "quick", "brown", "fox", "jumps", "over", "the", "lazy", "dog"};
/** the default word separators used in strings */
public final static String DEFAULT_SEPARATORS = " ";
/** the words used for generating String attributes/classes
* (initially the same array as {@link #DEFAULT_WORDS}) */
protected String[] m_Words = DEFAULT_WORDS;
/** the word separators used for generating String attributes/classes */
protected String m_WordSeparators = DEFAULT_SEPARATORS;
/** the name of the relation */
protected String m_Relation = "Testdata";
/** the seed value */
protected int m_Seed = 1;
/** the random number generator, created from the initial seed value */
protected Random m_Random = new Random(m_Seed);
/** the number of instances */
protected int m_NumInstances = 20;
/** the class type, see the constants in weka.core.Attribute */
protected int m_ClassType = Attribute.NOMINAL;
/** the number of classes (in case of NOMINAL class) */
protected int m_NumClasses = 2;
/** the class index (-1 is LAST, -2 means no class)
* @see #CLASS_IS_LAST
* @see #NO_CLASS */
protected int m_ClassIndex = CLASS_IS_LAST;
/** the number of nominal attributes */
protected int m_NumNominal = 1;
/** the number of values for nominal attributes */
protected int m_NumNominalValues = 2;
/** the number of numeric attributes */
protected int m_NumNumeric = 0;
/** the number of string attributes */
protected int m_NumString = 0;
/** the number of date attributes */
protected int m_NumDate = 0;
/** the number of relational attributes */
protected int m_NumRelational = 0;
/** whether to generate Multi-Instance data or not */
protected boolean m_MultiInstance = false;
/** the number of instances in relational attributes (applies also for bags
* in multi-instance) */
protected int m_NumInstancesRelational = 10;
/** the formats of the relational/multi-instance data, accessed by index
* (not set by default) */
protected Instances[] m_RelationalFormat = null;
/** the format of the multi-instance data of the class (not set by default) */
protected Instances m_RelationalClassFormat = null;
/** the generated data (none by default) */
protected Instances m_Data = null;
/** the CapabilitiesHandler to get the Capabilities from (none by default) */
protected CapabilitiesHandler m_Handler = null;
/**
* the default constructor
*/
public TestInstances() {
super();
// Apply every default through its setter; the values are the same ones
// the field initialisers of this class use.
// NOTE(review): the setters are called from the constructor - if a
// subclass overrides one of them, the override runs before the subclass
// itself has been initialised. Confirm that this is acceptable.
setRelation("Testdata");
setSeed(1);
setNumInstances(20);
// class attribute: nominal with two labels, placed last
setClassType(Attribute.NOMINAL);
setNumClasses(2);
setClassIndex(CLASS_IS_LAST);
// attribute counts: a single nominal attribute with two values
setNumNominal(1);
setNumNominalValues(2);
setNumNumeric(0);
setNumString(0);
setNumDate(0);
setNumRelational(0);
setNumInstancesRelational(10);
setMultiInstance(false);
// material for string values: the default words, passed in the form
// produced by arrayToList, and the default separators
setWords(arrayToList(DEFAULT_WORDS));
setWordSeparators(DEFAULT_SEPARATORS);
}
/**
* creates a clone of the current object
*
* @return a clone of the current object
*/
public Object clone() {
  // start from a default-constructed object and copy the settings over
  // via assign(TestInstances)
  final TestInstances copy = new TestInstances();
  copy.assign(this);
  return copy;
}
/**
* updates itself with all the settings from the given TestInstances
* object
*
* @param t the object to get the settings from
*/
public void assign(TestInstances t) {
  // general dataset settings
  setRelation(t.getRelation());
  setSeed(t.getSeed());
  setNumInstances(t.getNumInstances());

  // class attribute
  setClassType(t.getClassType());
  setNumClasses(t.getNumClasses());
  setClassIndex(t.getClassIndex());

  // attribute counts per type
  setNumNominal(t.getNumNominal());
  setNumNominalValues(t.getNumNominalValues());
  setNumNumeric(t.getNumNumeric());
  setNumString(t.getNumString());
  setNumDate(t.getNumDate());
  setNumRelational(t.getNumRelational());
  setMultiInstance(t.getMultiInstance());

  // relational formats; copied after the number of relational attributes
  // has been taken over, one format per index
  for (int idx = 0; idx < t.getNumRelational(); idx++) {
    setRelationalFormat(idx, t.getRelationalFormat(idx));
  }
  setRelationalClassFormat(t.getRelationalClassFormat());
  setNumInstancesRelational(t.getNumInstancesRelational());

  // string generation
  setWords(t.getWords());
  setWordSeparators(t.getWordSeparators());

  // note: the capabilities handler is not taken over by this method
}
/**
* Returns an enumeration describing the available options.
*
* @return an enumeration of all the available options.
*/
public Enumeration listOptions() {
  // Each Option is built from: description, option name, number of
  // arguments and synopsis.
  Vector optionList = new Vector();

  // general dataset settings
  optionList.add(new Option("\tThe name of the data set.",
      "relation", 1, "-relation <name>"));
  optionList.add(new Option("\tThe seed value.",
      "seed", 1, "-seed <num>"));
  optionList.add(new Option("\tThe number of instances in the datasets (default 20).",
      "num-instances", 1, "-num-instances <num>"));

  // class attribute
  optionList.add(new Option("\tThe class type, see constants in weka.core.Attribute\n"
      + "\t(default 1=nominal).",
      "class-type", 1, "-class-type <num>"));
  optionList.add(new Option("\tThe number of classes to generate (for nominal classes only)\n"
      + "\t(default 2).",
      "class-values", 1, "-class-values <num>"));
  optionList.add(new Option("\tThe class index, with -1=last, (default -1).",
      "class-index", 1, "-class-index <num>"));
  optionList.add(new Option("\tDoesn't include a class attribute in the output.",
      "no-class", 0, "-no-class"));

  // attribute counts per type
  optionList.add(new Option("\tThe number of nominal attributes (default 1).",
      "nominal", 1, "-nominal <num>"));
  optionList.add(new Option("\tThe number of values for nominal attributes (default 2).",
      "nominal-values", 1, "-nominal-values <num>"));
  optionList.add(new Option("\tThe number of numeric attributes (default 0).",
      "numeric", 1, "-numeric <num>"));
  optionList.add(new Option("\tThe number of string attributes (default 0).",
      "string", 1, "-string <num>"));
  optionList.add(new Option("\tThe words to use in string attributes.",
      "words", 1, "-words <comma-separated-list>"));
  optionList.add(new Option("\tThe word separators to use in string attributes.",
      "word-separators", 1, "-word-separators <chars>"));
  optionList.add(new Option("\tThe number of date attributes (default 0).",
      "date", 1, "-date <num>"));

  // relational / multi-instance data
  optionList.add(new Option("\tThe number of relational attributes (default 0).",
      "relational", 1, "-relational <num>"));
  optionList.add(new Option("\tThe number of instances in relational/bag attributes (default 10).",
      "num-instances-relational", 1, "-num-instances-relational <num>"));
  optionList.add(new Option("\tGenerates multi-instance data.",
      "multi-instance", 0, "-multi-instance"));

  // capabilities handler the dataset is based on
  optionList.add(new Option("\tThe Capabilities handler to base the dataset on.\n"
      + "\tThe other parameters can be used to override the ones\n"
      + "\tdetermined from the handler. Additional parameters for\n"
      + "\thandler can be passed on after the '--'.",
      "W", 1, "-W <classname>"));

  return optionList.elements();
}
/**
* Parses a given list of options. <p/>
*
<!-- options-start -->
* Valid options are: <p/>
*
* <pre> -relation <name>
* The name of the data set.</pre>
*
* <pre> -seed <num>
* The seed value.</pre>
*
* <pre> -num-instances <num>
* The number of instances in the datasets (default 20).</pre>
*
* <pre> -class-type <num>
* The class type, see constants in weka.core.Attribute
* (default 1=nominal).</pre>
*
* <pre> -class-values <num>
* The number of classes to generate (for nominal classes only)
* (default 2).</pre>
*
* <pre> -class-index <num>
* The class index, with -1=last, (default -1).</pre>
*
* <pre> -no-class
* Doesn't include a class attribute in the output.</pre>
*
* <pre> -nominal <num>
* The number of nominal attributes (default 1).</pre>
*
* <pre> -nominal-values <num>
* The number of values for nominal attributes (default 2).</pre>
*
* <pre> -numeric <num>
* The number of numeric attributes (default 0).</pre>
*
* <pre> -string <num>
* The number of string attributes (default 0).</pre>
*
* <pre> -words <comma-separated-list>
* The words to use in string attributes.</pre>
*
* <pre> -word-separators <chars>
* The word separators to use in string attributes.</pre>
*
* <pre> -date <num>
* The number of date attributes (default 0).</pre>
*
* <pre> -relational <num>
* The number of relational attributes (default 0).</pre>
*
* <pre> -num-instances-relational <num>
* The number of instances in relational/bag attributes (default 10).</pre>
*
* <pre> -multi-instance
* Generates multi-instance data.</pre>
*
* <pre> -W <classname>
* The Capabilities handler to base the dataset on.
* The other parameters can be used to override the ones
* determined from the handler. Additional parameters for
* handler can be passed on after the '--'.</pre>
*
<!-- options-end -->
*
* @param options the list of options as an array of strings
* @throws Exception if an option is not supported
*/
public void setOptions(String[] options) throws Exception {
  String tmpStr;
  Class cls;
  CapabilitiesHandler handler;
  boolean initialized;

  // Becomes true once the settings were taken over from a
  // CapabilitiesHandler (-W). From then on an option that is missing on
  // the command line keeps the handler-derived value instead of being
  // reset to the built-in default.
  initialized = false;

  tmpStr = Utils.getOption('W', options);
  if (tmpStr.length() > 0) {
    cls = Class.forName(tmpStr);
    if (ClassDiscovery.hasInterface(CapabilitiesHandler.class, cls)) {
      initialized = true;
      handler = (CapabilitiesHandler) cls.newInstance();
      // the handler receives its own options (documented as the ones
      // following the '--')
      if (handler instanceof OptionHandler) {
        ((OptionHandler) handler).setOptions(Utils.partitionOptions(options));
      }
      setHandler(handler);
      // initialize this object from the capabilities of the handler
      this.assign(forCapabilities(handler.getCapabilities()));
    } else {
      throw new IllegalArgumentException("Class '" + tmpStr + "' is not a CapabilitiesHandler!");
    }
  }

  tmpStr = Utils.getOption("relation", options);
  if (tmpStr.length() != 0) {
    setRelation(tmpStr);
  } else if (!initialized) {
    setRelation("Testdata");
  }

  tmpStr = Utils.getOption("seed", options);
  if (tmpStr.length() != 0) {
    setSeed(Integer.parseInt(tmpStr));
  } else if (!initialized) {
    setSeed(1);
  }

  tmpStr = Utils.getOption("num-instances", options);
  if (tmpStr.length() != 0) {
    setNumInstances(Integer.parseInt(tmpStr));
  } else if (!initialized) {
    setNumInstances(20);
  }

  // NOTE(review): this call is made even when a handler initialised the
  // settings; confirm that setNoClass(false) does not discard a class
  // setup derived from the handler.
  setNoClass(Utils.getFlag("no-class", options));

  // the class related options are only read if a class gets generated
  if (!getNoClass()) {
    tmpStr = Utils.getOption("class-type", options);
    if (tmpStr.length() != 0) {
      setClassType(Integer.parseInt(tmpStr));
    } else if (!initialized) {
      setClassType(Attribute.NOMINAL);
    }

    tmpStr = Utils.getOption("class-values", options);
    if (tmpStr.length() != 0) {
      setNumClasses(Integer.parseInt(tmpStr));
    } else if (!initialized) {
      setNumClasses(2);
    }

    tmpStr = Utils.getOption("class-index", options);
    if (tmpStr.length() != 0) {
      setClassIndex(Integer.parseInt(tmpStr));
    } else if (!initialized) {
      setClassIndex(CLASS_IS_LAST);
    }
  }

  tmpStr = Utils.getOption("nominal", options);
  if (tmpStr.length() != 0) {
    setNumNominal(Integer.parseInt(tmpStr));
  } else if (!initialized) {
    setNumNominal(1);
  }

  tmpStr = Utils.getOption("nominal-values", options);
  if (tmpStr.length() != 0) {
    setNumNominalValues(Integer.parseInt(tmpStr));
  } else if (!initialized) {
    setNumNominalValues(2);
  }

  tmpStr = Utils.getOption("numeric", options);
  if (tmpStr.length() != 0) {
    setNumNumeric(Integer.parseInt(tmpStr));
  } else if (!initialized) {
    setNumNumeric(0);
  }

  tmpStr = Utils.getOption("string", options);
  if (tmpStr.length() != 0) {
    setNumString(Integer.parseInt(tmpStr));
  } else if (!initialized) {
    setNumString(0);
  }

  tmpStr = Utils.getOption("words", options);
  if (tmpStr.length() != 0) {
    setWords(tmpStr);
  } else if (!initialized) {
    setWords(arrayToList(DEFAULT_WORDS));
  }

  // unlike the other options, the separators are detected by the position
  // of the option and not by the length of its value
  if (Utils.getOptionPos("word-separators", options) > -1) {
    tmpStr = Utils.getOption("word-separators", options);
    setWordSeparators(tmpStr);
  } else if (!initialized) {
    setWordSeparators(DEFAULT_SEPARATORS);
  }

  tmpStr = Utils.getOption("date", options);
  if (tmpStr.length() != 0) {
    setNumDate(Integer.parseInt(tmpStr));
  } else if (!initialized) {
    setNumDate(0);
  }

  tmpStr = Utils.getOption("relational", options);
  if (tmpStr.length() != 0) {
    setNumRelational(Integer.parseInt(tmpStr));
  } else if (!initialized) {
    setNumRelational(0);
  }

  tmpStr = Utils.getOption("num-instances-relational", options);
  if (tmpStr.length() != 0) {
    setNumInstancesRelational(Integer.parseInt(tmpStr));
  } else if (!initialized) {
    setNumInstancesRelational(10);
  }

  // The flag is always read, so that it overrides a handler-derived
  // setting like every other option does and never remains unprocessed in
  // the options array. Without the flag, the default (off) is only applied
  // if no handler initialised the settings.
  if (Utils.getFlag("multi-instance", options)) {
    setMultiInstance(true);
  } else if (!initialized) {
    setMultiInstance(false);
  }
}
/**
* Gets the current settings of this object.
*
* @return an array of strings suitable for passing to setOptions
*/
public String[] getOptions() {
Vector result;
String[] options;
int i;
result = new Vector();
result.add("-relation");
result.add(getRelation());
result.add("-seed");
result.add("" + getSeed());
result.add("-num-instances");
result.add("" + getNumInstances());
if (getNoClass()) {
result.add("-no-class");
}
else {
result.add("-class-type");
result.add("" + getClassType());
result.add("-class-values");
result.add("" + getNumClasses());
result.add("-class-index");
result.add("" + getClassIndex());
}
result.add("-nominal");
result.add("" + getNumNominal());
result.add("-nominal-values");
result.add("" + getNumNominalValues());
result.add("-numeric");
result.add("" + getNumNumeric());
result.add("-string");
result.add("" + getNumString());
result.add("-words");
result.add("" + getWords());
result.add("-word-separators");
result.add("" + getWordSeparators());
result.add("-date");
result.add("" + getNumDate());
result.add("-relational");
result.add("&q