fsmvis.data
Class DataItemCollection

java.lang.Object
  |
  +--fsmvis.data.DataItemCollection

public class DataItemCollection
extends java.lang.Object
implements java.io.Serializable

See Also:
Serialized Form

Field Summary
protected  double[] average
           
protected  java.util.ArrayList columns
           
protected  java.util.ArrayList dataItems
           
static int DATE
           
protected  double[][] desiredDist
           
static int DOUBLE
           
protected  java.util.ArrayList fields
           
static int INTEGER
           
protected  boolean isNormalized
           
static boolean KMEANS
           
protected  double layoutBounds
           
protected  double maxDist
           
protected  java.util.ArrayList maximums
           
protected  double mean
           
protected  double minDist
           
protected  java.util.ArrayList minimums
           
static boolean NO_KMEANS
           
protected  boolean normalising
           
protected  double ORD_FACTOR
           
protected  double sig
           
protected  double[] sigma
           
protected  int size
           
protected  double STANDARD_DEVS
           
static int STRING
           
protected  double sumHDSquares
           
protected  double sumHDVals
           
protected  double[] sumOfSquares
           
protected  double[] sumOfVals
           
protected  java.util.ArrayList types
           
protected  double unrelatedDist
           
protected  boolean useSamples
           
 
Constructor Summary
DataItemCollection()
          Constructor
 
Method Summary
 void addItem(DataItem data)
          Adds a DataItem to this collection of dataItems
 void calcDesiredDistances()
          Calculates the desired distances array by using a similarity metric on every object with every other object.
 void calcNormValues()
          Calculates approximations of sigma and mean needed to normalise distance data by taking a sample of distances from the full data set
 void calcNormValues(java.util.ArrayList sample)
          Calculates approximations of sigma and mean needed to normalise distance data by taking a sample of distances from the subset
 double getAverageError()
          Returns the average error in the system; this is the average distance that a dataItem is from its desired distance
 double getAvgVelocity()
          Returns the average velocity of dataItems in the collection
 java.util.ArrayList getColumn(int colNum)
          Returns the data from a specified "column".
 DataItem getDataItem(int index)
          Returns the data item stored at the location given by index
 java.util.ArrayList getDataItems()
          Accessor method for the main collection held within this class
 double getDesiredDist(int item1, int item2)
          Returns the desired distance between these two objects.
 double getDesiredDist(int item1, int item2, java.lang.String s)
          Returns the desired distance between these two objects.
 java.util.ArrayList getFields()
          Returns the names of the fields that this collection contains
 java.lang.Object getMaximum(int col)
          Returns the maximum value for a particular column; this will be of type Integer, Double or Date.
 java.util.ArrayList getMaximums()
          Returns the arraylist of the maximum values for each column in the data set.
 java.lang.Object getMinimum(int col)
          Returns the minimum value for a particular column; this will be of type Integer, Double or Date.
 java.util.ArrayList getMinimums()
          Returns the arraylist of the minimum values for each column in the data set.
 int getNumFields()
          Returns the number of fields in each of the records under analysis.
 int getSize()
          Returns the size of this data item collection
 java.util.ArrayList getTypes()
          Returns the arrayList of types that this class contains
 double getUnrelatedDist()
          Returns the distance after which two objects are considered to be unrelated.
 void normalizeDesiredDists(double layoutBounds)
          Normalizes the desired distance data to be within layoutBounds by normalizing by STANDARD_DEVS standard deviations.
 void refresh()
          Called when deserialized; refreshes any data item values which were not stored with serialization.
 void setFields(java.util.ArrayList fields)
          Sets the fields that this collection represents to be fields
 void setNormalizeData(double[] sumOfVals, double[] sumOfSquares)
          Sets the values used for data normalization; the arrays contain values for each column in the data set which contains numeric data.
 void setTypes(java.util.ArrayList types)
          Sets the types that this collection represents to be types
 
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
 

Field Detail

types

protected java.util.ArrayList types

fields

protected java.util.ArrayList fields

dataItems

protected java.util.ArrayList dataItems

columns

protected java.util.ArrayList columns

maximums

protected java.util.ArrayList maximums

minimums

protected java.util.ArrayList minimums

size

protected int size

isNormalized

protected boolean isNormalized

normalising

protected boolean normalising

useSamples

protected boolean useSamples

layoutBounds

protected double layoutBounds

mean

protected double mean

sig

protected double sig

desiredDist

protected double[][] desiredDist

sumOfVals

protected double[] sumOfVals

sumOfSquares

protected double[] sumOfSquares

average

protected double[] average

sigma

protected double[] sigma

sumHDVals

protected double sumHDVals

sumHDSquares

protected double sumHDSquares

maxDist

protected double maxDist

minDist

protected double minDist

unrelatedDist

protected double unrelatedDist

ORD_FACTOR

protected final double ORD_FACTOR

STANDARD_DEVS

protected final double STANDARD_DEVS

STRING

public static final int STRING

DATE

public static final int DATE

INTEGER

public static final int INTEGER

DOUBLE

public static final int DOUBLE

KMEANS

public static final boolean KMEANS

NO_KMEANS

public static final boolean NO_KMEANS
Constructor Detail

DataItemCollection

public DataItemCollection()
Constructor
Method Detail

addItem

public void addItem(DataItem data)
Adds a DataItem to this collection of dataItems
Parameters:
data - The data item to be added

setTypes

public void setTypes(java.util.ArrayList types)
Sets the types that this collection represents to be types
Parameters:
types - The collection of types to be used

getTypes

public java.util.ArrayList getTypes()
Returns the arrayList of types that this class contains
Returns:
The arrayList of types that this class contains

setFields

public void setFields(java.util.ArrayList fields)
Sets the fields that this collection represents to be fields
Parameters:
fields - The collection of fields to be used

getFields

public java.util.ArrayList getFields()
Returns the names of the fields that this collection contains
Returns:
The arrayList of fields

setNormalizeData

public void setNormalizeData(double[] sumOfVals,
                             double[] sumOfSquares)
Sets the values used for data normalization. The arrays contain values for each column in the data set which contains numeric data: the sum of all values and the sum of squares of values, respectively.
Parameters:
sumOfVals - The sum of all values in each column
sumOfSquares - The sum of squares of all values in each column

normalizeDesiredDists

public void normalizeDesiredDists(double layoutBounds)
Normalizes the desired distance data to be within layoutBounds by normalizing by STANDARD_DEVS standard deviations. This will make most values be between 0 and 1. The values are then multiplied by the range size, and finally the rangeLo value is added, which will ensure (most) values are between rangeLo and rangeHi.
Parameters:
layoutBounds - The bounds within which the desired distance data is normalized

calcDesiredDistances

public void calcDesiredDistances()
Calculates the desired distances array by using a similarity metric on every object with every other object. Uses normalization on every value as it performs this. Creates a new desired distances object to encapsulate this data.

refresh

public void refresh()
Called when deserialized; refreshes any data item values which were not stored with serialization. This is anything which is static, transient or was too large to be stored and can be precomputed. At the moment it only refreshes the normalization data.

getDesiredDist

public double getDesiredDist(int item1,
                             int item2)
Returns the desired distance between these two objects. Normalisation version
Parameters:
item1 - The first object in the distance relation
item2 - The second object
Returns:
The desired distance

getDesiredDist

public double getDesiredDist(int item1,
                             int item2,
                             java.lang.String s)
Returns the desired distance between these two objects. Normalisation version. NOTE: THIS METHOD IS JUST FOR DEBUGGING.
Parameters:
item1 - The first object in the distance relation
item2 - The second object
Returns:
The desired distance

calcNormValues

public void calcNormValues()
Calculates approximations of sigma and mean needed to normalise distance data by taking a sample of distances from the full data set

calcNormValues

public void calcNormValues(java.util.ArrayList sample)
Calculates approximations of sigma and mean needed to normalise distance data by taking a sample of distances from the subset
Parameters:
sample - The subset of values being initially laid out

getAverageError

public double getAverageError()
Returns the average error in the system; this is the average distance that a dataItem is from its desired distance
Returns:
The average error

getAvgVelocity

public double getAvgVelocity()
Returns the average velocity of dataItems in the collection
Returns:
the average velocity

getDataItems

public java.util.ArrayList getDataItems()
Accessor method for the main collection held within this class
Returns:
The collection that this class encapsulates

getDataItem

public DataItem getDataItem(int index)
Returns the data item stored at the location given by index
Parameters:
index - The index of the dataItem that is required
Returns:
the data item that was stored at this location

getColumn

public java.util.ArrayList getColumn(int colNum)
Returns the data from a specified "column". This allows the data to be accessed from a different direction, instead of just in rows by getting data items
Parameters:
colNum - The required column number
Returns:
The ArrayList of data from that column

getMaximums

public java.util.ArrayList getMaximums()
Returns the arraylist of the maximum values for each column in the data set. Columns with String values just contain empty objects
Returns:
The arraylist of maximums

getMaximum

public java.lang.Object getMaximum(int col)
Returns the maximum value for a particular column; this will be of type Integer, Double or Date. It may also be an empty object if the type was String.
Parameters:
col - The column number of the maximum required
Returns:
The maximum value for the specified column

getMinimums

public java.util.ArrayList getMinimums()
Returns the arraylist of the minimum values for each column in the data set. Columns with String values just contain empty objects
Returns:
The arraylist of minimums

getMinimum

public java.lang.Object getMinimum(int col)
Returns the minimum value for a particular column; this will be of type Integer, Double or Date. It may also be an empty object if the type was String.
Parameters:
col - The column number of the minimum required
Returns:
The minimum value for the specified column

getSize

public int getSize()
Returns the size of this data item collection
Returns:
The size of the data collection

getNumFields

public int getNumFields()
Returns the number of fields in each of the records under analysis.
Returns:
The number of fields.

getUnrelatedDist

public double getUnrelatedDist()
Returns the distance after which two objects are considered to be unrelated.
Returns:
The unrelated distance