edu.ksu.cis.kdd.data
Class Database

java.lang.Object
  extended by edu.ksu.cis.kdd.data.Database
All Implemented Interfaces:
Data

public class Database
extends java.lang.Object
implements Data

Author:
Roby Joehanes

Field Summary
static java.lang.String ARFF_FORMAT
           
protected  java.util.List attributes
           
static java.lang.String CSF_FORMAT
           
static java.lang.String DAT_FORMAT
           
protected  java.sql.Connection dbConnection
           
static java.lang.String LIBB_FORMAT
           
static int maxArity
           
static int maxSQLStringLength
           
protected  java.lang.String name
           
protected  java.util.List relevantAttributes
           
protected  java.sql.Statement remoteStatement
           
protected  java.util.List sortedTableListCache
           
protected  java.util.Hashtable tableCache
           
protected  Tally tally
           
static java.lang.String XML_FORMAT
           
 
Constructor Summary
Database()
           
Database(java.sql.Connection c)
           
 
Method Summary
 void addTable(Table tbl)
          Add a table
protected static void assignDefaultType(Attribute attr, int sqlType)
          A utility function to assign an attribute type based on its SQL type
 void disconnect()
           
 void exportToServer(java.sql.Connection conn)
          Export the local data to the server.
 java.util.List getAttributes()
           
 java.sql.Connection getConnection()
           
 java.lang.String getName()
          Returns the name.
 java.util.List getRelevantAttributes()
           
 java.sql.Statement getRemoteStatement()
           
 java.util.List getSortedTables()
          Get all tables in reverse sorted dependency order.
 Table getTable(java.lang.String name)
          Get a table based on its name
 java.util.List getTableNames()
          Get all table names
 java.util.List getTables()
          Get all tables.
 Tally getTallyer()
           
 java.util.List getTuples()
          This is the outer join function
 double[] getWeights()
           
 void importDatabaseToLocal()
           
static Database importRemoteSchema(java.sql.Connection conn)
          Connect to remote database.
static Database importRemoteSchema(java.sql.Connection conn, java.util.List tableNames)
          Connect to remote database.
static boolean isKnownFormat(java.lang.String formatExt)
           
 boolean isRemote()
           
protected  java.util.List join(java.util.List t1, java.util.List t2)
           
protected  java.util.List joinAll()
           
static Database load(java.lang.String filename)
           
static Database load(java.lang.String filename, java.lang.String format)
           
static void main(java.lang.String[] args)
           
 Table pickOneTable()
          Get the first table in the database
 void removeTable(java.lang.String name)
          Remove a table based on its name
 void save(java.io.OutputStream out, java.lang.String format)
           
 void save(java.lang.String filename)
           
 void save(java.lang.String filename, java.lang.String format)
          Saves in a different format.
 void setName(java.lang.String name)
          Sets the name.
 void setWeights(double[] weights)
           
 Data subsample(int n)
           
 Data subsample(int n, long seed)
           
 int tableCount()
           
protected  void topologicalSort(java.lang.String curNode, TableSet table, java.util.LinkedList sortedList, java.util.HashSet seenBefore)
          Do topological sort on tables.
protected  java.util.List topologicalSort(TableSet table, Table[] tables)
          Do topological sort on tables.
 java.lang.String toString()
           
 
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, wait, wait, wait
 

Field Detail

tableCache

protected java.util.Hashtable tableCache

name

protected java.lang.String name

CSF_FORMAT

public static final java.lang.String CSF_FORMAT
See Also:
Constant Field Values

ARFF_FORMAT

public static final java.lang.String ARFF_FORMAT
See Also:
Constant Field Values

XML_FORMAT

public static final java.lang.String XML_FORMAT
See Also:
Constant Field Values

LIBB_FORMAT

public static final java.lang.String LIBB_FORMAT
See Also:
Constant Field Values

DAT_FORMAT

public static final java.lang.String DAT_FORMAT
See Also:
Constant Field Values

maxSQLStringLength

public static int maxSQLStringLength

maxArity

public static int maxArity

dbConnection

protected java.sql.Connection dbConnection

remoteStatement

protected java.sql.Statement remoteStatement

tally

protected Tally tally

sortedTableListCache

protected java.util.List sortedTableListCache

attributes

protected java.util.List attributes

relevantAttributes

protected java.util.List relevantAttributes
Constructor Detail

Database

public Database()

Database

public Database(java.sql.Connection c)
Method Detail

isKnownFormat

public static boolean isKnownFormat(java.lang.String formatExt)

subsample

public Data subsample(int n,
                      long seed)
Specified by:
subsample in interface Data
See Also:
Data.subsample(int, long)

subsample

public Data subsample(int n)
Specified by:
subsample in interface Data
See Also:
Data.subsample(int)

getWeights

public double[] getWeights()
Specified by:
getWeights in interface Data
See Also:
Data.getWeights()

setWeights

public void setWeights(double[] weights)
Specified by:
setWeights in interface Data
See Also:
Data.setWeights(double[])

tableCount

public int tableCount()

addTable

public void addTable(Table tbl)
Add a table

Parameters:
tbl -

getAttributes

public java.util.List getAttributes()
Specified by:
getAttributes in interface Data

getRelevantAttributes

public java.util.List getRelevantAttributes()
Specified by:
getRelevantAttributes in interface Data

getTuples

public java.util.List getTuples()
This is the outer join function

Specified by:
getTuples in interface Data

joinAll

protected java.util.List joinAll()

join

protected java.util.List join(java.util.List t1,
                              java.util.List t2)

getTallyer

public Tally getTallyer()
Specified by:
getTallyer in interface Data

getSortedTables

public java.util.List getSortedTables()
Get all tables in reverse sorted dependency order. The table that gets referenced the most gets the first place.

This big chunk of code is for preprocessing the database. The process is roughly as follows:

  1. If it contains a single table, short-circuit it. Do the preparation in a single tally.
  2. Detect forward and backward reference -- to determine the table graph
  3. Do a topological sort on that graph. Of course, if the table graph is cyclic, it should be able to handle that as well, since the topological sort minimizes the back edges.

Note to self: Do NOT call this method in the constructor as it would slow down the createSubTally.


topologicalSort

protected java.util.List topologicalSort(TableSet table,
                                         Table[] tables)
Do topological sort on tables. Required for getSortedTables.


topologicalSort

protected void topologicalSort(java.lang.String curNode,
                               TableSet table,
                               java.util.LinkedList sortedList,
                               java.util.HashSet seenBefore)
Do topological sort on tables. Required for getSortedTables. This is the recursive helper for the other topologicalSort method.


getTables

public java.util.List getTables()
Get all tables. Note that pickOneTable result is not necessarily the same as getTables().get(0)!

Returns:

getTableNames

public java.util.List getTableNames()
Get all table names

Returns:
the list

pickOneTable

public Table pickOneTable()
Get the first table in the database

Returns:

getTable

public Table getTable(java.lang.String name)
Get a table based on its name

Parameters:
name -
Returns:
the table

removeTable

public void removeTable(java.lang.String name)
Remove a table based on its name

Parameters:
name -

getName

public java.lang.String getName()
Returns the name.

Specified by:
getName in interface Data
Returns:
String

setName

public void setName(java.lang.String name)
Sets the name.

Parameters:
name - The name to set

load

public static Database load(java.lang.String filename,
                            java.lang.String format)

load

public static Database load(java.lang.String filename)

importRemoteSchema

public static Database importRemoteSchema(java.sql.Connection conn)
Connect to remote database. Assumption: We use all tables contained in that database URL.


assignDefaultType

protected static void assignDefaultType(Attribute attr,
                                        int sqlType)
A utility function to assign an attribute type based on its SQL type

Parameters:
attr -

importRemoteSchema

public static Database importRemoteSchema(java.sql.Connection conn,
                                          java.util.List tableNames)
Connect to remote database. We use only tables listed in tableNames. WARNING: In some databases (like Oracle), the table names are CASE SENSITIVE IN SOME CASES (such as columns metadata inquiries), but CASE INSENSITIVE in other cases (like the ordinary SQL queries).


exportToServer

public void exportToServer(java.sql.Connection conn)

Export the local data to the server. The connection conn must be valid, and you must make sure you have closed all statements created on conn, because this routine will create one. This is because a lot of JDBC drivers only support ONE statement creation per connection.

Parameters:
conn -

importDatabaseToLocal

public void importDatabaseToLocal()

getConnection

public java.sql.Connection getConnection()

getRemoteStatement

public java.sql.Statement getRemoteStatement()

isRemote

public boolean isRemote()

disconnect

public void disconnect()

save

public void save(java.lang.String filename)

save

public void save(java.lang.String filename,
                 java.lang.String format)
Saves in a different format.

Parameters:
filename - name of the output file

save

public void save(java.io.OutputStream out,
                 java.lang.String format)

toString

public java.lang.String toString()

main

public static void main(java.lang.String[] args)