
com.splout.db.hadoop.TableSpec Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of splout-hadoop Show documentation
Show all versions of splout-hadoop Show documentation
Splout is a read-only, horizontally scalable SQL database that plays well with Hadoop.
package com.splout.db.hadoop;
/*
* #%L
* Splout SQL Hadoop library
* %%
* Copyright (C) 2012 Datasalt Systems S.L.
* %%
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* #L%
*/
import java.io.Serializable;
import java.util.Arrays;
import com.datasalt.pangool.io.Schema;
import com.datasalt.pangool.io.Schema.Field;
import com.datasalt.pangool.tuplemr.OrderBy;
/**
* Simple immutable bean that specifies the Pangool Schema of a Splout Table and the Fields that need to be indexed and how it is partitioned.
* It is part of a {@link Table} bean. It is also used by {@link TupleSQLite4JavaOutputFormat}.
*/
@SuppressWarnings("serial")
public class TableSpec implements Serializable {
private final Schema schema;
private final Field[] partitionFields;
private final FieldIndex[] indexes;
private final String partitionByJavaScript;
private final String[] initialSQL;
private final String[] preInsertsSQL;
private final String[] postInsertsSQL;
private final String[] finalSQL;
private transient final OrderBy insertionOrderBy;
/**
* Simple TableSpec constructor that creates one TableSpec for a schema with a single partition field that is also indexed.
* Warning: an index is created automatically for the partition field.
*/
public TableSpec(Schema schema, Field partitionField) {
this(schema, new Field[] { partitionField }, new FieldIndex[] { new FieldIndex(partitionField) }, null, null, null, null, null);
}
/**
* Creates a Table specification.
*
* @param schema The schema that defines the table
* @param partitionFields fields to partition the table by
* @param indexes The indexes that mus be created
* @param initialSQL SQL statements that will be executed at the start of the process, just after
* some default PRAGMA statements and just before the CREATE TABLE statements.
* @param preInsertsSQL SQL statements that will be executed just after the CREATE TABLE statements
* but just before the INSERT statements used to insert data.
* @param postInsertsSQL SQL statements that will be executed just after all data is inserted but
* just before the CREATE INDEX statements.
* @param finalSQL SQL statements that will be executed al the end of the process, just after the
* CREATE INDEX statements.
* @param insertionOrderBy The order in which data is inserted in the database. That is very important
* because affect data locality and some queries could go faster or very
* slow depending on the order the data is stored. Usually, data should be
* sorted in the same order than the main index that you will use for queries.
* As a common rule, sort data in the proper order to answer the most important
* queries of your system.
*/
public TableSpec(Schema schema, Field[] partitionFields, FieldIndex[] indexes,
String[] initialSQL, String[] preInsertsSQL, String[] postInsertsSQL,
String[] finalSQL, OrderBy insertionOrderBy) {
this.schema = schema;
this.partitionFields = partitionFields;
this.indexes = indexes;
this.partitionByJavaScript = null;
this.initialSQL = initialSQL;
this.preInsertsSQL = preInsertsSQL;
this.postInsertsSQL = postInsertsSQL;
this.finalSQL = finalSQL;
this.insertionOrderBy = insertionOrderBy;
}
/**
* Creates a Table specification.
*
* @param schema The schema that defines the table
* @param partitionByJavaScript JavaScript function that applies to rows and returns a key that must
* be used to partition.
* @param indexes The indexes that mus be created
* @param initialSQL SQL statements that will be executed at the start of the process, just after
* some default PRAGMA statements and just before the CREATE TABLE statements.
* @param preInsertsSQL SQL statements that will be executed just after the CREATE TABLE statements
* but just before the INSERT statements used to insert data.
* @param postInsertsSQL SQL statements that will be executed just after all data is inserted but
* just before the CREATE INDEX statements.
* @param finalSQL SQL statements that will be executed al the end of the process, just after the
* CREATE INDEX statements.
* @param insertionOrderBy The order in which data is inserted in the database. That is very important
* because affect data locality and some queries could go faster or very
* slow depending on the order the data is stored. Usually, data should be
* sorted in the same order than the main index that you will use for queries.
* As a common rule, sort data in the proper order to answer the most important
* queries of your system.
*/
public TableSpec(Schema schema, String partitionByJavaScript, FieldIndex[] indexes,
String[] initialSQL, String[] preInsertsSQL, String[] postInsertsSQL,
String[] finalSQL, OrderBy insertionOrderBy) {
this.schema = schema;
this.partitionFields = null;
this.partitionByJavaScript = partitionByJavaScript;
this.indexes = indexes;
this.initialSQL = initialSQL;
this.preInsertsSQL = preInsertsSQL;
this.postInsertsSQL = postInsertsSQL;
this.finalSQL = finalSQL;
this.insertionOrderBy = insertionOrderBy;
}
/**
* A database index made up by one or more Pangool Fields.
*/
public static class FieldIndex implements Serializable {
private Field[] fields;
public FieldIndex(Field field) {
this.fields = new Field[] { field };
}
public FieldIndex(Field... fields) {
this.fields = fields;
}
public Field[] getIndexFields() {
return fields;
}
@Override
public String toString() {
return Arrays.toString(fields);
}
}
// ---- Getters ---- //
public Schema getSchema() {
return schema;
}
public FieldIndex[] getIndexes() {
return indexes;
}
public Field[] getPartitionFields() {
return partitionFields;
}
public String getPartitionByJavaScript() {
return partitionByJavaScript;
}
public String[] getPostInsertsSQL() {
return postInsertsSQL;
}
public String[] getInitialSQL() {
return initialSQL;
}
public String[] getFinalSQL() {
return finalSQL;
}
public String[] getPreInsertsSQL() {
return preInsertsSQL;
}
public OrderBy getInsertionOrderBy() {
return insertionOrderBy;
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy