All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.splout.db.hadoop.JSONTablespaceDefinition Maven / Gradle / Ivy

Go to download

Splout is a read only, horizontally scalable SQL database that plays well with Hadoop.

There is a newer version: 0.3.0
Show newest version
package com.splout.db.hadoop;

/*
 * #%L
 * Splout SQL Hadoop library
 * %%
 * Copyright (C) 2012 Datasalt Systems S.L.
 * %%
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 *      http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * #L%
 */

import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.fs.Path;

import com.datasalt.pangool.io.Fields;
import com.datasalt.pangool.io.Schema;
import com.datasalt.pangool.tuplemr.OrderBy;
import com.datasalt.pangool.tuplemr.mapred.lib.input.TupleTextInputFormat;
import com.splout.db.hadoop.TableBuilder.TableBuilderException;
import com.splout.db.hadoop.TablespaceBuilder.TablespaceBuilderException;

/**
 * A JSON-friendly bean that can map to a {@link TablespaceSpec} easily. It only supports text input. Use
 * {@link #build()} for obtaining a TablespaceSpec instance.
 * 

* For specifying compound indexes use "," separator in the string index definition. */ public class JSONTablespaceDefinition { private String name; private int nPartitions; private List partitionedTables; private List replicateAllTables = new ArrayList(); /* * Inner static method for converting a {@link JSONTableDefinition} into a {@link Table} bean through {@link TableBuilder}. */ private static Table buildTable(JSONTableDefinition table, boolean isReplicateAll) throws TableBuilderException { if (table.getName() == null) { throw new IllegalArgumentException("Must provide a name for all tables."); } if (table.getSchema() == null) { throw new IllegalArgumentException("Must provide an schema for all tables."); } if (!isReplicateAll && (table.getPartitionFields() == null || table.getPartitionFields() == null || table.getPartitionFields().length() == 0)) { throw new IllegalArgumentException("Partitioned table must be partitioned by some field."); } if (table.getTableInputs() == null || table.getTableInputs().size() == 0) { throw new IllegalArgumentException("Table must have some table inputs."); } Schema schema = new Schema(table.getName(), Fields.parse(table.getSchema())); TableBuilder tableBuilder = new TableBuilder(schema); for (String index : table.getIndexes()) { tableBuilder.createIndex(index.split(",")); } if (!isReplicateAll) { tableBuilder.partitionBy(table.getPartitionFields().split(",")); } // Adding pre and post SQL if (table.getInitialStatements().size() != 0) { tableBuilder.preInsertsSQL(table.getInitialStatements().toArray(new String[0])); } if (table.getPreInsertStatements().size() != 0) { tableBuilder.preInsertsSQL(table.getPreInsertStatements().toArray(new String[0])); } if (table.getPostInsertStatements().size() != 0) { tableBuilder.preInsertsSQL(table.getPostInsertStatements().toArray(new String[0])); } if (table.getFinalStatements().size() != 0) { tableBuilder.finalSQL(table.getFinalStatements().toArray(new String[0])); } if(table.getInsertionOrderBy() != null) { tableBuilder.insertionSortOrder(OrderBy.parse(table.getInsertionOrderBy())); } for (JSONTableInputDefinition tableInput : table.getTableInputs()) { TextInputSpecs specs = tableInput.getInputSpecs(); if (specs == null) { specs = new TextInputSpecs(); // default specs (tabulated file) } if (tableInput.getPaths() == null || tableInput.getPaths().size() == 0) { throw new IllegalArgumentException("All table inputs must have input paths."); } for (String file : tableInput.getPaths()) { if (specs.getFixedWidthFields() != null) { // Fixed width fields definition. int[] fieldsArr = new int[specs.getFixedWidthFields().size()]; for (int i = 0; i < fieldsArr.length; i++) { fieldsArr[i] = specs.getFixedWidthFields().get(i); } tableBuilder.addFixedWidthTextFile(new Path(file), schema, fieldsArr, specs.isSkipHeader(), specs.getNullString(), null); } else { // CSV definition tableBuilder.addCSVTextFile(file, specs.getSeparatorChar(), specs.getQuotesChar(), specs.getEscapeChar(), specs.isSkipHeader(), specs.isStrictQuotes(), specs.getNullString()); } } } if (isReplicateAll) { tableBuilder.replicateToAll(); } return tableBuilder.build(); } /** * Use this method for obtaining a {@link TablespaceSpec} through {@link TablespaceBuilder}. */ public TablespaceSpec build() throws TablespaceBuilderException, TableBuilderException { TablespaceBuilder builder = new TablespaceBuilder(); builder.setNPartitions(nPartitions); if (partitionedTables == null) { throw new IllegalArgumentException("Can't build a " + TablespaceSpec.class.getName() + " without any partitioned table."); } if (name == null) { throw new IllegalArgumentException("Must provide a name for the Tablespace."); } for (JSONTableDefinition table : partitionedTables) { builder.add(buildTable(table, false)); } for (JSONTableDefinition table : replicateAllTables) { builder.add(buildTable(table, true)); } return builder.build(); } public static class JSONTableDefinition { private String name; private List tableInputs; private String schema; private String partitionFields; private String insertionOrderBy; private List indexes = new ArrayList(); private List initialStatements = new ArrayList(); private List preInsertStatements = new ArrayList(); private List postInsertStatements = new ArrayList(); private List finalStatements = new ArrayList(); public List getTableInputs() { return tableInputs; } public void setTableInputs(List tableInputs) { this.tableInputs = tableInputs; } public String getSchema() { return schema; } public void setSchema(String schema) { this.schema = schema; } public String getPartitionFields() { return partitionFields; } public void setPartitionFields(String partitionFields) { this.partitionFields = partitionFields; } public List getIndexes() { return indexes; } public void setIndexes(List indexes) { this.indexes = indexes; } public List getInitialStatements() { return initialStatements; } public void setInitialStatements(List initialStatements) { this.initialStatements = initialStatements; } public List getPostInsertStatements() { return postInsertStatements; } public void setPostInsertStatements(List postInsertStatements) { this.postInsertStatements = postInsertStatements; } public String getName() { return name; } public void setName(String name) { this.name = name; } public List getPreInsertStatements() { return preInsertStatements; } public void setPreInsertStatements(List preInsertStatements) { this.preInsertStatements = preInsertStatements; } public List getFinalStatements() { return finalStatements; } public void setFinalStatements(List finalStatements) { this.finalStatements = finalStatements; } String getInsertionOrderBy() { return insertionOrderBy; } void setInsertionOrderBy(String insertionOrderBy) { this.insertionOrderBy = insertionOrderBy; } } public static class JSONTableInputDefinition { private TextInputSpecs inputSpecs; private List paths; public TextInputSpecs getInputSpecs() { return inputSpecs; } public void setInputSpecs(TextInputSpecs inputSpecs) { this.inputSpecs = inputSpecs; } public List getPaths() { return paths; } public void setPaths(List paths) { this.paths = paths; } } public static class TextInputSpecs { private char separatorChar = '\t'; private char quotesChar = TupleTextInputFormat.NO_QUOTE_CHARACTER; private char escapeChar = TupleTextInputFormat.NO_ESCAPE_CHARACTER; private boolean skipHeader = false; private boolean strictQuotes = false; private String nullString = TupleTextInputFormat.NO_NULL_STRING; private ArrayList fixedWidthFields = null; public char getSeparatorChar() { return separatorChar; } public void setSeparatorChar(char separatorChar) { this.separatorChar = separatorChar; } public char getQuotesChar() { return quotesChar; } public void setQuotesChar(char quotesChar) { this.quotesChar = quotesChar; } public char getEscapeChar() { return escapeChar; } public void setEscapeChar(char escapeChar) { this.escapeChar = escapeChar; } public boolean isSkipHeader() { return skipHeader; } public void setSkipHeader(boolean skipHeader) { this.skipHeader = skipHeader; } public boolean isStrictQuotes() { return strictQuotes; } public void setStrictQuotes(boolean strictQuotes) { this.strictQuotes = strictQuotes; } public String getNullString() { return nullString; } public void setNullString(String nullString) { this.nullString = nullString; } public ArrayList getFixedWidthFields() { return fixedWidthFields; } public void setFixedWidthFields(ArrayList fixedWidthFields) { this.fixedWidthFields = fixedWidthFields; } } public int getnPartitions() { return nPartitions; } public void setnPartitions(int nPartitions) { this.nPartitions = nPartitions; } public List getPartitionedTables() { return partitionedTables; } public void setPartitionedTables(List partitionedTables) { this.partitionedTables = partitionedTables; } public List getReplicateAllTables() { return replicateAllTables; } public void setReplicateAllTables(List replicateAllTables) { this.replicateAllTables = replicateAllTables; } public String getName() { return name; } public void setName(String name) { this.name = name; } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy