
com.splout.db.hadoop.TupleSQLite4JavaOutputFormat Maven / Gradle / Ivy


Splout is a read-only, horizontally scalable SQL database that plays well with Hadoop.

package com.splout.db.hadoop;

/*
 * #%L
 * Splout SQL Hadoop library
 * %%
 * Copyright (C) 2012 Datasalt Systems S.L.
 * %%
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 *      http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * #L%
 */

import java.io.File;
import java.io.IOException;
import java.io.Serializable;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.atomic.AtomicLong;

import org.apache.commons.io.FileUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import com.almworks.sqlite4java.SQLiteConnection;
import com.almworks.sqlite4java.SQLiteException;
import com.almworks.sqlite4java.SQLiteStatement;
import com.datasalt.pangool.io.ITuple;
import com.datasalt.pangool.io.Schema.Field;
import com.splout.db.common.HeartBeater;
import com.splout.db.hadoop.SQLiteOutputFormat.SQLRecordWriter;
import com.splout.db.hadoop.TableSpec.FieldIndex;

/**
 * An OutputFormat that accepts Pangool's Tuples and writes to a sqlite4Java SQLite file. The Tuples that are written to
 * it must conform to a particular schema: having a "_partition" integer field (which will then create a file named
 * "partition".db) and be a {@link NullableTuple} so that nulls are accepted as normal SQL values.
 * <p>
 * The different schemas that will be given to this OutputFormat are defined in the constructor by providing a
 * {@link TableSpec}. Each TableSpec also contains information such as pre-SQL or post-SQL statements, but most notably
 * contains a Schema so that a CREATE TABLE can be derived automatically from it. Note that the Schema provided to
 * TableSpec doesn't need to contain a "_partition" field or be nullable.
 */
@SuppressWarnings("serial")
public class TupleSQLite4JavaOutputFormat extends FileOutputFormat<ITuple, NullWritable> implements Serializable {

  public final static String PARTITION_TUPLE_FIELD = "_partition";

  public static Log LOG = LogFactory.getLog(TupleSQLite4JavaOutputFormat.class);
  private int batchSize;

  /**
   * Exception that is thrown if the Output Format cannot be instantiated because the specified parameters are
   * inconsistent or invalid. The reason is the message of the Exception, and it may optionally wrap another Exception.
   */
  public static class TupleSQLiteOutputFormatException extends Exception {

    public TupleSQLiteOutputFormatException(String cause) {
      super(cause);
    }

    public TupleSQLiteOutputFormatException(String cause, Exception e) {
      super(cause, e);
    }
  }

  // Given a {@link TableSpec}, returns the appropriate SQL CREATE TABLE...
  private static String getCreateTable(TableSpec tableSpec) throws TupleSQLiteOutputFormatException {
    String createTable = "CREATE TABLE " + tableSpec.getSchema().getName() + " (";
    for(Field field : tableSpec.getSchema().getFields()) {
      if(field.getName().equals(PARTITION_TUPLE_FIELD)) {
        continue;
      }
      if(field.getName().equals(NullableSchema.NULLS_FIELD)) {
        continue;
      }
      createTable += field.getName() + " ";
      switch(field.getType()) {
      /*
       * This mapping is done after SQLite's documentation. For instance, SQLite doesn't have Booleans (they have to be
       * INTEGERs). It doesn't have LONGs either.
       */
      case INT:
        createTable += "INTEGER, ";
        break;
      case LONG:
        createTable += "INTEGER, ";
        break;
      case DOUBLE:
        createTable += "REAL, ";
        break;
      case FLOAT:
        createTable += "REAL, ";
        break;
      case STRING:
        createTable += "TEXT, ";
        break;
      case BOOLEAN:
        createTable += "INTEGER, ";
        break;
      default:
        throw new TupleSQLiteOutputFormatException("Unsupported field type: " + field.getType());
      }
    }
    createTable = createTable.substring(0, createTable.length() - 2);
    return createTable += ");";
  }

  // Get all the CREATE TABLE... statements for a list of {@link TableSpec}
  protected static String[] getCreateTables(TableSpec... tableSpecs) throws TupleSQLiteOutputFormatException {
    List<String> createTables = new ArrayList<String>();
    // First the initSQL provided by the user
    for(TableSpec tableSpec : tableSpecs) {
      if(tableSpec.getInitialSQL() != null) {
        createTables.addAll(Arrays.asList(tableSpec.getInitialSQL()));
      }
    }
    // CREATE TABLE statements
    for(TableSpec tableSpec : tableSpecs) {
      createTables.add(getCreateTable(tableSpec));
    }
    // Add user preInsertsSQL, if it exists, just after the CREATE TABLE's
    for(TableSpec tableSpec : tableSpecs) {
      if(tableSpec.getPreInsertsSQL() != null) {
        createTables.addAll(Arrays.asList(tableSpec.getPreInsertsSQL()));
      }
    }
    return createTables.toArray(new String[0]);
  }
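  /*
   * Illustrative example of the derived SQL (not part of the original source; table and field names are made up):
   * for a table "meteo" whose Schema declares the fields (station:STRING, temperature:DOUBLE, measurements:INT),
   * getCreateTable() above produces
   *
   *   CREATE TABLE meteo (station TEXT, temperature REAL, measurements INTEGER);
   *
   * and getCreateIndexes() below, given an index on the "station" field, produces
   *
   *   CREATE INDEX idx_meteo_station ON meteo(station);
   */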
  // Get a list of CREATE INDEX... statements for a {@link TableSpec} list.
  protected static String[] getCreateIndexes(TableSpec... tableSpecs) throws TupleSQLiteOutputFormatException {
    List<String> createIndexes = new ArrayList<String>();
    // Add user postInsertsSQL, if it exists, just before the CREATE INDEX statements
    for(TableSpec tableSpec : tableSpecs) {
      if(tableSpec.getPostInsertsSQL() != null) {
        createIndexes.addAll(Arrays.asList(tableSpec.getPostInsertsSQL()));
      }
    }
    for(TableSpec tableSpec : tableSpecs) {
      for(FieldIndex index : tableSpec.getIndexes()) {
        for(Field field : index.getIndexFields()) {
          if(!tableSpec.getSchema().getFields().contains(field)) {
            throw new TupleSQLiteOutputFormatException("Field to index (" + index
                + ") not contained in input schema (" + tableSpec.getSchema() + ")");
          }
        }
        // The following code is able to create indexes for one field or for multiple fields
        String createIndex = "CREATE INDEX idx_" + tableSpec.getSchema().getName() + "_";
        for(Field field : index.getIndexFields()) {
          createIndex += field.getName();
        }
        createIndex += " ON " + tableSpec.getSchema().getName() + "(";
        for(Field field : index.getIndexFields()) {
          createIndex += field.getName() + ", ";
        }
        createIndex = createIndex.substring(0, createIndex.length() - 2) + ");";
        createIndexes.add(createIndex);
      }
    }
    // Add user finalSQL, if it exists, just after the CREATE INDEX statements
    for(TableSpec tableSpec : tableSpecs) {
      if(tableSpec.getFinalSQL() != null) {
        createIndexes.addAll(Arrays.asList(tableSpec.getFinalSQL()));
      }
    }
    return createIndexes.toArray(new String[0]);
  }

  private String[] preSQL, postSQL;
  private static AtomicLong FILE_SEQUENCE = new AtomicLong(0);

  /**
   * This OutputFormat receives a list of {@link TableSpec}. These are the different tables that will be created. They
   * will be identified by Pangool Tuples. The batch size is the number of SQL statements to execute before a COMMIT.
   */
  public TupleSQLite4JavaOutputFormat(int batchSize, TableSpec... dbSpec) throws TupleSQLiteOutputFormatException {
    // Generate create table and create index statements
    preSQL = getCreateTables(dbSpec);
    postSQL = getCreateIndexes(dbSpec);
    this.batchSize = batchSize;
  }

  @Override
  public RecordWriter<ITuple, NullWritable> getRecordWriter(TaskAttemptContext context) throws IOException,
      InterruptedException {
    return new TupleSQLRecordWriter(context);
  }
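  /*
   * Illustrative usage (not part of the original source; "spec" is a TableSpec assumed to have been built elsewhere):
   * an instance configured to COMMIT every 1000 INSERT statements could be created as
   *
   *   TupleSQLite4JavaOutputFormat outputFormat = new TupleSQLite4JavaOutputFormat(1000, spec);
   *
   * The class implements Serializable, presumably so that a configured instance can be serialized into the job
   * rather than instantiated by class name.
   */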
  /**
   * A RecordWriter that accepts an Int(Partition), a Tuple and delegates to a {@link SQLRecordWriter} converting the
   * Tuple into SQL and assigning the partition that comes in the Key.
   */
  public class TupleSQLRecordWriter extends RecordWriter<ITuple, NullWritable> {

    // Temporary and permanent Paths for properly writing Hadoop output files
    private Map<Integer, Path> permPool = new HashMap<Integer, Path>();
    private Map<Integer, Path> tempPool = new HashMap<Integer, Path>();

    private HeartBeater heartBeater;

    // Map of prepared statements per Schema and per Partition
    Map<Integer, Map<String, SQLiteStatement>> stCache = new HashMap<Integer, Map<String, SQLiteStatement>>();
    Map<Integer, SQLiteConnection> connCache = new HashMap<Integer, SQLiteConnection>();

    long records = 0;

    private FileSystem fs;
    private Configuration conf;
    private TaskAttemptContext context;

    public TupleSQLRecordWriter(TaskAttemptContext context) {
      this.context = context;
      long waitTimeHeartBeater = context.getConfiguration().getLong(HeartBeater.WAIT_TIME_CONF, 5000);
      heartBeater = new HeartBeater(context, waitTimeHeartBeater);
      heartBeater.needHeartBeat();
      conf = context.getConfiguration();
    }

    // This method is called one time per each partition
    private void initSql(int partition) throws IOException {

      if(!FileSystem.get(conf).equals(FileSystem.getLocal(conf))) {
        // This is a trick for not having to use the DistributedCache:
        // "The child-jvm always has its current working directory added to the java.library.path and LD_LIBRARY_PATH"
        // (from http://hadoop.apache.org/docs/mapreduce/r0.22.0/mapred_tutorial.html#Task+Execution+%26+Environment)
        // So we bundle the native libs in the JAR and copy them to the working directory
        String[] mapRedLocalDirs = conf.get("mapred.local.dir").split(",");
        for(String mapRedLocaLDir : mapRedLocalDirs) {
          LOG.info("Mapred local dir: " + mapRedLocaLDir);
          File[] nativeLibs = new File(mapRedLocaLDir + "/../jars").listFiles();
          LOG.info("Examining: " + (mapRedLocaLDir + "/../jars"));
          if(nativeLibs != null) {
            for(File nativeLib : nativeLibs) {
              if((nativeLib + "").contains("sqlite")) {
                FileUtils.copyFile(nativeLib, new File(".", nativeLib.getName()));
              }
            }
            LOG.info("Found native libraries in: " + Arrays.toString(nativeLibs)
                + ", copied to task work directory.");
            break;
          }
        }
      }

      Path outPath = FileOutputFormat.getOutputPath(context);
      fs = outPath.getFileSystem(conf);
      Path perm = new Path(FileOutputFormat.getOutputPath(context), partition + ".db");
      Path temp = conf.getLocalPath("mapred.local.dir", partition + "." + FILE_SEQUENCE.incrementAndGet());
      fs.delete(perm, true); // delete old, if any
      fs.delete(temp, true); // delete old, if any
      Path local = fs.startLocalOutput(perm, temp);
      //
      try {
        permPool.put(partition, perm);
        tempPool.put(partition, temp);
        LOG.info("Initializing SQL connection [" + partition + "]");
        SQLiteConnection conn = new SQLiteConnection(new File(local.toString()));
        // Change the default temp_store_directory, otherwise we may run out of disk space as it will go to /var/tmp
        // In EMR the big disks are at /mnt
        // It suffices to set it to . as it is the tasks' work directory
        // Warning: this pragma is deprecated and may be removed in further versions, however there is no choice
        // other than recompiling SQLite or modifying the environment.
        conn.open(true);
        conn.exec("PRAGMA temp_store_directory = '" + new File(".").getAbsolutePath() + "'");
        SQLiteStatement st = conn.prepare("PRAGMA temp_store_directory");
        st.step();
        LOG.info("Changed temp_store_directory to: " + st.columnString(0));
        // journal_mode=OFF speeds up insertions
        conn.exec("PRAGMA journal_mode=OFF");
        /*
         * page_size is one of the most important parameters for speeding up indexation. SQLite performs a merge sort
         * for sorting data before inserting it in an index. The buffer SQLite uses for sorting has a size equal to
         * page_size * SQLITE_DEFAULT_TEMP_CACHE_SIZE. Unfortunately, SQLITE_DEFAULT_TEMP_CACHE_SIZE is a compilation
         * parameter, so it is fixed for the sqlite4java library used. We have recompiled that library to increase
         * SQLITE_DEFAULT_TEMP_CACHE_SIZE (up to 32000 at the time of writing these lines), so at runtime the only way
         * to change the buffer size used for sorting is to change the page_size. page_size must be changed BEFORE the
         * CREATE statements, otherwise it won't have any effect. page_size should be a multiple of the sector size
         * (1024 on Linux) in order to be efficient.
         */
        conn.exec("PRAGMA page_size=8192;");
        connCache.put(partition, conn);
        // Init transaction
        for(String sql : preSQL) {
          LOG.info("Executing: " + sql);
          conn.exec(sql);
        }
        conn.exec("BEGIN");
        Map<String, SQLiteStatement> stMap = new HashMap<String, SQLiteStatement>();
        stCache.put(partition, stMap);
      } catch(SQLiteException e) {
        throw new IOException(e);
      }
    }

    @Override
    public void write(ITuple tuple, NullWritable ignore) throws IOException, InterruptedException {
      int partition = (Integer) tuple.get(PARTITION_TUPLE_FIELD);
      Object nulls = null;
      try {
        nulls = tuple.get(NullableSchema.NULLS_FIELD);
      } catch(IllegalArgumentException e) {
        throw new RuntimeException("Expected a NullableTuple but received a normal Tuple instead.");
      }

      try {
        /*
         * Key performance trick: Cache PreparedStatements when possible. We will have one PreparedStatement per each
         * different Tuple Schema (table).
         */
        Map<String, SQLiteStatement> stMap = stCache.get(partition);
        if(stMap == null) {
          initSql(partition);
          stMap = stCache.get(partition);
        }

        SQLiteStatement pS = stMap.get(tuple.getSchema().getName());
        if(pS == null) {
          SQLiteConnection conn = connCache.get(partition);
          // Create a PreparedStatement according to the received Tuple
          String preparedStatement = "INSERT INTO " + tuple.getSchema().getName() + " VALUES (";
          // NOTE: tuple.getSchema().getFields().size() - 2 : quick way of skipping "_nulls" and "_partition" fields here
          for(int i = 0; i < tuple.getSchema().getFields().size() - 2; i++) {
            preparedStatement += "?, ";
          }
          preparedStatement = preparedStatement.substring(0, preparedStatement.length() - 2) + ");";
          pS = conn.prepare(preparedStatement);
          stMap.put(tuple.getSchema().getName(), pS);
        }

        int count = 1, tupleCount = 0;
        for(Field field : tuple.getSchema().getFields()) {
          if(field.getName().equals(NullableSchema.NULLS_FIELD)) {
            tupleCount++;
            continue;
          }
          if(field.getName().equals(PARTITION_TUPLE_FIELD)) {
            tupleCount++;
            continue;
          }

          boolean isNull = false;
          if(nulls instanceof ByteBuffer) {
            ByteBuffer bB = ((ByteBuffer) nulls);
            isNull = NullableTuple.isNull(tupleCount, bB.array(), bB.position());
          } else if(nulls instanceof byte[]) {
            isNull = NullableTuple.isNull(tupleCount, (byte[]) nulls, 0);
          }

          if(isNull) {
            pS.bindNull(count);
          } else {
            switch(field.getType()) {
            case INT:
              pS.bind(count, (Integer) tuple.get(tupleCount));
              break;
            case LONG:
              pS.bind(count, (Long) tuple.get(tupleCount));
              break;
            case DOUBLE:
              pS.bind(count, (Double) tuple.get(tupleCount));
              break;
            case FLOAT:
              pS.bind(count, (Float) tuple.get(tupleCount));
              break;
            case STRING:
              pS.bind(count, tuple.get(tupleCount).toString());
              break;
            case BOOLEAN:
              // Remember: In SQLite there are no booleans
              pS.bind(count, ((Boolean) tuple.get(tupleCount)) == true ? 1 : 0);
            default:
              break;
            }
          }
          count++;
          tupleCount++;
        }
        pS.step();
        pS.reset();

        records++;
        if(records == batchSize) {
          SQLiteConnection conn = connCache.get(partition);
          conn.exec("COMMIT");
          conn.exec("BEGIN");
          records = 0;
        }
      } catch(SQLiteException e) {
        throw new IOException(e);
      }
    }

    @Override
    public void close(TaskAttemptContext ctx) throws IOException, InterruptedException {
      try {
        if(ctx != null) {
          heartBeater.setProgress(ctx);
        }
        for(Map.Entry<Integer, SQLiteConnection> entry : connCache.entrySet()) {
          LOG.info("Closing SQL connection [" + entry.getKey() + "]");
          //
          entry.getValue().exec("COMMIT");
          if(postSQL != null) {
            LOG.info("Executing end SQL statements.");
            for(String sql : postSQL) {
              LOG.info("Executing: " + sql);
              entry.getValue().exec(sql);
            }
          }
          entry.getValue().dispose();
          // Hadoop - completeLocalOutput()
          fs.completeLocalOutput(permPool.get(entry.getKey()), tempPool.get(entry.getKey()));
        }
      } catch(SQLiteException e) {
        throw new IOException(e);
      } finally {
        // in any case, destroy the HeartBeater
        heartBeater.cancelHeartBeat();
      }
    }
  }
}
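Below is a minimal, standalone sketch (not part of the original source) of the batched-insert pattern that TupleSQLRecordWriter applies: the same speed-oriented PRAGMAs, a single prepared INSERT reused for every row, and a COMMIT/BEGIN cycle every batchSize statements, with index creation after all inserts. It only uses sqlite4java calls that already appear above; the table, columns, values and batch size are invented for illustration, and it assumes the sqlite4java native library can be loaded on the local machine.

import java.io.File;

import com.almworks.sqlite4java.SQLiteConnection;
import com.almworks.sqlite4java.SQLiteException;
import com.almworks.sqlite4java.SQLiteStatement;

public class BatchedInsertSketch {

  public static void main(String[] args) throws SQLiteException {
    int batchSize = 1000; // illustrative value: number of INSERTs per COMMIT
    SQLiteConnection conn = new SQLiteConnection(new File("0.db"));
    conn.open(true);
    // Same speed-oriented PRAGMAs as in initSql() above
    conn.exec("PRAGMA journal_mode=OFF");
    conn.exec("PRAGMA page_size=8192;");
    // CREATE TABLE as would be derived from a hypothetical schema (see getCreateTable())
    conn.exec("CREATE TABLE meteo (station TEXT, temperature REAL, measurements INTEGER);");
    conn.exec("BEGIN");
    // One prepared statement, reused for every row (the key performance trick in write())
    SQLiteStatement st = conn.prepare("INSERT INTO meteo VALUES (?, ?, ?);");
    long records = 0;
    for(int i = 0; i < 10000; i++) {
      st.bind(1, "station-" + (i % 10));
      st.bind(2, 20.5 + i % 5);
      st.bind(3, i);
      st.step();
      st.reset();
      records++;
      if(records == batchSize) { // flush a batch, then start a new transaction
        conn.exec("COMMIT");
        conn.exec("BEGIN");
        records = 0;
      }
    }
    conn.exec("COMMIT"); // commit the tail of the last batch
    // CREATE INDEX after the inserts, just as the getCreateIndexes() statements run in close()
    conn.exec("CREATE INDEX idx_meteo_station ON meteo(station);");
    st.dispose();
    conn.dispose();
  }
}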




