/*
* Copyright (C) 2014 Dell, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.dell.doradus.client.utils;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.Date;
import java.util.Enumeration;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.zip.ZipEntry;
import java.util.zip.ZipFile;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.slf4j.helpers.MessageFormatter;
import com.dell.doradus.client.ApplicationSession;
import com.dell.doradus.client.Client;
import com.dell.doradus.client.OLAPSession;
import com.dell.doradus.client.SpiderSession;
import com.dell.doradus.common.ApplicationDefinition;
import com.dell.doradus.common.BatchResult;
import com.dell.doradus.common.CommonDefs;
import com.dell.doradus.common.ContentType;
import com.dell.doradus.common.DBObject;
import com.dell.doradus.common.DBObjectBatch;
import com.dell.doradus.common.FieldDefinition;
import com.dell.doradus.common.FieldType;
import com.dell.doradus.common.ObjectResult;
import com.dell.doradus.common.TableDefinition;
import com.dell.doradus.common.Utils;
/**
* Loads data from CSV files into a Doradus database. Each run loads tables for a single
* application. The CSV files must be named "{table name}{cruft}.csv" where {table name}
* is a defined table. {cruft} can be distinguishing characters such as "_1" or
* "_{timestamp}". If table names overlap, CSVLoader figures it out. For example, if the
* application has tables named "Message" and "MessageParticipant", it knows that the
* file "MessageParticipant_1.csv" belongs to the latter table and won't accidentally
* load it into the Message table.
*
* The column names in each CSV file must match the corresponding field names. Values that
* may contain commas should be quoted to prevent confusion with value-separating commas.
* If a link or scalar collection has multiple values, they should be concatenated into a
* single string using "~" as the separator.
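*
* For example, a file "Message_1.csv" for a hypothetical "Message" table with a
* multi-valued "Participants" link might begin:
* <pre>
* Key,SendDate,Subject,Participants
* 1001,"7/17/2010 9:32:01 PM","Hello, world",2001~2002~2003
* </pre>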
*
* The _ID value can be supplied by any named column in each file; the column name is
* configurable and defaults to "Key". That is, if a "Key" column is found, it is used as
* the _ID value; otherwise a unique ID is assigned automatically. Each CSV file must
* start with a header row that defines the column names present.
*
* All parameters, defined in {@link CSVConfig}, have defaults. Parameters can be
* overridden via arguments to {@link #main(String[])}. For example, "-workers 5"
* sets the number of parallel load threads to 5. Parameters can also be set via
* {@link CSVConfig#set(String, String)}.
*
* CSVLoader works for applications whose fields are all defined in the schema;
* undefined fields are not handled by this utility.
*
* The counterpart of CSVLoader is the {@link CSVDumper} utility, which creates CSV
* files in the format that this utility loads.
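*
* A typical command-line invocation (illustrative host, application, and paths) is:
* <pre>
* java com.dell.doradus.client.utils.CSVLoader -host localhost -app Msgs -schema Msgs.xml -root /data/csv -workers 5
* </pre>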
*/
public class CSVLoader {
private static final int MAX_WORK_BACKLOG = 1000;
// Members:
private List<LoadWorker> m_workerList = new ArrayList<LoadWorker>();
private StringBuilder m_token = new StringBuilder(); // reused -- not thread safe!
private CSVConfig m_config = CSVConfig.instance();
private Client m_client;
private ApplicationSession m_session;
private boolean m_bOLAPApp;
private Thread m_appThread;
// Logging interface:
private Logger m_logger = LoggerFactory.getLogger("CSVLoader");
// This holds the table names sorted longest-to-shortest. We need this funny
// ordering to properly derive table names from CSV file names. For example, if a file
// is called MessageParticipants.csv, we want to match the table name
// "MessageParticipants" and not "Message".
private List<String> m_tableNameList;
// Statistics updated locally:
private int m_totalFiles;
private long m_totalLines;
private long m_totalBytes;
// A CSV file being loaded.
private static class CSVFile {
final String m_fileName;
final TableDefinition m_tableDef;
final List<String> m_fieldList;
CSVFile(String fileName, TableDefinition tableDef, List<String> fieldList) {
m_fileName = fileName;
m_tableDef = tableDef;
m_fieldList = fieldList;
} // constructor
} // static class CSVFile
// This class stores the field values of a record and the class into which the record
// should be stored.
private static class Record {
final CSVFile m_csvFile;
final Map<String, String> m_fieldMap;
// This "null" value tells the worker threads to stop.
static final Record SENTINEL = new Record(null, null);
// We use the passed field map; we don't copy it.
Record(CSVFile csvFile, Map<String, String> fieldMap) {
m_csvFile = csvFile;
m_fieldMap = fieldMap;
} // constructor
} // Record
// Queue of records for workers. A sentinel tells each worker to shut down.
private final BlockingQueue<Record> m_workerQueue =
new LinkedBlockingQueue<Record>(MAX_WORK_BACKLOG);
//----- Public methods
/**
* Parses the given parameters and calls {@link #run()} to perform the CSV load. Run
* with "-?" to get a list of popular parameters. Also, see {@link CSVConfig} for a
* list of all parameters used by CSVLoader.
*
* @param args Program arguments.
*/
public static void main(String[] args) {
try {
CSVLoader app = new CSVLoader();
app.parseArgs(args);
app.run();
System.exit(0);
} catch (Exception e) {
e.printStackTrace();
System.exit(1);
}
} // main
/**
* Performs the CSV load as defined by the {@link CSVConfig} singleton object.
* This method can be called instead of {@link #main(String[])} for programmatic
* access. Parameters should be set first by direct access to the CSVConfig
* object or by calling {@link CSVConfig#set(String, String)}.
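*
* A minimal sketch of programmatic use (parameter names mirror the command-line
* flags accepted by {@link #main(String[])}; values here are illustrative):
* <pre>
* CSVConfig config = CSVConfig.instance();
* config.set("app", "Msgs");
* config.set("root", "/data/csv");
* new CSVLoader().run();
* </pre>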
*/
public void run() {
m_appThread = Thread.currentThread();
validateRootFolder();
openDatabase();
deleteApplication();
createApplication();
startWorkers();
long startTime = System.currentTimeMillis();
loadFolder(new File(m_config.root));
stopWorkers();
// If loading an OLAP shard, merge the results.
if (m_bOLAPApp) {
m_logger.info("Merging shard: {}", m_config.shard);
((OLAPSession)m_session).mergeShard(m_config.shard, null);
}
// Display final statistics.
long stopTime = System.currentTimeMillis();
m_logger.info("Total files loaded: {}", m_totalFiles);
m_logger.info("Total lines scanned: {}", m_totalLines);
m_logger.info("Total bytes read: {}", m_totalBytes);
long totalMillis = stopTime - startTime;
if (totalMillis == 0) {
totalMillis = 1; // avoid divide-by-zero in the averages below
}
m_logger.info("Total time for load: {}", Utils.formatElapsedTime(totalMillis));
m_logger.info("Average lines/sec: {}", ((m_totalLines * 1000) / totalMillis));
m_logger.info("Average bytes/sec: {}", ((m_totalBytes * 1000) / totalMillis));
// Done with the client connection.
m_client.close();
} // run
//----- Private methods
private static void usage() {
display("Usage: CSVLoader ");
display("where are:");
display(" -app Doradus application name. Default is: {}", CSVConfig.DEFAULT_APPNAME);
display(" -batchsize <#> Batch size. Default is: {}", CSVConfig.DEFAULT_BATCH_SIZE);
display(" -compress [T|F] Compress messages. Default is: {}", CSVConfig.DEFAULT_COMPRESS);
display(" -host Doradus server host name. Default is: {}", CSVConfig.DEFAULT_HOST);
display(" -id Column name of ID field. Default is: {}", CSVConfig.DEFAULT_ID_FIELD);
display(" -increment_ts [T|F] True to increment timestamp fields 1 day per batch. Default is: {}", CSVConfig.DEFAULT_INCREMENT_TS);
display(" -merge_all [T|F] (OLAP only): true to merge after every batch. Default is: {}", CSVConfig.DEFAULT_MERGE_ALL);
display(" -port Doradus server port. Default is: {}", CSVConfig.DEFAULT_PORT);
display(" -root Root folder of CSV files. Default is: {}", CSVConfig.DEFAULT_ROOT);
display(" -schema Name of application schema file. Default is: {}", CSVConfig.DEFAULT_SCHEMA);
display(" -skip_undef [T|F] True to skip fields not declared in the schema. Default is: {}", CSVConfig.DEFAULT_SKIP_UNDEF);
display(" -shard (OLAP only): Name of shard to load. Default is: {}", CSVConfig.DEFAULT_SHARD);
display(" -workers <#> # of worker threads. Default is: {}", CSVConfig.DEFAULT_WORKERS);
display("To use TLS:");
display(" -tls [T|F] True to enable TLS/SSL. Default is: {}", CSVConfig.DEFAULT_TLS);
display(" -keystore File name of keystore. Default is: {}", CSVConfig.DEFAULT_KEYSTORE);
display(" -keystorepassword Password of keystore file. Default is: {}", CSVConfig.DEFAULT_KEYSTORE_PW);
display(" -truststore File name of truststore. Default is: {}", CSVConfig.DEFAULT_TRUSTSTORE);
display(" -truststorepassword Password of truststore file. Default is: {}", CSVConfig.DEFAULT_TRUSTSTORE_PW);
display("Deletes and recreates OLAP or Spider application defined by 'schema' file, then loads all CSV");
display("files found in 'root' folder.");
System.exit(0);
} // usage
// Create the application from the configured schema file name.
private void createApplication() {
String schema = getSchema();
ContentType contentType = null;
if (m_config.schema.toLowerCase().endsWith(".json")) {
contentType = ContentType.APPLICATION_JSON;
} else if (m_config.schema.toLowerCase().endsWith(".xml")) {
contentType = ContentType.TEXT_XML;
} else {
logErrorThrow("Unknown file type for schema: {}", m_config.schema);
}
try {
m_logger.info("Creating application '{}' with schema: {}", m_config.app, m_config.schema);
m_client.createApplication(schema, contentType);
} catch (Exception e) {
logErrorThrow("Error creating schema: {}", e);
}
try {
m_session = m_client.openApplication(m_config.app);
} catch (RuntimeException e) {
logErrorThrow("Application '{}' not found after creation. Name doesn't match schema?", m_config.app);
}
String ss = m_session.getAppDef().getStorageService();
if (!Utils.isEmpty(ss) && ss.equals("OLAPService")) {
m_bOLAPApp = true;
}
loadTables();
} // createApplication
// Delete existing application if it exists.
private void deleteApplication() {
ApplicationDefinition appDef = m_client.getAppDef(m_config.app);
if (appDef != null) {
m_logger.info("Deleting existing application: {}", appDef.getAppName());
m_client.deleteApplication(appDef.getAppName(), appDef.getKey());
}
} // deleteApplication
// See if we can figure out the table name from this file name. We match the first
// table name that matches the starting characters of the CSV file name.
private TableDefinition determineTableFromFileName(String fileName) {
ApplicationDefinition appDef = m_session.getAppDef();
for (String tableName : m_tableNameList) {
if (fileName.regionMatches(true, 0, tableName, 0, tableName.length())) {
return appDef.getTableDef(tableName);
}
}
return null;
} // determineTableFromFileName
// Write the given message to stdout only. Uses {}-style parameters.
private static void display(String format, Object... args) {
System.out.println(MessageFormatter.arrayFormat(format, args).getMessage());
} // display
// Extract the field names from the given CSV header line and return as a list.
private List<String> getFieldListFromHeader(BufferedReader reader) {
// Read the first line and watch out for BOM at the front of the header.
String header = null;
try {
header = reader.readLine();
} catch (IOException e) {
// Leave header null
}
if (Utils.isEmpty(header)) {
logErrorThrow("No header found in file");
}
if (header.charAt(0) == '\uFEFF') {
header = header.substring(1);
}
// Split around commas, though this will include spaces.
String[] tokens = header.split(",");
List<String> result = new ArrayList<String>();
for (String token : tokens) {
// If this field matches the _ID field, add it with that name.
String fieldName = token.trim();
if (fieldName.equals(m_config.id)) {
result.add(CommonDefs.ID_FIELD);
} else {
result.add(fieldName);
}
}
return result;
} // getFieldListFromHeader
// Load schema contents from m_config.schema file.
private String getSchema() {
if (Utils.isEmpty(m_config.schema)) {
m_config.schema = m_config.app + ".xml";
}
File schemaFile = new File(m_config.schema);
if (!schemaFile.exists()) {
logErrorThrow("Schema file not found: {}", m_config.schema);
}
StringBuilder schemaBuffer = new StringBuilder();
char[] charBuffer = new char[65536];
try (FileReader reader = new FileReader(schemaFile)) {
for (int charsRead = reader.read(charBuffer); charsRead > 0; charsRead = reader.read(charBuffer)) {
schemaBuffer.append(charBuffer, 0, charsRead);
}
} catch (Exception e) {
logErrorThrow("Cannot read schema file '{}': {}", m_config.schema, e);
}
return schemaBuffer.toString();
} // getSchema
// Get all table names and sort from longest-to-shortest name. We need this funny
// ordering to match CSV file names correctly.
private void loadTables() {
ApplicationDefinition appDef = m_session.getAppDef();
m_tableNameList = new ArrayList<String>(appDef.getTableDefinitions().keySet());
Collections.sort(m_tableNameList, new Comparator<String>() {
@Override
public int compare(String s1, String s2) {
// We want longer table names to be first. So, if s1 == "a" and s2 == "aa",
// "a".length() - "aa".length() == -1, which means "aa" appears first. We
// don't care about the order of same-length names.
return s2.length() - s1.length();
} // compare
});
} // loadTables
// Load the records in the given file into the given table.
private void loadCSVFile(TableDefinition tableDef, File file) throws IOException {
try (BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(file), Utils.UTF8_CHARSET))) {
loadCSVFromReader(tableDef, file.getAbsolutePath(), file.length(), reader);
}
} // loadCSVFile
// Load all files in the given folder whose name matches a known table.
private void loadFolder(File folder) {
m_logger.info("Scanning for files in folder: {}", folder.getAbsolutePath());
File[] files = folder.listFiles();
if (files == null || files.length == 0) {
m_logger.error("No files found in folder: {}", folder.getAbsolutePath());
return;
}
for (File file : files) {
String fileName = file.getName().toLowerCase();
if (!fileName.endsWith(".csv") && !fileName.endsWith(".zip")) {
continue;
}
TableDefinition tableDef = determineTableFromFileName(file.getName());
if (tableDef == null) {
m_logger.error("Ignoring file; unknown corresponding table: {}", file.getName());
continue;
}
try {
if (fileName.endsWith(".csv")) {
loadCSVFile(tableDef, file);
} else {
loadZIPFile(tableDef, file);
}
} catch (IOException ex) {
m_logger.error("I/O error scanning file '{}': {}", file.getName(), ex);
}
}
} // loadFolder
private void loadZIPFile(TableDefinition tableDef, File file) throws IOException {
try (ZipFile zipFile = new ZipFile(file)) {
Enumeration extends ZipEntry> zipEntries = zipFile.entries();
while (zipEntries.hasMoreElements()) {
ZipEntry zipEntry = zipEntries.nextElement();
if (!zipEntry.getName().toLowerCase().endsWith(".csv")) {
m_logger.warn("Skipping zip file entry: " + zipEntry.getName());
continue;
}
try (InputStream zipEntryStream = zipFile.getInputStream(zipEntry)) {
BufferedReader reader = new BufferedReader(new InputStreamReader(zipEntryStream, Utils.UTF8_CHARSET));
loadCSVFromReader(tableDef, zipEntry.getName(), zipEntry.getSize(), reader);
}
}
}
} // loadZIPFile
// Open Doradus database, setting m_client.
private void openDatabase() {
if (m_config.tls) {
m_client = new Client(m_config.host, m_config.port, m_config.getTLSParams());
} else {
m_client = new Client(m_config.host, m_config.port);
}
} // openDatabase
// Parse args into CSVConfig object.
private void parseArgs(String[] args) {
int index = 0;
while (index < args.length) {
String name = args[index];
if (name.equals("-?") || name.equalsIgnoreCase("-help")) {
usage();
}
if (name.charAt(0) != '-') {
m_logger.error("Unrecognized parameter: {}", name);
usage();
}
if (++index >= args.length) {
m_logger.error("Another parameter expected after '{}'", name);
usage();
}
String value = args[index];
try {
m_config.set(name.substring(1), value);
} catch (Exception e) {
m_logger.error(e.toString());
usage();
}
index++;
}
} // parseArgs
// Load CSV records from the given reader into the given table. The header row is read
// first and wrapped in a CSVFile. The caller must pass an opened reader and close it.
private void loadCSVFromReader(TableDefinition tableDef,
String csvName,
long byteLength,
BufferedReader reader) {
m_logger.info("Loading CSV file: {}", csvName);
// Determine the order in which fields in this file appear. Wrap this in a CSVFile.
List<String> fieldList = getFieldListFromHeader(reader);
CSVFile csvFile = new CSVFile(csvName, tableDef, fieldList);
long startTime = System.currentTimeMillis();
long recsLoaded = 0;
int lineNo = 0;
while (true) {
Map<String, String> fieldMap = new HashMap<String, String>();
if (!tokenizeCSVLine(csvFile, reader, fieldMap)) {
break;
}
lineNo++;
m_totalLines++;
try {
m_workerQueue.put(new Record(csvFile, fieldMap));
} catch (InterruptedException e) {
logErrorThrow("Error posting to queue", e.toString());
}
if ((++recsLoaded % 10000) == 0) {
m_logger.info("...loaded {} records.", recsLoaded);
}
}
// Tally and display stats for this file.
m_totalFiles++;
m_totalBytes += byteLength;
long stopTime = System.currentTimeMillis();
long totalMillis = stopTime - startTime;
if (totalMillis == 0) {
totalMillis = 1; // You never know
}
m_logger.info("File '{}': time={} millis; lines={}",
new Object[]{csvFile.m_fileName, totalMillis, lineNo});
} // loadCSVFromReader
// Launch the requested number of workers. Quit if any can't be started.
private void startWorkers() {
m_logger.info("Starting {} workers", m_config.workers);
for (int workerNo = 1; workerNo <= m_config.workers; workerNo++) {
LoadWorker loadWorker = new LoadWorker(workerNo);
loadWorker.start();
m_workerList.add(loadWorker);
}
} // startWorkers
// Add a sentinel to the queue for each worker and wait for all to finish.
private void stopWorkers() {
Record sentinel = Record.SENTINEL;
for (int inx = 0; inx < m_workerList.size(); inx++) {
try {
m_workerQueue.put(sentinel);
} catch (InterruptedException e) {
// ignore
}
}
for (LoadWorker loadWorker : m_workerList) {
try {
loadWorker.join();
} catch (InterruptedException e) {
// ignore
}
}
} // stopWorkers
// Tokenize the next comma-separated record from the given reader. Map fields by name
// into the given field map.
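// For example, the line:
//     123,"Hello, world",a~b~c
// yields the tokens ["123", "Hello, world", "a~b~c"]: quote characters toggle
// comma handling and are dropped, while '~'-separated multi-values are split
// later in createObject(), not here.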
private boolean tokenizeCSVLine(CSVFile csvFile,
BufferedReader reader,
Map<String, String> fieldMap) {
fieldMap.clear();
m_token.setLength(0);
// First build a list of tokens found in the order they are found.
List<String> tokenList = new ArrayList<String>();
boolean bInQuote = false;
int aChar = 0;
try {
while (true) {
aChar = reader.read();
if (aChar < 0) {
break;
}
if (!bInQuote && aChar == ',') {
tokenList.add(m_token.toString());
m_token.setLength(0);
} else if (!bInQuote && aChar == '\r') {
tokenList.add(m_token.toString());
m_token.setLength(0);
reader.mark(1);
aChar = reader.read();
if (aChar == -1) {
break;
}
if (aChar != '\n') {
reader.reset(); // put back non-LF
}
break;
} else if (!bInQuote && aChar == '\n') {
tokenList.add(m_token.toString());
m_token.setLength(0);
break;
} else if (aChar == '"') {
bInQuote = !bInQuote;
} else {
m_token.append((char)aChar);
}
}
} catch (IOException e) {
logErrorThrow("I/O error reading file", e.toString());
}
// If we hit EOF without a final EOL, we could have a token in the buffer.
if (m_token.length() > 0) {
tokenList.add(m_token.toString());
m_token.setLength(0);
}
if (tokenList.size() > 0) {
for (int index = 0; index < tokenList.size(); index++) {
String token = tokenList.get(index).trim();
if (token.length() > 0) {
String fieldName = csvFile.m_fieldList.get(index);
fieldMap.put(fieldName, token);
}
}
return true;
}
// No tokens found; return false if EOF.
return aChar != -1;
} // tokenizeCSVLine
// Format the given message using {}-style parameters, log as an error, and throw as
// a RuntimeException.
private void logErrorThrow(String format, Object... args) {
String msg = MessageFormatter.arrayFormat(format, args).getMessage();
m_logger.error(msg);
throw new RuntimeException(msg);
} // logErrorThrow
// Root directory must exist and be a folder
private void validateRootFolder() {
File rootDir = new File(m_config.root);
if (!rootDir.exists()) {
logErrorThrow("Root directory does not exist: {}", m_config.root);
} else if (!rootDir.isDirectory()) {
logErrorThrow("Root directory must be a folder: {}", m_config.root);
} else if (!m_config.root.endsWith(File.separator)) {
m_config.root = m_config.root + File.separator;
}
} // validateRootFolder
/**
* LoadWorker loads records on behalf of CSVLoader. It runs in its own thread, receiving
* records via a queue, loading them into the appropriate table in batches. The worker
* shuts down when it receives a special sentinel record.
*/
final class LoadWorker extends Thread {
private static final long MILLIS_PER_DAY = 1000 * 3600 * 24;
// Date formats we try to recognize (add to these as needed):
private final SimpleDateFormat[] DATE_FORMATS = new SimpleDateFormat[] {
new SimpleDateFormat("MM/dd/yyyy hh:mm:ss a"), // e.g., 7/17/2010 9:32:01 PM
new SimpleDateFormat("MM/dd/yyyy HH:mm:ss"), // e.g., 7/17/2010 21:32:01
};
// Members:
private final int m_workerNo;
private final DBObjectBatch m_batch;
private String m_batchTableName = "";
private int m_batchNo;
private ApplicationSession m_session;
private LoadWorker(int workerNo) {
m_workerNo = workerNo;
m_logger.debug("Worker {}: Opening connection to {}:{}",
new Object[]{m_workerNo, m_config.host, m_config.port});
Client client = new Client(m_config.host, m_config.port, m_config.getTLSParams());
client.setCompression(m_config.compress);
try {
m_session = client.openApplication(m_config.app);
} catch (Exception e) {
logErrorThrow("Cannot access application {}: {}", m_config.app, e);
}
m_batch = new DBObjectBatch();
client.close();
} // constructor
// Run is entered when our supervisor starts us.
@Override
public void run() {
while (true) {
try {
Record record = m_workerQueue.take();
if (record == Record.SENTINEL) {
flushBatch();
break;
}
loadRecord(record);
} catch (InterruptedException e) {
// ignore
} catch (Exception ex) {
m_logger.error("Worker '" + m_workerNo + "'", ex);
m_appThread.interrupt();
break;
}
}
m_session.close();
m_logger.debug("Worker {}: Thread shutting down", m_workerNo);
} // run
// Load the record into the database.
private void loadRecord(Record record) throws IOException {
if (!m_bOLAPApp && m_batch.getObjectCount() > 0 &&
!record.m_csvFile.m_tableDef.getTableName().equals(m_batchTableName)) {
flushBatch();
m_batchNo = 0;
}
m_batch.addObject(createObject(record));
m_batchTableName = record.m_csvFile.m_tableDef.getTableName();
if (m_batch.getObjectCount() >= m_config.batchsize) {
flushBatch();
}
} // loadRecord
private DBObject createObject(Record record) {
TableDefinition tableDef = record.m_csvFile.m_tableDef;
DBObject dbObj = new DBObject();
dbObj.setTableName(tableDef.getTableName());
for (Map.Entry<String, String> mapEntry : record.m_fieldMap.entrySet()) {
String fieldName = mapEntry.getKey();
String fieldValue = mapEntry.getValue();
FieldDefinition fieldDef = tableDef.getFieldDef(fieldName);
if (fieldDef == null && !fieldName.equals("_ID") && m_config.skip_undef) {
continue;
}
boolean isCollection = fieldDef == null ? false : fieldDef.isCollection();
boolean isLink = fieldDef == null ? false : fieldDef.isLinkField();
FieldType fieldType = fieldDef == null ? FieldType.TEXT : fieldDef.getType();
// Link and MV scalar fields are separated by '~'.
if (isLink) {
dbObj.addFieldValues(fieldName, Arrays.asList(fieldValue.split("~")));
} else if (isCollection) {
String[] values = fieldValue.split("~");
if (fieldType == FieldType.TIMESTAMP) {
for (int i = 0; i < values.length; i++) {
values[i] = convertTimestamp(values[i]);
}
}
dbObj.addFieldValues(fieldName, Arrays.asList(values));
} else if (fieldType == FieldType.TIMESTAMP) {
dbObj.addFieldValue(fieldName, convertTimestamp(fieldValue));
} else {
dbObj.addFieldValue(fieldName, fieldValue);
}
}
return dbObj;
} // createObject
// Attempt to recognize the given timestamp value format and convert to the Doradus
// required form: YYYY-MM-DD hh:mm:ss.fff. If we don't recognize the format, the value
// is returned as-is.
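// For example, "7/17/2010 9:32:01 PM" matches the first format above and is
// re-emitted via Utils.formatDateUTC(). Note that SimpleDateFormat parses in the
// JVM's default time zone, so the UTC-formatted result may be hour-shifted
// relative to the input.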
private String convertTimestamp(String value) {
for (SimpleDateFormat sdf : DATE_FORMATS) {
try {
Date date = sdf.parse(value);
if (m_config.increment_ts) {
Date adjustedDate = new Date(date.getTime() + (m_batchNo * MILLIS_PER_DAY));
return Utils.formatDateUTC(adjustedDate);
} else {
return Utils.formatDateUTC(date);
}
} catch (ParseException e) {
// Skip this format.
}
}
// Here, no formats were recognized, so let Doradus accept or reject.
return value;
} // convertTimestamp
// Load the current batch of records that we've queued-up.
private void flushBatch() {
if (m_batch.getObjectCount() == 0) {
return;
}
try {
BatchResult result = null;
if (m_bOLAPApp) {
result = ((OLAPSession)m_session).addBatch(m_config.shard, m_batch);
if (m_config.merge_all) {
((OLAPSession)m_session).mergeShard(m_config.shard, null);
}
} else {
result = ((SpiderSession)m_session).addBatch(m_batchTableName, m_batch);
}
if (result.isFailed()) {
m_logger.error("Worker {}: Batch update failed: {}",
new Object[]{m_workerNo, result.getErrorMessage()});
for (String objectID : result.getFailedObjectIDs()) {
ObjectResult objResult = result.getObjectResult(objectID);
m_logger.warn("Worker {}: error for object ID '{}': {}",
new Object[]{m_workerNo, objectID, objResult.getErrorMessage()});
Map<String, String> errorDetails = objResult.getErrorDetails();
for (String fieldName : errorDetails.keySet()) {
m_logger.warn("Worker {}: Detail: {}={}",
new Object[]{m_workerNo, fieldName, errorDetails.get(fieldName)});
}
}
}
} catch (Exception e) {
logErrorThrow("Error adding batch: {}", e);
}
m_batch.clear();
m_batchNo++;
} // flushBatch
} // class LoadWorker
} // class CSVLoader