All downloads are free. Search and download functionality uses the official Maven repository.

com.treasure_data.td_import.prepare.PrepareConfiguration Maven / Gradle / Ivy

There is a newer version: 0.5.10
Show newest version
//
// Treasure Data Bulk-Import Tool in Java
//
// Copyright (C) 2012 - 2013 Muga Nishizawa
//
//    Licensed under the Apache License, Version 2.0 (the "License");
//    you may not use this file except in compliance with the License.
//    You may obtain a copy of the License at
//
//        http://www.apache.org/licenses/LICENSE-2.0
//
//    Unless required by applicable law or agreed to in writing, software
//    distributed under the License is distributed on an "AS IS" BASIS,
//    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
//    See the License for the specific language governing permissions and
//    limitations under the License.
//
package com.treasure_data.td_import.prepare;

import java.io.BufferedInputStream;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CodingErrorAction;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.Set;
import java.util.logging.Logger;
import java.util.zip.GZIPInputStream;

import joptsimple.OptionSet;

import com.treasure_data.td_import.Options;
import com.treasure_data.td_import.Configuration;
import com.treasure_data.td_import.model.ColumnType;
import com.treasure_data.td_import.model.TimeValueTimeColumnValue;
import com.treasure_data.td_import.reader.ApacheRecordReader;
import com.treasure_data.td_import.reader.CSVRecordReader;
import com.treasure_data.td_import.reader.RecordReader;
import com.treasure_data.td_import.reader.JSONRecordReader;
import com.treasure_data.td_import.reader.MessagePackRecordReader;
import com.treasure_data.td_import.reader.MySQLTableReader;
import com.treasure_data.td_import.reader.RegexRecordReader;
import com.treasure_data.td_import.reader.SyslogRecordReader;
import com.treasure_data.td_import.source.Source;
import com.treasure_data.td_import.writer.RecordWriter;
import com.treasure_data.td_import.writer.MsgpackGZIPRecordWriter;
import com.treasure_data.td_import.writer.MySQLTimestampAdaptedMsgpackGZIPRecordWriter;

public class PrepareConfiguration extends Configuration {

    /**
     * Creates {@link PrepareConfiguration} instances whose concrete type is
     * chosen from the input format given on the command line.
     */
    public static class Factory {
        protected Options options;

        /**
         * Creates the options object and initializes one of its two option
         * parsers.
         *
         * @param props properties handed to the option parser
         * @param isUploaded {@code true} to initialize the upload option
         *        parser, {@code false} to initialize the prepare option parser
         */
        public Factory(Properties props, boolean isUploaded) {
            options = new Options();
            if (!isUploaded) {
                options.initPrepareOptionParser(props);
            } else {
                options.initUploadOptionParser(props);
            }
        }

        /**
         * @return the options object owned by this factory
         */
        public Options getBulkImportOptions() {
            return options;
        }

        /**
         * Parses the given arguments and instantiates the configuration that
         * belongs to the selected format.
         *
         * @param args command-line arguments to parse
         * @return a new configuration that shares this factory's options
         * @throws IllegalArgumentException if the format name is not recognized
         */
        public PrepareConfiguration newPrepareConfiguration(String[] args) {
            options.setOptions(args);
            OptionSet parsed = options.getOptions();

            // TODO FIXME when uploadParts is called, default format is "msgpack.gz"
            // on the other hand, when prepareParts, default format is "csv".
            String formatName = parsed.has(BI_PREPARE_PARTS_FORMAT)
                    ? (String) parsed.valueOf(BI_PREPARE_PARTS_FORMAT)
                    : BI_PREPARE_PARTS_FORMAT_DEFAULTVALUE;

            // resolve the name to its enum constant
            Format selected = Format.fromString(formatName);
            if (selected == null) {
                throw new IllegalArgumentException(String.format(
                        "unsupported format '%s'", formatName));
            }

            PrepareConfiguration conf = selected.createPrepareConfiguration();
            conf.options = options;
            return conf;
        }
    }

    /**
     * Input formats accepted by the prepare step. Each constant creates the
     * record reader and the configuration object that belong to its format.
     */
    public enum Format {
        CSV("csv") {
            @Override
            public RecordReader createFileReader(
                    PrepareConfiguration conf, RecordWriter writer)
                    throws PreparePartsException {
                return new CSVRecordReader((CSVPrepareConfiguration) conf, writer);
            }

            @Override
            public PrepareConfiguration createPrepareConfiguration() {
                return new CSVPrepareConfiguration();
            }
        },
        // TSV shares the CSV reader and configuration classes.
        TSV("tsv") {
            @Override
            public RecordReader createFileReader(
                    PrepareConfiguration conf, RecordWriter writer)
                    throws PreparePartsException {
                return new CSVRecordReader((CSVPrepareConfiguration) conf, writer);
            }

            @Override
            public PrepareConfiguration createPrepareConfiguration() {
                return new CSVPrepareConfiguration();
            }
        },
        MYSQL("mysql") {
            @Override
            public RecordReader createFileReader(
                    PrepareConfiguration conf, RecordWriter writer)
                    throws PreparePartsException {
                return new MySQLTableReader((MySQLPrepareConfiguration) conf,
                        writer);
            }

            @Override
            public PrepareConfiguration createPrepareConfiguration() {
                return new MySQLPrepareConfiguration();
            }
        },
        JSON("json") {
            @Override
            public RecordReader createFileReader(
                    PrepareConfiguration conf, RecordWriter writer)
                    throws PreparePartsException {
                return new JSONRecordReader((JSONPrepareConfiguration) conf,
                        writer);
            }

            @Override
            public PrepareConfiguration createPrepareConfiguration() {
                return new JSONPrepareConfiguration();
            }
        },
        REGEX("regex") {
            @Override
            public RecordReader createFileReader(
                    PrepareConfiguration conf, RecordWriter writer)
                    throws PreparePartsException {
                return new RegexRecordReader(
                        (RegexPrepareConfiguration) conf, writer);
            }

            @Override
            public PrepareConfiguration createPrepareConfiguration() {
                return new RegexPrepareConfiguration();
            }
        },
        APACHE("apache") {
            @Override
            public RecordReader createFileReader(
                    PrepareConfiguration conf, RecordWriter writer)
                    throws PreparePartsException {
                return new ApacheRecordReader((ApachePrepareConfiguration) conf,
                        writer);
            }

            @Override
            public PrepareConfiguration createPrepareConfiguration() {
                return new ApachePrepareConfiguration();
            }
        },
        SYSLOG("syslog") {
            @Override
            public RecordReader createFileReader(
                    PrepareConfiguration conf, RecordWriter writer)
                    throws PreparePartsException {
                return new SyslogRecordReader((SyslogPrepareConfiguration) conf,
                        writer);
            }

            @Override
            public PrepareConfiguration createPrepareConfiguration() {
                return new SyslogPrepareConfiguration();
            }
        },
        MSGPACK("msgpack") {
            @Override
            public RecordReader createFileReader(
                    PrepareConfiguration conf, RecordWriter writer)
                    throws PreparePartsException {
                return new MessagePackRecordReader(
                        (MessagePackPrepareConfiguration) conf, writer);
            }

            @Override
            public PrepareConfiguration createPrepareConfiguration() {
                return new MessagePackPrepareConfiguration();
            }
        };

        /** Name under which this format is looked up by {@link #fromString}. */
        private final String format;

        Format(String format) {
            this.format = format;
        }

        /**
         * @return the lookup name of this format, e.g. {@code "csv"}
         */
        public String format() {
            return format;
        }

        /**
         * @return a new, unconfigured configuration object for this format
         */
        public abstract PrepareConfiguration createPrepareConfiguration();

        /**
         * Creates the reader for this format. This base implementation always
         * fails; every constant declared above overrides it.
         *
         * @param conf configuration, cast by each constant to its own
         *        configuration class
         * @param writer writer handed to the created reader
         * @return the reader for this format
         * @throws PreparePartsException wrapping an
         *         {@link UnsupportedOperationException} when not overridden
         */
        public RecordReader createFileReader(
                PrepareConfiguration conf, RecordWriter writer)
                throws PreparePartsException {
            throw new PreparePartsException(
                    new UnsupportedOperationException("format: " + this));
        }

        /**
         * Looks up a format by its name.
         *
         * @param format lookup name such as {@code "csv"}
         * @return the matching constant, or {@code null} if there is none
         */
        public static Format fromString(String format) {
            return StringToFormat.get(format);
        }

        /** Holder for the name-to-constant lookup table. */
        private static class StringToFormat {
            // Typed map: with a raw Map, get() yields Object and the return
            // statement in get(String) does not compile.
            private static final Map<String, Format> REVERSE_DICTIONARY;

            static {
                Map<String, Format> map = new HashMap<String, Format>();
                for (Format elem : Format.values()) {
                    map.put(elem.format(), elem);
                }
                REVERSE_DICTIONARY = Collections.unmodifiableMap(map);
            }

            static Format get(String key) {
                return REVERSE_DICTIONARY.get(key);
            }
        }
    }

    /**
     * Output formats of the prepare step. Each constant creates the record
     * writer that produces its format.
     */
    public enum OutputFormat {
        MSGPACKGZ("msgpackgz") {
            @Override
            public RecordWriter createFileWriter(PrepareConfiguration conf) throws PreparePartsException {
                // MySQL input gets its own timestamp-adapted writer.
                if (!conf.getFormat().equals(Format.MYSQL)) {
                    return new MsgpackGZIPRecordWriter(conf);
                } else {
                    return new MySQLTimestampAdaptedMsgpackGZIPRecordWriter(conf);
                }
            }
        },
        SYSLOGMSGPACKGZ("syslogmsgpackgz") {
            @Override
            public RecordWriter createFileWriter(PrepareConfiguration conf) throws PreparePartsException {
                return new SyslogRecordReader.ExtFileWriter(conf);
            }
        };

        /** Name under which this output format is looked up by {@link #fromString}. */
        private final String outputFormat;

        OutputFormat(String outputFormat) {
            this.outputFormat = outputFormat;
        }

        /**
         * @return the lookup name of this output format
         */
        public String outputFormat() {
            return outputFormat;
        }

        /**
         * Creates the writer for this output format. This base implementation
         * always fails; both constants declared above override it.
         *
         * @param conf configuration handed to the created writer
         * @return the writer for this output format
         * @throws PreparePartsException wrapping an
         *         {@link UnsupportedOperationException} when not overridden
         */
        public RecordWriter createFileWriter(PrepareConfiguration conf) throws PreparePartsException {
            throw new PreparePartsException(
                    new UnsupportedOperationException("output format: " + this));
        }

        /**
         * Looks up an output format by its name.
         *
         * @param outputFormat lookup name such as {@code "msgpackgz"}
         * @return the matching constant, or {@code null} if there is none
         */
        public static OutputFormat fromString(String outputFormat) {
            return StringToOutputFormat.get(outputFormat);
        }

        /** Holder for the name-to-constant lookup table. */
        private static class StringToOutputFormat {
            // Typed map: with a raw Map, get() yields Object and the return
            // statement in get(String) does not compile.
            private static final Map<String, OutputFormat> REVERSE_DICTIONARY;

            static {
                Map<String, OutputFormat> map = new HashMap<String, OutputFormat>();
                for (OutputFormat elem : OutputFormat.values()) {
                    map.put(elem.outputFormat(), elem);
                }
                REVERSE_DICTIONARY = Collections.unmodifiableMap(map);
            }

            static OutputFormat get(String key) {
                return REVERSE_DICTIONARY.get(key);
            }
        }
    }

    /**
     * Compression settings for input sources. Each constant wraps a raw input
     * stream in the stream that decodes its compression.
     */
    public enum CompressionType {
        GZIP("gzip") {
            @Override
            public InputStream createInputStream(InputStream in) throws IOException {
                return new BufferedInputStream(new GZIPInputStream(in));
            }
        },
        AUTO("auto") {
            // AUTO is a placeholder setting and cannot open a stream itself.
            @Override
            public InputStream createInputStream(InputStream in) throws IOException {
                throw new IOException("unsupported compress type");
            }
        },
        NONE("none") {
            @Override
            public InputStream createInputStream(InputStream in) throws IOException {
                return new BufferedInputStream(in);
            }
        };

        /** Name under which this type is looked up by {@link #fromString}. */
        private final String type;

        CompressionType(String type) {
            this.type = type;
        }

        /**
         * @return the lookup name of this type, e.g. {@code "gzip"}
         */
        public String type() {
            return type;
        }

        /**
         * Wraps the given stream in a buffered stream that decodes this
         * compression type.
         *
         * @param in raw input stream
         * @return the wrapping stream
         * @throws IOException if the wrapper cannot be created; always thrown
         *         by {@link #AUTO}
         */
        public abstract InputStream createInputStream(InputStream in) throws IOException;

        /**
         * Looks up a compression type by its name.
         *
         * @param type lookup name such as {@code "gzip"}
         * @return the matching constant, or {@code null} if there is none
         */
        public static CompressionType fromString(String type) {
            return StringToCompressionType.get(type);
        }

        /** Holder for the name-to-constant lookup table. */
        private static class StringToCompressionType {
            // Typed map: with a raw Map, get() yields Object and the return
            // statement in get(String) does not compile.
            private static final Map<String, CompressionType> REVERSE_DICTIONARY;

            static {
                Map<String, CompressionType> map = new HashMap<String, CompressionType>();
                for (CompressionType elem : CompressionType.values()) {
                    map.put(elem.type(), elem);
                }
                REVERSE_DICTIONARY = Collections.unmodifiableMap(map);
            }

            static CompressionType get(String key) {
                return REVERSE_DICTIONARY.get(key);
            }
        }
    }

    /**
     * Policies for records that fail during preparation: either the error is
     * ignored or it is rethrown to the caller.
     */
    public enum ErrorRecordsHandling {
        SKIP(BI_PREPARE_PARTS_ERROR_RECORDS_HANDLING_DEFAULTVALUE) {
            @Override
            public void handleError(PreparePartsException e)
                    throws PreparePartsException {
                // ignore
            }
        },
        ABORT("abort") {
            @Override
            public void handleError(PreparePartsException e)
                    throws PreparePartsException {
                throw e;
            }
        };

        /** Name under which this policy is looked up by {@link #fromString}. */
        private final String mode;

        ErrorRecordsHandling(String mode) {
            this.mode = mode;
        }

        /**
         * @return the lookup name of this policy
         */
        public String mode() {
            return mode;
        }

        /**
         * Applies this policy to an error raised for a record.
         *
         * @param e the error to handle
         * @throws PreparePartsException the given error, when the policy is
         *         {@link #ABORT}
         */
        public abstract void handleError(PreparePartsException e)
                throws PreparePartsException;

        /**
         * Looks up a policy by its name.
         *
         * @param mode lookup name such as {@code "abort"}
         * @return the matching constant, or {@code null} if there is none
         */
        public static ErrorRecordsHandling fromString(String mode) {
            return StringToErrorHandling.get(mode);
        }

        /** Holder for the name-to-constant lookup table. */
        private static class StringToErrorHandling {
            // Typed map: with a raw Map, get() yields Object and the return
            // statement in get(String) does not compile.
            private static final Map<String, ErrorRecordsHandling> REVERSE_DICTIONARY;

            static {
                Map<String, ErrorRecordsHandling> map = new HashMap<String, ErrorRecordsHandling>();
                for (ErrorRecordsHandling elem : ErrorRecordsHandling.values()) {
                    map.put(elem.mode(), elem);
                }
                REVERSE_DICTIONARY = Collections.unmodifiableMap(map);
            }

            static ErrorRecordsHandling get(String key) {
                return REVERSE_DICTIONARY.get(key);
            }
        }
    }

    /**
     * Policies for column names that are not made of lowercase letters,
     * digits and '_': either the name is rewritten or a warning is emitted
     * and the name is kept. Each constant reports a given column only once.
     */
    public enum InvalidColumnsHandling {
        AUTOFIX("autofix") {
            @Override
            public String handleInvalidColumn(String column, int index) {
                String fixed = fixColumnFormat(column, index);
                if (!alreadyHandled(column)) {
                    String msg = "fixed invalid column name: column must contain only lowercase letters, digits, and '_': "
                            + "'" + column + "' is replaced into '" + fixed + "'.";
                    LOG.warning(msg);
                    System.out.println(msg);
                    handleNow(column);
                }
                return fixed;
            }
        },
        WARN(BI_PREPARE_INVALID_COLUMNS_HANDLING_DEFAULTVALUE) {
            @Override
            public String handleInvalidColumn(String column, int index) {
                if (!alreadyHandled(column)) {
                    String msg = "detected invalid column name: column must contain only lowercase letters, digits, and '_': "
                            + "'" + column + "' cannot be used within query strings.";
                    LOG.warning(msg);
                    System.out.println(msg);
                    handleNow(column);
                }
                return column;
            }
        };

        /** Name under which this policy is looked up by {@link #fromString}. */
        private final String mode;

        // Columns already reported by this constant.
        // NOTE(review): plain HashSet on an enum singleton; not safe if
        // handleInvalidColumn is called from several threads -- confirm.
        protected Set<String> cache = new HashSet<String>();

        /** @return whether the column has already been reported */
        boolean alreadyHandled(String column) {
            return cache.contains(column);
        }

        /** Records the column as reported. */
        void handleNow(String column) {
            cache.add(column);
        }

        InvalidColumnsHandling(String mode) {
            this.mode = mode;
        }

        /**
         * @return the lookup name of this policy
         */
        public String mode() {
            return mode;
        }

        /**
         * Applies this policy to an invalid column name.
         *
         * @param column the invalid column name
         * @param index position of the column, used to build a replacement
         *        name for a null or empty column
         * @return the column name to use from now on
         */
        public abstract String handleInvalidColumn(String column, int index);

        /**
         * Checks whether a column name is valid.
         *
         * @param column column name to check
         * @return {@code true} if the name is non-empty, starts with a
         *         lowercase ASCII letter or '_', and otherwise contains only
         *         lowercase ASCII letters, ASCII digits and '_'
         */
        public static boolean validColumnFormat(String column) {
            if (column == null || column.isEmpty()) {
                return false;
            }

            for (int i = 0; i < column.length(); i++) {
                int c = column.charAt(i);

                // the first character must not be a digit
                if (i == 0) {
                    if (!isLetter(c) && c != '_') {
                        return false;
                    }
                }

                if (!isDigit(c) && !isLetter(c) && c != '_') {
                    return false;
                }
            }

            return true;
        }

        /**
         * Rewrites a column name: uppercase ASCII letters are lowercased and
         * every other character that is not a lowercase letter, a digit or
         * '_' becomes '_'.
         *
         * NOTE(review): a leading digit is kept as-is, although
         * {@link #validColumnFormat(String)} rejects it -- confirm whether
         * that is intended.
         *
         * @param column column name to rewrite
         * @param index position of the column
         * @return {@code "_c" + index} for a null or empty name, otherwise the
         *         rewritten name
         */
        public static String fixColumnFormat(String column, int index) {
            if (column == null || column.isEmpty()) {
                return "_c" + index;
            }

            StringBuilder sb = new StringBuilder();
            for (int i = 0; i < column.length(); i++) {
                int c = column.charAt(i);

                if (isDigit(c) || isLetter(c) || c == '_') {
                    sb.append((char)c);
                } else if (isUpperLetter(c)) {
                    // if upper letter, it is translated into the lower case.
                    sb.append((char)(c + 32));
                } else {
                    // otherwise, '_' is appended
                    sb.append('_');
                }
            }

            return sb.toString();
        }

        private static boolean isDigit(int c) {
            return '0' <= c && c <= '9';
        }

        // lowercase ASCII letters only
        private static boolean isLetter(int c) {
            return 'a' <= c && c <= 'z';
        }

        private static boolean isUpperLetter(int c) {
            return 'A' <= c && c <= 'Z';
        }

        /**
         * Looks up a policy by its name.
         *
         * @param mode lookup name such as {@code "autofix"}
         * @return the matching constant, or {@code null} if there is none
         */
        public static InvalidColumnsHandling fromString(String mode) {
            return StringToInvalidColumnsHandling.get(mode);
        }

        /** Holder for the name-to-constant lookup table. */
        private static class StringToInvalidColumnsHandling {
            // Typed map: with a raw Map, get() yields Object and the return
            // statement in get(String) does not compile.
            private static final Map<String, InvalidColumnsHandling> REVERSE_DICTIONARY;

            static {
                Map<String, InvalidColumnsHandling> map = new HashMap<String, InvalidColumnsHandling>();
                for (InvalidColumnsHandling elem : InvalidColumnsHandling.values()) {
                    map.put(elem.mode(), elem);
                }
                REVERSE_DICTIONARY = Collections.unmodifiableMap(map);
            }

            static InvalidColumnsHandling get(String key) {
                return REVERSE_DICTIONARY.get(key);
            }
        }
    }

    // Logger shared by this class and its nested enums.
    private static final Logger LOG = Logger
            .getLogger(PrepareConfiguration.class.getName());

    // --- inputs handed to configure() ---
    // FIXME this field is also declared in td-client.Config.
    protected Properties props;
    protected Options options;
    // Parsed command line; every set*() method below reads from it.
    protected OptionSet optionSet;

    // --- input/output handling ---
    protected Format format;
    protected OutputFormat outputFormat = OutputFormat.MSGPACKGZ;
    protected CompressionType compressionType;
    protected CharsetDecoder charsetDecoder;
    protected int numOfPrepareThreads;

    // --- time column options ---
    protected String aliasTimeColumn;
    // NOTE(review): -1 presumably marks "no time value given" -- confirm
    // against TimeValueTimeColumnValue.
    protected TimeValueTimeColumnValue timeValue = new TimeValueTimeColumnValue(-1);
    protected String timeFormat;

    // --- primary key options; filled in together by setPrimaryKey() ---
    protected boolean hasPrimaryKey = false;
    protected String primaryKey = null;
    protected ColumnType primaryKeyType = null;

    // --- error handling and output locations ---
    // NOTE(review): not referenced in the visible part of this class; looks
    // like a near-duplicate of errorRecordsOutputDirName -- confirm.
    protected String errorRecordOutputDirName;
    protected ErrorRecordsHandling errorRecordsHandling;
    protected InvalidColumnsHandling invalidColumnsHandling;
    protected boolean dryRun = false;
    protected String outputDirName;
    protected String errorRecordsOutputDirName;
    protected int splitSize;
    protected int sampleRowSize;

    // --- column selection and typing ---
    protected String[] actualColumnNames;
    protected String[] columnNames;
    protected ColumnType[] columnTypes;
    // NOTE(review): raw Map; key and value types are not visible from here.
    protected Map columnTypeMap = new HashMap();
    protected boolean hasAllString = false;
    protected String[] excludeColumns;
    protected String[] onlyColumns;

    /**
     * Creates a configuration that holds only the field defaults; the
     * option-derived state is filled in by
     * {@link #configure(Properties, Options)}.
     */
    public PrepareConfiguration() {
        // nothing to initialize here
    }

    /**
     * Stores the given properties and options and then derives every setting
     * of this configuration from the parsed option set.
     *
     * The setters run in a fixed order because some of them read state set by
     * an earlier one (see the inline notes).
     *
     * @param props properties to keep in this configuration
     * @param options options whose parsed option set is read by the setters
     */
    public void configure(Properties props, Options options) {
        this.props = props;
        this.options = options;
        this.optionSet = options.getOptions();

        // format
        setFormat();

        // output format; requires the format to be set already
        setOutputFormat();

        // compression type
        setCompressionType();

        // parallel
        setPrepareThreadNum();

        // error handling
        setErrorRecordsHandling();

        // invalid columns handling
        setInvalidColumnsHandling();

        // encoding
        setEncoding(); // depends on error-records-handling

        // primary key; the three time-related setters below check it
        setPrimaryKey();

        // alias time column
        setAliasTimeColumn();

        // time value
        setTimeValue();

        // time format
        setTimeFormat();

        // output DIR
        setOutputDirName();

        // error records output DIR
        setErrorRecordsOutputDirName(); // depends on output

        // all-string
        setAllString();

        // exclude-columns
        setExcludeColumns();

        // only-columns
        setOnlyColumns();

        // dry-run mode
        setDryRun();

        // split size
        setSplitSize();

        // row size with sample reader
        setSampleReaderRowSize();
    }

    /**
     * @return the arguments of the parsed command line that are not options
     */
    public List getNonOptionArguments() {
        OptionSet parsed = options.getOptions();
        return (List) parsed.nonOptionArguments();
    }

    /**
     * @return {@code true} if the help option is present on the parsed
     *         command line
     */
    public boolean hasHelpOption() {
        OptionSet parsed = options.getOptions();
        return parsed.has(BI_PREPARE_PARTS_HELP);
    }

    /**
     * Builds the help text of the prepare command from four sections: usage,
     * example, description and options.
     *
     * @param props not used by this implementation
     * @return the assembled help text
     */
    @Override
    public String showHelp(Properties props) {
        StringBuilder help = new StringBuilder();

        help.append("usage:\n")
                .append(Configuration.CMD_PREPARE_USAGE)
                .append("\n");

        help.append("example:\n")
                .append(Configuration.CMD_PREPARE_EXAMPLE)
                .append("\n");

        help.append("description:\n")
                .append(Configuration.CMD_PREPARE_DESC)
                .append("\n");

        help.append("options:\n")
                .append(Configuration.CMD_PREPARE_OPTIONS)
                .append("\n");

        return help.toString();
    }

    /**
     * Resolves the input format from the format option, falling back to the
     * default format name when the option is absent.
     *
     * @throws IllegalArgumentException if the format name is not recognized
     */
    public void setFormat() {
        String requested = optionSet.has(BI_PREPARE_PARTS_FORMAT)
                ? (String) optionSet.valueOf(BI_PREPARE_PARTS_FORMAT)
                : Configuration.BI_PREPARE_PARTS_FORMAT_DEFAULTVALUE;

        format = Format.fromString(requested);
        if (format != null) {
            return;
        }
        throw new IllegalArgumentException(String.format(
                "unsupported format '%s'", requested));
    }

    /**
     * @return the input format held by this configuration
     */
    public Format getFormat() {
        return this.format;
    }
    
    /**
     * Describes what kind of source the current format expects.
     *
     * @return a fixed phrase for MySQL; otherwise "at least one source "
     *         followed by the file kind, where every format other than TSV
     *         and JSON is described as a CSV file
     */
    public String getSourceTargetDescr() {
        if (format == Format.MYSQL) {
            return "the source MySQL table";
        }

        String fileKind;
        if (format == Format.TSV) {
            fileKind = "TSV file";
        } else if (format == Format.JSON) {
            fileKind = "JSON file";
        } else {
            // Format.CSV and everything else
            fileKind = "CSV file";
        }
        return "at least one source " + fileKind;
    }

    /**
     * Chooses the output format from the input format: syslog input gets the
     * syslog-specific output format, everything else the plain one.
     *
     * @throws IllegalStateException if the input format has not been set yet
     */
    public void setOutputFormat() {
        if (format == null) {
            throw new IllegalStateException(
                    "this method MUST be called after invoking the setFormat()");
        }

        outputFormat = Format.SYSLOG.equals(format)
                ? OutputFormat.SYSLOGMSGPACKGZ
                : OutputFormat.MSGPACKGZ;
    }

    /**
     * @return the output format held by this configuration
     */
    public OutputFormat getOutputFormat() {
        return this.outputFormat;
    }

    /**
     * Resolves the compression type from the compression option, falling
     * back to the default type name when the option is absent.
     *
     * @throws IllegalArgumentException if the type name is not recognized
     */
    public void setCompressionType() {
        String requested = optionSet.has(BI_PREPARE_PARTS_COMPRESSION)
                ? (String) optionSet.valueOf(BI_PREPARE_PARTS_COMPRESSION)
                : BI_PREPARE_PARTS_COMPRESSION_DEFAULTVALUE;

        compressionType = CompressionType.fromString(requested);
        if (compressionType != null) {
            return;
        }
        throw new IllegalArgumentException(String.format(
                "unsupported compression type: %s", requested));
    }

    /**
     * @return the compression type held by this configuration
     */
    public CompressionType getCompressionType() {
        return this.compressionType;
    }

    /**
     * Returns the compression type to use for the given source. When the
     * configured type is not {@link CompressionType#AUTO} it is returned
     * unchanged. Otherwise the source is probed, first as GZIP and then as
     * uncompressed, and the first candidate that can be opened and read is
     * stored in this configuration and returned.
     *
     * @param source source whose input stream is probed
     * @return the configured or detected type; {@code null} if neither
     *         candidate could be opened and read
     * @throws PreparePartsException declared for callers; not thrown by the
     *         probing itself
     */
    public CompressionType checkCompressionType(Source source) throws PreparePartsException {
        if (getCompressionType() != CompressionType.AUTO) {
            return getCompressionType();
        }

        // GZIP goes first: opening it fails on non-gzip data, while NONE
        // accepts any readable stream.
        CompressionType[] candidates = new CompressionType[] {
                CompressionType.GZIP, CompressionType.NONE,
        };

        CompressionType detected = null;
        for (int i = 0; i < candidates.length; i++) {
            InputStream raw = null;
            InputStream in = null;
            try {
                raw = source.getInputStream();
                in = candidates[i].createInputStream(raw);
                byte[] b = new byte[2];
                in.read(b);

                detected = candidates[i];
                break;
            } catch (IOException e) {
                LOG.fine(String.format("source %s is %s", source, e.getMessage()));
            } finally {
                // If wrapping failed (e.g. the GZIP header check), 'in' is
                // still null and the raw stream must be closed directly,
                // otherwise it leaks. Closing the wrapper closes the raw one.
                InputStream toClose = (in != null) ? in : raw;
                if (toClose != null) {
                    try {
                        toClose.close();
                    } catch (IOException ignored) {
                        // best-effort close of a probe stream
                    }
                }
            }
        }

        this.compressionType = detected;
        return detected;
    }

    /**
     * Reads the number of prepare threads from the parallel option, falling
     * back to the default value when the option is absent. Negative values
     * become 2 and values above 96 are capped at 96.
     *
     * NOTE(review): 0 is accepted as-is -- confirm that zero threads is a
     * meaningful setting for the callers.
     *
     * @throws IllegalArgumentException if the value is not an integer
     */
    public void setPrepareThreadNum() {
        String raw = optionSet.has(BI_PREPARE_PARTS_PARALLEL)
                ? (String) optionSet.valueOf(BI_PREPARE_PARTS_PARALLEL)
                : BI_PREPARE_PARTS_PARALLEL_DEFAULTVALUE;

        int requested;
        try {
            requested = Integer.parseInt(raw);
        } catch (NumberFormatException e) {
            throw new IllegalArgumentException(String.format(
                    "'int' value is required as '%s' option", BI_PREPARE_PARTS_PARALLEL), e);
        }

        if (requested < 0) {
            numOfPrepareThreads = 2;
        } else {
            numOfPrepareThreads = Math.min(requested, 96);
        }
    }

    /**
     * @return the number of prepare threads held by this configuration
     */
    public int getNumOfPrepareThreads() {
        return this.numOfPrepareThreads;
    }

    /**
     * Creates the charset decoder for the encoding named by the encoding
     * option, falling back to the default encoding when the option is absent.
     *
     * @throws IllegalArgumentException if the decoder cannot be created; the
     *         original failure is kept as the cause
     */
    public void setEncoding() {
        String encoding;
        if (!optionSet.has(BI_PREPARE_PARTS_ENCODING)) {
            encoding = BI_PREPARE_PARTS_ENCODING_DEFAULTVALUE;
        } else {
            encoding = (String) optionSet.valueOf(BI_PREPARE_PARTS_ENCODING);
        }

        try {
            createCharsetDecoder(encoding);
        } catch (Exception e) {
            // Same message as before, but keep the cause so the stack trace
            // of the real failure is not lost.
            throw new IllegalArgumentException(e.getMessage(), e);
        }
    }

    /**
     * Creates the decoder for the named charset and sets how it reacts to
     * malformed input and unmappable characters: such input is reported when
     * error records abort the run and replaced otherwise.
     *
     * @param encoding charset name
     * @throws Exception if the charset cannot be resolved or the
     *         error-records handling has not been set yet
     */
    public void createCharsetDecoder(String encoding) throws Exception {
        charsetDecoder = Charset.forName(encoding).newDecoder();

        // abort -> report bad input; skip -> substitute and carry on
        CodingErrorAction onBadInput =
                errorRecordsHandling.equals(ErrorRecordsHandling.ABORT)
                        ? CodingErrorAction.REPORT
                        : CodingErrorAction.REPLACE;

        charsetDecoder.onMalformedInput(onBadInput);
        charsetDecoder.onUnmappableCharacter(onBadInput);
    }

    /**
     * @return the charset decoder held by this configuration
     * @throws PreparePartsException declared for callers; not thrown by this
     *         implementation
     */
    public CharsetDecoder getCharsetDecoder() throws PreparePartsException {
        return this.charsetDecoder;
    }

    /**
     * Reads the primary-key option, given as {@code name:type}, and stores
     * the key name and type. Does nothing when the option is absent.
     *
     * @throws IllegalArgumentException if the value is not a
     *         {@code name:type} pair or the type is neither int nor string
     */
    public void setPrimaryKey() {
        if (!optionSet.has(BI_PREPARE_PARTS_PRIMARY_KEY)) {
            return;
        }

        // if 'primary-key' option appears, ....
        String spec = (String) optionSet.valueOf(BI_PREPARE_PARTS_PRIMARY_KEY);

        // needs a ':' that is not the first character and exactly two parts
        String[] parts = spec.indexOf(":") > 0 ? spec.split(":") : null;
        if (parts == null || parts.length != 2) {
            throw new IllegalArgumentException(
                    String.format("Invalid 'primary-key' option: %s", spec));
        }

        ColumnType keyType = ColumnType.Conv.fromString(parts[1]);
        boolean supported = keyType != null
                && (keyType.equals(ColumnType.INT) || keyType.equals(ColumnType.STRING));
        if (!supported) {
            throw new IllegalArgumentException(String.format(
                    "primary-key's type must be 'int' or 'string' only: %s", spec));
        }

        primaryKey = parts[0];
        primaryKeyType = keyType;
        hasPrimaryKey = true;
    }

    /**
     * @return {@code true} once a primary key has been set
     */
    public boolean hasPrimaryKey() {
        return this.hasPrimaryKey;
    }

    /**
     * @return the primary key column name, or {@code null} if none was set
     */
    public String getPrimaryKey() {
        return this.primaryKey;
    }

    /**
     * @return the primary key column type, or {@code null} if none was set
     */
    public ColumnType getPrimaryKeyType() {
        return this.primaryKeyType;
    }

    /**
     * Stores the value of the time-column option. Does nothing when the
     * option is absent.
     *
     * @throws IllegalArgumentException if a primary key has been set as well
     */
    public void setAliasTimeColumn() {
        if (!optionSet.has(BI_PREPARE_PARTS_TIMECOLUMN)) {
            return;
        }
        if (hasPrimaryKey()) {
            throw new IllegalArgumentException(
                    "cannot specify both of 'time-column' and 'primary-key' options");
        }
        aliasTimeColumn = (String) optionSet.valueOf(BI_PREPARE_PARTS_TIMECOLUMN);
    }

    /**
     * @return the alias time column name, or {@code null} if none was set
     */
    public String getAliasTimeColumn() {
        return this.aliasTimeColumn;
    }

    /**
     * Reads the time-value option, given either as a single long or as a
     * comma-separated pair of longs, and stores the resulting time value.
     * Does nothing when the option is absent.
     *
     * @throws IllegalArgumentException if a primary key has been set as
     *         well, if the pair form does not have exactly two parts, or if
     *         a part is not a long
     */
    public void setTimeValue() {
        if (!optionSet.has(BI_PREPARE_PARTS_TIMEVALUE)) {
            return;
        }

        if (hasPrimaryKey()) {
            throw new IllegalArgumentException(
                    "cannot specify both of 'time-value' and 'primary-key' options");
        }

        String raw = (String) optionSet.valueOf(BI_PREPARE_PARTS_TIMEVALUE);
        if (raw == null) {
            return;
        }

        try {
            // single value: no comma anywhere
            if (raw.indexOf(',') < 0) {
                timeValue = new TimeValueTimeColumnValue(Long.parseLong(raw));
                return;
            }

            // pair form
            String[] parts = raw.split(",");
            if (parts.length != 2) {
                throw new IllegalArgumentException(
                        "'time-value' option requires a pair argument of long typed unix time and hours sorted periodically , like 1394409600,10");
            }
            long first = Long.parseLong(parts[0]);
            long second = Long.parseLong(parts[1]);
            timeValue = new TimeValueTimeColumnValue(first, second);
        } catch (NumberFormatException e) {
            throw new IllegalArgumentException(
                    "'time-value option requires the long type argument'", e);
        }
    }

    /**
     * @return the time value held by this configuration
     */
    public TimeValueTimeColumnValue getTimeValue() {
        return this.timeValue;
    }

    /**
     * Stores the value of the time-format option. Does nothing when the
     * option is absent.
     *
     * @throws IllegalArgumentException if a primary key has been set as well
     */
    public void setTimeFormat() {
        if (!optionSet.has(BI_PREPARE_PARTS_TIMEFORMAT)) {
            return;
        }
        if (hasPrimaryKey()) {
            throw new IllegalArgumentException(
                    "cannot specify both of 'time-format' and 'primary-key' options");
        }
        timeFormat = (String) optionSet.valueOf(BI_PREPARE_PARTS_TIMEFORMAT);
    }

    /**
     * @return a new {@code Strftime} built from the stored time format, or
     *         {@code null} if no time format was set
     */
    public Strftime getTimeFormat() {
        if (timeFormat == null) {
            return null;
        }
        return new Strftime(timeFormat);
    }

    /**
     * @param strfString format string to wrap
     * @return a new {@code Strftime} built from the given string, or
     *         {@code null} if the string is {@code null}
     */
    public Strftime getTimeFormat(String strfString) {
        if (strfString == null) {
            return null;
        }
        return new Strftime(strfString);
    }

    /**
     * Reads the output directory from the output option, falling back to the
     * default directory name under the current directory when the option is
     * absent or empty, and creates the directory if it does not exist yet.
     *
     * @throws IllegalArgumentException if the directory does not exist and
     *         cannot be created
     */
    public void setOutputDirName() {
        // Use the option-name constant for the presence check too, so it
        // cannot drift from the lookup and the error message below.
        if (optionSet.has(BI_PREPARE_PARTS_OUTPUTDIR)) {
            outputDirName = (String) optionSet.valueOf(BI_PREPARE_PARTS_OUTPUTDIR);
        }

        File outputDir = null;
        if (outputDirName == null || outputDirName.isEmpty()) {
            outputDir = new File(new File("."), BI_PREPARE_PARTS_OUTPUTDIR_DEFAULTVALUE);
            // keep only the last path element as the stored name
            outputDirName = outputDir.getName();
        } else {
            outputDir = new File(outputDirName);
        }

        // validate output dir
        if (!outputDir.isDirectory()) {
            if (!outputDir.mkdir()) {
                throw new IllegalArgumentException(String.format(
                        "Cannot create '%s' directory '%s'",
                        BI_PREPARE_PARTS_OUTPUTDIR, outputDirName));
            }
        }
    }

    /**
     * Resolves the directory used for error records and makes sure it exists.
     *
     * <p>If the {@code BI_PREPARE_PARTS_ERROR_RECORDS_OUTPUT} option is present
     * its value replaces {@code errorRecordsOutputDirName}. When the resulting
     * name is {@code null} or empty, the directory defaults to
     * {@code BI_PREPARE_PARTS_ERROR_RECORDS_OUTPUTDIR_DEFAULTVALUE} under the
     * current directory and {@code errorRecordsOutputDirName} is set to that
     * directory's simple name.
     *
     * @throws IllegalArgumentException if the directory does not exist and
     *         cannot be created
     */
    public void setErrorRecordsOutputDirName() {
        if (optionSet.has(BI_PREPARE_PARTS_ERROR_RECORDS_OUTPUT)) {
            errorRecordsOutputDirName = (String) optionSet.valueOf(BI_PREPARE_PARTS_ERROR_RECORDS_OUTPUT);
        }

        final File errorRecordsOutputDir;
        if (errorRecordsOutputDirName == null || errorRecordsOutputDirName.isEmpty()) {
            errorRecordsOutputDir = new File(new File("."), BI_PREPARE_PARTS_ERROR_RECORDS_OUTPUTDIR_DEFAULTVALUE);
            errorRecordsOutputDirName = errorRecordsOutputDir.getName();
        } else {
            errorRecordsOutputDir = new File(errorRecordsOutputDirName);
        }

        // validate dir
        // mkdirs() also creates missing parent directories, so a nested path
        // works. The final isDirectory() re-check tolerates the directory
        // having been created concurrently by someone else.
        if (!errorRecordsOutputDir.isDirectory()
                && !errorRecordsOutputDir.mkdirs()
                && !errorRecordsOutputDir.isDirectory()) {
            throw new IllegalArgumentException(String.format(
                    "Cannot create '%s' directory '%s'",
                    BI_PREPARE_PARTS_ERROR_RECORDS_OUTPUT, errorRecordsOutputDirName));
        }
    }

    /**
     * Determines the error-records handling mode and stores it in
     * {@code errorRecordsHandling}.
     *
     * <p>The mode string comes from the
     * {@code BI_PREPARE_PARTS_ERROR_RECORDS_HANDLING} option when present,
     * otherwise from its {@code _DEFAULTVALUE} constant, and is converted with
     * {@code ErrorRecordsHandling.fromString}.
     *
     * @throws IllegalArgumentException if the mode string converts to {@code null}
     */
    public void setErrorRecordsHandling() {
        final String mode = optionSet.has(BI_PREPARE_PARTS_ERROR_RECORDS_HANDLING)
                ? (String) optionSet.valueOf(BI_PREPARE_PARTS_ERROR_RECORDS_HANDLING)
                : BI_PREPARE_PARTS_ERROR_RECORDS_HANDLING_DEFAULTVALUE;

        // The field is assigned before validation, exactly as callers have
        // always observed: it holds null when the exception below is thrown.
        errorRecordsHandling = ErrorRecordsHandling.fromString(mode);
        if (errorRecordsHandling != null) {
            return;
        }
        throw new IllegalArgumentException(String.format(
                "unsupported '%s' mode '%s'",
                BI_PREPARE_PARTS_ERROR_RECORDS_HANDLING, mode));
    }

    /**
     * Returns the value currently held in the {@code errorRecordsHandling} field.
     *
     * @return the stored error-records handling mode
     */
    public ErrorRecordsHandling getErrorRecordsHandling() {
        return this.errorRecordsHandling;
    }

    /**
     * Determines the invalid-columns handling mode and stores it in
     * {@code invalidColumnsHandling}.
     *
     * <p>The mode string comes from the
     * {@code BI_PREPARE_INVALID_COLUMNS_HANDLING} option when present,
     * otherwise from its {@code _DEFAULTVALUE} constant, and is converted with
     * {@code InvalidColumnsHandling.fromString}.
     *
     * @throws IllegalArgumentException if the mode string converts to {@code null}
     */
    public void setInvalidColumnsHandling() {
        String mode = BI_PREPARE_INVALID_COLUMNS_HANDLING_DEFAULTVALUE;
        if (optionSet.has(BI_PREPARE_INVALID_COLUMNS_HANDLING)) {
            mode = (String) optionSet.valueOf(BI_PREPARE_INVALID_COLUMNS_HANDLING);
        }

        // The field is assigned first; it holds null if the check below throws.
        invalidColumnsHandling = InvalidColumnsHandling.fromString(mode);
        if (invalidColumnsHandling != null) {
            return;
        }
        throw new IllegalArgumentException(String.format(
                "unsupported '%s' mode '%s'",
                BI_PREPARE_INVALID_COLUMNS_HANDLING, mode));
    }

    /**
     * Returns the value currently held in the {@code invalidColumnsHandling} field.
     *
     * @return the stored invalid-columns handling mode
     */
    public InvalidColumnsHandling getInvalidColumnsHandling() {
        return this.invalidColumnsHandling;
    }

    /**
     * Updates {@code dryRun} from the "dry-run" option when it is present.
     * The flag becomes true only if the option value is exactly "true";
     * when the option is absent the flag is left unchanged.
     */
    public void setDryRun() {
        if (!optionSet.has("dry-run")) {
            return;
        }
        String flag = (String) optionSet.valueOf("dry-run");
        // Constant-first equals is null-safe, so no separate null check is needed.
        dryRun = "true".equals(flag);
    }

    /**
     * Returns the value currently held in the {@code dryRun} field.
     *
     * @return the stored dry-run flag
     */
    public boolean dryRun() {
        return this.dryRun;
    }

    /**
     * Returns the value currently held in the {@code outputDirName} field.
     *
     * @return the stored output directory name
     */
    public String getOutputDirName() {
        return this.outputDirName;
    }

    /**
     * Returns the value currently held in the {@code errorRecordsOutputDirName} field.
     *
     * @return the stored error-records output directory name
     */
    public String getErrorRecordsOutputDirName() {
        return this.errorRecordsOutputDirName;
    }

    /**
     * Parses the split size as an {@code int} and stores it in {@code splitSize}.
     *
     * <p>The text comes from the {@code BI_PREPARE_PARTS_SPLIT_SIZE} option when
     * present, otherwise from its {@code _DEFAULTVALUE} constant.
     *
     * @throws IllegalArgumentException if the text is not a valid {@code int};
     *         the original {@link NumberFormatException} is attached as the cause
     */
    public void setSplitSize() {
        final String rawSize = optionSet.has(BI_PREPARE_PARTS_SPLIT_SIZE)
                ? (String) optionSet.valueOf(BI_PREPARE_PARTS_SPLIT_SIZE)
                : BI_PREPARE_PARTS_SPLIT_SIZE_DEFAULTVALUE;

        try {
            splitSize = Integer.parseInt(rawSize);
        } catch (NumberFormatException nfe) {
            throw new IllegalArgumentException(
                    String.format("'%s' is required as int type", BI_PREPARE_PARTS_SPLIT_SIZE),
                    nfe);
        }
    }

    /**
     * Returns the value currently held in the {@code splitSize} field.
     *
     * @return the stored split size
     */
    public int getSplitSize() {
        return this.splitSize;
    }

    /**
     * Reads the sample row size from {@code props} and stores it in
     * {@code sampleRowSize}.
     *
     * <p>The property key is {@code Configuration.BI_PREPARE_PARTS_SAMPLE_ROWSIZE};
     * its {@code _DEFAULTVALUE} constant is used when the property is not set.
     *
     * @throws IllegalArgumentException if the property text is not a valid
     *         {@code int}; the original {@link NumberFormatException} is
     *         attached as the cause
     */
    public void setSampleReaderRowSize() {
        final String key = Configuration.BI_PREPARE_PARTS_SAMPLE_ROWSIZE;
        final String fallback = Configuration.BI_PREPARE_PARTS_SAMPLE_ROWSIZE_DEFAULTVALUE;
        final String configured = props.getProperty(key, fallback);

        try {
            sampleRowSize = Integer.parseInt(configured);
        } catch (NumberFormatException nfe) {
            throw new IllegalArgumentException(
                    String.format(
                            "sample row size is required as int type e.g. -D%s=%s",
                            key, fallback),
                    nfe);
        }
    }

    /**
     * Returns the value currently held in the {@code sampleRowSize} field.
     *
     * @return the stored sample row size
     */
    public int getSampleRowSize() {
        return this.sampleRowSize;
    }

    /**
     * Initializes the column names from the {@code BI_PREPARE_PARTS_COLUMNS}
     * option. When the option is present its values are passed to
     * {@link #setColumnNames(String[])}; otherwise both
     * {@code actualColumnNames} and {@code columnNames} become empty arrays.
     */
    public void setColumnNames() {
        if (optionSet.has(BI_PREPARE_PARTS_COLUMNS)) {
            String[] requested = optionSet.valuesOf(BI_PREPARE_PARTS_COLUMNS).toArray(new String[0]);
            setColumnNames(requested);
            return;
        }
        actualColumnNames = new String[0];
        columnNames = new String[0];
    }

    /**
     * Stores the given names as {@code actualColumnNames} and derives
     * {@code columnNames} from them.
     *
     * <p>Each name accepted by {@code InvalidColumnsHandling.validColumnFormat}
     * is kept as is; every other name is replaced by the result of
     * {@code invalidColumnsHandling.handleInvalidColumn(name, index)}.
     *
     * @param cnames the column names as supplied by the caller
     */
    public void setColumnNames(String[] cnames) {
        this.actualColumnNames = cnames;

        final String[] resolved = new String[cnames.length];
        int index = 0;
        for (String candidate : cnames) {
            resolved[index] = InvalidColumnsHandling.validColumnFormat(candidate)
                    ? candidate
                    : invalidColumnsHandling.handleInvalidColumn(candidate, index);
            index++;
        }
        this.columnNames = resolved;
    }

    /**
     * Returns the {@code actualColumnNames} array itself, not a copy.
     *
     * @return the stored array of column names as originally supplied
     */
    public String[] getActualColumnNames() {
        return this.actualColumnNames;
    }

    /**
     * Returns the {@code columnNames} array itself, not a copy.
     *
     * @return the stored array of column names
     */
    public String[] getColumnNames() {
        return this.columnNames;
    }

    /**
     * Initializes {@code columnTypes} from the
     * {@code BI_PREPARE_PARTS_COLUMNTYPES} option.
     *
     * <p>When the option is absent the field becomes an empty array. Otherwise
     * each option value is lower-cased and converted with
     * {@code ColumnType.Conv.fromString}.
     *
     * @throws IllegalArgumentException if any value converts to {@code null}
     */
    public void setColumnTypes() {
        if (!optionSet.has(BI_PREPARE_PARTS_COLUMNTYPES)) {
            columnTypes = new ColumnType[0];
            return;
        }

        String[] types = optionSet.valuesOf(BI_PREPARE_PARTS_COLUMNTYPES).toArray(new String[0]);
        columnTypes = new ColumnType[types.length];
        for (int i = 0; i < types.length; i++) {
            // Lower-case with Locale.ROOT: the no-arg toLowerCase() follows the
            // JVM default locale, and under e.g. a Turkish locale "INT" becomes
            // "ınt" (dotless i), which would not match a type name.
            String normalized = types[i].toLowerCase(java.util.Locale.ROOT);
            columnTypes[i] = ColumnType.Conv.fromString(normalized);
            if (columnTypes[i] == null) {
                throw new IllegalArgumentException(String.format(
                        "'%s' cannot be specified as column type", types[i]));
            }
        }
    }

    /**
     * Replaces the stored column types with the given array. The array is
     * stored by reference, not copied.
     *
     * @param columnTypes the column types to store
     */
    public void setColumnTypes(ColumnType[] columnTypes) {
        ColumnType[] replacement = columnTypes;
        this.columnTypes = replacement;
    }

    /**
     * Returns the {@code columnTypes} array itself, not a copy.
     *
     * @return the stored array of column types
     */
    public ColumnType[] getColumnTypes() {
        return this.columnTypes;
    }

    /**
     * Rebuilds {@code columnTypeMap} from the {@code BI_PREPARE_COLUMNTYPE}
     * option values, each of which must have the form {@code name:type}.
     *
     * <p>Nothing happens when the option is absent or has no values. Otherwise
     * the map is cleared and refilled. An entry whose type part converts to
     * {@code null} via {@code ColumnType.Conv.fromString} is skipped without
     * an error.
     *
     * @throws IllegalArgumentException if a value does not consist of exactly
     *         one non-empty name and one type separated by a single ':'
     */
    public void setColumnTypeMap() {
        if (!optionSet.has(BI_PREPARE_COLUMNTYPE)) {
            return;
        }

        List<?> args = optionSet.valuesOf(BI_PREPARE_COLUMNTYPE);
        if (args == null || args.isEmpty()) {
            return;
        }

        columnTypeMap.clear();
        for (Object rawArg : args) {
            String spec = (String) rawArg;
            String[] parts = spec.split(":");
            // Reject a missing or leading ':' as well as anything that does
            // not split into exactly two pieces (e.g. "a:" or "a:b:c").
            if (spec.indexOf(":") <= 0 || parts.length != 2) {
                throw new IllegalArgumentException(
                        String.format("Invalid 'column-type' option: %s", spec));
            }

            ColumnType type = ColumnType.Conv.fromString(parts[1]);
            if (type == null) {
                continue; // unrecognized type name: entry is skipped
            }
            columnTypeMap.put(parts[0], type);
        }
    }

    /**
     * Returns the {@code columnTypeMap} instance itself, not a copy.
     *
     * @return the stored map from column name to column type
     */
    public Map getColumnTypeMap() {
        return this.columnTypeMap;
    }

    /**
     * Sets {@code hasAllString} to whether the {@code BI_PREPARE_ALL_STRING}
     * option is present on the command line.
     */
    public void setAllString() {
        boolean present = optionSet.has(BI_PREPARE_ALL_STRING);
        hasAllString = present;
    }

    /**
     * Returns the value currently held in the {@code hasAllString} field.
     *
     * @return the stored all-string flag
     */
    public boolean hasAllString() {
        return this.hasAllString;
    }

    /**
     * Initializes {@code excludeColumns} from the
     * {@code BI_PREPARE_PARTS_EXCLUDE_COLUMNS} option and validates the result.
     *
     * <p>When the option is absent the field becomes an empty array. Otherwise
     * the option values are stored and checked one by one: with a primary key
     * configured, the primary-key column must not be listed; without one, the
     * column named {@code Configuration.BI_PREPARE_PARTS_TIMECOLUMN_DEFAULTVALUE}
     * must not be listed.
     *
     * @throws IllegalArgumentException if a forbidden column is listed
     */
    public void setExcludeColumns() {
        if (!optionSet.has(BI_PREPARE_PARTS_EXCLUDE_COLUMNS)) {
            excludeColumns = new String[0];
            return;
        }

        // The field is assigned before validation, so it keeps the listed
        // values even when one of the checks below throws.
        String[] listed = optionSet.valuesOf(BI_PREPARE_PARTS_EXCLUDE_COLUMNS).toArray(new String[0]);
        excludeColumns = listed;

        for (String column : listed) {
            if (hasPrimaryKey()) {
                if (column.equals(getPrimaryKey())) {
                    throw new IllegalArgumentException(String.format(
                            "'primary-key' column cannot be included in '%s'",
                            BI_PREPARE_PARTS_EXCLUDE_COLUMNS));
                }
                continue;
            }
            if (column.equals(Configuration.BI_PREPARE_PARTS_TIMECOLUMN_DEFAULTVALUE)) {
                throw new IllegalArgumentException(String.format(
                        "'time' column cannot be included in '%s'",
                        BI_PREPARE_PARTS_EXCLUDE_COLUMNS));
            }
        }
    }

    /**
     * Returns the {@code excludeColumns} array itself, not a copy.
     *
     * @return the stored array of excluded column names
     */
    public String[] getExcludeColumns() {
        return this.excludeColumns;
    }

    /**
     * Initializes {@code onlyColumns} from the
     * {@code BI_PREPARE_PARTS_ONLY_COLUMNS} option.
     *
     * <p>When the option is absent the field becomes an empty array. Otherwise
     * the option values are stored and compared against the current contents
     * of {@code excludeColumns}; a name may not appear in both.
     *
     * @throws IllegalArgumentException if a column is listed in both
     *         {@code onlyColumns} and {@code excludeColumns}
     */
    public void setOnlyColumns() {
        if (!optionSet.has(BI_PREPARE_PARTS_ONLY_COLUMNS)) {
            onlyColumns = new String[0];
            return;
        }

        onlyColumns = optionSet.valuesOf(BI_PREPARE_PARTS_ONLY_COLUMNS).toArray(new String[0]);
        for (int o = 0; o < onlyColumns.length; o++) {
            final String wanted = onlyColumns[o];
            for (int e = 0; e < excludeColumns.length; e++) {
                if (!wanted.equals(excludeColumns[e])) {
                    continue;
                }
                throw new IllegalArgumentException(String.format(
                        "don't include '%s' in '%s'",
                        BI_PREPARE_PARTS_EXCLUDE_COLUMNS,
                        BI_PREPARE_PARTS_ONLY_COLUMNS));
            }
        }
    }

    public String[] getOnlyColumns() {
        return onlyColumns;
    }

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy