// com.sleepycat.je.util.DbLoad (Maven / Gradle / Ivy)
/*-
* Copyright (C) 2002, 2018, Oracle and/or its affiliates. All rights reserved.
*
* This file was distributed by Oracle as part of a version of Oracle Berkeley
* DB Java Edition made available at:
*
* http://www.oracle.com/technetwork/database/database-technologies/berkeleydb/downloads/index.html
*
* Please see the LICENSE file included in the top-level directory of the
* appropriate version of Oracle Berkeley DB Java Edition for a copy of the
* license and additional information.
*/
package com.sleepycat.je.util;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.Date;
import java.util.logging.Level;
import com.sleepycat.je.Database;
import com.sleepycat.je.DatabaseConfig;
import com.sleepycat.je.DatabaseEntry;
import com.sleepycat.je.DatabaseException;
import com.sleepycat.je.DatabaseExistsException;
import com.sleepycat.je.DatabaseNotFoundException;
import com.sleepycat.je.DbInternal;
import com.sleepycat.je.Environment;
import com.sleepycat.je.EnvironmentConfig;
import com.sleepycat.je.EnvironmentFailureException;
import com.sleepycat.je.JEVersion;
import com.sleepycat.je.OperationStatus;
import com.sleepycat.je.utilint.CmdUtil;
import com.sleepycat.je.utilint.LoggerUtils;
/**
* Loads a database from a dump file generated by {@link DbDump}.
* This utility may be used programmatically or from the command line.
*
* When using this utility as a command line program, and the
* application uses custom key comparators, be sure to add the jars or
* classes to the classpath that contain the application's comparator
* classes.
*
* <pre>
* java { com.sleepycat.je.util.DbLoad |
*        -jar je-&lt;version&gt;.jar DbLoad }
*     -h &lt;dir&gt;             # environment home directory
*     [-f &lt;fileName&gt;]      # input file
*     [-n]                 # no overwrite mode
*     [-T]                 # input file is in text mode
*     [-I]                 # ignore unknown parameters
*     [-c name=value]      # config values
*     [-s &lt;databaseName&gt;]  # database to load
*     [-v]                 # show progress
*     [-V]                 # print JE version number
* </pre>
*
* See {@link DbLoad#main} for a full description of the
* command line arguments.
*
* To load a database from a stream in code:
*
* <pre>
*    DbLoad loader = new DbLoad();
*    loader.setEnv(env);
*    loader.setDbName(dbName);
*    loader.setInputReader(reader);
*    loader.setNoOverwrite(noOvrwr);
*    loader.setTextFileMode(tfm);
*    loader.load();
* </pre>
*
* Because a {@code DATA=END} marker is used to terminate the dump of
* each database, multiple databases can be dumped and loaded using a single
* stream. The {@link DbDump#dump} method leaves the stream positioned after
* the last line written and the {@link DbLoad#load} method leaves the stream
* positioned after the last line read.
*/
public class DbLoad {

    private static final boolean DEBUG = false;

    /* Message used whenever a key or data line cannot be decoded. */
    private static final String CORRUPTED_FILE = "Corrupted file";

    /* Byte value stored for an escaped backslash in printable format. */
    private static final byte BACKSLASH_VALUE = ((byte) ('\\')) & 0xff;

    protected Environment env;
    private boolean formatUsingPrintable;
    private String dbName;
    private BufferedReader reader;
    private boolean noOverwrite;
    private boolean textFileMode;
    private boolean dupSort;
    private boolean ignoreUnknownConfig;
    private boolean commandLine;
    private long progressInterval;
    private long totalLoadBytes;

    /*
     * True only when parseArgs opened the input file itself, in which case
     * main is responsible for closing the reader. A reader supplied through
     * setInputReader (or wrapping System.in) is never closed by this class.
     */
    private boolean closeReaderOnExit;

    private static final String usageString =
        "usage: " + CmdUtil.getJavaCommand(DbLoad.class) + "\n" +
        "       -h <dir>             # environment home directory\n" +
        "       [-f <fileName>]      # input file\n" +
        "       [-n]                 # no overwrite mode\n" +
        "       [-T]                 # input file is in text mode\n" +
        "       [-I]                 # ignore unknown parameters\n" +
        "       [-c name=value]      # config values\n" +
        "       [-s <databaseName>]  # database to load\n" +
        "       [-v]                 # show progress\n" +
        "       [-V]                 # print JE version number";

    /**
     * The main used by the DbLoad utility.
     *
     * <pre>
     * usage: java { com.sleepycat.je.util.DbLoad | -jar
     * je-&lt;version&gt;.jar DbLoad }
     *             [-f input-file] [-n] [-V] [-v] [-T] [-I]
     *             [-c name=value]
     *             [-s database] -h dbEnvHome
     * </pre>
     *
     * <p>-f - the file to load from (in DbDump format)<br>
     * -n - no overwrite mode. Do not overwrite existing data.<br>
     * -V - display the version of the JE library.<br>
     * -T - input file is in Text mode.<br>
     * -I - ignore unknown parameters in the config file.</p>
     *
     * <p>If -f is not specified, the dump is read from System.in.</p>
     *
     * <p>The -T option allows JE applications to easily load text files
     * into databases.</p>
     *
     * <p>The -I option allows loading databases that were dumped with the
     * Berkeley DB C product, when the dump file contains parameters not
     * known to JE.</p>
     *
     * <p>The input must be paired lines of text, where the first line of
     * the pair is the key item, and the second line of the pair is its
     * corresponding data item.</p>
     *
     * <p>A simple escape mechanism, where newline and backslash (\)
     * characters are special, is applied to the text input. Newline
     * characters are interpreted as record separators. Backslash characters
     * in the text will be interpreted in one of two ways: If the backslash
     * character precedes another backslash character, the pair will be
     * interpreted as a literal backslash. If the backslash character
     * precedes any other character, the two characters following the
     * backslash will be interpreted as a hexadecimal specification of a
     * single character; for example, \0a is a newline character in the
     * ASCII character set.</p>
     *
     * <p>For this reason, any backslash or newline characters that
     * naturally occur in the text input must be escaped to avoid
     * misinterpretation by db_load.</p>
     *
     * <p>-c name=value - Specify configuration options ignoring any value
     * they may have based on the input. The command-line format is
     * name=value. See the Supported Keywords section below for a list of
     * keywords supported by the -c option.</p>
     *
     * <p>-s database - the database to load.<br>
     * -h dbEnvHome - the directory containing the database environment.<br>
     * -v - report progress (requires -f)</p>
     *
     * <p>Supported Keywords<br>
     * version=N - specify the version of the input file. Currently only
     * version 3 is supported.<br>
     * format - specify the format of the file. Allowable values are "print"
     * and "bytevalue".<br>
     * dupsort - specify whether the database allows duplicates or not.
     * Allowable values are "true" (or "1") and "false" (or "0").<br>
     * type - specifies the type of database. Only "btree" is allowed.<br>
     * database - specifies the name of the database to be loaded.</p>
     *
     * <p>A failure during the load itself is reported by printing its stack
     * trace; the environment is closed afterwards in either case.</p>
     *
     * @param argv The arguments accepted by the DbLoad utility.
     *
     * @throws EnvironmentFailureException if an unexpected, internal or
     * environment-wide failure occurs.
     */
    public static void main(String argv[])
        throws Exception {

        DbLoad loader = parseArgs(argv);
        try {
            loader.load();
        } catch (Throwable e) {
            e.printStackTrace();
        } finally {
            try {
                /* Only close an input file that parseArgs opened. */
                if (loader.closeReaderOnExit) {
                    loader.reader.close();
                }
            } finally {
                loader.env.close();
            }
        }
    }

    /* Prints the message and the usage text to stderr, then exits. */
    private static void printUsage(String msg) {
        System.err.println(msg);
        System.err.println(usageString);
        System.exit(-1);
    }

    /*
     * Builds a fully configured DbLoad from the command line. Opens the
     * input (file or System.in) and the environment. Arguments that are not
     * recognized are ignored.
     */
    private static DbLoad parseArgs(String argv[])
        throws Exception {

        boolean noOverwrite = false;
        boolean textFileMode = false;
        boolean ignoreUnknownConfig = false;
        boolean showProgressInterval = false;

        int argc = 0;
        int nArgs = argv.length;
        String inputFileName = null;
        File envHome = null;
        String dbName = null;
        long progressInterval = 0;
        DbLoad ret = new DbLoad();
        ret.setCommandLine(true);

        while (argc < nArgs) {
            String thisArg = argv[argc++].trim();
            if (thisArg.equals("-n")) {
                noOverwrite = true;
            } else if (thisArg.equals("-T")) {
                textFileMode = true;
            } else if (thisArg.equals("-I")) {
                ignoreUnknownConfig = true;
            } else if (thisArg.equals("-V")) {
                System.out.println(JEVersion.CURRENT_VERSION);
                System.exit(0);
            } else if (thisArg.equals("-f")) {
                if (argc < nArgs) {
                    inputFileName = argv[argc++];
                } else {
                    printUsage("-f requires an argument");
                }
            } else if (thisArg.equals("-h")) {
                if (argc < nArgs) {
                    envHome = new File(argv[argc++]);
                } else {
                    printUsage("-h requires an argument");
                }
            } else if (thisArg.equals("-s")) {
                if (argc < nArgs) {
                    dbName = argv[argc++];
                } else {
                    printUsage("-s requires an argument");
                }
            } else if (thisArg.equals("-c")) {
                if (argc < nArgs) {
                    try {
                        ret.loadConfigLine(argv[argc++]);
                    } catch (IllegalArgumentException e) {
                        printUsage("-c: " + e.getMessage());
                    }
                } else {
                    printUsage("-c requires an argument");
                }
            } else if (thisArg.equals("-v")) {
                showProgressInterval = true;
            }
        }

        if (envHome == null) {
            printUsage("-h is a required argument");
        }

        long totalLoadBytes = 0;
        FileInputStream fileIn = null;
        boolean ready = false;
        try {
            InputStream is;
            if (inputFileName == null) {
                if (showProgressInterval) {

                    /*
                     * Can't show progress if we don't know how big the
                     * stream is.
                     */
                    printUsage("-v requires -f");
                }
                is = System.in;
            } else {
                fileIn = new FileInputStream(inputFileName);
                is = fileIn;
                if (showProgressInterval) {
                    totalLoadBytes = fileIn.getChannel().size();
                    /* Use 5% intervals. */
                    progressInterval = totalLoadBytes / 20;
                }
            }
            BufferedReader reader =
                new BufferedReader(new InputStreamReader(is));

            EnvironmentConfig envConfig = new EnvironmentConfig();
            envConfig.setAllowCreate(true);
            Environment env = new Environment(envHome, envConfig);

            ret.setEnv(env);

            /*
             * Only apply -s when it was given; otherwise a name supplied
             * with "-c database=..." would be reset to null here.
             */
            if (dbName != null) {
                ret.setDbName(dbName);
            }
            ret.setInputReader(reader);
            ret.setNoOverwrite(noOverwrite);
            ret.setTextFileMode(textFileMode);
            ret.setIgnoreUnknownConfig(ignoreUnknownConfig);
            ret.setProgressInterval(progressInterval);
            ret.setTotalLoadBytes(totalLoadBytes);
            ret.closeReaderOnExit = (fileIn != null);
            ready = true;
            return ret;
        } finally {
            /* Don't leak the input file if setup failed part way. */
            if (!ready && fileIn != null) {
                fileIn.close();
            }
        }
    }

    /*
     * Begin DbLoad API.  From here on there should be no calls to printUsage,
     * System.xxx.print, or System.exit.
     */

    /**
     * Creates a DbLoad object.
     */
    public DbLoad() {
    }

    /**
     * If true, enables output of warning messages.  Command line behavior is
     * not available via the public API.
     */
    private void setCommandLine(boolean commandLine) {
        this.commandLine = commandLine;
    }

    /**
     * Sets the Environment to load from.
     *
     * @param env The environment.
     */
    public void setEnv(Environment env) {
        this.env = env;
    }

    /**
     * Sets the database name to load.
     *
     * @param dbName database name
     */
    public void setDbName(String dbName) {
        this.dbName = dbName;
    }

    /**
     * Sets the BufferedReader to load from.
     *
     * @param reader The BufferedReader.
     */
    public void setInputReader(BufferedReader reader) {
        this.reader = reader;
    }

    /**
     * Sets whether the load should overwrite existing data or not.
     *
     * @param noOverwrite True if existing data should not be overwritten.
     */
    public void setNoOverwrite(boolean noOverwrite) {
        this.noOverwrite = noOverwrite;
    }

    /**
     * Sets whether the load data is in text file format.
     *
     * @param textFileMode True if the load data is in text file format.
     */
    public void setTextFileMode(boolean textFileMode) {
        this.textFileMode = textFileMode;
    }

    /**
     * Sets whether to ignore unknown parameters in the config file.  This
     * allows loading databases that were dumped with the Berkeley DB C
     * product, when the dump file contains parameters not known to JE.
     *
     * @param ignoreUnknownConfigMode True to ignore unknown parameters in
     * the config file.
     */
    public void setIgnoreUnknownConfig(boolean ignoreUnknownConfigMode) {
        this.ignoreUnknownConfig = ignoreUnknownConfigMode;
    }

    /**
     * If progressInterval is set, progress status messages are generated to
     * stdout each time roughly that many more input bytes have been read.
     *
     * @param progressInterval Specifies the number of input bytes between
     * status messages (the command line utility uses 5% of the input file
     * size).  If 0, no messages are generated.
     */
    public void setProgressInterval(long progressInterval) {
        this.progressInterval = progressInterval;
    }

    /**
     * Used for progress status messages.  Must be set to greater than
     * 0 if the progressInterval is greater than 0.
     *
     * @param totalLoadBytes number of input bytes to be loaded.
     */
    public void setTotalLoadBytes(long totalLoadBytes) {
        this.totalLoadBytes = totalLoadBytes;
    }

    /**
     * Loads one database from the configured reader into the configured
     * environment.  Unless text file mode is set, the dump header is read
     * first; records are then read up to the {@code DATA=END} marker, which
     * leaves the reader positioned after the last line read.  The database
     * is created if it does not exist and is always closed before this
     * method returns, whether or not the load succeeded.
     *
     * @return true whenever the method returns normally.
     *
     * @throws IOException if reading from the input reader fails.
     *
     * @throws IllegalArgumentException if no database name is available or
     * the input is malformed.
     */
    public boolean load()
        throws IOException, DatabaseException {

        LoggerUtils.envLogMsg(Level.INFO, DbInternal.getNonNullEnvImpl(env),
                              "DbLoad.load of " + dbName + " starting");

        if (progressInterval > 0) {
            System.out.println("Load start: " + new Date());
        }

        if (textFileMode) {
            formatUsingPrintable = true;
        } else {
            loadHeader();
        }

        /* The header may have supplied the name, so check only now. */
        if (dbName == null) {
            throw new IllegalArgumentException
                ("Must supply a database name: use -s or setDbName, or " +
                 "include a database= line in the dump header.");
        }

        DatabaseConfig dbConfig = new DatabaseConfig();
        dbConfig.setSortedDuplicates(dupSort);
        dbConfig.setAllowCreate(true);
        Database db;
        try {
            db = env.openDatabase(null, dbName, dbConfig);
        } catch (DatabaseNotFoundException e) {
            /* Should never happen, AllowCreate is true. */
            throw EnvironmentFailureException.unexpectedException(e);
        } catch (DatabaseExistsException e) {
            /* Should never happen, ExclusiveCreate is false. */
            throw EnvironmentFailureException.unexpectedException(e);
        }

        /* Always release the handle, even when the input is malformed. */
        try {
            loadData(db);
        } finally {
            db.close();
        }

        LoggerUtils.envLogMsg(Level.INFO, DbInternal.getNonNullEnvImpl(env),
                              "DbLoad.load of " + dbName + " ending.");

        if (progressInterval > 0) {
            System.out.println("Load end: " + new Date());
        }

        return true;
    }

    /*
     * Applies one "name=value" configuration line (from the dump header or
     * from -c).  Throws IllegalArgumentException for a malformed line, an
     * unsupported value, or an unknown keyword when unknown keywords are not
     * being ignored.
     */
    private void loadConfigLine(String line) {
        int equalsIdx = line.indexOf('=');
        if (equalsIdx < 0) {
            throw new IllegalArgumentException
                ("Invalid header parameter: " + line);
        }

        /*
         * Lower-case with Locale.ROOT so that keywords such as "VERSION"
         * are recognized regardless of the default locale.
         */
        String keyword = line.substring(0, equalsIdx).trim().
            toLowerCase(java.util.Locale.ROOT);
        String value = line.substring(equalsIdx + 1).trim();

        if (keyword.equals("version")) {
            if (DEBUG) {
                System.out.println("Found version: " + line);
            }
            if (!value.equals("3")) {
                throw new IllegalArgumentException
                    ("Version " + value + " is not supported.");
            }
        } else if (keyword.equals("format")) {
            value = value.toLowerCase(java.util.Locale.ROOT);
            if (value.equals("print")) {
                formatUsingPrintable = true;
            } else if (value.equals("bytevalue")) {
                formatUsingPrintable = false;
            } else {
                throw new IllegalArgumentException
                    (value + " is an unknown value for the format keyword");
            }
            if (DEBUG) {
                System.out.println("Found format: " + formatUsingPrintable);
            }
        } else if (keyword.equals("dupsort")) {
            value = value.toLowerCase(java.util.Locale.ROOT);
            if (value.equals("true") ||
                value.equals("1")) {
                dupSort = true;
            } else if (value.equals("false") ||
                       value.equals("0")) {
                dupSort = false;
            } else {
                throw new IllegalArgumentException
                    (value + " is an unknown value for the dupsort keyword");
            }
            if (DEBUG) {
                System.out.println("Found dupsort: " + dupSort);
            }
        } else if (keyword.equals("type")) {
            value = value.toLowerCase(java.util.Locale.ROOT);
            if (!value.equals("btree")) {
                throw new IllegalArgumentException
                    (value + " is not a supported database type.");
            }
            if (DEBUG) {
                System.out.println("Found type: " + line);
            }
        } else if (keyword.equals("database")) {
            /* A name that was already set takes precedence. */
            if (dbName == null) {
                dbName = value;
            }
            if (DEBUG) {
                System.out.println("DatabaseImpl: " + dbName);
            }
        } else if (!ignoreUnknownConfig) {
            throw new IllegalArgumentException
                ("'" + line + "' is not understood.");
        }
    }

    /*
     * Reads configuration lines up to the HEADER=END marker (or the end of
     * the input) and applies each of them.
     */
    private void loadHeader()
        throws IOException {

        if (DEBUG) {
            System.out.println("loading header");
        }
        String line = reader.readLine();
        while (line != null &&
               !line.equals("HEADER=END")) {
            loadConfigLine(line);
            line = reader.readLine();
        }
    }

    /*
     * Reads key/data line pairs up to the DATA=END marker and stores each
     * pair in db.  Progress is reported to stdout when progressInterval is
     * greater than 0.
     */
    private void loadData(Database db)
        throws DatabaseException, IOException {

        final boolean showProgress = (progressInterval > 0);

        String keyLine = reader.readLine();
        String dataLine = null;
        int count = 0;
        long lastTime = System.currentTimeMillis();

        /*
         * Running total of characters consumed, plus one per line for the
         * line terminator.  This approximates the number of bytes read.
         */
        long totalBytesRead = (keyLine == null) ? 0 : keyLine.length() + 1;
        /* Byte count at which the next progress message is due. */
        long nextReportAt = progressInterval;

        while (keyLine != null &&
               !keyLine.equals("DATA=END")) {
            dataLine = reader.readLine();
            if (dataLine == null) {
                throw new IllegalArgumentException("No data to match key " +
                                                   keyLine);
            }
            /* Add one for \n or \r. */
            totalBytesRead += dataLine.length() + 1;

            byte[] keyBytes = loadLine(keyLine.trim());
            byte[] dataBytes = loadLine(dataLine.trim());

            DatabaseEntry key = new DatabaseEntry(keyBytes);
            DatabaseEntry data = new DatabaseEntry(dataBytes);

            if (noOverwrite) {
                if (db.putNoOverwrite(null, key, data) ==
                    OperationStatus.KEYEXIST) {
                    /* Calling println is OK only from command line. */
                    if (commandLine) {
                        System.err.println("Key exists: " + key);
                    }
                }
            } else {
                db.put(null, key, data);
            }

            count++;
            if (showProgress && totalBytesRead >= nextReportAt) {
                long now = System.currentTimeMillis();
                /* Avoid dividing by zero if the total was never set. */
                String percent = (totalLoadBytes > 0) ?
                    Long.toString((100 * totalBytesRead) / totalLoadBytes) :
                    "unknown";
                System.out.println("loaded " + count + " records  " +
                                   (now - lastTime) + " ms - % completed: " +
                                   percent);
                lastTime = now;
                /* Next multiple of the interval above what has been read. */
                nextReportAt =
                    ((totalBytesRead / progressInterval) + 1) *
                    progressInterval;
            }

            keyLine = reader.readLine();
            if (keyLine == null) {
                throw new IllegalArgumentException("No \"DATA=END\"");
            }
            totalBytesRead += keyLine.length() + 1;
        }
    }

    /*
     * Decodes one key or data line into bytes, using the printable format
     * when it is selected and pairs of hex digits otherwise.  Throws
     * IllegalArgumentException if the line is not validly encoded.
     */
    private byte[] loadLine(String line)
        throws DatabaseException {

        if (formatUsingPrintable) {
            return readPrintableLine(line);
        }

        int nChars = line.length();
        /* Every byte needs exactly two hex digits. */
        if ((nChars & 1) != 0) {
            throw new IllegalArgumentException(CORRUPTED_FILE);
        }
        byte[] ret = new byte[nChars / 2];
        int charIdx = 0;
        for (int i = 0; i < ret.length; i++, charIdx += 2) {
            ret[i] = (byte) hexPairValue(line.charAt(charIdx),
                                         line.charAt(charIdx + 1));
        }
        return ret;
    }

    /*
     * Returns the value (0-255) of the two hex digits, or throws
     * IllegalArgumentException if either character is not a hex digit.
     */
    private static int hexPairValue(char high, char low) {
        int highNibble = Character.digit(high, 16);
        int lowNibble = Character.digit(low, 16);
        if (highNibble < 0 || lowNibble < 0) {
            throw new IllegalArgumentException(CORRUPTED_FILE);
        }
        return (highNibble << 4) + lowNibble;
    }

    /*
     * Decodes a line in printable format: "\\" is a literal backslash, "\xx"
     * is the byte with hex value xx, and any other character contributes its
     * low 8 bits.  Throws IllegalArgumentException for a truncated or
     * non-hex escape.
     */
    private byte[] readPrintableLine(String line)
        throws DatabaseException {

        /* maxNBytes is the max number of bytes that this line could be. */
        int maxNBytes = line.length();
        byte[] ba = new byte[maxNBytes];
        int actualNBytes = 0;

        for (int charIdx = 0; charIdx < maxNBytes; charIdx++) {
            char c = line.charAt(charIdx);
            if (c != '\\') {
                ba[actualNBytes++] = (byte) (c & 0xff);
                continue;
            }
            if (++charIdx >= maxNBytes) {
                /* Backslash at the end of the line. */
                throw new IllegalArgumentException(CORRUPTED_FILE);
            }
            char c1 = line.charAt(charIdx);
            if (c1 == '\\') {
                ba[actualNBytes++] = BACKSLASH_VALUE;
                continue;
            }
            if (++charIdx >= maxNBytes) {
                /* Only one hex digit after the backslash. */
                throw new IllegalArgumentException(CORRUPTED_FILE);
            }
            char c2 = line.charAt(charIdx);
            ba[actualNBytes++] = (byte) hexPairValue(c1, c2);
        }

        if (maxNBytes == actualNBytes) {
            return ba;
        }
        byte[] ret = new byte[actualNBytes];
        System.arraycopy(ba, 0, ret, 0, actualNBytes);
        return ret;
    }
}