// com.cinchapi.concourse.importer.cli.ImportCli Maven / Gradle / Ivy
/*
* Copyright (c) 2013-2018 Cinchapi Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.cinchapi.concourse.importer.cli;
import java.io.IOException;
import java.lang.reflect.Constructor;
import java.lang.reflect.Modifier;
import java.net.URL;
import java.net.URLClassLoader;
import java.nio.file.DirectoryStream;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.Collection;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Queue;
import java.util.Set;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;
import javax.annotation.Nullable;
import jline.TerminalFactory;
import jline.console.ConsoleReader;
import org.reflections.Reflections;
import com.beust.jcommander.Parameter;
import com.cinchapi.common.base.CheckedExceptions;
import com.cinchapi.common.groovy.GroovyFiles;
import com.cinchapi.common.io.Files;
import com.cinchapi.common.reflect.Reflection;
import com.cinchapi.concourse.Concourse;
import com.cinchapi.concourse.cli.CommandLineInterface;
import com.cinchapi.concourse.cli.Options;
import com.cinchapi.concourse.importer.CsvImporter;
import com.cinchapi.concourse.importer.Headered;
import com.cinchapi.concourse.importer.Importer;
import com.cinchapi.concourse.importer.JsonImporter;
import com.cinchapi.concourse.importer.LegacyCsvImporter;
import com.cinchapi.concourse.importer.debug.ImportDryRunConcourse;
import com.cinchapi.concourse.util.FileOps;
import com.cinchapi.concourse.util.Strings;
import com.google.common.base.CaseFormat;
import com.google.common.base.Stopwatch;
import com.google.common.base.Throwables;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.collect.Queues;
import com.google.common.collect.Sets;
import com.google.common.reflect.ClassPath;
import com.google.common.reflect.ClassPath.ClassInfo;
/**
* A CLI that uses the import framework to import data into Concourse.
*
* @author Jeff Nelson
*/
@SuppressWarnings("deprecation")
public class ImportCli extends CommandLineInterface {
/**
* Aliases for built-in importer types. These aliases should be specified
* using the {@code -t} or {@code --type} flag when invoking the CLI.
*/
private static Map> importers = Maps
.newHashMapWithExpectedSize(3);
static {
// NOTE: Be sure to increase the expectedSize parameter for the map when
// adding additional aliases
importers.put("csv", CsvImporter.class);
importers.put(".csv", LegacyCsvImporter.class); // deprecated
importers.put("json", JsonImporter.class);
}
/**
* A reference to the value returned from {@link #getLaunchDirectory()} to
* use in static methods.
* <p>
* This is assigned by the constructor, so it stays {@code null} until an
* {@link ImportCli} instance has been created. It is read when a custom
* importer alias is expanded into a filesystem path.
* </p>
*/
private static String LAUNCH_DIRECTORY = null;
/**
* A flag that indicates whether the CLI is being used to perform a dry run
* import.
* <p>
* It is copied from the parsed options at the start of {@link #doTask()}
* and decides which kind of connection
* {@link #getConcourseConnection(int)} hands out.
* </p>
*/
private boolean dryRun = false;
/*
* TODO
* 2) add flags to whitelist or blacklist files in a directory
*/
/**
* Construct a new instance.
*
* @param args
*/
public ImportCli(String[] args) {
super(args);
LAUNCH_DIRECTORY = getLaunchDirectory();
}
@Override
protected void doTask() {
final ImportOptions opts = (ImportOptions) options;
final Set records;
final Constructor extends Importer> constructor = getConstructor(
opts.type);
this.dryRun = opts.dryRun;
opts.dynamic.put(Importer.ANNOTATE_DATA_SOURCE_OPTION_NAME,
Boolean.toString(opts.annotateDataSource));
if(opts.data == null) { // Import data from stdin
Importer importer = Reflection.newInstance(constructor,
getConcourseConnection(0));
if(!opts.dynamic.isEmpty()) {
importer.setParams(options.dynamic);
}
if(importer instanceof Headered && !opts.header.isEmpty()) {
((Headered) importer).parseHeader(opts.header);
}
try {
ConsoleReader reader = new ConsoleReader();
String line;
records = Sets.newLinkedHashSet();
Runtime.getRuntime().addShutdownHook(new Thread(new Runnable() {
// Interactive import is ended when user presses CTRL + C,
// so we need this shutdown hook to ensure that they get
// feedback about the import before the JVM dies.
@Override
public void run() {
if(options.verbose) {
System.out.println(records);
}
System.out.println(Strings.format(
"Imported data into {} record{}",
records.size(), records.size() > 1 ? "s" : ""));
if(dryRun) {
System.out.println(
((ImportDryRunConcourse) getConcourseConnection(
0)).dump());
}
}
}));
try {
final AtomicBoolean lock = new AtomicBoolean(false);
new Thread(new Runnable() { // If there is no input in
// 100ms, assume that the
// session is interactive (i.e.
// not piped) and display a
// prompt
@Override
public void run() {
try {
Thread.sleep(100);
if(lock.compareAndSet(false, true)) {
System.out.println(
"Importing from stdin. Press "
+ "CTRL + C when finished");
}
}
catch (InterruptedException e) {}
}
}).start();
while ((line = reader.readLine()) != null) {
try {
lock.set(true);
records.addAll(importer.importString(line));
}
catch (Exception e) {
System.err.println(e);
}
}
}
catch (IOException e) {
throw Throwables.propagate(e);
}
}
catch (IOException e) {
throw Throwables.propagate(e);
}
finally {
try {
TerminalFactory.get().restore();
}
catch (Exception e) {
throw Throwables.propagate(e);
}
}
}
else {
String path = FileOps.expandPath(opts.data, getLaunchDirectory());
Collection files = FileOps.isDirectory(path)
? scan(Paths.get(path)) : ImmutableList.of(path);
Stopwatch watch = Stopwatch.createUnstarted();
if(files.size() > 1) {
records = Sets.newConcurrentHashSet();
final Queue filesQueue = (Queue) files;
List runnables = Lists
.newArrayListWithCapacity(opts.numThreads);
// Create just enough Runnables with instantiated Importers in
// advance. Each of those Runnables will work until #filesQueue
// is exhausted.
opts.numThreads = Math.min(opts.numThreads, files.size());
for (int i = 0; i < opts.numThreads; ++i) {
final Importer importer0 = Reflection.newInstance(
constructor, getConcourseConnection(i));
if(!opts.dynamic.isEmpty()) {
importer0.setParams(opts.dynamic);
}
if(importer0 instanceof Headered
&& !opts.header.isEmpty()) {
((Headered) importer0).parseHeader(opts.header);
}
runnables.add(new Runnable() {
private final Importer importer = importer0;
@Override
public void run() {
String file;
while ((file = filesQueue.poll()) != null) {
records.addAll(importer.importFile(file));
}
}
});
}
ExecutorService executor = Executors
.newFixedThreadPool(runnables.size());
System.out.println("Starting import...");
watch.start();
for (Runnable runnable : runnables) {
executor.execute(runnable);
}
executor.shutdown();
try {
if(!executor.awaitTermination(1, TimeUnit.MINUTES)) {
while (!executor.isTerminated()) {
System.out.print('.'); // block until all tasks are
// completed and provide some
// feedback to the user
}
}
}
catch (InterruptedException e) {
throw Throwables.propagate(e);
}
}
else {
Importer importer = Reflection.newInstance(constructor,
getConcourseConnection(0));
if(!opts.dynamic.isEmpty()) {
importer.setParams(opts.dynamic);
}
if(importer instanceof Headered && !opts.header.isEmpty()) {
((Headered) importer).parseHeader(opts.header);
}
System.out.println("Starting import...");
watch.start();
records = importer.importFile(files.iterator().next());
}
watch.stop();
long elapsed = watch.elapsed(TimeUnit.MILLISECONDS);
double seconds = elapsed / 1000.0;
if(options.verbose) {
System.out.println(records);
}
System.out.println(Strings.format(
"Imported data into {} record{} in {} seconds",
records.size(), records.size() > 1 ? "s" : "", seconds));
if(dryRun) {
System.out.println(
((ImportDryRunConcourse) getConcourseConnection(0))
.dump());
}
}
}
/**
 * Supply the option container that declares the flags specific to this
 * CLI.
 *
 * @return a new {@link ImportOptions} instance
 */
@Override
protected Options getOptions() {
    final ImportOptions importOptions = new ImportOptions();
    return importOptions;
}
/**
 * Look up the "home" directory of the host Concourse Server.
 *
 * @return the directory named by the {@code user.app.home} system
 *         property, or {@code null} when that property is not set
 */
@Nullable
private static Path getConcourseServerHome() {
    // NOTE: This is a HACK! This method is borrowed from Concourse Server's
    // ManagementCli interface because we know that the provided ImportCli
    // will likely live in a Concourse Deployment's standard directory from
    // CLIs.
    //
    // The property is set by the .env script that is sourced by every
    // server-side CLI.
    final String home = System.getProperty("user.app.home");
    if(home == null) {
        return null;
    }
    return Paths.get(home);
}
/**
* Return the appropriate constructor for creating instances of the
* {@code type} {@link Importer}.
*
*
* To make the {@link Importer}, call
* {@link Constructor#newInstance(Object...)} with a non-shared Concourse
* connection.
*
*
* @param type the {@link #importers alias} or fully qualified name for the
* desired {@link Importer} class
* @return the constructor
*/
private static Constructor extends Importer> getConstructor(String type) {
Class extends Importer> clz = importers.get(type);
if(clz == null) {
try {
clz = getCustomImporterClass(type);
}
catch (ClassNotFoundException e) {
throw new RuntimeException(Strings
.format("{} is not a valid importer type.", type));
}
}
try {
return clz.getDeclaredConstructor(Concourse.class);
}
catch (NoSuchMethodException e) {
// This should never happen because Importer base class mandates the
// existence of the constructor in the subclass by not defining a
// no-arg alternative
throw Throwables.propagate(e);
}
}
/**
* Given an alias (or fully qualified class name) attempt to load a "custom"
* importer that is not already defined in the {@link #importers built-in}
* collection.
*
* @param alias a conventional alias (i.e. FileTypeImporter --> file-type)
* OR a fully qualified class name OR the path to a customer
* importer file (can be .groovy or .jar) OR the name of a
* customer importer file contained in the "importers" directory
* of the server's home
* @return the {@link Class} that corresponds to the custom importer
* @throws ClassNotFoundException
*/
@SuppressWarnings("unchecked")
private static Class extends Importer> getCustomImporterClass(
String alias) throws ClassNotFoundException {
try {
return (Class extends Importer>) Class.forName(alias);
}
catch (ClassNotFoundException e) {
Path path;
boolean exists = true;
if(!(path = Paths.get(Files.expandPath(alias, LAUNCH_DIRECTORY)))
.toFile().exists()) {
exists = false;
Path concourseServerHome = getConcourseServerHome();
if(concourseServerHome != null) {
Path importers = getConcourseServerHome()
.resolve("importers");
String[] candidates = { alias, alias + ".groovy",
alias + ".jar" };
for (String candidate : candidates) {
if((path = importers.resolve(candidate)).toFile()
.exists()) {
exists = true;
break;
}
}
}
}
if(exists) {
if(path.toString().endsWith(".groovy")) {
return GroovyFiles.loadClass(path);
}
else if(path.toString().endsWith(".jar")) {
URLClassLoader typeClassLoader = null;
URLClassLoader serverClassLoader = null;
try {
URL[] urls = new URL[] {
path.toFile().toURI().toURL() };
typeClassLoader = new URLClassLoader(urls, null);
serverClassLoader = new URLClassLoader(urls,
Thread.currentThread().getContextClassLoader());
ClassPath classpath = ClassPath.from(typeClassLoader);
for (ClassInfo info : classpath.getAllClasses()) {
String name = info.getName();
Class> clazz = serverClassLoader.loadClass(name);
if(Importer.class.isAssignableFrom((clazz))) {
return (Class extends Importer>) clazz;
}
}
throw e;
}
catch (IOException ioe) {
throw CheckedExceptions.throwAsRuntimeException(ioe);
}
finally {
for (URLClassLoader loader : ImmutableList
.of(typeClassLoader, serverClassLoader)) {
if(loader != null) {
try {
loader.close();
}
catch (IOException ignore) {
throw CheckedExceptions
.throwAsRuntimeException(ignore);
}
}
}
}
}
else {
throw new UnsupportedOperationException(Strings.format(
"{} is an unsupported file type for custom importers",
path));
}
}
else {
// Attempt to determine the correct class name from the alias by
// loading the server's classpath. For the record, this is hella
// slow.
Reflections.log = null; // turn off reflection logging
Reflections reflections = new Reflections();
char firstChar = alias.charAt(0);
for (Class extends Importer> clazz : reflections
.getSubTypesOf(Importer.class)) {
String name = clazz.getSimpleName();
if(name.length() == 0) { // Skip anonymous subclasses
continue;
}
char nameFirstChar = name.charAt(0);
if(!Modifier.isAbstract(clazz.getModifiers())
&& (nameFirstChar == Character
.toUpperCase(firstChar)
|| nameFirstChar == Character
.toLowerCase(firstChar))) {
String expected = CaseFormat.UPPER_CAMEL
.to(CaseFormat.LOWER_HYPHEN,
clazz.getSimpleName())
.replaceAll("-importer", "");
if(alias.equals(expected)) {
return clazz;
}
}
}
}
throw e;
}
}
/**
 * Recursively scan and collect all the files in the directory defined by
 * {@code path}.
 *
 * @param path a {@link Path} that is already verified to
 *            {@link java.nio.file.Files#isDirectory(Path, java.nio.file.LinkOption...)
 *            to be a directory}.
 * @return a queue holding the string form of the path of every
 *         non-directory entry found under {@code path}
 * @throws RuntimeException wrapping the {@link IOException} if a directory
 *             cannot be opened
 */
private static Queue<String> scan(Path path) {
    try (DirectoryStream<Path> stream = java.nio.file.Files
            .newDirectoryStream(path)) {
        Queue<String> files = Queues.newConcurrentLinkedQueue();
        for (Path entry : stream) {
            if(java.nio.file.Files.isDirectory(entry)) {
                files.addAll(scan(entry));
            }
            else {
                files.add(entry.toString());
            }
        }
        return files;
    }
    catch (IOException e) {
        throw Throwables.propagate(e);
    }
}
/**
 * Pick the {@link Concourse} client that an importer should write to,
 * depending on the {@code ticket} and on whether this is a
 * {@link #dryRun}.
 * <p>
 * NOTE(review): during a dry run every call returns a brand new
 * {@link ImportDryRunConcourse}; callers that fetch "the" dry run
 * connection a second time get a different instance than the first call
 * returned -- confirm that this is intended.
 * </p>
 *
 * @param ticket {@code 0} selects the CLI's own connection; any other
 *            value yields a copy of that connection
 * @return a connection to {@link Concourse} that can be used for importing
 *         data
 */
private Concourse getConcourseConnection(int ticket) {
    if(dryRun) {
        return new ImportDryRunConcourse();
    }
    return ticket == 0 ? concourse
            : Concourse.copyExistingConnection(concourse);
}
/**
* Import specific {@link Options}.
* <p>
* Each public field is bound to a command line flag through its
* {@link Parameter} annotation; the field initializers are the defaults
* that apply when the flag is omitted.
* </p>
*
* @author jnelson
*/
protected static class ImportOptions extends Options {
/**
* The file or directory to import. When this is {@code null} the CLI reads
* the data from stdin instead.
*/
@Parameter(names = { "-d",
"--data" }, description = "The path to the file or directory to import; if no source is provided read from stdin")
public String data;
/**
* The requested number of worker threads; defaults to the number of
* available processors. The CLI lowers this to the number of files when
* there are fewer files than threads.
*/
@Parameter(names = "--numThreads", description = "The number of worker threads to use for a multithreaded import")
public int numThreads = Runtime.getRuntime().availableProcessors();
/**
* The key used to resolve data into existing records.
* NOTE(review): this field is not read anywhere else in this class --
* confirm where (or whether) it is consumed.
*/
@Parameter(names = { "-r",
"--resolveKey" }, description = "The key to use when resolving data into existing records")
public String resolveKey = null;
/**
* The importer to use: a built-in alias, a class name or the path to a
* custom importer. Defaults to {@code csv}.
*/
@Parameter(names = { "-t",
"--type" }, description = "The Importer to use; specified as either the name of a built-in importer or the path to a custom importer located on the filesystem")
public String type = "csv";
/**
* A custom header that is handed to importers implementing
* {@link Headered}; ignored when empty.
*/
@Parameter(names = "--header", description = "A custom header to assign for supporting importers")
public String header = "";
/**
* Whether the source filename should be recorded with the imported data.
* The value is forwarded to the importer as a dynamic parameter.
*/
@Parameter(names = "--annotate-data-source", description = "Add the filename from which the data is imported as a value for the '__datasource' key on every imported object")
public boolean annotateDataSource = false;
/**
* Whether to perform the import against an in-memory
* {@link ImportDryRunConcourse} and print its dump instead of writing to
* the real connection.
*/
@Parameter(names = "--dry-run", description = "Do a test import of the data in memory and print a JSON dump of what would be inserted into Concourse")
public boolean dryRun = false;
}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy