org.kitesdk.maven.plugins.AbstractDatasetMojo Maven / Gradle / Ivy
Go to download
The Kite Maven Plugin provides Maven goals for packaging, deploying, and running
distributed applications.
/**
* Copyright 2013 Cloudera Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.kitesdk.maven.plugins;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.io.Resources;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URL;
import java.net.URLClassLoader;
import java.util.ArrayList;
import java.util.List;
import java.util.Properties;
import org.apache.hadoop.conf.Configuration;
import org.apache.maven.artifact.DependencyResolutionRequiredException;
import org.apache.maven.plugin.MojoExecutionException;
import org.apache.maven.plugins.annotations.Parameter;
import org.kitesdk.data.DatasetDescriptor;
import org.kitesdk.data.spi.DatasetRepositories;
import org.kitesdk.data.spi.DatasetRepository;
import org.kitesdk.data.spi.DefaultConfiguration;
/**
 * Base class for the dataset-related mojos. It declares the shared
 * {@code kite.*} parameters and provides helpers for resolving a
 * {@link DatasetRepository} and for configuring the schema of a
 * {@link DatasetDescriptor.Builder}.
 */
abstract class AbstractDatasetMojo extends AbstractHadoopMojo {

  /**
   * A Kite dataset URI.
   */
  @Parameter(property = "kite.uri")
  protected String uri;

  /**
   * The root directory of the dataset repository. Optional if using HCatalog for metadata storage.
   */
  @Parameter(property = "kite.rootDirectory")
  protected String rootDirectory;

  /**
   * If true, store dataset metadata in HCatalog, otherwise store it on the filesystem.
   */
  @Parameter(property = "kite.hcatalog")
  protected boolean hcatalog = true;

  /**
   * The URI specifying the dataset repository, e.g. {@code repo:hdfs://host:8020/data}.
   * Optional, but if specified then {@code kite.rootDirectory} and
   * {@code kite.hcatalog} are ignored.
   */
  @Parameter(property = "kite.repositoryUri")
  protected String repositoryUri;

  /**
   * Hadoop configuration properties.
   */
  @VisibleForTesting
  @Parameter(property = "kite.hadoopConfiguration")
  protected Properties hadoopConfiguration;

  /**
   * Records whether {@link #addToConfiguration(Properties)} has already
   * replaced the default configuration. Static, so the flag is shared by
   * every mojo instance loaded by the same class loader.
   */
  @VisibleForTesting
  static boolean addedConf = false;

  /**
   * Copies the current default {@link Configuration}, applies every string
   * property from {@code hadoopConfiguration} to the copy, installs the copy
   * as the new default and sets {@link #addedConf}.
   *
   * @param hadoopConfiguration the properties to apply; must not be null
   */
  private static void addToConfiguration(Properties hadoopConfiguration) {
    // base the new Configuration on the current defaults
    Configuration conf = new Configuration(DefaultConfiguration.get());
    // add all of the properties as config settings
    for (String key : hadoopConfiguration.stringPropertyNames()) {
      conf.set(key, hadoopConfiguration.getProperty(key));
    }
    // replace the original Configuration
    DefaultConfiguration.set(conf);
    addedConf = true;
  }

  /**
   * Returns the default {@link Configuration}, first merging the
   * {@code kite.hadoopConfiguration} properties into it if that has not
   * happened yet.
   *
   * @return the configuration held by {@link DefaultConfiguration}
   */
  protected Configuration getConf() {
    // The parameter may be left unset, in which case there is nothing to
    // merge; skipping avoids a NullPointerException in addToConfiguration.
    if (!addedConf && hadoopConfiguration != null) {
      addToConfiguration(hadoopConfiguration);
    }
    // use the default
    return DefaultConfiguration.get();
  }

  /**
   * Resolves the dataset repository from the mojo parameters.
   *
   * <p>{@code repositoryUri} wins when set. Otherwise, when
   * {@code rootDirectory} is set, a {@code repo:} URI is built from it: a
   * Hive repository if {@code hcatalog} is true, else the scheme of the root
   * directory itself, else the scheme of {@code fs.defaultFS} or
   * {@code fs.default.name} from the configuration, else {@code file}.
   * With no root directory, {@code repo:hive} is used if {@code hcatalog}
   * is true.
   *
   * @return the repository for the resolved URI
   * @throws IllegalArgumentException if neither a repository URI nor a root
   *     directory is given and {@code hcatalog} is false
   */
  DatasetRepository getDatasetRepository() {
    Configuration conf = getConf(); // ensure properties are added to DefaultConfig

    if (repositoryUri != null) {
      return DatasetRepositories.repositoryFor(repositoryUri);
    }

    if (rootDirectory == null) {
      if (hcatalog) {
        return DatasetRepositories.repositoryFor("repo:hive");
      }
      throw new IllegalArgumentException(
          "Root directory must be specified if not using Hive.");
    }

    // Named "root" so it does not shadow the String field "uri".
    URI root = URI.create(rootDirectory);
    if (hcatalog) {
      return DatasetRepositories.repositoryFor("repo:hive:" + root.getPath());
    }
    if (root.getScheme() != null) {
      return DatasetRepositories.repositoryFor("repo:" + root.toString());
    }

    // No scheme on the root directory: borrow it from the default file
    // system, checking the current key before the older one.
    String defaultFsValue = conf.get("fs.defaultFS");
    if (defaultFsValue == null) {
      defaultFsValue = conf.get("fs.default.name");
    }
    if (defaultFsValue != null) {
      URI defaultFS = URI.create(defaultFsValue);
      return DatasetRepositories.repositoryFor(
          "repo:" + defaultFS.getScheme() + ":" + root.getPath());
    }
    return DatasetRepositories.repositoryFor("repo:file:" + root.getPath());
  }

  /**
   * Sets the schema on {@code descriptorBuilder} from an Avro schema file
   * or, if no file is given, from a class passed to the builder.
   *
   * <p>{@code avroSchemaFile} is tried as a filesystem path first and, if no
   * such file exists, looked up as a classpath resource. The class named by
   * {@code avroSchemaReflectClass} is loaded from the project's compile
   * classpath. If both arguments are null the builder is left untouched.
   *
   * @param descriptorBuilder the builder to configure
   * @param avroSchemaFile path or classpath resource name of the schema, or null
   * @param avroSchemaReflectClass fully-qualified class name, or null
   * @throws MojoExecutionException if the schema file cannot be read or the
   *     class cannot be located
   */
  void configureSchema(DatasetDescriptor.Builder descriptorBuilder, String
      avroSchemaFile, String avroSchemaReflectClass) throws MojoExecutionException {
    if (avroSchemaFile != null) {
      File avroSchema = new File(avroSchemaFile);
      try {
        if (avroSchema.exists()) {
          descriptorBuilder.schema(avroSchema);
        } else {
          InputStream in = Resources.getResource(avroSchemaFile).openStream();
          try {
            descriptorBuilder.schema(in);
          } finally {
            // The stream is opened here, so it is closed here as well.
            in.close();
          }
        }
      } catch (IOException e) {
        throw new MojoExecutionException("Problem while reading file " + avroSchemaFile, e);
      }
    } else if (avroSchemaReflectClass != null) {
      try {
        List<URL> classpath = new ArrayList<URL>();
        for (Object element : mavenProject.getCompileClasspathElements()) {
          classpath.add(new File((String) element).toURI().toURL());
        }
        ClassLoader parentClassLoader = getClass().getClassLoader(); // use Maven's classloader, not the system one
        // The loader is intentionally left open: the loaded class is handed
        // to the builder and may still need it to resolve further classes.
        ClassLoader classLoader = new URLClassLoader(
            classpath.toArray(new URL[classpath.size()]), parentClassLoader);
        descriptorBuilder.schema(Class.forName(avroSchemaReflectClass, true, classLoader));
      } catch (ClassNotFoundException e) {
        throw new MojoExecutionException("Problem finding class " +
            avroSchemaReflectClass, e);
      } catch (MalformedURLException e) {
        throw new MojoExecutionException("Problem finding class " +
            avroSchemaReflectClass, e);
      } catch (DependencyResolutionRequiredException e) {
        throw new MojoExecutionException("Problem finding class " +
            avroSchemaReflectClass, e);
      }
    }
  }
}
© 2015 - 2024 Weber Informatics LLC | Privacy Policy