org.apache.kylin.tool.CubeMetaExtractor
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kylin.tool;
import java.io.File;
import java.io.IOException;
import java.util.List;
import java.util.Set;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.OptionBuilder;
import org.apache.commons.cli.OptionGroup;
import org.apache.commons.lang3.StringUtils;
import org.apache.kylin.common.KylinConfig;
import org.apache.kylin.common.persistence.ResourceStore;
import org.apache.kylin.common.persistence.ResourceTool;
import org.apache.kylin.common.util.OptionsHelper;
import org.apache.kylin.cube.CubeDescManager;
import org.apache.kylin.cube.CubeInstance;
import org.apache.kylin.cube.CubeManager;
import org.apache.kylin.cube.CubeSegment;
import org.apache.kylin.cube.model.CubeDesc;
import org.apache.kylin.job.dao.ExecutableDao;
import org.apache.kylin.job.dao.ExecutablePO;
import org.apache.kylin.job.exception.PersistentException;
import org.apache.kylin.metadata.TableMetadataManager;
import org.apache.kylin.metadata.badquery.BadQueryHistoryManager;
import org.apache.kylin.metadata.model.DataModelDesc;
import org.apache.kylin.metadata.model.DataModelManager;
import org.apache.kylin.metadata.model.SegmentStatusEnum;
import org.apache.kylin.metadata.model.TableRef;
import org.apache.kylin.metadata.project.ProjectInstance;
import org.apache.kylin.metadata.project.ProjectManager;
import org.apache.kylin.metadata.project.RealizationEntry;
import org.apache.kylin.metadata.realization.IRealization;
import org.apache.kylin.metadata.realization.RealizationRegistry;
import org.apache.kylin.metadata.realization.RealizationStatusEnum;
import org.apache.kylin.metadata.realization.RealizationType;
import org.apache.kylin.metadata.streaming.StreamingConfig;
import org.apache.kylin.metadata.streaming.StreamingManager;
import org.apache.kylin.source.kafka.config.KafkaConfig;
import org.apache.kylin.storage.hybrid.HybridInstance;
import org.apache.kylin.storage.hybrid.HybridManager;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.SerializationFeature;
import com.fasterxml.jackson.databind.node.ObjectNode;
import com.google.common.base.Preconditions;
import com.google.common.collect.Lists;
import com.google.common.collect.Sets;
/**
* Extract cube-related info for debugging/distribution purposes.
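* <p>
* A minimal invocation sketch (the destDir option is defined in
* AbstractInfoExtractor; the classpath placeholder is hypothetical, so verify
* both against your Kylin deployment):
* <pre>
* java -cp ${KYLIN_TOOL_CLASSPATH} org.apache.kylin.tool.CubeMetaExtractor \
*     -cube kylin_sales_cube -destDir /tmp/cube_meta \
*     -includeSegments true -includeJobs false
* </pre>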
*/
public class CubeMetaExtractor extends AbstractInfoExtractor {
private static final Logger logger = LoggerFactory.getLogger(CubeMetaExtractor.class);
@SuppressWarnings("static-access")
private static final Option OPTION_CUBE = OptionBuilder.withArgName("cube").hasArg().isRequired(false)
.withDescription("Specify which cube to extract").create("cube");
@SuppressWarnings("static-access")
private static final Option OPTION_HYBRID = OptionBuilder.withArgName("hybrid").hasArg().isRequired(false)
.withDescription("Specify which hybrid to extract").create("hybrid");
@SuppressWarnings("static-access")
private static final Option OPTION_PROJECT = OptionBuilder.withArgName("project").hasArg().isRequired(false)
.withDescription("Specify the project whose realizations should be extracted").create("project");
@SuppressWarnings("static-access")
private static final Option OPTION_ALL_PROJECT = OptionBuilder.withArgName("allProjects").hasArg(false)
.isRequired(false).withDescription("Extract realizations in all projects").create("allProjects");
@SuppressWarnings("static-access")
private static final Option OPTION_STORAGE_TYPE = OptionBuilder.withArgName("storageType").hasArg()
.isRequired(false).withDescription("Specify a storage type to overwrite with. Default is empty, keeping the original.")
.create("storageType");
@SuppressWarnings("static-access")
private static final Option OPTION_ENGINE_TYPE = OptionBuilder.withArgName("engineType").hasArg().isRequired(false)
.withDescription("Specify an engine type to overwrite with. Default is empty, keeping the original.")
.create("engineType");
@SuppressWarnings("static-access")
private static final Option OPTION_INCLUDE_SEGMENTS = OptionBuilder.withArgName("includeSegments").hasArg()
.isRequired(false).withDescription("Set this to true to extract segment info as well. Default true")
.create("includeSegments");
@SuppressWarnings("static-access")
private static final Option OPTION_INCLUDE_JOB = OptionBuilder.withArgName("includeJobs").hasArg().isRequired(false)
.withDescription("Set this to true to extract job info/outputs as well. Default false")
.create("includeJobs");
@SuppressWarnings("static-access")
private static final Option OPTION_INCLUDE_ONLY_JOB_OUTPUT = OptionBuilder.withArgName("onlyOutput").hasArg()
.isRequired(false).withDescription("When including jobs, only extract each job's output. Default true")
.create("onlyOutput");
@SuppressWarnings("static-access")
private static final Option OPTION_INCLUDE_SEGMENT_DETAILS = OptionBuilder.withArgName("includeSegmentDetails")
.hasArg().isRequired(false)
.withDescription(
"Set this to true to extract segment details such as dictionaries and table snapshots as well. Default false")
.create("includeSegmentDetails");
private KylinConfig kylinConfig;
private DataModelManager metadataManager;
private ProjectManager projectManager;
private HybridManager hybridManager;
private CubeManager cubeManager;
private StreamingManager streamingManager;
private CubeDescManager cubeDescManager;
private ExecutableDao executableDao;
private RealizationRegistry realizationRegistry;
private BadQueryHistoryManager badQueryHistoryManager;
private String exportPath;
private boolean includeSegments;
private boolean includeJobs;
private boolean includeSegmentDetails;
private boolean onlyJobOutput;
private String storageType = null;
private String engineType = null;
private Set<String> requiredResources = Sets.newLinkedHashSet();
private Set<String> optionalResources = Sets.newLinkedHashSet();
private Set<CubeInstance> cubesToTrimAndSave = Sets.newLinkedHashSet(); // these cubes need to be saved with their segments trimmed off
public CubeMetaExtractor() {
super();
OptionGroup realizationOrProject = new OptionGroup();
realizationOrProject.addOption(OPTION_CUBE);
realizationOrProject.addOption(OPTION_PROJECT);
realizationOrProject.addOption(OPTION_HYBRID);
realizationOrProject.addOption(OPTION_ALL_PROJECT);
realizationOrProject.setRequired(true);
options.addOptionGroup(realizationOrProject);
options.addOption(OPTION_INCLUDE_SEGMENTS);
options.addOption(OPTION_INCLUDE_JOB);
options.addOption(OPTION_INCLUDE_SEGMENT_DETAILS);
options.addOption(OPTION_INCLUDE_ONLY_JOB_OUTPUT);
options.addOption(OPTION_STORAGE_TYPE);
options.addOption(OPTION_ENGINE_TYPE);
}
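/*
* The OptionGroup above makes -cube, -project, -hybrid and -allProjects
* mutually exclusive, and setRequired(true) means exactly one of them must be
* given. Illustrative command lines (cube and project names are hypothetical):
*
*   -cube kylin_sales_cube             accepted: one group member
*   -project learn_kylin,default       accepted: values may be comma-separated
*   -cube c1 -project p1               rejected: two members of the same group
*   (no selector at all)               rejected: the group is required
*/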
@Override
protected void executeExtract(OptionsHelper optionsHelper, File exportDir) throws Exception {
includeSegments = optionsHelper.hasOption(OPTION_INCLUDE_SEGMENTS)
? Boolean.valueOf(optionsHelper.getOptionValue(OPTION_INCLUDE_SEGMENTS))
: true;
includeJobs = optionsHelper.hasOption(OPTION_INCLUDE_JOB)
? Boolean.valueOf(optionsHelper.getOptionValue(OPTION_INCLUDE_JOB))
: false;
includeSegmentDetails = optionsHelper.hasOption(OPTION_INCLUDE_SEGMENT_DETAILS)
? Boolean.valueOf(optionsHelper.getOptionValue(OPTION_INCLUDE_SEGMENT_DETAILS))
: false;
onlyJobOutput = optionsHelper.hasOption(OPTION_INCLUDE_ONLY_JOB_OUTPUT)
? Boolean.valueOf(optionsHelper.getOptionValue(OPTION_INCLUDE_ONLY_JOB_OUTPUT))
: true;
storageType = optionsHelper.hasOption(OPTION_STORAGE_TYPE) ? optionsHelper.getOptionValue(OPTION_STORAGE_TYPE)
: null;
engineType = optionsHelper.hasOption(OPTION_ENGINE_TYPE) ? optionsHelper.getOptionValue(OPTION_ENGINE_TYPE)
: null;
exportPath = exportDir.getAbsolutePath();
kylinConfig = KylinConfig.getInstanceFromEnv();
metadataManager = DataModelManager.getInstance(kylinConfig);
projectManager = ProjectManager.getInstance(kylinConfig);
hybridManager = HybridManager.getInstance(kylinConfig);
cubeManager = CubeManager.getInstance(kylinConfig);
cubeDescManager = CubeDescManager.getInstance(kylinConfig);
executableDao = ExecutableDao.getInstance(kylinConfig);
realizationRegistry = RealizationRegistry.getInstance(kylinConfig);
badQueryHistoryManager = BadQueryHistoryManager.getInstance(kylinConfig);
addRequired(ResourceStore.METASTORE_UUID_TAG);
if (optionsHelper.hasOption(OPTION_ALL_PROJECT)) {
for (ProjectInstance projectInstance : projectManager.listAllProjects()) {
requireProject(projectInstance);
}
} else if (optionsHelper.hasOption(OPTION_PROJECT)) {
String projectNames = optionsHelper.getOptionValue(OPTION_PROJECT);
for (String projectName : projectNames.split(",")) {
ProjectInstance projectInstance = projectManager.getProject(projectName);
Preconditions.checkNotNull(projectInstance, "Project " + projectName + " does not exist.");
requireProject(projectInstance);
}
} else if (optionsHelper.hasOption(OPTION_CUBE)) {
String cubeNames = optionsHelper.getOptionValue(OPTION_CUBE);
for (String cubeName : cubeNames.split(",")) {
IRealization realization = cubeManager.getRealization(cubeName);
if (realization == null) {
throw new IllegalArgumentException("No cube found with name of " + cubeName);
} else {
retrieveResourcePath(realization);
}
}
} else if (optionsHelper.hasOption(OPTION_HYBRID)) {
String hybridNames = optionsHelper.getOptionValue(OPTION_HYBRID);
for (String hybridName : hybridNames.split(",")) {
IRealization realization = hybridManager.getRealization(hybridName);
if (realization != null) {
retrieveResourcePath(realization);
} else {
throw new IllegalArgumentException("No hybrid found with name of" + hybridName);
}
}
}
executeExtraction(exportDir.getAbsolutePath());
engineOverwrite(new File(exportDir.getAbsolutePath()));
}
public String getExportPath() {
return exportPath;
}
private void requireProject(ProjectInstance projectInstance) throws IOException {
addRequired(projectInstance.getResourcePath());
List<RealizationEntry> realizationEntries = projectInstance.getRealizationEntries();
for (RealizationEntry realizationEntry : realizationEntries) {
retrieveResourcePath(getRealization(realizationEntry));
}
List<DataModelDesc> modelDescs = metadataManager.getModels(projectInstance.getName());
for (DataModelDesc modelDesc : modelDescs) {
addRequired(DataModelDesc.concatResourcePath(modelDesc.getName()));
}
addOptional(badQueryHistoryManager.getBadQueriesForProject(projectInstance.getName()).getResourcePath());
}
private void executeExtraction(String dest) {
logger.info("The resource paths going to be extracted:");
for (String s : requiredResources) {
logger.info("{} (required)", s);
}
for (String s : optionalResources) {
logger.info("{} (optional)", s);
}
for (CubeInstance cube : cubesToTrimAndSave) {
logger.info("Cube {} will be trimmed and extracted", cube);
}
try {
KylinConfig srcConfig = KylinConfig.getInstanceFromEnv();
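// the destination path is interpreted as a local metadata store URI, so the
// ResourceTool.copy calls below write the selected resources straight into that directory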
KylinConfig dstConfig = KylinConfig.createInstanceFromUri(dest);
ResourceTool.copy(srcConfig, dstConfig, Lists.newArrayList(requiredResources), true);
for (String r : optionalResources) {
try {
ResourceTool.copy(srcConfig, dstConfig, Lists.newArrayList(r), true);
} catch (Exception e) {
logger.warn(
"Failed to copy optional resource {}, possibly because it is missing; skipping it.",
r);
}
}
ResourceStore dstStore = ResourceStore.getStore(dstConfig);
for (CubeInstance cube : cubesToTrimAndSave) {
CubeInstance trimmedCube = CubeInstance.getCopyOf(cube);
trimmedCube.getSegments().clear();
trimmedCube.setUuid(cube.getUuid());
dstStore.putResource(trimmedCube.getResourcePath(), trimmedCube, CubeManager.CUBE_SERIALIZER);
}
} catch (Exception e) {
throw new RuntimeException("Exception", e);
}
}
private void engineOverwrite(File dest) throws IOException {
if (engineType != null || storageType != null) {
for (File f : dest.listFiles()) {
if (f.isDirectory()) {
engineOverwrite(f);
} else {
engineOverwriteInternal(f);
}
}
}
}
private void engineOverwriteInternal(File f) throws IOException {
try {
ObjectMapper objectMapper = new ObjectMapper();
JsonNode rootNode = objectMapper.readTree(f);
boolean replaced = false;
if (engineType != null && rootNode.get("engine_type") != null) {
((ObjectNode) rootNode).put("engine_type", Integer.parseInt(engineType));
replaced = true;
}
if (storageType != null && rootNode.get("storage_type") != null) {
((ObjectNode) rootNode).put("storage_type", Integer.parseInt(storageType));
replaced = true;
}
if (replaced) {
objectMapper.enable(SerializationFeature.INDENT_OUTPUT);
objectMapper.writeValue(f, rootNode);
}
} catch (JsonProcessingException ex) {
logger.warn("cannot parse file {}", f);
}
}
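/*
* Illustrative effect of engineOverwriteInternal on a metadata file
* (hypothetical JSON, field names as handled above): with -engineType 2,
*
*   { "name": "my_cube", "engine_type": 4, "storage_type": 2 }
*
* becomes
*
*   { "name": "my_cube", "engine_type": 2, "storage_type": 2 }
*
* Files lacking the field are left untouched; non-JSON files are skipped with a warning.
*/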
private IRealization getRealization(RealizationEntry realizationEntry) {
return realizationRegistry.getRealization(realizationEntry.getType(), realizationEntry.getRealization());
}
private void addStreamingConfig(CubeInstance cube) {
streamingManager = StreamingManager.getInstance(kylinConfig);
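// a streaming source is linked to a cube by name: a StreamingConfig whose name matches
// the cube's root fact table (case-insensitive), plus the KafkaConfig sharing that name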
for (StreamingConfig streamingConfig : streamingManager.listAllStreaming()) {
if (streamingConfig.getName() != null
&& streamingConfig.getName().equalsIgnoreCase(cube.getRootFactTable())) {
addRequired(StreamingConfig.concatResourcePath(streamingConfig.getName()));
addRequired(KafkaConfig.concatResourcePath(streamingConfig.getName()));
}
}
}
private void retrieveResourcePath(IRealization realization) throws IOException {
if (realization == null) {
return;
}
logger.info("Deal with realization {} of type {}", realization.getName(), realization.getType());
if (realization instanceof CubeInstance) {
CubeInstance cube = (CubeInstance) realization;
CubeDesc cubeDesc = cubeDescManager.getCubeDesc(cube.getDescName());
DataModelDesc modelDesc = metadataManager.getDataModelDesc(cubeDesc.getModelName());
// add tables
addTables(modelDesc);
// add streaming stuff
addStreamingConfig(cube);
// add cube
addRequired(CubeDesc.concatResourcePath(cubeDesc.getName()));
// add segments and jobs
addSegAndJob(cube);
} else if (realization instanceof HybridInstance) {
HybridInstance hybridInstance = (HybridInstance) realization;
addRequired(HybridInstance.concatResourcePath(hybridInstance.getName()));
for (IRealization iRealization : hybridInstance.getRealizations()) {
if (iRealization.getType() != RealizationType.CUBE) {
throw new RuntimeException("Hybrid " + iRealization.getName() + " contains non cube child "
+ iRealization.getName() + " with type " + iRealization.getType());
}
retrieveResourcePath(iRealization);
}
} else {
logger.warn("Unknown realization type: " + realization.getType());
}
}
private void addTables(DataModelDesc modelDesc) throws IOException {
if (modelDesc != null) {
// FIXME: when backing up by project, should get all tables in the project, not only those referenced by cubes
for (TableRef tableRef : modelDesc.getAllTables()) {
addRequired(tableRef.getTableDesc().getResourcePath());
addOptional(TableMetadataManager.getInstance(KylinConfig.getInstanceFromEnv()) //
.getTableExt(tableRef.getTableDesc()) //
.getResourcePath()); //
}
addRequired(DataModelDesc.concatResourcePath(modelDesc.getName()));
}
}
private void addSegAndJob(CubeInstance cube) {
if (includeSegments) {
addRequired(CubeInstance.concatResourcePath(cube.getName()));
for (CubeSegment segment : cube.getSegments(SegmentStatusEnum.READY)) {
addRequired(CubeSegment.getStatisticsResourcePath(cube.getName(), segment.getUuid()));
if (includeSegmentDetails) {
for (String dictPath : segment.getDictionaryPaths()) {
addRequired(dictPath);
}
for (String snapshotPath : segment.getSnapshotPaths()) {
addRequired(snapshotPath);
}
}
if (includeJobs) {
String lastJobId = segment.getLastBuildJobID();
if (StringUtils.isEmpty(lastJobId)) {
throw new RuntimeException("No job exist for segment :" + segment);
} else {
try {
if (onlyJobOutput) {
ExecutablePO executablePO = executableDao.getJob(lastJobId);
addRequired(ResourceStore.EXECUTE_OUTPUT_RESOURCE_ROOT + "/" + lastJobId);
} else {
ExecutablePO executablePO = executableDao.getJob(lastJobId);
addRequired(ResourceStore.EXECUTE_RESOURCE_ROOT + "/" + lastJobId);
addRequired(ResourceStore.EXECUTE_OUTPUT_RESOURCE_ROOT + "/" + lastJobId);
for (ExecutablePO task : executablePO.getTasks()) {
addRequired(ResourceStore.EXECUTE_RESOURCE_ROOT + "/" + task.getUuid());
addRequired(ResourceStore.EXECUTE_OUTPUT_RESOURCE_ROOT + "/" + task.getUuid());
}
}
} catch (PersistentException e) {
throw new RuntimeException("PersistentException", e);
}
}
}
}
} else {
if (includeJobs) {
logger.warn("It's useless to set includeJobs to true when includeSegments is set to false");
}
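// a cube saved without segments cannot serve queries, so persist the trimmed copy as DISABLED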
cube.setStatus(RealizationStatusEnum.DISABLED);
cubesToTrimAndSave.add(cube);
}
}
private void addRequired(String record) {
logger.info("adding required resource {}", record);
requiredResources.add(record);
}
private void addOptional(String record) {
logger.info("adding optional resource {}", record);
optionalResources.add(record);
}
public static void main(String[] args) {
CubeMetaExtractor extractor = new CubeMetaExtractor();
extractor.execute(args);
}
}