All downloads are FREE. The search and download features use the official Maven repository.
Please wait. This can take a few minutes ...
Downloading a project requires a lot of resources. Please understand that we have to cover our server costs. Thank you in advance.
Project price: only $1
You can buy this project and download/modify it as often as you want.
com.marklogic.contentpump.DatabaseTransformWriter Maven / Gradle / Ivy
/*
* Copyright (c) 2023 MarkLogic Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.marklogic.contentpump;
import java.io.IOException;
import java.util.Map;
import com.marklogic.xcc.exceptions.MLCloudRequestException;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import com.marklogic.mapreduce.DocumentURI;
import com.marklogic.mapreduce.utilities.AssignmentManager;
import com.marklogic.mapreduce.utilities.InternalUtilities;
import com.marklogic.xcc.ContentCreateOptions;
import com.marklogic.xcc.ContentSource;
/**
* DatabaseContentWriter that does server-side transform and insert.
* <p>
* Extends {@link TransformWriter} and adds handling of document properties
* and permissions that accompany each {@link DatabaseDocumentWithMeta}
* value passed to {@code write}.
* <p>
* NOTE(review): this copy of the file appears to have lost every
* angle-bracketed span (generic type parameters, and part of the body of
* {@code write}). {@code write} refers to a type named {@code VALUE} that
* is not declared anywhere in this class as shown, and the constructor
* takes a raw {@code Map}. Restore the file from the upstream project
* before compiling or modifying it.
*
* @author ali
*/
public class DatabaseTransformWriter extends
TransformWriter implements ConfigConstants {
public static final Log LOG =
LogFactory.getLog(DatabaseTransformWriter.class);
// Whether document properties are copied; read from CONF_COPY_PROPERTIES
// (defaults to true).
protected boolean isCopyProps;
// Whether document permissions are copied; read from CONF_COPY_PERMISSIONS
// (defaults to true).
protected boolean isCopyPerms;
/**
* Creates the writer and reads the copy-properties / copy-permissions
* switches from the job configuration. All arguments are forwarded
* unchanged to the {@code TransformWriter} constructor.
*
* @param conf job configuration; also consulted for
*        {@code CONF_COPY_PROPERTIES} and {@code CONF_COPY_PERMISSIONS}
* @param hostSourceMap forwarded to the superclass (raw type in this copy;
*        presumably host name to ContentSource -- TODO confirm upstream)
* @param fastLoad forwarded to the superclass; in {@code write} it selects
*        per-forest placement via the assignment manager
* @param am assignment manager used for forest placement in {@code write}
*/
public DatabaseTransformWriter(Configuration conf,
Map hostSourceMap, boolean fastLoad,
AssignmentManager am) {
super(conf, hostSourceMap, fastLoad, am);
isCopyProps = conf.getBoolean(CONF_COPY_PROPERTIES, true);
isCopyPerms = conf.getBoolean(CONF_COPY_PERMISSIONS, true);
}
/**
* Writes one document (and/or its properties) to the session selected for
* it, committing when the per-session statement count reaches
* {@code txnSize}.
* <p>
* NOTE(review): the middle of this method is damaged in this copy (see the
* note at the {@code addValue} line); the comments below describe only
* what the visible statements do.
*
* @param key URI of the document; combined with {@code outputDir} to form
*        the target URI
* @param value the document to write; it is cast unconditionally to
*        {@link DatabaseDocumentWithMeta}
* @throws IOException declared by the overridden method
* @throws InterruptedException declared by the overridden method
*/
@Override
public void write(DocumentURI key, VALUE value) throws IOException,
InterruptedException {
// Index of the forest/session to use; stays 0 when fastLoad is off.
int fId = 0;
String uri = InternalUtilities.getUriWithOutputDir(key, outputDir);
if (fastLoad) {
if(!countBased) {
// placement for legacy or bucket
fId = am.getPlacementForestIndex(key);
sfId = fId;
} else {
// Count-based placement: ask the assignment manager only when no
// forest index is cached (sfId == -1), otherwise reuse the cached one.
if (sfId == -1) {
sfId = am.getPlacementForestIndex(key);
}
fId = sfId;
}
}
// sid indexes the per-session arrays (sessions, queries, stmtCounts, ...).
int sid = fId;
DatabaseDocumentWithMeta doc = (DatabaseDocumentWithMeta) value;
DocumentMetadata meta = doc.getMeta();
// Per-document create options derived from the document's metadata and
// the copy-* switches.
ContentCreateOptions opt =
DatabaseContentWriter.newContentCreateOptions(meta, options,
isCopyColls, isCopyQuality, isCopyMeta, isCopyPerms,
effectiveVersion);
// "Naked" per DocumentMetadata.isNakedProps(); when true the document
// insert branch below is skipped and only the properties branch can run.
boolean naked = meta.isNakedProps();
// Lazily create the session and ad hoc query for this index.
if (sessions[sid] == null) {
sessions[sid] = getSession(sid, false);
queries[sid] = getAdhocQuery(sid);
}
boolean committed = false;
if (!naked) {
opt.setFormat(doc.getContentType().getDocumentFormat());
// NOTE(review): the next line is truncated in this copy. The text between
// "effectiveVersion" and "0) {" is missing (it looks like everything from a
// '<' comparison up to a later '>' was stripped), which removed the rest of
// the addValue(...) call and the statements that opened the retry loop and
// the try block closed by "} catch" below. Braces are therefore unbalanced
// here. Do not edit this region without restoring it from upstream.
addValue(uri, value, sid, opt, effectiveVersion 0) {
LOG.info(getFormattedBatchId() +
"Retrying committing batch is successful");
}
} catch (Exception e) {
// Treat the failure as retryable unless an MLCloudRequestException
// says otherwise.
boolean isRetryable = true;
LOG.warn("Failed committing transaction.");
if (e instanceof MLCloudRequestException){
isRetryable = ((MLCloudRequestException)e).isRetryable();
LOG.warn(getFormattedBatchId() +
"MLCloudRequestException:" + e.getMessage());
} else {
LOG.warn(getFormattedBatchId() +
"Exception:" + e.getMessage());
}
// Retry path: only when the error is retryable, commit retry is enabled,
// and the incremented retry counter is still below the limit. The session
// is replaced via getSession(sid, true) after sleeping; "continue" targets
// a loop whose header is not visible in this copy.
if (isRetryable && needCommitRetry() &&
(++commitRetry < commitRetryLimit)) {
LOG.warn(getFormattedBatchId() + "Failed during committing");
handleCommitExceptions(sid);
commitSleepTime = sleep(commitSleepTime);
stmtCounts[sid] = 0;
sessions[sid] = getSession(sid, true);
continue;
} else if (needCommitRetry()) {
LOG.error(getFormattedBatchId() +
"Exceeded max commit retry, batch failed permanently");
}
// No retry: every URI pending commit on this session is counted and
// logged as failed.
failed += commitUris[sid].size();
for (DocumentURI failedUri : commitUris[sid]) {
LOG.error(getFormattedBatchId() +
"Document failed permanently: " + failedUri);
}
handleCommitExceptions(sid);
} finally {
// Runs on success, on failure, and on the "continue" above: the
// statement count is reset and the attempt is flagged as committed.
stmtCounts[sid] = 0;
committed = true;
}
}
break;
}
batchId++;
pendingURIs[sid].clear();
}
}
// Properties are set with a separate call when copying properties is on,
// the document has properties, and either the server version is below
// PROPS_MIN_VERSION or this is a naked-properties document.
if (isCopyProps && meta.getProperties() != null &&
(effectiveVersion < PROPS_MIN_VERSION || naked)) {
// Permissions, collections, quality and metadata are passed along only
// for naked documents (and only when the matching copy switch is on);
// otherwise null is passed for them.
boolean suc = DatabaseContentWriter.setDocumentProperties(uri,
meta.getProperties(),
isCopyPerms&&naked?meta.getPermString():null,
isCopyColls&&naked?meta.getCollectionString():null,
isCopyQuality&&naked?meta.getQualityString():null,
isCopyMeta&&naked?meta.getMeta():null, sessions[sid]);
stmtCounts[sid]++;
// The outcome is tallied here only for naked documents: success is
// deferred to commit time when needCommit is set, counted immediately
// otherwise.
if (suc && naked) {
if (needCommit) {
commitUris[sid].add(key);
} else {
succeeded++;
}
} else if (!suc && naked) {
failed++;
}
}
// Commit once the number of statements on this session reaches txnSize.
if (needCommit && stmtCounts[sid] >= txnSize) {
try {
commit(sid);
} catch (Exception e) {
LOG.warn(getFormattedBatchId() +
"Failed committing transaction: " + e.getMessage());
handleCommitExceptions(sid);
}
// Reset and flag as committed whether or not commit(sid) threw.
stmtCounts[sid] = 0;
committed = true;
}
if ((!fastLoad) && ((!needCommit) || committed)) {
// rotate to next host and reset session
hostId = (hostId + 1)%forestIds.length;
sessions[0] = null;
queries[0] = null;
}
}
}