// com.marklogic.spark.reader.customcode.CustomCodePartitionReader Maven / Gradle / Ivy
// Go to download
// Show more of this group Show more artifacts with this name
// Show all versions of marklogic-spark-connector Show documentation
// Spark 3 connector for MarkLogic
/*
* Copyright © 2024 MarkLogic Corporation. All Rights Reserved.
*/
package com.marklogic.spark.reader.customcode;
import com.marklogic.client.DatabaseClient;
import com.marklogic.client.eval.EvalResultIterator;
import com.marklogic.client.eval.ServerEvaluationCall;
import com.marklogic.spark.Options;
import com.marklogic.spark.ReadProgressLogger;
import com.marklogic.spark.reader.JsonRowDeserializer;
import org.apache.spark.sql.catalyst.InternalRow;
import org.apache.spark.sql.catalyst.expressions.GenericInternalRow;
import org.apache.spark.sql.connector.read.PartitionReader;
import org.apache.spark.unsafe.types.UTF8String;
/**
 * Reads one partition by evaluating user-supplied custom code against MarkLogic and exposing each
 * item of the evaluation result as a Spark {@link InternalRow}.
 * <p>
 * The server call is built in the constructor but is not evaluated until the first call to
 * {@link #next()}. {@link #close()} closes the result iterator (if one was opened) and releases the
 * MarkLogic client obtained in the constructor.
 */
class CustomCodePartitionReader implements PartitionReader<InternalRow> {

    private final ServerEvaluationCall serverEvaluationCall;
    private final boolean isCustomSchema;
    // Created lazily by next(); stays null if next() is never called.
    private EvalResultIterator evalResultIterator;
    private final JsonRowDeserializer jsonRowDeserializer;
    private final DatabaseClient databaseClient;

    // Only needed for logging progress.
    private final long batchSize;
    private long progressCounter;

    /**
     * Connects to MarkLogic and prepares, but does not run, the evaluation call for this partition.
     *
     * @param customCodeContext source of the connection, the read options, and the schema
     * @param partition         value bound to the {@code PARTITION} variable of the call; when
     *                          {@code null}, no variable is added
     */
    public CustomCodePartitionReader(CustomCodeContext customCodeContext, String partition) {
        this.databaseClient = customCodeContext.connectToMarkLogic();
        this.serverEvaluationCall = customCodeContext.buildCall(
            this.databaseClient,
            new CustomCodeContext.CallOptions(Options.READ_INVOKE, Options.READ_JAVASCRIPT, Options.READ_XQUERY,
                Options.READ_JAVASCRIPT_FILE, Options.READ_XQUERY_FILE)
        );
        if (partition != null) {
            this.serverEvaluationCall.addVariable("PARTITION", partition);
        }
        // NOTE(review): the two trailing arguments are presumably the default and minimum batch
        // size - confirm against CustomCodeContext.getNumericOption.
        this.batchSize = customCodeContext.getNumericOption(Options.READ_BATCH_SIZE, 1, 1);
        this.isCustomSchema = customCodeContext.isCustomSchema();
        this.jsonRowDeserializer = new JsonRowDeserializer(customCodeContext.getSchema());
    }

    /**
     * Evaluates the server call on first use, then reports whether another result item is available.
     *
     * @return {@code true} if {@link #get()} can return another row
     */
    @Override
    public boolean next() {
        if (this.evalResultIterator == null) {
            this.evalResultIterator = this.serverEvaluationCall.eval();
        }
        return this.evalResultIterator.hasNext();
    }

    /**
     * Converts the next result item into a row. Must be called after {@link #next()}, which is what
     * creates the underlying iterator.
     *
     * @return with a custom schema, the row produced by deserializing the item's string value as
     * JSON; otherwise a single-column row holding that string value
     */
    @Override
    public InternalRow get() {
        String val = this.evalResultIterator.next().getString();
        // Count the row before choosing the row shape so that progress is also reported for
        // custom-schema reads, which previously returned before the counter was touched.
        recordProgress();
        if (this.isCustomSchema) {
            return this.jsonRowDeserializer.deserializeJson(val);
        }
        return new GenericInternalRow(new Object[]{UTF8String.fromString(val)});
    }

    /**
     * Closes the result iterator, if one was opened, and releases the MarkLogic client. The client
     * is released even when closing the iterator throws.
     */
    @Override
    public void close() {
        try {
            if (this.evalResultIterator != null) {
                this.evalResultIterator.close();
            }
        } finally {
            if (this.databaseClient != null) {
                this.databaseClient.release();
            }
        }
    }

    /**
     * Adds one row to the running count and hands the count to the progress logger once it reaches
     * the batch size, after which the count starts over.
     */
    private void recordProgress() {
        progressCounter++;
        if (progressCounter >= batchSize) {
            ReadProgressLogger.logProgressIfNecessary(progressCounter);
            progressCounter = 0;
        }
    }
}