All downloads are free. Search and download functionality uses the official Maven repository.

com.marklogic.spark.reader.customcode.CustomCodePartitionReader Maven / Gradle / Ivy

There is a newer version: 2.4.2
Show newest version
/*
 * Copyright © 2024 MarkLogic Corporation. All Rights Reserved.
 */
package com.marklogic.spark.reader.customcode;

import com.marklogic.client.DatabaseClient;
import com.marklogic.client.eval.EvalResultIterator;
import com.marklogic.client.eval.ServerEvaluationCall;
import com.marklogic.spark.Options;
import com.marklogic.spark.ReadProgressLogger;
import com.marklogic.spark.reader.JsonRowDeserializer;
import org.apache.spark.sql.catalyst.InternalRow;
import org.apache.spark.sql.catalyst.expressions.GenericInternalRow;
import org.apache.spark.sql.connector.read.PartitionReader;
import org.apache.spark.unsafe.types.UTF8String;

/**
 * Spark {@link PartitionReader} that evaluates custom code against MarkLogic and exposes each
 * evaluation result as an {@link InternalRow}.
 * <p>
 * The evaluation call is built once in the constructor and is executed lazily on the first call
 * to {@link #next()}. Each result is read as a string; when a custom schema is in use the string
 * is deserialized as JSON into a row, otherwise it is wrapped in a single-column row.
 * <p>
 * The reader owns the {@link DatabaseClient} it obtains from the context and releases it in
 * {@link #close()}.
 */
class CustomCodePartitionReader implements PartitionReader<InternalRow> {

    private final ServerEvaluationCall serverEvaluationCall;
    private final boolean isCustomSchema;

    // Created lazily by next() so that no call is made to MarkLogic until rows are requested.
    private EvalResultIterator evalResultIterator;
    private final JsonRowDeserializer jsonRowDeserializer;
    private final DatabaseClient databaseClient;

    // Only needed for logging progress.
    private final long batchSize;
    private long progressCounter;

    /**
     * Connects to MarkLogic and prepares the evaluation call for one partition.
     *
     * @param customCodeContext supplies the connection, the call definition, the schema and the
     *                          reader options
     * @param partition         value bound to the {@code PARTITION} variable of the call; when
     *                          {@code null}, no variable is added
     */
    public CustomCodePartitionReader(CustomCodeContext customCodeContext, String partition) {
        this.databaseClient = customCodeContext.connectToMarkLogic();
        try {
            this.serverEvaluationCall = customCodeContext.buildCall(
                this.databaseClient,
                new CustomCodeContext.CallOptions(Options.READ_INVOKE, Options.READ_JAVASCRIPT, Options.READ_XQUERY,
                    Options.READ_JAVASCRIPT_FILE, Options.READ_XQUERY_FILE)
            );

            if (partition != null) {
                this.serverEvaluationCall.addVariable("PARTITION", partition);
            }

            // NOTE(review): the two trailing arguments are presumably the default and the minimum
            // allowed value - confirm against CustomCodeContext.getNumericOption.
            this.batchSize = customCodeContext.getNumericOption(Options.READ_BATCH_SIZE, 1, 1);

            this.isCustomSchema = customCodeContext.isCustomSchema();
            this.jsonRowDeserializer = new JsonRowDeserializer(customCodeContext.getSchema());
        } catch (RuntimeException e) {
            // No reader instance will exist for the caller to close, so the client obtained above
            // must be released here or it would leak.
            if (this.databaseClient != null) {
                this.databaseClient.release();
            }
            throw e;
        }
    }

    /**
     * Reports whether another result is available, executing the evaluation call on first use.
     *
     * @return {@code true} if {@link #get()} can be called to obtain a row
     */
    @Override
    public boolean next() {
        if (this.evalResultIterator == null) {
            this.evalResultIterator = this.serverEvaluationCall.eval();
        }
        return this.evalResultIterator.hasNext();
    }

    /**
     * Consumes the next evaluation result and converts it to a row. Must only be called after
     * {@link #next()} has been called, since that is what creates the result iterator.
     *
     * @return the JSON-deserialized row when a custom schema is in use; otherwise a row with a
     * single string column holding the result value
     */
    @Override
    public InternalRow get() {
        String val = this.evalResultIterator.next().getString();
        // Count every row that is read, regardless of how it is converted, so that progress is
        // also reported for custom-schema reads.
        trackProgress();
        if (this.isCustomSchema) {
            return this.jsonRowDeserializer.deserializeJson(val);
        }
        return new GenericInternalRow(new Object[]{UTF8String.fromString(val)});
    }

    /**
     * Closes the result iterator, if one was created, and releases the database client. The client
     * is released even if closing the iterator throws.
     */
    @Override
    public void close() {
        try {
            if (this.evalResultIterator != null) {
                this.evalResultIterator.close();
            }
        } finally {
            if (this.databaseClient != null) {
                this.databaseClient.release();
            }
        }
    }

    /**
     * Increments the count of rows read and, once it reaches the batch size, hands the count to
     * the progress logger and starts counting again from zero.
     */
    private void trackProgress() {
        progressCounter++;
        if (progressCounter >= batchSize) {
            ReadProgressLogger.logProgressIfNecessary(progressCounter);
            progressCounter = 0;
        }
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy