All downloads are free. Search and download functionality uses the official Maven repository.

com.marklogic.spark.reader.document.DocumentRowSchema Maven / Gradle / Ivy

/*
 * Copyright © 2024 MarkLogic Corporation. All Rights Reserved.
 */
package com.marklogic.spark.reader.document;

import com.marklogic.client.io.DocumentMetadataHandle;
import org.apache.spark.sql.catalyst.InternalRow;
import org.apache.spark.sql.catalyst.util.ArrayData;
import org.apache.spark.sql.catalyst.util.MapData;
import org.apache.spark.sql.types.DataTypes;
import org.apache.spark.sql.types.StructType;

import java.nio.charset.StandardCharsets;
import java.util.Locale;

public abstract class DocumentRowSchema {

    public static final StructType SCHEMA = new StructType()
        .add("URI", DataTypes.StringType)
        .add("content", DataTypes.BinaryType)
        .add("format", DataTypes.StringType)
        .add("collections", DataTypes.createArrayType(DataTypes.StringType))
        .add("permissions", DataTypes.createMapType(
            DataTypes.StringType,
            DataTypes.createArrayType(DataTypes.StringType))
        )
        .add("quality", DataTypes.IntegerType)
        .add("properties", DataTypes.StringType)
        .add("metadataValues", DataTypes.createMapType(DataTypes.StringType, DataTypes.StringType));

    private DocumentRowSchema() {
    }

    /**
     * Given a row that conforms to this class's schema, return a {@code DocumentMetadataHandle} that contains the
     * metadata from the given row.
     *
     * @param row
     * @return
     */
    public static DocumentMetadataHandle makeDocumentMetadata(InternalRow row) {
        DocumentMetadataHandle metadata = new DocumentMetadataHandle();
        addCollectionsToMetadata(row, metadata);
        addPermissionsToMetadata(row, metadata);
        if (!row.isNullAt(5)) {
            metadata.setQuality(row.getInt(5));
        }
        addPropertiesToMetadata(row, metadata);
        addMetadataValuesToMetadata(row, metadata);
        return metadata;
    }

    private static void addCollectionsToMetadata(InternalRow row, DocumentMetadataHandle metadata) {
        if (!row.isNullAt(3)) {
            ArrayData collections = row.getArray(3);
            for (int i = 0; i < collections.numElements(); i++) {
                String value = collections.get(i, DataTypes.StringType).toString();
                metadata.getCollections().add(value);
            }
        }
    }

    private static void addPermissionsToMetadata(InternalRow row, DocumentMetadataHandle metadata) {
        if (!row.isNullAt(4)) {
            MapData permissions = row.getMap(4);
            ArrayData roles = permissions.keyArray();
            ArrayData capabilities = permissions.valueArray();
            for (int i = 0; i < roles.numElements(); i++) {
                String role = roles.get(i, DataTypes.StringType).toString();
                ArrayData caps = capabilities.getArray(i);
                DocumentMetadataHandle.Capability[] capArray = new DocumentMetadataHandle.Capability[caps.numElements()];
                for (int j = 0; j < caps.numElements(); j++) {
                    String value = caps.get(j, DataTypes.StringType).toString();
                    capArray[j] = DocumentMetadataHandle.Capability.valueOf(value.toUpperCase());
                }
                metadata.getPermissions().add(role, capArray);
            }
        }
    }

    private static void addPropertiesToMetadata(InternalRow row, DocumentMetadataHandle metadata) {
        if (!row.isNullAt(6)) {
            String propertiesXml = row.getString(6);
            String metadataXml = String.format("%s", propertiesXml);
            DocumentMetadataHandle tempMetadata = new DocumentMetadataHandle();
            tempMetadata.fromBuffer(metadataXml.getBytes());
            metadata.setProperties(tempMetadata.getProperties());
        }
    }

    private static void addMetadataValuesToMetadata(InternalRow row, DocumentMetadataHandle metadata) {
        if (!row.isNullAt(7)) {
            MapData properties = row.getMap(7);
            ArrayData keys = properties.keyArray();
            ArrayData values = properties.valueArray();
            for (int i = 0; i < keys.numElements(); i++) {
                String key = keys.get(i, DataTypes.StringType).toString();
                String value = values.get(i, DataTypes.StringType).toString();
                metadata.getMetadataValues().put(key, value);
            }
        }
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy