All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.hudi.client.model.HoodieInternalRow Maven / Gradle / Ivy

The newest version!
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hudi.client.model;

import org.apache.hudi.common.model.HoodieRecord;

import org.apache.spark.sql.catalyst.InternalRow;
import org.apache.spark.sql.catalyst.expressions.UnsafeRow;
import org.apache.spark.sql.catalyst.util.ArrayData;
import org.apache.spark.sql.catalyst.util.MapData;
import org.apache.spark.sql.types.DataType;
import org.apache.spark.sql.types.Decimal;
import org.apache.spark.sql.types.StringType$;
import org.apache.spark.unsafe.types.CalendarInterval;
import org.apache.spark.unsafe.types.UTF8String;

/**
 * Hudi internal implementation of the {@link InternalRow} allowing to extend arbitrary
 * {@link InternalRow} overlaying Hudi-internal meta-fields on top of it.
 *
 * Capable of overlaying meta-fields in both cases: whether original {@link #sourceRow} contains
 * meta columns or not. This allows to handle following use-cases allowing to avoid any
 * manipulation (reshuffling) of the source row, by simply creating new instance
 * of {@link HoodieInternalRow} with all the meta-values provided
 *
 * 
    *
  • When meta-fields need to be prepended to the source {@link InternalRow}
  • *
  • When meta-fields need to be updated w/in the source {@link InternalRow} * ({@link org.apache.spark.sql.catalyst.expressions.UnsafeRow} currently does not * allow in-place updates due to its memory layout)
  • *
*/ public class HoodieInternalRow extends InternalRow { /** * Collection of meta-fields as defined by {@link HoodieRecord#HOODIE_META_COLUMNS} * * NOTE: {@code HoodieInternalRow} *always* overlays its own meta-fields even in case * when source row also contains them, to make sure these fields are mutable and * can be updated (for ex, {@link UnsafeRow} doesn't support mutations due to * its memory layout, as it persists field offsets) */ private final UTF8String[] metaFields; private final InternalRow sourceRow; /** * Specifies whether source {@link #sourceRow} contains meta-fields */ private final boolean sourceContainsMetaFields; public HoodieInternalRow(UTF8String commitTime, UTF8String commitSeqNumber, UTF8String recordKey, UTF8String partitionPath, UTF8String fileName, InternalRow sourceRow, boolean sourceContainsMetaFields) { this.metaFields = new UTF8String[] { commitTime, commitSeqNumber, recordKey, partitionPath, fileName }; this.sourceRow = sourceRow; this.sourceContainsMetaFields = sourceContainsMetaFields; } public HoodieInternalRow(UTF8String[] metaFields, InternalRow sourceRow, boolean sourceContainsMetaFields) { this.metaFields = metaFields; this.sourceRow = sourceRow; this.sourceContainsMetaFields = sourceContainsMetaFields; } @Override public int numFields() { if (sourceContainsMetaFields) { return sourceRow.numFields(); } else { return sourceRow.numFields() + metaFields.length; } } @Override public void setNullAt(int ordinal) { if (ordinal < metaFields.length) { metaFields[ordinal] = null; } else { sourceRow.setNullAt(rebaseOrdinal(ordinal)); } } @Override public void update(int ordinal, Object value) { if (ordinal < metaFields.length) { if (value instanceof UTF8String) { metaFields[ordinal] = (UTF8String) value; } else if (value instanceof String) { metaFields[ordinal] = UTF8String.fromString((String) value); } else { throw new IllegalArgumentException( String.format("Could not update the row at (%d) with value of type (%s), either UTF8String or String are expected", ordinal, value.getClass().getSimpleName())); } } else { sourceRow.update(rebaseOrdinal(ordinal), value); } } @Override public boolean isNullAt(int ordinal) { if (ordinal < metaFields.length) { return metaFields[ordinal] == null; } return sourceRow.isNullAt(rebaseOrdinal(ordinal)); } @Override public UTF8String getUTF8String(int ordinal) { if (ordinal < metaFields.length) { return metaFields[ordinal]; } return sourceRow.getUTF8String(rebaseOrdinal(ordinal)); } @Override public Object get(int ordinal, DataType dataType) { if (ordinal < metaFields.length) { validateMetaFieldDataType(dataType); return metaFields[ordinal]; } return sourceRow.get(rebaseOrdinal(ordinal), dataType); } @Override public boolean getBoolean(int ordinal) { ruleOutMetaFieldsAccess(ordinal, Boolean.class); return sourceRow.getBoolean(rebaseOrdinal(ordinal)); } @Override public byte getByte(int ordinal) { ruleOutMetaFieldsAccess(ordinal, Byte.class); return sourceRow.getByte(rebaseOrdinal(ordinal)); } @Override public short getShort(int ordinal) { ruleOutMetaFieldsAccess(ordinal, Short.class); return sourceRow.getShort(rebaseOrdinal(ordinal)); } @Override public int getInt(int ordinal) { ruleOutMetaFieldsAccess(ordinal, Integer.class); return sourceRow.getInt(rebaseOrdinal(ordinal)); } @Override public long getLong(int ordinal) { ruleOutMetaFieldsAccess(ordinal, Long.class); return sourceRow.getLong(rebaseOrdinal(ordinal)); } @Override public float getFloat(int ordinal) { ruleOutMetaFieldsAccess(ordinal, Float.class); return sourceRow.getFloat(rebaseOrdinal(ordinal)); } @Override public double getDouble(int ordinal) { ruleOutMetaFieldsAccess(ordinal, Double.class); return sourceRow.getDouble(rebaseOrdinal(ordinal)); } @Override public Decimal getDecimal(int ordinal, int precision, int scale) { ruleOutMetaFieldsAccess(ordinal, Decimal.class); return sourceRow.getDecimal(rebaseOrdinal(ordinal), precision, scale); } @Override public byte[] getBinary(int ordinal) { ruleOutMetaFieldsAccess(ordinal, Byte[].class); return sourceRow.getBinary(rebaseOrdinal(ordinal)); } @Override public CalendarInterval getInterval(int ordinal) { ruleOutMetaFieldsAccess(ordinal, CalendarInterval.class); return sourceRow.getInterval(rebaseOrdinal(ordinal)); } @Override public InternalRow getStruct(int ordinal, int numFields) { ruleOutMetaFieldsAccess(ordinal, InternalRow.class); return sourceRow.getStruct(rebaseOrdinal(ordinal), numFields); } @Override public ArrayData getArray(int ordinal) { ruleOutMetaFieldsAccess(ordinal, ArrayData.class); return sourceRow.getArray(rebaseOrdinal(ordinal)); } @Override public MapData getMap(int ordinal) { ruleOutMetaFieldsAccess(ordinal, MapData.class); return sourceRow.getMap(rebaseOrdinal(ordinal)); } @Override public InternalRow copy() { UTF8String[] copyMetaFields = new UTF8String[metaFields.length]; for (int i = 0; i < metaFields.length; i++) { copyMetaFields[i] = metaFields[i] != null ? metaFields[i].copy() : null; } return new HoodieInternalRow(copyMetaFields, sourceRow.copy(), sourceContainsMetaFields); } private int rebaseOrdinal(int ordinal) { // NOTE: In cases when source row does not contain meta fields, we will have to // rebase ordinal onto its indexes return sourceContainsMetaFields ? ordinal : ordinal - metaFields.length; } private void validateMetaFieldDataType(DataType dataType) { if (!dataType.sameType(StringType$.MODULE$)) { throw new ClassCastException(String.format("Can not cast meta-field of type UTF8String to %s", dataType.simpleString())); } } private void ruleOutMetaFieldsAccess(int ordinal, Class expectedDataType) { if (ordinal < metaFields.length) { throw new ClassCastException(String.format("Can not cast meta-field of type UTF8String at (%d) as %s", ordinal, expectedDataType.getName())); } } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy