org.apache.parquet.io.MessageColumnIO Maven / Gradle / Ivy
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.parquet.io;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.BitSet;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import org.apache.parquet.column.ColumnWriteStore;
import org.apache.parquet.column.ColumnWriter;
import org.apache.parquet.column.impl.ColumnReadStoreImpl;
import org.apache.parquet.column.page.PageReadStore;
import org.apache.parquet.filter.UnboundRecordFilter;
import org.apache.parquet.filter2.compat.FilterCompat;
import org.apache.parquet.filter2.compat.FilterCompat.Filter;
import org.apache.parquet.filter2.compat.FilterCompat.FilterPredicateCompat;
import org.apache.parquet.filter2.compat.FilterCompat.NoOpFilter;
import org.apache.parquet.filter2.compat.FilterCompat.UnboundRecordFilterCompat;
import org.apache.parquet.filter2.compat.FilterCompat.Visitor;
import org.apache.parquet.filter2.predicate.FilterPredicate;
import org.apache.parquet.filter2.recordlevel.FilteringRecordMaterializer;
import org.apache.parquet.filter2.recordlevel.IncrementallyUpdatedFilterPredicate;
import org.apache.parquet.filter2.recordlevel.IncrementallyUpdatedFilterPredicateBuilder;
import org.apache.parquet.io.api.Binary;
import org.apache.parquet.io.api.RecordConsumer;
import org.apache.parquet.io.api.RecordMaterializer;
import org.apache.parquet.schema.MessageType;
import it.unimi.dsi.fastutil.ints.IntArrayList;
import it.unimi.dsi.fastutil.ints.IntIterator;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* Message level of the IO structure
*/
public class MessageColumnIO extends GroupColumnIO {
// Logger for this class; also consulted once to decide whether debug tracing is enabled.
private static final Logger LOG = LoggerFactory.getLogger(MessageColumnIO.class);
// Debug flag captured when the class is initialized; guards all tracing done by the record consumer.
private static final boolean DEBUG = LOG.isDebugEnabled();
// Leaf (primitive) columns of this message; not set by the constructor, assigned through setLeaves(...).
private List<PrimitiveColumnIO> leaves;
// When true, getRecordWriter(...) wraps the writer in a ValidatingRecordConsumer; also handed to record readers.
private final boolean validating;
// Passed through to ColumnReadStoreImpl whenever a record reader is created.
private final String createdBy;
/**
 * Creates the message-level node of the column IO structure.
 *
 * @param messageType the message schema, forwarded to the parent constructor
 * @param validating  stored and later used to decide whether record writers validate their input
 * @param createdBy   stored and later passed to the column read store built for record readers
 */
MessageColumnIO(MessageType messageType, boolean validating, String createdBy) {
  // NOTE(review): the extra arguments are presumably (parent = null, index = 0) for the root node; confirm in GroupColumnIO.
  super(messageType, null, 0);
  this.createdBy = createdBy;
  this.validating = validating;
}
/**
 * Returns the column names of this message by delegating to the superclass implementation.
 *
 * @return the list produced by {@code super.getColumnNames()}; each element is presumably the
 *         path of one column - confirm against the superclass
 */
@Override
public List<String[]> getColumnNames() {
  return super.getColumnNames();
}
/**
 * Creates a record reader that applies no filter.
 * Delegates to the {@code Filter} overload with {@code FilterCompat.NOOP}.
 *
 * @param columns a page read store with the column data
 * @param recordMaterializer a record materializer
 * @param <T> the type of records returned by the reader
 * @return a record reader
 */
public <T> RecordReader<T> getRecordReader(PageReadStore columns,
    RecordMaterializer<T> recordMaterializer) {
  return getRecordReader(columns, recordMaterializer, FilterCompat.NOOP);
}
/**
 * Creates a record reader that applies an unbound record filter.
 * The filter is converted with {@code FilterCompat.get(filter)} and passed to the {@code Filter} overload.
 *
 * @param columns a page read store with the column data
 * @param recordMaterializer a record materializer
 * @param filter a record filter
 * @param <T> the type of records returned by the reader
 * @return a record reader
 * @deprecated use {@link #getRecordReader(PageReadStore, RecordMaterializer, Filter)}
 */
@Deprecated
public <T> RecordReader<T> getRecordReader(PageReadStore columns,
    RecordMaterializer<T> recordMaterializer,
    UnboundRecordFilter filter) {
  return getRecordReader(columns, recordMaterializer, FilterCompat.get(filter));
}
public RecordReader getRecordReader(final PageReadStore columns,
final RecordMaterializer recordMaterializer,
final Filter filter) {
Objects.requireNonNull(columns, "columns cannot be null");
Objects.requireNonNull(recordMaterializer, "recordMaterializer cannot be null");
Objects.requireNonNull(filter, "filter cannot be null");
if (leaves.isEmpty()) {
return new EmptyRecordReader<>(recordMaterializer);
}
return filter.accept(new Visitor>() {
@Override
public RecordReader visit(FilterPredicateCompat filterPredicateCompat) {
FilterPredicate predicate = filterPredicateCompat.getFilterPredicate();
IncrementallyUpdatedFilterPredicateBuilder builder = new IncrementallyUpdatedFilterPredicateBuilder(leaves);
IncrementallyUpdatedFilterPredicate streamingPredicate = builder.build(predicate);
RecordMaterializer filteringRecordMaterializer = new FilteringRecordMaterializer(
recordMaterializer,
leaves,
builder.getValueInspectorsByColumn(),
streamingPredicate);
return new RecordReaderImplementation<>(
MessageColumnIO.this,
filteringRecordMaterializer,
validating,
new ColumnReadStoreImpl(columns, filteringRecordMaterializer.getRootConverter(), getType(), createdBy));
}
@Override
public RecordReader visit(UnboundRecordFilterCompat unboundRecordFilterCompat) {
return new FilteredRecordReader<>(
MessageColumnIO.this,
recordMaterializer,
validating,
new ColumnReadStoreImpl(columns, recordMaterializer.getRootConverter(), getType(), createdBy),
unboundRecordFilterCompat.getUnboundRecordFilter(),
columns.getRowCount()
);
}
@Override
public RecordReader visit(NoOpFilter noOpFilter) {
return new RecordReaderImplementation<>(
MessageColumnIO.this,
recordMaterializer,
validating,
new ColumnReadStoreImpl(columns, recordMaterializer.getRootConverter(), getType(), createdBy));
}
});
}
/**
* To improve null writing performance, we cache null values on group nodes. We flush nulls when a
* non-null value hits the group node.
*
* Intuitively, when a group node hits a null value, all the leaves underneath it should be null.
* A direct way of doing it is to write nulls for all the leaves underneath it when a group node
* is null. This approach is not optimal, consider following case:
*
* - When the schema is really wide where for each group node, there are thousands of leaf
* nodes underneath it.
* - When the data being written is really sparse, group nodes could hit nulls frequently.
*
* With the direct approach, if a group node hit null values a thousand times, and there are a
* thousand nodes underneath it.
* For each null value, it iterates over a thousand leaf writers to write null values and it
* will do it for a thousand null values.
*
* In the above case, each leaf writer maintains its own buffer of values, so calling thousands of
* them in turn is very bad for memory locality. Instead, each group node can remember the null values
* encountered and flush only when a non-null value hits the group node. In this way, when we flush
* null values, we only iterate through all the leaves once and multiple cached null values are
* flushed to each leaf in a tight loop. This implementation has the following characteristics:
*
* 1. When a group node hits a null value, it adds the repetition level of the null value to
* the groupNullCache. The definition level of the cached nulls should always be the same as
* the definition level of the group node so there is no need to store it.
*
* 2. When a group node hits a non-null value and it has null values cached, it should flush the
* cached nulls, starting with its child group nodes. This makes sure the null values are
* flushed in the correct order.
*
*/
private class MessageColumnIORecordConsumer extends RecordConsumer {
// Node of the column IO tree the consumer is currently positioned on; moved by startField/endField.
private ColumnIO currentColumnIO;
// Current group nesting depth (incremented by startGroup, decremented by endGroup); indexes fieldsWritten and r.
private int currentLevel = 0;
/**
 * Remembers which field indexes have been written at one nesting level,
 * backed by a {@link BitSet} with one bit per field index.
 */
private class FieldsMarker {
  // bit i is set once field i has been marked as written
  private BitSet visitedIndexes = new BitSet();

  /** Marks the field at index {@code i} as written. */
  public void markWritten(int i) {
    visitedIndexes.set(i);
  }

  /** @return true if the field at index {@code i} has been marked since the last reset */
  public boolean isWritten(int i) {
    return visitedIndexes.get(i);
  }

  /** Clears the marks of the first {@code fieldsCount} field indexes. */
  public void reset(int fieldsCount) {
    this.visitedIndexes.clear(0, fieldsCount);
  }

  @Override
  public String toString() {
    return new StringBuilder("VisitedIndex{visitedIndexes=")
        .append(visitedIndexes)
        .append('}')
        .toString();
  }
}
//track at each level of depth, which fields are written, so nulls can be inserted for the unwritten fields
private final FieldsMarker[] fieldsWritten;
private final int[] r;
private final ColumnWriter[] columnWriters;
/**
* Maintain a map of groups and all the leaf nodes underneath it. It's used to optimize writing null for a group node.
* Instead of using recursion calls, all the leaves can be called directly without traversing the sub tree of the group node
*/
private Map> groupToLeafWriter = new HashMap<>();
/*
* Cache nulls for each group node. It only stores the repetition level, since the definition level
* should always be the definition level of the group node.
*/
private Map groupNullCache = new HashMap<>();
private final ColumnWriteStore columns;
private boolean emptyField = true;
/**
 * Registers {@code writer} as a leaf writer of every group above {@code primitive},
 * walking the parent chain until a group without a parent has been processed.
 *
 * @param primitive the leaf column whose ancestors are updated
 * @param writer    the column writer of that leaf
 */
private void buildGroupToLeafWriterMap(PrimitiveColumnIO primitive, ColumnWriter writer) {
  GroupColumnIO ancestor = primitive.getParent();
  // The direct parent is always registered before the null check, hence do/while.
  do {
    List<ColumnWriter> writersOfAncestor = getLeafWriters(ancestor);
    writersOfAncestor.add(writer);
    ancestor = ancestor.getParent();
  } while (ancestor != null);
}
/**
 * Returns the list of leaf writers registered for {@code group}, creating and
 * storing an empty list on first access.
 *
 * @param group the group node to look up
 * @return the mutable list held in {@code groupToLeafWriter} for that group
 */
private List<ColumnWriter> getLeafWriters(GroupColumnIO group) {
  List<ColumnWriter> writers = groupToLeafWriter.get(group);
  if (writers == null) {
    writers = new ArrayList<>();
    groupToLeafWriter.put(group, writers);
  }
  return writers;
}
/**
 * Builds a consumer writing to the given store: fetches one column writer per leaf,
 * registers it with all of the leaf's ancestor groups, and sizes the per-level state
 * by the longest field path found among the leaves.
 *
 * @param columns the store providing the column writers
 */
public MessageColumnIORecordConsumer(ColumnWriteStore columns) {
  this.columns = columns;
  int maxDepth = 0;
  List<PrimitiveColumnIO> allLeaves = MessageColumnIO.this.getLeaves();
  this.columnWriters = new ColumnWriter[allLeaves.size()];
  for (PrimitiveColumnIO leaf : allLeaves) {
    ColumnWriter leafWriter = columns.getColumnWriter(leaf.getColumnDescriptor());
    int pathLength = leaf.getFieldPath().length;
    if (pathLength > maxDepth) {
      maxDepth = pathLength;
    }
    // writers are addressed by leaf id
    columnWriters[leaf.getId()] = leafWriter;
    buildGroupToLeafWriterMap(leaf, leafWriter);
  }

  fieldsWritten = new FieldsMarker[maxDepth];
  for (int level = 0; level < fieldsWritten.length; level++) {
    fieldsWritten[level] = new FieldsMarker();
  }
  r = new int[maxDepth];
}
/**
 * When debugging is enabled, logs the current level, the written-field marker, the field path
 * and the repetition level, then checks that the tracked repetition level does not exceed
 * the repetition level of the current column.
 *
 * @throws InvalidRecordException if debugging is enabled and the check fails
 */
private void printState() {
  if (!DEBUG) {
    return;
  }
  String fieldPath = Arrays.toString(currentColumnIO.getFieldPath());
  log(currentLevel + ", " + fieldsWritten[currentLevel] + ": " + fieldPath + " r:" + r[currentLevel]);
  // sanity check
  boolean exceedsSchema = r[currentLevel] > currentColumnIO.getRepetitionLevel();
  if (exceedsSchema) {
    throw new InvalidRecordException(r[currentLevel] + "(r) > " + currentColumnIO.getRepetitionLevel() + " ( schema r)");
  }
}
/**
 * Emits a debug line, prefixed with one indent unit per nesting level, when debugging is enabled.
 *
 * @param message    the message (or SLF4J format string) to log after the indent
 * @param parameters the arguments forwarded to the logger
 */
private void log(Object message, Object...parameters) {
  if (!DEBUG) {
    return;
  }
  // NOTE(review): the builder is presized for two characters per level but one is appended
  // per level - confirm the intended indent width.
  StringBuilder line = new StringBuilder(currentLevel * 2);
  int remaining = currentLevel;
  while (remaining > 0) {
    line.append(" ");
    --remaining;
  }
  line.append(message);
  LOG.debug(line.toString(), parameters);
}
/**
 * Starts a new record: positions the consumer on the message root, zeroes the repetition
 * level of level 0 and clears the written-field marks of level 0.
 */
@Override
public void startMessage() {
  if (DEBUG) {
    log("< MESSAGE START >");
  }
  // NOTE(review): currentLevel is not touched here; presumably it is already 0 between records.
  GroupColumnIO root = MessageColumnIO.this;
  currentColumnIO = root;
  r[0] = 0;
  fieldsWritten[0].reset(root.getChildrenCount());
  if (DEBUG) {
    printState();
  }
}
/**
 * Ends the current record: writes nulls for the fields never written at the current level,
 * flushes cached group nulls if the store reports that a column flush is needed, and then
 * signals the end of the record to the store.
 */
@Override
public void endMessage() {
  writeNullForMissingFieldsAtCurrentLevel();

  // Cached null values have to reach the writers before the record is ended, so that
  // everything is sent before the current page would be closed.
  boolean flushNeeded = columns.isColumnFlushNeeded();
  if (flushNeeded) {
    flush();
  }

  columns.endRecord();
  if (DEBUG) {
    log("< MESSAGE END >");
    printState();
  }
}
/**
 * Moves the consumer to the child at {@code index} of the current group and marks the
 * field as empty until something is added to it.
 *
 * @param field the field name, used for logging and error messages
 * @param index the index of the field in the current group
 * @throws ParquetEncodingException wrapping any runtime failure raised while starting the field
 */
@Override
public void startField(String field, int index) {
  try {
    if (DEBUG) {
      log("startField({}, {})", field, index);
    }
    GroupColumnIO enclosingGroup = (GroupColumnIO) currentColumnIO;
    currentColumnIO = enclosingGroup.getChild(index);
    emptyField = true;
    if (DEBUG) {
      printState();
    }
  } catch (RuntimeException cause) {
    throw new ParquetEncodingException("error starting field " + field + " at " + index, cause);
  }
}
/**
 * Ends the current field: moves back to the parent node, rejects a field that received no
 * content, marks the field as written and restores the repetition level of the current
 * level from the enclosing level (0 at the top level).
 *
 * @param field the field name, used for logging
 * @param index the index of the field in its group
 * @throws ParquetEncodingException if nothing was added since the matching startField
 */
@Override
public void endField(String field, int index) {
  if (DEBUG) {
    log("endField({}, {})",field ,index);
  }
  // The position is moved back before the emptiness check.
  currentColumnIO = currentColumnIO.getParent();
  if (emptyField) {
    throw new ParquetEncodingException("empty fields are illegal, the field should be ommited completely instead");
  }
  fieldsWritten[currentLevel].markWritten(index);
  if (currentLevel == 0) {
    r[currentLevel] = 0;
  } else {
    r[currentLevel] = r[currentLevel - 1];
  }
  if (DEBUG) {
    printState();
  }
}
/**
 * Writes a null for every child of the current group that has not been marked as written
 * at the current level. The null carries the repetition level of the current level and
 * the definition level of the current group.
 *
 * @throws ParquetEncodingException wrapping any runtime failure raised while writing a null
 */
private void writeNullForMissingFieldsAtCurrentLevel() {
  GroupColumnIO currentGroup = (GroupColumnIO) currentColumnIO;
  int fieldCount = currentGroup.getChildrenCount();
  for (int fieldIndex = 0; fieldIndex < fieldCount; fieldIndex++) {
    if (fieldsWritten[currentLevel].isWritten(fieldIndex)) {
      continue;
    }
    try {
      ColumnIO undefinedField = currentGroup.getChild(fieldIndex);
      int d = currentColumnIO.getDefinitionLevel();
      if (DEBUG) {
        log(Arrays.toString(undefinedField.getFieldPath()) + ".writeNull(" + r[currentLevel] + "," + d + ")");
      }
      writeNull(undefinedField, r[currentLevel], d);
    } catch (RuntimeException cause) {
      throw new ParquetEncodingException("error while writing nulls for fields of indexes " + fieldIndex + " . current index: " + fieldsWritten[currentLevel], cause);
    }
  }
}
/**
 * Records a null for {@code undefinedField}: a primitive field is written to its column
 * writer immediately, a group field only has its repetition level cached.
 *
 * @param undefinedField the field that has no value
 * @param r the repetition level of the null
 * @param d the definition level of the null (used for primitive fields only)
 */
private void writeNull(ColumnIO undefinedField, int r, int d) {
  if (!undefinedField.getType().isPrimitive()) {
    // only cache the repetition level, the definition level should always be the definition level of the parent node
    cacheNullForGroup((GroupColumnIO) undefinedField, r);
    return;
  }
  PrimitiveColumnIO leaf = (PrimitiveColumnIO) undefinedField;
  columnWriters[leaf.getId()].writeNull(r, d);
}
/**
 * Appends repetition level {@code r} to the null cache of {@code group},
 * creating the cache entry on first use.
 *
 * @param group the group node that is null
 * @param r the repetition level of the null
 */
private void cacheNullForGroup(GroupColumnIO group, int r) {
  IntArrayList cachedLevels = groupNullCache.get(group);
  if (cachedLevels == null) {
    cachedLevels = new IntArrayList();
    groupNullCache.put(group, cachedLevels);
  }
  cachedLevels.add(r);
}
/**
 * Writes every null cached for {@code group} to each leaf writer registered under it,
 * using the definition level of the group's parent, then empties the cache.
 * Does nothing when no null is cached for the group.
 *
 * @param group the group whose cached nulls are written out
 */
private void writeNullToLeaves(GroupColumnIO group) {
  IntArrayList pendingLevels = groupNullCache.get(group);
  if (pendingLevels != null && !pendingLevels.isEmpty()) {
    int parentDefinitionLevel = group.getParent().getDefinitionLevel();
    // one leaf at a time: all cached nulls go to a writer before moving to the next one
    for (ColumnWriter leafWriter : groupToLeafWriter.get(group)) {
      IntIterator levels = pendingLevels.iterator();
      while (levels.hasNext()) {
        leafWriter.writeNull(levels.nextInt(), parentDefinitionLevel);
      }
    }
    pendingLevels.clear();
  }
}
/**
 * Sets the repetition level of the current nesting level to the repetition level
 * of the current column.
 */
private void setRepetitionLevel() {
  int columnRepetitionLevel = currentColumnIO.getRepetitionLevel();
  r[currentLevel] = columnRepetitionLevel;
  if (DEBUG) {
    log("r: {}", r[currentLevel]);
  }
}
/**
 * Enters the group the consumer is positioned on: flushes nulls cached for that group,
 * goes one level deeper, inherits the repetition level of the enclosing level and clears
 * the written-field marks of the new level.
 */
@Override
public void startGroup() {
  if (DEBUG) {
    log("startGroup()");
  }
  GroupColumnIO enteredGroup = (GroupColumnIO) currentColumnIO;

  // the group is not null this time, so nulls cached for it earlier must be written first
  if (hasNullCache(enteredGroup)) {
    flushCachedNulls(enteredGroup);
  }

  int enclosingLevel = currentLevel;
  currentLevel = enclosingLevel + 1;
  r[currentLevel] = r[enclosingLevel];

  fieldsWritten[currentLevel].reset(enteredGroup.getChildrenCount());
  if (DEBUG) {
    printState();
  }
}
/**
 * @param group the group node to check
 * @return true if at least one null is currently cached for {@code group}
 */
private boolean hasNullCache(GroupColumnIO group) {
  IntArrayList cachedLevels = groupNullCache.get(group);
  if (cachedLevels == null) {
    return false;
  }
  return !cachedLevels.isEmpty();
}
/**
 * Flushes the nulls cached in the sub tree rooted at {@code group}: child groups are
 * flushed recursively first, then the nulls cached for {@code group} itself.
 *
 * @param group the root of the sub tree to flush
 */
private void flushCachedNulls(GroupColumnIO group) {
  // descendants go first
  for (int childIndex = 0; childIndex < group.getChildrenCount(); childIndex++) {
    ColumnIO child = group.getChild(childIndex);
    if (!(child instanceof GroupColumnIO)) {
      continue;
    }
    flushCachedNulls((GroupColumnIO) child);
  }
  // then the group itself
  writeNullToLeaves(group);
}
/**
 * Leaves the current group: marks the enclosing field as non-empty, writes nulls for the
 * group's unwritten fields, goes one level up and sets the repetition level of that level
 * to the repetition level of the current column.
 */
@Override
public void endGroup() {
  if (DEBUG) {
    log("endGroup()");
  }
  emptyField = false;
  writeNullForMissingFieldsAtCurrentLevel();
  currentLevel -= 1;
  // same steps as setRepetitionLevel(), written out in place
  r[currentLevel] = currentColumnIO.getRepetitionLevel();
  if (DEBUG) {
    log("r: {}", r[currentLevel]);
    printState();
  }
}
/**
 * @return the column writer stored under the id of the current column, which must be a primitive column
 */
private ColumnWriter getColumnWriter() {
  PrimitiveColumnIO currentLeaf = (PrimitiveColumnIO) currentColumnIO;
  return columnWriters[currentLeaf.getId()];
}
/**
 * Writes an int to the writer of the current column with the repetition level of the
 * current nesting level and the definition level of the column, then updates the
 * repetition level for the next value.
 *
 * @param value the value to write
 */
@Override
public void addInteger(int value) {
  if (DEBUG) {
    log("addInt({})", value);
  }
  emptyField = false;
  ColumnWriter writer = getColumnWriter();
  int repetitionLevel = r[currentLevel];
  int definitionLevel = currentColumnIO.getDefinitionLevel();
  writer.write(value, repetitionLevel, definitionLevel);
  setRepetitionLevel();
  if (DEBUG) {
    printState();
  }
}
/**
 * Writes a long to the writer of the current column with the repetition level of the
 * current nesting level and the definition level of the column, then updates the
 * repetition level for the next value.
 *
 * @param value the value to write
 */
@Override
public void addLong(long value) {
  if (DEBUG) {
    log("addLong({})", value);
  }
  emptyField = false;
  ColumnWriter writer = getColumnWriter();
  int repetitionLevel = r[currentLevel];
  int definitionLevel = currentColumnIO.getDefinitionLevel();
  writer.write(value, repetitionLevel, definitionLevel);
  setRepetitionLevel();
  if (DEBUG) {
    printState();
  }
}
/**
 * Writes a boolean to the writer of the current column with the repetition level of the
 * current nesting level and the definition level of the column, then updates the
 * repetition level for the next value.
 *
 * @param value the value to write
 */
@Override
public void addBoolean(boolean value) {
  if (DEBUG) {
    log("addBoolean({})", value);
  }
  emptyField = false;
  ColumnWriter writer = getColumnWriter();
  int repetitionLevel = r[currentLevel];
  int definitionLevel = currentColumnIO.getDefinitionLevel();
  writer.write(value, repetitionLevel, definitionLevel);
  setRepetitionLevel();
  if (DEBUG) {
    printState();
  }
}
/**
 * Writes a binary value to the writer of the current column with the repetition level of
 * the current nesting level and the definition level of the column, then updates the
 * repetition level for the next value.
 *
 * @param value the value to write; its length is read for the debug trace
 */
@Override
public void addBinary(Binary value) {
  if (DEBUG) {
    log("addBinary({} bytes)", value.length());
  }
  emptyField = false;
  ColumnWriter writer = getColumnWriter();
  int repetitionLevel = r[currentLevel];
  int definitionLevel = currentColumnIO.getDefinitionLevel();
  writer.write(value, repetitionLevel, definitionLevel);
  setRepetitionLevel();
  if (DEBUG) {
    printState();
  }
}
/**
 * Writes a float to the writer of the current column with the repetition level of the
 * current nesting level and the definition level of the column, then updates the
 * repetition level for the next value.
 *
 * @param value the value to write
 */
@Override
public void addFloat(float value) {
  if (DEBUG) {
    log("addFloat({})", value);
  }
  emptyField = false;
  ColumnWriter writer = getColumnWriter();
  int repetitionLevel = r[currentLevel];
  int definitionLevel = currentColumnIO.getDefinitionLevel();
  writer.write(value, repetitionLevel, definitionLevel);
  setRepetitionLevel();
  if (DEBUG) {
    printState();
  }
}
/**
 * Writes a double to the writer of the current column with the repetition level of the
 * current nesting level and the definition level of the column, then updates the
 * repetition level for the next value.
 *
 * @param value the value to write
 */
@Override
public void addDouble(double value) {
  if (DEBUG) {
    log("addDouble({})", value);
  }
  emptyField = false;
  ColumnWriter writer = getColumnWriter();
  int repetitionLevel = r[currentLevel];
  int definitionLevel = currentColumnIO.getDefinitionLevel();
  writer.write(value, repetitionLevel, definitionLevel);
  setRepetitionLevel();
  if (DEBUG) {
    printState();
  }
}
/**
 * Flushes the nulls cached for all groups, starting the traversal at the message root.
 */
@Override
public void flush() {
  GroupColumnIO root = MessageColumnIO.this;
  flushCachedNulls(root);
}
}
/**
 * Creates a record consumer that writes records of this message to the given store.
 * The consumer is wrapped in a logging wrapper when debugging is enabled, and in a
 * validating consumer when this instance was created with validation on.
 *
 * @param columns the store providing the column writers
 * @return the (possibly wrapped) record consumer
 */
public RecordConsumer getRecordWriter(ColumnWriteStore columns) {
  RecordConsumer consumer = new MessageColumnIORecordConsumer(columns);
  if (DEBUG) {
    consumer = new RecordConsumerLoggingWrapper(consumer);
  }
  if (validating) {
    return new ValidatingRecordConsumer(consumer, getType());
  }
  return consumer;
}
/**
 * Starts the level computation at the root: calls the six-argument {@code setLevels} with
 * levels 0 and 0, empty path arrays, and two single-element lists that each contain only
 * this node.
 */
void setLevels() {
  String[] emptyFieldPath = new String[0];
  int[] emptyIndexPath = new int[0];
  // two distinct lists, as in a direct call with two Arrays.asList(this) arguments
  List<ColumnIO> firstRootList = Arrays.<ColumnIO>asList(this);
  List<ColumnIO> secondRootList = Arrays.<ColumnIO>asList(this);
  setLevels(0, 0, emptyFieldPath, emptyIndexPath, firstRootList, secondRootList);
}
/**
 * Sets the leaf columns of this message. The list is stored as given, without copying.
 *
 * @param leaves the primitive columns of the message
 */
void setLeaves(List<PrimitiveColumnIO> leaves) {
  this.leaves = leaves;
}
/**
 * Returns the leaf columns of this message.
 *
 * @return the list given to {@code setLeaves}, not a copy; null if it was never set
 */
public List<PrimitiveColumnIO> getLeaves() {
  return this.leaves;
}
/**
 * Returns the type of this node narrowed to {@link MessageType}.
 *
 * @return the result of {@code super.getType()} cast to {@code MessageType}
 */
@Override
public MessageType getType() {
  // NOTE(review): presumably the MessageType handed to the constructor - confirm in the superclass.
  MessageType messageType = (MessageType) super.getType();
  return messageType;
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy