/*
* Copyright 2017-2023 O2 Czech Republic, a.s.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package cz.o2.proxima.direct.bulk.fs.parquet;

import static java.util.Optional.of;

import com.google.common.base.Preconditions;
import cz.o2.proxima.annotations.Internal;
import cz.o2.proxima.direct.bulk.fs.parquet.ParquetFileFormat.Operation;
import cz.o2.proxima.direct.bulk.fs.parquet.StreamElementMaterializer.ParquetColumnGroup.ParquetColumn;
import cz.o2.proxima.direct.bulk.fs.parquet.StreamElementWriteSupport.ArrayWriter;
import cz.o2.proxima.repository.AttributeDescriptor;
import cz.o2.proxima.repository.EntityDescriptor;
import cz.o2.proxima.scheme.AttributeValueAccessor;
import cz.o2.proxima.scheme.AttributeValueAccessors.StructureValue;
import cz.o2.proxima.scheme.SchemaDescriptors.SchemaTypeDescriptor;
import cz.o2.proxima.scheme.ValueSerializer;
import cz.o2.proxima.storage.StreamElement;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import javax.annotation.Nullable;
import lombok.Value;
import lombok.extern.slf4j.Slf4j;
import org.apache.parquet.io.api.Binary;
import org.apache.parquet.io.api.Converter;
import org.apache.parquet.io.api.GroupConverter;
import org.apache.parquet.io.api.PrimitiveConverter;
import org.apache.parquet.io.api.RecordMaterializer;
import org.apache.parquet.schema.GroupType;
import org.apache.parquet.schema.LogicalTypeAnnotation;
import org.apache.parquet.schema.MessageType;
import org.apache.parquet.schema.Type;

/** Class responsible for converting a parquet record into a {@link StreamElement}. */
@Slf4j
@Internal
public class StreamElementMaterializer extends RecordMaterializer<StreamElement> {

private final ParquetColumnRecordConverter root;
private final MessageType schema;
private final EntityDescriptor entity;
private final String attributeNamesPrefix;

  public StreamElementMaterializer(
      MessageType schema, EntityDescriptor entity, String attributeNamesPrefix) {
this.schema = schema;
this.entity = entity;
this.attributeNamesPrefix = attributeNamesPrefix;
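    // Root group converter that parquet-mr drives while assembling the current record.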
this.root = new ParquetColumnRecordConverter(schema, null, null);
}
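
  /**
   * Builds the {@link StreamElement} represented by the current parquet record, or returns {@code
   * null} for records whose attribute is not known to the current entity, so that such records are
   * skipped.
   */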
@Override
public StreamElement getCurrentRecord() {
    Map<String, Object> record = recordToMap(schema, root.getCurrentRecord(), new HashMap<>());
final String key = getColumnFromRow(ParquetFileFormat.PARQUET_COLUMN_NAME_KEY, record);
final String operation =
getColumnFromRow(ParquetFileFormat.PARQUET_COLUMN_NAME_OPERATION, record);
final String attributeName =
getColumnFromRow(ParquetFileFormat.PARQUET_COLUMN_NAME_ATTRIBUTE_PREFIX, record);
    Optional<AttributeDescriptor<Object>> attribute = entity.findAttribute(attributeName);
if (!attribute.isPresent()) {
// current attribute is not in entity -> skip
log.info(
"Skipping attribute [{}] which is not in current entity [{}].",
attributeName,
entity.getName());
return null;
}
final String uuid = getColumnFromRow(ParquetFileFormat.PARQUET_COLUMN_NAME_UUID, record);
final long timestamp =
getColumnFromRow(ParquetFileFormat.PARQUET_COLUMN_NAME_TIMESTAMP, record);
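    // Dispatch on the persisted operation column and rebuild the corresponding write:
    // a plain delete, a wildcard delete, or an upsert carrying the serialized value.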
switch (Operation.of(operation)) {
case DELETE:
return StreamElement.delete(entity, attribute.get(), uuid, key, attributeName, timestamp);
case DELETE_WILDCARD:
return StreamElement.deleteWildcard(entity, attribute.get(), uuid, key, timestamp);
case UPSERT:
return StreamElement.upsert(
entity,
attribute.get(),
uuid,
key,
attributeName,
timestamp,
getValueFromCurrentRowData(attribute.get(), record));
default:
throw new RecordMaterializationException("Unknown operation " + operation);
}
}
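
  // A minimal usage sketch (hypothetical, for illustration only): this materializer is normally
  // driven by parquet-mr, which obtains it from a ReadSupport implementation, e.g.
  //
  //   ParquetReader<StreamElement> reader =
  //       ParquetReader.builder(readSupport, path).build();
  //   StreamElement element;
  //   while ((element = reader.read()) != null) {
  //     // consume the element
  //   }
  //
  // where readSupport is assumed to be a ReadSupport<StreamElement> whose prepareForRead
  // returns this materializer.
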
  private byte[] getValueFromCurrentRowData(
      AttributeDescriptor<?> attribute, Map<String, Object> record) {
final String storedAttributeName = attributeNamesPrefix + attribute.toAttributePrefix(false);
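    // The serialized value is stored under the attribute's prefix, prepended with the
    // configured attribute name prefix.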
@SuppressWarnings("unchecked")
final ValueSerializer