![JAR search and dependency download from the Maven repository](/logo.png)
org.dinky.shaded.paimon.lookup.hash.HashLookupStoreReader Maven / Gradle / Ivy
The newest version!
/*
* Copyright 2015 LinkedIn Corp. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*/
/* This file is based on source code of StorageReader from the PalDB Project (https://github.com/linkedin/PalDB), licensed by the Apache
* Software Foundation (ASF) under the Apache License, Version 2.0. See the NOTICE file distributed with this work for
* additional information regarding copyright ownership. */
package org.dinky.shaded.paimon.lookup.hash;
import org.dinky.shaded.paimon.io.cache.CacheManager;
import org.dinky.shaded.paimon.io.cache.CachedRandomInputView;
import org.dinky.shaded.paimon.lookup.LookupStoreReader;
import org.dinky.shaded.paimon.utils.MurmurHashUtils;
import org.dinky.shaded.paimon.utils.VarLengthIntUtils;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.BufferedInputStream;
import java.io.DataInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.text.DecimalFormat;
import java.text.SimpleDateFormat;
import java.util.Arrays;
import java.util.Calendar;
import java.util.Iterator;
import java.util.Map;
import java.util.NoSuchElementException;
/** Internal read implementation for hash kv store. */
public class HashLookupStoreReader
implements LookupStoreReader, Iterable> {
private static final Logger LOG =
LoggerFactory.getLogger(HashLookupStoreReader.class.getName());
// Key count for each key length
private final int[] keyCounts;
// Slot size for each key length
private final int[] slotSizes;
// Number of slots for each key length
private final int[] slots;
// Offset of the index for different key length
private final int[] indexOffsets;
// Offset of the data for different key length
private final long[] dataOffsets;
// File input view
private CachedRandomInputView inputView;
// Buffers
private final byte[] slotBuffer;
HashLookupStoreReader(CacheManager cacheManager, File file) throws IOException {
// File path
if (!file.exists()) {
throw new FileNotFoundException("File " + file.getAbsolutePath() + " not found");
}
LOG.info("Opening file {}", file.getName());
// Open file and read metadata
long createdAt;
FileInputStream inputStream = new FileInputStream(file);
DataInputStream dataInputStream = new DataInputStream(new BufferedInputStream(inputStream));
// Offset of the index in the channel
int keyCount;
int indexOffset;
long dataOffset;
try {
// Time
createdAt = dataInputStream.readLong();
// Metadata counters
keyCount = dataInputStream.readInt();
// Number of different key length
int keyLengthCount = dataInputStream.readInt();
// Max key length
int maxKeyLength = dataInputStream.readInt();
// Read offset counts and keys
indexOffsets = new int[maxKeyLength + 1];
dataOffsets = new long[maxKeyLength + 1];
keyCounts = new int[maxKeyLength + 1];
slots = new int[maxKeyLength + 1];
slotSizes = new int[maxKeyLength + 1];
int maxSlotSize = 0;
for (int i = 0; i < keyLengthCount; i++) {
int keyLength = dataInputStream.readInt();
keyCounts[keyLength] = dataInputStream.readInt();
slots[keyLength] = dataInputStream.readInt();
slotSizes[keyLength] = dataInputStream.readInt();
indexOffsets[keyLength] = dataInputStream.readInt();
dataOffsets[keyLength] = dataInputStream.readLong();
maxSlotSize = Math.max(maxSlotSize, slotSizes[keyLength]);
}
slotBuffer = new byte[maxSlotSize];
// Read index offset to resign indexOffsets
indexOffset = dataInputStream.readInt();
for (int i = 0; i < indexOffsets.length; i++) {
indexOffsets[i] = indexOffset + indexOffsets[i];
}
// Read data offset to resign dataOffsets
dataOffset = dataInputStream.readLong();
for (int i = 0; i < dataOffsets.length; i++) {
dataOffsets[i] = dataOffset + dataOffsets[i];
}
} finally {
// Close metadata
dataInputStream.close();
inputStream.close();
}
// Create Mapped file in read-only mode
inputView = new CachedRandomInputView(file, cacheManager);
// logging
DecimalFormat integerFormat = new DecimalFormat("#,##0.00");
StringBuilder statMsg = new StringBuilder("Storage metadata\n");
statMsg.append(" Created at: ").append(formatCreatedAt(createdAt)).append("\n");
statMsg.append(" Key count: ").append(keyCount).append("\n");
for (int i = 0; i < keyCounts.length; i++) {
if (keyCounts[i] > 0) {
statMsg.append(" Key count for key length ")
.append(i)
.append(": ")
.append(keyCounts[i])
.append("\n");
}
}
statMsg.append(" Index size: ")
.append(integerFormat.format((dataOffset - indexOffset) / (1024.0 * 1024.0)))
.append(" Mb\n");
statMsg.append(" Data size: ")
.append(integerFormat.format((file.length() - dataOffset) / (1024.0 * 1024.0)))
.append(" Mb\n");
LOG.info(statMsg.toString());
}
@Override
public byte[] lookup(byte[] key) throws IOException {
int keyLength = key.length;
if (keyLength >= slots.length || keyCounts[keyLength] == 0) {
return null;
}
long hash = MurmurHashUtils.hashBytesPositive(key);
int numSlots = slots[keyLength];
int slotSize = slotSizes[keyLength];
int indexOffset = indexOffsets[keyLength];
long dataOffset = dataOffsets[keyLength];
for (int probe = 0; probe < numSlots; probe++) {
long slot = (hash + probe) % numSlots;
inputView.setReadPosition(indexOffset + slot * slotSize);
inputView.readFully(slotBuffer, 0, slotSize);
long offset = VarLengthIntUtils.decodeLong(slotBuffer, keyLength);
if (offset == 0) {
return null;
}
if (isKey(slotBuffer, key)) {
return getValue(dataOffset + offset);
}
}
return null;
}
private boolean isKey(byte[] slotBuffer, byte[] key) {
for (int i = 0; i < key.length; i++) {
if (slotBuffer[i] != key[i]) {
return false;
}
}
return true;
}
private byte[] getValue(long offset) throws IOException {
inputView.setReadPosition(offset);
// Get size of data
int size = VarLengthIntUtils.decodeInt(inputView);
// Create output bytes
byte[] res = new byte[size];
inputView.readFully(res);
return res;
}
private String formatCreatedAt(long createdAt) {
SimpleDateFormat sdf = new SimpleDateFormat("yyyy.MM.dd G 'at' HH:mm:ss z");
Calendar cl = Calendar.getInstance();
cl.setTimeInMillis(createdAt);
return sdf.format(cl.getTime());
}
@Override
public void close() throws IOException {
inputView.close();
inputView = null;
}
@Override
public Iterator> iterator() {
return new StorageIterator(true);
}
public Iterator> keys() {
return new StorageIterator(false);
}
private class StorageIterator implements Iterator> {
private final FastEntry entry = new FastEntry();
private final boolean withValue;
private int currentKeyLength = 0;
private byte[] currentSlotBuffer;
private long keyIndex;
private long keyLimit;
private long currentDataOffset;
private int currentIndexOffset;
public StorageIterator(boolean value) {
withValue = value;
nextKeyLength();
}
private void nextKeyLength() {
for (int i = currentKeyLength + 1; i < keyCounts.length; i++) {
long c = keyCounts[i];
if (c > 0) {
currentKeyLength = i;
keyLimit += c;
currentSlotBuffer = new byte[slotSizes[i]];
currentIndexOffset = indexOffsets[i];
currentDataOffset = dataOffsets[i];
break;
}
}
}
@Override
public boolean hasNext() {
return keyIndex < keyLimit;
}
@Override
public FastEntry next() {
try {
inputView.setReadPosition(currentIndexOffset);
long offset = 0;
while (offset == 0) {
inputView.readFully(currentSlotBuffer);
offset = VarLengthIntUtils.decodeLong(currentSlotBuffer, currentKeyLength);
currentIndexOffset += currentSlotBuffer.length;
}
byte[] key = Arrays.copyOf(currentSlotBuffer, currentKeyLength);
byte[] value = null;
if (withValue) {
long valueOffset = currentDataOffset + offset;
value = getValue(valueOffset);
}
entry.set(key, value);
if (++keyIndex == keyLimit) {
nextKeyLength();
}
return entry;
} catch (IOException ex) {
throw new RuntimeException(ex);
}
}
@Override
public void remove() {
throw new UnsupportedOperationException("Not supported yet.");
}
private class FastEntry implements Map.Entry {
private byte[] key;
private byte[] val;
protected void set(byte[] k, byte[] v) {
this.key = k;
this.val = v;
}
@Override
public byte[] getKey() {
return key;
}
@Override
public byte[] getValue() {
return val;
}
@Override
public byte[] setValue(byte[] value) {
throw new UnsupportedOperationException("Not supported.");
}
}
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy