// com.palantir.util.IndexEncodingUtils (source listing; Maven / Gradle / Ivy)
/*
* (c) Copyright 2023 Palantir Technologies Inc. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.palantir.util;
import com.fasterxml.jackson.annotation.JsonIgnoreType;
import com.google.common.collect.Maps;
import com.google.common.collect.Sets;
import com.palantir.logsafe.Preconditions;
import com.palantir.logsafe.SafeArg;
import com.palantir.logsafe.UnsafeArg;
import com.palantir.logsafe.exceptions.SafeIllegalArgumentException;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.function.Function;
import java.util.stream.Collectors;
import java.util.zip.CRC32;
import org.immutables.value.Value;
/**
 * Static helpers for replacing the keys of a map with their index into an ordered key list (and back again),
 * guarded by a checksum over the ordered key list.
 */
public final class IndexEncodingUtils {
    private IndexEncodingUtils() {}

    /**
     * Encodes {@code keyToValue} against {@code keys} without transforming the values.
     * Equivalent to calling {@link #encode(Set, Map, Function, ChecksumType)} with {@link Function#identity()}.
     *
     * @param keys a set of keys which implement {@link DeterministicHashable}
     * @param keyToValue a map of keys to values. The key set of this map must be a subset of {@code keys}
     * @param checksumType the type of checksum algorithm to use on the ordered list of keys
     * @return the ordered list of keys, the index map, and a compound checksum of the ordered keys
     * @throws SafeIllegalArgumentException if {@code keyToValue} contains keys that are not in the provided
     * set of keys
     */
    public static <K extends DeterministicHashable, V> IndexEncodingResult<K, V> encode(
            Set<K> keys, Map<K, V> keyToValue, ChecksumType checksumType) {
        return encode(keys, keyToValue, Function.identity(), checksumType);
    }

    /**
     * Compute a derived map (replacing keys with their associated index into the ordered list) to the value returned
     * by running the {@code valueMapper} over the original value.
     * If the {@code keys} are transmitted regardless of whether they have a value associated in {@code keyToValue},
     * this encoding can be used to save significant space on the wire.
     *
     * <p>Entries of {@code keyToValue} whose value is {@code null} are treated as absent; because the number of
     * encoded entries then differs from the size of {@code keyToValue}, such a map is rejected with the same
     * exception as a map containing unknown keys.
     *
     * @param keys a set of keys which implement {@link DeterministicHashable}
     * @param keyToValue a map of keys to values. The key set of this map must be a subset of {@code keys}
     * @param valueMapper a mapping function applied to values before placing them into the result map
     * @param checksumType the type of checksum algorithm to use on the ordered list of keys
     * @return the ordered list of keys, the index map, and a compound checksum of the ordered keys
     * @throws SafeIllegalArgumentException if {@code keyToValue} contains keys that are not in the provided
     * set of keys
     */
    public static <K extends DeterministicHashable, V, R> IndexEncodingResult<K, R> encode(
            Set<K> keys, Map<K, V> keyToValue, Function<V, R> valueMapper, ChecksumType checksumType) {
        // The iteration order of the set at this point defines the index assigned to each key.
        List<K> keyList = new ArrayList<>(keys);
        // A linked hash map will give a minor improvement when iterating during serialization
        Map<Integer, R> indexToValue = Maps.newLinkedHashMapWithExpectedSize(keyToValue.size());
        // We are explicitly using a primitive for-loop (no streaming) to squeeze out a bit of performance
        // (same in decode)
        for (int i = 0; i < keyList.size(); i++) {
            V value = keyToValue.get(keyList.get(i));
            if (value != null) {
                indexToValue.put(i, valueMapper.apply(value));
            }
        }
        // Every entry of keyToValue must have been matched by exactly one key of the list.
        if (indexToValue.size() != keyToValue.size()) {
            Set<K> unknownKeys = Sets.difference(keyToValue.keySet(), keys);
            throw new SafeIllegalArgumentException(
                    "Value map uses keys that are not in the provided set of keys",
                    UnsafeArg.of("unknownKeys", unknownKeys));
        }
        return IndexEncodingResult.<K, R>builder()
                .keyList(keyList)
                .indexToValue(indexToValue)
                .keyListChecksum(computeChecksum(checksumType, keyList))
                .build();
    }

    /**
     * Decodes {@code indexEncoding} without transforming the values.
     * Equivalent to calling {@link #decode(IndexEncodingResult, Function)} with {@link Function#identity()}.
     *
     * @param indexEncoding the output of {@link IndexEncodingUtils#encode}
     * @return a map from each referenced key to its value
     * @throws SafeIllegalArgumentException if the provided checksum does not match the actual checksum of the ordered
     * keys or if {@code indexToValue} contains indices that are not valid for the list of keys
     */
    public static <K extends DeterministicHashable, V> Map<K, V> decode(IndexEncodingResult<K, V> indexEncoding) {
        return decode(indexEncoding, Function.identity());
    }

    /**
     * Compute a derived map (replacing indices into the list of keys with their item) to the value returned
     * by running the {@code valueMapper} over the original value.
     *
     * @param indexEncoding the output of {@link IndexEncodingUtils#encode}, i.e. the ordered list of keys,
     * a map of indices to values, and a checksum of the ordered key list. Every index must be
     * a valid index into the list of keys
     * @param valueMapper a mapping function applied to values before placing them into the result map
     * @return a map from each referenced key to its mapped value
     * @throws SafeIllegalArgumentException if the provided checksum does not match the actual checksum of the ordered
     * keys or if {@code indexToValue} contains indices that are not valid for the list of keys
     */
    public static <K extends DeterministicHashable, V, R> Map<K, R> decode(
            IndexEncodingResult<K, V> indexEncoding, Function<V, R> valueMapper) {
        List<K> keyList = indexEncoding.keyList();
        // Recompute the checksum with the same algorithm the encoder declared and compare.
        KeyListChecksum expectedChecksum = indexEncoding.keyListChecksum();
        KeyListChecksum actualChecksum = computeChecksum(expectedChecksum.type(), keyList);
        Preconditions.checkArgument(
                actualChecksum.equals(expectedChecksum),
                "Key list integrity check failed",
                UnsafeArg.of("keyList", keyList),
                SafeArg.of("actualChecksum", actualChecksum),
                SafeArg.of("expectedChecksum", expectedChecksum));

        Map<Integer, V> indexToValue = indexEncoding.indexToValue();
        Map<K, R> keyToValue = Maps.newHashMapWithExpectedSize(indexToValue.size());
        for (Map.Entry<Integer, V> entry : indexToValue.entrySet()) {
            int index = entry.getKey();
            Preconditions.checkArgument(
                    index >= 0 && index < keyList.size(),
                    "Index map contains invalid index",
                    SafeArg.of("index", index),
                    SafeArg.of("keyListSize", keyList.size()));
            keyToValue.put(keyList.get(index), valueMapper.apply(entry.getValue()));
        }
        return keyToValue;
    }

    /**
     * Computes a checksum over {@code keyList}, in list order, using the algorithm selected by
     * {@code checksumType}.
     *
     * @param checksumType the checksum algorithm to apply
     * @param keyList the ordered keys to checksum
     * @return the checksum type together with the checksum value as an 8-byte big-endian array
     * @throws SafeIllegalArgumentException if {@code checksumType} is not handled by this method
     */
    public static <K extends DeterministicHashable> KeyListChecksum computeChecksum(
            ChecksumType checksumType, List<K> keyList) {
        switch (checksumType) {
            case CRC32_OF_DETERMINISTIC_HASHCODE: {
                CRC32 checksum = new CRC32();
                for (K key : keyList) {
                    // NOTE(review): CRC32#update(int) consumes only the low eight bits of its argument, so only
                    // the lowest byte of each deterministic hash code feeds the checksum. Left unchanged because
                    // altering it would change the value produced for this checksum type; a stronger variant
                    // would need to be introduced as a new ChecksumType.
                    checksum.update(key.deterministicHashCode());
                }
                return KeyListChecksum.of(
                        checksumType,
                        ByteBuffer.allocate(Long.BYTES)
                                .putLong(checksum.getValue())
                                .array());
            }
            default: {
                throw new SafeIllegalArgumentException(
                        "Unknown checksum type", SafeArg.of("checksumType", checksumType));
            }
        }
    }

    public interface DeterministicHashable {
        /**
         * A hash code implementation that is stable across different processes/JVMs and only depends on the contents
         * of the object.
         */
        int deterministicHashCode();
    }

    /**
     * This class is merely used to wrap the output of {@link IndexEncodingUtils#encode} and should not be embedded in
     * any other object directly or serialized as-is.
     */
    @Value.Immutable
    @JsonIgnoreType
    public interface IndexEncodingResult<K extends DeterministicHashable, V> {
        /** The ordered list of keys; positions in this list are the indices used by {@link #indexToValue()}. */
        List<K> keyList();

        /** The checksum computed over {@link #keyList()}. */
        KeyListChecksum keyListChecksum();

        /** Values keyed by the index of their key in {@link #keyList()}. */
        Map<Integer, V> indexToValue();

        static <K extends DeterministicHashable, V> ImmutableIndexEncodingResult.Builder<K, V> builder() {
            return ImmutableIndexEncodingResult.builder();
        }
    }

    /** The checksum algorithms understood by {@link #computeChecksum}, each identified by a numeric ID. */
    public enum ChecksumType {
        CRC32_OF_DETERMINISTIC_HASHCODE(1);

        // Lookup table from numeric ID to enum constant, built once from all declared constants.
        private static final Map<Integer, ChecksumType> ID_TO_ENTRY =
                Arrays.stream(ChecksumType.values()).collect(Collectors.toMap(entry -> entry.id, entry -> entry));

        private final int id;

        ChecksumType(int id) {
            this.id = id;
        }

        public int getId() {
            return id;
        }

        /**
         * Resolves a checksum type from its numeric ID.
         *
         * @param id the numeric ID as returned by {@link #getId()}
         * @return the checksum type registered under {@code id}
         * @throws SafeIllegalArgumentException if no checksum type has the given ID
         */
        public static ChecksumType valueOf(int id) {
            ChecksumType type = ID_TO_ENTRY.get(id);
            Preconditions.checkArgument(type != null, "Unknown checksum type ID", SafeArg.of("checksumTypeId", id));
            return type;
        }
    }

    /** A checksum value together with the algorithm that produced it. */
    @Value.Immutable
    public interface KeyListChecksum {
        @Value.Parameter
        ChecksumType type();

        @Value.Parameter
        byte[] value();

        static KeyListChecksum of(ChecksumType type, byte[] value) {
            return ImmutableKeyListChecksum.of(type, value);
        }
    }
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy