// Listing of org.gradle.internal.serialize.kryo.StringDeduplicatingKryoBackedEncoder
// taken from the gradle-api artifact (Gradle 6.9.1 API redistribution; Maven / Gradle / Ivy).
/*
* Copyright 2018 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.gradle.internal.serialize.kryo;
import com.esotericsoftware.kryo.io.Output;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import org.gradle.internal.serialize.AbstractEncoder;
import org.gradle.internal.serialize.FlushableEncoder;
import javax.annotation.Nullable;
import java.io.Closeable;
import java.io.OutputStream;
import java.util.List;
import java.util.Map;
/**
 * An encoder that writes to a Kryo {@link Output} and deduplicates strings.
 *
 * <p>Every non-null string is written as a one-byte marker ({@code 1}) followed by an
 * integer index. The first time a given string is seen, the index is followed by the
 * string itself; every later occurrence is written as the index only. A null string is
 * written as the single marker byte {@code 0}. Indices are consecutive integers starting
 * at 0, assigned in order of first appearance.
 *
 * <p>The table of known strings is created lazily and discarded by {@link #done()}.
 */
public class StringDeduplicatingKryoBackedEncoder extends AbstractEncoder implements FlushableEncoder, Closeable {
    /** Buffer size used when the caller does not specify one. */
    private static final int DEFAULT_BUFFER_SIZE = 4096;
    /** Number of hash buckets; must stay a power of two because it is used as a bit mask. */
    private static final int BUCKET_COUNT = 256;
    /** A list-backed bucket holding more entries than this is replaced by a map-backed one. */
    private static final int MAX_LIST_BUCKET_SIZE = 4;

    private IndexedStringSet strings;
    private final Output output;

    /**
     * Creates an encoder writing to the given stream with the default buffer size.
     *
     * @param outputStream the stream that receives the encoded bytes
     */
    public StringDeduplicatingKryoBackedEncoder(OutputStream outputStream) {
        this(outputStream, DEFAULT_BUFFER_SIZE);
    }

    /**
     * Creates an encoder writing to the given stream.
     *
     * @param outputStream the stream that receives the encoded bytes
     * @param bufferSize the buffer size handed to the underlying {@link Output}
     */
    public StringDeduplicatingKryoBackedEncoder(OutputStream outputStream, int bufferSize) {
        output = new Output(outputStream, bufferSize);
    }

    public void writeByte(byte value) {
        output.writeByte(value);
    }

    public void writeBytes(byte[] bytes, int offset, int count) {
        output.writeBytes(bytes, offset, count);
    }

    public void writeLong(long value) {
        output.writeLong(value);
    }

    public void writeSmallLong(long value) {
        output.writeLong(value, true);
    }

    public void writeInt(int value) {
        output.writeInt(value);
    }

    public void writeSmallInt(int value) {
        output.writeInt(value, true);
    }

    public void writeBoolean(boolean value) {
        output.writeBoolean(value);
    }

    /**
     * Writes a non-null string, deduplicated against the strings already written.
     *
     * @param value the string to write
     * @throws IllegalArgumentException if {@code value} is null
     */
    public void writeString(CharSequence value) {
        if (value == null) {
            throw new IllegalArgumentException("Cannot encode a null string.");
        }
        writeNullableString(value);
    }

    /**
     * Writes a possibly-null string. Null is written as the marker byte {@code 0};
     * anything else as the marker byte {@code 1} followed by the deduplicated form.
     *
     * @param value the string to write, or null
     */
    public void writeNullableString(@Nullable CharSequence value) {
        if (value == null) {
            output.writeByte((byte) 0);
            return;
        }
        if (strings == null) {
            strings = new IndexedStringSet();
        }
        output.writeByte((byte) 1);
        strings.register(value.toString());
    }

    /**
     * Returns the total number of bytes written by this encoder, some of which may still be buffered.
     */
    public long getWritePosition() {
        return output.total();
    }

    public void flush() {
        output.flush();
    }

    public void close() {
        output.close();
    }

    /**
     * Discards the table of known strings. A string written afterwards is treated as
     * new and numbering restarts at 0.
     */
    public void done() {
        strings = null;
    }

    /**
     * A dedicated set of strings implementation which associates a unique
     * integer to each new string. It works similarly to a hash map, by
     * selecting a bucket based on the 8 lower bits of the hash code of
     * the string. A bucket starts out holding a single string, becomes a
     * list when a second string lands in it, and becomes a map once the
     * list grows beyond {@link #MAX_LIST_BUCKET_SIZE} entries.
     *
     * Integers are not chosen arbitrarily: they must be consecutive integers
     * starting from 0.
     *
     * This is done so that we can optimize the size of the stream written, by
     * replacing strings with an id. Therefore this set takes care of writing
     * to the output as the set is built.
     */
    private class IndexedStringSet {
        private final StringSetBucket[] buckets = new StringSetBucket[BUCKET_COUNT];
        private int count;

        /**
         * Writes the given string to the output: index only if it is already known,
         * index followed by the string otherwise.
         */
        public void register(String value) {
            int bucketId = value.hashCode() & (BUCKET_COUNT - 1);
            StringSetBucket bucket = buckets[bucketId];
            // A bucket may hand back a different implementation once it outgrows itself.
            buckets[bucketId] = bucket == null ? new SingleEntryStringSet(value) : bucket.register(value);
        }

        /** Writes the index of a string that has already been written in full. */
        private void writeKnown(int index) {
            output.writeInt(index, true);
        }

        /**
         * Assigns the next free index to a string seen for the first time and writes
         * the index followed by the string. The counter is only advanced once both
         * writes have succeeded.
         */
        private IndexedString writeNew(String value) {
            IndexedString added = new IndexedString(value, count);
            output.writeInt(count, true);
            output.writeString(value);
            count++;
            return added;
        }

        /**
         * A bucket that contains only a single entry. Optimized for memory
         * usage.
         */
        private class SingleEntryStringSet implements StringSetBucket {
            private final IndexedString indexed;

            /** Creating the bucket also writes its (new) string to the output. */
            private SingleEntryStringSet(String value) {
                this.indexed = writeNew(value);
            }

            @Override
            public StringSetBucket register(String value) {
                if (indexed.matches(value)) {
                    writeKnown(indexed.index);
                    return this;
                }
                // Second distinct string in this bucket: switch to the list-backed form.
                return new MultiListStringSet(indexed).register(value);
            }

            @Override
            public String toString() {
                return indexed.toString();
            }
        }

        /**
         * A bucket implementation used when more than one string is found in a bucket, with
         * a reasonable number of strings.
         */
        private class MultiListStringSet implements StringSetBucket {
            private final List<IndexedString> store = Lists.newArrayList();

            public MultiListStringSet(IndexedString initial) {
                store.add(initial);
            }

            @Override
            public StringSetBucket register(String value) {
                for (IndexedString indexedString : store) {
                    if (indexedString.matches(value)) {
                        writeKnown(indexedString.index);
                        return this;
                    }
                }
                store.add(writeNew(value));
                if (store.size() > MAX_LIST_BUCKET_SIZE) {
                    // Linear scans are no longer cheap enough: switch to the map-backed form.
                    return new MultiMapStringSet(store);
                }
                return this;
            }

            @Override
            public String toString() {
                return store.toString();
            }
        }

        /**
         * A bucket implementation which uses a map under the hood, for
         * faster lookups whenever the number of items in a bucket grows
         * too much.
         */
        private class MultiMapStringSet implements StringSetBucket {
            private final Map<String, Integer> map;

            private MultiMapStringSet(List<IndexedString> strings) {
                map = Maps.newHashMapWithExpectedSize(strings.size() << 1);
                for (IndexedString indexedString : strings) {
                    map.put(indexedString.value, indexedString.index);
                }
            }

            @Override
            public StringSetBucket register(String value) {
                Integer index = map.get(value);
                if (index != null) {
                    writeKnown(index);
                    return this;
                }
                IndexedString added = writeNew(value);
                // Remember the string so that its next occurrence is written as an index only.
                map.put(added.value, added.index);
                return this;
            }

            @Override
            public String toString() {
                return map.toString();
            }
        }
    }

    /**
     * Interface for all bucket types.
     */
    private interface StringSetBucket {
        /**
         * Registers a string in the set. The returned value may either be
         * the same string set, or a different implementation optimized for
         * a different bucket size. This allows us to go from single string set
         * to list set and eventually a map backed set.
         */
        StringSetBucket register(String value);
    }

    /**
     * Associates a unique integer to a string.
     */
    private static class IndexedString {
        private final String value;
        private final int index;

        private IndexedString(String value, int index) {
            this.value = value;
            this.index = index;
        }

        /** Compares hash codes first so that most mismatches skip the full comparison. */
        boolean matches(String value) {
            return value.hashCode() == this.value.hashCode() && value.equals(this.value);
        }

        @Override
        public String toString() {
            return "Value '" + value + "' index " + index;
        }
    }
}
// Page footer of the hosting site: © 2015 - 2025 Weber Informatics LLC | Privacy Policy