All downloads are free. Search and download functionality uses the official Maven repository.

io.github.matteobertozzi.yajbe.YajbeFieldNameWriter Maven / Gradle / Ivy

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package io.github.matteobertozzi.yajbe;

import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;

/**
 * Writes object field names to a {@link YajbeWriter}, choosing for every name one of four
 * forms: an index into the names seen so far, a prefix shared with the previous name,
 * a prefix and a suffix shared with the previous name, or the full UTF-8 bytes.
 *
 * <p>Every name emitted in a non-indexed form is remembered (up to
 * {@link #MAX_INDEXED_NAMES} entries), so later occurrences are written as an index.
 */
final class YajbeFieldNameWriter {
  /** Upper bound on the number of names kept in the index. */
  private static final int MAX_INDEXED_NAMES = 65819;

  /** Values below this limit are stored directly in the low 5 bits of the head byte. */
  private static final int INLINE_LENGTH_LIMIT = 30;
  /** Bias subtracted from the value in the one-extra-byte form. */
  private static final int ONE_BYTE_BIAS = 29;
  /** Largest value representable with one extra byte: 29 + 255 = 284. */
  private static final int MAX_ONE_BYTE_LENGTH = ONE_BYTE_BIAS + 0xff;
  /** Largest value representable with two extra bytes: 284 + 65535 = 65819. */
  private static final int MAX_TWO_BYTE_LENGTH = MAX_ONE_BYTE_LENGTH + 0xffff;

  private final IndexedHashSet indexedMap = new IndexedHashSet(128);
  private final YajbeWriter stream;

  /** The most recently written key, or {@code null} before the first write. */
  private String lastKey;
  /** UTF-8 bytes of {@link #lastKey}; {@code null} until needed when the last key was indexed. */
  private byte[] lastKeyUtf8;

  public YajbeFieldNameWriter(final YajbeWriter stream) {
    this.stream = stream;
  }

  /**
   * Pre-populates the name index, in array order, before any name has been added.
   * Names beyond {@link #MAX_INDEXED_NAMES} are ignored.
   *
   * @param names the names to index; entry {@code i} gets index {@code i}
   * @throws UnsupportedOperationException if the index already contains names
   */
  void setInitialFieldNames(final String[] names) {
    if (indexedMap.size() != 0) {
      throw new UnsupportedOperationException("field names already added");
    }

    final int count = Math.min(names.length, MAX_INDEXED_NAMES);
    for (int i = 0; i < count; ++i) {
      indexedMap.add(names[i]);
    }
  }

  /**
   * Writes {@code key} in the most compact form available and records it as the last key.
   *
   * @param key the field name to write
   * @throws IOException if the underlying stream fails
   */
  public void write(final String key) throws IOException {
    final int index = this.indexedMap.get(key);
    if (index >= 0) {
      this.writeIndexedFieldName(index);
      this.lastKey = key;
      // the UTF-8 form is computed lazily, only if the next key needs a prefix comparison
      this.lastKeyUtf8 = null;
      return;
    }

    final byte[] utf8 = key.getBytes(StandardCharsets.UTF_8);

    // prefix/suffix compression is attempted only with a previous key and a name longer than 4 bytes
    if (this.lastKey != null && utf8.length > 4) {
      checkPrefixAndWrite(utf8);
    } else {
      writeFullFieldName(utf8);
    }

    if (indexedMap.size() < MAX_INDEXED_NAMES) {
      indexedMap.add(key);
    }
    this.lastKey = key;
    this.lastKeyUtf8 = utf8;
  }

  /**
   * Compares {@code utf8} with the previous key and writes it as prefix+suffix, prefix only,
   * or in full, depending on how many leading/trailing bytes are shared.
   */
  private void checkPrefixAndWrite(final byte[] utf8) throws IOException {
    if (lastKeyUtf8 == null) {
      this.lastKeyUtf8 = lastKey.getBytes(StandardCharsets.UTF_8);
    }

    // both counts are written as a single byte, hence the 0xff caps
    final int prefix = Math.min(0xff, this.prefix(utf8));
    final int suffix = this.suffix(utf8, prefix);

    if (suffix > 2) {
      writePrefixSuffix(utf8, prefix, Math.min(0xff, suffix));
    } else if (prefix > 2) {
      writePrefix(utf8, prefix);
    } else {
      writeFullFieldName(utf8);
    }
  }

  /**
   * Writes the length header followed by all bytes of {@code fieldName}.
   *
   * @param fieldName the UTF-8 encoded name
   * @throws IOException if the underlying stream fails
   */
  public void writeFullFieldName(final byte[] fieldName) throws IOException {
    // 100----- Full Field Name (0-29 length - 1, 30 1b-len, 31 2b-len)
    writeLength(0b100_00000, fieldName.length);
    this.stream.write(fieldName, 0, fieldName.length);
  }

  /**
   * Writes a reference to an already indexed name.
   *
   * @param fieldIndex the index of the name
   * @throws IOException if the underlying stream fails
   */
  public void writeIndexedFieldName(final int fieldIndex) throws IOException {
    // 101----- Field Offset (0-29 field, 30 1b-len, 31 2b-len)
    this.writeLength(0b101_00000, fieldIndex);
  }

  /**
   * Writes the header for the remaining length, the prefix byte count, and the bytes of
   * {@code fieldName} that follow the shared prefix.
   *
   * @param fieldName the UTF-8 encoded name
   * @param prefix number of leading bytes shared with the previous key
   * @throws IOException if the underlying stream fails
   */
  public void writePrefix(final byte[] fieldName, final int prefix) throws IOException {
    // 110----- Prefix (1byte prefix, 0-29 length - 1, 30 1b-len, 31 2b-len)
    final int length = fieldName.length - prefix;
    this.writeLength(0b110_00000, length);
    stream.write(prefix);
    stream.write(fieldName, prefix, length);
  }

  /**
   * Writes the header for the middle length, the prefix and suffix byte counts, and the
   * bytes of {@code fieldName} between the shared prefix and the shared suffix.
   *
   * @param fieldName the UTF-8 encoded name
   * @param prefix number of leading bytes shared with the previous key
   * @param suffix number of trailing bytes shared with the previous key
   * @throws IOException if the underlying stream fails
   */
  public void writePrefixSuffix(final byte[] fieldName, final int prefix, final int suffix) throws IOException {
    // 111----- Prefix/Suffix (1byte prefix, 1byte suffix, 0-29 length - 1, 30 1b-len, 31 2b-len)
    final int length = fieldName.length - prefix - suffix;
    this.writeLength(0b111_00000, length);
    stream.write(prefix);
    stream.write(suffix);
    stream.write(fieldName, prefix, length);
  }

  /**
   * Writes {@code head} combined with {@code length} using one, two or three bytes:
   * values below 30 go in the low 5 bits of the head byte; values up to 284 use marker 30
   * plus one byte holding {@code length - 29}; values up to 65819 use marker 31 plus two
   * bytes holding {@code length - 284}, high byte first.
   *
   * @throws Error if {@code length} is greater than 65819
   */
  private void writeLength(final int head, final int length) throws IOException {
    if (length < INLINE_LENGTH_LIMIT) {
      stream.write(head | length);
      return;
    }

    if (length <= MAX_ONE_BYTE_LENGTH) {
      // 30 + 1byte = 284
      // NOTE(review): the buffer is fetched before rawBufferOffset() is called; this order is
      // kept as-is — presumably rawBufferOffset(n) reserves n bytes, confirm in YajbeWriter.
      final byte[] buf = stream.rawBuffer();
      final int bufOff = stream.rawBufferOffset(2);
      buf[bufOff] = (byte) (head | 0b11110);
      buf[bufOff + 1] = (byte) ((length - ONE_BYTE_BIAS) & 0xff);
      return;
    }

    if (length <= MAX_TWO_BYTE_LENGTH) {
      // 31 + 2byte = 65819
      final byte[] buf = stream.rawBuffer();
      final int bufOff = stream.rawBufferOffset(3);
      final int biased = length - MAX_ONE_BYTE_LENGTH;
      buf[bufOff] = (byte) (head | 0b11111);
      buf[bufOff + 1] = (byte) (biased / 256);
      buf[bufOff + 2] = (byte) (biased & 255);
      return;
    }

    // NOTE(review): this is also reached for an over-long field name, not only for an
    // out-of-range index; type and message are kept for backward compatibility.
    throw new Error("unexpected too many field names: " + length);
  }

  /** Returns the number of leading bytes {@code key} shares with the previous key. */
  private int prefix(final byte[] key) {
    final int prefix = Arrays.mismatch(lastKeyUtf8, key);
    if (prefix >= 0) return prefix;
    // mismatch() returned -1: the two arrays are identical
    return Math.min(lastKeyUtf8.length, key.length);
  }

  /**
   * Returns the number of trailing bytes that the previous key shares with the part of
   * {@code key} that follows its first {@code kPrefix} bytes.
   */
  private int suffix(final byte[] key, final int kPrefix) {
    final byte[] a = this.lastKeyUtf8;
    final int bLen = key.length - kPrefix;
    final int len = Math.min(a.length, bLen);
    for (int i = 1; i <= len; ++i) {
      if ((a[a.length - i] & 0xff) != (key[kPrefix + (bLen - i)] & 0xff)) {
        return i - 1;
      }
    }
    return len;
  }

  /**
   * Insertion-ordered set of strings where each element is identified by its insertion index.
   * Collisions are chained through {@code table}; no duplicate check is done on insert.
   */
  private static final class IndexedHashSet {
    /** Keys, stored at their insertion index. */
    private String[] values;
    /** Two ints per key: {@code table[2*i]} is the hash, {@code table[2*i + 1]} the next index in the bucket chain (-1 = end). */
    private int[] table; // hash/next
    /** Index of the first key of each bucket chain, or -1 when the bucket is empty. */
    private int[] buckets;
    private int size;

    /**
     * @param estimateSize initial capacity; values below 1 are treated as 1, because a
     *     zero capacity could never grow (doubling 0 stays 0)
     */
    public IndexedHashSet(final int estimateSize) {
      final int capacity = Math.max(1, estimateSize);
      this.values = new String[capacity];
      this.table = new int[capacity * 2];
      this.buckets = new int[tableSizeForItems(capacity)];
      this.size = 0;
      Arrays.fill(buckets, -1);
    }

    /** Returns the number of keys added so far. */
    public int size() {
      return size;
    }

    /** Appends {@code key}, assigning it the next index and making it the head of its bucket chain. */
    public void add(final String key) {
      if (size == values.length) {
        resize();
      }

      final int keyIndex = size++;
      final int keyHash = hash(key);
      final int targetBucket = keyHash & (buckets.length - 1);
      final int itemIndex = keyIndex << 1;
      values[keyIndex] = key;
      table[itemIndex] = keyHash;
      table[itemIndex + 1] = buckets[targetBucket];
      buckets[targetBucket] = keyIndex;
    }

    /** Returns the index of {@code key}, or -1 if it is not present. */
    public int get(final String key) {
      final int hash = hash(key);
      int index = buckets[hash & (buckets.length - 1)];
      while (index >= 0) {
        final int itemIndex = (index << 1);
        // compare the stored hash first to skip most equals() calls
        if (hash == table[itemIndex] && key.equals(values[index])) {
          return index;
        }
        index = table[itemIndex + 1];
      }
      return -1;
    }

    /** Doubles the key storage and, if the bucket count changes, rebuilds every bucket chain. */
    private void resize() {
      this.values = Arrays.copyOf(this.values, this.values.length << 1);
      this.table = Arrays.copyOf(this.table, this.table.length << 1);

      final int newBucketsCount = tableSizeForItems(values.length);
      if (newBucketsCount == buckets.length) return;

      final int[] newBuckets = new int[newBucketsCount];
      Arrays.fill(newBuckets, -1);
      final int mask = newBucketsCount - 1;
      // re-link in insertion order, reusing the hashes stored in the table
      for (int i = 0, itemIndex = 0; i < size; ++i, itemIndex += 2) {
        final int targetBucket = table[itemIndex] & mask;
        table[itemIndex + 1] = newBuckets[targetBucket];
        newBuckets[targetBucket] = i;
      }
      this.buckets = newBuckets;
    }

    /** Returns the smallest power of two that is at least {@code 2 * expectedItems} (for {@code expectedItems >= 1}). */
    private static int tableSizeForItems(final int expectedItems) {
      return 1 << (Integer.SIZE - Integer.numberOfLeadingZeros((expectedItems * 2) - 1));
    }

    /** Mixes the high 16 bits of {@code hashCode()} into the low bits and clears the sign bit. */
    private static int hash(final String key) {
      final int h = key.hashCode();
      return (h ^ (h >>> 16)) & 0x7fffffff;
    }
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy