All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.druid.segment.nested.DictionaryIdLookup Maven / Gradle / Ivy

There is a newer version: 30.0.1
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.druid.segment.nested;

import com.google.common.primitives.Ints;
import org.apache.druid.annotations.SuppressFBWarnings;
import org.apache.druid.error.DruidException;
import org.apache.druid.java.util.common.ByteBufferUtils;
import org.apache.druid.java.util.common.FileUtils;
import org.apache.druid.java.util.common.StringUtils;
import org.apache.druid.java.util.common.io.smoosh.FileSmoosher;
import org.apache.druid.java.util.common.io.smoosh.SmooshedFileMapper;
import org.apache.druid.java.util.common.io.smoosh.SmooshedWriter;
import org.apache.druid.segment.column.StringEncodingStrategies;
import org.apache.druid.segment.column.TypeStrategies;
import org.apache.druid.segment.data.DictionaryWriter;
import org.apache.druid.segment.data.FixedIndexed;
import org.apache.druid.segment.data.FrontCodedIntArrayIndexed;
import org.apache.druid.segment.data.Indexed;

import javax.annotation.Nullable;
import java.io.Closeable;
import java.io.File;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.MappedByteBuffer;
import java.nio.channels.FileChannel;
import java.nio.channels.GatheringByteChannel;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardOpenOption;
import java.util.EnumSet;

/**
 * Value to dictionary id lookup, backed with memory mapped dictionaries populated lazily by the supplied
 * @link DictionaryWriter}.
 */
public final class DictionaryIdLookup implements Closeable
{
  private final String name;
  private final Path tempBasePath;

  @Nullable
  private final DictionaryWriter stringDictionaryWriter;
  private Path stringDictionaryFile = null;
  private SmooshedFileMapper stringBufferMapper = null;
  private Indexed stringDictionary = null;

  @Nullable
  private final DictionaryWriter longDictionaryWriter;
  private Path longDictionaryFile = null;
  private MappedByteBuffer longBuffer = null;
  private FixedIndexed longDictionary = null;

  @Nullable
  private final DictionaryWriter doubleDictionaryWriter;
  private Path doubleDictionaryFile = null;
  MappedByteBuffer doubleBuffer = null;
  FixedIndexed doubleDictionary = null;

  @Nullable
  private final DictionaryWriter arrayDictionaryWriter;
  private Path arrayDictionaryFile = null;
  private MappedByteBuffer arrayBuffer = null;
  private FrontCodedIntArrayIndexed arrayDictionary = null;

  public DictionaryIdLookup(
      String name,
      Path tempBasePath,
      @Nullable DictionaryWriter stringDictionaryWriter,
      @Nullable DictionaryWriter longDictionaryWriter,
      @Nullable DictionaryWriter doubleDictionaryWriter,
      @Nullable DictionaryWriter arrayDictionaryWriter
  )
  {
    this.name = name;
    this.tempBasePath = tempBasePath;
    this.stringDictionaryWriter = stringDictionaryWriter;
    this.longDictionaryWriter = longDictionaryWriter;
    this.doubleDictionaryWriter = doubleDictionaryWriter;
    this.arrayDictionaryWriter = arrayDictionaryWriter;
  }

  public int lookupString(@Nullable String value)
  {
    if (stringDictionary == null) {
      // GenericIndexed v2 can write to multiple files if the dictionary is larger than 2gb, so we use a smooshfile
      // for strings because of this. if other type dictionary writers could potentially use multiple internal files
      // in the future, we should transition them to using this approach as well (or build a combination smoosher and
      // mapper so that we can have a mutable smoosh)
      File stringSmoosh = FileUtils.createTempDirInLocation(tempBasePath, StringUtils.urlEncode(name) + "__stringTempSmoosh");
      stringDictionaryFile = stringSmoosh.toPath();
      final String fileName = NestedCommonFormatColumnSerializer.getInternalFileName(
          name,
          NestedCommonFormatColumnSerializer.STRING_DICTIONARY_FILE_NAME
      );

      try (
          final FileSmoosher smoosher = new FileSmoosher(stringSmoosh);
          final SmooshedWriter writer = smoosher.addWithSmooshedWriter(
              fileName,
              stringDictionaryWriter.getSerializedSize()
          )
      ) {
        stringDictionaryWriter.writeTo(writer, smoosher);
        writer.close();
        smoosher.close();
        stringBufferMapper = SmooshedFileMapper.load(stringSmoosh);
        final ByteBuffer stringBuffer = stringBufferMapper.mapFile(fileName);
        stringDictionary = StringEncodingStrategies.getStringDictionarySupplier(
            stringBufferMapper,
            stringBuffer,
            ByteOrder.nativeOrder()
        ).get();
      }
      catch (IOException e) {
        throw new RuntimeException(e);
      }
    }
    final byte[] bytes = StringUtils.toUtf8Nullable(value);
    final int index = stringDictionary.indexOf(bytes == null ? null : ByteBuffer.wrap(bytes));
    if (index < 0) {
      throw DruidException.defensive("Value not found in column[%s] string dictionary", name);
    }
    return index;
  }

  public int lookupLong(@Nullable Long value)
  {
    if (longDictionary == null) {
      longDictionaryFile = makeTempFile(name + NestedCommonFormatColumnSerializer.LONG_DICTIONARY_FILE_NAME);
      longBuffer = mapWriter(longDictionaryFile, longDictionaryWriter);
      longDictionary = FixedIndexed.read(longBuffer, TypeStrategies.LONG, ByteOrder.nativeOrder(), Long.BYTES).get();
      // reset position
      longBuffer.position(0);
    }
    final int index = longDictionary.indexOf(value);
    if (index < 0) {
      throw DruidException.defensive("Value not found in column[%s] long dictionary", name);
    }
    return index + longOffset();
  }

  public int lookupDouble(@Nullable Double value)
  {
    if (doubleDictionary == null) {
      doubleDictionaryFile = makeTempFile(name + NestedCommonFormatColumnSerializer.DOUBLE_DICTIONARY_FILE_NAME);
      doubleBuffer = mapWriter(doubleDictionaryFile, doubleDictionaryWriter);
      doubleDictionary = FixedIndexed.read(
          doubleBuffer,
          TypeStrategies.DOUBLE,
          ByteOrder.nativeOrder(),
          Double.BYTES
      ).get();
      // reset position
      doubleBuffer.position(0);
    }
    final int index = doubleDictionary.indexOf(value);
    if (index < 0) {
      throw DruidException.defensive("Value not found in column[%s] double dictionary", name);
    }
    return index + doubleOffset();
  }

  public int lookupArray(@Nullable int[] value)
  {
    if (arrayDictionary == null) {
      arrayDictionaryFile = makeTempFile(name + NestedCommonFormatColumnSerializer.ARRAY_DICTIONARY_FILE_NAME);
      arrayBuffer = mapWriter(arrayDictionaryFile, arrayDictionaryWriter);
      arrayDictionary = FrontCodedIntArrayIndexed.read(arrayBuffer, ByteOrder.nativeOrder()).get();
      // reset position
      arrayBuffer.position(0);
    }
    final int index = arrayDictionary.indexOf(value);
    if (index < 0) {
      throw DruidException.defensive("Value not found in column[%s] array dictionary", name);
    }
    return index + arrayOffset();
  }

  @Nullable
  public SmooshedFileMapper getStringBufferMapper()
  {
    return stringBufferMapper;
  }

  @Nullable
  public ByteBuffer getLongBuffer()
  {
    return longBuffer;
  }

  @Nullable
  public ByteBuffer getDoubleBuffer()
  {
    return doubleBuffer;
  }

  @Nullable
  public ByteBuffer getArrayBuffer()
  {
    return arrayBuffer;
  }

  @Override
  public void close()
  {
    if (stringBufferMapper != null) {
      stringBufferMapper.close();
      deleteTempFile(stringDictionaryFile);
    }
    if (longBuffer != null) {
      ByteBufferUtils.unmap(longBuffer);
      deleteTempFile(longDictionaryFile);
    }
    if (doubleBuffer != null) {
      ByteBufferUtils.unmap(doubleBuffer);
      deleteTempFile(doubleDictionaryFile);
    }
    if (arrayBuffer != null) {
      ByteBufferUtils.unmap(arrayBuffer);
      deleteTempFile(arrayDictionaryFile);
    }
  }

  private int longOffset()
  {
    return stringDictionaryWriter != null ? stringDictionaryWriter.getCardinality() : 0;
  }

  private int doubleOffset()
  {
    return longOffset() + (longDictionaryWriter != null ? longDictionaryWriter.getCardinality() : 0);
  }

  private int arrayOffset()
  {
    return doubleOffset() + (doubleDictionaryWriter != null ? doubleDictionaryWriter.getCardinality() : 0);
  }

  private Path makeTempFile(String name)
  {
    try {
      return Files.createTempFile(tempBasePath, StringUtils.urlEncode(name), null);
    }
    catch (IOException e) {
      throw new RuntimeException(e);
    }
  }

  private void deleteTempFile(Path path)
  {
    try {
      final File file = path.toFile();
      if (file.isDirectory()) {
        FileUtils.deleteDirectory(file);
      } else {
        Files.delete(path);
      }
    }
    catch (IOException e) {
      throw new RuntimeException(e);
    }
  }

  @SuppressFBWarnings("NP_NONNULL_PARAM_VIOLATION")
  private MappedByteBuffer mapWriter(Path path, DictionaryWriter writer)
  {
    final EnumSet options = EnumSet.of(
        StandardOpenOption.READ,
        StandardOpenOption.WRITE,
        StandardOpenOption.CREATE,
        StandardOpenOption.TRUNCATE_EXISTING
    );

    try (FileChannel fileChannel = FileChannel.open(path, options);
         GatheringByteChannel smooshChannel = makeWriter(fileChannel, writer.getSerializedSize())) {
      //noinspection DataFlowIssue
      writer.writeTo(smooshChannel, null);
      return fileChannel.map(FileChannel.MapMode.READ_ONLY, 0, writer.getSerializedSize());
    }
    catch (IOException e) {
      throw new RuntimeException(e);
    }
  }

  private GatheringByteChannel makeWriter(FileChannel channel, long size)
  {
    // basically same code as smooshed writer, can't use channel directly because copying between channels
    // doesn't handle size of source channel correctly
    return new GatheringByteChannel()
    {
      private boolean isClosed = false;
      private long currOffset = 0;

      @Override
      public boolean isOpen()
      {
        return !isClosed;
      }

      @Override
      public void close() throws IOException
      {
        channel.close();
        isClosed = true;
      }

      public int bytesLeft()
      {
        return (int) (size - currOffset);
      }

      @Override
      public int write(ByteBuffer buffer) throws IOException
      {
        return addToOffset(channel.write(buffer));
      }

      @Override
      public long write(ByteBuffer[] srcs, int offset, int length) throws IOException
      {
        return addToOffset(channel.write(srcs, offset, length));
      }

      @Override
      public long write(ByteBuffer[] srcs) throws IOException
      {
        return addToOffset(channel.write(srcs));
      }

      public int addToOffset(long numBytesWritten)
      {
        if (numBytesWritten > bytesLeft()) {
          throw DruidException.defensive(
              "Wrote more bytes[%,d] than available[%,d]. Don't do that.",
              numBytesWritten,
              bytesLeft()
          );
        }
        currOffset += numBytesWritten;

        return Ints.checkedCast(numBytesWritten);
      }
    };
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy