/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.iotdb.db.tools;

import org.apache.iotdb.commons.conf.IoTDBConstant;
import org.apache.iotdb.db.conf.IoTDBDescriptor;
import org.apache.iotdb.db.storageengine.dataregion.modification.ModificationFile;
import org.apache.iotdb.db.storageengine.dataregion.tsfile.TsFileResource;
import org.apache.iotdb.db.storageengine.dataregion.tsfile.TsFileResourceStatus;
import org.apache.iotdb.tsfile.common.conf.TSFileDescriptor;
import org.apache.iotdb.tsfile.common.constant.TsFileConstant;
import org.apache.iotdb.tsfile.encoding.decoder.Decoder;
import org.apache.iotdb.tsfile.exception.write.UnSupportedDataTypeException;
import org.apache.iotdb.tsfile.file.MetaMarker;
import org.apache.iotdb.tsfile.file.header.ChunkHeader;
import org.apache.iotdb.tsfile.file.header.PageHeader;
import org.apache.iotdb.tsfile.file.metadata.ChunkMetadata;
import org.apache.iotdb.tsfile.file.metadata.IDeviceID;
import org.apache.iotdb.tsfile.file.metadata.TimeseriesMetadata;
import org.apache.iotdb.tsfile.file.metadata.enums.TSDataType;
import org.apache.iotdb.tsfile.file.metadata.enums.TSEncoding;
import org.apache.iotdb.tsfile.fileSystem.FSFactoryProducer;
import org.apache.iotdb.tsfile.fileSystem.fsFactory.FSFactory;
import org.apache.iotdb.tsfile.read.TsFileSequenceReader;
import org.apache.iotdb.tsfile.read.common.BatchData;
import org.apache.iotdb.tsfile.read.common.Path;
import org.apache.iotdb.tsfile.read.reader.page.PageReader;
import org.apache.iotdb.tsfile.utils.Binary;
import org.apache.iotdb.tsfile.write.chunk.ChunkWriterImpl;
import org.apache.iotdb.tsfile.write.schema.MeasurementSchema;
import org.apache.iotdb.tsfile.write.writer.TsFileIOWriter;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;

public class TsFileSplitTool {

  private static final Logger LOGGER = LoggerFactory.getLogger(TsFileSplitTool.class);

  private final String filename;

  private static final String SIZE_PARAM = "-size";
  private static final String LEVEL_PARAM = "-level";

  /**
   * If the number of points in a chunk is lower than this threshold, the chunk will be
   * deserialized into points. The default is 100.
   */
  private final long chunkPointNumLowerBoundInCompaction =
      IoTDBDescriptor.getInstance().getConfig().getChunkPointNumLowerBoundInCompaction();

  private static long targetSplitFileSize =
      IoTDBDescriptor.getInstance().getConfig().getTargetCompactionFileSize();

  private static String levelNum = "10";

  private static final FSFactory fsFactory = FSFactoryProducer.getFSFactory();

  // TODO maxPlanIndex and minPlanIndex should be modified after cluster is refactored
  // Maximum index of plans executed within this TsFile
  protected long maxPlanIndex = Long.MIN_VALUE;

  // Minimum index of plans executed within this TsFile.
  protected long minPlanIndex = Long.MAX_VALUE;

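  /**
   * Command-line entry point. The first argument is the TsFile to split, followed by
   * {@code -size <bytes>} and/or {@code -level <levelNum>} (checkArgs requires at least one of
   * them). A minimal usage sketch; the jar name and file path below are illustrative assumptions,
   * not taken from this source:
   *
   * <pre>
   *   java -cp iotdb-server.jar org.apache.iotdb.db.tools.TsFileSplitTool \
   *       /data/sequence/root.sg1/0/0/1678000000000-1-0-0.tsfile -size 1073741824 -level 10
   * </pre>
   */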
  public static void main(String[] args) throws IOException {
    checkArgs(args);
    String fileName = args[0];
    LOGGER.info("Splitting TsFile {} ...", fileName);
    new TsFileSplitTool(fileName).run();
  }

  /* construct TsFileSplitTool */
  public TsFileSplitTool(String filename) {
    this.filename = filename;
  }

  /* entry point of the tool */
  @SuppressWarnings("squid:S3776") // Suppress high Cognitive Complexity warning
  public void run() throws IOException {
    if (fsFactory.getFile(filename + ModificationFile.FILE_SUFFIX).exists()) {
      throw new IOException("Splitting a TsFile with modifications is not supported currently.");
    }

    TsFileIOWriter writer = null;

    try (TsFileSequenceReader reader = new TsFileSequenceReader(filename)) {
      Iterator<List<Path>> pathIterator = reader.getPathsIterator();
      Set<IDeviceID> devices = new HashSet<>();
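      // Assumed file-name convention (not stated in this source): the TsFile is named
      // {time}-{version}-{level}-{...}.tsfile and the rest of the path contains no "-", so after
      // splitting the full path on the separator, the element at length-3 is the version number
      // and the element at length-2 is the level that gets overwritten with levelNum.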
      String[] filePathSplit = filename.split(IoTDBConstant.FILE_NAME_SEPARATOR);
      int originVersionIndex = Integer.parseInt(filePathSplit[filePathSplit.length - 3]);
      int versionIndex = originVersionIndex + 1;
      filePathSplit[filePathSplit.length - 2] = levelNum;

      while (pathIterator.hasNext()) {
        for (Path path : pathIterator.next()) {
          IDeviceID deviceId = path.getIDeviceID();
          if (devices.add(deviceId)) {
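            // First time this device is seen: either keep appending to the current target file
            // (if it is still below targetSplitFileSize) or seal it and roll over to a new TsFile
            // with the next version index.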
            if (writer != null && writer.getPos() < targetSplitFileSize) {
              writer.endChunkGroup();
              writer.startChunkGroup(deviceId);
            } else {
              if (writer != null) {
                // seal last TsFile
                TsFileResource resource = endFileAndGenerateResource(writer);
                resource.close();
              }

              filePathSplit[filePathSplit.length - 3] = String.valueOf(versionIndex);
              StringBuilder sb = new StringBuilder();
              for (int i = 0; i < filePathSplit.length; i++) {
                sb.append(filePathSplit[i]);
                if (i != filePathSplit.length - 1) {
                  sb.append(IoTDBConstant.FILE_NAME_SEPARATOR);
                }
              }
              // open a new TsFile
              writer = new TsFileIOWriter(fsFactory.getFile(sb.toString()));
              versionIndex++;
              writer.startChunkGroup(deviceId);
            }
          }

          List<ChunkMetadata> chunkMetadataList = reader.getChunkMetadataList(path);
          assert writer != null;

          ChunkMetadata firstChunkMetadata = chunkMetadataList.get(0);
          reader.position(firstChunkMetadata.getOffsetOfChunkHeader());
          ChunkHeader chunkHeader = reader.readChunkHeader(reader.readMarker());
          if (chunkHeader.getChunkType()
                  == (byte) (MetaMarker.CHUNK_HEADER | TsFileConstant.TIME_COLUMN_MASK)
              || chunkHeader.getChunkType()
                  == (byte) (MetaMarker.CHUNK_HEADER | TsFileConstant.VALUE_COLUMN_MASK)
              || chunkHeader.getChunkType()
                  == (byte)
                      (MetaMarker.ONLY_ONE_PAGE_CHUNK_HEADER | TsFileConstant.TIME_COLUMN_MASK)
              || chunkHeader.getChunkType()
                  == (byte)
                      (MetaMarker.ONLY_ONE_PAGE_CHUNK_HEADER | TsFileConstant.VALUE_COLUMN_MASK)) {
            throw new IOException(
                "Splitting a TsFile with aligned timeseries is not supported currently.");
          }

          MeasurementSchema measurementSchema =
              new MeasurementSchema(
                  chunkHeader.getMeasurementID(), chunkHeader.getDataType(),
                  chunkHeader.getEncodingType(), chunkHeader.getCompressionType());

          int numInChunk = 0;
          ChunkWriterImpl chunkWriter = new ChunkWriterImpl(measurementSchema);

          for (int i = 0; i < chunkMetadataList.size(); i++) {
            if (i != 0) {
              reader.position(chunkMetadataList.get(i).getOffsetOfChunkHeader());
              chunkHeader = reader.readChunkHeader(reader.readMarker());
            }

            TSDataType dataType = chunkHeader.getDataType();
            int dataSize = chunkHeader.getDataSize();
            Decoder valueDecoder =
                Decoder.getDecoderByType(chunkHeader.getEncodingType(), dataType);
            Decoder defaultTimeDecoder =
                Decoder.getDecoderByType(
                    TSEncoding.valueOf(TSFileDescriptor.getInstance().getConfig().getTimeEncoder()),
                    TSDataType.INT64);

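            // Walk the chunk body page by page: read each page header and payload, push the
            // decoded points through the chunk writer, and stop once the remaining dataSize
            // reaches 0.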
            while (dataSize > 0) {
              valueDecoder.reset();

              // a new Page
              PageHeader pageHeader =
                  reader.readPageHeader(
                      dataType,
                      ((byte) (chunkHeader.getChunkType() & 0x3F)) == MetaMarker.CHUNK_HEADER);
              ByteBuffer pageData = reader.readPage(pageHeader, chunkHeader.getCompressionType());
              PageReader pageReader =
                  new PageReader(pageData, dataType, valueDecoder, defaultTimeDecoder);
              BatchData batchData = pageReader.getAllSatisfiedPageData();

              while (batchData.hasCurrent()) {
                writeToChunkWriter(
                    chunkWriter,
                    batchData.currentTime(),
                    batchData.currentValue(),
                    chunkHeader.getDataType());
                numInChunk++;
                if (numInChunk == chunkPointNumLowerBoundInCompaction) {
                  chunkWriter.writeToFileWriter(writer);
                  numInChunk = 0;
                }
                batchData.next();
              }

              dataSize -= pageHeader.getSerializedPageSize();
            }
          }
          if (numInChunk != 0) {
            chunkWriter.writeToFileWriter(writer);
          }
        }
      }

      if (writer != null) {
        // seal last TsFile
        TsFileResource resource = endFileAndGenerateResource(writer);
        resource.close();
      }
    } finally {
      if (writer != null) {
        writer.close();
      }
    }
  }

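  /** Writes a single time/value pair to the chunk writer, dispatching on the data type. */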
  private void writeToChunkWriter(
      ChunkWriterImpl chunkWriter, long time, Object value, TSDataType dataType) {
    switch (dataType) {
      case INT32:
        chunkWriter.write(time, (int) value);
        break;
      case INT64:
        chunkWriter.write(time, (long) value);
        break;
      case FLOAT:
        chunkWriter.write(time, (float) value);
        break;
      case DOUBLE:
        chunkWriter.write(time, (double) value);
        break;
      case BOOLEAN:
        chunkWriter.write(time, (boolean) value);
        break;
      case TEXT:
        chunkWriter.write(time, (Binary) value);
        break;
      default:
        throw new UnSupportedDataTypeException(
            String.format("Data type %s is not supported.", dataType));
    }
  }

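  /**
   * Seals the current chunk group and the file, then builds and serializes the companion
   * .resource file, recording the start/end time of every device from the written
   * TimeseriesMetadata.
   */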
  private TsFileResource endFileAndGenerateResource(TsFileIOWriter writer) throws IOException {
    writer.endChunkGroup();
    writer.endFile();

    TsFileResource tsFileResource = new TsFileResource(writer.getFile());
    Map<IDeviceID, List<TimeseriesMetadata>> deviceTimeseriesMetadataMap =
        writer.getDeviceTimeseriesMetadataMap();
    for (Map.Entry<IDeviceID, List<TimeseriesMetadata>> entry :
        deviceTimeseriesMetadataMap.entrySet()) {
      IDeviceID device = entry.getKey();
      for (TimeseriesMetadata timeseriesMetaData : entry.getValue()) {
        tsFileResource.updateStartTime(device, timeseriesMetaData.getStatistics().getStartTime());
        tsFileResource.updateEndTime(device, timeseriesMetaData.getStatistics().getEndTime());
      }
    }
    tsFileResource.setMinPlanIndex(minPlanIndex);
    tsFileResource.setMaxPlanIndex(maxPlanIndex);
    tsFileResource.setStatus(TsFileResourceStatus.NORMAL);
    tsFileResource.serialize();

    return tsFileResource;
  }

  private static void checkArgs(String[] args) {
    if (args.length == 3) {
      if (args[1].equals(SIZE_PARAM)) {
        targetSplitFileSize = Long.parseLong(args[2]);
        return;
      } else if (args[1].equals(LEVEL_PARAM)) {
        levelNum = args[2];
        return;
      }
    } else if (args.length == 5) {
      if (args[1].equals(SIZE_PARAM) && args[3].equals(LEVEL_PARAM)) {
        targetSplitFileSize = Long.parseLong(args[2]);
        levelNum = args[4];
        return;
      } else if (args[1].equals(LEVEL_PARAM) && args[3].equals(SIZE_PARAM)) {
        levelNum = args[2];
        targetSplitFileSize = Long.parseLong(args[4]);
        return;
      }
    }
    throw new UnsupportedOperationException("Invalid parameters");
  }
}