All downloads are free. Search and download functionality uses the official Maven repository.

io.datarouter.filesystem.snapshot.writer.SnapshotWriter Maven / Gradle / Ivy

The newest version!
/*
 * Copyright © 2009 HotPads (admin@hotpads.com)
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package io.datarouter.filesystem.snapshot.writer;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Optional;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.LinkedBlockingDeque;
import java.util.stream.Collectors;
import java.util.stream.IntStream;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import io.datarouter.bytes.EmptyArray;
import io.datarouter.bytes.codec.bytestringcodec.CsvIntByteStringCodec;
import io.datarouter.filesystem.snapshot.block.root.RootBlock;
import io.datarouter.filesystem.snapshot.encode.BranchBlockEncoder;
import io.datarouter.filesystem.snapshot.encode.LeafBlockEncoder;
import io.datarouter.filesystem.snapshot.encode.ValueBlockEncoder;
import io.datarouter.filesystem.snapshot.entry.SnapshotEntry;
import io.datarouter.filesystem.snapshot.key.SnapshotKey;
import io.datarouter.filesystem.snapshot.storage.block.SnapshotBlockStorage;
import io.datarouter.filesystem.snapshot.storage.file.SnapshotFileStorage;
import io.datarouter.scanner.Scanner;
import io.datarouter.util.concurrent.BlockingDequeTool;
import io.datarouter.util.concurrent.CountDownLatchTool;

/**
 * Writes a snapshot from batches of {@link SnapshotEntry}s.
 *
 * <p>Callers enqueue batches with {@link #addBatch(List)}.  A dedicated writer thread, started by the constructor,
 * dequeues the batches and feeds each entry to the leaf, branch and value block encoders, handing every block that
 * reaches its configured size to the {@link SnapshotBlockWriter}.  {@link #complete()} enqueues a terminal message,
 * waits for the writer thread to finish, submits the partially filled blocks and writes the root block.
 *
 * <p>If the writer thread terminates with an exception, the failure is recorded and rethrown (wrapped in an
 * {@link IllegalStateException}) by the next call to {@link #addBatch(List)} or {@link #complete()} instead of
 * leaving those calls blocked.
 */
public class SnapshotWriter implements AutoCloseable{
	private static final Logger logger = LoggerFactory.getLogger(SnapshotWriter.class);

	private final SnapshotKey snapshotKey;
	private final SnapshotWriterTracker tracker;
	private final SnapshotWriterConfig config;
	private final SnapshotBlockWriter blockWriter;
	private long lastStatusLogMs;

	private final Thread writerThread;
	private final long startTimeMs;
	private final LinkedBlockingDeque<Message> messages;
	private final CountDownLatch writerThreadCompletionLatch;
	// set by the writer thread if it terminates abnormally; read by the threads calling addBatch/complete
	private volatile Throwable writerFailure;

	//branch encoders
	private final List<BranchBlockEncoder> branchBlockEncoders;
	private final List<Integer> numBranchBlocksByLevel;

	//leaf encoder
	private LeafBlockEncoder leafBlockEncoder;
	private long numKeys;
	private int numLeafBlocks;
	private SnapshotEntry lastEntry;

	//value encoder
	private final int numColumns;
	private final List<ValueBlockEncoder> valueBlockEncoders;
	private final List<Integer> numValueBlocksByColumn;
	private final List<Integer> numValuesInBlockByColumn;

	/**
	 * Creates the encoders and the bounded message queue, then starts the writer thread.
	 *
	 * @param snapshotKey identifies the snapshot being written
	 * @param snapshotFileStorage passed through to the {@link SnapshotBlockWriter}
	 * @param snapshotBlockStorage passed through to the {@link SnapshotBlockWriter}
	 * @param config supplies block sizes, encoder suppliers, column count and queue length
	 * @param exec passed through to the {@link SnapshotBlockWriter}
	 */
	public SnapshotWriter(
			SnapshotKey snapshotKey,
			SnapshotFileStorage snapshotFileStorage,
			SnapshotBlockStorage snapshotBlockStorage,
			SnapshotWriterConfig config,
			ExecutorService exec){
		this.snapshotKey = snapshotKey;
		tracker = new SnapshotWriterTracker(snapshotKey);
		this.config = config;
		this.blockWriter = new SnapshotBlockWriter(
				snapshotKey,
				tracker,
				snapshotBlockStorage,
				snapshotFileStorage,
				config, exec);
		lastStatusLogMs = System.currentTimeMillis();

		messages = new LinkedBlockingDeque<>(config.batchQueueLength());
		writerThreadCompletionLatch = new CountDownLatch(1);

		//branch encoders: levels are created lazily by addBranchEntry
		this.branchBlockEncoders = new ArrayList<>();
		this.numBranchBlocksByLevel = new ArrayList<>();

		//leaf encoder
		this.leafBlockEncoder = config.leafBlockEncoderSupplier().get();
		this.numLeafBlocks = 0;

		//value encoder: one encoder and one pair of counters per column
		this.numColumns = config.numColumns();
		this.valueBlockEncoders = new ArrayList<>();
		this.numValueBlocksByColumn = new ArrayList<>();
		this.numValuesInBlockByColumn = new ArrayList<>();
		for(int column = 0; column < numColumns; ++column){
			valueBlockEncoders.add(config.valueBlockEncoderSupplier().get());
			numValueBlocksByColumn.add(0);
			numValuesInBlockByColumn.add(0);
		}

		writerThread = startWriterThread();
		startTimeMs = System.currentTimeMillis();
	}

	/**
	 * Starts the thread that drains {@link #messages} until the terminal message is seen.
	 *
	 * <p>The completion latch is counted down in a {@code finally} block so that {@link #complete()} is released
	 * even when processing an entry throws.  On failure the queue is cleared so that a producer blocked on a full
	 * queue can proceed and observe the failure on its next call.
	 */
	private Thread startWriterThread(){
		Runnable writerRunnable = () -> {
			try{
				Message batch;
				do{
					long beforeNs = System.nanoTime();
					batch = BlockingDequeTool.pollForever(messages);
					long ns = System.nanoTime() - beforeNs;
					tracker.readStallNs.incrementBy(ns);
					batch.entries.forEach(this::add);
					tracker.entriesQueued.decrementBySize(batch.entries);
					tracker.entriesProcessed.incrementBySize(batch.entries);
				}while(!batch.isLast);
			}catch(RuntimeException | Error failure){
				// record before releasing anyone, then rethrow so the thread's uncaught-exception handling still runs
				writerFailure = failure;
				messages.clear();
				throw failure;
			}finally{
				writerThreadCompletionLatch.countDown();
			}
		};
		String writerThreadName = String.join("-", getClass().getSimpleName(), snapshotKey.toString());
		Thread thread = new Thread(writerRunnable, writerThreadName);
		thread.start();
		return thread;
	}

	/**
	 * Interrupts the writer thread.
	 */
	@Override
	public void close(){
		writerThread.interrupt();
	}

	/**
	 * Enqueues a batch of entries for the writer thread and occasionally logs progress.
	 *
	 * @param entries the entries to append, each with exactly {@code config.numColumns()} column values
	 * @throws IllegalStateException if the writer thread has already failed
	 */
	public void addBatch(List<SnapshotEntry> entries){
		throwIfWriterFailed();
		BlockingDequeTool.put(messages, Message.addBatch(entries));
		tracker.entriesQueued.incrementBySize(entries);
		logStatusOccasional();
	}

	/**
	 * Appends one entry to the current leaf block and to each column's current value block, submitting any block
	 * that has reached its configured size.  Runs on the writer thread.
	 *
	 * @throws IllegalArgumentException if the entry has the wrong number of column values, or if sorting is
	 *         enabled and the first key of a new leaf block does not sort after the previous key
	 */
	private void add(SnapshotEntry entry){
		if(numColumns != entry.columnValues.length){
			String message = String.format("Expected %s values but found %s", numColumns, entry.columnValues.length);
			throw new IllegalArgumentException(message);
		}
		if(config.sorted()
				&& lastEntry != null
				&& leafBlockEncoder.numRecords() == 0){// check sorting within block during encoding
			int diff = Arrays.compareUnsigned(
					entry.keySlab(),
					entry.keyFrom(),
					entry.keyTo(),
					lastEntry.keySlab(),
					lastEntry.keyFrom(),
					lastEntry.keyTo());
			if(diff <= 0){
				String message = String.format("key=[%s] must sort after lastKey=[%s]",
						CsvIntByteStringCodec.INSTANCE.encode(entry.key()),
						CsvIntByteStringCodec.INSTANCE.encode(lastEntry.key()));
				throw new IllegalArgumentException(message);
			}
		}

		//keys: the leaf record stores, per column, the value block id and the index inside that block
		long keyId = numKeys;
		int[] valueBlockIds;
		int[] valueIndexes;
		if(numColumns == 0){
			valueBlockIds = EmptyArray.INT;
			valueIndexes = EmptyArray.INT;
		}else{
			valueBlockIds = new int[numColumns];
			valueIndexes = new int[numColumns];
			for(int column = 0; column < numColumns; ++column){
				valueBlockIds[column] = numValueBlocksByColumn.get(column);
				valueIndexes[column] = numValuesInBlockByColumn.get(column);
			}
		}
		leafBlockEncoder.add(numLeafBlocks, keyId, entry, valueBlockIds, valueIndexes);
		if(leafBlockEncoder.numBytes() >= config.leafBlockSize()){
			addBranchEntry(0, keyId, entry, numLeafBlocks);
			blockWriter.submitLeaf(leafBlockEncoder);
			leafBlockEncoder = config.leafBlockEncoderSupplier().get();
			++numLeafBlocks;
		}

		//values
		for(int column = 0; column < numColumns; ++column){
			ValueBlockEncoder valueBlockEncoder = valueBlockEncoders.get(column);
			valueBlockEncoder.add(entry, column);
			numValuesInBlockByColumn.set(column, numValuesInBlockByColumn.get(column) + 1);
			if(valueBlockEncoder.numBytes() >= config.valueBlockSize()){
				blockWriter.submitValueBlock(column, numValueBlocksByColumn.get(column), valueBlockEncoder);
				valueBlockEncoders.set(column, config.valueBlockEncoderSupplier().get());
				numValueBlocksByColumn.set(column, numValueBlocksByColumn.get(column) + 1);
				numValuesInBlockByColumn.set(column, 0);
			}
		}

		++numKeys;
		lastEntry = entry;
	}

	/**
	 * Adds a record pointing at {@code childBlockId} to the current branch block of {@code level}, creating the
	 * level on first use.  When that branch block reaches the configured size, a record for it is added one level
	 * up (recursively) and the block is submitted and replaced by a fresh encoder.
	 */
	private void addBranchEntry(int level, long keyId, SnapshotEntry entry, int childBlockId){
		if(level > branchBlockEncoders.size() - 1){
			branchBlockEncoders.add(config.branchBlockEncoderFactory().apply(level));
			numBranchBlocksByLevel.add(0);
		}
		BranchBlockEncoder encoder = branchBlockEncoders.get(level);
		int blockId = numBranchBlocksByLevel.get(level);
		encoder.add(blockId, keyId, entry, childBlockId);
		if(encoder.numBytes() >= config.branchBlockSize()){
			addBranchEntry(level + 1, keyId, entry, numBranchBlocksByLevel.get(level));
			blockWriter.submitBranch(encoder);
			branchBlockEncoders.set(level, config.branchBlockEncoderFactory().apply(level));
			numBranchBlocksByLevel.set(level, numBranchBlocksByLevel.get(level) + 1);
		}
	}

	/**
	 * Signals the end of input, waits for the writer thread to drain the queue, submits all partially filled
	 * blocks and writes the root block.
	 *
	 * @return the written root block, or empty if no keys were added
	 * @throws IllegalStateException if the writer thread failed while processing entries
	 */
	public Optional<RootBlock> complete(){
		throwIfWriterFailed();
		BlockingDequeTool.put(messages, Message.last());
		CountDownLatchTool.await(writerThreadCompletionLatch);
		// the latch is also released on failure, so re-check before touching encoder state
		throwIfWriterFailed();

		// finish value blocks
		IntStream.range(0, valueBlockEncoders.size()).forEach(column -> {
			ValueBlockEncoder valueBlockEncoder = valueBlockEncoders.get(column);
			if(valueBlockEncoder.numRecords() > 0){
				blockWriter.submitValueBlock(column, numValueBlocksByColumn.get(column), valueBlockEncoder);
				numValueBlocksByColumn.set(column, numValueBlocksByColumn.get(column) + 1);
			}
		});

		//finish leaf blocks
		if(leafBlockEncoder.numRecords() > 0){
			addBranchEntry(0, numKeys, lastEntry, numLeafBlocks);
			blockWriter.submitLeaf(leafBlockEncoder);
			++numLeafBlocks;
		}

		//finish branch blocks, bottom level first
		IntStream.range(0, branchBlockEncoders.size()).forEach(level -> {
			BranchBlockEncoder branchEncoder = branchBlockEncoders.get(level);
			if(branchEncoder.numRecords() > 0){
				if(level != branchBlockEncoders.size() - 1){// avoid creating a root block with only one entry
					addBranchEntry(level + 1, numKeys, lastEntry, numBranchBlocksByLevel.get(level));
				}
				blockWriter.submitBranch(branchEncoder);
				branchBlockEncoders.set(level, config.branchBlockEncoderFactory().apply(level));
				numBranchBlocksByLevel.set(level, numBranchBlocksByLevel.get(level) + 1);
			}
		});

		//complete file uploads (could parallelize this?)
		blockWriter.complete();

		//write root block
		if(numKeys == 0){
			return Optional.empty();
		}
		//TODO write to cache if config.updateCache
		RootBlock root = blockWriter.flushRootBlock(
				startTimeMs,
				numBranchBlocksByLevel,
				numValueBlocksByColumn,
				branchBlockEncoders.size(),
				numKeys,
				numLeafBlocks);

		// log completion
		logStatus();
		String logTokens = Scanner.of(root.toKeyValueStrings().entrySet())
				.map(kv -> kv.getKey() + "=" + kv.getValue())
				.collect(Collectors.joining(", "));
		logger.warn("Completed group={}, id={}, {}", snapshotKey.groupId(), snapshotKey.snapshotId(), logTokens);
		return Optional.of(root);
	}

	/**
	 * Throws if the writer thread has recorded a failure, attaching that failure as the cause.
	 */
	private void throwIfWriterFailed(){
		Throwable failure = writerFailure;
		if(failure != null){
			String message = String.format("writer thread failed for snapshotKey=%s", snapshotKey);
			throw new IllegalStateException(message, failure);
		}
	}

	/**
	 * Logs the tracker status if more than {@code config.logPeriodMs()} has elapsed since the last status log.
	 */
	private void logStatusOccasional(){
		long now = System.currentTimeMillis();
		long elapsedMs = now - lastStatusLogMs;
		if(elapsedMs > config.logPeriodMs()){
			logStatus();
		}
	}

	/**
	 * Logs the tracker status with the time elapsed since construction and resets the status-log timer.
	 */
	private void logStatus(){
		long elapsedMs = System.currentTimeMillis() - startTimeMs;
		logger.warn("{}", tracker.toLog(elapsedMs));
		lastStatusLogMs = System.currentTimeMillis();
	}

	/**
	 * Queue element: either a batch of entries or the terminal marker that stops the writer thread.
	 */
	private static class Message{

		final List<SnapshotEntry> entries;
		final boolean isLast;

		Message(List<SnapshotEntry> entries, boolean isLast){
			this.entries = entries;
			this.isLast = isLast;
		}

		static Message addBatch(List<SnapshotEntry> entries){
			return new Message(entries, false);
		}

		static Message last(){
			return new Message(List.of(), true);
		}

	}

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy