com.hazelcast.jet.sql.impl.connector.map.MapIndexScanP
/*
 * Copyright 2021 Hazelcast Inc.
 *
 * Licensed under the Hazelcast Community License (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://hazelcast.com/hazelcast-community-license
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.hazelcast.jet.sql.impl.connector.map;

import com.hazelcast.cluster.Address;
import com.hazelcast.core.HazelcastInstance;
import com.hazelcast.core.HazelcastInstanceNotActiveException;
import com.hazelcast.core.MemberLeftException;
import com.hazelcast.internal.iteration.IndexIterationPointer;
import com.hazelcast.internal.partition.InternalPartitionService;
import com.hazelcast.internal.serialization.InternalSerializationService;
import com.hazelcast.internal.util.collection.PartitionIdSet;
import com.hazelcast.jet.core.AbstractProcessor;
import com.hazelcast.jet.core.Processor;
import com.hazelcast.jet.core.ProcessorSupplier;
import com.hazelcast.jet.impl.connector.AbstractIndexReader;
import com.hazelcast.jet.sql.impl.ExpressionUtil;
import com.hazelcast.map.impl.operation.MapFetchIndexOperation;
import com.hazelcast.map.impl.operation.MapFetchIndexOperation.MapFetchIndexOperationResult;
import com.hazelcast.map.impl.operation.MapFetchIndexOperation.MissingPartitionException;
import com.hazelcast.map.impl.operation.MapOperationProvider;
import com.hazelcast.map.impl.proxy.MapProxyImpl;
import com.hazelcast.nio.ObjectDataInput;
import com.hazelcast.nio.ObjectDataOutput;
import com.hazelcast.nio.serialization.DataSerializable;
import com.hazelcast.query.impl.QueryableEntry;
import com.hazelcast.query.impl.getters.Extractors;
import com.hazelcast.security.permission.MapPermission;
import com.hazelcast.spi.exception.TargetDisconnectedException;
import com.hazelcast.spi.exception.TargetNotMemberException;
import com.hazelcast.spi.exception.WrongTargetException;
import com.hazelcast.spi.impl.InternalCompletableFuture;
import com.hazelcast.spi.impl.operationservice.Operation;
import com.hazelcast.sql.impl.exec.scan.MapIndexScanMetadata;
import com.hazelcast.sql.impl.exec.scan.MapScanRow;
import com.hazelcast.sql.impl.exec.scan.index.IndexFilter;
import com.hazelcast.sql.impl.expression.ExpressionEvalContext;
import com.hazelcast.sql.impl.row.JetSqlRow;

import javax.annotation.Nonnull;
import java.io.IOException;
import java.security.Permission;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.PrimitiveIterator;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ExecutionException;
import java.util.stream.IntStream;

import static com.hazelcast.jet.impl.util.Util.getNodeEngine;
import static com.hazelcast.query.impl.getters.GetterCache.SIMPLE_GETTER_CACHE_SUPPLIER;
import static com.hazelcast.security.permission.ActionConstants.ACTION_CREATE;
import static com.hazelcast.security.permission.ActionConstants.ACTION_READ;
import static java.util.Collections.emptyList;
import static java.util.Collections.singletonList;
import static java.util.concurrent.TimeUnit.MILLISECONDS;
import static java.util.stream.Collectors.toList;

/**
 * Implementation of a migration-tolerant IMap index scan processor.
 * <p>
 * The key component of the migration-tolerant index scan is the so-called 'split'.
 * It is the basic unit of execution, attached to a concrete member; during
 * normal execution a single instance exists.
 * <p>
 * The scan algorithm initially assumes that all assigned partitions are local.
 * The processor sends a {@link MapFetchIndexOperation} to the local member
 * during normal execution, and to all members which own partitions assigned
 * to the processor after a migration. After a response is received, the
 * processor saves the new `pointers` to `lastPointers`, emits to the
 * processor's outbox, and sends another operation with the new pointers.
 * <p>
 * During partition migration the processor will split the partitions in the
 * original `split` into disjoint parts, according to the owner, and retry
 * with the new owners. If all partitions in a `split` were read, the
 * `split` is removed from execution.
 */
final class MapIndexScanP extends AbstractProcessor {

    private static final long DELAY_AFTER_MISSING_PARTITION = MILLISECONDS.toNanos(100);

    private final MapIndexScanMetadata metadata;

    private HazelcastInstance hazelcastInstance;
    private ExpressionEvalContext evalContext;
    private AbstractIndexReader<MapFetchIndexOperationResult, QueryableEntry<?, ?>> reader;

    private final ArrayList<Split> splits = new ArrayList<>();
    private MapScanRow row;
    private JetSqlRow pendingItem;
    private boolean isIndexSorted;

    private MapIndexScanP(@Nonnull MapIndexScanMetadata indexScanMetadata) {
        this.metadata = indexScanMetadata;
    }

    @Override
    protected void init(@Nonnull Context context) {
        hazelcastInstance = context.hazelcastInstance();
        evalContext = ExpressionEvalContext.from(context);
        reader = new LocalMapIndexReader(hazelcastInstance, evalContext.getSerializationService(), metadata);

        int[] memberPartitions = context.processorPartitions();
        splits.add(new Split(
                new PartitionIdSet(hazelcastInstance.getPartitionService().getPartitions().size(), memberPartitions),
                hazelcastInstance.getCluster().getLocalMember().getAddress(),
                filtersToPointers(metadata.getFilter(), metadata.isDescending(), evalContext)
        ));
        row = MapScanRow.create(
                metadata.getKeyDescriptor(),
                metadata.getValueDescriptor(),
                metadata.getFieldPaths(),
                metadata.getFieldTypes(),
                Extractors.newBuilder(evalContext.getSerializationService())
                        .setGetterCacheSupplier(SIMPLE_GETTER_CACHE_SUPPLIER)
                        .build(),
                evalContext.getSerializationService()
        );
        isIndexSorted = metadata.getComparator() != null;
    }

    private static IndexIterationPointer[] filtersToPointers(
            @Nonnull IndexFilter filter,
            boolean descending,
            ExpressionEvalContext evalContext
    ) {
        return IndexIterationPointer.createFromIndexFilter(filter, descending, evalContext);
    }

    @Override
    public boolean complete() {
        return isIndexSorted ? runSortedIndex() : runHashIndex();
    }

    @Override
    public boolean closeIsCooperative() {
        return true;
    }

    private boolean runSortedIndex() {
        for (; ; ) {
            if (pendingItem != null && !tryEmit(pendingItem)) {
                return false;
            } else {
                pendingItem = null;
            }

            JetSqlRow extreme = null;
            int extremeIndex = -1;
            for (int i = 0; i < splits.size(); ++i) {
                Split split = splits.get(i);
                try {
                    split.peek();
                } catch (MissingPartitionException e) {
                    splits.addAll(splitOnMigration(split));
                    splits.remove(i--);
                    continue;
                }

                if (split.currentRow == null) {
                    if (split.done()) {
                        // No more items to read, remove finished split.
                        splits.remove(i--);
                        continue;
                    }
                    // waiting for more rows from this split
                    return false;
                }

                if (extremeIndex < 0
                        || metadata.getComparator().compare(split.currentRow, splits.get(extremeIndex).currentRow) < 0) {
                    extremeIndex = i;
                    extreme = split.currentRow;
                }
            }

            if (extremeIndex < 0) {
                assert splits.isEmpty();
                return true;
            }
            pendingItem = extreme;
            splits.get(extremeIndex).remove();
        }
    }

    private boolean runHashIndex() {
        boolean allIdle;
        do {
            allIdle = true;
            for (int i = 0; i < splits.size(); ++i) {
                Split split = splits.get(i);
                try {
                    split.peek();
                } catch (MissingPartitionException e) {
                    splits.addAll(splitOnMigration(split));
                    splits.remove(i--);
                    continue;
                }

                if (split.currentRow == null) {
                    if (split.done()) {
                        // No more items to read, remove finished split.
                        splits.remove(i--);
                        if (splits.isEmpty()) {
                            return true;
                        }
                    }
                } else {
                    allIdle = false;
                    if (tryEmit(split.currentRow)) {
                        split.remove();
                    } else {
                        return false;
                    }
                }
            }
        } while (!allIdle);
        return false;
    }

    /**
     * Performs splitting of a {@link Split} after receiving a {@link MissingPartitionException}
     * or various cluster state exceptions like {@link MemberLeftException}.
     * <p>
     * The method gets the current partition table and splits the partitions
     * assigned to the split according to the new partition owners into
     * disjoint sets, one for each owner.
     *
     * @param split the split to split
     * @return collection of new split units
     */
    private List<Split> splitOnMigration(Split split) {
        IndexIterationPointer[] lastPointers = split.pointers;
        InternalPartitionService partitionService = getNodeEngine(hazelcastInstance).getPartitionService();
        Map<Address, Split> newSplits = new HashMap<>();

        PrimitiveIterator.OfInt partitionIterator = split.partitions.intIterator();
        while (partitionIterator.hasNext()) {
            int partitionId = partitionIterator.nextInt();
            // If a partition owner is not assigned -- assign the current member.
            // Later, a WrongTargetException will be thrown
            // and it causes this method to be called again.
            // Occasionally the prediction with the current member will be correct.
            Address potentialOwner = partitionService.getPartition(partitionId).getOwnerOrNull();
            Address owner = potentialOwner == null
                    ? split.owner
                    : partitionService.getPartition(partitionId).getOwnerOrNull();
            newSplits.computeIfAbsent(owner, x -> new Split(
                    new PartitionIdSet(partitionService.getPartitionCount()), owner, lastPointers)
            ).partitions.add(partitionId);
        }

        ArrayList<Split> res = new ArrayList<>(newSplits.values());
        // if the resulting split is the same as the input split, postpone retrying it
        if (res.size() == 1) {
            Split newSplit = res.get(0);
            if (newSplit.owner.equals(split.owner) && newSplit.partitions.equals(split.partitions)) {
                newSplit.postponeUntil(System.nanoTime() + DELAY_AFTER_MISSING_PARTITION);
            }
        }
        return res;
    }

    /**
     * Basic unit of index scan execution, bound to a concrete member and partition set.
     * Can be split into smaller splits after a migration is detected.
     */
    private final class Split {

        private final PartitionIdSet partitions;
        private final Address owner;
        private IndexIterationPointer[] pointers;

        private List<QueryableEntry<?, ?>> currentBatch = emptyList();
        private JetSqlRow currentRow;
        private int currentBatchPosition;
        private CompletableFuture<MapFetchIndexOperationResult> future;
        private long postponeUntil = Long.MIN_VALUE;

        private Split(PartitionIdSet partitions, Address owner, IndexIterationPointer[] pointers) {
            this.partitions = partitions;
            this.owner = owner;
            this.pointers = pointers;
            this.currentBatchPosition = 0;
            this.future = null;
        }

        private void postponeUntil(long postponeUntil) {
            this.postponeUntil = postponeUntil;
        }

        /**
         * After this call, {@link #currentRow} will contain the next entry to emit.
         * It is set to null if there's no row available, either because we're done
         * or because we're waiting for more data.
         */
        private void peek() {
            if (postponeUntil > Long.MIN_VALUE) {
                if (System.nanoTime() < postponeUntil) {
                    return;
                } else {
                    postponeUntil = Long.MIN_VALUE;
                }
            }

            // start a new async call, if we're not done and one isn't in flight
            if (future == null && pointers.length > 0) {
                future = reader.readBatch(owner, partitions, pointers);
            }

            // get the next batch from the future, if the current batch is done and one is in flight
            if (currentBatch.size() == currentBatchPosition && future != null && future.isDone()) {
                MapFetchIndexOperationResult result;
                try {
                    result = reader.toBatchResult(future);
                } catch (ExecutionException e) {
                    // rethrow topology-related exceptions as MissingPartitionException, wrap the rest
                    Throwable t = findTopologyExceptionInCauses(e);
                    if (t != null) {
                        throw new MissingPartitionException(t.toString(), e);
                    }
                    throw new RuntimeException(e);
                } catch (InterruptedException e) {
                    throw new RuntimeException(e);
                }
                currentBatch = reader.toRecordSet(result);
                currentBatchPosition = 0;
                pointers = result.getPointers();
                future = null;
            }

            // project the next item
            while (currentRow == null && currentBatchPosition < currentBatch.size()) {
                // Sometimes the scan query may not include the indexed field,
                // so an additional projection is required to be able to merge-sort the output.
                currentRow = projectAndFilter(currentBatch.get(currentBatchPosition));
                if (currentRow == null) {
                    currentBatchPosition++;
                }
            }
        }

        /**
         * Returns a topology exception from the given Throwable or its causes.
         * Topology exceptions are those related to a member leaving the cluster.
         * We handle them the same way as a {@link MissingPartitionException}.
         */
        @SuppressWarnings("BooleanExpressionComplexity")
        private Throwable findTopologyExceptionInCauses(Throwable t) {
            while (t != null) {
                if (t instanceof MissingPartitionException
                        || t instanceof HazelcastInstanceNotActiveException
                        || t instanceof MemberLeftException
                        || t instanceof TargetDisconnectedException
                        || t instanceof TargetNotMemberException
                        || t instanceof WrongTargetException
                ) {
                    return t;
                }
                t = t.getCause();
            }
            return null;
        }

        private JetSqlRow projectAndFilter(@Nonnull QueryableEntry<?, ?> entry) {
            row.setKeyValue(
                    entry.getKeyIfPresent(),
                    entry.getKeyDataIfPresent(),
                    entry.getValueIfPresent(),
                    entry.getValueDataIfPresent()
            );
            return ExpressionUtil.evaluate(metadata.getRemainingFilter(), metadata.getProjection(), row, evalContext);
        }

        private void remove() {
            currentBatchPosition++;
            currentRow = null;
        }

        private boolean done() {
            return currentBatchPosition == currentBatch.size() && pointers.length == 0;
        }

        @Override
        public String toString() {
            return "Split{"
                    + "partitions=" + partitions
                    + ", owner=" + owner
                    + ", hash=" + System.identityHashCode(this)
                    + '}';
        }
    }

    private static final class LocalMapIndexReader
            extends AbstractIndexReader<MapFetchIndexOperationResult, QueryableEntry<?, ?>> {

        static final int FETCH_SIZE_HINT = 128;

        private final HazelcastInstance hazelcastInstance;
        private final String indexName;

        private LocalMapIndexReader(
                @Nonnull HazelcastInstance hzInstance,
                @Nonnull InternalSerializationService serializationService,
                @Nonnull MapIndexScanMetadata indexScanMetadata
        ) {
            super(indexScanMetadata.getMapName(), MapFetchIndexOperationResult::getEntries);
            this.hazelcastInstance = hzInstance;
            this.indexName = indexScanMetadata.getIndexName();
            this.serializationService = serializationService;
        }

        @Override
        @Nonnull
        public InternalCompletableFuture<MapFetchIndexOperationResult> readBatch(
                Address address,
                PartitionIdSet partitions,
                IndexIterationPointer[] pointers
        ) {
            MapProxyImpl mapProxyImpl = (MapProxyImpl) hazelcastInstance.getMap(objectName);
            MapOperationProvider operationProvider = mapProxyImpl.getOperationProvider();
            Operation op = operationProvider.createFetchIndexOperation(
                    mapProxyImpl.getName(),
                    indexName,
                    pointers,
                    partitions,
                    FETCH_SIZE_HINT
            );
            return mapProxyImpl.getOperationService().invokeOnTarget(mapProxyImpl.getServiceName(), op, address);
        }
    }

    static ProcessorSupplier readMapIndexSupplier(MapIndexScanMetadata indexScanMetadata) {
        return new MapIndexScanProcessorSupplier(indexScanMetadata);
    }

    private static final class MapIndexScanProcessorSupplier implements ProcessorSupplier, DataSerializable {

        private MapIndexScanMetadata metadata;

        @SuppressWarnings("unused")
        private MapIndexScanProcessorSupplier() {
        }

        private MapIndexScanProcessorSupplier(@Nonnull MapIndexScanMetadata metadata) {
            this.metadata = metadata;
        }

        @Override
        @Nonnull
        public List<Processor> get(int count) {
            return IntStream.range(0, count)
                    .mapToObj(i -> new MapIndexScanP(metadata))
                    .collect(toList());
        }

        @Override
        public List<Permission> permissions() {
            return singletonList(new MapPermission(metadata.getMapName(), ACTION_CREATE, ACTION_READ));
        }

        @Override
        public void writeData(ObjectDataOutput out) throws IOException {
            out.writeObject(metadata);
        }

        @Override
        public void readData(ObjectDataInput in) throws IOException {
            metadata = in.readObject();
        }
    }
}
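For orientation, the sketch below shows one way the supplier returned by readMapIndexSupplier could be wired into a Jet DAG. It is a minimal illustration and not part of the original file: the helper class MapIndexScanUsageSketch is hypothetical, it has to live in this same package because readMapIndexSupplier is package-private, and the MapIndexScanMetadata instance is assumed to be prepared elsewhere (normally by the SQL planner, not by hand).

package com.hazelcast.jet.sql.impl.connector.map;

import com.hazelcast.jet.core.DAG;
import com.hazelcast.jet.core.Vertex;
import com.hazelcast.sql.impl.exec.scan.MapIndexScanMetadata;

// Hypothetical helper, shown only to illustrate how the supplier plugs into a DAG.
final class MapIndexScanUsageSketch {

    private MapIndexScanUsageSketch() {
    }

    // `metadata` is assumed to be built by the SQL planner; constructing it manually is not shown here.
    static DAG buildScanDag(MapIndexScanMetadata metadata) {
        DAG dag = new DAG();
        // Each member runs the index-scan processors created by MapIndexScanProcessorSupplier.
        Vertex scan = dag.newVertex("map-index-scan", MapIndexScanP.readMapIndexSupplier(metadata));
        scan.localParallelism(1);
        return dag;
    }
}

The resulting vertex would typically be connected to downstream vertices (for example a sink) before submitting the DAG as a job; that wiring is omitted here because it depends on the surrounding query plan.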




