// com.hazelcast.org.apache.calcite.runtime.Matcher Maven / Gradle / Ivy
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to you under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.hazelcast.org.apache.calcite.runtime;
import com.hazelcast.org.apache.calcite.linq4j.MemoryFactory;
import com.hazelcast.org.apache.calcite.util.ImmutableBitSet;
import com.hazelcast.com.google.common.collect.ImmutableList;
import com.hazelcast.com.google.common.collect.ImmutableMap;
import com.hazelcast.com.google.common.collect.ImmutableSet;
import com.hazelcast.com.google.common.collect.Sets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.function.Consumer;
import java.util.function.Predicate;
import java.util.stream.Collectors;
/**
* Workspace that partialMatches patterns against an automaton.
* @param Type of rows matched by this automaton
*/
public class Matcher {
private final DeterministicAutomaton dfa;
private final ImmutableMap>> predicates;
// The following members are work space. They can be shared among partitions,
// but only one thread can use them at a time. Putting them here saves the
// expense of creating a fresh object each call to "match".
private final ImmutableList> emptyStateSet = ImmutableList.of();
private final ImmutableBitSet startSet;
private final List rowSymbols = new ArrayList<>();
/**
* Creates a Matcher; use {@link #builder}.
*/
private Matcher(Automaton automaton,
ImmutableMap>> predicates) {
this.predicates = Objects.requireNonNull(predicates);
final ImmutableBitSet.Builder startSetBuilder =
ImmutableBitSet.builder();
startSetBuilder.set(automaton.startState.id);
automaton.epsilonSuccessors(automaton.startState.id, startSetBuilder);
startSet = startSetBuilder.build();
// Build the DFA
dfa = new DeterministicAutomaton(automaton);
}
public static Builder builder(Automaton automaton) {
return new Builder<>(automaton);
}
public List> match(E... rows) {
return match(Arrays.asList(rows));
}
public List> match(Iterable rows) {
final ImmutableList.Builder> resultMatchBuilder =
ImmutableList.builder();
final Consumer> resultMatchConsumer = resultMatchBuilder::add;
final PartitionState partitionState = createPartitionState(0, 0);
for (E row : rows) {
partitionState.getMemoryFactory().add(row);
matchOne(partitionState.getRows(), partitionState, resultMatchConsumer);
}
return resultMatchBuilder.build();
}
public PartitionState createPartitionState(int history, int future) {
return new PartitionState<>(history, future);
}
/**
* Feeds a single input row into the given partition state,
* and writes the resulting output rows (if any).
* This method ignores the symbols that caused a transition.
*/
protected void matchOne(MemoryFactory.Memory rows,
PartitionState partitionState, Consumer> resultMatches) {
List> matches = matchOneWithSymbols(rows, partitionState);
for (PartialMatch pm : matches) {
resultMatches.accept(pm);
}
}
protected List> matchOneWithSymbols(MemoryFactory.Memory rows,
PartitionState partitionState) {
final HashSet> newMatches = new HashSet<>();
for (Map.Entry>> predicate
: predicates.entrySet()) {
for (PartialMatch pm : partitionState.getPartialMatches()) {
// Remove this match
if (predicate.getValue().test(rows)) {
// Check if we have transitions from here
final List transitions =
dfa.getTransitions().stream()
.filter(t -> predicate.getKey().equals(t.symbol))
.filter(t -> pm.currentState.equals(t.fromState))
.collect(Collectors.toList());
for (DeterministicAutomaton.Transition transition : transitions) {
// System.out.println("Append new transition to ");
final PartialMatch newMatch = pm.append(transition.symbol,
rows.get(), transition.toState);
newMatches.add(newMatch);
}
}
}
// Check if a new Match starts here
if (predicate.getValue().test(rows)) {
final List transitions =
dfa.getTransitions().stream()
.filter(t -> predicate.getKey().equals(t.symbol))
.filter(t -> dfa.startState.equals(t.fromState))
.collect(Collectors.toList());
for (DeterministicAutomaton.Transition transition : transitions) {
final PartialMatch newMatch = new PartialMatch<>(-1L,
ImmutableList.of(transition.symbol), ImmutableList.of(rows.get()),
transition.toState);
newMatches.add(newMatch);
}
}
}
// Remove all current partitions
partitionState.clearPartitions();
// Add all partial matches
partitionState.addPartialMatches(newMatches);
// Check if one of the new Matches is in a final state, otherwise add them
// and go on
final ImmutableList.Builder> builder =
ImmutableList.builder();
for (PartialMatch match : newMatches) {
if (dfa.getEndStates().contains(match.currentState)) {
// This is the match, handle all "open" partial matches with a suitable
// strategy
// TODO add strategy
// Return it!
builder.add(match);
}
}
return builder.build();
}
/**
* State for each partition.
*
* @param Row type
*/
static class PartitionState {
private final Set> partialMatches = new HashSet<>();
private final MemoryFactory memoryFactory;
PartitionState(int history, int future) {
this.memoryFactory = new MemoryFactory<>(history, future);
}
public void addPartialMatches(Collection> matches) {
partialMatches.addAll(matches);
}
public Set> getPartialMatches() {
return ImmutableSet.copyOf(partialMatches);
}
public void removePartialMatch(PartialMatch pm) {
partialMatches.remove(pm);
}
public void clearPartitions() {
partialMatches.clear();
}
public MemoryFactory.Memory getRows() {
return memoryFactory.create();
}
public MemoryFactory getMemoryFactory() {
return this.memoryFactory;
}
}
/**
* Partial match of the NFA.
*
* This class is immutable; the {@link #copy()} and
* {@link #append(String, Object, DeterministicAutomaton.MultiState)}
* methods generate new instances.
*
* @param Row type
*/
static class PartialMatch {
final long startRow;
final ImmutableList symbols;
final ImmutableList rows;
final DeterministicAutomaton.MultiState currentState;
PartialMatch(long startRow, ImmutableList symbols,
ImmutableList rows, DeterministicAutomaton.MultiState currentState) {
this.startRow = startRow;
this.symbols = symbols;
this.rows = rows;
this.currentState = currentState;
}
public PartialMatch copy() {
return new PartialMatch<>(startRow, symbols, rows, currentState);
}
public PartialMatch append(String symbol, E row,
DeterministicAutomaton.MultiState toState) {
ImmutableList symbols = ImmutableList.builder()
.addAll(this.symbols)
.add(symbol)
.build();
ImmutableList rows = ImmutableList.builder()
.addAll(this.rows)
.add(row)
.build();
return new PartialMatch<>(startRow, symbols, rows, toState);
}
@Override public boolean equals(Object o) {
return o == this
|| o instanceof PartialMatch
&& startRow == ((PartialMatch) o).startRow
&& Objects.equals(symbols, ((PartialMatch) o).symbols)
&& Objects.equals(rows, ((PartialMatch) o).rows)
&& Objects.equals(currentState, ((PartialMatch) o).currentState);
}
@Override public int hashCode() {
return Objects.hash(startRow, symbols, rows, currentState);
}
@Override public String toString() {
final StringBuilder sb = new StringBuilder();
sb.append("[");
for (int i = 0; i < rows.size(); i++) {
if (i > 0) {
sb.append(", ");
}
sb.append("(");
sb.append(symbols.get(i));
sb.append(", ");
sb.append(rows.get(i));
sb.append(")");
}
sb.append("]");
return sb.toString();
}
}
/**
* Builds a Matcher.
*
* @param Type of rows matched by this automaton
*/
public static class Builder {
final Automaton automaton;
final Map>> symbolPredicates =
new HashMap<>();
Builder(Automaton automaton) {
this.automaton = automaton;
}
/**
* Associates a predicate with a symbol.
*/
public Builder add(String symbolName,
Predicate> predicate) {
symbolPredicates.put(symbolName, predicate);
return this;
}
public Matcher build() {
final Set predicateSymbolsNotInGraph =
Sets.newTreeSet(symbolPredicates.keySet());
predicateSymbolsNotInGraph.removeAll(automaton.symbolNames);
if (!predicateSymbolsNotInGraph.isEmpty()) {
throw new IllegalArgumentException("not all predicate symbols ["
+ predicateSymbolsNotInGraph + "] are in graph ["
+ automaton.symbolNames + "]");
}
final ImmutableMap.Builder>> builder =
ImmutableMap.builder();
for (String symbolName : automaton.symbolNames) {
// If a symbol does not have a predicate, it defaults to true.
// By convention, "STRT" is used for the start symbol, but it could be
// anything.
builder.put(symbolName,
symbolPredicates.getOrDefault(symbolName, e -> true));
}
return new Matcher<>(automaton, builder.build());
}
}
/**
* Represents a Tuple of a symbol and a row
*
* @param Type of Row
*/
static class Tuple {
final String symbol;
final E row;
Tuple(String symbol, E row) {
this.symbol = symbol;
this.row = row;
}
@Override public boolean equals(Object o) {
return o == this
|| o instanceof Tuple
&& ((Tuple) o).symbol.equals(symbol)
&& Objects.equals(row, ((Tuple) o).row);
}
@Override public int hashCode() {
return Objects.hash(symbol, row);
}
@Override public String toString() {
return "(" + symbol + ", " + row + ")";
}
}
}