![JAR search and dependency download from the Maven repository](/logo.png)
cdc.applic.tools.KeyedTablePartitioner Maven / Gradle / Ivy
package cdc.applic.tools;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.logging.log4j.Level;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.logging.log4j.io.IoBuilder;
import cdc.applic.dictionaries.checks.SemanticChecker;
import cdc.applic.dictionaries.handles.DictionaryHandle;
import cdc.applic.expressions.ApplicException;
import cdc.applic.expressions.Expression;
import cdc.applic.expressions.ExpressionPool;
import cdc.applic.factorization.FactorizationFeatures;
import cdc.applic.factorization.Partitioner;
import cdc.applic.factorization.core.PartitionerImpl;
import cdc.applic.proofs.Prover;
import cdc.applic.proofs.core.clauses.ProverImpl;
import cdc.applic.simplification.SimplifierFeatures;
import cdc.office.tables.Header;
import cdc.office.tables.Row;
import cdc.office.tables.diff.Side;
import cdc.tuples.CTupleN;
import cdc.util.lang.DataException;
import cdc.util.lang.InvalidDataException;
/**
* Utility class used to help compare 2 sets of invariants, each invariant
* containing one or more variants.
*
* Each Invariant is identified by a set of keys.
* Each Variant has an applicability.
*
* WARNING: a strong hypothesis is that for a given invariant, variants
* have disjoint applicabilities.
* If this is not the case, an exception is thrown.
*
* For each invariant, left and right variants are selected.
* Left and right applicabilities are partitioned, and left
* and right rows are created using these new applicabilities.
*
* Example, where:
* <ul>
* <li>I is the invariant identifier.
* <li>A, B, ... define each variant applicability.
* <li>Lx and Rx are left and right data.
* </ul>
*
* Input left invariant with 2 variants:
* <ul>
* <li>I, ABC, L1
* <li>I, EFG, L2
* </ul>
*
* Input right invariant with 3 variants:
* <ul>
* <li>I, A, R1
* <li>I, BCD, R2
* <li>I, EF, R3
* </ul>
*
* The partition of applicabilities is A, BC, D, EF, G.
* Output left invariant contains 4 variants:
* <ul>
* <li>I, A, L1
* <li>I, BC, L1
* <li>I, EF, L2
* <li>I, G, L2
* </ul>
*
* And output right invariant contains 4 variants:
* <ul>
* <li>I, A, R1
* <li>I, BC, R2
* <li>I, D, R2
* <li>I, EF, R3
* </ul>
*
*
* The result can now be compared line by line, using I + applic as the line
* composite key.
*
* @author Damien Carbonne
*/
public class KeyedTablePartitioner {
private static final Logger LOGGER = LogManager.getLogger(KeyedTablePartitioner.class);
private static final PrintStream ERR = IoBuilder.forLogger(LOGGER).setLevel(Level.ERROR).buildPrintStream();
/** List of keys. */
private final List keyNames;
/** Index of left column containing applic. */
private final int leftApplicColumn;
/** Indices of left key columns. */
private final int[] leftKeyColumns;
/** Index of right column containing applic. */
private final int rightApplicColumn;
/** Indices of right key columns. */
private final int[] rightKeyColumns;
/** Left (Key, Data Rows) map. */
private final Map, List> leftMap = new HashMap<>();
/** Right (Key, Data Rows) map. */
private final Map, List> rightMap = new HashMap<>();
/** Left result rows. */
private final List leftRows = new ArrayList<>();
/** Right result rows. */
private final List rightRows = new ArrayList<>();
private final Prover prover;
private final ExpressionPool pool = new ExpressionPool();
private final SemanticChecker checker;
/**
 * Creates a Partitioner.
 * <p>
 * All the work is done by this constructor: input rows are grouped by key,
 * applicabilities are checked, then, for each key present on both sides,
 * left and right applicabilities are partitioned and result rows are built.
 *
 * @param handle The dictionary handle, used to partition applicabilities.
 * @param simplifierFeatures The features to use for simplification.
 * @param leftHeader The left table header.
 * @param leftData The left table data.
 * @param rightHeader The right table header.
 * @param rightData The right table data.
 * @param applicName The name of the column that contains applicability.
 * @param keyNames The names of the columns that identify invariants.
 *            There should be at least one.
 * @throws IllegalArgumentException When {@code applicName} or one of
 *             {@code keyNames} has no matching column in a header.
 * @throws InvalidDataException When a key cell is missing in a row.
 * @throws DataException When at least one applicability is invalid, when
 *             applicabilities of rows sharing a key are not disjoint on one side,
 *             or when the computed partition is inconsistent.
 */
public KeyedTablePartitioner(DictionaryHandle handle,
                             SimplifierFeatures simplifierFeatures,
                             Header leftHeader,
                             List<Row> leftData,
                             Header rightHeader,
                             List<Row> rightData,
                             String applicName,
                             String... keyNames) {
    final Partitioner partitioner = new PartitionerImpl(handle, simplifierFeatures);
    this.prover = new ProverImpl(handle, simplifierFeatures.getProverFeatures());
    this.checker = new SemanticChecker(handle.getDictionary());
    this.keyNames = Arrays.asList(keyNames);
    this.leftApplicColumn = buildKeyColumn(leftHeader, applicName);
    this.leftKeyColumns = buildKeyColumns(leftHeader, keyNames);
    this.rightApplicColumn = buildKeyColumn(rightHeader, applicName);
    this.rightKeyColumns = buildKeyColumns(rightHeader, keyNames);

    // Compute left map from keys to rows
    int leftErrors = 0;
    for (int number = 0; number < leftData.size(); number++) {
        final Row row = leftData.get(number);
        final CTupleN<String> key = getKey(Side.LEFT, row, number);
        leftMap.computeIfAbsent(key, k -> new ArrayList<>()).add(row);
        leftErrors += checkApplic(row, Side.LEFT, number);
    }

    // Compute right map from keys to rows
    int rightErrors = 0;
    for (int number = 0; number < rightData.size(); number++) {
        final Row row = rightData.get(number);
        final CTupleN<String> key = getKey(Side.RIGHT, row, number);
        rightMap.computeIfAbsent(key, k -> new ArrayList<>()).add(row);
        // Right side errors are counted in rightErrors (they used to be added to leftErrors)
        rightErrors += checkApplic(row, Side.RIGHT, number);
    }

    if (leftErrors + rightErrors > 0) {
        LOGGER.fatal("There are {} error(s) in data applicabilities.", leftErrors + rightErrors);
        throw new DataException("Invalid applicabilities in input data");
    }

    // Set of all row keys
    final Set<CTupleN<String>> keys = new HashSet<>();
    keys.addAll(leftMap.keySet());
    keys.addAll(rightMap.keySet());

    // Check partitions
    // Here, leftErrors and rightErrors equal 0
    for (final CTupleN<String> key : keys) {
        leftErrors += checkPartitions(key, leftMap.getOrDefault(key, Collections.emptyList()), Side.LEFT);
        rightErrors += checkPartitions(key, rightMap.getOrDefault(key, Collections.emptyList()), Side.RIGHT);
    }

    if (leftErrors + rightErrors > 0) {
        LOGGER.fatal("There are {} error(s) in partitionning of applicabilities.", leftErrors + rightErrors);
        throw new DataException((leftErrors + rightErrors) + " error(s) in partitioning of applicabilities of input data");
    }

    // Iterate on all keys
    for (final CTupleN<String> key : keys) {
        // The list of left rows that have key
        final List<Row> left = leftMap.getOrDefault(key, Collections.emptyList());
        final int leftSize = left.size();
        // The list of right rows that have key
        final List<Row> right = rightMap.getOrDefault(key, Collections.emptyList());

        if (left.isEmpty()) {
            // Right list should not be empty
            // No need to partition
            rightRows.addAll(right);
        } else if (right.isEmpty()) {
            // Left list should not be empty
            // No need to partition
            leftRows.addAll(left);
        } else {
            // Both lists are not empty
            // Do partition

            // List of all applicabilities : left then right.
            // An index lower than leftSize designates a left row,
            // any other index designates the right row (index - leftSize).
            final List<Expression> inputs = new ArrayList<>();

            // Left partition (that should be verified)
            final Set<Integer> leftPartition = new HashSet<>();
            for (final Row row : left) {
                leftPartition.add(inputs.size());
                inputs.add(getApplic(row, Side.LEFT));
            }

            // Right partition (that should be verified)
            final Set<Integer> rightPartition = new HashSet<>();
            for (final Row row : right) {
                rightPartition.add(inputs.size());
                inputs.add(getApplic(row, Side.RIGHT));
            }

            // List containing left and right partitions
            final List<Set<Integer>> inputPartitions = new ArrayList<>();
            inputPartitions.add(leftPartition);
            inputPartitions.add(rightPartition);

            // Partition applicabilities
            final Partitioner.Result result = partitioner.partition(inputs,
                                                                    inputPartitions,
                                                                    FactorizationFeatures.SIMPLIFY_NO_CHECK_SHORT_NARROW);

            for (final Expression x : result.getOutputExpressions()) {
                final Set<Integer> indices = result.getInputIndices(x);
                // There should be 1 or 2 indices
                // If 2, they should be on different sides
                if (indices.size() == 1) {
                    final int index = indices.iterator().next();
                    if (index < leftSize) {
                        leftRows.add(patch(left.get(index), Side.LEFT, x));
                    } else {
                        rightRows.add(patch(right.get(index - leftSize), Side.RIGHT, x));
                    }
                } else if (indices.size() == 2) {
                    final Iterator<Integer> iter = indices.iterator();
                    final int index1 = iter.next();
                    final int index2 = iter.next();
                    if (index1 < leftSize && index2 >= leftSize) {
                        leftRows.add(patch(left.get(index1), Side.LEFT, x));
                        rightRows.add(patch(right.get(index2 - leftSize), Side.RIGHT, x));
                    } else if (index1 >= leftSize && index2 < leftSize) {
                        leftRows.add(patch(left.get(index2), Side.LEFT, x));
                        rightRows.add(patch(right.get(index1 - leftSize), Side.RIGHT, x));
                    } else {
                        // Both indices are on the same side, which is not normal
                        // The partition promise is invalid
                        // This should not happen as partitioning is checked
                        invalidPartition(result, "Both indices on same side: " + index1 + " " + index2);
                    }
                } else {
                    // Probably, the partition promise is invalid
                    // This should not happen as partitioning is checked
                    invalidPartition(result, "Invalid number of indices: " + indices.size());
                }
            }
        }
    }
}
/**
 * Checks that the applicabilities of rows sharing a key are disjoint.
 * <p>
 * Nothing is checked when there are fewer than 2 rows.
 * If the check fails, an error message is logged.
 *
 * @param key The key shared by {@code rows}.
 * @param rows The rows that have {@code key} on {@code side}.
 * @param side The side.
 * @return 1 if applicabilities are not disjoint, 0 otherwise.
 */
private int checkPartitions(CTupleN<String> key,
                            List<Row> rows,
                            Side side) {
    if (rows.size() <= 1) {
        // Zero or one applicability: nothing to compare
        return 0;
    }

    // List of applicabilities of rows
    final List<Expression> x = new ArrayList<>();
    for (final Row row : rows) {
        x.add(getApplic(row, side));
    }

    if (prover.alwaysAtMostOne(x.toArray(new Expression[0]))) {
        return 0;
    }
    LOGGER.error("Expressions are not disjoint {} for {} on {}", x, key, side);
    return 1;
}
/**
 * Reports an inconsistent partitioning result and fails.
 * <p>
 * The message is logged as an error, the result is printed to the error
 * log stream, then an exception carrying the message is thrown.
 *
 * @param result The partitioning result to dump.
 * @param message The error message.
 * @throws DataException Always.
 */
private static void invalidPartition(final Partitioner.Result result,
                                     final String message) {
    // Log first so that the message precedes the dump of the result
    LOGGER.error(message);
    result.print(ERR);
    throw new DataException(message);
}
/**
 * Builds a copy of a row in which the applicability cell is replaced.
 *
 * @param row The row.
 * @param side The side, used to locate the applicability column.
 * @param applic The new applicability.
 * @return A copy of {@code row} where applicability is set
 *         to the string form of {@code applic}.
 */
private Row patch(Row row,
                  Side side,
                  Expression applic) {
    final int target = getApplicColumn(side);
    final Row.Builder builder = Row.builder();
    final int size = row.size();
    int column = 0;
    while (column < size) {
        // Only the applicability cell differs from the source row
        final String cell = column == target ? applic.toString() : row.getValue(column);
        builder.addValue(cell);
        column++;
    }
    return builder.build();
}
/**
 * Reads the applicability cell of a row and converts it to an expression
 * using the expression pool.
 *
 * @param row The row.
 * @param side The side, used to locate the applicability column.
 * @return The applicability of {@code row}.
 */
private Expression getApplic(Row row,
                             Side side) {
    return pool.get(row.getValue(getApplicColumn(side)));
}
/**
 * Validates the applicability cell of a row.
 * <p>
 * The cell is converted to an expression and its compliance is checked.
 * When this fails with an {@link ApplicException}, an error is logged.
 *
 * @param row The row.
 * @param side The side.
 * @param number The row number.
 * @return 1 if applicability is invalid, 0 otherwise.
 */
private int checkApplic(Row row,
                        Side side,
                        int number) {
    final String text = row.getValue(getApplicColumn(side));
    try {
        checker.checkCompliance(pool.get(text));
        return 0;
    } catch (final ApplicException e) {
        // The reported line is number + 2
        LOGGER.error("Invalid applicability '{}', {} side, line: {}. {}",
                     text,
                     side,
                     number + 2,
                     e);
        return 1;
    }
}
/**
 * Resolves the column index of each key name in a header.
 *
 * @param header The header.
 * @param keyNames The key names.
 * @return An array containing, for each key name and in the same order,
 *         the index of its matching column in {@code header}.
 * @throws IllegalArgumentException When a key name has no matching column.
 */
private static int[] buildKeyColumns(Header header,
                                     String... keyNames) {
    final int[] columns = new int[keyNames.length];
    int next = 0;
    for (final String keyName : keyNames) {
        final int column = header.getMatchingIndex(keyName);
        if (column < 0) {
            throw new IllegalArgumentException("Key '" + keyName + "' missing in " + header);
        }
        columns[next] = column;
        next++;
    }
    return columns;
}
/**
 * Resolves the index of a named column in a header.
 *
 * @param header The header.
 * @param name The column name.
 * @return The index of the column matching {@code name} in {@code header}.
 * @throws IllegalArgumentException When no column matches {@code name}.
 */
private static int buildKeyColumn(Header header,
                                  String name) {
    final int found = header.getMatchingIndex(name);
    if (found >= 0) {
        return found;
    }
    throw new IllegalArgumentException("'" + name + "' missing in " + header);
}
/**
 * Selects the key column indices of a side.
 *
 * @param side The side.
 * @return The left key indices if {@code side} is {@code Side.LEFT},
 *         the right key indices otherwise.
 */
private int[] getKeyColumns(Side side) {
    if (side == Side.LEFT) {
        return leftKeyColumns;
    }
    return rightKeyColumns;
}
/**
 * Selects the applicability column index of a side.
 *
 * @param side The side.
 * @return The left applic column index if {@code side} is {@code Side.LEFT},
 *         the right applic column index otherwise.
 */
private int getApplicColumn(Side side) {
    if (side == Side.LEFT) {
        return leftApplicColumn;
    }
    return rightApplicColumn;
}
/**
 * Builds a text fragment locating a row, intended to be appended to a message.
 *
 * @param side The side.
 * @param row The row.
 * @param number The row number (0-based).
 * @return A string containing {@code side}, {@code row} and the line
 *         {@code number + 2}.
 */
private static String locate(Side side,
                             Row row,
                             int number) {
    final StringBuilder text = new StringBuilder();
    text.append(" in ").append(side);
    text.append(" row ").append(row);
    text.append(", line ").append(number + 2);
    return text.toString();
}
/**
 * Returns the key of a row.
 * <p>
 * The key is the tuple of the values found in the key columns of the row,
 * in the order of key names.
 *
 * @param side The side.
 * @param row The row.
 * @param number The row number (0-based).
 * @return The key of {@code row}.
 * @throws InvalidDataException When a key cell of {@code row} is {@code null}.
 */
private CTupleN<String> getKey(Side side,
                               Row row,
                               int number) {
    final int[] keyColumns = getKeyColumns(side);
    final String[] values = new String[keyColumns.length];
    for (int index = 0; index < keyColumns.length; index++) {
        final int column = keyColumns[index];
        final String value = row.getValue(column);
        if (value == null) {
            // keyNames is indexed by key position (index), not by header column:
            // using column here would name the wrong key or be out of bounds.
            throw new InvalidDataException("Missing " + keyNames.get(index) + " cell" + locate(side, row, number));
        }
        values[index] = value;
    }
    return CTupleN.of(values);
}
/**
 * Returns the left rows computed at construction.
 * <p>
 * The internal list is returned, not a copy.
 *
 * @return The left result rows.
 */
public List<Row> getLeftRows() {
    return leftRows;
}
/**
 * Returns the right rows computed at construction.
 * <p>
 * The internal list is returned, not a copy.
 *
 * @return The right result rows.
 */
public List<Row> getRightRows() {
    return rightRows;
}
}