// cdc.applic.tools.KeyedTablePartitioner Maven / Gradle / Ivy
//
// Go to download
package cdc.applic.tools;

import java.io.PrintStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.apache.logging.log4j.Level;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.logging.log4j.io.IoBuilder;

import cdc.applic.dictionaries.checks.SemanticChecker;
import cdc.applic.dictionaries.handles.DictionaryHandle;
import cdc.applic.expressions.ApplicException;
import cdc.applic.expressions.Expression;
import cdc.applic.expressions.ExpressionPool;
import cdc.applic.factorization.FactorizationFeatures;
import cdc.applic.factorization.Partitioner;
import cdc.applic.factorization.core.PartitionerImpl;
import cdc.applic.proofs.Prover;
import cdc.applic.proofs.core.clauses.ProverImpl;
import cdc.applic.simplification.SimplifierFeatures;
import cdc.office.tables.Header;
import cdc.office.tables.Row;
import cdc.office.tables.diff.Side;
import cdc.tuples.CTupleN;
import cdc.util.lang.DataException;
import cdc.util.lang.InvalidDataException;

/**
 * Utility class used to help compare 2 sets of invariants, each invariant
 * containing one or more variants.
 * <p>
 * Each Invariant is identified by a set of keys.<br>
 * Each Variant has an applicability.
 * <p>
 * <b>WARNING:</b> a strong hypothesis is that for a given invariant, variants
 * have disjoint applicabilities.<br>
 * If this is not the case, an exception is thrown.
 * <p>
 * For each invariant, left and right variants are selected.
 * Left and right applicabilities are partitioned, and left
 * and right rows are created using these new applicabilities.
 * <p>
 * Example, where:
 * <ul>
 * <li>I is the invariant identifier.
 * <li>A, B, ... define each variant applicability.
 * <li>Lx and Rx are left and right data.
 * </ul>
 *
 * Input left invariant with 2 variants:
 * <ul>
 * <li>I, ABC, L1
 * <li>I, EFG, L2
 * </ul>
 * Input right invariant with 3 variants:
 * <ul>
 * <li>I, A, R1
 * <li>I, BCD, R2
 * <li>I, EF, R3
 * </ul>
 * The partition of applicabilities is A, BC, D, EF, G.<br>
 * Output left invariant contains 4 variants:
 * <ul>
 * <li>I, A, L1
 * <li>I, BC, L1
 * <li>I, EF, L2
 * <li>I, G, L2
 * </ul>
 * And output right invariant contains 4 variants:
 * <ul>
 * <li>I, A, R1
 * <li>I, BC, R2
 * <li>I, D, R2
 * <li>I, EF, R3
 * </ul>
 *
 * The result can now be compared line by line, using I + applic as the line
 * composite key.
 *
 * @author Damien Carbonne
 */
public class KeyedTablePartitioner {
    private static final Logger LOGGER = LogManager.getLogger(KeyedTablePartitioner.class);
    private static final PrintStream ERR = IoBuilder.forLogger(LOGGER).setLevel(Level.ERROR).buildPrintStream();

    /** List of keys. */
    private final List keyNames;
    /** Index of left column containing applic. */
    private final int leftApplicColumn;
    /** Indices of left key columns. */
    private final int[] leftKeyColumns;
    /** Index of right column containing applic. */
    private final int rightApplicColumn;
    /** Indices of right key columns. */
    private final int[] rightKeyColumns;

    /** Left (Key, Data Rows) map. */
    private final Map, List> leftMap = new HashMap<>();
    /** Right (Key, Data Rows) map. */
    private final Map, List> rightMap = new HashMap<>();
    /** Left result rows. */
    private final List leftRows = new ArrayList<>();
    /** Right result rows. */
    private final List rightRows = new ArrayList<>();

    private final Prover prover;
    private final ExpressionPool pool = new ExpressionPool();
    private final SemanticChecker checker;

    /**
     * Creates a Partitioner.
     *
     * @param handle The dictionary handle, used to partition applicabilities.
     * @param simplifierFeatures The features to use for simplification.
     * @param leftHeader The left table header.
     * @param leftData The left table data.
     * @param rightHeader The right table header.
     * @param rightData The right table data.
     * @param applicName The name of the column that contains applicability.
     * @param keyNames The names of the columns that identify invariants.
     *            There should be at least one.
     */
    public KeyedTablePartitioner(DictionaryHandle handle,
                                 SimplifierFeatures simplifierFeatures,
                                 Header leftHeader,
                                 List leftData,
                                 Header rightHeader,
                                 List rightData,
                                 String applicName,
                                 String... keyNames) {
        final Partitioner partitioner = new PartitionerImpl(handle, simplifierFeatures);
        this.prover = new ProverImpl(handle, simplifierFeatures.getProverFeatures());
        this.checker = new SemanticChecker(handle.getDictionary());

        this.keyNames = Arrays.asList(keyNames);

        this.leftApplicColumn = buildKeyColumn(leftHeader, applicName);
        this.leftKeyColumns = buildKeyColumns(leftHeader, keyNames);
        this.rightApplicColumn = buildKeyColumn(rightHeader, applicName);
        this.rightKeyColumns = buildKeyColumns(rightHeader, keyNames);

        // Compute left map from keys to rows
        int leftErrors = 0;
        for (int number = 0; number < leftData.size(); number++) {
            final Row row = leftData.get(number);
            final CTupleN key = getKey(Side.LEFT, row, number);
            final List list = leftMap.computeIfAbsent(key, k -> new ArrayList<>());
            list.add(row);
            leftErrors += checkApplic(row, Side.LEFT, number);
        }

        // Compute right map from keys to rows
        int rightErrors = 0;
        for (int number = 0; number < rightData.size(); number++) {
            final Row row = rightData.get(number);
            final CTupleN key = getKey(Side.RIGHT, row, number);
            final List list = rightMap.computeIfAbsent(key, k -> new ArrayList<>());
            list.add(row);
            leftErrors += checkApplic(row, Side.RIGHT, number);
        }

        if (leftErrors + rightErrors > 0) {
            LOGGER.fatal("There are {} error(s) in data applicabilities.", leftErrors + rightErrors);
            throw new DataException("Invalid applicabilities in input data");
        }

        // Set of all row keys
        final Set> keys = new HashSet<>();
        keys.addAll(leftMap.keySet());
        keys.addAll(rightMap.keySet());

        // Check partitions
        // Here, leftErrors and rightErrors equal 0
        for (final CTupleN key : keys) {
            leftErrors += checkPartitions(key, leftMap.getOrDefault(key, Collections.emptyList()), Side.LEFT);
            rightErrors += checkPartitions(key, rightMap.getOrDefault(key, Collections.emptyList()), Side.RIGHT);
        }
        if (leftErrors + rightErrors > 0) {
            LOGGER.fatal("There are {} error(s) in partitionning of applicabilities.", leftErrors + rightErrors);
            throw new DataException((leftErrors + rightErrors) + " error(s) in partitioning of applicabilities of input data");
        }

        // Iterate on all keys
        for (final CTupleN key : keys) {
            // The list of left rows that have key
            final List left = leftMap.getOrDefault(key, Collections.emptyList());
            final int leftSize = left.size();
            // The list of right rows that have key
            final List right = rightMap.getOrDefault(key, Collections.emptyList());
            if (left.isEmpty()) {
                // Right list should not be empty
                // No need to partition
                rightRows.addAll(right);
            } else if (right.isEmpty()) {
                // Left list should not be empty
                // No need to partition
                leftRows.addAll(left);
            } else {
                // Both lists are not empty
                // Do partition

                // List of all applicabilities : left then right
                final List inputs = new ArrayList<>();
                // Left partition (that should be verified)
                final Set leftPartition = new HashSet<>();
                for (final Row row : left) {
                    leftPartition.add(inputs.size());
                    inputs.add(getApplic(row, Side.LEFT));
                }
                // Right partition (that should be verified)
                final Set rightPartition = new HashSet<>();
                for (final Row row : right) {
                    rightPartition.add(inputs.size());
                    inputs.add(getApplic(row, Side.RIGHT));
                }
                // List containing left and right partitions
                final List> inputPartitions = new ArrayList<>();
                inputPartitions.add(leftPartition);
                inputPartitions.add(rightPartition);

                // Partition applicabilities
                final Partitioner.Result result = partitioner.partition(inputs,
                                                                        inputPartitions,
                                                                        FactorizationFeatures.SIMPLIFY_NO_CHECK_SHORT_NARROW);
                for (final Expression x : result.getOutputExpressions()) {
                    final Set indices = result.getInputIndices(x);
                    // There should be 1 or 2 indices
                    // If 2, they should be on different sides
                    if (indices.size() == 1) {
                        final int index = indices.iterator().next();
                        if (index < leftSize) {
                            leftRows.add(patch(left.get(index), Side.LEFT, x));
                        } else {
                            rightRows.add(patch(right.get(index - leftSize), Side.RIGHT, x));
                        }
                    } else if (indices.size() == 2) {
                        final Iterator iter = indices.iterator();
                        final int index1 = iter.next();
                        final int index2 = iter.next();
                        if (index1 < leftSize && index2 >= leftSize) {
                            leftRows.add(patch(left.get(index1), Side.LEFT, x));
                            rightRows.add(patch(right.get(index2 - leftSize), Side.RIGHT, x));
                        } else if (index1 >= leftSize && index2 < leftSize) {
                            leftRows.add(patch(left.get(index2), Side.LEFT, x));
                            rightRows.add(patch(right.get(index1 - leftSize), Side.RIGHT, x));
                        } else {
                            // Both indices are on the same side, which is not normal
                            // The partition promise is invalid
                            // This should not happen as partitioning is checked
                            invalidPartition(result, "Both indices on same side: " + index1 + " " + index2);
                        }
                    } else {
                        // Probably, the partition promise is invalid
                        // This should not happen as partitioning is checked
                        invalidPartition(result, "Invalid number of indices: " + indices.size());
                    }
                }
            }
        }
    }

    private int checkPartitions(CTupleN key,
                                List rows,
                                Side side) {
        int count = 0;
        if (rows.size() > 1) {
            // List of applicabilities of rows
            final List x = new ArrayList<>();
            for (final Row row : rows) {
                x.add(getApplic(row, side));
            }
            final boolean valid = prover.alwaysAtMostOne(x.toArray(new Expression[x.size()]));
            if (!valid) {
                count++;
                LOGGER.error("Expressions are not disjoint {} for {} on {}", x, key, side);
            }
        }
        return count;
    }

    private static void invalidPartition(Partitioner.Result result,
                                         String message) {
        LOGGER.error(message);
        result.print(ERR);
        throw new DataException(message);
    }

    /**
     * Duplicates a row and sets it applicability.
     *
     * @param row The row.
     * @param side The side.
     * @param applic The new applicability.
     * @return A clone of {@code data} where applicability is set
     *         to {@code applic}.
     */
    private Row patch(Row row,
                      Side side,
                      Expression applic) {
        final int applicColumn = getApplicColumn(side);
        final Row.Builder r = Row.builder();
        for (int index = 0; index < row.size(); index++) {
            if (index == applicColumn) {
                r.addValue(applic.toString());
            } else {
                r.addValue(row.getValue(index));
            }
        }
        return r.build();
    }

    /**
     * @param row The row.
     * @param side The side.
     * @return The applicability of {@code row}.
     */
    private Expression getApplic(Row row,
                                 Side side) {
        final int applicColumn = getApplicColumn(side);
        final String value = row.getValue(applicColumn);
        return pool.get(value);
    }

    /**
     * Checks the validity of the applicability of a row.
     * 
     * If applicability is invalid, a message is generated.
     *
     * @param row The row.
     * @param side The side.
     * @param number The row number.
     * @return 1 if applicability is invalid, 0 otherwise.
     */
    private int checkApplic(Row row,
                            Side side,
                            int number) {
        final int applicColumn = getApplicColumn(side);
        final String value = row.getValue(applicColumn);
        try {
            final Expression x = pool.get(value);
            checker.checkCompliance(x);
        } catch (final ApplicException e) {
            LOGGER.error("Invalid applicability '{}', {} side, line: {}. {}",
                         value,
                         side,
                         number + 2,
                         e);
            return 1;
        }
        return 0;
    }

    private static int[] buildKeyColumns(Header header,
                                         String... keyNames) {
        final int[] result = new int[keyNames.length];
        for (int index = 0; index < keyNames.length; index++) {
            final int column = header.getMatchingIndex(keyNames[index]);
            if (column < 0) {
                throw new IllegalArgumentException("Key '" + keyNames[index] + "' missing in " + header);
            }
            result[index] = column;
        }
        return result;
    }

    private static int buildKeyColumn(Header header,
                                      String name) {
        final int column = header.getMatchingIndex(name);
        if (column < 0) {
            throw new IllegalArgumentException("'" + name + "' missing in " + header);
        }
        return column;
    }

    /**
     * @param side The side.
     * @return The keys indices on {@code side}.
     */
    private int[] getKeyColumns(Side side) {
        return side == Side.LEFT ? leftKeyColumns : rightKeyColumns;
    }

    /**
     * @param side The side.
     * @return The applic column index on {@code side}.
     */
    private int getApplicColumn(Side side) {
        return side == Side.LEFT ? leftApplicColumn : rightApplicColumn;
    }

    /**
     * Returns an informative string about a row.
     *
     * @param side The side.
     * @param row The row.
     * @param number The row number (0-based).
     * @return An informative string about a {@code row}.
     */
    private static String locate(Side side,
                                 Row row,
                                 int number) {
        return " in " + side + " row " + row + ", line " + (number + 2);
    }

    /**
     * Returns the key of a row.
     *
     * @param side The side.
     * @param row The row.
     * @param number The row number (0-based).
     * @return The key of {@code row}.
     */
    private CTupleN getKey(Side side,
                                   Row row,
                                   int number) {
        final int[] keyColumns = getKeyColumns(side);
        final String[] values = new String[keyColumns.length];
        for (int index = 0; index < keyColumns.length; index++) {
            final int column = keyColumns[index];
            final String value = row.getValue(column);
            if (value == null) {
                throw new InvalidDataException("Missing " + keyNames.get(column) + " cell" + locate(side, row, number));
            }
            values[index] = value;
        }
        return CTupleN.of(values);
    }

    /**
     * @return The left result rows.
     */
    public List getLeftRows() {
        return leftRows;
    }

    /**
     * @return The right result rows.
     */
    public List getRightRows() {
        return rightRows;
    }
}