All Downloads are FREE. Search and download functionalities are using the official Maven repository.

cdc.office.tools.KeyedSheetDiff Maven / Gradle / Ivy

package cdc.office.tools;

import java.io.File;
import java.io.IOException;
import java.io.PrintStream;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.function.Predicate;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;
import java.util.stream.Collectors;

import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.apache.logging.log4j.Level;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.logging.log4j.io.IoBuilder;

import cdc.office.ss.SheetLoader;
import cdc.office.ss.SheetParserFactory;
import cdc.office.ss.WorkbookKind;
import cdc.office.ss.WorkbookWriterFeatures;
import cdc.office.tables.Header;
import cdc.office.tables.HeaderCell;
import cdc.office.tables.HeaderMapper;
import cdc.office.tables.Row;
import cdc.office.tables.Rows;
import cdc.office.tables.diff.KeyedTableDiff;
import cdc.office.tables.diff.Side;
import cdc.office.tools.KeyedSheetDiff.MainArgs.Feature;
import cdc.util.cli.AbstractMainSupport;
import cdc.util.cli.FeatureMask;
import cdc.util.cli.MainResult;
import cdc.util.cli.OptionEnum;
import cdc.util.time.Chronometer;

/**
 * Utility used to compare 2 workbook sheets whose lines are identified by key columns.
 *
 * @author Damien Carbonne
 */
public final class KeyedSheetDiff {
    static final Logger LOGGER = LogManager.getLogger(KeyedSheetDiff.class);
    static final PrintStream OUT = IoBuilder.forLogger(LOGGER).setLevel(Level.INFO).buildPrintStream();

    private static final String HEADER1 = "   Header 1: ";
    private static final String HEADER2 = "   Header 2: ";
    private static final String PREFIX = "Column";

    final MainArgs margs;

    private KeyedSheetDiff(MainArgs margs) {
        this.margs = margs;
    }

    private void info(String message) {
        if (margs.features.isEnabled(MainArgs.Feature.VERBOSE)) {
            LOGGER.info(message);
        }
    }

    private void warn(String message) {
        if (margs.features.isEnabled(MainArgs.Feature.VERBOSE)) {
            LOGGER.warn(message);
        }
    }

    public static class MainArgs {
        public static final String DEFAULT_ADDED_MARK = KeyedTableDiffExporter.DEFAULT_ADDED_MARK;
        public static final String DEFAULT_REMOVED_MARK = KeyedTableDiffExporter.DEFAULT_REMOVED_MARK;
        public static final String DEFAULT_CHANGED_MARK = KeyedTableDiffExporter.DEFAULT_CHANGED_MARK;
        public static final String DEFAULT_UNCHANGED_MARK = KeyedTableDiffExporter.DEFAULT_UNCHANGED_MARK;
        public static final String DEFAULT_FILE1_MARK = KeyedTableDiffExporter.DEFAULT_FILE1_MARK;
        public static final String DEFAULT_FILE2_MARK = KeyedTableDiffExporter.DEFAULT_FILE2_MARK;
        public static final String DEFAULT_DIFF_MARK = KeyedTableDiffExporter.DEFAULT_DIFF_MARK;
        public static final String DEFAULT_DIFF_SHEET_NAME = KeyedTableDiffExporter.DEFAULT_DIFF_SHEET_NAME;

        public enum Feature implements OptionEnum {
            FIX_HEADERS("fix-headers",
                        "If headers contain errors (duplicate or empty names), fix them. A renaming is applied, that can lead to unexpected results."),
            IGNORE_DUPLICATES("ignore-duplicates",
                              "If there are duplicates, the first row with a key is compared, and remaining rows with the same key are ignored."),
            UNCHANGED_LINES("unchanged-lines",
                            "Output unchanged lines (default)."),
            NO_UNCHANGED_LINES("no-unchanged-lines",
                               "Do not output unchanged lines."),
            ADDED_OR_REMOVED_MARKS("added-or-removed-marks",
                                   "Output added or removed marks (default)."),
            NO_ADDED_OR_REMOVED_MARKS("no-added-or-removed-marks",
                                      "Do not output added or removed marks."),
            COLORS("colors",
                   "Use colors (default with output formats that support colors)."),
            NO_COLORS("no-colors",
                      "Do not use colors (default with output formats that don't support colors)."),
            MARKS("marks",
                  "Use marks (default with output formats that don't support colors)."),
            NO_MARKS("no-marks",
                     "Do not use marks (default with output formats that support colors)."),
            LINE_DIFF_COLUMN("line-diff-column",
                             "Add a column describing differences of lines (default)."),
            NO_LINE_DIFF_COLUMN("no-line-diff-column",
                                "Do not add a column describing differences of lines."),
            CELL_DIFF_COLUMNS("cell-diff-columns",
                              "Add a column for each key or data column describing differences of cells."),
            NO_CELL_DIFF_COLUMNS("no-cell-diff-columns",
                                 "Do not add a column for each key or data column describing differences of cells (default)."),
            VULNERABILITY_PROTECTIONS("vulnerability-protections",
                                      "Enable vulnerability protections such as detection of Zip bombs (default)."),
            NO_VULNERABILITY_PROTECTIONS("no-vulnerability-protections",
                                         "Disable vulnerability protections such as detection of Zip bombs.\n"
                                                 + "This should be used with trusted sources."),
            SORT_LINES("sort-lines",
                       "Sort lines using keys. Order of key columns declaration matters."),
            AUTO_SIZE_COLUMNS("auto-size-columns",
                              "Auto size columns. This may take longer time."),
            AUTO_SIZE_ROWS("auto-size-rows",
                           "Auto size rows. This may take longer time."),
            SHOW_CHANGE_DETAILS("show-change-details",
                                "If enabled, show value 1 (with removed mark or color) and value 2 (with added mark or color).\n"
                                        + " Otherwise, show value 2 (with changed mark or color)."),
            SHOW_ORIGINAL_NAMES("show-original-names",
                                "If enabled, adds names of original columns in output columns."
                                        + "\nThis is useful when name mapping is done."),
            SPLIT_COMPARISONS("split-comparisons",
                              "If enabled, 2 columns are created for each pair of compared input columns, one for each input file."),
            SYNTHESIS("synthesis",
                      "Print a synthesis of differences on terminal."),
            SAVE_SYNTHESIS("save-synthesis",
                           "Save synthesis in output file, in a dedicated sheet."),
            STRICT_CSV("strict-csv",
                       "If enabled, CSV files are strictly parsed and checked for legality of '\"' in cells."),
            NO_STRICT_CSV("no-strict-csv",
                          "If enabled, CSV files are NOT strictly parsed. Some invalid cells are accepted (default)."),
            VERBOSE("verbose", "Print progress messages.");

            private final String name;
            private final String description;

            private Feature(String name,
                            String description) {
                this.name = name;
                this.description = description;
            }

            @Override
            public final String getName() {
                return name;
            }

            @Override
            public final String getDescription() {
                return description;
            }
        }

        /** First file. */
        public File file1;
        /** First sheet. */
        public String sheet1;
        /** First pattern. */
        public String pattern1;
        /** First map. */
        public final Map map1 = new HashMap<>();
        /** Second file. */
        public File file2;
        /** Second sheet. */
        public String sheet2;
        /** Second pattern. */
        public String pattern2;
        /** Second map. */
        public final Map map2 = new HashMap<>();
        /** Output file. */
        public File output;
        /** Output sheet. */
        public String sheet = DEFAULT_DIFF_SHEET_NAME;
        /** Key names (after mapping of names). */
        public final List keys = new ArrayList<>();
        /** Names of attributes to keep (after mapping of names). */
        public final Set attributes = new HashSet<>();
        /** charset. */
        public Charset charset;
        /** separator. */
        public char separator = ';';
        public String addedMark = DEFAULT_ADDED_MARK;
        public String removedMark = DEFAULT_REMOVED_MARK;
        public String changedMark = DEFAULT_CHANGED_MARK;
        public String unchangedMark = DEFAULT_UNCHANGED_MARK;
        public String diffMark = DEFAULT_DIFF_MARK;
        public String file1Mark = DEFAULT_FILE1_MARK;
        public String file2Mark = DEFAULT_FILE2_MARK;
        public int maxRowsPerSheet = -1;

        public final FeatureMask features = new FeatureMask<>();

        public final void setEnabled(Feature feature,
                                     boolean enabled) {
            features.setEnabled(feature, enabled);
        }

        public final boolean isEnabled(Feature feature) {
            return features.isEnabled(feature);
        }
    }

    private List load(File file,
                           int number,
                           String sheet,
                           String pattern) throws IOException {
        final Chronometer chrono = new Chronometer();

        final SheetLoader loader = new SheetLoader();
        loader.getFactory().setCharset(margs.charset);
        loader.getFactory().setSeparator(margs.separator);
        loader.getFactory().setEnabled(SheetParserFactory.Feature.DISABLE_VULNERABILITY_PROTECTIONS,
                                       margs.isEnabled(MainArgs.Feature.NO_VULNERABILITY_PROTECTIONS));
        loader.getFactory().setEnabled(SheetParserFactory.Feature.STRICT_CSV,
                                       margs.isEnabled(MainArgs.Feature.STRICT_CSV));

        chrono.start();
        final List rows;
        if (sheet != null) {
            info("Load sheet " + number + " " + file + ":" + sheet);
            rows = loader.load(file, null, sheet);
        } else if (pattern != null) {
            info("Load sheets " + number + " " + file + ":" + pattern);
            try {
                final Pattern p = Pattern.compile(pattern);
                final Predicate predicate = s -> p.matcher(s).matches();
                rows = loader.load(file, null, predicate);
                if (rows.isEmpty()) {
                    final List sheets = loader.getMatchingSheetNames(file, null, predicate);
                    if (sheets.isEmpty()) {
                        throw new IllegalArgumentException("File " + file + " does not contain any sheet matching '" + pattern
                                + "'.");
                    }
                }
            } catch (final PatternSyntaxException e) {
                throw new IllegalArgumentException("Invalid sheet name pattern: '" + pattern + "'", e);
            }
        } else {
            info("Load sheet " + number + " " + file);
            rows = loader.load(file, null, 0);
        }

        chrono.suspend();
        info("Done (" + rows.size() + " rows) " + chrono);

        return rows;
    }

    void execute() throws IOException {
        final Chronometer chrono = new Chronometer();

        final List r1 = load(margs.file1, 1, margs.sheet1, margs.pattern1);
        final List r2 = load(margs.file2, 2, margs.sheet2, margs.pattern2);

        // Check that sheets are not empty: they must at least contain a header
        if (r1.isEmpty()) {
            throw new IllegalArgumentException("No data in file1 sheet(s).");
        } else if (r2.isEmpty()) {
            throw new IllegalArgumentException("No data in file2 sheet(s).");
        }

        // Check headers
        final List messages = new ArrayList<>();
        boolean header1NeedsFix = false;
        boolean header2NeedsFix = false;
        if (r1.get(0).hasNullValues()) {
            header1NeedsFix = true;
            messages.add("Header of file1 sheet(s) contains empty intermediate cell(s), columns "
                    + r1.get(0).getNullValuesIndices().stream().map(i -> i + 1).toList()
                    + ": " + r1.get(0));
        }
        if (r1.get(0).hasDuplicateValues()) {
            header1NeedsFix = true;
            messages.add("Header of file1 sheet(s) has duplicate values, "
                    + r1.get(0).getDuplicateValues()
                    + ": " + r1.get(0));
        }
        if (r2.get(0).hasNullValues()) {
            header2NeedsFix = true;
            messages.add("Header of file2 sheet(s) contains empty intermediate cell(s), columns "
                    + r2.get(0).getNullValuesIndices().stream().map(i -> i + 1).toList()
                    + ": " + r2.get(0));
        }
        if (r2.get(0).hasDuplicateValues()) {
            header2NeedsFix = true;
            messages.add("Header of file2 sheet(s) has duplicate values, "
                    + r2.get(0).getDuplicateValues()
                    + ": " + r2.get(0));
        }

        if (margs.features.isEnabled(MainArgs.Feature.FIX_HEADERS)) {
            for (final String message : messages) {
                warn(message);
            }
        } else if (!messages.isEmpty()) {
            throw new IllegalArgumentException(messages.get(0));
        }

        // Can be here if 1) headers are valid or 2) FAIL_SAFE is enabled
        // 1) fix does nothing
        // 2) headers are fixed
        if (header1NeedsFix) {
            info("Fix Header 1");
            Rows.fixHeader(r1, PREFIX);
        }
        if (header2NeedsFix) {
            info("Fix Header 2");
            Rows.fixHeader(r2, PREFIX);
        }

        info(HEADER1 + Header.builder().names(r1.get(0)).build());
        info(HEADER2 + Header.builder().names(r2.get(0)).build());

        // Rename headers
        if (!margs.map1.isEmpty()) {
            info("Rename Header 1 using: " + margs.map1);
            Rows.renameHeader(r1, margs.map1);
            info(HEADER1 + Header.builder().names(r1.get(0)).build());
        }
        if (!margs.map2.isEmpty()) {
            info("Rename Header 2 using: " + margs.map2);
            Rows.renameHeader(r2, margs.map2);
            info(HEADER2 + Header.builder().names(r2.get(0)).build());
        }

        // Remove unwanted columns
        final List rows1;
        final List rows2;
        if (margs.attributes.isEmpty()) {
            rows1 = r1;
            rows2 = r2;
        } else {
            // Keep only columns that are keys or listed in data
            info("Filter columns");
            chrono.start();
            final Set keep = new HashSet<>();
            keep.addAll(margs.keys);
            keep.addAll(margs.attributes);
            final Header h1 = Header.builder().names(r1.get(0)).build();
            final Header h2 = Header.builder().names(r2.get(0)).build();
            rows1 = Rows.filter(r1, h1, keep);
            rows2 = Rows.filter(r2, h2, keep);
            chrono.suspend();
            info("Done " + chrono);
            info(HEADER1 + Header.builder().names(rows1.get(0)).build());
            info(HEADER2 + Header.builder().names(rows2.get(0)).build());
        }

        // Here only desired columns are present

        // Retrieve headers of both files
        final Header header1 = Header.builder().names(rows1.get(0)).build();
        final Header header2 = Header.builder().names(rows2.get(0)).build();

        info("Header 1 " + header1);
        info("Header 2 " + header2);

        // Check that both headers contain the expected keys
        final Header expected = Header.builder()
                                      .names(margs.keys)
                                      .build();
        info("Expected header " + expected);

        final HeaderMapper mapper1 = HeaderMapper.builder()
                                                 .mandatory(expected)
                                                 .actual(header1)
                                                 .build();

        final HeaderMapper mapper2 = HeaderMapper.builder()
                                                 .mandatory(expected)
                                                 .actual(header2)
                                                 .build();

        if (!mapper1.hasAllMandatoryCells()) {
            throw new IllegalArgumentException("Missing keys: "
                    + mapper1.getMissingMandatoryCells().stream().map(HeaderCell::toString).sorted()
                             .collect(Collectors.joining(",", "[", "]"))
                    + " in file1 header: " + header1);
        }

        if (!mapper2.hasAllMandatoryCells()) {
            throw new IllegalArgumentException("Missing keys: "
                    + mapper2.getMissingMandatoryCells().stream().map(HeaderCell::toString).sorted()
                             .collect(Collectors.joining(",", "[", "]"))
                    + " in file2 header: " + header2);
        }

        // Remove header in both input rows
        rows1.remove(0);
        rows2.remove(0);

        // Compare the data rows
        chrono.start();
        info("Compare rows");
        final KeyedTableDiff diff = KeyedTableDiff.builder()
                                                  .leftSystemId(margs.file1.getName()
                                                          + (margs.sheet1 == null ? "" : ":" + margs.sheet1))
                                                  .leftHeader(header1)
                                                  .leftRows(rows1)
                                                  .rightSystemId(margs.file2.getName()
                                                          + (margs.sheet2 == null ? "" : ":" + margs.sheet2))
                                                  .rightHeader(header2)
                                                  .rightRows(rows2)
                                                  .keyNames(margs.keys)
                                                  .warn(this::warn)
                                                  .ignoreDuplicates(margs.isEnabled(MainArgs.Feature.IGNORE_DUPLICATES))
                                                  .build();
        chrono.suspend();
        info("Done" + (diff.getNumberOfIgnoredRows() == 0
                ? ""
                : " (" + diff.getNumberOfIgnoredRows(Side.LEFT) + "/" + rows1.size() + " "
                        + diff.getNumberOfIgnoredRows(Side.RIGHT) + "/" + rows2.size() + " ignored)")
                + " " + chrono);

        if (margs.isEnabled(MainArgs.Feature.SYNTHESIS)) {
            diff.getSynthesis().print(OUT);
        }

        final WorkbookKind outputKind = WorkbookKind.from(margs.output);
        final boolean supportsColors =
                outputKind == WorkbookKind.XLS
                        || outputKind == WorkbookKind.XLSM
                        || outputKind == WorkbookKind.XLSX;
        final boolean showColors;
        if (margs.features.isEnabled(MainArgs.Feature.COLORS)) {
            showColors = true;
        } else if (margs.features.isEnabled(MainArgs.Feature.NO_COLORS)) {
            showColors = false;
        } else {
            showColors = supportsColors;
        }

        final boolean showMarks;
        if (margs.features.isEnabled(MainArgs.Feature.MARKS)) {
            showMarks = true;
        } else if (margs.features.isEnabled(MainArgs.Feature.NO_MARKS)) {
            showMarks = false;
        } else {
            showMarks = !supportsColors;
        }

        // Export result
        final KeyedTableDiffExporter exporter =
                KeyedTableDiffExporter.builder()
                                      .addedMark(margs.isEnabled(Feature.NO_ADDED_OR_REMOVED_MARKS) ? "" : margs.addedMark)
                                      .changedMark(margs.changedMark)
                                      .unchangedMark(margs.unchangedMark)
                                      .removedMark(margs.isEnabled(Feature.NO_ADDED_OR_REMOVED_MARKS) ? "" : margs.removedMark)
                                      .diffMark(margs.diffMark)
                                      .file1Mark(margs.file1Mark)
                                      .file2Mark(margs.file2Mark)
                                      .header1(header1)
                                      .header2(header2)
                                      .map1(margs.map1)
                                      .map2(margs.map2)
                                      .diffSheetName(margs.sheet)
                                      .maxRowsPerSheet(margs.maxRowsPerSheet)
                                      .features(WorkbookWriterFeatures.builder()
                                                                      .separator(margs.separator)
                                                                      .charset(margs.charset)
                                                                      .maxLineLength(-1)
                                                                      .enable(WorkbookWriterFeatures.Feature.AUTO_FILTER_COLUMNS)
                                                                      .setEnabled(WorkbookWriterFeatures.Feature.AUTO_SIZE_COLUMNS,
                                                                                  margs.features.contains(Feature.AUTO_SIZE_COLUMNS))
                                                                      .setEnabled(WorkbookWriterFeatures.Feature.AUTO_SIZE_ROWS,
                                                                                  margs.features.contains(Feature.AUTO_SIZE_ROWS))
                                                                      .setEnabled(WorkbookWriterFeatures.Feature.RICH_TEXT,
                                                                                  !margs.features.contains(Feature.NO_COLORS))
                                                                      .build())
                                      .hint(KeyedTableDiffExporter.Hint.ADD_CELL_DIFF_COLUMNS,
                                            margs.isEnabled(Feature.CELL_DIFF_COLUMNS))
                                      .hint(KeyedTableDiffExporter.Hint.ADD_LINE_DIFF_COLUMN,
                                            !margs.isEnabled(Feature.NO_LINE_DIFF_COLUMN))
                                      .hint(KeyedTableDiffExporter.Hint.SAVE_SYNTHESIS,
                                            margs.isEnabled(Feature.SAVE_SYNTHESIS))
                                      .hint(KeyedTableDiffExporter.Hint.SHOW_CHANGE_DETAILS,
                                            margs.isEnabled(Feature.SHOW_CHANGE_DETAILS))
                                      .hint(KeyedTableDiffExporter.Hint.SHOW_COLORS,
                                            showColors)
                                      .hint(KeyedTableDiffExporter.Hint.SHOW_MARKS,
                                            showMarks)
                                      .hint(KeyedTableDiffExporter.Hint.SHOW_ORIGINAL_NAMES,
                                            margs.isEnabled(Feature.SHOW_ORIGINAL_NAMES))
                                      .hint(KeyedTableDiffExporter.Hint.SHOW_UNCHANGED_LINES,
                                            !margs.isEnabled(Feature.NO_UNCHANGED_LINES))
                                      .hint(KeyedTableDiffExporter.Hint.SORT_LINES,
                                            margs.isEnabled(Feature.SORT_LINES))
                                      .hint(KeyedTableDiffExporter.Hint.SPLIT_COMPARISONS,
                                            margs.isEnabled(Feature.SPLIT_COMPARISONS))
                                      .build();

        chrono.start();
        info("Generate " + margs.output);
        exporter.save(diff, margs.output);
        chrono.suspend();
        info("Done " + chrono);
    }

    public static void execute(MainArgs margs) throws IOException {
        final KeyedSheetDiff instance = new KeyedSheetDiff(margs);
        instance.execute();
    }

    public static MainResult exec(String... args) {
        final MainSupport support = new MainSupport();
        support.main(args);
        return support.getResult();
    }

    public static void main(String... args) {
        final int code = exec(args).getCode();
        System.exit(code);
    }

    private static class MainSupport extends AbstractMainSupport {
        private static final String KEY_VALUE_SEPARATOR = "::";
        private static final String FILE1 = "file1";
        private static final String FILE2 = "file2";
        private static final String MAP1 = "map1";
        private static final String MAP2 = "map2";
        private static final String SHEET1 = "sheet1";
        private static final String SHEET2 = "sheet2";
        private static final String SHEET = "sheet";
        private static final String PATTERN1 = "pattern1";
        private static final String PATTERN2 = "pattern2";
        private static final String KEY = "key";
        private static final String ATTRIBUTE = "attribute";
        private static final String CHARSET = "charset";
        private static final String SEPARATOR = "separator";
        private static final String ADDED_MARK = "added-mark";
        private static final String REMOVED_MARK = "removed-mark";
        private static final String CHANGED_MARK = "changed-mark";
        private static final String UNCHANGED_MARK = "unchanged-mark";
        private static final String DIFF_MARK = "diff-mark";
        private static final String FILE1_MARK = "file1-mark";
        private static final String FILE2_MARK = "file2-mark";
        private static final String MAX_ROWS_PER_SHEET = "max-rows";

        public MainSupport() {
            super(KeyedSheetDiff.class, LOGGER);
        }

        @Override
        protected String getVersion() {
            return Config.VERSION;
        }

        @Override
        protected String getHelpHeader() {
            return """
                   %s is used to compare two sets of sheets in workbooks (csv, xls, xlsx, xlsm  or ods).
                   If several sheets are loaded from a file, they must all have the same columns, in the same order.
                   Lines in sheets are matched by a set of key columns.
                   Key and attribute columns can be renamed.
                   The attribute columns to compare can be selected.
                   Input and output files can use different formats.
                   Differences are indicated with textual marks or colors (if output format supports it).

                   '--sheet1' (resp. '--sheet2') and '--pattern1' (resp. '--pattern2') options are exclusive and optional.

                   Patterns are Java patterns.""".formatted(KeyedSheetDiff.class.getSimpleName());
        }

        @Override
        protected String getHelpFooter() {
            return """
                   KNOWN LIMITATIONS
                   All CSV files (input and output) must use the same charset and separator.
                   When mixing input file formats with CSV, if a key column contains numbers, comparison will fail.
                   Ods handling is experimental. Ods output does not support coloring.""";
        }

        @Override
        protected void addSpecificOptions(Options options) {
            options.addOption(Option.builder()
                                    .longOpt(FILE1)
                                    .desc("Mandatory name of the first input file.")
                                    .hasArg()
                                    .required()
                                    .build());

            options.addOption(Option.builder()
                                    .longOpt(FILE2)
                                    .desc("Mandatory name of the second input file.")
                                    .hasArg()
                                    .required()
                                    .build());

            options.addOption(Option.builder()
                                    .longOpt(SHEET1)
                                    .desc("Optional name of the sheet in the first input file.")
                                    .hasArg()
                                    .build());
            options.addOption(Option.builder()
                                    .longOpt(PATTERN1)
                                    .desc("Optional pattern of the sheet(s) in the first input file.")
                                    .hasArg()
                                    .build());

            options.addOption(Option.builder()
                                    .longOpt(SHEET2)
                                    .desc("Optional name of the sheet in the second input file.")
                                    .hasArg()
                                    .build());
            options.addOption(Option.builder()
                                    .longOpt(PATTERN2)
                                    .desc("Optional pattern of the sheet(s) in the second input file.")
                                    .hasArg()
                                    .build());

            options.addOption(Option.builder()
                                    .longOpt(SHEET)
                                    .desc("Optional name of the delta sheet in the output file. (default: \""
                                            + MainArgs.DEFAULT_DIFF_SHEET_NAME + "\").")
                                    .hasArg()
                                    .build());

            options.addOption(Option.builder()
                                    .longOpt(MAP1)
                                    .desc("Optional mapping of column names (keys and attributes) of first file."
                                            + "\nEach mapping has the form ::.")
                                    .hasArgs()
                                    .build());

            options.addOption(Option.builder()
                                    .longOpt(MAP2)
                                    .desc("Optional mapping of column names (keys and attributes) of second file."
                                            + "\nEach mapping has the form ::.")
                                    .hasArgs()
                                    .build());

            options.addOption(Option.builder()
                                    .longOpt(OUTPUT)
                                    .desc("Mandatory name of the output file. It must have a supported extension.")
                                    .hasArg()
                                    .required()
                                    .build());

            options.addOption(Option.builder()
                                    .longOpt(KEY)
                                    .desc("Mandatory name(s) of key column(s)."
                                            + "\nIf mapping is used, use new name(s).")
                                    .hasArgs()
                                    .required()
                                    .build());

            options.addOption(Option.builder()
                                    .longOpt(ATTRIBUTE)
                                    .desc("""
                                          Optional name(s) of attribute column(s) to compare.
                                          If omitted, all attribute columns are compared.
                                          If mapping is used, use new name(s).""")
                                    .hasArgs()
                                    .build());

            options.addOption(Option.builder()
                                    .longOpt(CHARSET)
                                    .desc("Optional name of the charset for CSV files (default: platform default charset).")
                                    .hasArg()
                                    .build());

            options.addOption(Option.builder()
                                    .longOpt(SEPARATOR)
                                    .desc("Optional char separator for CSV files (default: ';').")
                                    .hasArg()
                                    .build());

            options.addOption(Option.builder()
                                    .longOpt(ADDED_MARK)
                                    .desc("Optional mark for added cells (default: \"" + MainArgs.DEFAULT_ADDED_MARK + "\").")
                                    .hasArg()
                                    .build());

            options.addOption(Option.builder()
                                    .longOpt(REMOVED_MARK)
                                    .desc("Optional mark for removed cells (default: \"" + MainArgs.DEFAULT_REMOVED_MARK + "\").")
                                    .hasArg()
                                    .build());

            options.addOption(Option.builder()
                                    .longOpt(CHANGED_MARK)
                                    .desc("Optional mark for changed cells (default: \"" + MainArgs.DEFAULT_CHANGED_MARK + "\").")
                                    .hasArg()
                                    .build());

            options.addOption(Option.builder()
                                    .longOpt(UNCHANGED_MARK)
                                    .desc("Optional mark for unchanged cells (default: \"" + MainArgs.DEFAULT_UNCHANGED_MARK
                                            + "\").")
                                    .hasArg()
                                    .build());

            options.addOption(Option.builder()
                                    .longOpt(DIFF_MARK)
                                    .desc("Optional mark for diff columns (default: \"" + MainArgs.DEFAULT_DIFF_MARK + "\").")
                                    .hasArg()
                                    .build());

            options.addOption(Option.builder()
                                    .longOpt(FILE1_MARK)
                                    .desc("Optional mark for file1 columns (default: \"" + MainArgs.DEFAULT_FILE1_MARK + "\")."
                                            + "\nRelated to " + MainArgs.Feature.SHOW_ORIGINAL_NAMES.getName() + " option.")
                                    .hasArg()
                                    .build());

            options.addOption(Option.builder()
                                    .longOpt(FILE2_MARK)
                                    .desc("Optional mark for file2 columns (default: \"" + MainArgs.DEFAULT_FILE2_MARK + "\")."
                                            + "\nRelated to " + MainArgs.Feature.SHOW_ORIGINAL_NAMES.getName() + " option.")
                                    .hasArg()
                                    .build());

            options.addOption(Option.builder()
                                    .longOpt(MAX_ROWS_PER_SHEET)
                                    .desc("""
                                          Optional maximum number of rows per sheet (default: -1).
                                          A negative value means no maximum, or the format maximum.
                                          The number of generated sheets depends on that number and on the total number of rows.
                                          A very small positive number (0, 1, 2, ...) is irrelevant and will produce errors.""")
                                    .hasArg()
                                    .build());

            AbstractMainSupport.addNoArgOptions(options, MainArgs.Feature.class);
            AbstractMainSupport.createGroup(options,
                                            MainArgs.Feature.ADDED_OR_REMOVED_MARKS,
                                            MainArgs.Feature.NO_ADDED_OR_REMOVED_MARKS);
            AbstractMainSupport.createGroup(options,
                                            MainArgs.Feature.CELL_DIFF_COLUMNS,
                                            MainArgs.Feature.NO_CELL_DIFF_COLUMNS);
            AbstractMainSupport.createGroup(options,
                                            MainArgs.Feature.COLORS,
                                            MainArgs.Feature.NO_COLORS);
            AbstractMainSupport.createGroup(options,
                                            MainArgs.Feature.LINE_DIFF_COLUMN,
                                            MainArgs.Feature.NO_LINE_DIFF_COLUMN);
            AbstractMainSupport.createGroup(options,
                                            MainArgs.Feature.MARKS,
                                            MainArgs.Feature.NO_MARKS);
            AbstractMainSupport.createGroup(options,
                                            MainArgs.Feature.UNCHANGED_LINES,
                                            MainArgs.Feature.NO_UNCHANGED_LINES);
            AbstractMainSupport.createGroup(options,
                                            MainArgs.Feature.VULNERABILITY_PROTECTIONS,
                                            MainArgs.Feature.NO_VULNERABILITY_PROTECTIONS);
            AbstractMainSupport.createGroup(options,
                                            MainArgs.Feature.STRICT_CSV,
                                            MainArgs.Feature.NO_STRICT_CSV);
            AbstractMainSupport.createGroup(options,
                                            false,
                                            SHEET1,
                                            PATTERN1);
            AbstractMainSupport.createGroup(options,
                                            false,
                                            SHEET2,
                                            PATTERN2);
        }

        private static String getKey(String s) {
            return getPart(s, KEY_VALUE_SEPARATOR, 0);
        }

        private static String getValue(String s) {
            return getPart(s, KEY_VALUE_SEPARATOR, 1);
        }

        private static void analyzeMap(String s,
                                       Map map) {
            final String key = getKey(s);
            final String value = getValue(s);
            map.put(key, value);
        }

        @Override
        protected MainArgs analyze(CommandLine cl) throws ParseException {
            final MainArgs margs = new MainArgs();

            margs.file1 = getValueAsResolvedFile(cl, FILE1, IS_FILE);
            margs.sheet1 = getValueAsString(cl, SHEET1, null);
            margs.pattern1 = getValueAsString(cl, PATTERN1, null);
            margs.file2 = getValueAsResolvedFile(cl, FILE2, IS_FILE);
            margs.sheet2 = getValueAsString(cl, SHEET2, null);
            margs.pattern2 = getValueAsString(cl, PATTERN2, null);
            margs.output = getValueAsResolvedFile(cl, OUTPUT);
            margs.sheet = getValueAsString(cl, SHEET, MainArgs.DEFAULT_DIFF_SHEET_NAME);
            margs.charset = getValueAsCharset(cl, CHARSET);
            margs.separator = getValueAsChar(cl, SEPARATOR, ';');
            margs.addedMark = getValueAsString(cl, ADDED_MARK, MainArgs.DEFAULT_ADDED_MARK);
            margs.removedMark = getValueAsString(cl, REMOVED_MARK, MainArgs.DEFAULT_REMOVED_MARK);
            margs.changedMark = getValueAsString(cl, CHANGED_MARK, MainArgs.DEFAULT_CHANGED_MARK);
            margs.unchangedMark = getValueAsString(cl, UNCHANGED_MARK, MainArgs.DEFAULT_UNCHANGED_MARK);
            margs.diffMark = getValueAsString(cl, DIFF_MARK, MainArgs.DEFAULT_DIFF_MARK);
            margs.file1Mark = getValueAsString(cl, FILE1_MARK, MainArgs.DEFAULT_FILE1_MARK);
            margs.file2Mark = getValueAsString(cl, FILE2_MARK, MainArgs.DEFAULT_FILE2_MARK);
            margs.maxRowsPerSheet = getValueAsInt(cl, MAX_ROWS_PER_SHEET, -1);

            for (final String s : cl.getOptionValues(KEY)) {
                margs.keys.add(s);
            }
            if (cl.hasOption(ATTRIBUTE)) {
                for (final String s : cl.getOptionValues(ATTRIBUTE)) {
                    margs.attributes.add(s);
                }
            }
            if (cl.hasOption(MAP1)) {
                for (final String s : cl.getOptionValues(MAP1)) {
                    analyzeMap(s, margs.map1);
                }
            }
            if (cl.hasOption(MAP2)) {
                for (final String s : cl.getOptionValues(MAP2)) {
                    analyzeMap(s, margs.map2);
                }
            }
            setMask(cl, MainArgs.Feature.class, margs.features::setEnabled);
            return margs;
        }

        @Override
        protected Void execute(MainArgs margs) throws Exception {
            KeyedSheetDiff.execute(margs);
            return null;
        }
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy