cdc.office.tools.Anonymizer Maven / Gradle / Ivy
The newest version!
package cdc.office.tools;
import java.io.IOException;
import java.nio.charset.Charset;
import java.util.HashSet;
import java.util.Set;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import cdc.office.csv.CsvParser;
import cdc.office.csv.CsvWriter;
import cdc.office.tables.Row;
import cdc.office.tables.RowLocation;
import cdc.office.tables.TableHandler;
import cdc.office.tools.AbstractFilter.BaseMainArgs.BaseFeature;
import cdc.util.cli.AbstractMainSupport;
import cdc.util.cli.FeatureMask;
import cdc.util.cli.MainResult;
import cdc.util.cli.OptionEnum;
import cdc.util.function.Evaluation;
import cdc.util.lang.ExceptionWrapper;
import cdc.util.strings.StringAnonymizer;
import cdc.util.strings.StringConversion;
/**
 * Class used to anonymize selected columns of a CSV file.
 * <p>
 * One can:
 * <ul>
 * <li>limit column length,</li>
 * <li>replace characters by a specified character, or jam them (replace them by generated random characters),</li>
 * <li>preserve white spaces,</li>
 * <li>preserve specified characters.</li>
 * </ul>
 *
 * @author Damien Carbonne
 */
public final class Anonymizer extends AbstractFilter {
private static final Logger LOGGER = LogManager.getLogger(Anonymizer.class);
/**
 * Creates an anonymizer configured by the given arguments.
 * <p>
 * The constructor is private: instances are only created by {@link #execute(MainArgs)}.
 *
 * @param margs The arguments, forwarded as-is to the super class constructor.
 */
private Anonymizer(MainArgs margs) {
super(margs);
}
/**
 * Arguments of the anonymizer, in addition to those inherited from
 * {@link AbstractFilter.BaseMainArgs}.
 */
public static class MainArgs extends AbstractFilter.BaseMainArgs {
    /**
     * Boolean options specific to the anonymizer.
     * <p>
     * Each constant carries the command line name of the option and its help text.
     */
    public enum Feature implements OptionEnum {
        PRESERVE_WHITESPACES("preserve-whitespaces", "If set, white spaces are preserved."),
        JAM("jam", "If set, characters are jammed instead of being replaced by the same character.");

        private final String name;
        private final String description;

        private Feature(String name,
                        String description) {
            this.name = name;
            this.description = description;
        }

        /**
         * @return The command line name of this option.
         */
        @Override
        public final String getName() {
            return name;
        }

        /**
         * @return The help text of this option.
         */
        @Override
        public final String getDescription() {
            return description;
        }
    }

    /** Columns (1-based) to anonymize. */
    public final Set<Integer> columns = new HashSet<>();

    /** Replacement character ('X' by default). */
    public char replacement = 'X';

    /** Characters to preserve ({@code null} by default). */
    public String preservedCharacters = null;

    /**
     * Max column size (-1 by default).
     * NOTE(review): -1 presumably means "no limit"; confirm against StringAnonymizer.
     */
    public int maxLength = -1;

    /** Enabled state of each {@link Feature}. */
    protected final FeatureMask<Feature> features = new FeatureMask<>();

    /**
     * Enables or disables a feature.
     *
     * @param feature The feature.
     * @param enabled {@code true} to enable {@code feature}, {@code false} to disable it.
     */
    public final void setEnabled(Feature feature,
                                 boolean enabled) {
        features.setEnabled(feature, enabled);
    }

    /**
     * @param feature The feature.
     * @return {@code true} if {@code feature} is enabled.
     */
    public final boolean isEnabled(Feature feature) {
        return features.isEnabled(feature);
    }
}
/**
 * Table handler that copies every received row to the CSV output,
 * anonymizing the values located in the selected columns.
 * <p>
 * The output writer is opened by the constructor and closed when the end
 * of the table is notified.
 */
private final class Handler implements TableHandler {
    final CsvWriter writer;
    final StringAnonymizer anonymizer = new StringAnonymizer();

    /**
     * Opens the output writer and configures the string anonymizer from the arguments.
     *
     * @throws IOException When the output writer cannot be created.
     */
    public Handler() throws IOException {
        // Output side
        writer = new CsvWriter(margs.output, margs.outputCharset);
        writer.setSeparator(margs.outputSeparator);

        // Anonymization settings
        anonymizer.setReplacement(margs.replacement);
        anonymizer.setPreservedCharacters(margs.preservedCharacters);
        anonymizer.setMaxLength(margs.maxLength);

        final boolean keepWhitespaces = margs.isEnabled(MainArgs.Feature.PRESERVE_WHITESPACES);
        anonymizer.setEnabled(StringAnonymizer.Feature.PRESERVE_WHITESPACES, keepWhitespaces);

        final boolean jam = margs.isEnabled(MainArgs.Feature.JAM);
        anonymizer.setEnabled(StringAnonymizer.Feature.JAM, jam);
    }

    @Override
    public void processBeginTable(String name,
                                  int numberOfRows) {
        // Nothing to do when the table starts
    }

    /**
     * Writes the header row unchanged.
     */
    @Override
    public Evaluation processHeader(Row header,
                                    RowLocation location) {
        try {
            writer.writeln(header.getValues());
            return Evaluation.CONTINUE;
        } catch (final IOException e) {
            throw ExceptionWrapper.wrap(e);
        }
    }

    /**
     * Writes a data row, anonymizing the cells whose 1-based index is a selected column.
     */
    @Override
    public Evaluation processData(Row data,
                                  RowLocation location) {
        try {
            for (int index = 0; index < data.size(); index++) {
                final String raw = data.getValue(index);
                // Selected columns are 1-based, row indices are 0-based
                final boolean untouched = !margs.columns.contains(index + 1);
                if (untouched) {
                    writer.write(raw);
                } else {
                    writer.write(anonymizer.anonymize(raw));
                }
            }
            writer.writeln();
            return Evaluation.CONTINUE;
        } catch (final IOException e) {
            throw ExceptionWrapper.wrap(e);
        }
    }

    /**
     * Closes the output writer and, in verbose mode, logs the generated file.
     */
    @Override
    public void processEndTable(String name) {
        try {
            writer.close();
        } catch (final IOException e) {
            throw ExceptionWrapper.wrap(e);
        }

        final boolean verbose = margs.isEnabled(BaseFeature.VERBOSE);
        if (verbose) {
            LOGGER.info("Generated '{}' (charset: {})", margs.output, margs.getOutputCharset());
        }
    }
}
/**
 * Parses the input CSV file and lets a {@link Handler} write the anonymized output.
 * <p>
 * The output writer is opened when the handler is created and is normally closed
 * by the handler when the end of the table is notified. If parsing is aborted by an
 * exception, the writer is closed here so that the output is not left open.
 *
 * @throws IOException When an I/O error occurs.
 */
private void execute() throws IOException {
    final Handler handler = new Handler();
    boolean completed = false;
    try {
        final CsvParser parser =
                CsvParser.builder()
                         .separator(margs.inputSeparator)
                         .build();
        if (margs.isEnabled(BaseFeature.VERBOSE)) {
            LOGGER.info("Load '{}' (charset: {})", margs.input, margs.getInputCharset());
        }
        parser.parse(margs.input,
                     margs.inputCharset == null
                             ? Charset.defaultCharset()
                             : margs.inputCharset,
                     handler,
                     margs.isEnabled(BaseFeature.HAS_HEADER) ? 1 : 0);
        completed = true;
    } finally {
        if (!completed) {
            // Parsing was aborted: the end of table may never have been notified,
            // so the writer may still be open.
            // NOTE(review): assumes closing an already closed CsvWriter is harmless,
            // as required by java.io.Closeable - confirm.
            try {
                handler.writer.close();
            } catch (final IOException e) {
                // Only log, so that the exception that aborted parsing is not masked.
                LOGGER.warn("Failed to close '{}'", margs.output, e);
            }
        }
    }
}
/**
 * Runs the anonymizer with the given arguments.
 *
 * @param margs The arguments.
 * @throws IOException When an I/O error occurs.
 */
public static void execute(MainArgs margs) throws IOException {
    new Anonymizer(margs).execute();
}
/**
 * Runs the tool from command line arguments, without exiting the JVM.
 *
 * @param args The command line arguments.
 * @return The result reported by the command line support.
 */
public static MainResult exec(String... args) {
    final MainSupport runner = new MainSupport();
    runner.main(args);
    return runner.getResult();
}
/**
 * Program entry point: runs the tool, then exits the JVM with the result code.
 *
 * @param args The command line arguments.
 */
public static void main(String... args) {
    System.exit(exec(args).getCode());
}
/**
 * Command line support of the anonymizer: declares the tool-specific options and
 * converts a parsed command line into {@link MainArgs}.
 */
// NOTE(review): FilterMainSupport is used as a raw type here; type arguments may have
// been lost - confirm against its declaration.
private static class MainSupport extends FilterMainSupport {
    private static final String REPLACEMENT = "replacement";
    private static final String PRESERVE_CHARS = "preserve-chars";
    private static final String MAX_LENGTH = "max-length";

    public MainSupport() {
        super(Anonymizer.class, LOGGER);
    }

    @Override
    protected String getVersion() {
        return Config.VERSION;
    }

    /**
     * Adds the base options, then the options that are specific to the anonymizer.
     *
     * @param options The options to complete.
     */
    @Override
    protected void addSpecificOptions(Options options) {
        addSpecificBaseOptions(options);

        options.addOption(Option.builder()
                                .longOpt(COLUMNS)
                                .desc("Columns (1-based) to anonymize.")
                                .hasArgs()
                                .required()
                                .build());

        // NOTE(review): the two following options accept several values (hasArgs), but
        // analyze() reads PRESERVE_CHARS with getOptionValue, which returns the first value only,
        // and REPLACEMENT is read as a single char - confirm whether hasArg() was intended.
        options.addOption(Option.builder()
                                .longOpt(REPLACEMENT)
                                .desc("Optional replacement character (default: 'X').")
                                .hasArgs()
                                .build());

        options.addOption(Option.builder()
                                .longOpt(PRESERVE_CHARS)
                                .desc("Optional characters to preserve (default: none).")
                                .hasArgs()
                                .build());

        options.addOption(Option.builder()
                                .longOpt(MAX_LENGTH)
                                .desc("Optional max length of values (default: -1).")
                                .hasArg()
                                .build());

        AbstractMainSupport.addNoArgOptions(options, MainArgs.Feature.class);
    }

    /**
     * Builds the arguments from the parsed command line.
     *
     * @param cl The parsed command line.
     * @return The corresponding arguments.
     * @throws ParseException When a column number cannot be converted to an integer.
     *         The conversion failure is attached as the cause.
     */
    @Override
    protected MainArgs analyze(CommandLine cl) throws ParseException {
        final MainArgs margs = new MainArgs();
        analyze(cl, margs);

        for (final String s : cl.getOptionValues(COLUMNS)) {
            try {
                final int number = StringConversion.asInt(s);
                margs.columns.add(number);
            } catch (final Exception e) {
                // ParseException has no (message, cause) constructor:
                // attach the cause explicitly so that the stack trace is not lost.
                final ParseException failure = new ParseException(e.getMessage());
                failure.initCause(e);
                throw failure;
            }
        }

        margs.replacement = getValueAsChar(cl, REPLACEMENT, 'X');
        margs.preservedCharacters = cl.getOptionValue(PRESERVE_CHARS);
        margs.maxLength = getValueAsInt(cl, MAX_LENGTH, -1);

        AbstractMainSupport.setMask(cl, MainArgs.Feature.class, margs.features::setEnabled);
        return margs;
    }

    /**
     * Runs the anonymizer with the given arguments.
     *
     * @param margs The arguments.
     * @return Always {@code null}.
     * @throws Exception When execution fails.
     */
    @Override
    protected Void execute(MainArgs margs) throws Exception {
        Anonymizer.execute(margs);
        return null;
    }
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy