All Downloads are FREE. Search and download functionalities are using the official Maven repository.

de.gwdg.metadataqa.marc.cli.parameters.CommonParameters Maven / Gradle / Ivy

package de.gwdg.metadataqa.marc.cli.parameters;

import de.gwdg.metadataqa.marc.cli.utils.ignorablerecords.RecordIgnorator;
import de.gwdg.metadataqa.marc.cli.utils.ignorablerecords.RecordIgnoratorFactory;
import de.gwdg.metadataqa.marc.cli.utils.ignorablerecords.RecordFilter;
import de.gwdg.metadataqa.marc.cli.utils.ignorablerecords.RecordFilterFactory;
import de.gwdg.metadataqa.marc.dao.Leader;
import de.gwdg.metadataqa.marc.cli.utils.IgnorableFields;
import de.gwdg.metadataqa.marc.definition.DataSource;
import de.gwdg.metadataqa.marc.definition.MarcFormat;
import de.gwdg.metadataqa.marc.definition.MarcVersion;
import de.gwdg.metadataqa.marc.definition.bibliographic.SchemaType;
import de.gwdg.metadataqa.marc.utils.alephseq.AlephseqLine;
import org.apache.commons.cli.*;
import org.apache.commons.lang3.StringUtils;

import java.io.InputStream;
import java.io.Serializable;

public class CommonParameters implements Serializable {

  private static final long serialVersionUID = -4760615880678251867L;

  protected String[] args;
  public static final String DEFAULT_OUTPUT_DIR = ".";
  public static final MarcVersion DEFAULT_MARC_VERSION = MarcVersion.MARC21;

  protected MarcVersion marcVersion = DEFAULT_MARC_VERSION;
  protected MarcFormat marcFormat = MarcFormat.ISO;
  protected DataSource dataSource = DataSource.FILE;
  protected boolean doHelp;
  protected boolean doLog = true;
  protected int limit = -1;
  protected int offset = -1;
  protected String id = null;
  protected Leader.Type defaultRecordType = null;
  protected boolean fixAlephseq = false;
  protected boolean fixAlma = false;
  protected boolean fixKbr = false;
  protected boolean alephseq = false;
  protected boolean marcxml = false;
  protected boolean lineSeparated = false;
  protected boolean trimId = false;
  private String outputDir = DEFAULT_OUTPUT_DIR;
  protected RecordIgnorator recordIgnorator;
  protected RecordFilter recordFilter;
  protected IgnorableFields ignorableFields = new IgnorableFields();
  protected InputStream stream = null;
  protected String defaultEncoding = null;

  protected Options options = new Options();
  protected static final CommandLineParser parser = new DefaultParser();
  protected CommandLine cmd;
  private boolean isOptionSet = false;
  private AlephseqLine.TYPE alephseqLineType;
  private String picaIdField = "003@$0";
  private String picaSubfieldSeparator = "$";
  private String picaSchemaFile;
  private String picaRecordTypeField = "002@$0";
  private SchemaType schemaType = SchemaType.MARC21;

  protected void setOptions() {
    if (!isOptionSet) {
      options.addOption("m", "marcVersion", true, "MARC version ('OCLC' or 'DNB')");
      options.addOption("h", "help", false, "display help");
      options.addOption("n", "nolog", false, "do not display log messages");
      options.addOption("l", "limit", true, "limit the number of records to process");
      options.addOption("o", "offset", true, "the first record to process");
      options.addOption("i", "id", true, "the MARC identifier (content of 001)");
      options.addOption("d", "defaultRecordType", true, "the default record type if the record's type is undetectable");
      options.addOption("q", "fixAlephseq", false, "fix the known issues of Alephseq format");
      options.addOption("X", "fixAlma", false, "fix the known issues of Alma format");
      options.addOption("R", "fixKbr", false, "fix the known issues of Alma format");
      options.addOption("p", "alephseq", false, "the source is in Alephseq format");
      options.addOption("x", "marcxml", false, "the source is in MARCXML format");
      options.addOption("y", "lineSeparated", false, "the source is in line separated MARC format");
      options.addOption("t", "outputDir", true, "output directory");
      options.addOption("r", "trimId", false, "remove spaces from the end of record IDs");
      options.addOption("z", "ignorableFields", true, "ignore fields from the analysis");
      options.addOption("v", "ignorableRecords", true, "ignore records from the analysis");
      options.addOption("f", "marcFormat", true, "MARC format (like 'ISO' or 'MARCXML')");
      options.addOption("s", "dataSource", true, "data source (file of stream)");
      options.addOption("g", "defaultEncoding", true, "default character encoding");
      options.addOption("A", "alephseqLineType", true, "Alephseq line type");
      options.addOption("B", "picaIdField", true, "PICA id field");
      options.addOption("D", "picaSubfieldSeparator", true, "PICA subfield separator");
      options.addOption("E", "picaSchemaFile", true, "Avram PICA schema file");
      options.addOption("F", "schemaType", true, "metadata schema type ('MARC21', 'UNIMARC', or 'PICA')");
      options.addOption("G", "picaRecordType", true, "picaRecordType");
      options.addOption("I", "allowableRecords", true, "allow records for the analysis");

      isOptionSet = true;
    }
  }

  public CommonParameters() {
  }

  public CommonParameters(String[] arguments)  throws ParseException {
    cmd = parser.parse(getOptions(), arguments);

    readSchemaType();
    readMarcVersion();
    readMarcFormat();
    readDataSource();
    doHelp = cmd.hasOption("help");
    doLog = !cmd.hasOption("nolog");
    readLimit();
    readOffset();
    if (offset > -1 && limit > -1)
      limit += offset;
    readId();
    readDefaultRecordType();
    setAlephseq(cmd.hasOption("alephseq"));
    fixAlephseq = cmd.hasOption("fixAlephseq");
    fixAlma = cmd.hasOption("fixAlma");
    fixKbr = cmd.hasOption("fixKbr");
    setMarcxml(cmd.hasOption("marcxml"));
    lineSeparated = cmd.hasOption("lineSeparated");
    readOutputDir();
    trimId = cmd.hasOption("trimId");
    readIgnorableFields();
    readIgnorableRecords();
    readAllowableRecords();
    readDefaultEncoding();
    readAlephseqLineType();
    readPicaIdField();
    readPicaSubfieldSeparator();
    readPicaSchemaFile();
    readPicaRecordType();

    args = cmd.getArgs();
  }

  private void readPicaSchemaFile() {
    if (cmd.hasOption("picaSchemaFile"))
      picaSchemaFile = cmd.getOptionValue("picaSchemaFile");
  }

  private void readPicaRecordType() {
    if (cmd.hasOption("picaRecordType"))
      picaRecordTypeField = cmd.getOptionValue("picaRecordType");
  }


  private void readPicaSubfieldSeparator() {
    if (cmd.hasOption("picaSubfieldSeparator"))
      picaSubfieldSeparator = cmd.getOptionValue("picaSubfieldSeparator");
  }

  private void readPicaIdField() {
    if (cmd.hasOption("picaIdField"))
      picaIdField = cmd.getOptionValue("picaIdField");
  }

  private void readAlephseqLineType() throws ParseException {
    if (cmd.hasOption("alephseqLineType"))
      setAlephseqLineType(cmd.getOptionValue("alephseqLineType"));
  }

  private void readDefaultEncoding() {
    if (cmd.hasOption("defaultEncoding"))
      setDefaultEncoding(cmd.getOptionValue("defaultEncoding"));
  }

  private void readIgnorableRecords() {
    String ignorableRecords = cmd.hasOption("ignorableRecords") ? cmd.getOptionValue("ignorableRecords") : "";
    setRecordIgnorator(ignorableRecords);
  }

  private void readAllowableRecords() {
    String allowableRecords = cmd.hasOption("allowableRecords") ? cmd.getOptionValue("allowableRecords") : "";
    setRecordFilter(allowableRecords);
  }

  private void readIgnorableFields() {
    if (cmd.hasOption("ignorableFields"))
      setIgnorableFields(cmd.getOptionValue("ignorableFields"));
  }

  private void readOutputDir() {
    if (cmd.hasOption("outputDir"))
      outputDir = cmd.getOptionValue("outputDir");
  }

  private void readDefaultRecordType() throws ParseException {
    if (cmd.hasOption("defaultRecordType"))
      setDefaultRecordType(cmd.getOptionValue("defaultRecordType"));
  }

  private void readId() {
    if (cmd.hasOption("id"))
      id = cmd.getOptionValue("id").trim();
  }

  private void readOffset() {
    if (cmd.hasOption("offset"))
      offset = Integer.parseInt(cmd.getOptionValue("offset"));
  }

  private void readLimit() {
    if (cmd.hasOption("limit"))
      limit = Integer.parseInt(cmd.getOptionValue("limit"));
  }

  private void readDataSource() throws ParseException {
    if (cmd.hasOption("dataSource"))
      setDataSource(cmd.getOptionValue("dataSource"));
  }

  private void readSchemaType() throws ParseException {
    if (cmd.hasOption("schemaType"))
      setSchemaType(cmd.getOptionValue("schemaType"));
  }

  private void setSchemaType(String input) throws ParseException {
    try {
      schemaType = SchemaType.valueOf(input);
    } catch (IllegalArgumentException e) {
      throw new ParseException(String.format("Unrecognized schemaType parameter value: '%s'", input));
    }
  }

  private void readMarcFormat() throws ParseException {
    if (cmd.hasOption("marcFormat"))
      setMarcFormat(cmd.getOptionValue("marcFormat"));
  }

  private void readMarcVersion() throws ParseException {
    if (cmd.hasOption("marcVersion"))
      setMarcVersion(cmd.getOptionValue("marcVersion"));
  }

  private void setAlephseqLineType(String alephseqLineTypeInput) throws ParseException {
    try {
      alephseqLineType = AlephseqLine.TYPE.valueOf(cmd.getOptionValue("alephseqLineType"));
    } catch (IllegalArgumentException e) {
      throw new ParseException(String.format("Unrecognized alephseqLineType parameter value: '%s'", alephseqLineTypeInput));
    }
  }

  public Options getOptions() {
    if (!isOptionSet)
      setOptions();
    return options;
  }

  public MarcVersion getMarcVersion() {
    return marcVersion;
  }

  public void setMarcVersion(MarcVersion marcVersion) {
    this.marcVersion = marcVersion;
  }

  public void setMarcVersion(String marcVersion) throws ParseException {
    this.marcVersion = MarcVersion.byCode(marcVersion.trim());
    if (this.marcVersion == null)
      throw new ParseException(String.format("Unrecognized marcVersion parameter value: '%s'", marcVersion));
  }

  public MarcFormat getMarcFormat() {
    return marcFormat;
  }

  public void setMarcFormat(MarcFormat marcFormat) {
    this.marcFormat = marcFormat;
  }

  public void setMarcFormat(String marcFormatString) throws ParseException {
    marcFormat = MarcFormat.byCode(marcFormatString.trim());
    if (marcFormat == null)
      throw new ParseException(String.format("Unrecognized marcFormat parameter value: '%s'", marcFormatString));
    if (marcFormat.equals(MarcFormat.ALEPHSEQ))
      setAlephseq(true);
    if (marcFormat.equals(MarcFormat.XML))
      setMarcxml(true);
    if (marcFormat.equals(MarcFormat.LINE_SEPARATED))
      setLineSeparated(true);
    if (marcFormat.equals(MarcFormat.PICA_NORMALIZED) || marcFormat.equals(MarcFormat.PICA_PLAIN))
      schemaType = SchemaType.PICA;
  }

  public DataSource getDataSource() {
    return dataSource;
  }

  public void setDataSource(DataSource dataSource) {
    this.dataSource = dataSource;
  }

  public void setDataSource(String dataSourceString) throws ParseException {
    dataSource = DataSource.byCode(dataSourceString.trim());
    if (dataSource == null)
      throw new ParseException(String.format("Unrecognized marcFormat parameter value: '%s'", dataSourceString));
  }

  public boolean doHelp() {
    return doHelp;
  }

  public void setDoHelp(boolean doHelp) {
    this.doHelp = doHelp;
  }

  public boolean doLog() {
    return doLog;
  }

  public void setDoLog(boolean doLog) {
    this.doLog = doLog;
  }

  public String[] getArgs() {
    return args;
  }

  public int getLimit() {
    return limit;
  }

  public void setLimit(int limit) {
    this.limit = limit;
  }

  public int getOffset() {
    return offset;
  }

  public void setOffset(int offset) {
    this.offset = offset;
  }

  public boolean hasId() {
    return StringUtils.isNotBlank(id);
  }

  public String getId() {
    return id;
  }

  public void setId(String id) {
    this.id = id;
  }

  public Leader.Type getDefaultRecordType() {
    return defaultRecordType;
  }

  public void setDefaultRecordType(Leader.Type defaultRecordType) {
    this.defaultRecordType = defaultRecordType;
  }

  public void setDefaultRecordType(String defaultRecordType) throws ParseException {
    this.defaultRecordType = Leader.Type.valueOf(defaultRecordType);
    if (this.defaultRecordType == null)
      throw new ParseException(String.format("Unrecognized defaultRecordType parameter value: '%s'", defaultRecordType));
  }

  public boolean fixAlephseq() {
    return fixAlephseq;
  }

  public void setFixAlephseq(boolean fixAlephseq) {
    this.fixAlephseq = fixAlephseq;
  }

  public boolean fixAlma() {
    return fixAlma;
  }

  public void setFixAlma(boolean fixAlma) {
    this.fixAlma = fixAlma;
  }

  public boolean fixKbr() {
    return fixKbr;
  }

  public void setFixKbr(boolean fixKbr) {
    this.fixKbr = fixKbr;
  }

  public String getReplecementInControlFields() {
    if (fixAlephseq())
      return "^";
    else if (fixAlma() || fixKbr())
      return "#";
    else
      return null;
  }

  public boolean isAlephseq() {
    return alephseq;
  }

  public void setAlephseq(boolean alephseq) {
    this.alephseq = alephseq;
    if (alephseq)
      marcFormat = MarcFormat.ALEPHSEQ;
  }

  public boolean isMarcxml() {
    return marcxml;
  }

  public void setMarcxml(boolean marcxml) {
    this.marcxml = marcxml;
    if (marcxml)
      marcFormat = MarcFormat.XML;
  }

  public boolean isLineSeparated() {
    return lineSeparated;
  }

  public void setLineSeparated(boolean lineSeparated) {
    this.lineSeparated = lineSeparated;
  }

  public String getOutputDir() {
    return outputDir;
  }

  public void setOutputDir(String outputDir) {
    this.outputDir = outputDir;
  }

  public boolean getTrimId() {
    return trimId;
  }

  public void setTrimId(boolean trimId) {
    this.trimId = trimId;
  }

  public IgnorableFields getIgnorableFields() {
    return ignorableFields;
  }

  public void setIgnorableFields(String ignorableFields) {
    this.ignorableFields.parseFields(ignorableFields.trim());
  }

  public RecordIgnorator getRecordIgnorator() {
    return recordIgnorator;
  }

  public void setRecordIgnorator(String ignorableRecords) {
    this.recordIgnorator = RecordIgnoratorFactory.create(schemaType, ignorableRecords.trim());
  }

  public RecordFilter getRecordFilter() {
    return recordFilter;
  }

  public void setRecordFilter(String allowableRecords) {
    this.recordFilter = RecordFilterFactory.create(schemaType, allowableRecords.trim());
  }

  public InputStream getStream() {
    return stream;
  }

  public void setStream(InputStream stream) {
    this.stream = stream;
  }

  public String getDefaultEncoding() {
    return defaultEncoding;
  }

  private void setDefaultEncoding(String defaultEncoding) {
    this.defaultEncoding = defaultEncoding;
  }

  public AlephseqLine.TYPE getAlephseqLineType() {
    return this.alephseqLineType;
  }

  public String getPicaIdField() {
    return picaIdField;
  }

  public void setPicaIdField(String picaIdField) {
    this.picaIdField = picaIdField;
  }

  public String getPicaSubfieldSeparator() {
    return picaSubfieldSeparator;
  }

  public void setPicaSubfieldSeparator(String picaSubfieldSeparator) {
    this.picaSubfieldSeparator = picaSubfieldSeparator;
  }

  public String getPicaSchemaFile() {
    return picaSchemaFile;
  }

  public SchemaType getSchemaType() {
    return schemaType;
  }

  public String getPicaRecordTypeField() {
    return picaRecordTypeField;
  }

  public boolean isMarc21() {
    return schemaType.equals(SchemaType.MARC21);
  }

  public boolean isPica() {
    return schemaType.equals(SchemaType.PICA);
  }

  public String formatParameters() {
    String text = "";
    text += String.format("schemaType: %s%n", schemaType);
    text += String.format("marcVersion: %s, %s%n", marcVersion.getCode(), marcVersion.getLabel());
    text += String.format("marcFormat: %s, %s%n", marcFormat.getCode(), marcFormat.getLabel());
    text += String.format("dataSource: %s, %s%n", dataSource.getCode(), dataSource.getLabel());
    text += String.format("limit: %d%n", limit);
    text += String.format("offset: %s%n", offset);
    text += String.format("MARC files: %s%n", StringUtils.join(args, ", "));
    text += String.format("id: %s%n", id);
    text += String.format("defaultRecordType: %s%n", defaultRecordType);
    text += String.format("fixAlephseq: %s%n", fixAlephseq);
    text += String.format("fixAlma: %s%n", fixAlma);
    text += String.format("alephseq: %s%n", alephseq);
    text += String.format("marcxml: %s%n", marcxml);
    text += String.format("lineSeparated: %s%n", lineSeparated);
    text += String.format("outputDir: %s%n", outputDir);
    text += String.format("trimId: %s%n", trimId);
    text += String.format("ignorableFields: %s%n", ignorableFields);
    text += String.format("allowableRecords: %s%n", recordFilter);
    text += String.format("ignorableRecords: %s%n", recordIgnorator);
    text += String.format("defaultEncoding: %s%n", defaultEncoding);
    text += String.format("alephseqLineType: %s%n", alephseqLineType);
    if (isPica()) {
      text += String.format("picaIdField: %s%n", picaIdField);
      text += String.format("picaSubfieldSeparator: %s%n", picaSubfieldSeparator);
      text += String.format("picaRecordType: %s%n", picaRecordTypeField);
    }

    return text;
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy