All Downloads are FREE. Search and download functionalities are using the official Maven repository.

de.gwdg.metadataqa.marc.utils.pica.PicaLine Maven / Gradle / Ivy

package de.gwdg.metadataqa.marc.utils.pica;

import org.apache.commons.lang3.StringUtils;

import java.util.ArrayList;
import java.util.List;
import java.util.logging.Logger;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class PicaLine {
  private static final Logger logger = Logger.getLogger(PicaLine.class.getCanonicalName());

  private static final Pattern LINE = Pattern.compile("^(SET:|Eingabe:|Warnung:|[0-2][0-9][0-9][A-Z@])(\\/([0-9][0-9]+))? (.*)$", Pattern.MULTILINE);
  public static final String DEFAULT_SEPARATOR = "$";
  private static final String SET = "SET";
  private static final String EINGABE = "Eingabe";
  private static final String WARNUNG = "Warnung";
  private String subfieldSeparator = DEFAULT_SEPARATOR;
  private static String DOLLAR_REPLACEMENT = "%26%26";

  private String tag;
  private String occurrence;
  private String content;
  private List subfields;

  private boolean valid = false;
  private boolean skippable = false;

  public PicaLine() {
  }

  public PicaLine(String raw) {
    parse(raw);
  }

  public PicaLine(String raw, String subfieldSeparator) {
    this.subfieldSeparator = subfieldSeparator;
    parse(raw);
  }

  public boolean isSET() {
    return tag.equals(SET);
  }

  public boolean isEingabe() {
    return tag.equals(EINGABE);
  }

  public boolean isWarnung() {
    return tag.equals(WARNUNG);
  }

  public boolean isValidTag() {
    return valid && !(isSET() || isEingabe() || isWarnung());
  }

  public String getTag() {
    return tag;
  }

  public String getOccurrence() {
    return occurrence;
  }

  public List getSubfields() {
    return subfields;
  }

  public String getQualifiedTag() {
    if (occurrence != null)
      return tag + "/" + occurrence;
    return tag;
  }

  public String formatSubfields() {
    if (subfields == null) {
      logger.severe("null subfields: " + content);
      return "";
    }
    List formattedSubfields = new ArrayList<>();
    for (PicaSubfield subfield : subfields) {
      formattedSubfields.add(subfield.format());
    }
    return StringUtils.join(formattedSubfields, ", ");
  }

  public String getContent() {
    return content;
  }

  private void parse(String raw) {
    if (raw.equals("")) {
      skippable = true;
    } else {
      Matcher matcher = LINE.matcher(raw);
      if (matcher.matches()) {
        tag = matcher.group(1).replaceAll(":$", "");
        occurrence = matcher.group(3);
        content = matcher.group(4);
        parseSubfields();
        valid = true;
      } else {
        logger.warning(String.format("Unable to parse line: '%s'", raw));
      }
    }
  }

  private void parseSubfields() {
    subfields = new ArrayList<>();
    boolean dollarReplacement = false;
    String[] parts = null;
    if (subfieldSeparator.equals("$") && content.contains("$$")) {
      dollarReplacement = true;
      parts = content.replace("$$", DOLLAR_REPLACEMENT).split(Pattern.quote(subfieldSeparator));
    } else {
      parts = content.split(Pattern.quote(subfieldSeparator));
    }
    for (String part : parts) {
      if (StringUtils.isNotBlank(part)) {
        if (dollarReplacement) {
          part = part.replace(DOLLAR_REPLACEMENT, "$");
        }
        subfields.add(new PicaSubfield(part.substring(0, 1), part.substring(1)));
      }
    }
  }

  @Override
  public String toString() {
    return "PicaLine{" +
      "tag='" + tag + '\'' +
      ", occurrence='" + occurrence + '\'' +
      ", subfields=" + subfields +
      '}';
  }

  public boolean isSkippable() {
    return skippable || isSET() || isEingabe() || isWarnung();
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy