All downloads are free. The search and download functionality uses the official Maven repository.

de.gwdg.metadataqa.marc.analysis.DataElementCounter Maven / Gradle / Ivy

package de.gwdg.metadataqa.marc.analysis;

import de.gwdg.metadataqa.marc.MarcSubfield;
import de.gwdg.metadataqa.marc.dao.DataField;
import de.gwdg.metadataqa.marc.dao.record.BibliographicRecord;
import org.apache.commons.io.FileUtils;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.logging.Level;
import java.util.logging.Logger;

public class DataElementCounter {

  public enum Basis{EXISTENCE, OCCURENCE}

  private static final Logger logger = Logger.getLogger(DataElementCounter.class.getCanonicalName());

  List elements = new ArrayList<>();
  Map> tags = new LinkedHashMap<>();
  private final String header;
  private final Basis basis;

  public DataElementCounter(String dir, String fileName, Basis basis) {
    this.basis = basis;
    File file = new File(dir, fileName);
    String _header = "";
    try {
      List lines = FileUtils.readLines(file, "utf-8");
      _header = lines.get(0);
      String[] topFields = _header.split(",");
      for (String field : topFields) {
        String[] parts = field.split("\\$");
        DataElement element = new DataElement(parts[0], parts[1]);
        elements.add(element);
        tags.computeIfAbsent(element.field, s -> new ArrayList<>());
        tags.get(element.field).add(element);
      }
    } catch (IOException e) {
      e.printStackTrace();
    }
    this.header = _header;
  }

  public List count(BibliographicRecord marcRecord) {
    List counts = new ArrayList<>();
    for (Map.Entry> entry : tags.entrySet()) {
      List instances = marcRecord.getDatafield(entry.getKey());
      if (instances == null || instances.isEmpty()) {
        for (DataElement element : entry.getValue()) {
          counts.add(0);
        }
      } else {
        Map result = new LinkedHashMap<>();
        for (DataField instance : instances) {
          for (DataElement element : entry.getValue()) {
            result.computeIfAbsent(element.subfield, s -> 0);
            List subfields = instance.getSubfield(element.subfield);
            if (subfields != null && !subfields.isEmpty()) {
              result.put(element.subfield, result.get(element.subfield) + subfields.size());
            }
          }
        }
        for (DataElement element : entry.getValue()) {
          int score = result.get(element.subfield);
          if (basis.equals(Basis.EXISTENCE) && score >= 1)
            score = 1;
          counts.add(score);
        }
      }
    }
    return counts;
  }

  public String getHeader() {
    return header;
  }

  class DataElement {
    String field;
    String subfield;
    String key;

    public DataElement(String field, String subfield) {
      this.field = field;
      this.subfield = subfield;
      this.key = field + "$" + subfield;
    }
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy