All downloads are free. The search and download functionality uses the official Maven repository.

de.digitalcollections.solrocr.model.OcrHighlightResult Maven / Gradle / Ivy

Go to download

Solr plugin to add support for highlighting directly from various OCR formats (hOCR/ALTO/MiniOCR) without having to store the OCR documents in the index.

The newest version!
package de.digitalcollections.solrocr.model;

import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.SimpleOrderedMap;

/**
 * Holds OCR highlighting snippets and snippet counts, both keyed by field name, and converts them
 * into a Solr {@link NamedList} structure.
 *
 * <p>Snippets and counts are registered independently through {@link #addSnippetsForField} and
 * {@link #addSnippetCountForField}; only fields that have snippets registered appear in the
 * output of {@link #toNamedList()}.
 */
public class OcrHighlightResult {
  /** Snippets registered per field name; individual array entries may be {@code null}. */
  private final Map<String, OcrSnippet[]> fieldSnippets;

  /** Snippet count registered per field name. */
  private final Map<String, Integer> snippetCounts;

  /** Creates an empty result with no snippets and no counts registered. */
  public OcrHighlightResult() {
    snippetCounts = new HashMap<>();
    fieldSnippets = new HashMap<>();
  }

  /**
   * Registers the snippets for a field, replacing any snippets previously stored for it.
   *
   * @param field name of the field the snippets belong to
   * @param ocrSnippets snippets for the field; the array is stored as-is, not copied
   */
  public void addSnippetsForField(String field, OcrSnippet[] ocrSnippets) {
    this.fieldSnippets.put(field, ocrSnippets);
  }

  /**
   * Registers the snippet count for a field, replacing any count previously stored for it.
   *
   * @param field name of the field the count belongs to
   * @param i the count to record for the field
   */
  public void addSnippetCountForField(String field, int i) {
    this.snippetCounts.put(field, i);
  }

  /**
   * Returns the snippets registered for a field.
   *
   * @param field name of the field to look up
   * @return the stored snippet array, or {@code null} if nothing was registered for the field
   */
  public OcrSnippet[] getFieldSnippets(String field) {
    return fieldSnippets.get(field);
  }

  /**
   * Returns the snippet count registered for a field.
   *
   * @param field name of the field to look up
   * @return the recorded count, or {@code 0} if no count was registered for the field
   */
  public int getSnippetCount(String field) {
    // A plain get() would unbox null and throw for fields without a recorded count.
    return snippetCounts.getOrDefault(field, 0);
  }

  /**
   * Converts this result into a {@link NamedList} with one entry per field that has snippets.
   *
   * <p>Each field maps to a nested structure with two entries: {@code "snippets"}, the list of
   * converted snippets ({@code null} snippets stay {@code null} in the list), and
   * {@code "numTotal"}, the snippet count recorded for the field.
   *
   * @return the converted structure; empty if no snippets were registered
   */
  public NamedList<Object> toNamedList() {
    SimpleOrderedMap<Object> out = new SimpleOrderedMap<>();
    for (String fieldName : fieldSnippets.keySet()) {
      SimpleOrderedMap<Object> fieldOut = new SimpleOrderedMap<>();
      int snipCount = getSnippetCount(fieldName);
      OcrSnippet[] snips = getFieldSnippets(fieldName);
      if (snips == null) {
        // A null array registered for the field is reported as an empty snippet list.
        snips = new OcrSnippet[0];
      }
      // The explicit type argument keeps the element type independent of the exact
      // NamedList parameterization returned by OcrSnippet#toNamedList().
      List<NamedList<?>> outSnips =
          Arrays.stream(snips)
              .<NamedList<?>>map(snip -> snip == null ? null : snip.toNamedList())
              .collect(Collectors.toList());
      fieldOut.add("snippets", outSnips);
      fieldOut.add("numTotal", snipCount);
      out.add(fieldName, fieldOut);
    }
    return out;
  }
}