All downloads are FREE. Search and download functionality uses the official Maven repository.

de.digitalcollections.solrocr.lucene.OcrHComponents Maven / Gradle / Ivy

Go to download

Solr plugin to add support for highlighting directly from various OCR formats (hOCR/ALTO/MiniOCR) without having to store the OCR documents in the index.

There is a newer version: 0.7.0
Show newest version
package de.digitalcollections.solrocr.lucene;

import de.digitalcollections.solrocr.lucene.byteoffset.ByteOffsetPhraseHelper;
import java.util.Set;
import java.util.function.Predicate;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.uhighlight.PhraseHelper;
import org.apache.lucene.search.uhighlight.UHComponents;
import org.apache.lucene.search.uhighlight.UnifiedHighlighter.HighlightFlag;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.automaton.CharacterRunAutomaton;

/**
 * Components for the {@link OcrHighlighter}, with support for loading byte offsets from payloads.
 *
 * <p>Extends {@link UHComponents} with one additional, optional member: a
 * {@link ByteOffsetPhraseHelper}. All other arguments are forwarded unchanged to the
 * {@link UHComponents} constructor.
 */
public class OcrHComponents extends UHComponents {
  /** Byte-offset phrase helper; {@code null} if the instance was created without one. */
  private final ByteOffsetPhraseHelper byteOffsetPhraseHelper;

  /**
   * Creates components without a {@link ByteOffsetPhraseHelper};
   * {@link #getByteOffsetPhraseHelper()} will return {@code null}.
   *
   * @param field forwarded to {@link UHComponents}
   * @param fieldMatcher forwarded to {@link UHComponents}
   * @param query forwarded to {@link UHComponents}
   * @param terms forwarded to {@link UHComponents}
   * @param phraseHelper forwarded to {@link UHComponents}
   * @param automata forwarded to {@link UHComponents}
   * @param highlightFlags forwarded to {@link UHComponents}
   */
  public OcrHComponents(
      String field, Predicate<String> fieldMatcher,
      Query query, BytesRef[] terms,
      PhraseHelper phraseHelper,
      CharacterRunAutomaton[] automata,
      Set<HighlightFlag> highlightFlags) {
    // Delegate so the super(...) call lives in exactly one place.
    this(field, fieldMatcher, query, terms, phraseHelper, null, automata, highlightFlags);
  }

  /**
   * Creates components carrying the given {@link ByteOffsetPhraseHelper}.
   *
   * @param field forwarded to {@link UHComponents}
   * @param fieldMatcher forwarded to {@link UHComponents}
   * @param query forwarded to {@link UHComponents}
   * @param terms forwarded to {@link UHComponents}
   * @param phraseHelper forwarded to {@link UHComponents}
   * @param byteOffsetPhraseHelper stored as-is and exposed through
   *     {@link #getByteOffsetPhraseHelper()}; may be {@code null}
   * @param automata forwarded to {@link UHComponents}
   * @param highlightFlags forwarded to {@link UHComponents}
   */
  public OcrHComponents(
      String field, Predicate<String> fieldMatcher,
      Query query, BytesRef[] terms,
      PhraseHelper phraseHelper,
      ByteOffsetPhraseHelper byteOffsetPhraseHelper,
      CharacterRunAutomaton[] automata,
      Set<HighlightFlag> highlightFlags) {
    super(field, fieldMatcher, query, terms, phraseHelper, automata, highlightFlags);
    this.byteOffsetPhraseHelper = byteOffsetPhraseHelper;
  }

  /**
   * Returns the byte-offset phrase helper supplied at construction time.
   *
   * @return the helper, or {@code null} if none was supplied
   */
  public ByteOffsetPhraseHelper getByteOffsetPhraseHelper() {
    return byteOffsetPhraseHelper;
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy