All downloads are free. Search and download functionality uses the official Maven repository.

de.digitalcollections.solrocr.lucene.byteoffset.NoOpByteOffsetStrategy Maven / Gradle / Ivy

Go to download

Solr plugin to add support for highlighting directly from various OCR formats (hOCR/ALTO/MiniOCR) without having to store the OCR documents in the index.

There is a newer version: 0.7.0
Show newest version
package de.digitalcollections.solrocr.lucene.byteoffset;

import java.io.IOException;
import java.util.Collections;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.search.MatchNoDocsQuery;
import org.apache.lucene.search.uhighlight.PhraseHelper;
import org.apache.lucene.search.uhighlight.UnifiedHighlighter.OffsetSource;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.automaton.CharacterRunAutomaton;
import de.digitalcollections.solrocr.lucene.OcrHComponents;
import de.digitalcollections.solrocr.lucene.OcrHighlighter;

/**
 * Byte-offset counterpart of {@link org.apache.lucene.search.uhighlight.NoOpOffsetStrategy}
 * (byte offsets from payloads): a strategy whose enum of byte offsets is always empty.
 *
 * <p>The constructor is private; the shared {@link #INSTANCE} is the only instance created here.
 */
public class NoOpByteOffsetStrategy extends FieldByteOffsetStrategy {
  /** The single shared instance of this strategy. */
  public static final NoOpByteOffsetStrategy INSTANCE = new NoOpByteOffsetStrategy();

  /** Initialises the superclass with placeholder components; see {@link #placeholderComponents()}. */
  private NoOpByteOffsetStrategy() {
    super(placeholderComponents());
  }

  /**
   * Builds the components passed to the superclass constructor: the field name
   * {@code "_ignored_"}, a predicate that rejects every input, a {@link MatchNoDocsQuery},
   * two zero-length arrays, {@link PhraseHelper#NONE} and an empty set.
   *
   * @return a freshly created {@link OcrHComponents} holding only empty/no-op values
   */
  private static OcrHComponents placeholderComponents() {
    final BytesRef[] noBytesRefs = new BytesRef[0];
    final CharacterRunAutomaton[] noAutomata = new CharacterRunAutomaton[0];
    return new OcrHComponents(
        "_ignored_",
        s -> false,
        new MatchNoDocsQuery(),
        noBytesRefs,
        PhraseHelper.NONE,
        noAutomata,
        Collections.emptySet());
  }

  /**
   * Reports the offset source of this strategy.
   *
   * @return always {@code NONE_NEEDED}
   */
  @Override
  public OffsetSource getOffsetSource() {
    return OcrHighlighter.OffsetSource.NONE_NEEDED;
  }

  /**
   * Supplies the byte offsets for a document; neither argument is consulted.
   *
   * @param reader unused
   * @param docId unused
   * @return always {@link ByteOffsetsEnum#EMPTY}
   * @throws IOException declared by the signature, never thrown by this implementation
   */
  @Override
  public ByteOffsetsEnum getByteOffsetsEnum(LeafReader reader, int docId) throws IOException {
    return ByteOffsetsEnum.EMPTY;
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy