All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.wikimedia.highlighter.cirrus.opensearch.CharScanningSegmenterFactory Maven / Gradle / Ivy

The newest version!
package org.wikimedia.highlighter.cirrus.opensearch;

import org.wikimedia.search.highlighter.cirrus.Segment;
import org.wikimedia.search.highlighter.cirrus.Segmenter;
import org.wikimedia.search.highlighter.cirrus.snippet.CharScanningSegmenter;
import org.wikimedia.search.highlighter.cirrus.source.StringSourceExtracter;

/**
 * {@link SegmenterFactory} that hands out {@link CharScanningSegmenter} instances,
 * all configured with the same fragment size and boundary scan limit.
 */
public class CharScanningSegmenterFactory implements SegmenterFactory {
    /** Fragment size passed to every segmenter created by {@link #build(String)}. */
    private final int fragmentCharSize;
    /** Boundary scan limit passed to every segmenter this factory creates. */
    private final int boundaryMaxScan;

    /**
     * Creates a factory with fixed segmenter settings.
     *
     * @param fragmentCharSize fragment size forwarded to segmenters created by
     *        {@link #build(String)} (presumably measured in characters)
     * @param boundaryMaxScan boundary scan limit forwarded to every segmenter
     *        created by this factory
     */
    public CharScanningSegmenterFactory(int fragmentCharSize, int boundaryMaxScan) {
        this.boundaryMaxScan = boundaryMaxScan;
        this.fragmentCharSize = fragmentCharSize;
    }

    /**
     * Builds a segmenter over the given source text using this factory's settings.
     *
     * @param value source text to segment
     * @return a new {@link CharScanningSegmenter} for {@code value}
     */
    @Override
    public Segmenter build(String value) {
        Segmenter segmenter = new CharScanningSegmenter(value, fragmentCharSize, boundaryMaxScan);
        return segmenter;
    }

    /**
     * Extracts a fragment of the source text to use when nothing matched.
     *
     * @param value source text to take the fragment from
     * @param size requested fragment size; used as the segmenter's fragment size
     *        in place of the factory-wide setting
     * @return the substring of {@code value} between the bounds picked by the segmenter
     */
    @Override
    public String extractNoMatchFragment(String value, int size) {
        // No dedicated logic needed here: a one-off segmenter sized to the
        // request does the character scanning and picks the bounds for us.
        CharScanningSegmenter scanner = new CharScanningSegmenter(value, size, boundaryMaxScan);
        Segment picked = scanner
                .memo(0, size)
                .pickBounds(0, value.length());
        int from = picked.startOffset();
        int to = picked.endOffset();
        return StringSourceExtracter.safeSubstring(from, to, value);
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy