All downloads are free. The search and download functionality uses the official Maven repository.

com.jaeksoft.searchlib.snippet.FragmenterAbstract Maven / Gradle / Ivy

Go to download

OpenSearchServer is a powerful, enterprise-class search engine program. Using the web user interface, the crawlers (web, file, database, ...) and the REST/RESTful API, you will be able to quickly and easily integrate advanced full-text search capabilities into your application. OpenSearchServer runs on Windows and Linux/Unix/BSD.

The newest version!
/**   
 * License Agreement for OpenSearchServer
 *
 * Copyright (C) 2008-2013 Emmanuel Keller / Jaeksoft
 * 
 * http://www.open-search-server.com
 * 
 * This file is part of OpenSearchServer.
 *
 * OpenSearchServer is free software: you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 *
 * OpenSearchServer is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with OpenSearchServer. 
 *  If not, see <http://www.gnu.org/licenses/>.
 **/

package com.jaeksoft.searchlib.snippet;

import java.util.Iterator;
import java.util.TreeSet;

import org.w3c.dom.NamedNodeMap;

import com.jaeksoft.searchlib.Logging;

/**
 * Base class for snippet fragmenters.
 * <p>
 * A fragmenter cuts a text into consecutive pieces. Subclasses implement
 * {@link #check(String)} and register cut positions through
 * {@link #addSplit(int)}; {@link #getFragments(String, FragmentList, int)}
 * then slices the text at those positions and appends each slice to a
 * {@link FragmentList}.
 * <p>
 * The split positions and the text length are kept in instance fields that
 * are overwritten on every {@code getFragments} call, so one instance must
 * not be used for two texts at the same time.
 */
public abstract class FragmenterAbstract {

	/**
	 * Split offsets for the text currently being fragmented, in ascending
	 * order and without duplicates. Created lazily by
	 * {@link #getFragments(String, FragmentList, int)}.
	 */
	private transient TreeSet<Integer> splitPos;

	/** Length of the text handed to the current {@code getFragments} call. */
	private transient int originalTextLength;

	protected FragmenterAbstract() {
		splitPos = null;
	}

	/**
	 * Configures this fragmenter from a set of DOM attributes.
	 * 
	 * @param attr
	 *            the attributes to read; which attribute names are used is
	 *            defined by the implementing subclass
	 */
	protected abstract void setAttributes(NamedNodeMap attr);

	/**
	 * Registers an offset at which the current text must be cut.
	 * <p>
	 * Offsets outside the open interval {@code (0, text length)} are
	 * ignored: cutting at 0 or at/after the end would yield an empty
	 * fragment, and a negative offset would make {@code substring} fail.
	 * Because the text length is only set by {@code getFragments} (which
	 * also creates the split set), calls made outside of a
	 * {@code getFragments} run before any text was processed are no-ops.
	 * 
	 * @param pos
	 *            character offset in the original text
	 */
	protected void addSplit(int pos) {
		if (pos <= 0 || pos >= originalTextLength)
			return;
		splitPos.add(pos);
	}

	/**
	 * Cuts {@code originalText} at the positions collected by
	 * {@link #check(String)} and appends every piece to {@code fragments}.
	 * <p>
	 * The last fragment produced by this call, if any, is flagged with
	 * {@code setEdge(true)}.
	 * 
	 * @param originalText
	 *            the text to cut; must not be {@code null}
	 * @param fragments
	 *            the list receiving the pieces
	 * @param vectorOffset
	 *            passed through unchanged to
	 *            {@code FragmentList.addOriginalText}
	 */
	protected final void getFragments(String originalText,
			FragmentList fragments, int vectorOffset) {
		originalTextLength = originalText.length();
		if (splitPos == null)
			splitPos = new TreeSet<Integer>();
		else
			splitPos.clear();
		// Let the subclass register the split positions for this text.
		check(originalText);
		Iterator<Integer> splitIterator = splitPos.iterator();
		int pos = 0;
		Fragment lastFragment = null;
		while (splitIterator.hasNext()) {
			int nextSplitPos = splitIterator.next();
			// The third argument is true only for the first piece of the text.
			lastFragment = fragments.addOriginalText(
					originalText.substring(pos, nextSplitPos), vectorOffset,
					lastFragment == null);
			pos = nextSplitPos;
		}
		// Remaining tail after the last split (or the whole text when no
		// split was registered). Skipped for an empty text.
		if (pos < originalText.length())
			lastFragment = fragments.addOriginalText(
					originalText.substring(pos), vectorOffset,
					lastFragment == null);
		if (lastFragment != null)
			lastFragment.setEdge(true);
	}

	/**
	 * Returns a fragmenter instance of the implementing type.
	 * NOTE(review): presumably a fresh instance carrying the same
	 * configuration - confirm against the subclasses.
	 */
	protected abstract FragmenterAbstract newInstance();

	/** Shared fallback returned when no usable fragmenter class is given. */
	protected static final NoFragmenter NOFRAGMENTER = new NoFragmenter();

	/**
	 * Instantiates the fragmenter whose simple class name is given. The
	 * class is looked up in the {@code com.jaeksoft.searchlib.snippet}
	 * package and created through its no-argument constructor.
	 * 
	 * @param className
	 *            simple class name; {@code null} or empty selects
	 *            {@link #NOFRAGMENTER}
	 * @return the new fragmenter, or {@link #NOFRAGMENTER} when no name was
	 *         given or the class cannot be found (the latter is logged as a
	 *         warning)
	 * @throws InstantiationException
	 *             if the class cannot be instantiated
	 * @throws IllegalAccessException
	 *             if the class or its constructor is not accessible
	 * @throws ClassCastException
	 *             if the class is not a {@code FragmenterAbstract}
	 */
	protected static final FragmenterAbstract newInstance(String className)
			throws InstantiationException, IllegalAccessException {
		if (className == null || className.length() == 0)
			return NOFRAGMENTER;
		try {
			return (FragmenterAbstract) Class.forName(
					"com.jaeksoft.searchlib.snippet." + className)
					.newInstance();
		} catch (ClassNotFoundException e) {
			Logging.warn(e);
			return NOFRAGMENTER;
		}
	}

	/**
	 * Hook called by {@link #getFragments(String, FragmentList, int)} once
	 * the split set has been reset for {@code originalText}.
	 * Implementations are expected to register their cut positions with
	 * {@link #addSplit(int)}.
	 * 
	 * @param originalText
	 *            the text about to be fragmented
	 */
	protected abstract void check(String originalText);

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy