org.daisy.pipeline.nlp.lexing.LexServiceRegistry Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of nlp-lexing Show documentation
Common API for Lexing functionality
There is a newer version: 1.0.1
package org.daisy.pipeline.nlp.lexing;

import java.util.Collection;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.CopyOnWriteArrayList;

import org.daisy.pipeline.nlp.lexing.LexService.LexerInitException;
import org.daisy.pipeline.nlp.lexing.LexService.LexerToken;

public class LexServiceRegistry {
	private Map> mLexerToTokens = new ConcurrentHashMap>();

	/**
	 * Component callback
	 */
	public void addLexService(LexService lexer) {
		mLexerToTokens.put(lexer, new CopyOnWriteArrayList());
	}

	/**
	 * Component callback
	 */
	public void removeLexService(LexService lexer) {
		List tokens = mLexerToTokens.get(lexer);
		synchronized (this) {
			if (tokens.size() > 0) {
				lexer.globalRelease();
			}
		}
		mLexerToTokens.remove(lexer);
	}

	/**
	 * Choose a LexService for a language and get its local token or create a
	 * new one if there is not yet an existing one.
	 * 
	 * @param lang
	 * @param existingTokens are the current tokens of the pipeline job
	 *            previously returned by getTokenForLang().
	 * @return a token for the LexService that best works with @param lang. A
	 *         token is an instance of lexer which is unique for each pipeline
	 *         job so that it can work within multi-thread contexts. It first
	 *         attempts to find a token of LexService that is already used by
	 *         the pipeline job for other languages. If none is found, it
	 *         creates a new token. The token is initialized with the resources
	 *         of the other pipeline jobs when this is possible.
	 * @throws LexerInitException
	 */
	public LexerToken getTokenForLang(Locale lang, Collection existingTokens)
	        throws LexerInitException {
		// Look in priority in the existing tokens
		int bestScore = -1;
		LexerToken bestToken = null;
		for (LexerToken token : existingTokens) {
			int score = token.getLexService().getLexQuality(lang);
			if (score > bestScore) {
				bestScore = score;
				bestToken = token;
			}
		}

		// Look in the others lexers
		Map.Entry> best = null;
		for (Map.Entry> entry : mLexerToTokens.entrySet()) {
			int score = entry.getKey().getLexQuality(lang);
			if (score > bestScore) {
				bestScore = score;
				best = entry;
				bestToken = null;
			}
		}

		if (bestToken != null) {
			//The best token for the lang has already been created.
			//We then bind the language to it.
			for (LexerToken otherToken : mLexerToTokens.get(bestToken.getLexService())) {
				bestToken.shareResourcesWith(otherToken, lang);
			}
			bestToken.addLang(lang);
			return bestToken;
		}

		//A new token is the best choice for the given language.
		LexerToken result = best.getKey().newToken();
		result.addLang(lang);
		best.getValue().add(result);
		synchronized (this) {
			if (best.getValue().size() == 1) {
				best.getKey().globalInit(); //first initialization of the LexService
			}
		}

		return result;
	}

	/**
	 * Choose a LexService and get its token to be used for cases when languages
	 * are unknown or not supported.
	 */
	public LexerToken getFallbackToken(Collection existingTokens)
	        throws LexerInitException {
		// Look in priority in the existing tokens
		int bestScore = 0;
		LexerToken bestToken = null;
		for (LexerToken token : existingTokens) {
			int score = token.getLexService().getOverallQuality();
			if (score > bestScore) {
				bestScore = score;
				bestToken = token;
			}
		}

		// Look at the other lexers
		Map.Entry> best = null;
		for (Map.Entry> entry : mLexerToTokens.entrySet()) {
			int score = entry.getKey().getOverallQuality();
			if (score > bestScore) {
				bestScore = score;
				best = entry;
				bestToken = null;
			}
		}

		if (bestToken != null) {
			return bestToken;
		}

		if (best == null) {
			throw new LexerInitException("No generic lexing service available");
		}

		//The best fallback choice is to use a new token
		LexerToken result = best.getKey().newToken();
		synchronized (this) {
			if (best.getValue().size() == 1) {
				best.getKey().globalInit(); //first initialization of the LexService
			}
		}

		return result;
	}

	/**
	 * Unregister a tokens returned by getTokenForLang(). Releasing multiple
	 * times the same token has no side effect.
	 * 
	 * @param jobtokens
	 */
	public void releaseToken(LexerToken jobtoken) {
		for (List tokens : mLexerToTokens.values()) {
			if (tokens.remove(jobtoken)) {
				synchronized (this) {
					if (tokens.size() == 0) {
						jobtoken.getLexService().globalRelease();
					}
				}
				return;
			}
		}

	}

}