// org.carrot2.text.linguistic.LanguageModel Maven / Gradle / Ivy
/*
* Carrot2 project.
*
* Copyright (C) 2002-2016, Dawid Weiss, Stanisław Osiński.
* All rights reserved.
*
* Refer to the full license file "carrot2.LICENSE"
* in the root folder of the repository checkout or at:
* http://www.carrot2.org/carrot2.LICENSE
*/
package org.carrot2.text.linguistic;
import org.carrot2.core.LanguageCode;
import org.carrot2.text.analysis.ITokenizer;
import org.carrot2.util.factory.CachedInstanceFactoryDecorator;
import org.carrot2.util.factory.IFactory;
/**
 * A holder for all elements of a language model for a single language used internally by
 * content preprocessing components.
 * <p>
 * The model keeps one factory per element (stemmer, tokenizer, lexical data). Each factory
 * is wrapped in a {@link CachedInstanceFactoryDecorator} at construction time and the
 * getters delegate to the wrapped factories.
 */
public final class LanguageModel
{
    /** The language this model was created for. */
    private final LanguageCode languageCode;

    /** Source of the stemmer, wrapped in a {@link CachedInstanceFactoryDecorator}. */
    private final IFactory<IStemmer> stemmerFactory;

    /** Source of the tokenizer, wrapped in a {@link CachedInstanceFactoryDecorator}. */
    private final IFactory<ITokenizer> tokenizerFactory;

    /** Source of the lexical data, wrapped in a {@link CachedInstanceFactoryDecorator}. */
    private final IFactory<ILexicalData> lexicalDataFactory;

    /**
     * Creates a model for the given language from per-element factories.
     *
     * @param languageCode the language this model describes
     * @param stemmerFactory factory producing the stemmer
     * @param tokenizerFactory factory producing the tokenizer
     * @param lexicalDataFactory factory producing the lexical data
     */
    LanguageModel(LanguageCode languageCode, IFactory<IStemmer> stemmerFactory,
        IFactory<ITokenizer> tokenizerFactory, IFactory<ILexicalData> lexicalDataFactory)
    {
        this.languageCode = languageCode;

        // Every factory goes through the caching decorator; presumably this makes repeated
        // getter calls reuse a previously created instance -- confirm against
        // CachedInstanceFactoryDecorator.
        this.stemmerFactory = new CachedInstanceFactoryDecorator<IStemmer>(stemmerFactory);
        this.tokenizerFactory = new CachedInstanceFactoryDecorator<ITokenizer>(
            tokenizerFactory);
        this.lexicalDataFactory = new CachedInstanceFactoryDecorator<ILexicalData>(
            lexicalDataFactory);
    }

    /**
     * Builds a language model whose elements are looked up from the provided
     * language-aware factories using <code>languageCode</code>. The lookups are deferred:
     * each one happens inside an {@link IFactory#createInstance()} callback rather than in
     * this method.
     *
     * @param languageCode the language to build the model for
     * @param stemmerFactory queried with <code>languageCode</code> for the stemmer
     * @param tokenizerFactory queried with <code>languageCode</code> for the tokenizer
     * @param lexicalDataFactory queried with <code>languageCode</code> for the lexical data
     * @return a new language model for <code>languageCode</code>
     */
    public static LanguageModel create(
        final LanguageCode languageCode,
        final IStemmerFactory stemmerFactory,
        final ITokenizerFactory tokenizerFactory,
        final ILexicalDataFactory lexicalDataFactory)
    {
        // TODO: we could try to get rid of this extra layer of indirection here:
        // eagerly create instances of language model elements and keep references
        // to them rather than their factories. I'm not sure if the .NET API
        // would work correctly in that case though.
        return new LanguageModel(languageCode, new IFactory<IStemmer>()
        {
            @Override
            public IStemmer createInstance()
            {
                return stemmerFactory.getStemmer(languageCode);
            }
        }, new IFactory<ITokenizer>()
        {
            @Override
            public ITokenizer createInstance()
            {
                return tokenizerFactory.getTokenizer(languageCode);
            }
        }, new IFactory<ILexicalData>()
        {
            @Override
            public ILexicalData createInstance()
            {
                return lexicalDataFactory.getLexicalData(languageCode);
            }
        });
    }

    /**
     * @return the language this model was created for
     */
    public LanguageCode getLanguageCode()
    {
        return languageCode;
    }

    /**
     * @return the lexical data supplied by the wrapped lexical data factory
     */
    public ILexicalData getLexicalData()
    {
        return lexicalDataFactory.createInstance();
    }

    /**
     * @return the stemmer supplied by the wrapped stemmer factory
     */
    public IStemmer getStemmer()
    {
        return stemmerFactory.createInstance();
    }

    /**
     * @return the tokenizer supplied by the wrapped tokenizer factory
     */
    public ITokenizer getTokenizer()
    {
        return tokenizerFactory.createInstance();
    }
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy