org.carrot2.text.linguistic.lucene.ArabicStemmerAdapter Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of carrot2-mini Show documentation
Show all versions of carrot2-mini Show documentation
Carrot2 search results clustering framework. Minimal functional subset
(core algorithms and infrastructure, no document sources).
/*
* Carrot2 project.
*
* Copyright (C) 2002-2013, Dawid Weiss, Stanisław Osiński.
* All rights reserved.
*
* Refer to the full license file "carrot2.LICENSE"
* in the root folder of the repository checkout or at:
* http://www.carrot2.org/carrot2.LICENSE
*/
package org.carrot2.text.linguistic.lucene;
import org.carrot2.text.linguistic.IStemmer;
import org.carrot2.text.util.MutableCharArray;
/**
 * Adapter exposing the lucene-contrib Arabic normalizer and stemmer through
 * the {@link IStemmer} interface.
 * <p>
 * Each instance reuses a single internal scratch buffer across calls to
 * {@link #stem(CharSequence)}, so an instance must not be used by several
 * threads at the same time.
 */
public class ArabicStemmerAdapter implements IStemmer
{
    /** Lucene stemmer, applied to the normalized characters. */
    private final org.apache.lucene.analysis.ar.ArabicStemmer delegate;

    /** Lucene normalizer, applied to the raw input before stemming. */
    private final org.apache.lucene.analysis.ar.ArabicNormalizer normalizer;

    /** Scratch buffer reused between calls; grown on demand, never shrunk. */
    private char [] buffer = new char [0];

    /**
     * Creates an adapter with its own normalizer and stemmer instances.
     */
    public ArabicStemmerAdapter()
    {
        delegate = new org.apache.lucene.analysis.ar.ArabicStemmer();
        normalizer = new org.apache.lucene.analysis.ar.ArabicNormalizer();
    }

    /**
     * Normalizes and stems the given word.
     *
     * @param word the word to stem; must not be <code>null</code>
     * @return the normalized and stemmed form of <code>word</code>, or
     *         <code>null</code> if that form is identical to <code>word</code>.
     *         The returned sequence is backed by its own array and stays valid
     *         across subsequent calls to this method.
     */
    public CharSequence stem(CharSequence word)
    {
        final int length = word.length();

        // Grow the scratch buffer only when the input does not fit.
        if (length > buffer.length)
        {
            buffer = new char [length];
        }
        for (int i = 0; i < length; i++)
        {
            buffer[i] = word.charAt(i);
        }

        // Both steps rewrite the buffer in place and return the new length.
        int newLen = normalizer.normalize(buffer, length);
        newLen = delegate.stem(buffer, newLen);

        // Nothing changed: signal "no distinct stem" with null.
        if (newLen == length && sameContent(buffer, newLen, word))
        {
            return null;
        }

        // Copy the result out of the scratch buffer so the returned sequence
        // does not share storage that the next stem() call overwrites (this
        // assumes MutableCharArray wraps the array it is given rather than
        // copying it).
        final char [] stemmed = new char [newLen];
        System.arraycopy(buffer, 0, stemmed, 0, newLen);
        return new MutableCharArray(stemmed, 0, newLen);
    }

    /**
     * Checks whether the first <code>len</code> characters of
     * <code>chars</code> are the same as the characters of <code>word</code>.
     * The caller must ensure that <code>len</code> equals the length of
     * <code>word</code>.
     */
    private boolean sameContent(char [] chars, int len, CharSequence word)
    {
        assert len == word.length();

        for (int i = 0; i < len; i++)
        {
            if (chars[i] != word.charAt(i))
            {
                return false;
            }
        }
        return true;
    }
}