
org.codelibs.fess.suggest.converter.KatakanaConverter Maven / Gradle / Ivy
/*
* Copyright 2012-2024 CodeLibs Project and the Others.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
* either express or implied. See the License for the specific language
* governing permissions and limitations under the License.
*/
package org.codelibs.fess.suggest.converter;
import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.TokenizerFactory;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.opensearch.core.common.Strings;
import com.ibm.icu.text.Transliterator;
public class KatakanaConverter implements ReadingConverter {
protected final Transliterator transliterator = Transliterator.getInstance("Hiragana-Katakana");
protected volatile boolean initialized = false;
protected TokenizerFactory tokenizerFactory = null;
public KatakanaConverter() {
// nothing
}
public KatakanaConverter(final TokenizerFactory tokenizerFactory) {
if (isEnableTokenizer(tokenizerFactory)) {
this.tokenizerFactory = tokenizerFactory;
}
}
@Override
public void init() throws IOException {
/*
* TODO if (initialized) { return; }
*
* if (tokenizerFactory == null) { final String path = System.getProperty(SuggestConstants.USER_DICT_PATH);
* final String encoding = System.getProperty(SuggestConstants.USER_DICT_ENCODING); final Map
* args = new HashMap<>(); args.put("mode", "normal"); args.put("discardPunctuation", "false"); if
* (Strings.isNullOrEmpty(path)) { args.put("userDictionary", path); } if (Strings.isNullOrEmpty(encoding)) {
* args.put("userDictionaryEncoding", encoding); } final JapaneseTokenizerFactory japaneseTokenizerFactory = new
* JapaneseTokenizerFactory(args); // TODO japaneseTokenizerFactory.inform(new FilesystemResourceLoader());
* tokenizerFactory = japaneseTokenizerFactory; } initialized = true;
*/
}
@Override
public List convert(final String text, final String field, final String... langs) throws IOException {
final List readingList = new ArrayList<>();
readingList.add(toKatakana(text));
return readingList;
}
protected String toKatakana(final String inputStr) throws IOException {
final StringBuilder kanaBuf = new StringBuilder();
final Reader rd = new StringReader(inputStr);
try (TokenStream stream = createTokenStream(rd)) {
if (stream == null) {
throw new IOException("Invalid tokenizer.");
}
stream.reset();
int offset = 0;
while (stream.incrementToken()) {
final CharTermAttribute att = stream.getAttribute(CharTermAttribute.class);
final String term = att.toString();
final int pos = inputStr.indexOf(term, offset);
if (pos > 0) {
final String tmp = inputStr.substring(offset, pos);
kanaBuf.append(transliterator.transliterate(tmp));
offset = pos;
} else if (pos == -1) {
continue;
}
String reading = getReadingFromAttribute(stream);
if (Strings.isNullOrEmpty(reading)) {
reading = transliterator.transliterate(att.toString());
}
kanaBuf.append(reading);
offset += term.length();
}
}
return kanaBuf.toString();
}
protected boolean isEnableTokenizer(final TokenizerFactory factory) {
// TODO return factory instanceof JapaneseTokenizerFactory;
return false;
}
private TokenStream createTokenStream(final Reader rd) {
return null;
/*
* TODO if (tokenizerFactory instanceof JapaneseTokenizerFactory) { return tokenizerFactory.create(); } else {
* return null; }
*/
}
protected String getReadingFromAttribute(final TokenStream stream) {
return null;
/*
* if (tokenizerFactory instanceof JapaneseTokenizerFactory) { final ReadingAttribute rdAttr =
* stream.getAttribute(ReadingAttribute.class); return rdAttr.getReading(); } else { return null; }
*/
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy