// net.paoding.analysis.dictionary.BinaryDictionary (Maven / Gradle / Ivy)
/**
* Copyright 2007 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package net.paoding.analysis.dictionary;
/**
 * Binary-search implementation of {@link Dictionary}.
 *
 * <p>The dictionary is a view over the half-open range {@code [start, end)}
 * of a word array. The array is stored by reference (it is not copied) and
 * must be sorted in ascending order consistent with
 * {@link #compare(CharSequence, int, int, CharSequence)}.
 *
 * @author Zhiliang Wang
 *
 * @since 1.0
 */
public class BinaryDictionary implements Dictionary {

    // -------------------------------------------------

    /** Backing array of words in ascending order; shared with the caller. */
    private final Word[] ascWords;

    /** Index of the first word of this dictionary in {@link #ascWords} (inclusive). */
    private final int start;

    /** Index just past the last word of this dictionary in {@link #ascWords} (exclusive). */
    private final int end;

    /** Number of words in this dictionary, i.e. {@code end - start}. */
    private final int count;

    // -------------------------------------------------

    /**
     * Builds a binary-search dictionary over a whole array of words sorted
     * in ascending order.
     *
     * @param ascWords
     *            the words, in ascending order
     */
    public BinaryDictionary(Word[] ascWords) {
        this(ascWords, 0, ascWords.length);
    }

    /**
     * Builds a binary-search dictionary over the range {@code [start, end)}
     * of an array of words sorted in ascending order.
     *
     * @param ascWords
     *            the words, in ascending order
     * @param start
     *            index of the first word belonging to this dictionary (inclusive)
     * @param end
     *            index just past the last word belonging to this dictionary (exclusive)
     */
    public BinaryDictionary(Word[] ascWords, int start, int end) {
        this.ascWords = ascWords;
        this.start = start;
        this.end = end;
        this.count = end - start;
    }

    // -------------------------------------------------

    /**
     * Returns the word at the given position of this dictionary.
     *
     * @param index
     *            position relative to the start of this dictionary's range
     * @return the word stored at {@code start + index} in the backing array
     */
    public Word get(int index) {
        return ascWords[start + index];
    }

    /**
     * Returns the number of words in this dictionary.
     *
     * @return {@code end - start}
     */
    public int size() {
        return count;
    }

    /**
     * Looks up the {@code count} characters of {@code input} starting at
     * {@code begin}.
     *
     * <ul>
     * <li>Exact match: returns a hit holding the matched word's position in
     * the backing array, the word itself, and the word following it inside
     * this dictionary's range ({@code null} when the match is the last word
     * of the range).</li>
     * <li>No exact match, but the word at the insertion point starts with the
     * searched text: returns a hit with {@link Hit#UNCLOSED_INDEX}, no word,
     * and that longer word as the next word.</li>
     * <li>Otherwise: returns {@link Hit#UNDEFINED}.</li>
     * </ul>
     *
     * <p>NOTE(review): the index stored in an exact-match hit is the absolute
     * position in the backing array, whereas {@link #get(int)} takes a
     * position relative to {@code start}. The two coincide only when
     * {@code start == 0}; confirm which one callers expect.
     *
     * @param input
     *            text containing the segment to look up
     * @param begin
     *            index in {@code input} of the first character of the segment
     * @param count
     *            number of characters in the segment
     * @return the search result as described above
     */
    public Hit search(CharSequence input, int begin, int count) {
        int left = this.start;
        int right = this.end - 1;

        while (left <= right) {
            // Unsigned shift keeps the midpoint valid even if left + right
            // overflows int.
            int pointer = (left + right) >>> 1;
            Word word = ascWords[pointer];
            int relation = compare(input, begin, count, word);
            if (relation == 0) {
                // Only look ahead inside this dictionary's own range; the
                // backing array may hold unrelated words past 'end'.
                int nextWordIndex = pointer + 1;
                Word next = nextWordIndex < this.end ? ascWords[nextWordIndex] : null;
                return new Hit(pointer, word, next);
            }
            if (relation < 0) {
                right = pointer - 1;
            } else {
                left = pointer + 1;
            }
        }

        // 'left' is now the insertion point: the first word greater than the
        // searched text, or 'end' when every word in the range is smaller.
        if (left >= this.end) {
            return Hit.UNDEFINED;
        }

        Word nextWord = ascWords[left];
        if (isPrefixOf(input, begin, count, nextWord)) {
            return new Hit(Hit.UNCLOSED_INDEX, null, nextWord);
        }
        return Hit.UNDEFINED;
    }

    /**
     * Tells whether the {@code count} characters of {@code input} starting at
     * {@code begin} form a prefix of {@code candidate}.
     */
    private static boolean isPrefixOf(CharSequence input, int begin, int count,
            CharSequence candidate) {
        if (candidate.length() < count) {
            return false;
        }
        for (int i = begin, j = 0; j < count; i++, j++) {
            if (input.charAt(i) != candidate.charAt(j)) {
                return false;
            }
        }
        return true;
    }

    /**
     * Compares a segment of {@code one} with the whole of {@code theOther}.
     *
     * <p>Characters are compared pairwise, by {@code char} value, over the
     * shorter of the segment and {@code theOther}; the first difference
     * decides the result. If no difference is found, the result is
     * {@code count - theOther.length()}, so a shorter sequence sorts before a
     * longer one that it prefixes.
     *
     * <p>The character loop also stops at the end of {@code one}; if
     * {@code one} is shorter than {@code begin + count}, the missing
     * characters are not compared and only the length difference decides.
     *
     * @param one
     *            text containing the segment to compare
     * @param begin
     *            index in {@code one} of the first character of the segment
     * @param count
     *            number of characters in the segment
     * @param theOther
     *            the sequence to compare the segment against
     * @return a negative value, zero, or a positive value when the segment is
     *         respectively less than, equal to, or greater than
     *         {@code theOther}
     */
    public static int compare(CharSequence one, int begin, int count,
            CharSequence theOther) {
        int oneLength = one.length();
        int limit = Math.min(theOther.length(), count);
        for (int i = begin, j = 0; i < oneLength && j < limit; i++, j++) {
            char a = one.charAt(i);
            char b = theOther.charAt(j);
            if (a > b) {
                return 1;
            }
            if (a < b) {
                return -1;
            }
        }
        return count - theOther.length();
    }
}