All Downloads are FREE. The search and download functionality uses the official Maven repository.

org.until.keyword.dictionary.Darts Maven / Gradle / Ivy

There is a newer version: 2.0.3
Show newest version
package org.until.keyword.dictionary;

import java.io.Externalizable;
import java.io.IOException;
import java.io.ObjectInput;
import java.io.ObjectOutput;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.TreeSet;

/**
 * Double-Array Trie used as a phrase dictionary with longest-match lookup.
 * <p>
 * The trie is stored in three parallel arrays. Slots are addressed with
 * 1-based indices in {@link #check}, {@link #firstWordIndices} and the result
 * of {@link #search(String, int)}; the Java arrays themselves are 0-based, so
 * slot {@code p} lives at array index {@code p - 1}.
 * <ul>
 * <li>{@code base[p - 1]}: offset used to locate the children of slot
 * {@code p}; its absolute value is the offset and a negative sign marks a slot
 * whose prefix is a complete dictionary phrase.</li>
 * <li>{@code check[p - 1]}: 1-based slot of the parent, {@code 0} for
 * first-character slots and for unused slots.</li>
 * <li>{@code state[p - 1]}: the prefix represented by the slot, {@code ""} for
 * unused slots.</li>
 * </ul>
 * Text is processed one UTF-16 code unit ({@code substring(i, i + 1)}) at a
 * time, both when building and when searching.
 * 
 * @author mega
 */
public class Darts implements Externalizable {

	/** Child offset per slot; negative when the slot ends a dictionary phrase. */
	protected int[] base;
	/** 1-based parent slot per slot; 0 for first-character and unused slots. */
	protected int[] check;
	/** Prefix represented by each slot; empty string for unused slots. */
	protected String[] state;
	/** Maps every single-character string of the dictionary to its code (1..n). */
	protected Map<String, Integer> coding;
	/** Maps the first character of each phrase to its 1-based slot. */
	protected Map<String, Integer> firstWordIndices;

	/**
	 * Builds the trie from a list of phrases.
	 * 
	 * @param phraseList phrases to index; see {@link #build(String...)} for
	 *                   the entries that are ignored
	 * @throws IllegalArgumentException if {@code phraseList} is null or empty
	 */
	public void build(List<String> phraseList) {
		if (phraseList == null) {
			throw new IllegalArgumentException("phraseList can't be null");
		}
		build(phraseList.toArray(new String[phraseList.size()]));
	}

	/**
	 * Builds the trie from the given phrases, discarding any previous content.
	 * Null entries, empty entries and entries starting with {@code "#"} are
	 * skipped; duplicates are collapsed.
	 * 
	 * @param phraseArray phrases to index
	 * @throws IllegalArgumentException if {@code phraseArray} is null or empty
	 */
	public void build(String... phraseArray) {
		if (phraseArray == null || phraseArray.length == 0) {
			throw new IllegalArgumentException("phraseArray can't be empty");
		}
		int maxPhraseLength = 0;
		// Sorted sets: create() relies on phrases sharing a prefix being adjacent
		// and on character codes following the same ordering.
		Set<String> phrases = new TreeSet<String>();
		Set<String> words = new TreeSet<String>();
		for (String phrase : phraseArray) {
			if (phrase == null || phrase.length() < 1 || phrase.startsWith("#")) {
				continue;
			}
			phrases.add(phrase);
			maxPhraseLength = Math.max(maxPhraseLength, phrase.length());
			for (int i = 0; i < phrase.length(); i++) {
				words.add(phrase.substring(i, i + 1));
			}
		}
		initVariable();
		int codingIndex = 1;
		for (String word : words) {
			coding.put(word, codingIndex++);
		}
		create(phrases, maxPhraseLength);
	}

	/** Resets all arrays to {@code null} and installs fresh, empty maps. */
	protected void initVariable() {
		base = null;
		check = null;
		state = null;
		coding = new HashMap<String, Integer>();
		firstWordIndices = new HashMap<String, Integer>();
	}

	/**
	 * Fills {@link #base}, {@link #check}, {@link #state} and
	 * {@link #firstWordIndices} from the given phrases. {@link #coding} must
	 * already contain a code for every character of every phrase.
	 * 
	 * @param phrases         phrases to index, iterated in the set's order
	 * @param maxPhraseLength length of the longest phrase
	 */
	protected void create(Set<String> phrases, int maxPhraseLength) {
		// prefix -> 0-based array index of the slot representing that prefix
		Map<String, Integer> stateBaseIndices = new HashMap<String, Integer>(phrases.size() * 3);
		extendSize(coding.size());
		int pos = 0;
		// Level 0: one slot per distinct first character.
		for (String phrase : phrases) {
			String c = phrase.substring(0, 1);
			if (!firstWordIndices.containsKey(c)) {
				base[pos] = 1;
				check[pos] = 0;
				state[pos] = c;
				stateBaseIndices.put(c, pos);
				firstWordIndices.put(c, ++pos);
			}
		}
		// Deeper levels: add one slot per distinct prefix of length i + 1.
		for (int i = 1; i < maxPhraseLength; i++) {
			for (String phrase : phrases) {
				if (phrase.length() <= i) {
					continue;
				}
				String prefix = phrase.substring(0, i + 1);
				if (stateBaseIndices.containsKey(prefix)) {
					continue;
				}
				int code = coding.get(phrase.substring(i, i + 1));
				int prevBaseIndex = stateBaseIndices.get(phrase.substring(0, i));
				int expectPrevBaseValue = pos - code + 1;
				if (base[prevBaseIndex] < expectPrevBaseValue) {
					// Raise the parent's base so this child lands on the next free slot.
					base[prevBaseIndex] = expectPrevBaseValue;
				} else {
					// Parent's base is kept; the child's slot follows from it.
					pos = base[prevBaseIndex] + code - 1;
				}
				if (base.length <= pos) {
					extendSize(base.length);
				}
				base[pos] = 1;
				check[pos] = prevBaseIndex + 1;
				state[pos] = prefix;
				stateBaseIndices.put(prefix, pos++);
			}
		}
		resize(pos);
		for (int i = 0; i < base.length; i++) {
			if (state[i] == null) {
				state[i] = "";
				continue;
			}
			// A negative base flags a slot whose prefix is a whole phrase.
			if (base[i] > 0 && phrases.contains(state[i])) {
				base[i] *= -1;
			}
		}
	}

	/**
	 * Grows each of the three arrays by {@code i} elements, keeping existing
	 * content; an array that is still {@code null} is allocated with length
	 * {@code i}.
	 * 
	 * @param i number of elements to add
	 */
	protected void extendSize(int i) {
		base = base == null ? new int[i] : Arrays.copyOf(base, base.length + i);
		check = check == null ? new int[i] : Arrays.copyOf(check, check.length + i);
		state = state == null ? new String[i] : Arrays.copyOf(state, state.length + i);
	}

	/**
	 * Sets the length of the three arrays to {@code size}, keeping the leading
	 * elements.
	 * 
	 * @param size new array length
	 */
	protected void resize(int size) {
		base = Arrays.copyOf(base, size);
		check = Arrays.copyOf(check, size);
		state = Arrays.copyOf(state, size);
	}

	/**
	 * Finds the slot whose state equals the given prefix by scanning
	 * {@link #state}.
	 * 
	 * @param phrase prefix to look up
	 * @return 0-based array index of the first matching slot, or {@code -1}
	 *         if {@code phrase} is null, empty or not present
	 */
	protected int getBaseIndex(String phrase) {
		if (phrase == null || phrase.length() == 0) {
			return -1;
		}
		for (int i = 0; i < state.length; i++) {
			if (phrase.equals(state[i])) {
				return i;
			}
		}
		return -1;
	}

	/**
	 * Searches for the longest dictionary phrase starting at offset 0.
	 * 
	 * @param sentence text to search
	 * @return see {@link #search(String, int)}
	 */
	public int[] search(String sentence) {
		return search(sentence, 0);
	}

	/**
	 * Searches for the longest dictionary phrase that starts exactly at
	 * {@code pos}.
	 * 
	 * @param sentence text to search
	 * @param pos      offset in {@code sentence} at which the phrase must start
	 * @return a two-element array: element 0 is the 1-based slot of the matched
	 *         phrase, or {@code 0} when nothing matches; element 1 is the
	 *         offset just past the match, or {@code pos} when nothing matches.
	 *         When {@code pos} is at or beyond the end of {@code sentence} the
	 *         no-match result is returned.
	 * @throws NullPointerException            if {@code sentence} is null or the
	 *                                         trie has not been built or read
	 * @throws StringIndexOutOfBoundsException if {@code pos} is negative
	 */
	public int[] search(String sentence, int pos) {
		int sub = 0;
		int m = -1;
		if (pos < sentence.length()) {
			Integer first = firstWordIndices.get(sentence.substring(pos, pos + 1));
			if (first != null) {
				int o = first;
				int b = base[o - 1];
				if (b < 0) {
					m = pos;
					sub = o;
				}
				for (int i = pos + 1; i < sentence.length(); i++) {
					Integer code = coding.get(sentence.substring(i, i + 1));
					if (code == null) {
						break;
					}
					int p = Math.abs(b) + code;
					// Stop when the target slot does not exist or belongs to another parent.
					if (p > check.length || check[p - 1] != o) {
						break;
					}
					o = p;
					b = base[p - 1];
					if (b < 0) {
						// Remember the longest complete phrase seen so far.
						m = i;
						sub = p;
					}
				}
			}
		}
		return new int[] { sub, m >= 0 ? m + 1 : pos };
	}

	/**
	 * Restores the arrays and maps in the order written by
	 * {@link #writeExternal(ObjectOutput)}.
	 */
	@SuppressWarnings("unchecked")
	@Override
	public void readExternal(ObjectInput in) throws IOException, ClassNotFoundException {
		base = (int[]) in.readObject();
		check = (int[]) in.readObject();
		state = (String[]) in.readObject();
		coding = (Map<String, Integer>) in.readObject();
		firstWordIndices = (Map<String, Integer>) in.readObject();
	}

	/** Writes the three arrays followed by the two maps. */
	@Override
	public void writeExternal(ObjectOutput out) throws IOException {
		out.writeObject(base);
		out.writeObject(check);
		out.writeObject(state);
		out.writeObject(coding);
		out.writeObject(firstWordIndices);
	}

	/**
	 * Two built tries are equal when their arrays and maps have equal content.
	 * An instance whose {@link #base} is {@code null} is equal only to itself.
	 */
	@Override
	public boolean equals(Object obj) {
		if (this == obj) {
			return true;
		}
		if (base == null || !(obj instanceof Darts)) {
			return false;
		}
		Darts d = (Darts) obj;
		// Map.equals compares content, independent of HashMap iteration order.
		return Arrays.equals(base, d.base) && Arrays.equals(check, d.check) && Arrays.equals(state, d.state)
				&& sameContent(coding, d.coding) && sameContent(firstWordIndices, d.firstWordIndices);
	}

	/** Hash code consistent with {@link #equals(Object)}. */
	@Override
	public int hashCode() {
		if (base == null) {
			// Matches the identity-based equality of an instance without arrays.
			return super.hashCode();
		}
		int result = Arrays.hashCode(base);
		result = 31 * result + Arrays.hashCode(check);
		result = 31 * result + Arrays.hashCode(state);
		result = 31 * result + (coding == null ? 0 : coding.hashCode());
		result = 31 * result + (firstWordIndices == null ? 0 : firstWordIndices.hashCode());
		return result;
	}

	/** Null-safe content comparison of two maps. */
	private static boolean sameContent(Map<String, Integer> left, Map<String, Integer> right) {
		return left == null ? right == null : left.equals(right);
	}

	/**
	 * Returns one line per slot with its 1-based index, base, check and state;
	 * falls back to {@link Object#toString()} when {@link #base} is
	 * {@code null}.
	 */
	@Override
	public String toString() {
		if (base == null) {
			return super.toString();
		}
		StringBuilder sb = new StringBuilder();
		for (int i = 0; i < base.length; i++) {
			sb.append("index=").append(i + 1).append("; base=").append(base[i]).append("; check=").append(check[i])
					.append("; state=").append(state[i]).append("\n");
		}
		return sb.toString();
	}

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy