All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.almondtools.rexlex.stringsearch.SetBackwardOracleMatching Maven / Gradle / Ivy

package com.almondtools.rexlex.stringsearch;

import static java.util.Arrays.copyOfRange;

import java.util.ArrayList;
import java.util.IdentityHashMap;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;

import com.almondtools.rexlex.io.CharProvider;

public class SetBackwardOracleMatching implements StringSearchAlgorithm {

	private TrieRoot trie;
	private int minLength;
	private Map> terminals;

	public SetBackwardOracleMatching(List patterns) {
		List charpatterns = toCharArray(patterns);
		this.minLength = minLength(charpatterns);
		this.trie = computeTrie(charpatterns, minLength);
		this.terminals = computeTerminals(trie, charpatterns, minLength);
	}

	private int minLength(List patterns) {
		int len = Integer.MAX_VALUE;
		for (char[] pattern : patterns) {
			if (pattern.length < len) {
				len = pattern.length;
			}
		}
		return len;
	}

	private List toCharArray(List patterns) {
		List charpatterns = new ArrayList(patterns.size());
		for (String pattern : patterns) {
			charpatterns.add(pattern.toCharArray());
		}
		return charpatterns;
	}

	private static TrieRoot computeTrie(List charpatterns, int length) {
		TrieRoot trie = new TrieRoot();
		for (char[] pattern : charpatterns) {
			char[] prefix = copyOfRange(pattern, 0, length);
			trie.extendReverse(prefix, pattern.length == prefix.length);
		}
		computeOracle(trie, length);
		return trie;
	}

	private static void computeOracle(TrieRoot trie, int length) {
		final Map oracle = new IdentityHashMap();
		final Trie init = trie;
		oracle.put(init, null);
		trie.apply(new TrieVisitor() {

			@Override
			public void visitRoot(TrieRoot trie, Trie parent) {
				visit(trie, parent);
			}

			@Override
			public void visitNode(TrieNode trie, Trie parent) {
				visit(trie, parent);
			}

			private void visit(Trie trie, Trie parent) {
				List nexts = new ArrayList(trie.getNexts());
				if (parent != null && trie instanceof TrieNode) {
					TrieNode node = (TrieNode) trie;
					char c = node.getChar();
					Trie down = oracle.get(parent);
					while (down != null && down.nextNode(c) == null) {
						down.addNext(node);
						down = oracle.get(down);
					}
					if (down != null) {
						Trie next = down.nextNode(c);
						oracle.put(trie, next);
					} else {
						oracle.put(trie, init);
					}
				}
				for (Trie next : nexts) {
					next.apply(this, trie);
				}
			}

		}, null);
	}

	private Map> computeTerminals(TrieRoot trie, List patterns, int minLength) {
		final Map> terminals = new IdentityHashMap>();
		for (char[] pattern : patterns) {
			String stringPattern = new String(pattern);
			String prefix = stringPattern.substring(0, minLength);
			Trie terminal = trie.nextNode(TrieRoot.revert(prefix.toCharArray()));
			List terminalPatterns = terminals.get(terminal);
			if (terminalPatterns == null) {
				terminalPatterns = new ArrayList();
				terminalPatterns.add(prefix);
				terminals.put(terminal, terminalPatterns);
			}
			terminalPatterns.add(stringPattern.substring(minLength));
		}
		return terminals;
	}

	@Override
	public StringFinder createFinder(CharProvider chars) {
		return new Finder(chars);
	}

	@Override
	public int getPatternLength() {
		return minLength;
	}

	private class Finder implements StringFinder {

		private CharProvider chars;
		private List buffer;

		public Finder(CharProvider chars) {
			this.chars = chars;
			this.buffer = new LinkedList();
		}

		@Override
		public void skipTo(int pos) {
			chars.move(pos);
		}

		@Override
		public StringMatch findNext() {
			if (!buffer.isEmpty()) {
				return buffer.remove(0);
			}
			final int lookahead = minLength - 1;
			next: while (!chars.finished(lookahead)) {
				Trie current = trie;
				int j = lookahead;
				while (j >= 0 && current != null) {
					current = current.nextNode(chars.lookahead(j));
					j--;
				}
				int currentWindowStart = chars.current();
				int currentPos = currentWindowStart + j + 1;
				int currentWindowEnd = currentWindowStart + minLength;
				String matchedPrefix = chars.slice(currentPos, currentWindowEnd);
				if (current != null && j < 0) {
					List patterns = terminals.get(current);
					Iterator iPatterns = patterns.iterator();
					String prefix = iPatterns.next();
					if (prefix.equals(matchedPrefix)) {
						while (iPatterns.hasNext()) {
							String suffix = iPatterns.next();
							if (!chars.finished(suffix.length())) {
								int currentWordEnd = currentWindowEnd + suffix.length();
								if (chars.slice(currentWindowEnd, currentWordEnd).equals(suffix)) {
									buffer.add(new StringMatch(currentWindowStart, currentWordEnd, prefix + suffix));
								}
							}
						}
						chars.next();
						if (buffer.isEmpty()) {
							continue next;
						} else {
							return buffer.remove(0);
						}
					}

				}
				if (j <= 0) {
					chars.next();
				} else {
					chars.forward(j + 1);
				}
			}
			return null;
		}

		@Override
		public List findAll() {
			List matches = new ArrayList();
			while (true) {
				StringMatch match = findNext();
				if (match == null) {
					return matches;
				} else {
					matches.add(match);
				}
			}
		}

	}

	public static class Factory implements MultiWordSearchAlgorithmFactory {

		@Override
		public StringSearchAlgorithm of(List patterns) {
			return new SetBackwardOracleMatching(patterns);
		}

	}

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy