All downloads are free. The search and download functionality uses the official Maven repository.

com.almondtools.rexlex.stringsearch.SetHorspool Maven / Gradle / Ivy

package com.almondtools.rexlex.stringsearch;

import java.util.ArrayList;
import java.util.LinkedList;
import java.util.List;

import com.almondtools.rexlex.io.CharProvider;

/**
 * Multi-pattern string search that combines a trie of the reversed patterns with a
 * per-character shift table.
 *
 * <p>The search window is as long as the shortest pattern. For every window position the
 * text is read backwards, starting at the last character of the window, while walking the
 * trie of reversed patterns. Every terminal trie node reached on the way yields a match.
 * Afterwards the window is moved forward by the shift stored for the character that was
 * found at the last window position.
 *
 * <p>NOTE(review): a pattern that is the empty string makes the minimum length (and with it
 * every shift) zero; the search presumably cannot advance in that case - confirm against
 * {@code CharProvider} before allowing empty patterns.
 */
public class SetHorspool implements StringSearchAlgorithm {

	/** Trie containing every pattern in reversed character order. */
	private final TrieRoot trie;
	/** Smallest character occurring in any pattern; lower bound of the shift table. */
	private final char minChar;
	/** Largest character occurring in any pattern; upper bound of the shift table. */
	private final char maxChar;
	/** Length of the shortest pattern; also the default shift. */
	private final int minLength;
	/** Shift per character, indexed by {@code c - minChar}. */
	private final int[] characterShift;

	/**
	 * Builds the trie and the shift table for the given patterns.
	 *
	 * @param patterns the strings to search for; must contain at least one element
	 * @throws IllegalArgumentException if {@code patterns} is empty (no character range and
	 *         no minimum length can be derived from an empty set)
	 */
	public SetHorspool(List<String> patterns) {
		if (patterns.isEmpty()) {
			throw new IllegalArgumentException("at least one pattern is required");
		}
		List<char[]> charpatterns = toCharArray(patterns);
		this.trie = computeTrie(charpatterns);
		this.maxChar = computeMaxChar(charpatterns);
		this.minChar = computeMinChar(charpatterns);
		this.minLength = minLength(charpatterns);
		this.characterShift = computeCharacterShift(charpatterns, minLength, minChar, maxChar);
	}

	/** Returns the length of the shortest pattern. */
	private static int minLength(List<char[]> patterns) {
		int len = Integer.MAX_VALUE;
		for (char[] pattern : patterns) {
			len = Math.min(len, pattern.length);
		}
		return len;
	}

	/** Returns the smallest character found in any of the patterns. */
	private static char computeMinChar(List<char[]> patterns) {
		char min = Character.MAX_VALUE;
		for (char[] pattern : patterns) {
			for (char c : pattern) {
				if (c < min) {
					min = c;
				}
			}
		}
		return min;
	}

	/** Returns the largest character found in any of the patterns. */
	private static char computeMaxChar(List<char[]> patterns) {
		char max = Character.MIN_VALUE;
		for (char[] pattern : patterns) {
			for (char c : pattern) {
				if (c > max) {
					max = c;
				}
			}
		}
		return max;
	}

	/** Converts every pattern string into its character array, keeping the order. */
	private static List<char[]> toCharArray(List<String> patterns) {
		List<char[]> charpatterns = new ArrayList<char[]>(patterns.size());
		for (String pattern : patterns) {
			charpatterns.add(pattern.toCharArray());
		}
		return charpatterns;
	}

	/** Builds the trie by inserting each pattern through {@code extendReverse}. */
	private static TrieRoot computeTrie(List<char[]> charpatterns) {
		TrieRoot trie = new TrieRoot();
		for (char[] pattern : charpatterns) {
			trie.extendReverse(pattern);
		}
		return trie;
	}

	/**
	 * Computes the shift table for the character range {@code [min, max]}.
	 *
	 * <p>Every entry starts at {@code minLength}. For each pattern character except the last
	 * one, the entry is lowered to the distance between that character and the end of its
	 * pattern, so the table finally holds the smallest such distance over all patterns.
	 */
	private static int[] computeCharacterShift(List<char[]> patterns, int minLength, char min, char max) {
		int[] characters = new int[max - min + 1];
		for (int i = 0; i < characters.length; i++) {
			characters[i] = minLength;
		}
		for (char[] pattern : patterns) {
			// the last character is skipped: its distance to the pattern end would be 0
			for (int i = 0; i < pattern.length - 1; i++) {
				int index = pattern[i] - min;
				characters[index] = Math.min(characters[index], pattern.length - i - 1);
			}
		}
		return characters;
	}

	/**
	 * Returns the shift for the given character; characters outside of
	 * {@code [minChar, maxChar]} occur in no pattern and get the default {@code minLength}.
	 */
	private int getShift(char c) {
		if (c < minChar || c > maxChar) {
			return minLength;
		}
		return characterShift[c - minChar];
	}

	@Override
	public StringFinder createFinder(CharProvider chars) {
		return new Finder(chars);
	}

	/** Returns the length of the shortest pattern. */
	@Override
	public int getPatternLength() {
		return minLength;
	}

	/**
	 * Finder bound to one {@link CharProvider}. Matches found at the same window position
	 * are queued in {@code buffer} and handed out one by one.
	 */
	private class Finder implements StringFinder {

		private final CharProvider chars;
		/** Matches already found but not yet returned, in the order they were found. */
		private final List<StringMatch> buffer;

		public Finder(CharProvider chars) {
			this.chars = chars;
			this.buffer = new LinkedList<StringMatch>();
		}

		@Override
		public void skipTo(int pos) {
			chars.move(pos);
		}

		/**
		 * Returns the next match, or {@code null} when the loop ends without finding one.
		 */
		@Override
		public StringMatch findNext() {
			// first drain matches left over from the previous window position
			if (!buffer.isEmpty()) {
				return buffer.remove(0);
			}
			int lookahead = minLength - 1;
			while (!chars.finished(lookahead)) {
				int patternPointer = lookahead;
				int pos = chars.current();
				char current = chars.lookahead(patternPointer);
				TrieNode node = trie.nextNode(current);
				// walk backwards through the text while the reversed-pattern trie can follow
				while (node != null) {
					if (node.isTerminal()) {
						buffer.add(createMatch(patternPointer));
					}
					patternPointer--;
					if (pos + patternPointer < 0) {
						// would read before the first character
						break;
					}
					node = node.nextNode(chars.lookahead(patternPointer));
				}
				// the shift depends on the character at the last window position only
				chars.forward(getShift(current));
				if (!buffer.isEmpty()) {
					return buffer.remove(0);
				}
			}
			return null;
		}

		/** Collects the results of repeated {@link #findNext()} calls until it returns {@code null}. */
		@Override
		public List<StringMatch> findAll() {
			List<StringMatch> matches = new ArrayList<StringMatch>();
			while (true) {
				StringMatch match = findNext();
				if (match == null) {
					return matches;
				} else {
					matches.add(match);
				}
			}
		}

		/**
		 * Creates the match that starts {@code patternPointer} characters after the current
		 * position and ends {@code minLength} characters after it. Must be called before the
		 * provider is moved forward, because both bounds are relative to {@code chars.current()}.
		 */
		private StringMatch createMatch(int patternPointer) {
			int start = chars.current() + patternPointer;
			int end = chars.current() + minLength;
			String s = chars.slice(start, end);
			return new StringMatch(start, end, s);
		}
	}

	/** Factory creating {@link SetHorspool} instances for a list of patterns. */
	public static class Factory implements MultiWordSearchAlgorithmFactory {

		@Override
		public StringSearchAlgorithm of(List<String> patterns) {
			return new SetHorspool(patterns);
		}

	}

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy