// ahocorasick 0.2.1 source code: Main (Maven / Gradle / Ivy)
import org.ahocorasick.interval.IntervalTree;
import org.ahocorasick.interval.Intervalable;
import org.ahocorasick.trie.Emit;
import org.ahocorasick.trie.Token;
import org.ahocorasick.trie.Trie;
import java.io.*;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
public class Main {
static public void main(String[] args) throws IOException, InterruptedException {
// ahoCorasick();
// intervalTree();
// churchill();
fortyTwo();
}
public static void ahoCorasick() throws InterruptedException, IOException {
System.out.println("BEFORE");
System.out.println("Heap: "+Runtime.getRuntime().totalMemory());
System.out.println("Free: "+Runtime.getRuntime().freeMemory());
Trie trie = new Trie();
BufferedReader reader = new BufferedReader
(new InputStreamReader
(new FileInputStream("/usr/share/dict/words")));
long startTime = System.currentTimeMillis();
String line;
while ((line = reader.readLine()) != null) {
trie.addKeyword(line);
}
long endTime = System.currentTimeMillis();
System.out.println("endTime - startTime = " +
(endTime - startTime) +
" milliseconds");
System.gc();
Thread.sleep(1000);
System.out.println("AFTER");
System.out.println("Heap: "+Runtime.getRuntime().totalMemory());
System.out.println("Free: "+Runtime.getRuntime().freeMemory());
}
public static void intervalTree() {
List emits = new ArrayList();
emits.add(new Emit(0, 2, null));
emits.add(new Emit(1, 3, null));
emits.add(new Emit(2, 4, null));
emits.add(new Emit(3, 5, null));
emits.add(new Emit(4, 6, null));
emits.add(new Emit(5, 7, null));
IntervalTree intervalTree = new IntervalTree(emits);
}
public static void churchill() throws IOException {
String speech =
"Turning once again, and this time more generally, to the question of invasion, I would observe that there has never been a period in all these long centuries of which we boast when an absolute guarantee against invasion, still less against serious raids, could have been given to our people. In the days of Napoleon, of which I was speaking just now, the same wind which would have carried his transports across the Channel might have driven away the blockading fleet. There was always the chance, and it is that chance which has excited and befooled the imaginations of many Continental tyrants. Many are the tales that are told. We are assured that novel methods will be adopted, and when we see the originality of malice, the ingenuity of aggression, which our enemy displays, we may certainly prepare ourselves for every kind of novel stratagem and every kind of brutal and treacherous manœuvre. I think that no idea is so outlandish that it should not be considered and viewed with a searching, but at the same time, I hope, with a steady eye. We must never forget the solid assurances of sea power and those which belong to air power if it can be locally exercised.\n" +
"I have, myself, full confidence that if all do their duty, if nothing is neglected, and if the best arrangements are made, as they are being made, we shall prove ourselves once more able to defend our island home, to ride out the storm of war, and to outlive the menace of tyranny, if necessary for years, if necessary alone. At any rate, that is what we are going to try to do. That is the resolve of His Majesty's Government – every man of them. That is the will of Parliament and the nation. The British Empire and the French Republic, linked together in their cause and in their need, will defend to the death their native soil, aiding each other like good comrades to the utmost of their strength.\n" +
"Even though large tracts of Europe and many old and famous States have fallen or may fall into the grip of the Gestapo and all the odious apparatus of Nazi rule, we shall not flag or fail. We shall go on to the end. We shall fight in France, we shall fight on the seas and oceans, we shall fight with growing confidence and growing strength in the air, we shall defend our island, whatever the cost may be. We shall fight on the beaches, we shall fight on the landing grounds, we shall fight in the fields and in the streets, we shall fight in the hills; we shall never surrender, and if, which I do not for a moment believe, this island or a large part of it were subjugated and starving, then our Empire beyond the seas, armed and guarded by the British Fleet, would carry on the struggle, until, in God's good time, the New World, with all its power and might, steps forth to the rescue and the liberation of the old.";
// String speech =
// "Turning once again, and this time more generally";
// Trie trie = new Trie().removeOverlaps().onlyWholeWords();
Trie trie = new Trie().removeOverlaps().onlyWholeWords().caseInsensitive();
// Trie trie = new Trie();
BufferedReader reader = new BufferedReader
(new InputStreamReader
(new FileInputStream("/usr/share/dict/words")));
String line;
while ((line = reader.readLine()) != null) {
trie.addKeyword(line);
}
// Collection emits = trie.parseText(speech);
// for (Emit emit : emits) {
// System.out.println(emit.getStart()+":"+emit.getEnd()+"="+emit.getKeyword());
// }
Collection tokens = trie.tokenize(speech);
for (Token token : tokens) {
if (token.isMatch()) {
System.out.print("*");
}
System.out.print(token.getFragment());
if (token.isMatch()) {
System.out.print("*");
}
}
}
public static void fortyTwo() {
String speech = "The Answer to the Great Question... Of Life, " +
"the Universe and Everything... Is... Forty-two,' said " +
"Deep Thought, with infinite majesty and calm.";
Trie trie = new Trie().removeOverlaps().onlyWholeWords().caseInsensitive();
trie.addKeyword("great question");
trie.addKeyword("forty-two");
trie.addKeyword("deep thought");
Collection tokens = trie.tokenize(speech);
StringBuffer html = new StringBuffer();
html.append("");
for (Token token : tokens) {
if (token.isMatch()) {
html.append("");
}
html.append(token.getFragment());
if (token.isMatch()) {
html.append("");
}
}
html.append("
");
System.out.println(html);
}
}