com.ontotext.kim.gate.KimLookupParser Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of gazetteer-lkb Show documentation
Show all versions of gazetteer-lkb Show documentation
A Large Knowledge Based (LKB) Gazetteer
The newest version!
package com.ontotext.kim.gate;
import java.util.Collection;
import org.apache.log4j.Logger;
import com.ontotext.kim.model.ParsingFrame;
/**
 * This class processes a textual input, finds the location of all known text
 * fragments stored in a dictionary and then passes these findings to a special
 * handler.
 *
 * The text processing is performed through the class {@code ParsingFrame}.
 * An instance of class {@code AliasCacheImpl} is used as the dictionary.
 * The findings are passed to a handler class that implements the interface
 * {@code EntityOccuranceHandler}.
 *
 * @author danko
 *
 */
public class KimLookupParser {

    private static final Logger log = Logger.getLogger(KimLookupParser.class);

    /**
     * Callback that receives every dictionary match found by
     * {@link KimLookupParser#findLookups(String, EntityOccuranceHandler)}.
     */
    public interface EntityOccuranceHandler {
        /**
         * Called once for each match returned by the dictionary.
         * @param start - the start value carried by the matching AliasWrapper
         * @param end - the end value carried by the matching AliasWrapper
         * @param instURI - the instance URI of the related Entity
         * @param classURI - the class URI of the related Entity
         */
        void processEntityOccurance(int start, int end,
                String instURI, String classURI);
    }

    public interface AliasLookupDictionary {
        /**
         * Looks up for matches given a ParsingFrame. This is used for
         * multiple lookups for different fragments of a parsed text.
         * @param pfm - a ParsingFrame which has already parsed a part
         * of the text
         * @return - collection of matching wrapped Aliases; the parser
         * treats a null result as "no matches"
         */
        Collection<AliasWrapper> lookup(ParsingFrame pfm);

        /**
         * Checks if the lexeme phrase that is currently focused by the
         * passed ParsingFrame is a valid lexeme prefix of another alias.
         * If it is a valid prefix - then phrase can grow.
         * @param pfm - a ParsingFrame which has already parsed a part
         * of the text
         * @return - true if there is still a chance to find a larger
         * phrase matching the current location of the parsed text
         */
        boolean canPhraseGrow(ParsingFrame pfm);
    }

    /** This class implements a container to return the results of the
     * Alias Dictionary lookup */
    public static class AliasWrapper {
        public final String instURI; // The instance URI of the related Entity
        public final String classURI; // The class URI of the related Entity
        public final int start;
        public final int end;

        public AliasWrapper(String instURI, String classURI,
                int start, int end) {
            this.instURI = instURI;
            this.classURI = classURI;
            this.start = start;
            this.end = end;
        }
    }

    private final AliasLookupDictionary aliasDictionary;

    // volatile so that a stop request made through setInterrupted() from a
    // thread other than the one running findLookups() is seen by the loop.
    private volatile boolean interrupted = false;

    /**
     * Creates a parser that resolves text fragments against the given dictionary.
     * @param aliasCache - the dictionary used for all lookups
     */
    public KimLookupParser(AliasLookupDictionary aliasCache) {
        this.aliasDictionary = aliasCache;
    }

    /**
     * Walks a ParsingFrame over the content and reports every match returned
     * by the dictionary to the handler. The interrupted flag is cleared on
     * entry; the walk stops when the frame can no longer move or when the
     * flag has been set through {@link #setInterrupted(boolean)}.
     * @param content - the text to process
     * @param entityHandler - receives one call per match
     */
    public void findLookups(String content, EntityOccuranceHandler entityHandler) {
        this.interrupted = false;
        ParsingFrame pfm = new ParsingFrame(content);
        log.debug("Time tracing begins");
        Collection<AliasWrapper> currentMatch;
        do {
            if (pfm.parseOne()) {
                currentMatch = aliasDictionary.lookup(pfm);
                if (currentMatch != null) {
                    for (AliasWrapper ent : currentMatch) {
                        entityHandler.processEntityOccurance(ent.start, ent.end, ent.instURI, ent.classURI);
                    }
                }
            }
            // Move the frame only when the current phrase cannot be extended,
            // either because the dictionary has no longer alias with this
            // prefix or because the frame itself cannot expand.
            if (!aliasDictionary.canPhraseGrow(pfm) || !pfm.frameCanExpand()) {
                pfm.moveOne();
            }
        } while (pfm.frameCanMove() && !this.interrupted);
        log.debug("Time tracing ends");
    }

    /**
     * @return the current value of the interrupted flag
     */
    public boolean isInterrupted() {
        return this.interrupted;
    }

    /**
     * Sets the interrupted flag; a true value makes a running
     * {@link #findLookups(String, EntityOccuranceHandler)} stop at its next
     * loop check.
     * @param interrupted - the new flag value
     */
    public void setInterrupted(boolean interrupted) {
        this.interrupted = interrupted;
    }
}
© 2015 - 2024 Weber Informatics LLC | Privacy Policy