
// org.apache.ctakes.assertion.medfacts.cleartk.AssertionCleartkAnalysisEngine (Maven / Gradle / Ivy listing residue)
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.ctakes.assertion.medfacts.cleartk;
import org.apache.commons.io.FilenameUtils;
import org.apache.ctakes.assertion.attributes.features.selection.FeatureSelection;
import org.apache.ctakes.assertion.medfacts.cleartk.extractors.FedaFeatureFunction;
import org.apache.ctakes.core.util.doc.DocIdUtil;
import org.apache.ctakes.typesystem.type.constants.CONST;
import org.apache.ctakes.typesystem.type.syntax.BaseToken;
import org.apache.ctakes.typesystem.type.temporary.assertion.AssertionCuePhraseAnnotation;
import org.apache.ctakes.typesystem.type.textsem.EntityMention;
import org.apache.ctakes.typesystem.type.textsem.EventMention;
import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
import org.apache.ctakes.typesystem.type.textspan.Sentence;
import org.apache.log4j.Logger;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_engine.AnalysisEngineDescription;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.cas.CASException;
import org.apache.uima.fit.descriptor.ConfigurationParameter;
import org.apache.uima.fit.factory.AnalysisEngineFactory;
import org.apache.uima.fit.factory.ConfigurationParameterFactory;
import org.apache.uima.fit.util.JCasUtil;
import org.apache.uima.jcas.JCas;
import org.apache.uima.jcas.tcas.Annotation;
import org.apache.uima.resource.ResourceInitializationException;
import org.cleartk.ml.CleartkAnnotator;
import org.cleartk.ml.Feature;
import org.cleartk.ml.Instance;
import org.cleartk.ml.TreeFeature;
import org.cleartk.ml.feature.extractor.CleartkExtractor;
import org.cleartk.ml.feature.extractor.CoveredTextExtractor;
import org.cleartk.ml.feature.extractor.FeatureExtractor1;
import org.cleartk.ml.feature.extractor.TypePathExtractor;
import org.cleartk.ml.feature.function.FeatureFunctionExtractor;
import java.io.File;
import java.net.URI;
import java.util.*;
//import org.chboston.cnlp.ctakes.relationextractor.ae.ModifierExtractorAnnotator;
/**
* @author swu
*/
public abstract class AssertionCleartkAnalysisEngine extends
CleartkAnnotator {
Logger logger = Logger.getLogger( AssertionCleartkAnalysisEngine.class );
public static final String PARAM_GOLD_VIEW_NAME = "GoldViewName";
public enum FEATURE_CONFIG {
NO_SEM, NO_SYN, STK, STK_FRAGS, PTK, PTK_FRAGS, DEP_REGEX, DEP_REGEX_FRAGS, ALL_SYN, VECTORS, NO_TOK
}
public static int relationId; // counter for error logging
// additional parameter for domain adaptation
public static final String FILE_TO_DOMAIN_MAP = "mapTrainFileToDomain";
@ConfigurationParameter(
name = PARAM_GOLD_VIEW_NAME,
mandatory = false,
description = "view containing the manual identified annotations (especially EntityMention and EventMention annotations); needed for training" )
protected String goldViewName;
public static final String PARAM_PRINT_ERRORS = "PrintErrors";
@ConfigurationParameter(
name = PARAM_PRINT_ERRORS,
mandatory = false,
description = "Print errors true/false",
defaultValue = "false" )
boolean printErrors;
public static final String PARAM_PROBABILITY_OF_KEEPING_DEFAULT_EXAMPLE = "ProbabilityOfKeepingADefaultExample";
@ConfigurationParameter(
name = PARAM_PROBABILITY_OF_KEEPING_DEFAULT_EXAMPLE,
mandatory = false,
description = "probability that a default example should be retained for training" )
protected double probabilityOfKeepingADefaultExample = 1.0;
public static final String PARAM_PORTION_OF_DATA_TO_USE = "PortionOfDataToUse";
@ConfigurationParameter(
name = PARAM_PORTION_OF_DATA_TO_USE,
mandatory = false,
description = "How much data to actually use during training (e.g. for building learning curves)"
)
protected double portionOfDataToUse = 1.0;
public static final String PARAM_FEATURE_SELECTION_THRESHOLD = "WhetherToDoFeatureSelection";
// Accurate name? Actually uses the threshold, right?
@ConfigurationParameter(
name = PARAM_FEATURE_SELECTION_THRESHOLD,
mandatory = false,
description = "the Chi-squared threshold at which features should be removed" )
protected Float featureSelectionThreshold = 0f;
public static final String PARAM_FEATURE_CONFIG = "FEATURE_CONFIG";
@ConfigurationParameter(
name = PARAM_FEATURE_CONFIG,
description = "Feature configuration to use (for experiments)",
mandatory = false
)
protected FEATURE_CONFIG featConfig = FEATURE_CONFIG.ALL_SYN;
public static final String PARAM_FEATURE_SELECTION_URI = "FeatureSelectionURI";
@ConfigurationParameter(
mandatory = false,
name = PARAM_FEATURE_SELECTION_URI,
description = "provides a URI where the feature selection data will be written" )
protected URI featureSelectionURI;
protected static Random coin = new Random( 0 );
protected static final String FEATURE_SELECTION_NAME = "SelectNeighborFeatures";
@ConfigurationParameter(
name = FILE_TO_DOMAIN_MAP,
mandatory = false,
description = "a map of filenames to their respective domains (i.e., directories that contain them)" )
protected String fileDomainMap;
protected Map fileToDomain = new HashMap<>();
protected String lastLabel;
/* DEPRECATED: STW 2013/03/28. Use DependencyUtility:getNominalHeadNode(jCas,annotation) instead */
// public ConllDependencyNode findAnnotationHead(JCas jcas, Annotation annotation) {
//
// for (ConllDependencyNode depNode : JCasUtil.selectCovered(jcas, ConllDependencyNode.class, annotation)) {
//
// ConllDependencyNode head = depNode.getHead();
// if (head == null || head.getEnd() <= annotation.getBegin() || head.getBegin() > annotation.getEnd()) {
// // The head is outside the bounds of the annotation, so this node must be the annotation's head
// return depNode;
// }
// }
// // Can this happen?
// return null;
// }
//private FeatureExtractor1 tokenFeatureExtractor;
// protected List> contextFeatureExtractors;
// protected List> tokenContextFeatureExtractors;
protected List> contextFeatureExtractors;
protected List> tokenContextFeatureExtractors;
protected List> tokenCleartkExtractors;
protected List> entityFeatureExtractors;
protected List> entityTreeExtractors;
protected CleartkExtractor cuePhraseInWindowExtractor;
protected List> featureFunctionExtractors = new ArrayList<>();
protected FedaFeatureFunction ffDomainAdaptor = null;
protected FeatureSelection featureSelection;
public abstract void setClassLabel( IdentifiedAnnotation entityMention, Instance instance )
throws AnalysisEngineProcessException;
protected abstract void initializeFeatureSelection() throws ResourceInitializationException;
// public abstract FeatureSelection createFeatureSelection(double threshold);
// public abstract URI createFeatureSelectionURI(File outputDirectoryName);
/**
 * Selects the view holding the annotations to read: the gold view during
 * training, the default (system) view otherwise.
 *
 * @param jCas the CAS being processed
 * @return the gold view when training, otherwise {@code jCas} itself
 * @throws AnalysisEngineProcessException if the configured gold view cannot be obtained
 */
private JCas getAnnotationView( final JCas jCas ) throws AnalysisEngineProcessException {
   if ( !this.isTraining() ) {
      return jCas;
   }
   try {
      return jCas.getView( this.goldViewName );
   } catch ( CASException casE ) {
      throw new AnalysisEngineProcessException( casE );
   }
}
@Override
@SuppressWarnings( "deprecation" )
public void initialize( UimaContext context ) throws ResourceInitializationException {
   super.initialize( context );
   // Re-process the "directory" string for domains that were used in the data
   if ( null != fileDomainMap ) {
      String[] dirs = fileDomainMap.split( "[;:]" );
      for ( String dir : dirs ) {
         // TODO: normalize dir to real domainId
         String domainId = normalizeToDomain( dir );
         // Hoisted listFiles(): the original called it twice, doing the directory
         // I/O twice and risking a mismatch between the null check and the loop.
         File[] domainFiles = new File( dir ).listFiles();
         if ( domainFiles != null ) {
            for ( File f : domainFiles ) {
               // Key by base name (extension removed) so document ids match at process() time.
               fileToDomain.put( FilenameUtils.removeExtension( f.getName() ), domainId );
            }
         }
      }
   }
   if ( this.isTraining() && this.goldViewName == null ) {
      throw new IllegalArgumentException( PARAM_GOLD_VIEW_NAME + " must be defined during training" );
   }
   // a list of feature extractors that require only the token: the stem of the
   // word, the text of the word itself, plus features created from the word text
   this.entityFeatureExtractors = new ArrayList<>();
   this.tokenCleartkExtractors = new ArrayList<>();
   // Token-context extractor: text of the mention's last two tokens plus
   // preceding/following tokens and bags at several window sizes.
   CleartkExtractor<IdentifiedAnnotation, BaseToken> tokenExtraction1 =
         new CleartkExtractor<>(
               BaseToken.class,
               new CoveredTextExtractor<BaseToken>(),
               new CleartkExtractor.LastCovered( 2 ),
               new CleartkExtractor.Preceding( 5 ),
               new CleartkExtractor.Following( 4 ),
               new CleartkExtractor.Bag( new CleartkExtractor.Preceding( 3 ) ),
               new CleartkExtractor.Bag( new CleartkExtractor.Following( 3 ) ),
               new CleartkExtractor.Bag( new CleartkExtractor.Preceding( 5 ) ),
               new CleartkExtractor.Bag( new CleartkExtractor.Following( 5 ) ),
               new CleartkExtractor.Bag( new CleartkExtractor.Preceding( 10 ) ),
               new CleartkExtractor.Bag( new CleartkExtractor.Following( 10 ) )
         );
   // Part-of-speech context extractor; intentionally NOT registered below
   // (kept for experiments -- see the commented add() call).
   CleartkExtractor<IdentifiedAnnotation, BaseToken> posExtraction1 =
         new CleartkExtractor<>(
               BaseToken.class,
               new TypePathExtractor<>( BaseToken.class, "partOfSpeech" ),
               new CleartkExtractor.LastCovered( 2 ),
               new CleartkExtractor.Preceding( 3 ),
               new CleartkExtractor.Following( 2 )
         );
   this.tokenCleartkExtractors.add( tokenExtraction1 );
   // this.tokenCleartkExtractors.add(posExtraction1);
   if ( !fileToDomain.isEmpty() ) {
      // set up FeatureFunction for all the laggard, non-Extractor features;
      // the distinct domain ids come from the directories scanned above.
      ffDomainAdaptor = new FedaFeatureFunction( new ArrayList<>( new HashSet<>( fileToDomain.values() ) ) );
   }
   entityTreeExtractors = new ArrayList<>();
}
@Override
public void process( JCas jCas ) throws AnalysisEngineProcessException {
logger.info( "Processing ..." );
String documentId = DocIdUtil.getDocumentID( jCas );
String domainId = "";
String domainFeature = null;
if ( this.featureFunctionExtractors.size() <= 0 ) {
this.ffDomainAdaptor = null;
}
if ( documentId != null ) {
logger.debug( "processing next doc: " + documentId );
// set the domain to be FeatureFunction'ed into all extractors
if ( !fileToDomain.isEmpty() && ffDomainAdaptor != null ) {
domainId = fileToDomain.get( documentId );
// if domain is not found, no warning -- just considers general domain
ffDomainAdaptor.setDomain( domainId );
} else if ( !fileToDomain.isEmpty() ) {
domainFeature = fileToDomain.get( documentId );
}
} else {
logger.debug( "processing next doc (doc id is null)" );
}
this.lastLabel = "";
// // get gold standard relation instances during testing for error analysis
// if (! this.isTraining() && printErrors) {
// JCas goldView;
// try {
// goldView = jCas.getView("GoldView");
// } catch(CASException e) {
// throw new AnalysisEngineProcessException(e);
// }
//
// //categoryLookup = createCategoryLookup(goldView);
// }
final JCas annotationView = getAnnotationView( jCas );
// Map> coveringSentenceMap = JCasUtil.indexCovering(annotationView, IdentifiedAnnotation.class, Sentence.class);
// Map> tokensCoveredInSentenceMap = JCasUtil.indexCovered(annotationView, Sentence.class, BaseToken.class);
// Map> coveringZoneMap =
// JCasUtil.indexCovering(jCas, IdentifiedAnnotation.class, Zone.class);
// Map> coveringSents =
// JCasUtil.indexCovering(jCas, IdentifiedAnnotation.class, Sentence.class);
// List> instances = new ArrayList>();
// generate a list of training instances for each sentence in the document
// Use an indexed map. This is faster than calling select and then selectCovering within a loop.
final Map> sentenceAnnotationMap
= JCasUtil.indexCovered( annotationView, Sentence.class, Annotation.class );
// Faster than calling JCasUtil methods for each which has to iterate through the full cas each time.
final Collection entities = new ArrayList<>();
final Collection cues = new ArrayList<>();
final Collection baseTokens = new ArrayList<>();
for(Sentence coveringSent : JCasUtil.select(annotationView, Sentence.class)){
Collection coveredAnnotations = sentenceAnnotationMap.get(coveringSent);
// Sort Annotations into *Mention, assertion cues and BaseTokens in one loop.
// Faster than calling JCasUtil methods for each which has to iterate through the full cas each time.
entities.clear();
cues.clear();
baseTokens.clear();
for ( Annotation annotation : coveredAnnotations ) {
if ( annotation instanceof EventMention || annotation instanceof EntityMention ) {
entities.add( (IdentifiedAnnotation)annotation );
} else if ( annotation instanceof AssertionCuePhraseAnnotation ) {
cues.add( (AssertionCuePhraseAnnotation)annotation );
} else if ( annotation instanceof BaseToken ) {
baseTokens.add( (BaseToken)annotation );
}
}
for ( IdentifiedAnnotation identifiedAnnotation : entities ) {
if ( identifiedAnnotation.getPolarity() == -1 ) {
logger.debug( String.format( " - identified annotation: [%d-%d] polarity %d (%s)",
identifiedAnnotation.getBegin(),
identifiedAnnotation.getEnd(),
identifiedAnnotation.getPolarity(),
identifiedAnnotation.getClass().getName() ) );
}
Instance instance = new Instance<>();
if ( domainFeature != null ) {
instance.add( new Feature( "Domain", domainFeature ) );
}
// // extract all features that require only the entity mention annotation
// instance.addAll(tokenFeatureExtractor.extract(jCas, entityMention));
// extract all features that require the token and sentence annotations
//Sentence sentence = sentenceList.iterator().next();
/*
if (sentence != null)
{
for (ContextExtractor extractor : this.contextFeatureExtractors) {
instance.addAll(extractor.extractWithin(annotationView, entityMention, sentence));
}
} else
{
// TODO extract context features for annotations that don't fall within a sentence
logger.log(Level.WARN, "FIXME/TODO: generate context features for entities that don't fall within a sentence");
}
*/
/*
for (ContextExtractor extractor : this.tokenContextFeatureExtractors) {
instance.addAll(extractor.extract(annotationView, entityMention));
}
*/
// only use extract this version if not doing domain adaptation
if ( ffDomainAdaptor == null ) {
for ( CleartkExtractor extractor : this.tokenCleartkExtractors ) {
// instance.addAll(extractor.extractWithin(annotationView, entityMention, sentence));
// if ( coveringSent != null ) {
instance.addAll( extractor
.extractWithin( annotationView, identifiedAnnotation, coveringSent ) );
// } else {
// instance.addAll( extractor.extract( annotationView, identifiedAnnotation ) );
// }
}
}
int closest = Integer.MAX_VALUE;
AssertionCuePhraseAnnotation closestCue = null;
for ( AssertionCuePhraseAnnotation cue : cues ) {
// It is much faster to count between BaseTokens already isolated within the same sentence.
final int betweenCount = countBetween( cue, identifiedAnnotation, baseTokens );
if ( betweenCount < closest ) {
closestCue = cue;
closest = betweenCount;
}
// instance.addAll(cuePhraseInWindowExtractor.extractBetween(jCas, cue, entityOrEventMention));
}
if ( closestCue != null && closest < 21 ) {
instance.add( new Feature( "ClosestCue_Word", closestCue.getCoveredText() ) );
// instance.add(new Feature("ClosestCue_Phrase", closestCue.getCuePhrase()));
instance.add( new Feature( "ClosestCue_PhraseFamily", closestCue.getCuePhraseAssertionFamily() ) );
instance.add( new Feature( "ClosestCue_PhraseCategory", closestCue.getCuePhraseCategory() ) );
// add hack-ey domain adaptation to these hacked-in features
if ( !fileToDomain.isEmpty() && ffDomainAdaptor != null ) {
instance.addAll( ffDomainAdaptor
.apply( new Feature( "ClosestCue_Word", closestCue.getCoveredText() ) ) );
instance.addAll( ffDomainAdaptor
.apply( new Feature( "ClosestCue_PhraseFamily", closestCue
.getCuePhraseAssertionFamily() ) ) );
instance.addAll( ffDomainAdaptor
.apply( new Feature( "ClosestCue_PhraseCategory", closestCue.getCuePhraseCategory() ) ) );
}
}
// }
// if (cuePhraseFeatures != null && !cuePhraseFeatures.isEmpty())
// {
// instance.addAll(cuePhraseFeatures);
// }
// 7/9/13 SRH trying to make it work just for anatomical site
int eemTypeId = identifiedAnnotation.getTypeID();
if ( eemTypeId == CONST.NE_TYPE_ID_ANATOMICAL_SITE ) {
// 7/9/13 srh modified per tmiller so it's binary but not numeric feature
//instance.add(new Feature("ENTITY_TYPE_" + entityOrEventMention.getTypeID()));
instance.add( new Feature( "ENTITY_TYPE_ANAT_SITE" ) );
// add hack-ey domain adaptation to these hacked-in features
if ( !fileToDomain.isEmpty() && ffDomainAdaptor != null ) {
instance.addAll( ffDomainAdaptor.apply( new Feature( "ENTITY_TYPE_ANAT_SITE" ) ) );
}
}
/* This hurts recall more than it helps precision
else if (eemTypeId == CONST.NE_TYPE_ID_DRUG) {
// 7/10 adding drug
instance.add(new Feature("ENTITY_TYPE_DRUG"));
}
*/
// only extract these features if not doing domain adaptation
if ( ffDomainAdaptor == null ) {
for ( FeatureExtractor1 extractor : this.entityFeatureExtractors ) {
instance.addAll( extractor.extract( jCas, identifiedAnnotation ) );
}
}
for ( FeatureExtractor1 extractor : this.entityTreeExtractors ) {
instance.addAll( extractor.extract( jCas, identifiedAnnotation ) );
}
// List zoneFeatures = extractZoneFeatures(coveringZoneMap, entityOrEventMention);
// if (zoneFeatures != null && !zoneFeatures.isEmpty())
// {
// instance.addAll(zoneFeatures);
// }
List feats = instance.getFeatures();
// List lcFeats = new ArrayList();
for ( Feature feat : feats ) {
if ( feat instanceof TreeFeature ||
(feat.getName() != null && (feat.getName().startsWith( "TreeFrag" ) ||
feat.getName().startsWith( "WORD" ) ||
feat.getName().startsWith( "NEG" ))) ) {
continue;
}
if ( feat.getName() != null &&
(feat.getName().contains( "_TreeFrag" ) || feat.getName().contains( "_WORD" ) ||
feat.getName().contains( "_NEG" )) ) {
continue;
}
if ( feat.getValue() instanceof String ) {
feat.setValue( ((String)feat.getValue()).toLowerCase() );
}
}
if ( !fileToDomain.isEmpty() && ffDomainAdaptor != null ) {
for ( FeatureFunctionExtractor extractor : this.featureFunctionExtractors ) {
// TODO: extend to the case where the extractors take a different argument besides entityOrEventMention
instance.addAll( extractor.extract( jCas, identifiedAnnotation ) );
}
}
// grab the output label
setClassLabel( identifiedAnnotation, instance );
if ( this.isTraining() ) {
// apply feature selection, if necessary
if ( this.featureSelection != null ) {
feats = this.featureSelection.transform( feats );
}
// ensures that the (possibly) transformed feats are used
if ( instance.getOutcome() != null ) {
if ( coin.nextDouble() < this.portionOfDataToUse ) {
this.dataWriter.write( new Instance<>( instance.getOutcome(), feats ) );
}
}
}
}
}
}
/*
public List extractZoneFeatures(Map> coveringZoneMap, IdentifiedAnnotation entityOrEventMention)
{
final Collection zoneList = coveringZoneMap.get(entityOrEventMention);
if (zoneList == null || zoneList.isEmpty())
{
//logger.info("AssertionCleartkAnalysisEngine.extractZoneFeatures() early END (no zones)");
return new ArrayList();
} else
{
logger.debug("AssertionCleartkAnalysisEngine.extractZoneFeatures() found zones and adding zone features");
}
ArrayList featureList = new ArrayList();
for (Zone zone : zoneList)
{
Feature currentFeature = new Feature("zone", zone.getLabel());
logger.debug(String.format("zone: %s", zone.getLabel()));
logger.debug(String.format("zone feature: %s", currentFeature.toString()));
featureList.add(currentFeature);
}
return featureList;
}
*/
/**
 * Creates an engine description for this annotator, applying any additional
 * name/value configuration parameter pairs supplied by the caller.
 *
 * @param additionalConfiguration optional parameter name/value pairs
 * @return the configured analysis engine description
 * @throws ResourceInitializationException if the description cannot be created
 */
public static AnalysisEngineDescription getDescription( Object... additionalConfiguration )
      throws ResourceInitializationException {
   final AnalysisEngineDescription description
         = AnalysisEngineFactory.createEngineDescription( AssertionCleartkAnalysisEngine.class );
   if ( additionalConfiguration.length != 0 ) {
      ConfigurationParameterFactory.addConfigurationParameters( description, additionalConfiguration );
   }
   return description;
}
/**
 * @return live (not copied) map from training file base name to its domain id
 */
public Map<String, String> getTrainFileToDomain() {
   return fileToDomain;
}

/**
 * @param trainFileToDomain map from training file base name to domain id; stored by reference
 */
public void setTrainFileToDomain( Map<String, String> trainFileToDomain ) {
   this.fileToDomain = trainFileToDomain;
}
/**
* Looks in the domain string (path) for meaningful corpus names
*
* @param dir
* @return
*/
/**
 * Looks in the domain string (path) for a meaningful corpus name: walking from
 * the deepest path element upward, the first element that is not a
 * train/test/dev split directory is taken as the domain.
 *
 * @param dir directory path of a training-data domain
 * @return the inferred domain name, or {@code dir} itself when every element
 *         looks like a split directory
 */
public static String normalizeToDomain( String dir ) {
   // TODO: real normalization
   // Split on both unix and windows separators so paths from either OS normalize.
   final String[] parts = dir.split( "[/\\\\]" );
   for ( int i = parts.length - 1; i >= 0; i-- ) {
      // Locale.ROOT: keep the comparison stable regardless of default locale.
      final String lowered = parts[ i ].toLowerCase( Locale.ROOT );
      if ( lowered.startsWith( "test" ) || lowered.startsWith( "train" ) ||
           lowered.startsWith( "dev" ) ) {
         continue;
      }
      return parts[ i ];
   }
   return dir;
}
/**
* @param annotation1 -
* @param annotation2 -
* @param baseTokens baseTokens within window
* @return number of basetokens that lie between annotation1 and annotation2
*/
/**
 * @param annotation1 -
 * @param annotation2 -
 * @param baseTokens baseTokens within window (e.g. the covering sentence)
 * @return number of basetokens that lie strictly between annotation1 and annotation2
 */
static private int countBetween( final Annotation annotation1,
                                 final Annotation annotation2,
                                 final Collection<BaseToken> baseTokens ) {
   // The gap runs from the earlier annotation's end to the later annotation's begin;
   // min/max make the computation order-independent.
   final int lowEnd = Math.min( annotation1.getEnd(), annotation2.getEnd() );
   final int highBegin = Math.max( annotation1.getBegin(), annotation2.getBegin() );
   int between = 0;
   for ( BaseToken baseToken : baseTokens ) {
      if ( lowEnd < baseToken.getBegin() && baseToken.getEnd() < highBegin ) {
         between++;
      }
   }
   return between;
}
/*
public static AnalysisEngineDescription getClassifierDescription(String modelFileName)
throws ResourceInitializationException {
return CleartkAnnotatorDescriptionFactory.createCleartkAnnotator(
AssertionCleartkAnalysisEngine.class,
AssertionComponents.TYPE_SYSTEM_DESCRIPTION,
modelFileName);
}
public static AnalysisEngineDescription getWriterDescription(String outputDirectory)
throws ResourceInitializationException {
AnalysisEngineDescription aed = CleartkAnnotatorDescriptionFactory.createViterbiAnnotator(
AssertionCleartkAnalysisEngine.class,
AssertionComponents.TYPE_SYSTEM_DESCRIPTION,
DefaultMaxentDataWriterFactory.class,
outputDirectory);
ConfigurationParameterFactory.addConfigurationParameter(
aed,
MaxentDataWriterFactory_ImplBase.PARAM_COMPRESS,
true);
return aed;
}
*/
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy (web-listing footer residue)