All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.ctakes.dependency.parser.ae.ClearNLPSemanticRoleLabelerAE Maven / Gradle / Ivy

Go to download

This wraps the ClearNLP dependency parser and semantic role labeler into a UIMA friendly annotator

There is a newer version: 6.0.0
Show newest version
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 * 

* http://www.apache.org/licenses/LICENSE-2.0 *

* Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. */ package org.apache.ctakes.dependency.parser.ae; import com.googlecode.clearnlp.component.AbstractComponent; import com.googlecode.clearnlp.dependency.*; import com.googlecode.clearnlp.engine.EngineGetter; import com.googlecode.clearnlp.nlp.NLPLib; import com.googlecode.clearnlp.reader.AbstractReader; import org.apache.ctakes.core.pipeline.PipeBitInfo; import org.apache.ctakes.core.resource.FileLocator; import org.apache.ctakes.core.util.ListFactory; import org.apache.ctakes.typesystem.type.syntax.BaseToken; import org.apache.ctakes.typesystem.type.syntax.ConllDependencyNode; import org.apache.ctakes.typesystem.type.syntax.NewlineToken; import org.apache.ctakes.typesystem.type.textsem.Predicate; import org.apache.ctakes.typesystem.type.textsem.SemanticArgument; import org.apache.ctakes.typesystem.type.textsem.SemanticRoleRelation; import org.apache.ctakes.typesystem.type.textspan.Sentence; import org.apache.log4j.Logger; import org.apache.uima.UimaContext; import org.apache.uima.analysis_engine.AnalysisEngineProcessException; import org.apache.uima.fit.component.JCasAnnotator_ImplBase; import org.apache.uima.fit.descriptor.ConfigurationParameter; import org.apache.uima.fit.descriptor.TypeCapability; import org.apache.uima.fit.util.JCasUtil; import org.apache.uima.jcas.JCas; import org.apache.uima.jcas.cas.EmptyFSList; import org.apache.uima.jcas.cas.FSList; import org.apache.uima.jcas.cas.TOP; import org.apache.uima.resource.ResourceInitializationException; import java.io.InputStream; import java.net.URI; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; /** *This class provides a UIMA wrapper for the ClearNLP Semantic Role Labeler, which is * available here. *

* http://code.google.com/p/clearnlp *

* Before using this AnalysisEngine, you should run a Tokenizer, POS-tagger, Lemmatizer, and the * CLEAR parser dependency parser. *

* Please see /ClearNLP-wrapper/resources/dependency/clear/README for * important information pertaining to the models provided for this parser. *

* */ @TypeCapability( inputs = { "org.apache.ctakes.typesystem.type.syntax.BaseToken:partOfSpeech", "org.apache.ctakes.typesystem.type.syntax.BaseToken:tokenNumber", "org.apache.ctakes.typesystem.type.syntax.BaseToken:end", "org.apache.ctakes.typesystem.type.syntax.BaseToken:begin", "org.apache.ctakes.typesystem.type.syntax.ConllDependencyNode" } ) @PipeBitInfo( name = "ClearNLP Semantic Role Labeler", description = "Adds Semantic Roles Relations.", role = PipeBitInfo.Role.ANNOTATOR, dependencies = { PipeBitInfo.TypeProduct.SENTENCE, PipeBitInfo.TypeProduct.BASE_TOKEN, PipeBitInfo.TypeProduct.DEPENDENCY_NODE }, products = { PipeBitInfo.TypeProduct.SEMANTIC_RELATION } ) public class ClearNLPSemanticRoleLabelerAE extends JCasAnnotator_ImplBase { final String language = AbstractReader.LANG_EN; public Logger logger = Logger.getLogger( getClass().getName() ); public static final String DEFAULT_SRL_MODEL_FILE_NAME = "org/apache/ctakes/dependency/parser/models/srl/mayo-en-srl-1.3.0.jar"; public static final String DEFAULT_PRED_MODEL_FILE_NAME = "org/apache/ctakes/dependency/parser/models/pred/mayo-en-pred-1.3.0.jar"; public static final String DEFAULT_ROLE_MODEL_FILE_NAME = "org/apache/ctakes/dependency/parser/models/role/mayo-en-role-1.3.0.jar"; public static final String PARAM_PARSER_MODEL_FILE_NAME = "ParserModelFileName"; public static final String PARAM_PRED_MODEL_FILE_NAME = "ParserPredFileName"; public static final String PARAM_ROLE_MODEL_FILE_NAME = "ParserRoleFileName"; protected URI srlModelUri; protected URI srlPredUri; protected URI srlRoleUri; @ConfigurationParameter( name = PARAM_PARSER_MODEL_FILE_NAME, description = "This parameter provides the file name of the semantic role labeler model required by the factory method provided by ClearNLPUtil.", defaultValue = DEFAULT_SRL_MODEL_FILE_NAME ) private String parserModelFileName; public static final String PARAM_LEMMATIZER_DATA_FILE = "LemmatizerDataFile"; @ConfigurationParameter( name = PARAM_PRED_MODEL_FILE_NAME, description = "This parameter provides the file name of the pred model required by the factory method provided by ClearNLPUtil.", defaultValue = DEFAULT_PRED_MODEL_FILE_NAME ) private String parserPredFileName; @ConfigurationParameter( name = PARAM_ROLE_MODEL_FILE_NAME, description = "This parameter provides the file name of the role model required by the factory method provided by ClearNLPUtil.", defaultValue = DEFAULT_ROLE_MODEL_FILE_NAME ) private String parserRoleFileName; protected AbstractComponent srlabeler; protected AbstractComponent identifier; protected AbstractComponent classifier; @Override public void initialize( UimaContext context ) throws ResourceInitializationException { super.initialize( context ); try { InputStream srlPred = (this.srlPredUri == null) ? FileLocator.getAsStream( DEFAULT_PRED_MODEL_FILE_NAME ) : FileLocator.getAsStream( parserPredFileName ); this.identifier = EngineGetter.getComponent( srlPred, this.language, NLPLib.MODE_PRED ); InputStream srlRole = (this.srlRoleUri == null) ? FileLocator.getAsStream( DEFAULT_ROLE_MODEL_FILE_NAME ) : FileLocator.getAsStream( parserRoleFileName ); this.classifier = EngineGetter.getComponent( srlRole, this.language, NLPLib.MODE_ROLE ); InputStream srlModel = (this.srlModelUri == null) ? FileLocator.getAsStream( DEFAULT_SRL_MODEL_FILE_NAME ) : FileLocator.getAsStream( parserModelFileName ); this.srlabeler = EngineGetter.getComponent( srlModel, this.language, NLPLib.MODE_SRL ); } catch ( Exception e ) { throw new ResourceInitializationException( e ); } } @Override public void process( JCas jCas ) throws AnalysisEngineProcessException { for ( Sentence sentence : JCasUtil.select( jCas, Sentence.class ) ) { List printableTokens = new ArrayList<>(); for ( BaseToken token : JCasUtil.selectCovered( jCas, BaseToken.class, sentence ) ) { if ( token instanceof NewlineToken ) { continue; } printableTokens.add( token ); } DEPTree tree = new DEPTree(); // Build map between CAS dependency node and id for later creation of // ClearNLP dependency node/tree Map depNodeToID = new HashMap<>(); int nodeId = 1; for ( ConllDependencyNode depNode : JCasUtil.selectCovered( jCas, ConllDependencyNode.class, sentence ) ) { //if (depNode instanceof TopDependencyNode) { if ( depNode.getHead() == null ) { // A node without the head is the head of the sentence depNodeToID.put( depNode, 0 ); } else { depNodeToID.put( depNode, nodeId ); nodeId++; } } int[] headIDs = new int[ printableTokens.size() ]; String[] deprels = new String[ printableTokens.size() ]; // Initialize Token / Sentence info for the ClearNLP Semantic Role Labeler // we are filtering out newline tokens // use idIter as the non-newline token index counter int idIter = 0; for ( int i = 0; i < printableTokens.size(); i++ ) { BaseToken token = printableTokens.get( i ); // ignore newline tokens within a sentence - newline = whitespace = non-token if ( !(token instanceof NewlineToken) ) { // Determine HeadId List casDepNodes = JCasUtil.selectCovered( jCas, ConllDependencyNode.class, token ); ConllDependencyNode casDepNode = casDepNodes.get( 0 ); if ( casDepNode.getId() == 0 ) { casDepNode = casDepNodes.get( 1 ); } deprels[ i ] = casDepNode.getDeprel(); ConllDependencyNode head = casDepNode.getHead(); // If there is no head, this is the head node, set node to 0 final Integer headIdIndex = (head == null) ? 0 : depNodeToID.get( head ); if ( headIdIndex != null ) { headIDs[ i ] = headIdIndex; } else { logger.error( "No dependency node for index " + head + ". Map size is " + depNodeToID.size() + "\nSetting head ID to 0" ); headIDs[ i ] = 0; } // Populate Dependency Node / Tree information int id = idIter + 1; String form = casDepNode.getForm(); String pos = casDepNode.getPostag(); String lemma = casDepNode.getLemma(); DEPNode node = new DEPNode( id, form, lemma, pos, new DEPFeat() ); tree.add( node ); idIter++; } } for ( int i = 1; i < tree.size(); i++ ) { DEPNode node = tree.get( i ); DEPNode head = tree.get( headIDs[ i - 1 ] ); String label = deprels[ i - 1 ]; node.setHead( head, label ); } tree.initSHeads(); // Run the SRL identifier.process( tree ); classifier.process( tree ); srlabeler.process( tree ); // Convert ClearNLP SRL output to CAS types extractSRLInfo( jCas, printableTokens, tree ); } } /** * Converts the output from the ClearNLP Semantic Role Labeler to the ClearTK Predicate and * SemanticArgument Types. * * @param jCas * @param tokens - In order list of tokens * @param tree - DepdendencyTree output by ClearNLP SRLPredict */ private void extractSRLInfo( JCas jCas, List tokens, DEPTree tree ) { Map headIdToPredicate = new HashMap(); Map> predicateArguments = new HashMap>(); for ( int i = 1; i < tree.size(); i++ ) { // Every ClearNLP parserNode will contain an srlInfo field. DEPNode parserNode = tree.get( i ); BaseToken token = tokens.get( i - 1 ); String rolesetId; if ( (rolesetId = parserNode.getFeat( DEPLib.FEAT_PB )) != null ) { if ( !headIdToPredicate.containsKey( parserNode ) ) { // We have not encountered this predicate yet, so create it Predicate pred = this.createPredicate( jCas, rolesetId, token ); headIdToPredicate.put( parserNode, pred ); pred.setRelations( new EmptyFSList( jCas ) ); } } } // Start at node 1, since node 0 is considered the head of the sentence for ( int i = 1; i < tree.size(); i++ ) { // Every ClearNLP parserNode will contain an srlInfo field. DEPNode parserNode = tree.get( i ); BaseToken token = tokens.get( i - 1 ); for ( DEPArc head : parserNode.getSHeads() ) { Predicate predicate = headIdToPredicate.get( head.getNode() ); // Append this argument to the predicate's list of arguments if ( !predicateArguments.containsKey( predicate ) ) { predicateArguments.put( predicate, new ArrayList() ); } List argumentList = predicateArguments.get( predicate ); // Create the semantic argument and store for later link creation SemanticArgument argument = createArgument( jCas, head, token ); argumentList.add( argument ); } } // Create relations between predicates and arguments for ( Map.Entry> entry : predicateArguments.entrySet() ) { Predicate predicate = entry.getKey(); List relations = new ArrayList(); for ( SemanticArgument argument : entry.getValue() ) { SemanticRoleRelation relation = new SemanticRoleRelation( jCas ); relation.setArgument( argument ); relation.setPredicate( predicate ); relation.setCategory( argument.getLabel() ); relation.addToIndexes(); relations.add( relation ); argument.setRelation( relation ); } FSList relationsList = ListFactory.buildList( jCas, relations.toArray( new TOP[ relations.size() ] ) ); predicate.setRelations( relationsList ); } } private Predicate createPredicate( JCas jCas, String rolesetId, BaseToken token ) { Predicate pred = new Predicate( jCas, token.getBegin(), token.getEnd() ); pred.setFrameSet( rolesetId ); pred.addToIndexes(); return pred; } private SemanticArgument createArgument( JCas jCas, DEPArc head, BaseToken token ) { SemanticArgument argument = new SemanticArgument( jCas, token.getBegin(), token.getEnd() ); argument.setLabel( head.getLabel() ); argument.addToIndexes(); return argument; } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy