All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.ctakes.dictionary.lookup.ae.AssertionCuePhraseConsumerImpl Maven / Gradle / Ivy

The newest version!
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.ctakes.dictionary.lookup.ae;

import org.apache.ctakes.core.resource.LuceneIndexReaderResource;
import org.apache.ctakes.dictionary.lookup.MetaDataHit;
import org.apache.ctakes.dictionary.lookup.vo.LookupHit;
import org.apache.ctakes.typesystem.type.refsem.OntologyConcept;
import org.apache.ctakes.typesystem.type.temporary.assertion.AssertionCuePhraseAnnotation;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.*;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.jcas.JCas;
import org.apache.uima.jcas.cas.FSArray;

import java.util.Collection;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Properties;

/**
 * Implementation that takes Rxnorm dictionary lookup hits and stores only the
 * ones that are also present in the Orange Book.
 */
@SuppressWarnings( "unchecked" )
public class AssertionCuePhraseConsumerImpl extends BaseLookupConsumerImpl
		implements LookupConsumer
{
  public static final String CUE_PHRASE_FIELD_NAME = "cuePhrase";
  public static final String CUE_PHRASE_CATEGORY_FIELD_NAME = "cuePhraseCategory";
  public static final String CUE_PHRASE_FAMILY_FIELD_NAME = "cuePhraseFamily";
  public static final String CUE_PHRASE_FIRST_WORD_FIELD_NAME = "cuePhraseFirstWord";

	// LOG4J logger based on class name
	private Logger LOGGER = LoggerFactory.getLogger(getClass().getName());

	private final String CODE_MF_PRP_KEY = "codeMetaField";

	private final String CODING_SCHEME_PRP_KEY = "codingScheme";

	private final String LUCENE_FILTER_RESRC_KEY_PRP_KEY = "luceneFilterExtResrcKey";

	private Properties iv_props;

	private IndexSearcher iv_searcher;
	//ohnlp-Bugs-3296301 limits the search results to fixed 100 records.
	// Added 'MaxListSize'
	private int iv_maxHits;

	public AssertionCuePhraseConsumerImpl(UimaContext aCtx, Properties props, int maxListSize)
			throws Exception
	{
		// TODO property validation could be done here
		iv_props = props;
		iv_maxHits = maxListSize;
		String resrcName = iv_props.getProperty(LUCENE_FILTER_RESRC_KEY_PRP_KEY);
		LuceneIndexReaderResource resrc = (LuceneIndexReaderResource) aCtx.getResourceObject(resrcName);
		iv_searcher = new IndexSearcher(resrc.getIndexReader());
	}
	public AssertionCuePhraseConsumerImpl(UimaContext aCtx, Properties props)
	throws Exception
	{
		// TODO property validation could be done here
		iv_props = props;
		String resrcName = iv_props.getProperty(LUCENE_FILTER_RESRC_KEY_PRP_KEY);
		LuceneIndexReaderResource resrc = (LuceneIndexReaderResource) aCtx.getResourceObject(resrcName);
		iv_searcher = new IndexSearcher(resrc.getIndexReader());
		iv_maxHits = Integer.MAX_VALUE;
	}
	public void consumeHits(JCas jcas, Iterator lhItr)
			throws AnalysisEngineProcessException
	{
		Iterator hitsByOffsetItr = organizeByOffset(lhItr);
		while (hitsByOffsetItr.hasNext())
		{
			Collection hitsAtOffsetCol = (Collection) hitsByOffsetItr.next();

			// iterate over the LookupHit objects
			// code is only valid if the covered text is also present in the
			// filter
			Iterator lhAtOffsetItr = hitsAtOffsetCol.iterator();
			int neBegin = -1;
			int neEnd = -1;
			Collection validCodeCol = new HashSet();
			while (lhAtOffsetItr.hasNext())
			{
				LookupHit lh = (LookupHit) lhAtOffsetItr.next();
				neBegin = lh.getStartOffset();
				neEnd = lh.getEndOffset();

				String text = jcas.getDocumentText().substring(
						lh.getStartOffset(),
						lh.getEndOffset());
				text = text.trim().toLowerCase();

				MetaDataHit mdh = lh.getDictMetaDataHit();
				String cuePhrase = mdh.getMetaFieldValue(AssertionCuePhraseConsumerImpl.CUE_PHRASE_FIELD_NAME);
				String cuePhraseFirstWord = mdh.getMetaFieldValue(AssertionCuePhraseConsumerImpl.CUE_PHRASE_FIRST_WORD_FIELD_NAME);
				String cuePhraseCategory = mdh.getMetaFieldValue(AssertionCuePhraseConsumerImpl.CUE_PHRASE_CATEGORY_FIELD_NAME);
				String cuePhraseFamily = mdh.getMetaFieldValue(AssertionCuePhraseConsumerImpl.CUE_PHRASE_FAMILY_FIELD_NAME);
				//String cuePhraseAssertionFamily = mdh.getMetaFieldValue(AssertionCuePhraseConsumerImpl.CUE_PHRASE_ASSERTION_FAMILY_FIELD_NAME);
				
//				String code = mdh.getMetaFieldValue(iv_props.getProperty(CODE_MF_PRP_KEY));
//
//				if (isValid("trade_name", text) || isValid("ingredient", text))
//				{
//					validCodeCol.add(code);
//				}
//				else
//				{
//					LOGGER.warn("Filtered out: "+text);
//				}

	      AssertionCuePhraseAnnotation cuePhraseAnnotation = new AssertionCuePhraseAnnotation(jcas);
	      cuePhraseAnnotation.setBegin(neBegin);
	      cuePhraseAnnotation.setEnd(neEnd);
	      
	      cuePhraseAnnotation.setCuePhrase(cuePhrase);
	      cuePhraseAnnotation.setCuePhraseFirstWord(cuePhraseFirstWord);
	      
	      cuePhraseAnnotation.setCuePhraseCategory(cuePhraseCategory);
	      cuePhraseAnnotation.setCuePhraseAssertionFamily(cuePhraseFamily);
	      
	      cuePhraseAnnotation.addToIndexes();

			}
			


//			if (validCodeCol.size() > 0)
//			{
//				FSArray ocArr = createOntologyConceptArr(jcas, validCodeCol);
//				IdentifiedAnnotation neAnnot = new MedicationMention(jcas); // medication NEs are EventMention
//				neAnnot.setTypeID(CONST.NE_TYPE_ID_DRUG);
//				neAnnot.setBegin(neBegin);
//				neAnnot.setEnd(neEnd);
//				neAnnot.setDiscoveryTechnique(CONST.NE_DISCOVERY_TECH_DICT_LOOKUP);
//				neAnnot.setOntologyConceptArr(ocArr);
//				neAnnot.addToIndexes();
//			}
		}
	}

	/**
	 * For each valid code, a corresponding JCas OntologyConcept object is
	 * created and stored in a FSArray.
	 * 
	 * @param jcas
	 * @param validCodeCol
	 * @return
	 */
	private FSArray createOntologyConceptArr(JCas jcas, Collection validCodeCol)
	{
		FSArray ocArr = new FSArray(jcas, validCodeCol.size());
		int ocArrIdx = 0;
		Iterator validCodeItr = validCodeCol.iterator();
		while (validCodeItr.hasNext())
		{
			String validCode = (String) validCodeItr.next();
			OntologyConcept oc = new OntologyConcept(jcas);
			oc.setCode(validCode);
			oc.setCodingScheme(iv_props.getProperty(CODING_SCHEME_PRP_KEY));

			ocArr.set(ocArrIdx, oc);
			ocArrIdx++;
		}
		return ocArr;
	}

	private boolean isValid(String fieldName, String str)
			throws AnalysisEngineProcessException
	{
		try
		{
			Query q = new TermQuery(new Term(fieldName, str));

            TopDocs topDoc = iv_searcher.search(q, iv_maxHits);
            ScoreDoc[] hits = topDoc.scoreDocs;
            if ((hits != null) && (hits.length > 0))
            {
                return true;
            }
            else
            {
                return false;
            }
		}
		catch (Exception e)
		{
			throw new AnalysisEngineProcessException(e);
		}
	}
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy