
org.apache.ctakes.dictionary.lookup.ae.UmlsToSnomedConsumerImpl Maven / Gradle / Ivy
The newest version!
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.ctakes.dictionary.lookup.ae;
import org.apache.ctakes.dictionary.lookup.DictionaryException;
import org.apache.ctakes.dictionary.lookup.MetaDataHit;
import org.apache.ctakes.dictionary.lookup.vo.LookupHit;
import org.apache.ctakes.typesystem.type.constants.CONST;
import org.apache.ctakes.typesystem.type.refsem.UmlsConcept;
import org.apache.ctakes.typesystem.type.textsem.*;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.jcas.JCas;
import org.apache.uima.jcas.cas.FSArray;
import java.sql.SQLException;
import java.util.*;
/**
* Implementation that takes UMLS dictionary lookup hits and stores as NamedEntity
* objects only the ones that have a SNOMED synonym.
* Subclasses override the abstract method {@code getSnomedCodes} and implement
* the lookup of the CUI->SNOMED mappings.
*
* @author Mayo Clinic
*/
public abstract class UmlsToSnomedConsumerImpl extends BaseLookupConsumerImpl {
static private final String CUI_MF_PRP_KEY = "cuiMetaField";
static private final String TUI_MF_PRP_KEY = "tuiMetaField";
static private final String CODING_SCHEME_PRP_KEY = "codingScheme";
static private final String MEDICATION_TUIS_PRP_KEY = "medicationTuis";
static private final String ANT_SITE_TUIS_PRP_KEY = "anatomicalSiteTuis";
static private final String PROCEDURE_TUIS_PRP_KEY = "procedureTuis";
static private final String DISORDER_TUIS_PRP_KEY = "disorderTuis";
static private final String FINDING_TUIS_PRP_KEY = "findingTuis";
private Set _medicationSet = new HashSet<>();
private Set _antSiteTuiSet = new HashSet<>();
private Set _procedureTuiSet = new HashSet<>();
private Set _disorderTuiSet = new HashSet<>();
private Set _findingTuiSet = new HashSet<>();
private Set _validTuiSet = new HashSet<>();
protected Properties props;
public UmlsToSnomedConsumerImpl( final UimaContext aCtx, final Properties properties ) throws Exception {
// TODO property validation could be done here
props = properties;
_medicationSet = loadList( props.getProperty( MEDICATION_TUIS_PRP_KEY ) ); // 1
_antSiteTuiSet = loadList( props.getProperty( ANT_SITE_TUIS_PRP_KEY ) ); // 6
_procedureTuiSet = loadList( props.getProperty( PROCEDURE_TUIS_PRP_KEY ) );// 5
_disorderTuiSet = loadList( props.getProperty( DISORDER_TUIS_PRP_KEY ) ); // 2
_findingTuiSet = loadList( props.getProperty( FINDING_TUIS_PRP_KEY ) ); // 3 aka sign/symptom
_validTuiSet.addAll( _medicationSet );
_validTuiSet.addAll( _antSiteTuiSet );
_validTuiSet.addAll( _procedureTuiSet );
_validTuiSet.addAll( _disorderTuiSet );
_validTuiSet.addAll( _findingTuiSet );
}
/**
* Searches for the Snomed codes that are synonyms of the UMLS concept with CUI umlsCode
*
* @param umlsCode -
* @return Set of SNOMED codes for the given UMLS CUI.
* @throws SQLException, DictionaryException
*/
protected abstract Set getSnomedCodes( final String umlsCode ) throws SQLException, DictionaryException;
/**
* {@inheritDoc}
*/
@Override
public void consumeHits( final JCas jcas, final Iterator lhItr ) throws AnalysisEngineProcessException {
try {
final String cuiPropKey = props.getProperty( CUI_MF_PRP_KEY );
final String tuiPropKey = props.getProperty( TUI_MF_PRP_KEY );
final Map> lookupHitMap = createLookupHitMap( lhItr );
// iterate over the LookupHit objects
for ( Map.Entry> entry : lookupHitMap.entrySet() ) {
// code is only valid if the covered text is also present in the filter
final int neBegin = entry.getKey().__start;
final int neEnd = entry.getKey().__end;
// Use key "cui,tui" to avoid duplicates at this offset
final Set cuiTuiSet = new HashSet<>();
// key = type of named entity, val = set of UmlsConcept objects
final Map> conceptMap = new HashMap<>();
// Iterate over the LookupHit objects and group Snomed codes by NE type
// For each NE type for which there is a hit, get all the Snomed codes
// that map to the given CUI.
for ( LookupHit lookupHit : entry.getValue() ) {
final MetaDataHit mdh = lookupHit.getDictMetaDataHit();
final String cui = mdh.getMetaFieldValue( cuiPropKey );
final String tui = mdh.getMetaFieldValue( tuiPropKey );
//String text = lh.getDictMetaDataHit().getMetaFieldValue("text");
if ( !_validTuiSet.contains( tui ) ) {
continue;
}
final String cuiTuiKey = getUniqueKey( cui, tui );
if ( cuiTuiSet.contains( cuiTuiKey ) ) {
continue;
}
cuiTuiSet.add( cuiTuiKey );
final Set snomedCodeSet = getSnomedCodes( cui );
if ( !snomedCodeSet.isEmpty() ) {
final Integer neType = getNamedEntityType( tui );
Set conceptSet;
if ( conceptMap.containsKey( neType ) ) {
conceptSet = conceptMap.get( neType );
} else {
conceptSet = new HashSet<>();
conceptMap.put( neType, conceptSet );
}
final Collection conceptCol = createConceptCol( jcas, cui, tui, snomedCodeSet );
conceptSet.addAll( conceptCol );
}
}
for ( Map.Entry> conceptEntry : conceptMap.entrySet() ) {
final Set conceptSet = conceptEntry.getValue();
// Skip updating CAS if all Concepts for this type were filtered out for this span.
if ( !conceptSet.isEmpty() ) {
final FSArray conceptArr = new FSArray( jcas, conceptSet.size() );
int arrIdx = 0;
for ( UmlsConcept umlsConcept : conceptSet ) {
conceptArr.set( arrIdx, umlsConcept );
arrIdx++;
}
IdentifiedAnnotation neAnnot;
final int conceptKey = conceptEntry.getKey();
if ( conceptKey == CONST.NE_TYPE_ID_DRUG ) {
neAnnot = new MedicationMention( jcas );
} else if ( conceptKey == CONST.NE_TYPE_ID_ANATOMICAL_SITE ) {
neAnnot = new AnatomicalSiteMention( jcas );
} else if ( conceptKey == CONST.NE_TYPE_ID_DISORDER ) {
neAnnot = new DiseaseDisorderMention( jcas );
} else if ( conceptKey == CONST.NE_TYPE_ID_FINDING ) {
neAnnot = new SignSymptomMention( jcas );
} else if ( conceptKey == CONST.NE_TYPE_ID_LAB ) {
neAnnot = new LabMention( jcas );
} else if ( conceptKey == CONST.NE_TYPE_ID_PROCEDURE ) {
neAnnot = new ProcedureMention( jcas );
} else {
neAnnot = new EntityMention( jcas );
}
neAnnot.setTypeID( conceptKey );
neAnnot.setBegin( neBegin );
neAnnot.setEnd( neEnd );
neAnnot.setDiscoveryTechnique( CONST.NE_DISCOVERY_TECH_DICT_LOOKUP );
neAnnot.setOntologyConceptArr( conceptArr );
neAnnot.addToIndexes();
}
}
}
} catch ( Exception e ) {
throw new AnalysisEngineProcessException( e );
}
}
private int getNamedEntityType( final String tui ) throws IllegalArgumentException {
if ( _medicationSet.contains( tui ) ) {
return CONST.NE_TYPE_ID_DRUG;
} else if ( _disorderTuiSet.contains( tui ) ) {
return CONST.NE_TYPE_ID_DISORDER;
} else if ( _findingTuiSet.contains( tui ) ) {
return CONST.NE_TYPE_ID_FINDING;
} else if ( _antSiteTuiSet.contains( tui ) ) {
return CONST.NE_TYPE_ID_ANATOMICAL_SITE;
} else if ( _procedureTuiSet.contains( tui ) ) {
return CONST.NE_TYPE_ID_PROCEDURE;
} else {
throw new IllegalArgumentException( "TUI is not part of valid named entity types: " + tui );
}
}
/**
* For each SNOMED code, create a corresponding JCas UmlsConcept object and
* store in a Collection.
*
* @param jcas -
* @param snomedCodesCol -
* @return -
*/
private Collection createConceptCol( final JCas jcas, final String cui, final String tui,
final Collection snomedCodesCol ) {
final String codingSchemeKey = props.getProperty( CODING_SCHEME_PRP_KEY );
final List conceptList = new ArrayList<>();
for ( String snomedCode : snomedCodesCol ) {
final UmlsConcept uc = new UmlsConcept( jcas );
uc.setCode( snomedCode );
uc.setCodingScheme( codingSchemeKey );
uc.setCui( cui );
uc.setTui( tui );
conceptList.add( uc );
}
return conceptList;
}
private static String getUniqueKey( final String cui, final String tui ) {
final StringBuilder sb = new StringBuilder();
sb.append( cui );
sb.append( ':' );
sb.append( tui );
return sb.toString();
}
/**
* Load a comma delimited list
*
* @param delimitedString -
* @return -
*/
private static Set loadList( final String delimitedString ) {
if ( delimitedString == null || delimitedString.isEmpty() ) {
return Collections.emptySet();
}
final String[] stringArray = delimitedString.split( "," );
final Set stringSet = new HashSet<>();
for ( String text : stringArray ) {
final String trimText = text.trim();
if ( !trimText.isEmpty() ) {
stringSet.add( trimText );
}
}
return stringSet;
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy