
org.apache.ctakes.ytex.kernel.metric.ConceptSimilarityService Maven / Gradle / Ivy
The newest version!
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.ctakes.ytex.kernel.metric;
import org.apache.ctakes.ytex.kernel.model.ConceptGraph;
import java.util.BitSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
public interface ConceptSimilarityService {
/**
 * Similarity metrics that can be requested from the service. Each constant
 * records whether the metric is based on intrinsic information content (IC)
 * or on corpus IC; a metric based on neither is reported as taxonomy based
 * by {@link #isTaxonomy()}.
 */
public enum SimilarityMetricEnum {
    LCH(false, false),
    INTRINSIC_LCH(true, false),
    LIN(false, true),
    INTRINSIC_LIN(true, false),
    PATH(false, false),
    INTRINSIC_PATH(true, false),
    JACCARD(true, false),
    SOKAL(true, false),
    RADA(false, false),
    INTRINSIC_RADA(true, false),
    WUPALMER(false, false),
    PAGERANK(false, false);

    // Enum constants are shared singletons, so their flags are final: a
    // write from anywhere would otherwise change the metric for every user.
    // Visibility stays package-private so existing same-package readers of
    // these fields keep compiling.
    final boolean intrinsicIC;
    final boolean corpusIC;

    /**
     * @param intrinsicIC
     *            true if the metric is based on intrinsic IC
     * @param corpusIC
     *            true if the metric is based on corpus IC
     */
    SimilarityMetricEnum(boolean intrinsicIC, boolean corpusIC) {
        this.intrinsicIC = intrinsicIC;
        this.corpusIC = corpusIC;
    }

    /**
     * is this measure taxonomy based?
     *
     * @return true if the measure uses neither intrinsic IC nor corpus IC
     */
    public boolean isTaxonomy() {
        return !intrinsicIC && !corpusIC;
    }

    /**
     * is this measure based on intrinsic IC?
     *
     * @return true if the measure is based on intrinsic IC
     */
    public boolean isIntrinsicIC() {
        return intrinsicIC;
    }

    /**
     * is this measure based on corpus IC?
     *
     * @return true if the measure is based on corpus IC
     */
    public boolean isCorpusIC() {
        return corpusIC;
    }
}
/**
* Returns the name of the concept graph.
*
* NOTE(review): presumably the name identifying the graph returned by
* {@link #getConceptGraph()} - confirm against the implementation.
*
* @return concept graph name
*/
public String getConceptGraphName();
// public abstract double lch(String concept1, String concept2);
// public abstract double lin(String concept1, String concept2);
/**
* Computes the lcs for a pair of concepts.
*
* NOTE(review): the contract below is inferred from the name and signature
* only - confirm against the implementation.
*
* @param concept1
*            first concept
* @param concept2
*            second concept
* @param lcsPath
*            list that is presumably filled with the path through the lcs
*            - TODO confirm
* @return presumably the distance of the path through the lcs, as for
*         {@link #getLCS(String, String, Set, List)} - TODO confirm
*/
public int lcs(String concept1, String concept2, List lcsPath);
/**
* Returns the concept graph exposed by this service.
*
* @return the concept graph
*/
public abstract ConceptGraph getConceptGraph();
/**
* Returns the cui - tui map. The tuis of a cui are represented as a bitset
* whose indices correspond to the positions of the tuis in
* {@link #getTuiList()}.
*
* @return map from cui to tui bitset (presumably keyed by cui string with
*         {@link BitSet} values; the element types are not declared on
*         this signature - TODO confirm)
*/
public abstract Map getCuiTuiMap();
// /**
// * supervised lin measure.
// *
// * @param concept1
// * @param concept2
// * @param conceptFilter
// * map of concept id to imputed infogain. if the concept isn't in
// * this map, the concepts won't be compared. null for
// * unsupervised lin.
// * @return
// */
// public abstract double filteredLin(String concept1, String concept2,
// Map conceptFilter);
/**
* Returns the list of tuis that corresponds to bitset indices: the position
* of a tui in this list is the index used for it in the bitsets of
* {@link #getCuiTuiMap()}.
*
* @return list of tuis, ordered by bitset index
*/
public abstract List getTuiList();
/**
* For the given label and cutoff, get the corresponding concepts whose
* propagated ig meets the threshold. Used by the lin kernel to find
* concepts that actually have a non-trivial similarity.
*
* @param label
*            label
* @param rankCutoff
*            cutoff
* @param conceptFilter
*            map to fill with concepts (presumably concept id mapped to
*            its evaluation value - TODO confirm against the
*            implementation)
* @return minimum evaluation
*/
public abstract double loadConceptFilter(String label, int rankCutoff,
Map conceptFilter);
/**
* Get the lcs(s) for the specified concepts.
*
* @param concept1
*            required
* @param concept2
*            required
* @param lcses
*            required - will be filled with the lcs(s).
* @param lcsPaths
*            optional - will be filled with the paths through the lcses.
* @return distance of path through lcs
*/
public int getLCS(String concept1, String concept2, Set lcses,
List lcsPaths);
/**
* Get the best lcs from a set of candidate lcses.
*
* @param lcses
*            set of lcses
* @param intrinsicIC
*            should the intrinsic ic be used? false - use corpus-based ic.
*            For multiple lcses not using concept filter, use the lcs with
*            the lowest infocontent
* @param conceptFilter
*            limit to lcses in the concept filter. The lcs with the highest
*            value will be used.
* @return array with 2 entries. Entry 1 - lcs (String). Entry 2 -
*         infocontent (double). Null if no lcses are in the concept filter.
*         NOTE(review): entries 1 and 2 are presumably array indices 0 and
*         1 - confirm.
*/
public Object[] getBestLCS(Set lcses, boolean intrinsicIC,
Map conceptFilter);
/**
* Returns the information content (ic) of a concept.
*
* @param concept
*            concept to look up
* @param intrinsicICMap
*            presumably true to use the intrinsic ic and false to use the
*            corpus-based ic, mirroring the intrinsicIC flag of
*            {@link #getBestLCS(Set, boolean, Map)} - TODO confirm
* @return information content of the concept
*/
public abstract double getIC(String concept, boolean intrinsicICMap);
/**
* Compute similarity for a pair of concepts.
*
* @param metrics
*            required, similarity metrics to compute
* @param concept1
*            required
* @param concept2
*            required
* @param conceptFilter
*            optional - only lcs's in this filter will be used.
* @param lcs
*            presumably true to include information on the lcs (and the
*            paths through it) in the result - TODO confirm against the
*            implementation
* @return similarities
*/
public abstract ConceptPairSimilarity similarity(
List metrics, String concept1,
String concept2, Map conceptFilter, boolean lcs);
/**
* Compute similarity for a list of concept pairs.
*
* @param conceptPairs
*            required, concept pairs for which similarity should be
*            computed
* @param metrics
*            required, similarity metrics to compute
* @param conceptFilter
*            optional - only lcs's in this filter will be used.
* @param lcs
*            presumably true to include information on the lcs for each
*            concept pair in the result - TODO confirm against the
*            implementation
* @return similarities (presumably one entry per concept pair - TODO
*         confirm)
*/
public List similarity(
List conceptPairs, List metrics,
Map conceptFilter, boolean lcs);
/**
* Returns the depth of a concept.
*
* NOTE(review): presumably the depth of the concept within the concept
* graph - confirm how depth is measured against the implementation.
*
* @param concept
*            concept to look up
* @return depth of the concept
*/
public abstract int getDepth(String concept);
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy