All Downloads are FREE. Search and download functionalities use the official Maven repository.

org.apache.ctakes.ytex.kernel.model.ConcRel Maven / Gradle / Ivy

The newest version!
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.ctakes.ytex.kernel.model;

import com.google.common.collect.ImmutableSet;
import org.apache.ctakes.ytex.kernel.metric.LCSPath;

import java.util.*;
import java.util.logging.Level;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class ConcRel implements java.io.Serializable {
	private static final Logger LOGGER = LoggerFactory.getLogger( "ConcRel" );
	/**
	 * 
	 */
	private static final long serialVersionUID = 1L;

	public static List crListToString(List crList) {
		if (crList != null) {
			List path = new ArrayList(crList.size());
			for (ConcRel cr : crList)
				path.add(cr.getConceptID());
			return path;
		} else {
			return null;
		}
	}

	/**
	 * get least common subsumer of the specified concepts and its distance from
	 * root.
	 * 
	 * @deprecated
	 * 
	 * @param c1
	 * @param c2
	 * @return
	 */
	public static ObjPair getLeastCommonConcept(ConcRel c1,
			ConcRel c2) {
			LOGGER.debug("getLeastCommonConcept(" + c1 + "," + c2 + ")");
		// result
		ObjPair res = new ObjPair(null,
				Integer.MAX_VALUE);
		// concept 1's parent distance map
		Map cand1 = new HashMap();
		// concept 2's parent distance map
		Map cand2 = new HashMap();

		// parents of concept 1
		HashSet parC1 = new HashSet();
		parC1.add(c1);
		// parents of concept 2
		HashSet parC2 = new HashSet();
		parC2.add(c2);
		HashSet tmp = new HashSet();
		HashSet tmp2;

		int dist = 0;
		// changed to start distance with 1 - we increment at the end of the
		// loop
		// we always look at the parents, so the distance has to start with 1
		// if one concept is the parent of the other, this would return 0 if
		// dist starts with 0
		// int dist = 1;
		// while there are parents
		// this does a dual-breadth first search
		// parC1 are the dist'th ancestors of concept 1
		// parC2 are the dist'th ancestors of concept 2
		while (!parC1.isEmpty() || !parC2.isEmpty()) {
			// grandparents
			tmp.clear();
			// go through parents of concept1
			for (Iterator it = parC1.iterator(); it.hasNext();) {
				ConcRel cr = it.next();
				// checkif it's in the map concept2's parent distance map
				// - map of distances from concept 1
				if (cand2.containsKey(cr)) {
					res.v1 = cr;
					res.v2 = dist + cand2.get(cr).intValue();
					// return
					return res;
				}
				// not in the map - add it to the concept-distance map
				cand1.put(cr, dist);
				// add the grandparents to the tmp set
				tmp.addAll(cr.parents);
			}
			// remove concepts already in concept1's parent distance map from
			// the grandparent map
			tmp.removeAll(cand1.keySet());
			// tmp2 becomes the parents of c1
			tmp2 = parC1;
			// par c1 becomes grandparents minus parents
			parC1 = tmp;
			// tmp becomes tmp2, which is going to be killed in the next line
			tmp = tmp2;

			tmp.clear();
			// repeat everything for concept2 - go up one level
			for (Iterator it = parC2.iterator(); it.hasNext();) {
				ConcRel cr = it.next();
				if (cand1.containsKey(cr)) {
					res.v1 = cr;
					res.v2 = dist + cand1.get(cr).intValue();
					return res;
				}
				cand2.put(cr, dist);
				tmp.addAll(cr.parents);
			}
			tmp.removeAll(cand2.keySet());
			tmp2 = parC2;
			parC2 = tmp;
			tmp = tmp2;

			++dist;
		}

		return res;
	}

	/**
	 * 
	 * @param c1
	 *            concept1
	 * @param c2
	 *            concept2
	 * @param lcses
	 *            least common subsumers, required
	 * @param paths
	 *            paths between concepts via lcses, optional. Key - lcs. Value -
	 *            2 element list corresponding to paths to lcs from c1 and c2
	 * @return path length, -1 if no lcs
	 */
	public static int getLeastCommonConcept(ConcRel c1, ConcRel c2,
			Set lcses, Map paths) {
			LOGGER.debug("getLeastCommonConcept(" + c1 + "," + c2 + ")");
		// concept 1's parent distance map
		Map cand1 = new HashMap();
		// concept 2's parent distance map
		Map cand2 = new HashMap();
		// paths corresponding to parents
		// we only calculate these if they are asked of us
		Map> paths1 = paths != null ? new HashMap>()
				: null;
		Map> paths2 = paths != null ? new HashMap>()
				: null;

		// parents of concept 1
		HashSet parC1 = new HashSet();
		parC1.add(c1);
		// parents of concept 2
		HashSet parC2 = new HashSet();
		parC2.add(c2);
		// temporary hashset for scratch work
		// not clear if this really reduces memory overhead
		HashSet tmp = new HashSet();
		HashSet candidateLCSes = new HashSet();

		int maxIter = -1;
		int dist = 0;
		int minDist = Integer.MAX_VALUE - 1;
		// continue the search while there are parents left
		// check maxIter - if this is some non-negative number
		// then it must be greater than 0
		while ((!parC1.isEmpty() || !parC2.isEmpty())
				&& (maxIter < 0 || maxIter != 0)) {
			// get next iteration of ancestors, save them
			updateParent(cand1, parC1, tmp, dist, paths1);
			updateParent(cand2, parC2, tmp, dist, paths2);
			// get the intersection across the ancestors
			tmp.clear();
			tmp.addAll(cand1.keySet());
			tmp.retainAll(cand2.keySet());
			tmp.removeAll(candidateLCSes);
			// if there is something in the intersection, we have a potential
			// winner. however, we can't stop here
			// example: ascites/hepatoma in snomed C2239176 C0003962
			// e.g. one path could be 3-3, but a shorter path could be 4-1
			// we would only find the 4-1 path after 4 iterations
			if (!tmp.isEmpty()) {
				// add candidates so we don't have to look at them in future
				// iterations
				candidateLCSes.addAll(tmp);
				// remove candidates' parents from the parent collections for
				// the next iterations
				removeParents(tmp, parC1);
				removeParents(tmp, parC2);
				// add the lcs candidates and their path length
				// even though we have a hit, we can't stop here
				// there could be uneven path lengths.
				// to account for this, the 1st time we find an lcs
				// we set maxIter to the minimum path length to either concept
				// from the lcs. if we can't find a match after maxIter
				// iterations, then we know that what we've found is a winner
				for (ConcRel lcs : tmp) {
					// path length for current lcs
					int distTmp = cand1.get(lcs) + cand2.get(lcs) + 1;
					// only add it to the list of lcses if it is less than or
					// equal to the current minimal path length
					if (distTmp <= minDist) {
						if (distTmp < minDist) {
							// we have a new best minimal path length
							// clear the current lcses
							lcses.clear();
						}
						minDist = distTmp;
						lcses.add(lcs);
					}
					// all additional lcses must be found within maxIter
					// iterations. maxIter is the shortest path between
					// the lcs and a concept
					int minLcsToConceptLen = Math.min(cand1.get(lcs),
							cand2.get(lcs));
					if (maxIter < 0 || maxIter > minLcsToConceptLen) {
						maxIter = minLcsToConceptLen;
					}
				}
			}
			// reduce maximum number of iterations left
			maxIter--;
			++dist;
		}
		if (lcses.isEmpty())
			return -1;
		else {
			if (paths != null) {
				for (ConcRel lcs : lcses) {
					LCSPath lcsPath = new LCSPath();
					lcsPath.setLcs(lcs.getConceptID());
					lcsPath.setConcept1Path(crListToString(paths1.get(lcs)));
					lcsPath.setConcept2Path(crListToString(paths2.get(lcs)));
					paths.put(lcs, lcsPath);
				}
			}
			return minDist;
		}
	}

	/**
	 * remove the parents of candidate lcses from the list of parents we were
	 * planning on looking at in the next iteration
	 * 
	 * @param lcses
	 * @param parents
	 */
	private static void removeParents(HashSet lcses,
			HashSet parents) {
		for (ConcRel lcs : lcses) {
			parents.removeAll(lcs.parents);
		}
	}

	/**
	 * perform 1 iteration of breadth-first search on lcs. update the various
	 * collections with the next iteration of ancestors.
	 * 
	 * @param cand1
	 * @param parC1
	 * @param tmp
	 * @param dist
	 */
	private static void updateParent(Map cand1,
			HashSet parC1, HashSet tmp, int dist,
			Map> paths) {
		tmp.clear();
		// go through parents of concept1
		for (Iterator it = parC1.iterator(); it.hasNext();) {
			ConcRel cr = it.next();
			if (!cand1.containsKey(cr)) {
				// not in the map - add it to the concept-distance map
				cand1.put(cr, dist);
				// add the grandparents to the tmp set
				tmp.addAll(cr.parents);
				if (paths != null) {
					// add the path to the parent to the map of paths
					List pathCR = paths.get(cr);
					for (ConcRel parent : cr.parents) {
						if (!paths.containsKey(parent)) {
							// path to parent = path to child + child
							List path = new ArrayList(
									pathCR != null ? pathCR.size() + 1 : 1);
							if (pathCR != null)
								path.addAll(pathCR);
							path.add(cr);
							paths.put(parent, path);
						}
					}
				}
			}
		}
		// remove concepts already in concept1's parent distance map from
		// the grandparent map
		tmp.removeAll(cand1.keySet());
		// parents for the next iteration
		parC1.clear();
		parC1.addAll(tmp);
	}

	/**
	 * children of this concept
	 */
	private Set children;
	private int[] childrenArray;
	
	private short depth;

	private double intrinsicInfoContent;

	/**
	 * id of this concept
	 */
	private String nodeCUI;

	private int nodeIndex;

	/**
	 * parents of this concept
	 */
	private Set parents;

	/**
	 * for java object serialization, need to avoid default serializer behavior
	 * of writing out entire object graph. just write the parent/children object
	 * ids and resolve the connections after loading this object.
	 */
	private int[] parentsArray;

	public ConcRel(String cui, int nodeIndex) {
		nodeCUI = cui;
		parents = new HashSet();
		children = new HashSet();
		parentsArray = null;
		childrenArray = null;
		this.nodeIndex = nodeIndex;
	}

	/**
	 * reconstruct the relationships to other ConcRel objects
	 * 
	 * @param db
	 */
	public void constructRel(List db) {
		ImmutableSet.Builder pBuilder = new ImmutableSet.Builder(); 
		for (int c : parentsArray)
			pBuilder.add(db.get(c));
		parents = pBuilder.build();
		parentsArray = null;

		ImmutableSet.Builder cBuilder = new ImmutableSet.Builder(); 
		for (int c : childrenArray)
			cBuilder.add(db.get(c));
		children = cBuilder.build();
		childrenArray = null;
	}

	public int depthMax() {
		int d = 0;
		for (Iterator it = children.iterator(); it.hasNext();) {
			ConcRel child = it.next();
			int dm = child.depthMax() + 1;
			if (dm > d)
				d = dm;
		}
		return d;
	}

	@Override
	public boolean equals(Object obj) {
		if (this == obj)
			return true;
		if (obj == null)
			return false;
		if (getClass() != obj.getClass())
			return false;
		ConcRel other = (ConcRel) obj;
		if (nodeIndex != other.nodeIndex)
			return false;
		return true;
	}

	public Set getChildren() {
		return children;
	}
	public int[] getChildrenArray() {
		return childrenArray;
	}
	public String getConceptID() {
		return nodeCUI;
	}

	public short getDepth() {
		return depth;
	}

	public double getIntrinsicInfoContent() {
		return intrinsicInfoContent;
	}

	public int getNodeIndex() {
		return nodeIndex;
	}

	public Set getParents() {
		return parents;
	}

	public int[] getParentsArray() {
		return parentsArray;
	}

	/**
	 * recursively build all paths to root from a concept - add elements from
	 * set of parents.
	 * 
	 * @param lpath
	 *            current path from children to this concept
	 * @param allPaths
	 *            list of all paths
	 * @param depth
	 *            current depth
	 * @param depthMax
	 */
	public void getPath(List lpath, List> allPaths,
			int depth, int depthMax) {
		if (depth >= depthMax)
			return;
		if (lpath == null)
			lpath = new ArrayList();

		lpath.add(this);

		if (isRoot()) {
			// add a copy to the list of all paths
			allPaths.add(new ArrayList(lpath));
		} else {
			// recurse
			for (ConcRel p : parents) {
				p.getPath(lpath, allPaths, depth + 1, depthMax);
			}
		}
		lpath.remove(lpath.size() - 1);
	}

	/**
	 * is the specified concept an ancestor of this concept?
	 * 
	 * @param cui
	 * @return
	 */
	public boolean hasAncestor(String cui) {
		if (nodeCUI.equals(cui))
			return true;
		for (ConcRel c : parents) {
			if (c.hasAncestor(cui))
				return true;
		}
		return false;
	}

	@Override
	public int hashCode() {
		return nodeIndex;
	}

	public boolean isLeaf() {
		return children.isEmpty();
	}

	public boolean isRoot() {
		return parents.isEmpty();
	}

	/**
	 * read parent/children concept ids, not the objects
	 */
	private void readObject(java.io.ObjectInputStream in)
			throws java.io.IOException, ClassNotFoundException {
		nodeCUI = (String) in.readObject();
		this.nodeIndex = in.readInt();
		this.intrinsicInfoContent = in.readDouble();
		this.depth = in.readShort();
		parentsArray = (int[]) in.readObject();
		childrenArray = (int[]) in.readObject();
		parents = new HashSet(parentsArray.length);
		children = new HashSet(childrenArray.length);
	}

	// public static ObjPair getLeastCommonConcept(
	// Vector> allPaths1, Vector> allPaths2) {
	// ObjPair res = new ObjPair(null,
	// Integer.MAX_VALUE);
	// ObjPair tmp = new ObjPair(null,
	// Integer.MAX_VALUE);
	//
	// int n = 0;
	// for (Vector path1 : allPaths1) {
	// // if(n++>200)
	// // break;
	// int n2 = 0;
	// for (Vector path2 : allPaths2) {
	// // if(n2++>200)
	// // break;
	// if (getCommonConcept(path1, path2, tmp) != null) {
	// if (tmp.v2.intValue() < res.v2.intValue()) {
	// res.v1 = tmp.v1;
	// res.v2 = tmp.v2;
	// }
	// }
	// }
	// }
	//
	// return res;
	// }

	// public static ConcRel getCommonConcept(Vector path1,
	// Vector path2, ObjPair oVals) {
	// ConcRel common = null;
	// int dist = Integer.MAX_VALUE;
	// int index1 = path1.size() - 1;
	// int index2 = path2.size() - 1;
	// while (index1 >= 0 && index2 >= 0) {
	// ConcRel r1 = path1.get(index1);
	// if (r1.equals(path2.get(index2))) {
	// common = r1;
	// dist = index1 + index2;
	// --index1;
	// --index2;
	// } else
	// break;
	// }
	//
	// oVals.v1 = common;
	// oVals.v2 = dist;
	//
	// return common;
	// }

	public void setChildrenArray(int[] childrenArray) {
		this.childrenArray = childrenArray;
	}

	public void setConceptID(String nodeCUI) {
		this.nodeCUI = nodeCUI;
	}

	public void setDepth(short depth) {
		this.depth = depth;
	}

	public void setIntrinsicInfoContent(double intrinsicInfoContent) {
		this.intrinsicInfoContent = intrinsicInfoContent;
	}

	public void setNodeIndex(int nodeIndex) {
		this.nodeIndex = nodeIndex;
	}

	public void setParentsArray(int[] parentsArray) {
		this.parentsArray = parentsArray;
	}

	@Override
	public String toString() {
		return "ConcRel [nodeCUI=" + nodeCUI + "]";
	}

	/**
	 * serialize parent/children concept ids, not the objects
	 */
	private void writeObject(java.io.ObjectOutputStream out)
			throws java.io.IOException {
		out.writeObject(nodeCUI);
		out.writeInt(this.nodeIndex);
		out.writeDouble(this.intrinsicInfoContent);
		out.writeShort(this.depth);
		if (parentsArray == null) {
			parentsArray = new int[parents.size()];
			int i = 0;
			for (ConcRel c : parents)
				parentsArray[i++] = c.getNodeIndex();
		}
		if (childrenArray == null) {
			childrenArray = new int[children.size()];
			int i = 0;
			for (ConcRel c : children)
				childrenArray[i++] = c.getNodeIndex();
		}

		out.writeObject(parentsArray);
		out.writeObject(childrenArray);
		parentsArray = null;
		childrenArray = null;
	}

	// public static void main(String[] args) {
	// int c1 = 18563; // 4903;
	// int c2 = 18670; // 175695;
	//
	// ConcRel r1 = MetaDB.concRelDB.cuiRelDB.get(c1);
	// ConcRel r2 = MetaDB.concRelDB.cuiRelDB.get(c2);
	// if (r1 == null)
	// System.out.println("No rel for " + c1);
	// if (r2 == null)
	// System.out.println("No rel for " + c2);
	//
	// if (r1 == null || r2 == null)
	// return;
	//
	// Vector> allPaths1 = new Vector>();
	// Vector> allPaths2 = new Vector>();
	//
	// r1.getPath(null, allPaths1, 0, 1000);
	// r2.getPath(null, allPaths2, 0, 1000);
	//
	// int i = 0;
	// System.out.println("***Paths for " + c1);
	// i = 0;
	// for (Vector vc : allPaths1) {
	// System.out.print("#P" + (i++) + ": ");
	// i++;
	// for (ConcRel cr : vc) {
	// System.out.print("->" + cr.nodeCUI);
	// }
	// System.out.println("");
	// }
	//
	// System.out.println("***Paths for " + c2);
	// i = 0;
	// for (Vector vc : allPaths2) {
	// System.out.print("##P" + (i++) + ": ");
	// for (ConcRel cr : vc) {
	// System.out.print("->" + cr.nodeCUI);
	// }
	// System.out.println("");
	// }
	//
	// ObjPair obp = getLeastCommonConcept(allPaths1,
	// allPaths2);
	// System.out.println("Common concept :"
	// + (obp.v1 == null ? "none" : obp.v1.nodeCUI));
	// System.out.println("dist: " + obp.v2);
	//
	// obp = getLeastCommonConcept(r1, r2);
	// System.out.println("Common concept2 :"
	// + (obp.v1 == null ? "none" : obp.v1.nodeCUI));
	// System.out.println("dist: " + obp.v2);
	//
	// }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy