All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.biopax.paxtools.pattern.Searcher Maven / Gradle / Ivy

Go to download

BioPAX Pattern Search library. Also, converts BioPAX model to SIF (simple binary interactions) text format.

The newest version!
package org.biopax.paxtools.pattern;

import org.biopax.paxtools.controller.Cloner;
import org.biopax.paxtools.controller.Completer;
import org.biopax.paxtools.controller.SimpleEditorMap;
import org.biopax.paxtools.io.BioPAXIOHandler;
import org.biopax.paxtools.io.SimpleIOHandler;
import org.biopax.paxtools.model.BioPAXElement;
import org.biopax.paxtools.model.BioPAXLevel;
import org.biopax.paxtools.model.Model;
import org.biopax.paxtools.model.level3.*;
import org.biopax.paxtools.model.level3.Process;
import org.biopax.paxtools.pattern.util.ProgressWatcher;

import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.UncheckedIOException;
import java.util.*;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;

/**
 * Searcher for searching a given pattern in a model.
 *
 * @author Ozgun Babur
 */
public class Searcher
{
	/**
	 * Searches the pattern starting from the given match. The first element of the match should be
	 * assigned. Others are optional.
	 * @param m match to start from; its element at index 0 is read by the sanity check
	 * @param pattern pattern to search
	 * @return result matches
	 */
	public static List<Match> search(Match m, Pattern pattern)
	{
		// Sanity check on the seed element's type; only evaluated when assertions are enabled.
		assert pattern.getStartingClass().isAssignableFrom(m.get(0).getModelInterface());

		// Evaluation begins with the first mapped constraint of the pattern.
		return searchRecursive(m, pattern.getConstraints(), 0);
	}

	/**
	 * Searches the pattern starting from the given element. A new match sized to the pattern is
	 * created and the element is placed at index 0 before delegating to the match-based search.
	 * @param ele element to start from
	 * @param pattern pattern to search
	 * @return matching results
	 */
	public static List<Match> search(BioPAXElement ele, Pattern pattern)
	{
		// Sanity check on the seed element's type; only evaluated when assertions are enabled.
		assert pattern.getStartingClass().isAssignableFrom(ele.getModelInterface());

		Match seed = new Match(pattern.size());
		seed.set(ele, 0);
		return search(seed, pattern);
	}

	/**
	 * Continues searching with the mapped constraint at the given index.
	 *
	 * If the constraint can generate and the last element it maps to is still unassigned, every
	 * generated candidate is tried in turn: it is written into the match, the remaining
	 * constraints are evaluated, and the slot is cleared again before the next candidate.
	 * Otherwise the constraint is only checked against the current match. Each completed match is
	 * cloned into the result, because the passed match object is mutated while searching.
	 *
	 * @param match match to start from; modified during the search and restored on normal return
	 * @param mc mapped constraints of the pattern
	 * @param index index of the current mapped constraint
	 * @return matching results
	 */
	public static List<Match> searchRecursive(Match match, List<MappedConst> mc, int index)
	{
		List<Match> result = new ArrayList<>();

		MappedConst current = mc.get(index);
		Constraint con = current.getConstr();
		int[] ind = current.getInds();
		int lastInd = ind[ind.length - 1];

		// True when there is no further constraint to evaluate after this one.
		boolean isLastConstraint = mc.size() == index + 1;

		if (con.canGenerate() && match.get(lastInd) == null)
		{
			Collection<BioPAXElement> elements = con.generate(match, ind);

			for (BioPAXElement ele : elements)
			{
				match.set(ele, lastInd);

				if (isLastConstraint)
				{
					result.add((Match) match.clone());
				}
				else
				{
					result.addAll(searchRecursive(match, mc, index + 1));
				}

				// Backtrack: free the slot for the next candidate and for the caller.
				match.set(null, lastInd);
			}
		}
		else if (con.satisfies(match, ind))
		{
			if (isLastConstraint)
			{
				result.add((Match) match.clone());
			}
			else
			{
				result.addAll(searchRecursive(match, mc, index + 1));
			}
		}
		return result;
	}

	/**
	 * Searches the pattern in a given model, but instead of a match map, returns all matches in a
	 * list. The matches of all starting elements are concatenated in the iteration order of the
	 * underlying match map's values.
	 * @param model model to search in
	 * @param pattern pattern to search for
	 * @return matching results
	 */
	public static List<Match> searchPlain(Model model, Pattern pattern)
	{
		List<Match> list = new ArrayList<>();

		Map<BioPAXElement, List<Match>> map = search(model, pattern);
		for (List<Match> matches : map.values())
		{
			list.addAll(matches);
		}
		return list;
	}

	/**
	 * Searches the pattern starting from given elements, but instead of a match map, returns all
	 * matches in a list. The matches of all starting elements are concatenated in the iteration
	 * order of the underlying match map's values.
	 * @param eles elements to start from
	 * @param pattern pattern to search for
	 * @return matching results
	 */
	public static List<Match> searchPlain(Collection<? extends BioPAXElement> eles, Pattern pattern)
	{
		List<Match> list = new ArrayList<>();

		Map<BioPAXElement, List<Match>> map = search(eles, pattern);
		for (List<Match> matches : map.values())
		{
			list.addAll(matches);
		}
		return list;
	}

	/**
	 * Searches the given pattern in the given model.
	 * @param model model to search in
	 * @param pattern pattern to search for
	 * @return map from starting elements to the list of results
	 */
	public static Map> search(Model model, Pattern pattern)
	{
		return search(model, pattern, null);
	}

	/**
	 * Searches the given pattern in the given model.
	 * @param model model to search in
	 * @param pattern pattern to search for
	 * @param prg progress watcher to keep track of the progress
	 * @return map from starting elements to the ordered list of results
	 */
	public static Map> search(final Model model, final Pattern pattern,
														 final ProgressWatcher prg)
	{
		final Map> map = new ConcurrentHashMap<>();
		final ExecutorService exec = Executors.newFixedThreadPool(20);

		Set eles = model.getObjects(pattern.getStartingClass());
		if (prg != null) prg.setTotalTicks(eles.size());

		for (final BioPAXElement ele : eles)
		{
			exec.execute(() -> {
				List matches = search(ele, pattern);
				if (!matches.isEmpty())
				{
					map.put(ele, matches);
				}
				if (prg != null) prg.tick(1);
			});
		}

		exec.shutdown();

		try {
			exec.awaitTermination(10, TimeUnit.MINUTES);
		} catch (InterruptedException e) {
			throw new RuntimeException("search, failed due to exec timed out.", e);
		}

		return Collections.unmodifiableMap(map);
	}

	/**
	 * Searches the given pattern starting from the given elements.
	 * @param eles elements to start from
	 * @param pattern pattern to search for
	 * @return map from starting element to the matching results
	 */
	public static Map> search(
		Collection eles, Pattern pattern)
	{
		final Map> map = new ConcurrentHashMap<>();
		final ExecutorService exec = Executors.newFixedThreadPool(10);

		for (final BioPAXElement ele : eles)
		{
			if (!pattern.getStartingClass().isAssignableFrom(ele.getModelInterface())) continue;
			
			exec.execute(() -> {
				List matches = search(ele, pattern);
				if (!matches.isEmpty()) {
					map.put(ele, matches);
				}
			});
		}

		exec.shutdown();

		try {
			exec.awaitTermination(10, TimeUnit.MINUTES);
		} catch (InterruptedException e) {
			throw new RuntimeException("search, failed due to exec timed out.", e);
		}

		return Collections.unmodifiableMap(new HashMap<>(map));
	}

	/**
	 * Searches a model for the given pattern, then collects the specified elements of the matches
	 * and returns. The search starts from every object of the pattern's starting class in the
	 * model.
	 *
	 * @param <T> BioPAX type
	 * @param model model to search in
	 * @param pattern pattern to search for
	 * @param index index of the element in the match to collect
	 * @param c type of the element to collect
	 * @return set of the elements at the specified index of the matching results
	 */
	public static <T extends BioPAXElement> Set<T> searchAndCollect(
		Model model, Pattern pattern, int index, Class<T> c)
	{
		return searchAndCollect(model.getObjects(pattern.getStartingClass()), pattern, index, c);
	}

	/**
	 * Searches the given pattern starting from the given elements, then collects the specified
	 * elements of the matches and returns.
	 *
	 * @param <T> BioPAX type
	 * @param eles elements to start from
	 * @param pattern pattern to search for
	 * @param index index of the element in the match to collect
	 * @param c type of the element to collect; only fixes the type parameter, it is not used for
	 * a runtime type check
	 * @return set of the elements at the specified index of the matching results
	 */
	public static <T extends BioPAXElement> Set<T> searchAndCollect(
		Collection<? extends BioPAXElement> eles, Pattern pattern, int index, Class<T> c)
	{
		Set<T> set = new HashSet<>();

		for (Match match : searchPlain(eles, pattern))
		{
			// The caller vouches that the element at this index has type T.
			@SuppressWarnings("unchecked")
			T collected = (T) match.get(index);
			set.add(collected);
		}
		return set;
	}

	/**
	 * Searches the given pattern starting from the given element, then collects the specified
	 * elements of the matches and returns.
	 *
	 * @param <T> BioPAX type
	 * @param ele element to start from
	 * @param pattern pattern to search for
	 * @param index index of the element in the match to collect
	 * @param c type of the element to collect; only fixes the type parameter, it is not used for
	 * a runtime type check
	 * @return set of the elements at the specified index of the matching results
	 */
	public static <T extends BioPAXElement> Set<T> searchAndCollect(
		BioPAXElement ele, Pattern pattern, int index, Class<T> c)
	{
		Set<T> set = new HashSet<>();

		for (Match match : search(ele, pattern))
		{
			// The caller vouches that the element at this index has type T.
			@SuppressWarnings("unchecked")
			T collected = (T) match.get(index);
			set.add(collected);
		}
		return set;
	}

	/**
	 * Tells whether the pattern has at least one match when the leading positions of the match
	 * are pre-assigned with the given elements, in the order they are passed.
	 * @param p pattern to search for
	 * @param ele elements to assign to match positions 0, 1, 2, ...
	 * @return true if there is a match
	 */
	public boolean hasSolution(Pattern p, BioPAXElement ... ele)
	{
		Match seed = new Match(p.size());

		// Place each given element at the position matching its order in the argument list.
		int position = 0;
		for (BioPAXElement given : ele)
		{
			seed.set(given, position);
			position++;
		}

		boolean nothingFound = search(seed, p).isEmpty();
		return !nothingFound;
	}

	/**
	 * Reads a model from the given file, searches the pattern in it, and writes a second model
	 * excised to the matches. Neither the number of starting elements nor the number of matches
	 * per starting element is limited.
	 * @param p pattern to search for
	 * @param inFile filename for the model to search in
	 * @param outFile filename for the result model
	 * @throws FileNotFoundException when no file exists
	 */
	public static void searchInFile(Pattern p, String inFile, String outFile) throws FileNotFoundException
	{
		// The largest int acts as "no limit" for both the seed and the per-seed match caps.
		final int unlimited = Integer.MAX_VALUE;
		searchInFile(p, inFile, outFile, unlimited, unlimited);
	}

	/**
	 * Searches a pattern reading the model from the given file, and creates another model that is
	 * excised using the matching patterns. Users can limit the max number of starting elements,
	 * and max number of matches for any starting element. These parameters are good for limiting
	 * the size of the result graph.
	 *
	 * For every distinct set of interactions found in the matches, a pathway named "Match N" is
	 * added to the excised model. The number of created pathways is printed to standard output.
	 *
	 * @param p pattern to search for
	 * @param inFile filename for the model to search in
	 * @param outFile filename for the result model
	 * @param seedLimit max number of starting elements
	 * @param graphPerSeed max number of matches for a starting element
	 * @throws FileNotFoundException when the input cannot be opened for reading or the output
	 * cannot be opened for writing
	 * @throws UncheckedIOException when closing either file fails
	 */
	public static void searchInFile(Pattern p, String inFile, String outFile, int seedLimit,
		int graphPerSeed) throws FileNotFoundException
	{
		SimpleIOHandler h = new SimpleIOHandler();
		Model model;

		// Close the input stream on every path, including when parsing fails.
		try (FileInputStream in = new FileInputStream(inFile))
		{
			model = h.convertFromOWL(in);
		}
		catch (FileNotFoundException e)
		{
			throw e;
		}
		catch (IOException e)
		{
			// Only the implicit close() can raise a plain IOException here.
			throw new UncheckedIOException("Cannot close input file: " + inFile, e);
		}

		Map<BioPAXElement, List<Match>> matchMap = Searcher.search(model, p);

		List<Set<Interaction>> inters = new LinkedList<>();
		// Interaction sets are compared by set equality, so two different sets whose hash
		// codes happen to sum to the same value are both kept.
		Set<Set<Interaction>> encountered = new HashSet<>();
		Set<BioPAXElement> toExcise = new HashSet<>();

		int seedCounter = 0;
		for (List<Match> matches : matchMap.values())
		{
			if (seedCounter >= seedLimit) break;

			if (!matches.isEmpty()) seedCounter++;

			int matchCounter = 0;
			for (Match match : matches)
			{
				matchCounter++;

				if (matchCounter > graphPerSeed) break;

				Set<Interaction> ints = getInter(match);

				toExcise.addAll(Arrays.asList(match.getVariables()));
				toExcise.addAll(ints);

				// add() returns false for an interaction set that was already recorded.
				if (encountered.add(ints))
				{
					inters.add(ints);
				}
			}
		}

		System.out.println("created pathways = " + inters.size());

		Model clonedModel = excise(toExcise);

		int i = 0;
		for (Set<Interaction> ints : inters)
		{
			Pathway pathway = clonedModel.addNew(Pathway.class,
				System.currentTimeMillis() + "PaxtoolsPatternGeneratedMatch" + (++i));

			pathway.setDisplayName("Match " + getLeadingZeros(i, inters.size()) + i);

			for (Interaction anInt : ints)
			{
				// Look up the clone by URI; the pathway must reference objects of the new model.
				pathway.addPathwayComponent((Process) clonedModel.getByID(anInt.getUri()));
			}
		}

		// Close the output stream on every path, including when writing fails.
		try (FileOutputStream out = new FileOutputStream(outFile))
		{
			handler.convertToOWL(clonedModel, out);
		}
		catch (FileNotFoundException e)
		{
			throw e;
		}
		catch (IOException e)
		{
			// Only the implicit close() can raise a plain IOException here.
			throw new UncheckedIOException("Cannot close output file: " + outFile, e);
		}
	}

	/**
	 * Prepares an int for printing with leading zeros for the given size. The result contains
	 * one "0" for every decimal digit that size has more than i, so that i printed after it is as
	 * wide as size. Values that are zero or negative are treated as one digit wide.
	 * @param i the int to prepare
	 * @param size max value for i
	 * @return the zeros to print in front of i; empty when no padding is needed
	 */
	private static String getLeadingZeros(int i, int size)
	{
		assert i <= size;

		// Count digits on the decimal text rather than with log10, which is -Infinity for 0
		// and NaN for negative numbers.
		int targetWidth = String.valueOf(Math.max(size, 0)).length();
		int ownWidth = String.valueOf(Math.max(i, 0)).length();

		StringBuilder zeros = new StringBuilder();
		for (int j = ownWidth; j < targetWidth; j++)
		{
			zeros.append('0');
		}
		return zeros.toString();
	}

	/**
	 * IO handler for reading and writing BioPAX. Assigned once and never replaced.
	 */
	private static final BioPAXIOHandler handler =  new SimpleIOHandler();

	/**
	 * Editor map to use for excising.
	 */
	private static final SimpleEditorMap EM = SimpleEditorMap.L3;

	/**
	 * Excises a model to the given elements. The elements are first run through a completer and
	 * the completed set is then cloned into a new level 3 model.
	 * @param result elements to excise to
	 * @return excised model
	 */
	private static Model excise(Set<BioPAXElement> result)
	{
		Completer c = new Completer(EM);
		// Keep the completed set in its own variable instead of overwriting the parameter.
		Set<BioPAXElement> completed = c.complete(result);
		Cloner cln = new Cloner(EM, BioPAXLevel.L3.getDefaultFactory());
		return cln.clone(completed);
	}

	/**
	 * Gets all interactions in a match. Every match variable that is an interaction is
	 * collected, together with the controls reachable from it through getControlledOf().
	 * @param match match to search
	 * @return all interaction in the match
	 */
	private static Set<Interaction> getInter(Match match)
	{
		Set<Interaction> set = new HashSet<>();
		for (BioPAXElement ele : match.getVariables())
		{
			if (ele instanceof Interaction)
			{
				Interaction inter = (Interaction) ele;
				set.add(inter);
				addControlsRecursive(inter, set);
			}
		}
		return set;
	}

	/**
	 * Adds controls of the given interactions recursively to the given set. A control that is
	 * already in the set is not followed again, so the traversal also terminates when controls
	 * refer to each other in a cycle.
	 * @param inter interaction to add its controls
	 * @param set set to add to; doubles as the record of what was already visited
	 */
	private static void addControlsRecursive(Interaction inter, Set<Interaction> set)
	{
		for (Control ctrl : inter.getControlledOf())
		{
			// add() returns false when the control was already collected.
			if (set.add(ctrl))
			{
				addControlsRecursive(ctrl, set);
			}
		}
	}

	/**
	 * Creates a hash code for a set of interactions by adding up the hash codes of its members.
	 * The sum is allowed to overflow, and different sets may produce the same value.
	 * @param set interactions
	 * @return sum of hashes
	 */
	private static Integer hashSum(Set<Interaction> set)
	{
		int sum = 0;
		for (Interaction inter : set)
		{
			sum += inter.hashCode();
		}
		return sum;
	}
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy