All downloads are free. Search and download functionality uses the official Maven repository.

com.actelion.research.chem.mmp.MMPReader Maven / Gradle / Ivy

There is a newer version: 2024.12.1
Show newest version
/*
 * Copyright (c) 2017
 * Actelion Pharmaceuticals Ltd.
 * Gewerbestrasse 16
 * CH-4123 Allschwil, Switzerland
 *
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice, this
 *    list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 * 3. Neither the name of the copyright holder nor the
 *    names of its contributors may be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * @author Gregori Gerebtzoff
 */

package com.actelion.research.chem.mmp;

import com.actelion.research.chem.AbstractDepictor;
import com.actelion.research.chem.ExtendedDepictor;
import com.actelion.research.chem.IDCodeParser;
import com.actelion.research.chem.StereoMolecule;
import com.actelion.research.chem.mmp.MMP.MoleculeIndex;
import com.actelion.research.chem.mmp.MMPUniqueFragments.MMPUniqueFragment;
import com.actelion.research.chem.reaction.Reaction;
import com.actelion.research.chem.reaction.ReactionEncoder;
import com.actelion.research.gui.generic.GenericDrawContext;
import com.actelion.research.gui.generic.GenericRectangle;
import com.actelion.research.gui.swing.SwingDrawContext;
import com.actelion.research.util.Base64;

import javax.imageio.ImageIO;
import java.awt.*;
import java.awt.image.BufferedImage;
import java.io.BufferedReader;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.text.DecimalFormat;
import java.text.NumberFormat;
import java.util.List;
import java.util.*;
import java.util.Map.Entry;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class MMPReader {
	public static final String SORT_BY_SIMILARITY = "similarity";
	public static final String SORT_BY_NUMBER_OF_EXAMPLES = "results";
	
	// NOTE(review): the generic type arguments of the collections below appear to have been lost
	// (e.g. "HashMap>"); restore them from the upstream source before compiling.
	private HashMap> wholeMoleculesIndex;   // Molecule idCode, [[molName, molDatas], ..]
	private List molecules;                                          // Ordered List of molecules idCodes; molIndex is used in mmpFragmentsIndex
	private List dataFields;                                      // Ordered List of numerical data fields
	private List uniqueFragmentsIndex;                               // Ordered List of unique fragments idCodes
	private MMPUniqueFragments mmpUniqueFragments;                           // List of unique fragments (index, number of atoms, fingerprints)
	private HashMap> mmpFragmentsIndex;                  // keys (tab-delimited) - {valueFragmentIndex, molIndex}
	private HashMap>> mmpIndex;        // MMP container: value1FragmentIndex -> value2Atoms -> list of int[] {value2, then pairs of ints parsed from each "a,b" example}
	private String datasetName;              // value of the "dataset" header attribute
	private String date;                     // value of the "date" header attribute
	private Integer keysMinAtoms;            // parsed from the "keysminatoms" header attribute; readMolecules later assigns MMPFragmenter.KEYS_MIN_ATOMS
	private String version;                  // value of the "version" header attribute
	private MMPPropertyCalculator mPropertyCalculator;   // created in the constructor
	
	/**
	 * Describes one numerical data column declared in the Molecules block header.
	 * All values are kept as the raw attribute strings read from the file.
	 */
	public class DataField {
		/** Value of the {@code columnName} attribute. */
		private String fieldName;
		/** Value of the {@code longName} attribute (may be {@code null}). */
		private String longFieldName;
		/** Value of the {@code category} attribute (may be {@code null}). */
		private String categoryName;
		/** Value of the {@code percentile5} attribute (may be {@code null}). */
		private String percentile5;
		/** Value of the {@code percentile95} attribute (may be {@code null}). */
		private String percentile95;
		
		/**
		 * Stores the five attribute values unchanged.
		 */
		private DataField(String fieldName, String longFieldName, String categoryName, String percentile5, String percentile95) {
			// names first, then the category, then the two percentile bounds
			this.fieldName = fieldName;
			this.longFieldName = longFieldName;
			this.categoryName = categoryName;
			this.percentile5 = percentile5;
			this.percentile95 = percentile95;
		}
	}
	
	/**
	 * One example of a matched molecular pair: the data of the two example
	 * molecules together with their similarity value.
	 */
	private class MatchedMolecularPairExamples {
		/** Similarity value attached to this example; used by the similarity comparator. */
		int similarity;
		/** Data of the first example. */
		ArrayList example1;
		/** Data of the second example. */
		ArrayList example2;
		
		/**
		 * Stores the three values unchanged.
		 */
		public MatchedMolecularPairExamples(ArrayList example1, ArrayList example2, int similarity) {
			this.similarity = similarity;
			this.example1 = example1;
			this.example2 = example2;
		}
	}
	
	public static class MatchedMolecularPair {
		int numberOfExamples;
		String value1;
		int value1Index;
		int value1Atoms;
		String value2;
		int value2Index;
		int value2Atoms;
		int similarity;
		Reaction transformation;
		String transformationString;
		List mmpExamples;
		ArrayList> datas;
		ArrayList similarities;
		int[][] n;
		int[][] increase;
		int[][] decrease;
		int[][] neutral;
		Double[][] average;
		Double[][] sd;
		Integer[][] numberOfIncrease;
		Integer[][] numberOfDecrease;
		Integer[][] numberOfNeutral;
		boolean targetExists;
		
		/**
		 * Creates a new Matched Molecular Pair and pre-computes, per data field,
		 * the counts, standard deviations and increase/decrease/neutral tendencies
		 * of the differences (example2 - example1) over its examples.
		 * @param value1 seed 'value' (variable part of the molecule)
		 * @param value1Index index of the seed 'value'
		 * @param value1Atoms number of heavy atoms of the seed 'value'
		 * @param value1FP fingerprints of the seed 'value'; compared element by element with value2FP to derive the similarity
		 * @param value2 target 'value' (variable part of the molecule)
		 * @param value2Index index of the target 'value'
		 * @param value2Atoms number of heavy atoms of the target 'value'
		 * @param value2FP fingerprints of the target 'value'; must have at least as many elements as value1FP
		 * @param mmpExamples List of examples
		 * @param numberOfFields number of numerical fields
		 * @param targetExists any value greater than -1 means the target exists (the transformed seed molecule exists in the data set)
		 */
		public MatchedMolecularPair(String value1, int value1Index, int value1Atoms, String[] value1FP, String value2, int value2Index, int value2Atoms, String[] value2FP, List mmpExamples, int numberOfFields, int targetExists) {
			this.numberOfExamples = mmpExamples.size();
			this.value1 = value1;
			this.value1Index = value1Index;
			this.value1Atoms = value1Atoms;
			this.value2 = value2;
			this.value2Index = value2Index;
			this.value2Atoms = value2Atoms;
			this.transformation = reactionFromTwoValues(value1, value2);
			// the transformation string is only built on demand by calcTransformationString()
			this.transformationString = null;
			this.mmpExamples = mmpExamples;
			this.datas = new ArrayList>();
			this.similarities = new ArrayList();
			// second dimension (6) is indexed by k/j in the loops below
			this.n = new int[numberOfFields][6];
			this.average = new Double[numberOfFields][6];
			this.sd = new Double[numberOfFields][6];
			this.increase = new int[numberOfFields][6];
			this.decrease = new int[numberOfFields][6];
			this.neutral = new int[numberOfFields][6];
			this.targetExists = targetExists > -1 ? true : false;
			
			// NOTE(review): the loop headers on the next lines are truncated (text between '<' and '>'
			// appears to be missing); restore them from the upstream source before compiling.
			for (int i=0; i());
			}
			for (int i=0; i example1 = mmpExamples.get(i).example1;
				ArrayList example2 = mmpExamples.get(i).example2;
				int similarity = mmpExamples.get(i).similarity;
				// several compounds may share one structure: work on their per-field averages
				Double[] example1Data = averageData(example1, numberOfFields);
				Double[] example2Data = averageData(example2, numberOfFields);
				similarities.add(similarity);
				// NOTE(review): truncated loop header / condition on the next line
				for (int j=0; j= k) {
								this.n[j][k] += 1;
							}
						}
						// store the difference target - seed for field j
						ArrayList data = datas.get(j);
						data.add(example2Data[j] - example1Data[j]);
						datas.set(j, data);
					}
				}
			}
			// NOTE(review): truncated loop header / condition on the next line
			for (int i=0; i 1) {
						this.sd[i][j] = calcStanDev(datas.get(i).subList(0,  this.n[i][j]));
					}
					else {
						this.sd[i][j] = null;
					}
					// tendency = {increase, decrease, neutral} counts over the first n[i][j] differences
					int[] tendency = calcTendency(datas.get(i).subList(0,  this.n[i][j]));
					this.increase[i][j] = tendency[0];
					this.decrease[i][j] = tendency[1];
					this.neutral[i][j] = tendency[2];
				}
			}
			// similarity = (highest index at which both fingerprints are equal) + 1, or 0 if none match
			this.similarity = 0;
			for (int i=value1FP.length-1; i>=0; i--) {
				if (value1FP[i].equals(value2FP[i])) {
					this.similarity = i + 1;
					break;
				}
			}
		}
		
		/**
		 * Builds the transformation string (to be displayed in DataWarrior) from the
		 * seed and target values of this pair and stores it in {@code transformationString}.
		 */
		private void calcTransformationString() {
			String encoded = idCodeFromTwoValues(this.value1, this.value2);
			this.transformationString = encoded;
		}
		
		/**
		 * Calculates the arithmetic mean of a list of values.
		 * @param data list of values; must not contain {@code null} elements
		 * @return the mean, or {@code null} when the list is empty
		 */
		private static Double calcAverage(List<Double> data) {
			if (data.isEmpty()) {
				return null;
			}
			double sum = 0.0;
			for (Double d: data) {
				sum += d;
			}
			return sum / data.size();
		}
		
		/**
		 * Calculates the sample standard deviation from a list of data
		 * @param data List of data
		 * @return square root of {@link #calcVariance(List)}; NaN when the list has fewer than two values
		 */
		private static double calcStanDev(List<Double> data) {
			return Math.sqrt(calcVariance(data));
		}
		
		
		/**
		 * Calculates the sample variance (divisor n-1) from a list of data.
		 * The mean is computed first and the squared deviations are summed in a second
		 * pass: the one-pass form sum(x^2) - sum(x)^2/n loses all precision when the
		 * values are large compared to their spread and can even turn negative, which
		 * made the standard deviation NaN.
		 * @param data List of data
		 * @return variance; NaN when the list has fewer than two values
		 */
		private static double calcVariance(List<Double> data) {
			int n = data.size();
			if (n < 2) {
				// the variance is undefined here; the previous formula also evaluated to NaN
				return Double.NaN;
			}
			double mean = 0.0;
			for (Double d: data) {
				mean += d;
			}
			mean /= n;
			double squaredDeviations = 0.0;
			for (Double d: data) {
				double deviation = d - mean;
				squaredDeviations += deviation * deviation;
			}
			return squaredDeviations / (n - 1);
		}
		
		/**
		 * Calculates the tendency from a list of data: number of increase, decrease, and neutral.
		 * A value counts as an increase when it is at least 0.1, as a decrease when it is
		 * at most -0.1, and as neutral otherwise.
		 * @param data List of data; must not contain {@code null} elements
		 * @return array of [number of increase, number of decrease, number of neutral]
		 */
		private static int[] calcTendency(List<Double> data) {
			int[] retVal = new int[3]; // increase, decrease, neutral (a new int[] is already zero-filled)
			for (Double d: data) {
				if (d >= 0.1) {
					retVal[0] += 1;
				}
				else if (d <= -0.1) {
					retVal[1] += 1;
				}
				else {
					retVal[2] += 1;
				}
			}
			return retVal;
		}
		
		/**
		 * Builds a Reaction object from the seed and target fragments.
		 * Both idCodes are parsed into molecules which are passed, seed first, to the
		 * Reaction constructor together with the count 1 (presumably the number of reactants).
		 * @param value1 seed 'value' (variable part of the molecule)
		 * @param value2 target 'value' (variable part of the molecule)
		 * @return Reaction object
		 */
		private Reaction reactionFromTwoValues(String value1, String value2) {
			StereoMolecule seed = new StereoMolecule();
			StereoMolecule target = new StereoMolecule();
			IDCodeParser parser = new IDCodeParser();
			parser.parse(seed, value1);
			parser.parse(target, value2);
			StereoMolecule[] participants = new StereoMolecule[]{seed, target};
			return new Reaction(participants, 1);
		}
		
		/**
		 * Generates the idCode of the transformation of one seed 'value' into one target 'value', to be displayed in DataWarrior.
		 * The result has the form {@code value1 + "!" + value2 + "##" + encoded[2]}, where
		 * {@code encoded} is the array returned by {@code ReactionEncoder.encode(reaction, false)}.
		 * @param value1 seed 'value' (variable part of the molecule)
		 * @param value2 target 'value' (variable part of the molecule)
		 * @return idCode of the transformation
		 */
		private String idCodeFromTwoValues(String value1, String value2) {
			// same seed -> target reaction that reactionFromTwoValues builds
			Reaction reaction = reactionFromTwoValues(value1, value2);
			String[] encoded = ReactionEncoder.encode(reaction, false);
			String bothValues = value1 + "!" + value2;
			return bothValues + "##" + encoded[2];
		}
		
		/**
		 * In case of several compounds having the same structure, averages data between these compounds
		 * @param example List of data
		 * @param numberOfFields number of numerical fields
		 * @return Array of averages
		 */
		private Double[] averageData(ArrayList example, int numberOfFields) {
			// per-field counters and accumulators, both explicitly reset to zero
			int[] numberOfValues = new int[numberOfFields];
			Double[] values = new Double[numberOfFields];
			Arrays.fill(numberOfValues, 0);
			Arrays.fill(values, 0.0);
			// NOTE(review): the next line is truncated. The rest of averageData, the end of the
			// MatchedMolecularPair class and the start of the NUMBER_OF_EXAMPLES_SORT declaration
			// appear to be missing between "i" and "NUMBER_OF_EXAMPLES_SORT"; restore from upstream.
			for (int i=0; i NUMBER_OF_EXAMPLES_SORT = new Comparator() {
		// Orders pairs by decreasing numberOfExamples (second argument minus first).
		public int compare(MatchedMolecularPair matchedMolecularPair1, MatchedMolecularPair matchedMolecularPair2) {
			return matchedMolecularPair2.numberOfExamples - matchedMolecularPair1.numberOfExamples;
		}
	};

	/**
	 * Orders matched molecular pairs by decreasing {@code similarity}.
	 * Uses {@link Integer#compare(int, int)} rather than a subtraction, which cannot overflow.
	 */
	public static final Comparator<MatchedMolecularPair> SIMILARITY_SORT = new Comparator<MatchedMolecularPair>() {
		@Override
		public int compare(MatchedMolecularPair matchedMolecularPair1, MatchedMolecularPair matchedMolecularPair2) {
			// arguments are swapped on purpose: higher similarity sorts first
			return Integer.compare(matchedMolecularPair2.similarity, matchedMolecularPair1.similarity);
		}
	};
	
	/**
	 * Orders matched molecular pair examples by decreasing {@code similarity}.
	 * Uses {@link Integer#compare(int, int)} rather than a subtraction, which cannot overflow.
	 */
	public static final Comparator<MatchedMolecularPairExamples> EXAMPLES_SIMILARITY_SORT = new Comparator<MatchedMolecularPairExamples>() {
		@Override
		public int compare(MatchedMolecularPairExamples matchedMolecularPairExample1, MatchedMolecularPairExamples matchedMolecularPairExample2) {
			// arguments are swapped on purpose: higher similarity sorts first
			return Integer.compare(matchedMolecularPairExample2.similarity, matchedMolecularPairExample1.similarity);
		}
	};
	
	/**
	 * Creates a reader and loads a complete MMP file from the given stream.
	 * The stream is always closed when the constructor returns, whether the
	 * file was read successfully or an exception was thrown while reading.
	 * @param br reader positioned at the start of the MMP file
	 * @param verbose if true, progress information is printed to standard output
	 * @throws IOException if the header does not contain the expected row counts or the stream fails
	 * @throws Exception any other failure raised while reading the file
	 */
	public MMPReader(BufferedReader br, boolean verbose) throws IOException, Exception {
		// try-with-resources closes br on every exit path
		try (BufferedReader reader = br) {
			mmpFragmentsIndex = new HashMap<>();
			mmpIndex = new HashMap<>();
			mmpUniqueFragments = new MMPUniqueFragments();
			mPropertyCalculator = new MMPPropertyCalculator();
			readMMPFile(reader, verbose);
		}
	}
	
	/**
	 * Reads a whole MMP file: first the header (dataset name, date, keysminatoms,
	 * version and the four row counts), then the molecules, unique fragments,
	 * fragments and matched molecular pairs blocks, in that order.
	 * @param br reader positioned at the start of the file
	 * @param verbose if true, a summary and progress bars are printed to standard output
	 * @throws IOException if fewer than four row count lines are found, or one of the expected row counts is missing
	 * @throws Exception
	 */
	private void readMMPFile(BufferedReader br, boolean verbose) throws IOException, Exception {
		HashMap<String, Integer> rowCounts = new HashMap<String, Integer>();
		int rowCountsCounter = 0;
		String strLine;
		Pattern pattern1 = Pattern.compile("<(.*?)=\"(.*?)\">");
		Pattern pattern2 = Pattern.compile("<(.*?rowcount)=([0-9]*?)>");
		// readLine() is evaluated before the counter test, so one further line is consumed
		// (and discarded) after the fourth row count has been found.
		while ((strLine = br.readLine()) != null && rowCountsCounter < 4) {
			Matcher matcher1 = pattern1.matcher(strLine);
			if (matcher1.find()) {
				String attribute = matcher1.group(1);
				if (attribute.equals("dataset")) {
					datasetName = matcher1.group(2);
				}
				else if (attribute.equals("date")) {
					date = matcher1.group(2);
				}
				else if (attribute.equals("keysminatoms")) {
					keysMinAtoms = Integer.parseInt(matcher1.group(2));
				}
				else if (attribute.equals("version")) {
					version = matcher1.group(2);
				}
			}
			else {
				Matcher matcher2 = pattern2.matcher(strLine);
				if (matcher2.find()) {
					rowCounts.put(matcher2.group(1), Integer.parseInt(matcher2.group(2)));
					rowCountsCounter++;
				}
			}
		}
		if (rowCountsCounter < 4) {
			throw new IOException("General: cannot find the four rowcount lines");
		}
		// Four matching lines do not guarantee four distinct, expected names: check each one
		// explicitly instead of failing later with a NullPointerException on unboxing.
		int moleculesRowCount = requireRowCount(rowCounts, "moleculesrowcount");
		int uniqueFragmentsRowCount = requireRowCount(rowCounts, "mmpuniquefragmentsrowcount");
		int fragmentsRowCount = requireRowCount(rowCounts, "mmpfragmentsrowcount");
		int mmpRowCount = requireRowCount(rowCounts, "mmprowcount");
		if (verbose) {
			System.out.println("The dataset contains " + moleculesRowCount + " molecules, " + uniqueFragmentsRowCount + " unique fragments, " + fragmentsRowCount + " molecules fragments combinations and " + mmpRowCount + " MMPs.");
			System.out.println("                  0        10        20        30        40        50        60        70        80        90       100");
		}
		readMolecules(br, moleculesRowCount, verbose);
		readUniqueFragments(br, uniqueFragmentsRowCount, verbose);
		readFragments(br, fragmentsRowCount, verbose);
		readMMPs(br, mmpRowCount, verbose);
	}
	
	/**
	 * Returns the row count stored under the given header name.
	 * @param rowCounts row counts parsed from the header
	 * @param name name of the expected row count entry
	 * @return the row count
	 * @throws IOException if the header did not contain that entry
	 */
	private static int requireRowCount(HashMap<String, Integer> rowCounts, String name) throws IOException {
		Integer count = rowCounts.get(name);
		if (count == null) {
			throw new IOException("General: cannot find the " + name + " line");
		}
		return count;
	}
	
	/**
	 * Reads the Molecules block of a MMP file.
	 * Column definition lines ({@code columnName=...}) that are not one of the four fixed
	 * columns (moleculeIndex, idcoordinates2D, molecule, moleculeName) are registered as
	 * data fields. Data rows are tab-delimited: index, coordinates, idCode, name, then
	 * one value per data field. Consecutive rows sharing the same index are stored under
	 * the same idCode.
	 * @param br
	 * @param rowCount Number of expected rows (from the header block)
	 * @param verbose if true, progress is printed to standard output
	 * @throws IOException declared, but IOExceptions raised inside the read loop are caught and ignored here
	 * @throws Exception
	 */
	private void readMolecules(BufferedReader br, int rowCount, boolean verbose) throws IOException, Exception {
		wholeMoleculesIndex = new LinkedHashMap>();
		dataFields = new ArrayList();
		molecules = new ArrayList();
		// NOTE(review): this overwrites any "keysminatoms" value parsed from the header by
		// readMMPFile, which runs before this method - confirm that this is intended.
		keysMinAtoms = MMPFragmenter.KEYS_MIN_ATOMS;
		if (verbose)
			System.out.print("Molecules:        #");
		try {
			String strLine;
			boolean moleculesBlock = false;
			// number of header lines still to be consumed; incremented once per data field found
			int linesToRead = 11;
			int entries = 0;
			int lastEntryIndex = -1;
			Pattern tagPattern = Pattern.compile("<(columnName=.*?)>");
		    Pattern attValue = Pattern.compile("(\\w+)=\"(.*?)\"");
			while ((strLine = br.readLine()) != null && entries < rowCount) {
				// NOTE(review): startsWith("") is true for every line; the tag literal seems to
				// have been lost (here and in the check further below) - restore from upstream.
				if (strLine.startsWith("") || (moleculesBlock == true && linesToRead > 0)) {
					moleculesBlock = true;
					Matcher matcher = tagPattern.matcher(strLine);
					if (matcher.find()) {
						// split the column definition into its name="value" attributes
						matcher = attValue.matcher(matcher.group(1));
						String fieldName = null;
						String longFieldName = null;
						String categoryName = null;
						String percentile5 = null;
						String percentile95 = null;
						while (matcher.find()) {
							if (matcher.group(1).equals("columnName")) {
								fieldName = matcher.group(2);
							}
							else if (matcher.group(1).equals("longName")) {
								longFieldName = matcher.group(2);
							}
							else if (matcher.group(1).equals("percentile5")) {
								percentile5 = matcher.group(2);
							}
							else if (matcher.group(1).equals("percentile95")) {
								percentile95 = matcher.group(2);
							}
							else if (matcher.group(1).equals("category")) {
								categoryName = matcher.group(2);
							}
						}
						if (fieldName != null && !fieldName.equals("moleculeIndex") && !fieldName.equals("idcoordinates2D") && !fieldName.equals("molecule") && !fieldName.equals("moleculeName")) {
							dataFields.add(new DataField(fieldName, longFieldName, categoryName, percentile5, percentile95));
							// a data field line does not count against the fixed header lines
							linesToRead++;
						}
					}
					linesToRead--;
				}
				else if (moleculesBlock == true && linesToRead == 0) {
					String[] items = strLine.split("\t", -1);
					// a valid row has the four fixed columns plus one column per data field
					if (items.length == dataFields.size() + 4) {
						items = strLine.split("\t", 5); // index, coordinates, idcode, name
						String[] data = items[4].split("\t", -1);
						int molIndex = Integer.parseInt(items[0]);
						MoleculeIndex moleculeIndex;
						ArrayList moleculesIndex = new ArrayList();
						if (molIndex == lastEntryIndex) {
							// same index as the previous row: add to the entry list already stored for this idCode
							moleculeIndex = new MoleculeIndex(molIndex, items[3], data);
							moleculesIndex = wholeMoleculesIndex.get(items[2]);
						}
						else {
							moleculeIndex = new MoleculeIndex(molIndex, items[1], items[2], items[3], data);
							molecules.add(items[2]);
							// prints the list size when the row index does not match the position in 'molecules'
							if (molecules.size() != Integer.parseInt(items[0])+1) {
								System.out.println(molecules.size());
							}
						}
						moleculesIndex.add(moleculeIndex);
						wholeMoleculesIndex.put(items[2], moleculesIndex);
						printProgress(verbose, rowCount, entries);
						entries++;
						lastEntryIndex = molIndex;
					}
					else if (strLine.startsWith("")) {
						throw new IOException("molecules: Bad number of entries");
					}
				}
			}
		}
		catch (IOException ioe) {
			// NOTE(review): the exception (including the one thrown above) is silently dropped,
			// so a malformed block leaves a partially filled index without any report.
		}
		if (verbose)
			System.out.print("\n");
	}
	
	/**
	 * Reads the Unique Fragments block of a MMP file.
	 * Each data row has seven tab-delimited items: the fragment idCode, an integer
	 * passed to {@code addFragment} as its second argument, and five strings passed
	 * as a String array.
	 * @param br
	 * @param rowCount Number of expected rows (from the header block)
	 * @param verbose if true, progress is printed to standard output
	 */
	private void readUniqueFragments(BufferedReader br, int rowCount, boolean verbose) {
		uniqueFragmentsIndex = new ArrayList(rowCount);
		if (verbose)
			System.out.print("Unique Fragments: #");
		try {
			String strLine;
			boolean mmpUniqueFragmentsBlock = false;
			// number of header lines to consume before data rows start
			int linesToRead = 17;
			int entries = 0;
			while ((strLine = br.readLine()) != null && entries < rowCount) {
				// NOTE(review): startsWith("") is true for every line; the tag literal seems to
				// have been lost (here and in the check further below) - restore from upstream.
				if (strLine.startsWith("") || (mmpUniqueFragmentsBlock == true && linesToRead > 0)) {
					mmpUniqueFragmentsBlock = true;
					linesToRead--;
				}
				else if (mmpUniqueFragmentsBlock == true && linesToRead == 0) {
					String[] items = strLine.split("\t", -1);
					if (items.length == 7) {
						// the position in uniqueFragmentsIndex is the fragment index used elsewhere
						uniqueFragmentsIndex.add(items[0]);
						mmpUniqueFragments.addFragment(items[0], Integer.parseInt(items[1]), new String[]{items[2], items[3], items[4], items[5], items[6]});
						printProgress(verbose, rowCount, entries);
						entries++;
					}
					else if (strLine.startsWith("")) {
						throw new IOException("mmpUniqueFragments: Bad number of entries");
					}
				}
			}
		}
		catch (IOException ioe) {
			// NOTE(review): the exception (including the one thrown above) is silently dropped.
		}
		if (verbose)
			System.out.print("\n");
	}
	
	/**
	 * Reads the Fragments block of a MMP file.
	 * Each data row has five tab-delimited items; item 3 is the cut type. For cut type
	 * "1" the fragment is indexed under item 0 alone, otherwise under items 0 and 1
	 * joined by a tab. The indexed data is {item 2, item 4} parsed as integers.
	 * @param br
	 * @param rowCount Number of expected rows (from the header block)
	 * @param verbose if true, progress is printed to standard output
	 */	
	private void readFragments(BufferedReader br, int rowCount, boolean verbose)  {
		if (verbose)
			System.out.print("Fragments:        #");
		try {
			String strLine;
			boolean mmpFragmentsBlock = false;
			// number of header lines to consume before data rows start
			int linesToRead = 9;
			int entries = 0;
			while ((strLine = br.readLine()) != null && entries < rowCount) {
				// NOTE(review): startsWith("") is true for every line; the tag literal seems to
				// have been lost (here and in the check further below) - restore from upstream.
				if (strLine.startsWith("") || (mmpFragmentsBlock == true && linesToRead > 0)) {
					mmpFragmentsBlock = true;
					linesToRead--;
				}
				else if (mmpFragmentsBlock == true && linesToRead == 0) {
					String[] items = strLine.split("\t", -1);
					if (items.length == 5) {
						if (items[3].equals("1")) { // cutType
							addFragment(items[0], new int[]{Integer.parseInt(items[2]), Integer.parseInt(items[4])});
							// This is to index also {key-value} for smaller keys (used later to sort by similarity) that wouldn't be otherwise indexed
							if (mmpUniqueFragments.getFragmentAtoms(uniqueFragmentsIndex.get(Integer.parseInt(items[2]))) < keysMinAtoms) {
								addFragment(items[2], new int[]{Integer.parseInt(items[0]), Integer.parseInt(items[4])});
							}
						}
						else {
							addFragment(items[0] + "\t" + items[1], new int[]{Integer.parseInt(items[2]), Integer.parseInt(items[4])});
						}
						printProgress(verbose, rowCount, entries);
						entries++;
					}
				}
				else if (strLine.startsWith("")) {
					throw new IOException("mmpFragments: Bad number of entries");
				}
			}
		}
		catch (IOException ioe) {
			// NOTE(review): the exception (including the one thrown above) is silently dropped.
		}
		if (verbose)
			System.out.print("\n");
	}
	
	/**
	 * Reads the Matched Molecular Pairs block of a MMP file
	 * @param br
	 * @param rowCount Number of expected rows (from the header block)
	 */
	private void readMMPs(BufferedReader br, int rowCount, boolean verbose) {
		if (verbose)
			System.out.print("MMPs:             #");
		try {
			String strLine;
			boolean mmpBlock = false;
			int linesToRead = 11;
			int entries = 0;
			int value1Atoms = -1;
			HashMap>> tempMMPIndex = null;
			while ((strLine = br.readLine()) != null && entries < rowCount)   {
				if (strLine.startsWith("") || (mmpBlock == true && linesToRead > 0)) {
					mmpBlock = true;
					linesToRead--;
				}
				else if (mmpBlock == true && linesToRead == 0) {
					String[] items = strLine.split("\t", -1);
					if (items.length == 7) {
						if (version == "1.1") {
							int val1Atoms = Integer.parseInt(items[1]);
							if (val1Atoms != value1Atoms) {
								if (tempMMPIndex != null) {
									mmpIndex.putAll(tempMMPIndex);
								}
								tempMMPIndex = new HashMap>>();
								value1Atoms = val1Atoms;
							}
							tempMMPIndex = addTempMMP(tempMMPIndex, Integer.parseInt(items[0]), Integer.parseInt(items[3]), Integer.parseInt(items[2]), items[6].split("\\|", -1));
						}
						else {
							addMMP(Integer.parseInt(items[0]), Integer.parseInt(items[3]), Integer.parseInt(items[2]), items[6].split("\\|", -1));
						}
						printProgress(verbose, rowCount, entries);
						entries++;
					}
				}
				else if (strLine.startsWith("")) {
					throw new IOException("matchedMolecularPairs: Bad number of entries");
				}
			}
			if (tempMMPIndex != null) {
				mmpIndex.putAll(tempMMPIndex);
			}
		}
		catch (IOException ioe) {
		}
		if (verbose)
			System.out.print("\n");
	}
	
	/**
	 * In verbose mode, prints the progress of reading the MMP file as a text bar:
	 * one character each time the integer percentage changes, '#' when the new
	 * percentage is a multiple of ten and '.' otherwise.
	 * @param verbose nothing is printed when false
	 * @param rowCount Total number of rows
	 * @param entries Number of read entries
	 */
	private void printProgress(boolean verbose, int rowCount, int entries) {
		if (!verbose) {
			return;
		}
		double step = 100.0 / rowCount;
		double reached = (entries+1) * 100.0 / rowCount;
		double wholePercent = Math.floor(reached);
		if (wholePercent == Math.floor(reached - step)) {
			// still within the same integer percentage as the previous entry
			return;
		}
		System.out.print(wholePercent % 10.0 == 0 ? "#" : ".");
	}

	/**
	 * Adds a new fragment to {@code mmpFragmentsIndex}, creating the list for
	 * the given keys on first use.
	 * @param keys tab-delimited keys (one for single cut, two for double cut)
	 * @param data [valueFragmentIndex, molIndex]
	 */
	private void addFragment(String keys, int[] data) {
		// one lookup instead of containsKey + get + put
		List<int[]> datas = mmpFragmentsIndex.get(keys);
		if (datas == null) {
			datas = new ArrayList<int[]>();
			mmpFragmentsIndex.put(keys, datas);
		}
		datas.add(data);
	}
	
	/**
	 * Adds a new Matched Molecular Pair (for version 1.1)
	 * @param tempMMPIndex temporary container for the MMPs
	 * @param value1FragmentIndex 'seed' fragment index
	 * @param value2Atoms 'target' number of heavy atoms
	 * @param value2
	 * @param examples List of examples; the first item is the target 'value' idCode
	 * @return the temporary container with the newly added MMP
	 */
	private HashMap>> addTempMMP(HashMap>> tempMMPIndex, int value1FragmentIndex, int value2Atoms, int value2, String[] examples) {
		int[] val2_examples = new int[examples.length * 2 + 1];
		val2_examples[0] = value2;
		int counter = 1;
		for (String value2AndExample: examples) {
			String[] items = value2AndExample.split(",");
			val2_examples[counter] = Integer.parseInt(items[0]);
			val2_examples[counter+1] = Integer.parseInt(items[1]);
			counter += 2;
		}
		HashMap> values2 = new HashMap>();
		List values2OfSizeX = new ArrayList();
		if (tempMMPIndex.containsKey(value1FragmentIndex)) {
			values2 = tempMMPIndex.get(value1FragmentIndex);
			if (values2.containsKey(value2Atoms)) {
				values2OfSizeX = values2.get(value2Atoms);
			}
		}
		values2OfSizeX.add(val2_examples);
		values2.put(value2Atoms, values2OfSizeX);
		tempMMPIndex.put(value1FragmentIndex, values2);
		return tempMMPIndex;
	}
	
	/**
	 * Adds a new Matched Molecular Pair (for version 1.0)
	 * @param value1FragmentIndex 'seed' fragment index
	 * @param value2Atoms 'target' number of heavy atoms
	 * @param value2
	 * @param examples List of examples; the first item is the target 'value' idCode
	 */
	private void addMMP(int value1FragmentIndex, int value2Atoms, int value2, String[] examples) {
		int[] val2_examples = new int[examples.length * 2 + 1];
		val2_examples[0] = value2;
		int counter = 1;
		for (String value2AndExample: examples) {
			String[] items = value2AndExample.split(",");
			val2_examples[counter] = Integer.parseInt(items[0]);
			val2_examples[counter+1] = Integer.parseInt(items[1]);
			counter += 2;
		}
		HashMap> values2 = new HashMap>();
		List values2OfSizeX = new ArrayList();
		if (mmpIndex.containsKey(value1FragmentIndex)) {
			values2 = mmpIndex.get(value1FragmentIndex);
			if (values2.containsKey(value2Atoms)) {
				values2OfSizeX = values2.get(value2Atoms);
			}
		}
		values2OfSizeX.add(val2_examples);
		values2.put(value2Atoms, values2OfSizeX);
		mmpIndex.put(value1FragmentIndex, values2);
	}
	
	/**
	 * Returns the fragment index from one fragment idCode
	 * @param fragment idCode of a fragment
	 * @return position of the idCode in {@code uniqueFragmentsIndex}, or {@code null} if it is not present
	 */
	public Integer fragmentToFragmentIndex(String fragment) {
		// a single indexOf() scan replaces the former contains() + indexOf() pair
		int index = uniqueFragmentsIndex.indexOf(fragment);
		if (index < 0) {
			return null;
		}
		return index;
	}

	/**
	 * Return the fragment indexes from one (single cut) or several fragment idCodes  
	 * @param fragments List of fragment idCodes
	 * @return List of fragment indexes
	 */
	public Integer[] fragmentToFragmentIndex(String[] fragments) {
		Integer retVal[] = new Integer[fragments.length];
		// NOTE(review): the next line is truncated. The rest of this method and the beginning of
		// the following one (which computes a chemical space size) appear to be missing between
		// "i" and "chemicalSpace"; restore from upstream.
		for (int i=0; i chemicalSpace = mmpFragmentsIndex.get(keysString);
			// collect the distinct second elements (molecule indexes) of the indexed entries
			Set molList = new HashSet();
			for (int[] chemSpace: chemicalSpace) {
				molList.add(chemSpace[1]);
			}
			// TODO: loop through molList and count "real" number of molecules (i.e. same structure, different names)
			chemicalSpaceSize = molList.size(); 
		}
		return chemicalSpaceSize;
	}
	
	/**
	 * Returns the list of idCode & molecules names representing the chemical space of one constant part of a molecule
	 * @param keys array of one (single cut) or two (double cut) IDCodes of the 'keys' (constant part of the molecule)
	 * @param value the variable part of the molecule; not used yet but might be used to identify the current compound
	 * @param dataField name of a data field whose value is appended to each entry, or null for no data
	 * @return a list of tab-delimited idCode, idCoord, molecule names [and data]
	 */
	public List getChemicalSpace(String[] keys, String value, String dataField) {
		List chemicalSpaceMolecules = new ArrayList();
		List chemicalSpace = new ArrayList();
		Integer[] keysIndex = fragmentToFragmentIndex(keys);
		String keysString = keysToKeysString(keysIndex);
		int dataFieldIndex = -1;
		if (dataField != null) {
			// NOTE(review): the next line is truncated; the data field lookup and the retrieval of
			// the chemical space appear to be missing between "i" and "molList"; restore from upstream.
			for (int i=0; i molList = new HashSet();
			for (int[] chemSpace: chemicalSpace) {
				molList.add(chemSpace[1]);
			}
			for (Integer molIndex: molList) {				
				String idCode = molecules.get(molIndex);
				String idCoord = null;
				// one output entry per molecule registered under this idCode; all of them
				// reuse the coordinates of the first one
				for (MoleculeIndex moleculeIndex: wholeMoleculesIndex.get(idCode)) {
					if (idCoord == null) {
						idCoord = moleculeIndex.moleculeIDCoord;
					}
					if (dataFieldIndex == -1) {
						chemicalSpaceMolecules.add(idCode + "\t" + idCoord + "\t" + moleculeIndex.moleculeName);
					}
					else {
						chemicalSpaceMolecules.add(idCode + "\t" + idCoord + "\t" + moleculeIndex.moleculeName + "\t" + moleculeIndex.moleculeData[dataFieldIndex]);
					}
				}
			}
		}
		// NOTE(review): the commented-out loop below swallowed the end of getChemicalSpace and the
		// beginning of the following DWAR-generating method; the markup inside the appended string
		// literals also appears to have been stripped. Restore from upstream.
//		organize the array to put the current molecule on top
//		for (int i=0; i\n");
		dWAR.append("\n");
		List chemicalSpace = getChemicalSpace(keys, null, dataField);
		dWAR.append("\n");
		dWAR.append("\n");
		dWAR.append("\n");
		dWAR.append("\n");
		dWAR.append("\n");
		dWAR.append("\n");
		dWAR.append("\n");
		dWAR.append("\n");
		dWAR.append("\n");
		// column header line; a fourth column is added when a data field was requested
		if (dataField != null) {
			dWAR.append("Structure\tidcoordinates2D\tActelion No\t" + dataField + "\n");
		}
		else {
			dWAR.append("Structure\tidcoordinates2D\tActelion No\n");
		}
		// one row per chemical space entry
		for (String chemSpace: chemicalSpace) {
			dWAR.append(chemSpace + "\n");
		}
		dWAR.append("\n");
		if (dataFieldIndex != -1) {
			dWAR.append("\n");
			dWAR.append("\n");
			dWAR.append("\n");
			dWAR.append("\n");
			dWAR.append("\n");
			dWAR.append("\n");
			dWAR.append("\n");
			dWAR.append("\n");
		}
		dWAR.append("\n");
		dWAR.append("\n");
		dWAR.append("\">\n");
		dWAR.append("\n");
		
		dWAR.append("\n");
		dWAR.append("\n");
		dWAR.append("\n");
		dWAR.append("\n");
		dWAR.append("\n");
		dWAR.append("\n");
		dWAR.append("\n");
		dWAR.append("\n");
		dWAR.append("\n");
		dWAR.append("\n");
		dWAR.append("\n");
		dWAR.append("\n");
		dWAR.append("\n");
		return dWAR.toString();
	}
	
	/**
	 * Generates the DWAR file for the Matched Molecular Pairs corresponding to a specific seed 'value', target 'value', and list of data fields
	 * @param moleculeIDCode idCode of the seed molecule
	 * @param keys Array of one or two 'keys' idCodes (constant part of the molecule)
	 * @param value1 seed 'value' (variable part of the molecule)
	 * @param value2 target 'value' (variable part of the molecule)
	 * @param replacementSize size of the replacement (number of heavy atoms)
	 * @param properties List of data fields for which data should be retrieved
	 * @return the whole string of the generated DWAR file
	 */
	public String getMMPsDWAR(String moleculeIDCode, String[] keys, String value1, String value2, int replacementSize, List properties) {
		// NOTE(review): this copy of the method is damaged - text is missing in the loop headers further down and the
		// many append("\n") calls look like literals whose content was lost; compare with the upstream source.
		// minAtoms and maxAtoms are both replacementSize, so only replacements of exactly that relative size are returned
		List transformations = getTransformations(moleculeIDCode, keys, value1, replacementSize, replacementSize, null);
		// used to format the numerical deltas with at most two decimals
		NumberFormat formatter = new DecimalFormat("#.##");
		StringBuilder dWAR = new StringBuilder();
		dWAR.append("\n");
		dWAR.append("\n");
		// one extra line is written as soon as the first transformation leading to the target 'value' is found
		for (MatchedMolecularPair transformation: transformations) {
			if (transformation.value2.equals(value2)) {
				dWAR.append("\n");
				break;
			}
		}
		dWAR.append("\n");
		dWAR.append("\n");
		dWAR.append("\n");
		dWAR.append("\n");
		dWAR.append("\n");
		dWAR.append("\n");
		dWAR.append("\n");
		dWAR.append("\n");
		dWAR.append("\n");
		dWAR.append("\n");
		dWAR.append("\n");
		dWAR.append("\n");
		dWAR.append("\n");
		dWAR.append("\n");
		dWAR.append("\n");
		dWAR.append("\n");
		dWAR.append("\n");
		// tab-delimited column titles; every property adds three columns: (1), (2) and (delta)
		dWAR.append("FragFp 2	Structure (1)\tidcoordinates2D (1)\tStructure (2)\tidcoordinates2D (2)\tActelion No (1)\tActelion No (2)\tSimilarity");
		for (String property: properties) {
			dWAR.append("\t" + property + " (1)\t" + property + " (2)\t" + property + " (delta)");
		}
		dWAR.append("\n");
		// only the transformations ending at the target 'value' contribute rows
		for (MatchedMolecularPair transformation: transformations) {
			if (transformation.value2.equals(value2)) {
				// TODO: handling of multiple compounds with same structure (get(0), get(1)...)
				// NOTE(review): the next line is damaged (loop header, row output and the start of the condition are missing)
				for (int j=0; j") && !data1.startsWith("<") && !data2.equals("") && !data2.startsWith(">") && !data2.startsWith("<")) {
								// delta = value of compound 2 minus value of compound 1
								delta = formatter.format(Double.parseDouble(data2) - Double.parseDouble(data1));
							}
							dWAR.append("\t" + data1 + "\t" + data2 + "\t" + delta);
						}
					}
					dWAR.append("\n");
				}
			}
		}
		dWAR.append("\n");
		// two additional lines are written only when at least one property was requested
		if (properties.size() > 0) {
			dWAR.append("\n");
			dWAR.append("\n");
		}
		dWAR.append("\n");
		dWAR.append("\n");
		dWAR.append("\n");
		// advanced by 3 for every property
		int descriptionCounter = 1;
		for (String property: properties) {
			int fieldIndex = -1;
			// NOTE(review): the next line is damaged (loop header and the statements following it are missing)
			for (int i=0; i\n");
				dWAR.append("\n");
				dWAR.append("\n");
				descriptionCounter += 3;
			}
		}
		dWAR.append("\n");
		dWAR.append("\n");
		dWAR.append("\n");
		dWAR.append("\n");
		dWAR.append("\n");
		dWAR.append("\n");
		// three lines per property; the counter is advanced by 3 accordingly
		int filterCounter = 3;
		for (String property: properties) {
			dWAR.append("\n");
			dWAR.append("\n");
			dWAR.append("\n");
			filterCounter += 3;
		}
		dWAR.append("\n");
		dWAR.append("\n");
		dWAR.append("\n");
		dWAR.append("\n");
		dWAR.append("\n");
		dWAR.append("\n");
		dWAR.append("\n");
		dWAR.append("\n");
		dWAR.append("\n");
		dWAR.append("\n");
		dWAR.append("\n");
		dWAR.append("\n");
		dWAR.append("\n");
		dWAR.append("\n");
		dWAR.append("\n");
		dWAR.append("\n");
		dWAR.append("\n");
		dWAR.append("\n");
		dWAR.append("\n");
		dWAR.append("\n");
		dWAR.append("\n");
		dWAR.append("\n");
		dWAR.append("\n");
		return dWAR.toString();
	}
	
	/**
	 * Returns the number of transformations for a defined variable part of a molecule.
	 * Only replacements whose atom count lies between value1Atoms+minAtoms and value1Atoms+maxAtoms
	 * (both inclusive) are counted.
	 * @param value1 IDCode of the variable part of a molecule
	 * @param minAtoms minimal number of atoms in the replacement (relative to value1Atoms)
	 * @param maxAtoms maximal number of atoms in the replacement (relative to value1Atoms)
	 * @return the number of transformations; 0 if the fragment is unknown or has no entry in the MMP index
	 */
	public int getTransformationsSize(String value1, int minAtoms, int maxAtoms) {
		MMPUniqueFragment value1Fragment = mmpUniqueFragments.fragmentIDToFragment(value1);
		if (value1Fragment == null) {
			return 0;
		}
		Integer value1Atoms = value1Fragment.getFragmentAtoms();
		Integer value1Index = value1Fragment.getFragmentIndex();
		if (value1Index == null || !mmpIndex.containsKey(value1Index)) {
			return 0;
		}
		// replacement size (number of atoms) -> list of [value2Index, examples...] arrays
		HashMap<Integer, List<int[]>> mmps = mmpIndex.get(value1Index);
		int mmpSize = 0;
		for (int size=value1Atoms+minAtoms; size<=value1Atoms+maxAtoms; size++) {
			List<int[]> values2 = mmps.get(size);
			if (values2 != null) {
				mmpSize += values2.size();
			}
		}
		return mmpSize;
	}
	
	/**
	 * Returns the list of transformations for a defined variable part of a molecule.
	 * For every target 'value' whose molecule does not exist with the seeded 'keys', a virtual pair
	 * (seed molecule, molecule built from keys + target 'value') is put in front of the examples.
	 * @param moleculeIDCode IDCode of the input molecule
	 * @param keys array of one or two IDCodes of the constant part of the molecule
	 * @param value1 IDCode of the variable part of a molecule
	 * @param minAtoms minimal number of atoms in the replacement (relative to value1Atoms)
	 * @param maxAtoms maximal number of atoms in the replacement (relative to value1Atoms)
	 * @param sortBy SORT_BY_NUMBER_OF_EXAMPLES (also used when null) or SORT_BY_SIMILARITY; any other value leaves the list unsorted
	 * @return a list of transformations, ordered as requested by sortBy; empty if value1 is unknown
	 */
	private List<MatchedMolecularPair> getTransformations(String moleculeIDCode, String[] keys, String value1, int minAtoms, int maxAtoms, String sortBy) {
		List<MatchedMolecularPair> retVal = new ArrayList<MatchedMolecularPair>();
		// "index" (single cut) or "index1\tindex2" (double cut); null when an index is missing
		String keyIndexString = keysToKeysString(fragmentToFragmentIndex(keys));
		MMPUniqueFragment value1Fragment = mmpUniqueFragments.fragmentIDToFragment(value1);
		if (value1Fragment != null) {
			Integer value1Atoms = value1Fragment.getFragmentAtoms();
			Integer value1Index = value1Fragment.getFragmentIndex();
			// TODO: obtain 2 keysFragment for doublecuts and two keysFP, and use them in examplesToMolecules
			MMPUniqueFragment keysFragment = mmpUniqueFragments.fragmentIDToFragment(keys);
			String[] keysFP = null;
			if (keysFragment != null) {
				keysFP = keysFragment.getFragmentFP();
			}
			String[] value1FP = value1Fragment.getFragmentFP();
			if (value1Index != null && mmpIndex.containsKey(value1Index)) {
				// replacement size (number of atoms) -> list of [value2Index, examples...] arrays
				HashMap<Integer, List<int[]>> mmps = mmpIndex.get(value1Index);
				// all [valueIndex, molIndex] pairs known for the seeded key(s), or null
				List<int[]> fragmentsIndex = null;
				if (keyIndexString != null && mmpFragmentsIndex.containsKey(keyIndexString)) {
					fragmentsIndex = mmpFragmentsIndex.get(keyIndexString);
				}
				for (int size=value1Atoms+minAtoms; size<=value1Atoms+maxAtoms; size++) {
					if (!mmps.containsKey(size)) {
						continue;
					}
					List<int[]> values2 = mmps.get(size);
					for (int[] value2_and_examples: values2) {
						int value2Index = value2_and_examples[0];
						String value2 = uniqueFragmentsIndex.get(value2Index);
						// index of the existing molecule made of the seeded key(s) and value2, -1 if there is none
						int targetExists = -1;
						if (fragmentsIndex != null) {
							for (int[] fragmentIndex: fragmentsIndex) {
								if (fragmentIndex[0] == value2Index) {
									targetExists = fragmentIndex[1];
									break;
								}
							}
						}
						List<MatchedMolecularPairExamples> mmpExamples = examplesToMolecules(value2_and_examples, keysFP, value2Index, targetExists);
						if (targetExists == -1) {
							// the target molecule is not in the data set: add a virtual pair as first example
							ArrayList<MoleculeIndex> currents = wholeMoleculesIndex.get(moleculeIDCode);
							if (currents == null) {
								// the seed molecule is not in the data set either
								StereoMolecule seedMol = new StereoMolecule();
								IDCodeParser idCodeParser = new IDCodeParser();
								idCodeParser.parse(seedMol, moleculeIDCode);
								MoleculeIndex current = new MoleculeIndex(-1, "", moleculeIDCode, "", generateData(seedMol));
								currents = new ArrayList<MoleculeIndex>();
								currents.add(current);
							}
							else {
								currents.get(0).setIDCode(moleculeIDCode);
							}
							StereoMolecule virtualMol = molFromKeyValue(keys, value2);
							MoleculeIndex virtual = new MoleculeIndex(-1, "", virtualMol.getIDCode(), "", generateData(virtualMol));
							ArrayList<MoleculeIndex> virtuals = new ArrayList<MoleculeIndex>();
							virtuals.add(virtual);
							mmpExamples.add(0, new MatchedMolecularPairExamples(currents, virtuals, 6));
						}
						Collections.sort(mmpExamples, EXAMPLES_SIMILARITY_SORT);
						MMPUniqueFragment value2Fragment = mmpUniqueFragments.fragmentIDToFragment(value2);
						String[] value2FP = value2Fragment.getFragmentFP();
						retVal.add(new MatchedMolecularPair(value1, value1Index, value1Atoms, value1FP, value2, value2Index, size, value2FP, mmpExamples, dataFields.size(), targetExists));
					}
				}
			}
		}
		if (sortBy == null || sortBy.equals(SORT_BY_NUMBER_OF_EXAMPLES)) {
			Collections.sort(retVal, NUMBER_OF_EXAMPLES_SORT);
		}
		else if (sortBy.equals(SORT_BY_SIMILARITY)) {
			Collections.sort(retVal, SIMILARITY_SORT);
		}
		return retVal;
	}
	
	/**
	 * Generates the DWAR file for the Transformations corresponding to a specific seed 'value', number of atoms for the replacement, environment size, and list of data fields
	 * @param moleculeIDCode idCode of the seed molecule
	 * @param keys Array of 'keys' idCodes (constant part of the molecule)
	 * @param value1 seed 'value' idCode (variable part of the molecule)
	 * @param minAtoms minimal number of atoms in the replacement (relative to value1Atoms)
	 * @param maxAtoms maximal number of atoms in the replacement (relative to value1Atoms)
	 * @param environmentSize size of the environment (0-5)
	 * @param properties List of numerical data fields
	 * @return String of the whole DWAR file
	 */
	public String getTransformationsDWAR(String moleculeIDCode, String[] keys, String value1, int minAtoms, int maxAtoms, Integer environmentSize, List properties) {
		// NOTE(review): this copy of the method is damaged - text is missing in two loop headers further down and the
		// many append("\n") calls look like literals whose content was lost; compare with the upstream source.
		// a missing environment size is treated as 0
		if (environmentSize == null) {
			environmentSize = 0;
		}
		// sortBy is null, so the transformations come sorted with NUMBER_OF_EXAMPLES_SORT
		List transformations = getTransformations(moleculeIDCode, keys, value1, minAtoms, maxAtoms, null);
		NumberFormat formatter = new DecimalFormat("#.##");
		StringBuilder dWAR = new StringBuilder();
		dWAR.append("\n");
		dWAR.append("\n");
		dWAR.append("\n");
		dWAR.append("\n");
		dWAR.append("\n");
		dWAR.append("\n");
		dWAR.append("\n");
		dWAR.append("\n");
		dWAR.append("\n");
		dWAR.append("\n");
		dWAR.append("\n");
		dWAR.append("\n");
		dWAR.append("\n");
		dWAR.append("\n");
		dWAR.append("\n");
		// tab-delimited column titles; every property adds three columns: Avg, SD and n
		dWAR.append("Transformation\tProduct\tDeltaAtoms\tStructure\tidcoordinates2D\tActelion No\tExists\tExamples");
		for (String property: properties) {
			dWAR.append("\t" + property + " Avg\t" + property + " SD\t" + property + " n");
		}
		dWAR.append("\n");
		// one row per transformation
		for (MatchedMolecularPair transformation: transformations) {
			transformation.calcTransformationString();
			String buildingBlock = generateBuildingBlock(transformation.value2);
			// DeltaAtoms = atoms of the target 'value' minus atoms of the seed 'value'
			dWAR.append(transformation.transformationString + "\t" + buildingBlock + "\t" + (transformation.value2Atoms - transformation.value1Atoms));
			if (!transformation.targetExists) {
				// target not in the data set: structure is generated from keys + value2, coordinates and name stay empty
				StereoMolecule virtualMol = molFromKeyValue(keys, transformation.value2);
				dWAR.append("\t" + virtualMol.getIDCode() + "\t\t");
			}
			else {
				// NOTE(review): structure and coordinates are read from example2 but the name from example1 - confirm this is intended
				dWAR.append("\t" + transformation.mmpExamples.get(0).example2.get(0).moleculeIDCode + "\t" + transformation.mmpExamples.get(0).example2.get(0).moleculeIDCoord + "\t" + transformation.mmpExamples.get(0).example1.get(0).moleculeName);
			}
			dWAR.append("\t" + transformation.targetExists + "\t" + transformation.numberOfExamples);
			for (String property: properties) {
				int fieldIndex = -1;
				// NOTE(review): the next line is damaged (the rest of the row output and the end of both loops are missing)
				for (int i=0; i\n");
		if (properties.size() > 0) {
			dWAR.append("\n");
			// a different line is written for several properties than for a single one (content lost in this copy)
			if (properties.size() > 1) {
				dWAR.append("\n");
			}
			else {
				dWAR.append("\n");
			}
		}
		dWAR.append("\n");
		dWAR.append("\n");
		dWAR.append("\n");
		dWAR.append("\n");
		dWAR.append("\n");
		// advanced by 3 for every property
		int descriptionCounter = 3;
		for (String property: properties) {
			int fieldIndex = -1;
			// NOTE(review): the next line is damaged (loop header and the statements following it are missing)
			for (int i=0; i\n");
				dWAR.append("\n");
				dWAR.append("\n");
				descriptionCounter += 3;
			}
		}
		dWAR.append("\n");
		dWAR.append("\n");
		dWAR.append("\n");
		dWAR.append("\n");
		dWAR.append("\n");
		// incremented once for every filter line written below
		int filterCounter = 2;
		// this extra filter line is only written when a range of replacement sizes was requested
		if (minAtoms != maxAtoms) {
			dWAR.append("\n");
			filterCounter++;
		}
		dWAR.append("\n");
		filterCounter++;
		dWAR.append("\n");
		filterCounter++;
		// one filter line per property
		for (String property: properties) {
			dWAR.append("\n");
//			dWAR.append("\n");
			filterCounter += 1;
		}
		dWAR.append("\n");
		dWAR.append("\n");
		dWAR.append("\n");
		dWAR.append("\n");
		// the table name mentions the environment size unless it is 0
		String tableName = "Transformations";
		if (environmentSize != 0) {
			tableName = "Transformations (environment size " + Integer.toString(environmentSize) + ")";
		}
		dWAR.append("\n");
		dWAR.append("\n");
		dWAR.append("\n");
		dWAR.append("\n");
		dWAR.append("\n");
		dWAR.append("\n");
		dWAR.append("\n");
		dWAR.append("\n");
		dWAR.append("\n");
		dWAR.append("\n");
		dWAR.append("\n");
		dWAR.append("\n");
		dWAR.append("\n");
		dWAR.append("\n");
		return dWAR.toString();
	}
	
	/**
	 * Returns the transformations of a seed 'value' as a table of strings.
	 * The transformations are obtained without seed molecule and in the default order (number of examples).
	 * @param keys Array of 'keys' idCodes (constant part of the molecule)
	 * @param value1 seed 'value' idCode (variable part of the molecule)
	 * @param minAtoms minimal number of atoms in the replacement (relative to value1Atoms)
	 * @param maxAtoms maximal number of atoms in the replacement (relative to value1Atoms)
	 * @return one row per transformation: {value1, value2, number of examples, "1" if the target exists else "0"}
	 */
	public List<String[]> transformationsListToTable(String[] keys, String value1, int minAtoms, int maxAtoms) {
		List<MatchedMolecularPair> transformations = getTransformations(null, keys, value1, minAtoms, maxAtoms, null);
		List<String[]> retVal = new ArrayList<String[]>(transformations.size());
		for (MatchedMolecularPair transformation: transformations) {
			String isCurrent = transformation.targetExists ? "1" : "0";
			retVal.add(new String[]{transformation.value1, transformation.value2, Integer.toString(transformation.numberOfExamples), isCurrent});
		}
		return retVal;
	}
	
	/**
	 * Generates the main JSON file for Transformations and Matched Molecular Pairs
	 * @param moleculeIDCode idCode of the seed molecule
	 * @param keys Array of 'keys' idCodes (constant part of the molecule)
	 * @param value1 seed 'value' idCode (variable part of the molecule)
	 * @param minAtoms minimal number of atoms in the replacement (relative to value1Atoms)
	 * @param maxAtoms maximal number of atoms in the replacement (relative to value1Atoms)
	 * @param sortBy SORT_BY_SIMILARITY, SORT_BY_NUMBER_OF_EXAMPLES
	 * @return JSON string
	 */
	public String getTransformationsJSON(String moleculeIDCode, String[] keys, String value1, int minAtoms, int maxAtoms, String sortBy) {
		// NOTE(review): several loop headers of this copy are damaged (text between '<' and '>' is missing);
		// compare with the upstream source before compiling.
		List transformations = getTransformations(moleculeIDCode, keys, value1, minAtoms, maxAtoms, sortBy);
		// numbers are written with at most two decimals
		NumberFormat formatter = new DecimalFormat("#.##");
		StringBuilder jSON = new StringBuilder();
		jSON.append("{\"transformations\": [");
		// number of transformations already written; drives the ", " separator
		int counter = 0;
		for (MatchedMolecularPair transformation: transformations) {
			if (counter > 0) {
				jSON.append(", ");
			}
			// backslashes of the idCodes are doubled so that they survive inside a JSON string
			jSON.append( "\n\t{\"value1\": \"" + transformation.value1.replace("\\", "\\\\") + "\"");
			jSON.append(",\n\t \"value2\": \"" + transformation.value2.replace("\\", "\\\\") + "\"");
			jSON.append(",\n\t \"n\": " + transformation.numberOfExamples);
			jSON.append(",\n\t \"delta_atoms\": " + (transformation.value2Atoms - transformation.value1Atoms));
			jSON.append(",\n\t \"similarity\": " + transformation.similarity);
			String isCurrent = transformation.targetExists ? "true" : "false";
			jSON.append(",\n\t \"current\": " + isCurrent + "");
			try {
				jSON.append(",\n\t \"image\": \"" + getB64Image(getImage(transformation.value1, transformation.value2, 580, 266)) + "\"");
			}
			catch (Exception e) {
				// cannot generate image: the "image" attribute is simply left out for this transformation
			}
			// TODO: handling of multiple compounds with same structure (get(0), get(1)...)
			// pairs of molecule names, one [name1, name2] entry per example
			// NOTE(review): molecule names are written without any JSON escaping - confirm they cannot contain '"' or '\'
			jSON.append(",\n\t \"compounds\": [");
			for (int j=0; j 0) {
					jSON.append(", ");
				}
				jSON.append("[\"" + transformation.mmpExamples.get(j).example1.get(0).moleculeName + "\", \"" + transformation.mmpExamples.get(j).example2.get(0).moleculeName + "\"]");
			}
			jSON.append("]");
			// similarity of every example
			jSON.append(",\n\t \"similarities\": [");
			for (int j=0; j 0) {
					jSON.append(", ");
				}
				jSON.append(transformation.mmpExamples.get(j).similarity);
			}
			jSON.append("]");
			// TODO: handling of multiple compounds with same structure (get(0), get(1)...)
			// pairs of idCodes, one [idCode1, idCode2] entry per example
			jSON.append(",\n\t \"structures\": [");
			for (int j=0; j 0) {
					jSON.append(", ");
				}
				jSON.append("[\"" + transformation.mmpExamples.get(j).example1.get(0).moleculeIDCode.replace("\\", "\\\\") + "\", \"" + transformation.mmpExamples.get(j).example2.get(0).moleculeIDCode.replace("\\", "\\\\") + "\"]");
			}
			jSON.append("]");
			// pairs of encoded coordinates, one entry per example
			jSON.append(",\n\t \"coordinates\": [");
			for (int j=0; j 0) {
					jSON.append(", ");
				}
				jSON.append("[\"" + transformation.mmpExamples.get(j).example1.get(0).moleculeIDCoord.replace("\\", "\\\\") + "\", \"" + transformation.mmpExamples.get(j).example2.get(0).moleculeIDCoord.replace("\\", "\\\\") + "\"]");
			}
			jSON.append("]");
			// one object per data field (index i): statistics for the six similarity levels, then the raw values
			jSON.append(",\n\t \"datas\": [");
			for (int i=0; i 0) {
					jSON.append(", ");
				}
				jSON.append("{");
				for (int j=0; j<6; j++) {
					if (j > 0) {
						jSON.append(", ");
					}
					jSON.append( "\n\t\t\"similarity" + j + "\":");
					jSON.append( "\n\t\t{\"n\": " + transformation.n[i][j]);
					jSON.append(",\n\t\t \"increase\": " + transformation.increase[i][j]);
					jSON.append(",\n\t\t \"decrease\": " + transformation.decrease[i][j]);
					jSON.append(",\n\t\t \"neutral\": " + transformation.neutral[i][j]);
					if (transformation.average[i][j] != null) {
						jSON.append(",\n\t\t \"average\": " + formatter.format(transformation.average[i][j]));
					}
					else {
						// string concatenation of the null reference writes the JSON literal null
						jSON.append(",\n\t\t \"average\": " + transformation.average[i][j]);
					}
					if (transformation.sd[i][j] != null) {
						// NaN is not a valid JSON number; it is written as 0.0
						if (Double.isNaN(transformation.sd[i][j])) {
							jSON.append(",\n\t\t \"sd\": 0.0");
						}
						else {
							jSON.append(",\n\t\t \"sd\": " + formatter.format(transformation.sd[i][j]));
						}
					}
					else {
						// string concatenation of the null reference writes the JSON literal null
						jSON.append(",\n\t\t \"sd\": " + transformation.sd[i][j]);
					}
					jSON.append("}");
				}
				jSON.append(",\n\t\t \"data\": [");
				for (int j=0; j example1 = transformation.mmpExamples.get(j).example1;
					ArrayList example2 = transformation.mmpExamples.get(j).example2;
					if (j > 0) {
						jSON.append(",");
					}
					// TODO: handling of multiple compounds with same structure (get(0), get(1)...)
					String data1 = "";
					if (example1.get(0).moleculeData != null) {
						data1 = example1.get(0).moleculeData[i];
					}
					String data2 = "";
					if (example2.get(0).moleculeData != null) {
						data2 = example2.get(0).moleculeData[i]; // virtual compound
					}
					// empty value -> "n.a.", qualified value (starting with '>' or '<') -> quoted as is, otherwise formatted number
					if (data1.equals("")) {
						data1 = "\"n.a.\"";
					}
					else if (data1.startsWith(">") || data1.startsWith("<")) {
						data1 = "\"" + data1 + "\"";
					}
					else {
						data1 = formatter.format(Double.parseDouble(data1));
					}
					if (data2.equals("")) {
						data2 = "\"n.a.\"";
					}
					else if (data2.startsWith(">") || data2.startsWith("<")) {
						data2 = "\"" + data2 + "\"";
					}
					else {
						data2 = formatter.format(Double.parseDouble(data2));
					}
					jSON.append("[" + data1  + ", " + data2 + "]");
				}
				jSON.append("]}");
			}
			jSON.append("]}");
			counter += 1;
		}
		jSON.append("]}");
	return jSON.toString();
	}
	
	/**
	 * For double cuts, the R-Group of the second fragment has to be correctly tagged 
	 * @param mol input fragment
	 * @return modified StereoMolecule
	 */
	private StereoMolecule changeR1ToR2(StereoMolecule mol) {
		for (int i=0; i= MMPFragmenter.FRAGMENT_ATOMIC_NO) {
				mol.setAtomQueryFeature(i, StereoMolecule.cAtomQFAny, true);
			}
			else {
				mol.setAtomQueryFeature(i, StereoMolecule.cAtomQFNoMoreNeighbours, true);
			}
		}
		return mol.getIDCode();
	}
	
	/**
	 * Generates a molecule from one 'value' and one or two 'keys'
	 * @param keys Array of 'keys' idCodes (constant part of the molecule)
	 * @param value 'value' idCode (variable part of the molecule)
	 * @return newly generated molecule
	 */
	private StereoMolecule molFromKeyValue(String[] keys, String value) {
		// Every array below has one slot per recorded attachment bond (two per key):
		// firstAtoms = attachment-point atom, rBonds = the bond, rGroupAtoms = which end of the bond (0 or 1)
		// the attachment point is, secondAtoms = atom at the other end, rGroupsIndex = label of the attachment point
		int [] firstAtoms = new int[2*keys.length];
		int [] rBonds = new int[2*keys.length];
		int [] rGroupAtoms = new int[2*keys.length];
		int [] secondAtoms = new int[2*keys.length];
		int [] rGroupsIndex = new int[2*keys.length];
		// next free slot for each attachment label
		int [] rGroupCounters = new int[keys.length];
		int atomLabel;
		// mol1 starts as the 'value' and receives the key fragment(s)
		StereoMolecule mol1 = new StereoMolecule();
		StereoMolecule mol2 = new StereoMolecule();
		IDCodeParser idCodeParser = new IDCodeParser();
		idCodeParser.parse(mol1, value);
		idCodeParser.parse(mol2, keys[0]);
		mol1.addMolecule(mol2);
		if (keys.length == 2) {
			// double cut: the second key is passed through changeR1ToR2 before being added
			StereoMolecule mol3 = new StereoMolecule();
			idCodeParser.parse(mol3, keys[1]);
			mol3 = changeR1ToR2(mol3);
			mol1.addMolecule(mol3);
			// slots 2 and 3 are used for the second attachment label
			rGroupCounters[1] = 2;
		}
		mol1.ensureHelperArrays(StereoMolecule.cHelperBitNeighbours);
		// NOTE(review): the next line is damaged (loop header and the declarations of atom1/atom2 are missing);
		// presumably atom1 and atom2 are the two atoms of 'bond' - confirm against the upstream source.
		for (int bond=0; bond= 142 || mol1.getAtomicNo(atom2) == 0 || mol1.getAtomicNo(atom2) >= 142) {
				if (mol1.getAtomicNo(atom1) == 0 || mol1.getAtomicNo(atom1) >= 142) {
					// atom1 is the attachment point
					atomLabel = mol1.getAtomicNo(atom1) - MMPFragmenter.FRAGMENT_ATOMIC_NO;
					rBonds[rGroupCounters[atomLabel]] = bond;
					firstAtoms[rGroupCounters[atomLabel]] = atom1;
					rGroupAtoms[rGroupCounters[atomLabel]] = 0;
					secondAtoms[rGroupCounters[atomLabel]] = atom2;
					rGroupsIndex[rGroupCounters[atomLabel]] = atomLabel;
				}
				else {
					// atom2 is the attachment point
					atomLabel = mol1.getAtomicNo(atom2) - MMPFragmenter.FRAGMENT_ATOMIC_NO;
					rBonds[rGroupCounters[atomLabel]] = bond;
					firstAtoms[rGroupCounters[atomLabel]] = atom2;
					rGroupAtoms[rGroupCounters[atomLabel]] = 1;
					secondAtoms[rGroupCounters[atomLabel]] = atom1;
					rGroupsIndex[rGroupCounters[atomLabel]] = atomLabel;
				}
				rGroupCounters[atomLabel]++;
			}
		}
		// join the two fragments of the first attachment label: the attachment end of bond 0 is re-pointed to the
		// real neighbour recorded in slot 1, then bond 1 and both attachment-point atoms are removed
		mol1.setBondAtom(rGroupAtoms[0], rBonds[0], secondAtoms[1]);
		mol1.markBondForDeletion(rBonds[1]);
		mol1.markAtomForDeletion(firstAtoms[1]);
		mol1.markAtomForDeletion(firstAtoms[0]);
		if (keys.length == 2) {
			// same operation for the second attachment label (slots 2 and 3)
			mol1.setBondAtom(rGroupAtoms[2], rBonds[2], secondAtoms[3]);
			mol1.markBondForDeletion(rBonds[3]);
			mol1.markAtomForDeletion(firstAtoms[3]);
			mol1.markAtomForDeletion(firstAtoms[2]);			
		}
		mol1.deleteMarkedAtomsAndBonds();
//		return mol1.getIDCode();
		return mol1;
	}
	
	/**
	 * Returns a list of MatchedMolecularPairExamples from an array of molecule indexes
	 * @param examples array whose first element is skipped (callers pass the target 'value' index there), followed by molecule indexes
	 * @param keys1FP fingerprints of the seeded 'keys'; may be null
	 * @param value2Index index of the target 'value'
	 * @param targetExists index of the existing target molecule (after replacing the seed 'value' by the target 'value' in the seeded molecule), -1 if it does not exist
	 * @return a list of MatchedMolecularPairExamples
	 */
	private List examplesToMolecules(int[] examples, String[] keys1FP, int value2Index, int targetExists) {
		List retVal = new ArrayList();
		// entries of mmpFragmentsIndex stored under the target 'value' index used as key, or null
		List fragmentsIndex = null;
		if (mmpFragmentsIndex.containsKey(Integer.toString(value2Index))) {
			fragmentsIndex = mmpFragmentsIndex.get(Integer.toString(value2Index));
		}
		// pass keyIndex and use that instead of value2Index in case of too small value
		// NOTE(review): the next line is damaged (the loop header after 'i' is missing); the body reads
		// examples[i] and examples[i+1] as the two molecules of one pair.
		for (int i=1; i example1 = molIndexToMolecule(examples[i]);
			ArrayList example2 = molIndexToMolecule(examples[i+1]);
			// stays -1 when the second molecule is not found in fragmentsIndex
			int similarity = -1;
			if (fragmentsIndex != null) { // fragmentsIndex contains all [valueIndex, molIndex] for the seeded key (in this case the value, to find the key)
				for (int[] fragmentIndex: fragmentsIndex) {
					if (fragmentIndex[1] == examples[i+1]) {
						if (fragmentIndex[1] == targetExists) {
							// the second molecule is the target molecule itself
							similarity = 6;
						}
						else {
							similarity = 0;
							int keyIndex = fragmentIndex[0];
							String keyID = uniqueFragmentsIndex.get(keyIndex);
							MMPUniqueFragment keysFragment = mmpUniqueFragments.fragmentIDToFragment(keyID);
							String[] keys2FP = keysFragment.getFragmentFP();
							if (keys1FP != null) {
								// highest fingerprint position at which both keys agree, counted from 1
								for (int j=keys1FP.length-1; j>=0; j--) {
									if (keys1FP[j].equals(keys2FP[j])) {
										similarity = j + 1;
										break;
									}
								}
							}
						}
						break;
					}
				}
			}
			MatchedMolecularPairExamples matchedMolecularPairExamples = new MatchedMolecularPairExamples(example1, example2, similarity);
			retVal.add(matchedMolecularPairExamples);
		}
		return retVal;
	}
	
	/**
	 * Returns the list of MoleculeIndex objects (all entries sharing one structure) for one molIndex.
	 * As a side effect, the idCode of the structure is written into every returned entry.
	 * @param molIndex index of the structure in the list of molecules
	 * @return the list stored in wholeMoleculesIndex for that structure (not a copy)
	 */
	private ArrayList<MoleculeIndex> molIndexToMolecule(int molIndex) {
		String moleculeIDCode = molecules.get(molIndex);
		ArrayList<MoleculeIndex> moleculesIndex = wholeMoleculesIndex.get(moleculeIDCode);
		// here I could put the IDCode just for the first entry, since I only read the first one later on...
		for (MoleculeIndex moleculeIndex: moleculesIndex) {
			moleculeIndex.setIDCode(moleculeIDCode);
		}
		return moleculesIndex;
	}
	
	/**
	 * Returns the idCode from one compound name, null if the compound is not found
	 * @param molName name of a compound
	 * @return null or idCode
	 */
	public String getIDCodeFromMolName(String molName) {
		for (Entry> cursor : wholeMoleculesIndex.entrySet()) {
			for (MoleculeIndex moleculeIndex: cursor.getValue()) {
				if (moleculeIndex.moleculeName.equals(molName)) {
					if (cursor.getValue().get(0).moleculeIDCoord != null) {
						return cursor.getKey() + "\t" + cursor.getValue().get(0).moleculeIDCoord;
					}
					return cursor.getKey();
				}
			}
		}
		return null;
//		Iterator it = wholeMoleculesIndex.keySet().iterator();
//		while (it.hasNext()) {
//			String idCode = it.next();
//			List moleculesIndex = wholeMoleculesIndex.get(idCode);
//			for (MoleculeIndex moleculeIndex: moleculesIndex) {
//				if (moleculeIndex.moleculeName.equals(molName)) {
//					retVal = idCode;
//					return retVal;
//				}
//			}
//		}
//		return retVal;
	}

	/**
	 * Encodes one or two fragment indexes as a single string:
	 * the index itself for one key, both indexes separated by a tab for two keys.
	 * @param keys array of fragment indexes
	 * @return the encoded string; null if the array does not hold exactly one or two non-null indexes
	 */
	private String keysToKeysString(Integer[] keys) {
		switch (keys.length) {
			case 1:
				if (keys[0] == null) {
					return null;
				}
				return keys[0].toString();
			case 2:
				if (keys[0] == null || keys[1] == null) {
					return null;
				}
				return keys[0].toString() + "\t" + keys[1].toString();
			default:
				return null;
		}
	}
	
	/**
	 * Collects one attribute of every data field, in the order of the data fields.
	 * A missing long field name falls back to the field name, a missing category name to "Other".
	 * @param what Requested data (fieldName, longFieldName, categoryName, percentile5, percentile95)
	 * @return List with one entry per data field; empty if 'what' is none of the supported values
	 */
	public List getDataFields(String what) {
		List values = new ArrayList();
		for (DataField field: dataFields) {
			switch (what) {
				case "fieldName":
					values.add(field.fieldName);
					break;
				case "longFieldName":
					values.add(field.longFieldName != null ? field.longFieldName : field.fieldName);
					break;
				case "categoryName":
					values.add(field.categoryName != null ? field.categoryName : "Other");
					break;
				case "percentile5":
					values.add(field.percentile5);
					break;
				case "percentile95":
					values.add(field.percentile95);
					break;
				default:
					break;
			}
		}
		return values;
	}
	
	/**
	 * Returns the requested general information of the data set
	 * @param what Requested information (datasetName, date, numberOfMolecules, randomMoleculeName)
	 * @return the requested information; null for an unsupported request, or for randomMoleculeName
	 *         when no molecule name is available
	 */
	public String getWhat(String what) {
		if (what.equals("datasetName")) {
			return datasetName;
		}
		if (what.equals("date")) {
			return date;
		}
		if (what.equals("numberOfMolecules")) {
			// number of distinct structures
			return Integer.toString(wholeMoleculesIndex.size());
		}
		if (what.equals("randomMoleculeName")) {
			if (molecules.size() == 0) {
				return null;
			}
			int index = new Random().nextInt(molecules.size());
			ArrayList<MoleculeIndex> moleculesIndex = wholeMoleculesIndex.get(molecules.get(index));
			// guard against a structure without any entry
			if (moleculesIndex != null && moleculesIndex.size() > 0) {
				return moleculesIndex.get(0).moleculeName;
			}
			return null;
		}
		return null;
	}
	
	/**
	 * Generates an image of the transformation from the seed 'value' to the target 'value'.
	 * When value2 is null, only the seed 'value' is drawn.
	 * @param value1 seed 'value' idCode
	 * @param value2 target 'value' idCode; may be null
	 * @param width of the image
	 * @param height of the image
	 * @return BufferedImage object
	 */
	private static BufferedImage getImage(String value1, String value2, int width, int height) {
		BufferedImage bufferedImage = new BufferedImage(width, height, BufferedImage.TYPE_INT_ARGB);
		GenericRectangle viewRect = new GenericRectangle(0, 0, width, height);
		ExtendedDepictor extendedDepictor;
		StereoMolecule mol1 = new StereoMolecule();
		IDCodeParser idCodeParser = new IDCodeParser();
		idCodeParser.parse(mol1, value1);
		if (value2 == null) {
			extendedDepictor = new ExtendedDepictor(new StereoMolecule[]{mol1}, null);
		}
		else {
			StereoMolecule mol2 = new StereoMolecule();
			idCodeParser.parse(mol2, value2);
			Reaction rxn = new Reaction(new StereoMolecule[]{mol1, mol2}, 1);
			extendedDepictor = new ExtendedDepictor(rxn, rxn.getDrawingObjects(), true);
		}
		Graphics2D graphics2D = bufferedImage.createGraphics();
		try {
			GenericDrawContext context = new SwingDrawContext(graphics2D);
			extendedDepictor.validateView(context, viewRect, AbstractDepictor.cModeInflateToMaxAVBL + 45);
			RenderingHints renderingHints = new RenderingHints(RenderingHints.KEY_ANTIALIASING, RenderingHints.VALUE_ANTIALIAS_ON);
			graphics2D.addRenderingHints(renderingHints);
			extendedDepictor.paint(context);
		}
		finally {
			// release the graphics context; the pixels already drawn stay in the image
			graphics2D.dispose();
		}
		return bufferedImage;
	}
	
	/**
	 * Generates the Base64-encoded representation of an image (PNG format)
	 * @param bufferedImage image to encode
	 * @return Base64-encoded string
	 * @throws IOException if the image cannot be written
	 */
	private static String getB64Image(BufferedImage bufferedImage) throws IOException {
		ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
		// ImageIO.write does not close the stream it is given; the encoder has to be closed here
		// so that its last (possibly padded) group is written before the result is read
		try (OutputStream b64 = new Base64.OutputStream(outputStream)) {
			ImageIO.write(bufferedImage, "png", b64);
		}
		return outputStream.toString("UTF-8");
	}
	
	private String[] generateData(StereoMolecule mol) {
		String[] datas = new String[dataFields.size()];
		Arrays.fill(datas, "");
		for (int i=0; i




© 2015 - 2025 Weber Informatics LLC | Privacy Policy