All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.actelion.research.chem.io.CompoundFileHelper Maven / Gradle / Ivy

There is a newer version: 2024.11.2
Show newest version
/*
* Copyright (c) 1997 - 2016
* Actelion Pharmaceuticals Ltd.
* Gewerbestrasse 16
* CH-4123 Allschwil, Switzerland
*
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
*    list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright notice,
*    this list of conditions and the following disclaimer in the documentation
*    and/or other materials provided with the distribution.
* 3. Neither the name of the the copyright holder nor the
*    names of its contributors may be used to endorse or promote products
*    derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
*/

package com.actelion.research.chem.io;

import com.actelion.research.chem.Canonizer;
import com.actelion.research.chem.MolfileParser;
import com.actelion.research.chem.StereoMolecule;
import com.actelion.research.chem.reaction.Reaction;

import java.io.*;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;

public abstract class CompoundFileHelper {
	public static final int cFileTypeMask = 0x007FFFFF;
	public static final int cFileTypeDataWarrior = 0x00000001;
	public static final int cFileTypeDataWarriorTemplate = 0x00000002;
	public static final int cFileTypeDataWarriorQuery = 0x00000004;
	public static final int cFileTypeDataWarriorMacro = 0x00000008;
	public static final int cFileTypeTextTabDelimited = 0x00000010;
    public static final int cFileTypeTextCommaSeparated = 0x00000020;
	public static final int cFileTypeTextSemicolonSeparated = 0x00000040;
	public static final int cFileTypeTextVLineSeparated = 0x00000080;
	public static final int cFileTypeTextAnyCSV = cFileTypeTextCommaSeparated | cFileTypeTextSemicolonSeparated | cFileTypeTextVLineSeparated;
    public static final int cFileTypeTextAny = cFileTypeTextTabDelimited | cFileTypeTextAnyCSV;
	public static final int cFileTypeSDV3 = 0x00000100;
    public static final int cFileTypeSDV2 = 0x00000200;
    public static final int cFileTypeSD = cFileTypeSDV3 | cFileTypeSDV2;
	public static final int cFileTypeRXN = 0x00000400;
	public static final int cFileTypeSOM = 0x00000800;
	public static final int cFileTypeJPG = 0x00001000;
	public static final int cFileTypeGIF = 0x00002000;
	public static final int cFileTypePNG = 0x00004000;
	public static final int cFileTypeSVG = 0x00008000;
	public static final int cFileTypePictureFile = cFileTypeJPG | cFileTypeGIF | cFileTypePNG | cFileTypeSVG;
    public static final int cFileTypeRDV3 = 0x00010000;
    public static final int cFileTypeRDV2 = 0x00020000;
    public static final int cFileTypeRD = cFileTypeRDV3 | cFileTypeRDV2;
	public static final int cFileTypeMOL = 0x00040000;
	public static final int cFileTypeMOL2 = 0x00080000;
	public static final int cFileTypePDB = 0x00100000;
	public static final int cFileTypeMMTF = 0x00200000;
	public static final int cFileTypeProtein = cFileTypePDB | cFileTypeMMTF;
	public static final int cFileTypeSDGZ = 0x00400000;
    public static final int cFileTypeUnknown = -1;
	public static final int cFileTypeDirectory = -2;

	public static final int cFileTypeCompoundFiles =
			  CompoundFileHelper.cFileTypeMOL
			| CompoundFileHelper.cFileTypeMOL2
			| CompoundFileHelper.cFileTypeSD
			| CompoundFileHelper.cFileTypeDataWarrior;

	// explicitly supported compression format (SD-files only)
	public static final String cGZipExtention = ".gz";

	public static final int cFileTypeDataWarriorCompatibleData = cFileTypeDataWarrior | cFileTypeTextAny | cFileTypeRD | cFileTypeSD | cFileTypeSDGZ;
	public static final int cFileTypeDataWarriorTemplateContaining = cFileTypeDataWarrior | cFileTypeDataWarriorQuery | cFileTypeDataWarriorTemplate;

	private static File sCurrentDirectory;
	private int mRecordCount,mErrorCount;

	public abstract String selectOption(String message, String title, String[] option);
	public abstract File selectFileToOpen(String title, int filetypes);
	public abstract String selectFileToSave(String title, int filetype, String newFileName);
	public abstract void showMessage(String message);

	public static File getCurrentDirectory() {
		return sCurrentDirectory;
		}

	public static void setCurrentDirectory(File d) {
		sCurrentDirectory = d;
		}

	public ArrayList readStructuresFromFile(boolean readIdentifier) {
        File file = selectFileToOpen("Please select a compound file", cFileTypeCompoundFiles);
        return readStructuresFromFile(file, readIdentifier);
	    }

	public ArrayList readIDCodesFromFile() {
        File file = selectFileToOpen("Please select a compound file", cFileTypeCompoundFiles);
        return readIDCodesFromFile(file);
	    }

	public ArrayList readStructuresFromFile(File file, boolean readIdentifier) {
        if (file == null)
            return null;

        ArrayList moleculeList = new ArrayList();
        readChemObjectsFromFile(file, moleculeList, null, null, readIdentifier, false);

        return moleculeList;
	    }

	/**
	 * Reads all compounds as idcode list from the given file.
	 * @param file MOL-, mol2-, SD- or DataWarrior file
	 * @return
	 */
	public ArrayList readIDCodesFromFile(File file) {
        if (file == null)
            return null;

        ArrayList idcodeList = new ArrayList();
        readChemObjectsFromFile(file, null, idcodeList, null, false, false);

        return idcodeList;
	    }

	/**
	 * Reads all compounds as idcode list with identifiers from the given file.
	 * Therefore, it asks for an identifier column.
	 * @param file if null the user is asked for a file
	 * @param readIDCoords if true, then the id-coords are SPACE delimited attached to the idcode
	 * @return list of String[2] with idcode (index 0) and molecule name (index 1)
	 */
	public ArrayList readIDCodesWithNamesFromFile(File file, boolean readIDCoords) {
		if (file == null)
			file = selectFileToOpen("Please select substance file",
									CompoundFileHelper.cFileTypeMOL
								  | CompoundFileHelper.cFileTypeSD
								  | CompoundFileHelper.cFileTypeDataWarrior);

		if (file == null)
			return null;

		ArrayList idcodeWithIDList = new ArrayList();
		readChemObjectsFromFile(file, null, null, idcodeWithIDList, false, readIDCoords);

		return idcodeWithIDList;
		}

	private void readChemObjectsFromFile(File file,
                                         ArrayList moleculeList,
	                                     ArrayList idcodeList,
										 ArrayList idcodeWithIDList,
	                                     boolean readIdentifier, boolean readIDCoords) {
	    mRecordCount = 0;
	    mErrorCount = 0;
	    String filename = file.getName();
	    int index = filename.indexOf('.');
	    String extention = (index == -1) ? "" : filename.substring(index).toLowerCase();

	    if (extention.equals(".mol")
		 || extention.equals(".mol2")) {
		    StereoMolecule mol = null;
	    	if (extention.equals(".mol"))
			    mol = new MolfileParser().getCompactMolecule(file);
	    	else
			    try { mol = new Mol2FileParser().load(filename); } catch (Exception e) { e.printStackTrace(); }

		    if (mol != null && mol.getAllAtoms() != 0) {
			    if (moleculeList != null)
			        moleculeList.add(mol);
			    if (idcodeList != null || idcodeWithIDList != null) {
			        Canonizer canonizer = new Canonizer(mol);
			        String idcode = canonizer.getIDCode();
				    String coords = canonizer.getEncodedCoordinates();
			        if (idcode != null && coords.length() != 0 && readIDCoords)
			            idcode = idcode+" "+coords;
			        if (idcodeList != null)
			            idcodeList.add(idcode);
			        if (idcodeWithIDList != null) {
					    String[] idcodeWithID = new String[2];
					    idcodeWithID[0] = idcode;
					    idcodeWithID[1] = mol.getName();
					    idcodeWithIDList.add(idcodeWithID);
					    }
				    }
			    }
	    	return;
		    }

	    CompoundFileParser parser = (extention.equals(".sdf")) ?
	                                           new SDFileParser(file)
	                              : (extention.equals(".dwar")) ?
	                                           new DWARFileParser(file)
	                              : (extention.equals(".ode")) ?
	                                           new ODEFileParser(file) : null;

	    // If we create molecules,
	    // then we might set the name field with the proper identifier
	    int indexOfID = -1;
	    if (idcodeWithIDList != null || readIdentifier) {
	        String[] fieldNames = parser.getFieldNames();
	        if (fieldNames != null && fieldNames.length != 0) {
	            String id = selectOption("Select compound name or identifier", filename, fieldNames);
	            if (id != null)
	            	for (int i=0; i0 && i0 && i getExtensionList(int fileTypes) {
    	ArrayList list = new ArrayList();
    	int type = 0x00000001;
    	while ((type & cFileTypeMask) != 0) {
    		if ((type & fileTypes) != 0)
    			for (String extension:getExtensions(type))
    			    if (!list.contains(extension))
    				    list.add(extension);

    		type <<= 1;
    		}
    	return list;
    	}

	/**
	 * @param filetype
	 * @return preferred file extension including the dot
	 */
	public static String getExtension(int filetype) {
    	String[] extensions = getExtensions(filetype);
    	return extensions.length == 0 ? "" : extensions[0];
		}

	/**
	 * @param filetype
	 * @return file extensions including the dot
	 */
    public static String[] getExtensions(int filetype) {
		ArrayList extensions = new ArrayList<>();
		switch (filetype) {
		case cFileTypeDataWarrior:
			extensions.add(".dwar");
			break;
		case cFileTypeDataWarriorQuery:
			extensions.add(".dwaq");
			break;
		case cFileTypeDataWarriorTemplate:
			extensions.add(".dwat");
			break;
		case cFileTypeDataWarriorMacro:
			extensions.add(".dwam");
			break;
		case cFileTypeTextTabDelimited:
			extensions.add(".txt");
			extensions.add(".tsv");
			break;
		case cFileTypeTextAnyCSV:
        case cFileTypeTextCommaSeparated:
		case cFileTypeTextSemicolonSeparated:
		case cFileTypeTextVLineSeparated:
            extensions.add(".csv");
            break;
		case cFileTypeSD:
        case cFileTypeSDV2:
        case cFileTypeSDV3:
			extensions.add(".sdf");
			break;
		case cFileTypeRD:
        case cFileTypeRDV2:
        case cFileTypeRDV3:
			extensions.add(".rdf");
			break;
		case cFileTypeRXN:
			extensions.add(".rxn");
			break;
		case cFileTypeSOM:
			extensions.add(".dwas");
			break;
		case cFileTypeJPG:
			extensions.add(".jpeg");
			extensions.add(".jpg");
			break;
		case cFileTypeGIF:
			extensions.add(".gif");
			break;
		case cFileTypePNG:
			extensions.add(".png");
			break;
		case cFileTypeSVG:
			extensions.add(".svg");
			break;
		case cFileTypeMOL:
			extensions.add(".mol");
			break;
		case cFileTypeMOL2:
			extensions.add(".mol2");
			break;
		case cFileTypePDB:
			extensions.add(".pdb");
			break;
		case cFileTypeMMTF:
			extensions.add(".mmtf");
			break;
		case cFileTypeSDGZ:
			extensions.add(".sdf.gz");
			break;
			}
		return extensions.toArray(new String[0]);
		}

	public void saveRXNFile(Reaction rxn) {
		String fileName = selectFileToSave("Select reaction file", cFileTypeRXN, "Untitled Reaction");
		if (fileName != null) {
			String extension = ".rxn";
			int dotIndex = fileName.lastIndexOf('.');
			int slashIndex = fileName.lastIndexOf(File.separator);
			if (dotIndex == -1
			 || dotIndex < slashIndex)
				fileName = fileName.concat(extension);
		    else if (!fileName.substring(dotIndex).equalsIgnoreCase(extension)) {
				showMessage("uncompatible file name extension.");
			    return;
				}

			try {
				BufferedWriter theWriter = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(fileName), StandardCharsets.UTF_8));
				new RXNFileCreator(rxn).writeRXNfile(theWriter);
				theWriter.close();
				}
			catch (IOException e) {
				showMessage("IOException: "+e);
				}
			}
		}
	}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy