org.openscience.cdk.tools.efgf.app.ErtlFunctionalGroupsFinderPerformanceSnapshotApp Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of ErtlFunctionalGroupsFinder Show documentation
Show all versions of ErtlFunctionalGroupsFinder Show documentation
ErtlFunctionalGroupsFinder for CDK
/*
* ErtlFunctionalGroupsFinder for CDK
* Copyright (c) 2023 Sebastian Fritsch, Stefan Neumann, Jonas Schaub, Christoph Steinbeck, and Achim Zielesny
*
* Source code is available at
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public License
* as published by the Free Software Foundation; either version 2.1
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see .
*/
package org.openscience.cdk.tools.efgf.app;
import org.openscience.cdk.CDKConstants;
import org.openscience.cdk.aromaticity.Aromaticity;
import org.openscience.cdk.aromaticity.ElectronDonation;
import org.openscience.cdk.atomtype.CDKAtomTypeMatcher;
import org.openscience.cdk.exception.CDKException;
import org.openscience.cdk.graph.ConnectivityChecker;
import org.openscience.cdk.graph.CycleFinder;
import org.openscience.cdk.graph.Cycles;
import org.openscience.cdk.interfaces.IAtom;
import org.openscience.cdk.interfaces.IAtomContainer;
import org.openscience.cdk.interfaces.IAtomContainerSet;
import org.openscience.cdk.interfaces.IAtomType;
import org.openscience.cdk.io.iterator.IteratingSDFReader;
import org.openscience.cdk.silent.SilentChemObjectBuilder;
import org.openscience.cdk.tools.CDKHydrogenAdder;
import org.openscience.cdk.tools.manipulator.AtomContainerManipulator;
import org.openscience.cdk.tools.manipulator.AtomTypeManipulator;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileWriter;
import java.io.IOException;
import java.io.PrintWriter;
import java.io.StringWriter;
import java.time.LocalDateTime;
import java.time.format.DateTimeFormatter;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Objects;
import java.util.Random;
import java.util.Set;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
/**
* An application for testing the performance of the ErtlFunctionalGroupsFinder.find() method under parallelization on
* multiple threads.
*
* @author Jonas Schaub
* @version 1.2.0.0
*/
public class ErtlFunctionalGroupsFinderPerformanceSnapshotApp {
//
/**
* Name of file for logging occurred exceptions
*/
private static final String EXCEPTIONS_LOG_FILE_NAME = "Exceptions_Log.txt";
/**
* Name of file for writing results
*/
private static final String RESULTS_FILE_NAME = "Results.txt";
/**
* All allowed atomic numbers to pass to the ErtlFunctionalGroupsFinder;
* String will be split and resulting integers passed to a set
*/
private static final String NON_METALLIC_ATOMIC_NUMBERS = "1,2,6,7,8,9,10,15,16,17,18,34,35,36,53,54,86";
//
//
/**
* All allowed atomic numbers to pass to the ErtlFunctionalGroupsFinder as a set of integers (will be parsed from
* NON_METALLIC_ATOMIC_NUMBERS)
*/
private Set nonMetallicAtomicNumbersSet;
/**
* The working directory (the jar-file's directory)
*/
private String workingPath;
/**
* The given number of different threads to use
*/
private int numberOfThreadsToUse;
/**
* All molecules loaded from the SD file
*/
private IAtomContainer[] moleculesArray;
/**
* The aromaticity model in use
*/
private Aromaticity aromaticityModel;
//
//
/**
* Instantiates and starts the application. It first loads all molecules from a given SD file into memory and then
* distributes them equally on the given number of different threads to use. It measures the time it takes for all
* threads to complete the extraction of functional groups using the ErtlFunctionalGroupsFinder. It exits the system
* if an unexpected exception occurs that prevents the application from working, e.g. an IllegalArgumentException
* (will be logged to a file, not printed on the console).
*
* @param anArgs the command line arguments, anArgs[0] must be the name of the SD file to load (must be located in
* the same directory as the application's JAR file) and anArgs[1] must be the number of different threads to use
* @throws java.io.IOException if the constructor is unable to open a text file for logging occurred exceptions
*/
public ErtlFunctionalGroupsFinderPerformanceSnapshotApp(String[] anArgs) throws IOException {
this.workingPath = (new File("").getAbsoluteFile().getAbsolutePath()) + File.separator;
LocalDateTime tmpDateTime = LocalDateTime.now();
String tmpTimeStamp = tmpDateTime.format(DateTimeFormatter.ofPattern("uuuu_MM_dd_HH_mm"));
File tmpExceptionsLogFile = new File(this.workingPath
+ ErtlFunctionalGroupsFinderPerformanceSnapshotApp.EXCEPTIONS_LOG_FILE_NAME);
FileWriter tmpExceptionsLogFileWriter = new FileWriter(tmpExceptionsLogFile, true);
PrintWriter tmpExceptionsPrintWriter = new PrintWriter(tmpExceptionsLogFileWriter);
tmpExceptionsPrintWriter.println("#########################################################################");
tmpExceptionsPrintWriter.println("Time-stamp: " + tmpTimeStamp);
tmpExceptionsPrintWriter.println();
tmpExceptionsPrintWriter.flush();
tmpExceptionsPrintWriter.close();
FileInputStream tmpDBFileInputStream = null;
ExecutorService executor = null;
PrintWriter tmpResultsPrintWriter = null;
IteratingSDFReader tmpDBReader = null;
boolean tmpHasAnErrorOccurred = false;
try {
if (anArgs.length != 2) {
throw new IllegalArgumentException("Two arguments (a file name and the number of threads to use) are required.");
}
this.numberOfThreadsToUse = 0;
try {
this.numberOfThreadsToUse = Integer.parseInt(anArgs[1]);
} catch (NumberFormatException aNumberFormatException) {
throw new IllegalArgumentException("Argument \"" + anArgs[1] + "\" must be an integer.");
}
if (this.numberOfThreadsToUse <= 0) {
throw new IllegalArgumentException("The number of threads to use must be at least 1.");
}
File tmpDBFile = new File(this.workingPath + anArgs[0]);
try {
tmpDBFileInputStream = new FileInputStream(tmpDBFile);
} catch (FileNotFoundException | SecurityException anException) {
throw new IllegalArgumentException("The database file (name) is not valid: " + anException.getMessage());
}
CycleFinder tmpCycleFinder = Cycles.or(Cycles.all(), Cycles.vertexShort());
this.aromaticityModel = new Aromaticity(ElectronDonation.daylight(), tmpCycleFinder);
String[] tmpMetalNumbersStrings = ErtlFunctionalGroupsFinderPerformanceSnapshotApp.NON_METALLIC_ATOMIC_NUMBERS.split(",");
Integer[] tmpMetalNumbersInt = new Integer[tmpMetalNumbersStrings.length];
for (int i = 0; i < tmpMetalNumbersStrings.length; i++) {
tmpMetalNumbersInt[i] = Integer.parseInt(tmpMetalNumbersStrings[i]);
}
this.nonMetallicAtomicNumbersSet = new HashSet(Arrays.asList(tmpMetalNumbersInt));
File tmpResultsLogFile = new File(this.workingPath
+ ErtlFunctionalGroupsFinderPerformanceSnapshotApp.RESULTS_FILE_NAME);
FileWriter tmpResultsLogFileWriter = new FileWriter(tmpResultsLogFile, true);
tmpResultsPrintWriter = new PrintWriter(tmpResultsLogFileWriter);
tmpResultsPrintWriter.println("#########################################################################");
tmpResultsPrintWriter.println("Time-stamp: " + tmpTimeStamp);
tmpResultsPrintWriter.println();
tmpResultsPrintWriter.println("Application initialized. Loading database file named " + anArgs[0] + ".");
tmpResultsPrintWriter.flush();
System.out.println("\nApplication initialized. Loading database file named " + anArgs[0] + ".");
tmpDBReader = new IteratingSDFReader(tmpDBFileInputStream,
SilentChemObjectBuilder.getInstance(),
true);
List tmpMoleculesList = new LinkedList<>();
while (tmpDBReader.hasNext()) {
try {
IAtomContainer tmpMolecule = (IAtomContainer) tmpDBReader.next();
tmpMolecule = this.applyFiltersAndPreprocessing(tmpMolecule);
tmpMoleculesList.add(tmpMolecule);
} catch (Exception anException) {
/*If an IllegalArgumentException is thrown in applyFiltersAndPreprocessing (meaning that the molecule
should be filtered) the molecule is skipped by catching this exception*/
}
}
try {
tmpDBReader.close();
} catch (IOException ex) {
this.appendToLogfile(ex);
}
long tmpSeed = System.nanoTime();
Collections.shuffle(tmpMoleculesList, new Random(tmpSeed));
this.moleculesArray = new IAtomContainer[tmpMoleculesList.size()];
this.moleculesArray = tmpMoleculesList.toArray(this.moleculesArray);
tmpResultsPrintWriter.println("\nDone Loading database. Found and processed " + this.moleculesArray.length + " valid molecules.");
System.out.println("Done Loading database. Found and processed " + this.moleculesArray.length + " valid molecules.");
tmpResultsPrintWriter.flush();
final int tmpNumberOfMolecules = this.moleculesArray.length;
int tmpNumberOfMoleculesPerThread = tmpNumberOfMolecules/this.numberOfThreadsToUse;
List tmpListOfThreads = new LinkedList<>();
//the modulo of dividing the number of valid molecules by the number of threads is simply discarded
int tmpLastEndIndex = tmpNumberOfMolecules - tmpNumberOfMolecules % this.numberOfThreadsToUse - 1;
for (int i = 0; i <= tmpLastEndIndex; i += tmpNumberOfMoleculesPerThread) {
IAtomContainer[] tmpMoleculesArrayForThread = Arrays.copyOfRange(this.moleculesArray, i, i + tmpNumberOfMoleculesPerThread);
tmpListOfThreads.add(new ExtractFunctionalGroupsTask(tmpMoleculesArrayForThread));
}
executor = Executors.newFixedThreadPool(this.numberOfThreadsToUse);
List> tmpFuturesList = new LinkedList<>();
long tmpStartTime = System.currentTimeMillis();
try {
tmpFuturesList = executor.invokeAll(tmpListOfThreads);
} catch (Exception ex) {
this.appendToLogfile(ex);
throw ex;
}
long tmpEndTime = System.currentTimeMillis();
tmpResultsPrintWriter.println("Divided molecules onto " + this.numberOfThreadsToUse
+ " threads. Extraction of functional groups took " + (tmpEndTime - tmpStartTime) + " ms.");
System.out.println("Divided molecules onto " + this.numberOfThreadsToUse
+ " threads. Extraction of functional groups took " + (tmpEndTime - tmpStartTime) + " ms.");
int tmpExceptionsCounter = 0;
for (Future tmpFuture : tmpFuturesList) {
tmpExceptionsCounter += tmpFuture.get();
}
tmpResultsPrintWriter.println(tmpExceptionsCounter + " molecules produced an exception.");
tmpResultsPrintWriter.flush();
executor.shutdownNow();
tmpResultsPrintWriter.println();
tmpResultsPrintWriter.flush();
tmpResultsPrintWriter.close();
} catch (Exception anException) {
this.appendToLogfile(anException);
anException.printStackTrace(System.err);
if (anException instanceof InterruptedException) {
Thread.currentThread().interrupt();
}
} finally {
if (!Objects.isNull(executor)) {
executor.shutdownNow();
}
if (!Objects.isNull(tmpResultsPrintWriter)) {
tmpResultsPrintWriter.close();
}
if (!Objects.isNull(tmpDBFileInputStream)) {
tmpDBFileInputStream.close();
}
if (!Objects.isNull(tmpDBReader)) {
tmpDBReader.close();
}
if (tmpHasAnErrorOccurred) {
System.exit(1);
}
}
}
//
//
/**
* Performs all preprocessing needed for the ErtlFunctionalGroupsFinder and throws an IllegalArgumentException
* if the given molecule should not be passed on to the find() method (filtering).
*
* @throws IllegalArgumentException if the given molecule should be filtered i.e. does not meet the
* ErtlFunctionalGroupsFinder's requirements
* @throws CDKException for several causes connected to different CDK functionalities
*/
private IAtomContainer applyFiltersAndPreprocessing(IAtomContainer aMolecule) throws IllegalArgumentException, CDKException {
if (aMolecule.getAtomCount() == 0 || aMolecule.getBondCount() == 0) {
throw new IllegalArgumentException("Molecule must be filtered!");
}
aMolecule.removeProperty(CDKConstants.CTAB_SGROUPS);
AtomContainerManipulator.percieveAtomTypesAndConfigureAtoms(aMolecule);
if (!ConnectivityChecker.isConnected(aMolecule)) {
IAtomContainerSet tmpFragmentsSet = ConnectivityChecker.partitionIntoMolecules(aMolecule);
IAtomContainer tmpBiggestFragment = null;
for (IAtomContainer tmpFragment : tmpFragmentsSet.atomContainers()) {
if (tmpBiggestFragment == null || tmpBiggestFragment.getAtomCount() < tmpFragment.getAtomCount()) {
tmpBiggestFragment = tmpFragment;
}
}
if (!Objects.isNull(tmpBiggestFragment)) {
aMolecule = tmpBiggestFragment;
}
}
for (IAtom tmpAtom : aMolecule.atoms()) {
if (!this.nonMetallicAtomicNumbersSet.contains(tmpAtom.getAtomicNumber())) {
throw new IllegalArgumentException("Molecule must be filtered!");
}
if (tmpAtom.getFormalCharge() != 0) {
tmpAtom.setFormalCharge(0);
CDKHydrogenAdder tmpHAdder = CDKHydrogenAdder.getInstance(aMolecule.getBuilder());
CDKAtomTypeMatcher tmpMatcher = CDKAtomTypeMatcher.getInstance(aMolecule.getBuilder());
IAtomType tmpMatchedType = tmpMatcher.findMatchingAtomType(aMolecule, tmpAtom);
if (tmpMatchedType != null) {
AtomTypeManipulator.configure(tmpAtom, tmpMatchedType);
}
tmpHAdder.addImplicitHydrogens(aMolecule, tmpAtom);
}
}
this.aromaticityModel.apply(aMolecule);
return aMolecule;
}
/**
* Appends the given exception's stack trace to a log file.
*
* @param anException the exception to log
*/
private void appendToLogfile(Exception anException) {
if (anException == null) {
return;
}
PrintWriter tmpPrintWriter = null;
try {
FileWriter tmpFileWriter = new FileWriter(this.workingPath
+ ErtlFunctionalGroupsFinderPerformanceSnapshotApp.EXCEPTIONS_LOG_FILE_NAME,
true);
tmpPrintWriter = new PrintWriter(tmpFileWriter);
StringWriter tmpStringWriter = new StringWriter();
anException.printStackTrace(new PrintWriter(tmpStringWriter));
String tmpStackTrace = tmpStringWriter.toString();
tmpPrintWriter.println(tmpStackTrace);
} catch (IOException anIOException) {
anIOException.printStackTrace(System.err);
} finally {
if (tmpPrintWriter != null) {
tmpPrintWriter.println();
tmpPrintWriter.flush();
tmpPrintWriter.close();
}
}
}
//
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy