uk.ac.ebi.interpro.scan.jms.master.AbstractBlackBoxMaster Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of jms-implementation Show documentation
InterProScan JMS Implementation Module
The newest version!
package uk.ac.ebi.interpro.scan.jms.master;

import org.apache.log4j.Logger;
import org.springframework.beans.factory.annotation.Required;
import org.springframework.util.StringUtils;
import uk.ac.ebi.interpro.scan.io.FileOutputFormat;
import uk.ac.ebi.interpro.scan.jms.activemq.CleanRunDatabase;
import uk.ac.ebi.interpro.scan.management.model.implementations.WriteOutputStep;
import uk.ac.ebi.interpro.scan.management.model.implementations.stepInstanceCreation.StepInstanceCreatingStep;
import uk.ac.ebi.interpro.scan.management.model.implementations.stepInstanceCreation.nucleotide.RunGetOrfStep;
import uk.ac.ebi.interpro.scan.management.model.implementations.stepInstanceCreation.proteinLoad.FastaFileLoadStep;
import uk.ac.ebi.interpro.scan.util.Utilities;

import java.util.*;

/**
 * Base class for "black box" masters: holds the run configuration (input sequence file,
 * sequence type, output file name and formats, mapping options, in-VM worker limits) and
 * creates the step instances that load the input sequences.
 *
 * @author Phil Jones, Gift Nuka
 * Date: 26/07/12
 */
public abstract class AbstractBlackBoxMaster extends AbstractMaster implements BlackBoxMaster {

    private static final Logger LOGGER = Logger.getLogger(AbstractBlackBoxMaster.class.getName());

    /**
     * Output formats whose file extensions are left out of the default format list that
     * {@link #processOutputFormats(Map, String[])} builds when no formats were requested.
     */
    private static final FileOutputFormat[] NON_DEFAULT_OUTPUT_FORMATS = {
            FileOutputFormat.SVG,
            FileOutputFormat.HTML,
            FileOutputFormat.RAW,
            FileOutputFormat.GFF3_PARTIAL,
            FileOutputFormat.XML_SLIM,
            FileOutputFormat.JSON,
            FileOutputFormat.JSON_SLIM,
            FileOutputFormat.TSV_PRO
    };

    /** Path of the sequence file to load; no load step instances are created while this is null. */
    protected String fastaFilePath;

    /** Base name for the output file(s); when null or empty the input file path is used instead. */
    protected String outputBaseFilename;

    /** Requested output formats; when null or empty a default set of formats is used. */
    private String[] outputFormats;

    private boolean inclTSVVersion = false;

    /**
     * Specifies the type of the I5 input sequences.
     * <p>
     * p: Protein (DEFAULT)
     * n: nucleic acid (DNA or RNA)
     */
    protected String sequenceType = "p";

    /**
     * Minimum nucleotide size of ORF to report (any integer value, held as a string).
     * No default is set in this class; see {@link #setMinSize(String)}.
     */
    private String minSize;

    /** Explicit output file path; when non-null it is used instead of the output base name. */
    protected String explicitFileName;

    private boolean useMatchLookupService = true;

    /**
     * Runnable that {@link #loadInMemoryDatabase()} starts on its own thread and then polls
     * until it reports that loading has finished.
     */
    protected CleanRunDatabase databaseCleaner;

    private boolean excludeSites = false;
    private boolean includeTsvSites = false;
    private boolean mapToInterPro = false;
    private boolean mapToGO = false;
    private boolean mapToPathway = false;

    protected boolean hasInVmWorker;

    private int concurrentInVmWorkerCount;

    private int maxConcurrentInVmWorkerCount;

    private int maxConcurrentInVmWorkerCountForWorkers;

    protected String userDir;

    protected boolean verboseLog;

    protected int verboseLogLevel;

    /** Wall-clock time (epoch milliseconds) at which this instance was constructed. */
    private final long startUpTime = System.currentTimeMillis();
    private long maximumLifeMillis = Long.MAX_VALUE;

    protected int gridCheckInterval = 60; //seconds

    protected static final int LOW_PRIORITY = 4;
    protected static final int HIGH_PRIORITY = 6;
    protected static final int HIGHER_PRIORITY = 8;
    protected static final int HIGHEST_PRIORITY = 9;

    @Required
    public void setHasInVmWorker(boolean hasInVmWorker) {
        this.hasInVmWorker = hasInVmWorker;
    }

    /**
     * Starts the database cleaner on a new thread and blocks, polling every 200 ms, until it
     * reports that loading has finished.
     *
     * @throws InterruptedException if the calling thread is interrupted while waiting.
     */
    protected void loadInMemoryDatabase() throws InterruptedException {
        final Thread databaseLoaderThread = new Thread(databaseCleaner);
        final long timeStarted = System.currentTimeMillis();
        LOGGER.debug("Loading database into memory...");
        databaseLoaderThread.start();
        // Pause while the database is loaded from the zip backup
        while (databaseCleaner.stillLoading()) {
            // Takes about 1500 ms to load the database
            Thread.sleep(200);
        }
        if (LOGGER.isDebugEnabled()) {
            final long timeSpentLoading = System.currentTimeMillis() - timeStarted;
            LOGGER.debug("Database loaded in " + timeSpentLoading + " ms.");
        }
    }

    /**
     * If a fastaFilePath has been passed in as an argument, then StepInstances are created
     * for the fasta file loading job.  Note that this also creates all of the necessary StepInstances
     * for analyses for the loaded proteins.
     *
     * @return the value returned by {@code createStepInstancesForJob}, or 0 when no fasta file
     *         path has been set.
     */
    protected int createFastaFileLoadStepInstance() {
        if (fastaFilePath == null) {
            return 0;
        }
        LOGGER.debug("Creating FASTA file load step.");
        final Map<String, String> params = new HashMap<>();
        params.put(FastaFileLoadStep.FASTA_FILE_PATH_KEY, fastaFilePath);
        createBlackBoxParams(params);
        final int stepInstancesCreated = createStepInstancesForJob("jobLoadFromFasta", params);
        LOGGER.info("Fasta file load step instance has been created.");
        return stepInstancesCreated;
    }

    /**
     * Creates the load step instances appropriate for the configured sequence type:
     * the nucleic acid load job for type "n" (case-insensitive), the fasta load job otherwise.
     *
     * @return the number reported by the chosen creation method.
     */
    protected int createStepInstances() {
        if ("n".equalsIgnoreCase(this.sequenceType)) {
            return createNucleicAcidLoadStepInstance();
        }
        return createFastaFileLoadStepInstance();
    }

    /**
     * Creates the StepInstances for the nucleic acid sequence loading job. The configured
     * sequence file path is supplied under both the getorf and the fasta load parameter keys.
     *
     * @return the value returned by {@code createStepInstancesForJob}, or 0 (after logging an
     *         error) when no sequence file path has been set.
     */
    protected int createNucleicAcidLoadStepInstance() {
        if (fastaFilePath == null) {
            LOGGER.error("No nucleic acid sequence file path has been provided to load.");
            return 0;
        }
        LOGGER.debug("Creating nucleic acid load step.");
        final Map<String, String> params = new HashMap<>();
        params.put(RunGetOrfStep.SEQUENCE_FILE_PATH_KEY, fastaFilePath);
        params.put(FastaFileLoadStep.FASTA_FILE_PATH_KEY, fastaFilePath);
        createBlackBoxParams(params);
        return createStepInstancesForJob("jobLoadNucleicAcidSequence", params);
    }

    /**
     * Adds the parameters shared by all load jobs (analyses, output formats and location,
     * mapping and lookup options, sequence type, minimum ORF size) to the given map.
     *
     * @param params step parameter map to populate.
     */
    private void createBlackBoxParams(final Map<String, String> params) {
        // Analyses as a comma separated list
        if (analyses != null && analyses.length > 0) {
            params.put(StepInstanceCreatingStep.ANALYSIS_JOB_NAMES_KEY,
                    StringUtils.arrayToCommaDelimitedString(analyses));
        }

        processOutputFormats(params, this.outputFormats);
        params.put(StepInstanceCreatingStep.COMPLETION_JOB_NAME_KEY, "jobWriteOutput");
        params.put(WriteOutputStep.INCL_TSV_VERSION, Boolean.toString(this.inclTSVVersion));

        if (explicitFileName == null) {
            // Without an explicit base name, fall back to the input file path
            // (extension will be added later).
            final boolean noBaseName = outputBaseFilename == null || outputBaseFilename.isEmpty();
            params.put(WriteOutputStep.OUTPUT_FILE_PATH_KEY, noBaseName ? fastaFilePath : outputBaseFilename);
        } else {
            params.put(WriteOutputStep.OUTPUT_EXPLICIT_FILE_PATH_KEY, explicitFileName);
        }

        params.put(WriteOutputStep.MAP_TO_INTERPRO_ENTRIES, Boolean.toString(mapToInterPro));
        params.put(WriteOutputStep.MAP_TO_GO, Boolean.toString(mapToGO));
        params.put(StepInstanceCreatingStep.USE_MATCH_LOOKUP_SERVICE, Boolean.toString(useMatchLookupService));
        params.put(StepInstanceCreatingStep.EXCLUDE_SITES, Boolean.toString(excludeSites));
        params.put(StepInstanceCreatingStep.INCLUDE_TSV_SITES, Boolean.toString(includeTsvSites));
        params.put(WriteOutputStep.MAP_TO_PATHWAY, Boolean.toString(mapToPathway));
        params.put(WriteOutputStep.SEQUENCE_TYPE, this.sequenceType);
        params.put(RunGetOrfStep.MIN_NUCLEOTIDE_SIZE, this.minSize);
    }

    /**
     * Stores the output formats in the parameter map as a comma separated list.
     * <p>
     * When no formats are given (null or empty array), every {@link FileOutputFormat} file
     * extension is used except those of the formats in {@code NON_DEFAULT_OUTPUT_FORMATS}.
     *
     * @param params        step parameter map that receives the format list.
     * @param outputFormats requested formats, or null/empty to use the defaults.
     */
    // The raw Map parameter is kept deliberately so the public signature is unchanged for callers.
    @SuppressWarnings({"rawtypes", "unchecked"})
    public void processOutputFormats(final Map params, final String[] outputFormats) {
        final List<String> outputFormatList = new ArrayList<>();
        if (outputFormats != null && outputFormats.length > 0) {
            Collections.addAll(outputFormatList, outputFormats);
        } else {
            // It seems that no valid output formats were specified, so just default to all
            if (LOGGER.isInfoEnabled()) {
                LOGGER.info("No valid output formats specified, therefore use the default (all for sequence type " + this.sequenceType + ")");
            }
            for (FileOutputFormat outputFormat : FileOutputFormat.values()) {
                final String extension = outputFormat.getFileExtension();
                if (!isExcludedFromDefaults(extension)) {
                    outputFormatList.add(extension);
                }
            }
        }
        params.put(WriteOutputStep.OUTPUT_FILE_FORMATS, StringUtils.collectionToCommaDelimitedString(outputFormatList));
    }

    /**
     * Tells whether a file extension matches (ignoring case) the extension of any format that
     * is not part of the default output formats.
     */
    private static boolean isExcludedFromDefaults(final String extension) {
        for (FileOutputFormat excluded : NON_DEFAULT_OUTPUT_FORMATS) {
            if (extension.equalsIgnoreCase(excluded.getFileExtension())) {
                return true;
            }
        }
        return false;
    }

    /**
     * Called by quartz to load proteins from UniParc.
     */
    public void createProteinLoadJob() {
        createStepInstancesForJob("jobLoadFromUniParc", null);
    }

    /**
     * Returns the minimum number of steps expected to run: a base of two steps plus, when the
     * match lookup service is disabled, four steps per analysis.
     *
     * @return the minimum expected step count.
     */
    public int getMinimumStepsExpected() {
        final int analysesCount;
        if (analyses != null) {
            // Arrays.toString prints the analysis names; toString() on the array would only
            // print its identity hash.
            Utilities.verboseLog("analyses != null:  " + Arrays.toString(analyses));
            analysesCount = analyses.length;
        } else {
            analysesCount = jobs.getActiveNonDeprecatedAnalysisJobs().getJobIdList().size();
        }
        Utilities.verboseLog("analysesCount :  " + analysesCount);

        // Without the lookup service each analysis needs: writefasta, runbinary, deletefasta, parseoutput
        final int minimumStepForEachAnalysis = isUseMatchLookupService() ? 0 : 4;
        // NOTE(review): the base of 2 presumably covers the load and write-output steps - confirm.
        final int baseSteps = 2;
        return baseSteps + (analysesCount * minimumStepForEachAnalysis);
    }

    /**
     * If a fasta file path is set, load the proteins at start up and analyse them.
     *
     * @param fastaFilePath from which to load the proteins at start up and analyse them.
     */
    @Override
    public void setFastaFilePath(String fastaFilePath) {
        this.fastaFilePath = fastaFilePath;
    }

    /**
     * Parameter passed in on command line to set kind of input sequence
     * p: Protein
     * n: nucleic acid (DNA or RNA)
     *
     * @param sequenceType the kind of input sequence
     */
    @Override
    public void setSequenceType(String sequenceType) {
        this.sequenceType = sequenceType;
    }

    /**
     * Parameter passed in on command line to set minimum nucleotide size of ORF to report (EMBOSS getorf parameter).
     * Default size for InterProScan is 50 nucleic acids (which overwrites the getorf default value of 30).
     * <p>
     * This option is also configurable within the interproscan.properties file, but will be overwritten by the command value if specified.
     *
     * @param minSize Minimum nucleotide size of ORF to report (EMBOSS getorf parameter).
     */
    public void setMinSize(String minSize) {
        this.minSize = minSize;
    }

    /**
     * @param explicitFileName explicit output file path; when non-null it takes precedence
     *                         over the output base file name.
     */
    @Override
    public void setExplicitOutputFilename(String explicitFileName) {
        this.explicitFileName = explicitFileName;
    }

    public boolean isUseMatchLookupService() {
        return useMatchLookupService;
    }

    /**
     * Called to turn off the use of the precalculated match lookup service on this run.
     */
    public void disablePrecalc() {
        this.useMatchLookupService = false;
    }

    /**
     * @param outputBaseFilename If set, then the results will be output to this file in the format specified in
     *                           the field outputFormats. The application will apply the appropriate file extension automatically.
     */
    @Override
    public void setOutputBaseFilename(String outputBaseFilename) {
        this.outputBaseFilename = outputBaseFilename;
    }

    /**
     * Allows the output format to be changed from the default (all available formats for that sequence type).
     *
     * @param outputFormats The output formats, one array element per format. The array is
     *                      stored as passed (no copy is made).
     */
    @Override
    public void setOutputFormats(String[] outputFormats) {
        this.outputFormats = outputFormats;
    }

    protected boolean isExcludeSites() {
        return excludeSites;
    }

    @Override
    public void setExcludeSites(boolean excludeSites) {
        this.excludeSites = excludeSites;
    }

    public boolean isIncludeTsvSites() {
        return includeTsvSites;
    }

    @Override
    public void setIncludeTsvSites(boolean includeTsvSites) {
        this.includeTsvSites = includeTsvSites;
    }

    @Override
    public void setMapToInterProEntries(boolean mapToInterPro) {
        this.mapToInterPro = mapToInterPro;
    }

    @Override
    public void setMapToGOAnnotations(boolean mapToGO) {
        this.mapToGO = mapToGO;
    }

    public void setMapToPathway(boolean mapToPathway) {
        this.mapToPathway = mapToPathway;
    }

    public void setDatabaseCleaner(CleanRunDatabase databaseCleaner) {
        this.databaseCleaner = databaseCleaner;
    }

    public void setUserDir(String userDir) {
        this.userDir = userDir;
    }

    public int getConcurrentInVmWorkerCount() {
        return concurrentInVmWorkerCount;
    }

    public void setConcurrentInVmWorkerCount(int concurrentInVmWorkerCount) {
        this.concurrentInVmWorkerCount = concurrentInVmWorkerCount;
    }

    public int getMaxConcurrentInVmWorkerCount() {
        return maxConcurrentInVmWorkerCount;
    }

    public void setMaxConcurrentInVmWorkerCount(int maxConcurrentInVmWorkerCount) {
        this.maxConcurrentInVmWorkerCount = maxConcurrentInVmWorkerCount;
    }

    public int getMaxConcurrentInVmWorkerCountForWorkers() {
        return maxConcurrentInVmWorkerCountForWorkers;
    }

    public void setMaxConcurrentInVmWorkerCountForWorkers(int maxConcurrentInVmWorkerCountForWorkers) {
        this.maxConcurrentInVmWorkerCountForWorkers = maxConcurrentInVmWorkerCountForWorkers;
    }

    public void setVerboseLog(boolean verboseLog) {
        this.verboseLog = verboseLog;
    }

    public void setVerboseLogLevel(int verboseLogLevel) {
        this.verboseLogLevel = verboseLogLevel;
    }

    public long getMaximumLifeMillis() {
        return maximumLifeMillis;
    }

    public void setMaximumLifeMillis(long maximumLifeMillis) {
        this.maximumLifeMillis = maximumLifeMillis;
    }

    /**
     * @return the time (epoch milliseconds) at which this master instance was constructed.
     */
    public long getStartUpTime() {
        return startUpTime;
    }

    /**
     * Returns the number of milliseconds elapsed since this master was constructed.
     * <p>
     * NOTE(review): despite the name, this is the elapsed lifetime, not
     * {@code maximumLifeMillis} minus the elapsed time. Behaviour is left unchanged because
     * callers may depend on it - confirm the intended semantics before renaming or fixing.
     *
     * @return current time minus start-up time, in milliseconds.
     */
    public long getMasterLifeRemaining() {
        return System.currentTimeMillis() - startUpTime;
    }

    /**
     * @return the grid check interval, in seconds.
     */
    public int getGridCheckInterval() {
        return gridCheckInterval;
    }

    /**
     * @param gridCheckInterval the grid check interval, in seconds.
     */
    public void setGridCheckInterval(int gridCheckInterval) {
        this.gridCheckInterval = gridCheckInterval;
    }

    @Override
    public void setInclTSVVersion(boolean inclTSVVersion) {
        this.inclTSVVersion = inclTSVVersion;
    }
}