All downloads are free. The search and download functionality uses the official Maven repository.

de.julielab.geneexpbase.hpo.HpoCorpusRegistry Maven / Gradle / Ivy

package de.julielab.geneexpbase.hpo;

import de.julielab.geneexpbase.data.DocumentSourceFileRegistry;
import de.julielab.geneexpbase.data.DocumentSourceFiles;
import de.julielab.geneexpbase.data.UnknownCorpusException;

/**
 * Central registry of the corpus / sub-corpus identifiers known to this package.
 * <p>
 * The class offers factory methods that create {@link HpoInstance} objects for
 * specific corpus / sub-corpus pairs and {@link #getCorpusFiles(HpoInstance)},
 * which resolves such a pair to the matching {@link DocumentSourceFiles} obtained
 * from {@link DocumentSourceFileRegistry}.
 */
public class HpoCorpusRegistry {

    // ---- Corpus identifiers (compared against HpoInstance#getCorpus()) ----
    public static final String GNORMPLUS = "gnormplus";
    public static final String BC2 = "bc2";
    public static final String BC3 = "bc3";
    public static final String BC3_SPECIES = "bc3species";
    public static final String NLM_GENE = "nlmgene";
    public static final String DECA = "deca";
    public static final String SPECIESCUSTOM = "speciescustom";
    public static final String UNITTEST = "unittest";

    // ---- Sub-corpus identifiers (compared against HpoInstance#getSubcorpus()) ----
    public static final String BC2_TRAIN = "bc2train";
    public static final String BC2_TEST = "bc2test";
    public static final String NLMIAT = "nlmiat";
    public static final String ALL = "all";
    public static final String TRAIN = "train";
    public static final String TEST = "test";
    public static final String TRAINSET1 = "trainset1";
    public static final String TRAINSET2 = "trainset2";
    public static final String TEST50 = "test50";

    /**
     * Creates the instance for corpus {@link #GNORMPLUS}, sub-corpus {@link #BC2_TEST}.
     * <p>
     * NOTE(review): this returns exactly the same pair as {@link #gnpBc2Test()}.
     * Given the method name, {@code new HpoInstance(BC2, TEST)} may have been
     * intended - confirm against the callers before changing it.
     *
     * @return a new instance for {@code gnormplus:bc2test}
     */
    public static HpoInstance bc2test() {
        return new HpoInstance(GNORMPLUS, BC2_TEST);
    }

    /** @return a new instance for corpus {@link #GNORMPLUS}, sub-corpus {@link #BC2_TRAIN} */
    public static HpoInstance gnpBc2Train() {
        return new HpoInstance(GNORMPLUS, BC2_TRAIN);
    }

    /** @return a new instance for corpus {@link #GNORMPLUS}, sub-corpus {@link #BC2_TEST} */
    public static HpoInstance gnpBc2Test() {
        return new HpoInstance(GNORMPLUS, BC2_TEST);
    }

    /** @return a new instance for corpus {@link #GNORMPLUS}, sub-corpus {@link #NLMIAT} */
    public static HpoInstance gnpNlmiat() {
        return new HpoInstance(GNORMPLUS, NLMIAT);
    }

    /** @return a new instance for corpus {@link #SPECIESCUSTOM}, sub-corpus {@link #ALL} */
    public static HpoInstance speciesCustom() {
        return new HpoInstance(SPECIESCUSTOM, ALL);
    }

    /** @return a new instance for corpus {@link #DECA}, sub-corpus {@link #ALL} */
    public static HpoInstance deca() {
        return new HpoInstance(DECA, ALL);
    }

    /** @return a new instance for corpus {@link #BC3}, sub-corpus {@link #TRAINSET1} */
    public static HpoInstance bc3Trainset1() {
        return new HpoInstance(BC3, TRAINSET1);
    }

    /** @return a new instance for corpus {@link #BC3_SPECIES}, sub-corpus {@link #TRAINSET1} */
    public static HpoInstance bc3SpeciesTrainset1() {
        return new HpoInstance(BC3_SPECIES, TRAINSET1);
    }

    /** @return a new instance for corpus {@link #BC3_SPECIES}, sub-corpus {@link #TRAINSET2} */
    public static HpoInstance bc3SpeciesTrainset2() {
        return new HpoInstance(BC3_SPECIES, TRAINSET2);
    }

    /** @return a new instance for corpus {@link #UNITTEST}, sub-corpus {@link #ALL} */
    public static HpoInstance unitTest() {
        return new HpoInstance(UNITTEST, ALL);
    }

    /**
     * Resolves the corpus / sub-corpus pair of the given instance to its source files.
     * <p>
     * Supported combinations:
     * <ul>
     *   <li>{@link #GNORMPLUS}: {@link #BC2_TEST}, {@link #BC2_TRAIN}, {@link #NLMIAT}</li>
     *   <li>{@link #BC2}: {@link #TEST}</li>
     *   <li>{@link #BC3}: {@link #TRAINSET1}, {@link #TEST50}</li>
     *   <li>{@link #NLM_GENE}: {@link #TRAIN}, {@link #TEST}</li>
     *   <li>{@link #BC3_SPECIES}: {@link #TRAINSET1}, {@link #TRAINSET2}</li>
     *   <li>{@link #DECA}: {@link #ALL}</li>
     *   <li>{@link #SPECIESCUSTOM}: {@link #ALL}</li>
     *   <li>{@link #UNITTEST}: {@link #ALL}</li>
     * </ul>
     * A {@code null} sub-corpus is treated like any other unsupported sub-corpus
     * and reported through the exceptions below rather than through a bare
     * {@link NullPointerException}.
     *
     * @param si the instance whose corpus and sub-corpus are looked up; must not be {@code null}
     * @return the source files registered for the requested corpus / sub-corpus
     * @throws UnknownCorpusException   if the corpus is not known (message is the corpus name) or
     *                                  the sub-corpus is not known for that corpus (message is
     *                                  {@code corpus:subcorpus})
     * @throws IllegalArgumentException if the corpus is {@link #UNITTEST} and the sub-corpus is
     *                                  not {@link #ALL}
     * @throws NullPointerException     if {@code si} is {@code null}
     */
    public static DocumentSourceFiles getCorpusFiles(HpoInstance si) {
        final String corpus = si.getCorpus();
        if (corpus == null) {
            // A String switch throws NullPointerException on a null selector;
            // report a missing corpus name the same way as any other unknown corpus.
            throw new UnknownCorpusException(corpus);
        }
        final String subcorpus = si.getSubcorpus();

        // Sub-corpus checks use constant-first equals() so that a null sub-corpus
        // simply matches nothing and falls through to the common error below.
        switch (corpus) {
            case GNORMPLUS:
                if (BC2_TEST.equals(subcorpus)) {
                    return DocumentSourceFileRegistry.gnpBc2gnTest();
                }
                if (BC2_TRAIN.equals(subcorpus)) {
                    return DocumentSourceFileRegistry.gnpBc2gnTrain();
                }
                if (NLMIAT.equals(subcorpus)) {
                    return DocumentSourceFileRegistry.gnpNlmIat();
                }
                break;
            case BC2:
                if (TEST.equals(subcorpus)) {
                    return DocumentSourceFileRegistry.bc2gntest();
                }
                break;
            case BC3:
                if (TRAINSET1.equals(subcorpus)) {
                    return DocumentSourceFileRegistry.bc3Trainset1();
                }
                if (TEST50.equals(subcorpus)) {
                    return DocumentSourceFileRegistry.bc3Test50();
                }
                break;
            case NLM_GENE:
                if (TRAIN.equals(subcorpus)) {
                    return DocumentSourceFileRegistry.nlmGeneTrain();
                }
                if (TEST.equals(subcorpus)) {
                    return DocumentSourceFileRegistry.nlmGeneTest();
                }
                break;
            case BC3_SPECIES:
                if (TRAINSET1.equals(subcorpus)) {
                    return DocumentSourceFileRegistry.bc3Trainset1Species();
                }
                if (TRAINSET2.equals(subcorpus)) {
                    return DocumentSourceFileRegistry.bc3Trainset2Species();
                }
                break;
            case DECA:
                if (ALL.equals(subcorpus)) {
                    return DocumentSourceFileRegistry.decaSpeciesCorpus();
                }
                break;
            case SPECIESCUSTOM:
                if (ALL.equals(subcorpus)) {
                    return DocumentSourceFileRegistry.speciesCorpusCustomGnormPlusBC2TrainGoldGenes();
                }
                break;
            case UNITTEST:
                if (ALL.equals(subcorpus)) {
                    return DocumentSourceFileRegistry.unitTests();
                }
                // Kept as IllegalArgumentException (not UnknownCorpusException) so that
                // existing callers observe the same exception type as before.
                throw new IllegalArgumentException("The UnitTest corpus only has the 'all' sub-corpus. Requested was '" + subcorpus + "'.");
            default:
                throw new UnknownCorpusException(corpus);
        }

        // Known corpus, but the requested sub-corpus is not registered for it.
        throw new UnknownCorpusException(corpus + ":" + subcorpus);
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy