io.github.clearwsd.app.EvolutionaryFeatureOptimizer Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of clearwsd-cli Show documentation
Show all versions of clearwsd-cli Show documentation
Command line interfaces for non-programmatic training and experimentation.
/*
* Copyright (C) 2017 James Gung
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package io.github.clearwsd.app;
import java.io.FileInputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.Properties;
import java.util.stream.Collectors;
import io.github.clearwsd.classifier.PaClassifier;
import io.github.clearwsd.corpus.semlink.VerbNetReader;
import io.github.clearwsd.eval.CrossValidation;
import io.github.clearwsd.eval.Evaluation;
import io.github.clearwsd.feature.annotator.AggregateAnnotator;
import io.github.clearwsd.feature.context.NlpContextFactory;
import io.github.clearwsd.feature.extractor.StringExtractor;
import io.github.clearwsd.feature.extractor.StringListExtractor;
import io.github.clearwsd.feature.function.FeatureFunction;
import io.github.clearwsd.feature.function.MultiStringFeatureFunction;
import io.github.clearwsd.feature.function.StringFeatureFunction;
import io.github.clearwsd.feature.optim.EvolutionaryModelTrainer;
import io.github.clearwsd.feature.optim.ga.Chromosome;
import io.github.clearwsd.feature.optim.ga.CrossValidatingFitnessFunction;
import io.github.clearwsd.feature.optim.ga.DefaultChromosome;
import io.github.clearwsd.feature.optim.ga.GeneticAlgorithm;
import io.github.clearwsd.feature.optim.ga.Genotype;
import io.github.clearwsd.feature.optim.ga.NlpClassifierGenotype;
import io.github.clearwsd.feature.optim.ga.OptionGene;
import io.github.clearwsd.feature.pipeline.NlpClassifier;
import io.github.clearwsd.type.DepNode;
import io.github.clearwsd.type.DepTree;
import io.github.clearwsd.type.FeatureType;
import io.github.clearwsd.type.NlpFocus;
import lombok.extern.slf4j.Slf4j;
import static io.github.clearwsd.app.VerbNetClassifierUtils.BROWN;
import static io.github.clearwsd.app.VerbNetClassifierUtils.CLUSTERS;
import static io.github.clearwsd.app.VerbNetClassifierUtils.collocations;
import static io.github.clearwsd.app.VerbNetClassifierUtils.filteredContexts;
import static io.github.clearwsd.app.VerbNetClassifierUtils.windowUnigrams;
import static io.github.clearwsd.feature.context.Contexts.focus;
import static io.github.clearwsd.feature.context.Contexts.head;
import static io.github.clearwsd.feature.extractor.Extractors.concat;
import static io.github.clearwsd.feature.extractor.Extractors.listConcat;
import static io.github.clearwsd.feature.extractor.Extractors.listLookup;
import static io.github.clearwsd.feature.extractor.Extractors.lookup;
import static io.github.clearwsd.feature.extractor.Extractors.lowerForm;
import static io.github.clearwsd.feature.extractor.Extractors.lowerLemma;
import static io.github.clearwsd.type.FeatureType.Dep;
import static io.github.clearwsd.type.FeatureType.Pos;
/**
* @author jamesgung
*/
@Slf4j
public class EvolutionaryFeatureOptimizer {
private static OptionGene>> gene(
List>> featureFunctions,
GeneticAlgorithm ga) {
return new OptionGene<>(featureFunctions, ga.random(), ga.activationProbability());
}
private static List>> getFeatureFunctions(
List, DepNode>> contexts,
StringExtractor extractor) {
List>> results = new ArrayList<>();
for (NlpContextFactory, DepNode> factory : contexts) {
results.add(new StringFeatureFunction<>(factory, Collections.singletonList(extractor)));
}
return results;
}
private static List>> getListFeatureFunctions(
List, DepNode>> contexts,
StringListExtractor extractor) {
List>> results = new ArrayList<>();
for (NlpContextFactory, DepNode> factory : contexts) {
results.add(new MultiStringFeatureFunction<>(factory, Collections.singletonList(extractor)));
}
return results;
}
private static OptionGene propertyGene(GeneticAlgorithm ga, String value, String... options) {
return new OptionGene<>(Arrays.stream(options).map(o -> {
Properties properties = new Properties();
properties.setProperty(value, o);
return properties;
}).collect(Collectors.toList()), ga.random(), 1);
}
private static Chromosome> hyperparams(GeneticAlgorithm ga) {
List> genes = new ArrayList<>();
genes.add(propertyGene(ga, "Cost", "10", "1", "0.1"));
genes.add(propertyGene(ga, "Epsilon", "0.1", "0.01", "0.001"));
return new DefaultChromosome<>(genes, ga.random());
}
private static Chromosome>>> chromosome(GeneticAlgorithm ga) {
List, DepNode>> windowUnigrams = windowUnigrams();
List, DepNode>> windowBigrams = collocations();
List, DepNode>> depContexts = filteredContexts(0);
List, DepNode>> childModContexts = filteredContexts(1);
List, DepNode>> childSkipModContexts = filteredContexts(2);
List, DepNode>> head = Collections.singletonList(head());
List, DepNode>> focus = Collections.singletonList(focus());
StringExtractor text = lowerForm();
StringExtractor lemma = lowerLemma();
StringExtractor dep = lookup(Dep);
StringExtractor pos = lookup(Pos);
StringExtractor textDep = concat(text, dep);
StringExtractor posDep = concat(pos, dep);
StringExtractor lemmaDep = concat(lemma, dep);
StringListExtractor brown = listConcat(listLookup(BROWN), dep);
StringListExtractor cluster100 = listConcat(listLookup(CLUSTERS.get(0)), dep);
StringListExtractor cluster320 = listConcat(listLookup(CLUSTERS.get(1)), dep);
StringListExtractor cluster1000 = listConcat(listLookup(CLUSTERS.get(2)), dep);
StringListExtractor cluster3200 = listConcat(listLookup(CLUSTERS.get(3)), dep);
StringListExtractor cluster10000 = listConcat(listLookup(CLUSTERS.get(4)), dep);
List>>> genes = new ArrayList<>();
genes.add(gene(getFeatureFunctions(windowUnigrams, text), ga));
genes.add(gene(getFeatureFunctions(windowUnigrams, pos), ga));
genes.add(gene(getFeatureFunctions(windowUnigrams, lemma), ga));
genes.add(gene(getFeatureFunctions(windowUnigrams, dep), ga));
genes.add(gene(getFeatureFunctions(windowBigrams, text), ga));
genes.add(gene(getFeatureFunctions(windowBigrams, pos), ga));
genes.add(gene(getFeatureFunctions(windowBigrams, lemma), ga));
genes.add(gene(getFeatureFunctions(windowBigrams, dep), ga));
genes.add(gene(getFeatureFunctions(depContexts, textDep), ga));
genes.add(gene(getFeatureFunctions(depContexts, posDep), ga));
genes.add(gene(getFeatureFunctions(depContexts, lemmaDep), ga));
genes.add(gene(getFeatureFunctions(head, textDep), ga));
genes.add(gene(getFeatureFunctions(head, posDep), ga));
genes.add(gene(getFeatureFunctions(head, lemmaDep), ga));
genes.add(gene(getFeatureFunctions(childModContexts, posDep), ga));
genes.add(gene(getFeatureFunctions(childModContexts, dep), ga));
genes.add(gene(getFeatureFunctions(childSkipModContexts, posDep), ga));
genes.add(gene(getFeatureFunctions(childSkipModContexts, dep), ga));
genes.add(gene(getListFeatureFunctions(depContexts, brown), ga));
genes.add(gene(getListFeatureFunctions(depContexts, cluster100), ga));
genes.add(gene(getListFeatureFunctions(depContexts, cluster320), ga));
genes.add(gene(getListFeatureFunctions(depContexts, cluster1000), ga));
genes.add(gene(getListFeatureFunctions(depContexts, cluster3200), ga));
genes.add(gene(getListFeatureFunctions(depContexts, cluster10000), ga));
genes.add(gene(getListFeatureFunctions(focus, brown), ga));
genes.add(gene(getListFeatureFunctions(focus, cluster100), ga));
genes.add(gene(getListFeatureFunctions(focus, cluster320), ga));
genes.add(gene(getListFeatureFunctions(focus, cluster1000), ga));
genes.add(gene(getListFeatureFunctions(focus, cluster3200), ga));
genes.add(gene(getListFeatureFunctions(focus, cluster10000), ga));
return new DefaultChromosome<>(genes, ga.random());
}
public static void main(String[] args) throws IOException {
List> instances = new VerbNetReader().readInstances(new FileInputStream(args[1]));
AggregateAnnotator> annotator = new AggregateAnnotator<>(VerbNetClassifierUtils.annotators());
annotator.initialize(VerbNetClassifierUtils.resourceManager());
instances.forEach(annotator::annotate);
CrossValidation> cv = new CrossValidation<>(
(NlpFocus i) -> i.feature(FeatureType.Gold));
List>> folds = cv.createFolds(instances, 5);
CrossValidatingFitnessFunction> fitness = new CrossValidatingFitnessFunction<>(cv);
GeneticAlgorithm>> ga = new GeneticAlgorithm<>(0, fitness);
Genotype>> genotype =
new NlpClassifierGenotype<>(chromosome(ga), hyperparams(ga), PaClassifier::new);
ga.prototype(genotype);
EvolutionaryModelTrainer> modelTrainer = new EvolutionaryModelTrainer<>(ga);
List evaluations = cv.crossValidate(modelTrainer, folds);
for (Evaluation evaluation : evaluations) {
log.debug("\n{}", evaluation.toString());
}
log.debug("\n\n{}", new Evaluation(evaluations));
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy