gate.plugin.learningframework.data.Attributes Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of learningframework Show documentation
Show all versions of learningframework Show documentation
A GATE plugin that provides many different machine learning
algorithms for a wide range of NLP-related machine learning tasks like
text classification, tagging, or chunking.
/*
* Copyright (c) 2015-2016 The University Of Sheffield.
*
* This file is part of gateplugin-LearningFramework
* (see https://github.com/GateNLP/gateplugin-LearningFramework).
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 2.1 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this software. If not, see <http://www.gnu.org/licenses/>.
*/
package gate.plugin.learningframework.data;
import cc.mallet.pipe.Pipe;
import cc.mallet.types.Alphabet;
import cc.mallet.types.LabelAlphabet;
import gate.plugin.learningframework.features.CodeAs;
import gate.plugin.learningframework.features.Datatype;
import gate.plugin.learningframework.features.FeatureExtractionMalletSparse;
import gate.plugin.learningframework.features.FeatureInfo;
import gate.plugin.learningframework.features.FeatureSpecAttribute;
import gate.plugin.learningframework.features.FeatureSpecAttributeList;
import gate.plugin.learningframework.features.FeatureSpecNgram;
import gate.plugin.learningframework.features.FeatureSpecSimpleAttribute;
import gate.plugin.learningframework.mallet.LFAlphabet;
import gate.plugin.learningframework.mallet.LFPipe;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.NoSuchElementException;
/**
* Describes all Attributes/features of a dataset.
* This describes the attributes of a corpus representation at
* the time the instance is created.
* @author Johann Petrak
*/
public class Attributes implements Iterable {
/**
* Generate the attributes object from the information in the pipe.
* The pipe should be a LFPipe, but we also try to come up with something
* if it is an ordinary pipe: in that case no feature info can be obtained
* and the local featureInfo stays null.
*
* NOTE(review): in this copy of the file the constructor body is cut off
* inside the header of its for loop, and what follows is the tail of a
* different, index-checked accessor that returns attributes.get(index).
* The text in between (rest of the loop, end of the constructor, start of
* the accessor) is missing and has to be restored from the upstream source.
*
* @param pipe mallet pipe; its data alphabet is read here
* @param instanceType instance type (not used in the part of the body that survived)
*/
public Attributes(Pipe pipe, String instanceType) {
// first create the attributes (independent vars)
Alphabet dataAlphabet = pipe.getDataAlphabet();
// if we can, also represent the pipe as LFPipe
LFPipe lfPipe;
// stays null unless the pipe turns out to be an LFPipe
FeatureInfo featureInfo = null;
if(pipe instanceof LFPipe) {
lfPipe = (LFPipe)pipe;
featureInfo = lfPipe.getFeatureInfo();
}
// the alphabet we use if we have a boolean variable
LFAlphabet booleanAlph = new LFAlphabet();
// "false" is registered before "true"
// (presumably so that they receive indices 0 and 1 -- TODO confirm against LFAlphabet)
booleanAlph.lookupIndex("false");
booleanAlph.lookupIndex("true");
// NOTE(review): the next line is where the text is truncated: a loop header
// "for(int i =0; i" runs directly into the bounds check "=attributes.size())"
// of the accessor; the code below rejects an index at or beyond the list size
// with a RuntimeException and otherwise returns the attribute at that index.
for(int i =0; i=attributes.size()) {
throw new RuntimeException("Attribute "+index+" does not exist, only have "+attributes.size());
}
return attributes.get(index);
}
/**
 * Reports how many attributes are currently held in the attribute list.
 *
 * @return the size of the attribute list
 */
public int nAttributes() {
  final int count = attributes.size();
  return count;
}
/**
 * Lookup table from attribute name to attribute index.
 * NOTE(review): the key/value types are presumed from the field name, since
 * the code that fills this map is not visible here -- confirm before relying on them.
 */
protected Map<String, Integer> name2index = new HashMap<>();

/**
 * The attributes held by this object; the position of an element in this
 * list is the index used for index-based lookup and for iteration.
 * Typed as a list of Attribute so that elements can be returned without casts.
 */
protected List<Attribute> attributes = new ArrayList<>();
/**
 * Returns the attribute describing the target.
 * <p>
 * NOTE: conceptually the target attribute sits at an index one past the
 * highest index of the independent attributes, but that index must not be
 * used for lookup -- the target is only available through this getter.
 *
 * @return target attribute instance
 */
public Attribute getTargetAttribute() {
  return this.targetAttribute;
}

/** The target attribute handed out by {@link #getTargetAttribute()}. */
protected Attribute targetAttribute;
/**
 * Creates a new iterator that walks the attribute list from index 0 upwards.
 *
 * @return a fresh iterator positioned before the first attribute
 */
@Override
public Iterator<Attribute> iterator() {
  return new AttributeIterator();
}

/**
 * Forward-only iterator over the attribute list of the enclosing object.
 * It reads the list directly on every call and does not override remove().
 */
public class AttributeIterator implements Iterator<Attribute> {
  /* the index which would get returned next */
  private int currentIndex = 0;

  /**
   * @return true while there is an attribute at the current index
   */
  @Override
  public boolean hasNext() {
    return currentIndex < attributes.size();
  }

  /**
   * Returns the attribute at the current index and advances by one.
   *
   * @return the next attribute
   * @throws NoSuchElementException if the iteration has no more attributes
   */
  @Override
  public Attribute next() {
    // The Iterator contract asks for NoSuchElementException on exhaustion;
    // without this guard the list's IndexOutOfBoundsException would leak out.
    if (!hasNext()) {
      throw new NoSuchElementException(
          "No more attributes, only have " + attributes.size());
    }
    return attributes.get(currentIndex++);
  }
}
}