All Downloads are FREE. Search and download functionalities are using the official Maven repository.

gate.plugin.learningframework.data.Attributes Maven / Gradle / Ivy

Go to download

A GATE plugin that provides many different machine learning algorithms for a wide range of NLP-related machine learning tasks like text classification, tagging, or chunking.

There is a newer version: 4.2
Show newest version
/*
 * Copyright (c) 2015-2016 The University Of Sheffield.
 *
 * This file is part of gateplugin-LearningFramework 
 * (see https://github.com/GateNLP/gateplugin-LearningFramework).
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation, either version 2.1 of the License, or
 * (at your option) any later version.
 * 
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this software. If not, see <http://www.gnu.org/licenses/>.
 */
package gate.plugin.learningframework.data;

import cc.mallet.pipe.Pipe;
import cc.mallet.types.Alphabet;
import cc.mallet.types.LabelAlphabet;
import gate.plugin.learningframework.features.CodeAs;
import gate.plugin.learningframework.features.Datatype;
import gate.plugin.learningframework.features.FeatureExtractionMalletSparse;
import gate.plugin.learningframework.features.FeatureInfo;
import gate.plugin.learningframework.features.FeatureSpecAttribute;
import gate.plugin.learningframework.features.FeatureSpecAttributeList;
import gate.plugin.learningframework.features.FeatureSpecNgram;
import gate.plugin.learningframework.features.FeatureSpecSimpleAttribute;
import gate.plugin.learningframework.mallet.LFAlphabet;
import gate.plugin.learningframework.mallet.LFPipe;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.NoSuchElementException;

/**
 * Describes all Attributes/features of a dataset.
 * This describes the attributes of a corpus representation at
 * the time the instance is created. 
 * @author Johann Petraj
 */
public class Attributes implements Iterable {
  /**
   * Generate the attributes object from the information in the pipe.
   * The pipe should be a LFPipe, but we also try to come up with something
   * if it is an ordinary pipe: in that case no feature info is available
   * and the local {@code featureInfo} stays {@code null}.
   * <p>
   * NOTE(review): this block is damaged. The loop header near the end of
   * the visible constructor body is malformed, and the lines after it
   * belong to an index-based accessor whose signature is no longer present.
   * Restore the missing span from the original source before building.
   * 
   * @param pipe  mallet pipe
   * @param instanceType instance type (not referenced in the part of the
   *   body that is still visible here)
   */
  public Attributes(Pipe pipe, String instanceType) {
    // first create the attributes (independent vars)    
    Alphabet dataAlphabet = pipe.getDataAlphabet();
    // if we can, also represent the pipe as LFPipe
    LFPipe lfPipe;
    // stays null unless the pipe turns out to be an LFPipe
    FeatureInfo featureInfo = null;
    if(pipe instanceof LFPipe) {
      lfPipe = (LFPipe)pipe;
      featureInfo = lfPipe.getFeatureInfo();
    }
    // the alphabet we use if we have a boolean variable:
    // "false" is looked up first, then "true"
    LFAlphabet booleanAlph = new LFAlphabet();
    booleanAlph.lookupIndex("false");
    booleanAlph.lookupIndex("true");    
    // NOTE(review): the next line is not a valid loop header. The text
    // between "i" and "=attributes.size()" appears to have been lost, taking
    // the rest of this constructor and the header of the following accessor
    // with it. Do not reconstruct it by hand; restore it from the original.
    for(int i =0; i=attributes.size()) {
      // What remains is a bounds check: an out-of-range index is rejected
      // with a RuntimeException that names the requested index and the
      // number of attributes held.
      throw new RuntimeException("Attribute "+index+" does not exist, only have "+attributes.size());
    }
    // in range: hand back the element stored at that position
    return attributes.get(index);
  }
  /**
   * Report how many entries the attribute list currently holds.
   *
   * @return the size of the {@code attributes} list
   */
  public int nAttributes() {
    final int count = attributes.size();
    return count;
  }
  // NOTE(review): the type arguments of this map are not visible here;
  // presumably it maps an attribute name to its index. Confirm against the
  // original source before parameterizing it.
  protected Map name2index = new HashMap<>();
  /**
   * The attributes held by this object. The index-based accessor, the size
   * query and the iterator all read from this list, and each of them hands
   * its elements out as {@code Attribute}, hence the element type.
   */
  protected List<Attribute> attributes = new ArrayList<>();

  /**
   * Return the attribute that describes the target.
   * NOTE: the index of the target attribute is one larger than the highest
   * index among the independent attributes; that index can not be used
   * to look the target attribute up, though!
   *
   * @return the target attribute instance
   */
  public Attribute getTargetAttribute() {
    return this.targetAttribute;
  }

  /** Backing field read by {@link #getTargetAttribute()}. */
  protected Attribute targetAttribute;
  
  /**
   * Create a fresh iterator over the attribute list, positioned at index 0.
   *
   * @return a new {@link AttributeIterator} bound to this instance
   */
  @Override
  public Iterator<Attribute> iterator() {
    return new AttributeIterator();
  }

  /**
   * Iterator that walks the enclosing object's {@code attributes} list from
   * index 0 upwards. It reads the list directly and keeps no copy of it.
   */
  public class AttributeIterator implements Iterator<Attribute> {

    /* the index which would get returned next */
    private int currentIndex = 0;

    /**
     * @return {@code true} while the cursor is still inside the list
     */
    @Override
    public boolean hasNext() {
      return currentIndex < attributes.size();
    }

    /**
     * Return the attribute at the cursor and advance the cursor by one.
     *
     * @return the next attribute
     * @throws NoSuchElementException if the list is exhausted
     */
    @Override
    public Attribute next() {
      // The Iterator contract asks for NoSuchElementException at the end;
      // checking first also keeps the cursor from advancing past the list.
      if (!hasNext()) {
        throw new NoSuchElementException(
            "No more attributes, only have " + attributes.size());
      }
      return attributes.get(currentIndex++);
    }
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy