All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.github.TKnudsen.ComplexDataObject.model.io.parsers.arff.ARFFParser Maven / Gradle / Ivy

Go to download

A library that models real-world objects in Java, referred to as ComplexDataObjects. Other features: IO and preprocessing of ComplexDataObjects.

There is a newer version: 0.2.13
Show newest version
package com.github.TKnudsen.ComplexDataObject.model.io.parsers.arff;

import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;

import org.apache.commons.math3.exception.NullArgumentException;

import com.github.TKnudsen.ComplexDataObject.data.complexDataObject.ComplexDataObject;
import com.github.TKnudsen.ComplexDataObject.model.io.parsers.ComplexDataObjectParser;
import com.github.TKnudsen.ComplexDataObject.model.io.parsers.ParserTools;

import weka.core.Instance;
import weka.core.Instances;

/**
 * 

* Title: ARFFParser *

* *

* Description: Parses ComplexDataObjects from an ARFF file. Note: this parser * is not part of the persistence layer. In fact, it gathers new * ComplexDataObjects from a given file. *

* *

* Copyright: Copyright (c) 2015 *

* * @author Juergen Bernard * @version 1.0 */ public class ARFFParser implements ComplexDataObjectParser { private String missingValueIndicator = "?"; @Override public List parse(String filename) throws IOException { Instances instances = parseARFF(filename); if (instances == null) return null; List data = new ArrayList<>(); // Step1: create metaMapping Map>> metaMapping = WekaTools.getAttributeSchema(instances); // Step2: create ComplexDataObjects for (int zeile = 0; zeile < instances.numInstances(); zeile++) { Instance instance = instances.instance(zeile); ComplexDataObject complexDataObject = new ComplexDataObject(); // parse columns for (Integer spalte = 0; spalte < instances.numAttributes(); spalte++) { Entry entry = null; try { entry = WekaTools.assignEntry(metaMapping, instance, spalte, missingValueIndicator); } catch (Exception e) { } if (entry != null) { if (entry.getValue() != null && entry.getValue() instanceof String) { Date date = ParserTools.parseDate((String) entry.getValue()); if (date != null) complexDataObject.add(entry.getKey(), date); else complexDataObject.add(entry.getKey(), entry.getValue()); } else complexDataObject.add(entry.getKey(), entry.getValue()); } else throw new NullArgumentException(); if (complexDataObject.getAttribute("Name") != null) complexDataObject.setName(complexDataObject.getAttribute("Name").toString()); if (complexDataObject.getAttribute("Description") != null) complexDataObject.setDescription(complexDataObject.getAttribute("Description").toString()); } data.add(complexDataObject); } return data; } public static Instances parseARFF(String arffFile) { if (arffFile == null) return null; BufferedReader reader; try { FileReader fileReader = new FileReader(arffFile); reader = new BufferedReader(fileReader); Instances instances = new Instances(reader); reader.close(); return instances; } catch (FileNotFoundException e) { e.printStackTrace(); return null; } catch (IOException e) { e.printStackTrace(); return null; } } public String getMissingValueIndicator() { return missingValueIndicator; } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy