
// stream.io.SvmLightWriter Maven / Gradle / Ivy
// The newest version!
/*
* streams library
*
* Copyright (C) 2011-2014 by Christian Bockermann, Hendrik Blom
*
* streams is a library, API and runtime environment for processing high
* volume data streams. It is composed of three submodules "stream-api",
* "stream-core" and "stream-runtime".
*
* The streams library (and its submodules) is free software: you can
* redistribute it and/or modify it under the terms of the
* GNU Affero General Public License as published by the Free Software
* Foundation, either version 3 of the License, or (at your option) any
* later version.
*
* The stream.ai library (and its submodules) is distributed in the hope
* that it will be useful, but WITHOUT ANY WARRANTY; without even the implied
* warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see http://www.gnu.org/licenses/.
*/
package stream.io;
import java.io.File;
import java.io.FileOutputStream;
import java.io.OutputStream;
import java.io.PrintStream;
import java.io.Serializable;
import java.util.HashMap;
import java.util.Map;
import java.util.regex.Pattern;
import stream.Data;
import stream.data.DataUtils;
/**
 * Writes labeled data items as sparse lines of the form
 * {@code label index:value index:value ... # key:'text' ...}.
 * <p>
 * Every attribute whose value parses as a double becomes an
 * {@code index:value} pair. Attributes that do not parse are collected into a
 * trailing {@code #} annotation (when {@link #isIncludeAnnotations()} is
 * {@code true}). Items without an {@code @label} attribute are skipped.
 * <p>
 * Known limitations, visible in the index assignment below:
 * <ul>
 * <li>A key made only of digits is used as the index verbatim and is not
 * checked against indexes already handed out to named features, so both
 * kinds of key can end up sharing one index.</li>
 * <li>Pairs are emitted in the iteration order of
 * {@code DataUtils.getKeys(datum)}; they are not sorted by index.</li>
 * </ul>
 *
 * @author Christian Bockermann <[email protected]>
 * @deprecated marked deprecated by its author; no replacement is named in
 *             this file.
 */
@Deprecated
public class SvmLightWriter extends CsvWriter {

    /** Matches keys that consist solely of ASCII digits. */
    private static final Pattern DIGITS = Pattern.compile("\\d+");

    /** Matches a single line break in any of the common conventions. */
    private static final Pattern LINE_BREAK = Pattern.compile("\\r\\n|\\r|\\n");

    /*
     * Maps feature names to the index assigned to them. Features with a
     * numeric (integer) name are not stored here; they are mapped to their
     * numeric value directly.
     */
    Map<String, Integer> indexes = new HashMap<String, Integer>();

    /** Largest index written so far; new named features receive this + 1. */
    Integer largestIndex = 0;

    /** Whether non-numerical attributes are appended as a '#' annotation. */
    boolean includeAnnotations = true;

    public SvmLightWriter() {
    }

    public SvmLightWriter(OutputStream out) {
        super(out);
    }

    /**
     * Intentionally empty: this writer emits no header line.
     *
     * @param datum
     *            ignored
     */
    @Override
    public void writeHeader(Data datum) {
    }

    /**
     * Writes one item as a single line. Items lacking an {@code @label}
     * attribute are logged and skipped without producing any output.
     *
     * @param datum
     *            the item to write
     */
    @Override
    public void write(Data datum) {
        Serializable label = datum.get("@label");
        if (label == null) {
            log.error("SvmLightStreamWriter does only support writing labeled data!");
            log.error("Skipping datum {}", datum);
            return;
        }

        p.print(label);

        StringBuilder annotation = new StringBuilder();
        for (String key : DataUtils.getKeys(datum)) {
            Serializable raw = datum.get(key);
            Double value = toDouble(raw);

            // Only resolve (and possibly register) an index for values that
            // are actually numeric, and do so BEFORE printing anything so a
            // failure cannot leave a dangling separator in the output.
            Integer index = (value == null) ? null : resolveIndex(key);

            if (index == null) {
                log.debug("Skipping non-numerical feature '{}'", key);
                annotation.append(" ");
                annotation.append(key);
                annotation.append(":'");
                annotation.append(lineEscape(raw));
                annotation.append("'");
                continue;
            }

            p.print(" ");
            p.print(index);
            p.print(":");
            p.print(value);
        }

        if (includeAnnotations && annotation.length() > 0) {
            p.print(" #");
            p.print(annotation.toString());
        }
        p.println();
    }

    /**
     * Parses the string form of a value as a double.
     *
     * @param raw
     *            the attribute value, may be {@code null}
     * @return the parsed value, or {@code null} if {@code raw} is
     *         {@code null} or its string form is not a valid double
     */
    private static Double toDouble(Serializable raw) {
        if (raw == null) {
            return null;
        }
        String text = raw.toString();
        if (text == null) {
            return null;
        }
        try {
            return Double.valueOf(text);
        } catch (NumberFormatException notNumeric) {
            return null;
        }
    }

    /**
     * Determines the feature index for a key and keeps
     * {@link #largestIndex} up to date. Digit-only keys are used as the
     * index directly; any other key is looked up in {@link #indexes} and
     * registered with the next free index on first sight.
     *
     * @param key
     *            the attribute name
     * @return the index, or {@code null} if the key is digit-only but does
     *         not fit into an {@code Integer}
     */
    private Integer resolveIndex(String key) {
        Integer index;
        if (DIGITS.matcher(key).matches()) {
            try {
                index = Integer.valueOf(key);
            } catch (NumberFormatException tooLarge) {
                return null;
            }
        } else {
            index = indexes.get(key);
            if (index == null) {
                index = largestIndex + 1;
                indexes.put(key, index);
            }
        }

        if (largestIndex < index) {
            largestIndex = index;
        }
        return index;
    }

    /**
     * @return the includeAnnotations
     */
    public boolean isIncludeAnnotations() {
        return includeAnnotations;
    }

    /**
     * @param includeAnnotations
     *            the includeAnnotations to set
     */
    public void setIncludeAnnotations(boolean includeAnnotations) {
        this.includeAnnotations = includeAnnotations;
    }

    /**
     * Makes a value safe to embed in the single-quoted, single-line
     * annotation: single quotes become commas and every line break (CRLF, CR
     * or LF) becomes one space.
     *
     * @param val
     *            the value to escape, may be {@code null}
     * @return the escaped text, or an empty string for {@code null}
     */
    protected String lineEscape(Serializable val) {
        if (val == null) {
            return "";
        }
        String str = val.toString();
        if (str == null) {
            return "";
        }
        String unquoted = str.replace('\'', ',');
        return LINE_BREAK.matcher(unquoted).replaceAll(" ");
    }

    /**
     * Writes the feature-name-to-index mapping to a file.
     *
     * @param file
     *            the file to (over)write
     * @throws Exception
     *             if the file cannot be opened or closed
     */
    public void printMapping(File file) throws Exception {
        FileOutputStream fos = new FileOutputStream(file);
        try {
            printMapping(fos);
        } finally {
            // Ensures the file handle is released even if writing fails.
            fos.close();
        }
    }

    /**
     * Writes the feature-name-to-index mapping as {@code feature,index}
     * lines, preceded by a {@code #feature,index} header line. Only named
     * features are listed; digit-only keys are never stored in the mapping.
     * <p>
     * The given stream is flushed and closed by this method.
     *
     * @param out
     *            the stream to write to; closed on return
     */
    public void printMapping(OutputStream out) {
        PrintStream mapping = new PrintStream(out);
        mapping.println("#feature,index");
        for (Map.Entry<String, Integer> entry : indexes.entrySet()) {
            mapping.println(entry.getKey() + "," + entry.getValue());
        }
        mapping.flush();
        mapping.close();
    }
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy