All Downloads are FREE. Search and download functionalities are using the official Maven repository.

stream.io.SvmLightWriter Maven / Gradle / Ivy

/*
 *  streams library
 *
 *  Copyright (C) 2011-2012 by Christian Bockermann, Hendrik Blom
 * 
 *  streams is a library, API and runtime environment for processing high
 *  volume data streams. It is composed of three submodules "stream-api",
 *  "stream-core" and "stream-runtime".
 *
 *  The streams library (and its submodules) is free software: you can 
 *  redistribute it and/or modify it under the terms of the 
 *  GNU Affero General Public License as published by the Free Software 
 *  Foundation, either version 3 of the License, or (at your option) any 
 *  later version.
 *
 *  The stream.ai library (and its submodules) is distributed in the hope
 *  that it will be useful, but WITHOUT ANY WARRANTY; without even the implied 
 *  warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU Affero General Public License for more details.
 *
 *  You should have received a copy of the GNU Affero General Public License
 *  along with this program.  If not, see http://www.gnu.org/licenses/.
 */
package stream.io;

import java.io.File;
import java.io.FileOutputStream;
import java.io.OutputStream;
import java.io.PrintStream;
import java.io.Serializable;
import java.util.HashMap;
import java.util.Map;

import stream.data.Data;
import stream.data.DataUtils;

/**
 * Writes labeled data items in an SVMlight-style line format:
 * <code>label index:value index:value ... # key:'text' ...</code>
 * <p>
 * Every attribute whose value parses as a double is written as an
 * <code>index:value</code> pair. Attributes that cannot be written as a
 * numeric feature are collected and, if {@link #isIncludeAnnotations()} is
 * set, appended to the line after a <code>#</code> marker.
 * <p>
 * The output stream <code>p</code> and the logger <code>log</code> used here
 * are expected to be provided by the parent class {@link CsvWriter}.
 * <p>
 * NOTE(review): features are emitted in the iteration order of
 * <code>DataUtils.getKeys</code>; nothing here sorts them by index. Also, an
 * index assigned to a named feature may coincide with a numeric key that
 * appears later. Confirm whether consumers of the output depend on either.
 */
public class SvmLightWriter extends CsvWriter {

	/** Matches keys consisting only of digits; such keys are used as index directly. */
	private static final java.util.regex.Pattern NUMERIC_KEY = java.util.regex.Pattern.compile( "\\d+" );

	/* This map provides a mapping of features to indexes.
	 * Features with a numeric (integer) name are mapped to their value directly
	 * and are not stored in this map. */
	Map<String, Integer> indexes = new HashMap<String, Integer>();

	/** The largest feature index written so far. */
	Integer largestIndex = 0;

	/** Whether non-numeric attributes are appended to each line as an annotation. */
	boolean includeAnnotations = true;

	public SvmLightWriter(){
	}

	/**
	 * @param out the stream handed to the parent writer
	 */
	public SvmLightWriter(OutputStream out) {
		super(out);
	}

	/**
	 * Intentionally writes nothing: this format has no header line.
	 */
	@Override
	public void writeHeader(Data datum) {
	}

	/**
	 * Writes a single item as one line. Items without an <code>@label</code>
	 * attribute are logged and skipped.
	 *
	 * @param datum the item to write
	 */
	@Override
	public void write(Data datum) {

		Serializable label = datum.get( "@label" );
		if( label == null ){
			log.error( "SvmLightStreamWriter does only support writing labeled data!" );
			log.error( "Skipping datum {}", datum );
			return;
		}

		p.print( label );

		StringBuilder annotation = new StringBuilder();

		for( String key : DataUtils.getKeys( datum ) ){

			Serializable raw = datum.get( key );
			Double value = parseNumeric( raw );

			// Resolve the index BEFORE emitting anything, so that a feature which
			// turns out to be unusable leaves no stray separator in the output.
			Integer index = ( value == null ) ? null : resolveIndex( key );

			if( index == null ){
				log.debug( "Skipping non-numerical feature '{}'", key );
				annotation.append( " " );
				annotation.append( key );
				annotation.append( ":'" );
				annotation.append( lineEscape( raw ) );
				annotation.append( "'" );
				continue;
			}

			if( largestIndex < index )
				largestIndex = index;

			p.print( " " );
			p.print( index );
			p.print( ":" );
			p.print( value );
		}

		if( includeAnnotations && annotation.length() > 0 ){
			p.print( " #" );
			p.print( annotation.toString() );
		}

		p.println();
	}

	/**
	 * Parses the string form of a value as a double.
	 *
	 * @param raw the attribute value, may be <code>null</code>
	 * @return the parsed value, or <code>null</code> if <code>raw</code> is
	 *         <code>null</code> or its string form is not a valid double
	 */
	private Double parseNumeric( Serializable raw ){
		if( raw == null )
			return null;

		String text = raw.toString();
		if( text == null )
			return null;

		try {
			return Double.valueOf( text );
		} catch (NumberFormatException e) {
			return null;
		}
	}

	/**
	 * Determines the feature index for a key. All-digit keys are used as the
	 * index itself; any other key is looked up in {@link #indexes} and, if not
	 * yet present, registered with <code>largestIndex + 1</code>.
	 *
	 * @param key the attribute name
	 * @return the index, or <code>null</code> if an all-digit key does not fit
	 *         into an <code>int</code>
	 */
	private Integer resolveIndex( String key ){
		if( NUMERIC_KEY.matcher( key ).matches() ){
			try {
				return Integer.valueOf( key );
			} catch (NumberFormatException e) {
				// digits only, but out of int range
				return null;
			}
		}

		Integer index = indexes.get( key );
		if( index == null ){
			index = largestIndex + 1;
			indexes.put( key, index );
		}
		return index;
	}

	/**
	 * @return the includeAnnotations
	 */
	public boolean isIncludeAnnotations()
	{
		return includeAnnotations;
	}

	/**
	 * @param includeAnnotations the includeAnnotations to set
	 */
	public void setIncludeAnnotations(boolean includeAnnotations)
	{
		this.includeAnnotations = includeAnnotations;
	}

	/**
	 * Makes a value safe to embed in a single-quoted annotation on one line:
	 * single quotes become commas and newline characters become spaces.
	 *
	 * @param val the value to escape, may be <code>null</code>
	 * @return the escaped string, or the empty string for <code>null</code>
	 */
	protected String lineEscape( Serializable val ){
		if( val == null )
			return "";

		String str = val.toString();
		return str.replace( '\'', ',' ).replace( '\n', ' ' );
	}

	/**
	 * Writes the feature-to-index mapping to the given file.
	 *
	 * @param file the file to write to
	 * @throws Exception if the file cannot be opened or closed
	 */
	public void printMapping( File file ) throws Exception {
		FileOutputStream fos = new FileOutputStream( file );
		try {
			printMapping( fos );
		} finally {
			// release the file handle even if writing the mapping fails
			fos.close();
		}
	}

	/**
	 * Writes the feature-to-index mapping as <code>feature,index</code> lines,
	 * preceded by a <code>#feature,index</code> header line. Only features
	 * stored in {@link #indexes} are listed.
	 * <p>
	 * The given stream is flushed and closed by this method.
	 *
	 * @param out the stream to write to
	 */
	public void printMapping( OutputStream out ){
		PrintStream writer = new PrintStream( out );
		writer.println( "#feature,index" );
		for( Map.Entry<String, Integer> entry : indexes.entrySet() ){
			writer.println( entry.getKey() + "," + entry.getValue() );
		}
		writer.flush();
		writer.close();
	}
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy