All downloads are free. The search and download features use the official Maven repository.

fr.liglab.jlcm.io.FileCollector Maven / Gradle / Ivy

Go to download

A multi-threaded implementation of the LCM (Linear Closed itemsets Miner) algorithm proposed by T.Uno and H.Arimura

There is a newer version: 1.7.0
Show newest version
/*
	This file is part of jLCM - see https://github.com/martinkirch/jlcm/
	
	Copyright 2013,2014 Martin Kirchgessner, Vincent Leroy, Alexandre Termier, Sihem Amer-Yahia, Marie-Christine Rousset, Université Joseph Fourier and CNRS

	Licensed under the Apache License, Version 2.0 (the "License");
	you may not use this file except in compliance with the License.
	You may obtain a copy of the License at

	 http://www.apache.org/licenses/LICENSE-2.0
	 
	or see the LICENSE.txt file joined with this program.

	Unless required by applicable law or agreed to in writing, software
	distributed under the License is distributed on an "AS IS" BASIS,
	WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	See the License for the specific language governing permissions and
	limitations under the License.
*/


package fr.liglab.jlcm.io;

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.nio.BufferOverflowException;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.nio.charset.Charset;


/**
 * A thread-unsafe PatternsCollector that writes to the path provided at instantiation.
 * 
 * Each collected pattern is written as one ASCII line:
 * the support, a tabulation, the pattern's items separated by single spaces, and a line feed.
 * Output goes through an internal buffer of {@link #BUFFER_CAPACITY} bytes, which is
 * written to the file whenever it is full and when {@link #close()} is invoked.
 * 
 * @see MultiThreadedFileCollector
 */
public class FileCollector extends PatternsWriter {
	
	// this should be profiled and tuned !
	protected static final int BUFFER_CAPACITY = 4096;
	
	/** how many patterns have been collected so far */
	protected long collected = 0;
	/** sum of the collected patterns' lengths */
	protected long collectedLength = 0;
	protected FileOutputStream stream;
	protected FileChannel channel;
	protected ByteBuffer buffer;
	protected static final Charset charset = Charset.forName("ASCII");
	
	/**
	 * Opens (and truncates) the output file. A warning is printed on stderr if
	 * the file already exists.
	 * 
	 * @param path where patterns will be written
	 * @throws IOException if the file cannot be opened for writing
	 */
	public FileCollector(final String path) throws IOException {
		File file = new File(path);
		
		if (file.exists()) {
			System.err.println("Warning : overwriting output file "+path);
		}
		
		this.stream = new FileOutputStream(file, false);
		this.channel = this.stream.getChannel();
		
		this.buffer = ByteBuffer.allocateDirect(BUFFER_CAPACITY);
		this.buffer.clear();
	}

	/**
	 * Appends one pattern to the output, as "support TAB item item ... LF".
	 * 
	 * @param support the pattern's support
	 * @param pattern array holding the pattern's items
	 * @param length how many items of {@code pattern} (starting at index 0) belong to the pattern
	 */
	@Override
	public void collect(int support, int[] pattern, int length) {
		putInt(support);
		safePut((byte) '\t'); // putChar('\t') would append TWO bytes, but in ASCII we need only one
		
		for (int i = 0; i < length; i++) {
			if (i > 0) {
				safePut((byte) ' ');
			}
			
			putItem(pattern[i]);
		}
		
		safePut((byte) '\n');
		this.collected++;
		// only the first 'length' cells were written: the array itself may be larger
		this.collectedLength += length;
	}
	
	/**
	 * Writes a single item of a pattern; here it is simply its decimal representation.
	 * 
	 * @param i the item
	 */
	protected void putItem(final int i) {
		this.putInt(i);
	}
	
	/**
	 * Appends the decimal ASCII representation of an integer to the buffer,
	 * flushing the buffer first if there is not enough room left.
	 * 
	 * @param i the value to write
	 */
	protected void putInt(final int i) {
		final byte[] asBytes = Integer.toString(i).getBytes(charset);
		
		if (this.buffer.remaining() < asBytes.length) {
			flush();
		}
		
		this.buffer.put(asBytes);
	}
	
	/**
	 * Appends a single byte to the buffer, flushing the buffer first if it is full.
	 * 
	 * @param b the byte to write
	 */
	protected void safePut(final byte b) {
		if (!this.buffer.hasRemaining()) {
			flush();
		}
		
		this.buffer.put(b);
	}
	
	/**
	 * Writes the buffer's content to the file and empties the buffer.
	 * I/O errors are reported on stderr; the buffer is emptied in any case so
	 * that it is always left in a writable state.
	 */
	protected void flush() {
		this.buffer.flip();
		
		try {
			// a single write() is allowed to consume only a part of the buffer
			while (this.buffer.hasRemaining()) {
				this.channel.write(this.buffer);
			}
		} catch (IOException e) {
			e.printStackTrace(System.err);
		} finally {
			// always reset, otherwise the buffer would stay flipped with a truncated limit
			this.buffer.clear();
		}
	}

	/**
	 * Flushes pending output and closes the file. I/O errors are reported on stderr.
	 * 
	 * @return how many patterns have been collected
	 */
	public long close() {
		flush();
		
		try {
			try {
				this.channel.close();
			} finally {
				// attempted even if closing the channel failed
				this.stream.close();
			}
		} catch (IOException e) {
			e.printStackTrace(System.err);
		}
		
		return this.collected;
	}

	/**
	 * @return the average length of collected patterns, rounded down (0 if nothing has been collected)
	 */
	public int getAveragePatternLength() {
		if (this.collected == 0) {
			return 0;
		} else {
			return (int) (this.collectedLength / this.collected);
		}
	}
	
	/**
	 * @return how many patterns have been written so far
	 */
	public long getCollected() {
		return this.collected;
	}
	
	/**
	 * @return sum of collected patterns' lengths
	 */
	public long getCollectedLength() {
		return this.collectedLength;
	}
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy