All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.metafacture.statistics.UniformSampler Maven / Gradle / Ivy

/*
 * Copyright 2013, 2014 Deutsche Nationalbibliothek
 *
 * Licensed under the Apache License, Version 2.0 the "License";
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.metafacture.statistics;

import java.util.ArrayList;
import java.util.List;
import java.util.Random;

import org.metafacture.framework.FluxCommand;
import org.metafacture.framework.ObjectReceiver;
import org.metafacture.framework.annotations.Description;
import org.metafacture.framework.annotations.In;
import org.metafacture.framework.annotations.Out;
import org.metafacture.framework.helpers.DefaultObjectPipe;


/**
 * Draws a uniform sample of records from the input stream.
 *
 * @param  object type
 *
 * @author Christoph Böhme
 * @author Markus Geipel
 *
 */
@Description("Draws a uniform sample of records from the input stream.")
@In(Object.class)
@Out(Object.class)
@FluxCommand("draw-uniform-sample")
public final class UniformSampler extends
		DefaultObjectPipe> {

	private final int sampleSize;
	private final List sample;
	private final Random random = new Random();

	private long count;

	public UniformSampler(final int sampleSize) {
		super();
		this.sampleSize = sampleSize;
		sample = new ArrayList(sampleSize);
	}


	public UniformSampler(final String sampleSize) {
		this(Integer.parseInt(sampleSize));
	}


	public int getSampleSize() {
		return sampleSize;
	}


	public void setSeed(final long seed) {
		random.setSeed(seed);
	}

	@Override
	public void process(final T obj) {
		assert !isClosed();
		assert null!=obj;
		count += 1;
		if (sample.size() < sampleSize) {
			sample.add(obj);
		} else {
			final double p = sampleSize / (double)count;
			if (random.nextDouble() < p) {
				sample.set(random.nextInt(sampleSize), obj);
			}
		}
	}

	@Override
	protected void onCloseStream() {
		for(T obj : sample) {
			getReceiver().process(obj);
		}
		sample.clear();
		count = 0;
	}

	@Override
	protected void onResetStream() {
		sample.clear();
		count = 0;
	}

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy