
// src.it.unimi.dsi.law.rank.EquiprobablePreferenceVector (Maven / Gradle / Ivy artifact "law")
package it.unimi.dsi.law.rank;
import java.io.Serializable;
import java.util.Collections;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/*
* Copyright (C) 2006-2020 Paolo Boldi, Massimo Santini and Sebastiano Vigna
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 3 of the License, or (at your option)
* any later version.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
* or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, see <http://www.gnu.org/licenses/>.
*
*/
import it.unimi.dsi.big.util.PrefixMap;
import it.unimi.dsi.fastutil.doubles.AbstractDoubleList;
import it.unimi.dsi.fastutil.ints.IntArrayList;
import it.unimi.dsi.fastutil.ints.IntList;
import it.unimi.dsi.fastutil.io.BinIO;
import it.unimi.dsi.io.FileLinesCollection;
import it.unimi.dsi.logging.ProgressLogger;
import it.unimi.dsi.util.LongInterval;
import it.unimi.dsi.util.LongIntervals;
/**
* A class that impersonates a preference vector which is equiprobable on a set of values and zero
* elsewhere. The set of values for which the vector is non-zero (that is, the distribution
* support) is expressed as a set of intervals. A utility main method is included to
* specify a set of hosts and a prefix map from which to extract entire host intervals.
*
* <p>Despite the fact that the class implements add methods, it is immutable.
*
* @author Alessio Orlandi
*/
public class EquiprobablePreferenceVector extends AbstractDoubleList implements Serializable {
private static final long serialVersionUID = 1L;
private static final Logger LOGGER = LoggerFactory.getLogger(EquiprobablePreferenceVector.class);
/**
* The array containing all starting positions of the intervals in the preference vector. The
* positions are inside the interval.
*/
protected IntList begin;
/**
* The array containing all ending positions of the intervals in the preference vector. The
* positions are inside the interval.
*/
protected IntList end;
/**
* The effective size of the preference vector, usually greater than the last interval end.
*/
protected int size;
/**
* The probability value on the support elements, that is, 1 divided by the cardinality of the
* support itself.
*/
protected double probability;
/**
 * Creates a new preference vector which is equiprobable on the given support and zero elsewhere.
 *
 * <p>The support is described by closed intervals: for each index {@code i}, every position
 * from {@code begin.getInt(i)} to {@code end.getInt(i)} (both included) belongs to the support.
 * If the two lists are empty, or {@code size} is zero, the resulting vector is zero everywhere.
 *
 * <p>The lists are stored by reference, not copied.
 *
 * @param begin the ascending-order sorted list containing start positions for the support
 * intervals.
 * @param end the ascending-order sorted list containing end positions for the support
 * intervals.
 * @param size the total size of the preference vector (maxposition + 1)
 * @throws IllegalArgumentException if the two lists are not sorted or overlap or disagree in
 * size, if an interval ends before it starts or starts at a negative position, or if the
 * size parameter is negative or not greater than the last interval end.
 */
public EquiprobablePreferenceVector(final IntList begin, final IntList end, int size) throws IllegalArgumentException {
    this.begin = begin;
    this.end = end;
    this.size = size;
    // Do some sanity check
    if (begin.size() != end.size())
        throw new IllegalArgumentException(" The two lists must have the same size ");
    if (size < 0)
        throw new IllegalArgumentException("The given size is negative: " + size);
    // A zero-sized vector is accepted as is (kept for backward compatibility).
    if (size == 0) {
        probability = 0.0;
        return;
    }
    final int intervals = begin.size();
    // Empty support: nothing to validate, and 1.0 / 0 would be meaningless.
    if (intervals == 0) {
        probability = 0.0;
        return;
    }
    if (size <= end.getInt(intervals - 1))
        throw new IllegalArgumentException(" The given size is not consistent with the last interval end!");
    if (begin.getInt(0) < 0)
        throw new IllegalArgumentException("The first interval starts at a negative position: " + begin.getInt(0));
    int supportSize = 0;
    for (int i = 0; i < intervals; i++) {
        final int first = begin.getInt(i);
        final int last = end.getInt(i);
        if (first > last)
            throw new IllegalArgumentException("Interval " + i + " ends (" + last + ") before it starts (" + first + ")");
        // Ends are inclusive, so an interval must start strictly after the previous end.
        // Together with first <= last this also forces both lists to be strictly ascending.
        if (i > 0 && first <= end.getInt(i - 1))
            throw new IllegalArgumentException("The two lists describing intervals are not sorted in ascending or overlap");
        supportSize += last - first + 1;
    }
    probability = 1.0 / supportSize;
}
/**
 * Returns the value of the preference vector at the given position.
 *
 * <p>The lookup is a binary search on the interval start positions performed with
 * primitive accessors, so no boxing takes place. It relies on {@code begin} being sorted
 * in ascending order and on each interval ending no earlier than it starts.
 *
 * @param pos a position in the vector.
 * @return {@code probability} if {@code pos} falls inside one of the support intervals
 * (ends included), {@code 0.0} otherwise.
 * @throws IndexOutOfBoundsException if {@code pos} is negative or not smaller than
 * {@code size}.
 */
@Override
public double getDouble(final int pos) {
    if (pos < 0 || pos >= size)
        throw new IndexOutOfBoundsException("Invalid position " + pos + " specified.");
    // Locate the last interval whose start is not greater than pos.
    int low = 0;
    int high = begin.size() - 1;
    int candidate = -1;
    while (low <= high) {
        final int mid = (low + high) >>> 1; // Overflow-safe midpoint.
        if (begin.getInt(mid) <= pos) {
            candidate = mid;
            low = mid + 1;
        }
        else high = mid - 1;
    }
    // Every interval starts after pos: pos is outside the support.
    if (candidate < 0)
        return 0.0;
    // The candidate starts at or before pos; pos is in the support iff it does not end before pos.
    return end.getInt(candidate) >= pos ? probability : 0.0;
}
/**
 * Returns the number of entries of this preference vector.
 *
 * @return the value of the {@code size} field.
 */
@Override
public int size() {
    return this.size;
}
@SuppressWarnings("unchecked")
public static void main(final String[] args) throws Exception {
PrefixMap extends CharSequence> haystack;
FileLinesCollection needles;
long size;
if (args.length < 3) {
System.out.println("Use a prefix map and a list of hosts to extract" + " intervals for the given hosts.\n");
System.out.println("Usage: " + EquiprobablePreferenceVector.class.getSimpleName()
+ "