All Downloads are FREE. Search and download functionalities are using the official Maven repository.

src.it.unimi.dsi.law.rank.EquiprobablePreferenceVector Maven / Gradle / Ivy

The newest version!
package it.unimi.dsi.law.rank;

import java.io.Serializable;
import java.util.Collections;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/*
 * Copyright (C) 2006-2020 Paolo Boldi, Massimo Santini and Sebastiano Vigna
 *
 *  This program is free software; you can redistribute it and/or modify it
 *  under the terms of the GNU General Public License as published by the Free
 *  Software Foundation; either version 3 of the License, or (at your option)
 *  any later version.
 *
 *  This program is distributed in the hope that it will be useful, but
 *  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 *  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 *  for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, see .
 *
 */

import it.unimi.dsi.big.util.PrefixMap;
import it.unimi.dsi.fastutil.doubles.AbstractDoubleList;
import it.unimi.dsi.fastutil.ints.IntArrayList;
import it.unimi.dsi.fastutil.ints.IntList;
import it.unimi.dsi.fastutil.io.BinIO;
import it.unimi.dsi.io.FileLinesCollection;
import it.unimi.dsi.logging.ProgressLogger;
import it.unimi.dsi.util.LongInterval;
import it.unimi.dsi.util.LongIntervals;


/**
 * A class that impersonates a preference vector which is equiprobable on a set of values and zero
 * elsewhere. The set of values for which the vector is non-zero (that is, the distribution
 * support) is expressed as a set of intervals. 

An utility main method is included to * specify a set of hosts and a prefix map from which to extract entire host intervals.

Despite * the fact the class implements add methods, it is immutable. * * @author Alessio Orlandi */ public class EquiprobablePreferenceVector extends AbstractDoubleList implements Serializable { private static final long serialVersionUID = 1L; private static final Logger LOGGER = LoggerFactory.getLogger(EquiprobablePreferenceVector.class); /** * The array containing all starting positions of the intervals in the preference vector. The * positions are inside in the interval. */ protected IntList begin; /** * The array containing all ending positions of the intervals in the preference vector. The * positions are inside the interval. */ protected IntList end; /** * The effective size of the preference vector, usually greater than the last interval end. */ protected int size; /** * The probability value on the support elements, that is, 1 divided by the cardinality of the * support itself. */ protected double probability; /** * Create a new preference vector which is equiprobable on the given support and zero elsewhere. * * @param begin the ascending-order sorted list containing start positions for the support * intervals. * @param end the ascending-order sorted list containing end positions for the support * intervals. * @param size the total size of the preference vector (maxposition + 1) * @throws IllegalArgumentException if the two lists are not sorted or overlap or disagree in * size or the size parameter is inconsistent. */ public EquiprobablePreferenceVector(final IntList begin, final IntList end, int size) throws IllegalArgumentException { this.begin = begin; this.end = end; this.size = size; // Do some sanity check if (begin.size() != end.size()) throw new IllegalArgumentException(" The two lists must have the same size "); if (size == 0) { probability = 0.0; size = 0; return; } if (end.size() > 0 && size <= end.getInt(end.size() - 1)) throw new IllegalArgumentException(" The given size is not consistent with the last interval end!"); int supportSize = end.getInt(0) - begin.getInt(0) + 1; for (int i = 1; i < begin.size(); i++) if (begin.getInt(i) <= begin.getInt(i - 1) || end.getInt(i) <= end.getInt(i - 1) || begin.getInt(i) < end.getInt(i - 1)) throw new IllegalArgumentException("The two lists describing intervals are not sorted in ascending or overlap"); else supportSize += end.getInt(i) - begin.getInt(i) + 1; probability = 1.0 / supportSize; } // TODO: we really search in a list using objects? @Override @SuppressWarnings("boxing") public double getDouble(final int pos) { if (pos < 0 || pos >= size) throw new IndexOutOfBoundsException("Invalid position " + pos + " specified."); int search = Collections.binarySearch(begin, pos); if (search >= 0) return probability; // Not found: find the insertion point. if ((search = -(search + 1)) <= 0) return 0.0; if (begin.getInt(search - 1) <= pos && end.getInt(search - 1) >= pos) return probability; return 0.0; } @Override public int size() { return size; } @SuppressWarnings("unchecked") public static void main(final String[] args) throws Exception { PrefixMap haystack; FileLinesCollection needles; long size; if (args.length < 3) { System.out.println("Use a prefix map and a list of hosts to extract" + " intervals for the given hosts.\n"); System.out.println("Usage: " + EquiprobablePreferenceVector.class.getSimpleName() + " [size]"); System.out.println("The host list is a set of hostnames, one per host. Not found hosts will be discarded"); System.out.println("If size is not specified, it is automatically induced by the prefix map"); return; } haystack = (PrefixMap)BinIO.loadObject(args[0]); needles = new FileLinesCollection(args[1], "ASCII"); if (args.length > 3) size = Long.parseLong(args[3]); else size = haystack.size64(); if (size > Integer.MAX_VALUE) throw new IllegalArgumentException(); final IntList start = new IntArrayList(), end = new IntArrayList(); final ProgressLogger pl = new ProgressLogger(LOGGER); pl.displayFreeMemory = false; pl.expectedUpdates = needles.size(); pl.start(); for (final CharSequence host : needles) { final LongInterval i = haystack.rangeMap().get(host); if (i == LongIntervals.EMPTY_INTERVAL) System.err.println("WARNING: The given map does not contain " + host); start.add((int)i.left); end.add((int)i.right); pl.update(); } pl.stop(); Collections.sort(start); Collections.sort(end); BinIO.storeObject(new EquiprobablePreferenceVector(start, end, (int)size), args[2]); } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy