All Downloads are FREE. Search and download functionalities are using the official Maven repository.

uk.ac.standrews.cs.utilities.mi_file.CandidateSet Maven / Gradle / Ivy

There is a newer version: 1.0.4
Show newest version
/*
 * Copyright 2019 Systems Research Group, University of St Andrews:
 * <https://github.com/stacs-srg>
 *
 * This file is part of the module utilities.
 *
 * utilities is free software: you can redistribute it and/or modify it under the terms of the GNU General Public
 * License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later
 * version.
 *
 * utilities is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied
 * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along with utilities. If not, see
 * <http://www.gnu.org/licenses/>.
 */
package uk.ac.standrews.cs.utilities.mi_file;

import uk.ac.standrews.cs.utilities.metrics.coreConcepts.DataDistance;
import uk.ac.standrews.cs.utilities.metrics.coreConcepts.Metric;

import java.util.*;

/**
 * Created by al on 11/10/2017.
 */
public class CandidateSet {

    private final int maxEntries;
    private final TreeMap orderedRes;         //here we maintain the k current best using the distance as entry
    private final HashMap objectsOfOrderedRes; //here we maintain the k current best using the data as entry
    private final T query;                              // the object for which we are finding NN.
    private final Metric distance_wrapper;
    private int k;                                      // number of objects to be retrieved

    private double kDist = Double.MAX_VALUE;

    private HashMap tempResults = new HashMap<>(); // Temporary results?

    public CandidateSet(T query, Metric distance_wrapper, int maxEntries, int k) {

        this.query = query;
        this.distance_wrapper = distance_wrapper;
        this.maxEntries = maxEntries;
        this.k = k;
        orderedRes = new TreeMap();
        objectsOfOrderedRes = new HashMap();
        tempResults = new HashMap<>();
    }

    public void put(T object, int score, int position, T pivot, int postingListToBeAccessed, int accessedPostingLists) {

        int increment = Math.abs(position - score); //the "spearman footrule" distance is the sum of the position differences

        double currentScore;

        Double i = tempResults.get(object);
        if (i != null) {
            currentScore = i - maxEntries + increment;
            i = currentScore;
        } else {
            currentScore = maxEntries * (postingListToBeAccessed - 1) + increment; /* all ints? */
        }

        double minDist = currentScore - maxEntries * (postingListToBeAccessed - accessedPostingLists); // the minimum distance this object can reach

        if (minDist > kDist) {
            tempResults.remove(object);
        } else {                       //the object can enter the best k. Let's insert it
            if (i == null) {
                i = currentScore;
                tempResults.put(object, i);   //insert in the full result list
            }
            orderedInsert(object, currentScore);  //insert in the best current k
        }
    }

    public List> getDataDistances() {
        List> result = new ArrayList<>();

        for (Map.Entry entry : orderedRes.entrySet()) {
            result.add(new DataDistance<>(entry.getValue(), distance_wrapper.distance(entry.getValue(), query)));

        }
        return result;
    }

    //-------------------------------------------------------------

    private void orderedInsert(T object, double dist) {
        double rdist = 0;

        if (orderedRes.size() < k) { // the list is not full - we can do the insert.

            Double d = eliminateDuplicateObjects(object);  // if it is already in objectsOfOrderedRes we remove it
            if (d != null) {                            // it was in the above collection - so remove it from the other data structure
                eliminateDuplicateMetrics(d);
            } // next do the insertion in the lists
            while (object != null) { // loop is necessary because could be two objects at same distance. We are using the TreeMap in a sorted way - hack.
                dist += rdist;
                d = dist;
                objectsOfOrderedRes.put(object, d);           // might do multiple inserts but this is safe - they will overwrite each other in the map.
                object = orderedRes.put(d, object);           //there could be another object with the same distance: we have to reinsert it
                rdist += 0.0001 * Math.random();               // Hack to insert two objects at (almost) the same distance.
                // if there  was an object at this distance already go around the loop again.
            }
            kDist = orderedRes.lastKey();
        } else if (dist < kDist) {

            Double d = eliminateDuplicateObjects(object);  // if it is already in objectsOfOrderedRes we remove it
            if (d != null) {                            // it was in the above collection - so remove it from the other data structure
                eliminateDuplicateMetrics(d);
            }
            if (orderedRes.size() >= k) {
                T to = orderedRes.remove(orderedRes.lastKey());
                objectsOfOrderedRes.remove(to);
            }
            while (object != null) { // loop is necessary because could be two objects at same distance. We are using the TreeMap in a sorted way - hack.

                dist += rdist;
                d = dist;
                objectsOfOrderedRes.put(object, d);
                object = orderedRes.put(d, object);        //there could be another object with the same distance: we have to reinsert it
                rdist += 0.0001 * Math.random();            // Hack as above.

            }
            kDist = orderedRes.lastKey();
        }
    }

    private Double eliminateDuplicateObjects(T object) {
        return objectsOfOrderedRes.remove(object);
    }

    //if object o is already one of the k current best, we eliminate it
    private T eliminateDuplicateMetrics(double d) {
        return orderedRes.remove(d);
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy