All Downloads are FREE. Search and download functionalities are using the official Maven repository.

ciir.umass.edu.learning.SparseDataPoint Maven / Gradle / Ivy

/*===============================================================================
 * Copyright (c) 2010-2012 University of Massachusetts.  All Rights Reserved.
 *
 * Use of the RankLib package is subject to the terms of the software license set
 * forth in the LICENSE file included with this software, and also available at
 * http://people.cs.umass.edu/~vdang/ranklib_license.html
 *===============================================================================
 */

package ciir.umass.edu.learning;

import ciir.umass.edu.utilities.RankLibError;

import java.util.Arrays;
import java.util.function.Predicate;
import java.util.stream.DoubleStream;
import java.util.stream.IntStream;
import java.util.stream.Stream;

/**
 * Implements a sparse data point using a compressed sparse row data structure
 *
 * @author Siddhartha Bagaria
 */
public class SparseDataPoint extends DataPoint {

    private static accessPattern searchPattern = accessPattern.RANDOM;

    // The feature ids for known values
    int fIds[];

    // Profiling variables
    // private static int numCalls = 0;
    // private static float avgTime = 0;
    // Internal search optimizers. Currently unused.
    int lastMinId = -1;

    // The feature values for corresponding Ids
    //float fVals[]; //moved to the parent class
    int lastMinPos = -1;

    public SparseDataPoint(String text) {
        super(text);
    }

    /**
     * 

Allows the direct allocation of a SparseDataPoint instead of having it parse text.

*

The fVals array represents the sparse feature values. Only non-null values must be specified.

*

The fIds array has equal length to fVals and specifies for each position the feature ID that this position * in both array stands for. Thus, if fIds[1] == '5' then fVals[5] is the value of this data point for feature * number 5. The values in fIds must be sorted in an ascending manner.

* * @param fVals The feature values, starting at index 0. * @param fIds The feature IDs, starting at index 0. * @param queryId The ID of the query this datapoint is associated with. * @param relevanceLabel The relevance label of this datapoint with respect to the query. */ public SparseDataPoint(float[] fVals, int[] fIds, int numKnownFeatures, String queryId, float relevanceLabel) { this.fVals = fVals; this.fIds = fIds; this.knownFeatures = numKnownFeatures; this.id = queryId; this.label = relevanceLabel; } public SparseDataPoint(SparseDataPoint dp) { label = dp.label; id = dp.id; description = dp.description; cached = dp.cached; fIds = new int[dp.fIds.length]; fVals = new float[dp.fVals.length]; System.arraycopy(dp.fIds, 0, fIds, 0, dp.fIds.length); System.arraycopy(dp.fVals, 0, fVals, 0, dp.fVals.length); } private int locate(int fid) { if (searchPattern == accessPattern.SEQUENTIAL) { if (lastMinId > fid) { lastMinId = -1; lastMinPos = -1; } while (lastMinPos < knownFeatures && lastMinId < fid) lastMinId = fIds[++lastMinPos]; if (lastMinId == fid) return lastMinPos; } else if (searchPattern == accessPattern.RANDOM) { int pos = Arrays.binarySearch(fIds, fid); if (pos >= 0) return pos; } else System.err.println("Invalid search pattern specified for sparse data points."); return -1; } public boolean hasFeature(int fid) { return locate(fid) != -1; } @Override public float getFeatureValue(int fid) { //long time = System.nanoTime(); if (fid <= 0 || fid > knownFeatures) { if (missingZero) return 0f; throw RankLibError.create("Error in SparseDataPoint::getFeatureValue(): requesting unspecified feature, fid=" + fid); } int pos = locate(fid); //long completedIn = System.nanoTime() - time; //avgTime = (avgTime*numCalls + completedIn)/(++numCalls); //System.out.println("getFeatureValue average time: "+avgTime); if (pos >= 0) return fVals[pos]; return 0; // Should ideally be returning unknown? } @Override public void setFeatureValue(int fid, float fval) { if (fid <= 0 || fid > knownFeatures) { throw RankLibError.create("Error in SparseDataPoint::setFeatureValue(): feature (id=" + fid + ") out of range."); } int pos = locate(fid); if (pos >= 0) fVals[pos] = fval; else { System.err.println("Error in SparseDataPoint::setFeatureValue(): feature (id=" + fid + ") not found."); System.exit(1); } } @Override public float[] getFeatureVector() { float[] dfVals = new float[knownFeatures+1]; Arrays.fill(dfVals, UNKNOWN); for (int i = 0; i < fIds.length; i++) dfVals[fIds[i]] = fVals[i]; return dfVals; } @Override public void setFeatureVector(float[] dfVals) { int numPresentFeatures = (int) IntStream.range(0, dfVals.length).mapToDouble(i -> dfVals[i]).filter(val -> !Double.isNaN(val) && val != 0).count(); fIds = new int[numPresentFeatures]; fVals = new float[numPresentFeatures]; int pos = 0; for (int i = 1; i < dfVals.length; i++) { if (!isUnknown(dfVals[i])) { fIds[pos] = i; fVals[pos] = dfVals[i]; pos++; } } assert (pos == numPresentFeatures); } // Access pattern of the feature values private enum accessPattern { SEQUENTIAL, RANDOM } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy