All downloads are free. The search and download functionality uses the official Maven repository.

com.expleague.ml.data.impl.RankedDataSet Maven / Gradle / Ivy

package com.expleague.ml.data.impl;

import com.expleague.commons.math.vectors.Mx;
import com.expleague.commons.math.vectors.Vec;
import com.expleague.ml.data.set.DataSet;
import com.expleague.ml.data.set.VecDataSet;
import com.expleague.ml.data.stats.OrderByFeature;


/**
 * Per-feature ranks of the rows of a {@link VecDataSet}.
 *
 * <p>For every feature (column of the data matrix) the rows are visited in the order supplied by
 * {@link OrderByFeature} (presumably ascending by feature value; confirm against
 * {@code OrderByFeature}). Neighbouring rows in that order whose values differ by less than
 * {@code TIE_EPSILON} form one tie group, and every row of a group receives the same rank:
 * the 0-based position of the group's first row plus half of the group size.
 *
 * Created by noxoomo on 02/04/15.
 */
public class RankedDataSet {
  /** Two neighbouring values (in sorted order) closer than this are treated as tied. */
  private static final double TIE_EPSILON = 1e-9;

  private final DataSet base;
  /** {@code ranks[feature][row]} is the rank of {@code row} with respect to {@code feature}. */
  private final float[][] ranks;
  final OrderByFeature byFeature;

  /**
   * Computes the ranks of all rows for every feature of {@code base}.
   *
   * @param base data set to rank; it is cast to {@link VecDataSet}, so passing any other
   *             implementation fails with a {@link ClassCastException}
   */
  public RankedDataSet(DataSet base) {
    this.base = base;
    byFeature = base.cache().cache(OrderByFeature.class, DataSet.class);
    final Mx data = ((VecDataSet) base).data();
    // Only the outer array is allocated here: every inner array is produced by rank(...) below,
    // so pre-allocating columns x rows floats would just be thrown away.
    this.ranks = new float[data.columns()][];
    for (int feature = 0; feature < ranks.length; ++feature) {
      final int[] order = byFeature.orderBy(feature).direct();
      ranks[feature] = rank(data, feature, order);
    }
  }


  /**
   * Ranks an already sorted array of values.
   *
   * @param sortedValues values in sorted order
   * @return array where element {@code i} is the rank of {@code sortedValues[i]}; tied
   *         neighbours (difference below {@code TIE_EPSILON}) share one rank equal to the
   *         position of the first tied element plus half of the tie group size
   */
  private static float[] rank(final double[] sortedValues) {
    final int n = sortedValues.length;
    final float[] ranks = new float[n];
    int start = 0;
    while (start < n) {
      // Extend [start, end) while each value is tied with its predecessor.
      int end = start + 1;
      while (end < n && Math.abs(sortedValues[end] - sortedValues[end - 1]) < TIE_EPSILON) {
        ++end;
      }
      final float rk = start + 0.5f * (end - start);
      for (int i = start; i < end; ++i) {
        ranks[i] = rk;
      }
      start = end;
    }
    return ranks;
  }

  /**
   * Ranks the rows of {@code data} by the values of one column.
   *
   * @param data    matrix holding the values; element {@code (row, feature)} is read
   * @param feature column to rank by
   * @param order   row indices in the sorted order of the column's values
   * @return array indexed by row (not by sorted position) with the rank of that row; tied
   *         neighbours (difference below {@code TIE_EPSILON}) share one rank equal to the
   *         sorted position of the first tied row plus half of the tie group size
   */
  private static float[] rank(final Mx data, int feature, int[] order) {
    final int n = order.length;
    final float[] ranks = new float[n];
    int start = 0;
    while (start < n) {
      // Extend [start, end) while each value is tied with its predecessor in sorted order.
      int end = start + 1;
      while (end < n
          && Math.abs(data.get(order[end], feature) - data.get(order[end - 1], feature)) < TIE_EPSILON) {
        ++end;
      }
      final float rk = start + 0.5f * (end - start);
      for (int i = start; i < end; ++i) {
        // Write through the permutation so the result is addressed by original row index.
        ranks[order[i]] = rk;
      }
      start = end;
    }
    return ranks;
  }

  /**
   * Returns the ranks of all rows for the given feature.
   *
   * @param fIndex feature (column) index
   * @return the internal rank array indexed by row; it is not copied, so modifications made by
   *         the caller are visible to this object
   */
  public float[] feature(int fIndex) {
    return ranks[fIndex];
  }


  /**
   * Finds where {@code condition} falls in the sorted order of a feature.
   *
   * @param fIndex    feature (column) index
   * @param condition threshold value
   * @return position, in the feature's sorted order, of the first row whose value is strictly
   *         greater than {@code condition}: 0 when every value is greater, the number of rows
   *         when none is
   */
  float rank(int fIndex, float condition) {
    final Mx data = ((VecDataSet) base).data();
    return upperBound(data.col(fIndex), byFeature.orderBy(fIndex).direct(), condition);
  }


  /**
   * Binary search for the first index whose value is strictly greater than {@code key}.
   * {@code java.util.Arrays.binarySearch} is not used because it does not guarantee which of
   * several equal entries is found.
   *
   * @param arr values in ascending order
   * @param key value to search for
   * @return first index {@code i} with {@code arr[i] > key}; 0 if all values are greater,
   *         {@code arr.length} if none is
   */
  private static int upperBound(final double[] arr, final double key) {
    // Invariants: every index <= left holds a value <= key, every index >= right holds a
    // value > key. left starts at -1 (not 0) so that arr[0] is actually examined.
    int left = -1;
    int right = arr.length;
    while (right - left > 1) {
      // left >= -1 and right >= 1 inside the loop, so the sum is never negative.
      final int mid = (left + right) >>> 1;
      if (arr[mid] <= key) {
        left = mid;
      } else {
        right = mid;
      }
    }
    return right;
  }


  /**
   * Binary search, through a sorting permutation, for the first position whose value is strictly
   * greater than {@code key}. {@code java.util.Arrays.binarySearch} is not used because it does
   * not guarantee which of several equal entries is found.
   *
   * @param feature values of one feature, addressed by row index
   * @param map     row indices ordered so that {@code feature.get(map[i])} is ascending
   * @param key     value to search for
   * @return first position {@code i} with {@code feature.get(map[i]) > key}; 0 if all values are
   *         greater, {@code map.length} if none is
   */
  private static int upperBound(final Vec feature, final int[] map, final double key) {
    // Invariants: every position <= left holds a value <= key, every position >= right holds a
    // value > key. left starts at -1 (not 0) so that position 0 is actually examined.
    int left = -1;
    int right = map.length;
    while (right - left > 1) {
      // left >= -1 and right >= 1 inside the loop, so the sum is never negative.
      final int mid = (left + right) >>> 1;
      if (feature.get(map[mid]) <= key) {
        left = mid;
      } else {
        right = mid;
      }
    }
    return right;
  }


}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy