All Downloads are FREE. Search and download functionalities are using the official Maven repository.

edu.mines.jtk.util.MedianFinder Maven / Gradle / Ivy

There is a newer version: 1.1.0
Show newest version
/****************************************************************************
Copyright 2010, Colorado School of Mines and others.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
****************************************************************************/
package edu.mines.jtk.util;

import static edu.mines.jtk.util.ArrayMath.*;

/**
 * Computes medians or weighted medians.
 * 

* The weighted median of n values x[i] is the value x that minimizes * the following function: *

 *        n-1
 * f(x) = sum w[i]*abs(x-x[i])
 *        i=0
 * 
* Here, the x[.] are values for which the median is to be computed, * the w[.] are positive weights, and x is the desired weighted median. * The function f(x) is convex and piecewise linear, so at least one * (but no more than two) of the specified values x[i] minimizes the * function f(x). *

* To find a minimum, we define sums of weights w[.] for which x[.] * is less than, equal to, and greater than x: *

 * wl(x) = sum of w[i] for x[i] < x
 * wm(x) = sum of w[i] for x[i] = x
 * wr(x) = sum of w[i] for x[i] > x
 * 
* and then define the left and right derivatives of f(x): *
 * dl(x) = wl(x)-wm(x)-wr(x) <= 0
 * dr(x) = wl(x)+wm(x)-wr(x) >= 0
 * 
* These inequality conditions are necessary and sufficient for x * to minimize f(x), that is, for x to be the weighted median. *

* When either dl(x) = 0 or dr(x) = 0, then the function f(x) has * zero slope between two values x[i] that both minimize f(x), and * the weighted median is then computed to be the average of those * two minimizing values. For example, such an average is always * computed when the number of values x[.] is even and all weights * w[.] are equal. *

* Computational complexity for both the median and the weighted * median is O(n), where n is the number of values x[.] (and weights * w[.]). In benchmark tests for large n, the cost of a median is * about 16 times more costly than a simple mean, and the cost of a * weighted median is about 1.5 times more costly than an unweighted * median. * * @author Dave Hale, Colorado School of Mines * @version 2010.10.11 */ public class MedianFinder { /** * Constructs a median finder. * @param n number of values for which to compute medians. */ public MedianFinder(int n) { _n = n; _x = new float[n]; } /** * Returns the median of the specified array of values. * @param x array of values. * @return the median. */ public float findMedian(float[] x) { Check.argument(_n==x.length,"length of x is valid"); copy(x,_x); int k = (_n-1)/2; quickPartialSort(k,_x); float xmed = _x[k]; if (_n%2==0) { float xmin = _x[_n-1]; for (int i=_n-2; i>k; --i) if (_x[i]a[k] ? j : a[i]>a[k] ? k : i); } private static void swap(float[] a, float[] b, int i, int j) { float ai = a[i]; a[i] = a[j]; a[j] = ai; float bi = b[i]; b[i] = b[j]; b[j] = bi; } private static void swap(float[] a, float[] b, int i, int j, int n) { while (n>0) { float ai = a[i]; a[i ] = a[j]; a[j ] = ai; float bi = b[i]; b[i++] = b[j]; b[j++] = bi; --n; } } private static void insertionSort(float[] w, float[] x, int p, int q) { for (int i=p+1; i<=q; ++i) for (int j=i; j>p && x[j-1]>x[j]; --j) swap(w,x,j,j-1); } private static void quickPartition(float[] w, float[] x, int[] m) { int p = m[0]; int q = m[1]; int k = med3(x,p,(p+q)/2,q); float y = x[k]; int a=p,b=p; int c=q,d=q; while (true) { while (b<=c && x[b]<=y) { if (x[b]==y) swap(w,x,a++,b); ++b; } while (c>=b && x[c]>=y) { if (x[c]==y) swap(w,x,c,d--); --c; } if (b>c) break; swap(w,x,b,c); ++b; --c; } int r = Math.min(a-p,b-a); int s = Math.min(d-c,q-d); int t = q+1; swap(w,x,p,b-r,r); swap(w,x,b,t-s,s); m[0] = p+(b-a); // p --- m[0]-1 | m[0] --- m[1] | m[1]+1 --- q m[1] = q-(d-c); // xy } private float findMedianSmallN(float[] w, float[] x) { // Insertion sort is quick for small n. for (int i=1; i<_n; ++i) for (int j=i; j>0 && x[j-1]>x[j]; --j) swap(w,x,j,j-1); // Half the sum of all weights = wh. For the median x, // we require wl(x) <= wh and wr(x) <= wh. float ws = 0.0f; for (int i=0; i<_n; ++i) ws += w[i]; float wh = 0.5f*ws; // Index one above upper bound for left sum of weights. int kl = 0; float wl = w[kl]; while (wl0.0f) { // if left derivative > 0, ... q = pp-1; // minimum lies to the left wc -= wm+wr; // decrease weight compensation } else if (dr<0.0f) { // if right derivative < 0, ... p = qq+1; // minimum lies to the right wc += wl+wm; // increase weight compensation } else if (dl==0.0f) { // if left-derivative = 0, ... float xmax = x[p0]; // find largest value in the left partition for (int i=pp-1; i>p0; --i) if (x[i]>xmax) xmax = x[i]; xmed = 0.5f*(x[pp]+xmax); // median = average of two values } else if (dr==0.0f) { // if right-derivative = 0, ... float xmin = x[q0]; // find smallest value in the right partition for (int i=qq+1; i 0 xmed = x[pp]; // so median is unique } } if (xmed==xnot) xmed = x[p]; // = x[q] return xmed; } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy