All downloads are free. Search and download functionality uses the official Maven repository.

org.apache.mahout.cf.taste.impl.similarity.SpearmanCorrelationSimilarity Maven / Gradle / Ivy

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.mahout.cf.taste.impl.similarity;

import java.util.Collection;

import org.apache.mahout.cf.taste.common.Refreshable;
import org.apache.mahout.cf.taste.common.TasteException;
import org.apache.mahout.cf.taste.impl.common.RefreshHelper;
import org.apache.mahout.cf.taste.model.DataModel;
import org.apache.mahout.cf.taste.model.PreferenceArray;
import org.apache.mahout.cf.taste.similarity.PreferenceInferrer;
import org.apache.mahout.cf.taste.similarity.UserSimilarity;

import com.google.common.base.Preconditions;

/**
 * 

* Like {@link PearsonCorrelationSimilarity}, but compares relative ranking of preference values instead of * preference values themselves. That is, each user's preferences are sorted and then assign a rank as their * preference value, with 1 being assigned to the least preferred item. *

*/ public final class SpearmanCorrelationSimilarity implements UserSimilarity { private final DataModel dataModel; public SpearmanCorrelationSimilarity(DataModel dataModel) { this.dataModel = Preconditions.checkNotNull(dataModel); } @Override public double userSimilarity(long userID1, long userID2) throws TasteException { PreferenceArray xPrefs = dataModel.getPreferencesFromUser(userID1); PreferenceArray yPrefs = dataModel.getPreferencesFromUser(userID2); int xLength = xPrefs.length(); int yLength = yPrefs.length(); if (xLength <= 1 || yLength <= 1) { return Double.NaN; } // Copy prefs since we need to modify pref values to ranks xPrefs = xPrefs.clone(); yPrefs = yPrefs.clone(); // First sort by values from low to high xPrefs.sortByValue(); yPrefs.sortByValue(); // Assign ranks from low to high float nextRank = 1.0f; for (int i = 0; i < xLength; i++) { // ... but only for items that are common to both pref arrays if (yPrefs.hasPrefWithItemID(xPrefs.getItemID(i))) { xPrefs.setValue(i, nextRank); nextRank += 1.0f; } // Other values are bogus but don't matter } nextRank = 1.0f; for (int i = 0; i < yLength; i++) { if (xPrefs.hasPrefWithItemID(yPrefs.getItemID(i))) { yPrefs.setValue(i, nextRank); nextRank += 1.0f; } } xPrefs.sortByItem(); yPrefs.sortByItem(); long xIndex = xPrefs.getItemID(0); long yIndex = yPrefs.getItemID(0); int xPrefIndex = 0; int yPrefIndex = 0; double sumXYRankDiff2 = 0.0; int count = 0; while (true) { int compare = xIndex < yIndex ? -1 : xIndex > yIndex ? 
1 : 0; if (compare == 0) { double diff = xPrefs.getValue(xPrefIndex) - yPrefs.getValue(yPrefIndex); sumXYRankDiff2 += diff * diff; count++; } if (compare <= 0) { if (++xPrefIndex >= xLength) { break; } xIndex = xPrefs.getItemID(xPrefIndex); } if (compare >= 0) { if (++yPrefIndex >= yLength) { break; } yIndex = yPrefs.getItemID(yPrefIndex); } } if (count <= 1) { return Double.NaN; } // When ranks are unique, this formula actually gives the Pearson correlation return 1.0 - 6.0 * sumXYRankDiff2 / (count * (count * count - 1)); } @Override public void setPreferenceInferrer(PreferenceInferrer inferrer) { throw new UnsupportedOperationException(); } @Override public void refresh(Collection alreadyRefreshed) { alreadyRefreshed = RefreshHelper.buildRefreshed(alreadyRefreshed); RefreshHelper.maybeRefresh(alreadyRefreshed, dataModel); } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy