All downloads are free. The search and download functionality uses the official Maven repository.

org.codelibs.elasticsearch.taste.similarity.GenericItemSimilarity Maven / Gradle / Ivy

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.codelibs.elasticsearch.taste.similarity;

import java.util.Collection;
import java.util.Iterator;

import org.apache.mahout.common.RandomUtils;
import org.codelibs.elasticsearch.taste.common.FastByIDMap;
import org.codelibs.elasticsearch.taste.common.FastIDSet;
import org.codelibs.elasticsearch.taste.common.Refreshable;
import org.codelibs.elasticsearch.taste.exception.TasteException;
import org.codelibs.elasticsearch.taste.model.DataModel;
import org.codelibs.elasticsearch.taste.recommender.TopItems;

import com.google.common.base.Preconditions;
import com.google.common.collect.AbstractIterator;

/**
 * 

* A "generic" {@link ItemSimilarity} which takes a static list of precomputed item similarities and bases its * responses on that alone. The values may have been precomputed offline by another process, stored in a file, * and then read and fed into an instance of this class. *

* *

* This is perhaps the best {@link ItemSimilarity} to use with * {@link org.codelibs.elasticsearch.taste.recommender.GenericItemBasedRecommender}, for now, since the point * of item-based recommenders is that they can take advantage of the fact that item similarity is relatively * static, can be precomputed, and then used in computation to gain a significant performance advantage. *

*/ public final class GenericItemSimilarity implements ItemSimilarity { private static final long[] NO_IDS = new long[0]; private final FastByIDMap> similarityMaps = new FastByIDMap>(); private final FastByIDMap similarItemIDsIndex = new FastByIDMap(); /** *

* Creates a {@link GenericItemSimilarity} from a precomputed list of {@link ItemItemSimilarity}s. Each * represents the similarity between two distinct items. Since similarity is assumed to be symmetric, it is * not necessary to specify similarity between item1 and item2, and item2 and item1. Both are the same. It * is also not necessary to specify a similarity between any item and itself; these are assumed to be 1.0. *

* *

* Note that specifying a similarity between two items twice is not an error, but, the later value will win. *

* * @param similarities * set of {@link ItemItemSimilarity}s on which to base this instance */ public GenericItemSimilarity(final Iterable similarities) { initSimilarityMaps(similarities.iterator()); } /** *

* Like {@link #GenericItemSimilarity(Iterable)}, but will only keep the specified number of similarities * from the given {@link Iterable} of similarities. It will keep those with the highest similarity -- those * that are therefore most important. *

* *

* Thanks to tsmorton for suggesting this and providing part of the implementation. *

* * @param similarities * set of {@link ItemItemSimilarity}s on which to base this instance * @param maxToKeep * maximum number of similarities to keep */ public GenericItemSimilarity( final Iterable similarities, final int maxToKeep) { final Iterable keptSimilarities = TopItems .getTopItemItemSimilarities(maxToKeep, similarities.iterator()); initSimilarityMaps(keptSimilarities.iterator()); } /** *

* Builds a list of item-item similarities given an {@link ItemSimilarity} implementation and a * {@link DataModel}, rather than a list of {@link ItemItemSimilarity}s. *

* *

* It's valid to build a {@link GenericItemSimilarity} this way, but perhaps missing some of the point of an * item-based recommender. Item-based recommenders use the assumption that item-item similarities are * relatively fixed, and might be known already independent of user preferences. Hence it is useful to * inject that information, using {@link #GenericItemSimilarity(Iterable)}. *

* * @param otherSimilarity * other {@link ItemSimilarity} to get similarities from * @param dataModel * data model to get items from */ public GenericItemSimilarity(final ItemSimilarity otherSimilarity, final DataModel dataModel) { final long[] itemIDs = GenericUserSimilarity .longIteratorToList(dataModel.getItemIDs()); initSimilarityMaps(new DataModelSimilaritiesIterator(otherSimilarity, itemIDs)); } /** *

* Like {@link #GenericItemSimilarity(ItemSimilarity, DataModel)} )}, but will only keep the specified * number of similarities from the given {@link DataModel}. It will keep those with the highest similarity * -- those that are therefore most important. *

* *

* Thanks to tsmorton for suggesting this and providing part of the implementation. *

* * @param otherSimilarity * other {@link ItemSimilarity} to get similarities from * @param dataModel * data model to get items from * @param maxToKeep * maximum number of similarities to keep */ public GenericItemSimilarity(final ItemSimilarity otherSimilarity, final DataModel dataModel, final int maxToKeep) { final long[] itemIDs = GenericUserSimilarity .longIteratorToList(dataModel.getItemIDs()); final Iterator it = new DataModelSimilaritiesIterator( otherSimilarity, itemIDs); final Iterable keptSimilarities = TopItems .getTopItemItemSimilarities(maxToKeep, it); initSimilarityMaps(keptSimilarities.iterator()); } private void initSimilarityMaps( final Iterator similarities) { while (similarities.hasNext()) { final ItemItemSimilarity iic = similarities.next(); final long similarityItemID1 = iic.getItemID1(); final long similarityItemID2 = iic.getItemID2(); if (similarityItemID1 != similarityItemID2) { // Order them -- first key should be the "smaller" one long itemID1; long itemID2; if (similarityItemID1 < similarityItemID2) { itemID1 = similarityItemID1; itemID2 = similarityItemID2; } else { itemID1 = similarityItemID2; itemID2 = similarityItemID1; } FastByIDMap map = similarityMaps.get(itemID1); if (map == null) { map = new FastByIDMap(); similarityMaps.put(itemID1, map); } map.put(itemID2, iic.getValue()); doIndex(itemID1, itemID2); doIndex(itemID2, itemID1); } // else similarity between item and itself already assumed to be 1.0 } } private void doIndex(final long fromItemID, final long toItemID) { FastIDSet similarItemIDs = similarItemIDsIndex.get(fromItemID); if (similarItemIDs == null) { similarItemIDs = new FastIDSet(); similarItemIDsIndex.put(fromItemID, similarItemIDs); } similarItemIDs.add(toItemID); } /** *

* Returns the similarity between two items. Note that similarity is assumed to be symmetric, that * {@code itemSimilarity(item1, item2) == itemSimilarity(item2, item1)}, and that * {@code itemSimilarity(item1,item1) == 1.0} for all items. *

* * @param itemID1 * first item * @param itemID2 * second item * @return similarity between the two */ @Override public double itemSimilarity(final long itemID1, final long itemID2) { if (itemID1 == itemID2) { return 1.0; } long firstID; long secondID; if (itemID1 < itemID2) { firstID = itemID1; secondID = itemID2; } else { firstID = itemID2; secondID = itemID1; } final FastByIDMap nextMap = similarityMaps.get(firstID); if (nextMap == null) { return Double.NaN; } final Double similarity = nextMap.get(secondID); return similarity == null ? Double.NaN : similarity; } @Override public double[] itemSimilarities(final long itemID1, final long[] itemID2s) { final int length = itemID2s.length; final double[] result = new double[length]; for (int i = 0; i < length; i++) { result[i] = itemSimilarity(itemID1, itemID2s[i]); } return result; } @Override public long[] allSimilarItemIDs(final long itemID) { final FastIDSet similarItemIDs = similarItemIDsIndex.get(itemID); return similarItemIDs != null ? similarItemIDs.toArray() : NO_IDS; } @Override public void refresh(final Collection alreadyRefreshed) { // Do nothing } /** Encapsulates a similarity between two items. Similarity must be in the range [-1.0,1.0]. */ public static final class ItemItemSimilarity implements Comparable { private final long itemID1; private final long itemID2; private final double value; /** * @param itemID1 * first item * @param itemID2 * second item * @param value * similarity between the two * @throws IllegalArgumentException * if value is NaN, less than -1.0 or greater than 1.0 */ public ItemItemSimilarity(final long itemID1, final long itemID2, final double value) { Preconditions.checkArgument(value >= -1.0 && value <= 1.0, "Illegal value: " + value + ". 
Must be: -1.0 <= value <= 1.0"); this.itemID1 = itemID1; this.itemID2 = itemID2; this.value = value; } public long getItemID1() { return itemID1; } public long getItemID2() { return itemID2; } public double getValue() { return value; } @Override public String toString() { return "ItemItemSimilarity[" + itemID1 + ',' + itemID2 + ':' + value + ']'; } /** Defines an ordering from highest similarity to lowest. */ @Override public int compareTo(final ItemItemSimilarity other) { final double otherValue = other.getValue(); return value > otherValue ? -1 : value < otherValue ? 1 : 0; } @Override public boolean equals(final Object other) { if (!(other instanceof ItemItemSimilarity)) { return false; } final ItemItemSimilarity otherSimilarity = (ItemItemSimilarity) other; return otherSimilarity.getItemID1() == itemID1 && otherSimilarity.getItemID2() == itemID2 && otherSimilarity.getValue() == value; } @Override public int hashCode() { return (int) itemID1 ^ (int) itemID2 ^ RandomUtils.hashDouble(value); } } private static final class DataModelSimilaritiesIterator extends AbstractIterator { private final ItemSimilarity otherSimilarity; private final long[] itemIDs; private int i; private long itemID1; private int j; private DataModelSimilaritiesIterator( final ItemSimilarity otherSimilarity, final long[] itemIDs) { this.otherSimilarity = otherSimilarity; this.itemIDs = itemIDs; i = 0; itemID1 = itemIDs[0]; j = 1; } @Override protected ItemItemSimilarity computeNext() { final int size = itemIDs.length; ItemItemSimilarity result = null; while (result == null && i < size - 1) { final long itemID2 = itemIDs[j]; double similarity; try { similarity = otherSimilarity.itemSimilarity(itemID1, itemID2); } catch (final Exception te) { // ugly: throw new TasteException("Invalid state: " + itemID1 + ", " + itemID2, te); } if (!Double.isNaN(similarity)) { result = new ItemItemSimilarity(itemID1, itemID2, similarity); } if (++j == size) { itemID1 = itemIDs[++i]; j = i + 1; } } if (result == 
null) { return endOfData(); } else { return result; } } } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy