// org.codelibs.elasticsearch.taste.similarity.GenericItemSimilarity (Maven / Gradle / Ivy)
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.codelibs.elasticsearch.taste.similarity;
import java.util.Collection;
import java.util.Iterator;
import org.apache.mahout.common.RandomUtils;
import org.codelibs.elasticsearch.taste.common.FastByIDMap;
import org.codelibs.elasticsearch.taste.common.FastIDSet;
import org.codelibs.elasticsearch.taste.common.Refreshable;
import org.codelibs.elasticsearch.taste.exception.TasteException;
import org.codelibs.elasticsearch.taste.model.DataModel;
import org.codelibs.elasticsearch.taste.recommender.TopItems;
import com.google.common.base.Preconditions;
import com.google.common.collect.AbstractIterator;
/**
 * <p>
 * A "generic" {@link ItemSimilarity} which takes a static list of precomputed item similarities and bases its
 * responses on that alone. The values may have been precomputed offline by another process, stored in a file,
 * and then read and fed into an instance of this class.
 * </p>
 *
 * <p>
 * This is perhaps the best {@link ItemSimilarity} to use with
 * {@link org.codelibs.elasticsearch.taste.recommender.GenericItemBasedRecommender}, for now, since the point
 * of item-based recommenders is that they can take advantage of the fact that item similarity is relatively
 * static, can be precomputed, and then used in computation to gain a significant performance advantage.
 * </p>
 */
public final class GenericItemSimilarity implements ItemSimilarity {
private static final long[] NO_IDS = new long[0];
private final FastByIDMap> similarityMaps = new FastByIDMap>();
private final FastByIDMap similarItemIDsIndex = new FastByIDMap();
/**
 * <p>
 * Creates a {@link GenericItemSimilarity} from a precomputed list of {@link ItemItemSimilarity}s. Each
 * represents the similarity between two distinct items. Since similarity is assumed to be symmetric, it is
 * not necessary to specify similarity between item1 and item2, and item2 and item1. Both are the same. It
 * is also not necessary to specify a similarity between any item and itself; these are assumed to be 1.0
 * and any such entry is ignored.
 * </p>
 *
 * <p>
 * Note that specifying a similarity between two items twice is not an error, but, the later value will win.
 * </p>
 *
 * @param similarities
 *            set of {@link ItemItemSimilarity}s on which to base this instance
 */
public GenericItemSimilarity(final Iterable<ItemItemSimilarity> similarities) {
    initSimilarityMaps(similarities.iterator());
}
/**
 * <p>
 * Like {@link #GenericItemSimilarity(Iterable)}, but will only keep the specified number of similarities
 * from the given {@link Iterable} of similarities. It will keep those with the highest similarity -- those
 * that are therefore most important. The selection itself is delegated to
 * {@link TopItems#getTopItemItemSimilarities}.
 * </p>
 *
 * <p>
 * Thanks to tsmorton for suggesting this and providing part of the implementation.
 * </p>
 *
 * @param similarities
 *            set of {@link ItemItemSimilarity}s on which to base this instance
 * @param maxToKeep
 *            maximum number of similarities to keep
 */
public GenericItemSimilarity(
        final Iterable<ItemItemSimilarity> similarities, final int maxToKeep) {
    final Iterable<ItemItemSimilarity> keptSimilarities = TopItems
            .getTopItemItemSimilarities(maxToKeep, similarities.iterator());
    initSimilarityMaps(keptSimilarities.iterator());
}
/**
 * <p>
 * Derives the item-item similarities from another {@link ItemSimilarity} implementation applied to the
 * items of a {@link DataModel}, instead of taking a ready-made list of {@link ItemItemSimilarity}s.
 * </p>
 *
 * <p>
 * Constructing a {@link GenericItemSimilarity} like this is valid, but arguably misses part of the point
 * of an item-based recommender: such recommenders assume that item-item similarities are relatively
 * fixed and may already be known independently of user preferences. When that knowledge exists, prefer
 * injecting it through {@link #GenericItemSimilarity(Iterable)}.
 * </p>
 *
 * @param otherSimilarity
 *            other {@link ItemSimilarity} to get similarities from
 * @param dataModel
 *            data model to get items from
 */
public GenericItemSimilarity(final ItemSimilarity otherSimilarity,
        final DataModel dataModel) {
    // Snapshot the model's item IDs into an array so pairs can be enumerated by index.
    final long[] allItemIDs = GenericUserSimilarity.longIteratorToList(dataModel.getItemIDs());
    final DataModelSimilaritiesIterator pairSimilarities =
            new DataModelSimilaritiesIterator(otherSimilarity, allItemIDs);
    initSimilarityMaps(pairSimilarities);
}
/**
 * <p>
 * Like {@link #GenericItemSimilarity(ItemSimilarity, DataModel)}, but will only keep the specified
 * number of similarities from the given {@link DataModel}. It will keep those with the highest similarity
 * -- those that are therefore most important. The selection itself is delegated to
 * {@link TopItems#getTopItemItemSimilarities}.
 * </p>
 *
 * <p>
 * Thanks to tsmorton for suggesting this and providing part of the implementation.
 * </p>
 *
 * @param otherSimilarity
 *            other {@link ItemSimilarity} to get similarities from
 * @param dataModel
 *            data model to get items from
 * @param maxToKeep
 *            maximum number of similarities to keep
 */
public GenericItemSimilarity(final ItemSimilarity otherSimilarity,
        final DataModel dataModel, final int maxToKeep) {
    final long[] itemIDs = GenericUserSimilarity
            .longIteratorToList(dataModel.getItemIDs());
    // Lazily enumerates the similarity of every unordered item pair in the model.
    final Iterator<ItemItemSimilarity> it = new DataModelSimilaritiesIterator(
            otherSimilarity, itemIDs);
    final Iterable<ItemItemSimilarity> keptSimilarities = TopItems
            .getTopItemItemSimilarities(maxToKeep, it);
    initSimilarityMaps(keptSimilarities.iterator());
}
/**
 * Drains the given iterator into {@link #similarityMaps} and {@link #similarItemIDsIndex}.
 *
 * <p>
 * Each pair is stored once, keyed by its smaller item ID and then by its larger one, and is indexed in
 * both directions. Entries relating an item to itself are skipped. A later entry for the same pair
 * replaces the value stored by an earlier one.
 * </p>
 *
 * @param similarities
 *            similarities to store; consumed completely
 */
private void initSimilarityMaps(
        final Iterator<ItemItemSimilarity> similarities) {
    while (similarities.hasNext()) {
        final ItemItemSimilarity iic = similarities.next();
        final long similarityItemID1 = iic.getItemID1();
        final long similarityItemID2 = iic.getItemID2();
        if (similarityItemID1 == similarityItemID2) {
            // Similarity between an item and itself is already assumed to be 1.0
            continue;
        }
        // Order them -- first key should be the "smaller" one
        final long itemID1 = Math.min(similarityItemID1, similarityItemID2);
        final long itemID2 = Math.max(similarityItemID1, similarityItemID2);
        FastByIDMap<Double> map = similarityMaps.get(itemID1);
        if (map == null) {
            map = new FastByIDMap<Double>();
            similarityMaps.put(itemID1, map);
        }
        map.put(itemID2, iic.getValue());
        doIndex(itemID1, itemID2);
        doIndex(itemID2, itemID1);
    }
}
/**
 * Records {@code toItemID} as a neighbour of {@code fromItemID} in {@link #similarItemIDsIndex},
 * creating the neighbour set for {@code fromItemID} on first use.
 *
 * @param fromItemID
 *            item whose neighbour set is updated
 * @param toItemID
 *            item to add to that set
 */
private void doIndex(final long fromItemID, final long toItemID) {
    final FastIDSet existing = similarItemIDsIndex.get(fromItemID);
    if (existing != null) {
        existing.add(toItemID);
        return;
    }
    // First neighbour seen for this item: register a fresh set, then fill it.
    final FastIDSet created = new FastIDSet();
    similarItemIDsIndex.put(fromItemID, created);
    created.add(toItemID);
}
/**
 * <p>
 * Returns the similarity between two items. Note that similarity is assumed to be symmetric, that
 * {@code itemSimilarity(item1, item2) == itemSimilarity(item2, item1)}, and that
 * {@code itemSimilarity(item1,item1) == 1.0} for all items.
 * </p>
 *
 * @param itemID1
 *            first item
 * @param itemID2
 *            second item
 * @return similarity between the two, or {@link Double#NaN} if no similarity is stored for the pair
 */
@Override
public double itemSimilarity(final long itemID1, final long itemID2) {
    if (itemID1 == itemID2) {
        return 1.0;
    }
    // Pairs are stored under (smaller ID, larger ID), so normalise the lookup order.
    final long firstID = Math.min(itemID1, itemID2);
    final long secondID = Math.max(itemID1, itemID2);
    final FastByIDMap<Double> nextMap = similarityMaps.get(firstID);
    if (nextMap == null) {
        return Double.NaN;
    }
    final Double similarity = nextMap.get(secondID);
    return similarity == null ? Double.NaN : similarity;
}
/**
 * Looks up the similarity of one item against each of several others.
 *
 * @param itemID1
 *            item to compare from
 * @param itemID2s
 *            items to compare against
 * @return array of the same length as {@code itemID2s}, where element {@code k} is
 *         {@code itemSimilarity(itemID1, itemID2s[k])}
 */
@Override
public double[] itemSimilarities(final long itemID1, final long[] itemID2s) {
    final double[] similarities = new double[itemID2s.length];
    int slot = 0;
    for (final long otherItemID : itemID2s) {
        similarities[slot++] = itemSimilarity(itemID1, otherItemID);
    }
    return similarities;
}
/**
 * Returns the IDs of all items that have a stored similarity with the given item.
 *
 * @param itemID
 *            item to look up
 * @return the indexed neighbour IDs, or an empty array when the item has none
 */
@Override
public long[] allSimilarItemIDs(final long itemID) {
    final FastIDSet neighbours = similarItemIDsIndex.get(itemID);
    if (neighbours == null) {
        return NO_IDS;
    }
    return neighbours.toArray();
}
/**
 * Does nothing: the similarities held by this instance are fixed at construction time.
 *
 * @param alreadyRefreshed
 *            components already refreshed by the caller; ignored
 */
@Override
public void refresh(final Collection<Refreshable> alreadyRefreshed) {
    // Do nothing
}
/** Encapsulates a similarity between two items. Similarity must be in the range [-1.0,1.0]. */
public static final class ItemItemSimilarity implements
Comparable {
private final long itemID1;
private final long itemID2;
private final double value;
/**
* @param itemID1
* first item
* @param itemID2
* second item
* @param value
* similarity between the two
* @throws IllegalArgumentException
* if value is NaN, less than -1.0 or greater than 1.0
*/
public ItemItemSimilarity(final long itemID1, final long itemID2,
final double value) {
Preconditions.checkArgument(value >= -1.0 && value <= 1.0,
"Illegal value: " + value
+ ". Must be: -1.0 <= value <= 1.0");
this.itemID1 = itemID1;
this.itemID2 = itemID2;
this.value = value;
}
public long getItemID1() {
return itemID1;
}
public long getItemID2() {
return itemID2;
}
public double getValue() {
return value;
}
@Override
public String toString() {
return "ItemItemSimilarity[" + itemID1 + ',' + itemID2 + ':'
+ value + ']';
}
/** Defines an ordering from highest similarity to lowest. */
@Override
public int compareTo(final ItemItemSimilarity other) {
final double otherValue = other.getValue();
return value > otherValue ? -1 : value < otherValue ? 1 : 0;
}
@Override
public boolean equals(final Object other) {
if (!(other instanceof ItemItemSimilarity)) {
return false;
}
final ItemItemSimilarity otherSimilarity = (ItemItemSimilarity) other;
return otherSimilarity.getItemID1() == itemID1
&& otherSimilarity.getItemID2() == itemID2
&& otherSimilarity.getValue() == value;
}
@Override
public int hashCode() {
return (int) itemID1 ^ (int) itemID2
^ RandomUtils.hashDouble(value);
}
}
/**
 * Lazily enumerates the similarity of every unordered pair of items in a fixed array of item IDs, as
 * computed by a delegate {@link ItemSimilarity}. Pairs whose similarity is NaN are skipped.
 */
private static final class DataModelSimilaritiesIterator extends
        AbstractIterator<ItemItemSimilarity> {
    private final ItemSimilarity otherSimilarity;
    private final long[] itemIDs;
    /** Index of the first item of the current pair. */
    private int i;
    /** Cached {@code itemIDs[i]}. */
    private long itemID1;
    /** Index of the second item of the current pair; always greater than {@code i}. */
    private int j;

    /**
     * @param otherSimilarity
     *            similarity implementation used to score each pair
     * @param itemIDs
     *            IDs of the items to pair up; may be empty, in which case the iterator yields nothing
     */
    private DataModelSimilaritiesIterator(
            final ItemSimilarity otherSimilarity, final long[] itemIDs) {
        this.otherSimilarity = otherSimilarity;
        this.itemIDs = itemIDs;
        i = 0;
        // An empty model has no first item. computeNext() never reads itemID1 in that case because
        // its loop guard (i < size - 1) is already false, so any placeholder value is safe here.
        itemID1 = itemIDs.length == 0 ? 0L : itemIDs[0];
        j = 1;
    }

    /**
     * Advances through the (i, j) pairs with i &lt; j until one has a non-NaN similarity.
     *
     * @return the next similarity, or the end-of-data marker once all pairs are exhausted
     * @throws TasteException
     *             wrapping any exception raised by the delegate similarity
     */
    @Override
    protected ItemItemSimilarity computeNext() {
        final int size = itemIDs.length;
        ItemItemSimilarity result = null;
        while (result == null && i < size - 1) {
            final long itemID2 = itemIDs[j];
            double similarity;
            try {
                similarity = otherSimilarity.itemSimilarity(itemID1,
                        itemID2);
            } catch (final Exception te) {
                // ugly:
                throw new TasteException("Invalid state: " + itemID1 + ", "
                        + itemID2, te);
            }
            if (!Double.isNaN(similarity)) {
                result = new ItemItemSimilarity(itemID1, itemID2,
                        similarity);
            }
            // Move to the next pair; when j runs off the end, advance i and restart j just after it.
            if (++j == size) {
                itemID1 = itemIDs[++i];
                j = i + 1;
            }
        }
        if (result == null) {
            return endOfData();
        } else {
            return result;
        }
    }
}
}