// org.codelibs.elasticsearch.taste.model.GenericDataModel Maven / Gradle / Ivy
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.codelibs.elasticsearch.taste.model;
import java.util.Arrays;
import java.util.Collection;
import java.util.List;
import java.util.Map;
import org.codelibs.elasticsearch.taste.common.FastByIDMap;
import org.codelibs.elasticsearch.taste.common.FastIDSet;
import org.codelibs.elasticsearch.taste.common.LongPrimitiveArrayIterator;
import org.codelibs.elasticsearch.taste.common.LongPrimitiveIterator;
import org.codelibs.elasticsearch.taste.common.Refreshable;
import org.codelibs.elasticsearch.taste.exception.NoSuchItemException;
import org.codelibs.elasticsearch.taste.exception.NoSuchUserException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.google.common.base.Preconditions;
import com.google.common.collect.Lists;
/**
* <p>
* A simple {@link DataModel} which uses given in-memory user data (a {@link FastByIDMap} of user IDs to
* preferences, rather than a {@link List}) as its data source. This implementation
* is mostly useful for small experiments and is not recommended for contexts where performance is important.
* </p>
*/
public final class GenericDataModel extends AbstractDataModel {
/**
*
*/
private static final long serialVersionUID = 1L;
private static final Logger log = LoggerFactory
.getLogger(GenericDataModel.class);
private final long[] userIDs;
private final FastByIDMap preferenceFromUsers;
private final long[] itemIDs;
private final FastByIDMap preferenceForItems;
private final FastByIDMap> timestamps;
/**
 * <p>
 * Creates a new {@link GenericDataModel} from the given users (and their preferences), with no
 * preference timestamps. This {@link DataModel} retains all this information in memory and is
 * effectively immutable.
 * </p>
 *
 * <p>
 * Delegates to the two-argument constructor, passing {@code null} for the timestamps.
 * </p>
 *
 * @param userData users to include; (see also {@link #toDataMap(FastByIDMap, boolean)})
 */
public GenericDataModel(final FastByIDMap<PreferenceArray> userData) {
    this(userData, null);
}
/**
 * <p>
 * Creates a new {@link GenericDataModel} from the given users (and their preferences). This
 * {@link DataModel} retains all this information in memory and is effectively immutable.
 * </p>
 *
 * <p>
 * The supplied {@code userData} map is retained as-is (not copied), and each user's
 * {@link PreferenceArray} in it is sorted by item in place. While scanning, the constructor
 * also builds the per-item preference index, the sorted user and item ID arrays, and records
 * the minimum and maximum preference values seen.
 * </p>
 *
 * @param userData users to include; (see also {@link #toDataMap(FastByIDMap, boolean)})
 * @param timestamps optionally, provided timestamps of preferences as milliseconds since the epoch.
 *  User IDs are mapped to maps of item IDs to Long timestamps. May be {@code null}.
 * @throws IllegalArgumentException if {@code userData} is {@code null}
 */
public GenericDataModel(final FastByIDMap<PreferenceArray> userData,
        final FastByIDMap<FastByIDMap<Long>> timestamps) {
    Preconditions.checkArgument(userData != null, "userData is null");
    preferenceFromUsers = userData;

    // Item ID -> every preference expressed for that item, gathered while walking the users.
    final FastByIDMap<Collection<Preference>> prefsForItems = new FastByIDMap<Collection<Preference>>();
    FastIDSet itemIDSet = new FastIDSet();
    int currentCount = 0;
    float maxPrefValue = Float.NEGATIVE_INFINITY;
    float minPrefValue = Float.POSITIVE_INFINITY;

    // Iterate the typed parameter; it is the same object just stored in preferenceFromUsers.
    for (final Map.Entry<Long, PreferenceArray> entry : userData.entrySet()) {
        final PreferenceArray prefs = entry.getValue();
        prefs.sortByItem();
        for (final Preference preference : prefs) {
            final long itemID = preference.getItemID();
            itemIDSet.add(itemID);
            Collection<Preference> prefsForItem = prefsForItems.get(itemID);
            if (prefsForItem == null) {
                prefsForItem = Lists.newArrayListWithCapacity(2);
                prefsForItems.put(itemID, prefsForItem);
            }
            prefsForItem.add(preference);
            final float value = preference.getValue();
            if (value > maxPrefValue) {
                maxPrefValue = value;
            }
            if (value < minPrefValue) {
                minPrefValue = value;
            }
        }
        // Periodic progress report for large inputs.
        if (++currentCount % 10000 == 0) {
            log.info("Processed {} users", currentCount);
        }
    }
    log.info("Processed {} users", currentCount);

    setMinPreference(minPrefValue);
    setMaxPreference(maxPrefValue);

    itemIDs = itemIDSet.toArray();
    itemIDSet = null; // Might help GC -- this is big
    Arrays.sort(itemIDs);

    // Convert the per-item collections to arrays, then order each one by user.
    final FastByIDMap<PreferenceArray> prefsByItem = toDataMap(prefsForItems, false);
    for (final Map.Entry<Long, PreferenceArray> entry : prefsByItem.entrySet()) {
        entry.getValue().sortByUser();
    }
    preferenceForItems = prefsByItem;

    // Snapshot the user IDs into a sorted primitive array.
    userIDs = new long[userData.size()];
    int i = 0;
    final LongPrimitiveIterator it = userData.keySetIterator();
    while (it.hasNext()) {
        userIDs[i++] = it.next();
    }
    Arrays.sort(userIDs);

    this.timestamps = timestamps;
}
/**
* Swaps, in-place, {@link List}s for arrays in {@link Map} values.
*
* @return input value
*/
public static FastByIDMap toDataMap(
final FastByIDMap> data, final boolean byUser) {
for (final Map.Entry entry : ((FastByIDMap