All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.conqat.lib.commons.datamining.SparseVector Maven / Gradle / Ivy

There is a newer version: 2024.7.2
Show newest version
/*
 * Copyright (c) CQSE GmbH
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.conqat.lib.commons.datamining;

import java.util.Collection;
import java.util.HashMap;
import java.util.Map;

import org.conqat.lib.commons.collections.CollectionUtils;

/**
 * A sparse vector in the n-dimensional space of a numerical type T. Basically a mapping from
 * component (represented as an Integer) to a double value. Unset components correspond to 0, i.e. a
 * newly created instance represents the 0-vector.
 */
public class SparseVector {

	/** The data of this vector */
	private Map data = new HashMap<>();

	/**
	 * Sets the given value under the given key, value must not be null
	 */
	public void set(int key, double value) {
		data.put(key, value);
	}

	/**
	 * Computes the cosine distance between this and the given vector. See also
	 * http://en.wikipedia.org/wiki/Cosine_similarity
	 */
	public double cosineSimilarity(SparseVector other) {
		double dotProduct = 0.0D;

		Collection commonPositions = CollectionUtils.intersectionSet(data.keySet(), other.data.keySet());

		for (Integer commonPosition : commonPositions) {
			dotProduct += data.get(commonPosition) * other.data.get(commonPosition);
		}

		double divisor = l2norm() * other.l2norm();

		if (divisor == 0) {
			return 0;
		}

		return dotProduct / divisor;
	}

	/** Returns the L2 norm of this vector. */
	private double l2norm() {
		double tmp = 0.0D;
		for (Integer position : data.keySet()) {
			tmp += Math.pow(data.get(position), 2);
		}
		return Math.sqrt(tmp);
	}
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy