edu.ucla.sspace.evaluation.WordSimilarityEvaluationRunner Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of sspace-wordsi Show documentation
Show all versions of sspace-wordsi Show documentation
The S-Space Package is a collection of algorithms for building
Semantic Spaces as well as a highly-scalable library for designing new
distributional semantics algorithms. Distributional algorithms process text
corpora and represent the semantics of words as high-dimensional feature
vectors. This package also includes matrices, vectors, and numerous
clustering algorithms. These approaches are known by many names, such as
word spaces, semantic spaces, or distributed semantics and rest upon the
Distributional Hypothesis: words that appear in similar contexts have
similar meanings.
The newest version!
/*
* Copyright 2009 David Jurgens
*
* This file is part of the S-Space package and is covered under the terms and
* conditions therein.
*
* The S-Space package is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as published
* by the Free Software Foundation and distributed hereunder to you.
*
* THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES,
* EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE
* NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY
* PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION
* WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER
* RIGHTS.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package edu.ucla.sspace.evaluation;
import edu.ucla.sspace.common.SemanticSpace;
import edu.ucla.sspace.common.Similarity;
import edu.ucla.sspace.common.Similarity.SimType;
import edu.ucla.sspace.vector.Vector;
import java.lang.reflect.Method;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
/**
 * A test-runner for evaluating the performance of a {@link SemanticSpace} on a
 * {@link WordSimilarityEvaluation} test.
 *
 * @author David Jurgens
 */
public class WordSimilarityEvaluationRunner {

    /**
     * Evaluates the performance of a given {@code SemanticSpace} on a given
     * {@code WordSimilarityEvaluation} using the provided similarity metric.
     * Returns a {@link WordSimilarityReport} detailing the performance, with
     * similarity scores scaled by the lowest and highest human based
     * similarity ratings.
     *
     * <p>A word pair is counted as unanswerable, and is left out of the
     * correlation, when the semantic space returns {@code null} for the
     * vector of either word.  The number of word pairs in the returned report
     * is the total number of pairs in the test, including the unanswerable
     * ones.
     *
     * @param sspace The {@link SemanticSpace} to test against
     * @param test The {@link WordSimilarityEvaluation} providing human
     *        similarity scores
     * @param vectorComparisonType The similarity measure to use
     *
     * @return A {@link WordSimilarityReport} detailing the performance
     */
    public static WordSimilarityReport evaluate(
            SemanticSpace sspace,
            WordSimilarityEvaluation test,
            Similarity.SimType vectorComparisonType) {
        Collection<WordSimilarity> wordPairs = test.getPairs();
        int unanswerable = 0;

        // Use lists here to keep track of the judgements for each word pair
        // that the SemanticSpace has vectors for.  This allows us to skip
        // trying to correlate human judgements for pairs that the S-Space
        // cannot handle.
        List<Double> humanJudgements =
            new ArrayList<Double>(wordPairs.size());
        List<Double> sspaceJudgements =
            new ArrayList<Double>(wordPairs.size());

        // The bounds of the human rating scale do not change while iterating,
        // so read them once up front.
        double leastSimilar = test.getLeastSimilarValue();
        double testRange = test.getMostSimilarValue() - leastSimilar;

        // Compute the word pair similarity using the given Semantic Space for
        // each word.
        for (WordSimilarity pair : wordPairs) {
            // get the vector for each word
            Vector firstVector = sspace.getVector(pair.getFirstWord());
            Vector secondVector = sspace.getVector(pair.getSecondWord());

            // check that the s-space had both words
            if (firstVector == null || secondVector == null) {
                unanswerable++;
                continue;
            }

            // use the similarity result and scale it based on the original
            // answers
            double similarity = Similarity.getSimilarity(
                vectorComparisonType, firstVector, secondVector);
            double scaled = (similarity * testRange) + leastSimilar;

            humanJudgements.add(pair.getSimilarity());
            sspaceJudgements.add(scaled);
        }

        // Calculate the correlation between the human judgements and the
        // semantic space judgements.  Both lists always have the same length
        // because they are appended to together.
        int answered = humanJudgements.size();
        double[] humanArr = new double[answered];
        double[] sspaceArr = new double[answered];
        for (int i = 0; i < answered; ++i) {
            humanArr[i] = humanJudgements.get(i);
            sspaceArr[i] = sspaceJudgements.get(i);
        }

        double correlation = Similarity.correlation(humanArr, sspaceArr);
        return new SimpleReport(wordPairs.size(), correlation, unanswerable);
    }

    /**
     * A simple implementation of a {@code Report} that just returns values
     * provided at the time of construction.
     */
    private static class SimpleReport implements WordSimilarityReport {

        /**
         * The total number of word pairs
         */
        private final int numWordPairs;

        /**
         * The correlation between the {@link SemanticSpace} judgements and
         * the human similarity judgements
         */
        private final double correlation;

        /**
         * The number of unanswerable pairs.
         */
        private final int unanswerable;

        /**
         * Creates a simple report
         *
         * @param numWordPairs the total number of word pairs
         * @param correlation the correlation between the semantic space
         *        judgements and the human judgements
         * @param unanswerable the number of unanswerable pairs
         */
        public SimpleReport(int numWordPairs,
                            double correlation,
                            int unanswerable) {
            this.numWordPairs = numWordPairs;
            this.correlation = correlation;
            this.unanswerable = unanswerable;
        }

        /**
         * {@inheritDoc}
         */
        public int numberOfWordPairs() {
            return numWordPairs;
        }

        /**
         * {@inheritDoc}
         */
        public double correlation() {
            return correlation;
        }

        /**
         * {@inheritDoc}
         */
        public int unanswerableQuestions() {
            return unanswerable;
        }

        /**
         * Returns a string describing the three values represented by this
         * {@link WordSimilarityReport}: the correlation to four decimal
         * places, followed by the unanswerable count over the total number
         * of word pairs.
         */
        @Override
        public String toString() {
            return String.format("%.4f correlation; %d/%d unanswered",
                                 correlation, unanswerable, numWordPairs);
        }
    }
}