All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.wikibrain.sr.SRMetric Maven / Gradle / Ivy

There is a newer version: 0.9.1
Show newest version
package org.wikibrain.sr;

import gnu.trove.map.TIntDoubleMap;
import gnu.trove.set.TIntSet;
import org.wikibrain.core.WikiBrainException;
import org.wikibrain.core.dao.DaoException;
import org.wikibrain.core.lang.Language;
import org.wikibrain.sr.dataset.Dataset;
import org.wikibrain.sr.normalize.Normalizer;

import java.io.File;
import java.io.IOException;

/**
 * A monolingual SR metric supports SR operations in a single language.
 * @author Matt Lesicko
 * @author Shilad Sen
 */

public interface SRMetric {

    /**
     * @return the name of the similarity metric in a human readable format
     */
    public String getName();

    /**
     * @return The language associated with this metric.
     */
    public Language getLanguage();

    /**
     * Returns the directory containing all data for the metric.
     * @return
     */
    public File getDataDir();

    /**
     * Sets the data directory associated with the model.
     * This will apply to all future reads and writes.
     *
     * @param dir
     */
    public void setDataDir(File dir);

    /**
     * Determine the similarity between two local pages.
     *
     * @param pageId1 Id of the first page.
     * @param pageId2 Id of the second page.
     * @param explanations Whether explanations should be created.
     * @return
     */
    public SRResult similarity(int pageId1, int pageId2, boolean explanations) throws DaoException;

    /**
     * Determine the similarity between two strings in a given language by mapping through local pages.
     *
     * @param phrase1 The first phrase.
     * @param phrase2 The second phrase.
     * @param explanations Whether explanations should be created.
     * @return
     */
    public SRResult similarity(String phrase1, String phrase2, boolean explanations) throws DaoException;

    /**
     * Find the most similar local pages to a local page within the same language.
     *
     * @param pageId The id of the local page whose similarity we are examining.
     * @param maxResults The maximum number of results to return.
     * @return
     */
    public SRResultList mostSimilar(int pageId, int maxResults) throws DaoException;

    /**
     * Find the most similar local pages to a local page.
     *
     * @param pageId The id of the local page whose similarity we are examining.
     * @param maxResults The maximum number of results to return.
     * @param validIds The local page ids to be considered.  Null means all ids in the language.
     * @return
     */
    public SRResultList mostSimilar(int pageId, int maxResults, TIntSet validIds) throws DaoException;

    /**
     * Find the most similar local pages to a phrase.
     *
     * @param phrase The phrase whose similarity we are examining.
     * @param maxResults The maximum number of results to return.
     * @return
     */
    public SRResultList mostSimilar(String phrase, int maxResults) throws DaoException;

    /**
     * Find the most similar local pages to a phrase.
     *
     * @param phrase The phrase whose similarity we are examining.
     * @param maxResults The maximum number of results to return.
     * @param validIds The local page ids to be considered.  Null means all ids in the language
     * @return
     */
    public SRResultList mostSimilar(String phrase, int maxResults, TIntSet validIds) throws DaoException;

    /**
     * Writes the metric to the current data directory.
     *
     * @throws java.io.IOException
     */
    public void write() throws IOException;

    /**
     * Reads the metric from the current data directory.
     */
    public void read() throws IOException;

    /**
     * Train the similarity() function.
     * The KnownSims may already be associated with Wikipedia ids (check wpId1 and wpId2).
     *
     * @param dataset A gold standard dataset
     */
    public void trainSimilarity(Dataset dataset) throws DaoException;

    /**
     * Train the mostSimilar() function
     * The KnownSims may already be associated with Wikipedia ids (check wpId1 and wpId2).
     *
     * @param dataset A gold standard dataset.
     * @param numResults The maximum number of similar articles computed per phrase.
     * @param validIds The Wikipedia ids that should be considered in result sets. Null means all ids.
     */
    public void trainMostSimilar(Dataset dataset, int numResults, TIntSet validIds);

    /**
     * @return true if similarity() is already trained (or doesn't need training)
     */
    public boolean similarityIsTrained();

    /**
     * @return true if mostSimilar() is already trained (or doesn't need training)
     */
    public boolean mostSimilarIsTrained();

    /**
     * Construct a cosimilarity matrix of Wikipedia ids in a given language.
     *
     * @param wpRowIds
     * @param wpColIds
     * @return
     */
    public double[][] cosimilarity(int wpRowIds[], int wpColIds[]) throws DaoException;


    /**
     * Construct a cosimilarity matrix of phrases.
     *
     * @param rowPhrases
     * @param colPhrases
     * @return
     */
    public double[][] cosimilarity(String rowPhrases[], String colPhrases[]) throws DaoException;

    /**
     * Construct symmetric comsimilarity matrix of Wikipedia ids in a given language.
     *
     * @param ids
     * @return
     */
    public double[][] cosimilarity(int ids[]) throws DaoException;

    /**
     * Construct symmetric cosimilarity matrix of phrases by mapping through local pages.
     *
     * @param phrases
     * @return
     */
    public double[][] cosimilarity(String phrases[]) throws DaoException;

    /**
     * @return the most similar normalizer.
     */
    public Normalizer getMostSimilarNormalizer();

    /**
     * Sets the most similar normalizer
     * @param n
     */
    public void setMostSimilarNormalizer(Normalizer n);

    /**
     *
     * @return the similarity normalizer.
     */
    public Normalizer getSimilarityNormalizer();

    /**
     * Sets the similarity normalizer.
     * @param n
     */
    public void setSimilarityNormalizer(Normalizer n);
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy