All Downloads are FREE. Search and download functionalities are using the official Maven repository.

smile.nlp.collocation.BigramCollocation Maven / Gradle / Ivy

/*******************************************************************************
 * Copyright (c) 2010 Haifeng Li
 *   
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *  
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *******************************************************************************/

package smile.nlp.collocation;

/**
 * Collocations are expressions of multiple words which commonly co-occur.
 * A bigram collocation is a pair of words w1 w2 that appear together with
 * statistically significance.
 *
 * @author Haifeng Li
 */
public class BigramCollocation implements Comparable {

    /**
     * The first word of bigram.
     */
    private String w1;
    /**
     * The second word of bigram.
     */
    private String w2;
    /**
     * The frequency of bigram in the corpus.
     */
    private int frequency;
    /**
     * The chi-square statistical score of the collocation.
     */
    private double score;

    /**
     * Constructor.
     * @param w1 the first word of bigram.
     * @param w2 the second word of bigram.
     * @param frequency the frequency of bigram in the corpus.
     * @param score the chi-square statistical score of collocation in a corpus.
     */
    public BigramCollocation(String w1, String w2, int frequency, double score) {
        this.w1 = w1;
        this.w2 = w2;
        this.frequency = frequency;
        this.score = score;
    }

    /**
     * Returns the first word of bigram.
     */
    public String w1() {
        return w1;
    }

    /**
     * Returns the second word of bigram.
     */
    public String w2() {
        return w2;
    }

    /**
     * Returns the frequency of bigram in the corpus.
     */
    public int frequency() {
        return frequency;
    }

    /**
     * Returns the chi-square statistical score of the collocation.
     */
    public double score() {
        return score;
    }

    @Override
    public String toString() {
        return String.format("(%s %s, %d, %.2f)", w1, w2, frequency, score);
    }

    @Override
    public int hashCode() {
        int hash = 5;
        hash = 37 * hash + w1.hashCode();
        hash = 37 * hash + w2.hashCode();
        return hash;
    }

    @Override
    public boolean equals(Object obj) {
        if (obj == null) {
            return false;
        }
        if (getClass() != obj.getClass()) {
            return false;
        }
        final BigramCollocation other = (BigramCollocation) obj;
        if ((this.w1 == null) ? (other.w1 != null) : !this.w1.equals(other.w1)) {
            return false;
        }
        if ((this.w2 == null) ? (other.w2 != null) : !this.w2.equals(other.w2)) {
            return false;
        }
        return true;
    }

    @Override
    public int compareTo(BigramCollocation o) {
        return (int) Math.signum(score - o.score);
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy