All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.chenlb.mmseg4j.Chunk Maven / Gradle / Ivy

The newest version!
package com.chenlb.mmseg4j;


/**
 * 它是MMSeg分词算法中一个关键的概念。Chunk中包含依据上下文分出的一组词和相关的属性,包括长度(Length)、平均长度(Average Length)、标准差的平方(Variance)和自由语素度(Degree Of Morphemic Freedom)。
 * 
 * @author chenlb 2009-3-16 上午11:39:42
 */
public class Chunk {

	Word[] words = new Word[3];
	
	int count = -1;
	
	/** Word Length */
	private int len = -1;
	/** Largest Average Word Length */
	private double avgLen = -1;
	/** Variance of Word Lengths 就是 标准差的平方 */
	private double variance = -1;
	/** Sum of Degree of Morphemic Freedom of One-Character */
	private int sumDegree = -1;
	
	/** Word Length */
	public int getLen() {
		if(len < 0) {
			len = 0;
			count = 0;
			for(Word word : words) {
				if(word != null) {
					len += word.getLength();
					count++;
				}
			}
		}
		return len;
	}
	
	/** 有多少个词,最多3个。*/
	public int getCount() {
		if(count < 0) {
			count = 0;
			for(Word word : words) {
				if(word != null) {
					count++;
				}
			}
		}
		return count;
	}
	
	/** Largest Average Word Length */
	public double getAvgLen() {
		if(avgLen < 0) {
			avgLen = (double)getLen()/getCount();
		}
		return avgLen;
	}
	
	/** Variance of Word Lengths 就是 标准差的平方 */
	public double getVariance() {
		if(variance < 0) {
			double sum = 0;
			for(Word word : words) {
				if(word != null) {
					sum += Math.pow(word.getLength()-getAvgLen(), 2);
				}
			}
			variance = sum/getCount();
		}
		return variance;
	}
	
	/** Sum of Degree of Morphemic Freedom of One-Character */
	public int getSumDegree() {
		if(sumDegree < 0) {
			int sum = 0;
			for(Word word : words) {
				if(word != null && word.getDegree() > -1) {
					sum += word.getDegree();
				}
			}
			sumDegree = sum;
		}
		return sumDegree;
	}
	
	@Override
	public String toString() {
		StringBuilder sb = new StringBuilder();
		for(Word word : words) {
			if(word != null) {
				sb.append(word.getString()).append('_');
			}
		}
		return sb.toString();
	}
	
	public String toFactorString() {
		StringBuilder sb = new StringBuilder();
		sb.append("[");
		sb.append("len=").append(getLen()).append(", ");
		sb.append("avgLen=").append(getAvgLen()).append(", ");
		sb.append("variance=").append(getVariance()).append(", ");
		sb.append("sum100log=").append(getSumDegree()).append("]");
		return sb.toString();
	}

	public Word[] getWords() {
		return words;
	}
	
	public void setWords(Word[] words) {
		this.words = words;
		count = words.length;
	}
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy