All downloads are free. Search and download functionality uses the official Maven repository.

org.ansj.util.MathUtil Maven / Gradle / Ivy

There is a newer version: 5.1.6
Show newest version
package org.ansj.util;

import java.util.Map;

import org.ansj.domain.Term;
import org.ansj.library.NatureLibrary;
import org.ansj.library.NgramLibrary;
import org.ansj.recognition.impl.NatureRecognition.NatureTerm;

/**
 * Static helpers that compute the scores used when linking adjacent terms
 * and adjacent natures (part-of-speech tags).
 */
public class MathUtil {

	// Smoothing parameter: weight of the single-word term in compuScore; the
	// two-word term is weighted by (1 - dSmoothingPara).
	private static final double dSmoothingPara = 0.1;
	// Separator placed between the two term names when building a relationMap key.
	private static final String TAB = "\t";
	// Frequency constant used both as a normaliser and as the penalty added
	// when a term's frequency is negative.
	private static final int MAX_FREQUENCE = 2079997;// 7528283+329805;
	// Reciprocal of MAX_FREQUENCE, used as the floor of the two-word term.
	private static final double dTemp = (double) 1 / MAX_FREQUENCE;

	/**
	 * Computes the accumulated score of moving from one term to the next.
	 *
	 * <p>
	 * If {@code from}'s total frequency plus one is negative, the method adds
	 * {@code MAX_FREQUENCE} to {@code from}'s score, stores that value back
	 * into {@code from}, and returns it. Otherwise {@code from} is not
	 * modified and the result is {@code from.score()} plus the negative log
	 * of a smoothed mix of the single-word and two-word frequencies.
	 *
	 * @param from
	 *            the preceding term
	 * @param to
	 *            the following term
	 * @param relationMap
	 *            optional extra two-word frequencies keyed by
	 *            {@code from.getName() + "\t" + to.getName()}; may be
	 *            {@code null}
	 * @return the accumulated score
	 */
	public static double compuScore(Term from, Term to, Map<String, Double> relationMap) {
		double frequency = from.termNatures().allFreq + 1;

		if (frequency < 0) {
			// Negative frequency: penalise the source term and persist the penalty.
			double score = from.score() + MAX_FREQUENCE;
			from.score(score);
			return score;
		}

		double nTwoWordsFreq = NgramLibrary.getTwoWordFreq(from, to);

		if (relationMap != null) {
			// Caller-supplied frequency is added on top of the library value.
			Double d = relationMap.get(from.getName() + TAB + to.getName());
			if (d != null) {
				nTwoWordsFreq += d;
			}
		}

		// Same arithmetic, in the same evaluation order, as the original
		// single-expression formula; only split into named parts.
		double singleWordPart = dSmoothingPara * frequency / (MAX_FREQUENCE + 80000);
		double twoWordPart = (1 - dSmoothingPara) * ((1 - dTemp) * nTwoWordsFreq / frequency + dTemp);
		double value = -Math.log(singleWordPart + twoWordPart);

		if (value < 0) {
			// A negative log value is shifted upward by the frequency.
			value += frequency;
		}
		return from.score() + value;
	}

	/**
	 * Computes a score as the sum of the total frequencies of two terms.
	 *
	 * @param from
	 *            the first term
	 * @param term
	 *            the second term
	 * @return {@code from.termNatures().allFreq + term.termNatures().allFreq}
	 */
	public static double compuScoreFreq(Term from, Term term) {
		return from.termNatures().allFreq + term.termNatures().allFreq;
	}

	/**
	 * Computes the score between two natures.
	 *
	 * <p>
	 * The two-nature frequency is looked up in {@link NatureLibrary}; when it
	 * is zero, the log of the summed self scores is used in its place.
	 *
	 * @param from
	 *            the preceding nature term
	 * @param to
	 *            the following nature term
	 * @return {@code from.score + log((from.selfScore + to.selfScore) * freq) + to.selfScore}
	 */
	public static double compuNatureFreq(NatureTerm from, NatureTerm to) {
		double twoWordFreq = NatureLibrary.getTwoNatureFreq(from.termNature.nature, to.termNature.nature);
		if (twoWordFreq == 0) {
			// No recorded transition: substitute the log of the combined self scores.
			twoWordFreq = Math.log(from.selfScore + to.selfScore);
		}
		double score = from.score + Math.log((from.selfScore + to.selfScore) * twoWordFreq) + to.selfScore;
		return score;
	}

	/**
	 * Prints {@code Math.log(dTemp * 2)} to standard output.
	 *
	 * @param args
	 *            unused
	 */
	public static void main(String[] args) {
		System.out.println(Math.log(dTemp * 2));
	}

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy