All downloads are free. Search and download functionality uses the official Maven repository.

com.mayabot.nlp.segment.plugins.pos.PerceptronPosService Maven / Gradle / Ivy

There is a newer version: 4.0.0
Show newest version
package com.mayabot.nlp.segment.plugins.pos;

import com.mayabot.nlp.MynlpEnv;
import com.mayabot.nlp.injector.Singleton;
import com.mayabot.nlp.logging.InternalLogger;
import com.mayabot.nlp.logging.InternalLoggerFactory;
import com.mayabot.nlp.perceptron.PerceptronFileFormat;
import com.mayabot.nlp.perceptron.PerceptronModel;
import com.mayabot.nlp.perceptron.PerceptronModelImpl;
import com.mayabot.nlp.segment.Nature;
import com.mayabot.nlp.segment.WordTerm;
import com.mayabot.nlp.segment.wordnet.Vertex;

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

/**
 * 感知机词性分词服务。
 * 单例服务,pos的资源是依赖mynlp-resouces里面
 * 

* 该服务可以独立调用 *

* PerceptronPosService service = Mynlps.instanceOf(PerceptronPosService.class); */ @Singleton public class PerceptronPosService { private POSPerceptron perceptron; static InternalLogger logger = InternalLoggerFactory.getInstance(PerceptronPosService.class); public PerceptronPosService(MynlpEnv mynlp) throws Exception { long t1 = System.currentTimeMillis(); PerceptronModel model = PerceptronFileFormat.loadFromNlpResource("pos-model"); this.perceptron = new POSPerceptron(model); long t2 = System.currentTimeMillis(); logger.info("PerceptronPosService Load use " + (t2 - t1) + " ms"); } public List pos(List words) { List nrList = perceptron.decodeNature(words); // // 单字 人名 特殊处理一下 // for (int i = 0; i < nrList.size(); i++) { // if (nrList.get(i) == Nature.nr && words.get(i).length() == 1) { // nrList.set(i,Nature.n); // } // } return nrList; } /** * @param sample word/x word/b */ public void learn(String sample) { perceptron.learn(sample); } /** * 定制版本,我们做一些特殊处理 * @param words * @return List */ public List posFromVertex(List words) { ArrayList stList = new ArrayList<>(words.size()); boolean findIndex = false; for (Vertex word : words) { String x = word.realWord(); if (word.nature == Nature.m) { stList.add("["+word.nature+"]"); findIndex = true; } else { stList.add(x); } } List result = pos(stList); if (findIndex) { for (int i = 0; i < words.size(); i++) { if(words.get(i).nature == Nature.m){ result.set(i, words.get(i).nature); } } }else{ return result; } return result; } public void posFromTerm(List words) { ArrayList stList = new ArrayList<>(words.size()); for (WordTerm word : words) { String x = word.word; if (word.getNature() == Nature.m) { stList.add("["+word.getNatureName()+"]"); } else { stList.add(x); } } List result = pos(stList); for (int i = 0; i < words.size(); i++) { Nature na = result.get(i); WordTerm word = words.get(i); if (word.getNature() == Nature.m) { na = word.getNature(); } word.setNature(na); } } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy