// jvnsegmenter.CRFSegmenter Maven / Gradle / Ivy
/*
Copyright (C) 2010 by
*
* Cam-Tu Nguyen
* [email protected] or [email protected]
*
* Xuan-Hieu Phan
* [email protected]
*
* College of Technology, Vietnamese University, Hanoi
* Graduate School of Information Sciences, Tohoku University
*
* JVnTextPro-v.2.0 is a free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published
* by the Free Software Foundation; either version 2 of the License,
* or (at your option) any later version.
*
* JVnTextPro-v.2.0 is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with JVnTextPro-v.2.0; if not, write to the Free Software Foundation,
* Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
*/
package jvnsegmenter;
import java.io.File;
import java.util.Vector;
import org.w3c.dom.Element;
import jflexcrf.Labeling;
import jvntextpro.data.DataReader;
import jvntextpro.data.DataWriter;
import jvntextpro.data.TaggingData;
// TODO: Auto-generated Javadoc
/**
 * Word segmenter backed by a CRF {@link Labeling} model.
 *
 * <p>An instance is usable only after {@link #init(String)} has run (the
 * one-argument constructor calls it). Until then the internal labeler is
 * {@code null} and the {@code segmenting} methods throw
 * {@link NullPointerException}.
 */
public class CRFSegmenter {

    /** Reader handed to the labeler when {@link #init(String)} runs. */
    DataReader reader = new WordDataReader();

    /** Writer handed to the labeler when {@link #init(String)} runs. */
    DataWriter writer = new WordDataWriter();

    /** Holds the context generators registered from the feature template. */
    TaggingData dataTagger = new TaggingData();

    /** The labeler; {@code null} until {@link #init(String)} has been called. */
    Labeling labeling = null;

    /**
     * Creates a segmenter and immediately initializes it from a model directory.
     *
     * @param modelDir directory containing {@code featuretemplate.xml} and the model files
     */
    public CRFSegmenter(String modelDir) {
        init(modelDir);
    }

    /**
     * Creates an uninitialized segmenter. {@link #init(String)} must be called
     * before any {@code segmenting} method is used.
     */
    public CRFSegmenter() {
        // Intentionally empty: initialization is deferred to init(String).
    }

    /**
     * Reads {@code featuretemplate.xml} from the model directory, registers one
     * context generator per recognized template node, and then builds the labeler
     * with the current reader and writer.
     *
     * <p>Template nodes whose {@code value} attribute is not one of the recognized
     * types are skipped.
     *
     * @param modelDir directory containing {@code featuretemplate.xml} and the model files
     */
    public void init(String modelDir) {
        // Read feature template file
        String templateFile = modelDir + File.separator + "featuretemplate.xml";

        // The element type must be declared: with a raw Vector, get(i) yields
        // Object and cannot be assigned to Element without a cast.
        Vector<Element> nodes = BasicContextGenerator.readFeatureNodes(templateFile);

        // Create context generators
        for (int i = 0; i < nodes.size(); ++i) {
            BasicContextGenerator contextGen = createContextGenerator(nodes.get(i), modelDir);
            if (contextGen != null) {
                dataTagger.addContextGenerator(contextGen);
            }
        }

        labeling = new Labeling(modelDir, dataTagger, reader, writer);
    }

    /**
     * Builds the context generator matching a template node's {@code value} attribute.
     *
     * @param node     a feature template node
     * @param modelDir model directory, used to locate the lexicon files
     * @return the generator, or {@code null} when the node type is not recognized
     */
    private BasicContextGenerator createContextGenerator(Element node, String modelDir) {
        String cpType = node.getAttribute("value");

        if (cpType.equals("Conjunction")) {
            return new ConjunctionContextGenerator(node);
        }
        if (cpType.equals("Lexicon")) {
            BasicContextGenerator lexiconGen = new LexiconContextGenerator(node);
            // Lexicon features need their word lists, loaded from the model directory.
            LexiconContextGenerator.loadVietnameseDict(modelDir + File.separator + "VNDic_UTF-8.txt");
            LexiconContextGenerator.loadViLocationList(modelDir + File.separator + "vnlocations.txt");
            LexiconContextGenerator.loadViPersonalNames(modelDir + File.separator + "vnpernames.txt");
            return lexiconGen;
        }
        if (cpType.equals("Regex")) {
            return new RegexContextGenerator(node);
        }
        if (cpType.equals("SyllableFeature")) {
            return new SyllableContextGenerator(node);
        }
        if (cpType.equals("ViSyllableFeature")) {
            return new VietnameseContextGenerator(node);
        }
        return null;
    }

    /**
     * Segments the given text by delegating to the labeler.
     *
     * @param instr the input text
     * @return the result of {@code Labeling.strLabeling(String)}
     * @throws NullPointerException if {@link #init(String)} has not been called
     */
    public String segmenting(String instr) {
        return labeling.strLabeling(instr);
    }

    /**
     * Segments the contents of the given file by delegating to the labeler.
     *
     * @param file the input file
     * @return the result of {@code Labeling.strLabeling(File)}
     * @throws NullPointerException if {@link #init(String)} has not been called
     */
    public String segmenting(File file) {
        return labeling.strLabeling(file);
    }

    /**
     * Replaces the data reader. The labeler receives the reader only when
     * {@link #init(String)} runs, so call this before {@code init} for the
     * change to be used.
     *
     * @param reader the new data reader
     */
    public void setDataReader(DataReader reader) {
        this.reader = reader;
    }

    /**
     * Replaces the data writer. The labeler receives the writer only when
     * {@link #init(String)} runs, so call this before {@code init} for the
     * change to be used.
     *
     * @param writer the new data writer
     */
    public void setDataWriter(DataWriter writer) {
        this.writer = writer;
    }
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy