// jvnsegmenter.VietnameseContextGenerator Maven / Gradle / Ivy
/*
Copyright (C) 2010 by
*
* Cam-Tu Nguyen
* [email protected] or [email protected]
*
* Xuan-Hieu Phan
* [email protected]
*
* College of Technology, Vietnamese University, Hanoi
* Graduate School of Information Sciences, Tohoku University
*
* JVnTextPro-v.2.0 is a free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published
* by the Free Software Foundation; either version 2 of the License,
* or (at your option) any later version.
*
* JVnTextPro-v.2.0 is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with JVnTextPro-v.2.0; if not, write to the Free Software Foundation,
* Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
*/
package jvnsegmenter;
import java.util.ArrayList;
import java.util.List;
import java.util.Vector;
import jvntextpro.data.Sentence;
import jvntextpro.util.VnSyllParser;
import org.w3c.dom.Element;
// TODO: Auto-generated Javadoc
/**
 * Context generator that flags text which is not a well-formed Vietnamese
 * syllable.
 *
 * <p>For every configured context predicate named {@code not_valid_vnsyll},
 * the words found at the configured offsets around the current position are
 * joined with single spaces, trimmed, lower-cased and handed to
 * {@link VnSyllParser}. When the parser rejects the text, a feature of the
 * form {@code nvs:<text>} is emitted. Context predicates with any other name
 * produce no feature.
 */
public class VietnameseContextGenerator extends BasicContextGenerator {

    /** Name of the only context predicate this generator emits features for. */
    private static final String NOT_VALID_VNSYLL = "not_valid_vnsyll";

    /** Prefix of every feature string produced by this generator. */
    private static final String FEATURE_PREFIX = "nvs:";

    //------------------------------
    //Methods
    //------------------------------

    /**
     * Instantiates a new Vietnamese context generator.
     *
     * @param node the element handed to {@code readFeatureParameters} to load
     *             the context predicate names and their offsets
     */
    public VietnameseContextGenerator(Element node) {
        readFeatureParameters(node);
    }

    /**
     * Builds the {@code nvs:} features for the token at {@code pos}.
     *
     * <p>Offsets that would fall outside the sentence are skipped; the
     * remaining in-range words still form the text that is checked. If every
     * offset is out of range the empty string is checked.
     *
     * @param sent the sentence being processed
     * @param pos  index of the current token within {@code sent}
     * @return the generated feature strings, possibly empty, never {@code null}
     * @see jvntextpro.data.ContextGenerator#getContext(jvntextpro.data.Sentence, int)
     */
    @Override
    public String[] getContext(Sentence sent, int pos) {
        List<String> features = new ArrayList<String>();

        for (int it = 0; it < cpnames.size(); ++it) {
            // Only this predicate can yield a feature, so avoid building and
            // parsing the window for anything else.
            if (!NOT_VALID_VNSYLL.equals(cpnames.get(it))) {
                continue;
            }

            List<Integer> offsets = this.paras.get(it);
            StringBuilder window = new StringBuilder();
            for (int offset : offsets) {
                int index = pos + offset;
                if (index < 0 || index >= sent.size()) {
                    continue; // offset points outside the sentence
                }
                window.append(sent.getWordAt(index)).append(' ');
            }

            // Locale.ROOT keeps the feature text identical on every machine;
            // the default-locale variant changes 'I' under e.g. Turkish.
            String word = window.toString().trim().toLowerCase(java.util.Locale.ROOT);

            if (!new VnSyllParser(word).isValidVnSyllable()) {
                features.add(FEATURE_PREFIX + word);
            }
        }

        return features.toArray(new String[0]);
    }
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy