jvnsegmenter.WordDataWriter Maven / Gradle / Ivy
/*
Copyright (C) 2010 by
*
* Cam-Tu Nguyen
* [email protected] or [email protected]
*
* Xuan-Hieu Phan
* [email protected]
*
* College of Technology, Vietnamese University, Hanoi
* Graduate School of Information Sciences, Tohoku University
*
* JVnTextPro-v.2.0 is a free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published
* by the Free Software Foundation; either version 2 of the License,
* or (at your option) any later version.
*
* JVnTextPro-v.2.0 is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with JVnTextPro-v.2.0); if not, write to the Free Software Foundation,
* Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
*/
package jvnsegmenter;
import java.io.BufferedWriter;
import java.io.FileOutputStream;
import java.io.OutputStreamWriter;
import java.util.List;
import jvntextpro.data.DataWriter;
import jvntextpro.data.Sentence;
// TODO: Auto-generated Javadoc
/**
* The Class WordDataWriter.
*/
public class WordDataWriter extends DataWriter {
/* (non-Javadoc)
* @see jvntextpro.data.DataWriter#writeFile(java.util.List, java.lang.String)
*/
@Override
public void writeFile(List lblSeqs, String filename) {
String ret = writeString(lblSeqs);
try{
BufferedWriter out = new BufferedWriter(new OutputStreamWriter(
new FileOutputStream(filename), "UTF-8"));
out.write(ret);
out.close();
}
catch (Exception e){
}
}
/* (non-Javadoc)
* @see jvntextpro.data.DataWriter#writeString(java.util.List)
*/
@Override
public String writeString(List lblSeqs) {
String ret = "";
for (int i = 0; i < lblSeqs.size(); ++i){
Sentence sent = (Sentence) lblSeqs.get(i);
boolean start = true;
String word = "";
String sentStr = "";
for (int j = 0; j < sent.size(); ++j){
String curTag = sent.getTagAt(j);
if (curTag.equalsIgnoreCase("B-W") || curTag.equalsIgnoreCase("O")){
start = true;
}
else if (start && curTag.equalsIgnoreCase("I-W")){
start = false;
}
if (start){
sentStr += " " + word;
word = sent.getWordAt(j);
}
else {
word = word + "_" + sent.getWordAt(j);
}
}
sentStr += " " + word;
ret = ret + "\n" + sentStr.trim();
}
return ret.trim();
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy