All Downloads are FREE. Search and download functionalities are using the official Maven repository.

jvnsegmenter.WordDataWriter Maven / Gradle / Ivy

/*
 Copyright (C) 2010 by
 * 
 * 	Cam-Tu Nguyen 
 *  [email protected] or [email protected]
 *
 *  Xuan-Hieu Phan  
 *  [email protected] 
 *
 *  College of Technology, Vietnamese University, Hanoi
 * 	Graduate School of Information Sciences, Tohoku University
 *
 * JVnTextPro-v.2.0 is a free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published
 * by the Free Software Foundation; either version 2 of the License,
 * or (at your option) any later version.
 *
 * JVnTextPro-v.2.0 is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with JVnTextPro-v.2.0; if not, write to the Free Software Foundation,
 * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
 */
package jvnsegmenter;

import java.io.BufferedWriter;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.util.List;
import jvntextpro.data.DataWriter;
import jvntextpro.data.Sentence;

/**
 * Serialises tagged sentences as plain text, one sentence per line.
 *
 * <p>Each token of a {@link Sentence} carries a tag. Tokens tagged
 * {@code I-W} are glued to the preceding token with an underscore, while
 * tokens tagged {@code B-W} or {@code O} start a new space-separated word.
 * Tag comparison is case-insensitive.
 */
public class WordDataWriter extends DataWriter {

	/** Tag marking the first token of a word. */
	private static final String TAG_BEGIN = "B-W";

	/** Tag marking a token that continues the current word. */
	private static final String TAG_INSIDE = "I-W";

	/** Tag marking a token outside any multi-token word. */
	private static final String TAG_OUTSIDE = "O";

	/**
	 * Renders the sentences with {@link #writeString(List)} and stores the
	 * result in {@code filename}, encoded as UTF-8.
	 *
	 * <p>The overridden signature declares no checked exception, so an I/O
	 * failure is reported on {@code System.err} instead of being silently
	 * discarded; the method then returns normally.
	 *
	 * @param lblSeqs  list of {@link Sentence} objects to write
	 * @param filename path of the output file (created or overwritten)
	 * @see jvntextpro.data.DataWriter#writeFile(java.util.List, java.lang.String)
	 */
	@Override
	public void writeFile(List lblSeqs, String filename) {
		String ret = writeString(lblSeqs);
		try {
			BufferedWriter out = new BufferedWriter(new OutputStreamWriter(
					new FileOutputStream(filename), "UTF-8"));
			try {
				out.write(ret);
			}
			finally {
				// Always release the file handle, even when write() fails.
				out.close();
			}
		}
		catch (IOException e) {
			System.err.println("WordDataWriter: cannot write \"" + filename + "\": " + e);
		}
	}

	/**
	 * Renders every sentence on its own line, separated by {@code '\n'}.
	 * Leading and trailing whitespace of the whole result is trimmed, so an
	 * empty list yields an empty string.
	 *
	 * @param lblSeqs list of {@link Sentence} objects; kept as a raw
	 *                {@code List} to match the overridden signature
	 * @return the rendered text
	 * @see jvntextpro.data.DataWriter#writeString(java.util.List)
	 */
	@Override
	public String writeString(List lblSeqs) {
		StringBuilder text = new StringBuilder();
		for (int i = 0; i < lblSeqs.size(); ++i) {
			Sentence sent = (Sentence) lblSeqs.get(i);
			text.append('\n').append(joinSyllables(sent));
		}
		return text.toString().trim();
	}

	/**
	 * Builds the space-separated word string of one sentence, joining the
	 * tokens of a multi-token word with underscores.
	 */
	private static String joinSyllables(Sentence sent) {
		StringBuilder line = new StringBuilder();
		StringBuilder word = new StringBuilder();
		boolean start = true;

		for (int j = 0; j < sent.size(); ++j) {
			String curTag = sent.getTagAt(j);
			// Constant-first comparison: a null or unknown tag leaves the
			// current state unchanged instead of throwing.
			if (TAG_BEGIN.equalsIgnoreCase(curTag) || TAG_OUTSIDE.equalsIgnoreCase(curTag)) {
				start = true;
			}
			else if (TAG_INSIDE.equalsIgnoreCase(curTag)) {
				start = false;
			}

			String token = sent.getWordAt(j);
			if (j == 0) {
				// The first token always opens a word, even if it is tagged
				// I-W; otherwise the word would begin with a stray '_'.
				word.append(token);
			}
			else if (start) {
				// Flush the finished word and begin a new one.
				line.append(' ').append(word);
				word.setLength(0);
				word.append(token);
			}
			else {
				word.append('_').append(token);
			}
		}

		// Flush the last pending word.
		line.append(' ').append(word);
		return line.toString().trim();
	}

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy