All downloads are free. The search and download functionality uses the official Maven repository.

lv.semti.morphology.pipetool.WordPipe Maven / Gradle / Ivy

Go to download

Webservice API for Tēzaurs.lv and other ailab.lv Latvian computational linguistic tools

There is a newer version: 2.5.7
Show newest version
/*******************************************************************************
 * Copyright 2012, 2013, 2014 Institute of Mathematics and Computer Science, University of Latvia
 * Author: Pēteris Paikens
 * 
 *     This program is free software: you can redistribute it and/or modify
 *     it under the terms of the GNU General Public License as published by
 *     the Free Software Foundation, either version 3 of the License, or
 *     (at your option) any later version.
 * 
 *     This program is distributed in the hope that it will be useful,
 *     but WITHOUT ANY WARRANTY; without even the implied warranty of
 *     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *     GNU General Public License for more details.
 * 
 *     You should have received a copy of the GNU General Public License
 *     along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *******************************************************************************/
package lv.semti.morphology.pipetool;

import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.io.PrintStream;
import java.util.Collection;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;

import lv.ailab.lnb.fraktur.Transliterator;
import lv.semti.morphology.analyzer.*;
import lv.semti.morphology.corpus.Statistics;

public class WordPipe {
	/**
	 * Command-line entry point of the word pipe.
	 *
	 * NOTE(review): this copy of the method is damaged. The text between the
	 * {@code for (int i=0; i} loop header and the {@code Splitting.tokenize}
	 * call is missing, so the method does not compile as shown. The
	 * declarations of {@code analyzer}, {@code out} and {@code s}, and whatever
	 * sets the option flags, are not visible here. Restore the body from the
	 * upstream source before building; do not reconstruct it by guessing.
	 *
	 * What is visible: five boolean option flags are declared with their
	 * defaults ({@code useAux} is the only one that defaults to true). Each
	 * input {@code s} is tokenized with {@code Splitting.tokenize}, and the
	 * tokens are printed to {@code out} through {@code analyze} when
	 * {@code tab_output} is false, or through {@code analyze_tab} otherwise.
	 * {@code out} is flushed after every printed result.
	 *
	 * @param args command-line arguments (presumably option switches for the
	 *        flags below; the parsing code is missing, TODO confirm)
	 * @throws Exception declared broadly; the visible code does not show which
	 *         failures are expected
	 */
	public static void main(String[] args) throws Exception {
		boolean full_output = false;
		boolean tab_output = false;
		boolean transliterate = false;
		boolean useAux = true;
		boolean probabilities = false;
		for (int i=0; i tokens = Splitting.tokenize(analyzer, s, false);	    	
	    	
	    	if (!tab_output) 
	    		out.println( analyze( analyzer, tokens, full_output));
	    	else out.println( analyze_tab( analyzer, tokens, full_output, probabilities));
	    	out.flush();
	    }
	}	
	
	/**
	 * Renders the given tokens as one JSON array string.
	 *
	 * Each token contributes one element: the result of {@code toJSON()} when
	 * {@code all_options} is true, otherwise the result of
	 * {@code toJSONsingle()}. The elements are then joined by
	 * {@code formatJSON} into the form {@code [a, b, ...]}.
	 *
	 * @param analyzer not used by this method; kept so existing callers keep
	 *        compiling
	 * @param tokens tokens to render, in output order
	 * @param all_options selects {@code toJSON()} (true) or
	 *        {@code toJSONsingle()} (false) for every token
	 * @return the bracketed, comma-separated rendering of all tokens
	 */
	private static String analyze(Analyzer analyzer, List<Word> tokens, boolean all_options) {
		// Object is enough as the element type: formatJSON only appends each
		// element's string form.
		List<Object> tokenJSON = new LinkedList<Object>();

		for (Word word : tokens) {
			if (all_options) {
				tokenJSON.add(word.toJSON());
			} else {
				tokenJSON.add(word.toJSONsingle());
			}
		}

		return formatJSON(tokenJSON).toString();
	}
	
	/**
	 * Renders the given tokens as one tab-separated line.
	 *
	 * Each token contributes one field: the result of
	 * {@code toTabSep(probabilities)} when {@code all_options} is true,
	 * otherwise the result of {@code toTabSepsingle()}. Fields are separated
	 * by a single tab character, with no leading or trailing tab.
	 *
	 * @param analyzer not used by this method; kept so existing callers keep
	 *        compiling
	 * @param tokens tokens to render, in output order
	 * @param all_options selects {@code toTabSep} (true) or
	 *        {@code toTabSepsingle} (false) for every token
	 * @param probabilities passed through to {@code toTabSep}; ignored when
	 *        {@code all_options} is false
	 * @return the tab-joined rendering, or an empty string for no tokens
	 */
	private static String analyze_tab(Analyzer analyzer, List<Word> tokens, boolean all_options, boolean probabilities){
		StringBuilder s = new StringBuilder();

		for (Word word : tokens) {
			// The separator goes in front of every field once something has
			// been written, which keeps the line free of a trailing tab.
			if (s.length() > 0) {
				s.append("\t");
			}
			if (all_options) {
				s.append(word.toTabSep(probabilities));
			} else {
				s.append(word.toTabSepsingle());
			}
		}

		return s.toString();
	}
	
	/**
	 * Joins the elements of a collection into a bracketed, comma-separated
	 * list of the form {@code [a, b, c]}.
	 *
	 * Elements are appended using their string form and are not quoted or
	 * escaped here, so callers must pass elements that are already valid
	 * JSON fragments if the result is meant to be JSON.
	 *
	 * @param tags elements to join, in iteration order; must not be null
	 * @return a builder holding {@code "["}, the elements separated by
	 *         {@code ", "}, and {@code "]"}; {@code "[]"} for an empty
	 *         collection
	 */
	private static StringBuilder formatJSON(Collection<?> tags) {
		StringBuilder out = new StringBuilder("[");
		// Empty before the first element, ", " before every later one.
		String separator = "";
		for (Object tag : tags) {
			out.append(separator).append(tag);
			separator = ", ";
		}
		return out.append("]");
	}
}	




© 2015 - 2025 Weber Informatics LLC | Privacy Policy