All downloads are free. Search and download functionality uses the official Maven repository.

lv.semti.morphology.webservice.InflectPeopleResource Maven / Gradle / Ivy

Go to download

Webservice API for Tēzaurs.lv and other ailab.lv Latvian computational linguistic tools

There is a newer version: 2.5.7
Show newest version
/*******************************************************************************
 * Copyright 2012, 2013, 2014 Institute of Mathematics and Computer Science, University of Latvia
 * Author: Pēteris Paikens
 * 
 *     This program is free software: you can redistribute it and/or modify
 *     it under the terms of the GNU General Public License as published by
 *     the Free Software Foundation, either version 3 of the License, or
 *     (at your option) any later version.
 * 
 *     This program is distributed in the hope that it will be useful,
 *     but WITHOUT ANY WARRANTY; without even the implied warranty of
 *     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *     GNU General Public License for more details.
 * 
 *     You should have received a copy of the GNU General Public License
 *     along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *******************************************************************************/
package lv.semti.morphology.webservice;

import java.io.IOException;
import java.io.StringWriter;
import java.io.UnsupportedEncodingException;
import java.net.URLDecoder;
import java.util.Collection;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;

import lv.semti.morphology.analyzer.Analyzer;
import lv.semti.morphology.analyzer.Splitting;
import lv.semti.morphology.analyzer.Word;
import lv.semti.morphology.analyzer.Wordform;
import lv.semti.morphology.attributes.AttributeNames;
import lv.semti.morphology.attributes.AttributeValues;

import org.restlet.resource.Get;
import org.restlet.resource.ServerResource;

public class InflectPeopleResource extends ServerResource {
	@Get
	public String retrieve() {
		getResponse().setAccessControlAllowOrigin("*");
		String query = (String) getRequest().getAttributes().get("query");

		List> processedtokens = inflect(query, getQuery().getValues("gender"));
				
		String format = (String) getRequest().getAttributes().get("format");
		if (format.equalsIgnoreCase("xml")) {
			StringWriter s = new StringWriter();					
			try {
				s.write("\n");
				for (List token : processedtokens) {
					s.write("\n");
					for (Wordform wf : token) wf.toXML(s);	
					s.write("\n");
				}		
				s.write("\n");
			} catch (IOException e) { e.printStackTrace(); }
			return s.toString();
		} else {
			List tokenJSON = new LinkedList();
			for (List token : processedtokens) {
				List wordJSON = new LinkedList();
				for (Wordform wf : token) wordJSON.add(wf.toJSON());
				tokenJSON.add(formatJSON(wordJSON));
			}		
			return formatJSON(tokenJSON);			
		}
	}

	private synchronized List> inflect(String query, String gender) {
		try {
			query = URLDecoder.decode(query, "UTF8");
		} catch (UnsupportedEncodingException e) {
			e.printStackTrace();
		}

		Analyzer analyzer = MorphoServer.getAnalyzer();
		analyzer.enableGuessing = true;
		analyzer.enableVocative = true;
		analyzer.guessVerbs = false;
		analyzer.guessAdjectives = true;
		analyzer.guessParticiples = false;
		analyzer.guessInflexibleNouns = true;
		analyzer.enableAllGuesses = true;
		
		LinkedList showAttrs = new LinkedList();
		showAttrs.add("Vārds"); showAttrs.add("Locījums"); showAttrs.add("Skaitlis"); showAttrs.add("Dzimte"); showAttrs.add("Deklinācija");
		
		AttributeValues filter = new AttributeValues();
		if (gender != null) {
			if (gender.equalsIgnoreCase("m")) filter.addAttribute(AttributeNames.i_Gender, AttributeNames.v_Masculine);
			if (gender.equalsIgnoreCase("f")) filter.addAttribute(AttributeNames.i_Gender, AttributeNames.v_Feminine);
		}
		
		String words = query;
		List tokens = Splitting.tokenize(analyzer, words);
		LinkedList> processedTokens = new LinkedList>();
		
		for (Word word : tokens) {
			List formas = analyzer.generateInflections(word.getToken(), true, filter);
			for (Wordform wf : formas) {
				wf.filterAttributes(showAttrs);
				String name = wf.getValue(AttributeNames.i_Word);
				name = name.substring(0, 1).toUpperCase() + name.substring(1,name.length());
				wf.addAttribute(AttributeNames.i_Word, name);
			}
			processedTokens.add(formas);
		}
		
		analyzer.defaultSettings();
		return processedTokens;
	}
	
	private String formatJSON(Collection tags) {
		Iterator i = tags.iterator();
		String out = "[";
		while (i.hasNext()) {
			out += i.next();
			if (i.hasNext()) out += ",\n";
		}
		out += "]";
		return out;
	}
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy