All Downloads are FREE. Search and download functionalities are using the official Maven repository.

lv.semti.morphology.attributes.AttributeValues Maven / Gradle / Ivy

The newest version!
/*******************************************************************************
 * Copyright 2008, 2009, 2014 Institute of Mathematics and Computer Science, University of Latvia
 * Author: Pēteris Paikens
 * 
 *     This program is free software: you can redistribute it and/or modify
 *     it under the terms of the GNU General Public License as published by
 *     the Free Software Foundation, either version 3 of the License, or
 *     (at your option) any later version.
 * 
 *     This program is distributed in the hope that it will be useful,
 *     but WITHOUT ANY WARRANTY; without even the implied warranty of
 *     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *     GNU General Public License for more details.
 * 
 *     You should have received a copy of the GNU General Public License
 *     along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *******************************************************************************/
package lv.semti.morphology.attributes;

import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.PrintStream;
import java.io.PrintWriter;
import java.io.UnsupportedEncodingException;
import java.io.Writer;
import java.util.Collection;
import java.util.HashMap;
import java.util.Set;
import java.util.Map.Entry;

import org.json.simple.JSONValue;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;

//TODO - this class should simply inherit from HashMap
public class AttributeValues implements FeatureStructure, Cloneable {
	protected HashMap attributes = new HashMap();

	@Override
	public String toString() {
		StringBuilder sb = new StringBuilder();
		for (Entry īpašība : attributes.entrySet()) {
			sb.append(String.format("%s = %s; ", īpašība.getKey(),īpašība.getValue()));
		}
		return sb.toString();
	}

	public void describe() {
		PrintWriter izeja;
		try {
			izeja = new PrintWriter(new OutputStreamWriter(System.err, "UTF-8"));
			this.describe(izeja);
			izeja.flush();
		} catch (UnsupportedEncodingException e) {
			e.printStackTrace();
		}
	}

	public void describe(PrintStream pipe) {
		this.describe(new PrintWriter(pipe));
	}
	
	public void describe(PrintWriter pipe) {
		pipe.printf("\t%s\n", this.getTag());
		for (Entry īpašība : attributes.entrySet()) {
			pipe.format("\t\t%s = %s%n", īpašība.getKey(),īpašība.getValue());
		}
		pipe.flush();
	}

	public void addAttribute(String attribute, String value) {
		//FIXME - vajag nodalīt īpašību pielikšanu no īpašību aizvietošanas
		attributes.put(attribute, value);
	}

	public void removeAttribute(String attribute) {
		attributes.remove(attribute);
	}

	public void addAttributes(HashMap newAttributes) {
		this.attributes.putAll(newAttributes);
		//FIXME - a ko tad, ja kautkas konfliktē??
	}

	public void addAttributes(AttributeValues newAttributes) {
		this.attributes.putAll(newAttributes.attributes);
		//FIXME - a ko tad, ja kautkas konfliktē??
	}
	
	/**
	 * Remove all attributes except those listed.
	 */
	public void filterAttributes(Collection leaveAttributes) {
		attributes.keySet().retainAll(leaveAttributes);
	}

	/***
	 *  Returns null if attribute does not exist
	 */
	public String getValue(String attribute) {
		return attributes.get(attribute);
	}

	/**
	 * Returns true either if the attribute exists and matches the provided
	 * value or if attribute doesn't exist and provided value is null.  
	 */
	public boolean isMatchingStrong (String attribute, String value) {
		String result = attributes.get(attribute);
		if (result == null && value == null) return true;
		if (result == null) return false;
		if (result.contains("|")) {  // Multivalue support
			for (String v : result.split("\\|"))
				if (v.equals(value)) return true;
			return false;
		} else return result.equalsIgnoreCase(value);
	}

	/**
	 * Returns true either if all attributes in provided test set are strongly
	 * matching on this, and if all attributes in this are strongly matching on
	 * attributes provided in test set.
	 */
	public boolean isMatchingStrong(AttributeValues testSet) {
		boolean match = true;
		for (Entry aVPair : testSet.entrySet()) {
			if (!this.isMatchingStrong(aVPair.getKey(), aVPair.getValue()))
				match = false;
		}
		for (Entry aVPair : this.entrySet()) {
			if (!testSet.isMatchingStrong(aVPair.getKey(), aVPair.getValue()))
				match = false;
		}
		return match;
	}

	/**
	 * Returns true either if all attributes in provided test set are strongly
	 * matching on this, i.e. they must exist and match.
	 */
	public boolean isMatchingStrongOneSide(AttributeValues testSet) {
		boolean match = true;
		for (Entry aVPair : testSet.entrySet()) {
			if (!this.isMatchingStrong(aVPair.getKey(), aVPair.getValue()))
				match = false;
		}
		return match;
	}
	
	/**
	 * Returns true either if the attribute exists and matches the provided
	 * value or if attribute doesn't exist.
	 */
	public boolean isMatchingWeak (String attribute, String value) {
		String result = attributes.get(attribute);
		if (result == null) return true;
		if (result.contains("|")) {  // Multivalue support
			for (String v : result.split("\\|"))
				if (v.equals(value)) return true;
			return false;
		} else return result.equalsIgnoreCase(value);
	}	// Atshkjiriiba no checkAttribute - ja atribuuta nav, bet padotaa veertiiba nav null.
		// Shii metode dod true, check attribute - false.

	/**
	 * Returns true if all attributes provided in test set weakly matches on
	 * this.
	 */
	public boolean isMatchingWeak(AttributeValues testSet) {
		if (testSet == null) return true;
		boolean der = true;
		for (Entry pāris : testSet.entrySet()) {
			if (!this.isMatchingWeak(pāris.getKey(), pāris.getValue()))
				der = false;
		}
		return der;
	}

	
	public void toXML (Writer straume) throws IOException {
		straume.write(" pāris : attributes.entrySet()) {
			String īpašība = pāris.getKey().replace(" ", "_").replace("\"", """).replace("&", "&");
			if (īpašība.equals("")) continue;
			String vērtība = pāris.getValue().replace("\"", """).replace("&", "&");
			straume.write(" "+īpašība+"=\""+vērtība+"\"");
		}
		straume.write("/>");
	}
	
	public String toJSON() {
		return JSONValue.toJSONString(attributes);
	}

	public Entry get(int nr) {
	//FIXME - atgriež rediģējamu pāri... netīri kautkā, tas ir kā getteris domāts, nevis rakstīšanai..
	//jāmaina pieeja tur kur to sauc.

		Entry rezults = null;
		int i=0;
		for (Entry īpašība : attributes.entrySet()) {
			if (i==nr) rezults = īpašība;
			i++;
		}
		return rezults;
	}

	public int size() {
		return attributes.entrySet().size();
	}

	@Override
	@SuppressWarnings("unchecked")
	public Object clone() throws CloneNotSupportedException {
		try {
			AttributeValues kopija = (AttributeValues)super.clone();
			kopija.attributes = (HashMap)attributes.clone();
			return kopija;
        } catch (CloneNotSupportedException e) {
            throw new Error("Gļuks - nu vajag varēt klasi AttributeValues noklonēt.");
        }
	}

	public String getDescription() {
		String ret = "";
		for (Entry Īpašība : attributes.entrySet()) {
			if (!Īpašība.getKey().startsWith("Nozīme")) {
			if (ret.length() < 1) {
				ret = Īpašība.getValue();
			} else {
				ret = ret + ", "/* + Īpašība.getKey() + " = "*/ + Īpašība.getValue();
			}
			}
		}
		return ret;
	}

	public Set> entrySet() {
	//FIXME - jākopē, lai nav editējams - vai jāmaina pieeja tur kur šo sauc.
		return attributes.entrySet();
	}

	public AttributeValues(Node node) {
		NodeList nodes = node.getChildNodes();

		for (int i = 0; i < nodes.getLength(); i++) {
			if (nodes.item(i).getNodeName().equals("Attributes"))
				for (int j = 0; j < nodes.item(i).getAttributes().getLength(); j++) {
					Node n = nodes.item(i).getAttributes().item(j);
					addAttribute(n.getNodeName().replaceAll("_", " "), n.getTextContent());
				}
		}
	}

	public AttributeValues() {
		//irok
	}

	/**
	 * Creates a new set of AttributeValues, initializing the contents from a source AV object
	 * @param source
	 */
	public AttributeValues(AttributeValues source) {
		this.addAttributes(source);
	}

	public void clear() {
		attributes.clear();
	}
	
	/**
	 * Returns Semti-Kamols style positional morphosyntactic markup tag of this set of attributes
	 * @return
	 */
	public String getTag() {
		return TagSet.getTagSet().toTag(this);
	}	

	/**
	 * Removes a set of attributes that are considered not target of POS/morphotagging; mainly lexical features.
	 * NB! This set also defines which attributes will not be guessed/tagged by the automated tagger.
	 * FIXME - confusing name of function?
	 */
	public void removeNonlexicalAttributes() {
		removeAttribute(AttributeNames.i_Transitivity);
//		removeAttribute(AttributeNames.i_VerbType);
//		removeAttribute(AttributeNames.i_NounType);
		removeAttribute(AttributeNames.i_Declension);
		removeAttribute(AttributeNames.i_Konjugaacija);
		
		// removeAttribute(AttributeNames.i_ApstTips);
		removeAttribute(AttributeNames.i_SaikljaTips);
		removeAttribute(AttributeNames.i_SkaitljaTips);
		removeAttribute(AttributeNames.i_AdjectiveType);
		removeAttribute(AttributeNames.i_Uzbuuve);
		removeAttribute(AttributeNames.i_Order);
		//removeAttribute(AttributeNames.i_VvTips);
		removeAttribute(AttributeNames.i_Noliegums);
		removeAttribute(AttributeNames.i_VietasApstNoziime);
		
		if (isMatchingStrong(AttributeNames.i_PartOfSpeech, AttributeNames.v_Preposition)) {			
			removeAttribute(AttributeNames.i_Novietojums);
			//removeAttribute(AttributeNames.i_Rekcija);
			//removeAttribute(AttributeNames.i_Number);
		}
		
		//Principā analizators no galotnes varētu izdomāt, BET ir nepieciešams lai samazinātu klašu skaitu CRF tagerim
		removeAttribute(AttributeNames.i_Degree);
		removeAttribute(AttributeNames.i_Reflexive);
//		removeAttribute(AttributeNames.i_Laiks);  // FIXME - piemēram, ēdu mēdzu zīmēju nestrādās nošķirt pagātne tagadne
		removeAttribute(AttributeNames.i_Voice);
	}

	/**
	 * Removes a set of attributes that are ignored in the MorphoEvaluate test for corpus comparison
	 */
	public void removeAttributesForCorpusTest() {
		removeAttribute(AttributeNames.i_Transitivity);
		removeAttribute(AttributeNames.i_ApstTips);
		removeAttribute(AttributeNames.i_AdjectiveType);
//		removeAttribute(AttributeNames.i_VietasApstNoziime);
		if (isMatchingStrong(AttributeNames.i_PartOfSpeech, AttributeNames.v_Preposition)) {
			removeAttribute(AttributeNames.i_Novietojums);
		}
	}
	
	public void removeTechnicalAttributes() {
		removeAttribute(AttributeNames.i_LexemeID);
		removeAttribute(AttributeNames.i_EndingID);
		removeAttribute(AttributeNames.i_ParadigmID);
		removeAttribute(AttributeNames.i_Source);
		removeAttribute(AttributeNames.i_Word);
		removeAttribute(AttributeNames.i_Mija);
		removeAttribute(AttributeNames.i_Guess);
		removeAttribute(AttributeNames.i_Generate);
		removeAttribute(AttributeNames.i_Konjugaacija);
		removeAttribute(AttributeNames.i_Declension);
	}

	public StringBuilder pipeDelimitedEntries() {
		StringBuilder s = new StringBuilder();
		for (Entry entry : this.entrySet()) { // visi attributevalue paariishi
			 s.append(entry.getKey().replace(' ', '_'));
			 s.append('=');
			 s.append(entry.getValue().replace(' ', '_'));
			 s.append('|');
		}
		return s;
	}

    @Override
    public boolean equals(Object other) {
        if (other == null || !(other instanceof AttributeValues)) {
            return false;
        }
        AttributeValues that = (AttributeValues) other;

        if (this.attributes == null) {
            if (that.attributes != null) return false;
        } else if (!this.attributes.equals(that.attributes))
            return false;
        return true;
    }

    @Override
    public int hashCode() {
        return this.attributes.hashCode();
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy