All Downloads are FREE. Search and download functionalities are using the official Maven repository.

fr.univnantes.termsuite.model.Word Maven / Gradle / Ivy

Go to download

A Java UIMA-based toolbox for multilingual and efficient terminology extraction and multilingual term alignment

The newest version!
/*******************************************************************************
 * Copyright 2015-2016 - CNRS (Centre National de Recherche Scientifique)
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 *
 *******************************************************************************/
package fr.univnantes.termsuite.model;

import java.util.List;

import com.google.common.base.MoreObjects;
import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableList;

import fr.univnantes.termsuite.utils.StringUtils;

/**
 * A word identified by its lemma, carrying a stem and, optionally, a
 * decomposition into {@link Component}s together with a {@link CompoundType}.
 *
 * <p>Two words are equal when their lemmas are equal; the stem and the
 * composition do not take part in {@link #equals(Object)} or
 * {@link #hashCode()}.
 */
public class Word {

	private static final String MSG_NOT_COMPOUND = "Word <%s> is not a compound";
	private static final String MSG_NOT_NEOCLASSICAL = "Word <%s> is not a neoclassical compound.";
	private static final String MSG_NO_NEOCLASSICAL_COMPOUND = "No neoclassical compound found for word <%s>";

	private CompoundType compoundType;

	private String stem;
	private String lemma;

	private List<Component> components;

	/* Lazily computed caches; null means "not computed yet". */
	private String normalizedStem;
	private String normalizedLemma;

	/**
	 * Creates a word with no composition (empty component list,
	 * compound type {@link CompoundType#UNSET}).
	 *
	 * @param lemma
	 * 		the lemma of this word
	 * @param stem
	 * 		the stem of this word
	 */
	public Word(String lemma, String stem) {
		this.lemma = lemma;
		this.stem = stem;
		resetComposition();
	}

	/**
	 * Clears the composition of this word: the component list becomes
	 * empty and the compound type becomes {@link CompoundType#UNSET}.
	 */
	public void resetComposition() {
		components = ImmutableList.of();
		compoundType = CompoundType.UNSET;
	}

	/**
	 * @return
	 * 		<code>true</code> if a compound type has been set and this
	 * 		word has more than one component
	 */
	public boolean isCompound() {
		return compoundType != CompoundType.UNSET && this.components.size() > 1;
	}

	/**
	 * @return
	 * 		the stem of this word
	 */
	public String getStem() {
		return stem;
	}

	/**
	 * Sets the stem of this word.
	 *
	 * @param stem
	 * 		the new stem
	 */
	public void setStem(String stem) {
		this.stem = stem;
		// The cached normalized form was derived from the previous stem:
		// drop it so that getNormalizedStem() recomputes it on next access.
		this.normalizedStem = null;
	}

	/**
	 * Sets the components of this word. The given list is stored as is
	 * (no copy is made).
	 *
	 * @param components
	 * 		the components of this word
	 */
	public void setComponents(List<Component> components) {
		this.components = components;
	}

	/**
	 * @return
	 * 		the components of this word (the internal list, not a copy)
	 */
	public List<Component> getComponents() {
		return components;
	}

	@Override
	public String toString() {
		return MoreObjects.toStringHelper(this)
				.add("lemma", this.lemma)
				.add("isCompound", this.isCompound())
				.toString();
	}

	/**
	 * @return
	 * 		the compound type of this word
	 */
	public CompoundType getCompoundType() {
		return compoundType;
	}

	/**
	 * Sets the compound type of this word.
	 *
	 * @param compoundType
	 * 		the new compound type
	 */
	public void setCompoundType(CompoundType compoundType) {
		this.compoundType = compoundType;
	}

	/**
	 * Two words are equal if and only if their lemmas are equal.
	 */
	@Override
	public boolean equals(Object obj) {
		if (!(obj instanceof Word)) {
			return false;
		}
		Word o = (Word) obj;
		return this.lemma.equals(o.lemma);
	}

	/**
	 * Consistent with {@link #equals(Object)}: computed from the lemma only.
	 */
	@Override
	public int hashCode() {
		return this.lemma.hashCode();
	}

	/**
	 * Returns the stem passed through {@link StringUtils#replaceAccents}
	 * and lower-cased. The value is computed on first access and cached
	 * until the stem is changed with {@link #setStem(String)}.
	 *
	 * @return
	 * 		the normalized stem
	 */
	public String getNormalizedStem() {
		if (normalizedStem == null) {
			this.normalizedStem = StringUtils.replaceAccents(stem).toLowerCase();
		}
		return this.normalizedStem;
	}

	/**
	 * Returns the lemma passed through {@link StringUtils#replaceAccents}
	 * and lower-cased. The value is computed on first access and cached.
	 *
	 * @return
	 * 		the normalized lemma
	 */
	public String getNormalizedLemma() {
		if (normalizedLemma == null) {
			this.normalizedLemma = StringUtils.replaceAccents(this.lemma).toLowerCase();
		}
		return this.normalizedLemma;
	}

	/**
	 * Returns the first {@link Component} of this word that is a
	 * neoclassical affix.
	 *
	 * @return
	 * 		The neoclassical component affix
	 *
	 * @throws IllegalStateException when this word is not a compound,
	 * 		or is a compound whose type is not {@link CompoundType#NEOCLASSICAL}
	 * @throws IllegalArgumentException when this word is a neoclassical
	 * 		compound but none of its components is a neoclassical affix
	 */
	public Component getNeoclassicalAffix() {
		Preconditions.checkState(isCompound(), MSG_NOT_COMPOUND, this);
		Preconditions.checkState(getCompoundType() == CompoundType.NEOCLASSICAL, MSG_NOT_NEOCLASSICAL, this);
		for (Component c : components) {
			if (c.isNeoclassicalAffix()) {
				return c;
			}
		}
		throw new IllegalArgumentException(String.format(MSG_NO_NEOCLASSICAL_COMPOUND, this));
	}

	/**
	 * @return
	 * 		the lemma of this word
	 */
	public String getLemma() {
		return lemma;
	}
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy