All Downloads are FREE. Search and download functionalities are using the official Maven repository.

eu.project.ttc.models.TermBuilder Maven / Gradle / Ivy

Go to download

A Java UIMA-based toolbox for multilingual and efficient terminology extraction and multilingual term alignment

There is a newer version: 3.0.10
Show newest version
/*******************************************************************************
 * Copyright 2015 - CNRS (Centre National de Recherche Scientifique)
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 *
 *******************************************************************************/
package eu.project.ttc.models;

import java.util.List;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.google.common.base.Optional;
import com.google.common.base.Preconditions;
import com.google.common.collect.Lists;

import eu.project.ttc.utils.TermSuiteUtils;

/**
 * Fluent builder for {@link Term} instances.
 *
 * <p>Obtain a builder with {@link #start(TermIndex)} (or {@link #start()}),
 * configure it through the chained setters and {@code add*} methods, then call
 * {@link #create()} or {@link #createAndAddToIndex()}.
 *
 * <p>Both creation methods read the occurrence store from the
 * {@link TermIndex}, so a builder obtained through {@link #start()} cannot
 * currently produce a term: {@link #create()} fails with a
 * {@link NullPointerException}.
 */
public class TermBuilder {
	private static final Logger LOGGER  = LoggerFactory.getLogger(TermBuilder.class);

	/** Index supplying ids and the occurrence store; null when built with {@link #start()}. */
	private TermIndex termIndex;

	/** Optional caller-supplied grouping key, only used for a consistency warning in {@link #create()}. */
	private String groupingKey;
	private String spottingRule;
	private Optional<Integer> rank = Optional.absent();
	private Optional<Integer> id = Optional.absent();
	private final List<TermWord> termWords = Lists.newArrayList();
	private final List<TermOccurrence> termOccurrences = Lists.newArrayList();
	private Optional<Integer> frequency = Optional.absent();
	private Optional<Double> generalFrequencyNorm = Optional.absent();
	private Optional<Double> frequencyNorm = Optional.absent();
	private Optional<Double> specificity = Optional.absent();

	private Optional<ContextVector> contextVector = Optional.absent();

	private TermBuilder() {
	}

	private TermBuilder(TermIndex termIndex) {
		Preconditions.checkNotNull(termIndex);
		this.termIndex = termIndex;
	}

	/**
	 * Starts a builder bound to the given term index.
	 *
	 * @param termIndex the index the term is built against; must not be null
	 * @return a new builder
	 * @throws NullPointerException if {@code termIndex} is null
	 */
	public static TermBuilder start(TermIndex termIndex) {
		return new TermBuilder(termIndex);
	}

	/**
	 * Starts a builder that is not bound to any term index.
	 *
	 * <p>Such a builder can be configured, but {@link #create()} and
	 * {@link #createAndAddToIndex()} will throw because they need the index.
	 *
	 * @return a new builder
	 */
	public static TermBuilder start() {
		return new TermBuilder();
	}

	/**
	 * Builds the term and registers it in the term index.
	 *
	 * <p>When no id was set with {@link #setId(int)}, a fresh one is requested
	 * from the index before the term is created.
	 *
	 * @return the created term, already added to the index
	 * @throws NullPointerException if the builder has no term index
	 */
	public Term createAndAddToIndex() {
		Preconditions.checkNotNull(this.termIndex, "No TermIndex given.");
		if(!id.isPresent()) {
			id = Optional.of(this.termIndex.newId());
		}
		Term term = create();
		this.termIndex.addTerm(term);
		return term;
	}

	/**
	 * Builds the term from the current builder state without adding it to the index.
	 *
	 * <p>The grouping key of the created term is always the one computed from
	 * the term words; a key given through {@link #setGroupingKey(String)} is
	 * only compared against it, and a warning is logged on mismatch.
	 *
	 * @return the created term
	 * @throws NullPointerException if the builder has no term index
	 * @throws IllegalStateException if no id has been set
	 */
	public Term create() {
		String gKey = TermSuiteUtils.getGroupingKey(this.termWords);
		if(this.groupingKey != null && !groupingKey.equals(gKey)) {
			LOGGER.warn("Given grouping key ({}) does not match expected grouping key: {}",
					this.groupingKey,
					gKey);
		}

		// Fail with an explicit message instead of an anonymous NPE / Optional.get() failure.
		Preconditions.checkNotNull(this.termIndex, "No TermIndex given.");
		Preconditions.checkState(id.isPresent(),
				"No term id given. Call setId(int) or use createAndAddToIndex().");

		Term term = new Term(termIndex.getOccurrenceStore(), id.get(), gKey, termWords, spottingRule);
		if(generalFrequencyNorm.isPresent()) {
			term.setGeneralFrequencyNorm(generalFrequencyNorm.get());
		}
		if(frequencyNorm.isPresent()) {
			term.setFrequencyNorm(frequencyNorm.get());
		}
		if(specificity.isPresent()) {
			term.setSpecificity(specificity.get());
		}
		if(rank.isPresent()) {
			term.setRank(rank.get());
		}
		if(contextVector.isPresent()) {
			term.setContextVector(contextVector.get());
		}

		/*
		 *  1 - set occurrences: each pending occurrence is bound to the new
		 *  term and then attached to it.
		 */
		for(TermOccurrence occ : termOccurrences) {
			occ.setTerm(term);
			term.addOccurrence(occ);
		}

		/*
		 * 2 - frequency must be set after occurrences because it overwrites
		 * the existing term frequency incremented by occurrences adding.
		 */
		if(frequency.isPresent()) {
			term.setFrequency(frequency.get());
		}

		return term;
	}

	/**
	 * Sets the id of the term to create.
	 *
	 * @param id the term id
	 * @return this builder
	 */
	public TermBuilder setId(int id) {
		this.id = Optional.of(id);
		return this;
	}

	/**
	 * Sets the grouping key the caller expects; it is only checked against the
	 * computed key in {@link #create()}.
	 *
	 * @param groupingKey the expected grouping key
	 * @return this builder
	 */
	public TermBuilder setGroupingKey(String groupingKey) {
		this.groupingKey = groupingKey;
		return this;
	}

	/**
	 * Sets the spotting rule passed to the {@link Term} constructor.
	 *
	 * @param spottingRule the spotting rule name
	 * @return this builder
	 */
	public TermBuilder setSpottingRule(String spottingRule) {
		this.spottingRule = spottingRule;
		return this;
	}

	/**
	 * Appends a word to the term, wrapped in a {@link TermWord}.
	 *
	 * @param word the word to append
	 * @param syntacticLabel the syntactic label of the word within the term
	 * @return this builder
	 */
	public TermBuilder addWord(Word word, String syntacticLabel) {
		this.termWords.add(new TermWord(word, syntacticLabel));
		return this;
	}

	/**
	 * Sets an explicit frequency, applied after the occurrences have been added.
	 *
	 * @param freq the term frequency
	 * @return this builder
	 */
	public TermBuilder setFrequency(int freq) {
		this.frequency = Optional.of(freq);
		return this;
	}

	/**
	 * Sets the frequency norm of the term to create.
	 *
	 * @param frequencyNorm the frequency norm
	 * @return this builder
	 */
	public TermBuilder setFrequencyNorm(double frequencyNorm) {
		this.frequencyNorm = Optional.of(frequencyNorm);
		return this;
	}

	/**
	 * Sets the general frequency norm of the term to create.
	 *
	 * @param generalFrequencyNorm the general frequency norm
	 * @return this builder
	 */
	public TermBuilder setGeneralFrequencyNorm(double generalFrequencyNorm) {
		this.generalFrequencyNorm = Optional.of(generalFrequencyNorm);
		return this;
	}

	/**
	 * Records an occurrence to attach to the term at creation time.
	 *
	 * <p>The occurrence is created with a null term; {@link #create()} binds
	 * it to the term it builds.
	 *
	 * @param begin the begin offset of the occurrence
	 * @param end the end offset of the occurrence
	 * @param sourceDocument the document the occurrence belongs to
	 * @param coveredText the text covered by the occurrence
	 * @return this builder
	 */
	public TermBuilder addOccurrence(int begin, int end, Document sourceDocument, String coveredText) {
		this.termOccurrences.add(new TermOccurrence(null, coveredText, sourceDocument, begin, end));
		return this;
	}

	/**
	 * Sets the context vector of the term to create.
	 *
	 * <p>Unlike the other setters this method returns {@code void}; the
	 * signature is kept as is for compatibility with existing callers.
	 *
	 * @param vector the context vector; must not be null
	 * @throws NullPointerException if {@code vector} is null
	 */
	public void setContextVector(ContextVector vector) {
		this.contextVector = Optional.of(vector);
	}

	/**
	 * Sets the rank of the term to create.
	 *
	 * @param rank the term rank
	 * @return this builder
	 */
	public TermBuilder setRank(int rank) {
		this.rank = Optional.of(rank);
		return this;
	}

	/**
	 * Sets the specificity of the term to create.
	 *
	 * @param specificity the term specificity
	 * @return this builder
	 */
	public TermBuilder setSpecificity(double specificity) {
		this.specificity = Optional.of(specificity);
		return this;
	}
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy