All downloads are free. The search and download features use the official Maven repository.

eu.project.ttc.models.TermIndex Maven / Gradle / Ivy

/*******************************************************************************
 * Copyright 2015 - CNRS (Centre National de Recherche Scientifique)
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 *
 *******************************************************************************/
package eu.project.ttc.models;

import java.io.File;
import java.util.Collection;
import java.util.Iterator;

import org.apache.uima.jcas.JCas;

import eu.project.ttc.engines.desc.Lang;
import eu.project.ttc.models.index.CustomTermIndex;
import eu.project.ttc.models.index.TermMeasure;
import eu.project.ttc.models.index.TermValueProvider;
import eu.project.ttc.types.TermOccAnnotation;

/**
 * Contract of a term index: a named, language-bound container exposing terms,
 * words, documents, custom indexes, term measures and occurrence handling.
 *
 * <p>NOTE(review): several methods return raw {@link Collection},
 * {@link Iterator} or {@link Iterable} types; the element types are not
 * declared in this interface and must be confirmed against implementations.
 */
public interface TermIndex {

	/* ---------------------------------------------------------------
	 * Attributes
	 * --------------------------------------------------------------- */

	/**
	 * @return the term index id name
	 */
	String getName();

	/**
	 * @return the language of the pipeline
	 */
	Lang getLang();

	/**
	 * Sets the corpus id of this term index.
	 *
	 * @param corpusID
	 * 			the corpus id to set
	 */
	void setCorpusId(String corpusID);

	/**
	 * @return the corpus id of this term index
	 */
	String getCorpusId();

	/**
	 * @return the occurrence store
	 */
	OccurrenceStore getOccurrenceStore();


	/* ---------------------------------------------------------------
	 * Terms
	 * --------------------------------------------------------------- */

	/**
	 * Looks up a term by its grouping key.
	 *
	 * @param groupingKey
	 * 			the grouping key of the term
	 * @return the matching term (behaviour for an unknown key is not
	 * 			specified by this interface)
	 */
	Term getTermByGroupingKey(String groupingKey);

	/**
	 * Looks up a term by its id.
	 *
	 * @param termId
	 * 			the id of the term
	 * @return the matching term (behaviour for an unknown id is not
	 * 			specified by this interface)
	 */
	Term getTermById(int termId);

//	TermBuilder newTerm(String termId);

	/**
	 * Removes a term from this index.
	 *
	 * @param t
	 * 			the term to remove
	 */
	void removeTerm(Term t);

	/**
	 * Adds a term to this index.
	 *
	 * @param term
	 * 			the term to add
	 */
	void addTerm(Term term);

	/**
	 * @return the terms of this index (raw collection)
	 */
	Collection getTerms();

	/**
	 * @return the term classes of this index (raw collection)
	 */
	Collection getTermClasses();

	/**
	 * Classifies terms under a class head.
	 *
	 * @param classHead
	 * 			the head term of the class
	 * @param classTerms
	 * 			the terms of the class (raw iterable)
	 */
	void classifyTerms(Term classHead, Iterable classTerms);


	/* ---------------------------------------------------------------
	 * Words
	 * --------------------------------------------------------------- */

	/**
	 * Adds a word to this index.
	 *
	 * @param word
	 * 			the word to add
	 */
	void addWord(Word word);

	/**
	 * @return the words of this index (raw collection)
	 */
	Collection getWords();

	/**
	 * Cleans orphan words.
	 * NOTE(review): what makes a word "orphan" is not defined here;
	 * confirm against implementations.
	 */
	void cleanOrphanWords();

	/**
	 * Looks up a word by lemma.
	 *
	 * @param lemma
	 * 			the lemma of the word
	 * @return the matching word (behaviour for an unknown lemma is not
	 * 			specified by this interface)
	 */
	Word getWord(String lemma);


	/* ---------------------------------------------------------------
	 * Documents
	 * --------------------------------------------------------------- */

	/**
	 * Returns the document identified by this url, or creates a new one.
	 *
	 * @param url
	 * 			A url accessible by {@link File}'s constructor.
	 * @return
	 * 			The created document
	 */
	Document getDocument(String url);

	/**
	 * @return the documents of this index (raw collection)
	 */
	Collection getDocuments();


//	/**
//	 * Get all single word terms that have the given lemma.
//	 * 
//	 * There is generally at most one SW term per lemma, except in a few cases where
//	 * the lemmas are the same but the syntactic labels are different 
//	 * (e.g. "v: couvent" & "a: couvent")
//	 * 
//	 * 
//	 * @param lemma
//	 * @return
//	 */
//	Collection getSingleWordTermByLemma(String lemma);


	/* ---------------------------------------------------------------
	 * Term iterators
	 * (this section was titled "Term classes" originally)
	 * --------------------------------------------------------------- */

	/**
	 * @return an iterator over single-word terms (raw iterator)
	 */
	Iterator singleWordTermIterator();

	/**
	 * @return an iterator over multi-word terms (raw iterator)
	 */
	Iterator multiWordTermIterator();

	/**
	 * @return an iterator over compound-word terms (raw iterator)
	 */
	Iterator compoundWordTermIterator();


	/* ---------------------------------------------------------------
	 * Custom indexes
	 * --------------------------------------------------------------- */

	/**
	 * Gets a custom index by name.
	 *
	 * @param indexName
	 * 			the name of the custom index
	 * @return the custom index
	 */
	CustomTermIndex getCustomIndex(String indexName);

	/**
	 * Creates a custom index.
	 *
	 * @param indexName
	 * 			the name of the custom index
	 * @param termClassProvider
	 * 			the value provider for the index
	 * 			(NOTE(review): exact role not visible here)
	 * @return the custom index
	 */
	CustomTermIndex createCustomIndex(String indexName, TermValueProvider termClassProvider);

	/**
	 * Drops a custom index.
	 *
	 * @param indexName
	 * 			the name of the custom index to drop
	 */
	void dropCustomIndex(String indexName);


	/* ---------------------------------------------------------------
	 * Term measures
	 * --------------------------------------------------------------- */

	/**
	 * @return the WR measure
	 */
	TermMeasure getWRMeasure();

	/**
	 * @return the WR log measure
	 */
	TermMeasure getWRLogMeasure();

	/**
	 * @return the frequency measure
	 */
	TermMeasure getFrequencyMeasure();

	/**
	 * @return the measures of this index (raw iterable)
	 */
	Iterable getMeasures();


	/**
	 * Deletes many elements at once, driven by a selector.
	 * NOTE(review): presumably deletes every term accepted by the
	 * selector; confirm against implementations.
	 *
	 * @param selector
	 * 			the selector driving the deletion
	 */
	void deleteMany(TermSelector selector);


	/* ---------------------------------------------------------------
	 * Occurrences
	 * --------------------------------------------------------------- */

	//TODO remove these

	/**
	 * Adds a term occurrence to this index.
	 *
	 * @param annotation
	 * 			the term occurrence annotation
	 * @param FileUri
	 * 			the uri of the file
	 * @param keepOccurrenceInTermIndex
	 * 			flag passed along with the occurrence
	 * 			(NOTE(review): presumably whether the occurrence is kept
	 * 			in the index; confirm against implementations)
	 * @return a term
	 * @deprecated per the original note, the JCas import
	 * 			({@link #importCas(JCas, boolean)}) should be used
	 * 			(important for the inner nbWordAnnotation)
	 */
	@Deprecated
	Term addTermOccurrence(TermOccAnnotation annotation, String FileUri, boolean keepOccurrenceInTermIndex);

	/**
	 * Creates the occurrence index.
	 */
	void createOccurrenceIndex();

	/**
	 * Clears the occurrence index.
	 */
	void clearOccurrenceIndex();

	/**
	 * Imports a CAS into this index.
	 *
	 * @param cas
	 * 			the CAS to import
	 * @param keepOccurrenceInTermIndex
	 * 			flag passed along with the import
	 * 			(NOTE(review): presumably whether occurrences are kept
	 * 			in the index; confirm against implementations)
	 */
	void importCas(JCas cas, boolean keepOccurrenceInTermIndex);


	/* ---------------------------------------------------------------
	 * Id generator
	 * --------------------------------------------------------------- */

	/**
	 * @return a new id
	 */
	int newId();

	/**
	 * Sets the number of word annotations.
	 *
	 * @param nbWordAnnotations
	 * 			the number of word annotations
	 */
	void setWordAnnotationsNum(int nbWordAnnotations);

	/**
	 * @return the number of word annotations
	 */
	int getWordAnnotationsNum();

	/**
	 * The number of terms added to this TermIndex by invoking
	 * {@link TermIndex#addTermOccurrence(TermOccAnnotation, String, boolean)}.
	 *
	 * @return
	 * 		The number of spotted terms, 0 if none has been added through
	 * 		the method {@link #addTermOccurrence(TermOccAnnotation, String, boolean)}
	 */
	int getSpottedTermsNum();

	/**
	 * Sets the number of spotted terms.
	 *
	 * @param nbSpottedTerms
	 * 			the number of spotted terms
	 */
	void setSpottedTermsNum(int nbSpottedTerms);

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy