All Downloads are FREE. Search and download functionalities are using the official Maven repository.

eu.project.ttc.tools.utils.TermPredicates Maven / Gradle / Ivy

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 * 
 *   http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package eu.project.ttc.tools.utils;

import java.text.Collator;
import java.util.Arrays;
import java.util.Collection;
import java.util.Comparator;
import java.util.List;
import java.util.TreeSet;

import com.google.common.collect.ComparisonChain;

import eu.project.ttc.models.Term;
import eu.project.ttc.models.index.TermMeasure;

/**
 * Consists exclusively of static members that operate on or create
 * {@link TermPredicate}s, plus a few {@link Term} comparators.
 * 
 * @author Sebastián Peña Saldarriaga
 */
public final class TermPredicates {

	/** No instances */
	private TermPredicates() {
	}

	/** A predicate that accepts every term */
	public static final TermPredicate TRIVIAL_ACCEPTOR = new TermPredicate() {

		@Override
		public boolean accept(Term term) {
			return true;
		}
	};

	/**
	 * Returns a {@link TermPredicate} that accepts terms whose frequency (as
	 * reported by {@code Term.getFrequency()}) is greater than or equal to
	 * the specified threshold.
	 * 
	 * @param threshold
	 *            The occurrences threshold (inclusive)
	 * @return The term predicate
	 */
	public static TermPredicate createOccurrencesPredicate(final int threshold) {
		return new TermPredicate() {

			@Override
			public boolean accept(Term term) {
				return term.getFrequency() >= threshold;
			}
		};
	}

	/**
	 * Returns a {@link TermPredicate} that accepts terms whose value for the
	 * given measure is greater than or equal to the specified threshold.
	 * 
	 * @param threshold
	 *            The measure threshold (inclusive)
	 * @param termMeasure
	 *            The measure queried through
	 *            {@code termMeasure.getValue(term)} for each tested term
	 * @return The term predicate
	 */
	public static TermPredicate createMeasurePredicate(
			final double threshold, final TermMeasure termMeasure) {
		return new TermPredicate() {

			@Override
			public boolean accept(Term term) {
				return termMeasure.getValue(term) >= threshold;
			}
		};
	}


	/**
	 * Returns a {@link TermPredicate} intended to accept terms whose
	 * grammatical categories are noun or adjective.
	 * <p>
	 * Not implemented yet: the returned predicate throws
	 * {@link UnsupportedOperationException} on every call to
	 * {@code accept}.
	 * 
	 * @return The term predicate
	 */
	public static TermPredicate createNounAdjectivePredicate() {
		return new TermPredicate() {

			@Override
			public boolean accept(Term term) {
				throw new UnsupportedOperationException("Terms do not support categories yet");
			}
		};
	}

	/**
	 * Returns a {@link TermPredicate} intended to accept terms whose
	 * grammatical categories are verb or adverb.
	 * <p>
	 * Not implemented yet: the returned predicate throws
	 * {@link UnsupportedOperationException} on every call to
	 * {@code accept}.
	 * 
	 * @return The term predicate
	 */
	public static TermPredicate createVerbAdverbPredicate() {
		return new TermPredicate() {
			@Override
			public boolean accept(Term term) {
				throw new UnsupportedOperationException("Terms do not support categories yet");
			}
		};
	}

	/**
	 * Returns a {@link TermPredicate} that accepts a term that is accepted by
	 * pred1 or pred2. Evaluation short-circuits: pred2 is only consulted when
	 * pred1 rejects the term.
	 * 
	 * @param pred1
	 *            The first predicate
	 * @param pred2
	 *            The second predicate
	 * @return The resulting term predicate
	 */
	public static TermPredicate createOrPredicate(final TermPredicate pred1,
			final TermPredicate pred2) {
		return new TermPredicate() {
			@Override
			public boolean accept(Term term) {
				return pred1.accept(term) || pred2.accept(term);
			}
		};
	}

	/**
	 * Returns a {@link TermPredicate} that accepts a term that is accepted by
	 * pred1 and pred2. Evaluation short-circuits: pred2 is only consulted
	 * when pred1 accepts the term.
	 * 
	 * @param pred1
	 *            The first predicate
	 * @param pred2
	 *            The second predicate
	 * @return The resulting term predicate
	 */
	public static TermPredicate createAndPredicate(final TermPredicate pred1,
			final TermPredicate pred2) {
		return new TermPredicate() {
			@Override
			public boolean accept(Term term) {
				return pred1.accept(term) && pred2.accept(term);
			}
		};
	}

	/**
	 * Returns a {@link TermPredicate} that accepts terms in sorted occurrence
	 * order (see {@link #DESCENDING_OCCURRENCE_ORDER}) up to the specified
	 * cutoffRank.
	 * <p>
	 * NOTE(review): the returned predicate only works once it has been
	 * initialized with a term list; no code visible in this class performs
	 * that initialization - confirm how callers are expected to do it.
	 * 
	 * @param cutoffRank
	 *            The rank cutoff
	 * @return The term predicate object
	 */
	public static TermPredicate createTopNByOccurrencesPredicate(int cutoffRank) {
		return createSortedTermPredicate(cutoffRank,
				DESCENDING_OCCURRENCE_ORDER);
	}

	/**
	 * Returns a {@link TermPredicate} that accepts terms in sorted measure
	 * order up to the specified cutoffRank.
	 * <p>
	 * NOTE(review): the returned predicate only works once it has been
	 * initialized with a term list; no code visible in this class performs
	 * that initialization - confirm how callers are expected to do it.
	 * 
	 * @param cutoffRank
	 *            The rank cutoff
	 * @param termMeasure
	 *            The measure whose comparator
	 *            ({@code termMeasure.getTermComparator(true)}) ranks the
	 *            terms; the flag presumably requests descending order -
	 *            confirm against {@link TermMeasure}
	 * @return The term predicate object
	 */
	public static TermPredicate createTopNByTermMeasurePredicate(int cutoffRank, TermMeasure termMeasure) {
		return createSortedTermPredicate(cutoffRank, termMeasure.getTermComparator(true));
	}

	/**
	 * Returns a {@link TermPredicate} that accepts terms sorted by the
	 * specified comparator up to the cutoffRank.
	 * <p>
	 * When fewer distinct terms than cutoffRank are available, all of them
	 * are selected. A negative cutoffRank selects nothing.
	 * 
	 * @param cutoffRank
	 *            The rank cutoff
	 * @param comparator
	 *            The comparator
	 * @return The term predicate instance
	 */
	private static TermPredicate createSortedTermPredicate(
			final int cutoffRank, final Comparator<? super Term> comparator) {
		return new ListBasedTermPredicate() {

			@Override
			public void initialize(List<Term> termList) {

				// Rank the terms. The tree set also collapses terms that the
				// comparator considers equal, so it may be smaller than the list.
				TreeSet<Term> rankedTerms = new TreeSet<Term>(comparator);
				rankedTerms.addAll(termList);

				// Size the array by what is really available: an array padded
				// with nulls would make Arrays.sort fail below.
				int selectedCount = Math.min(Math.max(cutoffRank, 0), rankedTerms.size());

				// Copy grouping keys up to the cutoff rank
				String[] ids = new String[selectedCount];
				int i = 0;
				for (Term term : rankedTerms) {
					if (i >= selectedCount) {
						break;
					}
					ids[i++] = term.getGroupingKey();
				}

				// accept() relies on binary search, hence the sort
				Arrays.sort(ids);
				selectedIds = ids;
			}
		};
	}

	/**
	 * Creates a {@link TermPredicate} that accepts only terms contained in the
	 * specified collection, as decided by {@link Collection#contains(Object)}.
	 * The collection is not copied, so later changes to it are visible to the
	 * predicate.
	 * 
	 * @param collection
	 *            The containment collection
	 * @return A term predicate instance
	 */
	public static TermPredicate createContainsPredicate(
			final Collection<? extends Term> collection) {
		return new TermPredicate() {
			@Override
			public boolean accept(Term term) {
				return collection.contains(term);
			}
		};
	}
	
	/**
	 * A term predicate that can be initialized with a specific term list.
	 * {@link #initialize(List)} must be called before
	 * {@link #accept(Term)}.
	 * 
	 * @author Sebastián Peña Saldarriaga
	 */
	private static abstract class ListBasedTermPredicate implements
			TermPredicate {

		/**
		 * Stores ids (grouping keys) that are accepted by the predicate. Must
		 * be sorted in natural order because {@link #accept(Term)} uses binary
		 * search; {@code null} until the predicate is initialized.
		 */
		protected String[] selectedIds;

		/**
		 * Initializes the predicate with the initial term list.
		 * 
		 * @param termList
		 *            The initial term list.
		 */
		public abstract void initialize(List<Term> termList);

		/**
		 * Accepts the term when its grouping key is one of the selected ids.
		 * 
		 * @throws IllegalStateException
		 *             if the predicate has not been initialized yet
		 */
		@Override
		public boolean accept(Term term) {
			if (selectedIds == null) {
				throw new IllegalStateException(
						"Predicate must be initialized with a term list before use");
			}
			return Arrays.binarySearch(selectedIds, term.getGroupingKey()) >= 0;
		}

	}

	// ////////////////////////////////////////////////////////////////////////
	// Comparators

	/**
	 * A comparator that imposes a descending occurrence order on
	 * {@link Term}s. Ties on frequency are broken by ascending grouping key.
	 */
	public static final Comparator<Term> DESCENDING_OCCURRENCE_ORDER = new Comparator<Term>() {

		@Override
		public int compare(Term o1, Term o2) {
			return ComparisonChain.start()
					// arguments swapped on purpose: higher frequency first
					.compare(o2.getFrequency(), o1.getFrequency())
					.compare(o1.getGroupingKey(), o2.getGroupingKey())
					.result();
		}
	};

	// Disabled comparator, kept for reference. It relies on Term#getWR(),
	// which nothing else in this file uses.
//	/**
//	 * A comparator that imposes a descending specificity order on
//	 * {@link Term}s.
//	 */
//	public static final Comparator<Term> DESCENDING_SPECIFICITY_ORDER = new Comparator<Term>() {
//
//		@Override
//		public int compare(Term o1, Term o2) {
//			return ComparisonChain.start()
//					.compare(o2.getWR(), o1.getWR())
//					.compare(o1.getGroupingKey(), o2.getGroupingKey())
//					.result();
//		}
//	};
	
	
	/**
	 * A comparator that imposes an ascending text order on {@link Term}s,
	 * based on their grouping keys.
	 */
	public static final Comparator<Term> ASCENDING_TEXT_ORDER = new Comparator<Term>() {

		/**
		 * Compare names in a locale sensitive manner. The collator is obtained
		 * once, for the default locale in effect when this class is
		 * initialized.
		 */
		private final Collator textCollator = Collator.getInstance();

		@Override
		public int compare(Term o1, Term o2) {
			return textCollator.compare(o1.getGroupingKey(), o2.getGroupingKey());
		}
	};
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy