All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.dell.doradus.search.analyzer.NumericTrie Maven / Gradle / Ivy

The newest version!
/*
 * Copyright (C) 2014 Dell, Inc.
 * 
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 *     http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.dell.doradus.search.analyzer;

import java.util.ArrayList;
import java.util.List;

/**
 * Tokenizes {@code long} values into trie terms and builds the set of search terms
 * for a range query {@code min <= x < max}, so that a value lies in the range exactly
 * when at least one of its tokens equals one of the search terms.
 *
 * <p>Token format (negative values carry a {@code '-'} in front of the numeric part):
 * <ul>
 *   <li>{@code "m"} - the exact value;</li>
 *   <li>{@code "det/q"} with {@code q > 0} - the magnitude lies in
 *       {@code [q*det, (q+1)*det)}, where {@code det} is a power of the base;</li>
 *   <li>{@code "det/0"} - the magnitude lies in {@code [det/base, det)}.</li>
 * </ul>
 *
 * <p>All arithmetic is overflow-safe: levels whose power of the base does not fit in
 * a {@code long} are neither emitted by {@link #tokenize(long)} nor requested by
 * {@link #getSearchTerms()}, so both sides stay consistent up to the limits of
 * {@code long}. Because {@code max} is exclusive, {@code Long.MAX_VALUE} itself can
 * never be matched.
 */
public class NumericTrie {
	// Base of the trie: the number of sub-nodes in each trie node.
	// Bounds that are powers of the base result in far fewer search terms,
	// so choose it according to the type of the values.
	// For example, for values like age, height, weight, temperature etc. choose bas=10,
	// because a query like age>30 is more probable than age>32.
	// For values like file size choose bas=32, because queries like size > 1KB are more
	// probable than size > 1000bytes.
	// Must be at least 2; this is checked whenever the trie is used.
	public int bas;
	// Lower bound (inclusive) of the range query min <= x < max.
	// If the query is x < max then set min to the minimal possible value that x can take.
	// It is better, though not required, to choose 0 or a power of the base.
	// The smaller the range min..max, the fewer clauses the query has, so make it as small
	// as possible; even better, take these values from the database when building the query.
	// For example, age is usually less than 110 years, so for the query age > 3 you can
	// choose max=1000 provided that the base is 10 (1000 is the next power of 10 above 110).
	public long min;
	// Upper bound (exclusive) of the range query min <= x < max.
	public long max;

	/**
	 * Creates a trie covering {@code Long.MIN_VALUE <= x < Long.MAX_VALUE}.
	 *
	 * @param base the trie base (see {@link #bas}); must be at least 2 before use
	 */
	public NumericTrie(int base) {
		this(base, Long.MIN_VALUE, Long.MAX_VALUE);
	}

	/**
	 * Creates a trie for the range {@code min <= x < max}.
	 *
	 * @param bas the trie base; must be at least 2 before use
	 * @param min inclusive lower bound
	 * @param max exclusive upper bound
	 */
	public NumericTrie(int bas, long min, long max) {
		this.bas = bas;
		this.min = min;
		this.max = max;
	}

	/**
	 * Produces the tokens under which {@code num} should be indexed: the exact value
	 * followed by one token per trie level, ending with the first level whose
	 * quotient is zero (or the last level representable as a {@code long}).
	 *
	 * @param num the value to tokenize
	 * @return a new mutable list of tokens, never empty
	 * @throws IllegalStateException if the base is less than 2
	 */
	public List<String> tokenize(long num) {
		checkBase();
		List<String> tokens = new ArrayList<String>();
		// Long.toString already yields "-m" for negatives, including Long.MIN_VALUE,
		// whose magnitude cannot be obtained by negation.
		tokens.add(Long.toString(num));
		if (num == 0) {
			return tokens;
		}
		boolean negative = num < 0;
		// Divide before negating: integer division truncates toward zero, so this is
		// floor(|num| / bas) and cannot overflow even for Long.MIN_VALUE.
		long rest = negative ? -(num / bas) : num / bas;
		long det = bas;
		while (true) {
			add(tokens, det, rest, negative);
			// Stop at the zero quotient, or when the next power of the base would
			// not fit in a long (such a level is never asked for by range()).
			if (rest == 0 || det > Long.MAX_VALUE / bas) {
				break;
			}
			rest /= bas;
			det *= bas;
		}
		return tokens;
	}

	/**
	 * Builds the search terms for the configured range {@code min <= x < max}.
	 * Terms for negative values (if any) come first, followed by terms for
	 * non-negative values.
	 *
	 * @return a new mutable list of terms; empty when the range is empty
	 * @throws IllegalStateException if the base is less than 2
	 */
	public List<String> getSearchTerms() {
		checkBase();
		List<String> terms = new ArrayList<String>();
		if (min < 0) {
			addNegativeTerms(terms);
			if (max >= 0) {
				range(terms, 0, max, false);
			}
		} else {
			// min >= 0: only non-negative values can match
			range(terms, min, max, false);
		}
		return terms;
	}

	/**
	 * Adds the terms for the negative part of the range; requires {@code min < 0}.
	 * Negative values are handled through their magnitudes; the two magnitudes that
	 * would overflow the exclusive upper bound are added as exact terms instead.
	 */
	private void addNegativeTerms(List<String> terms) {
		if (max == Long.MIN_VALUE) {
			return; // nothing is below Long.MIN_VALUE
		}
		// Largest negative value inside the range.
		long upper = max >= 0 ? -1 : max - 1;
		if (upper < min) {
			return; // empty range
		}
		if (upper > Long.MIN_VALUE) {
			long lowMagnitude = -upper; // in [1, Long.MAX_VALUE]
			if (min > -Long.MAX_VALUE) {
				range(terms, lowMagnitude, -min + 1, true);
			} else {
				// -min + 1 would overflow: cover magnitudes below Long.MAX_VALUE with
				// the trie and add magnitude Long.MAX_VALUE as an exact term.
				range(terms, lowMagnitude, Long.MAX_VALUE, true);
				terms.add(Long.toString(-Long.MAX_VALUE));
			}
		}
		if (min == Long.MIN_VALUE) {
			// Magnitude 2^63 is not representable; match it by its exact token.
			terms.add(Long.toString(Long.MIN_VALUE));
		}
	}

	/**
	 * Adds terms that cover the magnitudes {@code a <= x < b} exactly, using the
	 * coarsest trie levels that fit. Requires {@code a >= 0}.
	 *
	 * <p>Throughout the loop {@code det} is a power of the base that divides
	 * {@code x}, and everything in {@code [a, x)} is already covered.
	 */
	private void range(List<String> terms, long a, long b, boolean lessThanZero) {
		long x = a;
		long det = 1;
		while (x < b) {
			if (x == 0) {
				// zero has a single, unsigned token
				add(terms, 1, 0, false);
				x = 1;
			}
			else if (x == det && x <= b / bas) {
				// x is a power of the base and [x, x*bas) fits: one "det/0" term.
				// (x <= b / bas is x * bas <= b without the overflow.)
				det *= bas;
				add(terms, det, 0, lessThanZero);
				x = det;
			}
			else if (det <= (b - x) / bas && x % (det * bas) == 0) {
				// x is aligned to the next level and a whole block of it fits: go coarser.
				// The first test guarantees det * bas <= b - x, so nothing overflows.
				det *= bas;
			}
			else if (det > b - x) {
				// a block of the current level would pass b: go finer
				det /= bas;
			}
			else {
				add(terms, det, x / det, lessThanZero);
				x += det;
			}
		}
	}

	/** Rejects bases for which tokenizing would not terminate or would divide by zero. */
	private void checkBase() {
		if (bas < 2) {
			throw new IllegalStateException("base must be at least 2: " + bas);
		}
	}

	/**
	 * Appends the term {@code "det/num"} (or just {@code "num"} when {@code det == 1}),
	 * with a {@code '-'} before {@code num} for negative values.
	 */
	private static void add(List<String> terms, long det, long num, boolean lessThanZero) {
		StringBuilder sb = new StringBuilder();
		if (det != 1) {
			sb.append(det);
			sb.append('/');
		}
		if (lessThanZero) {
			sb.append('-');
		}
		sb.append(num);
		terms.add(sb.toString());
	}
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy