
// Artifact page: com.dell.doradus.service.spider.SpiderHelper (Maven / Gradle / Ivy)
// The newest version!
/*
* Copyright (C) 2014 Dell, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.dell.doradus.service.spider;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import com.dell.doradus.common.ApplicationDefinition;
import com.dell.doradus.common.FieldDefinition;
import com.dell.doradus.common.TableDefinition;
import com.dell.doradus.common.Utils;
import com.dell.doradus.core.ObjectID;
import com.dell.doradus.fieldanalyzer.FieldAnalyzer;
import com.dell.doradus.fieldanalyzer.NullAnalyzer;
import com.dell.doradus.fieldanalyzer.TextAnalyzer;
import com.dell.doradus.search.IDHelper;
import com.dell.doradus.search.util.HeapSet;
import com.dell.doradus.service.db.DBService;
import com.dell.doradus.service.db.DColumn;
import com.dell.doradus.service.db.DRow;
import com.dell.doradus.service.db.Tenant;
/**
 * A collection of helper methods for retrieving information from a Cassandra database
 * that uses the Spider storage model.
 */
public class SpiderHelper {
/** Shared zero-length byte array; {@code fromTerms} uses it as the start value when no continuation object is given. */
public static final byte[] EMPTY_BYTES = new byte[0];
///////////////////////////
// PRIVATE HELPER FUNCTIONS
///////////////////////////
/**
 * Converts a collection of ObjectIDs to a list of strings representing these IDs.
 *
 * @param ids Collection of IDs (must not be null)
 * @return    List of the corresponding strings, in the iteration order of {@code ids}
 */
private static List<String> objectsToStrings(Collection<ObjectID> ids) {
    List<String> keys = new ArrayList<>(ids.size());
    for (ObjectID id : ids) {
        keys.add(IDHelper.IDToString(id));
    }
    return keys;
}
/**
* Lists union until a given count elements is reached. Guarantees that
* minimal elements of the lists would be added to the resulting list.
*
* @param collection Source lists collection
* @param count Number of elements limit
* @return
*/
private static > List
unionUnique(List> collection, int count) {
HeapSet hl = new HeapSet(count);
for(List lst : collection) {
for(T v : lst) hl.Put(v);
}
List result = hl.GetValues();
return result;
}
/**
 * Computes the first link column value for a slice, based on a starting link object.
 *
 * @param linkDef          Link field definition
 * @param continuationLink Starting link, or null to start from the very beginning
 * @param inclusive        If false, the value following the continuation link is used
 *                         (ignored when {@code continuationLink} is null)
 * @return                 String form of the first link value
 */
private static String fromLinksStart(FieldDefinition linkDef, ObjectID continuationLink, boolean inclusive) {
    if (continuationLink == null) {
        // No continuation: begin at the lowest possible value for this link.
        return Utils.toString(IDHelper.linkBoundMinimum(linkDef));
    }
    byte[] bound = IDHelper.linkToBytes(linkDef, continuationLink);
    if (!inclusive) {
        // Exclusive start: shift to the next value.
        bound = IDHelper.next(bound);
    }
    return Utils.toString(bound);
}
/**
 * Computes the last link column value for a slice from a link field definition.
 *
 * @param linkDef Link field definition
 * @return        String form of the maximum link bound
 */
private static String fromLinksFinish(FieldDefinition linkDef) {
    byte[] upperBound = IDHelper.linkBoundMaximum(linkDef);
    return Utils.toString(upperBound);
}
/**
 * Computes the first column value for a terms-row slice, based on a starting object.
 *
 * @param continuationObject Object to start from, or null to start from the beginning
 * @param inclusive          If false, the value following the continuation object is used
 *                           (ignored when {@code continuationObject} is null)
 * @return                   String form of the first value; the string form of
 *                           {@link #EMPTY_BYTES} when there is no continuation object
 */
private static String fromTerms(ObjectID continuationObject, boolean inclusive) {
    if (continuationObject == null) {
        return Utils.toString(EMPTY_BYTES);
    }
    byte[] bound = IDHelper.idToBytes(continuationObject);
    return Utils.toString(inclusive ? bound : IDHelper.next(bound));
}
/**
 * Builds the row key under which the links of one object are stored for a given shard:
 * {@code <shard>/~<link name>/<object id>}.
 *
 * @param shard Shard number (not null)
 * @param link  Link field definition
 * @param id    Object ID; null is treated as {@code ObjectID.EMPTY}
 * @return      The generated object-links row key
 */
private static String linkKey(Integer shard, FieldDefinition link, ObjectID id) {
    ObjectID owner = (id != null) ? id : ObjectID.EMPTY;
    return shard.toString() + "/~" + link.getName() + "/" + IDHelper.IDToString(owner);
}
/**
 * Extracts the object ID from a sharded-links row key of the form
 * {@code <shard>/~<link name>/<object id>}.
 *
 * @param shard Shard number (not null)
 * @param link  Link field definition
 * @param key   Source row key; expected to start with the shard/link prefix
 *              (checked only via {@code assert})
 * @return      Object ID parsed from the remainder of the key
 */
private static ObjectID unlinkKey(Integer shard, FieldDefinition link, String key) {
    final String rowPrefix = shard.toString() + "/~" + link.getName() + "/";
    assert key.startsWith(rowPrefix);
    final String idPart = key.substring(rowPrefix.length());
    return IDHelper.createID(idPart);
}
/**
 * Converts a collection of object IDs to a list of sharded-links row keys of the form
 * {@code <shard>/~<link name>/<object id>}.
 *
 * @param shard Shard number (not null)
 * @param link  Link field definition
 * @param ids   Collection of object IDs
 * @return      List of the corresponding row keys, in the iteration order of {@code ids}
 */
private static List<String> linkKeys(Integer shard, FieldDefinition link, Collection<ObjectID> ids) {
    List<String> keys = new ArrayList<String>(ids.size());
    // The shard/link part is the same for every key, so build it once.
    String prefix = shard.toString() + "/~" + link.getName() + "/";
    for (ObjectID id : ids) {
        keys.add(prefix + IDHelper.IDToString(id));
    }
    return keys;
}
/**
 * Builds the terms-table row key that lists all terms of a field within one shard:
 * {@code <shard>/_terms/<field>}.
 *
 * @param shard Shard number (not null)
 * @param field Field name
 * @return      Row key of all the terms of the field in the given shard
 */
private static String termKey(Integer shard, String field) {
    StringBuilder key = new StringBuilder(shard.toString());
    key.append("/_terms/").append(field);
    return key.toString();
}
/**
 * Adds the scalar value in the given DColumn to the given map. If the table defines the
 * column's field as a binary field, the raw column value is converted to a String with
 * the field's declared encoding; otherwise the column's String value is used as is.
 *
 * @param tableDef  Table that owns the scalar field.
 * @param scalarMap Map to add the scalar name/value pair to.
 * @param column    DColumn retrieved from an object row.
 */
private static void addScalarToMap(TableDefinition tableDef, Map<String, String> scalarMap, DColumn column) {
    String fieldName = column.getName();
    FieldDefinition fieldDef = tableDef.getFieldDef(fieldName);
    if (fieldDef != null && fieldDef.isBinaryField()) {
        scalarMap.put(fieldName, fieldDef.getEncoding().encode(column.getRawValue()));
    } else {
        // Undefined fields and non-binary fields are stored under their plain String value.
        scalarMap.put(fieldName, column.getValue());
    }
}
///////////////////
// SCALAR VALUES
///////////////////
public static Map> getScalarValues(TableDefinition tableDef,
Collection ids, String continuationField, int count) {
DBService dbService = DBService.instance();
Map> result = new HashMap<>();
if (continuationField == null && tableDef.isSharded()) {
continuationField = new String(new char[]{ '!' + 1 }); // next char after ! to skip shards
} else if (continuationField == null) {
continuationField = "";
}
String tableName = SpiderService.objectsStoreName(tableDef);
Collection rowKeys = objectsToStrings(ids);
Iterator iRows =
dbService.getRowsColumnSlice(Tenant.getTenant(tableDef), tableName, rowKeys, continuationField, "~");
while (iRows.hasNext()) {
DRow row = iRows.next();
Map scalarValues = new HashMap<>();
result.put(IDHelper.createID(row.getKey()), scalarValues);
Iterator iColumns = row.getColumns();
for (int i = 0; i < count && iColumns.hasNext(); ++i) {
DColumn column = iColumns.next();
addScalarToMap(tableDef, scalarValues, column);
}
}
return result;
}
/**
 * Reads at most {@code count} scalar values of a single object, starting at a
 * continuation field.
 *
 * @param tableDef          Table that owns the object
 * @param id                ID of the object to read
 * @param continuationField First column name of the slice; when null the slice starts at
 *                          the character after '!' for sharded tables (to skip shards)
 *                          and at "" otherwise
 * @param count             Maximum number of columns to return
 * @return                  Map of field name to value
 */
public static Map<String, String> getScalarValues(TableDefinition tableDef,
        ObjectID id, String continuationField, int count) {
    DBService dbService = DBService.instance();
    if (continuationField == null) {
        // next char after ! to skip shards
        continuationField = tableDef.isSharded() ? String.valueOf((char) ('!' + 1)) : "";
    }
    String tableName = SpiderService.objectsStoreName(tableDef);
    Iterator<DColumn> iColumns =
        dbService.getColumnSlice(Tenant.getTenant(tableDef), tableName, IDHelper.IDToString(id), continuationField, "~");
    Map<String, String> result = new HashMap<>();
    for (int i = 0; i < count && iColumns.hasNext(); ++i) {
        DColumn column = iColumns.next();
        // NOTE(review): unlike the multi-object overload, binary fields are not passed
        // through their declared encoding here - confirm whether that is intended.
        result.put(column.getName(), column.getValue());
    }
    return result;
}
public static Map> getScalarValues(TableDefinition tableDef,
Collection ids, Collection fields) {
DBService dbService = DBService.instance();
Map> result = new HashMap<>();
String tableName = SpiderService.objectsStoreName(tableDef);
Collection rowKeys = objectsToStrings(ids);
Iterator iRows = dbService.getRowsColumns(Tenant.getTenant(tableDef), tableName, rowKeys, fields);
while (iRows.hasNext()) {
DRow row = iRows.next();
Map scalarValues = new HashMap<>();
result.put(IDHelper.createID(row.getKey()), scalarValues);
Iterator iColumns = row.getColumns();
while (iColumns.hasNext()) {
DColumn column = iColumns.next();
addScalarToMap(tableDef, scalarValues, column);
}
}
return result;
}
/**
 * Reads the given scalar fields of a single object.
 *
 * @param tableDef Table that owns the object
 * @param id       ID of the object to read
 * @param fields   Names of the columns to fetch
 * @return         Map of field name to value; empty if the DB service returns no row
 */
public static Map<String, String> getScalarValues(TableDefinition tableDef,
        ObjectID id, Collection<String> fields) {
    DBService dbService = DBService.instance();
    Map<String, String> result = new HashMap<>();
    String tableName = SpiderService.objectsStoreName(tableDef);
    Iterator<DRow> iRows =
        dbService.getRowsColumns(Tenant.getTenant(tableDef), tableName, Arrays.asList(IDHelper.IDToString(id)), fields);
    // Only one row key was requested, so only the first returned row is read.
    if (iRows.hasNext()) {
        Iterator<DColumn> iColumns = iRows.next().getColumns();
        while (iColumns.hasNext()) {
            DColumn column = iColumns.next();
            // NOTE(review): unlike the multi-object overload, binary fields are not passed
            // through their declared encoding here - confirm whether that is intended.
            result.put(column.getName(), column.getValue());
        }
    }
    return result;
}
/**
 * Fetches the value of one scalar field of one object.
 *
 * @param tableDef Table that owns the object
 * @param id       ID of the object to read
 * @param field    Name of the field to fetch
 * @return         The field's value, or null if no such column was returned
 */
public static String fetchScalarFieldValue(TableDefinition tableDef, ObjectID id, String field) {
    List<String> fields = Arrays.asList(field);
    Map<String, String> result = getScalarValues(tableDef, id, fields);
    return result.get(field);
}
///////////////////
// LINKS
///////////////////
/**
 * Reads link values of one object without restricting the shards; delegates to the
 * shard-collection overload with a null shard collection.
 *
 * @param linkDef          Link field definition
 * @param id               ID of the object that owns the links
 * @param continuationLink Link to continue from, or null to start from the beginning
 * @param inclusive        Whether {@code continuationLink} itself may be returned
 * @param count            Maximum number of links to return
 * @return                 List of linked object IDs
 */
public static List<ObjectID> getLinks(FieldDefinition linkDef,
        ObjectID id, ObjectID continuationLink, boolean inclusive, int count) {
    // The cast selects the Collection-of-shards overload rather than the Integer-shard one.
    return getLinks(linkDef, (List<Integer>) null, id, continuationLink, inclusive, count);
}
public static Map> getLinks(FieldDefinition linkDef,
Collection ids, ObjectID continuationLink, boolean inclusive, int count) {
return getLinks(linkDef, (List)null, ids, continuationLink, inclusive, count);
}
public static Map> getLinksUnsharded(FieldDefinition linkDef,
Collection ids, ObjectID continuationLink, boolean inclusive, int count) {
DBService dbService = DBService.instance();
TableDefinition tableDef = linkDef.getTableDef();
String tableName = SpiderService.objectsStoreName(tableDef);
Map> result = new HashMap<>();
String start = fromLinksStart(linkDef, continuationLink, inclusive);
String finish = fromLinksFinish(linkDef);
List keys = objectsToStrings(ids);
Iterator iRows =
dbService.getRowsColumnSlice(Tenant.getTenant(tableDef), tableName, keys, start, finish);
while (iRows.hasNext()) {
DRow row = iRows.next();
List list = new ArrayList<>();
result.put(IDHelper.createID(row.getKey()), list);
Iterator iColumns = row.getColumns();
for (int i = 0; i < count && iColumns.hasNext(); ++i) {
list.add(IDHelper.linkValueToId(Utils.toBytes(iColumns.next().getName())));
}
}
return result;
}
/**
 * Reads link values of one object from its row in the objects store, where each link is
 * a column whose name encodes the link and the target ID.
 *
 * @param linkDef          Link field definition
 * @param id               ID of the object that owns the links
 * @param continuationLink Link to continue from, or null to start from the beginning
 * @param inclusive        Whether {@code continuationLink} itself may be returned
 * @param count            Maximum number of links to return
 * @return                 List of linked object IDs, in the order the columns are returned
 */
public static List<ObjectID> getLinksUnsharded(FieldDefinition linkDef,
        ObjectID id, ObjectID continuationLink, boolean inclusive, int count) {
    DBService dbService = DBService.instance();
    TableDefinition tableDef = linkDef.getTableDef();
    String tableName = SpiderService.objectsStoreName(tableDef);
    List<ObjectID> result = new ArrayList<>();
    String start = fromLinksStart(linkDef, continuationLink, inclusive);
    String finish = fromLinksFinish(linkDef);
    Iterator<DColumn> iColumns =
        dbService.getColumnSlice(Tenant.getTenant(tableDef), tableName, IDHelper.IDToString(id), start, finish);
    for (int i = 0; i < count && iColumns.hasNext(); ++i) {
        // The column name carries the link value; the target ID is decoded from it.
        result.add(IDHelper.linkValueToId(Utils.toBytes(iColumns.next().getName())));
    }
    return result;
}
public static Map> getLinks(FieldDefinition linkDef, Integer shard,
Collection ids, ObjectID continuationLink, boolean inclusive, int count) {
if (shard.intValue() == 0) {
return getLinksUnsharded(linkDef, ids, continuationLink, inclusive, count);
}
DBService dbService = DBService.instance();
TableDefinition tableDef = linkDef.getTableDef();
String tableName = SpiderService.termsStoreName(tableDef);
Map> result = new HashMap<>();
String startCol = fromTerms(continuationLink, inclusive);
Iterator iRows =
dbService.getRowsColumnSlice(Tenant.getTenant(tableDef), tableName, linkKeys(shard, linkDef, ids), startCol, "");
while (iRows.hasNext()) {
DRow row = iRows.next();
List list = new ArrayList<>();
result.put(unlinkKey(shard, linkDef, row.getKey()), list);
Iterator iColumns = row.getColumns();
for (int i = 0; i < count && iColumns.hasNext(); ++i) {
list.add(IDHelper.createID(iColumns.next().getName()));
}
}
return result;
}
/**
 * Reads link values of one object within a single shard. Shard 0 is served from the
 * objects store via {@code getLinksUnsharded}; any other shard is read from the terms
 * store row keyed {@code <shard>/~<link name>/<object id>}.
 *
 * @param linkDef          Link field definition
 * @param shard            Shard number (not null)
 * @param id               ID of the object that owns the links
 * @param continuationLink Link to continue from, or null to start from the beginning
 * @param inclusive        Whether {@code continuationLink} itself may be returned
 * @param count            Maximum number of links to return
 * @return                 List of linked object IDs, in the order the columns are returned
 */
public static List<ObjectID> getLinks(FieldDefinition linkDef, Integer shard,
        ObjectID id, ObjectID continuationLink, boolean inclusive, int count) {
    if (shard.intValue() == 0) {
        return getLinksUnsharded(linkDef, id, continuationLink, inclusive, count);
    }
    DBService dbService = DBService.instance();
    TableDefinition tableDef = linkDef.getTableDef();
    String tableName = SpiderService.termsStoreName(tableDef);
    List<ObjectID> result = new ArrayList<>();
    String startCol = fromTerms(continuationLink, inclusive);
    String key = linkKey(shard, linkDef, id);
    Iterator<DColumn> iColumns =
        dbService.getColumnSlice(Tenant.getTenant(tableDef), tableName, key, startCol, "");
    for (int i = 0; i < count && iColumns.hasNext(); ++i) {
        result.add(IDHelper.createID(iColumns.next().getName()));
    }
    return result;
}
public static Map> getLinks(FieldDefinition linkDef, Collection shards,
Collection ids, ObjectID continuationLink, boolean inclusive, int count) {
if (!linkDef.isSharded()) {
return getLinksUnsharded(linkDef, ids, continuationLink, inclusive, count);
}
ApplicationDefinition app = linkDef.getTableDef().getAppDef();
TableDefinition extent = app.getTableDef(linkDef.getLinkExtent());
if (shards == null) {
shards = SpiderHelper.getShards(extent);
}
if (shards.size() == 1) {
return getLinks(linkDef, shards.toArray(new Integer[1])[0], ids, continuationLink, inclusive, count);
}
Map>> values = new HashMap>>(ids.size());
for (Integer shard : shards) {
Map> res = getLinks(linkDef, shard, ids, continuationLink, inclusive, count);
for (Map.Entry> entry : res.entrySet()) {
if (values.containsKey(entry.getKey())) {
values.get(entry.getKey()).add(entry.getValue());
} else {
List> lst = new ArrayList>(shards.size());
lst.add(entry.getValue());
values.put(entry.getKey(), lst);
}
}
}
Map> result = new HashMap>(ids.size());
for (Map.Entry>> entry : values.entrySet()) {
Set set = new HashSet<>();
int i = 0;
extLoop: for (List list : entry.getValue()) {
for (ObjectID id : list) {
if (set.add(id)) {
if (++i >= count) {
break extLoop;
}
}
}
}
result.put(entry.getKey(), new ArrayList(set));
}
return result;
}
public static List getLinks(FieldDefinition linkDef, Collection shards,
ObjectID id, ObjectID continuationLink, boolean inclusive, int count) {
if (!linkDef.isSharded()) {
return getLinksUnsharded(linkDef, id, continuationLink, inclusive, count);
}
ApplicationDefinition app = linkDef.getTableDef().getAppDef();
TableDefinition extent = app.getTableDef(linkDef.getLinkExtent());
if (shards == null) {
shards = SpiderHelper.getShards(extent);
}
List> values = new ArrayList>(shards.size());
for(Integer shard: shards) {
values.add(getLinks(linkDef, shard, id, continuationLink, inclusive, count));
}
return unionUnique(values, count);
}
///////////////////
// TERMS
///////////////////
/**
 * Lists terms of a field that start with a prefix, without restricting the shards;
 * delegates to the shard-collection overload with a null shard collection.
 *
 * @param tableDef Table definition
 * @param field    Field name
 * @param prefix   Term prefix (null is treated as "" by the callee)
 * @param count    Maximum number of terms to return
 * @return         List of terms
 */
public static List<String> getTerms(TableDefinition tableDef, String field, String prefix, int count) {
    // The cast selects the Collection-of-shards overload rather than the Integer-shard one.
    return getTerms(tableDef, (List<Integer>) null, field, prefix, count);
}
/**
 * Lists terms of a field that start with a prefix, read from the unsharded
 * {@code _terms/<field>} row of the terms store.
 *
 * @param tableDef Table definition
 * @param field    Field name
 * @param prefix   Term prefix; null is treated as ""
 * @param count    Maximum number of terms to return
 * @return         List of terms, in the order the columns are returned
 */
public static List<String> getTermsUnsharded(TableDefinition tableDef, String field, String prefix, int count) {
    DBService dbService = DBService.instance();
    String termsStore = SpiderService.termsStoreName(tableDef);
    if (prefix == null) {
        prefix = "";
    }
    List<String> result = new ArrayList<String>();
    // Slice from the prefix up to prefix + U+FFFF, i.e. every column name starting with the prefix.
    Iterator<DColumn> iColumns =
        dbService.getColumnSlice(Tenant.getTenant(tableDef), termsStore, "_terms/" + field, prefix, prefix + Character.MAX_VALUE);
    for (int i = 0; i < count && iColumns.hasNext(); ++i) {
        result.add(iColumns.next().getName());
    }
    return result;
}
/**
 * Lists terms of a field within a column-name range, read from the unsharded
 * {@code _terms/<field>} row of the terms store.
 *
 * @param tableDef Table definition
 * @param field    Field name
 * @param from     Slice start; null is treated as ""
 * @param to       Slice end; null is treated as ""
 * @param count    Maximum number of terms to return
 * @param reversed Passed through to the DB service's column slice call
 * @return         List of terms, in the order the columns are returned
 */
public static List<String> getTermsUnsharded(TableDefinition tableDef, String field, String from, String to, int count, boolean reversed) {
    DBService dbService = DBService.instance();
    String termsStore = SpiderService.termsStoreName(tableDef);
    if (from == null) {
        from = "";
    }
    if (to == null) {
        to = "";
    }
    List<String> result = new ArrayList<String>();
    Iterator<DColumn> iColumns =
        dbService.getColumnSlice(Tenant.getTenant(tableDef), termsStore, "_terms/" + field, from, to, reversed);
    for (int i = 0; i < count && iColumns.hasNext(); ++i) {
        result.add(iColumns.next().getName());
    }
    return result;
}
/**
 * Lists terms of a field that start with a prefix within a single shard. Shard 0 is
 * served by {@code getTermsUnsharded}; any other shard is read from the row keyed
 * {@code <shard>/_terms/<field>}.
 *
 * @param tableDef Table definition
 * @param shard    Shard number (not null)
 * @param field    Field name
 * @param prefix   Term prefix; null is treated as ""
 * @param count    Maximum number of terms to return
 * @return         List of terms, in the order the columns are returned
 */
public static List<String> getTerms(TableDefinition tableDef, Integer shard, String field, String prefix, int count) {
    if (shard.intValue() == 0) {
        return getTermsUnsharded(tableDef, field, prefix, count);
    }
    DBService dbService = DBService.instance();
    String termsStore = SpiderService.termsStoreName(tableDef);
    if (prefix == null) {
        prefix = "";
    }
    List<String> result = new ArrayList<String>();
    // Slice from the prefix up to prefix + U+FFFF, i.e. every column name starting with the prefix.
    Iterator<DColumn> iColumns =
        dbService.getColumnSlice(Tenant.getTenant(tableDef), termsStore, termKey(shard, field), prefix, prefix + Character.MAX_VALUE);
    for (int i = 0; i < count && iColumns.hasNext(); ++i) {
        result.add(iColumns.next().getName());
    }
    return result;
}
/**
 * Lists terms of a field within a column-name range in a single shard. Shard 0 is served
 * by {@code getTermsUnsharded}; any other shard is read from the row keyed
 * {@code <shard>/_terms/<field>}.
 *
 * @param tableDef Table definition
 * @param shard    Shard number (not null)
 * @param field    Field name
 * @param from     Slice start; null is treated as ""
 * @param to       Slice end; null is treated as ""
 * @param count    Maximum number of terms to return
 * @param reversed Passed through to the DB service's column slice call
 * @return         List of terms, in the order the columns are returned
 */
public static List<String> getTerms(TableDefinition tableDef, Integer shard, String field, String from, String to, int count, boolean reversed) {
    if (shard.intValue() == 0) {
        return getTermsUnsharded(tableDef, field, from, to, count, reversed);
    }
    DBService dbService = DBService.instance();
    String termsStore = SpiderService.termsStoreName(tableDef);
    if (from == null) {
        from = "";
    }
    if (to == null) {
        to = "";
    }
    List<String> result = new ArrayList<String>();
    Iterator<DColumn> iColumns =
        dbService.getColumnSlice(Tenant.getTenant(tableDef), termsStore, termKey(shard, field), from, to, reversed);
    for (int i = 0; i < count && iColumns.hasNext(); ++i) {
        result.add(iColumns.next().getName());
    }
    return result;
}
public static List getTerms(TableDefinition tableDef, Collection shards, String field, String prefix, int count) {
if (!tableDef.isSharded()) {
return getTermsUnsharded(tableDef, field, prefix, count);
}
if (shards == null) {
shards = SpiderHelper.getShards(tableDef);
}
List> terms = new ArrayList>(shards.size());
for(Integer shard: shards) {
terms.add(getTerms(tableDef, shard, field, prefix, count));
}
List result = unionUnique(terms, count);
return result;
}
///////////////////
// FIELDS
///////////////////
/**
 * Lists the names of all columns stored in the {@code _fields} row of the table's
 * terms store.
 *
 * @param tableDef Table definition
 * @return         List of column names, in the order the columns are returned
 */
public static List<String> getFields(TableDefinition tableDef) {
    List<String> result = new ArrayList<String>();
    String tableName = SpiderService.termsStoreName(tableDef);
    Iterator<DColumn> iColumns = DBService.instance().getAllColumns(Tenant.getTenant(tableDef), tableName, "_fields");
    while (iColumns.hasNext()) {
        result.add(iColumns.next().getName());
    }
    return result;
}
///////////////////
// TERM DOCS
///////////////////
/**
 * Lists IDs of the objects stored as columns of one term row in the terms store.
 *
 * @param tableDef           Table definition
 * @param term               Row key of the term
 * @param continuationObject Object to continue from, or null to start from the beginning
 * @param inclusive          Whether {@code continuationObject} itself may be returned
 * @param count              Maximum number of object IDs to return
 * @return                   List of object IDs, in the order the columns are returned
 */
public static List<ObjectID> getTermDocsUnsharded(TableDefinition tableDef,
        String term, ObjectID continuationObject, boolean inclusive, int count) {
    DBService dbService = DBService.instance();
    String store = SpiderService.termsStoreName(tableDef);
    List<ObjectID> result = new ArrayList<>();
    String startCol = continuationObject == null ? "" : IDHelper.IDToString(continuationObject);
    if (!inclusive) {
        // Appending NUL gives the smallest string that sorts after the continuation ID,
        // so the continuation object itself is skipped.
        startCol += (char)0;
    }
    Iterator<DColumn> iColumns = dbService.getColumnSlice(Tenant.getTenant(tableDef), store, term, startCol, "");
    for (int i = 0; i < count && iColumns.hasNext(); ++i) {
        result.add(IDHelper.createID(iColumns.next().getName()));
    }
    return result;
}
public static Map> getTermDocsUnsharded(TableDefinition tableDef,
Collection terms, ObjectID continuationObject, boolean inclusive, int count) {
DBService dbService = DBService.instance();
String store = SpiderService.termsStoreName(tableDef);
Map> result = new HashMap<>();
String startCol = continuationObject == null ? "" : IDHelper.IDToString(continuationObject);
if (!inclusive) {
startCol += (char)0;
}
Iterator iRows = dbService.getRowsColumnSlice(Tenant.getTenant(tableDef), store, terms, startCol, "");
while (iRows.hasNext()) {
DRow row = iRows.next();
List list = new ArrayList<>();
result.put(row.getKey(), list);
Iterator iColumns = row.getColumns();
for (int i = 0; i < count && iColumns.hasNext(); ++i) {
list.add(IDHelper.createID(iColumns.next().getName()));
}
}
return result;
}
/**
 * Lists IDs of the objects stored under a term within a single shard. For shard 0 the
 * term is used as the row key as is; for any other shard the row key is
 * {@code <shard>/<term>}.
 *
 * @param tableDef           Table definition
 * @param shard              Shard number (not null)
 * @param term               Term row key without the shard prefix
 * @param continuationObject Object to continue from, or null to start from the beginning
 * @param inclusive          Whether {@code continuationObject} itself may be returned
 * @param count              Maximum number of object IDs to return
 * @return                   List of object IDs
 */
public static List<ObjectID> getTermDocs(TableDefinition tableDef, Integer shard,
        String term, ObjectID continuationObject, boolean inclusive, int count) {
    if (shard.intValue() == 0) {
        return getTermDocsUnsharded(tableDef, term, continuationObject, inclusive, count);
    }
    return getTermDocsUnsharded(tableDef, shard.toString() + "/" + term, continuationObject, inclusive, count);
}
public static Map> getTermDocs(TableDefinition tableDef, Integer shard,
Collection terms, ObjectID continuationObject, boolean inclusive, int count) {
if (shard.intValue() == 0) {
return getTermDocsUnsharded(tableDef, terms, continuationObject, inclusive, count);
}
List shardTerms = new ArrayList(terms.size());
String prefix = shard.toString() + "/";
for(String term : terms) shardTerms.add(prefix + term);
Map> result = getTermDocsUnsharded(tableDef, shardTerms, continuationObject, inclusive, count);
Map> result2 = new HashMap>(result.size());
for(Map.Entry> e : result.entrySet()) {
if (e.getKey().startsWith(prefix)) {
result2.put(e.getKey().substring(prefix.length()), e.getValue());
} else {
result2.put(e.getKey(), e.getValue());
}
}
return result2;
}
/**
 * Returns the shard numbers of a table: the keys of the shard map reported by
 * {@code SpiderService}, plus shard 0, which is always included.
 *
 * @param tableDef Table definition
 * @return         A new, modifiable set of shard numbers
 */
public static Set<Integer> getShards(TableDefinition tableDef) {
    Set<Integer> results = new HashSet<Integer>(SpiderService.instance().getShards(tableDef).keySet());
    results.add(0);
    return results;
}
/**
 * Produces the set of terms of a given field value. The analyzer is taken from the
 * field definition when the table defines the field and an analyzer is found for it;
 * otherwise the text analyzer is used as the default.
 *
 * @param fieldName  Scalar field name
 * @param fieldValue Field value
 * @param tabDef     Table definition
 * @return           The terms extracted from the value; {@code null} if the field's
 *                   analyzer is the null analyzer (no terms are produced); an empty set
 *                   if the analyzer rejects the value with an IllegalArgumentException
 */
public static Set<String> getTerms(String fieldName, String fieldValue, TableDefinition tabDef) {
    FieldAnalyzer analyzer = TextAnalyzer.instance();    // default analyzer
    FieldDefinition fieldDef = tabDef.getFieldDef(fieldName);
    if (fieldDef != null) {
        // Extract analyzer from the field definition
        FieldAnalyzer fieldAnalyzer = FieldAnalyzer.findAnalyzer(fieldDef);
        if (fieldAnalyzer == NullAnalyzer.instance()) {
            // No terms produced
            return null;
        } else if (fieldAnalyzer != null) {
            analyzer = fieldAnalyzer;
        }
    }
    try {
        return analyzer.extractTerms(fieldValue);
    } catch (IllegalArgumentException e) {
        // The value cannot be analyzed; treated as producing no terms.
        return new HashSet<String>();
    }
}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy