/*
* Licensed to STRATIO (C) under one or more contributor license agreements.
* See the NOTICE file distributed with this work for additional information
* regarding copyright ownership. The STRATIO (C) licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package com.stratio.cassandra.lucene.service;
import com.stratio.cassandra.lucene.IndexConfig;
import com.stratio.cassandra.lucene.schema.Schema;
import com.stratio.cassandra.lucene.schema.column.Column;
import com.stratio.cassandra.lucene.schema.column.Columns;
import com.stratio.cassandra.lucene.search.Search;
import com.stratio.cassandra.lucene.util.TaskQueue;
import com.stratio.cassandra.lucene.util.TimeCounter;
import org.apache.cassandra.config.CFMetaData;
import org.apache.cassandra.config.ColumnDefinition;
import org.apache.cassandra.cql3.Operator;
import org.apache.cassandra.db.*;
import org.apache.cassandra.db.composites.CellName;
import org.apache.cassandra.db.marshal.AbstractType;
import org.apache.cassandra.db.marshal.UTF8Type;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.*;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.*;
import static java.lang.Math.max;
import static java.lang.Math.min;
import static org.apache.lucene.search.BooleanClause.Occur.FILTER;
import static org.apache.lucene.search.BooleanClause.Occur.MUST;
import static org.apache.lucene.search.SortField.FIELD_SCORE;
/**
* Class for providing operations between Cassandra and Lucene.
*
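* <p>A minimal usage sketch (hypothetical caller code; in practice Cassandra's secondary index machinery drives
* these calls, and {@code cfs}, {@code config}, {@code key} and {@code columnFamily} are assumed to be available):
* <pre>{@code
* RowService service = RowService.build(cfs, config); // skinny or wide, depending on the table
* service.index(key, columnFamily, System.currentTimeMillis()); // may be queued asynchronously
* service.commit(); // awaits any queued tasks and commits the Lucene index
* }</pre>
*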
* @author Andres de la Pena {@literal <adelapena@stratio.com>}
*/
public abstract class RowService {
private static final Logger logger = LoggerFactory.getLogger(RowService.class);
/** The max number of rows to be read per iteration. */
private static final int MAX_PAGE_SIZE = 10000;
/** The min number of rows to be read per iteration. */
private static final int MIN_PAGE_SIZE = 100;
final ColumnFamilyStore baseCfs;
final CFMetaData metadata;
final RowMapper mapper;
final LuceneIndex lucene;
final List<SortField> keySortFields;
protected final Schema schema;
private final TaskQueue indexQueue;
/**
* Returns a new {@code RowService} for the specified {@link IndexConfig}.
*
* @param cfs The indexed {@link ColumnFamilyStore}.
* @param config The {@link IndexConfig}.
* @throws IOException If there are I/O errors.
*/
protected RowService(ColumnFamilyStore cfs, IndexConfig config) throws IOException {
baseCfs = cfs;
metadata = config.getMetadata();
schema = config.getSchema();
lucene = new LuceneIndex(config);
mapper = RowMapper.build(config);
keySortFields = mapper.sortFields();
int threads = config.getIndexingThreads();
indexQueue = threads > 0 ? new TaskQueue(threads, config.getIndexingQueuesSize()) : null;
}
/**
* Returns a new {@link RowService} for the specified {@link IndexConfig}.
*
* @param cfs The indexed {@link ColumnFamilyStore}.
* @param config The {@link IndexConfig}.
* @return A new {@link RowService} for the specified {@link IndexConfig}.
* @throws IOException If there are I/O errors.
*/
public static RowService build(ColumnFamilyStore cfs, IndexConfig config) throws IOException {
return config.isWide() ? new RowServiceWide(cfs, config) : new RowServiceSkinny(cfs, config);
}
/**
* Returns the used {@link Schema}.
*
* @return The used {@link Schema}.
*/
public final Schema getSchema() {
return schema;
}
/**
* Returns the names of the document fields to be loaded when reading a Lucene index.
*
* @return The names of the document fields to be loaded.
*/
protected abstract Set<String> fieldsToLoad();
/**
* Validates the row insertion identified by the specified partition key and column family.
*
* @param key A partition key.
* @param columnFamily A {@link ColumnFamily} with a single common cluster key.
*/
public abstract void validate(final ByteBuffer key, final ColumnFamily columnFamily);
/**
* Indexes the logical {@link Row} identified by the specified key and column family using the specified timestamp.
* This may require reading from the base {@link ColumnFamilyStore}, because the row could already exist with more
* columns than the specified ones. The specified {@link ColumnFamily} is used to determine the cluster key. This
* operation is performed asynchronously when asynchronous indexing is enabled.
*
* @param key A partition key.
* @param columnFamily A {@link ColumnFamily} with a single common cluster key.
* @param timestamp The insertion time.
* @throws IOException If there are I/O errors.
*/
public void index(final ByteBuffer key, final ColumnFamily columnFamily, final long timestamp) throws IOException {
if (indexQueue == null) {
doIndex(key, columnFamily, timestamp);
} else {
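// Route the task by partition key; the TaskQueue is assumed to dispatch tasks with the same key to the
// same worker queue, preserving per-partition ordering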
indexQueue.submitAsynchronous(key, new Runnable() {
@Override
public void run() {
try {
doIndex(key, columnFamily, timestamp);
} catch (Exception e) {
logger.error("Unrecoverable error during asynchronously indexing", e);
}
}
});
}
}
/**
* Puts into the Lucene index the Cassandra row identified by the specified partition key and the clustering
* keys contained in the specified {@link ColumnFamily}.
*
* @param key A partition key.
* @param columnFamily A {@link ColumnFamily} with a single common cluster key.
* @param timestamp The insertion time.
* @throws IOException If there are I/O errors.
*/
protected abstract void doIndex(ByteBuffer key, ColumnFamily columnFamily, long timestamp) throws IOException;
/**
* Deletes the partition identified by the specified partition key. This operation is performed asynchronously
* when asynchronous indexing is enabled.
*
* @param partitionKey The partition key identifying the partition to be deleted.
* @throws IOException If there are I/O errors.
*/
public void delete(final DecoratedKey partitionKey) throws IOException {
if (indexQueue == null) {
doDelete(partitionKey);
} else {
indexQueue.submitAsynchronous(partitionKey, new Runnable() {
@Override
public void run() {
try {
doDelete(partitionKey);
} catch (Exception e) {
logger.error("Unrecoverable error during asynchronous deletion", e);
}
}
});
}
}
/**
* Deletes the partition identified by the specified partition key.
*
* @param partitionKey The partition key identifying the partition to be deleted.
* @throws IOException If there are I/O errors.
*/
protected abstract void doDelete(DecoratedKey partitionKey) throws IOException;
/**
* Deletes all the {@link Document}s.
*
* @throws IOException If there are I/O errors.
*/
public final void truncate() throws IOException {
lucene.truncate();
}
/**
* Closes and removes all the index files.
*
* @throws IOException If there are I/O errors.
*/
public final void delete() throws IOException {
if (indexQueue != null) {
indexQueue.shutdown();
}
lucene.delete();
schema.close();
}
/**
* Commits the pending changes, waiting first for any pending asynchronous indexing tasks to finish.
*
* @throws IOException If there are I/O errors.
*/
public final void commit() throws IOException {
if (indexQueue != null) {
indexQueue.await();
}
lucene.commit();
}
/**
* Returns the Lucene {@link Document}s representing the specified Cassandra row, keyed by their identifying
* {@link Term}s.
*
* @param partitionKey A partition key.
* @param columnFamily A column family.
* @param timestamp The operation time.
* @return The Lucene {@link Document}s representing the specified Cassandra row, keyed by their identifying
* {@link Term}s.
*/
abstract Map<Term, Document> documents(DecoratedKey partitionKey, ColumnFamily columnFamily, long timestamp);
/**
* Returns the stored and indexed {@link Row}s satisfying the specified restrictions.
*
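* <p>A hypothetical invocation sketch ({@code service}, {@code search} and {@code dataRange} are assumed to be
* already built; the argument values are illustrative only):
* <pre>{@code
* List<Row> rows = service.search(search,
*                                 Collections.<IndexExpression>emptyList(), // no extra filtering
*                                 dataRange,                                // key/token range to scan
*                                 100,                                      // row limit
*                                 System.currentTimeMillis(),               // operation timestamp
*                                 null,                                     // no previous page
*                                 false);                                   // CQL DISTINCT not used
* }</pre>
*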
* @param search The {@link Search} to be performed.
* @param expressions A list of filtering {@link IndexExpression}s to be satisfied.
* @param dataRange A {@link DataRange} to be satisfied.
* @param limit The max number of {@link Row}s to be returned.
* @param timestamp The operation timestamp.
* @param after A {@link RowKey} to start the search after.
* @param distinct Whether the CQL {@code DISTINCT} operator is used.
* @return The {@link Row}s satisfying the specified restrictions.
* @throws IOException If there are I/O errors.
*/
public final List<Row> search(Search search,
List<IndexExpression> expressions,
DataRange dataRange,
final int limit,
long timestamp,
RowKey after,
boolean distinct) throws IOException {
// Setup stats
TimeCounter afterTime = TimeCounter.create();
TimeCounter queryTime = TimeCounter.create();
TimeCounter storeTime = TimeCounter.create();
int numDocs = 0;
int numPages = 0;
int numRows = 0;
List<Row> rows = new LinkedList<>();
// Refresh index if needed
if (search.refresh()) {
if (indexQueue != null) {
indexQueue.await();
}
lucene.refresh();
if (search.isEmpty()) {
return rows;
}
}
// Setup search
Query query = query(search, dataRange);
Sort sort = sort(search);
// Setup paging
int scorePosition = scorePosition(search);
int page = min(limit, MAX_PAGE_SIZE);
boolean mayBeMoreDocs;
int remainingRows;
SearcherManager searcherManager = lucene.getSearcherManager();
IndexSearcher searcher = searcherManager.acquire();
Set<DecoratedKey> partitionKeys = new HashSet<>();
try {
// Get last position
afterTime.start();
ScoreDoc last = after(searcher, after, query, sort);
afterTime.stop();
do {
// Search rows identifiers in Lucene
queryTime.start();
Set<String> fields = fieldsToLoad();
Map<Document, ScoreDoc> docs = lucene.search(searcher, query, sort, last, page, fields);
List<SearchResult> searchResults = new ArrayList<>(docs.size());
int numIterationDocs = 0;
for (Map.Entry<Document, ScoreDoc> entry : docs.entrySet()) {
Document document = entry.getKey();
ScoreDoc scoreDoc = entry.getValue();
last = scoreDoc;
SearchResult searchResult = mapper.searchResult(document, scoreDoc);
DecoratedKey partitionKey = searchResult.getPartitionKey();
if (!(distinct && (partitionKeys.contains(partitionKey)
|| after != null && after.getPartitionKey().equals(partitionKey)))) {
searchResults.add(searchResult);
partitionKeys.add(partitionKey);
}
numIterationDocs++;
}
numDocs += numIterationDocs;
queryTime.stop();
// Collect rows from Cassandra
storeTime.start();
for (Row row : rows(searchResults, timestamp, scorePosition)) {
if (accepted(row, expressions)) {
rows.add(row);
numRows++;
}
}
storeTime.stop();
// Setup next iteration
mayBeMoreDocs = numIterationDocs == page;
remainingRows = limit - numRows;
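// Clamp the next page size: read at least MIN_PAGE_SIZE documents per round trip to amortize its cost,
// but never more than MAX_PAGE_SIZE, even if fewer rows remain to be returned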
page = min(max(MIN_PAGE_SIZE, remainingRows), MAX_PAGE_SIZE);
numPages++;
// Iterate while there are still documents to read and we don't have enough rows
} while (mayBeMoreDocs && remainingRows > 0);
} finally {
searcherManager.release(searcher);
}
// Ensure sorting
Comparator<Row> comparator = mapper.comparator(search);
Collections.sort(rows, comparator);
logger.debug("Search : {}", search);
logger.debug("Query : {}", query);
logger.debug("Sort : {}", sort);
logger.debug("After time : {}", afterTime);
logger.debug("Query time : {}", queryTime);
logger.debug("Store time : {}", storeTime);
logger.debug("Count docs : {}", numDocs);
logger.debug("Count rows : {}", numRows);
logger.debug("Count page : {}", numPages);
return rows;
}
/**
* Returns the {@link Query} representation of the specified {@link Search} filtered by the specified {@link
* DataRange}.
*
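* <p>When all three parts are present, the result is a cached {@link BooleanQuery} whose standard Lucene
* {@code toString()} shape is {@code #range #filter +query}, where {@code #} marks the non-scoring FILTER
* clauses and {@code +} the scoring MUST clause.
*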
* @param search A {@link Search}.
* @param dataRange A {@link DataRange}.
* @return The {@link Query} representation of the specified {@link Search} filtered by the specified {@link
* DataRange}.
*/
public Query query(Search search, DataRange dataRange) {
Query range = mapper.query(dataRange);
Query query = search.query(schema);
Query filter = search.filter(schema);
if (query == null && filter == null && range == null) {
return new MatchAllDocsQuery();
}
BooleanQuery.Builder builder = new BooleanQuery.Builder();
if (range != null) {
builder.add(range, FILTER);
}
if (filter != null) {
builder.add(filter, FILTER);
}
if (query != null) {
builder.add(query, MUST);
}
return new CachingWrapperQuery(builder.build());
}
/**
* Returns a {@link Sort} for the specified {@link Search}.
*
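* <p>The field order is: the user-requested sort fields (if any), then {@link SortField#FIELD_SCORE} when
* relevance is used, and finally the partition/clustering key fields, so ties are always broken
* deterministically.
*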
* @param search A {@link Search}.
* @return A {@link Sort} for the specified {@link Search}.
*/
private Sort sort(Search search) {
List<SortField> sortFields = new ArrayList<>();
if (search.usesSorting()) {
sortFields.addAll(search.sortFields(schema));
}
if (search.usesRelevance()) {
sortFields.add(FIELD_SCORE);
}
sortFields.addAll(keySortFields);
return new Sort(sortFields.toArray(new SortField[sortFields.size()]));
}
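/**
* Returns the position of the relevance score inside the {@link FieldDoc#fields} array of the search results,
* or {@code -1} if the specified {@link Search} does not use relevance. With the {@link Sort} built by
* {@link #sort(Search)}, the score comes right after any user-requested sort fields.
*
* @param search A {@link Search}.
* @return The position of the score field, or {@code -1} if relevance is not used.
*/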
private int scorePosition(Search search) {
if (search.usesRelevance()) {
return search.usesSorting() ? search.sortFields(schema).size() : 0;
} else {
return -1;
}
}
/**
* Returns the {@link ScoreDoc} of a previous search.
*
* @param searcher The Lucene {@link IndexSearcher} to be used.
* @param key The key of the last found row.
* @param query The previous query.
* @param sort The previous sort.
* @return The {@link ScoreDoc} of a previous search.
* @throws IOException If there are I/O errors.
*/
private ScoreDoc after(IndexSearcher searcher, RowKey key, Query query, Sort sort) throws IOException {
if (key == null) {
return null;
}
BooleanQuery.Builder builder = new BooleanQuery.Builder();
builder.add(mapper.query(key), FILTER);
builder.add(query, MUST);
Query afterQuery = builder.build();
Set<String> fields = Collections.emptySet();
Map<Document, ScoreDoc> results = lucene.search(searcher, afterQuery, sort, null, 1, fields);
return results.isEmpty() ? null : results.values().iterator().next();
}
/**
* Returns {@code true} if the specified {@link Row} satisfies all the specified {@link IndexExpression}s,
* {@code false} otherwise.
*
* @param row A {@link Row}.
* @param expressions A list of {@link IndexExpression}s to be satisfied by {@code row}.
* @return {@code true} if the specified {@link Row} satisfies all the specified {@link IndexExpression}s,
* {@code false} otherwise.
*/
private boolean accepted(Row row, List<IndexExpression> expressions) {
if (!expressions.isEmpty()) {
Columns columns = mapper.columns(row);
for (IndexExpression expression : expressions) {
if (!accepted(columns, expression)) {
return false;
}
}
}
return true;
}
/**
* Returns {@code true} if the specified {@link Columns} satisfies the specified {@link IndexExpression}, {@code
* false} otherwise.
*
* @param columns A {@link Columns}.
* @param expression An {@link IndexExpression} to be satisfied by {@code columns}.
* @return {@code true} if the specified {@link Columns} satisfies the specified {@link IndexExpression}, {@code
* false} otherwise.
*/
private boolean accepted(Columns columns, IndexExpression expression) {
ColumnDefinition def = metadata.getColumnDefinition(expression.column);
String name = def.name.toString();
ByteBuffer expectedValue = expression.value;
Operator operator = expression.operator;
AbstractType<?> validator = def.type;
for (Column<?> column : columns.getColumnsByCellName(name)) {
if (accepted(column, validator, operator, expectedValue)) {
return true;
}
}
return false;
}
/**
* Returns {@code true} if the specified {@link Column} satisfies the specified expression, {@code false}
* otherwise.
*
* @param column A {@link Column}.
* @param validator An expression validator.
* @param operator An expression operator.
* @param value An expression value.
* @return {@code true} if the specified {@link Column} satisfies the specified expression, {@code false}
* otherwise.
*/
private boolean accepted(Column<?> column, AbstractType<?> validator, Operator operator, ByteBuffer value) {
if (column == null) {
return false;
}
ByteBuffer actualValue = column.getDecomposedValue();
if (actualValue == null) {
return false;
}
int comparison = validator.compare(actualValue, value);
return accepted(operator, comparison);
}
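/**
* Returns {@code true} if the specified comparison result satisfies the specified relational {@link Operator},
* {@code false} otherwise.
*
* @param operator A relational CQL {@link Operator}.
* @param comparison The result of comparing the actual column value with the expected one.
* @return {@code true} if the comparison satisfies the operator, {@code false} otherwise.
*/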
private boolean accepted(Operator operator, int comparison) {
switch (operator) {
case EQ:
return comparison == 0;
case GTE:
return comparison >= 0;
case GT:
return comparison > 0;
case LTE:
return comparison <= 0;
case LT:
return comparison < 0;
default:
throw new IllegalStateException("Unsupported operator " + operator);
}
}
/**
* Returns the {@link Row}s identified by the specified {@link Document}s, using the specified timestamp to ignore
* deleted columns. The {@link Row}s are retrieved from the storage engine, so this involves I/O operations.
*
* @param results The {@link SearchResult}s.
* @param timestamp The timestamp used to ignore deleted columns.
* @param scorePosition The position where the score column is placed.
* @return The {@link Row}s identified by the specified {@link Document}s.
*/
protected abstract List<Row> rows(List<SearchResult> results, long timestamp, int scorePosition);
/**
* Returns a {@link ColumnFamily} composed by the non expired {@link Cell}s of the specified {@link ColumnFamily}.
*
* @param columnFamily A {@link ColumnFamily}.
* @param timestamp The max allowed timestamp for the {@link Cell}s.
* @return A {@link ColumnFamily} composed by the non expired {@link Cell}s of the specified {@link ColumnFamily}.
*/
protected ColumnFamily cleanExpired(ColumnFamily columnFamily, long timestamp) {
ColumnFamily cleanColumnFamily = ArrayBackedSortedColumns.factory.create(baseCfs.metadata);
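// Keep only the cells that are still live (neither deleted nor TTL-expired) at the specified timestamp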
for (Cell cell : columnFamily) {
if (cell.isLive(timestamp)) {
cleanColumnFamily.addColumn(cell);
}
}
return cleanColumnFamily;
}
/**
* Adds to the specified {@link Row} the specified Lucene score column.
*
* @param row A {@link Row}.
* @param timestamp The score column timestamp.
* @param scoreDoc The {@link ScoreDoc} providing the score value.
* @param scorePosition The position where score column is placed.
* @return The {@link Row} with the score.
*/
protected Row addScoreColumn(Row row, long timestamp, ScoreDoc scoreDoc, int scorePosition) {
ColumnFamily cf = row.cf;
CellName cellName = mapper.makeCellName(cf);
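// The sorts built by sort(Search) always include sort fields, so the returned ScoreDoc is assumed to be a
// FieldDoc carrying the score value at scorePosition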
FieldDoc fieldDoc = (FieldDoc) scoreDoc;
Float score = Float.parseFloat(fieldDoc.fields[scorePosition].toString());
ColumnFamily dcf = ArrayBackedSortedColumns.factory.create(baseCfs.metadata);
ByteBuffer cellValue = UTF8Type.instance.decompose(score.toString());
dcf.addColumn(cellName, cellValue, timestamp);
dcf.addAll(row.cf);
return new Row(row.key, dcf);
}
/**
* Returns the used {@link RowMapper}.
*
* @return The used {@link RowMapper}.
*/
public RowMapper mapper() {
return mapper;
}
}