// org.apache.metamodel.mongodb.mongo3.MongoDbDataContext (Maven / Gradle / Ivy)
// The newest version!
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.metamodel.mongodb.mongo3;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Map.Entry;
import java.util.Set;
import java.util.SortedMap;
import java.util.TreeMap;
import java.util.function.Consumer;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import org.apache.metamodel.DataContext;
import org.apache.metamodel.MetaModelException;
import org.apache.metamodel.MetaModelHelper;
import org.apache.metamodel.QueryPostprocessDataContext;
import org.apache.metamodel.UpdateScript;
import org.apache.metamodel.UpdateSummary;
import org.apache.metamodel.UpdateableDataContext;
import org.apache.metamodel.data.DataSet;
import org.apache.metamodel.data.DataSetHeader;
import org.apache.metamodel.data.InMemoryDataSet;
import org.apache.metamodel.data.Row;
import org.apache.metamodel.data.SimpleDataSetHeader;
import org.apache.metamodel.mongodb.common.MongoDBUtils;
import org.apache.metamodel.query.FilterItem;
import org.apache.metamodel.query.FromItem;
import org.apache.metamodel.query.OperatorType;
import org.apache.metamodel.query.Query;
import org.apache.metamodel.query.SelectItem;
import org.apache.metamodel.schema.Column;
import org.apache.metamodel.schema.ColumnType;
import org.apache.metamodel.schema.ColumnTypeImpl;
import org.apache.metamodel.schema.MutableColumn;
import org.apache.metamodel.schema.MutableSchema;
import org.apache.metamodel.schema.MutableTable;
import org.apache.metamodel.schema.Schema;
import org.apache.metamodel.schema.Table;
import org.apache.metamodel.util.SimpleTableDef;
import org.bson.Document;
import org.bson.types.ObjectId;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.mongodb.DB;
import com.mongodb.WriteConcern;
import com.mongodb.client.FindIterable;
import com.mongodb.client.MongoCollection;
import com.mongodb.client.MongoCursor;
import com.mongodb.client.MongoDatabase;
import com.mongodb.client.MongoIterable;
/**
* DataContext implementation for MongoDB.
*
* Since MongoDB has no schema, a virtual schema will be used in this DataContext. This implementation supports either
* automatic discovery of a schema or manual specification of a schema, through the {@link SimpleTableDef} class.
*/
public class MongoDbDataContext extends QueryPostprocessDataContext implements UpdateableDataContext {
private static final Logger logger = LoggerFactory.getLogger(MongoDbDataSet.class);
private final MongoDatabase _mongoDb;
private final SimpleTableDef[] _tableDefs;
private WriteConcernAdvisor _writeConcernAdvisor;
private Schema _schema;
/**
 * Creates a {@link MongoDbDataContext} with an explicitly specified table model. The given
 * {@link SimpleTableDef}s let the caller define their own view on the collections in the database.
 *
 * @param mongoDb the mongo db connection
 * @param tableDefs the {@link SimpleTableDef}s describing the tables and columns of the mongo db collections
 *            (consider using {@link #detectSchema(MongoDatabase)} or
 *            {@link #detectTable(MongoDatabase, String)} to produce them)
 */
public MongoDbDataContext(MongoDatabase mongoDb, SimpleTableDef... tableDefs) {
    super(false);
    this._tableDefs = tableDefs;
    this._mongoDb = mongoDb;
    // the schema is built lazily by getMainSchema()
    this._schema = null;
}
/**
 * Creates a {@link MongoDbDataContext} whose table model is detected automatically from all collections
 * of the database (see {@link #detectSchema(MongoDatabase)}).
 *
 * @param mongoDb the mongo db connection
 */
public MongoDbDataContext(MongoDatabase mongoDb) {
    this(mongoDb, detectSchema(mongoDb));
}
/**
 * Inspects every collection of a {@link MongoDatabase} and detects a table definition for each of them
 * by delegating to {@link #detectTable(MongoDatabase, String)}.
 *
 * @param mongoDb the mongo db to inspect
 * @return one {@link SimpleTableDef} per collection, in the order the collection names were listed; the
 *         array can be adjusted by the caller before it is passed to the constructor
 * @see #detectTable(MongoDatabase, String)
 */
public static SimpleTableDef[] detectSchema(MongoDatabase mongoDb) {
    final MongoIterable<String> collectionNames = mongoDb.listCollectionNames();
    final List<SimpleTableDef> result = new ArrayList<>();
    for (String collectionName : collectionNames) {
        result.add(detectTable(mongoDb, collectionName));
    }
    return result.toArray(new SimpleTableDef[0]);
}
/**
* Performs an analysis of an available collection in a Mongo {@link DB} instance and tries to detect the table
* structure based on the first 1000 documents in the collection.
*
* @param mongoDb the mongo DB
* @param collectionName the name of the collection
* @return a table definition for mongo db.
*/
public static SimpleTableDef detectTable(MongoDatabase mongoDb, String collectionName) {
final MongoCollection collection = mongoDb.getCollection(collectionName);
final FindIterable iterable = collection.find().limit(1000);
final SortedMap>> columnsAndTypes = new TreeMap>>();
for (Document document : iterable) {
Set keysInObject = document.keySet();
for (String key : keysInObject) {
Set> types = columnsAndTypes.get(key);
if (types == null) {
types = new HashSet>();
columnsAndTypes.put(key, types);
}
Object value = document.get(key);
if (value != null) {
types.add(value.getClass());
}
}
}
final String[] columnNames = new String[columnsAndTypes.size()];
final ColumnType[] columnTypes = new ColumnType[columnsAndTypes.size()];
int i = 0;
for (Entry>> columnAndTypes : columnsAndTypes.entrySet()) {
final String columnName = columnAndTypes.getKey();
final Set> columnTypeSet = columnAndTypes.getValue();
final Class> columnType;
if (columnTypeSet.size() == 1) {
columnType = columnTypeSet.iterator().next();
} else {
columnType = Object.class;
}
columnNames[i] = columnName;
if (columnType == ObjectId.class) {
columnTypes[i] = ColumnType.ROWID;
} else {
columnTypes[i] = ColumnTypeImpl.convertColumnType(columnType);
}
i++;
}
return new SimpleTableDef(collectionName, columnNames, columnTypes);
}
/**
 * Builds (on first call) and returns the virtual schema made up of the configured table definitions.
 * Columns of type {@link ColumnType#ROWID} are flagged as primary keys.
 *
 * @return the schema; the same instance is returned on subsequent calls
 */
@Override
protected Schema getMainSchema() throws MetaModelException {
    if (_schema == null) {
        final MutableSchema schema = new MutableSchema(getMainSchemaName());
        for (SimpleTableDef tableDef : _tableDefs) {
            final MutableTable table = tableDef.toTable().setSchema(schema);
            final List<Column> rowIdColumns = table.getColumnsOfType(ColumnType.ROWID);
            for (Column column : rowIdColumns) {
                if (column instanceof MutableColumn) {
                    ((MutableColumn) column).setPrimaryKey(true);
                }
            }
            schema.addTable(table);
        }
        _schema = schema;
    }
    return _schema;
}
/**
 * Returns the name of the main schema, which is the name reported by the underlying database.
 */
@Override
protected String getMainSchemaName() throws MetaModelException {
    final String databaseName = _mongoDb.getName();
    return databaseName;
}
/**
 * Counts the documents of a collection natively in MongoDB.
 *
 * @param table the table (collection) to count in
 * @param whereItems the filters to apply; may be null or empty
 * @param functionApproximationAllowed not used by this implementation
 * @return the number of matching documents, or null when at least one filter cannot be expressed as a
 *         MongoDB query and the count therefore cannot be computed natively
 */
@Override
protected Number executeCountQuery(Table table, List<FilterItem> whereItems, boolean functionApproximationAllowed) {
    final MongoCollection<Document> collection = _mongoDb.getCollection(table.getName());

    final List<FilterItem> postProcessFilters = new ArrayList<>();
    final Document query = createMongoDbQuery(table, whereItems, postProcessFilters::add);

    if (!postProcessFilters.isEmpty()) {
        // not possible to use the native API for this
        return null;
    }

    logger.info("Executing MongoDB 'count' query: {}", query);
    final long count = collection.count(query);
    return count;
}
/**
 * Looks up a single document by the value of its primary key column.
 *
 * @param table the table (collection) to query
 * @param selectItems the items that make up the header of the returned row
 * @param primaryKeyColumn the primary key column to match on
 * @param keyValue the key value to look for
 * @return the matching row, or null when no document matches or when the key filter cannot be expressed
 *         as a MongoDB query (the caller then proceeds with a regular query)
 */
@Override
protected Row executePrimaryKeyLookupQuery(Table table, List<SelectItem> selectItems, Column primaryKeyColumn,
        Object keyValue) {
    final MongoCollection<Document> collection = _mongoDb.getCollection(table.getName());

    final FilterItem primaryKeyWhereItem =
            new FilterItem(new SelectItem(primaryKeyColumn), OperatorType.EQUALS_TO, keyValue);
    final List<FilterItem> whereItems = new ArrayList<>();
    whereItems.add(primaryKeyWhereItem);

    // Collect filters that cannot be pushed down instead of passing a null consumer, which
    // would fail with a NullPointerException inside createMongoDbQuery(...).
    final List<FilterItem> notPushedDown = new ArrayList<>();
    final Document query = createMongoDbQuery(table, whereItems, notPushedDown::add);
    if (!notPushedDown.isEmpty()) {
        // running the query without the key filter would return an arbitrary document
        return null;
    }

    final Document resultDoc = collection.find(query).first();
    if (resultDoc == null) {
        // no document with that key
        return null;
    }

    final DataSetHeader header = new SimpleDataSetHeader(selectItems);
    return MongoDBUtils.toRow(resultDoc, header);
}
/**
 * Executes a query. Queries of the form "SELECT [columns] FROM [table] WHERE [conditions]" on a single
 * table of the main schema (no functions in the select clause, no GROUP BY, HAVING or ORDER BY) are sent
 * to MongoDB directly; every other query is delegated to the generic post-processing implementation of
 * the superclass.
 *
 * @param query the query to execute
 * @return the resulting data set
 */
@Override
public DataSet executeQuery(Query query) {
    final List<FromItem> fromItems = query.getFromClause().getItems();
    if (fromItems.size() == 1) {
        final Table table = fromItems.get(0).getTable();
        // only a single main-schema table without GROUP BY, HAVING and ORDER BY qualifies
        if (table != null && table.getSchema() == _schema && query.getGroupByClause().isEmpty()
                && query.getHavingClause().isEmpty() && query.getOrderByClause().isEmpty()) {
            final List<SelectItem> selectItems = query.getSelectClause().getItems();
            if (isPlainColumnSelection(selectItems)) {
                logger.debug("Query can be expressed in full MongoDB, no post processing needed.");
                final List<FilterItem> whereItems = query.getWhereClause().getItems();

                final Row primaryKeyRow = tryPrimaryKeyLookup(table, selectItems, whereItems);
                if (primaryKeyRow != null) {
                    final DataSetHeader header = new SimpleDataSetHeader(selectItems);
                    return new InMemoryDataSet(header, primaryKeyRow);
                }

                final int firstRow = (query.getFirstRow() == null ? 1 : query.getFirstRow());
                final int maxRows = (query.getMaxRows() == null ? -1 : query.getMaxRows());
                // Aliased and non-aliased select items are handled the same way; the former
                // if/else on aliases had two identical branches and has been collapsed.
                return materializeMainSchemaTableInternal(table, selectItems, whereItems, firstRow, maxRows,
                        false);
            }
        }
    }

    logger.debug("Query will be simplified for MongoDB and post processed.");
    return super.executeQuery(query);
}

/**
 * Tells whether every select item is a "pure" column selection, i.e. has a column and no function.
 */
private static boolean isPlainColumnSelection(List<SelectItem> selectItems) {
    for (SelectItem selectItem : selectItems) {
        if (selectItem.hasFunction() || selectItem.getColumn() == null) {
            return false;
        }
    }
    return true;
}

/**
 * Attempts a primary key lookup when the where clause consists of exactly one non-compound
 * "primary key column EQUALS value" filter.
 *
 * @return the row that was found, or null when the where clause is not a primary key lookup or the
 *         lookup produced no row
 */
private Row tryPrimaryKeyLookup(Table table, List<SelectItem> selectItems, List<FilterItem> whereItems) {
    if (whereItems.size() != 1) {
        return null;
    }
    final FilterItem whereItem = whereItems.get(0);
    final SelectItem selectItem = whereItem.getSelectItem();
    if (whereItem.isCompoundFilter() || selectItem == null || selectItem.getColumn() == null) {
        return null;
    }
    final Column column = selectItem.getColumn();
    if (!column.isPrimaryKey() || !OperatorType.EQUALS_TO.equals(whereItem.getOperator())) {
        return null;
    }

    logger.debug("Query is a primary key lookup query. Trying executePrimaryKeyLookupQuery(...)");
    final Row row = executePrimaryKeyLookupQuery(table, selectItems, column, whereItem.getOperand());
    if (row == null) {
        logger.debug("DataContext did not return any primary key lookup query results. Proceeding "
                + "with manual lookup.");
    }
    return row;
}
/**
 * Queries a collection and wraps the result in a {@link DataSet}.
 *
 * Filters that can be expressed as a MongoDB query are pushed down; the remaining ones are evaluated on
 * the client. When client-side filtering is needed, paging (firstRow / maxRows) is also done on the
 * client, after filtering: applying skip/limit in MongoDB first would page over documents that the
 * client-side filters have not yet been applied to, and so return the wrong rows.
 *
 * @param table the table (collection) to query
 * @param selectItems the items to select
 * @param whereItems the filters to apply; may be null
 * @param firstRow the 1-based first row to include
 * @param maxRows the maximum number of rows; values of 0 or below mean "no limit"
 * @param queryPostProcessed passed on to {@link MongoDbDataSet}
 * @return the resulting data set
 */
private DataSet materializeMainSchemaTableInternal(Table table, List<SelectItem> selectItems,
        List<FilterItem> whereItems, int firstRow, int maxRows, boolean queryPostProcessed) {
    // Dry run of the query conversion (no database access) to learn which filters cannot be pushed down.
    final List<FilterItem> postProcessWhereItems = new ArrayList<>();
    createMongoDbQuery(table, whereItems, postProcessWhereItems::add);

    if (postProcessWhereItems.isEmpty()) {
        final MongoCursor<Document> cursor =
                getDocumentMongoCursor(table, whereItems, firstRow, maxRows, item -> { });
        return new MongoDbDataSet(cursor, selectItems, queryPostProcessed);
    }

    // The columns used by the client-side filters must be part of the queried data.
    // TODO: Minor optimization possible here to avoid having multiple select items for the same column.
    // We could check if the column is already being queried.
    final List<SelectItem> selectItemsToQuery = new ArrayList<>(selectItems);
    for (FilterItem whereItem : postProcessWhereItems) {
        addFilterColumns(whereItem, selectItemsToQuery);
    }

    // no skip/limit in MongoDB here - see method documentation
    final MongoCursor<Document> cursor = getDocumentMongoCursor(table, whereItems, 1, -1, item -> { });
    final DataSet queried = new MongoDbDataSet(cursor, selectItemsToQuery, queryPostProcessed);
    DataSet filtered = MetaModelHelper.getFiltered(queried, postProcessWhereItems);
    if (firstRow > 1 || maxRows > 0) {
        // same "maxRows > 0" convention as the native path
        filtered = MetaModelHelper.getPaged(filtered, firstRow, maxRows > 0 ? maxRows : -1);
    }
    return MetaModelHelper.getSelection(selectItems, filtered);
}

/**
 * Adds a select item for every column referenced by the given filter; compound filters are traversed
 * recursively because they carry no select item of their own.
 */
private static void addFilterColumns(FilterItem filterItem, List<SelectItem> target) {
    if (filterItem.isCompoundFilter()) {
        for (FilterItem childItem : filterItem.getChildItems()) {
            addFilterColumns(childItem, target);
        }
        return;
    }
    final SelectItem selectItem = filterItem.getSelectItem();
    final Column column = selectItem == null ? null : selectItem.getColumn();
    if (column != null) {
        target.add(new SelectItem(column));
    }
}
/**
 * Builds the MongoDB 'find' query for a table and opens a cursor on its result.
 *
 * @param table the table (collection) to query
 * @param whereItems the filters to apply; may be null
 * @param firstRow the 1-based first row; rows before it are skipped when the value is above 1
 * @param maxRows the maximum number of documents; only applied when the value is above 0
 * @param filterItemsToPostProcessConsumer receives every filter that could not be made part of the query
 * @return a cursor over the matching documents
 */
private MongoCursor<Document> getDocumentMongoCursor(Table table, List<FilterItem> whereItems, int firstRow,
        int maxRows, Consumer<FilterItem> filterItemsToPostProcessConsumer) {
    final MongoCollection<Document> collection = _mongoDb.getCollection(table.getName());
    final Document query = createMongoDbQuery(table, whereItems, filterItemsToPostProcessConsumer);

    logger.info("Executing MongoDB 'find' query: {}", query);
    FindIterable<Document> iterable = collection.find(query);

    if (maxRows > 0) {
        iterable = iterable.limit(maxRows);
    }
    if (firstRow > 1) {
        // firstRow is 1-based, skip is 0-based
        iterable = iterable.skip(firstRow - 1);
    }
    return iterable.iterator();
}
/**
 * Creates the MongoDB query document for a set of filters.
 *
 * @param table the table that is being queried
 * @param whereItems the filters to convert; may be null or empty, in which case the query is empty
 * @param whereItemToPostProcessConsumer receives every filter that could not be pushed down to the query
 *            and therefore has to be evaluated by the caller
 * @return the query document
 * @throws IllegalStateException if a filter could not be pushed down and no consumer was given
 */
protected Document createMongoDbQuery(Table table, List<FilterItem> whereItems,
        Consumer<FilterItem> whereItemToPostProcessConsumer) {
    assert _schema == table.getSchema();

    final Document query = new Document();
    if (whereItems == null || whereItems.isEmpty()) {
        return query;
    }
    for (FilterItem item : whereItems) {
        if (convertToCursorObject(query, item)) {
            continue;
        }
        // it wasn't possible to push down the filter item
        if (whereItemToPostProcessConsumer == null) {
            // fail with an explanation instead of a bare NullPointerException
            throw new IllegalStateException(
                    "Filter item cannot be pushed down to MongoDB and no post-process consumer was given: "
                            + item);
        }
        whereItemToPostProcessConsumer.accept(item);
    }
    return query;
}
/**
 * Converts an array into a {@link List} holding the elements of the array.
 *
 * Primitive arrays are converted element by element (boxing each value). They must not be passed to
 * {@code Arrays.asList(...)}, which would treat the whole primitive array as one single element.
 *
 * @param arr the object to convert
 * @return a list with the elements of the array, or null if the argument is null or not an array
 */
private static Object convertArrayToList(Object arr) {
    if (arr == null || !arr.getClass().isArray()) {
        // It's not an array.
        return null;
    }
    if (arr instanceof Object[]) {
        return Arrays.asList((Object[]) arr);
    }
    // primitive array: box every element
    final int length = java.lang.reflect.Array.getLength(arr);
    final List<Object> elements = new ArrayList<>(length);
    for (int i = 0; i < length; i++) {
        elements.add(java.lang.reflect.Array.get(arr, i));
    }
    return elements;
}
/**
 * Attempts to convert a FilterItem into a refinement of a MongoDB query.
 *
 * The query document is only modified when the conversion succeeds. Conditions that would collide with
 * an entry that is already part of the query (a second "$or", or another condition on a column that
 * cannot be merged into the existing one) are added to the query's "$and" list, so that no previously
 * added condition is overwritten.
 *
 * @param query the query document to refine
 * @param item the filter to convert
 * @return true if the conversion was successful, false if not (the filter then needs post processing)
 */
private boolean convertToCursorObject(Document query, FilterItem item) {
    if (item.isCompoundFilter()) {
        // NOTE(review): all compound filters are translated to "$or"; confirm that compound
        // filters reaching this method can never represent an AND of their children.
        final List<Document> orList = new ArrayList<>();
        for (FilterItem childItem : item.getChildItems()) {
            // children are converted into their own document, so a failure leaves 'query' untouched
            final Document childDoc = new Document();
            if (!convertToCursorObject(childDoc, childItem)) {
                return false;
            }
            orList.add(childDoc);
        }
        if (query.containsKey("$or")) {
            // a plain put would silently replace the "$or" of an earlier compound filter
            appendAndCondition(query, new Document("$or", orList));
        } else {
            query.put("$or", orList);
        }
        return true;
    }

    final SelectItem selectItem = item.getSelectItem();
    if (selectItem == null || selectItem.hasFunction()) {
        // at this point, (scalar) functions in filters aren't possible to push down to the query
        return false;
    }
    final Column column = selectItem.getColumn();
    if (column == null) {
        // nothing to build a field name from
        return false;
    }
    final String columnName = column.getName();

    final String operatorName;
    try {
        operatorName = getOperatorName(item);
    } catch (UnsupportedOperationException e) {
        // not possible to push this operator down to the query
        return false;
    }

    Object operand = item.getOperand();
    if (ObjectId.isValid(String.valueOf(operand))) {
        operand = new ObjectId(String.valueOf(operand));
    } else if (operand != null && operand.getClass().isArray()) {
        operand = convertArrayToList(operand);
    }

    // the value stored under the column name: a plain value / pattern, or an operator document
    final Object condition;
    if (operatorName != null) {
        condition = new Document(operatorName, operand);
    } else if (OperatorType.LIKE.equals(item.getOperator())) {
        if (operand == null) {
            // no pattern can be built from a null operand
            return false;
        }
        condition = turnOperandIntoRegExp(operand);
    } else {
        condition = operand;
    }

    if (!query.containsKey(columnName)) {
        query.put(columnName, condition);
        return true;
    }

    final Object existing = query.get(columnName);
    if (operatorName != null && existing instanceof Document
            && !((Document) existing).containsKey(operatorName)) {
        // merge into the existing operator document, e.g. { $gt: 1 } becomes { $gt: 1, $lt: 5 }
        ((Document) existing).append(operatorName, operand);
    } else {
        // plain value, pattern, or same operator already present: keep both conditions
        appendAndCondition(query, new Document(columnName, condition));
    }
    return true;
}

/**
 * Adds a condition to the "$and" list of a query document, creating the list when it does not exist.
 */
@SuppressWarnings("unchecked")
private static void appendAndCondition(Document query, Document condition) {
    final Object existing = query.get("$and");
    final List<Object> andList;
    if (existing instanceof List) {
        // safe: the "$and" entry is only ever created below, as a list of documents
        andList = (List<Object>) existing;
    } else {
        andList = new ArrayList<>();
        query.put("$and", andList);
    }
    andList.add(condition);
}
/**
 * Maps the operator of a filter to the name of the corresponding MongoDB query operator.
 *
 * @param item the filter whose operator is looked up
 * @return the MongoDB operator name, or null for EQUALS_TO and LIKE, which are not expressed through an
 *         operator name
 * @throws UnsupportedOperationException if the operator has no mapping
 */
private String getOperatorName(FilterItem item) throws UnsupportedOperationException {
    final OperatorType type = item.getOperator();

    // these two are expressed as a direct value (or pattern), without an operator name
    if (OperatorType.EQUALS_TO.equals(type) || OperatorType.LIKE.equals(type)) {
        return null;
    }

    final String mongoOperator;
    if (OperatorType.LESS_THAN.equals(type)) {
        mongoOperator = "$lt";
    } else if (OperatorType.LESS_THAN_OR_EQUAL.equals(type)) {
        mongoOperator = "$lte";
    } else if (OperatorType.GREATER_THAN.equals(type)) {
        mongoOperator = "$gt";
    } else if (OperatorType.GREATER_THAN_OR_EQUAL.equals(type)) {
        mongoOperator = "$gte";
    } else if (OperatorType.DIFFERENT_FROM.equals(type)) {
        mongoOperator = "$ne";
    } else if (OperatorType.IN.equals(type)) {
        mongoOperator = "$in";
    } else {
        throw new UnsupportedOperationException("Unsupported operator type: " + type);
    }
    return mongoOperator;
}
/**
 * Builds the case-insensitive regular expression for a LIKE operand. The expression is anchored at both
 * ends, so it has to match the whole value.
 *
 * @param operand the LIKE operand; its string representation is used
 * @return the compiled pattern
 */
private Pattern turnOperandIntoRegExp(Object operand) {
    final String body = replaceWildCardLikeChars(operand.toString());
    final String anchored = "^" + body + "$";
    return Pattern.compile(anchored, Pattern.CASE_INSENSITIVE);
}
/**
 * Translates a LIKE operand into the body of a regular expression: the wildcard '%' becomes ".*" and
 * every other character is matched literally. Regular expression metacharacters in the operand are
 * escaped; without that, an operand such as "a.b%" would also match "axb".
 *
 * @param operand the LIKE operand
 * @return the regular expression body (without anchors)
 */
private static String replaceWildCardLikeChars(String operand) {
    final String metaCharacters = "\\.[]{}()*+-?^$|";
    final StringBuilder regex = new StringBuilder(operand.length() + 8);
    for (int i = 0; i < operand.length(); i++) {
        final char c = operand.charAt(i);
        if (c == '%') {
            regex.append(".*");
        } else {
            if (metaCharacters.indexOf(c) >= 0) {
                regex.append('\\');
            }
            regex.append(c);
        }
    }
    return regex.toString();
}
/**
 * Materializes a table without any filters, starting at the first row.
 *
 * @param table the table (collection) to read
 * @param columns the columns to select
 * @param maxRows the maximum number of rows
 * @return the resulting data set
 */
@Override
protected DataSet materializeMainSchemaTable(Table table, List<Column> columns, int maxRows) {
    final List<SelectItem> selectItems = columns.stream().map(SelectItem::new).collect(Collectors.toList());
    return materializeMainSchemaTableInternal(table, selectItems, null, 1, maxRows, true);
}
/**
 * Materializes a table without any filters, starting at a given row.
 *
 * @param table the table (collection) to read
 * @param columns the columns to select
 * @param firstRow the 1-based first row to include
 * @param maxRows the maximum number of rows
 * @return the resulting data set
 */
@Override
protected DataSet materializeMainSchemaTable(Table table, List<Column> columns, int firstRow, int maxRows) {
    final List<SelectItem> selectItems = columns.stream().map(SelectItem::new).collect(Collectors.toList());
    return materializeMainSchemaTableInternal(table, selectItems, null, firstRow, maxRows, true);
}
/**
 * Runs an update script, using the given {@link WriteConcernAdvisor} for the writes it performs.
 *
 * @param update the update script to run
 * @param writeConcernAdvisor the advisor handed to the update callback
 * @return the summary reported by the update callback
 */
public UpdateSummary executeUpdate(UpdateScript update, WriteConcernAdvisor writeConcernAdvisor) {
    final MongoDbUpdateCallback updateCallback = new MongoDbUpdateCallback(this, writeConcernAdvisor);
    try {
        update.run(updateCallback);
    } finally {
        // the callback is closed whether or not the script succeeded
        updateCallback.close();
    }
    final UpdateSummary summary = updateCallback.getUpdateSummary();
    return summary;
}
/**
 * Runs an update script with one fixed {@link WriteConcern}.
 *
 * @param update the update script to run
 * @param writeConcern the write concern, wrapped in a {@link SimpleWriteConcernAdvisor}
 * @return the summary of the update
 */
public UpdateSummary executeUpdate(UpdateScript update, WriteConcern writeConcern) {
    final WriteConcernAdvisor fixedAdvisor = new SimpleWriteConcernAdvisor(writeConcern);
    return executeUpdate(update, fixedAdvisor);
}
/**
 * Runs an update script with the advisor returned by {@link #getWriteConcernAdvisor()}.
 *
 * @param update the update script to run
 * @return the summary of the update
 */
@Override
public UpdateSummary executeUpdate(UpdateScript update) {
    final WriteConcernAdvisor advisor = getWriteConcernAdvisor();
    return executeUpdate(update, advisor);
}
/**
 * Returns the {@link WriteConcernAdvisor} used by {@link #executeUpdate(UpdateScript)} calls.
 *
 * @return the advisor that was set on this data context, or a new {@link DefaultWriteConcernAdvisor}
 *         when none was set
 */
public WriteConcernAdvisor getWriteConcernAdvisor() {
    return _writeConcernAdvisor != null ? _writeConcernAdvisor : new DefaultWriteConcernAdvisor();
}
/**
 * Sets the {@link WriteConcernAdvisor} that {@link #executeUpdate(UpdateScript)} uses from now on.
 *
 * @param writeConcernAdvisor the advisor to use
 */
public void setWriteConcernAdvisor(WriteConcernAdvisor writeConcernAdvisor) {
    this._writeConcernAdvisor = writeConcernAdvisor;
}
/**
 * Gets the {@link MongoDatabase} instance that this {@link DataContext} is backed by.
 *
 * @return the database that was passed to the constructor
 */
public MongoDatabase getMongoDb() {
    return this._mongoDb;
}
/**
 * Adds a table to the schema of this data context.
 *
 * The schema is obtained through {@link #getMainSchema()} so that it is built first if that has not
 * happened yet. Reading the lazily initialized field directly made this method fail with "Schema is not
 * mutable" whenever it was called before the schema had been requested.
 *
 * @param table the table to add
 * @throws UnsupportedOperationException if the schema is not a {@link MutableSchema}
 */
protected void addTable(MutableTable table) {
    final Schema schema = getMainSchema();
    if (!(schema instanceof MutableSchema)) {
        throw new UnsupportedOperationException("Schema is not mutable");
    }
    ((MutableSchema) schema).addTable(table);
}
}