org.apache.ignite.internal.processors.cache.query.CacheQuery Maven / Gradle / Ivy

Show more of this group Show more artifacts with this name
Show all versions of ignite-core Show documentation
Java-based middleware for in-memory processing of big data in a distributed environment.
There is a newer version: 3.0.0-beta1
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.ignite.internal.processors.cache.query;

import org.apache.ignite.IgniteCheckedException;
import org.apache.ignite.cache.affinity.AffinityKey;
import org.apache.ignite.cache.query.Query;
import org.apache.ignite.cache.query.annotations.QuerySqlField;
import org.apache.ignite.cache.query.annotations.QuerySqlFunction;
import org.apache.ignite.cache.query.annotations.QueryTextField;
import org.apache.ignite.cluster.ClusterGroup;
import org.apache.ignite.internal.util.lang.GridCloseableIterator;
import org.apache.ignite.lang.IgniteReducer;
import org.jetbrains.annotations.Nullable;

/**
 * Main API for configuring and executing cache queries.
 * <p>
 * <h1 class="header">SQL Queries</h1>
 * {@code SQL} query allows to execute distributed cache
 * queries using standard SQL syntax. All values participating in where clauses
 * or joins must be annotated with {@link QuerySqlField} annotation.
 * <h1 class="header">Field Queries</h1>
 * By default {@code select} clause is ignored as query result contains full objects.
 * This type of query replaces full objects with individual fields. Note that selected fields
 * must be annotated with {@link QuerySqlField} annotation.
 * <h1 class="header">Cross-Cache Queries</h1>
 * You are allowed to query data from several caches. Cache that this query was created on is
 * treated as default schema in this case. Other caches can be referenced by their names.
 * <p>
 * Note that cache name is case sensitive and has to always be specified in double quotes.
 * Here is an example of cross cache query (note that 'replicated' and 'partitioned' are
 * cache names for replicated and partitioned caches accordingly):
 * <pre>
 * CacheQuery&lt;Map.Entry&lt;Integer, FactPurchase&gt;&gt; storePurchases = cache.queries().createSqlQuery(
 *     Purchase.class,
 *     "from \"replicated\".Store, \"partitioned\".Purchase where Store.id=Purchase.storeId and Store.id=?");
 * </pre>
 * <h1 class="header">Custom functions in SQL queries</h1>
 * It is possible to write custom Java methods and call them from SQL queries. These methods must be public static
 * and annotated with {@link QuerySqlFunction}. Classes containing these methods must be registered in
 * {@link org.apache.ignite.configuration.CacheConfiguration#setSqlFunctionClasses(Class[])}.
 * <h1 class="header">Full Text Queries</h1>
 * Ignite supports full text queries based on Apache Lucene engine. Note that all fields that
 * are expected to show up in text query results must be annotated with {@link QueryTextField}
 * annotation.
 * <h1 class="header">Scan Queries</h1>
 * Sometimes when it is known in advance that SQL query will cause a full data scan, or whenever data set
 * is relatively small, the full scan query may be used. This query will iterate over all cache
 * entries, skipping over entries that don't pass the optionally provided key-value filter.
 * <h1 class="header">Limitations</h1>
 * Data in Ignite cache is usually distributed across several nodes,
 * so some queries may not work as expected. Keep in mind following limitations
 * (not applied if data is queried from one node only):
 * <ul>
 *     <li>
 *         {@code Group by} and {@code sort by} statements are applied separately
 *         on each node, so result set will likely be incorrectly grouped or sorted
 *         after results from multiple remote nodes are grouped together.
 *     </li>
 *     <li>
 *         Aggregation functions like {@code sum}, {@code max}, {@code avg}, etc.
 *         are also applied on each node. Therefore you will get several results
 *         containing aggregated values, one for each node.
 *     </li>
 *     <li>
 *         Joins will work correctly only if joined objects are stored in
 *         colocated mode or at least one side of the join is stored in
 *         {@link org.apache.ignite.cache.CacheMode#REPLICATED} cache. Refer to
 *         {@link AffinityKey} javadoc for more information about colocation.
 *     </li>
 * </ul>
 * <h1 class="header">Query usage</h1>
 * As an example, suppose we have data model consisting of {@code 'Person'} and {@code 'Organization'}
 * classes defined as follows:
 * <pre>
 * public class Organization {
 *     // Indexed field.
 *     &#64;QuerySqlField(index = true)
 *     private long id;
 *
 *     // Indexed field.
 *     &#64;QuerySqlField(index = true)
 *     private String name;
 *     ...
 * }
 *
 * public class Person {
 *     // Indexed field.
 *     &#64;QuerySqlField(index = true)
 *     private long id;
 *
 *     // Indexed field (Organization ID, used as a foreign key).
 *     &#64;QuerySqlField(index = true)
 *     private long orgId;
 *
 *     // Without SQL field annotation, this field cannot be used in queries.
 *     private String name;
 *
 *     // Not indexed field.
 *     &#64;QuerySqlField
 *     private double salary;
 *
 *     // Index for text search.
 *     &#64;QueryTextField
 *     private String resume;
 *     ...
 * }
 * </pre>
 * Then you can create and execute queries that check various salary ranges like so:
 * <pre>
 * Cache&lt;Long, Person&gt; cache = G.grid().cache();
 * ...
 * // Create query which selects salaries based on range for all employees
 * // that work for a certain company.
 * CacheQuery&lt;Map.Entry&lt;Long, Person&gt;&gt; qry = cache.queries().createSqlQuery(Person.class,
 *     "from Person, Organization where Person.orgId = Organization.id " +
 *         "and Organization.name = ? and Person.salary &gt; ? and Person.salary &lt;= ?");
 *
 * // Query all nodes to find all cached Ignite employees
 * // with salaries less than 1000.
 * qry.execute("Ignition", 0, 1000);
 *
 * // Query only remote nodes to find all remotely cached Ignite employees
 * // with salaries greater than 1000 and less than 2000.
 * qry.projection(grid.remoteProjection()).execute("Ignition", 1000, 2000);
 * </pre>
 * Here is a possible query that will use Lucene text search to scan all resumes to
 * check if employees have {@code Master} degree:
 * <pre>
 * CacheQuery&lt;Map.Entry&lt;Long, Person&gt;&gt; mastersQry =
 *     cache.queries().createFullTextQuery(Person.class, "Master");
 *
 * // Query all cache nodes.
 * mastersQry.execute();
 * </pre>
 * <h1 class="header">Geo-Spatial Indexes and Queries</h1>
 * Ignite also supports Geo-Spatial Indexes. Here is an example of geo-spatial index:
 * <pre>
 * private class MapPoint implements Serializable {
 *     // Geospatial index.
 *     &#64;QuerySqlField(index = true)
 *     private org.locationtech.jts.geom.Point location;
 *
 *     // Not indexed field.
 *     &#64;QuerySqlField
 *     private String name;
 *
 *     public MapPoint(org.locationtech.jts.geom.Point location, String name) {
 *         this.location = location;
 *         this.name = name;
 *     }
 * }
 * </pre>
 * Example of spatial query on the geo-indexed field from above:
 * <pre>
 * org.locationtech.jts.geom.GeometryFactory factory = new org.locationtech.jts.geom.GeometryFactory();
 *
 * org.locationtech.jts.geom.Polygon square = factory.createPolygon(new org.locationtech.jts.geom.Coordinate[] {
 *     new org.locationtech.jts.geom.Coordinate(0, 0),
 *     new org.locationtech.jts.geom.Coordinate(0, 100),
 *     new org.locationtech.jts.geom.Coordinate(100, 100),
 *     new org.locationtech.jts.geom.Coordinate(100, 0),
 *     new org.locationtech.jts.geom.Coordinate(0, 0)
 * });
 *
 * // Query arguments are passed directly to execute(..).
 * Map.Entry records = cache.queries().createSqlQuery(MapPoint.class, "select * from MapPoint where location &amp;&amp; ?")
 *     .execute(square)
 *     .get();
 * </pre>
 *
 * @param <T> Type of the query result.
 */
public interface CacheQuery<T> {
    /**
     * Sets result page size. If not provided, {@link Query#DFLT_PAGE_SIZE} will be used.
     * Results are returned from queried nodes one page at a time.
     *
     * @param pageSize Page size.
     * @return {@code this} query instance for chaining.
     */
    public CacheQuery<T> pageSize(int pageSize);

    /**
     * Sets query timeout. {@code 0} means there is no timeout (this
     * is a default value).
     *
     * @param timeout Query timeout.
     * @return {@code this} query instance for chaining.
     */
    public CacheQuery<T> timeout(long timeout);

    /**
     * Sets limit of returned records. {@code 0} means there is no limit.
     *
     * @param limit Records limit.
     * @return {@code this} query instance for chaining.
     */
    public CacheQuery<T> limit(int limit);

    /**
     * Sets whether or not to include backup entries into query result. This flag
     * is {@code false} by default.
     *
     * @param incBackups Query {@code includeBackups} flag.
     * @return {@code this} query instance for chaining.
     */
    public CacheQuery<T> includeBackups(boolean incBackups);

    /**
     * Sets whether or not to deduplicate query result set. If this flag is {@code true}
     * then query result will not contain some key more than once even if several nodes
     * returned entries with the same keys. Default value is {@code false}.
     *
     * @param dedup Query {@code enableDedup} flag.
     * @return {@code this} query instance for chaining.
     */
    public CacheQuery<T> enableDedup(boolean dedup);

    /**
     * Sets optional grid projection to execute this query on.
     *
     * @param prj Projection.
     * @return {@code this} query instance for chaining.
     */
    public CacheQuery<T> projection(ClusterGroup prj);

    /**
     * Executes the query and returns the query future. Caller may decide to iterate
     * over the returned future directly in which case the iterator may block until
     * the next value will become available, or wait for the whole query to finish
     * by calling any of the {@code 'get(..)'} methods on the returned future.
     * <p>
     * Note that if the passed in grid projection is a local node, then query
     * will be executed locally without distribution to other nodes.
     * <p>
     * Also note that query state cannot be changed (clause, timeout etc.), except
     * arguments, if this method was called at least once.
     *
     * @param args Optional arguments.
     * @return Future for the query result.
     */
    public CacheQueryFuture<T> execute(@Nullable Object... args);

    /**
     * Executes the query the same way as {@link #execute(Object...)} method but reduces result remotely.
     *
     * @param rmtReducer Remote reducer.
     * @param args Optional arguments.
     * @param <R> Type of the reduced result.
     * @return Future for the query result.
     */
    public <R> CacheQueryFuture<R> execute(IgniteReducer<T, R> rmtReducer, @Nullable Object... args);

    /**
     * Executes this query as a scan query and returns an iterator over its result.
     * <p>
     * The return type is intentionally left raw: the element type of the iterator is not
     * specified by this interface.
     *
     * @return Scan query iterator.
     * @throws IgniteCheckedException If query execution failed.
     */
    public GridCloseableIterator executeScanQuery() throws IgniteCheckedException;
}