// JAR search and dependency download from the Maven repository (/logo.png)
// oracle.kv.impl.api.avro.SchemaCache Maven / Gradle / Ivy
/*-
* Copyright (C) 2011, 2018 Oracle and/or its affiliates. All rights reserved.
*
* This file was distributed by Oracle as part of a version of Oracle NoSQL
* Database made available at:
*
* http://www.oracle.com/technetwork/database/database-technologies/nosqldb/downloads/index.html
*
* Please see the LICENSE file included in the top-level directory of the
* appropriate version of Oracle NoSQL Database for a copy of the license and
* additional information.
*/
package oracle.kv.impl.api.avro;
import java.util.Collections;
import java.util.HashMap;
import java.util.IdentityHashMap;
import java.util.Map;
import java.util.SortedMap;
import java.util.SortedSet;
import java.util.TreeSet;
import org.apache.avro.Schema;
import oracle.kv.Consistency;
import oracle.kv.Value;
import oracle.kv.avro.UndefinedSchemaException;
import oracle.kv.impl.test.TestHook;
/**
* Keeps a cache of all schemas for use by clients that use the Avro bindings,
* and (in the future) for use by queries and indexers running on an RN.
*
* The cache uses a copy-on-write approach for all cached data, to avoid any
* blocking among threads using the cache as long as there are no cache misses.
* Copy-on-write is used rather than a ReadWriteLock or concurrent collections
* for several reasons:
*
* <ul>
* <li>
* Cache hits vastly outnumber misses/updates.
* </li>
* <li>
* Cache reads are very small/quick operations and the added overhead of
* synchronization on read might be noticeable.
* </li>
* <li>
* The cost of copying the cache is low. It is not expected to be large and
* only a shallow copy is needed because Schema objects are immutable.
* </li>
* </ul>
* <p>
* <strong>Stored Schemas</strong>
*
* There are two types of cached information, stored schemas and user schemas.
* Stored schemas are queried using the SchemaAccessor and cached in two maps,
* one by schema ID and the other by schema name.
*
* When there is a cache miss, we query any recently added schema kv pairs in
* the store, while synchronized on the cache object itself. The expectation
* is that cache misses are infrequent and cache updates even less frequent
* (because schema changes are so infrequent), so blocking will normally only
* occur when the cache is initially populated. Cache updates, when necessary,
* are performed while synchronized to prevent multiple threads from reading
* the schema kv pairs concurrently, since this would be wasteful and could
* impact performance on the RN holding the schema kv pairs.
*
* Blocking and schema kv pair queries may occur frequently if multiple caller
* threads repeatedly try to use a schema that is undefined in the store. This
* should be unusual and is considered a programming error, so it is not worth
* trying to optimize. There is a warning to this effect in the
* UndefinedSchemaException javadoc.
*
* <p>
* <strong>User Schemas</strong>
*
* Users pass Schema objects to the binding APIs for use with Avro as writer
* schemas and reader schemas. We must ensure those schemas are known
* (stored). To do this we maintain an identity map from user schemas to
* stored schemas. This allows us to quickly discover whether a user specified
* schema is known, but allows users to pass arbitrary schema objects to the
* binding APIs. Users typically create schema objects using Avro.
*
* Before adding the association between a user's schema and a stored schema to
* the identity map, we ensure the user's schema is equal to the stored schema.
* This is considered a lookup by schema value, since a deep comparison between
* schemas is performed. When a schema has multiple stored versions, multiple
* schemas may need to be compared to find the version specified by the user.
* Once an association has been added to the identity map, a lookup of the user
* schema is very quick and does not require a schema comparison.
*
* Although the map containing user schemas is updated when a user specifies a
* new schema, rather than by querying stored schemas that were recently added,
* the same copy-on-write approach and synchronization (on the cache object) is
* used. Potential blocking could be reduced by synchronizing on two different
* objects -- one for updating the stored schemas and another for updating user
* schemas -- but this would add complexity and potential ordering issues.
* Both types of cache updates are so infrequent that this is not worth the
* trouble.
*
* Blocking while adding a user schema may occur frequently if the user creates
* new schema objects often, e.g., for every operation. This may also use
* large amounts of memory for caching the user schemas and may eventually fill
* the JVM heap. This is considered a programming error and is not explicitly
* handled. There is a warning to this effect in the AvroCatalog javadoc.
*/
@SuppressWarnings("deprecation")
class SchemaCache {
/** Used to read schema kv pairs from the store. */
private final SchemaAccessor accessor;
/** For use by Avro C API. */
private final CBindingBridge cBindingBridge;
/**
* Current cache contents. This field is reassigned with a new Contents
* object when there is a change, and the assignment is performed while
* synchronized.
*/
private volatile Contents contents;
private TestHook cacheMissHook;
/**
* Initializes the cache with all currently stored schemas. Invoked when
* the AvroCatalog is first opened by a client app.
*/
SchemaCache(SchemaAccessor accessor) {
this.accessor = accessor;
cBindingBridge = new CBindingBridgeImpl();
contents = new Contents().updateStoredSchemas
(accessor, accessor.getLowestConsistency());
}
/**
* Updates the cache with stored schemas added since the cache was last
* initialized or updated. Invoked when a client calls
* AvroCatalog.refreshSchemaCache.
*
* Calling this method often from multiple threads may cause blocking, and
* calling it often (even from one thread) could have an impact on store
* performance. The AvroCatalog.refreshSchemaCache method javadoc contains
* warnings to this effect.
*/
void updateStoredSchemas(Consistency consistency) {
synchronized (this) {
/* Update the cache while synchronized. */
contents = contents.updateStoredSchemas(accessor, consistency);
}
}
/**
* Returns a map of stored schemas by name. The most recent version of
* each schema is contained in the map, according to the current contents
* of the cache. The cache is not updated by this method.
*/
Map getCurrentSchemas() {
return contents.currentSchemas;
}
/**
* Gets a stored schema by ID. If a schema with the given ID is not
* present in the cache, try updating the cache. If no such ID is known,
* return null.
*/
SchemaInfo getSchemaInfoById(int schemaId) {
/* First check for a cache hit without any synchronization. */
SchemaInfo info = contents.byId.get(schemaId);
if (info != null) {
return info;
}
if (cacheMissHook != null) {
cacheMissHook.doHook(null);
}
/* Synchronize when there is a cache miss. */
synchronized (this) {
/*
* Return cached schema if another thread added the schema to the
* cache while we waited to get the mutex. The double-check is
* safe because the contents field is volatile.
*/
info = contents.byId.get(schemaId);
if (info != null) {
return info;
}
/* Update the cache while synchronized. */
for (Consistency consistency : accessor.getConsistencyRamp()) {
contents = contents.updateStoredSchemas(accessor, consistency);
info = contents.byId.get(schemaId);
if (info != null) {
return info;
}
}
/*
* Final attempt refreshes all schemas from scratch when an older
* schema ID has recently been enabled.
*/
contents = contents.refreshStoredSchemas
(accessor, accessor.getHighestConsistency());
info = contents.byId.get(schemaId);
if (info != null) {
return info;
}
}
return null;
}
/**
* Gets a stored schema by value, using a given Schema for comparision.
* Returns a stored schema that is equal to the given schema, where
* equality is the same as Schema.equals with an important exception: Avro
* string type properties are disregarded. If such a schema is not present
* in the cache, try updating the cache. If no such schema is known,
* return null.
*/
SchemaInfo getSchemaInfoByValue(Schema schemaValue) {
/* First check for a cache hit without any synchronization. */
SchemaInfo info = contents.byValue.get(schemaValue);
if (info != null) {
return info;
}
if (cacheMissHook != null) {
cacheMissHook.doHook(null);
}
/* Synchronize when there is a cache miss. */
synchronized (this) {
/*
* Return cached schema if another thread added the schema to the
* cache while we waited to get the mutex. The double-check is
* safe because the contents field is volatile.
*/
info = contents.byValue.get(schemaValue);
if (info != null) {
return info;
}
/*
* Update the cache while synchronized. First try updating the
* by-value cache using the cached stored schemas. If that fails,
* try updating the stored schemas and then the by-value cache.
*/
contents = contents.updateUserSchemas(schemaValue);
info = contents.byValue.get(schemaValue);
if (info != null) {
return info;
}
for (Consistency consistency : accessor.getConsistencyRamp()) {
contents = contents.updateStoredSchemas(accessor, consistency);
contents = contents.updateUserSchemas(schemaValue);
info = contents.byValue.get(schemaValue);
if (info != null) {
return info;
}
}
/*
* Final attempt refreshes all schemas from scratch when an older
* schema ID has recently been enabled.
*/
contents = contents.refreshStoredSchemas
(accessor, accessor.getHighestConsistency());
contents = contents.updateUserSchemas(schemaValue);
info = contents.byValue.get(schemaValue);
if (info != null) {
return info;
}
}
return null;
}
/**
* Gets a stored schema by value like getSchemaInfoByValue. Unlike
* getSchemaInfoByValue, does not update the byValue map since the given
* Schema is coming from the C API and may be a temporary object. Updates
* the SchemaInfo to contain the given cSchema, unless another thread gets
* in first and updates it. If alwaysCacheCSchema is true, the given
* cSchema is always added to the byCSchema map, regardless of whether the
* SchemaInfo already has a non-zero cSchema.
*/
private SchemaInfo getByValueAndUpdateCSchema(Schema schemaValue,
long cSchema,
boolean alwaysCacheCSchema) {
/*
* This operation takes place after a cache miss. Do all checks while
* synchronized.
*/
synchronized (this) {
/*
* First get the SchemaInfo by value. Call findByValue to do a
* lookup without updating the byValue map.
*/
SchemaInfo info = contents.findByValue(schemaValue, true);
if (info == null) {
/* Try updating the stored schemas. */
for (Consistency consistency : accessor.getConsistencyRamp()) {
contents =
contents.updateStoredSchemas(accessor, consistency);
info = contents.findByValue(schemaValue, true);
if (info != null) {
break;
}
}
if (info == null) {
/*
* Final attempt refreshes all schemas from scratch when an
* older schema ID has recently been enabled.
*/
contents = contents.refreshStoredSchemas
(accessor, accessor.getHighestConsistency());
info = contents.findByValue(schemaValue, true);
if (info == null) {
/* Schema is not present in the store. */
return null;
}
}
}
/*
* We have a SchemaInfo. Now update its cSchema and add the
* cSchema to the byCSchema map.
*/
contents = contents.updateCSchema(cSchema, info,
alwaysCacheCSchema);
return info;
}
}
/** See CBindingBridge. */
public CBindingBridge getCBindingBridge() {
return cBindingBridge;
}
/** See CBindingBridge. */
private class CBindingBridgeImpl implements CBindingBridge {
@Override
public Schema getJavaSchema(long cSchema) {
final SchemaInfo info = contents.byCSchema.get(cSchema);
if (info == null) {
return null;
}
return info.getSchema();
}
@Override
public Schema putSchema(String schemaText, long cSchema)
throws UndefinedSchemaException, IllegalArgumentException {
final Schema javaSchema;
try {
javaSchema = new Schema.Parser().parse(schemaText);
} catch (RuntimeException e) {
throw new IllegalArgumentException("Error parsing schema", e);
}
final SchemaInfo info = getByValueAndUpdateCSchema
(javaSchema, cSchema, true /*alwaysCacheCSchema*/);
if (info == null) {
throw AvroCatalogImpl.newUndefinedSchemaException(javaSchema);
}
return info.getSchema();
}
@Override
public long getCSchema(Schema javaSchema)
throws UndefinedSchemaException {
final SchemaInfo info = getSchemaInfoByValue(javaSchema);
if (info == null) {
throw AvroCatalogImpl.newUndefinedSchemaException(javaSchema);
}
return info.getCSchema();
}
@Override
public long putSchema(Schema javaSchema, long cSchema)
throws UndefinedSchemaException {
final SchemaInfo info = getByValueAndUpdateCSchema
(javaSchema, cSchema, false /*alwaysCacheCSchema*/);
if (info == null) {
throw AvroCatalogImpl.newUndefinedSchemaException(javaSchema);
}
return info.getCSchema();
}
@Override
public long[] getCachedCSchemas() {
final Map map = contents.byCSchema;
final long[] array = new long[map.size()];
int i = 0;
for (final long x : map.keySet()) {
array[i++] = x;
}
return array;
}
@Override
public int getValueRawDataOffset(Value value) {
return RawBinding.getValueRawDataOffset(value);
}
@Override
public Schema getValueSchema(Value value)
throws IllegalArgumentException {
return RawBinding.getValueSchema(value, SchemaCache.this);
}
@Override
public Value allocateValue(Schema schema, int rawDataSize)
throws UndefinedSchemaException {
return RawBinding.allocateValue(schema, rawDataSize,
SchemaCache.this);
}
}
/**
* An immutable object containing the contents of the cache.
*/
private static class Contents {
/**
* Map of full schema name to current schema info, which is the head of
* a chain of schemas (different versions) with the same name.
*/
final Map byName;
/** Map of schema ID to schema info, for every schema version. */
final Map byId;
/** Map of user schema to stored schema. */
final Map byValue;
/** Map of schema pointer in C API to stored schema. */
final Map byCSchema;
/** Map of full schema name to current schema. Derived from byName. */
final Map currentSchemas;
/** Next schema ID available, i.e., one more than highest known ID. */
final int nextSchemaId;
/** Constructor to initialize an empty Contents object. */
Contents() {
byName = Collections.emptyMap();
byId = Collections.emptyMap();
byValue = Collections.emptyMap();
byCSchema = Collections.emptyMap();
currentSchemas = Collections.emptyMap();
nextSchemaId = SchemaAccessor.FIRST_SCHEMA_ID;
}
/**
* Copy constructor that allows optionally specifying each field value.
* If a parameter is zero/false/null, the field is copied from
* prevContents; otherwise it is set to the given arg value.
*/
@SuppressWarnings("null")
private Contents(Contents prevContents,
Map byName,
Map byId,
Map byValue,
Map byCSchema,
boolean deriveCurrentSchemas,
int nextSchemaId) {
this.byName = (byName != null) ? byName : prevContents.byName;
this.byId = (byId != null) ? byId : prevContents.byId;
this.byValue = (byValue != null) ? byValue : prevContents.byValue;
this.byCSchema =
(byCSchema != null) ? byCSchema : prevContents.byCSchema;
this.nextSchemaId =
(nextSchemaId != 0) ? nextSchemaId : prevContents.nextSchemaId;
if (deriveCurrentSchemas) {
final Map newCurrentSchemas =
new HashMap(byName.size());
for (final Map.Entry entry :
byName.entrySet()) {
newCurrentSchemas.put(entry.getKey(),
entry.getValue().getSchema());
}
this.currentSchemas =
Collections.unmodifiableMap(newCurrentSchemas);
} else {
this.currentSchemas = prevContents.currentSchemas;
}
}
/**
* Returns a new Contents object containing the schemas in this
* Contents object plus any schemas that have been added via the admin
* interface since the cache was updated. If no new schemas are
* available, the new Contents object only has an updated timestamp.
*/
Contents updateStoredSchemas(SchemaAccessor accessor,
Consistency consistency) {
/*
* Read schemas that have been added since we last called
* readActiveSchemas. If none, no update is needed.
*/
final SortedMap newSchemas =
accessor.readActiveSchemas
(nextSchemaId, true /*includeStart*/, consistency);
if (newSchemas.isEmpty()) {
return this;
}
return addSchemas(newSchemas);
}
/**
* Returns a new Contents object containing the schemas in this
* Contents object plus any schemas that have been added or re-enabled
* via the admin interface since the cache was updated. If no new or
* re-enabled schemas are available, the new Contents object only has
* an updated timestamp.
*/
Contents refreshStoredSchemas(SchemaAccessor accessor,
Consistency consistency) {
/*
* Read all schemas. If all schema IDs match, no update is needed.
*/
final SortedMap allSchemas =
accessor.readActiveSchemas
(SchemaAccessor.FIRST_SCHEMA_ID, true /*includeStart*/,
consistency);
if (allSchemas.keySet().equals(byId.keySet())) {
return this;
}
/*
* If schema IDs do not match, check to see whether a full cache
* refresh is needed. The newIds set below contains the IDs just
* queried that are not currently in the cache. A full refresh is
* needed in two cases:
* + newIds is empty, which means the set of available schemas has
* been reduced by disabling one or more schemas;
* + the first new ID is less than nextSchemaId, which means an
* older schema has been disabled.
* These cases should be extremely rare so we don't mind starting
* from scratch.
*/
final SortedSet newIds =
new TreeSet(allSchemas.keySet());
newIds.removeAll(byId.keySet());
if (newIds.isEmpty() || newIds.first() < nextSchemaId) {
/* Full refresh is needed. */
return new Contents().addSchemas(allSchemas);
}
/* Only add new IDs. */
return addSchemas(allSchemas.tailMap(nextSchemaId));
}
/**
* Common method for adding schemas to an existing Contents or
* refreshing from scratch (when this Contents is empty).
*/
private Contents
addSchemas(SortedMap newSchemas) {
/*
* Copy this byName and byId maps, and add new stored schemas.
*/
final Map newByName =
new HashMap(byName);
final Map newById =
new HashMap(byId);
for (final Map.Entry entry :
newSchemas.entrySet()) {
final Integer id = entry.getKey();
final Schema schema = entry.getValue().getSchema();
final String name = schema.getFullName();
final SchemaInfo prevVersion = newByName.get(name);
final SchemaInfo info = new SchemaInfo(schema, id,
prevVersion);
newByName.put(name, info);
newById.put(id, info);
}
/* Update all fields except for byValue and byCSchema. */
return new Contents(this, Collections.unmodifiableMap(newByName),
Collections.unmodifiableMap(newById),
null, null, true, newSchemas.lastKey() + 1);
}
/**
* Returns a new Contents object containing the schemas in this
* Contents object plus a byValue mapping for the given schemaValue.
* If a stored schema matching schemaValue cannot be found, this
* Contents object is returned without modification.
*/
Contents updateUserSchemas(Schema schemaValue) {
/* Find by value. If no match, return the unmodified contents. */
final SchemaInfo info = findByValue(schemaValue, false);
if (info == null) {
return this;
}
/* Copy this byValue map and add new user schema. */
final Map newByValue =
new IdentityHashMap(byValue);
newByValue.put(schemaValue, info);
/* Update only the byValue field. */
return new Contents(this, null, null,
Collections.unmodifiableMap(newByValue),
null, false, 0);
}
/**
* Update the given SchemaInfo's cSchema and add the cSchema to the
* byCSchema map. The SchemaInfo is not updated if it already contains
* a non-zero cSchema because another thread got in first. In that
* case, if alwaysCacheCSchema is false then the given cSchema is not
* added to the byCSchema map.
*/
Contents updateCSchema(long cSchema,
SchemaInfo info,
boolean alwaysCacheCSchema) {
if (info.getCSchema() == 0) {
info.setCSchema(cSchema);
} else {
if (!alwaysCacheCSchema) {
return this;
}
}
/*
* We've decided to add cSchema to the byCSchema map, if it's not
* already present.
*/
if (byCSchema != null && byCSchema.containsKey(cSchema)) {
return this;
}
/* Copy this byCSchema map and add cSchema mapping. */
final Map newByCSchema =
(new HashMap(byCSchema));
newByCSchema.put(cSchema, info);
/* Update only the byCSchema field. */
return new Contents(this, null, null, null,
Collections.unmodifiableMap(newByCSchema),
false, 0);
}
/**
* Find by value, examining each schema version with the same name
* as the given schema.
*/
SchemaInfo findByValue(Schema schemaValue, boolean allowNullDefault) {
SchemaInfo info = byName.get(schemaValue.getFullName());
while (info != null) {
if (SchemaChecker.equalSerializationWithDefault
(schemaValue, info.getSchema(), allowNullDefault)) {
return info;
}
info = info.getPreviousVersion();
}
return null;
}
}
/** For testing. */
int getByIdSize() {
return contents.byId.size();
}
/** For testing. */
int getByNameSize() {
return contents.byName.size();
}
/** For testing. */
int getByValueSize() {
return contents.byValue.size();
}
/** For testing. */
int getByCSchemaSize() {
return contents.byCSchema.size();
}
/** For testing. */
void setCacheMissHook(TestHook hook) {
cacheMissHook = hook;
}
}