All Downloads are FREE. Search and download functionalities are using the official Maven repository.

oracle.kv.impl.api.avro.SchemaCache Maven / Gradle / Ivy

/*-
 * Copyright (C) 2011, 2018 Oracle and/or its affiliates. All rights reserved.
 *
 * This file was distributed by Oracle as part of a version of Oracle NoSQL
 * Database made available at:
 *
 * http://www.oracle.com/technetwork/database/database-technologies/nosqldb/downloads/index.html
 *
 * Please see the LICENSE file included in the top-level directory of the
 * appropriate version of Oracle NoSQL Database for a copy of the license and
 * additional information.
 */

package oracle.kv.impl.api.avro;

import java.util.Collections;
import java.util.HashMap;
import java.util.IdentityHashMap;
import java.util.Map;
import java.util.SortedMap;
import java.util.SortedSet;
import java.util.TreeSet;

import org.apache.avro.Schema;

import oracle.kv.Consistency;
import oracle.kv.Value;
import oracle.kv.avro.UndefinedSchemaException;
import oracle.kv.impl.test.TestHook;

/**
 * Keeps a cache of all schemas for use by clients that use the Avro bindings,
 * and (in the future) for use by queries and indexers running on an RN.
 * 

* The cache uses a copy-on-write approach for all cached data, to avoid any * blocking among threads using the cache as long as there are no cache misses. * Copy-on-write is used rather than a ReadWriteLock or concurrent collections * for several reasons: *

    *
  • * Cache hits vastly outnumber misses/updates. *
  • * Cache reads are very small/quick operations and the added overhead of * synchronization on read might be noticeable. *
  • * The cost of copying the cache is low. It is not expected to be large and * only a shallow copy is needed because Schema objects are immutable. *
* *

Stored Schemas

* * There are two types of cached information, stored schemas and user schemas. * Stored schemas are queried using the SchemaAccessor and cached in two maps, * one by schema ID and the other by schema name. *

* When there is a cache miss, we query any recently added schema kv pairs in * the store, while synchronized on the cache object itself. The expectation * is that cache misses are infrequent and cache updates even less frequent * (because schema changes are so infrequent), so blocking will normally only * occur when the cache is initially populated. Cache updates, when necessary, * are performed while synchronized to prevent multiple threads from reading * the schema kv pairs concurrently, since this would be wasteful and could * impact performance on the RN holding the schema kv pairs. *

* Blocking and schema kv pair queries may occur frequently if multiple caller * threads repeatedly try to use a schema that is undefined in the store. This * should be unusual and is considered a programming error, so it not worth * trying to optimize. There is a warning to this effect in the * UndefinedSchemaException javadoc. * *

User Schemas

* * Users pass Schema objects to the binding APIs for use with Avro as writer * schemas and reader schemas. We must ensure those schemas are known * (stored). To do this we maintain an identity map from user schemas to * stored schemas. This allows us to quickly discover whether a user specified * schema is known, but allows users to pass arbitrary schema objects to the * binding APIs. Users typically create schema objects using Avro. *

* Before adding the association between a user's schema and a stored schema to * the identity map, we ensure the user's schema is equal to the stored schema. * This is considered a lookup by schema value, since a deep comparison between * schemas is performed. When a schema has multiple stored versions, multiple * schemas may need to be compared to find the version specified by the user. * Once an association has been added to the identity map, a lookup of the user * schema is very quick and does not require a schema comparison. *

* Although the map containing user schemas is updated when a user specifies a * new schema, rather than by querying stored schemas that were recently added, * the same copy-on-write approach and synchronization (on the cache object) is * used. Potential blocking could be reduced by synchronizing on two different * objects -- one for updating the stored schemas and another for updating user * schemas -- but this would add complexity and potential ordering issues. * Both types of cache updates are so infrequent that this is not worth the * trouble. *

* Blocking while adding a user schema may occur frequently if the user creates * new schema objects often, e.g., for every operation. This may also use * large amounts of memory for caching the user schemas and may eventually fill * the JVM heap. This is considered a programming error and is not explicitly * handled. There is a warning to this effect in the AvroCatalog javadoc. */ @SuppressWarnings("deprecation") class SchemaCache { /** Used to read schema kv pairs from the store. */ private final SchemaAccessor accessor; /** For use by Avro C API. */ private final CBindingBridge cBindingBridge; /** * Current cache contents. This field is reassigned with a new Contents * object when there is a change, and the assignment is performed while * synchronized. */ private volatile Contents contents; private TestHook cacheMissHook; /** * Initializes the cache with all currently stored schemas. Invoked when * the AvroCatalog is first opened by a client app. */ SchemaCache(SchemaAccessor accessor) { this.accessor = accessor; cBindingBridge = new CBindingBridgeImpl(); contents = new Contents().updateStoredSchemas (accessor, accessor.getLowestConsistency()); } /** * Updates the cache with stored schemas added since the cache was last * initialized or updated. Invoked when a client calls * AvroCatalog.refreshSchemaCache. *

* Calling this method often from multiple threads may cause blocking, and * calling it often (even from one thread) could have an impact on store * performance. The AvroCatalog.refreshSchemaCache method javadoc contains * warnings to this effect. */ void updateStoredSchemas(Consistency consistency) { synchronized (this) { /* Update the cache while synchronized. */ contents = contents.updateStoredSchemas(accessor, consistency); } } /** * Returns a map of stored schemas by name. The most recent version of * each schema is contained in the map, according to the current contents * of the cache. The cache is not updated by this method. */ Map getCurrentSchemas() { return contents.currentSchemas; } /** * Gets a stored schema by ID. If a schema with the given ID is not * present in the cache, try updating the cache. If no such ID is known, * return null. */ SchemaInfo getSchemaInfoById(int schemaId) { /* First check for a cache hit without any synchronization. */ SchemaInfo info = contents.byId.get(schemaId); if (info != null) { return info; } if (cacheMissHook != null) { cacheMissHook.doHook(null); } /* Synchronize when there is a cache miss. */ synchronized (this) { /* * Return cached schema if another thread added the schema to the * cache while we waited to get the mutex. The double-check is * safe because the contents field is volatile. */ info = contents.byId.get(schemaId); if (info != null) { return info; } /* Update the cache while synchronized. */ for (Consistency consistency : accessor.getConsistencyRamp()) { contents = contents.updateStoredSchemas(accessor, consistency); info = contents.byId.get(schemaId); if (info != null) { return info; } } /* * Final attempt refreshes all schemas from scratch when an older * schema ID has recently been enabled. */ contents = contents.refreshStoredSchemas (accessor, accessor.getHighestConsistency()); info = contents.byId.get(schemaId); if (info != null) { return info; } } return null; } /** * Gets a stored schema by value, using a given Schema for comparision. * Returns a stored schema that is equal to the given schema, where * equality is the same as Schema.equals with an important exception: Avro * string type properties are disregarded. If such a schema is not present * in the cache, try updating the cache. If no such schema is known, * return null. */ SchemaInfo getSchemaInfoByValue(Schema schemaValue) { /* First check for a cache hit without any synchronization. */ SchemaInfo info = contents.byValue.get(schemaValue); if (info != null) { return info; } if (cacheMissHook != null) { cacheMissHook.doHook(null); } /* Synchronize when there is a cache miss. */ synchronized (this) { /* * Return cached schema if another thread added the schema to the * cache while we waited to get the mutex. The double-check is * safe because the contents field is volatile. */ info = contents.byValue.get(schemaValue); if (info != null) { return info; } /* * Update the cache while synchronized. First try updating the * by-value cache using the cached stored schemas. If that fails, * try updating the stored schemas and then the by-value cache. */ contents = contents.updateUserSchemas(schemaValue); info = contents.byValue.get(schemaValue); if (info != null) { return info; } for (Consistency consistency : accessor.getConsistencyRamp()) { contents = contents.updateStoredSchemas(accessor, consistency); contents = contents.updateUserSchemas(schemaValue); info = contents.byValue.get(schemaValue); if (info != null) { return info; } } /* * Final attempt refreshes all schemas from scratch when an older * schema ID has recently been enabled. */ contents = contents.refreshStoredSchemas (accessor, accessor.getHighestConsistency()); contents = contents.updateUserSchemas(schemaValue); info = contents.byValue.get(schemaValue); if (info != null) { return info; } } return null; } /** * Gets a stored schema by value like getSchemaInfoByValue. Unlike * getSchemaInfoByValue, does not update the byValue map since the given * Schema is coming from the C API and may be a temporary object. Updates * the SchemaInfo to contain the given cSchema, unless another thread gets * in first and updates it. If alwaysCacheCSchema is true, the given * cSchema is always added to the byCSchema map, regardless of whether the * SchemaInfo already has a non-zero cSchema. */ private SchemaInfo getByValueAndUpdateCSchema(Schema schemaValue, long cSchema, boolean alwaysCacheCSchema) { /* * This operation takes place after a cache miss. Do all checks while * synchronized. */ synchronized (this) { /* * First get the SchemaInfo by value. Call findByValue to do a * lookup without updating the byValue map. */ SchemaInfo info = contents.findByValue(schemaValue, true); if (info == null) { /* Try updating the stored schemas. */ for (Consistency consistency : accessor.getConsistencyRamp()) { contents = contents.updateStoredSchemas(accessor, consistency); info = contents.findByValue(schemaValue, true); if (info != null) { break; } } if (info == null) { /* * Final attempt refreshes all schemas from scratch when an * older schema ID has recently been enabled. */ contents = contents.refreshStoredSchemas (accessor, accessor.getHighestConsistency()); info = contents.findByValue(schemaValue, true); if (info == null) { /* Schema is not present in the store. */ return null; } } } /* * We have a SchemaInfo. Now update its cSchema and add the * cSchema to the byCSchema map. */ contents = contents.updateCSchema(cSchema, info, alwaysCacheCSchema); return info; } } /** See CBindingBridge. */ public CBindingBridge getCBindingBridge() { return cBindingBridge; } /** See CBindingBridge. */ private class CBindingBridgeImpl implements CBindingBridge { @Override public Schema getJavaSchema(long cSchema) { final SchemaInfo info = contents.byCSchema.get(cSchema); if (info == null) { return null; } return info.getSchema(); } @Override public Schema putSchema(String schemaText, long cSchema) throws UndefinedSchemaException, IllegalArgumentException { final Schema javaSchema; try { javaSchema = new Schema.Parser().parse(schemaText); } catch (RuntimeException e) { throw new IllegalArgumentException("Error parsing schema", e); } final SchemaInfo info = getByValueAndUpdateCSchema (javaSchema, cSchema, true /*alwaysCacheCSchema*/); if (info == null) { throw AvroCatalogImpl.newUndefinedSchemaException(javaSchema); } return info.getSchema(); } @Override public long getCSchema(Schema javaSchema) throws UndefinedSchemaException { final SchemaInfo info = getSchemaInfoByValue(javaSchema); if (info == null) { throw AvroCatalogImpl.newUndefinedSchemaException(javaSchema); } return info.getCSchema(); } @Override public long putSchema(Schema javaSchema, long cSchema) throws UndefinedSchemaException { final SchemaInfo info = getByValueAndUpdateCSchema (javaSchema, cSchema, false /*alwaysCacheCSchema*/); if (info == null) { throw AvroCatalogImpl.newUndefinedSchemaException(javaSchema); } return info.getCSchema(); } @Override public long[] getCachedCSchemas() { final Map map = contents.byCSchema; final long[] array = new long[map.size()]; int i = 0; for (final long x : map.keySet()) { array[i++] = x; } return array; } @Override public int getValueRawDataOffset(Value value) { return RawBinding.getValueRawDataOffset(value); } @Override public Schema getValueSchema(Value value) throws IllegalArgumentException { return RawBinding.getValueSchema(value, SchemaCache.this); } @Override public Value allocateValue(Schema schema, int rawDataSize) throws UndefinedSchemaException { return RawBinding.allocateValue(schema, rawDataSize, SchemaCache.this); } } /** * An immutable object containing the contents of the cache. */ private static class Contents { /** * Map of full schema name to current schema info, which is the head of * a chain of schemas (different versions) with the same name. */ final Map byName; /** Map of schema ID to schema info, for every schema version. */ final Map byId; /** Map of user schema to stored schema. */ final Map byValue; /** Map of schema pointer in C API to stored schema. */ final Map byCSchema; /** Map of full schema name to current schema. Derived from byName. */ final Map currentSchemas; /** Next schema ID available, i.e., one more than highest known ID. */ final int nextSchemaId; /** Constructor to initialize an empty Contents object. */ Contents() { byName = Collections.emptyMap(); byId = Collections.emptyMap(); byValue = Collections.emptyMap(); byCSchema = Collections.emptyMap(); currentSchemas = Collections.emptyMap(); nextSchemaId = SchemaAccessor.FIRST_SCHEMA_ID; } /** * Copy constructor that allows optionally specifying each field value. * If a parameter is zero/false/null, the field is copied from * prevContents; otherwise it is set to the given arg value. */ @SuppressWarnings("null") private Contents(Contents prevContents, Map byName, Map byId, Map byValue, Map byCSchema, boolean deriveCurrentSchemas, int nextSchemaId) { this.byName = (byName != null) ? byName : prevContents.byName; this.byId = (byId != null) ? byId : prevContents.byId; this.byValue = (byValue != null) ? byValue : prevContents.byValue; this.byCSchema = (byCSchema != null) ? byCSchema : prevContents.byCSchema; this.nextSchemaId = (nextSchemaId != 0) ? nextSchemaId : prevContents.nextSchemaId; if (deriveCurrentSchemas) { final Map newCurrentSchemas = new HashMap(byName.size()); for (final Map.Entry entry : byName.entrySet()) { newCurrentSchemas.put(entry.getKey(), entry.getValue().getSchema()); } this.currentSchemas = Collections.unmodifiableMap(newCurrentSchemas); } else { this.currentSchemas = prevContents.currentSchemas; } } /** * Returns a new Contents object containing the schemas in this * Contents object plus any schemas that have been added via the admin * interface since the cache was updated. If no new schemas are * available, the new Contents object only has an updated timestamp. */ Contents updateStoredSchemas(SchemaAccessor accessor, Consistency consistency) { /* * Read schemas that have been added since we last called * readActiveSchemas. If none, no update is needed. */ final SortedMap newSchemas = accessor.readActiveSchemas (nextSchemaId, true /*includeStart*/, consistency); if (newSchemas.isEmpty()) { return this; } return addSchemas(newSchemas); } /** * Returns a new Contents object containing the schemas in this * Contents object plus any schemas that have been added or re-enabled * via the admin interface since the cache was updated. If no new or * re-enabled schemas are available, the new Contents object only has * an updated timestamp. */ Contents refreshStoredSchemas(SchemaAccessor accessor, Consistency consistency) { /* * Read all schemas. If all schema IDs match, no update is needed. */ final SortedMap allSchemas = accessor.readActiveSchemas (SchemaAccessor.FIRST_SCHEMA_ID, true /*includeStart*/, consistency); if (allSchemas.keySet().equals(byId.keySet())) { return this; } /* * If schema IDs do not match, check to see whether a full cache * refresh is needed. The newIds set below contains the IDs just * queried that are not currently in the cache. A full refresh is * needed in two cases: * + newIds is empty, which means the set of available schemas has * been reduced by disabling one or more schemas; * + the first new ID is less than nextSchemaId, which means an * older schema has been disabled. * These cases should be extremely rare so we don't mind starting * from scratch. */ final SortedSet newIds = new TreeSet(allSchemas.keySet()); newIds.removeAll(byId.keySet()); if (newIds.isEmpty() || newIds.first() < nextSchemaId) { /* Full refresh is needed. */ return new Contents().addSchemas(allSchemas); } /* Only add new IDs. */ return addSchemas(allSchemas.tailMap(nextSchemaId)); } /** * Common method for adding schemas to an existing Contents or * refreshing from scratch (when this Contents is empty). */ private Contents addSchemas(SortedMap newSchemas) { /* * Copy this byName and byId maps, and add new stored schemas. */ final Map newByName = new HashMap(byName); final Map newById = new HashMap(byId); for (final Map.Entry entry : newSchemas.entrySet()) { final Integer id = entry.getKey(); final Schema schema = entry.getValue().getSchema(); final String name = schema.getFullName(); final SchemaInfo prevVersion = newByName.get(name); final SchemaInfo info = new SchemaInfo(schema, id, prevVersion); newByName.put(name, info); newById.put(id, info); } /* Update all fields except for byValue and byCSchema. */ return new Contents(this, Collections.unmodifiableMap(newByName), Collections.unmodifiableMap(newById), null, null, true, newSchemas.lastKey() + 1); } /** * Returns a new Contents object containing the schemas in this * Contents object plus a byValue mapping for the given schemaValue. * If a stored schema matching schemaValue cannot be found, this * Contents object is returned without modification. */ Contents updateUserSchemas(Schema schemaValue) { /* Find by value. If no match, return the unmodified contents. */ final SchemaInfo info = findByValue(schemaValue, false); if (info == null) { return this; } /* Copy this byValue map and add new user schema. */ final Map newByValue = new IdentityHashMap(byValue); newByValue.put(schemaValue, info); /* Update only the byValue field. */ return new Contents(this, null, null, Collections.unmodifiableMap(newByValue), null, false, 0); } /** * Update the given SchemaInfo's cSchema and add the cSchema to the * byCSchema map. The SchemaInfo is not updated if it already contains * a non-zero cSchema because another thread got in first. In that * case, if alwaysCacheCSchema is false then the given cSchema is not * added to the byCSchema map. */ Contents updateCSchema(long cSchema, SchemaInfo info, boolean alwaysCacheCSchema) { if (info.getCSchema() == 0) { info.setCSchema(cSchema); } else { if (!alwaysCacheCSchema) { return this; } } /* * We've decided to add cSchema to the byCSchema map, if it's not * already present. */ if (byCSchema != null && byCSchema.containsKey(cSchema)) { return this; } /* Copy this byCSchema map and add cSchema mapping. */ final Map newByCSchema = (new HashMap(byCSchema)); newByCSchema.put(cSchema, info); /* Update only the byCSchema field. */ return new Contents(this, null, null, null, Collections.unmodifiableMap(newByCSchema), false, 0); } /** * Find by value, examining each schema version with the same name * as the given schema. */ SchemaInfo findByValue(Schema schemaValue, boolean allowNullDefault) { SchemaInfo info = byName.get(schemaValue.getFullName()); while (info != null) { if (SchemaChecker.equalSerializationWithDefault (schemaValue, info.getSchema(), allowNullDefault)) { return info; } info = info.getPreviousVersion(); } return null; } } /** For testing. */ int getByIdSize() { return contents.byId.size(); } /** For testing. */ int getByNameSize() { return contents.byName.size(); } /** For testing. */ int getByValueSize() { return contents.byValue.size(); } /** For testing. */ int getByCSchemaSize() { return contents.byCSchema.size(); } /** For testing. */ void setCacheMissHook(TestHook hook) { cacheMissHook = hook; } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy