
org.apache.avro.ParseContext Maven / Gradle / Ivy
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.avro;
import org.apache.avro.util.SchemaResolver;
import org.apache.avro.util.Schemas;
import java.util.ArrayList;
import java.util.Collection;
import java.util.EnumSet;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
import static java.util.Objects.requireNonNull;
/**
* Class to define a name context, useful to reference schemata with. This
* allows for the following:
*
*
* - Collect new named schemata.
* - Find schemata by name, including primitives.
* - Find schemas that do not exist yet.
* - Resolve references to schemas that didn't exist yet when first used.
*
*
*
* This class is NOT thread-safe.
*
*
*
* Note: this class has no use for most Avro users, but is a key component when
* implementing a schema parser.
*
*
* @see JSON based
* schema definition
**/
public class ParseContext {
private static final Map PRIMITIVES = new HashMap<>();
static {
PRIMITIVES.put("string", Schema.Type.STRING);
PRIMITIVES.put("bytes", Schema.Type.BYTES);
PRIMITIVES.put("int", Schema.Type.INT);
PRIMITIVES.put("long", Schema.Type.LONG);
PRIMITIVES.put("float", Schema.Type.FLOAT);
PRIMITIVES.put("double", Schema.Type.DOUBLE);
PRIMITIVES.put("boolean", Schema.Type.BOOLEAN);
PRIMITIVES.put("null", Schema.Type.NULL);
}
private static final Set NAMED_SCHEMA_TYPES = EnumSet.of(Schema.Type.RECORD, Schema.Type.ENUM,
Schema.Type.FIXED);
/**
* Collection of old schemata. Can contain unresolved references if !isResolved.
*/
private final Map oldSchemas;
/**
* Collection of new schemata. Can contain unresolved references.
*/
private final Map newSchemas;
/**
* The name validator to use.
*/
// Visible for use in JsonSchemaParser
final NameValidator nameValidator;
/**
* Visitor that was used to resolve schemata with. If not available, some
* schemata in {@code oldSchemas} may not be fully resolved. If available, all
* schemata in {@code oldSchemas} are resolved, and {@code newSchemas} is empty.
* After visiting a schema, it can return the corresponding resolved schema for
* a schema that possibly contains unresolved references.
*/
private SchemaResolver.ResolvingVisitor resolvingVisitor;
/**
* Create a {@code ParseContext} for the default/{@code null} namespace, using
* default name validation for new schemata.
*/
public ParseContext() {
this(NameValidator.UTF_VALIDATOR);
}
/**
* Create a {@code ParseContext} using the specified name validation for new
* schemata.
*/
public ParseContext(NameValidator nameValidator) {
this(requireNonNull(nameValidator), new LinkedHashMap<>(), new LinkedHashMap<>());
}
private ParseContext(NameValidator nameValidator, Map oldSchemas, Map newSchemas) {
this.nameValidator = nameValidator;
this.oldSchemas = oldSchemas;
this.newSchemas = newSchemas;
resolvingVisitor = null;
}
/**
* Tell whether this context contains a schema with the given name.
*
* @param name a schema name
* @return {@code true} if the context contains a schema with this name,
* {@code false} otherwise
*/
public boolean contains(String name) {
return PRIMITIVES.containsKey(name) || oldSchemas.containsKey(name) || newSchemas.containsKey(name);
}
/**
*
* Find a schema by name and namespace.
*
*
*
* That is:
*
*
*
* - If {@code name} is a primitive name, return a (new) schema for it
* - Otherwise, determine the full schema name (using the given
* {@code namespace} if necessary), and find it
* - If no schema was found and {@code name} is a simple name, find the schema
* in the default (null) namespace
* - If still no schema was found, return an unresolved reference for the full
* schema name (see step 2)
*
*
*
* Note: as an unresolved reference might be returned, the schema is not
* directly usable. Please {@link #put(Schema)} the schema using it in the
* context. The {@link SchemaParser} and protocol parsers will ensure you'll
* only get a resolved schema that is usable.
*
*
* @param name the schema name to find
* @param namespace the namespace to find the schema against
* @return the schema, or an unresolved reference
*/
public Schema find(String name, String namespace) {
Schema.Type type = PRIMITIVES.get(name);
if (type != null) {
return Schema.create(type);
}
String fullName = fullName(name, namespace);
Schema schema = getNamedSchema(fullName);
if (schema == null) {
schema = getNamedSchema(name);
}
return schema != null ? schema : SchemaResolver.unresolvedSchema(fullName);
}
private String fullName(String name, String namespace) {
if (namespace != null && name.lastIndexOf('.') < 0) {
return namespace + "." + name;
}
return name;
}
/**
* Get a schema by name. Note that the schema might not (yet) be resolved/usable
* until {@link #resolveAllSchemas()} has been called.
*
* @param fullName a full schema name
* @return the schema, if known
*/
public Schema getNamedSchema(String fullName) {
Schema schema = oldSchemas.get(fullName);
if (schema == null) {
schema = newSchemas.get(fullName);
}
return schema;
}
/**
* Put the schema into this context. This is an idempotent operation: it only
* fails if this context already has a different schema with the same name.
*
*
* Note that although this method works for all types except for arrays, maps
* and unions, all primitive types have already been defined upon construction.
* This means you cannot redefine a 'long' with a logical timestamp type.
*
*
* @param schema the schema to put into the context
*/
public void put(Schema schema) {
if (!(NAMED_SCHEMA_TYPES.contains(schema.getType()))) {
throw new AvroTypeException("You can only put a named schema into the context");
}
String fullName = requireValidFullName(schema.getFullName());
Schema alreadyKnownSchema = oldSchemas.get(fullName);
if (alreadyKnownSchema != null) {
if (!schema.equals(alreadyKnownSchema)) {
throw new SchemaParseException("Can't redefine: " + fullName);
}
} else {
resolvingVisitor = null;
Schema previouslyAddedSchema = newSchemas.putIfAbsent(fullName, schema);
if (previouslyAddedSchema != null && !previouslyAddedSchema.equals(schema)) {
throw new SchemaParseException("Can't redefine: " + fullName);
}
}
}
private String requireValidFullName(String fullName) {
String[] names = fullName.split("\\.");
for (int i = 0; i < names.length - 1; i++) {
validateName(names[i], "Namespace part");
}
validateName(names[names.length - 1], "Name");
return fullName;
}
private void validateName(String name, String typeOfName) {
NameValidator.Result result = nameValidator.validate(name);
if (!result.isOK()) {
throw new SchemaParseException(typeOfName + " \"" + name + "\" is invalid: " + result.getErrors());
}
}
public boolean hasNewSchemas() {
return !newSchemas.isEmpty();
}
public void commit() {
oldSchemas.putAll(newSchemas);
newSchemas.clear();
}
public SchemaParser.ParseResult commit(Schema mainSchema) {
Collection parsedNamedSchemas = newSchemas.values();
SchemaParser.ParseResult parseResult = new SchemaParser.ParseResult() {
@Override
public Schema mainSchema() {
return mainSchema == null ? null : resolve(mainSchema);
}
@Override
public List parsedNamedSchemas() {
return parsedNamedSchemas.stream().map(ParseContext.this::resolve).collect(Collectors.toList());
}
};
commit();
return parseResult;
}
public void rollback() {
newSchemas.clear();
}
/**
* Resolve all (named) schemas that were parsed. This resolves all forward
* references, even if parsed from different files. Note: the context must be
* committed for this method to work.
*
* @return all parsed schemas, in the order they were parsed
* @throws AvroTypeException if a schema reference cannot be resolved
*/
public List resolveAllSchemas() {
ensureSchemasAreResolved();
return new ArrayList<>(oldSchemas.values());
}
private void ensureSchemasAreResolved() {
if (hasNewSchemas()) {
throw new IllegalStateException("Schemas cannot be resolved unless the ParseContext is committed.");
}
if (resolvingVisitor == null) {
NameValidator saved = Schema.getNameValidator();
try {
// Ensure we use the same validation when copying schemas as when they were
// defined.
Schema.setNameValidator(nameValidator);
SchemaResolver.ResolvingVisitor visitor = new SchemaResolver.ResolvingVisitor(oldSchemas::get);
oldSchemas.values().forEach(schema -> Schemas.visit(schema, visitor));
// Before this point is where we can get exceptions due to resolving failures.
for (Map.Entry entry : oldSchemas.entrySet()) {
entry.setValue(visitor.getResolved(entry.getValue()));
}
resolvingVisitor = visitor;
} finally {
Schema.setNameValidator(saved);
}
}
}
/**
* Resolve unresolved references in a schema that was parsed for this
* context using the types known to this context. Note: this method will
* ensure all known schemas are resolved, or throw, and thus requires the
* context to be committed.
*
* @param schema the schema resolve
* @return the fully resolved schema
* @throws AvroTypeException if a schema reference cannot be resolved
*/
public Schema resolve(Schema schema) {
ensureSchemasAreResolved();
// As all (named) schemas are resolved now, we know:
// — All named types are either in oldSchemas or unknown.
// — All unnamed types can be visited&resolved without validation.
if (NAMED_SCHEMA_TYPES.contains(schema.getType()) && schema.getFullName() != null) {
return requireNonNull(oldSchemas.get(schema.getFullName()), () -> "Unknown schema: " + schema.getFullName());
} else {
// Unnamed or anonymous schema
// (protocol message request parameters are anonymous records)
Schemas.visit(schema, resolvingVisitor); // This field is set, as ensureSchemasAreResolved(); was called.
return resolvingVisitor.getResolved(schema);
}
}
/**
* Return all known types by their fullname. Warning: this returns all types,
* even uncommitted ones, including unresolved references!
*
* @return a map of all types by their name
*/
public Map typesByName() {
LinkedHashMap result = new LinkedHashMap<>();
result.putAll(oldSchemas);
result.putAll(newSchemas);
return result;
}
}