
org.apache.avro.SchemaParser Maven / Gradle / Ivy
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.avro;
import org.apache.avro.util.UtfTextUtils;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.Reader;
import java.net.URI;
import java.nio.charset.Charset;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.ServiceLoader;
/**
* Avro schema parser for text-based formats like JSON, IDL, etc.
*
*
* Parses formatted (i.e., text based) schemata from a given source using the
* available {@link FormattedSchemaParser} implementations, and returns the
* first result. This means it can transparently handle any schema format. The
* Avro project defines a JSON based format and an IDL format (the latter
* available as a separate dependency), but you can also provide your own.
*
*
*
* The parser can handle various text based sources. If the source contains a
* UTF encoded latin text based format it can even detect which UTF encoding was
* used (UTF-8, UTF16BE, UTF16LE, UTF-32BE or UTF32LE).
*
*
* @see FormattedSchemaParser
* @see UtfTextUtils
*/
public class SchemaParser {
private final ParseContext parseContext;
private final Collection formattedSchemaParsers;
/**
* Create a schema parser. Initially, the list of known (named) schemata is
* empty.
*/
public SchemaParser() {
this.parseContext = new ParseContext();
this.formattedSchemaParsers = new ArrayList<>();
for (FormattedSchemaParser formattedSchemaParser : ServiceLoader.load(FormattedSchemaParser.class)) {
formattedSchemaParsers.add(formattedSchemaParser);
}
// Add the default / JSON parser last (not as a service, even though it
// implements the service interface), to allow implementations that parse JSON
// files into schemata differently.
formattedSchemaParsers.add(new JsonSchemaParser());
}
/**
* Parse an Avro schema from a file. The file content is assumed to be UTF-8
* text.
*
* @param file the file to read
* @return the schema
* @throws IOException when the schema cannot be read
* @throws SchemaParseException if parsing the schema failed; contains
* suppressed underlying parse exceptions if
* available
* @see UtfTextUtils
*/
public ParseResult parse(File file) throws IOException, SchemaParseException {
return parse(file, null);
}
/**
* Parse an Avro schema from a file written with a specific character set.
*
* @param file the file to read
* @param charset the character set of the file contents
* @return the schema
* @throws IOException when the schema cannot be read
* @throws SchemaParseException if parsing the schema failed; contains
* suppressed underlying parse exceptions if
* available
*/
public ParseResult parse(File file, Charset charset) throws IOException, SchemaParseException {
return parse(file.toPath(), charset);
}
/**
* Parse an Avro schema from a file. The file content is assumed to be UTF-8
* text.
*
* @param file the file to read
* @return the schema
* @throws IOException when the schema cannot be read
* @throws SchemaParseException if parsing the schema failed; contains
* suppressed underlying parse exceptions if
* available
* @see UtfTextUtils
*/
public ParseResult parse(Path file) throws IOException, SchemaParseException {
return parse(file, null);
}
/**
* Parse an Avro schema from a file written with a specific character set.
*
* @param file the file to read
* @param charset the character set of the file contents
* @return the schema
* @throws IOException when the schema cannot be read
* @throws SchemaParseException if parsing the schema failed; contains
* suppressed underlying parse exceptions if
* available
*/
public ParseResult parse(Path file, Charset charset) throws IOException, SchemaParseException {
URI inputDir = file.getParent().toUri();
try (InputStream stream = Files.newInputStream(file)) {
String formattedSchema = UtfTextUtils.readAllBytes(stream, charset);
return parse(inputDir, formattedSchema);
}
}
/**
* Parse an Avro schema from a file written with a specific character set.
*
* @param location the location of the schema resource
* @param charset the character set of the schema resource
* @return the schema
* @throws IOException when the schema cannot be read
* @throws SchemaParseException if parsing the schema failed; contains
* suppressed underlying parse exceptions if
* available
*/
public ParseResult parse(URI location, Charset charset) throws IOException, SchemaParseException {
try (InputStream stream = location.toURL().openStream()) {
String formattedSchema = UtfTextUtils.readAllBytes(stream, charset);
return parse(location, formattedSchema);
}
}
/**
* Parse an Avro schema from an input stream. The stream content is assumed to
* be UTF-8 text. Note that the stream stays open after reading.
*
* @param in the stream to read
* @return the schema
* @throws IOException when the schema cannot be read
* @throws SchemaParseException if parsing the schema failed; contains
* suppressed underlying parse exceptions if
* available
* @see UtfTextUtils
*/
public ParseResult parse(InputStream in) throws IOException, SchemaParseException {
return parse(in, null);
}
/**
* Parse an Avro schema from an input stream. Note that the stream stays open
* after reading.
*
* @param in the stream to read
* @param charset the character set of the stream contents
* @return the schema
* @throws IOException when the schema cannot be read
* @throws SchemaParseException if parsing the schema failed; contains
* suppressed underlying parse exceptions if
* available
*/
public ParseResult parse(InputStream in, Charset charset) throws IOException, SchemaParseException {
return parse(UtfTextUtils.readAllBytes(in, charset));
}
/**
* Parse an Avro schema from an input reader.
*
* @param in the stream to read
* @return the schema
* @throws IOException when the schema cannot be read
* @throws SchemaParseException if parsing the schema failed; contains
* suppressed underlying parse exceptions if
* available
*/
public ParseResult parse(Reader in) throws IOException, SchemaParseException {
return parse(UtfTextUtils.readAllChars(in));
}
/**
* Parse an Avro schema from a string.
*
* @param text the text to parse
* @return the schema
* @throws SchemaParseException if parsing the schema failed; contains
* suppressed underlying parse exceptions if
* available
*/
public ParseResult parse(CharSequence text) throws SchemaParseException {
try {
return parse(null, text);
} catch (IOException e) {
// This can only happen if parser implementations try to read other (related)
// schemata from somewhere.
throw new AvroRuntimeException("Could not read schema", e);
}
}
/**
* Parse the given schema (string) within the specified context using all
* available {@link FormattedSchemaParser} implementations, collecting any
* {@link SchemaParseException}s that occur, and return the first successfully
* parsed schema. If all parsers fail, throw a {@code SchemaParseException} with
* all collected parse exceptions added as suppressed exceptions. Uses the base
* location of the schema (e.g., the directory where the schema file lives) if
* available.
*
* @param baseUri the base location of the schema, or {@code null} if
* not known
* @param formattedSchema the schema as text
* @return the parsed schema
* @throws IOException if thrown by one of the parsers
* @throws RuntimeException if thrown by one of the parsers
* @throws SchemaParseException when all parsers fail
*/
private ParseResult parse(URI baseUri, CharSequence formattedSchema) throws IOException, SchemaParseException {
List parseExceptions = new ArrayList<>();
for (FormattedSchemaParser formattedSchemaParser : formattedSchemaParsers) {
try {
Schema schema = formattedSchemaParser.parse(parseContext, baseUri, formattedSchema);
if (parseContext.hasNewSchemas() || schema != null) {
// Parsing succeeded: return the result.
return parseContext.commit(schema);
}
} catch (SchemaParseException e) {
parseContext.rollback();
parseExceptions.add(e);
}
}
// None of the available parsers succeeded
if (parseExceptions.size() == 1) {
throw parseExceptions.get(0);
}
SchemaParseException parseException = new SchemaParseException(
"Could not parse the schema (the suppressed exceptions tell why).");
parseExceptions.forEach(parseException::addSuppressed);
throw parseException;
}
/**
* Get all parsed schemata.
*
* @return all parsed schemas, in the order they were parsed
*/
public List getParsedNamedSchemas() {
return parseContext.resolveAllSchemas();
}
// Temporary method to reduce PR size
@Deprecated
public Schema resolve(ParseResult result) {
return result.mainSchema();
}
public interface ParseResult {
/**
* The main schema parsed from a file. Can be any schema, or {@code null} if the
* parsed file has no "main" schema.
*/
Schema mainSchema();
/**
* The list of named schemata that were parsed.
*/
List parsedNamedSchemas();
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy