All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.avro.SchemaParser Maven / Gradle / Ivy

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     https://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.avro;

import org.apache.avro.util.UtfTextUtils;

import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.Reader;
import java.net.URI;
import java.nio.charset.Charset;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.ServiceLoader;

/**
 * Avro schema parser for text-based formats like JSON, IDL, etc.
 *
 * 

* Parses formatted (i.e., text based) schemata from a given source using the * available {@link FormattedSchemaParser} implementations, and returns the * first result. This means it can transparently handle any schema format. The * Avro project defines a JSON based format and an IDL format (the latter * available as a separate dependency), but you can also provide your own. *

* *

* The parser can handle various text based sources. If the source contains a * UTF encoded latin text based format it can even detect which UTF encoding was * used (UTF-8, UTF16BE, UTF16LE, UTF-32BE or UTF32LE). *

* * @see FormattedSchemaParser * @see UtfTextUtils */ public class SchemaParser { private final ParseContext parseContext; private final Collection formattedSchemaParsers; /** * Create a schema parser. Initially, the list of known (named) schemata is * empty. */ public SchemaParser() { this.parseContext = new ParseContext(); this.formattedSchemaParsers = new ArrayList<>(); for (FormattedSchemaParser formattedSchemaParser : ServiceLoader.load(FormattedSchemaParser.class)) { formattedSchemaParsers.add(formattedSchemaParser); } // Add the default / JSON parser last (not as a service, even though it // implements the service interface), to allow implementations that parse JSON // files into schemata differently. formattedSchemaParsers.add(new JsonSchemaParser()); } /** * Parse an Avro schema from a file. The file content is assumed to be UTF-8 * text. * * @param file the file to read * @return the schema * @throws IOException when the schema cannot be read * @throws SchemaParseException if parsing the schema failed; contains * suppressed underlying parse exceptions if * available * @see UtfTextUtils */ public ParseResult parse(File file) throws IOException, SchemaParseException { return parse(file, null); } /** * Parse an Avro schema from a file written with a specific character set. * * @param file the file to read * @param charset the character set of the file contents * @return the schema * @throws IOException when the schema cannot be read * @throws SchemaParseException if parsing the schema failed; contains * suppressed underlying parse exceptions if * available */ public ParseResult parse(File file, Charset charset) throws IOException, SchemaParseException { return parse(file.toPath(), charset); } /** * Parse an Avro schema from a file. The file content is assumed to be UTF-8 * text. * * @param file the file to read * @return the schema * @throws IOException when the schema cannot be read * @throws SchemaParseException if parsing the schema failed; contains * suppressed underlying parse exceptions if * available * @see UtfTextUtils */ public ParseResult parse(Path file) throws IOException, SchemaParseException { return parse(file, null); } /** * Parse an Avro schema from a file written with a specific character set. * * @param file the file to read * @param charset the character set of the file contents * @return the schema * @throws IOException when the schema cannot be read * @throws SchemaParseException if parsing the schema failed; contains * suppressed underlying parse exceptions if * available */ public ParseResult parse(Path file, Charset charset) throws IOException, SchemaParseException { URI inputDir = file.getParent().toUri(); try (InputStream stream = Files.newInputStream(file)) { String formattedSchema = UtfTextUtils.readAllBytes(stream, charset); return parse(inputDir, formattedSchema); } } /** * Parse an Avro schema from a file written with a specific character set. * * @param location the location of the schema resource * @param charset the character set of the schema resource * @return the schema * @throws IOException when the schema cannot be read * @throws SchemaParseException if parsing the schema failed; contains * suppressed underlying parse exceptions if * available */ public ParseResult parse(URI location, Charset charset) throws IOException, SchemaParseException { try (InputStream stream = location.toURL().openStream()) { String formattedSchema = UtfTextUtils.readAllBytes(stream, charset); return parse(location, formattedSchema); } } /** * Parse an Avro schema from an input stream. The stream content is assumed to * be UTF-8 text. Note that the stream stays open after reading. * * @param in the stream to read * @return the schema * @throws IOException when the schema cannot be read * @throws SchemaParseException if parsing the schema failed; contains * suppressed underlying parse exceptions if * available * @see UtfTextUtils */ public ParseResult parse(InputStream in) throws IOException, SchemaParseException { return parse(in, null); } /** * Parse an Avro schema from an input stream. Note that the stream stays open * after reading. * * @param in the stream to read * @param charset the character set of the stream contents * @return the schema * @throws IOException when the schema cannot be read * @throws SchemaParseException if parsing the schema failed; contains * suppressed underlying parse exceptions if * available */ public ParseResult parse(InputStream in, Charset charset) throws IOException, SchemaParseException { return parse(UtfTextUtils.readAllBytes(in, charset)); } /** * Parse an Avro schema from an input reader. * * @param in the stream to read * @return the schema * @throws IOException when the schema cannot be read * @throws SchemaParseException if parsing the schema failed; contains * suppressed underlying parse exceptions if * available */ public ParseResult parse(Reader in) throws IOException, SchemaParseException { return parse(UtfTextUtils.readAllChars(in)); } /** * Parse an Avro schema from a string. * * @param text the text to parse * @return the schema * @throws SchemaParseException if parsing the schema failed; contains * suppressed underlying parse exceptions if * available */ public ParseResult parse(CharSequence text) throws SchemaParseException { try { return parse(null, text); } catch (IOException e) { // This can only happen if parser implementations try to read other (related) // schemata from somewhere. throw new AvroRuntimeException("Could not read schema", e); } } /** * Parse the given schema (string) within the specified context using all * available {@link FormattedSchemaParser} implementations, collecting any * {@link SchemaParseException}s that occur, and return the first successfully * parsed schema. If all parsers fail, throw a {@code SchemaParseException} with * all collected parse exceptions added as suppressed exceptions. Uses the base * location of the schema (e.g., the directory where the schema file lives) if * available. * * @param baseUri the base location of the schema, or {@code null} if * not known * @param formattedSchema the schema as text * @return the parsed schema * @throws IOException if thrown by one of the parsers * @throws RuntimeException if thrown by one of the parsers * @throws SchemaParseException when all parsers fail */ private ParseResult parse(URI baseUri, CharSequence formattedSchema) throws IOException, SchemaParseException { List parseExceptions = new ArrayList<>(); for (FormattedSchemaParser formattedSchemaParser : formattedSchemaParsers) { try { Schema schema = formattedSchemaParser.parse(parseContext, baseUri, formattedSchema); if (parseContext.hasNewSchemas() || schema != null) { // Parsing succeeded: return the result. return parseContext.commit(schema); } } catch (SchemaParseException e) { parseContext.rollback(); parseExceptions.add(e); } } // None of the available parsers succeeded if (parseExceptions.size() == 1) { throw parseExceptions.get(0); } SchemaParseException parseException = new SchemaParseException( "Could not parse the schema (the suppressed exceptions tell why)."); parseExceptions.forEach(parseException::addSuppressed); throw parseException; } /** * Get all parsed schemata. * * @return all parsed schemas, in the order they were parsed */ public List getParsedNamedSchemas() { return parseContext.resolveAllSchemas(); } // Temporary method to reduce PR size @Deprecated public Schema resolve(ParseResult result) { return result.mainSchema(); } public interface ParseResult { /** * The main schema parsed from a file. Can be any schema, or {@code null} if the * parsed file has no "main" schema. */ Schema mainSchema(); /** * The list of named schemata that were parsed. */ List parsedNamedSchemas(); } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy