org.apache.avro.io.ResolvingDecoder Maven / Gradle / Ivy

Go to download
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     https://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.avro.io;

import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;
import java.util.Objects;

import org.apache.avro.AvroTypeException;
import org.apache.avro.Schema;
import org.apache.avro.io.parsing.ResolvingGrammarGenerator;
import org.apache.avro.io.parsing.Symbol;
import org.apache.avro.util.Utf8;

/**
 * {@link Decoder} that performs type-resolution between the reader's and
 * writer's schemas.
 *
 * <p>
 * When resolving schemas, this class will return the values of fields in
 * <i>writer's</i> order, not the reader's order. (However, it returns
 * <i>only</i> the reader's fields, not any extra fields the writer may have
 * written.) To help clients handle fields that appear to be coming out of
 * order, this class defines the method {@link #readFieldOrder}.
 *
 * <p>
 * See the parser documentation (package {@code org.apache.avro.io.parsing})
 * for information on how this works.
 */
public class ResolvingDecoder extends ValidatingDecoder {

  /**
   * The decoder that was active before a default value started being read.
   * Set when a {@link Symbol.DefaultStartAction} is processed and restored
   * into {@code in} when {@link Symbol#DEFAULT_END_ACTION} is processed.
   */
  private Decoder backup;

  /**
   * Constructs a ResolvingDecoder that resolves data written with
   * {@code writer} into the shape described by {@code reader}.
   *
   * @param writer The writer's schema. Cannot be null.
   * @param reader The reader's schema. Cannot be null.
   * @param in     The underlying decoder.
   * @throws IOException          if the resolver cannot be generated
   * @throws NullPointerException if {@code writer} or {@code reader} is
   *                              {@code null}
   */
  ResolvingDecoder(Schema writer, Schema reader, Decoder in) throws IOException {
    this(resolve(writer, reader), in);
  }

  /**
   * Constructs a ResolvingDecoder using the given resolver. The resolver
   * must have been returned by a previous call to
   * {@link #resolve(Schema, Schema)}.
   *
   * @param resolver The resolver to use; it is cast to {@link Symbol}.
   * @param in       The underlying decoder.
   * @throws IOException if the superclass constructor fails
   */
  private ResolvingDecoder(Object resolver, Decoder in) throws IOException {
    super((Symbol) resolver, in);
  }

  /**
   * Produces an opaque resolver that can be used to construct a new
   * {@link ResolvingDecoder#ResolvingDecoder(Object, Decoder)}. The returned
   * Object is immutable and hence can be simultaneously used in many
   * ResolvingDecoders. This method is reasonably expensive, the users are
   * encouraged to cache the result.
   *
   * @param writer The writer's schema. Cannot be null.
   * @param reader The reader's schema. Cannot be null.
   * @return The opaque resolver.
   * @throws IOException          if the resolving grammar cannot be generated
   * @throws NullPointerException if {@code writer} or {@code reader} is
   *                              {@code null}
   */
  public static Object resolve(Schema writer, Schema reader) throws IOException {
    Objects.requireNonNull(writer, "Writer schema cannot be null");
    Objects.requireNonNull(reader, "Reader schema cannot be null");
    return new ResolvingGrammarGenerator().generate(writer, reader);
  }

  /**
   * Returns the actual order in which the reader's fields will be returned to the
   * reader.
   *
   * <p>
   * This method is useful because {@link ResolvingDecoder} returns values in the
   * order written by the writer, rather than the order expected by the reader.
   * This method allows readers to figure out what fields to expect. Let's say the
   * reader is expecting a three-field record, the first field is a long, the
   * second a string, and the third an array. In this case, a typical usage might
   * be as follows:
   *
   * <pre>
   *   Schema.Field[] fieldOrder = in.readFieldOrder();
   *   for (int i = 0; i &lt; 3; i++) {
   *     switch (fieldOrder[i].pos()) { // pos() is the zero-based reader position
   *     case 0:
   *       foo(in.readLong());
   *       break;
   *     case 1:
   *       someVariable = in.readString();
   *       break;
   *     case 2:
   *       bar(in); // The code of "bar" will read an array-of-int
   *       break;
   *     }
   *   }
   * </pre>
   *
   * <p>
   * Note that {@link ResolvingDecoder} will return only the fields expected by
   * the reader, not other fields that may have been written by the writer. Thus,
   * the iteration-count of "3" in the above loop will always be correct.
   *
   * <p>
   * Throws a runtime exception if we're not just about to read the first field of
   * a record. (If the client knows the order of incoming fields, then the client
   * does not need to call this method but rather can just start reading
   * the field values.)
   *
   * @return the reader's fields, in the order their values will be returned
   * @throws AvroTypeException If we're not starting a new record
   * @throws IOException       if advancing the parser fails
   */
  public final Schema.Field[] readFieldOrder() throws IOException {
    return ((Symbol.FieldOrderAction) parser.advance(Symbol.FIELD_ACTION)).fields;
  }

  /**
   * Same as {@link #readFieldOrder} except that it returns null if there
   * was no reordering of fields, i.e., if the correct thing for the reader to do
   * is to read (all) of its fields in the order specified by its own schema
   * (useful for optimizations).
   *
   * @return the reader's fields in the order their values will be returned, or
   *         {@code null} if the field-order action reports no reordering
   * @throws IOException if advancing the parser fails
   */
  public final Schema.Field[] readFieldOrderIfDiff() throws IOException {
    Symbol.FieldOrderAction top = (Symbol.FieldOrderAction) parser.advance(Symbol.FIELD_ACTION);
    return (top.noReorder ? null : top.fields);
  }

  /**
   * Consume any more data that has been written by the writer but not needed by
   * the reader so that the underlying decoder is in proper shape for the next
   * record. This situation happens when, for example, the writer writes a record
   * with two fields and the reader needs only the first field.
   *
   * <p>
   * This function should be called after completely decoding an object but before
   * next object can be decoded from the same underlying decoder either directly
   * or through another resolving decoder. If the same resolving decoder is used
   * for the next object as well, calling this method is optional; the state of
   * this resolving decoder ensures that any leftover portions are consumed before
   * the next object is decoded.
   *
   * @throws IOException if processing the pending implicit actions fails
   */
  public final void drain() throws IOException {
    parser.processImplicitActions();
  }

  /**
   * Reads a long, converting from the type the writer actually wrote.
   *
   * @return the value read, widened from an int or narrowed from a double when
   *         the parser reports that type as the written one
   * @throws IOException if the underlying decoder fails
   */
  @Override
  public long readLong() throws IOException {
    Symbol actual = parser.advance(Symbol.LONG);
    if (actual == Symbol.INT) {
      return in.readInt();
    } else if (actual == Symbol.DOUBLE) {
      // NOTE(review): a double -> long narrowing (fraction is discarded by the
      // cast) is not one of the usual Avro promotions; kept as in the original.
      return (long) in.readDouble();
    } else {
      assert actual == Symbol.LONG;
      return in.readLong();
    }
  }

  /**
   * Reads a float, promoting from an int or long if that is what was written.
   *
   * @return the value read, converted to {@code float}
   * @throws IOException if the underlying decoder fails
   */
  @Override
  public float readFloat() throws IOException {
    Symbol actual = parser.advance(Symbol.FLOAT);
    if (actual == Symbol.INT) {
      return (float) in.readInt();
    } else if (actual == Symbol.LONG) {
      return (float) in.readLong();
    } else {
      assert actual == Symbol.FLOAT;
      return in.readFloat();
    }
  }

  /**
   * Reads a double, promoting from an int, long or float if that is what was
   * written.
   *
   * @return the value read, converted to {@code double}
   * @throws IOException if the underlying decoder fails
   */
  @Override
  public double readDouble() throws IOException {
    Symbol actual = parser.advance(Symbol.DOUBLE);
    if (actual == Symbol.INT) {
      return (double) in.readInt();
    } else if (actual == Symbol.LONG) {
      return (double) in.readLong();
    } else if (actual == Symbol.FLOAT) {
      return (double) in.readFloat();
    } else {
      assert actual == Symbol.DOUBLE;
      return in.readDouble();
    }
  }

  /**
   * Reads a string; if the writer wrote bytes, those bytes become the content
   * of a new {@link Utf8}.
   *
   * @param old an instance the underlying decoder may reuse when the writer
   *            wrote a string; ignored when the writer wrote bytes
   * @return the string read
   * @throws IOException if the underlying decoder fails
   */
  @Override
  public Utf8 readString(Utf8 old) throws IOException {
    Symbol actual = parser.advance(Symbol.STRING);
    if (actual == Symbol.BYTES) {
      return new Utf8(remainingBytes(in.readBytes(null)));
    } else {
      assert actual == Symbol.STRING;
      return in.readString(old);
    }
  }

  /**
   * Reads a string; if the writer wrote bytes, they are decoded as UTF-8.
   *
   * @return the string read
   * @throws IOException if the underlying decoder fails
   */
  @Override
  public String readString() throws IOException {
    Symbol actual = parser.advance(Symbol.STRING);
    if (actual == Symbol.BYTES) {
      return new String(remainingBytes(in.readBytes(null)), StandardCharsets.UTF_8);
    } else {
      assert actual == Symbol.STRING;
      return in.readString();
    }
  }

  /**
   * Skips a string, or a bytes value if that is what the writer wrote.
   *
   * @throws IOException if the underlying decoder fails
   */
  @Override
  public void skipString() throws IOException {
    Symbol actual = parser.advance(Symbol.STRING);
    if (actual == Symbol.BYTES) {
      in.skipBytes();
    } else {
      assert actual == Symbol.STRING;
      in.skipString();
    }
  }

  /**
   * Reads bytes; if the writer wrote a string, its encoded bytes are wrapped
   * in a new buffer.
   *
   * @param old a buffer the underlying decoder may reuse when the writer wrote
   *            bytes; ignored when the writer wrote a string
   * @return the bytes read
   * @throws IOException if the underlying decoder fails
   */
  @Override
  public ByteBuffer readBytes(ByteBuffer old) throws IOException {
    Symbol actual = parser.advance(Symbol.BYTES);
    if (actual == Symbol.STRING) {
      Utf8 s = in.readString(null);
      // Only the first getByteLength() bytes of the Utf8 array are exposed.
      return ByteBuffer.wrap(s.getBytes(), 0, s.getByteLength());
    } else {
      assert actual == Symbol.BYTES;
      return in.readBytes(old);
    }
  }

  /**
   * Skips a bytes value, or a string if that is what the writer wrote.
   *
   * @throws IOException if the underlying decoder fails
   */
  @Override
  public void skipBytes() throws IOException {
    Symbol actual = parser.advance(Symbol.BYTES);
    if (actual == Symbol.STRING) {
      in.skipString();
    } else {
      assert actual == Symbol.BYTES;
      in.skipBytes();
    }
  }

  /**
   * Reads an enum ordinal and maps it through the adjustment table of the
   * {@link Symbol.EnumAdjustAction} found on the parser stack.
   *
   * @return the written ordinal when no adjustments are needed, otherwise the
   *         adjusted ordinal
   * @throws AvroTypeException if the adjustment entry for the written ordinal is
   *                           an error message rather than an ordinal
   * @throws IOException       if the underlying decoder fails
   */
  @Override
  public int readEnum() throws IOException {
    parser.advance(Symbol.ENUM);
    Symbol.EnumAdjustAction top = (Symbol.EnumAdjustAction) parser.popSymbol();
    int n = in.readEnum();
    if (top.noAdjustments) {
      return n;
    }
    // Each entry is either an Integer (the adjusted ordinal) or a String (an
    // error message for a symbol that cannot be resolved).
    Object o = top.adjustments[n];
    if (o instanceof Integer) {
      return (Integer) o;
    } else {
      throw new AvroTypeException((String) o);
    }
  }

  /**
   * Reads a union branch index and pushes the symbol for the selected branch
   * onto the parser stack.
   *
   * @return the fixed {@code rindex} of a {@link Symbol.UnionAdjustAction} if
   *         that is on the stack, otherwise the index read from the underlying
   *         decoder
   * @throws IOException if the underlying decoder fails
   */
  @Override
  public int readIndex() throws IOException {
    parser.advance(Symbol.UNION);
    Symbol top = parser.popSymbol();
    final int result;
    if (top instanceof Symbol.UnionAdjustAction) {
      // Nothing is read from the input in this case.
      result = ((Symbol.UnionAdjustAction) top).rindex;
      top = ((Symbol.UnionAdjustAction) top).symToParse;
    } else {
      result = in.readIndex();
      top = ((Symbol.Alternative) top).getSymbol(result);
    }
    parser.pushSymbol(top);
    return result;
  }

  /**
   * Handles an action symbol found on top of the parser stack while the
   * parser is looking for {@code input}.
   *
   * @param input the symbol being looked for
   * @param top   the action symbol on top of the stack
   * @return {@code top} for a field-order action when {@code input} is
   *         {@link Symbol#FIELD_ACTION}; the writer's symbol for a matching
   *         resolving action; {@code null} otherwise
   * @throws AvroTypeException if a resolving action does not match
   *                           {@code input}, if {@code top} is an error action,
   *                           or if {@code top} is not a known action
   * @throws IOException       if the underlying decoder fails
   */
  @Override
  public Symbol doAction(Symbol input, Symbol top) throws IOException {
    if (top instanceof Symbol.FieldOrderAction) {
      return input == Symbol.FIELD_ACTION ? top : null;
    }
    if (top instanceof Symbol.ResolvingAction) {
      Symbol.ResolvingAction t = (Symbol.ResolvingAction) top;
      if (t.reader != input) {
        throw new AvroTypeException("Found " + t.reader + " while looking for " + input);
      } else {
        return t.writer;
      }
    } else if (top instanceof Symbol.SkipAction) {
      Symbol symToSkip = ((Symbol.SkipAction) top).symToSkip;
      parser.skipSymbol(symToSkip);
    } else if (top instanceof Symbol.WriterUnionAction) {
      pushWriterUnionBranch();
    } else if (top instanceof Symbol.ErrorAction) {
      throw new AvroTypeException(((Symbol.ErrorAction) top).msg);
    } else if (top instanceof Symbol.DefaultStartAction) {
      beginDefault((Symbol.DefaultStartAction) top);
    } else if (top == Symbol.DEFAULT_END_ACTION) {
      endDefault();
    } else {
      throw new AvroTypeException("Unknown action: " + top);
    }
    return null;
  }

  /**
   * Pops the action on top of the parser stack and handles it while skipping.
   * Unlike {@link #doAction(Symbol, Symbol)}, a skip action's symbol is pushed
   * back to be skipped by the caller, and unknown actions are ignored.
   *
   * @throws AvroTypeException if the popped symbol is an error action
   * @throws IOException       if the underlying decoder fails
   */
  @Override
  public void skipAction() throws IOException {
    Symbol top = parser.popSymbol();
    if (top instanceof Symbol.ResolvingAction) {
      parser.pushSymbol(((Symbol.ResolvingAction) top).writer);
    } else if (top instanceof Symbol.SkipAction) {
      parser.pushSymbol(((Symbol.SkipAction) top).symToSkip);
    } else if (top instanceof Symbol.WriterUnionAction) {
      pushWriterUnionBranch();
    } else if (top instanceof Symbol.ErrorAction) {
      throw new AvroTypeException(((Symbol.ErrorAction) top).msg);
    } else if (top instanceof Symbol.DefaultStartAction) {
      beginDefault((Symbol.DefaultStartAction) top);
    } else if (top == Symbol.DEFAULT_END_ACTION) {
      endDefault();
    }
  }

  /**
   * Pops the writer-union alternatives from the parser stack, reads the branch
   * index from the underlying decoder and pushes the selected branch.
   */
  private void pushWriterUnionBranch() throws IOException {
    Symbol.Alternative branches = (Symbol.Alternative) parser.popSymbol();
    parser.pushSymbol(branches.getSymbol(in.readIndex()));
  }

  /**
   * Saves the current decoder and switches input to a binary decoder over the
   * default value's contents.
   */
  private void beginDefault(Symbol.DefaultStartAction dsa) throws IOException {
    backup = in;
    in = DecoderFactory.get().binaryDecoder(dsa.contents, null);
  }

  /** Switches input back to the decoder saved by {@link #beginDefault}. */
  private void endDefault() {
    in = backup;
  }

  /**
   * Returns the readable bytes of {@code buffer} (from its position to its
   * limit) as an array. The backing array is returned directly when the
   * buffer exactly spans it; otherwise the bytes are copied, so that
   * read-only, direct, offset or partially filled buffers are handled
   * correctly. The buffer's position is not modified.
   */
  private static byte[] remainingBytes(ByteBuffer buffer) {
    if (buffer.hasArray() && buffer.arrayOffset() == 0 && buffer.position() == 0
        && buffer.remaining() == buffer.array().length) {
      return buffer.array();
    }
    byte[] copy = new byte[buffer.remaining()];
    // Read through a duplicate so the caller-visible position stays put.
    buffer.duplicate().get(copy);
    return copy;
  }
}