![JAR search and dependency download from the Maven repository](/logo.png)
co.cask.common.internal.io.SchemaHash Maven / Gradle / Ivy
/*
* Copyright © 2014 Cask Data, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package co.cask.common.internal.io;
import com.google.common.base.Charsets;
import com.google.common.base.Throwables;
import com.google.common.collect.Sets;
import java.nio.ByteBuffer;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.Arrays;
import java.util.Formatter;
import java.util.Set;
/**
 * An MD5 fingerprint of a {@link Schema}.
 *
 * <p>The fingerprint is derived from the schema's type structure, enum values and record field
 * names (see {@link #updateHash}). It can also be restored from the raw bytes previously produced
 * by {@link #toByteArray()}. Two instances are equal exactly when their hash bytes are equal.
 */
public final class SchemaHash {

  /** Length in bytes of an MD5 digest, which is the size of every hash handled by this class. */
  private static final int HASH_LENGTH = 16;

  private final byte[] hash;

  // Hex form of {@code hash}, computed on the first call to toString(). The hash bytes never
  // change after construction, so recomputing it always yields the same string.
  private String hashStr;

  /**
   * Creates a SchemaHash by computing the MD5 hash of the given schema.
   *
   * @param schema the {@link Schema} to hash.
   */
  public SchemaHash(Schema schema) {
    hash = computeHash(schema);
  }

  /**
   * Creates a SchemaHash by reading the hash value from the given {@link java.nio.ByteBuffer}.
   * The position of the {@link java.nio.ByteBuffer} will be moved to the byte after the hash
   * value.
   *
   * @param bytes buffer positioned at the first of the 16 hash bytes.
   * @throws java.nio.BufferUnderflowException if fewer than 16 bytes remain in the buffer.
   */
  public SchemaHash(ByteBuffer bytes) {
    hash = new byte[HASH_LENGTH];
    bytes.get(hash);
  }

  /**
   * @return A new byte array representing the raw hash value.
   */
  public byte[] toByteArray() {
    // Hand out a copy so callers cannot modify the internal state.
    return Arrays.copyOf(hash, hash.length);
  }

  @Override
  public boolean equals(Object other) {
    if (this == other) {
      return true;
    }
    if (other == null || getClass() != other.getClass()) {
      return false;
    }
    return Arrays.equals(hash, ((SchemaHash) other).hash);
  }

  @Override
  public int hashCode() {
    return Arrays.hashCode(hash);
  }

  /**
   * @return The hash value encoded as an upper-case hexadecimal string, two characters per byte.
   */
  @Override
  public String toString() {
    String str = hashStr;
    if (str == null) {
      // hex encode the bytes
      Formatter formatter = new Formatter(new StringBuilder(2 * hash.length));
      for (byte b : hash) {
        formatter.format("%02X", b);
      }
      str = hashStr = formatter.toString();
    }
    return str;
  }

  /**
   * Computes the MD5 digest of the given schema.
   *
   * @param schema the {@link Schema} to hash.
   * @return the raw MD5 digest bytes.
   */
  private static byte[] computeHash(Schema schema) {
    try {
      Set<String> knownRecords = Sets.newHashSet();
      MessageDigest md5 = updateHash(MessageDigest.getInstance("MD5"), schema, knownRecords);
      return md5.digest();
    } catch (NoSuchAlgorithmException e) {
      throw Throwables.propagate(e);
    }
  }

  /**
   * Updates md5 based on the given schema.
   *
   * <p>Every schema contributes a fixed one-byte tag for its type. Enums additionally contribute
   * their values, arrays their component schema, maps their key and value schemas, records their
   * field names and field schemas, and unions each of their member schemas, in iteration order.
   *
   * @param md5 {@link java.security.MessageDigest} to update.
   * @param schema {@link Schema} for updating the md5.
   * @param knownRecords names of the records that have already been visited. A record whose name
   *                     is already in this set contributes only its tag and field names; its
   *                     field schemas are not descended into again.
   * @return The same {@link java.security.MessageDigest} in the parameter.
   */
  private static MessageDigest updateHash(MessageDigest md5, Schema schema, Set<String> knownRecords) {
    // Don't use enum.ordinal() as ordering in enum could change
    switch (schema.getType()) {
      case NULL:
        md5.update((byte) 0);
        break;
      case BOOLEAN:
        md5.update((byte) 1);
        break;
      case INT:
        md5.update((byte) 2);
        break;
      case LONG:
        md5.update((byte) 3);
        break;
      case FLOAT:
        md5.update((byte) 4);
        break;
      case DOUBLE:
        md5.update((byte) 5);
        break;
      case BYTES:
        md5.update((byte) 6);
        break;
      case STRING:
        md5.update((byte) 7);
        break;
      case ENUM:
        md5.update((byte) 8);
        for (String value : schema.getEnumValues()) {
          md5.update(Charsets.UTF_8.encode(value));
        }
        break;
      case ARRAY:
        md5.update((byte) 9);
        updateHash(md5, schema.getComponentSchema(), knownRecords);
        break;
      case MAP:
        md5.update((byte) 10);
        updateHash(md5, schema.getMapSchema().getKey(), knownRecords);
        updateHash(md5, schema.getMapSchema().getValue(), knownRecords);
        break;
      case RECORD:
        md5.update((byte) 11);
        // add() returns false when the record name was already present, i.e. the record has
        // been visited before; in that case only the field names are hashed, not their schemas.
        boolean notKnown = knownRecords.add(schema.getRecordName());
        for (Schema.Field field : schema.getFields()) {
          md5.update(Charsets.UTF_8.encode(field.getName()));
          if (notKnown) {
            updateHash(md5, field.getSchema(), knownRecords);
          }
        }
        break;
      case UNION:
        md5.update((byte) 12);
        for (Schema unionSchema : schema.getUnionSchemas()) {
          updateHash(md5, unionSchema, knownRecords);
        }
        break;
    }
    return md5;
  }
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy