org.elasticsearch.index.store.StoreFileMetadata Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of elasticsearch Show documentation
Show all versions of elasticsearch Show documentation
Elasticsearch subproject :server
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0 and the Server Side Public License, v 1; you may not use this file except
* in compliance with, at your election, the Elastic License 2.0 or the Server
* Side Public License, v 1.
*/
package org.elasticsearch.index.store;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.SegmentCommitInfo;
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.index.SegmentInfos;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.Version;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.io.stream.Writeable;
import org.elasticsearch.common.lucene.store.ByteArrayIndexInput;
import org.elasticsearch.core.Nullable;
import java.io.IOException;
import java.text.ParseException;
import java.util.Objects;
public class StoreFileMetadata implements Writeable {
public static final BytesRef UNAVAILABLE_WRITER_UUID = new BytesRef();
private static final org.elasticsearch.Version WRITER_UUID_MIN_VERSION = org.elasticsearch.Version.V_7_16_0;
private final String name;
// the actual file size on "disk", if compressed, the compressed size
private final long length;
private final String checksum;
private final String writtenBy;
private final BytesRef hash;
private final BytesRef writerUuid;
public StoreFileMetadata(String name, long length, String checksum, String writtenBy) {
this(name, length, checksum, writtenBy, null, UNAVAILABLE_WRITER_UUID);
}
public StoreFileMetadata(String name, long length, String checksum, String writtenBy, BytesRef hash, BytesRef writerUuid) {
assert assertValidWrittenBy(writtenBy);
this.name = Objects.requireNonNull(name, "name must not be null");
this.length = length;
this.checksum = Objects.requireNonNull(checksum, "checksum must not be null");
this.writtenBy = Objects.requireNonNull(writtenBy, "writtenBy must not be null");
this.hash = hash == null ? new BytesRef() : hash;
assert writerUuid != null && (writerUuid.length > 0 || writerUuid == UNAVAILABLE_WRITER_UUID);
this.writerUuid = Objects.requireNonNull(writerUuid, "writerUuid must not be null");
}
/**
* Read from a stream.
*/
public StoreFileMetadata(StreamInput in) throws IOException {
name = in.readString();
length = in.readVLong();
checksum = in.readString();
writtenBy = in.readString();
hash = in.readBytesRef();
if (in.getVersion().onOrAfter(WRITER_UUID_MIN_VERSION)) {
writerUuid = StoreFileMetadata.toWriterUuid(in.readBytesRef());
} else {
writerUuid = UNAVAILABLE_WRITER_UUID;
}
}
@Override
public void writeTo(StreamOutput out) throws IOException {
out.writeString(name);
out.writeVLong(length);
out.writeString(checksum);
out.writeString(writtenBy);
out.writeBytesRef(hash);
if (out.getVersion().onOrAfter(WRITER_UUID_MIN_VERSION)) {
out.writeBytesRef(writerUuid);
}
}
/**
* Returns the name of this file
*/
public String name() {
return name;
}
/**
* the actual file size on "disk", if compressed, the compressed size
*/
public long length() {
return length;
}
/**
* Returns a string representation of the files checksum. Since Lucene 4.8 this is a CRC32 checksum written
* by lucene.
*/
public String checksum() {
return this.checksum;
}
/**
* Checks if the bytes returned by {@link #hash()} are the contents of the file that this instances refers to.
*
* @return {@code true} iff {@link #hash()} will return the actual file contents
*/
public boolean hashEqualsContents() {
if (hash.length == length) {
try {
final boolean checksumsMatch = Store.digestToString(
CodecUtil.retrieveChecksum(new ByteArrayIndexInput("store_file", hash.bytes, hash.offset, hash.length))
).equals(checksum);
assert checksumsMatch : "Checksums did not match for [" + this + "] which has a hash of [" + hash + "]";
return checksumsMatch;
} catch (Exception e) {
// Hash didn't contain any bytes that Lucene could extract a checksum from so we can't verify against the checksum of the
// original file. We should never see an exception here because lucene files are assumed to always contain the checksum
// footer.
assert false : new AssertionError("Saw exception for hash [" + hash + "] but expected it to be Lucene file", e);
return false;
}
}
return false;
}
/**
* Returns true
iff the length and the checksums are the same. otherwise false
*/
public boolean isSame(StoreFileMetadata other) {
if (checksum == null || other.checksum == null) {
// we can't tell if either or is null so we return false in this case! this is why we don't use equals for this!
return false;
}
// If we have the file contents, we directly compare the contents. This is useful to compare segment info
// files of source-only snapshots where the original segment info file shares the same id as the source-only
// segment info file but its contents are different.
if (hashEqualsContents()) {
return hash.equals(other.hash);
}
if (writerUuid.length > 0 && other.writerUuid.length > 0) {
// if the writer ID is missing on one of the files then we ignore this field and just rely on the checksum and hash, but if
// it's present on both files then it must be identical
if (writerUuid.equals(other.writerUuid) == false) {
return false;
} else {
assert name.equals(other.name) && length == other.length && checksum.equals(other.checksum) : this + " vs " + other;
assert hash.equals(other.hash) : this + " vs " + other + " with hashes " + hash + " vs " + other.hash;
}
}
return length == other.length && checksum.equals(other.checksum) && hash.equals(other.hash);
}
@Override
public String toString() {
return "name [" + name + "], length [" + length + "], checksum [" + checksum + "], writtenBy [" + writtenBy + "]";
}
/**
* Returns a String representation of the Lucene version this file has been written by or null
if unknown
*/
public String writtenBy() {
return writtenBy;
}
/**
* Returns a variable length hash of the file represented by this metadata object. This can be the file
* itself if the file is small enough. If the length of the hash is {@code 0} no hash value is available
*/
public BytesRef hash() {
return hash;
}
/**
* Returns the globally-unique ID that was assigned by the {@link IndexWriter} that originally wrote this file:
*
* - For `segments_N` files this is {@link SegmentInfos#getId()} which uniquely identifies the commit.
* - For non-generational segment files this is {@link SegmentInfo#getId()} which uniquely identifies the segment.
* - For generational segment files (i.e. updated docvalues, liv files etc) this is {@link SegmentCommitInfo#getId()}
* which uniquely identifies the generation of the segment.
*
* This ID may be {@link StoreFileMetadata#UNAVAILABLE_WRITER_UUID} (i.e. zero-length) if unavilable, e.g.:
*
* - The file was written by a version of Lucene prior to {@link org.apache.lucene.util.Version#LUCENE_8_6_0}.
* - The metadata came from a version of Elasticsearch prior to {@link StoreFileMetadata#WRITER_UUID_MIN_VERSION}).
* - The file is not one of the files listed above.
*
*/
public BytesRef writerUuid() {
return writerUuid;
}
static BytesRef toWriterUuid(BytesRef bytesRef) {
if (bytesRef.length == 0) {
return UNAVAILABLE_WRITER_UUID;
} else {
return bytesRef;
}
}
static BytesRef toWriterUuid(@Nullable byte[] id) {
if (id == null) {
return UNAVAILABLE_WRITER_UUID;
} else {
assert id.length > 0;
return new BytesRef(id);
}
}
private static boolean assertValidWrittenBy(String writtenBy) {
try {
Version.parse(writtenBy);
} catch (ParseException e) {
throw new AssertionError("invalid writtenBy: " + writtenBy, e);
}
return true;
}
}