org.apache.lucene.codecs.blocktreeords.FSTOrdsOutputs Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of lucene-codecs Show documentation
Show all versions of lucene-codecs Show documentation
Codecs and postings formats for Apache Lucene.
The newest version!
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.codecs.blocktreeords;
import java.io.IOException;
import org.apache.lucene.store.DataInput;
import org.apache.lucene.store.DataOutput;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.StringHelper;
import org.apache.lucene.util.fst.Outputs;
/**
* A custom FST outputs implementation that stores block data (BytesRef), long ordStart, long
* numTerms.
*/
final class FSTOrdsOutputs extends Outputs {
public static final Output NO_OUTPUT = new Output(new BytesRef(), 0, 0);
private static final BytesRef NO_BYTES = new BytesRef();
/**
* @param startOrd Inclusive:
* @param endOrd Inclusive:
*/
public record Output(BytesRef bytes, long startOrd, long endOrd) {
public Output {
assert startOrd >= 0 : "startOrd=" + startOrd;
assert endOrd >= 0 : "endOrd=" + endOrd;
}
@Override
public String toString() {
long x;
if (endOrd > Long.MAX_VALUE / 2) {
x = Long.MAX_VALUE - endOrd;
} else {
assert endOrd >= 0;
x = -endOrd;
}
return startOrd + " to " + x;
}
}
@Override
public Output common(Output output1, Output output2) {
BytesRef bytes1 = output1.bytes;
BytesRef bytes2 = output2.bytes;
assert bytes1 != null;
assert bytes2 != null;
int pos1 = bytes1.offset;
int pos2 = bytes2.offset;
int stopAt1 = pos1 + Math.min(bytes1.length, bytes2.length);
while (pos1 < stopAt1) {
if (bytes1.bytes[pos1] != bytes2.bytes[pos2]) {
break;
}
pos1++;
pos2++;
}
BytesRef prefixBytes;
if (pos1 == bytes1.offset) {
// no common prefix
prefixBytes = NO_BYTES;
} else if (pos1 == bytes1.offset + bytes1.length) {
// bytes1 is a prefix of bytes2
prefixBytes = bytes1;
} else if (pos2 == bytes2.offset + bytes2.length) {
// bytes2 is a prefix of bytes1
prefixBytes = bytes2;
} else {
prefixBytes = new BytesRef(bytes1.bytes, bytes1.offset, pos1 - bytes1.offset);
}
return newOutput(
prefixBytes,
Math.min(output1.startOrd, output2.startOrd),
Math.min(output1.endOrd, output2.endOrd));
}
@Override
public Output subtract(Output output, Output inc) {
assert output != null;
assert inc != null;
if (inc == NO_OUTPUT) {
// no prefix removed
return output;
} else {
assert StringHelper.startsWith(output.bytes, inc.bytes);
BytesRef suffix;
if (inc.bytes.length == output.bytes.length) {
// entire output removed
suffix = NO_BYTES;
} else if (inc.bytes.length == 0) {
suffix = output.bytes;
} else {
assert inc.bytes.length < output.bytes.length
: "inc.length=" + inc.bytes.length + " vs output.length=" + output.bytes.length;
assert inc.bytes.length > 0;
suffix =
new BytesRef(
output.bytes.bytes,
output.bytes.offset + inc.bytes.length,
output.bytes.length - inc.bytes.length);
}
assert output.startOrd >= inc.startOrd;
assert output.endOrd >= inc.endOrd;
return newOutput(suffix, output.startOrd - inc.startOrd, output.endOrd - inc.endOrd);
}
}
@Override
public Output add(Output prefix, Output output) {
assert prefix != null;
assert output != null;
if (prefix == NO_OUTPUT) {
return output;
} else if (output == NO_OUTPUT) {
return prefix;
} else {
BytesRef bytes = new BytesRef(prefix.bytes.length + output.bytes.length);
System.arraycopy(
prefix.bytes.bytes, prefix.bytes.offset, bytes.bytes, 0, prefix.bytes.length);
System.arraycopy(
output.bytes.bytes,
output.bytes.offset,
bytes.bytes,
prefix.bytes.length,
output.bytes.length);
bytes.length = prefix.bytes.length + output.bytes.length;
return newOutput(bytes, prefix.startOrd + output.startOrd, prefix.endOrd + output.endOrd);
}
}
@Override
public void write(Output prefix, DataOutput out) throws IOException {
out.writeVInt(prefix.bytes.length);
out.writeBytes(prefix.bytes.bytes, prefix.bytes.offset, prefix.bytes.length);
out.writeVLong(prefix.startOrd);
out.writeVLong(prefix.endOrd);
}
@Override
public Output read(DataInput in) throws IOException {
int len = in.readVInt();
BytesRef bytes;
if (len == 0) {
bytes = NO_BYTES;
} else {
bytes = new BytesRef(len);
in.readBytes(bytes.bytes, 0, len);
bytes.length = len;
}
long startOrd = in.readVLong();
long endOrd = in.readVLong();
Output result = newOutput(bytes, startOrd, endOrd);
return result;
}
@Override
public void skipOutput(DataInput in) throws IOException {
int len = in.readVInt();
in.skipBytes(len);
in.readVLong();
in.readVLong();
}
@Override
public void skipFinalOutput(DataInput in) throws IOException {
skipOutput(in);
}
@Override
public Output getNoOutput() {
return NO_OUTPUT;
}
@Override
public String outputToString(Output output) {
if ((output.endOrd == 0 || output.endOrd == Long.MAX_VALUE) && output.startOrd == 0) {
return "";
} else {
return output.toString();
}
}
public Output newOutput(BytesRef bytes, long startOrd, long endOrd) {
if (bytes.length == 0 && startOrd == 0 && endOrd == 0) {
return NO_OUTPUT;
} else {
return new Output(bytes, startOrd, endOrd);
}
}
@Override
public long ramBytesUsed(Output output) {
return 2 * RamUsageEstimator.NUM_BYTES_OBJECT_HEADER
+ 2 * Long.BYTES
+ 2 * RamUsageEstimator.NUM_BYTES_OBJECT_REF
+ RamUsageEstimator.NUM_BYTES_ARRAY_HEADER
+ 2 * Integer.BYTES
+ output.bytes.length;
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy