/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.common.util;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.lang.invoke.MethodHandles;
import java.nio.ByteBuffer;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Date;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;
import java.util.function.BiConsumer;
import java.util.function.Function;
import java.util.function.Predicate;
import org.apache.solr.common.ConditionalKeyMapWriter;
import org.apache.solr.common.EnumFieldValue;
import org.apache.solr.common.IteratorWriter;
import org.apache.solr.common.IteratorWriter.ItemWriter;
import org.apache.solr.common.MapSerializable;
import org.apache.solr.common.MapWriter;
import org.apache.solr.common.PushWriter;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.SolrInputField;
import org.apache.solr.common.params.CommonParams;
import org.noggit.CharArr;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import static org.apache.solr.common.util.ByteArrayUtf8CharSequence.convertCharSeq;
/**
* Defines a space-efficient serialization/deserialization format for transferring data.
*
* JavaBinCodec has built-in support for many commonly used types. This includes primitive types (boolean, byte,
* short, double, int, long, float), common Java containers/utilities (Date, Map, Collection, Iterator, String,
* Object[], byte[]), and frequently used Solr types ({@link NamedList}, {@link SolrDocument},
* {@link SolrDocumentList}). Each of the above types has a pair of associated methods which read and write
* that type to a stream.
*
* Classes that aren't supported natively can still be serialized/deserialized by providing
* an {@link JavaBinCodec.ObjectResolver} object that knows how to work with the unsupported class.
* This allows {@link JavaBinCodec} to be used to marshall/unmarshall arbitrary content.
*
* NOTE -- {@link JavaBinCodec} instances cannot be reused for more than one marshall or unmarshall operation.
*/
public class JavaBinCodec implements PushWriter {
/** Logger for this class. */
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());

/**
 * Flipped to {@code true} the first time an ignored index-time boost is reported; only that first
 * report is logged at WARN, later ones at DEBUG.
 */
private static final AtomicBoolean WARNED_ABOUT_INDEX_TIME_BOOSTS = new AtomicBoolean();

/*
 * Type tags written in front of every value.
 *
 * Tags NULL..UUID have their top three bits clear and occupy the whole tag byte. The tags from STR
 * onwards use only the top three bits, so the low five bits of the same byte can carry a length or
 * other payload (see writeTag(byte, int) and readSize).
 */
public static final byte
    NULL = 0,
    BOOL_TRUE = 1,
    BOOL_FALSE = 2,
    BYTE = 3,
    SHORT = 4,
    DOUBLE = 5,
    INT = 6,
    LONG = 7,
    FLOAT = 8,
    DATE = 9,
    MAP = 10,
    SOLRDOC = 11,
    SOLRDOCLST = 12,
    BYTEARR = 13,
    ITERATOR = 14,
    /**
     * A special tag that signals an end. No value is associated with it.
     */
    END = 15,
    SOLRINPUTDOC = 16,
    MAP_ENTRY_ITER = 17,
    ENUM_FIELD_VALUE = 18,
    MAP_ENTRY = 19,
    UUID = 20, // This is reserved to be used only in LogCodec
    // types that combine tag + length (or other info) in a single byte
    TAG_AND_LEN = (byte) (1 << 5),
    STR = (byte) (1 << 5),
    SINT = (byte) (2 << 5),
    SLONG = (byte) (3 << 5),
    ARR = (byte) (4 << 5), //
    ORDERED_MAP = (byte) (5 << 5), // SimpleOrderedMap (a NamedList subclass, and more common)
    NAMED_LST = (byte) (6 << 5), // NamedList
    EXTERN_STRING = (byte) (7 << 5);

/**
 * Strings whose worst-case UTF-8 size is at most this many bytes are encoded in a single pass into
 * the reusable buffer; larger strings use the two-pass path in {@code writeStr}.
 */
private static final int MAX_UTF8_SIZE_FOR_ARRAY_GROW_STRATEGY = 65536;

/** Format version: written as the first byte by {@code initWrite} and checked by {@code _init}. */
private static final byte VERSION = 2;

/** Fallback serializer consulted by {@code writeVal} for values that are not known types; may be null. */
private final ObjectResolver resolver;
/** Stream every write method emits to; assigned by {@link #init(FastOutputStream)}. */
protected FastOutputStream daos;
/** Optional cache handed to {@code readStr} when decoding strings; may be null. */
private StringCache stringCache;
/** Optional filter deciding which {@link SolrDocument} fields are written; null means no filtering. */
private WritableDocFields writableDocFields;
/** Set by {@code marshal}; {@code initWrite} asserts it is still false. */
private boolean alreadyMarshalled;
/** Set by {@code _init}; {@code initRead} asserts it is still false. */
private boolean alreadyUnmarshalled;
/** When true, {@code readStr} returns the result of {@code readUtf8} instead of decoding to a String. */
protected boolean readStringAsCharSeq = false;
/** Creates a codec without an {@link ObjectResolver}, string cache or document field filter. */
public JavaBinCodec() {
  this((ObjectResolver) null, (StringCache) null);
}

/**
 * Selects how strings are materialized while reading.
 *
 * @param flag stored in {@code readStringAsCharSeq}; when true {@code readStr} delegates to {@code readUtf8}
 * @return this codec, to allow chaining
 */
public JavaBinCodec setReadStringAsCharSeq(boolean flag) {
  this.readStringAsCharSeq = flag;
  return this;
}

/**
 * Creates a codec that is immediately usable as a {@link PushWriter}: the stream is wrapped and the
 * version byte is written right away. Call {@link #close()} explicitly after use.
 *
 * @param os The output stream
 * @param resolver fallback serializer for unsupported types, may be null
 */
public JavaBinCodec(OutputStream os, ObjectResolver resolver) throws IOException {
  this.resolver = resolver;
  initWrite(os);
}

/**
 * Creates a codec with the given resolver and no string cache.
 *
 * @param resolver fallback serializer for unsupported types, may be null
 */
public JavaBinCodec(ObjectResolver resolver) {
  this(resolver, null);
}

/**
 * Installs the filter consulted when writing {@link SolrDocument} fields.
 *
 * @param writableDocFields the filter, or null to write every field
 * @return this codec, to allow chaining
 */
public JavaBinCodec setWritableDocFields(WritableDocFields writableDocFields) {
  this.writableDocFields = writableDocFields;
  return this;
}

/**
 * Creates a codec with the given resolver and string cache.
 *
 * @param resolver fallback serializer for unsupported types, may be null
 * @param stringCache cache used while decoding strings, may be null
 */
public JavaBinCodec(ObjectResolver resolver, StringCache stringCache) {
  this.resolver = resolver;
  this.stringCache = stringCache;
}

/** @return the resolver supplied at construction time, possibly null */
public ObjectResolver getResolver() {
  return this.resolver;
}
/**
 * Writes the version byte followed by {@code nl} to {@code os}. The codec is marked as used and the
 * internal buffer is flushed even when writing fails.
 *
 * @param nl the value to serialize
 * @param os destination stream
 */
public void marshal(Object nl, OutputStream os) throws IOException {
  try {
    initWrite(os);
    writeVal(nl);
  } finally {
    this.alreadyMarshalled = true;
    this.daos.flushBuffer();
  }
}

/**
 * Wraps {@code os}, installs it through {@link #init(FastOutputStream)} and writes the version byte.
 * Asserts that this codec has not been used by {@code marshal} before.
 */
protected void initWrite(OutputStream os) throws IOException {
  assert !alreadyMarshalled;
  FastOutputStream wrapped = FastOutputStream.wrap(os);
  init(wrapped);
  daos.writeByte(VERSION);
}

/** expert: sets a new output stream */
public void init(FastOutputStream os) {
  this.daos = os;
}
/** Version byte taken from the header of the stream most recently passed to {@code _init}. */
byte version;

/**
 * Deserializes a single value from a byte array holding javabin data.
 *
 * @param buf the complete serialized form, starting with the version byte
 * @return the decoded value
 */
public Object unmarshal(byte[] buf) throws IOException {
  return readVal(initRead(buf));
}

/**
 * Deserializes a single value from a stream holding javabin data.
 *
 * @param is stream positioned at the version byte
 * @return the decoded value
 */
public Object unmarshal(InputStream is) throws IOException {
  return readVal(initRead(is));
}

/** Wraps {@code is} and validates the version byte; asserts that nothing was unmarshalled before. */
protected FastInputStream initRead(InputStream is) throws IOException {
  assert !alreadyUnmarshalled;
  return _init(FastInputStream.wrap(is));
}

/** Wraps {@code buf} and validates the version byte; asserts that nothing was unmarshalled before. */
protected FastInputStream initRead(byte[] buf) throws IOException {
  assert !alreadyUnmarshalled;
  return _init(new FastInputStream(null, buf, 0, buf.length));
}
/**
 * Reads the leading version byte, stores it in {@code version} and rejects streams written with a
 * different version.
 *
 * @param dis stream positioned at the version byte
 * @return {@code dis}, positioned just after the version byte
 * @throws RuntimeException if the version byte does not match the version this codec supports
 */
protected FastInputStream _init(FastInputStream dis) throws IOException {
  version = dis.readByte();
  if (version != VERSION) {
    throw new RuntimeException("Invalid version (expected " + VERSION +
        ", but " + version + ") or the data is not in 'javabin' format");
  }
  // only mark the codec as used once the header has been accepted
  alreadyUnmarshalled = true;
  return dis;
}
/**
 * Reads a {@link SimpleOrderedMap}: the entry count comes from the current tag byte (see
 * {@code readSize}), followed by that many name/value pairs.
 */
public SimpleOrderedMap readOrderedMap(DataInputInputStream dis) throws IOException {
  final int count = readSize(dis);
  SimpleOrderedMap result = new SimpleOrderedMap<>(count);
  int read = 0;
  while (read < count) {
    String key = (String) readVal(dis);
    result.add(key, readVal(dis));
    read++;
  }
  return result;
}

/**
 * Reads a {@link NamedList}: the entry count comes from the current tag byte (see
 * {@code readSize}), followed by that many name/value pairs.
 */
public NamedList readNamedList(DataInputInputStream dis) throws IOException {
  final int count = readSize(dis);
  NamedList result = new NamedList<>(count);
  int read = 0;
  while (read < count) {
    String key = (String) readVal(dis);
    result.add(key, readVal(dis));
    read++;
  }
  return result;
}
public void writeNamedList(NamedList> nl) throws IOException {
writeTag(nl instanceof SimpleOrderedMap ? ORDERED_MAP : NAMED_LST, nl.size());
for (int i = 0; i < nl.size(); i++) {
String name = nl.getName(i);
writeExternString(name);
Object val = nl.getVal(i);
writeVal(val);
}
}
/**
 * Writes any value. Natively supported types are written directly; anything else is offered to an
 * {@link ObjectResolver} and, failing that, written as the string {@code className:toString()}.
 *
 * @param val the value to write, may be null
 */
public void writeVal(Object val) throws IOException {
  if (writeKnownType(val)) {
    return;
  }

  // A value that is itself an ObjectResolver is asked to resolve itself; otherwise the codec-wide one is used.
  final ObjectResolver effective = (val instanceof ObjectResolver) ? (ObjectResolver) val : this.resolver;
  if (effective != null) {
    final Object resolved = effective.resolve(val, this);
    if (resolved == null) {
      return; // null means the resolver took care of it fully
    }
    if (writeKnownType(resolved)) {
      return;
    }
  }

  // Fallback to do *something*.
  // note: if the user of this codec doesn't want this (e.g. UpdateLog) it can supply an ObjectResolver that does
  // something else like throw an exception.
  writeVal(val.getClass().getName() + ':' + val.toString());
}
/** Sentinel returned by {@code readObject} when an {@code END} tag is read. */
protected static final Object END_OBJ = new Object();
/** The most recently read tag byte; the read methods take their inline size/payload bits from it. */
protected byte tagByte;

/**
 * Reads the next tag byte into {@code tagByte} and decodes the value that follows it.
 *
 * @return the decoded value, {@code null} for a NULL tag, or {@link #END_OBJ} for an END tag
 */
public Object readVal(DataInputInputStream dis) throws IOException {
  tagByte = dis.readByte();
  return readObject(dis);
}

/**
 * Decodes the value announced by the current {@code tagByte}.
 *
 * @throws RuntimeException if the tag is not recognised
 */
protected Object readObject(DataInputInputStream dis) throws IOException {
  // First try the tags that share their byte with a size/payload: only the top three bits identify the type.
  switch (tagByte >>> 5) {
    case STR >>> 5:
      return readStr(dis, stringCache, readStringAsCharSeq);
    case SINT >>> 5:
      return readSmallInt(dis);
    case SLONG >>> 5:
      return readSmallLong(dis);
    case ARR >>> 5:
      return readArray(dis);
    case ORDERED_MAP >>> 5:
      return readOrderedMap(dis);
    case NAMED_LST >>> 5:
      return readNamedList(dis);
    case EXTERN_STRING >>> 5:
      return readExternString(dis);
    default:
      break;
  }

  // Otherwise the whole byte is the tag.
  switch (tagByte) {
    case NULL:
      return null;
    case BOOL_TRUE:
      return Boolean.TRUE;
    case BOOL_FALSE:
      return Boolean.FALSE;
    case BYTE:
      return dis.readByte();
    case SHORT:
      return dis.readShort();
    case DOUBLE:
      return dis.readDouble();
    case INT:
      return dis.readInt();
    case LONG:
      return dis.readLong();
    case FLOAT:
      return dis.readFloat();
    case DATE:
      return new Date(dis.readLong());
    case MAP:
      return readMap(dis);
    case SOLRDOC:
      return readSolrDocument(dis);
    case SOLRDOCLST:
      return readSolrDocumentList(dis);
    case BYTEARR:
      return readByteArray(dis);
    case ITERATOR:
      return readIterator(dis);
    case END:
      return END_OBJ;
    case SOLRINPUTDOC:
      return readSolrInputDocument(dis);
    case MAP_ENTRY_ITER:
      return readMapIter(dis);
    case ENUM_FIELD_VALUE:
      return readEnumFieldValue(dis);
    case MAP_ENTRY:
      return readMapEntry(dis);
    default:
      throw new RuntimeException("Unknown type " + tagByte);
  }
}
@SuppressWarnings({"unchecked", "rawtypes"})
public boolean writeKnownType(Object val) throws IOException {
if (writePrimitive(val)) return true;
if (val instanceof NamedList) {
writeNamedList((NamedList>) val);
return true;
}
if (val instanceof SolrDocumentList) { // SolrDocumentList is a List, so must come before List check
writeSolrDocumentList((SolrDocumentList) val);
return true;
}
if (val instanceof SolrInputField) {
return writeKnownType(((SolrInputField) val).getValue());
}
if (val instanceof IteratorWriter) {
writeIterator((IteratorWriter) val);
return true;
}
if (val instanceof Collection) {
writeArray((Collection) val);
return true;
}
if (val instanceof Object[]) {
writeArray((Object[]) val);
return true;
}
if (val instanceof SolrDocument) {
//this needs special treatment to know which fields are to be written
writeSolrDocument((SolrDocument) val);
return true;
}
if (val instanceof SolrInputDocument) {
writeSolrInputDocument((SolrInputDocument)val);
return true;
}
if (val instanceof MapWriter) {
writeMap((MapWriter) val);
return true;
}
if (val instanceof Map) {
writeMap((Map) val);
return true;
}
if (val instanceof Iterator) {
writeIterator((Iterator) val);
return true;
}
if (val instanceof Path) {
writeStr(((Path) val).toAbsolutePath().toString());
return true;
}
if (val instanceof Iterable) {
writeIterator(((Iterable) val).iterator());
return true;
}
if (val instanceof EnumFieldValue) {
writeEnumFieldValue((EnumFieldValue) val);
return true;
}
if (val instanceof Map.Entry) {
writeMapEntry((Map.Entry)val);
return true;
}
if (val instanceof MapSerializable) {
//todo find a better way to reuse the map more efficiently
writeMap(((MapSerializable) val).toMap(new NamedList().asShallowMap()));
return true;
}
if (val instanceof AtomicInteger) {
writeInt(((AtomicInteger) val).get());
return true;
}
if (val instanceof AtomicLong) {
writeLong(((AtomicLong) val).get());
return true;
}
if (val instanceof AtomicBoolean) {
writeBoolean(((AtomicBoolean) val).get());
return true;
}
return false;
}
/**
 * {@link MapWriter.EntryWriter} that streams each entry straight into the enclosing codec: the key
 * is written as an extern string, immediately followed by the value.
 */
public class BinEntryWriter implements MapWriter.EntryWriter {
  /** Lazily created consumer obtained from the interface's default implementation. */
  private BiConsumer biConsumer;

  @Override
  public MapWriter.EntryWriter put(CharSequence k, Object v) throws IOException {
    JavaBinCodec.this.writeExternString(k);
    JavaBinCodec.this.writeVal(v);
    return this;
  }

  @Override
  public MapWriter.EntryWriter put(CharSequence k, int v) throws IOException {
    JavaBinCodec.this.writeExternString(k);
    JavaBinCodec.this.writeInt(v);
    return this;
  }

  @Override
  public MapWriter.EntryWriter put(CharSequence k, long v) throws IOException {
    JavaBinCodec.this.writeExternString(k);
    JavaBinCodec.this.writeLong(v);
    return this;
  }

  @Override
  public MapWriter.EntryWriter put(CharSequence k, float v) throws IOException {
    JavaBinCodec.this.writeExternString(k);
    JavaBinCodec.this.writeFloat(v);
    return this;
  }

  @Override
  public MapWriter.EntryWriter put(CharSequence k, double v) throws IOException {
    JavaBinCodec.this.writeExternString(k);
    JavaBinCodec.this.writeDouble(v);
    return this;
  }

  @Override
  public MapWriter.EntryWriter put(CharSequence k, boolean v) throws IOException {
    JavaBinCodec.this.writeExternString(k);
    JavaBinCodec.this.writeBoolean(v);
    return this;
  }

  @Override
  public MapWriter.EntryWriter put(CharSequence k, CharSequence v) throws IOException {
    JavaBinCodec.this.writeExternString(k);
    JavaBinCodec.this.writeStr(v);
    return this;
  }

  @Override
  public BiConsumer getBiConsumer() {
    if (biConsumer != null) {
      return biConsumer;
    }
    biConsumer = MapWriter.EntryWriter.super.getBiConsumer();
    return biConsumer;
  }
}

/** Shared entry writer bound to this codec. */
public final BinEntryWriter ew = new BinEntryWriter();
/**
 * Writes a {@link MapWriter} as an open-ended entry sequence: a {@code MAP_ENTRY_ITER} tag, the
 * entries emitted by the writer, then an {@code END} tag.
 */
public void writeMap(MapWriter val) throws IOException {
  writeTag(MAP_ENTRY_ITER);
  val.writeMap(this.ew);
  writeTag(END);
}

/** Writes a bare tag byte. */
public void writeTag(byte tag) throws IOException {
  this.daos.writeByte(tag);
}

/**
 * Writes a tag together with a size.
 *
 * <p>For tags that use only the top three bits, sizes below 0x1f are packed into the low five bits
 * of the tag byte; larger sizes store 0x1f there and the remainder as a VInt. For all other tags
 * the tag byte is followed by the size as a VInt.
 *
 * @param tag the type tag
 * @param size the size to encode
 */
public void writeTag(byte tag, int size) throws IOException {
  final boolean wholeByteTag = (tag & 0xe0) == 0;
  if (wholeByteTag) {
    daos.writeByte(tag);
    writeVInt(size, daos);
    return;
  }
  if (size >= 0x1f) {
    daos.writeByte(tag | 0x1f);
    writeVInt(size - 0x1f, daos);
  } else {
    daos.writeByte(tag | size);
  }
}
/**
 * Writes {@code len} bytes of {@code arr}, starting at {@code offset}, preceded by a
 * {@code BYTEARR} tag and the length.
 */
public void writeByteArray(byte[] arr, int offset, int len) throws IOException {
  writeTag(BYTEARR, len);
  this.daos.write(arr, offset, len);
}

/**
 * Reads a byte array whose length is stored as a VInt in front of the data.
 *
 * @return a newly allocated array holding the bytes read
 */
public byte[] readByteArray(DataInputInputStream dis) throws IOException {
  final int length = readVInt(dis);
  final byte[] data = new byte[length];
  dis.readFully(data);
  return data;
}
// Set while child documents are being written so that the writable-field filter is bypassed for
// them (child docs ignore the fl flag). Is it a good design?
private boolean ignoreWritable = false;
/** Lazily created entry writer that forwards to {@code ew} only the keys accepted by {@link #toWrite(String)}. */
private MapWriter.EntryWriter cew;

/**
 * Writes a {@link SolrDocument}: a {@code SOLRDOC} tag, an {@code ORDERED_MAP} tag whose size is
 * the number of fields that pass the writable-field filter plus the number of child documents, the
 * filtered fields, and finally the child documents. All fields of child documents are written.
 *
 * @param doc the document to write
 */
public void writeSolrDocument(SolrDocument doc) throws IOException {
  List<SolrDocument> children = doc.getChildDocuments();
  int fieldsCount = 0;
  if (writableDocFields == null || writableDocFields.wantsAllFields() || ignoreWritable) {
    fieldsCount = doc.size();
  } else {
    for (Entry<String, Object> e : doc) {
      if (toWrite(e.getKey())) fieldsCount++;
    }
  }
  int sz = fieldsCount + (children == null ? 0 : children.size());
  writeTag(SOLRDOC);
  writeTag(ORDERED_MAP, sz);
  if (cew == null) cew = new ConditionalKeyMapWriter.EntryWriterWrapper(ew, (k) -> toWrite(k.toString()));
  doc.writeMap(cew);
  if (children != null) {
    // Restore the previous state rather than forcing false: with nested children an unconditional
    // reset would switch filtering back on for the remaining siblings of an outer level.
    final boolean previous = ignoreWritable;
    try {
      ignoreWritable = true;
      for (SolrDocument child : children) {
        writeSolrDocument(child);
      }
    } finally {
      ignoreWritable = previous;
    }
  }
}

/**
 * @param key a field name
 * @return true if no filter is installed, child documents are currently being written, or the
 *     filter accepts {@code key}
 */
protected boolean toWrite(String key) {
  return writableDocFields == null || ignoreWritable || writableDocFields.isWritable(key);
}
/**
 * Reads a {@link SolrDocument}. The next byte is the size-carrying tag; each of the announced
 * entries is either a child document or a field name followed by its value.
 */
public SolrDocument readSolrDocument(DataInputInputStream dis) throws IOException {
  tagByte = dis.readByte();
  final int entryCount = readSize(dis);
  final SolrDocument doc = new SolrDocument(new LinkedHashMap<>(entryCount));
  for (int i = 0; i < entryCount; i++) {
    final Object nameOrChild = readVal(dis); // could be a field name, or a child document
    if (nameOrChild instanceof SolrDocument) {
      doc.addChildDocument((SolrDocument) nameOrChild);
    } else {
      final String fieldName = (String) nameOrChild;
      doc.setField(fieldName, readVal(dis));
    }
  }
  return doc;
}
/**
 * Reads a {@link SolrDocumentList}: a header list (numFound, start, maxScore and, when present,
 * numFoundExact) followed by the list of documents.
 */
public SolrDocumentList readSolrDocumentList(DataInputInputStream dis) throws IOException {
  SolrDocumentList result = new SolrDocumentList();
  @SuppressWarnings("unchecked")
  List header = (List) readVal(dis);
  result.setNumFound((Long) header.get(0));
  result.setStart((Long) header.get(1));
  result.setMaxScore((Float) header.get(2));
  if (header.size() > 3) { //needed for back compatibility
    result.setNumFoundExact((Boolean) header.get(3));
  }
  @SuppressWarnings("unchecked")
  List documents = (List) readVal(dis);
  result.addAll(documents);
  return result;
}

/**
 * Writes a {@link SolrDocumentList}: a {@code SOLRDOCLST} tag, a four-element header array
 * (numFound, start, maxScore, numFoundExact) and then the documents as an array.
 */
public void writeSolrDocumentList(SolrDocumentList docs)
    throws IOException {
  writeTag(SOLRDOCLST);
  List<Object> header = Arrays.asList(
      docs.getNumFound(),
      docs.getStart(),
      docs.getMaxScore(),
      docs.getNumFoundExact());
  writeArray(header);
  writeArray(docs);
}
/**
 * Reads a {@link SolrInputDocument}. The entry count is a VInt, followed by the (ignored) document
 * boost and then the entries: each is a child document, or a field name (optionally preceded by an
 * ignored field boost) followed by its value.
 */
public SolrInputDocument readSolrInputDocument(DataInputInputStream dis) throws IOException {
  final int sz = readVInt(dis);
  final float docBoost = (Float) readVal(dis);
  if (docBoost != 1f) {
    reportIgnoredBoost("Ignoring document boost: " + docBoost + " as index-time boosts are not supported anymore");
  }
  final SolrInputDocument sdoc = createSolrInputDocument(sz);
  for (int i = 0; i < sz; i++) {
    final Object head = readVal(dis); // could be a boost, a field name, or a child document
    final String fieldName;
    if (head instanceof Float) {
      final float boost = (Float) head;
      if (boost != 1f) {
        reportIgnoredBoost("Ignoring field boost: " + boost + " as index-time boosts are not supported anymore");
      }
      fieldName = (String) readVal(dis);
    } else if (head instanceof SolrInputDocument) {
      sdoc.addChildDocument((SolrInputDocument) head);
      continue;
    } else {
      fieldName = (String) head;
    }
    sdoc.setField(fieldName, readVal(dis));
  }
  return sdoc;
}

/** Logs {@code message} at WARN the first time any boost is reported and at DEBUG afterwards. */
private static void reportIgnoredBoost(String message) {
  if (WARNED_ABOUT_INDEX_TIME_BOOSTS.compareAndSet(false, true)) {
    log.warn(message);
  } else {
    log.debug(message);
  }
}

/**
 * Creates the document instance that {@code readSolrInputDocument} populates.
 *
 * @param sz number of entries announced by the stream, used as the initial capacity of the field map
 */
protected SolrInputDocument createSolrInputDocument(int sz) {
  return new SolrInputDocument(new LinkedHashMap<>(sz));
}
/** Accepts every key except {@link CommonParams#CHILDDOC}. */
static final Predicate<CharSequence> IGNORECHILDDOCS = it -> !CommonParams.CHILDDOC.equals(it.toString());

/**
 * Writes a {@link SolrInputDocument}: a {@code SOLRINPUTDOC} tag with the entry count (fields plus
 * child documents), a document boost of 1, the fields (skipping the {@link CommonParams#CHILDDOC}
 * key) and then each child document.
 *
 * @param sdoc the document to write
 */
public void writeSolrInputDocument(SolrInputDocument sdoc) throws IOException {
  List<SolrInputDocument> children = sdoc.getChildDocuments();
  int sz = sdoc.size() + (children == null ? 0 : children.size());
  writeTag(SOLRINPUTDOC, sz);
  writeFloat(1f); // document boost
  sdoc.writeMap(new ConditionalKeyMapWriter.EntryWriterWrapper(ew, IGNORECHILDDOCS));
  if (children != null) {
    for (SolrInputDocument child : children) {
      writeSolrInputDocument(child);
    }
  }
}
/**
 * Reads key/value pairs until an {@code END} tag is found in key position.
 *
 * @return the entries, in a map created by {@code newMap(-1)}
 */
public Map readMapIter(DataInputInputStream dis) throws IOException {
  Map result = newMap(-1);
  Object key = readVal(dis);
  while (key != END_OBJ) {
    result.put(key, readVal(dis));
    key = readVal(dis);
  }
  return result;
}

/**
 * create a new Map object
 * @param size expected size, -1 means unknown size
 */
protected Map newMap(int size) {
  if (size < 0) {
    return new LinkedHashMap<>();
  }
  return new LinkedHashMap<>(size);
}

/** Reads a map whose entry count is stored as a VInt in front of the entries. */
public Map readMap(DataInputInputStream dis)
    throws IOException {
  return readMap(dis, readVInt(dis));
}

/**
 * Reads exactly {@code sz} key/value pairs.
 *
 * @param sz number of entries to read
 */
protected Map readMap(DataInputInputStream dis, int sz) throws IOException {
  Map result = newMap(sz);
  int remaining = sz;
  while (remaining-- > 0) {
    Object key = readVal(dis);
    result.put(key, readVal(dis));
  }
  return result;
}
/** {@link ItemWriter} that forwards every added item to the matching write method of this codec. */
public final ItemWriter itemWriter = new ItemWriter() {
  @Override
  public ItemWriter add(Object o) throws IOException {
    JavaBinCodec.this.writeVal(o);
    return this;
  }

  @Override
  public ItemWriter add(int v) throws IOException {
    JavaBinCodec.this.writeInt(v);
    return this;
  }

  @Override
  public ItemWriter add(long v) throws IOException {
    JavaBinCodec.this.writeLong(v);
    return this;
  }

  @Override
  public ItemWriter add(float v) throws IOException {
    JavaBinCodec.this.writeFloat(v);
    return this;
  }

  @Override
  public ItemWriter add(double v) throws IOException {
    JavaBinCodec.this.writeDouble(v);
    return this;
  }

  @Override
  public ItemWriter add(boolean v) throws IOException {
    JavaBinCodec.this.writeBoolean(v);
    return this;
  }
};

/** Writes an {@code ITERATOR} tag, the items produced by {@code val}, then an {@code END} tag. */
@Override
public void writeIterator(IteratorWriter val) throws IOException {
  writeTag(ITERATOR);
  val.writeIter(this.itemWriter);
  writeTag(END);
}

/** Writes an {@code ITERATOR} tag, every remaining element of {@code iter}, then an {@code END} tag. */
public void writeIterator(@SuppressWarnings({"rawtypes"})Iterator iter) throws IOException {
  writeTag(ITERATOR);
  while (iter.hasNext()) {
    Object element = iter.next();
    writeVal(element);
  }
  writeTag(END);
}
/**
 * Reads values until an {@code END} tag is encountered.
 *
 * @return the values read, in stream order
 */
public List readIterator(DataInputInputStream fis) throws IOException {
  ArrayList items = new ArrayList<>();
  for (Object item = readVal(fis); item != END_OBJ; item = readVal(fis)) {
    items.add(item);
  }
  return items;
}

/** Writes an {@code ARR} tag carrying the list size, followed by every element accessed by index. */
public void writeArray(@SuppressWarnings({"rawtypes"})List l) throws IOException {
  writeTag(ARR, l.size());
  for (int index = 0; index < l.size(); index++) {
    Object element = l.get(index);
    writeVal(element);
  }
}

/** Writes an {@code ARR} tag carrying the collection size, followed by every element in iteration order. */
public void writeArray(@SuppressWarnings({"rawtypes"})Collection coll) throws IOException {
  writeTag(ARR, coll.size());
  for (Object element : coll) {
    writeVal(element);
  }
}

/** Writes an {@code ARR} tag carrying the array length, followed by every element. */
public void writeArray(Object[] arr) throws IOException {
  writeTag(ARR, arr.length);
  for (Object element : arr) {
    writeVal(element);
  }
}

/** Reads an array whose size is taken from the current tag byte (see {@code readSize}). */
@SuppressWarnings({"unchecked"})
public List readArray(DataInputInputStream dis) throws IOException {
  return readArray(dis, readSize(dis));
}

/**
 * Reads exactly {@code sz} values.
 *
 * @param sz number of elements to read
 */
@SuppressWarnings({"rawtypes"})
protected List readArray(DataInputInputStream dis, int sz) throws IOException {
  ArrayList items = new ArrayList<>(sz);
  int remaining = sz;
  while (remaining-- > 0) {
    items.add(readVal(dis));
  }
  return items;
}
/**
* write {@link EnumFieldValue} as tag+int value+string value
* @param enumFieldValue to write
*/
public void writeEnumFieldValue(EnumFieldValue enumFieldValue) throws IOException {
writeTag(ENUM_FIELD_VALUE);
writeInt(enumFieldValue.toInt());
writeStr(enumFieldValue.toString());
}
public void writeMapEntry(Map.Entry,?> val) throws IOException {
writeTag(MAP_ENTRY);
writeVal(val.getKey());
writeVal(val.getValue());
}
/**
* read {@link EnumFieldValue} (int+string) from input stream
* @param dis data input stream
* @return {@link EnumFieldValue}
*/
public EnumFieldValue readEnumFieldValue(DataInputInputStream dis) throws IOException {
Integer intValue = (Integer) readVal(dis);
String stringValue = (String) convertCharSeq (readVal(dis));
return new EnumFieldValue(intValue, stringValue);
}
public Map.Entry readMapEntry(DataInputInputStream dis) throws IOException {
final Object key = readVal(dis);
final Object value = readVal(dis);
return new Map.Entry() {
@Override
public Object getKey() {
return key;
}
@Override
public Object getValue() {
return value;
}
@Override
public String toString() {
return "MapEntry[" + key + ":" + value + "]";
}
@Override
public Object setValue(Object value) {
throw new UnsupportedOperationException();
}
@Override
public int hashCode() {
int result = 31;
result *=31 + getKey().hashCode();
result *=31 + getValue().hashCode();
return result;
}
@Override
public boolean equals(Object obj) {
if(this == obj) {
return true;
}
if (obj instanceof Map.Entry, ?>) {
Entry, ?> entry = (Entry, ?>) obj;
return (this.getKey().equals(entry.getKey()) && this.getValue().equals(entry.getValue()));
}
return false;
}
};
}
/**
 * write the string as tag+length, with length being the number of UTF-8 bytes
 *
 * <p>A {@code null} string is written as a {@code NULL} tag and a {@link Utf8CharSequence} is handed
 * to {@code writeUTF8Str}. Other strings are encoded in one pass through the reusable buffer when
 * their worst-case encoded size is small enough, and in two passes (measure, then stream) otherwise.
 */
public void writeStr(CharSequence s) throws IOException {
  if (s == null) {
    writeTag(NULL);
    return;
  }
  if (s instanceof Utf8CharSequence) {
    writeUTF8Str((Utf8CharSequence) s);
    return;
  }

  final int charCount = s.length();
  final int worstCaseBytes = charCount * ByteUtils.MAX_UTF8_BYTES_PER_CHAR;

  if (worstCaseBytes > MAX_UTF8_SIZE_FOR_ARRAY_GROW_STRATEGY) {
    // double pass logic for large strings, see SOLR-7971
    final int encodedLength = ByteUtils.calcUTF16toUTF8Length(s, 0, charCount);
    writeTag(STR, encodedLength);
    if (bytes == null || bytes.length < 8192) {
      bytes = new byte[8192];
    }
    ByteUtils.writeUTF16toUTF8(s, 0, charCount, daos, bytes);
    return;
  }

  if (bytes == null || bytes.length < worstCaseBytes) {
    bytes = new byte[worstCaseBytes];
  }
  final int encodedLength = ByteUtils.UTF16toUTF8(s, 0, charCount, bytes, 0);
  writeTag(STR, encodedLength);
  daos.write(bytes, 0, encodedLength);
}
/** Reusable scratch buffer for encoding and decoding strings; grown on demand. */
byte[] bytes;
/** Reusable character buffer used when decoding UTF-8 into a String. */
CharArr arr = new CharArr();
/** Reusable key object for string cache lookups; re-pointed at {@code bytes} before each lookup. */
private StringBytes bytesRef = new StringBytes(bytes,0,0);

/** Reads a string without consulting any string cache, honouring {@code readStringAsCharSeq}. */
public CharSequence readStr(DataInputInputStream dis) throws IOException {
  return readStr(dis, null, this.readStringAsCharSeq);
}

/**
 * Reads a string whose UTF-8 length is taken from the current tag byte.
 *
 * @param stringCache cache to resolve the decoded string through, or null to decode directly
 * @param readStringAsCharSeq when true the result comes from {@code readUtf8} and the cache is not consulted
 */
public CharSequence readStr(DataInputInputStream dis, StringCache stringCache, boolean readStringAsCharSeq) throws IOException {
  if (!readStringAsCharSeq) {
    final int length = readSize(dis);
    return _readStr(dis, stringCache, length);
  }
  return readUtf8(dis);
}

/** Reads {@code sz} UTF-8 bytes into the scratch buffer and turns them into a String. */
private CharSequence _readStr(DataInputInputStream dis, StringCache stringCache, int sz) throws IOException {
  if (bytes == null || bytes.length < sz) {
    bytes = new byte[sz];
  }
  dis.readFully(bytes, 0, sz);
  if (stringCache == null) {
    arr.reset();
    ByteUtils.UTF8toUTF16(bytes, 0, sz, arr);
    return arr.toString();
  }
  return stringCache.get(bytesRef.reset(bytes, 0, sz));
}
/////////// code to optimize reading UTF8
static final int MAX_UTF8_SZ = 1024 * 64;//too big strings can cause too much memory allocation
/** Lazily created converter attached to every {@link ByteArrayUtf8CharSequence} this codec returns. */
private Function<ByteArrayUtf8CharSequence, String> stringProvider;
/** Lazily created block from which space for read strings is obtained via {@code expand}. */
private BytesBlock bytesBlock;

/** Reads a UTF-8 string whose length is taken from the current tag byte. */
protected CharSequence readUtf8(DataInputInputStream dis) throws IOException {
  int sz = readSize(dis);
  return readUtf8(dis, sz);
}

/**
 * Reads {@code sz} bytes of UTF-8 without eagerly decoding them.
 *
 * <p>If the stream can fill the result directly ({@code readDirectUtf8}) that result is returned.
 * Otherwise strings longer than {@link #MAX_UTF8_SZ} are decoded to a String right away, and
 * shorter ones are read into space obtained from the shared {@link BytesBlock}.
 *
 * @param sz number of UTF-8 bytes to read
 */
protected CharSequence readUtf8(DataInputInputStream dis, int sz) throws IOException {
  ByteArrayUtf8CharSequence result = new ByteArrayUtf8CharSequence(null,0,0);
  if (dis.readDirectUtf8(result, sz)) {
    result.stringProvider = getStringProvider();
    return result;
  }
  if (sz > MAX_UTF8_SZ) return _readStr(dis, null, sz);
  if (bytesBlock == null) bytesBlock = new BytesBlock(1024 * 4);
  BytesBlock block = this.bytesBlock.expand(sz);
  dis.readFully(block.getBuf(), block.getStartPos(), sz);
  result.reset(block.getBuf(), block.getStartPos(), sz,null);
  result.stringProvider = getStringProvider();
  return result;
}

/**
 * Returns the shared converter from {@link ByteArrayUtf8CharSequence} to {@link String}, creating it
 * on first use. The converter reuses one {@link CharArr} and synchronizes on it while decoding.
 */
private Function<ByteArrayUtf8CharSequence, String> getStringProvider() {
  if (stringProvider == null) {
    stringProvider = new Function<ByteArrayUtf8CharSequence, String>() {
      final CharArr charArr = new CharArr(8);
      @Override
      public String apply(ByteArrayUtf8CharSequence butf8cs) {
        synchronized (charArr) {
          charArr.reset();
          ByteUtils.UTF8toUTF16(butf8cs.buf, butf8cs.offset(), butf8cs.size(), charArr);
          return charArr.toString();
        }
      }
    };
  }
  return this.stringProvider;
}
/**
 * Writes an int. Positive values use the compact {@code SINT} form: the low four bits share the tag
 * byte, and bit 0x10 signals that the remaining bits follow as a VInt. Zero and negative values are
 * written as an {@code INT} tag followed by four bytes.
 */
public void writeInt(int val) throws IOException {
  if (val <= 0) {
    daos.writeByte(INT);
    daos.writeInt(val);
    return;
  }
  int tagAndLowBits = SINT | (val & 0x0f);
  if (val < 0x0f) {
    daos.writeByte(tagAndLowBits);
    return;
  }
  tagAndLowBits |= 0x10;
  daos.writeByte(tagAndLowBits);
  writeVInt(val >>> 4, daos);
}

/** Decodes a compact int from the current tag byte plus, if bit 0x10 is set, a following VInt. */
public int readSmallInt(DataInputInputStream dis) throws IOException {
  final int lowBits = tagByte & 0x0F;
  final boolean hasContinuation = (tagByte & 0x10) != 0;
  return hasContinuation ? (readVInt(dis) << 4) | lowBits : lowBits;
}

/**
 * Writes a long. Values whose top byte is zero use the compact {@code SLONG} form (low four bits in
 * the tag byte, bit 0x10 signalling a following VLong); all other values are written as a
 * {@code LONG} tag followed by eight bytes.
 */
public void writeLong(long val) throws IOException {
  if ((val & 0xff00000000000000L) != 0) {
    daos.writeByte(LONG);
    daos.writeLong(val);
    return;
  }
  int tagAndLowBits = SLONG | ((int) val & 0x0f);
  if (val < 0x0f) {
    daos.writeByte(tagAndLowBits);
    return;
  }
  tagAndLowBits |= 0x10;
  daos.writeByte(tagAndLowBits);
  writeVLong(val >>> 4, daos);
}

/** Decodes a compact long from the current tag byte plus, if bit 0x10 is set, a following VLong. */
public long readSmallLong(DataInputInputStream dis) throws IOException {
  final long lowBits = tagByte & 0x0F;
  final boolean hasContinuation = (tagByte & 0x10) != 0;
  return hasContinuation ? (readVLong(dis) << 4) | lowBits : lowBits;
}

/** Writes a {@code FLOAT} tag followed by the float value. */
public void writeFloat(float val) throws IOException {
  this.daos.writeByte(FLOAT);
  this.daos.writeFloat(val);
}
/**
 * Writes {@code val} if it is null, a character sequence, one of the boxed numeric types handled
 * below, a {@link Date}, a {@link Boolean}, a byte array, a {@link ByteBuffer} or the
 * {@link #END_OBJ} sentinel.
 *
 * <p>For a {@link ByteBuffer} the bytes between position and limit are written and the buffer's
 * position is left untouched. Buffers without an accessible backing array (direct or read-only)
 * are copied first instead of failing on {@code array()}.
 *
 * @param val the value to write, may be null
 * @return true if the value was written, false if it is not one of the types above (this includes
 *     {@link Number} subclasses other than Integer, Long, Float, Double, Byte and Short)
 */
public boolean writePrimitive(Object val) throws IOException {
  if (val == null) {
    daos.writeByte(NULL);
    return true;
  } else if (val instanceof Utf8CharSequence) {
    writeUTF8Str((Utf8CharSequence) val);
    return true;
  } else if (val instanceof CharSequence) {
    writeStr((CharSequence) val);
    return true;
  } else if (val instanceof Number) {
    if (val instanceof Integer) {
      writeInt(((Integer) val).intValue());
      return true;
    } else if (val instanceof Long) {
      writeLong(((Long) val).longValue());
      return true;
    } else if (val instanceof Float) {
      writeFloat(((Float) val).floatValue());
      return true;
    } else if (val instanceof Double) {
      writeDouble(((Double) val).doubleValue());
      return true;
    } else if (val instanceof Byte) {
      daos.writeByte(BYTE);
      daos.writeByte(((Byte) val).intValue());
      return true;
    } else if (val instanceof Short) {
      daos.writeByte(SHORT);
      daos.writeShort(((Short) val).intValue());
      return true;
    }
    return false;
  } else if (val instanceof Date) {
    daos.writeByte(DATE);
    daos.writeLong(((Date) val).getTime());
    return true;
  } else if (val instanceof Boolean) {
    writeBoolean((Boolean) val);
    return true;
  } else if (val instanceof byte[]) {
    writeByteArray((byte[]) val, 0, ((byte[]) val).length);
    return true;
  } else if (val instanceof ByteBuffer) {
    ByteBuffer buf = (ByteBuffer) val;
    if (buf.hasArray()) {
      writeByteArray(buf.array(), buf.arrayOffset() + buf.position(), buf.remaining());
    } else {
      // No accessible backing array: copy through a duplicate so the caller's position is not moved.
      byte[] copy = new byte[buf.remaining()];
      buf.duplicate().get(copy);
      writeByteArray(copy, 0, copy.length);
    }
    return true;
  } else if (val == END_OBJ) {
    writeTag(END);
    return true;
  }
  return false;
}

/** Writes a boolean as a single {@code BOOL_TRUE} or {@code BOOL_FALSE} tag byte. */
protected void writeBoolean(boolean val) throws IOException {
  if (val) daos.writeByte(BOOL_TRUE);
  else daos.writeByte(BOOL_FALSE);
}

/** Writes a {@code DOUBLE} tag followed by the double value. */
protected void writeDouble(double val) throws IOException {
  daos.writeByte(DOUBLE);
  daos.writeDouble(val);
}
public void writeMap(Map,?> val) throws IOException {
writeTag(MAP, val.size());
if (val instanceof MapWriter) {
((MapWriter) val).writeMap(ew);
return;
}
for (Map.Entry,?> entry : val.entrySet()) {
Object key = entry.getKey();
if (key instanceof String) {
writeExternString((String) key);
} else {
writeVal(key);
}
writeVal(entry.getValue());
}
}
public int readSize(DataInputInputStream in) throws IOException {
int sz = tagByte & 0x1f;
if (sz == 0x1f) sz += readVInt(in);
return sz;
}
/**
 * Special method for variable length int (copied from lucene). Usually used for writing the length of a
 * collection/array/map. In most cases the length fits in one byte (length &lt; 128), which saves 3
 * bytes/object. Each byte carries seven bits of the value, low bits first; the high bit marks that
 * more bytes follow.
 *
 * @throws IOException If there is a low-level I/O error.
 */
public static void writeVInt(int i, FastOutputStream out) throws IOException {
  int remaining = i;
  while ((remaining & ~0x7F) != 0) {
    out.writeByte((byte) ((remaining & 0x7f) | 0x80));
    remaining >>>= 7;
  }
  out.writeByte((byte) remaining);
}

/**
 * The counterpart for {@link #writeVInt(int, FastOutputStream)}
 *
 * @throws IOException If there is a low-level I/O error.
 */
public static int readVInt(DataInputInputStream in) throws IOException {
  byte current = in.readByte();
  int value = current & 0x7F;
  int shift = 7;
  while ((current & 0x80) != 0) {
    current = in.readByte();
    value |= (current & 0x7F) << shift;
    shift += 7;
  }
  return value;
}

/**
 * Variable length encoding of a long, using the same scheme as
 * {@link #writeVInt(int, FastOutputStream)}.
 *
 * @throws IOException If there is a low-level I/O error.
 */
public static void writeVLong(long i, FastOutputStream out) throws IOException {
  long remaining = i;
  while ((remaining & ~0x7F) != 0) {
    out.writeByte((byte) ((remaining & 0x7f) | 0x80));
    remaining >>>= 7;
  }
  out.writeByte((byte) remaining);
}

/**
 * The counterpart for {@link #writeVLong(long, FastOutputStream)}
 *
 * @throws IOException If there is a low-level I/O error.
 */
public static long readVLong(DataInputInputStream in) throws IOException {
  byte current = in.readByte();
  long value = current & 0x7F;
  int shift = 7;
  while ((current & 0x80) != 0) {
    current = in.readByte();
    value |= (long) (current & 0x7F) << shift;
    shift += 7;
  }
  return value;
}
/** Number of distinct extern strings written so far; also the 1-based index of the latest one. */
private int stringsCount = 0;
/** Write side: extern string to its 1-based index; created on first use. */
private Map<String, Integer> stringsMap;
/** Read side: extern strings in the order they were first read; created on first use. */
private List<CharSequence> stringsList;

/**
 * Writes a string that is expected to repeat. The first occurrence is written as an
 * {@code EXTERN_STRING} tag with index 0 followed by the string itself, and is assigned the next
 * 1-based index; later occurrences found in the table are written as just the tag carrying that
 * index. A {@code null} string is written as a {@code NULL} tag.
 *
 * @param s the string to write, may be null
 */
public void writeExternString(CharSequence s) throws IOException {
  if (s == null) {
    writeTag(NULL);
    return;
  }
  Integer idx = stringsMap == null ? null : stringsMap.get(s);
  if (idx == null) idx = 0;
  writeTag(EXTERN_STRING, idx);
  if (idx == 0) {
    writeStr(s);
    if (stringsMap == null) stringsMap = new HashMap<>();
    stringsMap.put(s.toString(), ++stringsCount);
  }
}

/**
 * Reads an extern string: a non-zero index refers to a string read earlier, index 0 is followed by
 * the string itself, which is then remembered for later references.
 */
public CharSequence readExternString(DataInputInputStream fis) throws IOException {
  int idx = readSize(fis);
  if (idx != 0) {// idx != 0 is the index of the extern string
    return stringsList.get(idx - 1);
  } else {// idx == 0 means it has a string value
    tagByte = fis.readByte();
    CharSequence s = readStr(fis, stringCache, false);
    if (s != null) s = s.toString();
    if (stringsList == null) stringsList = new ArrayList<>();
    stringsList.add(s);
    return s;
  }
}
/** Writes an already UTF-8 encoded string: a {@code STR} tag carrying its byte size, then the bytes. */
public void writeUTF8Str(Utf8CharSequence utf8) throws IOException {
  final int byteSize = utf8.size();
  writeTag(STR, byteSize);
  daos.writeUtf8CharSeq(utf8);
}

/** @return the {@code written} counter of the current output stream, or 0 if no stream is set */
public long getTotalBytesWritten() {
  return daos == null ? 0 : daos.written;
}
/**
* Allows extension of {@link JavaBinCodec} to support serialization of arbitrary data types.
*
* Implementors of this interface write a method to serialize a given object using an existing {@link JavaBinCodec}.
* The codec consults the resolver only for values it cannot write natively.
*/
public interface ObjectResolver {
/**
* Examine and attempt to serialize the given object, using a {@link JavaBinCodec} to write it to a stream.
*
* @param o the object that the caller wants serialized.
* @param codec used to actually serialize {@code o}.
* @return {@code null} if the whole object was successfully serialized; otherwise an object for the codec to
* write instead (typically {@code o} itself). If the codec cannot write the returned object natively it falls
* back to writing the original object's class name and {@code toString()}.
* @see JavaBinCodec
*/
Object resolve(Object o, JavaBinCodec codec) throws IOException;
}
/**
* Filter deciding which fields of a {@link SolrDocument} are written by the codec.
*/
public interface WritableDocFields {
/**
* @param name a field name
* @return true if the field with this name should be written
*/
boolean isWritable(String name);
/**
* @return true if every field is wanted; the codec then uses the document's size as the field count
* instead of testing each field name
*/
boolean wantsAllFields();
}
/**
 * Cache of decoded strings keyed by their UTF-8 bytes, used to avoid decoding the same bytes into a
 * new String each time.
 */
public static class StringCache {
  private final Cache<StringBytes, String> cache;

  /** @param cache the backing cache that stores the byte-to-string mappings */
  public StringCache(Cache<StringBytes, String> cache) {
    this.cache = cache;
  }

  /**
   * Returns the String for the given UTF-8 bytes, decoding and caching it on a miss.
   *
   * @param b the bytes to look up; not retained, a private copy is stored as the cache key
   * @return the decoded string
   */
  public String get(StringBytes b) {
    String result = cache.get(b);
    if (result == null) {
      //make a copy because the buffer received may be changed later by the caller
      StringBytes copy = new StringBytes(Arrays.copyOfRange(b.bytes, b.offset, b.offset + b.length), 0, b.length);
      CharArr arr = new CharArr();
      ByteUtils.UTF8toUTF16(b.bytes, b.offset, b.length, arr);
      result = arr.toString();
      cache.put(copy, result);
    }
    return result;
  }
}
/** Flushes the buffered output, if an output stream has been set. */
@Override
public void close() throws IOException {
  if (daos == null) {
    return;
  }
  daos.flushBuffer();
}
}