All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.yahoo.vespaxmlparser.VespaXMLFieldReader Maven / Gradle / Ivy

// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespaxmlparser;

import com.yahoo.document.DataType;
import com.yahoo.document.Document;
import com.yahoo.document.DocumentId;
import com.yahoo.document.DocumentType;
import com.yahoo.document.DocumentTypeManager;
import com.yahoo.document.Field;
import com.yahoo.document.MapDataType;
import com.yahoo.document.PositionDataType;
import com.yahoo.document.annotation.AnnotationReference;
import com.yahoo.document.datatypes.*;
import com.yahoo.document.predicate.Predicate;
import com.yahoo.document.serialization.DeserializationException;
import com.yahoo.document.serialization.FieldReader;
import com.yahoo.text.Utf8;
import com.yahoo.vespa.objects.FieldBase;

import javax.xml.stream.XMLStreamException;
import javax.xml.stream.XMLStreamReader;
import java.io.InputStream;
import java.math.BigInteger;
import java.util.Base64;
import java.util.Optional;

/**
 * XML parser that reads document fields from an XML stream.
 *
 * All read methods assume that the stream is currently positioned at the start element of the relevant field.
 *
 */
public class VespaXMLFieldReader extends VespaXMLReader implements FieldReader {
    private static final BigInteger UINT_MAX = new BigInteger("4294967296");
    private static final BigInteger ULONG_MAX = new BigInteger("18446744073709551616");

    public VespaXMLFieldReader(String fileName, DocumentTypeManager docTypeManager) throws Exception {
        super(fileName, docTypeManager);
    }

    public VespaXMLFieldReader(InputStream stream, DocumentTypeManager docTypeManager) throws Exception {
        super(stream, docTypeManager);
    }

    public VespaXMLFieldReader(XMLStreamReader reader, DocumentTypeManager docTypeManager) {
        super(reader, docTypeManager);
    }

    /**
     * Optional test and set condition. Common for document/update/remove elements
     * This variable is either set in VespaXMLFieldReader#read (reader for document)
     * or in VespaXMLUpdateReader#read (reader for update).
     */
    private Optional condition = Optional.empty();

    public Optional getCondition() {
        return condition;
    }

    public void read(FieldBase field, Document document) {
        try {
            //workaround for documents inside array 
            if (reader.getEventType() != XMLStreamReader.START_ELEMENT || !"document".equals(reader.getName().toString())) {
                while (reader.hasNext()) {
                    if (reader.getEventType() == XMLStreamReader.START_ELEMENT && "document".equals(reader.getName().toString())) {
                        break;
                    }
                    reader.next();
                }
            }

            // First fetch attributes.
            String typeName = null;

            for (int i = 0; i < reader.getAttributeCount(); i++) {
                final String attributeName = reader.getAttributeName(i).toString();
                if ("documentid".equals(attributeName) || "id".equals(attributeName)) {
                    document.setId(new DocumentId(reader.getAttributeValue(i)));
                } else if ("documenttype".equals(attributeName) || "type".equals(attributeName)) {
                    typeName = reader.getAttributeValue(i);
                } else if ("condition".equals(attributeName)) {
                    condition = Optional.of(reader.getAttributeValue(i));
                }
            }

            if (document.getId() != null) {
                if (field == null) {
                    field = new FieldBase(document.getId().toString());
                }
            }

            DocumentType doctype = docTypeManager.getDocumentType(typeName);
            if (doctype == null) {
                throw newDeserializeException(field, "Must specify an existing document type, not '" + typeName + "'");
            } else {
                document.setDataType(doctype);
            }

            // Then fetch fields
            while (reader.hasNext()) {
                int type = reader.next();

                if (type == XMLStreamReader.START_ELEMENT) {
                    Field f = doctype.getField(reader.getName().toString());

                    if (f == null) {
                        throw newDeserializeException(field, "Field " + reader.getName() + " not found.");
                    }

                    FieldValue fv = f.getDataType().createFieldValue();
                    fv.deserialize(f, this);
                    document.setFieldValue(f, fv);
                    skipToEnd(f.getName());
                } else if (type == XMLStreamReader.END_ELEMENT) {
                    return;
                }
            }
        } catch (XMLStreamException e) {
            throw newException(field, e);
        }
    }

    public  void read(FieldBase field, Array value) {
        try {
            while (reader.hasNext()) {
                int type = reader.next();

                if (type == XMLStreamReader.START_ELEMENT) {
                    if ("item".equals(reader.getName().toString())) {
                        FieldValue fv = (value.getDataType()).getNestedType().createFieldValue();
                        deserializeFieldValue(field, fv);
                        // noinspection unchecked
                        value.add((T)fv);
                        skipToEnd("item");
                    }
                } else if (type == XMLStreamReader.END_ELEMENT) {
                    return;
                }
            }
        } catch (XMLStreamException e) {
            throw newException(field, e);
        }
    }

    class KeyAndValue {
        FieldValue key = null;
        FieldValue value = null;
    }

    void readKeyAndValue(FieldBase field, KeyAndValue val, MapDataType dt) throws XMLStreamException {
        while (reader.hasNext()) {
            int type = reader.next();

            if (type == XMLStreamReader.START_ELEMENT) {
                if ("key".equals(reader.getName().toString())) {
                    val.key = dt.getKeyType().createFieldValue();
                    deserializeFieldValue(field, val.key);
                    skipToEnd("key");
                } else if ("value".equals(reader.getName().toString())) {
                    val.value = dt.getValueType().createFieldValue();
                    deserializeFieldValue(field, val.value);
                    skipToEnd("value");
                } else {
                    throw newDeserializeException("Illegal element inside map item: " + reader.getName());
                }
            } else if (type == XMLStreamReader.END_ELEMENT) {
                return;
            }
        }
    }

    public  void read(FieldBase field, MapFieldValue map) {
        try {
            MapDataType dt = map.getDataType();

            while (reader.hasNext()) {
                int type = reader.next();

                if (type == XMLStreamReader.START_ELEMENT) {
                    if ("item".equals(reader.getName().toString())) {
                        KeyAndValue kv = new KeyAndValue();
                        readKeyAndValue(field, kv, dt);

                        if (kv.key == null || kv.value == null) {
                            throw newDeserializeException(field, "Map items must specify both key and value");
                        }
                        // noinspection unchecked
                        map.put((K)kv.key, (V)kv.value);
                        skipToEnd("item");
                    } else {
                        throw newDeserializeException(field, "Illegal tag " + reader.getName() + " expected 'item'");
                    }
                } else if (type == XMLStreamReader.END_ELEMENT) {
                    return;
                }
            }
        } catch (XMLStreamException e) {
            throw newException(field, e);
        }
    }

    public void read(FieldBase field, Struct value) {
        try {
            boolean base64 = isBase64EncodedElement(reader);
            boolean foundField = false;
            StringBuilder positionBuilder = null;
            while (reader.hasNext()) {
                int type = reader.next();
                if (type == XMLStreamReader.START_ELEMENT) {
                    Field structField = value.getField(reader.getName().toString());
                    if (structField == null) {
                        throw newDeserializeException(field, "Field " + reader.getName() + " not found.");
                    }
                    FieldValue fieldValue = structField.getDataType().createFieldValue();
                    fieldValue.deserialize(structField, this);
                    value.setFieldValue(structField, fieldValue);
                    skipToEnd(structField.getName());
                    foundField = true;
                } else if (type == XMLStreamReader.CHARACTERS) {
                    if (foundField) {
                        continue;
                    }
                    // The text of an XML element may be output using 1-n CHARACTERS
                    // events, so we have to buffer up until the end of the element to
                    // ensure we get everything.
                    String chars = reader.getText();
                    if (positionBuilder == null) {
                        positionBuilder = new StringBuilder(chars);
                    } else {
                        positionBuilder.append(chars);
                    }
                } else if (type == XMLStreamReader.END_ELEMENT) {
                    if (positionBuilder != null) {
                        assignPositionFieldFromStringIfNonEmpty(value, positionBuilder.toString(), base64);
                    }
                    break;
                }
            }
        } catch (XMLStreamException e) {
            throw newException(field, e);
        }
    }

    private void assignPositionFieldFromStringIfNonEmpty(Struct value, String elementText, boolean base64) {
        String str = base64 ? Utf8.toString(Base64.getMimeDecoder().decode(elementText)) : elementText;
        str = str.trim();
        if (str.isEmpty()) {
            return;
        }
        DataType valueType = value.getDataType();
        if (valueType.equals(PositionDataType.INSTANCE)) {
            value.assign(PositionDataType.fromString(str));
        }
    }

    public  void read(FieldBase field, WeightedSet value) {
        try {
            while (reader.hasNext()) {
                int type = reader.next();

                if (type == XMLStreamReader.START_ELEMENT) {
                    if ("item".equals(reader.getName().toString())) {
                        FieldValue fv = value.getDataType().getNestedType().createFieldValue();

                        int weight = 1;
                        for (int i = 0; i < reader.getAttributeCount(); i++) {
                            if ("weight".equals(reader.getAttributeName(i).toString())) {
                                weight = Integer.parseInt(reader.getAttributeValue(i));
                            }
                        }

                        deserializeFieldValue(field, fv);
                        // noinspection unchecked
                        value.put((T)fv, weight);
                        skipToEnd("item");
                    } else {
                        throw newDeserializeException(field, "Illegal tag " + reader.getName() + " expected 'item'");
                    }
                } else if (type == XMLStreamReader.END_ELEMENT) {
                    return;
                }
            }
        } catch (XMLStreamException e) {
            throw newException(field, e);
        }
    }

    public void read(FieldBase field, ByteFieldValue value) {
        try {
            String dataParsed = reader.getElementText();
            try {
                value.assign(Byte.valueOf(dataParsed));
            } catch (Exception e) {
                throw newDeserializeException(field, "Invalid byte \"" + dataParsed + "\".");
            }
        } catch (XMLStreamException e) {
            throw newException(field, e);
        }
    }

    public void read(FieldBase field, BoolFieldValue value) {
        try {
            String dataParsed = reader.getElementText();
            try {
                value.assign(dataParsed);
            } catch (Exception e) {
                throw newDeserializeException(field, e.getMessage());
            }
        } catch (XMLStreamException e) {
            throw newException(field, e);
        }
    }

    public void read(FieldBase field, DoubleFieldValue value) {
        try {
            String dataParsed = reader.getElementText();
            try {
                value.assign(Double.valueOf(dataParsed));
            } catch (Exception e) {
                throw newDeserializeException(field, "Invalid double \"" + dataParsed + "\".");
            }
        } catch (XMLStreamException e) {
            throw newException(field, e);
        }
    }

    public void read(FieldBase field, FloatFieldValue value) {
        try {
            String dataParsed = reader.getElementText();
            try {
                value.assign(Float.valueOf(dataParsed));
            } catch (Exception e) {
                throw newDeserializeException(field, "Invalid float \"" + dataParsed + "\".");
            }
        } catch (XMLStreamException e) {
            throw newException(field, e);
        }
    }

    private RuntimeException newDeserializeException(FieldBase field, String msg) {
        return newDeserializeException("Field '" + ((field == null) ? "null" : field.getName()) + "': " + msg);
    }
    private RuntimeException newException(FieldBase field, Exception e) {
        return newDeserializeException("Field '" + ((field == null) ? "null" : field.getName()) + "': " + e.getMessage());
    }
    public void read(FieldBase field, IntegerFieldValue value) {
        try {
            String dataParsed = reader.getElementText();

            BigInteger val;
            try {
                if (dataParsed.startsWith("0x")) {
                    val = new BigInteger(dataParsed.substring(2), 16);
                } else if (dataParsed.startsWith("0") && dataParsed.length() > 1) {
                    val = new BigInteger(dataParsed.substring(1), 8);
                } else {
                    val = new BigInteger(dataParsed);
                }
            } catch (Exception e) {
                throw newDeserializeException(field, "Invalid integer \"" + dataParsed + "\".");
            }
            if (val.bitLength() > 32) {
                throw newDeserializeException(field, "Invalid integer \"" + dataParsed + "\". Out of range.");
            }
            if (val.bitLength() == 32) {
                if (val.compareTo(BigInteger.ZERO) == 1) {
                    // Flip to negative
                    val = val.subtract(UINT_MAX);
                } else {
                    throw newDeserializeException(field, "Invalid integer \"" + dataParsed + "\". Out of range.");
                }
            }

            value.assign(val.intValue());
        } catch (XMLStreamException e) {
            throw newException(field, e);
        }
    }

    public void read(FieldBase field, LongFieldValue value) {
        try {
            String dataParsed = reader.getElementText();

            BigInteger val;
            try {
                if (dataParsed.startsWith("0x")) {
                    val = new BigInteger(dataParsed.substring(2), 16);
                } else if (dataParsed.startsWith("0") && dataParsed.length() > 1) {
                    val = new BigInteger(dataParsed.substring(1), 8);
                } else {
                    val = new BigInteger(dataParsed);
                }
            } catch (Exception e) {
                throw newDeserializeException(field, "Invalid long \"" + dataParsed + "\".");
            }
            if (val.bitLength() > 64) {
                throw newDeserializeException(field, "Invalid long \"" + dataParsed + "\". Out of range.");
            }
            if (val.compareTo(BigInteger.ZERO) == 1 && val.bitLength() == 64) {
                // Flip to negative
                val = val.subtract(ULONG_MAX);
            }
            value.assign(val.longValue());
        } catch (XMLStreamException e) {
            throw newException(field, e);
        }
    }

    public void read(FieldBase field, Raw value) {
        try {
            if (isBase64EncodedElement(reader)) {
                value.assign(Base64.getMimeDecoder().decode(reader.getElementText()));
            } else {
                value.assign(reader.getElementText().getBytes());
            }
        } catch (XMLStreamException e) {
            throw newException(field, e);
        }
    }

    @Override
    public void read(FieldBase field, PredicateFieldValue value) {
        try {
            if (isBase64EncodedElement(reader)) {
                value.assign(Predicate.fromBinary(Base64.getMimeDecoder().decode(reader.getElementText())));
            } else {
                value.assign(Predicate.fromString(reader.getElementText()));
            }
        } catch (XMLStreamException e) {
            throw newException(field, e);
        }
    }

    public void read(FieldBase field, StringFieldValue value) {
        try {
            if (isBase64EncodedElement(reader)) {
                throw new IllegalArgumentException("Attribute binaryencoding=base64 is not allowed for fields of type 'string'. To represent binary data, use type 'raw'.");
            } else {
                value.assign(reader.getElementText());
            }
        } catch (XMLStreamException | IllegalArgumentException e) {
            throw newException(field, e);
        }
    }

    @Override
    public void read(FieldBase field, TensorFieldValue value) {
        throwOnlyJsonSupportedException(field, "TENSOR");
    }

    @Override
    public void read(FieldBase field, ReferenceFieldValue value) {
        throwOnlyJsonSupportedException(field, "REFERENCE");
    }

    private static void throwOnlyJsonSupportedException(FieldBase field, String fieldType) {
        throw new DeserializationException("Field '"+ (field != null ? field.getName() : "null") + "': "
                + "XML input for fields of type " + fieldType + " is not supported. Please use JSON input instead.");
    }

    public void read(FieldBase field, AnnotationReference value) {
        System.out.println("Annotation value read!");
    }

    private void deserializeFieldValue(FieldBase field, FieldValue value) {
        value.deserialize(field instanceof Field ? (Field)field : null, this);
    }

    /***********************************************************************/
    /*                   UNUSED METHODS                                    */
    /***********************************************************************/

    @SuppressWarnings("UnusedDeclaration")
    public DocumentId readDocumentId() {
        return null;
    }

    @SuppressWarnings("UnusedDeclaration")
    public DocumentType readDocumentType() {
        return null;  //To change body of implemented methods use File | Settings | File Templates.
    }

    @SuppressWarnings("UnusedDeclaration")
    public DocumentTypeManager getDocumentTypeManager() {
        return docTypeManager;
    }

    @Override
    public  void read(FieldBase field, CollectionFieldValue value) {
        System.out.println("Should not be called!!!");
    }

    @Override
    public void read(FieldBase field, StructuredFieldValue value) {
        System.out.println("Should not be called!!!");
    }

    @Override
    public void read(FieldBase field, FieldValue value) {
        System.out.println("SHOULD NEVER BE CALLED? " + field.toString());
    }

    @Override
    public byte getByte(FieldBase fieldBase) {
        return 0;
    }

    @Override
    public short getShort(FieldBase fieldBase) {
        return 0;
    }

    @Override
    public int getInt(FieldBase fieldBase) {
        return 0;
    }

    @Override
    public long getLong(FieldBase fieldBase) {
        return 0;
    }

    @Override
    public float getFloat(FieldBase fieldBase) {
        return 0;
    }

    @Override
    public double getDouble(FieldBase fieldBase) {
        return 0;
    }

    @Override
    public byte[] getBytes(FieldBase fieldBase, int i) {
        return new byte[0];
    }

    @Override
    public String getString(FieldBase fieldBase) {
        return null;
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy