com.yahoo.vespaxmlparser.VespaXMLFeedReader Maven / Gradle / Ivy
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespaxmlparser;
import com.yahoo.document.Document;
import com.yahoo.document.DocumentId;
import com.yahoo.document.DocumentTypeManager;
import com.yahoo.document.DocumentUpdate;
import com.yahoo.document.TestAndSetCondition;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.XMLStreamReader;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;
import java.util.Optional;
/**
* XML parser for Vespa document XML.
*
* Parses an entire document "feed", which consists of a vespafeed element containing
* zero or more instances of documents, updates or removes.
*
* Standard usage is to create an Operation object and call read(Operation) until
* operation.getType() returns OperationType.INVALID.
*
* If you are looking to parse only a single document or update, use VespaXMLDocumentReader
* or VespaXMLUpdateReader respectively.
*/
public class VespaXMLFeedReader extends VespaXMLReader implements FeedReader {
/**
* Creates a reader that reads from the given file.
*/
public VespaXMLFeedReader(String fileName, DocumentTypeManager docTypeManager) throws Exception {
super(fileName, docTypeManager);
readInitial();
}
/**
* Creates a reader that reads from the given stream.
*/
public VespaXMLFeedReader(InputStream stream, DocumentTypeManager docTypeManager) throws Exception {
super(stream, docTypeManager);
readInitial();
}
/**
* Creates a reader that uses the given reader to read - this can be used if the vespa feed
* is part of a larger XML document.
*/
public VespaXMLFeedReader(XMLStreamReader reader, DocumentTypeManager manager) throws Exception {
super(reader, manager);
readInitial();
}
/**
* Skips the initial "vespafeed" tag.
*/
private void readInitial() throws Exception {
boolean found = false;
while (reader.hasNext()) {
int type = reader.next();
if (type == XMLStreamReader.START_ELEMENT) {
if ("vespafeed".equals(reader.getName().toString())) {
found = true;
break;
}
}
}
if (!found) {
throw newDeserializeException("Feed information must be contained within a \"vespafeed\" element");
}
}
/**
* Reads all operations from the XML stream and puts into a list. Note
* that if the XML stream is large, this may cause out of memory errors, so
* make sure to use this only with small streams.
*
* @return The list of all read operations.
*/
public List readAll() throws Exception {
List list = new ArrayList<>();
while (true) {
FeedOperation op = read();
if (op.getType() == FeedOperation.Type.INVALID) {
return list;
} else {
list.add(op);
}
}
}
/* (non-Javadoc)
* @see com.yahoo.vespaxmlparser.FeedReader#read(com.yahoo.vespaxmlparser.VespaXMLFeedReader.Operation)
*/
@Override
public FeedOperation read() throws Exception {
String startTag = null;
try {
while (reader.hasNext()) {
int type = reader.next();
if (type == XMLStreamReader.START_ELEMENT) {
startTag = reader.getName().toString();
if ("document".equals(startTag)) {
VespaXMLDocumentReader documentReader = new VespaXMLDocumentReader(reader, docTypeManager);
Document document = new Document(documentReader);
return new DocumentFeedOperation(document, TestAndSetCondition.fromConditionString(documentReader.getCondition()));
} else if ("update".equals(startTag)) {
VespaXMLUpdateReader updateReader = new VespaXMLUpdateReader(reader, docTypeManager);
DocumentUpdate update = new DocumentUpdate(updateReader);
return new DocumentUpdateFeedOperation(update, TestAndSetCondition.fromConditionString(updateReader.getCondition()));
} else if ("remove".equals(startTag)) {
DocumentId documentId = null;
Optional condition = Optional.empty();
for (int i = 0; i < reader.getAttributeCount(); i++) {
final String attributeName = reader.getAttributeName(i).toString();
if ("documentid".equals(attributeName) || "id".equals(attributeName)) {
documentId = new DocumentId(reader.getAttributeValue(i));
} else if ("condition".equals(attributeName)) {
condition = Optional.of(reader.getAttributeValue(i));
}
}
if (documentId == null) {
throw newDeserializeException("Missing \"documentid\" attribute for remove operation");
}
return new RemoveFeedOperation(documentId, TestAndSetCondition.fromConditionString(condition));
} else {
throw newDeserializeException("Element \"" + startTag + "\" not allowed in this context");
}
}
}
} catch (XMLStreamException e) {
throw(e);
// Skip to end of current tag with other exceptions.
} catch (Exception e) {
try {
if (startTag != null) {
skipToEnd(startTag);
}
} catch (Exception ignore) {
}
throw(e);
}
return FeedOperation.INVALID;
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy