org.apache.vinci.transport.document.XTalkToSAX Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of jVinci Show documentation
Show all versions of jVinci Show documentation
This is a non-standard protocol for higher efficiency than
SOAP, used by the base UIMA Collection processing manager for supporting
networked deployment. See UIMA-AS as a more modern alternative supporting
more standard protocols.
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.vinci.transport.document;
import java.io.EOFException;
import java.io.IOException;
import java.io.InputStream;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.AttributesImpl;
import org.apache.vinci.transport.XTalkTransporter;
/**
* Class for converting XTalk streams into SAX events.
*/
public class XTalkToSAX {
  /** Default length of the byte and char work buffers. */
  public static final int INITIAL_BUF_SIZE = 256;

  /** Attribute type reported to the handler for every attribute. */
  private static final String CDATA_TYPE = "CDATA";

  // Work buffers shared by all string reads; they are replaced by larger arrays on demand.
  private char[] charBuffer;
  private byte[] byteBuffer;

  // Reused for every element; cleared before the attributes of each element are collected.
  private AttributesImpl workAttributes;

  // Members initialized by parse() to reduce argument passing; cleared again when parse() exits.
  private InputStream is;
  private ContentHandler handler;

  /**
   * Creates a parser whose work buffers start at {@link #INITIAL_BUF_SIZE}.
   */
  public XTalkToSAX() {
    init(INITIAL_BUF_SIZE);
  }

  /**
   * Creates a parser whose work buffers start at the given size.
   *
   * @param bufSize
   *          initial length of the byte and char buffers
   */
  public XTalkToSAX(int bufSize) {
    init(bufSize);
  }

  /** Allocates the reusable attribute holder and both work buffers. */
  private void init(int bufSize) {
    this.workAttributes = new AttributesImpl();
    this.byteBuffer = new byte[bufSize];
    this.charBuffer = new char[bufSize];
  }

  /**
   * Initially, the XTalkToSAX processor creates a byte buffer and char buffer of size
   * INITIAL_BUF_SIZE (or the size given to the constructor). These buffers may grow during
   * parsing to handle very large strings. Users can determine the size of these arrays with this
   * method. This method in conjunction with {@link #resizeBuffers(int)} lets applications
   * implement their own buffer management. Buffers can be resized during parsing, but not from
   * another thread.
   *
   * @return the current length of the internal byte buffer
   */
  public int bufferSize() {
    return byteBuffer.length;
  }

  /**
   * Replaces the internal buffers with fresh ones of the requested size. This is useful because
   * buffers can grow during parsing, and this allows the space to be reclaimed without having to
   * undo references to the parser object. Nothing happens if the byte buffer already has the
   * requested length.
   *
   * @param toSize
   *          the new length for both the byte buffer and the char buffer
   */
  public void resizeBuffers(int toSize) {
    if (this.byteBuffer.length != toSize) {
      this.byteBuffer = new byte[toSize];
      this.charBuffer = new char[toSize];
    }
  }

  /**
   * Parse one document off of the incoming XTalk stream into SAX events. A side effect of parsing
   * is that internal arrays will grow to hold the largest character string encountered in the
   * document. Use bufferSize() and resizeBuffers to manage memory in applications where very
   * large strings may be encountered and the same object is used to parse many incoming documents.
   *
   * @param is
   *          the stream to read one XTalk document from
   * @param handler
   *          the receiver of the generated SAX events
   * @throws IOException
   *           if underlying IOException from the stream or if XTalk format is invalid. An
   *           {@link EOFException} is thrown when the stream is already exhausted before the
   *           document marker is read.
   * @throws SAXException
   *           if SAXException thrown by the handler
   *
   * @pre handler != null
   * @pre is != null
   */
  public void parse(InputStream is, ContentHandler handler) throws IOException, SAXException {
    this.is = is;
    this.handler = handler;
    try {
      int marker = is.read();
      if (marker == -1) {
        throw new EOFException();
      }
      if ((byte) marker != XTalkTransporter.DOCUMENT_MARKER) {
        throw new IOException("Expected document marker: " + (char) marker);
      }
      int version = is.read();
      if ((byte) version != XTalkTransporter.VERSION_CODE) {
        throw new IOException("Xtalk version code doesn't match "
                + (int) XTalkTransporter.VERSION_CODE + ": " + version);
      }
      handler.startDocument();
      doTopLevelParse();
      handler.endDocument();
    } finally {
      // Nullify the FIELDS (not the shadowing parameters) so the parser does not keep the
      // stream and handler reachable between documents.
      this.is = null;
      this.handler = null;
    }
  }

  /**
   * Handles the top-level field list: any number of processing instructions, exactly one root
   * element, then only processing instructions for the remaining announced fields.
   */
  private void doTopLevelParse() throws IOException, SAXException {
    int topFieldCount = XTalkTransporter.readInt(is);
    if (topFieldCount < 1) {
      throw new IOException("No top level element.");
    }
    // Emit the intro PI's; the loop ends on the first marker that is not a PI marker.
    int marker;
    while ((marker = is.read()) == XTalkTransporter.PI_MARKER) {
      doProcessingInstruction();
      topFieldCount--;
      // At least one field must remain for the root element.
      if (topFieldCount < 1) {
        throw new IOException("No top level element.");
      }
    }
    if ((byte) marker != XTalkTransporter.ELEMENT_MARKER) {
      throw new IOException("Expected element marker: " + (char) marker);
    }
    doElement();
    topFieldCount--;
    // Handle trailing PI's
    while (topFieldCount > 0) {
      if (is.read() != XTalkTransporter.PI_MARKER) {
        throw new IOException("Expected PI marker.");
      }
      doProcessingInstruction();
      topFieldCount--;
    }
  }

  /** Reads a target string and a data string and reports them as a processing instruction. */
  private void doProcessingInstruction() throws IOException, SAXException {
    String target = consumeString();
    String data = consumeString();
    handler.processingInstruction(target, data);
  }

  /**
   * Makes sure both work buffers can hold at least {@code bytesToRead} entries. When they are too
   * small they are replaced (contents are not preserved) by arrays enlarged by
   * {@code bytesToRead}.
   */
  private void ensureCapacity(int bytesToRead) {
    if (byteBuffer.length < bytesToRead) {
      byteBuffer = new byte[byteBuffer.length + bytesToRead];
      charBuffer = new char[charBuffer.length + bytesToRead];
    }
  }

  /**
   * Reads the length prefix of a string from the stream.
   *
   * @return the non-negative length that was read
   * @throws IOException
   *           if the stream fails or the length is negative, which is never valid and would
   *           otherwise bypass the buffer growth check
   */
  private int readStringLength() throws IOException {
    int length = XTalkTransporter.readInt(is);
    if (length < 0) {
      throw new IOException("Invalid string length: " + length);
    }
    return length;
  }

  /** Reads one length-prefixed string from the stream and returns it as a new String. */
  private String consumeString() throws IOException {
    int bytesToRead = readStringLength();
    ensureCapacity(bytesToRead);
    int charsRead = XTalkTransporter.consumeCharacters(is, byteBuffer, charBuffer, bytesToRead);
    return new String(charBuffer, 0, charsRead);
  }

  /**
   * Parses one element whose marker has already been consumed: tag name, attributes, then the
   * child fields (processing instructions, character data and nested elements), emitting the
   * matching SAX events. Recurses for nested elements.
   */
  private void doElement() throws IOException, SAXException {
    String tagName = consumeString();
    int attributeCount = XTalkTransporter.readInt(is);
    workAttributes.clear();
    for (int i = 0; i < attributeCount; i++) {
      String attrName = consumeString();
      String attrValue = consumeString();
      workAttributes.addAttribute("", attrName, attrName, CDATA_TYPE, attrValue);
    }
    // workAttributes is handed over before any child is parsed, so the recursive calls below
    // may safely clear and reuse it.
    handler.startElement("", tagName, tagName, workAttributes);
    int fieldCount = XTalkTransporter.readInt(is);
    for (int i = 0; i < fieldCount; i++) {
      int marker = is.read();
      switch ((byte) marker) {
        case XTalkTransporter.PI_MARKER:
          doProcessingInstruction();
          break;
        case XTalkTransporter.STRING_MARKER: {
          // Same decoding as consumeString(), but the characters are passed to the handler
          // directly from the work buffer instead of being copied into a String.
          int bytesToRead = readStringLength();
          ensureCapacity(bytesToRead);
          int charsRead = XTalkTransporter.consumeCharacters(is, byteBuffer, charBuffer,
                  bytesToRead);
          handler.characters(charBuffer, 0, charsRead);
          break;
        }
        case XTalkTransporter.ELEMENT_MARKER:
          doElement();
          break;
        default:
          throw new IOException("Unexpected marker: " + (char) marker);
      }
    }
    // Mirror the arguments given to startElement: SAX uses empty strings, not null, for an
    // absent namespace URI.
    handler.endElement("", tagName, tagName);
  }
}
© 2015 - 2024 Weber Informatics LLC | Privacy Policy