All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.vinci.transport.document.XTalkToSAX Maven / Gradle / Ivy

Go to download

This is a non-standard protocol, designed for higher efficiency than SOAP, used by the base UIMA Collection Processing Manager to support networked deployment. See UIMA-AS for a more modern alternative that supports more standard protocols.

There is a newer version: 3.6.0
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 * 
 *   http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.vinci.transport.document;

import java.io.EOFException;
import java.io.IOException;
import java.io.InputStream;

import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.AttributesImpl;

import org.apache.vinci.transport.XTalkTransporter;

/**
 * Class for converting XTalk streams into SAX events.
 * <p>
 * An instance keeps reusable work buffers and per-parse state in fields, so a single instance must
 * not be used to parse from several threads at the same time.
 */
public class XTalkToSAX {

  /** Default length of the internal byte and char work buffers. */
  public static final int INITIAL_BUF_SIZE = 256;

  /** SAX attribute type reported for every attribute. */
  private static final String cdataType = "CDATA";

  /** Decoded characters of the string most recently read from the stream. */
  private char[] charBuffer;

  /** Raw bytes of the string most recently read from the stream. */
  private byte[] byteBuffer;

  /** Attribute list reused for every startElement event. */
  private AttributesImpl workAttributes;

  // Members initialized by parse() to reduce argument passing; cleared again when parse() exits.
  private InputStream is;

  private ContentHandler handler;

  /**
   * Creates a converter whose work buffers start at {@link #INITIAL_BUF_SIZE}.
   */
  public XTalkToSAX() {
    init(INITIAL_BUF_SIZE);
  }

  /**
   * Creates a converter whose work buffers start at the given size.
   * 
   * @param bufSize
   *          initial length of the byte and char work buffers
   */
  public XTalkToSAX(int bufSize) {
    init(bufSize);
  }

  private void init(int bufSize) {
    this.workAttributes = new AttributesImpl();
    this.byteBuffer = new byte[bufSize];
    this.charBuffer = new char[bufSize];
  }

  /**
   * Initially, the XTalkToSAX processor creates a byte buffer and char buffer of size
   * INITIAL_BUF_SIZE (or the size given to the constructor). These buffers may grow during parsing
   * to handle very large strings. Users can determine the current size of these arrays with this
   * method. This method in conjunction with resizeBuffers lets applications implement their own
   * buffer management. Buffers can be resized during parsing, but not from another thread.
   * 
   * @return the current length of the internal work buffers
   */
  public int bufferSize() {
    return byteBuffer.length;
  }

  /**
   * Replaces the work buffers with fresh ones of the requested size (no-op if they already have
   * that size). This is useful because buffers can grow during parsing and this allows the space
   * to be reclaimed without having to undo references to the parser object.
   * 
   * @param toSize
   *          the new length of the byte and char work buffers
   */
  public void resizeBuffers(int toSize) {
    if (this.byteBuffer.length != toSize) {
      this.byteBuffer = new byte[toSize];
      this.charBuffer = new char[toSize];
    }
  }

  /**
   * Parse one document off of the incoming XTalk stream into SAX events. A side effect of parsing
   * is that internal arrays will grow to the size of the largest character string encountered in
   * the document. Use bufferSize() and resizeBuffers to manage memory in applications where very
   * large strings may be encountered and the same object is used to parse many incoming documents.
   * 
   * @param is
   *          the stream to read the XTalk document from; must not be null
   * @param handler
   *          the receiver of the generated SAX events; must not be null
   * @throws IOException
   *           if underlying IOException from the stream or if XTalk format is invalid.
   * @throws SAXException
   *           if SAXException thrown by the handler
   */
  public void parse(InputStream is, ContentHandler handler) throws IOException, SAXException {
    this.is = is;
    this.handler = handler;
    try {
      int marker = is.read();
      if (marker == -1) {
        throw new EOFException();
      }
      if ((byte) marker != XTalkTransporter.DOCUMENT_MARKER) {
        throw new IOException("Expected document marker: " + (char) marker);
      }
      int version = is.read();
      if ((byte) version != XTalkTransporter.VERSION_CODE) {
        throw new IOException("Xtalk version code doesn't match "
                + (int) XTalkTransporter.VERSION_CODE + ": " + version);
      }
      handler.startDocument();
      doTopLevelParse();
      handler.endDocument();
    } finally {
      // Clear the FIELDS (the parameters shadow them) so the stream and handler are not retained
      // by this object after the parse has finished.
      this.is = null;
      this.handler = null;
    }
  }

  /**
   * Reads the top-level field list: optional leading processing instructions, exactly one root
   * element, then optional trailing processing instructions.
   */
  private void doTopLevelParse() throws IOException, SAXException {
    int top_field_count = XTalkTransporter.readInt(is);
    if (top_field_count < 1) {
      throw new IOException("No top level element.");
    }
    // Handle intro PI's; the loop ends on the first marker that is not a PI marker.
    int marker;
    while ((marker = is.read()) == XTalkTransporter.PI_MARKER) {
      doProcessingInstruction();
      top_field_count--;
      if (top_field_count < 1) {
        throw new IOException("No top level element.");
      }
    }
    if ((byte) marker != XTalkTransporter.ELEMENT_MARKER) {
      throw new IOException("Expected element marker: " + (char) marker);
    }
    doElement();
    top_field_count--;
    // Handle trailing PI's
    while (top_field_count > 0) {
      if (is.read() != XTalkTransporter.PI_MARKER) {
        throw new IOException("Expected PI marker.");
      }
      doProcessingInstruction();
      top_field_count--;
    }
  }

  /** Reads a processing instruction (target, then data) and reports it to the handler. */
  private void doProcessingInstruction() throws IOException, SAXException {
    String target = consumeString();
    String data = consumeString();
    handler.processingInstruction(target, data);
  }

  /**
   * Grows both work buffers when they are too small to hold bytesToRead bytes. Existing buffer
   * contents are not preserved.
   */
  private void ensureCapacity(int bytesToRead) {
    if (byteBuffer.length < bytesToRead) {
      // Grow by the requested amount; if that sum overflows an int, fall back to exactly the
      // required size instead of attempting a negative-length allocation.
      long grown = (long) byteBuffer.length + bytesToRead;
      int newSize = grown > Integer.MAX_VALUE ? bytesToRead : (int) grown;
      byteBuffer = new byte[newSize];
      charBuffer = new char[newSize];
    }
  }

  /**
   * Reads one length-prefixed string from the stream into charBuffer.
   * 
   * @return the number of chars now valid at the start of charBuffer
   * @throws IOException
   *           if the stream fails or the encoded length is negative
   */
  private int fillCharBuffer() throws IOException {
    int bytesToRead = XTalkTransporter.readInt(is);
    if (bytesToRead < 0) {
      throw new IOException("Invalid negative string length: " + bytesToRead);
    }
    ensureCapacity(bytesToRead);
    return XTalkTransporter.consumeCharacters(is, byteBuffer, charBuffer, bytesToRead);
  }

  /** Reads one length-prefixed string from the stream and returns it as a String. */
  private String consumeString() throws IOException {
    int charsRead = fillCharBuffer();
    return new String(charBuffer, 0, charsRead);
  }

  /**
   * Parses one element (the element marker has already been consumed): tag name, attributes, then
   * the child fields, recursing for nested elements.
   */
  private void doElement() throws IOException, SAXException {
    String tagName = consumeString();
    int attribute_count = XTalkTransporter.readInt(is);
    workAttributes.clear();
    for (int i = 0; i < attribute_count; i++) {
      String attrName = consumeString();
      String attrValue = consumeString();
      workAttributes.addAttribute("", attrName, attrName, cdataType, attrValue);
    }
    handler.startElement("", tagName, tagName, workAttributes);
    int field_count = XTalkTransporter.readInt(is);
    for (int i = 0; i < field_count; i++) {
      int marker = is.read();
      switch ((byte) marker) {
        case XTalkTransporter.PI_MARKER:
          doProcessingInstruction();
          break;
        case XTalkTransporter.STRING_MARKER:
          // Character data is handed over straight from the work buffer, without building a
          // String.
          int charsRead = fillCharBuffer();
          handler.characters(charBuffer, 0, charsRead);
          break;
        case XTalkTransporter.ELEMENT_MARKER:
          doElement();
          break;
        default:
          throw new IOException("Unexpected marker: " + (char) marker);
      }
    }
    // Report the same (uri, localName, qName) triple as the matching startElement; SAX uses the
    // empty string, not null, for an absent namespace URI.
    handler.endElement("", tagName, tagName);
  }

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy