All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.hadoop.hdfs.tools.offlineEditsViewer.XmlTokenizer Maven / Gradle / Ivy

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hdfs.tools.offlineEditsViewer;

import java.io.IOException;
import java.io.FileNotFoundException;
import java.io.FileInputStream;

import javax.xml.stream.XMLStreamException;
import javax.xml.stream.XMLStreamConstants;
import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.XMLStreamReader;

import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;

/**
 * Tokenizer that reads tokens from an XML file.
 *
 * The file is parsed with a StAX {@link XMLStreamReader}; every call to
 * {@link #read(Token)} advances the reader to the next element, checks that
 * its name matches the element the token expects, and feeds the element's
 * text to the token.
 *
 * The input stream is opened by the constructor and is not closed by this
 * class after successful construction.
 */
@InterfaceAudience.Private
@InterfaceStability.Unstable
public class XmlTokenizer implements Tokenizer {

  // underlying file stream that backs the XML reader
  FileInputStream is = null;
  // StAX cursor positioned on the most recently consumed event
  XMLStreamReader in;

  /**
   * XmlTokenizer constructor
   *
   * @param filename input filename
   * @throws IOException if the file cannot be opened or the XML stream
   *                     reader cannot be created on top of it
   */
  public XmlTokenizer(String filename) throws IOException {
    XMLInputFactory f = XMLInputFactory.newInstance();
    // Ask the parser to merge adjacent character data so that an element's
    // text always arrives as a single CHARACTERS event; otherwise a value
    // may be split (e.g. around entity references) and only its first
    // chunk would be returned.
    f.setProperty(XMLInputFactory.IS_COALESCING, Boolean.TRUE);

    try {
      is = new FileInputStream(filename);
    } catch(FileNotFoundException e) {
      throw new IOException("Cannot open input file " + filename, e);
    }

    boolean created = false;
    try {
      in = f.createXMLStreamReader(is);
      created = true;
    } catch(XMLStreamException e) {
      throw new IOException("Cannot create XML stream", e);
    } finally {
      if(!created) {
        // do not leak the file descriptor when the reader could not be built
        try {
          is.close();
        } catch(IOException ignored) {
          // best effort: the original failure is the one worth reporting
        }
      }
    }
  }

  /**
   * Get next element's value, checks that the element's name
   * is wantedName.
   *
   * Scanning starts at the reader's current event, so events left over from
   * the previous call (text, end tags, comments, whitespace) are skipped
   * until the next start tag is found.
   *
   * @param wantedName a name of node that we are looking for
   * @return the trimmed text of the element; an empty string when the
   *         element has no text or only whitespace
   * @throws IOException if the next element has a different name, if
   *                     another element starts before any text is seen, if
   *                     an unsupported event is encountered, if the document
   *                     ends first, or if the underlying parser fails
   */
  private String getNextElementsValue(String wantedName) throws IOException {
    boolean gotSTART_ELEMENT = false;
    try {
      int eventType = in.getEventType();
      while(true) {
        switch(eventType) {
          case XMLStreamConstants.CHARACTERS: // 4
            if(gotSTART_ELEMENT) {
              // whitespace-only text (for instance the line break that
              // follows a start tag) is trimmed to an empty string
              return in.getText().trim();
            }
            break;
          case XMLStreamConstants.END_DOCUMENT: // 8
            throw new IOException("End of XML while looking for element [" +
              wantedName + "]");
          case XMLStreamConstants.START_ELEMENT : // 1
            if(gotSTART_ELEMENT) {
              throw new IOException("START_ELEMENT [" +
                in.getName() +
                "] event when expecting CHARACTERS event for [" +
                wantedName + "]");
            } else if(in.getName().toString().equals(wantedName)) {
              gotSTART_ELEMENT = true;
            } else {
              throw new IOException("unexpected element name [" +
                in.getName() + "], was expecting [" +
                wantedName + "]");
            }
            break;
          case XMLStreamConstants.END_ELEMENT: // 2
            if(gotSTART_ELEMENT) {
              // the wanted element closed without any text in between,
              // so its value is empty; do not rely on trailing whitespace
              return "";
            }
            // end tag of a previously consumed element, not needed
            break;
          case XMLStreamConstants.COMMENT:
          case XMLStreamConstants.SPACE:
          case XMLStreamConstants.START_DOCUMENT: // 7
            // these are in XML but we don't need them
            break;
          // these should never appear in edits XML
          case XMLStreamConstants.ATTRIBUTE:
          case XMLStreamConstants.CDATA:
          case XMLStreamConstants.DTD:
          case XMLStreamConstants.ENTITY_DECLARATION:
          case XMLStreamConstants.ENTITY_REFERENCE:
          case XMLStreamConstants.NAMESPACE:
          case XMLStreamConstants.NOTATION_DECLARATION:
          case XMLStreamConstants.PROCESSING_INSTRUCTION:
          default:
            throw new IOException("Unsupported event type [" +
              eventType + "] (see XMLStreamConstants)");
        }
        if(!in.hasNext()) { break; }
        eventType = in.next();
      }
    } catch(XMLStreamException e) {
      throw new IOException("Error reading XML stream", e);
    }
    // only reachable when the reader reports no further events without
    // having delivered END_DOCUMENT
    throw new IOException(
      "Error reading XML stream, should never reach this line, " +
      "most likely XML does not have elements we are looking for");
  }

  /**
   * Reads the next XML element, which must be named after the token's
   * edits element, and loads its text into the token.
   *
   * @see org.apache.hadoop.hdfs.tools.offlineEditsViewer.Tokenizer#read
   *
   * @param t a token to read
   * @return token that was just read
   * @throws IOException if the expected element cannot be read
   */
  public Token read(Token t) throws IOException {
    t.fromString(getNextElementsValue(t.getEditsElement().toString()));
    return t;
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy