All downloads are free. Search and download functionality uses the official Maven repository.

raw.runtime.truffle.ast.io.xml.parser.RawTruffleXmlParser Maven / Gradle / Ivy

The newest version!
/*
 * Copyright 2023 RAW Labs S.A.
 *
 * Use of this software is governed by the Business Source License
 * included in the file licenses/BSL.txt.
 *
 * As of the Change Date specified in that file, in accordance with
 * the Business Source License, use of this software will be governed
 * by the Apache License, Version 2.0, included in the file
 * licenses/APL.txt.
 */

package raw.runtime.truffle.ast.io.xml.parser;

import com.oracle.truffle.api.CompilerDirectives.TruffleBoundary;
import java.math.BigDecimal;
import java.time.LocalDate;
import java.time.LocalDateTime;
import java.time.LocalTime;
import java.time.format.DateTimeFormatter;
import java.time.format.DateTimeParseException;
import java.util.Objects;
import java.util.Vector;
import javax.xml.stream.XMLStreamConstants;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.XMLStreamReader;
import org.apache.commons.text.StringEscapeUtils;
import raw.runtime.truffle.ast.expressions.builtin.temporals.DateTimeFormatCache;
import raw.runtime.truffle.runtime.exceptions.RawTruffleInternalErrorException;
import raw.runtime.truffle.runtime.exceptions.xml.XmlParserRawTruffleException;
import raw.runtime.truffle.runtime.exceptions.xml.XmlReaderRawTruffleException;
import raw.runtime.truffle.runtime.primitives.DateObject;
import raw.runtime.truffle.runtime.primitives.TimeObject;
import raw.runtime.truffle.runtime.primitives.TimestampObject;
import raw.runtime.truffle.utils.RawTruffleCharStream;
import raw.runtime.truffle.utils.RawTruffleStringCharStream;

/**
 * Pull-style XML parser built on top of a StAX {@link XMLStreamReader}.
 *
 * <p>The parser wraps the reader obtained from {@code RawTruffleXmlParserFactory} for a {@link
 * RawTruffleCharStream} and offers:
 *
 * <ul>
 *   <li>token navigation that hides insignificant events ({@link #nextToken()}, {@link
 *       #skipTag()}, {@link #finishConsuming()});
 *   <li>text and attribute extraction ({@link #getText()}, {@link #getAsString()}, {@link
 *       #getStringAttribute(int)}, {@link #attributes()});
 *   <li>conversion of textual content to primitive, decimal and temporal values ({@code xxxFrom}
 *       methods), reporting failures as {@link XmlParserRawTruffleException};
 *   <li>re-serialization of the current element ({@link #elementAsString()}).
 * </ul>
 */
public class RawTruffleXmlParser {

  // Position captured by recordPosition() just before an error is raised or a value is read.
  // NOTE(review): nothing in this class reads these two fields back (currentLine() and
  // currentColumn() query the reader directly) - confirm whether they are still needed.
  private int currentLine;
  private int currentColumn;
  private final XMLStreamReader xmlStreamReader;
  private final RawTruffleCharStream stream;
  // Type of the last significant event returned by nextToken(); see currentTokenValid.
  private int currentToken;
  // Local name of the last start/end element reached through nextToken().
  private String currentTag;
  // True when the last nextToken() call stopped on a significant (non-skipped) event.
  private boolean currentTokenValid = false;
  private final DateTimeFormatter dateFormatter;
  private final DateTimeFormatter timeFormatter;
  private final DateTimeFormatter timestampFormatter;
  private final RawTruffleXmlParserSettings settings;
  // Scratch buffer reused by getText() to concatenate adjacent text events.
  private final StringBuilder stringBuilder = new StringBuilder();

  /**
   * Returns the line number of the underlying reader's current location.
   *
   * @return the line number reported by {@link XMLStreamReader#getLocation()}
   */
  @TruffleBoundary
  public int currentLine() {
    return xmlStreamReader.getLocation().getLineNumber();
  }

  /**
   * Returns the column number of the underlying reader's current location.
   *
   * @return the column number reported by {@link XMLStreamReader#getLocation()}
   */
  @TruffleBoundary
  public int currentColumn() {
    return xmlStreamReader.getLocation().getColumnNumber();
  }

  /**
   * Creates a parser reading from {@code stream}.
   *
   * <p>The date, time and timestamp formatters are looked up in {@code DateTimeFormatCache} using
   * the patterns found in {@code settings}.
   *
   * @param stream the character stream to parse
   * @param settings parser settings (temporal formats)
   * @throws XmlReaderRawTruffleException if the StAX reader cannot be created
   */
  @TruffleBoundary
  public RawTruffleXmlParser(RawTruffleCharStream stream, RawTruffleXmlParserSettings settings) {
    this.stream = stream;
    this.dateFormatter = DateTimeFormatCache.get(settings.dateFormat);
    this.timeFormatter = DateTimeFormatCache.get(settings.timeFormat);
    this.timestampFormatter = DateTimeFormatCache.get(settings.timestampFormat);
    this.settings = settings;
    try {
      xmlStreamReader =
          RawTruffleXmlParserFactory.singleton().createXMLStreamReader(stream.getReader());
    } catch (XMLStreamException e) {
      // TODO !!!!!!!!!!!!!!!!!!!!! Like in CSV/Json
      throw new XmlReaderRawTruffleException("Error creating XMLStreamReader", e, null);
    }
  }

  /**
   * Static factory equivalent to calling the constructor.
   *
   * @param stream the character stream to parse
   * @param settings parser settings (temporal formats)
   * @return a new parser over {@code stream}
   */
  @TruffleBoundary
  public static RawTruffleXmlParser create(
      RawTruffleCharStream stream, RawTruffleXmlParserSettings settings) {
    return new RawTruffleXmlParser(stream, settings);
  }

  /**
   * Creates a new, independent parser over {@code text} that reuses this parser's settings.
   *
   * @param text the XML text the new parser reads from
   * @return a new parser over {@code text}
   */
  @TruffleBoundary
  public RawTruffleXmlParser duplicateFor(String text) {
    RawTruffleCharStream subStream = new RawTruffleStringCharStream(text);
    return new RawTruffleXmlParser(subStream, settings);
  }

  /**
   * Verifies that the reader is currently positioned on a start element.
   *
   * @throws XmlReaderRawTruffleException if the current event is not a start element
   */
  @TruffleBoundary
  public void assertCurrentTokenIsStartTag() {
    assertCurrentToken(XMLStreamConstants.START_ELEMENT);
  }

  /**
   * Verifies that the reader is currently positioned on an end element, without consuming it.
   *
   * <p>Only the event type is checked; {@code tag} is used to build the error message.
   *
   * @param tag the name of the end tag the caller expects
   * @throws XmlParserRawTruffleException if the current event is not an end element
   */
  @TruffleBoundary
  public void expectEndTag(String tag) {
    int actualToken = xmlStreamReader.getEventType();
    if (actualToken != XMLStreamConstants.END_ELEMENT) {
      recordPosition();
      throw new XmlParserRawTruffleException(
          "expected "
              + eventToStr(XMLStreamConstants.END_ELEMENT, tag)
              + " but got "
              + eventToStr(actualToken, currentTag),
          this,
          null);
    }
  }

  /**
   * Renders a StAX event type as a human-readable string for error messages.
   *
   * @param token a {@link XMLStreamConstants} event type
   * @param tagName the element name to include for start/end elements, or {@code null}
   * @return the description of the event
   * @throws RawTruffleInternalErrorException if {@code token} is not a known StAX event type
   */
  private String eventToStr(int token, String tagName) {
    switch (token) {
      case XMLStreamConstants.START_ELEMENT:
        return tagName != null ? "start-element <" + tagName + ">" : "start-element";
      case XMLStreamConstants.END_ELEMENT:
        return tagName != null ? "end-element </" + tagName + ">" : "end-element";
      case XMLStreamConstants.CHARACTERS:
        return "characters";
      case XMLStreamConstants.CDATA:
        return "cdata";
      case XMLStreamConstants.COMMENT:
        return "comment";
      case XMLStreamConstants.SPACE:
        return "space";
      case XMLStreamConstants.DTD:
        return "dtd";
      case XMLStreamConstants.NAMESPACE:
        return "namespace";
      case XMLStreamConstants.NOTATION_DECLARATION:
        return "notation-declaration";
      case XMLStreamConstants.ENTITY_DECLARATION:
        return "entity-declaration";
      case XMLStreamConstants.ENTITY_REFERENCE:
        return "entity-reference";
      case XMLStreamConstants.PROCESSING_INSTRUCTION:
        return "processing-instruction";
      case XMLStreamConstants.START_DOCUMENT:
        return "start-document";
      case XMLStreamConstants.END_DOCUMENT:
        return "end-document";
      case XMLStreamConstants.ATTRIBUTE:
        return "attribute";
      default:
        // TODO log something
        throw new RawTruffleInternalErrorException();
    }
  }

  /**
   * Tells whether the last token returned by {@link #nextToken()} is an end element.
   *
   * @return {@code true} if the current token is an end element
   */
  @TruffleBoundary
  public boolean onEndTag() {
    assert (currentTokenValid);
    return currentToken == XMLStreamConstants.END_ELEMENT;
  }

  /**
   * Tells whether the last token returned by {@link #nextToken()} is a start element.
   *
   * @return {@code true} if the current token is a start element
   */
  @TruffleBoundary
  public boolean onStartTag() {
    assert (currentTokenValid);
    return currentToken == XMLStreamConstants.START_ELEMENT;
  }

  /**
   * Returns the local name of the element the reader is positioned on.
   *
   * @return the value of {@link XMLStreamReader#getLocalName()}
   */
  @TruffleBoundary
  public String getCurrentName() {
    assert (currentTokenValid);
    return xmlStreamReader.getLocalName();
  }

  /**
   * Skips the element the reader is positioned on, including all of its content, and moves to the
   * token that follows its end tag. When the reader is not on a start element, a single token is
   * skipped.
   *
   * @throws XmlParserRawTruffleException if the reader fails while checking for more events
   */
  @TruffleBoundary
  public void skipTag() {
    try {
      consumeUntilBalanced(0);
    } catch (XMLStreamException ex) {
      throw new XmlParserRawTruffleException(ex, this);
    }
  }

  /**
   * Consumes tokens until the element enclosing the current position has been closed, then moves
   * to the token after that end tag. The current position is treated as being one level deep.
   *
   * @throws XmlReaderRawTruffleException if the reader fails while checking for more events
   */
  @TruffleBoundary
  public void finishConsuming() {
    try {
      consumeUntilBalanced(1);
    } catch (XMLStreamException ex) {
      throw new XmlReaderRawTruffleException(ex, stream, null);
    }
  }

  // Advances token by token, tracking element nesting from initialDepth, and stops right after
  // the token on which the depth returned to zero has been consumed (or when the input ends).
  private void consumeUntilBalanced(int initialDepth) throws XMLStreamException {
    int depth = initialDepth;
    while (xmlStreamReader.hasNext()) {
      int token = xmlStreamReader.getEventType();
      if (token == XMLStreamConstants.START_ELEMENT) {
        depth += 1;
      } else if (token == XMLStreamConstants.END_ELEMENT) {
        depth -= 1;
      }
      // The depth check happens after advancing so that the balancing token itself is consumed.
      nextToken();
      if (depth == 0) {
        break;
      }
    }
  }

  /**
   * Returns the local names of the attributes of the current start element, in reader order.
   *
   * @return the attribute local names
   * @throws XmlReaderRawTruffleException if the reader is not in a state that exposes attributes
   */
  @TruffleBoundary
  public Vector<String> attributes() {
    try {
      int n = xmlStreamReader.getAttributeCount();
      Vector<String> names = new Vector<>(n);
      for (int i = 0; i < n; i++) {
        names.add(xmlStreamReader.getAttributeLocalName(i));
      }
      return names;
    } catch (IllegalStateException ex) {
      throw new XmlReaderRawTruffleException(ex, stream, null);
    }
  }

  /**
   * Advances the reader to the next significant event.
   *
   * <p>Whitespace, processing instructions, comments, document start/end, DTD events and blank
   * character data are skipped. When the input runs out before a significant event is found, the
   * current token is flagged invalid and the previously stored token type is returned.
   *
   * @return the type of the current token
   * @throws XmlReaderRawTruffleException if the underlying reader fails
   */
  @TruffleBoundary
  public int nextToken() {
    boolean skip = true;
    int token = -1;
    try {
      while (skip && xmlStreamReader.hasNext()) {
        token = xmlStreamReader.next();
        assert (token != XMLStreamConstants.ATTRIBUTE);
        skip =
            token == XMLStreamConstants.SPACE
                || token == XMLStreamConstants.PROCESSING_INSTRUCTION
                || token == XMLStreamConstants.COMMENT
                || token == XMLStreamConstants.END_DOCUMENT
                || token == XMLStreamConstants.START_DOCUMENT
                || token == XMLStreamConstants.DTD
                || (token == XMLStreamConstants.CHARACTERS && xmlStreamReader.getText().isBlank());
      }
      currentTokenValid = !skip;
      if (currentTokenValid) {
        currentToken = token;
        if (onStartTag() || onEndTag()) {
          currentTag = xmlStreamReader.getLocalName();
        }
      }
    } catch (XMLStreamException e) {
      // TODO more details?
      recordPosition();
      throw new XmlReaderRawTruffleException(e, this, stream, null);
    }
    return currentToken;
  }

  // Throws a reader exception unless the reader's current event type equals expectedToken.
  private void assertCurrentToken(int expectedToken) {
    int actualToken = xmlStreamReader.getEventType();
    if (actualToken != expectedToken) {
      // TODO !!!!!!!!!!!!!!!!!!!!!!!!!!
      recordPosition();
      throw new XmlReaderRawTruffleException(
          "Expected token " + expectedToken + " but found " + actualToken, stream, null);
    }
  }

  // String

  /**
   * Concatenates the text of the consecutive entity-reference, characters and CDATA events
   * starting at the current position, leaving the reader on the first token that follows them.
   *
   * @return the concatenated text, possibly empty
   */
  @TruffleBoundary
  public String getText() {
    stringBuilder.setLength(0);
    while (isOnTextEvent()) {
      stringBuilder.append(xmlStreamReader.getText());
      nextToken();
    }
    return stringBuilder.toString();
  }

  // True when the reader's current event carries text (entity reference, characters or CDATA).
  private boolean isOnTextEvent() {
    int event = xmlStreamReader.getEventType();
    return event == XMLStreamConstants.ENTITY_REFERENCE
        || event == XMLStreamConstants.CHARACTERS
        || event == XMLStreamConstants.CDATA;
  }

  // Captures the reader's current line/column into the position fields.
  @TruffleBoundary
  private void recordPosition() {
    currentLine = xmlStreamReader.getLocation().getLineNumber();
    currentColumn = xmlStreamReader.getLocation().getColumnNumber();
  }

  /**
   * Reads the textual content of the element the reader is positioned on.
   *
   * <p>The reader must be on a start element. On return it is positioned on the element's end
   * tag, which is checked but not consumed.
   *
   * @return the text content of the element
   * @throws XmlReaderRawTruffleException if the reader is not on a start element
   * @throws XmlParserRawTruffleException if the text is not followed by an end element
   */
  @TruffleBoundary
  public String getAsString() {
    // Check the event type first: getLocalName() is only defined on element events and would
    // otherwise fail with a bare IllegalStateException instead of the reader exception.
    assertCurrentTokenIsStartTag();
    String tag = xmlStreamReader.getLocalName();
    nextToken(); // skip the field name
    recordPosition();
    String text = getText();
    expectEndTag(tag); // don't skip the end tag but make sure it is there and it's the one we need
    return text;
  }

  /**
   * Returns the value of the attribute at {@code index} on the current start element.
   *
   * @param index position of the attribute, as used by {@link
   *     XMLStreamReader#getAttributeValue(int)}
   * @return the attribute value
   */
  @TruffleBoundary
  public String getStringAttribute(int index) {
    recordPosition();
    return xmlStreamReader.getAttributeValue(index);
  }

  /**
   * Parses {@code content}, ignoring surrounding whitespace, as a byte.
   *
   * @param content the text to convert
   * @return the parsed value
   * @throws XmlParserRawTruffleException if the text is not a valid byte
   */
  @TruffleBoundary
  public byte byteFrom(String content) {
    try {
      return Byte.parseByte(content.strip());
    } catch (NumberFormatException e) {
      throw new XmlParserRawTruffleException(
          "cannot cast '" + content + "' to byte", this, e, null);
    }
  }

  /**
   * Parses {@code content}, ignoring surrounding whitespace, as a short.
   *
   * @param content the text to convert
   * @return the parsed value
   * @throws XmlParserRawTruffleException if the text is not a valid short
   */
  @TruffleBoundary
  public short shortFrom(String content) {
    try {
      return Short.parseShort(content.strip());
    } catch (NumberFormatException e) {
      throw new XmlParserRawTruffleException(
          "cannot cast '" + content + "' to short", this, e, null);
    }
  }

  /**
   * Parses {@code content}, ignoring surrounding whitespace, as an int.
   *
   * @param content the text to convert
   * @return the parsed value
   * @throws XmlParserRawTruffleException if the text is not a valid int
   */
  @TruffleBoundary
  public int intFrom(String content) {
    try {
      return Integer.parseInt(content.strip());
    } catch (NumberFormatException e) {
      throw new XmlParserRawTruffleException("cannot cast '" + content + "' to int", this, e, null);
    }
  }

  /**
   * Parses {@code content}, ignoring surrounding whitespace, as a long.
   *
   * @param content the text to convert
   * @return the parsed value
   * @throws XmlParserRawTruffleException if the text is not a valid long
   */
  @TruffleBoundary
  public long longFrom(String content) {
    try {
      return Long.parseLong(content.strip());
    } catch (NumberFormatException e) {
      throw new XmlParserRawTruffleException(
          "cannot cast '" + content + "' to long", this, e, null);
    }
  }

  /**
   * Parses {@code content}, ignoring surrounding whitespace, as a float.
   *
   * @param content the text to convert
   * @return the parsed value
   * @throws XmlParserRawTruffleException if the text is not a valid float
   */
  @TruffleBoundary
  public float floatFrom(String content) {
    try {
      return Float.parseFloat(content.strip());
    } catch (NumberFormatException e) {
      throw new XmlParserRawTruffleException(
          "cannot cast '" + content + "' to float", this, e, null);
    }
  }

  /**
   * Parses {@code content}, ignoring surrounding whitespace, as a double.
   *
   * @param content the text to convert
   * @return the parsed value
   * @throws XmlParserRawTruffleException if the text is not a valid double
   */
  @TruffleBoundary
  public double doubleFrom(String content) {
    try {
      return Double.parseDouble(content.strip());
    } catch (NumberFormatException e) {
      throw new XmlParserRawTruffleException(
          "cannot cast '" + content + "' to double", this, e, null);
    }
  }

  /**
   * Parses {@code content}, ignoring surrounding whitespace, as a {@link BigDecimal}.
   *
   * @param content the text to convert
   * @return the parsed value
   * @throws XmlParserRawTruffleException if the text is not a valid decimal
   */
  @TruffleBoundary
  public BigDecimal decimalFrom(String content) {
    try {
      return new BigDecimal(content.strip());
    } catch (NumberFormatException e) {
      throw new XmlParserRawTruffleException(
          "cannot cast '" + content + "' to decimal", this, e, null);
    }
  }

  /**
   * Parses {@code content} as a boolean. Surrounding whitespace and letter case are ignored; only
   * "true" and "false" are accepted.
   *
   * @param content the text to convert
   * @return the parsed value
   * @throws XmlParserRawTruffleException if the text is neither "true" nor "false"
   */
  @TruffleBoundary
  public boolean boolFrom(String content) {
    String normalized = content.strip().toLowerCase();
    if (Objects.equals(normalized, "true")) {
      return true;
    }
    if (Objects.equals(normalized, "false")) {
      return false;
    }
    throw new XmlParserRawTruffleException("cannot cast '" + content + "' to boolean", this, null);
  }

  /**
   * Parses {@code content}, ignoring surrounding whitespace, with the configured date format.
   *
   * @param content the text to convert
   * @return the parsed date
   * @throws XmlParserRawTruffleException if the text does not match the date format
   */
  @TruffleBoundary
  public DateObject dateFrom(String content) {
    try {
      return new DateObject(LocalDate.parse(content.strip(), dateFormatter));
    } catch (DateTimeParseException ex) {
      throw new XmlParserRawTruffleException(
          String.format(
              "string '%s' does not match date template '%s'", content, settings.dateFormat),
          this,
          ex,
          null);
    }
  }

  /**
   * Parses {@code content}, ignoring surrounding whitespace, with the configured time format.
   *
   * @param content the text to convert
   * @return the parsed time
   * @throws XmlParserRawTruffleException if the text does not match the time format
   */
  @TruffleBoundary
  public TimeObject timeFrom(String content) {
    try {
      return new TimeObject(LocalTime.parse(content.strip(), timeFormatter));
    } catch (DateTimeParseException ex) {
      throw new XmlParserRawTruffleException(
          String.format(
              "string '%s' does not match time template '%s'", content, settings.timeFormat),
          this,
          ex,
          null);
    }
  }

  /**
   * Parses {@code content}, ignoring surrounding whitespace, with the configured timestamp format.
   *
   * @param content the text to convert
   * @return the parsed timestamp
   * @throws XmlParserRawTruffleException if the text does not match the timestamp format
   */
  @TruffleBoundary
  public TimestampObject timestampFrom(String content) {
    try {
      return new TimestampObject(LocalDateTime.parse(content.strip(), timestampFormatter));
    } catch (DateTimeParseException ex) {
      throw new XmlParserRawTruffleException(
          String.format(
              "string '%s' does not match timestamp template '%s'",
              content, settings.timestampFormat),
          this,
          ex,
          null);
    }
  }

  /**
   * Closes the underlying {@link XMLStreamReader}.
   *
   * <p>NOTE(review): {@link XMLStreamReader#close()} is documented as not closing the underlying
   * input source, and this method does not touch the char stream - confirm that the stream's
   * owner closes it.
   *
   * @throws XmlReaderRawTruffleException if the reader fails to close
   */
  @TruffleBoundary
  public void close() {
    try {
      xmlStreamReader.close();
    } catch (XMLStreamException e) {
      // TODO !!!!!!!!!!!!!!!!!!!!! Like in CSV/Json
      throw new XmlReaderRawTruffleException("Error closing XMLStreamReader", e, null);
    }
  }

  /**
   * Serializes the element the reader is positioned on back to XML text.
   *
   * <p>Start tags (with attributes), end tags and escaped text content are emitted using local
   * names; other events (comments, processing instructions, ...) are dropped. The reader is
   * advanced directly, without going through {@link #nextToken()}, and is left on the end tag of
   * the serialized element. When the reader is not on a start element, nothing is consumed and
   * the empty string is returned.
   *
   * @return the XML text of the current element, or the empty string
   * @throws XmlParserRawTruffleException if the underlying reader fails
   */
  @TruffleBoundary
  public String elementAsString() {
    StringBuilder s = new StringBuilder();
    int depth = 0;
    if (xmlStreamReader.getEventType() == XMLStreamConstants.START_ELEMENT) {
      appendStartTag(s);
      depth = 1;
    }

    try {
      while (depth > 0 && xmlStreamReader.hasNext()) {
        int token = xmlStreamReader.next();
        switch (token) {
          case XMLStreamConstants.START_ELEMENT:
            appendStartTag(s);
            depth += 1;
            break;
          case XMLStreamConstants.END_ELEMENT:
            s.append("</").append(xmlStreamReader.getLocalName()).append(">");
            depth -= 1;
            break;
          case XMLStreamConstants.ENTITY_REFERENCE:
          case XMLStreamConstants.CHARACTERS:
          case XMLStreamConstants.CDATA:
            s.append(StringEscapeUtils.escapeXml11(xmlStreamReader.getText()));
            break;
          default:
            // Any other event is intentionally left out of the output.
            break;
        }
      }
    } catch (XMLStreamException e) {
      throw new XmlParserRawTruffleException(e, this);
    }
    return s.toString();
  }

  // Appends the current start element as "<name attr="value" ...>". Attribute values are escaped
  // so that quotes, ampersands and angle brackets do not break the generated markup.
  private void appendStartTag(StringBuilder out) {
    out.append("<").append(xmlStreamReader.getLocalName());
    int attributeCount = xmlStreamReader.getAttributeCount();
    for (int i = 0; i < attributeCount; i++) {
      out.append(" ")
          .append(xmlStreamReader.getAttributeLocalName(i))
          .append("=\"")
          .append(StringEscapeUtils.escapeXml11(xmlStreamReader.getAttributeValue(i)))
          .append("\"");
    }
    out.append(">");
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy