All downloads are free. Search and download functionality uses the official Maven repository.

org.sonarsource.analyzer.commons.xml.XmlParser Maven / Gradle / Ivy

/*
 * SonarSource Analyzers XML Parsing Commons
 * Copyright (C) 2009-2021 SonarSource SA
 * mailto:info AT sonarsource DOT com
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 3 of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
 */
package org.sonarsource.analyzer.commons.xml;

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Deque;
import java.util.LinkedList;
import java.util.List;
import java.util.Objects;
import java.util.Optional;
import javax.xml.stream.XMLStreamConstants;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.XMLStreamReader;
import org.sonarsource.analyzer.commons.xml.PrologElement.PrologAttribute;
import org.sonarsource.analyzer.commons.xml.XmlFile.Location;
import org.w3c.dom.Document;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.xml.sax.SAXException;

class XmlParser {

  private static final String BOM_CHAR = "\ufeff";
  private static final String XML_DECLARATION_TAG = " nodes = new LinkedList<>();
  private XmlFile xmlFile;

  /**
   * Parses the contents of {@code xmlFile} into a DOM document, attaches that document to the file,
   * then walks the same content a second time with a stream reader to record node locations.
   *
   * @param xmlFile        file whose contents are parsed and which receives the document and prolog
   * @param namespaceAware forwarded to the document builder factory and to {@code setDocument}
   * @throws ParseException wrapping any stream, SAX or I/O failure raised while parsing
   */
  XmlParser(XmlFile xmlFile, boolean namespaceAware) {
    this.xmlFile = xmlFile;
    try {
      setContent();

      // Build the DOM from the (possibly trimmed) content, encoded with the file's charset
      byte[] encodedContent = content.getBytes(xmlFile.getCharset());
      Document dom = SafeDomParserFactory.createDocumentBuilder(namespaceAware)
        .parse(new ByteArrayInputStream(encodedContent));
      xmlFile.setDocument(dom, namespaceAware);

      // The location walk starts at the document node
      currentNode = dom;
      nodes.push(dom);

      parseXmlDeclaration();
      parseXml();
      setDocumentLocation(xmlFile);
    } catch (XMLStreamException | SAXException | IOException e) {
      throw new ParseException(e);
    }
  }

  /**
   * Stores the overall range of the document as user data under {@code Location.NODE}.
   * The range ends with the location of the document's last child. It starts at the prolog start
   * location when the file has a prolog element, otherwise at the location of the first child.
   *
   * @param xmlFile file whose document receives the location
   */
  private static void setDocumentLocation(XmlFile xmlFile) {
    Document document = xmlFile.getDocument();
    XmlTextRange firstChildRange = XmlFile.nodeLocation(document.getFirstChild());
    XmlTextRange end = XmlFile.nodeLocation(document.getLastChild());

    // Typed Optional: a raw Optional would not allow calling getPrologStartLocation() on its value
    Optional<PrologElement> prologElement = xmlFile.getPrologElement();
    XmlTextRange startRange = prologElement
      .map(PrologElement::getPrologStartLocation)
      .orElse(firstChildRange);

    document.setUserData(Location.NODE.name(), new XmlTextRange(startRange, end), null);
  }

  /**
   * Initializes {@code content} and {@code xmlFileStartLocation} from the file contents.
   * A leading byte-order mark is dropped. When the XML declaration tag occurs in the text,
   * the content starts at its first occurrence and the start location is moved just before it;
   * otherwise the whole text is used and the start location is the beginning of the text.
   *
   * @throws XMLStreamException propagated from the position helper
   */
  private void setContent() throws XMLStreamException {
    String text = xmlFile.getContents();
    if (text.startsWith(BOM_CHAR)) {
      // the mark is a single character: strip it before looking for the declaration
      text = text.substring(1);
    }

    int declarationIndex = text.indexOf(XML_DECLARATION_TAG);
    if (declarationIndex < 0) {
      xmlFileStartLocation = new XmlFilePosition(text);
      content = text;
      return;
    }

    content = text.substring(declarationIndex);
    xmlFileStartLocation = new XmlFilePosition(text).moveBefore(XML_DECLARATION_TAG);
  }

  /**
   * Reads {@code content} with a stream reader and dispatches every event to the matching
   * visit method, so that locations get recorded on the DOM nodes reached through
   * {@code currentNode}.
   *
   * @throws XMLStreamException when the stream reader or a position helper fails
   */
  private void parseXml() throws XMLStreamException {
    XMLStreamReader reader = SafeStaxParserFactory.createXMLInputFactory().createXMLStreamReader(new StringReader(content));

    while (reader.hasNext()) {
      // captured before advancing: it describes the event being left
      previousEventIsText = reader.getEventType() == XMLStreamConstants.CHARACTERS;
      reader.next();
      int eventType = reader.getEventType();
      XmlFilePosition eventStart = new XmlFilePosition(content, reader.getLocation());

      // the start of this event is the end of the node still pending, if any
      finalizePreviousNode(eventStart);

      switch (eventType) {
        case XMLStreamConstants.START_ELEMENT:
          visitStartElement(reader, eventStart);
          break;

        case XMLStreamConstants.END_ELEMENT:
          visitEndElement(eventStart);
          break;

        case XMLStreamConstants.CHARACTERS:
          visitTextNode(eventStart);
          break;

        case XMLStreamConstants.CDATA:
          // Empty CDATA are not detected by the xerces DocumentBuilder
          if (!reader.getText().isEmpty()) {
            visitCdata(eventStart);
          }
          break;

        case XMLStreamConstants.DTD:
          visitDTD(eventStart);
          break;

        case XMLStreamConstants.ENTITY_REFERENCE:
        case XMLStreamConstants.COMMENT:
        case XMLStreamConstants.PROCESSING_INSTRUCTION:
          // end not known yet: it is set when the next event is read
          setNextNode();
          currentNodeStartLocation = eventStart;
          break;

        default:
          break;
      }

      boolean elementEvent = eventType == XMLStreamConstants.START_ELEMENT
        || eventType == XMLStreamConstants.END_ELEMENT;
      if (!elementEvent) {
        // as no end event for non-element nodes, consider them closed
        currentNodeIsClosed = true;
      }
    }
  }

  /**
   * Handles a CHARACTERS event. The first text event of a run moves to the next DOM node and
   * remembers where it starts; a text event directly following another one keeps the same node
   * and remembers the range already recorded for it.
   *
   * @param startLocation position where the text event starts
   */
  private void visitTextNode(XmlFilePosition startLocation) {
    if (!previousEventIsText) {
      setNextNode();
      currentNodeStartLocation = startLocation;
      return;
    }

    // text can appear after another text when it's not coalesced (see XMLInputFactory.IS_COALESCING)
    // so both events stand for the same node in DOM
    currentNodeStartRange = XmlFile.nodeLocation(currentNode);
  }

  /**
   * Completes the location of the node left pending by the previous event, then clears the
   * pending state. Does nothing besides the reset when no start location or range is pending.
   *
   * @param endLocation position where the pending node ends (start of the current event)
   */
  private void finalizePreviousNode(XmlFilePosition endLocation) {
    XmlFilePosition pendingStart = currentNodeStartLocation;

    if (pendingStart == null) {
      if (currentNodeStartRange != null) {
        // continued text node: extend the range that was already recorded
        XmlTextRange extended = new XmlTextRange(currentNodeStartRange, endLocation, xmlFileStartLocation);
        currentNode.setUserData(Location.NODE.name(), extended, null);
      }
    } else {
      setLocation(currentNode, Location.NODE, pendingStart, endLocation);
      // an entity reference has a child which is its text replacement:
      // give that child the same location
      Node replacement = currentNode.getFirstChild();
      if (replacement != null) {
        setLocation(replacement, Location.NODE, pendingStart, endLocation);
      }
    }

    currentNodeStartLocation = null;
    currentNodeStartRange = null;
  }

  /**
   * Handles a START_ELEMENT event: moves to the matching DOM element, pushes it on the stack of
   * open elements, records the locations of its opening tag and of its name, then visits its
   * attributes.
   *
   * @param xmlReader     reader positioned on the start element, used to measure the qualified name
   * @param startLocation position where the opening tag starts
   * @throws XMLStreamException propagated from the position helpers
   */
  private void visitStartElement(XMLStreamReader xmlReader, XmlFilePosition startLocation) throws XMLStreamException {
    setNextNode();
    nodes.push(currentNode);

    // one extra character for the opening bracket that precedes the name
    int qualifiedNameLength = getNameWithNamespaceLength(xmlReader);
    XmlFilePosition nameEnd = startLocation.shift(qualifiedNameLength + 1);
    XmlFilePosition tagEnd = startLocation.moveAfterClosingBracket();

    setLocation(currentNode, Location.START, startLocation, tagEnd);
    XmlFilePosition nameStart = startLocation.shift(1);
    setLocation(currentNode, Location.NAME, nameStart, nameEnd);

    // attributes lie between the end of the name and the closing bracket
    visitAttributes(nameEnd, tagEnd.moveBackward());
  }

  /**
   * Handles an END_ELEMENT event: pops the element being closed, records the location of its
   * closing tag and the range of the whole element (from the start of its opening tag to the end
   * of its closing tag), and marks it as closed.
   *
   * @param startLocation position where the closing tag starts
   * @throws XMLStreamException propagated from the position helper
   */
  private void visitEndElement(XmlFilePosition startLocation) throws XMLStreamException {
    currentNode = nodes.pop();

    XmlFilePosition tagEnd = startLocation.moveAfterClosingBracket();
    setLocation(currentNode, Location.END, startLocation, tagEnd);

    // the opening tag range was stored when the element was started
    Object openingTagRange = currentNode.getUserData(Location.START.name());
    XmlTextRange wholeElement = new XmlTextRange((XmlTextRange) openingTagRange, tagEnd, xmlFileStartLocation);
    currentNode.setUserData(Location.NODE.name(), wholeElement, null);

    currentNodeIsClosed = true;
  }

  /**
   * Advances {@code currentNode} to the DOM node matching the next event: the next sibling when
   * the current node is closed, its first child otherwise. The new node starts out as not closed.
   */
  private void setNextNode() {
    // when currentNode (last processed node) is closed, it's impossible that we visit its child
    currentNode = currentNodeIsClosed
      ? currentNode.getNextSibling()
      : currentNode.getFirstChild();
    currentNodeIsClosed = false;
  }

  /**
   * Registers a prolog element on the file when {@code content} starts with the XML declaration
   * tag. The element is built from the declaration's attributes, the range of the declaration tag
   * itself, and the range of the two characters that end the declaration.
   *
   * @throws XMLStreamException propagated from the position helpers
   */
  private void parseXmlDeclaration() throws XMLStreamException {
    XmlFilePosition startLocation = new XmlFilePosition(content);
    if (!startLocation.startsWith(XML_DECLARATION_TAG)) {
      // no declaration: the file has no prolog element
      return;
    }

    XmlFilePosition endLocation = startLocation.moveAfterClosingBracket();
    XmlFilePosition attributesStart = startLocation.moveAfter(XML_DECLARATION_TAG);

    // attributes are searched up to the character preceding the closing bracket
    List<PrologAttribute> prologAttributes = visitPrologAttributes(attributesStart, endLocation.moveBackward());

    xmlFile.setPrologElement(new PrologElement(
      prologAttributes,
      new XmlTextRange(startLocation, attributesStart, xmlFileStartLocation),
      new XmlTextRange(endLocation.moveBackward().moveBackward(), endLocation, xmlFileStartLocation)
    ));
  }

  /**
   * Handles a DTD event: moves to the matching DOM node and records its location, which runs
   * from {@code startLocation} to just after the closing bracket found from there.
   *
   * @param startLocation position where the DTD starts
   * @throws XMLStreamException propagated from the position helper
   */
  private void visitDTD(XmlFilePosition startLocation) throws XMLStreamException {
    setNextNode();
    XmlFilePosition dtdEnd = startLocation.moveAfterClosingBracket();
    setLocation(currentNode, Location.NODE, startLocation, dtdEnd);
  }

  // NOTE(review): the text from here up to the opening brace of visitPrologAttributes is damaged
  // in this copy of the file. String literals containing markup characters were cut, so the rest
  // of visitCdata, the definitions of setLocation(...) and visitAttributes(...) that other methods
  // call, and the beginning of the visitPrologAttributes signature are missing. Restore these
  // lines from the upstream source before compiling; they are left untouched here.
  private void visitCdata(XmlFilePosition startLocation) throws XMLStreamException {
    if (!startLocation.startsWith("");
    XmlFilePosition endLocation = beforeClosingTag.moveAfter("]]>");
    setLocation(currentNode, Location.START, startLocation, startLocation.moveAfter(" visitPrologAttributes(XmlFilePosition start, XmlFilePosition end) throws XMLStreamException {
    // Body of visitPrologAttributes: collects the name=value pairs found between start and end.
    // Skip the whitespace that precedes the first attribute name.
    XmlFilePosition currentLocation = start.moveAfterWhitespaces();
    List attributes = new ArrayList<>();

    // One iteration per attribute; presumably has("=", end) tells whether an '=' remains before end
    while (currentLocation.has("=", end)) {
      // the attribute name runs from the current position up to the '='
      XmlFilePosition attributeNameEnd = currentLocation.moveBefore("=");

      // the value starts at the first non-whitespace character after the '='
      XmlFilePosition attributeValueStart = attributeNameEnd.moveAfter("=").moveAfterWhitespaces();
      // c is the character opening the value; the value ends after the next occurrence of c
      char c = attributeValueStart.readChar();
      XmlFilePosition attributeValueEnd = attributeValueStart.shift(1).moveAfter(String.valueOf(c));

      // the value range includes the delimiters, the value text has them removed
      attributes.add(new PrologAttribute(
        currentLocation.textUntil(attributeNameEnd),
        new XmlTextRange(currentLocation, attributeNameEnd, xmlFileStartLocation),
        removeQuotes(attributeValueStart.textUntil(attributeValueEnd)),
        new XmlTextRange(attributeValueStart, attributeValueEnd, xmlFileStartLocation)
      ));
      // continue with the next attribute, if any
      currentLocation = attributeValueEnd.moveAfterWhitespaces();
    }

    return attributes;
  }

  /**
   * Removes the pair of quotes surrounding {@code str}.
   * The string is returned unchanged unless it has at least two characters, starts with a single
   * or double quote, and ends with that same quote character; this avoids dropping a trailing
   * character that is not a closing quote.
   *
   * @param str text to unquote, must not be null
   * @return the text between the quotes, or {@code str} itself when it is not quoted
   */
  private static String removeQuotes(String str) {
    int length = str.length();
    if (length < 2) {
      return str;
    }

    char first = str.charAt(0);
    boolean quoted = (first == '"' || first == '\'') && str.charAt(length - 1) == first;
    return quoted ? str.substring(1, length - 1) : str;
  }

  /**
   * Computes the length of the current element name as written in the source, including the
   * namespace prefix and its separator when the name has a prefix.
   *
   * @param streamReader reader positioned on an element event
   * @return length of the local name, plus the prefix length and one separator when prefixed
   */
  private static int getNameWithNamespaceLength(XMLStreamReader streamReader) {
    String prefix = streamReader.getName().getPrefix();
    int localNameLength = streamReader.getLocalName().length();
    // a prefixed name takes one more character for the separator after the prefix
    return prefix.isEmpty() ? localNameLength : prefix.length() + 1 + localNameLength;
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy