org.dita.dost.writer.ChunkTopicParser Maven / Gradle / Ivy

Go to download
/*
 * This file is part of the DITA Open Toolkit project.
 *
 * Copyright 2007 IBM Corporation
 *
 * See the accompanying LICENSE file for applicable license.

 */
package org.dita.dost.writer;

import static javax.xml.XMLConstants.NULL_NS_URI;
import static org.dita.dost.reader.ChunkMapReader.*;
import static org.dita.dost.reader.GenListModuleReader.isFormatDita;
import static org.dita.dost.util.Constants.*;
import static org.dita.dost.util.FileUtils.getRelativeUnixPath;
import static org.dita.dost.util.StringUtils.split;
import static org.dita.dost.util.URLUtils.*;
import static org.dita.dost.util.XMLUtils.*;

import java.io.*;
import java.net.URI;
import java.nio.charset.StandardCharsets;
import java.util.*;
import org.dita.dost.log.MessageUtils;
import org.dita.dost.util.Job.FileInfo;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.AttributesImpl;

/**
 * Combines topics into a single file for {@code to-content} chunking.
 * Not reusable and not thread-safe.
 */
public final class ChunkTopicParser extends AbstractChunkTopicParser {

  /**
   * Constructor.
   */
  public ChunkTopicParser() {
    super();
    //        try {
    //            reader = getXMLReader();
    //            reader.setContentHandler(this);
    //            reader.setFeature(FEATURE_NAMESPACE_PREFIX, true);
    //        } catch (final Exception e) {
    //            throw new RuntimeException("Failed to initialize XML parser: " + e.getMessage(), e);
    //        }
  }

  /**
   * Starts chunk processing at {@code rootTopicref} using a fresh in-memory output buffer.
   *
   * <p>Whatever writer is current when processing ends (normally or by exception)
   * is closed; a failure to close is logged and not propagated.
   *
   * @param currentFile URI stored as the current file; relative references are resolved against it
   */
  @Override
  public void write(final URI currentFile) {
    this.currentFile = currentFile;
    try {
      output = new StringWriter();
      // The root call has no inherited output file.
      processChunk(rootTopicref, null);
    } finally {
      final Writer remaining = output;
      if (remaining != null) {
        try {
          remaining.close();
        } catch (final IOException closeFailure) {
          logger.error(closeFailure.getMessage(), closeFailure);
        }
      }
    }
  }

  /**
   * Processes one topicref and, recursively, its child topicrefs.
   *
   * <p>Steps, in order:
   * <ol>
   *   <li>Read {@code @href}, {@code @chunk}, {@code @copy-to}, {@code @class} and {@code @format}
   *       from the element, plus the cascaded {@code @scope} and {@code @processing-role}.</li>
   *   <li>Reset the per-topicref parser state ({@code targetTopicId}, {@code selectMethod},
   *       {@code include}).</li>
   *   <li>Pick the file to parse: {@code @copy-to} (carrying over the {@code @href} fragment) when
   *       {@code @copy-to} is set and the chunk value has no {@code to-content} token, otherwise
   *       {@code @href}.</li>
   *   <li>If there is a file to parse, the scope is not external and the format is DITA: for
   *       {@code to-content}, switch to a fresh output buffer and topic-ID set, choose the output
   *       file name and register it; then record the path change in {@code changeTable}, rewrite
   *       the topicref's {@code @href}, and (unless resource-only) feed the source file through
   *       this handler.</li>
   *   <li>Recurse into child topicrefs and merge their output into this element's output.</li>
   *   <li>For {@code to-content}, write the collected buffer and restore the caller's buffer and
   *       topic-ID set.</li>
   * </ol>
   *
   * <p>Runtime exceptions propagate to the caller; any other exception is logged and swallowed.
   *
   * @param topicref   topicref element to process; its {@code @href} may be rewritten
   * @param outputFile output file inherited from the parent topicref, {@code null} on the root call
   */
  private void processChunk(final Element topicref, final URI outputFile) {
    final URI hrefValue = toURI(getValue(topicref, ATTRIBUTE_NAME_HREF));
    final Collection<String> chunkValue = split(getValue(topicref, ATTRIBUTE_NAME_CHUNK));
    final URI copytoValue = toURI(getValue(topicref, ATTRIBUTE_NAME_COPY_TO));
    final String scopeValue = getCascadeValue(topicref, ATTRIBUTE_NAME_SCOPE);
    final String classValue = getValue(topicref, ATTRIBUTE_NAME_CLASS);
    final String processRoleValue = getCascadeValue(topicref, ATTRIBUTE_NAME_PROCESSING_ROLE);
    final String formatValue = getValue(topicref, ATTRIBUTE_NAME_FORMAT);

    URI outputFileName = outputFile;
    // Caller's buffer and topic-ID set, saved while a to-content chunk is being collected.
    Writer tempWriter = null;
    Set<String> tempTopicID = null;

    targetTopicId = null;
    selectMethod = CHUNK_SELECT_DOCUMENT;
    include = false;

    boolean needWriteDitaTag = true;

    try {
      URI parseFilePath;
      if (copytoValue != null && !chunkValue.contains(CHUNK_TO_CONTENT)) {
        // hrefValue is treated as nullable further down, so guard it here as well:
        // a topicref with @copy-to but no @href must not fail with a NullPointerException.
        if (hrefValue != null && hrefValue.getFragment() != null) {
          parseFilePath = setFragment(copytoValue, hrefValue.getFragment());
        } else {
          parseFilePath = copytoValue;
        }
      } else {
        parseFilePath = hrefValue;
      }

      if (parseFilePath != null && !ATTR_SCOPE_VALUE_EXTERNAL.equals(scopeValue) && isFormatDita(formatValue)) {
        // now the path to target file make sense
        if (chunkValue.contains(CHUNK_TO_CONTENT)) {
          // if current element contains "to-content" in chunk attribute
          // we need to create new buffer and flush the buffer to file
          // after processing is finished
          // NOTE(review): output/topicID are restored only at the end of the try block, so a
          // checked exception raised in between leaves the temporary buffer installed.
          tempWriter = output;
          tempTopicID = topicID;
          output = new StringWriter();
          topicID = new HashSet<>();
          if (MAP_MAP.matches(classValue)) {
            // Very special case, we have a map element with href value.
            // This is a map that needs to be chunked to content.
            // No need to parse any file, just generate a stub output.
            outputFileName = currentFile.resolve(parseFilePath);
            needWriteDitaTag = false;
          } else if (copytoValue != null) {
            // use @copy-to value as the new file name
            outputFileName = currentFile.resolve(copytoValue);
          } else if (hrefValue != null) {
            // try to use href value as the new file name
            if (chunkValue.contains(CHUNK_SELECT_TOPIC) || chunkValue.contains(CHUNK_SELECT_BRANCH)) {
              if (hrefValue.getFragment() != null) {
                // if we have an ID here, use it.
                outputFileName = currentFile.resolve(hrefValue.getFragment() + FILE_EXTENSION_DITA);
              } else {
                // Find the first topic id in target file if any.
                final String firstTopic = getFirstTopicId(new File(stripFragment(currentFile.resolve(hrefValue))));
                if (firstTopic != null) {
                  outputFileName = currentFile.resolve(firstTopic + FILE_EXTENSION_DITA);
                } else {
                  outputFileName = currentFile.resolve(hrefValue);
                }
              }
            } else {
              // otherwise, use the href value instead
              outputFileName = currentFile.resolve(hrefValue);
            }
          } else {
            // use randomly generated file name
            outputFileName = generateOutputFile(currentFile);
          }

          // Check if there is any conflict
          if (job.getStore().exists(outputFileName) && !MAP_MAP.matches(classValue)) {
            final URI t = outputFileName;
            outputFileName = generateOutputFile(currentFile);
            conflictTable.put(outputFileName, t);
          }
          // add newly generated file to changeTable
          // the new entry in changeTable has same key and value
          // in order to indicate it is a newly generated file
          changeTable.put(outputFileName, outputFileName);

          final FileInfo fi = generateFileInfo(outputFileName);
          job.add(fi);
        }
        // "by-topic" couldn't reach here
        this.outputFile = outputFileName;

        final URI path = currentFile.resolve(parseFilePath);
        URI newpath;
        if (path.getFragment() != null) {
          newpath = setFragment(outputFileName, path.getFragment());
        } else {
          final String firstTopicID = getFirstTopicId(new File(path));
          if (firstTopicID != null) {
            newpath = setFragment(outputFileName, firstTopicID);
          } else {
            newpath = outputFileName;
          }
        }
        // add file name changes to changeTable, this will be used in
        // TopicRefWriter's updateHref method, very important!!!
        changeTable.put(path, newpath);
        // update current element's @href value
        topicref.setAttribute(
          ATTRIBUTE_NAME_HREF,
          getRelativePath(currentFile.resolve(FILE_NAME_STUB_DITAMAP), newpath).toString()
        );

        if (parseFilePath.getFragment() != null) {
          targetTopicId = parseFilePath.getFragment();
        }

        final String s = getChunkByToken(chunkValue, "select-", null);
        if (s != null) {
          selectMethod = s;
          // if the current topic href referred to a entire
          // topic file, it will be handled in "document" level.
          if (targetTopicId == null) {
            selectMethod = CHUNK_SELECT_DOCUMENT;
          }
        }
        // Remember the file being parsed by the caller; this method is re-entered recursively.
        final URI tempPath = currentParsingFile;
        currentParsingFile = currentFile.resolve(parseFilePath);

        if (!ATTR_PROCESSING_ROLE_VALUE_RESOURCE_ONLY.equals(processRoleValue)) {
          currentParsingFileTopicIDChangeTable = new HashMap<>();
          // TODO recursive point
          logger.info("Processing " + currentParsingFile);
          job.getStore().transform(currentParsingFile, this);
          // If topic IDs were changed while parsing, point the topicref's fragment at the new ID.
          if (currentParsingFileTopicIDChangeTable.size() > 0) {
            final URI href = toURI(topicref.getAttribute(ATTRIBUTE_NAME_HREF));
            final String pathtoElem = href.getFragment() != null ? href.getFragment() : "";
            // Only the part of the fragment before the first SLASH is looked up.
            final String old_elementid = pathtoElem.contains(SLASH)
              ? pathtoElem.substring(0, pathtoElem.indexOf(SLASH))
              : pathtoElem;
            if (!old_elementid.isEmpty()) {
              final String new_elementid = currentParsingFileTopicIDChangeTable.get(old_elementid);
              if (new_elementid != null && !new_elementid.isEmpty()) {
                topicref.setAttribute(ATTRIBUTE_NAME_HREF, setFragment(href, new_elementid).toString());
              }
            }
          }
          currentParsingFileTopicIDChangeTable = null;
        }
        // restore the currentParsingFile
        currentParsingFile = tempPath;
      }

      if (topicref.hasChildNodes()) {
        // if current element has child nodes and chunk results for this element has value
        // which means current element makes sense for chunk action.
        final StringWriter tempOutput = (StringWriter) output;
        // Children write into their own buffer so their content can be merged afterwards.
        output = new StringWriter();
        final NodeList children = topicref.getChildNodes();
        for (int i = 0; i < children.getLength(); i++) {
          final Node current = children.item(i);
          if (MAP_TOPICREF.matches(current)) {
            processChunk((Element) current, outputFileName);
          }
        }

        // merge results
        final StringBuffer parentResult = tempOutput.getBuffer();
        final CharSequence tmpContent = ((StringWriter) output).getBuffer();
        // Skip empty parents and @processing-role='resource-only' entries.
        // append into root topic
        if (
          parentResult.length() > 0 &&
          parseFilePath != null &&
          !ATTR_PROCESSING_ROLE_VALUE_RESOURCE_ONLY.equals(processRoleValue)
        ) {
          insertAfter(hrefValue, parentResult, tmpContent);
          // replace contents
        } else {
          parentResult.append(tmpContent);
        }
        // restore back to parent's output this is a different temp
        output = tempOutput;
      }

      if (chunkValue.contains(CHUNK_TO_CONTENT)) {
        final String tmpContent = output.toString();
        writeToContentChunk(tmpContent, outputFileName, needWriteDitaTag);
        // restore back original output
        output = tempWriter;
        topicID = tempTopicID;
      }
    } catch (final RuntimeException e) {
      throw e;
    } catch (final Exception e) {
      logger.error(e.getMessage(), e);
    }
  }

  /**
   * Append XML content into root element
   *
   * @param hrefValue    href of the topicref
   * @param parentResult XML content to insert into
   * @param tmpContent   XML content to insert
   */
  private void insertAfter(final URI hrefValue, final StringBuffer parentResult, final CharSequence tmpContent) {
    int insertpoint = parentResult.lastIndexOf("", insertpoint);

    if (insertpoint == -1 || end == -1) {
      logger.error(MessageUtils.getMessage("DOTJ033E", hrefValue.toString()).toString());
    } else {
      if (ELEMENT_NAME_DITA.equals(parentResult.substring(insertpoint, end).trim())) {
        insertpoint = parentResult.lastIndexOf(" 0) {
      skipLevel++;
    }

    if (TOPIC_TOPIC.matches(cls)) {
      topicSpecSet.add(qName);

      processSelect(id);
    }

    if (include) {
      includelevel++;

      AttributesImpl resAtts = new AttributesImpl(checkForNSDeclaration(atts, uri));
      writeStartElement(output, qName, resAtts);
    }
  }

  /**
   * Handles the end of an element: updates namespace nesting, leaves skip mode when the
   * skipped subtree ends, and writes the end tag while content is being included.
   *
   * @param uri       namespace URI of the element
   * @param localName local name of the element (not used here)
   * @param qName     qualified name written as the end tag
   * @throws SAXException declared by the SAX handler contract
   */
  @Override
  public void endElement(final String uri, final String localName, final String qName) throws SAXException {
    // Pop one nesting level for a namespaced element whose namespace is being tracked.
    if (!Objects.equals(uri, NULL_NS_URI) && namespaceMap.containsKey(uri)) {
      decreaseNamespaceLevel(uri);
    }

    if (skip) {
      if (skipLevel > 0) {
        // Still inside the skipped subtree.
        skipLevel--;
      } else {
        // The skipped element itself has ended: switch back to including content.
        include = true;
        skip = false;
        skipLevel = 0;
      }
    }

    if (!include) {
      return;
    }

    includelevel--;
    // Only emit the end tag while the include depth has not dropped below zero.
    if (includelevel >= 0) {
      writeEndElement(output, qName);
    }
    // Outside document selection, inclusion stops once the depth is back at zero.
    if (includelevel == 0 && !CHUNK_SELECT_DOCUMENT.equals(selectMethod)) {
      include = false;
    }
  }

  /**
   * Adds one to the nesting level stored for a namespace.
   * The namespace must already have an entry in {@code namespaceMap}.
   *
   * @param uri namespace URI whose level is raised
   */
  private void increaseNamespaceLevel(String uri) {
    final int currentLevel = namespaceMap.get(uri);
    namespaceMap.put(uri, currentLevel + 1);
  }

  /**
   * Subtracts one from the nesting level stored for a namespace.
   * The namespace must already have an entry in {@code namespaceMap}; the entry is kept
   * even when the level reaches zero.
   *
   * @param uri namespace URI whose level is lowered
   */
  private void decreaseNamespaceLevel(String uri) {
    final int currentLevel = namespaceMap.get(uri);
    namespaceMap.put(uri, currentLevel - 1);
  }

  /**
   * Builds the attribute list for a start tag, tracking namespace nesting on the way.
   *
   * <p>For an element in no namespace the attributes go through {@code processAttributes}.
   * For a namespaced element the namespace's nesting level is first recorded (started at 1
   * on first sight, incremented otherwise) and the attributes then go through
   * {@code processAttributesNS}, which presumably adds the prefixed {@code xmlns}
   * declaration when needed (confirm against that helper).
   *
   * @param atts attributes reported for the element
   * @param uri  namespace URI of the element
   * @return a new attribute list produced by the matching helper
   */
  private Attributes checkForNSDeclaration(Attributes atts, String uri) {
    if (Objects.equals(uri, NULL_NS_URI)) {
      return new AttributesImpl(processAttributes(atts));
    }

    // Namespaced element: update the nesting counter before the attributes are processed.
    if (namespaceMap.containsKey(uri)) {
      increaseNamespaceLevel(uri);
    } else {
      namespaceMap.put(uri, 1);
    }
    return new AttributesImpl(processAttributesNS(atts, uri));
  }
}