All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.dita.dost.reader.ChunkMapReader Maven / Gradle / Ivy

The newest version!
/*
 * This file is part of the DITA Open Toolkit project.
 *
 * Copyright 2007 IBM Corporation
 *
 * See the accompanying LICENSE file for applicable license.

 */
package org.dita.dost.reader;

import static java.util.Collections.unmodifiableSet;
import static org.apache.commons.io.FilenameUtils.getBaseName;
import static org.dita.dost.util.Constants.*;
import static org.dita.dost.util.DitaUtils.getDitaVersion;
import static org.dita.dost.util.FileUtils.getFragment;
import static org.dita.dost.util.FileUtils.replaceExtension;
import static org.dita.dost.util.StringUtils.join;
import static org.dita.dost.util.StringUtils.split;
import static org.dita.dost.util.URLUtils.*;
import static org.dita.dost.util.XMLUtils.*;

import java.io.File;
import java.io.IOException;
import java.io.OutputStream;
import java.net.URI;
import java.util.*;
import java.util.stream.Collectors;
import javax.xml.stream.XMLOutputFactory;
import javax.xml.stream.XMLStreamWriter;
import org.dita.dost.exception.DITAOTException;
import org.dita.dost.log.MessageUtils;
import org.dita.dost.module.ChunkModule.ChunkFilenameGenerator;
import org.dita.dost.module.ChunkModule.ChunkFilenameGeneratorFactory;
import org.dita.dost.module.reader.TempFileNameScheme;
import org.dita.dost.util.DitaClass;
import org.dita.dost.util.Job;
import org.dita.dost.util.Job.FileInfo;
import org.dita.dost.util.URLUtils;
import org.dita.dost.util.XMLSerializer;
import org.dita.dost.writer.AbstractDomFilter;
import org.dita.dost.writer.ChunkTopicParser;
import org.dita.dost.writer.SeparateChunkTopicParser;
import org.w3c.dom.*;
import org.xml.sax.SAXException;

/**
 * ChunkMapReader class, read and filter ditamap file for chunking.
 */
// TODO rename this because this is not a reader, it's a filter
public final class ChunkMapReader extends AbstractDomFilter {

  public static final String FILE_NAME_STUB_DITAMAP = "stub.ditamap";
  public static final String FILE_EXTENSION_CHUNK = ".chunk";
  public static final String ATTR_XTRF_VALUE_GENERATED = "generated_by_chunk";

  public static final String CHUNK_SELECT_BRANCH = "select-branch";
  public static final String CHUNK_SELECT_TOPIC = "select-topic";
  public static final String CHUNK_SELECT_DOCUMENT = "select-document";
  private static final String CHUNK_BY_DOCUMENT = "by-document";
  private static final String CHUNK_BY_TOPIC = "by-topic";
  public static final String CHUNK_TO_CONTENT = "to-content";
  public static final String CHUNK_TO_NAVIGATION = "to-navigation";
  public static final String CHUNK_PREFIX = "Chunk";

  private TempFileNameScheme tempFileNameScheme;
  private Collection rootChunkOverride;
  private String defaultChunkByToken;

  // ChunkTopicParser assumes keys and values are chimera paths, i.e. systems paths with fragments.
  private final LinkedHashMap changeTable = new LinkedHashMap<>(128);

  private final Map conflictTable = new HashMap<>(128);

  private boolean supportToNavigation;

  private ProcessingInstruction workdir = null;
  private ProcessingInstruction workdirUrl = null;
  private ProcessingInstruction path2proj = null;
  private ProcessingInstruction path2projUrl = null;
  private ProcessingInstruction path2rootmapUrl = null;

  private final ChunkFilenameGenerator chunkFilenameGenerator = ChunkFilenameGeneratorFactory.newInstance();

  @Override
  public void setJob(final Job job) {
    super.setJob(job);
    try {
      tempFileNameScheme = (TempFileNameScheme) Class.forName(job.getProperty("temp-file-name-scheme")).newInstance();
    } catch (InstantiationException | IllegalAccessException | ClassNotFoundException e) {
      throw new RuntimeException(e);
    }
    tempFileNameScheme.setBaseDir(job.getInputDir());
  }

  public void setRootChunkOverride(final String chunkValue) {
    rootChunkOverride = split(chunkValue);
  }

  /**
   * Absolute URI to file being processed
   */
  private URI currentFile;

  /**
   * read input file.
   *
   * @param inputFile filename
   */
  @Override
  public void read(final File inputFile) throws DITAOTException {
    this.currentFile = inputFile.toURI();

    super.read(inputFile);
  }

  @Override
  public Document process(final Document doc) {
    final Float ditaVersion = getDitaVersion(doc.getDocumentElement());
    if (ditaVersion == null || ditaVersion >= 2.0f) {
      return doc;
    }
    final Element root = doc.getDocumentElement();
    if (rootChunkOverride != null) {
      final String c = join(rootChunkOverride, " ");
      logger.debug("Use override root chunk \"" + c + "\"");
      root.setAttribute(ATTRIBUTE_NAME_CHUNK, c);
    }
    readLinks(doc);
    readProcessingInstructions(doc);

    final Collection rootChunk = split(root.getAttribute(ATTRIBUTE_NAME_CHUNK));
    defaultChunkByToken = getChunkByToken(rootChunk, "by-", CHUNK_BY_DOCUMENT);

    if (rootChunk.contains(CHUNK_TO_CONTENT)) {
      chunkMap(root);
    } else {
      for (final Element currentElem : getChildElements(root)) {
        if (MAP_RELTABLE.matches(currentElem)) {
          updateReltable(currentElem);
        } else if (MAP_TOPICREF.matches(currentElem)) {
          processTopicref(currentElem);
        }
      }
    }

    return buildOutputDocument(root);
  }

  private final Set chunkTopicSet = new HashSet<>();

  /**
   * @return absolute temporary files
   */
  public Set getChunkTopicSet() {
    return unmodifiableSet(chunkTopicSet);
  }

  private void readLinks(final Document doc) {
    final Element root = doc.getDocumentElement();
    readLinks(root, false, false);
  }

  private void readLinks(final Element elem, final boolean chunk, final boolean disabled) {
    final boolean c = chunk || elem.getAttributeNode(ATTRIBUTE_NAME_CHUNK) != null;
    final boolean d =
      disabled ||
      elem.getAttribute(ATTRIBUTE_NAME_CHUNK).contains(CHUNK_TO_NAVIGATION) ||
      (MAPGROUP_D_TOPICGROUP.matches(elem) && !SUBMAP.matches(elem)) ||
      MAP_RELTABLE.matches(elem);
    final Attr href = elem.getAttributeNode(ATTRIBUTE_NAME_HREF);
    if (href != null) {
      final URI filename = stripFragment(currentFile.resolve(href.getValue()));
      if (c && !d) {
        chunkTopicSet.add(filename);
        final Attr copyTo = elem.getAttributeNode(ATTRIBUTE_NAME_COPY_TO);
        if (copyTo != null) {
          final URI copyToFile = stripFragment(currentFile.resolve(copyTo.getValue()));
          chunkTopicSet.add(copyToFile);
        }
      }
    }

    for (final Element topicref : getChildElements(elem, MAP_TOPICREF)) {
      readLinks(topicref, c, d);
    }
  }

  public static String getChunkByToken(
    final Collection chunkValue,
    final String category,
    final String defaultToken
  ) {
    if (chunkValue.isEmpty()) {
      return defaultToken;
    }
    for (final String token : chunkValue) {
      if (token.startsWith(category)) {
        return token;
      }
    }
    return defaultToken;
  }

  /**
   * Process map when "to-content" is specified on map element.
   * 

* TODO: Instead of reclassing map element to be a topicref, add a topicref * into the map root and move all map content into that topicref. */ private void chunkMap(final Element root) { // create the reference to the new file on root element. URI newFilename = URLUtils.toURI(replaceExtension(new File(currentFile).getName(), FILE_EXTENSION_DITA)); URI newFile = currentFile.resolve(newFilename); if (job.getStore().exists(newFile)) { final URI oldFile = newFile; newFilename = URLUtils.toURI(chunkFilenameGenerator.generateFilename(CHUNK_PREFIX, FILE_EXTENSION_DITA)); newFile = currentFile.resolve(newFilename); // Mark up the possible name changing, in case that references might be updated. conflictTable.put(newFile, oldFile.normalize()); } changeTable.put(newFile, newFile); // change the class attribute to "topicref" final String origCls = root.getAttribute(ATTRIBUTE_NAME_CLASS); root.setAttribute(ATTRIBUTE_NAME_CLASS, origCls + MAP_TOPICREF.matcher); root.setAttribute(ATTRIBUTE_NAME_HREF, newFilename.toString()); createTopicStump(newFile); // process chunk processTopicref(root); // restore original root element if (origCls != null) { root.setAttribute(ATTRIBUTE_NAME_CLASS, origCls); } root.removeAttribute(ATTRIBUTE_NAME_HREF); } /** * Create the new topic stump. */ private void createTopicStump(final URI newFile) { try (final OutputStream newFileWriter = job.getStore().getOutputStream(newFile)) { final XMLStreamWriter o = XMLOutputFactory.newInstance().createXMLStreamWriter(newFileWriter, UTF8); o.writeStartDocument(); o.writeProcessingInstruction( PI_WORKDIR_TARGET, UNIX_SEPARATOR + new File(newFile.resolve(".")).getAbsolutePath() ); o.writeProcessingInstruction(PI_WORKDIR_TARGET_URI, newFile.resolve(".").toString()); o.writeStartElement(ELEMENT_NAME_DITA); o.writeEndElement(); o.writeEndDocument(); o.close(); newFileWriter.flush(); } catch (final RuntimeException e) { throw e; } catch (final Exception e) { logger.error(e.getMessage(), e); } } /** * Read processing metadata from processing instructions. */ private void readProcessingInstructions(final Document doc) { final NodeList docNodes = doc.getChildNodes(); for (int i = 0; i < docNodes.getLength(); i++) { final Node node = docNodes.item(i); if (node.getNodeType() == Node.PROCESSING_INSTRUCTION_NODE) { final ProcessingInstruction pi = (ProcessingInstruction) node; switch (pi.getNodeName()) { case PI_WORKDIR_TARGET -> workdir = pi; case PI_WORKDIR_TARGET_URI -> workdirUrl = pi; case PI_PATH2PROJ_TARGET -> path2proj = pi; case PI_PATH2PROJ_TARGET_URI -> path2projUrl = pi; case PI_PATH2ROOTMAP_TARGET_URI -> path2rootmapUrl = pi; } } } } private void outputMapFile(final URI file, final Document doc) { try { job.getStore().writeDocument(doc, file); } catch (final IOException e) { logger.error("Failed to serialize map: " + e.getMessage(), e); } } private Document buildOutputDocument(final Element root) { final Document doc = getDocumentBuilder().newDocument(); if (workdir != null) { doc.appendChild(doc.importNode(workdir, true)); } if (workdirUrl != null) { doc.appendChild(doc.importNode(workdirUrl, true)); } if (path2proj != null) { doc.appendChild(doc.importNode(path2proj, true)); } if (path2projUrl != null) { doc.appendChild(doc.importNode(path2projUrl, true)); } if (path2rootmapUrl != null) { doc.appendChild(doc.importNode(path2rootmapUrl, true)); } doc.appendChild(doc.importNode(root, true)); return doc; } private void processTopicref(final Element topicref) { final String xtrf = getValue(topicref, ATTRIBUTE_NAME_XTRF); if (xtrf != null && xtrf.contains(ATTR_XTRF_VALUE_GENERATED)) { return; } final Collection chunk = split(getValue(topicref, ATTRIBUTE_NAME_CHUNK)); final URI href = toURI(getValue(topicref, ATTRIBUTE_NAME_HREF)); final URI copyTo = toURI(getValue(topicref, ATTRIBUTE_NAME_COPY_TO)); final String scope = getCascadeValue(topicref, ATTRIBUTE_NAME_SCOPE); final String chunkByToken = getChunkByToken(chunk, "by-", defaultChunkByToken); if ( ATTR_SCOPE_VALUE_EXTERNAL.equals(scope) || (href != null && !job.getStore().exists(currentFile.resolve(href.toString()))) || (chunk.isEmpty() && href == null) ) { processChildTopicref(topicref); } else if (chunk.contains(CHUNK_TO_CONTENT)) { if (href != null || copyTo != null || topicref.hasChildNodes()) { if (chunk.contains(CHUNK_BY_TOPIC)) { logger.warn(MessageUtils.getMessage("DOTJ064W").setLocation(topicref).toString()); } if (href == null) { generateStumpTopic(topicref); } processCombineChunk(topicref); } } else if (chunk.contains(CHUNK_TO_NAVIGATION) && supportToNavigation) { processChildTopicref(topicref); processNavitation(topicref); } else if (chunkByToken.equals(CHUNK_BY_TOPIC)) { if (href != null) { processSeparateChunk(topicref); } processChildTopicref(topicref); } else { // chunkByToken.equals(CHUNK_BY_DOCUMENT) URI currentPath = null; if (copyTo != null) { currentPath = currentFile.resolve(copyTo); } else if (href != null) { currentPath = currentFile.resolve(href); } if (currentPath != null) { changeTable.remove(currentPath); final String processingRole = getCascadeValue(topicref, ATTRIBUTE_NAME_PROCESSING_ROLE); if (!ATTR_PROCESSING_ROLE_VALUE_RESOURCE_ONLY.equals(processingRole)) { changeTable.put(currentPath, currentPath); } } processChildTopicref(topicref); } } /** * Create new map and refer to it with navref. */ private void processNavitation(final Element topicref) { // create new map's root element final Element root = (Element) topicref.getOwnerDocument().getDocumentElement().cloneNode(false); // create navref element final Element navref = topicref.getOwnerDocument().createElement(MAP_NAVREF.localName); final String newMapFile = chunkFilenameGenerator.generateFilename("MAPCHUNK", FILE_EXTENSION_DITAMAP); navref.setAttribute(ATTRIBUTE_NAME_MAPREF, newMapFile); navref.setAttribute(ATTRIBUTE_NAME_CLASS, MAP_NAVREF.toString()); // replace topicref with navref topicref.getParentNode().replaceChild(navref, topicref); root.appendChild(topicref); // generate new file final URI navmap = currentFile.resolve(newMapFile); changeTable.put(stripFragment(navmap), stripFragment(navmap)); outputMapFile(navmap, buildOutputDocument(root)); } /** * Generate file name. * * @return generated file name */ private String generateFilename() { return chunkFilenameGenerator.generateFilename(CHUNK_PREFIX, FILE_EXTENSION_DITA); } /** * Generate stump topic for to-content content. * * @param topicref topicref without href to generate stump topic for */ private void generateStumpTopic(final Element topicref) { final URI result = getResultFile(topicref); final URI temp = tempFileNameScheme.generateTempFileName(result); final URI absTemp = job.tempDir.toURI().resolve(temp); final String name = getBaseName(new File(result).getName()); String navtitle = getChildElementValueOfTopicmeta(topicref, TOPIC_NAVTITLE); if (navtitle == null) { navtitle = getValue(topicref, ATTRIBUTE_NAME_NAVTITLE); } String shortDesc = getChildElementValueOfTopicmeta(topicref, TOPIC_SHORTDESC); if (shortDesc == null) { shortDesc = getChildElementValueOfTopicmeta(topicref, MAP_SHORTDESC); } writeChunk(absTemp, name, navtitle, shortDesc); // update current element's @href value final URI relativePath = getRelativePath(currentFile.resolve(FILE_NAME_STUB_DITAMAP), absTemp); topicref.setAttribute(ATTRIBUTE_NAME_HREF, relativePath.toString()); if (MAPGROUP_D_TOPICGROUP.matches(topicref)) { topicref.setAttribute(ATTRIBUTE_NAME_CLASS, MAP_TOPICREF.toString()); } final URI relativeToBase = getRelativePath(job.tempDirURI.resolve("dummy"), absTemp); final FileInfo fi = new FileInfo.Builder().uri(temp).result(result).format(ATTR_FORMAT_VALUE_DITA).build(); job.add(fi); } private void writeChunk(final URI outputFileName, String id, String title, String shortDesc) { try (final OutputStream output = job.getStore().getOutputStream(outputFileName)) { final XMLSerializer serializer = XMLSerializer.newInstance(output); serializer.writeStartDocument(); if (title == null && shortDesc == null) { //topicgroup with no title, no shortdesc, just need a non titled stub serializer.writeStartElement(ELEMENT_NAME_DITA); serializer.writeAttribute( DITA_NAMESPACE, ATTRIBUTE_PREFIX_DITAARCHVERSION + ":" + ATTRIBUTE_NAME_DITAARCHVERSION, "1.3" ); serializer.writeEndElement(); // dita } else { serializer.writeStartElement(TOPIC_TOPIC.localName); serializer.writeAttribute( DITA_NAMESPACE, ATTRIBUTE_PREFIX_DITAARCHVERSION + ":" + ATTRIBUTE_NAME_DITAARCHVERSION, "1.3" ); serializer.writeAttribute(ATTRIBUTE_NAME_ID, id); serializer.writeAttribute(ATTRIBUTE_NAME_CLASS, TOPIC_TOPIC.toString()); serializer.writeAttribute(ATTRIBUTE_NAME_DOMAINS, ""); serializer.writeAttribute(ATTRIBUTE_NAME_SPECIALIZATIONS, ""); serializer.writeStartElement(TOPIC_TITLE.localName); serializer.writeAttribute(ATTRIBUTE_NAME_CLASS, TOPIC_TITLE.toString()); if (title != null) { serializer.writeCharacters(title); } serializer.writeEndElement(); // title if (shortDesc != null) { serializer.writeStartElement(TOPIC_SHORTDESC.localName); serializer.writeAttribute(ATTRIBUTE_NAME_CLASS, TOPIC_SHORTDESC.toString()); serializer.writeCharacters(shortDesc); serializer.writeEndElement(); // shortdesc } serializer.writeEndElement(); // topic } serializer.writeEndDocument(); serializer.close(); } catch (final IOException | SAXException e) { logger.error("Failed to write generated chunk: " + e.getMessage(), e); } } private URI getResultFile(final Element topicref) { final FileInfo curr = job.getFileInfo(currentFile); final URI copyTo = toURI(getValue(topicref, ATTRIBUTE_NAME_COPY_TO)); final String id = getValue(topicref, ATTRIBUTE_NAME_ID); URI outputFileName; if (copyTo != null) { outputFileName = curr.result.resolve(copyTo); } else if (id != null) { outputFileName = curr.result.resolve(id + FILE_EXTENSION_DITA); } else { final Set results = job.getFileInfo().stream().map(fi -> fi.result).collect(Collectors.toSet()); do { outputFileName = curr.result.resolve(generateFilename()); } while (results.contains(outputFileName)); } return outputFileName; } /** * get topicmeta's child(e.g navtitle, shortdesc) tag's value(text-only). * * @param element input element * @return text value */ private String getChildElementValueOfTopicmeta(final Element element, final DitaClass classValue) { if (element.hasChildNodes()) { final Element topicMeta = getElementNode(element, MAP_TOPICMETA); if (topicMeta != null) { final Element elem = getElementNode(topicMeta, classValue); if (elem != null) { return getText(elem); } } } return null; } private void processChildTopicref(final Element node) { final List children = getChildElements(node, MAP_TOPICREF); for (final Element currentElem : children) { final URI href = toURI(getValue(currentElem, ATTRIBUTE_NAME_HREF)); final String xtrf = currentElem.getAttribute(ATTRIBUTE_NAME_XTRF); if (href == null) { processTopicref(currentElem); } else if ( !ATTR_XTRF_VALUE_GENERATED.equals(xtrf) && !currentFile.resolve(href).equals(changeTable.get(currentFile.resolve(href))) ) { processTopicref(currentElem); } } } private void processSeparateChunk(final Element topicref) { final SeparateChunkTopicParser chunkParser = new SeparateChunkTopicParser(); chunkParser.setLogger(logger); chunkParser.setJob(job); chunkParser.setup(changeTable, conflictTable, topicref, chunkFilenameGenerator); chunkParser.write(currentFile); } private void processCombineChunk(final Element topicref) { final ChunkTopicParser chunkParser = new ChunkTopicParser(); chunkParser.setLogger(logger); chunkParser.setJob(job); createChildTopicrefStubs(getChildElements(topicref, MAP_TOPICREF)); chunkParser.setup(changeTable, conflictTable, topicref, chunkFilenameGenerator); chunkParser.write(currentFile); } /** Before combining topics in a branch, ensure any descendant topicref with @chunk and no @href has a stub */ private void createChildTopicrefStubs(final List topicrefs) { if (!topicrefs.isEmpty()) { for (final Element currentElem : topicrefs) { final String href = getValue(currentElem, ATTRIBUTE_NAME_HREF); final String chunk = getValue(currentElem, ATTRIBUTE_NAME_CHUNK); if (href == null && chunk != null) { generateStumpTopic(currentElem); } createChildTopicrefStubs(getChildElements(currentElem, MAP_TOPICREF)); } } } private void updateReltable(final Element elem) { final String href = elem.getAttribute(ATTRIBUTE_NAME_HREF); if (href.length() != 0) { if (changeTable.containsKey(currentFile.resolve(href))) { URI res = getRelativePath(currentFile.resolve(FILE_NAME_STUB_DITAMAP), currentFile.resolve(href)); final String fragment = getFragment(href); if (fragment != null) { res = setFragment(res, fragment); } elem.setAttribute(ATTRIBUTE_NAME_HREF, res.toString()); } } final NodeList children = elem.getChildNodes(); for (int i = 0; i < children.getLength(); i++) { final Node current = children.item(i); if (current.getNodeType() == Node.ELEMENT_NODE) { final Element currentElem = (Element) current; final String cls = currentElem.getAttribute(ATTRIBUTE_NAME_CLASS); if (MAP_TOPICREF.matches(cls)) { // FIXME: What should happen here? } } } } /** * Get changed files table. * * @return map of changed files, absolute temporary files */ public Map getChangeTable() { for (final Map.Entry e : changeTable.entrySet()) { assert e.getKey().isAbsolute(); assert e.getValue().isAbsolute(); } return Collections.unmodifiableMap(changeTable); } /** * get conflict table. * * @return conflict table, absolute temporary files */ public Map getConflicTable() { for (final Map.Entry e : conflictTable.entrySet()) { assert e.getKey().isAbsolute(); assert e.getValue().isAbsolute(); } return conflictTable; } /** * Support chunk token to-navigation. * * @param supportToNavigation flag to enable to-navigation support */ public void supportToNavigation(final boolean supportToNavigation) { this.supportToNavigation = supportToNavigation; } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy