org.dita.dost.reader.ChunkMapReader Maven / Gradle / Ivy
/*
* This file is part of the DITA Open Toolkit project.
*
* Copyright 2007 IBM Corporation
*
* See the accompanying LICENSE file for applicable license.
*/
package org.dita.dost.reader;
import static java.util.Collections.unmodifiableSet;
import static org.apache.commons.io.FilenameUtils.getBaseName;
import static org.dita.dost.util.Constants.*;
import static org.dita.dost.util.DitaUtils.getDitaVersion;
import static org.dita.dost.util.FileUtils.getFragment;
import static org.dita.dost.util.FileUtils.replaceExtension;
import static org.dita.dost.util.StringUtils.join;
import static org.dita.dost.util.StringUtils.split;
import static org.dita.dost.util.URLUtils.*;
import static org.dita.dost.util.XMLUtils.*;
import java.io.File;
import java.io.IOException;
import java.io.OutputStream;
import java.net.URI;
import java.util.*;
import java.util.stream.Collectors;
import javax.xml.stream.XMLOutputFactory;
import javax.xml.stream.XMLStreamWriter;
import org.dita.dost.exception.DITAOTException;
import org.dita.dost.log.MessageUtils;
import org.dita.dost.module.ChunkModule.ChunkFilenameGenerator;
import org.dita.dost.module.ChunkModule.ChunkFilenameGeneratorFactory;
import org.dita.dost.module.reader.TempFileNameScheme;
import org.dita.dost.util.DitaClass;
import org.dita.dost.util.Job;
import org.dita.dost.util.Job.FileInfo;
import org.dita.dost.util.URLUtils;
import org.dita.dost.util.XMLSerializer;
import org.dita.dost.writer.AbstractDomFilter;
import org.dita.dost.writer.ChunkTopicParser;
import org.dita.dost.writer.SeparateChunkTopicParser;
import org.w3c.dom.*;
import org.xml.sax.SAXException;
/**
* ChunkMapReader class, read and filter ditamap file for chunking.
*/
// TODO rename this because this is not a reader, it's a filter
public final class ChunkMapReader extends AbstractDomFilter {
public static final String FILE_NAME_STUB_DITAMAP = "stub.ditamap";
public static final String FILE_EXTENSION_CHUNK = ".chunk";
public static final String ATTR_XTRF_VALUE_GENERATED = "generated_by_chunk";
public static final String CHUNK_SELECT_BRANCH = "select-branch";
public static final String CHUNK_SELECT_TOPIC = "select-topic";
public static final String CHUNK_SELECT_DOCUMENT = "select-document";
private static final String CHUNK_BY_DOCUMENT = "by-document";
private static final String CHUNK_BY_TOPIC = "by-topic";
public static final String CHUNK_TO_CONTENT = "to-content";
public static final String CHUNK_TO_NAVIGATION = "to-navigation";
public static final String CHUNK_PREFIX = "Chunk";
private TempFileNameScheme tempFileNameScheme;
private Collection rootChunkOverride;
private String defaultChunkByToken;
// ChunkTopicParser assumes keys and values are chimera paths, i.e. systems paths with fragments.
private final LinkedHashMap changeTable = new LinkedHashMap<>(128);
private final Map conflictTable = new HashMap<>(128);
private boolean supportToNavigation;
private ProcessingInstruction workdir = null;
private ProcessingInstruction workdirUrl = null;
private ProcessingInstruction path2proj = null;
private ProcessingInstruction path2projUrl = null;
private ProcessingInstruction path2rootmapUrl = null;
private final ChunkFilenameGenerator chunkFilenameGenerator = ChunkFilenameGeneratorFactory.newInstance();
@Override
public void setJob(final Job job) {
super.setJob(job);
try {
tempFileNameScheme = (TempFileNameScheme) Class.forName(job.getProperty("temp-file-name-scheme")).newInstance();
} catch (InstantiationException | IllegalAccessException | ClassNotFoundException e) {
throw new RuntimeException(e);
}
tempFileNameScheme.setBaseDir(job.getInputDir());
}
public void setRootChunkOverride(final String chunkValue) {
rootChunkOverride = split(chunkValue);
}
/**
* Absolute URI to file being processed
*/
private URI currentFile;
/**
* read input file.
*
* @param inputFile filename
*/
@Override
public void read(final File inputFile) throws DITAOTException {
this.currentFile = inputFile.toURI();
super.read(inputFile);
}
@Override
public Document process(final Document doc) {
final Float ditaVersion = getDitaVersion(doc.getDocumentElement());
if (ditaVersion == null || ditaVersion >= 2.0f) {
return doc;
}
final Element root = doc.getDocumentElement();
if (rootChunkOverride != null) {
final String c = join(rootChunkOverride, " ");
logger.debug("Use override root chunk \"" + c + "\"");
root.setAttribute(ATTRIBUTE_NAME_CHUNK, c);
}
readLinks(doc);
readProcessingInstructions(doc);
final Collection rootChunk = split(root.getAttribute(ATTRIBUTE_NAME_CHUNK));
defaultChunkByToken = getChunkByToken(rootChunk, "by-", CHUNK_BY_DOCUMENT);
if (rootChunk.contains(CHUNK_TO_CONTENT)) {
chunkMap(root);
} else {
for (final Element currentElem : getChildElements(root)) {
if (MAP_RELTABLE.matches(currentElem)) {
updateReltable(currentElem);
} else if (MAP_TOPICREF.matches(currentElem)) {
processTopicref(currentElem);
}
}
}
return buildOutputDocument(root);
}
private final Set chunkTopicSet = new HashSet<>();
/**
* @return absolute temporary files
*/
public Set getChunkTopicSet() {
return unmodifiableSet(chunkTopicSet);
}
private void readLinks(final Document doc) {
final Element root = doc.getDocumentElement();
readLinks(root, false, false);
}
private void readLinks(final Element elem, final boolean chunk, final boolean disabled) {
final boolean c = chunk || elem.getAttributeNode(ATTRIBUTE_NAME_CHUNK) != null;
final boolean d =
disabled ||
elem.getAttribute(ATTRIBUTE_NAME_CHUNK).contains(CHUNK_TO_NAVIGATION) ||
(MAPGROUP_D_TOPICGROUP.matches(elem) && !SUBMAP.matches(elem)) ||
MAP_RELTABLE.matches(elem);
final Attr href = elem.getAttributeNode(ATTRIBUTE_NAME_HREF);
if (href != null) {
final URI filename = stripFragment(currentFile.resolve(href.getValue()));
if (c && !d) {
chunkTopicSet.add(filename);
final Attr copyTo = elem.getAttributeNode(ATTRIBUTE_NAME_COPY_TO);
if (copyTo != null) {
final URI copyToFile = stripFragment(currentFile.resolve(copyTo.getValue()));
chunkTopicSet.add(copyToFile);
}
}
}
for (final Element topicref : getChildElements(elem, MAP_TOPICREF)) {
readLinks(topicref, c, d);
}
}
public static String getChunkByToken(
final Collection chunkValue,
final String category,
final String defaultToken
) {
if (chunkValue.isEmpty()) {
return defaultToken;
}
for (final String token : chunkValue) {
if (token.startsWith(category)) {
return token;
}
}
return defaultToken;
}
/**
* Process map when "to-content" is specified on map element.
*
* TODO: Instead of reclassing map element to be a topicref, add a topicref
* into the map root and move all map content into that topicref.
*/
private void chunkMap(final Element root) {
// create the reference to the new file on root element.
URI newFilename = URLUtils.toURI(replaceExtension(new File(currentFile).getName(), FILE_EXTENSION_DITA));
URI newFile = currentFile.resolve(newFilename);
if (job.getStore().exists(newFile)) {
final URI oldFile = newFile;
newFilename = URLUtils.toURI(chunkFilenameGenerator.generateFilename(CHUNK_PREFIX, FILE_EXTENSION_DITA));
newFile = currentFile.resolve(newFilename);
// Mark up the possible name changing, in case that references might be updated.
conflictTable.put(newFile, oldFile.normalize());
}
changeTable.put(newFile, newFile);
// change the class attribute to "topicref"
final String origCls = root.getAttribute(ATTRIBUTE_NAME_CLASS);
root.setAttribute(ATTRIBUTE_NAME_CLASS, origCls + MAP_TOPICREF.matcher);
root.setAttribute(ATTRIBUTE_NAME_HREF, newFilename.toString());
createTopicStump(newFile);
// process chunk
processTopicref(root);
// restore original root element
if (origCls != null) {
root.setAttribute(ATTRIBUTE_NAME_CLASS, origCls);
}
root.removeAttribute(ATTRIBUTE_NAME_HREF);
}
/**
* Create the new topic stump.
*/
private void createTopicStump(final URI newFile) {
try (final OutputStream newFileWriter = job.getStore().getOutputStream(newFile)) {
final XMLStreamWriter o = XMLOutputFactory.newInstance().createXMLStreamWriter(newFileWriter, UTF8);
o.writeStartDocument();
o.writeProcessingInstruction(
PI_WORKDIR_TARGET,
UNIX_SEPARATOR + new File(newFile.resolve(".")).getAbsolutePath()
);
o.writeProcessingInstruction(PI_WORKDIR_TARGET_URI, newFile.resolve(".").toString());
o.writeStartElement(ELEMENT_NAME_DITA);
o.writeEndElement();
o.writeEndDocument();
o.close();
newFileWriter.flush();
} catch (final RuntimeException e) {
throw e;
} catch (final Exception e) {
logger.error(e.getMessage(), e);
}
}
/**
* Read processing metadata from processing instructions.
*/
private void readProcessingInstructions(final Document doc) {
final NodeList docNodes = doc.getChildNodes();
for (int i = 0; i < docNodes.getLength(); i++) {
final Node node = docNodes.item(i);
if (node.getNodeType() == Node.PROCESSING_INSTRUCTION_NODE) {
final ProcessingInstruction pi = (ProcessingInstruction) node;
switch (pi.getNodeName()) {
case PI_WORKDIR_TARGET -> workdir = pi;
case PI_WORKDIR_TARGET_URI -> workdirUrl = pi;
case PI_PATH2PROJ_TARGET -> path2proj = pi;
case PI_PATH2PROJ_TARGET_URI -> path2projUrl = pi;
case PI_PATH2ROOTMAP_TARGET_URI -> path2rootmapUrl = pi;
}
}
}
}
private void outputMapFile(final URI file, final Document doc) {
try {
job.getStore().writeDocument(doc, file);
} catch (final IOException e) {
logger.error("Failed to serialize map: " + e.getMessage(), e);
}
}
private Document buildOutputDocument(final Element root) {
final Document doc = getDocumentBuilder().newDocument();
if (workdir != null) {
doc.appendChild(doc.importNode(workdir, true));
}
if (workdirUrl != null) {
doc.appendChild(doc.importNode(workdirUrl, true));
}
if (path2proj != null) {
doc.appendChild(doc.importNode(path2proj, true));
}
if (path2projUrl != null) {
doc.appendChild(doc.importNode(path2projUrl, true));
}
if (path2rootmapUrl != null) {
doc.appendChild(doc.importNode(path2rootmapUrl, true));
}
doc.appendChild(doc.importNode(root, true));
return doc;
}
private void processTopicref(final Element topicref) {
final String xtrf = getValue(topicref, ATTRIBUTE_NAME_XTRF);
if (xtrf != null && xtrf.contains(ATTR_XTRF_VALUE_GENERATED)) {
return;
}
final Collection chunk = split(getValue(topicref, ATTRIBUTE_NAME_CHUNK));
final URI href = toURI(getValue(topicref, ATTRIBUTE_NAME_HREF));
final URI copyTo = toURI(getValue(topicref, ATTRIBUTE_NAME_COPY_TO));
final String scope = getCascadeValue(topicref, ATTRIBUTE_NAME_SCOPE);
final String chunkByToken = getChunkByToken(chunk, "by-", defaultChunkByToken);
if (
ATTR_SCOPE_VALUE_EXTERNAL.equals(scope) ||
(href != null && !job.getStore().exists(currentFile.resolve(href.toString()))) ||
(chunk.isEmpty() && href == null)
) {
processChildTopicref(topicref);
} else if (chunk.contains(CHUNK_TO_CONTENT)) {
if (href != null || copyTo != null || topicref.hasChildNodes()) {
if (chunk.contains(CHUNK_BY_TOPIC)) {
logger.warn(MessageUtils.getMessage("DOTJ064W").setLocation(topicref).toString());
}
if (href == null) {
generateStumpTopic(topicref);
}
processCombineChunk(topicref);
}
} else if (chunk.contains(CHUNK_TO_NAVIGATION) && supportToNavigation) {
processChildTopicref(topicref);
processNavitation(topicref);
} else if (chunkByToken.equals(CHUNK_BY_TOPIC)) {
if (href != null) {
processSeparateChunk(topicref);
}
processChildTopicref(topicref);
} else { // chunkByToken.equals(CHUNK_BY_DOCUMENT)
URI currentPath = null;
if (copyTo != null) {
currentPath = currentFile.resolve(copyTo);
} else if (href != null) {
currentPath = currentFile.resolve(href);
}
if (currentPath != null) {
changeTable.remove(currentPath);
final String processingRole = getCascadeValue(topicref, ATTRIBUTE_NAME_PROCESSING_ROLE);
if (!ATTR_PROCESSING_ROLE_VALUE_RESOURCE_ONLY.equals(processingRole)) {
changeTable.put(currentPath, currentPath);
}
}
processChildTopicref(topicref);
}
}
/**
* Create new map and refer to it with navref.
*/
private void processNavitation(final Element topicref) {
// create new map's root element
final Element root = (Element) topicref.getOwnerDocument().getDocumentElement().cloneNode(false);
// create navref element
final Element navref = topicref.getOwnerDocument().createElement(MAP_NAVREF.localName);
final String newMapFile = chunkFilenameGenerator.generateFilename("MAPCHUNK", FILE_EXTENSION_DITAMAP);
navref.setAttribute(ATTRIBUTE_NAME_MAPREF, newMapFile);
navref.setAttribute(ATTRIBUTE_NAME_CLASS, MAP_NAVREF.toString());
// replace topicref with navref
topicref.getParentNode().replaceChild(navref, topicref);
root.appendChild(topicref);
// generate new file
final URI navmap = currentFile.resolve(newMapFile);
changeTable.put(stripFragment(navmap), stripFragment(navmap));
outputMapFile(navmap, buildOutputDocument(root));
}
/**
* Generate file name.
*
* @return generated file name
*/
private String generateFilename() {
return chunkFilenameGenerator.generateFilename(CHUNK_PREFIX, FILE_EXTENSION_DITA);
}
/**
* Generate stump topic for to-content content.
*
* @param topicref topicref without href to generate stump topic for
*/
private void generateStumpTopic(final Element topicref) {
final URI result = getResultFile(topicref);
final URI temp = tempFileNameScheme.generateTempFileName(result);
final URI absTemp = job.tempDir.toURI().resolve(temp);
final String name = getBaseName(new File(result).getName());
String navtitle = getChildElementValueOfTopicmeta(topicref, TOPIC_NAVTITLE);
if (navtitle == null) {
navtitle = getValue(topicref, ATTRIBUTE_NAME_NAVTITLE);
}
String shortDesc = getChildElementValueOfTopicmeta(topicref, TOPIC_SHORTDESC);
if (shortDesc == null) {
shortDesc = getChildElementValueOfTopicmeta(topicref, MAP_SHORTDESC);
}
writeChunk(absTemp, name, navtitle, shortDesc);
// update current element's @href value
final URI relativePath = getRelativePath(currentFile.resolve(FILE_NAME_STUB_DITAMAP), absTemp);
topicref.setAttribute(ATTRIBUTE_NAME_HREF, relativePath.toString());
if (MAPGROUP_D_TOPICGROUP.matches(topicref)) {
topicref.setAttribute(ATTRIBUTE_NAME_CLASS, MAP_TOPICREF.toString());
}
final URI relativeToBase = getRelativePath(job.tempDirURI.resolve("dummy"), absTemp);
final FileInfo fi = new FileInfo.Builder().uri(temp).result(result).format(ATTR_FORMAT_VALUE_DITA).build();
job.add(fi);
}
private void writeChunk(final URI outputFileName, String id, String title, String shortDesc) {
try (final OutputStream output = job.getStore().getOutputStream(outputFileName)) {
final XMLSerializer serializer = XMLSerializer.newInstance(output);
serializer.writeStartDocument();
if (title == null && shortDesc == null) {
//topicgroup with no title, no shortdesc, just need a non titled stub
serializer.writeStartElement(ELEMENT_NAME_DITA);
serializer.writeAttribute(
DITA_NAMESPACE,
ATTRIBUTE_PREFIX_DITAARCHVERSION + ":" + ATTRIBUTE_NAME_DITAARCHVERSION,
"1.3"
);
serializer.writeEndElement(); // dita
} else {
serializer.writeStartElement(TOPIC_TOPIC.localName);
serializer.writeAttribute(
DITA_NAMESPACE,
ATTRIBUTE_PREFIX_DITAARCHVERSION + ":" + ATTRIBUTE_NAME_DITAARCHVERSION,
"1.3"
);
serializer.writeAttribute(ATTRIBUTE_NAME_ID, id);
serializer.writeAttribute(ATTRIBUTE_NAME_CLASS, TOPIC_TOPIC.toString());
serializer.writeAttribute(ATTRIBUTE_NAME_DOMAINS, "");
serializer.writeAttribute(ATTRIBUTE_NAME_SPECIALIZATIONS, "");
serializer.writeStartElement(TOPIC_TITLE.localName);
serializer.writeAttribute(ATTRIBUTE_NAME_CLASS, TOPIC_TITLE.toString());
if (title != null) {
serializer.writeCharacters(title);
}
serializer.writeEndElement(); // title
if (shortDesc != null) {
serializer.writeStartElement(TOPIC_SHORTDESC.localName);
serializer.writeAttribute(ATTRIBUTE_NAME_CLASS, TOPIC_SHORTDESC.toString());
serializer.writeCharacters(shortDesc);
serializer.writeEndElement(); // shortdesc
}
serializer.writeEndElement(); // topic
}
serializer.writeEndDocument();
serializer.close();
} catch (final IOException | SAXException e) {
logger.error("Failed to write generated chunk: " + e.getMessage(), e);
}
}
private URI getResultFile(final Element topicref) {
final FileInfo curr = job.getFileInfo(currentFile);
final URI copyTo = toURI(getValue(topicref, ATTRIBUTE_NAME_COPY_TO));
final String id = getValue(topicref, ATTRIBUTE_NAME_ID);
URI outputFileName;
if (copyTo != null) {
outputFileName = curr.result.resolve(copyTo);
} else if (id != null) {
outputFileName = curr.result.resolve(id + FILE_EXTENSION_DITA);
} else {
final Set results = job.getFileInfo().stream().map(fi -> fi.result).collect(Collectors.toSet());
do {
outputFileName = curr.result.resolve(generateFilename());
} while (results.contains(outputFileName));
}
return outputFileName;
}
/**
* get topicmeta's child(e.g navtitle, shortdesc) tag's value(text-only).
*
* @param element input element
* @return text value
*/
private String getChildElementValueOfTopicmeta(final Element element, final DitaClass classValue) {
if (element.hasChildNodes()) {
final Element topicMeta = getElementNode(element, MAP_TOPICMETA);
if (topicMeta != null) {
final Element elem = getElementNode(topicMeta, classValue);
if (elem != null) {
return getText(elem);
}
}
}
return null;
}
private void processChildTopicref(final Element node) {
final List children = getChildElements(node, MAP_TOPICREF);
for (final Element currentElem : children) {
final URI href = toURI(getValue(currentElem, ATTRIBUTE_NAME_HREF));
final String xtrf = currentElem.getAttribute(ATTRIBUTE_NAME_XTRF);
if (href == null) {
processTopicref(currentElem);
} else if (
!ATTR_XTRF_VALUE_GENERATED.equals(xtrf) &&
!currentFile.resolve(href).equals(changeTable.get(currentFile.resolve(href)))
) {
processTopicref(currentElem);
}
}
}
private void processSeparateChunk(final Element topicref) {
final SeparateChunkTopicParser chunkParser = new SeparateChunkTopicParser();
chunkParser.setLogger(logger);
chunkParser.setJob(job);
chunkParser.setup(changeTable, conflictTable, topicref, chunkFilenameGenerator);
chunkParser.write(currentFile);
}
private void processCombineChunk(final Element topicref) {
final ChunkTopicParser chunkParser = new ChunkTopicParser();
chunkParser.setLogger(logger);
chunkParser.setJob(job);
createChildTopicrefStubs(getChildElements(topicref, MAP_TOPICREF));
chunkParser.setup(changeTable, conflictTable, topicref, chunkFilenameGenerator);
chunkParser.write(currentFile);
}
/** Before combining topics in a branch, ensure any descendant topicref with @chunk and no @href has a stub */
private void createChildTopicrefStubs(final List topicrefs) {
if (!topicrefs.isEmpty()) {
for (final Element currentElem : topicrefs) {
final String href = getValue(currentElem, ATTRIBUTE_NAME_HREF);
final String chunk = getValue(currentElem, ATTRIBUTE_NAME_CHUNK);
if (href == null && chunk != null) {
generateStumpTopic(currentElem);
}
createChildTopicrefStubs(getChildElements(currentElem, MAP_TOPICREF));
}
}
}
private void updateReltable(final Element elem) {
final String href = elem.getAttribute(ATTRIBUTE_NAME_HREF);
if (href.length() != 0) {
if (changeTable.containsKey(currentFile.resolve(href))) {
URI res = getRelativePath(currentFile.resolve(FILE_NAME_STUB_DITAMAP), currentFile.resolve(href));
final String fragment = getFragment(href);
if (fragment != null) {
res = setFragment(res, fragment);
}
elem.setAttribute(ATTRIBUTE_NAME_HREF, res.toString());
}
}
final NodeList children = elem.getChildNodes();
for (int i = 0; i < children.getLength(); i++) {
final Node current = children.item(i);
if (current.getNodeType() == Node.ELEMENT_NODE) {
final Element currentElem = (Element) current;
final String cls = currentElem.getAttribute(ATTRIBUTE_NAME_CLASS);
if (MAP_TOPICREF.matches(cls)) {
// FIXME: What should happen here?
}
}
}
}
/**
* Get changed files table.
*
* @return map of changed files, absolute temporary files
*/
public Map getChangeTable() {
for (final Map.Entry e : changeTable.entrySet()) {
assert e.getKey().isAbsolute();
assert e.getValue().isAbsolute();
}
return Collections.unmodifiableMap(changeTable);
}
/**
* get conflict table.
*
* @return conflict table, absolute temporary files
*/
public Map getConflicTable() {
for (final Map.Entry e : conflictTable.entrySet()) {
assert e.getKey().isAbsolute();
assert e.getValue().isAbsolute();
}
return conflictTable;
}
/**
* Support chunk token to-navigation.
*
* @param supportToNavigation flag to enable to-navigation support
*/
public void supportToNavigation(final boolean supportToNavigation) {
this.supportToNavigation = supportToNavigation;
}
}