com.deepoove.poi.xwpf.XmlXWPFDocumentMerge Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of poi-tl Show documentation
Show all versions of poi-tl Show documentation
Generate word(docx) with template
/*
* Copyright 2014-2021 Sayi
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.deepoove.poi.xwpf;
import java.io.IOException;
import java.lang.reflect.Field;
import java.math.BigInteger;
import java.util.*;
import org.apache.poi.ooxml.POIXMLDocument;
import org.apache.poi.ooxml.POIXMLDocumentPart.RelationPart;
import org.apache.poi.ooxml.util.POIXMLUnits;
import org.apache.poi.ooxml.POIXMLException;
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.openxml4j.opc.*;
import org.apache.poi.util.IOUtils;
import org.apache.poi.xwpf.usermodel.*;
import org.apache.xmlbeans.XmlCursor;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.*;
public class XmlXWPFDocumentMerge extends AbstractXWPFDocumentMerge {
private static final String CROSS_REPLACE_STRING = "@PoiTL@";
public XmlXWPFDocumentMerge() {
}
@Override
public NiceXWPFDocument merge(NiceXWPFDocument source, Iterator mergeIterator, XWPFRun run)
throws Exception {
CTBody body = source.getDocument().getBody();
List addParts = createMergeableStrings(source, mergeIterator);
String[] startEnd = truncatedStartEndXmlFragment(body);
XWPFParagraph paragraph = (XWPFParagraph) run.getParent();
CTP mergedContainer = paragraph.getCTP();
CTP mergedBody = CTP.Factory.parse(startEnd[0] + String.join("", addParts) + startEnd[1]);
// instead insert xml-fragment?
mergedContainer.set(mergedBody);
String xmlText = truncatedOverlapWP(body);
body.set(CTBody.Factory.parse(xmlText));
return source.generate(true);
}
private String truncatedOverlapWP(CTBody body) {
String xmlText = body.xmlText(DefaultXmlOptions.OPTIONS_INNER);
xmlText = xmlText.replaceAll("", "")
.replaceAll("", "")
.replaceAll(" ", "")
.replaceAll(" ", " ")
.replaceAll(" ", "")
.replaceAll(" ", "")
.replaceAll(" ", "");
return xmlText;
}
private String[] truncatedStartEndXmlFragment(CTBody body) {
String srcString = body.xmlText(DefaultXmlOptions.OPTIONS_INNER);
// hack for create document or single element document
if (!srcString.startsWith("") + 1);
String suffix = srcString.substring(srcString.lastIndexOf("<"));
return new String[] { prefix, suffix };
}
private List createMergeableStrings(NiceXWPFDocument source, Iterator iterator)
throws InvalidFormatException, IOException {
List addParts = new ArrayList();
if (!iterator.hasNext()) return addParts;
NiceXWPFDocument next = iterator.next();
// apply style merge once
Map mergeStyles = mergeStyles(source, next);
// apply namespaces merge once
mergeNamespaces(source, next);
do {
addParts.add(createMergeableString(source, next, mergeStyles));
try {
next.close();
} catch (Exception e) {
logger.warn("close merged doc failed!", e);
}
if (iterator.hasNext())
next = iterator.next();
else
break;
} while (true);
return addParts;
}
private void mergeNamespaces(NiceXWPFDocument source, NiceXWPFDocument docMerge) {
CTDocument1 document = source.getDocument();
XmlCursor newCursor = document.newCursor();
if (toStartCursor(newCursor)) {
CTDocument1 documentMerge = docMerge.getDocument();
XmlCursor mergeCursor = documentMerge.newCursor();
if (toStartCursor(mergeCursor)) {
Map addToThis = new HashMap<>();
mergeCursor.getAllNamespaces(addToThis);
addToThis.forEach(newCursor::insertNamespace);
}
mergeCursor.dispose();
}
newCursor.dispose();
}
private boolean toStartCursor(XmlCursor newCursor) {
do {
if (newCursor.currentTokenType().isStart()) {
return true;
} else if (newCursor.hasNextToken()) {
newCursor.toNextToken();
} else
return false;
} while (true);
}
private String createMergeableString(NiceXWPFDocument source, NiceXWPFDocument merged,
Map styleIdsMap) throws InvalidFormatException, IOException {
CTBody mergedBody = merged.getDocument().getBody();
// TODO For the same style, reduce the number of merges
// Map styleIdsMap = mergeStyles(docMerge);
Map numIdsMap = mergeNumbering(source, merged);
Map blipIdsMap = mergePicture(source, merged);
Map externalBlipIdsMap = mergeExternalPicture(source, merged);
Map hyperlinkMap = mergeHyperlink(source, merged);
Map chartIdsMap = mergeChart(source, merged);
Map attachmentIdsMap = mergeAttachment(source, merged);
String appendString = mergedBody.xmlText(DefaultXmlOptions.OPTIONS_OUTER);
String addPart = ridSectPr(appendString);
// style
for (String styleId : styleIdsMap.keySet()) {
addPart = addPart
.replaceAll(" placeHolderblipIdsMap = new HashMap();
for (String relaId : blipIdsMap.keySet()) {
placeHolderblipIdsMap.put(relaId, blipIdsMap.get(relaId) + CROSS_REPLACE_STRING);
}
for (String relaId : placeHolderblipIdsMap.keySet()) {
addPart = addPart.replaceAll("r:embed=\"" + relaId + "\"",
"r:embed=\"" + placeHolderblipIdsMap.get(relaId) + "\"");
// w:pict v:shape v:imagedata
addPart = addPart.replaceAll("r:id=\"" + relaId + "\"",
"r:id=\"" + placeHolderblipIdsMap.get(relaId) + "\"");
}
Map placeHolderExternalblipIdsMap = new HashMap();
for (String relaId : externalBlipIdsMap.keySet()) {
placeHolderExternalblipIdsMap.put(relaId, blipIdsMap.get(relaId) + CROSS_REPLACE_STRING);
}
for (String relaId : placeHolderExternalblipIdsMap.keySet()) {
addPart = addPart.replaceAll("r:link=\"" + relaId + "\"",
"r:link=\"" + placeHolderExternalblipIdsMap.get(relaId) + "\"");
}
// hyperlink id
for (String relaId : hyperlinkMap.keySet()) {
hyperlinkMap.put(relaId, hyperlinkMap.get(relaId) + CROSS_REPLACE_STRING);
}
for (String relaId : hyperlinkMap.keySet()) {
// w:hyperlink r:id
addPart = addPart.replaceAll("r:id=\"" + relaId + "\"", "r:id=\"" + hyperlinkMap.get(relaId) + "\"");
}
// chart id
for (String relaId : chartIdsMap.keySet()) {
chartIdsMap.put(relaId, chartIdsMap.get(relaId) + CROSS_REPLACE_STRING);
}
for (String relaId : chartIdsMap.keySet()) {
addPart = addPart.replaceAll("r:id=\"" + relaId + "\"", "r:id=\"" + chartIdsMap.get(relaId) + "\"");
}
// attachment id
for (String relaId : attachmentIdsMap.keySet()) {
attachmentIdsMap.put(relaId, attachmentIdsMap.get(relaId) + CROSS_REPLACE_STRING);
}
for (String relaId : attachmentIdsMap.keySet()) {
addPart = addPart.replaceAll("r:id=\"" + relaId + "\"", "r:id=\"" + attachmentIdsMap.get(relaId) + "\"");
}
// numbering numId
for (String relaId : numIdsMap.keySet()) {
numIdsMap.put(relaId, numIdsMap.get(relaId) + CROSS_REPLACE_STRING);
}
for (String numId : numIdsMap.keySet()) {
addPart = addPart.replaceAll("") + 1;
int end = appendString.lastIndexOf("<");
if (-1 != lastIndexOf) {
String prefix = appendString.substring(begin, appendString.lastIndexOf("") + 11, end);
return prefix + suffix;
} else if (begin < end) {
addPart = appendString.substring(begin, end);
}
return addPart;
}
private Map mergePicture(NiceXWPFDocument source, NiceXWPFDocument merged)
throws InvalidFormatException {
Map blipIdsMap = new HashMap();
List allPictures = merged.getAllPictures();
for (XWPFPictureData xwpfPictureData : allPictures) {
String relationId = merged.getRelationId(xwpfPictureData);
String blidId = source.addPictureData(xwpfPictureData.getData(), xwpfPictureData.getPictureType());
blipIdsMap.put(relationId, blidId);
}
return blipIdsMap;
}
private Map mergeExternalPicture(NiceXWPFDocument source, NiceXWPFDocument merged)
throws InvalidFormatException {
Map blipIdsMap = new HashMap();
PackageRelationshipCollection imagePart = merged.getPackagePart()
.getRelationshipsByType(PackageRelationshipTypes.IMAGE_PART);
Iterator iterator = imagePart.iterator();
while (iterator.hasNext()) {
PackageRelationship relationship = iterator.next();
if (relationship.getTargetMode() == TargetMode.EXTERNAL) {
PackageRelationship relationshipNew = source.getPackagePart()
.addExternalRelationship(relationship.getTargetURI().toString(),
XWPFRelation.IMAGES.getRelation());
blipIdsMap.putIfAbsent(relationship.getId(), relationshipNew.getId());
}
}
return blipIdsMap;
}
private Map mergeNumbering(NiceXWPFDocument source, NiceXWPFDocument merged) {
Map numIdsMap = new HashMap();
XWPFNumbering numberingMerge = merged.getNumbering();
if (null == numberingMerge) return numIdsMap;
XWPFNumberingWrapper wrapperMerge = new XWPFNumberingWrapper(numberingMerge);
List nums = wrapperMerge.getNums();
if (null == nums) return numIdsMap;
XWPFNumbering numbering = source.getNumbering();
if (null == numbering) numbering = source.createNumbering();
XWPFNumberingWrapper wrapper = new XWPFNumberingWrapper(numbering);
XWPFAbstractNum xwpfAbstractNum;
CTAbstractNum cTAbstractNum;
Map cache = new HashMap();
Map ret = new HashMap();
for (XWPFNum xwpfNum : nums) {
BigInteger mergeNumId = xwpfNum.getCTNum().getNumId();
cTAbstractNum = cache.get(xwpfNum.getCTNum().getAbstractNumId().getVal());
if (null == cTAbstractNum) {
xwpfAbstractNum = numberingMerge.getAbstractNum(xwpfNum.getCTNum().getAbstractNumId().getVal());
if (null == xwpfAbstractNum) {
logger.warn("cannot find cTAbstractNum by XWPFNum.");
continue;
}
cTAbstractNum = xwpfAbstractNum.getCTAbstractNum();
// cTAbstractNum.setAbstractNumId(wrapper.getNextAbstractNumID());
if (cTAbstractNum.isSetNsid()) cTAbstractNum.unsetNsid();
if (cTAbstractNum.isSetTmpl()) cTAbstractNum.unsetTmpl();
cache.put(xwpfNum.getCTNum().getAbstractNumId().getVal(), cTAbstractNum);
}
ret.put(mergeNumId, cTAbstractNum);
}
long nextId = wrapper.getNextAbstractNumID().longValue();
Set hashSet = new HashSet<>(ret.values());
for (CTAbstractNum abnum : hashSet) {
abnum.setAbstractNumId(BigInteger.valueOf(nextId++));
}
final XWPFNumbering finalNumbering = numbering;
ret.forEach((mergeNumId, abnum) -> {
BigInteger numID = finalNumbering.addNum(finalNumbering.addAbstractNum(new XWPFAbstractNum(abnum)));
numIdsMap.put(mergeNumId.toString(), numID.toString());
});
return numIdsMap;
}
@SuppressWarnings("unchecked")
private Map mergeStyles(NiceXWPFDocument source, NiceXWPFDocument merged) {
Map styleIdsMap = new HashMap();
XWPFStyles styles = source.getStyles();
if (null == styles) styles = source.createStyles();
XWPFStyles stylesMerge = merged.getStyles();
if (null == stylesMerge) return styleIdsMap;
try {
Field listStyleField = XWPFStyles.class.getDeclaredField("listStyle");
listStyleField.setAccessible(true);
List lists = (List) listStyleField.get(stylesMerge);
String defaultParaStyleId = null;
for (XWPFStyle xwpfStyle : lists) {
if (styles.styleExist(xwpfStyle.getStyleId())) {
String id = xwpfStyle.getStyleId();
xwpfStyle.setStyleId(UUID.randomUUID().toString().substring(0, 8));
styleIdsMap.put(id, xwpfStyle.getStyleId());
}
// fix github issue 499
CTStyle ctStyle = xwpfStyle.getCTStyle();
if (ctStyle.isSetDefault() && POIXMLUnits.parseOnOff(ctStyle.xgetDefault())
&& ctStyle.getType() == STStyleType.PARAGRAPH) {
defaultParaStyleId = ctStyle.getStyleId();
}
if (ctStyle.isSetDefault()) {
ctStyle.unsetDefault();
}
if (ctStyle.isSetName()) {
ctStyle.getName().setVal(ctStyle.getName().getVal() + xwpfStyle.getStyleId());
}
if (ctStyle.isSetBasedOn()) {
String newId = styleIdsMap.get(ctStyle.getBasedOn().getVal());
if (null != newId) ctStyle.getBasedOn().setVal(newId);
}
styles.addStyle(xwpfStyle);
}
if (null != defaultParaStyleId) {
final String dpid = defaultParaStyleId;
merged.getParagraphs().stream().filter(p -> null == p.getStyle()).forEach(p -> p.setStyle(dpid));
}
} catch (Exception e) {
// throw exception?
logger.error("merge style error", e);
}
return styleIdsMap;
}
private Map mergeHyperlink(NiceXWPFDocument source, NiceXWPFDocument merged)
throws InvalidFormatException {
Map map = new HashMap();
PackageRelationshipCollection hyperlinks = merged.getPackagePart()
.getRelationshipsByType(PackageRelationshipTypes.HYPERLINK_PART);
Iterator iterator = hyperlinks.iterator();
while (iterator.hasNext()) {
PackageRelationship relationship = iterator.next();
PackageRelationship relationshipNew = source.getPackagePart()
.addExternalRelationship(relationship.getTargetURI().toString(),
XWPFRelation.HYPERLINK.getRelation());
map.put(relationship.getId(), relationshipNew.getId());
}
return map;
}
private Map mergeChart(NiceXWPFDocument source, NiceXWPFDocument merged)
throws InvalidFormatException, IOException {
Map map = new HashMap();
List charts = merged.getCharts();
for (XWPFChart chart : charts) {
String relationId = merged.getRelationId(chart);
RelationPart addChartData = source.addChartData(chart);
map.put(relationId, addChartData.getRelationship().getId());
}
return map;
}
private Map mergeAttachment(NiceXWPFDocument source, NiceXWPFDocument merged)
throws InvalidFormatException, IOException {
Map attachmentIdsMap = new HashMap();
PackageRelationshipCollection part = merged.getPackagePart()
.getRelationshipsByType(POIXMLDocument.PACK_OBJECT_REL_TYPE);
Iterator iterator = part.iterator();
while (iterator.hasNext()) {
PackageRelationship relationship = iterator.next();
PackagePart embeddPart = merged.getPackagePart().getRelatedPart(relationship);
String path = relationship.getTargetURI().getPath();
if (null == path || (!path.endsWith(".docx") && !path.endsWith(".xlsx"))) continue;
try {
byte[] byteData = IOUtils.toByteArray(embeddPart.getInputStream());
String newId = source.addEmbeddData(byteData, path.endsWith("docx") ? 0 : 1);
attachmentIdsMap.putIfAbsent(relationship.getId(), newId);
} catch (IOException e) {
throw new POIXMLException(e);
}
}
return attachmentIdsMap;
}
// TODO merge header, footer, pageSect...
}