
org.anyline.office.docx.entity.WDocument Maven / Gradle / Ivy
/*
* Copyright 2006-2023 www.anyline.org
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.anyline.office.docx.entity;
import org.anyline.entity.html.Table;
import org.anyline.entity.html.Td;
import org.anyline.entity.html.Tr;
import org.anyline.handler.Downloader;
import org.anyline.handler.Uploader;
import org.anyline.log.LogProxy;
import org.anyline.office.docx.util.DocxUtil;
import org.anyline.util.*;
import org.anyline.util.regular.RegularUtil;
import org.dom4j.Document;
import org.dom4j.DocumentHelper;
import org.dom4j.Element;
import org.dom4j.Node;
import org.anyline.log.Log;
import java.io.File;
import java.io.InputStream;
import java.nio.charset.Charset;
import java.util.*;
public class WDocument extends WElement {
private static Log log = LogProxy.get(WDocument.class);
private File file;
private String charset = "UTF-8";
private String xml = null; // document.xml文本
// word/document.xml
private org.dom4j.Document doc = null;
public boolean IS_HTML_ESCAPE = false; //设置文本时是否解析转义符
// word/_rels/document.xml.rels
private String relsXml = null;
private org.dom4j.Document rels;
private LinkedHashMap footers = new LinkedHashMap<>();
private LinkedHashMap headers = new LinkedHashMap<>();
private LinkedHashMap charts = new LinkedHashMap<>();
private LinkedHashMap> styles = new LinkedHashMap<>();
private LinkedHashMap replaces = new LinkedHashMap<>();
/**
* 文本原样替换,不解析原文没有${}的也不要添加
*/
private LinkedHashMap txt_replaces = new LinkedHashMap<>();
private boolean autoMergePlaceholder = true;
/**
* word转html时遇到文件需要上传到文件服务器,并返回url
*/
private Uploader uploader;
/**
* html转word时遇到url需要下载到本地
*/
private Downloader downloader;
private int listNum = 0;
/**
 * Creates a document backed by the given .docx file, keeping the default charset.
 * @param file docx file
 */
public WDocument(File file){
    this.file = file;
}
/**
 * Creates a document backed by the .docx file at the given path.
 * @param file path of the docx file
 */
public WDocument(String file){
    this(new File(file));
}
/**
 * Creates a document backed by the given .docx file.
 * @param file docx file
 * @param charset charset name used when reading archive entries
 */
public WDocument(File file, String charset){
    this(file);
    this.charset = charset;
}
/**
 * Creates a document backed by the .docx file at the given path.
 * @param file path of the docx file
 * @param charset charset name used when reading archive entries
 */
public WDocument(String file, String charset){
    this(new File(file), charset);
}
/**
 * Loads the archive on first use; does nothing once the document xml text is present.
 */
private void load(){
    if(null != xml){
        return;
    }
    reload();
}
/**
 * Reads word/document.xml and its relationship file from the archive, parses both,
 * points src at the body element and collects every footer, header and chart part.
 * Any exception is printed and otherwise ignored.
 */
public void reload(){
    try {
        xml = ZipUtil.read(file, "word/document.xml", charset);
        relsXml = ZipUtil.read(file, "word/_rels/document.xml.rels", charset);
        doc = DocumentHelper.parseText(xml);
        rels = DocumentHelper.parseText(relsXml);
        src = doc.getRootElement().element("body");
        List<String> entries = ZipUtil.getEntriesNames(file);
        for(String entry:entries){
            // choose the destination map and the path prefix to strip from the key
            LinkedHashMap<String, Document> target;
            String prefix;
            if(entry.contains("word/footer")){
                target = footers;
                prefix = "word/";
            }else if(entry.contains("word/header")){
                target = headers;
                prefix = "word/";
            }else if(entry.contains("word/charts/chart")){
                target = charts;
                prefix = "word/charts/";
            }else{
                continue;
            }
            String name = entry.replace(prefix, "").replace(".xml", "");
            target.put(name, DocumentHelper.parseText(ZipUtil.read(file, entry, charset)));
        }
    }catch (Exception ex){
        ex.printStackTrace();
    }
}
/**
 * Finds the relationship element whose "Id" attribute matches the given resource id
 * (case-insensitive) among the children of the document.xml.rels root.
 * The archive is loaded first if that has not happened yet.
 * @param id resource id
 * @return the matching element, or null when id is null, the relationship
 *         file is not available, or nothing matches
 */
public Element rel(String id){
    if(null == id){
        return null;
    }
    // rels is only assigned by reload(); make sure it has run before dereferencing it
    load();
    if(null == rels){
        return null;
    }
    List<Element> relationships = rels.getRootElement().elements();
    for(Element relationship:relationships){
        if(id.equalsIgnoreCase(relationship.attributeValue("Id"))){
            return relationship;
        }
    }
    return null;
}
/**
 * Opens an entry of the backing archive.
 * @param path entry path inside the archive
 * @return whatever ZipUtil.read returns for that entry
 */
public InputStream read(String path){
    return ZipUtil.read(file, path);
}
/**
 * Re-serializes the parsed document into the cached xml text.
 * Any exception is printed and otherwise ignored.
 */
public void flush(){
    try {
        String formatted = DomUtil.format(doc);
        xml = formatted;
    }catch (Exception ex){
        ex.printStackTrace();
    }
}
/**
 * Adds every entry that StyleParser.load produces for the given text to this
 * document's style table; entries with an existing key are overwritten.
 * @param html text handed to StyleParser.load
 */
public void loadStyle(String html){
    this.styles.putAll(StyleParser.load(html));
}
/**
 * Registers a placeholder replacement; the replacement itself is executed when save is called.
 * Note: when parse is false, ${} is not added automatically; the key is replaced verbatim
 * in the plain text of the whole file, tag names included.
 * A null or blank key is ignored.
 * @param parse whether to parse tags. true: parse the content as HTML; false: replace the text directly
 * @param key placeholder
 * @param content replacement value
 */
public void replace(boolean parse, String key, String content){
    // must be ||: with && a null key reached key.trim() and blank keys were never rejected
    if(null == key || key.trim().length()==0){
        return;
    }
    if(parse) {
        replaces.put(key, content);
    }else{
        txt_replaces.put(key, content);
    }
}
/**
 * Registers a parsed placeholder replacement; same as replace(true, key, content).
 * @param key placeholder
 * @param content replacement value
 */
public void replace(String key, String content){
    final boolean parse = true;
    replace(parse, key, content);
}
/**
 * Registers a replacement built from the given files; delegates to the List overload.
 * @param parse true: store as parsed replacement; false: store as verbatim text replacement
 * @param key placeholder
 * @param words files whose absolute paths make up the replacement content
 */
public void replace(boolean parse, String key, File ... words){
    List<File> files = BeanUtil.array2list(words);
    replace(parse, key, files);
}
/**
 * Registers a parsed replacement built from the given files.
 * @param key placeholder
 * @param words files whose absolute paths make up the replacement content
 */
public void replace(String key, File ... words){
    List<File> files = BeanUtil.array2list(words);
    replace(true, key, files);
}
/**
 * Registers a replacement whose content is the absolute path of every given file,
 * each followed by the separator literal used below.
 * A null or blank key is ignored, as in the String overload; a null list is ignored too.
 * @param parse true: store as parsed replacement; false: store as verbatim text replacement
 * @param key placeholder
 * @param words files to reference; may be null
 */
public void replace(boolean parse, String key, List<File> words){
    // a null key stored here would later break mergePlaceholder, which calls String methods on every key
    if(null == key || key.trim().length()==0){
        return;
    }
    if(null == words){
        return;
    }
    StringBuilder content = new StringBuilder();
    for(File word:words){
        content.append("").append(word.getAbsolutePath()).append(" ");
    }
    Map<String, String> target = parse ? replaces : txt_replaces;
    target.put(key, content.toString());
}
/**
 * Registers a parsed replacement built from the given files; same as replace(true, key, words).
 * @param key placeholder
 * @param words files whose absolute paths make up the replacement content
 */
public void replace(String key, List words){
    final boolean parse = true;
    replace(parse, key, words);
}
/**
 * Saves the document, passing the UTF-8 charset to save(Charset).
 */
public void save(){
    Charset utf8 = Charset.forName("UTF-8");
    save(utf8);
}
/**
 * Applies all registered replacements and writes the changed parts back into the archive.
 * Steps: load if needed, optionally merge split placeholders, replace placeholders in the
 * body, footers and headers, apply the verbatim text replacements to every serialized part
 * (charts included), run the content-type and merged-column checks, then replace the
 * archive entries. Any exception is printed and otherwise ignored.
 * @param charset charset handed to ZipUtil.replace
 */
public void save(Charset charset){
    try {
        // load the file
        load();
        if(autoMergePlaceholder){
            mergePlaceholder();
        }
        // execute the replacements
        replace(src, replaces);
        Map<String, String> zip_replaces = new HashMap<>();
        for(String name:footers.keySet()){
            Document footer = footers.get(name);
            replace(footer.getRootElement(), replaces);
            String txt = BasicUtil.replace(DomUtil.format(footer), txt_replaces);
            zip_replaces.put("word/" + name + ".xml", txt);
        }
        for(String name:headers.keySet()){
            Document header = headers.get(name);
            replace(header.getRootElement(), replaces);
            String txt = BasicUtil.replace(DomUtil.format(header), txt_replaces);
            zip_replaces.put("word/" + name + ".xml", txt);
        }
        for(String name:charts.keySet()){
            // charts only receive the verbatim text replacements; placeholder replacement is not applied
            Document chart = charts.get(name);
            String txt = BasicUtil.replace(DomUtil.format(chart), txt_replaces);
            zip_replaces.put("word/charts/" + name + ".xml", txt);
        }
        // check the content types
        checkContentTypes();
        // tables with merged columns: without widths they occupy a single column in wps,
        // so grid columns are added according to the total column count
        checkMergeCol();
        String txt = BasicUtil.replace(DomUtil.format(doc), txt_replaces);
        zip_replaces.put("word/document.xml", txt);
        zip_replaces.put("word/_rels/document.xml.rels", DomUtil.format(rels));
        ZipUtil.replace(file, zip_replaces, charset);
    }catch (Exception e){
        e.printStackTrace();
    }
}
/**
 * Merges placeholders ${key} that were split across 3 t elements.
 * Call this after the replace calls, because it uses the placeholder
 * list that those calls provided.
 */
public void mergePlaceholder(){
    List<String> keys = new ArrayList<>(replaces.keySet());
    mergePlaceholder(keys);
}
/**
 * Merges placeholders ${key} that were split across 3 t elements, in the body
 * and in every footer, header and chart part.
 * @param placeholders placeholder list; entries may be given with or without ${}, which is stripped in the end
 */
public void mergePlaceholder(List placeholders){
    // gather the roots in processing order: body, footers, headers, charts
    List<Element> roots = new ArrayList<>();
    roots.add(getSrc());
    for(Document footer:footers.values()){
        roots.add(footer.getRootElement());
    }
    for(Document header:headers.values()){
        roots.add(header.getRootElement());
    }
    for(Document chart:charts.values()){
        roots.add(chart.getRootElement());
    }
    for(Element root:roots){
        mergePlaceholder(root, placeholders);
    }
}
/**
 * Merges split placeholders inside box: strips "${" and "}" from every given
 * placeholder and collects the "t" elements under box.
 * NOTE(review): from the first index loop onward this copy of the file is damaged;
 * several loop headers run straight into the signature of a following method, so the
 * remainder of this method and the start of the methods after it are not visible here.
 * Verify against the upstream source before changing anything in this span.
 */
public void mergePlaceholder(Element box, List placeholders){
List list = new ArrayList<>();
for(String placeholder:placeholders){
list.add(placeholder.replace("${", "").replace("}", ""));
}
List ts = DomUtil.elements(box, "t");
List removes = new ArrayList<>();
int size = ts.size();
// NOTE(review): damaged line, loop header fused with a later method signature
for(int i=0; i elements, int start, List contents){
int size = elements.size();
// NOTE(review): damaged line, loop header fused with a later method signature
for(int i=start; i elements, int start, String content){
int size = elements.size();
// NOTE(review): damaged line; what follows reads as the body of a replace(box, replaces) method
for(int i=start; i replaces){
List ts = DomUtil.elements(box, "t");
for(Element t:ts){
String txt = t.getTextTrim();
List flags = DocxUtil.splitKey(txt);
// nothing to do for a text node in which splitKey finds no keys
if(flags.size() == 0){
continue;
}
Collections.reverse(flags);
Element r = t.getParent();
List elements = r.elements();
int index = elements.indexOf(t);
// despite its name, prev is taken from the position right after t (when t is not the last child)
Element prev = null;
if(index < elements.size()-1){
prev = elements.get(index+1);
}
boolean exists = false;
// NOTE(review): damaged line, loop header fused with the inside of a commented-out section
for(int i=0; i list = parseHtml(src, prev ,content);
}else{
List list = parseHtml(r, prev ,content);
}*/
if(null != content) {
List list = parseHtml(r, prev, content);
}
}
// if a placeholder exists, remove the original content
if(exists) {
elements.remove(t);
}
}
// bookmark-based replacements are handled per bookmarkStart element
List bookmarks = DomUtil.elements(box, "bookmarkStart");
for(Element bookmark:bookmarks){
replaceBookmark(bookmark, replaces);
}
}
/**
 * Tables with merged columns: if no width is set, they occupy only one column in wps,
 * so grid columns need to be added to the table according to the total column count.
 * w:tblGrid
 * w:gridCol w:w="1000"
 * NOTE(review): the loop that adds the missing gridCol elements is damaged in this copy;
 * its header runs into the body of a later method that registers a "Default" element
 * with Extension/ContentType attributes. Verify against the upstream source.
 */
private void checkMergeCol(){
List tables = DomUtil.elements(doc.getRootElement(), "tbl");
for(Element table:tables){
// max: widest row measured in grid columns; isMerge: whether any cell spans more than one column
int max = 0;
boolean isMerge = false;
List trs = DomUtil.elements(table, "tr");
for(Element tr:trs){
int size = 0;
List tcs = DomUtil.elements(tr,"tc");
for(Element tc:tcs){
// a cell counts as one column unless tcPr/gridSpan gives a span
int colspan = 1;
Element pr = DomUtil.element(tc,"tcPr");
if(null != pr){
Element grid = DomUtil.element(pr,"gridSpan");
if(null != grid){
colspan = BasicUtil.parseInt(grid.attributeValue("w:val"),1);
}
}
if(colspan > 1){
isMerge = true;
}
size += colspan;
}
if(size > max){
max = size;
}
}
if(isMerge){
// table width falls back to 5000 when tblW is missing or cannot be parsed
int tableWidth = 5000;
Element tblW = DomUtil.element(table, "tblW");
if(null != tblW){
tableWidth = BasicUtil.parseInt(tblW.attributeValue("w:w"),5000);
}
Element tblGrid = DomUtil.element(table,"tblGrid");
if(null == tblGrid){
tblGrid = DocxUtil.addElement(table, "tblGrid");
}
List gridCols = DomUtil.elements(tblGrid, "gridCol");
// every column gets an equal share of the table width
int width = tableWidth / max;
// NOTE(review): damaged line, loop header fused with a later method body
for(int i=gridCols.size(); i elements = root.elements("Default");
for(Element element:elements){
// already registered: nothing to add
if(extension.equals(element.attributeValue("Extension"))){
return;
}
}
Element element = root.addElement("Default");
element.addAttribute("Extension", extension);
element.addAttribute("ContentType", type);
}
/**
 * Inserts the nodes parsed from html before the given element.
 * The html is parsed into the element's parent, positioned after the element's
 * preceding sibling (or with no predecessor when the element is the first child).
 * @param element element
 * @param html html
 * @return the last element of the list reported by the parser, or null when that list is empty
 */
public Element before(Element element, String html){
    Element parent = element.getParent();
    List<Element> siblings = parent.elements();
    int position = siblings.indexOf(element);
    Element prev = position > 0 ? siblings.get(position - 1) : null;
    List<Element> created = parseHtml(parent, prev, html);
    return created.isEmpty() ? null : created.get(created.size()-1);
}
/**
 * Inserts element into point's parent at the position currently held by point.
 * @param point reference element
 * @param element element to insert
 * @return the inserted element
 */
public Element before(Element point, Element element){
    List<Element> siblings = point.getParent().elements();
    siblings.add(siblings.indexOf(point), element);
    return element;
}
/**
 * Inserts the table's source element before point.
 * @param point reference element
 * @param table table to insert
 * @return the table's source element
 */
public Element before(Element point, WTable table){
    Element tableElement = table.getSrc();
    return before(point, tableElement);
}
/**
 * Inserts element into point's parent directly after point.
 * @param point reference element
 * @param element element to insert
 * @return the inserted element
 */
public Element after(Element point, Element element){
    Element parent = point.getParent();
    List<Element> elements = parent.elements();
    int index = elements.indexOf(point)+1;
    // append only when point is the last child; the previous bound (size-1) also appended
    // when point was the second-to-last child, placing element after the wrong sibling
    if(index >= elements.size()){
        elements.add(element);
    }else {
        elements.add(index, element);
    }
    return element;
}
/**
 * Inserts the table's source element after point.
 * @param point reference element
 * @param table table to insert
 * @return the table's source element
 */
public Element after(Element point, WTable table){
    Element tableElement = table.getSrc();
    return after(point, tableElement);
}
/**
 * Inserts nodes after the given element.
 * Parsing html may produce several elements; the last one of the outermost level is returned.
 * @param element element
 * @param html html
 * @return the last element of the list reported by the parser, or null when that list is empty
 */
public Element after(Element element, String html){
    List<Element> created = parseHtml(element.getParent(), element, html);
    return created.isEmpty() ? null : created.get(created.size()-1);
}
/**
 * Parses html into parent without naming a preceding element.
 * @param parent container element
 * @param html html
 * @return the last element of the list reported by the parser, or null when that list is empty
 */
public Element insert(Element parent, String html){
    List<Element> created = parseHtml(parent, null, html);
    return created.isEmpty() ? null : created.get(created.size()-1);
}
/**
 * Parses html into parent, positioned after the child at index-1.
 * The index is clamped to the range 1 .. size-1 (and to 1 when the parent has a single
 * child). For a parent without children the html is parsed with no preceding element.
 * @param index position; values outside the valid range are clamped
 * @param parent container element
 * @param html html
 * @return the last element of the list reported by the parser, or null when that list is empty
 */
public Element insert(int index, Element parent, String html){
    List<Element> elements = parent.elements();
    Element prev = null;
    if(!elements.isEmpty()){
        // clamp the upper bound first, then the lower bound: with a single child the old
        // order turned any index >= 2 into 0 and then read position -1
        int max = elements.size()-1;
        if(index > max){
            index = max;
        }
        if(index < 1){
            index = 1;
        }
        prev = elements.get(index-1);
    }
    List<Element> list = parseHtml(parent, prev, html);
    if(list.isEmpty()){
        return null;
    }
    return list.get(list.size()-1);
}
/**
 * Appends element to the end of parent's child elements.
 * @param parent container element
 * @param element element to append
 * @return the appended element
 */
public Element insert(Element parent, Element element){
    List<Element> children = parent.elements();
    children.add(element);
    return element;
}
/**
 * Appends the table's source element to parent.
 * @param parent container element
 * @param table table to append
 * @return the table's source element
 */
public Element insert(Element parent, WTable table){
    Element tableElement = table.getSrc();
    return insert(parent, tableElement);
}
/**
 * Inserts element at the given position of parent's child element list.
 * The index is passed to List.add unchanged.
 * @param index position in the child element list
 * @param parent container element
 * @param element element to insert
 * @return the inserted element
 */
public Element insert(int index, Element parent, Element element){
    List<Element> children = parent.elements();
    children.add(index, element);
    return element;
}
/**
 * Inserts the table's source element at the given position of parent's child element list.
 * @param index position in the child element list
 * @param parent container element
 * @param table table to insert
 * @return the table's source element
 */
public Element insert(int index, Element parent, WTable table){
    Element tableElement = table.getSrc();
    return insert(index, parent, tableElement);
}
/**
 * Gets the tag that contains a bookmark; usually used for positioning.
 * The archive is loaded first if that has not happened yet.
 * @param bookmark bookmark
 * @param tag name of the enclosing tag to look for, e.g. tbl
 * @return Element
 */
public Element parent(String bookmark, String tag){
    load();
    Element root = doc.getRootElement();
    Element mark = DocxUtil.bookmark(root, bookmark);
    return DocxUtil.getParent(mark, tag);
}
/**
 * Same as parent(bookmark, null).
 * @param bookmark bookmark
 * @return Element
 */
public Element parent(String bookmark){
    final String tag = null;
    return parent(bookmark, tag);
}
/**
 * Reads the table that contains the bookmark.
 * @param bookmark bookmark
 * @return docx table, or null when no enclosing tbl is found
 */
public WTable table(String bookmark){
    Element tbl = parent(bookmark, "tbl");
    return null == tbl ? null : new WTable(this, tbl);
}
/**
 * Gets the tables under body.
 * @param recursion whether to collect tables of every nesting level; normally not needed,
 *                  since a tbl usually sits at the top level under src unless tables are nested
 * @return tables
 */
public List tables(boolean recursion){
    if(!recursion){
        return tables();
    }
    load();
    List<WTable> found = new ArrayList<>();
    List<Element> descendants = children(getSrc());
    for(Element descendant:descendants){
        if(!descendant.getName().equals("tbl")){
            continue;
        }
        found.add(new WTable(this, descendant));
    }
    return found;
}
/**
 * Gets the tables under body whose text contains the given content.
 * @param content text used to locate the tables
 * @return tables
 */
public List tables(String content){
    List<WTable> candidates = tables();
    List<WTable> matches = new ArrayList<>();
    for(WTable candidate:candidates){
        String text = candidate.getTexts();
        if(!text.contains(content)){
            continue;
        }
        matches.add(candidate);
    }
    return matches;
}
/**
 * Collects every descendant element of parent: each child is followed
 * immediately by its own descendants.
 * @param parent element whose descendants are collected
 * @return descendants in that order
 */
private List children(Element parent){
    List<Element> collected = new ArrayList<>();
    List<Element> direct = parent.elements();
    for(Element child:direct){
        collected.add(child);
        List<Element> nested = children(child);
        collected.addAll(nested);
    }
    return collected;
}
/**
 * Gets the tbl elements that are direct children of the body, wrapped as WTable.
 * The archive is loaded first if that has not happened yet, as the recursive overload does.
 * @return tables
 */
public List tables(){
    // the recursive overload loads before reading the body; do the same here so that
    // tables() and tables(false) also work on a document that has not been loaded yet
    load();
    List<WTable> tables = new ArrayList<>();
    List<Element> elements = getSrc().elements("tbl");
    for(Element element:elements){
        tables.add(new WTable(this, element));
    }
    return tables;
}
// insert page orientation
// Adds a paragraph with a pPr child to src and lets DocxUtil.setOrient configure it.
// NOTE(review): the two index checks below are damaged in this copy; the second one runs
// into the parameter list of a bookmark replacement method whose body follows.
// Verify against the upstream source.
public void setOrient(Element prev, String orient, Map styles){
int index = index(getSrc(), prev);
Element p = src.addElement("w:p");
Element pr = p.addElement("pPr");
DocxUtil.setOrient(pr, orient, styles);
List elements = src.elements();
if(index > -1 && index elements = src.elements();
if(index > -1 && index replaces){
// the matching bookmarkEnd is looked up by the id of the start element
String id = start.attributeValue("id");
Element end = DomUtil.element(getSrc(), "bookmarkEnd","id",id);
String name = start.attributeValue("name");
String content = replaces.get(name);
// no replacement registered under this bookmark name
if(null == content){
return;
}
boolean isblock = DocxUtil.isBlock(content);
Element startParent = start.getParent();
// NOTE(review): end is dereferenced without a null check; confirm DomUtil.element cannot return null here
Element endParent = end.getParent();
if(isblock){
if(startParent == endParent){
// move the end tag into a following paragraph
//
Element nEndP = startParent.getParent().addElement("w:p");
endParent.elements().remove(end);
nEndP.elements().add(end);
DocxUtil.after(nEndP, startParent);
}
// remove the t elements returned by afters(start) and befores(end), then parse the content after startParent
DomUtil.remove(startParent, DomUtil.afters(start,"t"));
DomUtil.remove(endParent, DomUtil.befores(end,"t"));
parseHtml(startParent.getParent(),startParent,content);
}else{
if(startParent == endParent){
// same parent: only the t elements between the two marks are removed
DomUtil.remove(startParent,DomUtil.betweens(start, end,"t"));
parseHtml(startParent,start,content);
}else{
DomUtil.remove(startParent, DomUtil.afters(start,"t"));
DomUtil.remove(endParent, DomUtil.befores(end,"t"));
parseHtml(startParent,start,content);
}
}
}
/**
 * Delegates to DocxUtil.pr with the given element and styles.
 * @param element element
 * @param styles styles
 * @return the element returned by DocxUtil.pr
 */
public Element pr(Element element, Map styles){
    Element result = DocxUtil.pr(element, styles);
    return result;
}
/**
 * Inserts nodes after prev.
 * Returns an empty list for null or blank html. Otherwise the style table is cleared,
 * the snippets returned by RegularUtil.cuts are loaded through loadStyle and removed
 * from the html, and the remaining text is parsed as xml and handed to the
 * element-level parseHtml overload.
 * NOTE(review): the loop that collects the newly added elements is damaged in this copy;
 * its header runs into the body of a later method. Verify against the upstream source.
 * @param box box
 * @param prev the new nodes are placed after this element
 * @param html html
 * @return list
 */
private List parseHtml(Element box, Element prev, String html){
List list = new ArrayList();
if(null == html || html.trim().length()==0){
return list;
}
// extract style
this.styles.clear();
List styles = RegularUtil.cuts(html,true,"");
for(String style:styles){
loadStyle(style);
html = html.replace(style,"");
}
try {
if(IS_HTML_ESCAPE){
html = HtmlUtil.name2code(html);
}
html = "" + html + " ";
org.dom4j.Document doc = DocumentHelper.parseText(html);
Element root = doc.getRootElement();
parseHtml(box, prev, root, null, true);
// collect the newly added elements
int size = root.elements().size();
List elements = box.elements();
int index = elements.indexOf(prev);
// NOTE(review): damaged line, loop header fused with a later method body
for(int i=0; i elements = parent.elements();
for(Element item:elements){
}
}
return element;
}
/**
 * Builds a w:tbl under box from an html table element: creates the Table/Tr model from the
 * tr children of src and sizes the cell matrix by the row count and by the summed colspan
 * of the first row.
 * NOTE(review): the cell-filling loops are damaged in this copy (several loop headers are
 * fused with later statements), and the tail of this span belongs to a row-building method
 * whose signature is not visible. Verify against the upstream source.
 */
public Element table(Element box, Element after, Element src){
Element tbl = box.addElement("w:tbl");
Element tblPr = tbl.addElement("w:tblPr");
Table table = new Table();
Map styles = style(src);
pr(tbl, styles);
table.setStyles(styles);
List html_rows = src.elements("tr");
for(Element row:html_rows){
Tr tr = new Tr();
tr.setStyles(style(row));
table.addTr(tr);
}
int rows_size = html_rows.size();
// the column count is the sum of the colspan values of the first row (1 when absent)
int cols_size = 0;
if(rows_size>0){
Element html_row = html_rows.get(0);
List cols = html_row.elements("td");
for(Element col:cols){
int colspan = BasicUtil.parseInt(col.attributeValue("colspan"), 1);
cols_size += colspan;
}
}
Td[][] cells = new Td[rows_size][cols_size];
// NOTE(review): damaged line, loop header fused with a later statement
for(int r=0; r cols = html_row.elements("td");
int tcIndex = 0;
// NOTE(review): damaged line, loop header fused with a later statement
for(int tdIndex = 0; tdIndex tdStyles = StyleParser.join(tc.getStyles(),style(html_col));
tdStyles = StyleParser.parse(tdStyles, html_col.attributeValue("style"), true);
tc.setStyles(tdStyles);
tc.setClazz(html_col.attributeValue("class"));
int rowspan = BasicUtil.parseInt(html_col.attributeValue("rowspan"), 1);
int colspan = BasicUtil.parseInt(html_col.attributeValue("colspan"),1);
if(rowspan > 1){
tc.setMerge(1);
for(int i=r+1; i<=r+rowspan-1; i++){
// NOTE(review): damaged line, loop header fused with a later condition
for(int j=tcIndex+1; j 1){
tc.setColspan(colspan);
// NOTE(review): damaged line, loop header fused with a later condition
for(int j=r; jr) {
Td merge = cells[j][tcIndex];
merge.setMerge(2);// merged by the row above
merge.setColspan(colspan);
}
// NOTE(review): damaged line, loop header fused with the body of a row-building method
for(int i=tcIndex+merge_qty+1; i styles = StyleParser.inherit(tr.getStyles(), tr.getTable().getStyles());
tr.setStyles(styles);
pr(etr, tr.getStyles());
for (Td td:tr.getTds()) {
Element tc = tc(etr, td);
}
return etr;
}
/**
 * Creates the w:tc element for a table cell under the given row element.
 * A cell flagged as removed (merged into the cell on its left) produces no element.
 * Otherwise tcPr receives w:vMerge when the cell takes part in a vertical merge and
 * w:gridSpan when it spans several columns, the styles inherited from the row are
 * applied, and the cell content is parsed, or an empty paragraph is added for a cell
 * that is merged into the one above.
 * @param parent row element that receives the cell
 * @param td cell model
 * @return the created w:tc element, or null when the cell is flagged as removed
 */
public Element tc(Element parent, Td td){
    // merged into the cell on the left: nothing is written for this cell
    if(td.isRemove()){
        return null;
    }
    int merge = td.getMerge();     // 0: not merged, 1: merges downwards (restart), 2: merged into the cell above (continue)
    int colspan = td.getColspan(); // columns merged to the right
    Element tc = parent.addElement("w:tc");
    Element tcPr = DocxUtil.addElement(tc, "tcPr");
    if(merge > 0){
        // a vMerge without w:val marks a cell that is merged into the row above
        Element vMerge = tcPr.addElement("w:vMerge");
        if(merge == 1){
            vMerge.addAttribute("w:val", "restart");
        }
    }
    if(colspan > 1){
        tcPr.addElement("w:gridSpan").addAttribute("w:val", colspan+"");
    }
    Map styles = StyleParser.inherit(td.getStyles(), td.getTr().getStyles());
    pr(tc, styles);
    if(merge != 2){
        if(null != td.getSrc()){
            parseHtml(tc, null, td.getSrc(), StyleParser.inherit(null, styles), false);
        }
    }else{
        p(tc, "", null);
    }
    return tc;
}
/**
 * Adds a run (w:r) that carries a w:t with the given text and returns the run.
 * Where the run is created depends on the name of parent: an "r" parent is used as the
 * run itself, a "tc" parent gets the run inside a "p" obtained through DocxUtil.addElement,
 * a "p" parent gets a new run, and a "body" parent gets a new paragraph with a new run.
 * Any other parent name raises a RuntimeException.
 * The text is trimmed before it is set, so it must not be null.
 * @param parent element that receives the text
 * @param prev element handed to DocxUtil.after as the position reference
 * @param text text content
 * @param styles styles applied through pr
 * @param copyPrevStyle for a "p" parent, whether the style found via prevStyle(prev) is copied onto the new run
 * @return the run that holds the text
 */
private Element inline(Element parent, Element prev, String text, Map styles, boolean copyPrevStyle){
String pname = parent.getName();
Element r;
if(pname.equalsIgnoreCase("r")){
r = parent;
pr(parent, styles);
DocxUtil.after(r, prev);
}else if(pname.equalsIgnoreCase("tc")){
Element p = DocxUtil.addElement(parent, "p");
r = DocxUtil.addElement(p, "r");
pr(r, styles);
DocxUtil.after(r, prev);
}else if(pname.equalsIgnoreCase("p")){
pr(parent, styles);
r = parent.addElement("w:r");
// copy the style of the previous run
if(copyPrevStyle && null != prev){
Element prevR = prevStyle(prev);
DocxUtil.copyStyle(r, prevR, true);
}
DocxUtil.after(r, prev);
}else if(pname.equalsIgnoreCase("body")){
Element p = parent.addElement("w:p");
pr(p, styles);
r = p.addElement("w:r");
// here the paragraph, not the run, is positioned relative to prev
DocxUtil.after(p, prev);
}else{
throw new RuntimeException("text.parent异常:"+parent.getName());
}
pr(r, styles);
Element t = r.addElement("w:t");
if(IS_HTML_ESCAPE) {
text = HtmlUtil.display(text);
}
t.setText(text.trim());
return r;
}
/**
 * Finds the element that supplies the previous style.
 * If prev itself is named "r" it is returned. Otherwise the element returned by
 * DocxUtil.prev(prev) is used when it is named "r", "pPr" or "p".
 * @param prev reference element; must not be null
 * @return the style source, or null when none qualifies
 */
public Element prevStyle(Element prev){
    if(prev.getName().equals("r")){
        return prev;
    }
    Element candidate = DocxUtil.prev(prev);
    if(null == candidate){
        return null;
    }
    String name = candidate.getName();
    boolean usable = name.equals("r") || name.equals("pPr") || name.equals("p");
    return usable ? candidate : null;
}
/**
 * Handles a block-level html element.
 * When styles contains "page-break-after", a paragraph holding a page break is added to
 * parent, positioned after prev and returned. Otherwise a container is created according
 * to the name of parent ("p", "r", "tc" or "body"; any other name raises a
 * RuntimeException), the styles are applied to it and the element's content is parsed
 * into it.
 * @param parent element that receives the block
 * @param prev element handed to DocxUtil.after as the position reference
 * @param element html element being converted
 * @param styles styles; may be null
 * @return the page-break paragraph, or the element chosen as the new position reference
 */
public Element block(Element parent, Element prev, Element element, Map styles){
Element box = null;
String pname = parent.getName();
Element newPrev = null;
// wp is assigned in every branch below but is not read anywhere in this method
Element wp = null;
if(null != styles && null != styles.get("page-break-after")){
// page break: placed in a new paragraph
Element pageP = parent.addElement("w:p");
pageP.addElement("w:r").addElement("w:br").addAttribute("w:type","page");
pageP.addElement("w:r").addElement("w:lastRenderedPageBreak");
DocxUtil.after(pageP, prev);
return pageP;
}
pr(parent, styles);
if(pname.equalsIgnoreCase("p")){
box = parent.addElement("w:r");
if(!DocxUtil.isEmpty(parent)){// if the parent is empty, no line break is inserted
prev = box.addElement("w:br");
}
DocxUtil.after(box, prev);
newPrev = parent;
wp = parent;
}else if(pname.equalsIgnoreCase("r")){
// the new run is added to the parent of the given run
box = parent.getParent().addElement("w:r");
if(!DocxUtil.isEmpty(parent)) {// if the parent is empty, no line break is inserted
prev = box.addElement("w:br");
}
DocxUtil.after(box, prev);
newPrev = parent.getParent();
wp = newPrev;
}else if(pname.equalsIgnoreCase("tc")){
// box = DocxUtil.addElement(parent,"p");
// reuse the cell's p child when DocxUtil.isEmpty reports it as empty, otherwise add a new paragraph
Element p = parent.element("p");
if(null != p && DocxUtil.isEmpty(p)){
box = p;
}else{
box = parent.addElement("w:p");
}
DocxUtil.after(box, prev);
newPrev = box;
wp = box;
}else if(pname.equalsIgnoreCase("body")){
box = parent.addElement("w:p");
newPrev = box;
DocxUtil.after(box, prev);
wp = box;
}else{
throw new RuntimeException("div.parent 异常:"+pname+":"+element.getName()+":"+element.getTextTrim());
// create a new paragraph
}
pr(box, styles);
parseHtml(box, prev, element, styles, false);
return newPrev;
}
/**
 * Converts an html ordered list. The element's style attribute is merged into styles,
 * a new numbering group is started when the list is not nested inside another ol, and
 * every child is converted in order: nested ol children recursively, all others through li.
 * Children are always written under getSrc(); the parent argument is not used.
 * @param parent not used by this method
 * @param prev position reference, replaced by the result of each converted child
 * @param element html ol element
 * @param styles inherited styles
 * @return the position reference after the last child
 */
private Element ol(Element parent, Element prev, Element element, Map styles){
    styles = StyleParser.parse(styles, element.attributeValue("style"), true);
    boolean nested = DocxUtil.hasParent(element, "ol");
    if(!nested){
        // start a new numbering group
        listNum ++;
    }
    List<Element> items = element.elements();
    for(Element item:items){
        if(item.getName().equalsIgnoreCase("ol")){
            prev = ol(getSrc(), prev, item, styles);
        }else{
            prev = li(getSrc(), prev, item, styles);
        }
    }
    return prev;
}
private List
© 2015 - 2025 Weber Informatics LLC | Privacy Policy