
// com.day.cq.wcm.offline.DocxImporter (Maven / Gradle / Ivy)
/*************************************************************************
*
* ADOBE CONFIDENTIAL
* ___________________
*
* Copyright 2013 Adobe Systems Incorporated
* All Rights Reserved.
*
* NOTICE: All information contained herein is, and remains
* the property of Adobe Systems Incorporated and its suppliers,
* if any. The intellectual and technical concepts contained
* herein are proprietary to Adobe Systems Incorporated and its
* suppliers and are protected by trade secret or copyright law.
* Dissemination of this information or reproduction of this material
* is strictly forbidden unless prior written permission is obtained
* from Adobe Systems Incorporated.
**************************************************************************/
package com.day.cq.wcm.offline;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import org.apache.poi.POIXMLException;
import org.apache.poi.hwpf.model.StyleDescription;
import org.apache.poi.xwpf.usermodel.Document;
import org.apache.poi.xwpf.usermodel.UnderlinePatterns;
import org.apache.poi.xwpf.usermodel.VerticalAlign;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.apache.poi.xwpf.usermodel.XWPFParagraph;
import org.apache.poi.xwpf.usermodel.XWPFPicture;
import org.apache.poi.xwpf.usermodel.XWPFRun;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * An implementation of {@link TextDocumentImporter} for "modern" MS Word
 * documents ("docx").
 * <p>
 * The document is parsed once in the constructor. Paragraphs that are empty or
 * that are page breaks are dropped; the remaining paragraphs are exposed by
 * index through {@link #getParagraph(int)}.
 *
 * @deprecated since 6.3
 */
@Deprecated
public class DocxImporter implements TextDocumentImporter {

    private static final Logger log = LoggerFactory.getLogger(DocxImporter.class);

    private final XWPFDocument document;

    /** Non-empty, non-page-break paragraphs in document order (unmodifiable). */
    private final List<XWPFParagraph> paragraphs;

    /**
     * Parses the given stream as a docx document and collects its usable
     * paragraphs.
     *
     * @param stream the docx content to read
     * @throws TextImportException if POI rejects the content with a
     *             {@link POIXMLException}
     * @throws IOException if reading the stream fails
     */
    public DocxImporter(InputStream stream) throws TextImportException, IOException {
        try {
            this.document = new XWPFDocument(stream);
            List<XWPFParagraph> tmp = new ArrayList<XWPFParagraph>();
            for (XWPFParagraph p : this.document.getParagraphs()) {
                if (!p.isEmpty() && !p.isPageBreak()) {
                    tmp.add(p);
                }
            }
            this.paragraphs = Collections.unmodifiableList(tmp);
        } catch (POIXMLException ex) {
            throw new TextImportException("not a docx file", ex);
        }
    }

    /**
     * @return trimmed text of the first paragraph whose trimmed text is not
     *         empty, or <code>null</code> if there is no such paragraph
     */
    public String getTitle() {
        for (XWPFParagraph p : this.paragraphs) {
            String t = p.getText().trim();
            if (!"".equals(t)) {
                return t;
            }
        }
        return null;
    }

    /**
     * @return number of paragraphs retained by the constructor
     */
    public int getNumberOfParagraphs() {
        return this.paragraphs.size();
    }

    /**
     * @param index zero-based position within the retained paragraphs
     * @return a freshly built view of the paragraph at <code>index</code>
     */
    public Paragraph getParagraph(int index) {
        return new DocxParagraph(this.paragraphs.get(index));
    }

    /**
     * Plain-text, HTML and picture view of a single docx paragraph. All three
     * are computed eagerly in the constructor.
     */
    private static class DocxParagraph implements Paragraph {

        private final XWPFParagraph p;
        private final String text;
        private final String textHTML;
        private final List<Picture> pictures;

        /**
         * Walks the runs of <code>p</code>, building the plain text, the HTML
         * markup and the list of embedded pictures.
         *
         * @param p the paragraph to convert
         */
        public DocxParagraph(XWPFParagraph p) {
            this.p = p;

            // The paragraph style either maps to a dedicated HTML element, or
            // falls back to a "p" element carrying the style as a class.
            String classname = WordStyleSupport.makeClassName(p.getStyle());
            String container = "p";
            String elemname = WordStyleSupport.toHtmlElement(classname);
            if (elemname != null) {
                container = elemname;
                classname = null;
            }

            StringBuilder sbtext = new StringBuilder();
            StringBuilder sbhtml = new StringBuilder();
            List<Picture> pics = new ArrayList<Picture>();

            for (XWPFRun run : p.getRuns()) {
                List<String> tags = inlineTags(run);
                for (String tag : tags) {
                    sbhtml.append('<').append(tag).append('>');
                }

                String textdata = run.getText(0);
                if (textdata != null) {
                    sbtext.append(textdata);
                    sbhtml.append(HtmlUtil.escapeHtmlText(textdata));
                }

                // Close in reverse order so the inline elements nest properly.
                for (int i = tags.size() - 1; i >= 0; i--) {
                    sbhtml.append("</").append(tags.get(i)).append('>');
                }

                for (XWPFPicture pic : run.getEmbeddedPictures()) {
                    pics.add(new DocxPicture(pic));
                }
            }

            String result = sbhtml.toString().trim();
            if (result.length() > 0) {
                // Only wrap in the container element when there is content;
                // otherwise the HTML stays the empty string.
                StringBuilder tmp = new StringBuilder();
                tmp.append('<').append(container);
                if (classname != null) {
                    tmp.append(" class='").append(HtmlUtil.escapeHtmlAttr(classname)).append('\'');
                }
                tmp.append('>');
                tmp.append(result);
                tmp.append("</").append(container).append('>');
                result = tmp.toString();
            }

            this.text = sbtext.toString().trim();
            this.textHTML = result;
            this.pictures = Collections.unmodifiableList(pics);
        }

        /**
         * Lists the inline HTML element names that apply to a run, outermost
         * first: bold, italic, strike-through, underline, subscript,
         * superscript.
         */
        private static List<String> inlineTags(XWPFRun run) {
            List<String> tags = new ArrayList<String>(6);
            if (run.isBold()) {
                tags.add("b");
            }
            if (run.isItalic()) {
                tags.add("i");
            }
            if (run.isStrike()) {
                tags.add("s");
            }
            if (run.getUnderline() != UnderlinePatterns.NONE) {
                tags.add("u");
            }
            VerticalAlign valign = run.getSubscript();
            if (valign == VerticalAlign.SUBSCRIPT) {
                tags.add("sub");
            }
            if (valign == VerticalAlign.SUPERSCRIPT) {
                tags.add("sup");
            }
            return tags;
        }

        /**
         * @return trimmed concatenation of the text of all runs
         */
        public String getText() {
            return text;
        }

        /**
         * @return the paragraph as an HTML element, or the empty string if the
         *         runs produced no markup
         */
        public String getHTML() {
            return textHTML;
        }

        /**
         * @return unmodifiable list of the pictures embedded in the runs
         */
        public List<Picture> getPictures() {
            return pictures;
        }
    }

    /**
     * {@link Picture} backed by a picture embedded in a docx run.
     */
    private static class DocxPicture implements Picture {

        private final XWPFPicture picture;

        public DocxPicture(XWPFPicture picture) {
            this.picture = picture;
        }

        /**
         * @return media type for GIF, JPEG and PNG pictures; <code>null</code>
         *         (after logging an error) for any other picture type
         */
        public String getMediaType() {
            int type = this.picture.getPictureData().getPictureType();
            switch (type) {
                case Document.PICTURE_TYPE_GIF:
                    return "image/gif";
                case Document.PICTURE_TYPE_JPEG:
                    return "image/jpeg";
                case Document.PICTURE_TYPE_PNG:
                    return "image/png";
                default:
                    log.error("Unknown picture type {} - need to define media type mapping", type);
                    return null;
            }
        }

        /**
         * @return the raw picture data
         */
        public byte[] getBytes() {
            return this.picture.getPictureData().getData();
        }

        @Override
        public String toString() {
            return getMediaType() + " (" + getBytes().length + " bytes)";
        }
    }
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy