![JAR search and dependency download from the Maven repository](/logo.png)
panda.tool.poi.doc.DocTextExtractor Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of panda-tool Show documentation
Show all versions of panda-tool Show documentation
Panda Tool contains some commonly used tools and a source code generator for Panda Mvc. It can generate Entity/Query/Dao/Action classes and Freemarker (HTML) template files.
package panda.tool.poi.doc;
import java.io.IOException;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.UnsupportedEncodingException;
import java.io.Writer;
import java.util.LinkedHashMap;
import java.util.Map;
import javax.xml.transform.TransformerException;
import org.apache.poi.hpsf.SummaryInformation;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.usermodel.CharacterRun;
import org.apache.poi.hwpf.usermodel.Paragraph;
import org.apache.poi.hwpf.usermodel.Range;
import panda.bean.Beans;
import panda.bind.xml.Xmls;
import panda.lang.Charsets;
import panda.tool.poi.ESummary;
/**
 * Builds an {@link EDocument} model from a Word document
 * ({@link HWPFDocument}) and serializes that model as XML.
 * <p>
 * The model is filled in by the {@code handle*} callbacks overridden from
 * {@link DocTextProcessor}; they are presumably invoked by the inherited
 * {@code process(doc)} call made from {@link #extractToXml(HWPFDocument, Writer)}.
 * <p>
 * The model under construction is kept in instance fields, so a single
 * instance should not be used to process two documents at the same time.
 */
public class DocTextExtractor extends DocTextProcessor {
	/** Whether the document summary information is copied into the model. */
	private boolean extractSummary = false;
	/** Whether the range named "header" is extracted. */
	private boolean extractHeader = false;
	/** Whether the range named "footer" is extracted. */
	private boolean extractFooter = false;

	/** Model of the document currently being processed. */
	private EDocument edoc;
	/** Paragraphs of the range currently being processed, keyed by paragraph index. */
	private Map<Integer, EParagraph> erange;
	/** Paragraph currently being processed. */
	private EParagraph eparagraph;

	/**
	 * Constructor
	 */
	public DocTextExtractor() {
	}

	/**
	 * @return the extractSummary
	 */
	public boolean isExtractSummary() {
		return extractSummary;
	}

	/**
	 * @param extractSummary the extractSummary to set
	 */
	public void setExtractSummary(boolean extractSummary) {
		this.extractSummary = extractSummary;
	}

	/**
	 * @return the extractHeader
	 */
	public boolean isExtractHeader() {
		return extractHeader;
	}

	/**
	 * @param extractHeader the extractHeader to set
	 */
	public void setExtractHeader(boolean extractHeader) {
		this.extractHeader = extractHeader;
	}

	/**
	 * @return the extractFooter
	 */
	public boolean isExtractFooter() {
		return extractFooter;
	}

	/**
	 * @param extractFooter the extractFooter to set
	 */
	public void setExtractFooter(boolean extractFooter) {
		this.extractFooter = extractFooter;
	}

	/**
	 * Extracts the document and writes the resulting XML to a byte stream,
	 * encoded with {@code Charsets.CS_UTF_8}.
	 * <p>
	 * The stream is flushed but not closed; closing it stays the caller's
	 * responsibility.
	 *
	 * @param doc the Word document to extract
	 * @param output the stream that receives the XML
	 * @throws TransformerException if the XML serialization fails, or if the
	 *             encoded output cannot be flushed to {@code output}
	 * @throws UnsupportedEncodingException kept for backward compatibility of
	 *             the signature
	 */
	public void extractToXml(HWPFDocument doc, OutputStream output) throws TransformerException, UnsupportedEncodingException {
		OutputStreamWriter osw = new OutputStreamWriter(output, Charsets.CS_UTF_8);
		extractToXml(doc, osw);
		try {
			// OutputStreamWriter buffers encoded bytes internally; without an
			// explicit flush the tail of the XML may never reach the stream.
			osw.flush();
		}
		catch (IOException e) {
			// Wrapped so that the declared exception types stay unchanged.
			throw new TransformerException("Failed to flush the XML output", e);
		}
	}

	/**
	 * Extracts the document and writes the resulting XML to a writer.
	 *
	 * @param doc the Word document to extract
	 * @param output the writer that receives the XML
	 * @throws TransformerException if the XML serialization fails
	 */
	public void extractToXml(HWPFDocument doc, Writer output) throws TransformerException {
		process(doc);
		Xmls.toXml(edoc, output, true);
	}

	/**
	 * Starts a fresh model for {@code doc} and, when summary extraction is
	 * enabled and the document has summary information, copies it into the
	 * model.
	 *
	 * @param doc the document being processed
	 */
	@Override
	protected void handleDocument(HWPFDocument doc) {
		// Reset all per-document state left over from a previous run.
		edoc = new EDocument();
		erange = null;
		eparagraph = null;

		if (!extractSummary) {
			return;
		}

		final SummaryInformation si = doc.getSummaryInformation();
		if (si != null) {
			ESummary es = new ESummary();
			es.copy(si);
			edoc.setSummary(es);
		}
	}

	/**
	 * Decides whether a named range is extracted and, if so, registers a new
	 * paragraph map for it on the document model under the property
	 * {@code name}.
	 *
	 * @param name the range name, also used as the model property name
	 * @param doc the document being processed
	 * @param range the range about to be processed
	 * @return {@code true} if the range is extracted, {@code false} if it is
	 *         skipped because header/footer extraction is disabled
	 */
	@Override
	protected boolean handleRange(String name, HWPFDocument doc, Range range) {
		if (!extractHeader && "header".equals(name)) {
			return false;
		}
		// NOTE(review): assumes the base processor reports the footer range
		// under the name "footer" -- confirm against DocTextProcessor. If it
		// never uses that name this check is a harmless no-op.
		if (!extractFooter && "footer".equals(name)) {
			return false;
		}

		erange = new LinkedHashMap<Integer, EParagraph>();
		Beans.setProperty(edoc, name, erange);
		return true;
	}

	/**
	 * Creates the model entry for a paragraph and stores it in the current
	 * range under its index.
	 *
	 * @param range the range that contains the paragraph
	 * @param paragraph the paragraph being processed
	 * @param index the paragraph index used as the map key
	 * @return always {@code true}
	 */
	@Override
	protected boolean handleParagraph(Range range, Paragraph paragraph, int index) {
		eparagraph = new EParagraph();
		erange.put(index, eparagraph);
		return true;
	}

	/**
	 * Stores the extracted character run in the current paragraph under its
	 * index.
	 *
	 * @param range the range that contains the paragraph
	 * @param paragraph the paragraph that contains the run
	 * @param crun the source character run
	 * @param index the run index used as the map key
	 * @param ecrun the extracted run to store
	 * @return always {@code false}; the meaning of the flag is defined by
	 *         {@link DocTextProcessor}
	 */
	@Override
	protected boolean handleText(Range range, Paragraph paragraph, CharacterRun crun, int index, ECharRun ecrun) {
		eparagraph.getCruns().put(index, ecrun);
		return false;
	}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy