All downloads are free. Search and download functionality uses the official Maven repository.

org.odftoolkit.odfdom.incubator.doc.text.OdfEditableTextExtractor Maven / Gradle / Ivy

/**
 * **********************************************************************
 *
 * <p>Licensed to the Apache Software Foundation (ASF) under one or more contributor license
 * agreements. See the NOTICE file distributed with this work for additional information regarding
 * copyright ownership. The ASF licenses this file to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance with the License. You may obtain a
 * copy of the License at
 *
 * <p>http://www.apache.org/licenses/LICENSE-2.0
 *
 * <p>Unless required by applicable law or agreed to in writing, software distributed under the
 * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 * express or implied. See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * <p>**********************************************************************
 */
package org.odftoolkit.odfdom.incubator.doc.text;

import java.util.List;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.odftoolkit.odfdom.doc.OdfDocument;
import org.odftoolkit.odfdom.doc.table.OdfTable;
import org.odftoolkit.odfdom.doc.table.OdfTableRow;
import org.odftoolkit.odfdom.dom.OdfContentDom;
import org.odftoolkit.odfdom.dom.OdfMetaDom;
import org.odftoolkit.odfdom.dom.OdfStylesDom;
import org.odftoolkit.odfdom.dom.element.draw.DrawObjectElement;
import org.odftoolkit.odfdom.dom.element.office.OfficeMetaElement;
import org.odftoolkit.odfdom.dom.element.style.StyleMasterPageElement;
import org.odftoolkit.odfdom.dom.element.table.TableTableElement;
import org.odftoolkit.odfdom.dom.element.text.TextAElement;
import org.odftoolkit.odfdom.dom.element.text.TextTrackedChangesElement;
import org.odftoolkit.odfdom.pkg.OdfElement;
import org.w3c.dom.NodeList;

/**
 * A subclass of OdfTextExtractor. It provides a method to return all the text that the user can
 * typically edit in a document, including text in content.xml, header and footer in styles.xml,
 * and meta data in meta.xml.
 *
 * <p>This function can be used by search engines and text analytic operations.
 */
public class OdfEditableTextExtractor extends OdfTextExtractor {

  /** Shared logger so every failure path in this class reports in the same way. */
  private static final Logger LOG = Logger.getLogger(OdfEditableTextExtractor.class.getName());

  /** The document to extract from; only set when this is a document extractor. */
  OdfDocument mDocument = null;

  /** The element to extract from; only set when this is an element extractor. */
  OdfElement mElement = null;

  /** {@code true} when constructed from a document, {@code false} when constructed from an element. */
  boolean mIsDocumentExtractor = false;

  /**
   * Constructor with an ODF document as a parameter
   *
   * @param doc the ODF document whose editable text would be extracted.
   */
  private OdfEditableTextExtractor(OdfDocument doc) {
    mTextBuilder = new StringBuilder();
    mDocument = doc;
    mIsDocumentExtractor = true;
  }

  /**
   * Constructor with an ODF element as parameter
   *
   * @param element the ODF element whose editable text would be extracted.
   */
  private OdfEditableTextExtractor(OdfElement element) {
    mTextBuilder = new StringBuilder();
    mElement = element;
    mIsDocumentExtractor = false;
  }

  /**
   * An instance of OdfEditableTextExtractor will be created to extract the editable text content of
   * an ODF document.
   *
   * @param doc the ODF document whose text will be extracted.
   * @return An instance of OdfEditableTextExtractor
   */
  public static OdfEditableTextExtractor newOdfEditableTextExtractor(OdfDocument doc) {
    return new OdfEditableTextExtractor(doc);
  }

  /**
   * An instance of OdfEditableTextExtractor will be created to extract the editable text content of
   * an ODF element.
   *
   * @param element the ODF element whose text will be extracted.
   * @return An instance of OdfEditableTextExtractor
   */
  public static OdfEditableTextExtractor newOdfEditableTextExtractor(OdfElement element) {
    return new OdfEditableTextExtractor(element);
  }

  /**
   * Appends the editable text of the sub document referenced by the draw object, if any.
   *
   * <p>(non-Javadoc)
   *
   * @see
   *     org.odftoolkit.odfdom.dom.DefaultElementVisitor#visit(org.odftoolkit.odfdom.dom.element.draw.DrawObjectElement)
   */
  @Override
  public void visit(DrawObjectElement element) {
    String embedDocPath = element.getXlinkHrefAttribute();
    if (embedDocPath == null) {
      // No link to a sub document, so there is nothing to load.
      return;
    }
    // NOTE(review): the cast assumes the element is owned by the content DOM; an object owned by
    // another DOM (e.g. styles.xml) would presumably fail this cast - confirm.
    OdfDocument embedDoc =
        ((OdfDocument) (((OdfContentDom) element.getOwnerDocument()).getDocument()))
            .loadSubDocument(embedDocPath);
    if (embedDoc != null) {
      try {
        mTextBuilder.append(newOdfEditableTextExtractor(embedDoc).getText());
      } catch (Exception e) {
        // Best effort: a failing sub document must not abort extraction of the parent.
        LOG.log(Level.SEVERE, null, e);
      }
    }
  }

  /**
   * Ignores tracked changes entirely; nothing is appended for them.
   *
   * <p>(non-Javadoc)
   *
   * @see
   *     org.odftoolkit.odfdom.dom.DefaultElementVisitor#visit(org.odftoolkit.odfdom.dom.element.text.TextTrackedChangesElement)
   */
  @Override
  public void visit(TextTrackedChangesElement ele) {
    return;
  }

  /**
   * Appends the link target of the hyperlink followed by the hyperlink's own text.
   *
   * <p>(non-Javadoc)
   *
   * @see
   *     org.odftoolkit.odfdom.dom.DefaultElementVisitor#visit(org.odftoolkit.odfdom.dom.element.text.TextAElement)
   */
  @Override
  public void visit(TextAElement ele) {
    String link = ele.getXlinkHrefAttribute();
    if (link != null) {
      // StringBuilder.append(null) would emit the literal text "null".
      mTextBuilder.append(link);
    }
    appendElementText(ele);
  }

  /**
   * Appends the display text of every cell of the table: cells are followed by a tab character and
   * rows by a new line character.
   *
   * <p>(non-Javadoc)
   *
   * @see
   *     org.odftoolkit.odfdom.dom.DefaultElementVisitor#visit(org.odftoolkit.odfdom.dom.element.table.TableTableElement)
   */
  @Override
  public void visit(TableTableElement ele) {
    OdfTable table = OdfTable.getInstance(ele);
    List<OdfTableRow> rowlist = table.getRowList();
    for (OdfTableRow row : rowlist) {
      for (int j = 0; j < row.getCellCount(); j++) {
        mTextBuilder.append(row.getCellByIndex(j).getDisplayText()).append(TabChar);
      }
      mTextBuilder.append(NewLineChar);
    }
  }

  /**
   * Return the editable text content as a string
   *
   * @return the editable text content as a string
   */
  @Override
  public String getText() {
    if (mIsDocumentExtractor) {
      return getDocumentText();
    }
    visit(mElement);
    return mTextBuilder.toString();
  }

  /**
   * Collects the editable text of the whole document: the content root, the first master page
   * found in the styles DOM, and the office meta element of the meta DOM, in that order.
   *
   * <p>If any step throws, the exception is logged and the text gathered so far is returned.
   *
   * @return the concatenated text of the parts that could be extracted
   */
  private String getDocumentText() {
    StringBuilder builder = new StringBuilder();
    try {
      // Extract text from content.xml
      OdfEditableTextExtractor contentDomExtractor =
          newOdfEditableTextExtractor(mDocument.getContentRoot());
      builder.append(contentDomExtractor.getText());

      // Extract text from styles.xml (only the first master page is considered)
      OdfStylesDom styleDom = mDocument.getStylesDom();
      if (styleDom != null) {
        StyleMasterPageElement masterpage = null;
        NodeList list = styleDom.getElementsByTagName("style:master-page");
        if (list.getLength() > 0) {
          masterpage = (StyleMasterPageElement) list.item(0);
        }
        if (masterpage != null) {
          builder.append(newOdfEditableTextExtractor(masterpage).getText());
        }
      }

      // Extract text from meta.xml
      OdfMetaDom metaDom = mDocument.getMetaDom();
      if (metaDom != null) {
        OdfElement root = metaDom.getRootElement();
        OfficeMetaElement officemeta = OdfElement.findFirstChildNode(OfficeMetaElement.class, root);
        if (officemeta != null) {
          builder.append(newOdfEditableTextExtractor(officemeta).getText());
        }
      }
      return builder.toString();
    } catch (Exception e) {
      // Keep the original message but attach the throwable so the stack trace is not lost.
      LOG.log(Level.SEVERE, e.getMessage(), e);
      return builder.toString();
    }
  }
}





© 2015 - 2024 Weber Informatics LLC | Privacy Policy