All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.odftoolkit.odfdom.incubator.doc.text.OdfEditableTextExtractor Maven / Gradle / Ivy

/************************************************************************
* 
*  Licensed to the Apache Software Foundation (ASF) under one
*  or more contributor license agreements.  See the NOTICE file
*  distributed with this work for additional information
*  regarding copyright ownership.  The ASF licenses this file
*  to you under the Apache License, Version 2.0 (the
*  "License"); you may not use this file except in compliance
*  with the License.  You may obtain a copy of the License at
*
*   http://www.apache.org/licenses/LICENSE-2.0
*
*  Unless required by applicable law or agreed to in writing,
*  software distributed under the License is distributed on an
*  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
*  KIND, either express or implied.  See the License for the
*  specific language governing permissions and limitations
*  under the License.
*
************************************************************************/
package org.odftoolkit.odfdom.incubator.doc.text;

import java.util.List;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.odftoolkit.odfdom.doc.OdfDocument;

import org.odftoolkit.odfdom.doc.table.OdfTable;
import org.odftoolkit.odfdom.doc.table.OdfTableRow;
import org.odftoolkit.odfdom.dom.OdfContentDom;
import org.odftoolkit.odfdom.dom.OdfMetaDom;
import org.odftoolkit.odfdom.dom.OdfStylesDom;
import org.odftoolkit.odfdom.dom.element.draw.DrawObjectElement;
import org.odftoolkit.odfdom.dom.element.office.OfficeMetaElement;
import org.odftoolkit.odfdom.dom.element.style.StyleMasterPageElement;
import org.odftoolkit.odfdom.dom.element.table.TableTableElement;
import org.odftoolkit.odfdom.dom.element.text.TextAElement;
import org.odftoolkit.odfdom.dom.element.text.TextTrackedChangesElement;
import org.odftoolkit.odfdom.pkg.OdfElement;
import org.w3c.dom.NodeList;

/**
 * It's a sub class of OdfTextExtractor. It provides a method to return all the text 
 * that the user can typically edit in a document, including text in cotent.xml, 
 * header and footer in styles.xml, meta data in meta.xml. 
 * 
 * 

This function can be used by search engine, and text analytic operations.

* * @deprecated As of release 0.8.8, replaced by {@link org.odftoolkit.simple.common.EditableTextExtractor} in Simple API. */ public class OdfEditableTextExtractor extends OdfTextExtractor { OdfDocument mDocument = null; OdfElement mElement = null; boolean mIsDocumentExtractor = false; /** * Constructor with an ODF document as a parameter * @param doc the ODF document whose editable text would be extracted. */ private OdfEditableTextExtractor(OdfDocument doc) { mTextBuilder = new StringBuilder(); mDocument = doc; mIsDocumentExtractor = true; } /** * Constructor with an ODF element as parameter * @param element the ODF element whose editable text would be extracted. */ private OdfEditableTextExtractor(OdfElement element) { mTextBuilder = new StringBuilder(); mElement = element; mIsDocumentExtractor = false; } /** * An instance of OdfEditableTextExtractor will be created to * extract the editable text content of an ODF element. * @param doc the ODF document whose text will be extracted. * @return An instance of OdfEditableTextExtractor */ public static OdfEditableTextExtractor newOdfEditableTextExtractor(OdfDocument doc) { return new OdfEditableTextExtractor(doc); } /** * An instance of OdfEditableTextExtractor will be created to * extract the editable text content of an ODF element. * @param element the ODF element whose text will be extracted. * @return An instance of OdfEditableTextExtractor */ public static OdfEditableTextExtractor newOdfEditableTextExtractor(OdfElement element) { return new OdfEditableTextExtractor(element); } /* (non-Javadoc) * @see org.odftoolkit.odfdom.dom.DefaultElementVisitor#visit(org.odftoolkit.odfdom.dom.element.draw.DrawObjectElement) */ @Override public void visit(DrawObjectElement element) { String embedDocPath = element.getXlinkHrefAttribute(); OdfDocument embedDoc = ((OdfDocument) (((OdfContentDom) element.getOwnerDocument()).getDocument())).loadSubDocument(embedDocPath); if (embedDoc != null) { try { mTextBuilder.append(OdfEditableTextExtractor.newOdfEditableTextExtractor(embedDoc).getText()); } catch (Exception e) { Logger.getLogger(OdfEditableTextExtractor.class.getName()).log(Level.SEVERE, null, e); } } } /* (non-Javadoc) * @see org.odftoolkit.odfdom.dom.DefaultElementVisitor#visit(org.odftoolkit.odfdom.dom.element.text.TextTrackedChangesElement) */ @Override public void visit(TextTrackedChangesElement ele) { return; } /* (non-Javadoc) * @see org.odftoolkit.odfdom.dom.DefaultElementVisitor#visit(org.odftoolkit.odfdom.dom.element.text.TextAElement) */ @Override public void visit(TextAElement ele) { String link = ele.getXlinkHrefAttribute(); mTextBuilder.append(link); appendElementText(ele); } /* (non-Javadoc) * @see org.odftoolkit.odfdom.dom.DefaultElementVisitor#visit(org.odftoolkit.odfdom.dom.element.text.TextTabElement) */ @Override public void visit(TableTableElement ele) { OdfTable table = OdfTable.getInstance(ele); List rowlist = table.getRowList(); for (int i = 0; i < rowlist.size(); i++) { OdfTableRow row = rowlist.get(i); for (int j = 0; j < row.getCellCount(); j++) { mTextBuilder.append(row.getCellByIndex(j).getDisplayText()).append(TabChar); } mTextBuilder.append(NewLineChar); } } /** * Return the editable text content as a string * @return the editable text content as a string */ @Override public String getText() { if (mIsDocumentExtractor) { return getDocumentText(); } else { visit(mElement); return mTextBuilder.toString(); } } private String getDocumentText() { StringBuilder builder = new StringBuilder(); try { //Extract text from content.xml OdfEditableTextExtractor contentDomExtractor = newOdfEditableTextExtractor(mDocument.getContentRoot()); builder.append(contentDomExtractor.getText()); //Extract text from style.xml OdfStylesDom styleDom = mDocument.getStylesDom(); if (styleDom != null) { StyleMasterPageElement masterpage = null; NodeList list = styleDom.getElementsByTagName("style:master-page"); if (list.getLength() > 0) { masterpage = (StyleMasterPageElement) list.item(0); } if (masterpage != null) { builder.append(newOdfEditableTextExtractor(masterpage).getText()); } } //Extract text from meta.xml OdfMetaDom metaDom = mDocument.getMetaDom(); if (metaDom != null) { OdfElement root = metaDom.getRootElement(); OfficeMetaElement officemeta = OdfElement.findFirstChildNode(OfficeMetaElement.class, root); if (officemeta != null) { builder.append(newOdfEditableTextExtractor(officemeta).getText()); } } return builder.toString(); } catch (Exception e) { Logger.getLogger(OdfEditableTextExtractor.class.getName()).severe(e.getMessage()); return builder.toString(); } } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy