All downloads are free. The search and download functionality uses the official Maven repository.

org.xhtmlrenderer.pdf.HTMLOutline Maven / Gradle / Ivy

/*
 * {{{ header & license
 * Copyright (c) 2016 Stanimir Stamenkov
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public License
 * as published by the Free Software Foundation; either version 2.1
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 * }}}
 */
package org.xhtmlrenderer.pdf;

import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.traversal.DocumentTraversal;
import org.w3c.dom.traversal.NodeFilter;
import org.w3c.dom.traversal.NodeIterator;
import org.xhtmlrenderer.pdf.ITextOutputDevice.Bookmark;
import org.xhtmlrenderer.render.Box;

import java.util.Collections;
import java.util.IdentityHashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import static java.util.regex.Pattern.CASE_INSENSITIVE;

class HTMLOutline {

    private static final Pattern HEADING = Pattern.compile("h(\\d+)", CASE_INSENSITIVE);

    /** sectioning roots */
    private static final Pattern ROOT = Pattern.compile("blockquote|details|fieldset|figure|td", CASE_INSENSITIVE);

    private static final Pattern WS = Pattern.compile("\\s+");

    private static final int MAX_NAME_LENGTH = 200;

    private final HTMLOutline parent;
    private final int level;
    private final Bookmark bookmark;

    private HTMLOutline() {
        this(0, "root", null);
    }

    private HTMLOutline(int level, String name, HTMLOutline parent) {
        this.level = level;
        this.bookmark = new Bookmark(name, "");
        this.parent = parent;
        if (parent != null) {
            parent.bookmark.addChild(bookmark);
        }
    }

    /**
     * Creates a bookmark list of the document outline generated for the given
     * element context (usually the root document element).
     * 

* The current algorithm is more simple than the one suggested in the HTML5 * specification such as it is not affected by * sectioning * content but just the heading level. For * example:

*
     * <body>
     *   <h1>Foo</h1>
     *   <h3>Bar</h3>
     *   <blockquote>
     *     <h5>Bla</h5>
     *   </blockquote>
     *   <p>Baz</p>
     *   <h2>Quux</h2>
     *   <section>
     *     <h3>Thud</h3>
     *   </section>
     *   <h4>Grunt</h4>
     * </body>
*

* Should generate outline as:

*
    *
  1. Foo *
      *
    1. Bar
    2. *
    3. Quux
    4. *
    5. Thud
    6. *
    7. Grunt
    8. *
  2. *
*

* But it generates outline as:

*
    *
  1. Foo *
      *
    1. Bar
    2. *
    3. Quux *
        *
      1. Thud *
          *
        1. Grunt
        2. *
      2. *
    4. *
  2. *
* *

Example document customizations

* *
Include non-heading element as bookmark (level 4)
*
     * <strong data-pdf-bookmark="4">Foo bar</strong>
* *
Specify bookmark name
*
     * <tr data-pdf-bookmark="5" data-pdf-bookmark-name="Bar baz">...</tr>
* *
Exclude individual heading from bookmarks
*
     * <h3 data-pdf-bookmark="none">Baz qux</h3>
* *
Prevent automatic bookmarks for the whole of the document
*
     * <html data-pdf-bookmark="exclude">...</html>
* * @param context the top element a sectioning outline would be generated for; * @param box box hierarchy the outline bookmarks would get mapped into. * @return Bookmarks of the outline generated for the given element context. * @see Creating an outline */ public static List generate(Element context, Box box) { NodeIterator iterator = NestedSectioningFilter.iterator(context); if (iterator == null) { return Collections.emptyList(); } HTMLOutline root = new HTMLOutline(); HTMLOutline current = root; Map map = new IdentityHashMap<>(); for (Element element = (Element) iterator.nextNode(); element != null; element = (Element) iterator.nextNode()) { int level; try { level = Integer.parseInt(getOutlineLevel(element)); if (level < 1) { continue; // Illegal value } } catch (NumberFormatException ignore) { continue; // Invalid value } String name = getBookmarkName(element); while (current.level >= level) { current = current.parent; } current = new HTMLOutline(level, name, current); map.put(element, current.bookmark); } initBoxRefs(map, box); return root.bookmark.getChildren(); } private static void initBoxRefs(Map map, Box box) { Bookmark bookmark = map.get(box.getElement()); if (bookmark != null) { bookmark.setBox(box); } for (int i = 0, len = box.getChildCount(); i < len; i++) { initBoxRefs(map, box.getChild(i)); } } private static String getBookmarkName(Element element) { String name = element.getAttribute("data-pdf-bookmark-name").trim(); if (name.isEmpty()) { name = element.getTextContent(); } name = WS.matcher(name.trim()).replaceAll(" "); if (name.length() > MAX_NAME_LENGTH) { name = name.substring(0, MAX_NAME_LENGTH); } return name; } private static String getOutlineLevel(Element element) { String bookmark = element.getAttribute("data-pdf-bookmark").trim(); return bookmark.isEmpty() ? 
getOutlineLevelFromTagName(element.getTagName()) : bookmark; } static String getOutlineLevelFromTagName(String tagName) { Matcher heading = HEADING.matcher(tagName); if (heading.matches()) { return heading.group(1); } else if (ROOT.matcher(tagName).matches()) { return "exclude"; } else { return "none"; } } private static class NestedSectioningFilter implements NodeFilter { private static final NestedSectioningFilter INSTANCE = new NestedSectioningFilter(); private static NodeIterator iterator(Element root) { Document ownerDocument = root.getOwnerDocument(); return (ownerDocument instanceof DocumentTraversal) ? ((DocumentTraversal) ownerDocument).createNodeIterator(root, SHOW_ELEMENT, INSTANCE, true) : null; } @Override public short acceptNode(Node n) { String outlineLevel = getOutlineLevel((Element) n); if (outlineLevel.equalsIgnoreCase("none")) { return FILTER_SKIP; } return outlineLevel.equalsIgnoreCase("exclude") ? FILTER_REJECT : FILTER_ACCEPT; } } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy