All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.overzealous.remark.convert.AbstractNodeHandler Maven / Gradle / Ivy

Go to download

markdown generator from html updated but based on original Apache 2.0 licensed code from https://bitbucket.org/OverZealous/remark/src/default/

There is a newer version: 2.0.18
Show newest version
/**
 * (c) Copyright 2019-2020 IBM Corporation
 * 1 New Orchard Road, 
 * Armonk, New York, 10504-1722
 * United States
 * +1 914 499 1900
 * support: Nathaniel Mills [email protected]
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 */

/*
 * Copyright 2011 OverZealous Creations, LLC
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.overzealous.remark.convert;

import java.io.IOException;
import java.util.Map;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.nodes.Node;
import org.jsoup.nodes.TextNode;
import com.overzealous.remark.util.BlockWriter;

/**
 * Contains basic implementations for handling text nodes and ignored HTML
 * elements.
 *
 * @author Phil DeJarnett
 * @author Nathaniel Mills modifications for provenance and level tracking
 */
public abstract class AbstractNodeHandler implements NodeHandler {

   /**
    * Handle a child text node. The default method, implemented here, is to
    * simply write the cleaned text directly.
    *
    * @param node
    *           Node to handle
    * @param converter
    *           Parent converter for this object.
    * @param pw
    *           Annotation Writer to receive annotations mapping generated
    *           markdown to document element(s)
    * @param baseUri
    *           the base URI needed to flesh out partial (local) image or href URL references
    * @param domain
    *           the domain from the baseUri used to find domain specific
    *           filtering rules
    * @param level
    */
   public void handleTextNode(TextNode node, DocumentConverter converter,
      ProvenanceWriter pw, String baseUri, String domain, String level) {
      String md = converter.cleaner.clean(node);
      converter.output.write(md);
      saveAnnotation(pw, level, node, md);
   }

   /**
    * Handle an ignored HTMLElement. The default method here is to either write
    * the HTMLElement as a block if it is a block element, or write it directly
    * if it is not.
    *
    * @param node
    *           Node to handle
    * @param converter
    *           Parent converter for this object.
    * @param pw
    *           Annotation Writer to receive annotations mapping generated
    *           markdown to document element(s)
    * @param baseUri
    *           the base URI needed to flesh out partial (local) image or href URL references
    * @param domain
    *           the domain from the baseUri used to find domain specific
    *           filtering rules
    * @param level
    */
   public void handleIgnoredHTMLElement(Element node,
      DocumentConverter converter, ProvenanceWriter pw, String baseUri, String domain,
      String level) {
      if (node.isBlock()) {
         converter.output.writeBlock(node.toString());
      } else {
         // Note: because this is an inline element, we want to make sure it
         // stays that way!
         // this means turning off prettyPrinting, so that JSoup doesn't add
         // unecessary spacing around
         // the child nodes.
         Document doc = node.ownerDocument();
         boolean oldPrettyPrint = doc.outputSettings().prettyPrint();
         doc.outputSettings().prettyPrint(false);
         converter.output.write(node.toString());
         String md = node.toString();
         saveAnnotation(pw, level, node, md);
         doc.outputSettings().prettyPrint(oldPrettyPrint);
      }
   }

   /**
    * Recursively processes child nodes, and prepends the given string to the
    * output.
    * 
    * @param prepend
    *           String to prepend
    * @param node
    *           Starting Node
    * @param converter
    *           Parent document converter
    * @param nodes
    *           Map of valid nodes
    * @param pw
    *           Annotation Writer to receive annotations mapping generated
    *           markdown to document element(s)
    * @param baseUri
    *           the base URI needed to flesh out partial (local) image or href URL references
    * @param domain
    *           the domain from the baseUri used to find domain specific
    *           filtering rules
    * @param level
    *           The dotted tree notation for the location within the dom of the
    *           node being processed. e.g., The top level node (e.g., )
    *           would be 1. Its first child element would be 1.1 and 2nd would
    *           be 1.2. The first childs first child would be 1.1.1. For inline
    *           expressions, we can introduce a different separator like the
    *           tilde. So, if an element is like: 
    *           Facebook
    *            Then the  tag would be x.y and its href would be x.y~1
    *           and the  would be x.y.1 and its src would be x.y.1~1. For
    *           text between tags we could use a carat as a separator.
    * @param searchLevel
    *           signals to stop dom walking if we have reached the search level,
    *           and return the node we are on. If null, no interuption occurs.
    * @return the Node where level matched searchLevel, otherwise, null
    */
   protected Node prependAndRecurse(String prepend, Element node,
      DocumentConverter converter, Map nodes,
      ProvenanceWriter pw, String baseUri, String domain, String level, String searchLevel) {
      Node result = null;
      BlockWriter oldOutput = converter.output;
      converter.output = new BlockWriter(oldOutput);
      converter.output.setPrependNewlineString(prepend);
      result = converter.walkNodes(this, node, nodes, pw, baseUri, domain, level,
         searchLevel);
      converter.output = oldOutput;
      return result;
   }

   /**
    * Save the annotation for the level, node, and markdown content printing any
    * exceptions that may occur
    * 
    * @param pw
    * @param level
    * @param node
    * @param markdown
    */
   protected void saveAnnotation(ProvenanceWriter pw, String level, Node node,
      String markdown) {
      if (pw != null) {
         try {
            pw.saveHTML2MD(level, node, markdown);
         } catch (IOException e) {
            e.printStackTrace();
         }
      }
   }

   /**
    * Save the filter reason annotation for the level, node, and filterReason content printing any
    * exceptions that may occur
    * 
    * @param pw
    * @param level
    * @param node
    * @param filterReason
    */
   protected void saveFilterAnnotation(ProvenanceWriter pw, String level, Node node,
      String filterReason) {
      if (pw != null) {
         try {
            pw.saveFilteredHTML(level, node, filterReason);
         } catch (IOException e) {
            e.printStackTrace();
         }
      }
   }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy