All downloads are free. Search and download functionality uses the official Maven repository.

com.overzealous.remark.Main Maven / Gradle / Ivy

Go to download

Markdown generator from HTML; an updated version based on the original Apache 2.0 licensed code from https://bitbucket.org/OverZealous/remark/src/default/

There is a newer version: 2.0.18
Show newest version
/**
 * (c) Copyright 2019-2020 IBM Corporation
 * 1 New Orchard Road, 
 * Armonk, New York, 10504-1722
 * United States
 * +1 914 499 1900
 * support: Nathaniel Mills [email protected]
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 */

/*
 * Copyright 2011 OverZealous Creations, LLC
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.overzealous.remark;

import org.apache.commons.cli.*;

import java.io.*;
import java.net.MalformedURLException;
import java.net.URL;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.List;

/**
 * This is the starting point to begin remarking HTML to Markdown from the
 * command line.
 * 
 * @author Phil DeJarnett
 * @author Nathaniel Mills modifications for provenance and level tracking
 */
public class Main {

   private static class Args {

      File fileInput = null;
      URL urlInput = null;
      int inputTimeout = 15;
      String baseURL = "";
      String charset = null;
      File output = null;
      boolean html = false;
      boolean relative = false;
      Options options = null;
   }

   public static void main(String[] args) throws Exception {
      new Main().start(args);
   }

   private void start(String[] args) {
      Args myArgs = processArgs(args);
      if (myArgs != null) {
         Remark remark = new Remark(myArgs.options);
         remark.setCleanedHtmlEchoed(myArgs.html);
         if (myArgs.output != null) {
            convertToFile(remark, myArgs);
         } else {
            try {
               System.out.println(convert(remark, myArgs));
            } catch (IOException ex) {
               System.err.println("Error reading from input:");
               System.err.println("  " + ex.getMessage());
            }
         }
      }
   }

   private void convertToFile(Remark remark, Args myArgs) {
      FileOutputStream fos = null;
      OutputStreamWriter osw = null;
      BufferedWriter bw = null;
      try {
         // noinspection IOResourceOpenedButNotSafelyClosed
         fos = new FileOutputStream(myArgs.output);
         // noinspection IOResourceOpenedButNotSafelyClosed
         osw = new OutputStreamWriter(fos, "UTF-8");
         // noinspection IOResourceOpenedButNotSafelyClosed
         bw = new BufferedWriter(osw);
         remark = remark.withWriter(bw);
         convert(remark, myArgs);

      } catch (IOException ex) {
         System.err
            .println("Error reading from input or writing to output file:");
         System.err.println("  " + ex.getMessage());
      } finally {
         try {
            if (bw != null) {
               bw.close();
            }
            if (osw != null) {
               osw.close();
            }
            if (fos != null) {
               fos.close();
            }
         } catch (IOException ex) {
            ex.printStackTrace();
         }
      }
   }

   private String convert(Remark remark, Args myArgs) throws IOException {
      String ret;
      if (myArgs.fileInput != null) {
         ret = remark.convert(myArgs.fileInput, myArgs.charset, myArgs.baseURL);
      } else {
         ret = remark.convert(myArgs.urlInput, myArgs.inputTimeout * 1000);
      }
      return ret;
   }

   private Args processArgs(String[] args) {
      Args result = new Args();
      List error = new ArrayList();
      org.apache.commons.cli.Options opts = makeOptions();
      CommandLineParser clp = new DefaultParser();// PosixParser();

      try {
         CommandLine cl = clp.parse(opts, args, true);
         if (cl.hasOption('h')) {
            printHelp(opts);
            result = null;
         } else {
            checkType(cl, result, error);
            checkTimeout(cl, result, error);
            checkBaseURL(cl, result, error);
            checkCharset(cl, result, error);
            checkOutput(cl, result, error);
            checkInput(cl, result, error);
            result.html = cl.hasOption("html");
            result.relative = cl.hasOption("relative");
            result.options.preserveRelativeLinks = result.relative;
         }
      } catch (ParseException ex) {
         System.err.println("Unexpected error parsing the command line.");
         System.err.println();
         printHelp(opts);
         result = null;
      }

      if (!error.isEmpty()) {
         printErrors(error);
         result = null;
      }

      return result;
   }

   private void printHelp(org.apache.commons.cli.Options opts) {
      HelpFormatter hf = new HelpFormatter();
      hf.printHelp("remark [options] [-o ] ",
         opts);
      System.err.println();
   }

   private void printErrors(List error) {
      if (error.size() == 1) {
         System.err.print("Error: ");
         System.err.println(error.get(0));
      } else {
         System.err.println("Error:");
         for (final String err : error) {
            System.err.print(" - ");
            System.err.println(err);
         }
      }
      System.err.println("Run with the argument -h for help.");
      System.err.println();
   }

   private org.apache.commons.cli.Options makeOptions() {
      org.apache.commons.cli.Options opts = new org.apache.commons.cli.Options();
      opts.addOption("t", "type", true,
         "Type of markdown to target: markdown, markdownextra, multimarkdown, pegdown, pegdownall, or github");
      opts.addOption("o", "output", true,
         "Name of file to output to; defaults to system out");
      opts.addOption("timeout", true,
         "Timeout in seconds for downloading from URLs only; defaults to 15s");
      opts.addOption("baseurl", true,
         "Base URL for file inputs, this helps in handling relative links.");
      opts.addOption("relative", false, "If set, preserve relative URLs.");
      opts.addOption("charset", true,
         "Character set for file inputs; defaults to UTF-8.");
      opts.addOption("html", false,
         "If set, the cleaned HTML document will be echoed out before conversion.");
      opts.addOption("h", "help", false, "Displays this help.");
      return opts;
   }

   private void checkType(CommandLine cl, Args result, List error) {
      if (cl.hasOption('t')) {
         String type = cl.getOptionValue('t');
         if ("markdown".equalsIgnoreCase(type)) {
            result.options = Options.markdown();
         } else if ("markdown".equalsIgnoreCase(type)) {
            result.options = Options.markdown();
         } else if ("multimarkdown".equalsIgnoreCase(type)) {
            result.options = Options.multiMarkdown();
         } else if ("markdownextra".equalsIgnoreCase(type)) {
            result.options = Options.markdownExtra();
         } else if ("pegdown".equalsIgnoreCase(type)) {
            result.options = Options.pegdownBase();
         } else if ("pegdownall".equalsIgnoreCase(type)) {
            result.options = Options.pegdownAllExtensions();
         } else if ("github".equalsIgnoreCase(type)) {
            result.options = Options.github();
         } else {
            error.add("Invalid type specified");
         }
      } else {
         result.options = Options.markdown();
      }
   }

   private void checkOutput(CommandLine cl, Args result, List error) {
      if (cl.hasOption('o')) {
         File output = new File(cl.getOptionValue('o')).getAbsoluteFile();
         result.output = output;
         if (!output.exists()) {
            // check for parent path
            File parent = output.getParentFile();
            if (parent.exists() && !parent.isDirectory()) {
               error.add("Output does is not a valid path.");
            } else {
               if (!parent.exists() && !parent.mkdirs()) {
                  error.add("Output path could not be created.");
               } else if (!parent.canWrite()) {
                  error.add("Output directory cannot be written to.");
               }
            }
         } else if (!output.isFile()) {
            error.add("Output file exists and is not a file");
         } else if (!output.canWrite()) {
            error.add("Output file cannot be written to.");
         }
      }
   }

   private void checkTimeout(CommandLine cl, Args result, List error) {
      if (cl.hasOption("timeout")) {
         try {
            Integer timeout = Integer.parseInt(cl.getOptionValue("timeout"));
            if (timeout < 1) {
               error.add("Invalid timeout specified.");
            } else {
               result.inputTimeout = timeout;
            }
         } catch (NumberFormatException ex) {
            error.add("Invalid timeout specified.");
         }
      }
   }

   private void checkBaseURL(CommandLine cl, Args result, List error) {
      if (cl.hasOption("baseurl")) {
         result.baseURL = cl.getOptionValue("baseurl");
      }
   }

   private void checkCharset(CommandLine cl, Args result, List error) {
      if (cl.hasOption("charset")) {
         String charset = cl.getOptionValue("charset");
         if (Charset.isSupported(charset)) {
            result.charset = charset;
         } else {
            error.add("Unsupported charset.");
         }
      }
   }

   private void checkInput(CommandLine cl, Args result, List error) {
      List leftover = cl.getArgList();
      if (leftover.isEmpty()) {
         error.add("No input file or URL specified.");
      } else if (leftover.size() > 1) {
         error.add("Too many arguments.");
      } else {
         String arg = (String) leftover.get(0);
         if (arg.contains("://")) {
            try {
               result.urlInput = new URL(arg);
            } catch (MalformedURLException ex) {
               error.add("Malformed URL: " + ex.getMessage());
            }
         } else {
            File input = new File(arg);
            if (input.isFile()) {
               if (input.canRead()) {
                  result.fileInput = input;
               } else {
                  error
                     .add(String.format("Unable to read input file: %s", arg));
               }
            } else {
               error.add(String.format(
                  "Input file does not exist or is not a file: %s", arg));
            }
         }
      }
   }

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy