All downloads are free. The search and download functionality uses the official Maven repository.

com.overzealous.remark.Main Maven / Gradle / Ivy

Go to download

Markdown generator from HTML; updated, but based on the original Apache 2.0-licensed code from https://bitbucket.org/OverZealous/remark/src/default/

There is a newer version: 2.0.18
Show newest version
/**
 * (c) Copyright 2019-2020 IBM Corporation
 * 1 New Orchard Road, 
 * Armonk, New York, 10504-1722
 * United States
 * +1 914 499 1900
 * support: Nathaniel Mills [email protected]
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 */

/*
 * Copyright 2011 OverZealous Creations, LLC
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.overzealous.remark;

import org.apache.commons.cli.*;

import java.io.*;
import java.net.MalformedURLException;
import java.net.URL;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.List;

/**
 * This is the starting point to begin remarking HTML to Markdown from the
 * command line.
 * 
 * @author Phil DeJarnett
 * @author Nathaniel Mills modifications for provenance and level tracking
 */
public class Main {

    private static class Args {

        File fileInput = null;
        URL urlInput = null;
        int inputTimeout = 15;
        String baseURL = "";
        String charset = null;
        File output = null;
        boolean html = false;
        boolean relative = false;
        Options options = null;
    }

    public static void main(String[] args) throws Exception {
        new Main().start(args);
    }

    private void start(String[] args) {
        Args myArgs = processArgs(args);
        if (myArgs != null) {
            Remark remark = new Remark(myArgs.options);
            remark.setCleanedHtmlEchoed(myArgs.html);
            if (myArgs.output != null) {
                convertToFile(remark, myArgs);
            } else {
                try {
                    System.out.println(convert(remark, myArgs));
                } catch (IOException ex) {
                    System.err.println("Error reading from input:");
                    System.err.println("  " + ex.getMessage());
                }
            }
        }
    }

    private void convertToFile(Remark remark, Args myArgs) {
        FileOutputStream fos = null;
        OutputStreamWriter osw = null;
        BufferedWriter bw = null;
        try {
            // noinspection IOResourceOpenedButNotSafelyClosed
            fos = new FileOutputStream(myArgs.output);
            // noinspection IOResourceOpenedButNotSafelyClosed
            osw = new OutputStreamWriter(fos, "UTF-8");
            // noinspection IOResourceOpenedButNotSafelyClosed
            bw = new BufferedWriter(osw);
            remark = remark.withWriter(bw);
            convert(remark, myArgs);

        } catch (IOException ex) {
            System.err
                .println("Error reading from input or writing to output file:");
            System.err.println("  " + ex.getMessage());
        } finally {
            try {
                if (bw != null) {
                    bw.close();
                }
                if (osw != null) {
                    osw.close();
                }
                if (fos != null) {
                    fos.close();
                }
            } catch (IOException ex) {
                ex.printStackTrace();
            }
        }
    }

    private String convert(Remark remark, Args myArgs) throws IOException {
        String ret;
        if (myArgs.fileInput != null) {
            ret = remark.convert(myArgs.fileInput, myArgs.charset,
                myArgs.baseURL);
        } else {
            ret = remark.convert(myArgs.urlInput, myArgs.inputTimeout * 1000);
        }
        return ret;
    }

    private Args processArgs(String[] args) {
        Args result = new Args();
        List error = new ArrayList();
        org.apache.commons.cli.Options opts = makeOptions();
        CommandLineParser clp = new DefaultParser();// PosixParser();

        try {
            CommandLine cl = clp.parse(opts, args, true);
            if (cl.hasOption('h')) {
                printHelp(opts);
                result = null;
            } else {
                checkType(cl, result, error);
                checkTimeout(cl, result, error);
                checkBaseURL(cl, result, error);
                checkCharset(cl, result, error);
                checkOutput(cl, result, error);
                checkInput(cl, result, error);
                result.html = cl.hasOption("html");
                result.relative = cl.hasOption("relative");
                result.options.preserveRelativeLinks = result.relative;
            }
        } catch (ParseException ex) {
            System.err.println("Unexpected error parsing the command line.");
            System.err.println();
            printHelp(opts);
            result = null;
        }

        if (!error.isEmpty()) {
            printErrors(error);
            result = null;
        }

        return result;
    }

    private void printHelp(org.apache.commons.cli.Options opts) {
        HelpFormatter hf = new HelpFormatter();
        hf.printHelp("remark [options] [-o ] ",
            opts);
        System.err.println();
    }

    private void printErrors(List error) {
        if (error.size() == 1) {
            System.err.print("Error: ");
            System.err.println(error.get(0));
        } else {
            System.err.println("Error:");
            for (final String err : error) {
                System.err.print(" - ");
                System.err.println(err);
            }
        }
        System.err.println("Run with the argument -h for help.");
        System.err.println();
    }

    private org.apache.commons.cli.Options makeOptions() {
        org.apache.commons.cli.Options opts = new org.apache.commons.cli.Options();
        opts.addOption("t", "type", true,
            "Type of markdown to target: markdown, markdownextra, multimarkdown, pegdown, pegdownall, or github");
        opts.addOption("o", "output", true,
            "Name of file to output to; defaults to system out");
        opts.addOption("timeout", true,
            "Timeout in seconds for downloading from URLs only; defaults to 15s");
        opts.addOption("baseurl", true,
            "Base URL for file inputs, this helps in handling relative links.");
        opts.addOption("relative", false, "If set, preserve relative URLs.");
        opts.addOption("charset", true,
            "Character set for file inputs; defaults to UTF-8.");
        opts.addOption("html", false,
            "If set, the cleaned HTML document will be echoed out before conversion.");
        opts.addOption("h", "help", false, "Displays this help.");
        return opts;
    }

    private void checkType(CommandLine cl, Args result, List error) {
        if (cl.hasOption('t')) {
            String type = cl.getOptionValue('t');
            if ("markdown".equalsIgnoreCase(type)) {
                result.options = Options.markdown();
            } else if ("markdown".equalsIgnoreCase(type)) {
                result.options = Options.markdown();
            } else if ("multimarkdown".equalsIgnoreCase(type)) {
                result.options = Options.multiMarkdown();
            } else if ("markdownextra".equalsIgnoreCase(type)) {
                result.options = Options.markdownExtra();
            } else if ("pegdown".equalsIgnoreCase(type)) {
                result.options = Options.pegdownBase();
            } else if ("pegdownall".equalsIgnoreCase(type)) {
                result.options = Options.pegdownAllExtensions();
            } else if ("github".equalsIgnoreCase(type)) {
                result.options = Options.github();
            } else {
                error.add("Invalid type specified");
            }
        } else {
            result.options = Options.markdown();
        }
    }

    private void checkOutput(CommandLine cl, Args result, List error) {
        if (cl.hasOption('o')) {
            File output = new File(cl.getOptionValue('o')).getAbsoluteFile();
            result.output = output;
            if (!output.exists()) {
                // check for parent path
                File parent = output.getParentFile();
                if (parent.exists() && !parent.isDirectory()) {
                    error.add("Output does is not a valid path.");
                } else {
                    if (!parent.exists() && !parent.mkdirs()) {
                        error.add("Output path could not be created.");
                    } else if (!parent.canWrite()) {
                        error.add("Output directory cannot be written to.");
                    }
                }
            } else if (!output.isFile()) {
                error.add("Output file exists and is not a file");
            } else if (!output.canWrite()) {
                error.add("Output file cannot be written to.");
            }
        }
    }

    private void checkTimeout(CommandLine cl, Args result, List error) {
        if (cl.hasOption("timeout")) {
            try {
                Integer timeout = Integer
                    .parseInt(cl.getOptionValue("timeout"));
                if (timeout < 1) {
                    error.add("Invalid timeout specified.");
                } else {
                    result.inputTimeout = timeout;
                }
            } catch (NumberFormatException ex) {
                error.add("Invalid timeout specified.");
            }
        }
    }

    private void checkBaseURL(CommandLine cl, Args result, List error) {
        if (cl.hasOption("baseurl")) {
            result.baseURL = cl.getOptionValue("baseurl");
        }
    }

    private void checkCharset(CommandLine cl, Args result, List error) {
        if (cl.hasOption("charset")) {
            String charset = cl.getOptionValue("charset");
            if (Charset.isSupported(charset)) {
                result.charset = charset;
            } else {
                error.add("Unsupported charset.");
            }
        }
    }

    private void checkInput(CommandLine cl, Args result, List error) {
        List leftover = cl.getArgList();
        if (leftover.isEmpty()) {
            error.add("No input file or URL specified.");
        } else if (leftover.size() > 1) {
            error.add("Too many arguments.");
        } else {
            String arg = (String) leftover.get(0);
            if (arg.contains("://")) {
                try {
                    result.urlInput = new URL(arg);
                } catch (MalformedURLException ex) {
                    error.add("Malformed URL: " + ex.getMessage());
                }
            } else {
                File input = new File(arg);
                if (input.isFile()) {
                    if (input.canRead()) {
                        result.fileInput = input;
                    } else {
                        error.add(String.format("Unable to read input file: %s",
                            arg));
                    }
                } else {
                    error.add(String.format(
                        "Input file does not exist or is not a file: %s", arg));
                }
            }
        }
    }

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy