All downloads are free. The search and download functionality uses the official Maven repository.

org.tomitribe.crest.help.DocumentParser Maven / Gradle / Ivy

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 */
package org.tomitribe.crest.help;

import org.tomitribe.util.Join;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Parses a plain-text help description into a {@link Document}.
 *
 * <p>The text is split into lines and each line is classified, in this order, as:
 * <ol>
 *   <li>an empty line, which ends whatever element is currently being read;</li>
 *   <li>a heading: a line starting with one or more {@code =} or {@code #}
 *       characters, or a line that starts with an uppercase letter and
 *       contains no lowercase letters;</li>
 *   <li>a bullet: optional leading spaces, a {@code -}, then text; following
 *       lines indented at least as far as the bullet prefix continue it;</li>
 *   <li>a preformatted line: a line that starts with four spaces;</li>
 *   <li>otherwise, a paragraph line.</li>
 * </ol>
 *
 * <p>Content recognised by {@code HtmlDocumentParser.isHtml} is delegated to
 * {@code HtmlDocumentParser} instead (see {@link #parser(String)}).
 *
 * <p>An instance accumulates state while parsing and is used for one parse only.
 */
public class DocumentParser {

    /** Captures the run of leading spaces of a line (possibly empty). */
    private static final Pattern LEADING_SPACES = Pattern.compile("^( *)");

    /** Group 1: the bullet prefix (indent plus dash); group 2: the bullet text. */
    private static final Pattern BULLET = Pattern.compile("^( *-) *(.+)");

    /** Group 1: text of a {@code =}/{@code #} heading; group 2: text of an all-caps heading. */
    private static final Pattern HEADING = Pattern.compile("^[=#]+ *(.+)|^([A-Z]+[^a-z]+)$");

    /** Group 1: the text following a four-space indent. */
    private static final Pattern PREFORMATTED = Pattern.compile("^    (.+)");

    private final String content;
    private final Document.Builder doc = Document.builder();

    /** The multi-line element currently being collected, or {@code null} if none. */
    private State state;

    public DocumentParser(final String content) {
        this.content = content;
    }

    /**
     * Parses the given text into a {@link Document}.
     *
     * @param content the text to parse; may be {@code null} or empty
     * @return an empty document for {@code null}/empty input, the result of
     *         {@code HtmlDocumentParser.parse} when the content is detected as
     *         HTML, otherwise the result of the plain-text parser
     */
    public static Document parser(String content) {
        if (content == null || content.isEmpty()) {
            return Document.builder().build();
        }
        if (HtmlDocumentParser.isHtml(content)) {
            return HtmlDocumentParser.parse(content);
        }
        return new DocumentParser(content).parse();
    }

    /**
     * Item descriptions are allowed to have only a subset of the element types
     * a complete manual would get: {@code Option} elements are dropped and
     * headings are turned into paragraphs.
     *
     * @param description the option description text
     * @return the parsed and filtered document
     */
    public static Document parseOptionDescription(final String description) {
        final Document.Builder document = Document.builder();

        final Document parsed = DocumentParser.parser(description);
        parsed.getElements().stream()
                .filter(element -> !(element instanceof Option))
                .map(DocumentParser::convertHeaders)
                .forEach(document::element);

        return document.build();
    }

    /**
     * Item Descriptions aren't allowed to have headings.  We will convert them to paragraphs
     */
    private static Element convertHeaders(final Element element) {
        if (element instanceof Heading) {
            return new Paragraph(element.getContent());
        }
        return element;
    }

    /**
     * Runs the line classifiers over the normalized content and builds the document.
     */
    private Document parse() {
        final List<String> lines = normalizeAndSplitContent();

        for (final String line : lines) {
            // Order matters: the first classifier that accepts the line wins.
            final boolean handled = processTerminator(line)
                    || processHeader(line)
                    || processBullet(line)
                    || processPreformatted(line);
            if (!handled) {
                processParagraph(line);
            }
        }

        // Flush whatever element was still being read at end of input.
        terminate();

        return doc.build();
    }

    /**
     * Trims the content, normalizes line endings, expands tabs to four spaces,
     * blanks out whitespace-only lines, splits into lines and removes the
     * common indent.
     */
    private List<String> normalizeAndSplitContent() {
        final String normalized = content.trim()
                // Without this a trailing '\r' would keep "empty" lines from terminating blocks.
                .replace("\r\n", "\n")
                .replace("\t", "    ")
                // The lookahead leaves the closing newline unconsumed so that
                // several consecutive whitespace-only lines are all emptied.
                .replaceAll("\n +(?=\n)", "\n");

        final List<String> lines = new ArrayList<>(Arrays.asList(normalized.split("\n")));
        return stripIndent(lines);
    }

    /**
     * Handles a line indented by four spaces, appending it (without that
     * indent) to the current preformatted block or starting a new one.
     *
     * @return {@code true} if the line was consumed
     */
    private boolean processPreformatted(final String line) {
        final Matcher matcher = PREFORMATTED.matcher(line);
        if (!matcher.find()) {
            return false;
        }

        if (!(state instanceof ReadingPreformatted)) {
            terminate();
            state = new ReadingPreformatted();
        }

        state.process(matcher.group(1));
        return true;
    }

    /**
     * Handles an empty line, which ends the element currently being read.
     *
     * @return {@code true} if the line was empty
     */
    private boolean processTerminator(final String line) {
        if (!line.isEmpty()) {
            return false;
        }

        terminate();
        return true;
    }

    /**
     * Appends the line to the current paragraph, starting a new paragraph if
     * a different kind of element (or nothing) was being read.
     */
    private void processParagraph(final String line) {
        if (!(state instanceof ReadingParagraph)) {
            terminate();
            state = new ReadingParagraph();
        }

        state.process(line);
    }

    /**
     * Handles a heading line, ending the current element and adding the heading.
     *
     * @return {@code true} if the line was a heading
     */
    private boolean processHeader(final String line) {
        final Matcher matcher = HEADING.matcher(line);
        if (!matcher.find()) {
            return false;
        }

        // Exactly one of the two alternatives matched, so exactly one group is set.
        final String text = matcher.group(1) != null ? matcher.group(1) : matcher.group(2);

        terminate();
        doc.heading(text);
        return true;
    }

    /**
     * Handles the first line of a bullet or a continuation line of the bullet
     * currently being read.
     *
     * @return {@code true} if the line was consumed as part of a bullet
     */
    private boolean processBullet(final String line) {
        // Is this line the start of a bullet?
        final Matcher start = BULLET.matcher(line);
        if (start.find()) {
            terminate();

            final ReadingBullet reading = new ReadingBullet(start.group(1));
            reading.process(start.group(2));
            this.state = reading;
            return true;
        }

        // Is this line a continuation of a bullet?
        if (state instanceof ReadingBullet) {
            final ReadingBullet reading = (ReadingBullet) this.state;
            final Matcher continuation = reading.continued.matcher(line);
            if (continuation.find()) {
                reading.process(continuation.group(1));
                return true;
            }
        }

        return false;
    }

    /** Ends the element currently being read, if any, and clears the state. */
    private void terminate() {
        if (state == null) {
            return;
        }
        state.terminate();
        state = null;
    }

    /**
     * An element that is collected over several lines.
     */
    public interface State {
        /** Adds one more line to the element being collected. */
        void process(final String line);

        /** Completes the element and adds it to the document. */
        void terminate();
    }

    /** Collects trimmed lines and emits them as a single-line paragraph. */
    private class ReadingParagraph implements State {
        private final List<String> lines = new ArrayList<>();

        @Override
        public void process(final String line) {
            lines.add(line.trim());
        }

        @Override
        public void terminate() {
            final String text = Join.join(" ", lines)
                    .replaceAll("  +", " ");
            doc.paragraph(text);
        }
    }

    /** Collects lines verbatim and emits them as a newline-joined preformatted block. */
    private class ReadingPreformatted implements State {
        private final List<String> lines = new ArrayList<>();

        @Override
        public void process(final String line) {
            lines.add(line);
        }

        @Override
        public void terminate() {
            doc.preformatted(Join.join("\n", lines));
        }
    }

    /** Collects the lines of one bullet and emits them as a single-line bullet. */
    private class ReadingBullet implements State {
        private final List<String> lines = new ArrayList<>();

        /** Matches a continuation line: indented at least as wide as the bullet prefix. */
        private final Pattern continued;

        /**
         * @param bullet the bullet prefix (leading spaces plus the dash)
         */
        public ReadingBullet(final String bullet) {
            // Same width as the prefix, but all spaces.
            final String spaces = bullet.replaceAll(".", " ");
            this.continued = Pattern.compile(String.format("^%s(.+)", spaces));
        }

        @Override
        public void process(final String line) {
            lines.add(line);
        }

        @Override
        public void terminate() {
            final String text = Join.join(" ", lines)
                    .replaceAll("  +", " ");

            doc.bullet(text);
        }
    }

    /**
     * Removes the smallest common indent from every line except the first.
     * The first line is left untouched and does not take part in the indent
     * computation. The list is modified in place and returned.
     */
    private List<String> stripIndent(final List<String> lines) {
        if (lines.isEmpty()) {
            return lines;
        }

        final String first = lines.remove(0);
        final int indent = getIndent(lines);

        for (int i = 0; i < lines.size(); i++) {
            final String line = lines.get(i);
            // Skips empty lines; also covers the "no indent found" sentinel.
            if (line.length() >= indent) {
                lines.set(i, line.substring(indent));
            }
        }

        lines.add(0, first);
        return lines;
    }

    /**
     * Returns the smallest number of leading spaces among the non-empty lines,
     * or {@link Integer#MAX_VALUE} when there are no non-empty lines.
     */
    private int getIndent(final List<String> lines) {
        int indent = Integer.MAX_VALUE;
        for (final String line : lines) {
            if (line.isEmpty()) {
                continue;
            }
            final Matcher matcher = LEADING_SPACES.matcher(line);
            if (matcher.find()) {
                indent = Math.min(indent, matcher.group(1).length());
            }
        }
        return indent;
    }

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy