All Downloads are FREE. Search and download functionalities are using the official Maven repository.

net.fortytwo.smsn.brain.wiki.NoteParser Maven / Gradle / Ivy

package net.fortytwo.smsn.brain.wiki;

import net.fortytwo.smsn.SemanticSynchrony;
import net.fortytwo.smsn.brain.model.Note;
import org.json.JSONArray;
import org.json.JSONException;
import org.json.JSONObject;

import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.LinkedList;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class NoteParser {

    // regex of valid id suffixes
    public static final Pattern ID = Pattern.compile("[a-zA-Z0-9-_]+");
    private static final Pattern ID_SUFFIX = Pattern.compile(":[a-zA-Z0-9-_]+:");

    private static final String VERBATIM_BLOCK_START = "{{{";
    private static final String VERBATIM_BLOCK_END = "}}}";
    private static final String VERBATIM_BLOCK_START_ESC = "\\{\\{\\{";
    private static final String VERBATIM_BLOCK_END_ESC = "\\}\\}\\}";

    private static final String
            ALIAS_PROP = "@alias",
            PRIORITY_PROP = "@priority",
            SHARABILITY_PROP = "@sharability",
            SHORTCUT_PROP = "@shortcut",
            WEIGHT_PROP = "@weight";

    private static final int MAX_BULLET_LENGTH = 1;

    // Tabs count as four spaces each.
    private static final String TAB_REPLACEMENT = "    ";

    public Note fromWikiText(final String s) throws IOException, NoteParsingException {
        try (InputStream in = new ByteArrayInputStream(s.getBytes(SemanticSynchrony.UTF8))) {
            return fromWikiText(in);
        }
    }

    public Note fromWikiText(final InputStream in) throws IOException, NoteParsingException {
        Note root = new Note();

        LinkedList hierarchy = new LinkedList<>();
        LinkedList indentHierarachy = new LinkedList<>();

        InputStreamReader r = new InputStreamReader(in, SemanticSynchrony.UTF8);
        BufferedReader br = new BufferedReader(r);
        String line;
        int lineNumber = 0;
        while ((line = br.readLine()) != null) {
            lineNumber++;

            String l = line;

            // Tabs count as four spaces each.
            l = l.replaceAll("[\\t]", TAB_REPLACEMENT);

            if (0 == l.trim().length()) {
                // Empty lines are simply ignored.
                continue;
            }

            if (l.endsWith(NoteWriter.VALUE_TRUNCATOR)) {
                throw new NoteParsingException(lineNumber,
                        "line ends with the reserved truncation sequence \"" + NoteWriter.VALUE_TRUNCATOR + "\"");
            }

            // find indent level
            int indent = 0;
            if (l.length() > 0) {
                while (' ' == l.charAt(indent)) {
                    indent++;
                }
                l = l.substring(indent);
            }

            if (0 == l.length()) {
                throw new NoteParsingException(lineNumber, "missing bullet and value");
            }

            while (0 < hierarchy.size() && indentHierarachy.getLast() >= indent) {
                hierarchy.removeLast();
                indentHierarachy.removeLast();
            }

            // parse bullet or property name
            int j = -1;
            for (int i = 0; i < l.length(); i++) {
                char c = l.charAt(i);
                if (' ' == c) {
                    j = i;
                    break;
                }
            }
            if (j < 0) {
                j = l.length();
            }

            boolean isProperty;
            String bullet = l.substring(0, j);
            if (bullet.startsWith("@") && bullet.length() > 1) {
                isProperty = true;
            } else {
                isProperty = false;

                if (bullet.length() > MAX_BULLET_LENGTH) {
                    throw new NoteParsingException(lineNumber, "bullet is too long: " + bullet);
                }
            }

            // skip white space between bullet and value
            while (j < l.length() && ' ' == l.charAt(j)) {
                j++;
            }
            l = l.substring(j);

            // find id, if present
            String id = null;
            if (!isProperty) {
                Matcher m = ID_SUFFIX.matcher(l);
                if (m.find() && 0 == m.start()) {
                    id = l.substring(1, m.end() - 1);
                    l = l.substring(m.end()).trim();
                }
            }

            String value = "";
            if (0 < l.length()) {
                String lt = l.trim();
                if (!isProperty && lt.startsWith(VERBATIM_BLOCK_START)) {
                    if (lt.length() > 3) {
                        throw new NoteParsingException(lineNumber, "verbatim block must open with a line containing only '{{{'");
                    }

                    StringBuilder verbatimValue = new StringBuilder();
                    boolean first = true;
                    while (true) {
                        String nextLine = br.readLine();
                        lineNumber++;
                        if (nextLine.contains(VERBATIM_BLOCK_END)) {
                            lt = nextLine.trim();
                            if (lt.length() > 3) {
                                throw new NoteParsingException(lineNumber, "verbatim block must close with a line containing only '}}}'");
                            } else {
                                break;
                            }
                        } else {
                            if (first) {
                                first = false;
                            } else {
                                verbatimValue.append('\n');
                            }
                            verbatimValue.append(nextLine);
                        }
                    }

                    value = verbatimValue.toString();
                } else {
                    value = l;
                }
            }

            value = value.trim();

            if (0 == value.length()) {
                if (isProperty) {
                    // can "clear" alias or shortcut by writing "@alias" or "@shortcut" and nothing else;
                    // all other properties require an argument
                    if (!(bullet.equals(ALIAS_PROP) || bullet.equals(SHORTCUT_PROP))) {
                        throw new NoteParsingException(
                                lineNumber, "empty value for property candidate '" + bullet + "'");
                    }
                } else if (null == id) {
                    throw new NoteParsingException(lineNumber, "empty value for new note");
                } else {
                    // Empty note values are allowed for existing notes.
                    // They signify that an existing note's value should not be overwritten.
                    value = null;
                }
            }

            if (isProperty) {
                Note n = 0 == hierarchy.size() ? root : hierarchy.get(hierarchy.size() - 1);

                switch (bullet) {
                    case ALIAS_PROP:
                        if (value.length() > 0) {
                            n.setAlias(value);
                        } else {
                            n.setAlias(Note.CLEARME_VALUE);
                        }
                        break;
                    case SHORTCUT_PROP:
                        if (value.length() > 0) {
                            n.setShortcut(value);
                        } else {
                            n.setShortcut(Note.CLEARME_VALUE);
                        }
                        break;
                    case PRIORITY_PROP: {
                        float val;
                        try {
                            val = Float.valueOf(value);
                        } catch (NumberFormatException e) {
                            throw new NoteParsingException(lineNumber, "invalid @priority value: " + value);
                        }
                        n.setPriority(val);
                        break;
                    }
                    case SHARABILITY_PROP: {
                        float val;
                        try {
                            val = Float.valueOf(value);
                        } catch (NumberFormatException e) {
                            throw new NoteParsingException(lineNumber, "invalid @sharability value: " + value);
                        }
                        n.setSharability(val);
                        break;
                    }
                    case WEIGHT_PROP: {
                        float val;
                        try {
                            val = Float.valueOf(value);
                        } catch (NumberFormatException e) {
                            throw new NoteParsingException(lineNumber, "invalid @weight value: " + value);
                        }
                        n.setWeight(val);
                        break;
                    }
                    default:
                        throw new NoteParsingException(lineNumber, "unknown property: " + bullet);
                }
            } else {
                Note n = new Note();
                n.setValue(value);

                n.setId(id);

                if (0 < hierarchy.size()) {
                    hierarchy.get(hierarchy.size() - 1).addChild(n);
                } else {
                    root.addChild(n);
                }

                hierarchy.add(n);
                indentHierarachy.add(indent);
            }
        }

        return root;
    }

    /**
     * Removes the verbatim block terminators ("{{{" and "}}}") from an atom value string.
     * This method does not check the value with respect to matching and non-nested terminators;
     * the value is assumed to be valid.
     *
     * @return the unescaped value
     */
    public static String unescapeValue(final String escaped) {
        return escaped.replaceAll(VERBATIM_BLOCK_START_ESC, "").replaceAll(VERBATIM_BLOCK_END_ESC, "");
    }

    public Note fromJSON(final JSONObject j) throws JSONException {
        Note n = new Note();

        if (j.has(NoteWriter.ID)) {
            n.setId(j.getString(NoteWriter.ID));
        }
        if (j.has(SemanticSynchrony.VALUE)) {
            n.setValue(j.getString(SemanticSynchrony.VALUE));
        }
        if (j.has(SemanticSynchrony.ALIAS)) {
            n.setAlias(j.getString(SemanticSynchrony.ALIAS));
        }
        if (j.has(SemanticSynchrony.SHORTCUT)) {
            n.setShortcut(j.getString(SemanticSynchrony.SHORTCUT));
        }
        if (j.has(SemanticSynchrony.SHARABILITY)) {
            n.setSharability((float) j.getDouble(SemanticSynchrony.SHARABILITY));
        }
        if (j.has(SemanticSynchrony.WEIGHT)) {
            n.setWeight((float) j.getDouble(SemanticSynchrony.WEIGHT));
        }
        if (j.has(SemanticSynchrony.PRIORITY)) {
            n.setPriority((float) j.getDouble(SemanticSynchrony.PRIORITY));
        }
        if (j.has(SemanticSynchrony.CREATED)) {
            n.setCreated(j.getLong(SemanticSynchrony.CREATED));
        }
        if (j.has(NoteWriter.HAS_CHILDREN)) {
            n.setHasChildren(j.optBoolean(NoteWriter.HAS_CHILDREN));
        }

        JSONArray a = j.optJSONArray(NoteWriter.CHILDREN);
        if (null != a) {
            for (int i = 0; i < a.length(); i++) {
                JSONObject jc = a.getJSONObject(i);
                n.addChild(fromJSON(jc));
            }
        }

        return n;
    }

    public static class NoteParsingException extends Exception {
        public NoteParsingException(final String message) {
            super(message);
        }

        public NoteParsingException(final int lineNumber,
                                    final String message) {
            super("line " + lineNumber + ": " + message);
        }
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy