All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.metafacture.formeta.parser.TextParserState Maven / Gradle / Ivy

There is a newer version: 6.2.0
Show newest version
/*
 * Copyright 2013, 2014 Deutsche Nationalbibliothek
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.metafacture.formeta.parser;

import org.metafacture.formeta.Formeta;
import org.metafacture.framework.FormatException;

/**
 * FSA for parsing identifiers, names and values. The initial parser
 * state is {@code LEADING_WHITESPACE}, the final (accepting) state
 * is {@code DELIMITER_REACHED}.
 *
 * <p>Every constant implements {@link #processChar(char, TextParserContext)},
 * which consumes a single character and returns the follow-up state. States
 * in which the input must not end additionally override
 * {@link #endOfInput(TextParserContext)} to throw a {@link FormatException}.
 */
enum TextParserState {

    /**
     * Skips whitespace in front of the text. The first non-whitespace
     * character decides how parsing continues and whether the context is
     * flagged as quoted (only an opening quote character sets the flag to
     * {@code true}).
     */
    LEADING_WHITESPACE {
        @Override
        public TextParserState processChar(final char ch, final TextParserContext ctx) {
            final TextParserState newState;
            switch (ch) {
                case Formeta.ESCAPE_CHAR:
                    // Unquoted text that starts with an escape sequence.
                    ctx.setQuoted(false);
                    newState = ESCAPE_SEQ;
                    break;
                case Formeta.QUOT_CHAR:
                    ctx.setQuoted(true);
                    newState = QUOTED_TEXT;
                    break;
                case Formeta.GROUP_START:
                case Formeta.GROUP_END:
                case Formeta.ITEM_SEPARATOR:
                case Formeta.NAME_VALUE_SEPARATOR:
                    // A delimiter before any text: nothing has been appended.
                    ctx.setQuoted(false);
                    newState = DELIMITER_REACHED;
                    break;
                default:
                    if (Formeta.isWhitespace(ch)) {
                        newState = LEADING_WHITESPACE;
                    }
                    else {
                        // First character of an unquoted text.
                        ctx.setQuoted(false);
                        ctx.appendChar(ch);
                        newState = TEXT;
                    }
            }
            return newState;
        }
    },

    /**
     * Inside unquoted text. Every character that is neither the escape
     * character nor a delimiter is appended to the context as is; this
     * includes whitespace.
     */
    TEXT {
        @Override
        public TextParserState processChar(final char ch, final TextParserContext ctx) {
            final TextParserState newState;
            switch (ch) {
                case Formeta.ESCAPE_CHAR:
                    newState = ESCAPE_SEQ;
                    break;
                case Formeta.GROUP_START:
                case Formeta.GROUP_END:
                case Formeta.ITEM_SEPARATOR:
                case Formeta.NAME_VALUE_SEPARATOR:
                    newState = DELIMITER_REACHED;
                    break;
                default:
                    ctx.appendChar(ch);
                    newState = TEXT;
            }
            return newState;
        }
    },

    /**
     * Directly after an escape character in unquoted text. The next
     * character is handed to {@code ctx.appendEscapedChar} and parsing
     * continues in {@code TEXT}.
     */
    ESCAPE_SEQ {
        @Override
        public TextParserState processChar(final char ch, final TextParserContext ctx) {
            ctx.appendEscapedChar(ch);
            return TEXT;
        }

        /**
         * @throws FormatException always, because the escape character was
         *                         not followed by another character
         */
        @Override
        public void endOfInput(final TextParserContext ctx) {
            throw new FormatException("incomplete escape sequence");
        }
    },

    /**
     * Inside quoted text. Delimiters and whitespace have no special meaning
     * here and are appended like any other character; only the escape
     * character and the closing quote character are handled specially.
     */
    QUOTED_TEXT {
        @Override
        public TextParserState processChar(final char ch, final TextParserContext ctx) {
            final TextParserState newState;
            switch (ch) {
                case Formeta.ESCAPE_CHAR:
                    newState = QUOTED_ESCAPE_SEQ;
                    break;
                case Formeta.QUOT_CHAR:
                    // Closing quote: only whitespace or a delimiter may follow.
                    newState = TRAILING_WHITESPACE;
                    break;
                default:
                    ctx.appendChar(ch);
                    newState = QUOTED_TEXT;
            }
            return newState;
        }

        /**
         * @throws FormatException always, because the closing quote
         *                         character is missing
         */
        @Override
        public void endOfInput(final TextParserContext ctx) {
            throw new FormatException("quoted string is not terminated");
        }
    },

    /**
     * Directly after an escape character in quoted text. The next character
     * is handed to {@code ctx.appendEscapedChar} and parsing continues in
     * {@code QUOTED_TEXT}.
     */
    QUOTED_ESCAPE_SEQ {
        @Override
        public TextParserState processChar(final char ch, final TextParserContext ctx) {
            ctx.appendEscapedChar(ch);
            return QUOTED_TEXT;
        }

        /**
         * @throws FormatException always, because both the escape sequence
         *                         and the quoted string are incomplete
         */
        @Override
        public void endOfInput(final TextParserContext ctx) {
            throw new FormatException("incomplete escape sequence and quoted string is not terminated");
        }
    },

    /**
     * After the closing quote of a quoted text. Whitespace is skipped; any
     * character other than whitespace or a delimiter is rejected with a
     * {@link FormatException}.
     */
    TRAILING_WHITESPACE {
        @Override
        public TextParserState processChar(final char ch, final TextParserContext ctx) {
            final TextParserState newState;
            switch (ch) {
                case Formeta.GROUP_START:
                case Formeta.GROUP_END:
                case Formeta.ITEM_SEPARATOR:
                case Formeta.NAME_VALUE_SEPARATOR:
                    newState = DELIMITER_REACHED;
                    break;
                default:
                    if (Formeta.isWhitespace(ch)) {
                        newState = TRAILING_WHITESPACE;
                    }
                    else {
                        // Build the list of accepted delimiters for the error message.
                        final String sep = "', '";
                        final String expected = "whitespace or one of '" +
                            Formeta.GROUP_START + sep +
                            Formeta.GROUP_END + sep +
                            Formeta.ITEM_SEPARATOR + sep +
                            Formeta.NAME_VALUE_SEPARATOR + "'";
                        throw new FormatException(getUnexpectedCharMsg(expected, ch));
                    }
            }
            return newState;
        }
    },

    /**
     * Final state, entered once a delimiter has been read. No further
     * characters may be processed in this state.
     */
    DELIMITER_REACHED {
        /**
         * @throws UnsupportedOperationException always
         */
        @Override
        public TextParserState processChar(final char ch, final TextParserContext ctx) {
            throw new UnsupportedOperationException("Cannot process characters in state DELIMITER_REACHED");
        }
    };

    /**
     * Processes a single input character in the current state.
     *
     * @param ch  the character to process
     * @param ctx the context that receives the parsed characters and the
     *            quoted flag
     * @return the state in which the next character must be processed
     */
    public abstract TextParserState processChar(char ch, TextParserContext ctx);

    /**
     * Signals that the input ended while the parser was in this state.
     * The default implementation accepts the end of input silently; states
     * in which the input must not end override this method and throw a
     * {@link FormatException}.
     *
     * @param ctx the parser context (unused by the default implementation)
     */
    public void endOfInput(final TextParserContext ctx) {
        // Default implementation does nothing
    }

    /**
     * Builds the error message for an unexpected character.
     *
     * @param expected description of what would have been accepted
     * @param actual   the character that was actually read
     * @return the message "{@code <expected> expected but got '<actual>'}"
     */
    private static String getUnexpectedCharMsg(final String expected, final char actual) {
        return expected + " expected but got '" + actual + "'";
    }

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy