org.metafacture.formeta.parser.TextParserState Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of metafacture-formeta Show documentation
Show all versions of metafacture-formeta Show documentation
Modules for reading and writing data in Formeta format
/*
* Copyright 2013, 2014 Deutsche Nationalbibliothek
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.metafacture.formeta.parser;
import org.metafacture.formeta.Formeta;
import org.metafacture.framework.FormatException;
/**
 * FSA for parsing identifiers, names and values. The initial parser
 * state is {@code LEADING_WHITESPACE}, the final (accepting) state
 * is {@code DELIMITER_REACHED}.
 * <p>
 * Every state consumes a single character through
 * {@link #processChar(char, TextParserContext)} and returns the state the
 * parser should be in afterwards. Collected characters are handed to the
 * {@link TextParserContext}; the delimiter character that ends a text is
 * never handed to the context by this FSA.
 */
enum TextParserState {

    /**
     * Initial state. Whitespace is skipped; the first non-whitespace
     * character decides whether the text is quoted, starts with an escape
     * sequence, is empty (a delimiter follows immediately) or is plain
     * unquoted text. The quoted flag of the context is set on every
     * transition that leaves this state.
     */
    LEADING_WHITESPACE {
        @Override
        public TextParserState processChar(final char ch, final TextParserContext ctx) {
            final TextParserState newState;
            switch (ch) {
                case Formeta.ESCAPE_CHAR:
                    ctx.setQuoted(false);
                    newState = ESCAPE_SEQ;
                    break;
                case Formeta.QUOT_CHAR:
                    ctx.setQuoted(true);
                    newState = QUOTED_TEXT;
                    break;
                case Formeta.GROUP_START:
                case Formeta.GROUP_END:
                case Formeta.ITEM_SEPARATOR:
                case Formeta.NAME_VALUE_SEPARATOR:
                    // A delimiter before any text: nothing is appended.
                    ctx.setQuoted(false);
                    newState = DELIMITER_REACHED;
                    break;
                default:
                    if (Formeta.isWhitespace(ch)) {
                        // Still in the leading whitespace; the character is dropped.
                        newState = LEADING_WHITESPACE;
                    }
                    else {
                        ctx.setQuoted(false);
                        ctx.appendChar(ch);
                        newState = TEXT;
                    }
            }
            return newState;
        }
    },

    /**
     * Unquoted text. Characters are appended until a delimiter is seen.
     * Whitespace and the quote character get no special treatment here:
     * they fall into the default branch and are appended like any other
     * character.
     */
    TEXT {
        @Override
        public TextParserState processChar(final char ch, final TextParserContext ctx) {
            final TextParserState newState;
            switch (ch) {
                case Formeta.ESCAPE_CHAR:
                    newState = ESCAPE_SEQ;
                    break;
                case Formeta.GROUP_START:
                case Formeta.GROUP_END:
                case Formeta.ITEM_SEPARATOR:
                case Formeta.NAME_VALUE_SEPARATOR:
                    newState = DELIMITER_REACHED;
                    break;
                default:
                    ctx.appendChar(ch);
                    newState = TEXT;
            }
            return newState;
        }
    },

    /**
     * The escape character was seen in unquoted text. The next character,
     * whatever it is, is passed to the context as an escaped character and
     * parsing continues in {@code TEXT}.
     */
    ESCAPE_SEQ {
        @Override
        public TextParserState processChar(final char ch, final TextParserContext ctx) {
            ctx.appendEscapedChar(ch);
            return TEXT;
        }

        /**
         * @throws FormatException always, because the input ended right
         *         after the escape character
         */
        @Override
        public void endOfInput(final TextParserContext ctx) {
            throw new FormatException("incomplete escape sequence");
        }
    },

    /**
     * Inside a quoted text. Everything except the escape character and the
     * closing quote character is appended verbatim, including delimiter
     * characters and whitespace.
     */
    QUOTED_TEXT {
        @Override
        public TextParserState processChar(final char ch, final TextParserContext ctx) {
            final TextParserState newState;
            switch (ch) {
                case Formeta.ESCAPE_CHAR:
                    newState = QUOTED_ESCAPE_SEQ;
                    break;
                case Formeta.QUOT_CHAR:
                    // Closing quote: only whitespace or a delimiter may follow.
                    newState = TRAILING_WHITESPACE;
                    break;
                default:
                    ctx.appendChar(ch);
                    newState = QUOTED_TEXT;
            }
            return newState;
        }

        /**
         * @throws FormatException always, because the input ended before
         *         the closing quote character
         */
        @Override
        public void endOfInput(final TextParserContext ctx) {
            throw new FormatException("quoted string is not terminated");
        }
    },

    /**
     * The escape character was seen inside a quoted text. The next
     * character is passed to the context as an escaped character and
     * parsing continues in {@code QUOTED_TEXT}.
     */
    QUOTED_ESCAPE_SEQ {
        @Override
        public TextParserState processChar(final char ch, final TextParserContext ctx) {
            ctx.appendEscapedChar(ch);
            return QUOTED_TEXT;
        }

        /**
         * @throws FormatException always, because the input ended inside an
         *         escape sequence of a quoted text
         */
        @Override
        public void endOfInput(final TextParserContext ctx) {
            throw new FormatException("incomplete escape sequence and quoted string is not terminated");
        }
    },

    /**
     * After the closing quote of a quoted text. Whitespace is skipped and
     * a delimiter finishes the text; any other character is an error.
     */
    TRAILING_WHITESPACE {
        /**
         * @throws FormatException if {@code ch} is neither whitespace nor
         *         one of the delimiter characters
         */
        @Override
        public TextParserState processChar(final char ch, final TextParserContext ctx) {
            final TextParserState newState;
            switch (ch) {
                case Formeta.GROUP_START:
                case Formeta.GROUP_END:
                case Formeta.ITEM_SEPARATOR:
                case Formeta.NAME_VALUE_SEPARATOR:
                    newState = DELIMITER_REACHED;
                    break;
                default:
                    if (Formeta.isWhitespace(ch)) {
                        newState = TRAILING_WHITESPACE;
                    }
                    else {
                        // Build the list of acceptable characters for the error message.
                        final String sep = "', '";
                        final String expected = "whitespace or one of '" +
                                Formeta.GROUP_START + sep +
                                Formeta.GROUP_END + sep +
                                Formeta.ITEM_SEPARATOR + sep +
                                Formeta.NAME_VALUE_SEPARATOR + "'";
                        throw new FormatException(getUnexpectedCharMsg(expected, ch));
                    }
            }
            return newState;
        }
    },

    /**
     * Final (accepting) state. No further characters may be fed to the
     * FSA once it has been reached.
     */
    DELIMITER_REACHED {
        /**
         * @throws UnsupportedOperationException always
         */
        @Override
        public TextParserState processChar(final char ch, final TextParserContext ctx) {
            throw new UnsupportedOperationException("Cannot process characters in state DELIMITER_REACHED");
        }
    };

    /**
     * Processes a single input character in the current state.
     *
     * @param ch  the character to process
     * @param ctx the context that receives the quoted flag and the
     *            characters collected so far
     * @return the state the parser is in after {@code ch} has been consumed
     */
    public abstract TextParserState processChar(char ch, TextParserContext ctx);

    /**
     * Hook invoked when the input ends while the parser is in this state.
     * States in which the end of input is an error override this method
     * and throw a {@link FormatException}.
     *
     * @param ctx the parser context; unused by the default implementation
     */
    public void endOfInput(final TextParserContext ctx) {
        // Default implementation does nothing
    }

    /**
     * Builds the error message for an unexpected character.
     *
     * @param expected description of what would have been accepted
     * @param actual   the character that was found instead
     * @return the message {@code <expected> expected but got '<actual>'}
     */
    private static String getUnexpectedCharMsg(final String expected, final char actual) {
        return expected + " expected but got '" + actual + "'";
    }

}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy