![JAR search and dependency download from the Maven repository](/logo.png)
net.java.textilej.parser.markup.Dialect Maven / Gradle / Ivy
The newest version!
package net.java.textilej.parser.markup;
import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.List;
import java.util.Stack;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import net.java.textilej.parser.DocumentBuilder;
import net.java.textilej.parser.MarkupParser;
import net.java.textilej.parser.outline.OutlineParser;
import net.java.textilej.util.LocationTrackingReader;
/**
* A markup dialect, which knows its formatting rules and is able to
* process content based on {@link Block}, {@link PatternBasedElementProcessor} and {@link PatternBasedElement}
* concepts. All markup languages supported by Textile-J extend this class.
*
* The Dialect class provides basic functionality for determining which blocks process
* which markup content in a particular document. In general multi-line documents are split into
* consecutive regions called blocks, and each line in a block is processed with spanning sections
* called phrase modifiers, and tokens within a span are replaced with their respective replacement
* tokens. These rules apply to most markup languages, however subclasses may override this default
* functionality if required. For example, by default phrase modifiers are non-overlapping and non-nested,
* however if required a subclass could permit such nesting.
*
* Generally dialect classes are not accessed directly by client code, instead client code should
* configure and call {@link MarkupParser}.
*
* @author dgreen
*/
public abstract class Dialect {
private String name;
private boolean filterGenerativeBlocks;
private boolean blocksOnly;
/**
 * Creates the state object that tracks a document and its contents for one parse session.
 * {@link #processContent(MarkupParser, String, boolean)} calls this once per invocation.
 * Subclasses may override this method to supply a state with additional tracking capability.
 *
 * @return a newly created state
 */
protected ContentState createState() {
    final ContentState freshState = new ContentState();
    return freshState;
}
/**
 * Parses the given markup line by line and reports it to the parser's builder.
 *
 * For every line the current block is asked to process the line content; when no block is
 * active, {@link #startBlock(String, int)} is consulted to begin one. A block that closes
 * part-way through a line hands the remainder of that line to the next block. A block that is
 * still open when the input ends is closed. While parsing, the state created by
 * {@link #createState()} is installed as the builder's locator; it is removed again on exit,
 * including on failure.
 *
 * @param parser the parser whose builder receives the output
 * @param markupContent the markup to process
 * @param asDocument if true, the output is wrapped in the builder's
 *        beginDocument()/endDocument() calls
 * @throws IllegalStateException if reading the content fails, or if a block stops before the
 *         end of a line without being closed
 */
public void processContent(MarkupParser parser, String markupContent, boolean asDocument) {
    ContentState state = createState();
    state.setMarkupContent(markupContent);
    LocationTrackingReader lineReader = new LocationTrackingReader(new StringReader(markupContent));
    Block activeBlock = null;

    DocumentBuilder builder = parser.getBuilder();
    builder.setLocator(state);
    try {
        if (asDocument) {
            builder.beginDocument();
        }
        try {
            for (String line = lineReader.readLine(); line != null; line = lineReader.readLine()) {
                // position the locator at the start of the line that was just read
                state.setLineNumber(lineReader.getLineNumber()+1);
                state.setLineOffset(lineReader.getLineOffset());
                state.setLineCharacterOffset(0);
                state.setLineSegmentEndOffset(0);
                state.setLineLength(line.length());

                int position = 0;
                while (true) {
                    if (activeBlock == null) {
                        activeBlock = startBlock(line, position);
                        if (activeBlock == null) {
                            // nothing can start here: the rest of the line is dropped
                            break;
                        }
                        activeBlock.setState(state);
                        activeBlock.setParser(parser);
                    }
                    position = activeBlock.processLineContent(line, position);
                    if (activeBlock.isClosed()) {
                        activeBlock = null;
                    }
                    // a negative or past-the-end offset means the whole line was consumed
                    boolean lineFinished = position < 0 || position >= line.length();
                    if (lineFinished) {
                        break;
                    }
                    // only a block that has closed may leave part of the line for its successor
                    if (activeBlock != null) {
                        throw new IllegalStateException("if a block does not fully process a line then it must be closed");
                    }
                }
            }
            // leave the locator pointing just past the last line
            state.setLineNumber(lineReader.getLineNumber()+1);
            state.setLineOffset(lineReader.getLineOffset());
            state.setLineCharacterOffset(0);
            state.setLineLength(0);
        } catch (IOException e) {
            throw new IllegalStateException(e);
        }

        boolean leftOpen = activeBlock != null && !activeBlock.isClosed();
        if (leftOpen) {
            activeBlock.setClosed(true);
        }
        if (asDocument) {
            builder.endDocument();
        }
    } finally {
        builder.setLocator(null);
    }
}
/**
 * Selects the block that begins at the given position of a line.
 *
 * The blocks returned by {@link #getBlocks()} are tried in order and the first one whose
 * canStart(line, lineOffset) accepts is used. The returned block is a clone of that entry.
 *
 * @param line the line content
 * @param lineOffset the offset within the line at which the block would start
 * @return a clone of the first block able to start there, or null if the line is empty
 *         (see {@link #isEmptyLine(String)}) or no block can start
 */
public Block startBlock(String line,int lineOffset) {
    Block started = null;
    // nothing starts on an empty line
    if (!isEmptyLine(line)) {
        for (Block candidate: getBlocks()) {
            if (candidate.canStart(line, lineOffset)) {
                started = candidate.clone();
                break;
            }
        }
    }
    return started;
}
/**
 * The blocks supported by this dialect.
 *
 * {@link #startBlock(String, int)} iterates this list in order and clones the first entry
 * whose canStart accepts the line, so earlier entries take precedence over later ones and the
 * entries themselves serve as prototypes.
 *
 * @return the blocks, in matching order
 */
public abstract List<Block> getBlocks();
/**
 * Emits a markup line that may contain phrase modifiers and replacement tokens, but no
 * block modifiers.
 *
 * When {@link #isBlocksOnly()} is set, the text from <code>offset</code> onward is handed
 * directly to {@link #emitMarkupText(MarkupParser, ContentState, String)}. Otherwise phrase
 * modifiers are located one after another using {@link #getPhraseModifierSyntax()}: text
 * preceding a modifier is emitted as markup text, the modifier emits itself, and scanning
 * resumes at the modifier's end offset. Text after the last modifier is emitted as markup
 * text. Before each emission the state's line character offset and line segment end offset
 * are set to the span being emitted, shifted by <code>textLineOffset</code>.
 *
 * @param parser the parser that receives the output
 * @param state the state to update with the position of each emitted segment
 * @param textLineOffset the offset of the provided text in the current line
 * @param line the text to process
 * @param offset the offset in the text at which processing should begin
 */
public void emitMarkupLine(MarkupParser parser, ContentState state,int textLineOffset, String line, int offset) {
    if (blocksOnly) {
        final String remainder = line.substring(offset);
        emitMarkupText(parser, state, remainder);
        return;
    }
    while (true) {
        PatternBasedElementProcessor match = getPhraseModifierSyntax().findPatternBasedElement(line, offset);
        if (match == null) {
            // no further phrase modifier: everything that is left is plain markup text
            state.setLineCharacterOffset(textLineOffset + offset);
            state.setLineSegmentEndOffset(textLineOffset + line.length());
            emitMarkupText(parser, state, line.substring(offset));
            return;
        }

        int matchStart = match.getLineStartOffset();
        if (matchStart > offset) {
            // text between the previous position and the modifier
            state.setLineCharacterOffset(textLineOffset + offset);
            state.setLineSegmentEndOffset(textLineOffset + matchStart);
            emitMarkupText(parser, state, line.substring(offset, matchStart));
        }

        match.setParser(parser);
        match.setState(state);
        state.setLineCharacterOffset(textLineOffset + match.getLineStartOffset());
        state.setLineSegmentEndOffset(textLineOffset + match.getLineEndOffset());
        match.emit();

        offset = match.getLineEndOffset();
        if (offset >= line.length()) {
            return;
        }
    }
}
/**
 * Emits a markup line that may contain phrase modifiers and replacement tokens, but no
 * block modifiers. Equivalent to calling
 * {@link #emitMarkupLine(MarkupParser, ContentState, int, String, int)} with a text line
 * offset of 0.
 *
 * @param parser the parser that receives the output
 * @param state the state to update with the position of each emitted segment
 * @param line the text to process
 * @param offset the offset in the text at which processing should begin
 */
public void emitMarkupLine(MarkupParser parser,ContentState state,String line,int offset) {
    final int textLineOffset = 0;
    emitMarkupLine(parser, state, textLineOffset, line, offset);
}
/**
 * Emits markup that may contain replacement tokens but no phrase or block modifiers.
 *
 * When {@link #isBlocksOnly()} is set, the text is passed unchanged to the builder's
 * characters method. Otherwise replacement tokens are located one after another using
 * {@link #getReplacementTokenSyntax()}: text preceding a token is emitted through a nested
 * call to this method, the token emits itself, and scanning resumes at the token's end
 * offset. Text after the last token is passed to the builder's characters method.
 *
 * @param parser the parser whose builder receives the output
 * @param state the state handed to each replacement token before it emits
 * @param text the text to process
 */
public void emitMarkupText(MarkupParser parser,ContentState state,String text) {
    if (blocksOnly) {
        parser.getBuilder().characters(text);
        return;
    }
    int cursor = 0;
    while (true) {
        PatternBasedElementProcessor token = getReplacementTokenSyntax().findPatternBasedElement(text, cursor);
        if (token == null) {
            // no further token: flush the tail, avoiding a substring when nothing was consumed
            if (cursor > 0) {
                parser.getBuilder().characters(text.substring(cursor));
            } else {
                parser.getBuilder().characters(text);
            }
            return;
        }

        int tokenStart = token.getLineStartOffset();
        if (tokenStart > cursor) {
            String preceding = text.substring(cursor, tokenStart);
            emitMarkupText(parser, state, preceding);
        }

        token.setParser(parser);
        token.setState(state);
        token.emit();

        cursor = token.getLineEndOffset();
        if (cursor >= text.length()) {
            return;
        }
    }
}
/**
 * Bookkeeping for one nesting level of the regular expression assembled by
 * {@link PatternBasedSyntax}.
 */
private static class Group {
// number of alternatives appended so far at this nesting level; PatternBasedSyntax inserts
// a '|' separator before an alternative only when this is already greater than zero
int count;
}
public static class PatternBasedSyntax {
/** The elements registered through {@link #add(PatternBasedElement)}, in registration order. */
protected List<PatternBasedElement> elements = new ArrayList<PatternBasedElement>();
/** The compiled pattern; reset to null whenever the pattern text changes. */
protected Pattern elementPattern;
/**
 * For each entry of {@link #elements} (same index), the number of the capturing group that
 * wraps that element's pattern within the combined expression.
 */
protected List<Integer> elementGroup = new ArrayList<Integer>();
// source text of the combined regular expression, built up incrementally
private StringBuilder patternBuffer = new StringBuilder();
// running count of capturing groups allocated so far in patternBuffer
private int patternGroup = 0;
// one counter per open nesting level; the entry pushed below represents the top level
private Stack<Group> groups = new Stack<Group>();
{
    groups.push(new Group());
}

/** Creates an empty syntax with no elements. */
public PatternBasedSyntax() {}
/**
 * Registers an element as a further alternative of the combined pattern.
 *
 * The element's pattern is wrapped in its own capturing group, separated by '|' from any
 * alternative already present at the current nesting level. The number of that wrapping
 * group is passed to the element's getPattern and recorded in elementGroup. The
 * running group count is then advanced by the element's getPatternGroupCount().
 *
 * @param element the element to add
 */
public void add(PatternBasedElement element) {
    // the compiled pattern, if any, no longer matches the buffer
    elementPattern = null;
    elements.add(element);
    if (groups.peek().count++ > 0) {
        patternBuffer.append('|');
    }
    final int wrapperGroup = ++patternGroup;
    patternBuffer.append('(').append(element.getPattern(wrapperGroup)).append(')');
    elementGroup.add(wrapperGroup);
    patternGroup += element.getPatternGroupCount();
}
/**
 * Opens a non-capturing group in the combined pattern.
 *
 * A '|' separator is inserted first if the current nesting level already has an
 * alternative; then <code>(?:</code> and the given fragment are appended and a new nesting
 * level begins.
 *
 * @param regexFragment the regular expression text to place at the start of the group
 * @param size the amount by which to advance the running capturing-group count; presumably
 *        the number of capturing groups contained in the fragment (TODO confirm)
 */
public void beginGroup(String regexFragment, int size) {
    final boolean opening = true;
    add(regexFragment, size, opening);
}
/**
 * Closes the current nesting level of the combined pattern: the given fragment is appended,
 * followed by a closing parenthesis.
 *
 * @param regexFragment the regular expression text to place at the end of the group
 * @param size the amount by which to advance the running capturing-group count; presumably
 *        the number of capturing groups contained in the fragment (TODO confirm)
 */
public void endGroup(String regexFragment, int size) {
    final boolean opening = false;
    add(regexFragment, size, opening);
}
// Shared implementation of beginGroup/endGroup: appends the opening or closing part of a
// non-capturing group to the pattern text and advances the group count by the given size.
private void add(String regexFragment, int size,boolean beginGroup) {
    // the compiled pattern, if any, no longer matches the buffer
    elementPattern = null;
    if (beginGroup) {
        // the group as a whole counts as one alternative of the enclosing level
        if (groups.peek().count++ > 0) {
            patternBuffer.append('|');
        }
        groups.push(new Group());
        patternBuffer.append("(?:");
        patternBuffer.append(regexFragment);
    } else {
        groups.pop();
        patternBuffer.append(regexFragment);
        patternBuffer.append(")");
    }
    patternGroup += size;
}
public PatternBasedElementProcessor findPatternBasedElement(String lineText,int offset) {
Matcher matcher = getPattern().matcher(lineText);
if (offset > 0) {
matcher.region(offset, lineText.length());
}
if (matcher.find()) {
int size = elementGroup.size();
for (int x = 0;x 0) {
elementPattern = Pattern.compile(patternBuffer.toString());
} else {
return null;
}
}
return elementPattern;
}
}
/**
 * The syntax used by {@link #emitMarkupLine(MarkupParser, ContentState, int, String, int)}
 * to locate phrase modifiers within a line. It is queried on every scan step.
 *
 * @return the phrase modifier syntax
 */
protected abstract PatternBasedSyntax getPhraseModifierSyntax();
/**
 * The syntax used by {@link #emitMarkupText(MarkupParser, ContentState, String)} to locate
 * replacement tokens within text. It is queried on every scan step.
 *
 * @return the replacement token syntax
 */
protected abstract PatternBasedSyntax getReplacementTokenSyntax();
/**
 * Returns the name of the dialect, typically the same as the name of the markup language
 * that this dialect supports. This value may be displayed to the user.
 *
 * @return the name, or null if unknown
 */
public String getName() {
    return this.name;
}
/**
 * Sets the name of the dialect, typically the same as the name of the markup language
 * that this dialect supports. This value may be displayed to the user.
 *
 * @param name the name that {@link #getName()} will subsequently return
 */
public void setName(String name) {
this.name = name;
}
/**
 * Creates a new dialect of the same runtime class as this one and gives it this dialect's
 * name.
 *
 * The copy is created through the no-argument constructor of the runtime class. Only the
 * name is transferred; the other settings of the copy are whatever that constructor leaves
 * them as.
 *
 * @return the new dialect
 * @throws IllegalStateException if the runtime class cannot be instantiated through a
 *         no-argument constructor
 */
@Override
public Dialect clone() {
    Dialect copy;
    try {
        // Class.newInstance() is deprecated because it lets checked exceptions thrown by the
        // constructor escape undeclared; going through the Constructor object avoids that.
        copy = getClass().getDeclaredConstructor().newInstance();
    } catch (java.lang.reflect.InvocationTargetException e) {
        // report the constructor's own failure as the direct cause, as before
        throw new IllegalStateException(e.getCause());
    } catch (Exception e) {
        throw new IllegalStateException(e);
    }
    copy.setName(name);
    return copy;
}
/**
 * Indicates whether generative contents should be filtered. This option is used with the
 * {@link OutlineParser}.
 *
 * @return true if generative contents should be filtered
 */
public boolean isFilterGenerativeContents() {
    return this.filterGenerativeBlocks;
}
/**
 * Sets whether generative contents should be filtered. This option is used with the
 * {@link OutlineParser}.
 *
 * @param filterGenerativeBlocks the value that {@link #isFilterGenerativeContents()} will
 *        subsequently return
 */
public void setFilterGenerativeContents(boolean filterGenerativeBlocks) {
this.filterGenerativeBlocks = filterGenerativeBlocks;
}
/**
 * Indicates whether the parser should detect blocks only. This is useful in a document
 * partitioner where the partition boundaries are defined by blocks. When set,
 * {@link #emitMarkupLine(MarkupParser, ContentState, int, String, int)} and
 * {@link #emitMarkupText(MarkupParser, ContentState, String)} skip the search for phrase
 * modifiers and replacement tokens.
 *
 * @return true if only blocks are detected
 */
public boolean isBlocksOnly() {
    return this.blocksOnly;
}
/**
 * Sets whether the parser should detect blocks only. This is useful in a document
 * partitioner where the partition boundaries are defined by blocks.
 *
 * @param blocksOnly true to skip the search for phrase modifiers and replacement tokens
 *        when emitting markup lines and markup text
 */
public void setBlocksOnly(boolean blocksOnly) {
this.blocksOnly = blocksOnly;
}
/**
* indicate if the given line is considered 'empty'. The default implementation
* returns true for lines of length 0, and for lines whose only content is whitespace.
*
* @param line the line content
*
* @return true if the given line is considered empty by this dialect
*/
public boolean isEmptyLine(String line) {
if (line.length() == 0) {
return true;
}
for (int x = 0;x
© 2015 - 2025 Weber Informatics LLC | Privacy Policy