All downloads are free. Search and download functionality uses the official Maven repository.

com.igormaznitsa.mindmap.model.parser.MindMapLexer Maven / Gradle / Ivy

/*
 * Copyright (C) 2015-2022 Igor A. Maznitsa
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.igormaznitsa.mindmap.model.parser;

import static java.util.Objects.requireNonNull;

/**
 * Allows to extract lexeme from mind map file.
 */
public final class MindMapLexer {

  private final LexerPosition position = new LexerPosition(0, TokenType.UNKNOWN_LINE);
  private CharSequence buffer = "";
  private int endOffset;
  private int tokenStart;
  private int tokenEnd;
  private TokenType tokenType = TokenType.UNKNOWN_LINE;

  /**
   * Returns start offset of token
   *
   * @return token start offset
   */
  public int getTokenStartOffset() {
    return this.tokenStart;
  }

  /**
   * Returns end offset of token
   *
   * @return token end offset
   */
  public int getTokenEndOffset() {
    return this.tokenEnd;
  }

  /**
   * Start next token.
   *
   * @param buffer       buffer to be used, must not be null
   * @param startOffset  start offset
   * @param endOffset    end offset
   * @param initialState initial state of the lexer, must not be null
   */
  public void start(
      final CharSequence buffer,
      final int startOffset,
      final int endOffset,
      final MindMapLexer.TokenType initialState
  ) {
    this.buffer = buffer;
    this.tokenType = initialState;
    this.position.offset = startOffset;
    this.position.tokenCompleted = true;
    this.position.state = this.tokenType;
    this.endOffset = endOffset;
  }

  /**
   * Set end offset
   *
   * @param value value to be used as token end offset
   */
  public void setBufferEndOffset(final int value) {
    this.endOffset = value;
  }

  /**
   * Generate char sequence for current buffer state
   *
   * @return current buffered char sequence for position, must not be null
   */
  public CharSequence getTokenSequence() {
    return getBufferSequence().subSequence(this.tokenStart, this.tokenEnd);
  }

  /**
   * Generate current buffer state as string
   *
   * @return string currently presented in buffer, must not be null
   */
  public String getTokenText() {
    return getTokenSequence().toString();
  }

  /**
   * Get token type
   *
   * @return current token type, can be null if there is no any token
   */
  public TokenType getTokenType() {
    return this.tokenStart == this.tokenEnd ? null : this.tokenType;
  }

  /**
   * Generate token position.
   *
   * @return created token position object for current state, must not be null
   */
  public TokenPosition makeTokenPosition() {
    return new TokenPosition(this.tokenStart, this.tokenEnd);
  }

  /**
   * Reset token type to null
   */
  public void resetTokenTypeToNull() {
    this.tokenType = null;
  }

  /**
   * Read next token.
   */
  public void advance() {
    boolean tokenHasBeenCompleted = this.position.isTokenCompleted();
    this.tokenStart = tokenHasBeenCompleted ? this.position.offset : this.tokenStart;
    boolean inAction = true;

    while (inAction && !isBufferEnd()) {
      switch (this.position.state) {
        case HEAD_LINE: {
          tokenHasBeenCompleted = skipToNextLine();
          if (tokenHasBeenCompleted && isAllLineFromChars('-')) {
            this.position.state = TokenType.HEAD_DELIMITER;
          }
          inAction = false;
        }
        break;
        case HEAD_DELIMITER: {
          this.position.state = TokenType.WHITESPACE;
        }
        break;
        case CODE_SNIPPET_END:
        case WHITESPACE: {
          skipAllWhitespaceAndSpecial();
          if (this.position.offset > this.tokenStart || isBufferEnd()) {
            tokenHasBeenCompleted = true;
            inAction = false;
          } else {
            final char chr = readChar();
            switch (chr) {
              case '#': {
                this.position.state = TokenType.TOPIC_LEVEL;
              }
              break;
              case '-':
              case '>': {
                if (isBufferEnd()) {
                  this.position.state = chr == '>' ? TokenType.ATTRIBUTE : TokenType.EXTRA_TYPE;
                  tokenHasBeenCompleted = false;
                  inAction = false;
                } else {
                  this.position.state =
                      readChar() == ' ' ? chr == '>' ? TokenType.ATTRIBUTE : TokenType.EXTRA_TYPE :
                          TokenType.UNKNOWN_LINE;
                }
              }
              break;
              case '<': {
                tokenHasBeenCompleted = false;
                this.position.state = TokenType.EXTRA_TEXT;
              }
              break;
              default: {
                this.position.state = TokenType.UNKNOWN_LINE;
              }
              break;
            }
          }
        }
        break;
        case EXTRA_TEXT: {
          if (getTokenLength() <= 5 && !isTokenMayStartWith("
")) {
            this.position.state = TokenType.UNKNOWN_LINE;
          } else if (readChar() == '>' && getTokenLength() > 5) {
            if (prevTextInBufferIs("
")) { tokenHasBeenCompleted = true; inAction = false; } } } break; case CODE_SNIPPET_START: { tokenHasBeenCompleted = toStartPositionOfCodeSnippetEnd(); if (isEmptyToken()) { this.position.state = TokenType.CODE_SNIPPET_END; } else { this.position.state = TokenType.CODE_SNIPPET_BODY; inAction = false; } } break; case ATTRIBUTE: case EXTRA_TYPE: { if (!isBufferEnd()) { if (getTokenLength() == 1) { if (readChar() != ' ') { this.position.state = TokenType.UNKNOWN_LINE; continue; } } tokenHasBeenCompleted = skipToNextLine(); inAction = false; } } break; case TOPIC_LEVEL: { if (!isBufferEnd()) { final char ch = readChar(); if (ch == '#') { continue; } else if (!Character.isWhitespace(ch) || ch == '\n') { back(); } tokenHasBeenCompleted = true; inAction = false; } } break; case TOPIC_TITLE: case UNKNOWN_LINE: { tokenHasBeenCompleted = skipToNextLine(); inAction = false; } break; default: throw new IllegalStateException("Detected unexpected lexer state " + this.position.state); } } this.position.tokenCompleted = tokenHasBeenCompleted; this.tokenType = this.position.state; this.tokenEnd = this.position.offset; if (tokenHasBeenCompleted) { switch (this.tokenType) { case HEAD_LINE: { if (hasTextAt("> ", this.tokenStart)) { this.tokenType = TokenType.ATTRIBUTE; } } break; case TOPIC_LEVEL: { this.position.state = TokenType.TOPIC_TITLE; } break; case UNKNOWN_LINE: { if (tokenStartsWith("```")) { this.tokenType = isAllLineFromChars('`') ? 
TokenType.CODE_SNIPPET_END : TokenType.CODE_SNIPPET_START; this.position.state = this.tokenType; } } break; default: { this.position.state = TokenType.WHITESPACE; } break; } } } private int getTokenLength() { return this.position.offset - this.tokenStart; } private boolean isLineStart() { boolean result; final int startPos = this.position.offset - 1; if (startPos < 0) { result = true; } else { result = this.buffer.charAt(startPos) == '\n'; } return result; } private boolean isEmptyToken() { return this.position.offset == this.tokenStart; } private boolean prevTextInBufferIs(final String text) { final int len = text.length(); int startPos = this.position.offset - len; if (startPos < 0) { return false; } for (int i = 0; i < len; i++) { if (this.buffer.charAt(startPos++) != text.charAt(i)) { return false; } } return true; } private boolean hasTextAt(final String text, int position) { boolean result = false; if (position >= 0 && position + text.length() <= this.buffer.length()) { boolean ok = true; for (int i = 0; i < text.length(); i++) { if (text.charAt(i) != this.buffer.charAt(position++)) { ok = false; break; } } result = ok; } return result; } private boolean isBufferEnd() { return this.position.offset >= this.endOffset; } private boolean tokenStartsWith(final String text) { boolean result = true; int index = 0; if (this.position.offset - this.tokenStart >= text.length()) { final int end = this.tokenStart + text.length(); for (int i = this.tokenStart; i < end; i++) { if (text.charAt(index++) != this.buffer.charAt(i)) { result = false; break; } } } else { result = false; } return result; } private boolean isTokenMayStartWith(final String text) { boolean result = true; int index = 0; for (int i = this.tokenStart; i <= this.position.offset && index < text.length(); i++) { if (text.charAt(index++) != this.buffer.charAt(i)) { result = false; break; } } return result; } private boolean isAllLineFromChars(final char c) { boolean detected = false; final int preLimit = 
this.position.offset - 1; for (int i = this.tokenStart; i < this.position.offset; i++) { final char chr = this.buffer.charAt(i); if ((chr == '\r') || (chr == '\n' && i == preLimit)) { continue; } if (chr != c) { return false; } else { detected = true; } } return detected; } private void skipAllWhitespaceAndSpecial() { while (!isBufferEnd()) { final char chr = readChar(); if (!(Character.isWhitespace(chr) || Character.isISOControl(chr))) { back(); break; } } } private boolean toStartPositionOfCodeSnippetEnd() { boolean found = false; boolean lineStart = isLineStart(); int lineStartPosition = lineStart ? this.position.offset : -1; int startingBacktickCounter = 0; int detectedSpaces = 0; while (!found && !isBufferEnd()) { final char ch = readChar(); switch (ch) { case '`': { if (detectedSpaces == 0 && (startingBacktickCounter > 0 || lineStart)) { startingBacktickCounter++; } else { startingBacktickCounter = 0; } lineStart = false; } break; case '\n': { if (startingBacktickCounter == 3) { found = true; } else { lineStartPosition = this.position.offset; startingBacktickCounter = 0; } lineStart = true; detectedSpaces = 0; } break; default: { if (Character.isWhitespace(ch)) { detectedSpaces++; } else if (!Character.isISOControl(ch) && !Character.isWhitespace(ch)) { startingBacktickCounter = 0; } lineStart = false; } break; } } if (found || startingBacktickCounter == 3) { found = true; this.position.offset = lineStartPosition; } return found; } private boolean skipToNextLine() { boolean result = false; while (!isBufferEnd()) { if (readChar() == '\n') { result = true; break; } } return this.buffer.length() == this.position.offset || result; } private char readChar() { return this.buffer.charAt(this.position.offset++); } private void back() { if (this.position.offset > 0) { this.position.offset--; } } /** * Get current lexer position. 
* * @return current lexer position, can't be null */ public LexerPosition getCurrentPosition() { return this.position; } /** * Restore to lexer position. * * @param position position to be used as restored one, can't be null */ public void restore(final LexerPosition position) { if (position != this.position) { this.position.set(position); } } /** * Get internal buffer. * * @return internal char buffer, can't be null */ public CharSequence getBufferSequence() { return this.buffer; } /** * Get current buffer end offset * * @return current buffer end offset */ public int getBufferEnd() { return this.endOffset; } /** * Type of allowed lexeme. */ public enum TokenType { HEAD_LINE, HEAD_DELIMITER, ATTRIBUTE, TOPIC_LEVEL, TOPIC_TITLE, CODE_SNIPPET_START, CODE_SNIPPET_BODY, CODE_SNIPPET_END, EXTRA_TYPE, EXTRA_TEXT, WHITESPACE, UNKNOWN_LINE } /** * Class contains information about current lexer state. */ public static final class LexerPosition { private int offset; private TokenType state; private boolean tokenCompleted; private LexerPosition(final LexerPosition pos) { this.offset = pos.offset; this.state = pos.state; this.tokenCompleted = pos.tokenCompleted; } private LexerPosition(final int offset, final TokenType state) { this.tokenCompleted = true; this.offset = offset; this.state = requireNonNull(state); } /** * Get offset of position * * @return position offset */ public int getOffset() { return this.offset; } /** * Check that token completed * * @return true if token completed, false otherwise */ public boolean isTokenCompleted() { return this.tokenCompleted; } /** * Get token type * * @return token type */ public TokenType getState() { return this.state; } /** * Set position * * @param position new position, must not be null */ public void set(final LexerPosition position) { if (this != requireNonNull(position)) { this.offset = position.offset; this.state = position.state; this.tokenCompleted = position.tokenCompleted; } } /** * Make copy of position * * @return 
cloned position, must not be null */ public LexerPosition makeCopy() { return new LexerPosition(this); } } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy