com.igormaznitsa.mindmap.model.parser.MindMapLexer Maven / Gradle / Ivy

Go to download
/*
 * Copyright 2016 Igor Maznitsa.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.igormaznitsa.mindmap.model.parser;

import javax.annotation.Nonnull;
import javax.annotation.Nullable;

import com.igormaznitsa.meta.annotation.ReturnsOriginal;
import com.igormaznitsa.meta.common.utils.Assertions;

/**
 * Allows to extract lexeme from mind map file.
 */
public final class MindMapLexer {
  /**
   * Type of allowed lexeme.
   */
  public enum TokenType {
    HEAD_LINE,
    HEAD_DELIMITER,
    ATTRIBUTE,
    TOPIC,
    EXTRA_TYPE,
    EXTRA_TEXT,
    WHITESPACE,
    UNKNOWN_LINE
  }

  /**
   * Class contains information about current lexer state.
   */
  public static final class LexerPosition {

    private int offset;
    private TokenType state = TokenType.HEAD_LINE;
    private boolean tokenCompleted;

    private LexerPosition(@Nonnull final LexerPosition pos) {
      this.offset = pos.offset;
      this.state = pos.state;
      this.tokenCompleted = pos.tokenCompleted;
    }

    private LexerPosition(final int offset, @Nonnull final TokenType state) {
      this.tokenCompleted = true;
      this.offset = offset;
      this.state = Assertions.assertNotNull(state);
    }

    public int getOffset() {
      return this.offset;
    }

    public boolean isTokenCompleted() {
      return this.tokenCompleted;
    }

    @Nonnull
    public TokenType getState() {
      return this.state;
    }

    public void set(@Nullable final LexerPosition position) {
      if (position != null && this != position) {
        this.offset = position.offset;
        this.state = position.state;
        this.tokenCompleted = position.tokenCompleted;
      }
    }

    @Nonnull
    public LexerPosition makeCopy() {
      return new LexerPosition(this);
    }
  }

  private CharSequence buffer = "";
  private int endOffset;
  private int tokenStart;
  private int tokenEnd;
  private TokenType tokenType = TokenType.UNKNOWN_LINE;
  private final LexerPosition position = new LexerPosition(0, TokenType.UNKNOWN_LINE);

  public int getTokenStartOffset() {
    return this.tokenStart;
  }

  public int getTokenEndOffset() {
    return this.tokenEnd;
  }

  public void start(@Nonnull final CharSequence buffer, final int startOffset, final int endOffset, @Nonnull final MindMapLexer.TokenType initialState) {
    this.buffer = buffer;
    this.tokenType = initialState;
    this.position.offset = startOffset;
    this.position.tokenCompleted = true;
    this.position.state = this.tokenType;
    this.endOffset = endOffset;
  }

  public void setBufferEndOffset(final int value) {
    this.endOffset = value;
  }

  @Nonnull
  public CharSequence getTokenSequence() {
    return getBufferSequence().subSequence(this.tokenStart, this.tokenEnd);
  }

  @Nonnull
  public String getTokenText() {
    return getTokenSequence().toString();
  }

  @Nullable
  public TokenType getTokenType() {
    return this.tokenStart == this.tokenEnd ? null : this.tokenType;
  }

  @Nonnull
  public TokenPosition makeTokenPosition() {
    return new TokenPosition(this.tokenStart, this.tokenEnd);
  }

  public void advance() {
    boolean tokenHasBeenCompleted = this.position.isTokenCompleted();
    this.tokenStart = tokenHasBeenCompleted ? this.position.offset : this.tokenStart;
    boolean inAction = true;

    while (inAction && !isBufferEnd()) {
      switch (this.position.state) {
        case HEAD_LINE: {
          tokenHasBeenCompleted = skipToNextLine();
          if (tokenHasBeenCompleted && isAllLineFromChars('-')) {
            this.position.state = TokenType.HEAD_DELIMITER;
          }
          inAction = false;
        }
        break;
        case HEAD_DELIMITER: {
          this.position.state = TokenType.WHITESPACE;
        }
        break;
        case WHITESPACE: {
          skipAllWhitespaceAndSpecial();
          if (this.position.offset > this.tokenStart || isBufferEnd()) {
            tokenHasBeenCompleted = true;
            inAction = false;
          }
          else {
            final char chr = readChar();
            switch (chr) {
              case '#': {
                this.position.state = TokenType.TOPIC;
              }
              break;
              case '-':
              case '>': {
                if (isBufferEnd()) {
                  this.position.state = chr == '>' ? TokenType.ATTRIBUTE : TokenType.EXTRA_TYPE;
                  tokenHasBeenCompleted = false;
                  inAction = false;
                }
                else {
                  this.position.state = readChar() == ' ' ? chr == '>' ? TokenType.ATTRIBUTE : TokenType.EXTRA_TYPE : TokenType.UNKNOWN_LINE;
                }
              }
              break;
              case '<': {
                tokenHasBeenCompleted = false;
                this.position.state = TokenType.EXTRA_TEXT;
              }
              break;
              default: {
                this.position.state = TokenType.UNKNOWN_LINE;
              }
              break;
            }
          }
        }
        break;
        case EXTRA_TEXT: {
          if (getTokenLength() <= 5 && !isTokenMayStartWith("")) {
            this.position.state = TokenType.UNKNOWN_LINE;
          }
          else if (readChar() == '>' && getTokenLength() > 5) {
            if (prevTextInBufferIs("")) {
              tokenHasBeenCompleted = true;
              inAction = false;
            }
          }
        }
        break;
        case ATTRIBUTE:
        case EXTRA_TYPE: {
          if (!isBufferEnd()) {
            if (getTokenLength() == 1) {
              if (readChar() != ' ') {
                this.position.state = TokenType.UNKNOWN_LINE;
                continue;
              }
            }
            tokenHasBeenCompleted = skipToNextLine();
            inAction = false;
          }
        }
        break;
        case TOPIC:
        case UNKNOWN_LINE: {
          tokenHasBeenCompleted = skipToNextLine();
          inAction = false;
        }
        break;
        default:
          throw Assertions.fail("Detected unexpected lexer state " + this.position.state);
      }
    }

    this.position.tokenCompleted = tokenHasBeenCompleted;
    this.tokenType = this.position.getState();
    this.tokenEnd = this.position.getOffset();
    this.tokenType = this.position.state;
    if (tokenHasBeenCompleted) {
      if (this.tokenType == TokenType.HEAD_LINE) {
        if (hasTextAt("> ", this.tokenStart)) {
          this.tokenType = TokenType.ATTRIBUTE;
        }
      }
      else {
        this.position.state = TokenType.WHITESPACE;
      }
    }
  }

  private int getTokenLength() {
    return this.position.offset - this.tokenStart;
  }

  private boolean prevTextInBufferIs(@Nonnull final String text) {
    final int len = text.length();
    int startPos = this.position.offset - len;
    if (startPos < 0) {
      return false;
    }
    for (int i = 0; i < len; i++) {
      if (this.buffer.charAt(startPos++) != text.charAt(i)) {
        return false;
      }
    }
    return true;
  }

  private boolean hasTextAt(@Nonnull final String text, int position) {
    boolean result = false;
    if (position >= 0 && position + text.length() <= this.buffer.length()) {
      boolean ok = true;
      for (int i = 0; i < text.length(); i++) {
        if (text.charAt(i) != this.buffer.charAt(position++)) {
          ok = false;
          break;
        }
      }
      result = ok;
    }
    return result;
  }

  private boolean isBufferEnd() {
    return this.position.offset >= this.endOffset;
  }

  private boolean isTokenMayStartWith(@Nonnull final String text) {
    boolean result = true;
    int index = 0;
    for (int i = this.tokenStart; i <= this.position.offset && index < text.length(); i++) {
      if (text.charAt(index++) != this.buffer.charAt(i)) {
        result = false;
        break;
      }
    }
    return result;
  }

  private boolean isAllLineFromChars(final char c) {
    boolean detected = false;
    final int prelimit = this.position.offset - 1;

    for (int i = this.tokenStart; i < this.position.offset; i++) {
      final char chr = this.buffer.charAt(i);
      if ((chr == '\r') || (chr == '\n' && i == prelimit)) {
        continue;
      }
      if (chr != c) {
        return false;
      }
      else {
        detected = true;
      }
    }
    return detected;
  }

  private void skipAllWhitespaceAndSpecial() {
    while (!isBufferEnd()) {
      final char chr = readChar();
      if (!(Character.isWhitespace(chr) || Character.isISOControl(chr))) {
        back();
        break;
      }
    }
  }

  private boolean skipToNextLine() {
    boolean result = false;
    while (!isBufferEnd()) {
      if (readChar() == '\n') {
        result = true;
        break;
      }
    }
    return this.buffer.length() == this.position.offset || result;
  }

  private char readChar() {
    return this.buffer.charAt(this.position.offset++);
  }

  private void back() {
    if (this.position.offset > 0) {
      this.position.offset--;
    }
  }

  @Nonnull
  @ReturnsOriginal
  public LexerPosition getCurrentPosition() {
    return this.position;
  }

  public void restore(@Nonnull final LexerPosition position) {
    if (position != this.position) {
      this.position.set(position);
    }
  }

  @Nonnull
  public CharSequence getBufferSequence() {
    return this.buffer;
  }

  public int getBufferEnd() {
    return this.endOffset;
  }
}