/*
 * com.vladsch.flexmark.ext.tables.internal.TableParagraphPreProcessor (Maven / Gradle / Ivy)
 *
 * Go to download
 * Show more of this group. Show more artifacts with this name.
 * Show all versions of flexmark-ext-tables. Show documentation.
 * flexmark-java extension for tables using "|" pipes with optional column spans and table caption.
 * The newest version!
 */
package com.vladsch.flexmark.ext.tables.internal;

import com.vladsch.flexmark.ast.Paragraph;
import com.vladsch.flexmark.ast.Text;
import com.vladsch.flexmark.ast.WhiteSpace;
import com.vladsch.flexmark.ext.tables.TableBlock;
import com.vladsch.flexmark.ext.tables.TableBody;
import com.vladsch.flexmark.ext.tables.TableCaption;
import com.vladsch.flexmark.ext.tables.TableCell;
import com.vladsch.flexmark.ext.tables.TableHead;
import com.vladsch.flexmark.ext.tables.TableRow;
import com.vladsch.flexmark.ext.tables.TableSeparator;
import com.vladsch.flexmark.parser.InlineParser;
import com.vladsch.flexmark.parser.block.CharacterNodeFactory;
import com.vladsch.flexmark.parser.block.ParagraphPreProcessor;
import com.vladsch.flexmark.parser.block.ParagraphPreProcessorFactory;
import com.vladsch.flexmark.parser.block.ParserState;
import com.vladsch.flexmark.parser.core.ReferencePreProcessorFactory;
import com.vladsch.flexmark.util.ast.Block;
import com.vladsch.flexmark.util.ast.DoNotDecorate;
import com.vladsch.flexmark.util.ast.Node;
import com.vladsch.flexmark.util.ast.NodeIterator;
import com.vladsch.flexmark.util.data.DataHolder;
import com.vladsch.flexmark.util.format.TableFormatOptions;
import com.vladsch.flexmark.util.sequence.BasedSequence;
import java.util.ArrayList;
import java.util.BitSet;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.regex.Pattern;
import org.jetbrains.annotations.Nullable;

public class TableParagraphPreProcessor implements ParagraphPreProcessor {
  /** Special characters handed to the inline parser when parsing header and body rows. */
  private static final BitSet pipeCharacters = new BitSet();

  /** Special characters handed to the inline parser when parsing the separator row. */
  private static final BitSet separatorCharacters = new BitSet();

  static {
    pipeCharacters.set('|');

    separatorCharacters.set('|');
    separatorCharacters.set(':');
    separatorCharacters.set('-');
  }

  /** Node factories used by the inline parser for table rows, keyed by the triggering character. */
  private static final Map<Character, CharacterNodeFactory> pipeNodeMap = new HashMap<>();

  /**
   * Holds the same {@code '|'} mapping as {@link #pipeNodeMap}, kept as a separate map instance
   * with its own factory object.
   */
  private static final Map<Character, CharacterNodeFactory> pipeIntelliJNodeMap = new HashMap<>();

  static {
    pipeNodeMap.put('|', newPipeNodeFactory());
    pipeIntelliJNodeMap.put('|', newPipeNodeFactory());
  }

  /**
   * Creates the factory registered for the {@code '|'} character.
   *
   * <p>The factory produces {@link TableColumnSeparator} nodes, reports spaces and tabs on either
   * side of the pipe as skippable, and asks for the skipped whitespace.
   *
   * @return a new, stateless factory instance
   */
  private static CharacterNodeFactory newPipeNodeFactory() {
    return new CharacterNodeFactory() {
      @Override
      public boolean skipNext(char c) {
        return c == ' ' || c == '\t';
      }

      @Override
      public boolean skipPrev(char c) {
        return c == ' ' || c == '\t';
      }

      @Override
      public boolean wantSkippedWhitespace() {
        return true;
      }

      @Override
      public Node get() {
        return new TableColumnSeparator();
      }
    };
  }

  public static ParagraphPreProcessorFactory Factory() {
    return new ParagraphPreProcessorFactory() {
      @Override
      public boolean affectsGlobalScope() {
        return false;
      }

      @Nullable
      @Override
      public Set> getAfterDependents() {
        Set> set = new HashSet<>();
        set.add(ReferencePreProcessorFactory.class);
        return set;
      }

      @Nullable
      @Override
      public Set> getBeforeDependents() {
        return null;
      }

      @Override
      public ParagraphPreProcessor apply(ParserState state) {
        return new TableParagraphPreProcessor(state.getProperties());
      }
    };
  }

  /** Table parsing options, built in the constructor from the supplied data holder. */
  private final TableParserOptions options;
  /** Compiled pattern a line must fully match to count as the separator line; set in the constructor. */
  Pattern TABLE_HEADER_SEPARATOR;

  /**
   * Builds the pattern that recognizes a table header separator line.
   *
   * <p>A column is a run of dashes with an optional colon on either side and optional surrounding
   * whitespace. The pattern accepts a single column with a leading pipe, a single column with a
   * trailing pipe, or two or more pipe-separated columns with optional outer pipes.
   *
   * @param minColumnDashes minimum number of dashes for a column without colons; each colon
   *     lowers the required dash count by one, and no count ever drops below one
   * @param intellijDummyIdentifier when non-empty, the pattern additionally tolerates {@link
   *     TableFormatOptions#INTELLIJ_DUMMY_IDENTIFIER} after whitespace and dashes and around pipes
   * @return the compiled separator-line pattern
   */
  public static Pattern getTableHeaderSeparator(
      int minColumnDashes, String intellijDummyIdentifier) {
    int plainDashes = 1;
    int oneColonDashes = 1;
    int twoColonDashes = 1;
    if (minColumnDashes >= 1) {
      plainDashes = minColumnDashes;
    }
    if (minColumnDashes >= 2) {
      oneColonDashes = minColumnDashes - 1;
    }
    if (minColumnDashes >= 3) {
      twoColonDashes = minColumnDashes - 2;
    }

    // Locale.US keeps the repetition counts as ASCII digits whatever the default locale is.
    String column =
        String.format(
            Locale.US,
            "(?:\\s*-{%1$d,}\\s*|\\s*:-{%2$d,}\\s*|\\s*-{%2$d,}:\\s*|\\s*:-{%3$d,}:\\s*)",
            plainDashes,
            oneColonDashes,
            twoColonDashes);

    String singleWithLeadingPipe = "\\|" + column + "\\|?\\s*";
    String singleWithTrailingPipe = column + "\\|\\s*";
    String multipleColumns = "\\|?(?:" + column + "\\|)+" + column + "\\|?\\s*";
    String basePattern =
        singleWithLeadingPipe + "|" + singleWithTrailingPipe + "|" + multipleColumns;

    // Without a dummy identifier the three replacements below map each token to itself.
    String whitespaceToken = "\\s";
    String pipeToken = "\\|";
    String dashToken = "-";
    if (!intellijDummyIdentifier.isEmpty()) {
      String marker = TableFormatOptions.INTELLIJ_DUMMY_IDENTIFIER;
      whitespaceToken = "(?:\\s" + marker + "?)";
      pipeToken = "(?:" + marker + "?\\|" + marker + "?)";
      dashToken = "(?:-" + marker + "?)";
    }

    String finalPattern =
        basePattern
            .replace("\\s", whitespaceToken)
            .replace("\\|", pipeToken)
            .replace("-", dashToken);
    return Pattern.compile(finalPattern);
  }

  private TableParagraphPreProcessor(DataHolder options) {
    this.options = new TableParserOptions(options);
    // isIntellijDummyIdentifier = Parser.INTELLIJ_DUMMY_IDENTIFIER.getFrom(options);
    // intellijDummyIdentifier = isIntellijDummyIdentifier ? INTELLIJ_DUMMY_IDENTIFIER : "";
    this.TABLE_HEADER_SEPARATOR = getTableHeaderSeparator(this.options.minSeparatorDashes, "");
  }

  /**
   * Row type used as a temporary parse target for the separator line; its children are moved to
   * a regular {@link TableRow} after parsing. Carries the {@link DoNotDecorate} marker.
   */
  private static class TableSeparatorRow extends TableRow implements DoNotDecorate {
    TableSeparatorRow(BasedSequence chars) {
      super(chars);
    }
  }

  /**
   * Tries to turn the leading lines of a paragraph into a table block.
   *
   * <p>The method works in three passes: it collects candidate lines and locates the separator
   * line, parses each line into column separators and inline content, and then groups the parsed
   * nodes into cells, rows and head/separator/body sections. On success the table block is
   * inserted before the paragraph and reported to the parser state.
   *
   * @param block paragraph whose content lines are examined
   * @param state parser state supplying the inline parser and receiving the new block
   * @return the length of the created table block's characters, or 0 when no table was created
   */
  @Override
  public int preProcessBlock(Paragraph block, ParserState state) {
    InlineParser inlineParser = state.getInlineParser();

    List<BasedSequence> tableLines = new ArrayList<>();
    int separatorLineNumber = -1;
    BasedSequence separatorLine = null;
    int blockIndent = block.getLineIndent(0);
    BasedSequence captionLine = null;
    BitSet separators = separatorCharacters;
    Map<Character, CharacterNodeFactory> nodeMap = pipeNodeMap;

    // Pass 1: collect lines containing a pipe and find the separator line.
    for (BasedSequence rowLine : block.getContentLines()) {
      int rowNumber = tableLines.size();
      if (separatorLineNumber == -1 && rowNumber > options.maxHeaderRows) {
        return 0; // too many header rows
      }

      if (rowLine.indexOf('|') < 0) {
        // a line without pipes ends the table; before the separator it means "not a table"
        if (separatorLineNumber == -1) {
          return 0;
        }

        if (options.withCaption) {
          BasedSequence trimmed = rowLine.trim();
          if (trimmed.startsWith("[") && trimmed.endsWith("]")) {
            captionLine = trimmed;
          }
        }
        break;
      }

      // NOTE: block lines now contain leading indent spaces which should be ignored
      BasedSequence trimmedRowLine = rowLine.subSequence(block.getLineIndent(rowNumber));

      if (separatorLineNumber == -1) {
        if (rowNumber >= options.minHeaderRows
            && TABLE_HEADER_SEPARATOR.matcher(trimmedRowLine).matches()) {
          // must start with | or cell, whitespace means its not a separator line
          // NOTE(review): this condition is always true (no character is both whitespace and
          // '|'), so the else branch below never runs. Behaviour is kept as is because the
          // intended condition is unclear; confirm before tightening it.
          if (rowLine.charAt(0) != ' ' && rowLine.charAt(0) != '\t' || rowLine.charAt(0) != '|') {
            separatorLineNumber = rowNumber;
            separatorLine = trimmedRowLine;
          } else if (rowLine.charAt(0) == ' ' || rowLine.charAt(0) == '\t') {
            block.setHasTableSeparator(true);
          }
        }
      }

      tableLines.add(trimmedRowLine);
    }

    if (separatorLineNumber == -1) {
      return 0;
    }

    // Pass 2: run the inline parser over each line to obtain column separators and content.
    List<TableRow> tableRows = new ArrayList<>();
    for (BasedSequence rowLine : tableLines) {
      int rowNumber = tableRows.size();

      // lines indented deeper than the first line keep the extra indent as part of the row
      BasedSequence fullRowLine =
          block.getLineIndent(rowNumber) <= blockIndent
              ? rowLine.trimEOL()
              : rowLine.baseSubSequence(
                  rowLine.getStartOffset() - (block.getLineIndent(rowNumber) - blockIndent),
                  rowLine.getEndOffset() - rowLine.eolEndLength());
      boolean isSeparator = rowNumber == separatorLineNumber;
      TableRow tableRow = new TableRow(fullRowLine);
      int tableRowNumber;

      List<Node> sepList;
      if (isSeparator) {
        // parse into a temporary separator row, then move the result into the regular row
        TableSeparatorRow fakeRow = new TableSeparatorRow(fullRowLine);
        sepList = inlineParser.parseCustom(fullRowLine, fakeRow, separators, nodeMap);
        tableRow.takeChildren(fakeRow);
        tableRowNumber = 0;
      } else {
        sepList = inlineParser.parseCustom(fullRowLine, tableRow, pipeCharacters, pipeNodeMap);
        if (rowNumber < separatorLineNumber) {
          tableRowNumber = rowNumber + 1;
        } else {
          tableRowNumber = rowNumber - separatorLineNumber;
        }

        // can have table separators embedded inside inline elements, need to convert them to text
        // and remove them from sepList
        if (sepList != null) {
          sepList = cleanUpInlinedSeparators(inlineParser, tableRow, sepList);
        }
      }

      if (sepList == null) {
        // a line without usable separators: fatal up to the separator line, otherwise it ends
        // the table
        if (rowNumber <= separatorLineNumber) {
          return 0;
        }
        break;
      }

      tableRow.setRowNumber(tableRowNumber);
      tableRows.add(tableRow);
    }

    // table is done, could be earlier than the lines tested earlier, may need to truncate lines
    Block tableBlock = new TableBlock(tableLines.subList(0, tableRows.size()));
    Node section = new TableHead(tableLines.get(0).subSequence(0, 0));
    tableBlock.appendChild(section);

    List<TableCell.Alignment> alignments = parseAlignment(separatorLine);

    // Pass 3: regroup each parsed row into cells and attach rows to head/separator/body.
    int rowNumber = 0;
    int separatorColumns = alignments.size();
    for (TableRow tableRow : tableRows) {
      if (rowNumber == separatorLineNumber) {
        section.setCharsFromContent();
        section = new TableSeparator();
        tableBlock.appendChild(section);
      } else if (rowNumber == separatorLineNumber + 1) {
        section.setCharsFromContent();
        section = new TableBody();
        tableBlock.appendChild(section);
      }

      boolean firstCell = true;
      int cellCount = 0;
      NodeIterator nodes = new NodeIterator(tableRow.getFirstChild());
      TableRow newTableRow = new TableRow(tableRow.getChars());
      newTableRow.setRowNumber(tableRow.getRowNumber());
      int accumulatedSpanOffset = 0;

      while (nodes.hasNext()) {
        if (cellCount >= separatorColumns && options.discardExtraColumns) {
          if (options.headerSeparatorColumnMatch && rowNumber < separatorLineNumber) {
            // header/separator mismatch
            return 0;
          }

          break;
        }

        TableCell tableCell = new TableCell();

        // a pipe at the very start of the row is the first cell's opening marker
        if (firstCell && nodes.peek() instanceof TableColumnSeparator) {
          Node columnSep = nodes.next();
          tableCell.setOpeningMarker(columnSep.getChars());
          columnSep.unlink();
          firstCell = false;
        }

        // spans of earlier cells shift which separator column this cell aligns with
        TableCell.Alignment alignment =
            cellCount + accumulatedSpanOffset < separatorColumns
                ? alignments.get(cellCount + accumulatedSpanOffset)
                : null;
        tableCell.setHeader(rowNumber < separatorLineNumber);
        tableCell.setAlignment(alignment);

        // take all until separator or end of iterator
        while (nodes.hasNext()) {
          if (nodes.peek() instanceof TableColumnSeparator) {
            break;
          }
          tableCell.appendChild(nodes.next());
        }

        // accumulate closers, and optional spans: the first separator closes the cell, each
        // directly adjacent further separator extends the marker and widens the span
        BasedSequence closingMarker = null;
        int span = 1;
        while (nodes.hasNext()) {
          if (!(nodes.peek() instanceof TableColumnSeparator)) {
            break;
          }
          if (closingMarker == null) {
            closingMarker = nodes.next().getChars();
            if (!options.columnSpans) {
              break;
            }
          } else {
            BasedSequence nextSep = nodes.peek().getChars();

            if (!closingMarker.isContinuedBy(nextSep)) {
              break;
            }
            closingMarker = closingMarker.spliceAtEnd(nextSep);
            nodes.next().unlink();
            span++;
          }
        }

        accumulatedSpanOffset += span - 1;

        if (closingMarker != null) {
          tableCell.setClosingMarker(closingMarker);
        }
        tableCell.setChars(tableCell.getChildChars());
        // option to keep cell whitespace, if yes, then convert it to text and merge adjacent text
        // nodes
        if (options.trimCellWhitespace) {
          tableCell.trimWhiteSpace();
        } else {
          tableCell.mergeWhiteSpace();
        }

        // NOTE: here we get only chars which do not reflect out-of-base characters, prefixes and
        // removed text
        tableCell.setText(tableCell.getChildChars());

        tableCell.setCharsFromContent();
        tableCell.setSpan(span);
        newTableRow.appendChild(tableCell);
        cellCount++;
      }

      if (options.headerSeparatorColumnMatch
          && rowNumber < separatorLineNumber
          && cellCount < separatorColumns) {
        // no match
        return 0;
      }

      // pad short rows with empty cells up to the separator's column count when requested
      while (options.appendMissingColumns && cellCount < separatorColumns) {
        TableCell tableCell = new TableCell();
        tableCell.setHeader(rowNumber < separatorLineNumber);
        tableCell.setAlignment(alignments.get(cellCount));
        newTableRow.appendChild(tableCell);
        cellCount++;
      }

      newTableRow.setCharsFromContent();
      section.appendChild(newTableRow);

      rowNumber++;
    }

    section.setCharsFromContent();

    // the separator was the last row: still add an (empty) body section after it
    if (section instanceof TableSeparator) {
      TableBody tableBody =
          new TableBody(section.getChars().subSequence(section.getChars().length()));
      tableBlock.appendChild(tableBody);
    }

    // Add caption if the option is enabled
    if (captionLine != null) {
      // split "[text]" into opening marker, text and closing marker
      TableCaption caption =
          new TableCaption(
              captionLine.subSequence(0, 1),
              captionLine.subSequence(1, captionLine.length() - 1),
              captionLine.subSequence(captionLine.length() - 1));
      inlineParser.parse(caption.getText(), caption);
      caption.setCharsFromContent();
      tableBlock.appendChild(caption);
    }

    tableBlock.setCharsFromContent();

    block.insertBefore(tableBlock);
    state.blockAdded(tableBlock);
    return tableBlock.getChars().length();
  }

  /**
   * Converts column separators that ended up nested inside inline elements back into text.
   *
   * <p>A separator whose parent is neither {@code null} nor {@code tableRow} is replaced, together
   * with a directly adjacent {@link WhiteSpace} node on either side, by a single {@link Text}
   * node; text nodes of each affected parent are then merged.
   *
   * @param inlineParser parser used to merge adjacent text nodes
   * @param tableRow row whose direct children are the real column separators
   * @param sepList separators reported by the inline parser for this row
   * @return {@code sepList} itself when nothing was embedded, a new list without the converted
   *     separators, or {@code null} when every separator was converted
   */
  List<Node> cleanUpInlinedSeparators(
      InlineParser inlineParser, TableRow tableRow, List<Node> sepList) {
    // any separators which do not have tableRow as parent are embedded into inline elements and
    // should be converted back to text
    List<Node> removedSeparators = null;
    List<Node> mergeTextParents = null;

    for (Node node : sepList) {
      if (node.getParent() != null && node.getParent() != tableRow) {
        // embedded, convert it and surrounding whitespace to text
        Node firstNode = node.getPrevious() instanceof WhiteSpace ? node.getPrevious() : node;
        Node lastNode = node.getNext() instanceof WhiteSpace ? node.getNext() : node;

        Text text =
            new Text(node.baseSubSequence(firstNode.getStartOffset(), lastNode.getEndOffset()));
        node.insertBefore(text);
        node.unlink();
        // firstNode/lastNode may be the separator itself when no whitespace is adjacent
        firstNode.unlink();
        lastNode.unlink();

        if (removedSeparators == null) {
          removedSeparators = new ArrayList<>();
          mergeTextParents = new ArrayList<>();
        }

        removedSeparators.add(node);
        mergeTextParents.add(text.getParent());
      }
    }

    if (mergeTextParents != null) {
      for (Node parent : mergeTextParents) {
        inlineParser.mergeTextNodes(parent.getFirstChild(), parent.getLastChild());
      }

      if (removedSeparators.size() == sepList.size()) {
        return null;
      }

      List<Node> newSeparators = new ArrayList<>(sepList);
      newSeparators.removeAll(removedSeparators);
      return newSeparators;
    }

    return sepList;
  }

  /**
   * Derives one alignment per column from the separator line.
   *
   * @param separatorLine the separator line
   * @return alignments in column order; an entry is {@code null} for a column without colons
   */
  private static List<TableCell.Alignment> parseAlignment(BasedSequence separatorLine) {
    List<BasedSequence> parts = split(separatorLine, false, false);
    List<TableCell.Alignment> alignments = new ArrayList<>();
    for (BasedSequence part : parts) {
      BasedSequence trimmed = part.trim();
      // a leading colon marks left, a trailing colon marks right, both mark center
      boolean left = trimmed.startsWith(":");
      boolean right = trimmed.endsWith(":");
      TableCell.Alignment alignment = getAlignment(left, right);
      alignments.add(alignment);
    }
    return alignments;
  }

  /**
   * Splits a table line into cell segments at unescaped pipe characters.
   *
   * <p>The input is trimmed and one leading pipe is dropped from the cell content. A pipe
   * preceded by a backslash does not split; the backslash stays in the segment.
   *
   * @param input the line to split
   * @param columnSpans when {@code true}, empty segments between consecutive pipes are skipped
   * @param wantPipes when {@code true}, each pipe is also added as its own one-character segment
   * @return the segments in order of appearance; text after the last pipe is included only when
   *     it is non-empty
   */
  private static List<BasedSequence> split(
      BasedSequence input, boolean columnSpans, boolean wantPipes) {
    BasedSequence line = input.trim();
    int lineLength = line.length();
    List<BasedSequence> segments = new ArrayList<>();

    if (line.startsWith("|")) {
      if (wantPipes) {
        segments.add(line.subSequence(0, 1));
      }
      line = line.subSequence(1, lineLength);
      lineLength--;
    }

    boolean escape = false;
    int lastPos = 0;
    int cellChars = 0;
    for (int i = 0; i < lineLength; i++) {
      char c = line.charAt(i);
      if (escape) {
        // the character after a backslash is always cell content, even if it is a pipe
        escape = false;
        cellChars++;
      } else {
        switch (c) {
          case '\\':
            escape = true;
            // Removing the escaping '\' is handled by the inline parser later, so add it to cell
            cellChars++;
            break;
          case '|':
            if (!columnSpans || lastPos < i) {
              segments.add(line.subSequence(lastPos, i));
            }
            if (wantPipes) {
              segments.add(line.subSequence(i, i + 1));
            }
            lastPos = i + 1;
            cellChars = 0;
            break;
          default:
            cellChars++;
        }
      }
    }

    if (cellChars > 0) {
      segments.add(line.subSequence(lastPos, lineLength));
    }
    return segments;
  }

  /**
   * Maps the presence of colon markers to a cell alignment.
   *
   * @param left whether the left marker is present
   * @param right whether the right marker is present
   * @return CENTER for both markers, LEFT or RIGHT for a single one, {@code null} for neither
   */
  private static TableCell.Alignment getAlignment(boolean left, boolean right) {
    if (!left) {
      return right ? TableCell.Alignment.RIGHT : null;
    }
    return right ? TableCell.Alignment.CENTER : TableCell.Alignment.LEFT;
  }
}