All downloads are free. Search and download functionality uses the official Maven repository.

de.bottlecaps.markup.blitz.transform.GenerateAdditionalNames Maven / Gradle / Ivy

// Copyright (c) 2023-2024 Gunther Rademacher. Provided under the Apache 2 License.

package de.bottlecaps.markup.blitz.transform;

import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import java.util.regex.Pattern;

import de.bottlecaps.markup.blitz.codepoints.Codepoint;
import de.bottlecaps.markup.blitz.codepoints.RangeSet;
import de.bottlecaps.markup.blitz.grammar.Alt;
import de.bottlecaps.markup.blitz.grammar.Alts;
import de.bottlecaps.markup.blitz.grammar.Charset;
import de.bottlecaps.markup.blitz.grammar.Control;
import de.bottlecaps.markup.blitz.grammar.Grammar;
import de.bottlecaps.markup.blitz.grammar.Insertion;
import de.bottlecaps.markup.blitz.grammar.Node;
import de.bottlecaps.markup.blitz.grammar.Occurrence;
import de.bottlecaps.markup.blitz.grammar.Rule;
import de.bottlecaps.markup.blitz.grammar.Term;

/**
 * Grammar visitor that registers additional names for terms in
 * {@code grammar.getAdditionalNames()}: nested choices, character sets,
 * insertions, and repetition/option controls.
 *
 * <p>Every generated name starts with a prefix made of underscores only, chosen
 * in the constructor so that no rule name of the grammar starts with it. Names
 * are kept unique against the rule names and against all names generated so far.
 * Names are cached per right-hand side in {@link #nameByRhs}; reuse of a name for
 * an equivalent term relies on the {@code equals}/{@code hashCode} of
 * {@link Alts} (NOTE(review): presumably structural - confirm in {@code Alts}).
 */
public class GenerateAdditionalNames extends Visitor {
  /** Matches a one-character string that is accepted as a name character. */
  private static final Pattern nameCharPattern = Pattern.compile("^([-_.\u00B7\u203F\u2040]|\\p{L}|\\p{Nd}|\\p{Mn})$");

  /** The grammar whose additional-name table is being filled. */
  private final Grammar grammar;
  /** All names in use: the rule names plus every name generated by this instance. */
  private final Set<String> names;
  /** Name already associated with a right-hand side (rule names and generated names). */
  private final Map<Alts, String> nameByRhs;
  /** Underscore-only prefix that no rule name starts with. */
  private final String additionalNamePrefix;
  /** Per range set, the name proposal collected by {@link CharsetOrigin}. */
  private final Map<RangeSet, String> smallestContext;

  /**
   * Prepares name generation for the given grammar: collects the rule names,
   * determines the name prefix, registers names for {@code Charset.END} and
   * {@code Term.START}, and collects name origins for character sets.
   *
   * @param grammar the grammar to be processed
   */
  public GenerateAdditionalNames(Grammar grammar) {
    this.grammar = grammar;
    this.nameByRhs = new HashMap<>();

    this.names = new HashSet<>(grammar.getRules().keySet());
    grammar.getRules().values().forEach(rule -> nameByRhs.put(rule.getAlts(), rule.getName()));

    // Grow the prefix by one underscore at a time until no rule name starts with it.
    // The empty prefix is only accepted when there are no rule names at all.
    StringBuilder underscores = new StringBuilder();
    while (startsAnyName(underscores.toString()))
      underscores.append("_");
    this.additionalNamePrefix = underscores.toString();

    addAdditionalNames(Charset.END, additionalNamePrefix + "end");
    addAdditionalNames(Term.START, additionalNamePrefix + "start");

    final var charsetOrigin = new CharsetOrigin();
    charsetOrigin.visit(grammar);
    this.smallestContext = charsetOrigin.smallestContext;
  }

  /** Tells whether any name in {@link #names} starts with the given prefix. */
  private boolean startsAnyName(String prefix) {
    return names.stream().anyMatch(name -> name.startsWith(prefix));
  }

  /**
   * Visits the nested terms first, then registers a "choice" name for an
   * {@code Alts} that is not the direct child of a rule and has more than one
   * alternative.
   */
  @Override
  public void visit(Alts a) {
    super.visit(a);
    if (! (a.getParent() instanceof Rule) && a.getAlts().size() > 1)
      addAdditionalNames(a, getAdditionalName(a.getRule().getName(), a, "choice"));
  }

  /** Registers the given names for the term, unless the term already has an entry. */
  private void addAdditionalNames(Term t, String... names) {
    grammar.getAdditionalNames().putIfAbsent(t, names);
  }

  /**
   * Registers a "deleted_chars" or "preserved_chars" name for a character set
   * that needs one. The name proposal is the origin collected by
   * {@link CharsetOrigin} for the range set, or the enclosing rule's name when
   * none was collected.
   */
  @Override
  public void visit(Charset c) {
    if (! needsProposalForName(c))
      return;
    String suffix = c.isDeleted()
        ? "deleted_chars"
        : "preserved_chars";
    String origin = smallestContext.get(c.getRangeSet());
    if (origin == null)
      origin = c.getRule().getName();
    addAdditionalNames(c, getAdditionalName(origin, c, suffix));
  }

  /** Registers an "insertion" name, based on the enclosing rule's name. */
  @Override
  public void visit(Insertion i) {
    addAdditionalNames(i, getAdditionalName(i.getRule().getName(), i, "insertion"));
  }

  /**
   * Visits the nested terms first, then registers names depending on the
   * occurrence: "list" for one-or-more, "list_option" for zero-or-more (plus a
   * "list" name for the corresponding one-or-more control when a separator is
   * present), and "option" for zero-or-one.
   *
   * @throws IllegalStateException if the occurrence is none of the above
   */
  @Override
  public void visit(Control c) {
    super.visit(c);
    switch (c.getOccurrence()) {
    case ONE_OR_MORE:
      addAdditionalNames(c, getAdditionalName(c.getRule().getName(), c, "list"));
      break;
    case ZERO_OR_MORE:
      // The option name is requested before the list name; the order determines
      // which one gets a numeric suffix in case of a clash.
      String optionName = getAdditionalName(c.getRule().getName(), c, "list_option");
      if (c.getSeparator() == null) {
        addAdditionalNames(c, optionName);
      }
      else {
        Control list = new Control(Occurrence.ONE_OR_MORE, c.getTerm(), c.getSeparator());
        String listName = getAdditionalName(c.getRule().getName(), list, "list");
        addAdditionalNames(c, optionName, listName);
      }
      break;
    case ZERO_OR_ONE:
      addAdditionalNames(c, getAdditionalName(c.getRule().getName(), c, "option"));
      break;
    default:
      throw new IllegalStateException("Unexpected occurrence: " + c.getOccurrence());
    }
  }

  /**
   * Returns the name associated with the given term as a right-hand side. A term
   * that is not an {@code Alts} is wrapped into a new single-alternative
   * {@code Alts} for the lookup. When no name is known yet, a new unique name is
   * generated from proposal and suffix and remembered for that right-hand side.
   *
   * @param proposal the name to derive the new name from
   * @param term the term to be named
   * @param suffix the suffix describing the kind of term
   * @return the known or newly generated name
   */
  public String getAdditionalName(String proposal, Term term, String suffix) {
    Alts alts;
    if (term instanceof Alts) {
      alts = (Alts) term;
    }
    else {
      Alt alt = new Alt();
      alt.getTerms().add(term);
      alts = new Alts();
      alts.addAlt(alt);
    }
    String name = nameByRhs.get(alts);
    if (name == null) {
      name = getAdditionalName(proposal, suffix);
      nameByRhs.put(alts, name);
    }
    return name;
  }

  /**
   * Generates a new unique name of the form prefix + sanitized proposal + "_" +
   * suffix, and records it as used. In the proposal, '-' and every char that is
   * not a name character is replaced by '_', runs of '_' are collapsed, and
   * leading '_' are dropped. If the resulting name is already in use, "_1",
   * "_2", ... is appended until it is unique.
   *
   * @param proposal the name to derive the new name from
   * @param suffix the suffix describing the kind of term
   * @return the new name
   */
  public String getAdditionalName(String proposal, String suffix) {
    StringBuilder sb = new StringBuilder();
    // Starting with '_' as the previous char suppresses leading underscores.
    char last = '_';
    for (char chr : proposal.toCharArray()) {
      if (chr == '-' || ! nameCharPattern.matcher(String.valueOf(chr)).matches())
        chr = '_';
      if (chr == '_' && last == '_')
        continue;
      last = chr;
      sb.append(last);
    }
    if (sb.length() != 0 && sb.charAt(sb.length() - 1) != '_') {
      sb.append("_");
    }
    sb.append(suffix);
    String base = additionalNamePrefix + sb;
    for (int i = 0;; ++i) {
      // The unnumbered form is only tried when there is something after the prefix.
      String name = i == 0 && sb.length() > 0
                  ? base
                  : base + "_" + i;
      // Set.add returns true only if the name was not in use yet.
      if (names.add(name))
        return name;
    }
  }

  /**
   * Tells whether a name has to be generated for the character set: not when it
   * already has additional names, and not when its range set is a singleton
   * whose codepoint is ASCII.
   */
  private boolean needsProposalForName(Charset c) {
    if (grammar.getAdditionalNames().containsKey(c))
      return false;
    RangeSet rangeSet = c.getRangeSet();
    if (rangeSet.isSingleton() && Codepoint.isAscii(rangeSet.iterator().next().getFirstCodepoint()))
      return false;
    return true;
  }

  /**
   * Collects, per range set, the name to be used as the proposal when naming
   * character sets with that range set. Among all occurrences of a range set,
   * the one with the smallest context size wins; on equal size the first
   * occurrence visited is kept.
   */
  private class CharsetOrigin extends Visitor {
    /** Name proposal of the occurrence with the smallest context, per range set. */
    final Map<RangeSet, String> smallestContext = new HashMap<>();
    /** Smallest context size seen so far, per range set. */
    final Map<RangeSet, Integer> smallestContextSize = new HashMap<>();

    /**
     * Records the character set's context if it is smaller than any seen before
     * for the same range set. For a charset that is the only term of its
     * alternative, the context size is the total char count of all
     * single-charset alternatives of the enclosing {@code Alts}; otherwise it is
     * the char count of the charset itself.
     */
    @Override
    public void visit(Charset c) {
      if (! needsProposalForName(c))
        return;
      final var rangeSet = c.getRangeSet();
      Node parent = c.getParent();
      // Stays null unless the charset is the sole term of its alternative.
      Alts alts = null;
      int contextSize;
      if (parent instanceof Alt && ((Alt) parent).getTerms().size() == 1) {
        contextSize = 0;
        alts = (Alts) parent.getParent();
        for (Alt a : alts.getAlts()) {
          if (a.getTerms().size() == 1 && a.getTerms().get(0) instanceof Charset) {
            contextSize += ((Charset) a.getTerms().get(0)).getRangeSet().charCount();
          }
        }
      }
      else {
        contextSize = rangeSet.charCount();
      }
      Integer minContextSize = smallestContextSize.get(rangeSet);
      if (minContextSize == null || minContextSize > contextSize) {
        // Prefer the first additional name already registered for the enclosing
        // Alts; fall back to the name of the enclosing rule.
        String name = grammar.getAdditionalNames().containsKey(alts)
                    ? grammar.getAdditionalNames().get(alts)[0]
                    : c.getRule().getName();
        smallestContext.put(rangeSet, name);
        smallestContextSize.put(rangeSet, contextSize);
      }
    }
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy