org.languagetool.markup.AnnotatedTextBuilder Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of languagetool-core Show documentation
LanguageTool is an Open Source proofreading software for English, French, German, Polish, Romanian, and more than 20 other languages. It finds many errors that a simple spell checker cannot detect like mixing up there/their and it detects some grammar problems.
There is a newer version: 6.4
Show newest version
/* LanguageTool, a natural language style checker
 * Copyright (C) 2013 Daniel Naber (http://www.danielnaber.de)
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
 * USA
 */
package org.languagetool.markup;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

/**
 * Use this builder to create input of text with markup for LanguageTool, so that it
 * can check only the plain text parts and ignore the markup, yet still calculate the
 * positions of errors so that they refer to the complete text, including markup.
 *
 * <p>It's up to you to split the input into parts that are plain text and parts that
 * are markup.
 *
 * <p>For example, text with XML markup like
 *
 * <pre>
 *   Here is &lt;b&gt;some text&lt;/b&gt;
 * </pre>
 *
 * <p>needs to be prepared like this:
 *
 * <pre>
 * new AnnotatedTextBuilder()
 *   .addText("Here is ").addMarkup("&lt;b&gt;").addText("some text").addMarkup("&lt;/b&gt;")
 *   .build()
 * </pre>
 *
 * @since 2.3
 */
public class AnnotatedTextBuilder {

  /** All parts in the order they were added; text, markup and fake content are interleaved. */
  private final List<TextPart> parts = new ArrayList<>();
  /** Global meta data stored under predefined keys. */
  private final Map<AnnotatedText.MetaDataKey, String> metaData = new HashMap<>();
  /** Global meta data stored under free-form string keys. */
  private final Map<String, String> customMetaData = new HashMap<>();

  public AnnotatedTextBuilder() {
  }

  /**
   * Add global meta data like document title or receiver name (when writing an email).
   * Some rules may use this information. A value added earlier under the same key is replaced.
   * @param key the predefined meta data key
   * @param value the value to store for {@code key}
   * @return this builder, to allow chaining
   * @since 3.9
   */
  public AnnotatedTextBuilder addGlobalMetaData(AnnotatedText.MetaDataKey key, String value) {
    metaData.put(key, value);
    return this;
  }

  /**
   * Add any global meta data about the document to be checked. Some rules may use this information.
   * Unless you're using your own rules for which you know useful keys, you probably want to
   * use {@link #addGlobalMetaData(AnnotatedText.MetaDataKey, String)}.
   * A value added earlier under the same key is replaced.
   * @param key a free-form meta data key
   * @param value the value to store for {@code key}
   * @return this builder, to allow chaining
   * @since 3.9
   */
  public AnnotatedTextBuilder addGlobalMetaData(String key, String value) {
    customMetaData.put(key, value);
    return this;
  }

  /**
   * Add a plain text snippet, to be checked by LanguageTool when using
   * {@link org.languagetool.JLanguageTool#check(AnnotatedText)}.
   * @param text the plain text to append
   * @return this builder, to allow chaining
   */
  public AnnotatedTextBuilder addText(String text) {
    parts.add(new TextPart(text, TextPart.Type.TEXT));
    return this;
  }

  /**
   * Add a markup text snippet like {@code <b>} or {@code </b>}. These
   * parts will be ignored by LanguageTool when using {@link org.languagetool.JLanguageTool#check(AnnotatedText)}.
   * @param markup the markup to append
   * @return this builder, to allow chaining
   */
  public AnnotatedTextBuilder addMarkup(String markup) {
    parts.add(new TextPart(markup, TextPart.Type.MARKUP));
    return this;
  }

  /**
   * Add a markup text snippet like {@code <b>} or {@code </b>}. These
   * parts will be ignored by LanguageTool when using {@link org.languagetool.JLanguageTool#check(AnnotatedText)}.
   * @param markup the markup to append
   * @param interpretAs A string that will be used by the checker instead of the markup. This is usually
   *                    whitespace, e.g. {@code \n\n} for {@code <p>}
   * @return this builder, to allow chaining
   */
  public AnnotatedTextBuilder addMarkup(String markup, String interpretAs) {
    // The fake content must directly follow its markup part: build() only looks at the next part.
    parts.add(new TextPart(markup, TextPart.Type.MARKUP));
    parts.add(new TextPart(interpretAs, TextPart.Type.FAKE_CONTENT));
    return this;
  }

  /**
   * Append an already constructed part as-is. Note that, unlike the other {@code add*} methods,
   * this one does not return the builder. Also note that {@link #build()} only accounts for a
   * {@code FAKE_CONTENT} part when it directly follows a {@code MARKUP} part.
   * @param part the part to append
   * @since 5.4
   */
  public void add(TextPart part) {
    parts.add(part);
  }

  /**
   * Create the annotated text to be passed into {@link org.languagetool.JLanguageTool#check(AnnotatedText)}.
   * While walking over the parts, this computes a mapping from offsets in the plain text
   * (text parts plus fake content) to offsets in the complete input (text parts plus markup).
   * @return the annotated text made of all parts and meta data added so far
   */
  public AnnotatedText build() {
    int plainTextPosition = 0;  // offset in what the checker sees: text plus fake content
    int totalPosition = 0;      // offset in the original input: text plus markup
    Map<Integer, MappingValue> mapping = new HashMap<>();
    for (int i = 0; i < parts.size(); i++) {
      TextPart part = parts.get(i);
      if (part.getType() == TextPart.Type.TEXT) {
        int textLength = part.getPart().length();
        plainTextPosition += textLength;
        totalPosition += textLength;
        mapping.put(plainTextPosition, new MappingValue(totalPosition));
      } else if (part.getType() == TextPart.Type.MARKUP) {
        int markupLength = part.getPart().length();
        totalPosition += markupLength;
        if (hasFakeContent(i, parts)) {
          // The fake content only advances the plain text offset; it is consumed here
          // together with its markup, so skip it in the loop.
          plainTextPosition += parts.get(i + 1).getPart().length();
          i++;
          // Keep a mapping that already exists for this plain text offset
          // (e.g. when the fake content is empty and the offset did not move).
          if (!mapping.containsKey(plainTextPosition)) {
            mapping.put(plainTextPosition, new MappingValue(totalPosition, markupLength));
          }
        }
      }
    }
    return new AnnotatedText(parts, mapping, metaData, customMetaData);
  }

  /**
   * Tell whether the part at index {@code i} is directly followed by a fake content part.
   * @param i index of the part to look behind
   * @param parts the list of parts to inspect
   * @return true if a part exists at {@code i + 1} and its type is {@code FAKE_CONTENT}
   */
  private static boolean hasFakeContent(int i, List<TextPart> parts) {
    int nextPartIndex = i + 1;
    return nextPartIndex < parts.size()
        && parts.get(nextPartIndex).getType() == TextPart.Type.FAKE_CONTENT;
  }

}