org.languagetool.tools.RuleMatchAsXmlSerializer Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of languagetool-core Show documentation
LanguageTool is an Open Source proofreading software for English, French, German, Polish, Romanian, and more than 20 other languages. It finds many errors that a simple spell checker cannot detect like mixing up there/their and it detects some grammar problems.
There is a newer version: 6.5
Show newest version
/* LanguageTool, a natural language style checker 
 * Copyright (C) 2014 Daniel Naber (http://www.danielnaber.de)
 * 
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
 * USA
 */
package org.languagetool.tools;

import org.languagetool.JLanguageTool;
import org.languagetool.Language;
import org.languagetool.rules.Category;
import org.languagetool.rules.CategoryId;
import org.languagetool.rules.ITSIssueType;
import org.languagetool.rules.RuleMatch;
import org.languagetool.rules.patterns.AbstractPatternRule;

import java.util.List;

import static org.languagetool.tools.StringTools.*;

/**
 * Generate XML to represent matching rules.
 * 
 * @since 2.5 (as 'RuleAsXmlSerializer' up to 3.1)
 * @deprecated don't use for new use cases, the only place this should still be used is for the API mode of the command-line client (deprecated since 3.5) 
 */
public class RuleMatchAsXmlSerializer {

  private static final int CAPACITY = 200;

  /**
   * Get the string to begin the XML. After this, use {@link #ruleMatchesToXmlSnippet} and then {@link #getXmlEnd()}
   * or better, simply use {@link #ruleMatchesToXml}.
   */
  public String getXmlStart(Language lang, Language motherTongue) {
    StringBuilder xml = new StringBuilder(CAPACITY);
    xml.append("\n")
            .append("\n")
            .append("\n");
    if (lang != null || motherTongue != null) {
      String languageXml = "\n");
        }
      }
      if (motherTongue != null && (lang == null || !motherTongue.getShortCode().equals(lang.getShortCodeWithCountryAndVariant()))) {
        languageXml += " mothertongueshortname=\"" + motherTongue.getShortCode() + "\" mothertonguename=\"" + motherTongue.getName() + "\"";
      }
      languageXml += "/>\n";
      xml.append(languageXml);
      xml.append(warning);
    }
    return xml.toString();
  }

  /**
   * Get the string to end the XML. Use after {@link #ruleMatchesToXmlSnippet} and {@link #getXmlStart}.
   */
  public String getXmlEnd() {
    return "\n";
  }

  /**
   * Get the XML snippet (i.e. not a complete XML document) for the given rules.
   * @see #getXmlStart
   * @see #getXmlEnd()
   */
  public String ruleMatchesToXmlSnippet(List ruleMatches, String text, int contextSize) {
    StringBuilder xml = new StringBuilder(CAPACITY);
    //
    // IMPORTANT: people rely on this format, don't change it!
    //
    ContextTools contextTools = new ContextTools();
    contextTools.setEscapeHtml(false);
    contextTools.setContextSize(contextSize);
    String startMarker = "__languagetool_start_marker";
    contextTools.setErrorMarkerStart(startMarker);
    contextTools.setErrorMarkerEnd("");

    for (RuleMatch match : ruleMatches) {
      String subId = "";
      if (match.getRule() instanceof AbstractPatternRule) {
        AbstractPatternRule pRule = (AbstractPatternRule) match.getRule();
        if (pRule.getSubId() != null) {
          subId = " subId=\"" + escapeXMLForAPIOutput(pRule.getSubId()) + "\" ";
        }
      }
      xml.append("", "'");
      xml.append(" msg=\"").append(escapeXMLForAPIOutput(msg)).append('"');
      if (!match.getShortMessage().isEmpty()) {
        xml.append(" shortmsg=\"").append(escapeXMLForAPIOutput(match.getShortMessage())).append('"');
      }
      xml.append(" replacements=\"").append(escapeXMLForAPIOutput(
              String.join("#", match.getSuggestedReplacements()))).append('"');
      String context = contextTools.getContext(match.getFromPos(), match.getToPos(), text);
      // get position of error in context and remove artificial marker again:
      int contextOffset = context.indexOf(startMarker);
      context = context.replaceFirst(startMarker, "");
      context = context.replaceAll("[\n\r]", " ");
      xml.append(" context=\"").append(escapeForXmlAttribute(context)).append('"')
              .append(" contextoffset=\"").append(contextOffset).append('"')
              .append(" offset=\"").append(match.getFromPos()).append('"')
              .append(" errorlength=\"").append(match.getToPos() - match.getFromPos()).append('"');
      if (match.getRule().getUrl() != null) {
        xml.append(" url=\"").append(escapeXMLForAPIOutput(match.getRule().getUrl().toString())).append('"');
      }
      Category category = match.getRule().getCategory();
      if (category != null) {
        xml.append(" category=\"").append(escapeXMLForAPIOutput(category.getName())).append('"');
        CategoryId id = category.getId();
        if (id != null) {
          xml.append(" categoryid=\"").append(escapeXMLForAPIOutput(id.toString())).append('"');
        }
      }
      ITSIssueType type = match.getRule().getLocQualityIssueType();
      if (type != null) {
        xml.append(" locqualityissuetype=\"").append(escapeXMLForAPIOutput(type.toString())).append('"');
      }
      xml.append("/>\n");
    }
    return xml.toString();
  }

  /**
   * Get an XML representation of the given rule matches.
   * @param text the original text that was checked, used to get the context of the matches
   * @param contextSize the desired context size in characters
   */
  public String ruleMatchesToXml(List ruleMatches, String text, int contextSize,
                                 Language lang, Language motherTongue) {
    return getXmlStart(lang, motherTongue) + ruleMatchesToXmlSnippet(ruleMatches, text, contextSize) + getXmlEnd();
  }

  /**
   * Get an XML representation of the given rule matches.
   * @param text the original text that was checked, used to get the context of the matches
   * @param contextSize the desired context size in characters
   */
  public String ruleMatchesToXml(List ruleMatches, String text, int contextSize, Language lang) {
    return getXmlStart(lang, null) + ruleMatchesToXmlSnippet(ruleMatches, text, contextSize) + getXmlEnd();
  }

  /**
   * Get an XML representation of the given rule matches.
   *
   * @param text the original text that was checked, used to get the context of the matches
   * @param contextSize the desired context size in characters
   * @param unknownWords unknown words to be printed in a separated list
   * @since 3.0
   */
  public String ruleMatchesToXml(List ruleMatches, String text, int contextSize, ApiPrintMode xmlMode, Language lang, List unknownWords) {
    String xmlSnippet = ruleMatchesToXmlSnippet(ruleMatches, text, contextSize);
    switch (xmlMode) {
      case START_API:
        return getXmlStart(lang, null) + xmlSnippet;
      case CONTINUE_API:
        return xmlSnippet;
      case END_API:
        return xmlSnippet + getXmlUnknownWords(unknownWords) + getXmlEnd();
      case NORMAL_API:
        return getXmlStart(lang, null) + xmlSnippet + getXmlUnknownWords(unknownWords) + getXmlEnd();
    }
    throw new IllegalArgumentException("Unknown XML mode: " + xmlMode);
  }

  private String getXmlUnknownWords(List unknownWords) {
    StringBuilder xml = new StringBuilder(CAPACITY);
    if (!unknownWords.isEmpty()) {
      xml.append("\n");
      for (String word : unknownWords) {
        xml.append("    ");
        xml.append(escapeForXmlAttribute(word));
        xml.append("\n");
      }
      xml.append("\n");
    }
    return xml.toString();
  }

  private static String escapeXMLForAPIOutput(String s) {
    // this is simplified XML, i.e. put the "" in one line:
    return escapeForXmlAttribute(s).replaceAll("[\n\r]", " ");
  }
  
}