All downloads are free. The search and download features use the official Maven repository.

org.languagetool.markup.AnnotatedTextBuilder Maven / Gradle / Ivy

Go to download

LanguageTool is an Open Source proofreading software for English, French, German, Polish, Romanian, and more than 20 other languages. It finds many errors that a simple spell checker cannot detect, such as mixing up there/their, and it also detects some grammar problems.

There is a newer version: 6.5
Show newest version
/* LanguageTool, a natural language style checker
 * Copyright (C) 2013 Daniel Naber (http://www.danielnaber.de)
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
 * USA
 */
package org.languagetool.markup;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

/**
 * Use this builder to create input of text with markup for LanguageTool, so that it
 * can check only the plain text parts and ignore the markup, yet still calculate the
 * positions of errors so that they refer to the complete text, including markup.
 *
 * 

It's up to you to split the input into parts that are plain text and parts that * are markup. * *

For example, text with XML markup like

* *
 *   Here is <b>some text</b>
 * 
* *

needs to be prepared like this:

* *
 * new AnnotatedTextBuilder()
 *   .addText("Here is ").addMarkup("<b>").addText("some text").addMarkup("</b>")
 *   .build()
 * 
* * @since 2.3 */ public class AnnotatedTextBuilder { private final List parts = new ArrayList<>(); private final Map metaData = new HashMap<>(); private final Map customMetaData = new HashMap<>(); public AnnotatedTextBuilder() { } /** * Add global meta data like document title or receiver name (when writing an email). * Some rules may use this information. * @since 3.9 */ public AnnotatedTextBuilder addGlobalMetaData(AnnotatedText.MetaDataKey key, String value) { metaData.put(key, value); return this; } /** * Add any global meta data about the document to be checked. Some rules may use this information. * Unless you're using your own rules for which you know useful keys, you probably want to * use {@link #addGlobalMetaData(AnnotatedText.MetaDataKey, String)}. * @since 3.9 */ public AnnotatedTextBuilder addGlobalMetaData(String key, String value) { customMetaData.put(key, value); return this; } /** * Add a plain text snippet, to be checked by LanguageTool when using * {@link org.languagetool.JLanguageTool#check(AnnotatedText)}. */ public AnnotatedTextBuilder addText(String text) { parts.add(new TextPart(text, TextPart.Type.TEXT)); return this; } /** * Add a markup text snippet like {@code } or {@code
}. These * parts will be ignored by LanguageTool when using {@link org.languagetool.JLanguageTool#check(AnnotatedText)}. */ public AnnotatedTextBuilder addMarkup(String markup) { parts.add(new TextPart(markup, TextPart.Type.MARKUP)); return this; } /** * Add a markup text snippet like {@code } or {@code
}. These * parts will be ignored by LanguageTool when using {@link org.languagetool.JLanguageTool#check(AnnotatedText)}. * @param interpretAs A string that will be used by the checker instead of the markup. This is usually * whitespace, e.g. {@code \n\n} for {@code

} */ public AnnotatedTextBuilder addMarkup(String markup, String interpretAs) { parts.add(new TextPart(markup, TextPart.Type.MARKUP)); parts.add(new TextPart(interpretAs, TextPart.Type.FAKE_CONTENT)); return this; } /** @since 5.4 */ public void add(TextPart part) { parts.add(part); } /** * Create the annotated text to be passed into {@link org.languagetool.JLanguageTool#check(AnnotatedText)}. */ public AnnotatedText build() { int plainTextPosition = 0; int totalPosition = 0; Map mapping = new HashMap<>(); for (int i = 0; i < parts.size(); i++) { TextPart part = parts.get(i); if (part.getType() == TextPart.Type.TEXT) { plainTextPosition += part.getPart().length(); totalPosition += part.getPart().length(); MappingValue mappingValue = new MappingValue(totalPosition); mapping.put(plainTextPosition, mappingValue); } else if (part.getType() == TextPart.Type.MARKUP) { totalPosition += part.getPart().length(); if (hasFakeContent(i, parts)) { plainTextPosition += parts.get(i + 1).getPart().length(); i++; if (mapping.get(plainTextPosition) == null) { MappingValue mappingValue = new MappingValue(totalPosition, part.getPart().length()); mapping.put(plainTextPosition, mappingValue); } } } } return new AnnotatedText(parts, mapping, metaData, customMetaData); } private boolean hasFakeContent(int i, List parts) { int nextPartIndex = i + 1; if (nextPartIndex < parts.size()) { if (parts.get(nextPartIndex).getType().equals(TextPart.Type.FAKE_CONTENT)) { return true; } } return false; } }