All downloads are free. The search and download features use the official Maven repository.

com.android.tools.lint.checks.TypoDetector Maven / Gradle / Ivy

Go to download

A packaging of the IntelliJ Community Edition lint-checks library. This is release number 1 of trunk branch 142.

The newest version!
/*
 * Copyright (C) 2011 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.android.tools.lint.checks;

import static com.android.SdkConstants.ATTR_LOCALE;
import static com.android.SdkConstants.ATTR_TRANSLATABLE;
import static com.android.SdkConstants.FD_RES_VALUES;
import static com.android.SdkConstants.TAG_PLURALS;
import static com.android.SdkConstants.TAG_STRING;
import static com.android.SdkConstants.TAG_STRING_ARRAY;
import static com.android.SdkConstants.TOOLS_URI;
import static com.android.tools.lint.checks.TypoLookup.isLetter;
import static com.google.common.base.Objects.equal;

import com.android.annotations.NonNull;
import com.android.annotations.Nullable;
import com.android.ide.common.resources.configuration.LocaleQualifier;
import com.android.resources.ResourceFolderType;
import com.android.tools.lint.detector.api.Category;
import com.android.tools.lint.detector.api.Context;
import com.android.tools.lint.detector.api.Implementation;
import com.android.tools.lint.detector.api.Issue;
import com.android.tools.lint.detector.api.Location;
import com.android.tools.lint.detector.api.ResourceXmlDetector;
import com.android.tools.lint.detector.api.Scope;
import com.android.tools.lint.detector.api.Severity;
import com.android.tools.lint.detector.api.Speed;
import com.android.tools.lint.detector.api.TextFormat;
import com.android.tools.lint.detector.api.XmlContext;
import com.android.utils.Pair;
import com.google.common.base.Charsets;
import com.google.common.base.Splitter;

import org.w3c.dom.Attr;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.List;

/**
 * Check which looks for likely typos in Strings.
 * 

* TODO: *

    *
  • Add check of Java String literals too! *
  • Add support for additional languages. The typo detector is now * multilingual and looks for typos-*locale*.txt files to use. However, * we need to seed it with additional typo databases. I did some searching * and came up with some alternatives. Here's the strategy I used: * Used Google Translate to translate "Wikipedia Common Misspellings", and * then I went to google.no, google.fr etc searching with that translation, and * came up with what looks like wikipedia language local lists of typos. * This is how I found the Norwegian one for example: *
    * http://no.wikipedia.org/wiki/Wikipedia:Liste_over_alminnelige_stavefeil/Maskinform *
    * Here are some additional possibilities not yet processed: *
      *
    • French: http://fr.wikipedia.org/wiki/Wikip%C3%A9dia:Liste_de_fautes_d'orthographe_courantes * (couldn't find a machine-readable version there?) *
    • Swedish: * http://sv.wikipedia.org/wiki/Wikipedia:Lista_%C3%B6ver_vanliga_spr%C3%A5kfel * (couldn't find a machine-readable version there?) *
    • German * http://de.wikipedia.org/wiki/Wikipedia:Liste_von_Tippfehlern/F%C3%BCr_Maschinen *
    *
  • Consider also digesting files like * http://sv.wikipedia.org/wiki/Wikipedia:AutoWikiBrowser/Typos * See http://en.wikipedia.org/wiki/Wikipedia:AutoWikiBrowser/User_manual. *
*/ public class TypoDetector extends ResourceXmlDetector { @Nullable private TypoLookup mLookup; @Nullable private String mLastLanguage; @Nullable private String mLastRegion; @Nullable private String mLanguage; @Nullable private String mRegion; /** The main issue discovered by this detector */ public static final Issue ISSUE = Issue.create( "Typos", //$NON-NLS-1$ "Spelling error", "This check looks through the string definitions, and if it finds any words " + "that look like likely misspellings, they are flagged.", Category.MESSAGES, 7, Severity.WARNING, new Implementation( TypoDetector.class, Scope.RESOURCE_FILE_SCOPE)); /** Constructs a new detector */ public TypoDetector() { } @Override public boolean appliesTo(@NonNull ResourceFolderType folderType) { return folderType == ResourceFolderType.VALUES; } /** Look up the locale and region from the given parent folder name and store it * in {@link #mLanguage} and {@link #mRegion} */ private void initLocale(@NonNull String parent) { mLanguage = null; mRegion = null; if (parent.equals(FD_RES_VALUES)) { return; } Pair locale = getLocale(parent); if (locale != null) { mLanguage = locale.getFirst(); mRegion = locale.getSecond(); } } /** * Returns the locale for the given parent folder. 
* * @param parent the name of the parent folder * @return null if the locale is not known, or a pair of language and region * where one or the other but not both can be null */ @Nullable public static Pair getLocale(@NonNull String parent) { String language = null; String region = null; for (String qualifier : Splitter.on('-').split(parent)) { int qualifierLength = qualifier.length(); if (qualifierLength == 2) { char first = qualifier.charAt(0); char second = qualifier.charAt(1); if (first >= 'a' && first <= 'z' && second >= 'a' && second <= 'z') { language = qualifier; } } else if (qualifierLength == 3 && qualifier.charAt(0) == 'r') { char first = qualifier.charAt(1); char second = qualifier.charAt(2); if (first >= 'A' && first <= 'Z' && second >= 'A' && second <= 'Z') { region = new String(new char[] { first, second }); // Don't include the "r" } break; } else if (qualifier.startsWith(LocaleQualifier.PREFIX)) { return LocaleQualifier.parseBcp47(qualifier); } } if (language != null || region != null) { return Pair.of(language, region); } else { return null; } } /** * Returns the locale for the given context. 
* * @param context the context to look up the locale for * @return null if the locale is not known, or a pair of language and region * where one or the other but not both can be null */ @Nullable public static Pair getLocale(@NonNull XmlContext context) { Element root = context.document.getDocumentElement(); if (root != null) { String locale = root.getAttributeNS(TOOLS_URI, ATTR_LOCALE); if (locale != null && !locale.isEmpty()) { return getLocale(locale); } } return getLocale(context.file.getParentFile().getName()); } @Override public void beforeCheckFile(@NonNull Context context) { initLocale(context.file.getParentFile().getName()); if (mLanguage == null) { // Check to see if the user has specified the language for this folder // using a tools:locale attribute if (context instanceof XmlContext) { Element root = ((XmlContext) context).document.getDocumentElement(); if (root != null) { String locale = root.getAttributeNS(TOOLS_URI, ATTR_LOCALE); if (locale != null && !locale.isEmpty()) { initLocale(FD_RES_VALUES + '-' + locale); } } } if (mLanguage == null) { mLanguage = "en"; //$NON-NLS-1$ } } if (!equal(mLastLanguage, mLanguage) || !equal(mLastRegion, mRegion)) { mLookup = TypoLookup.get(context.getClient(), mLanguage, mRegion); mLastLanguage = mLanguage; mLastRegion = mRegion; } } @NonNull @Override public Speed getSpeed() { return Speed.NORMAL; } @Override public Collection getApplicableElements() { return Arrays.asList( TAG_STRING, TAG_STRING_ARRAY, TAG_PLURALS ); } @Override public void visitElement(@NonNull XmlContext context, @NonNull Element element) { if (mLookup == null) { return; } visit(context, element, element); } private void visit(XmlContext context, Element parent, Node node) { if (node.getNodeType() == Node.TEXT_NODE) { // TODO: Figure out how to deal with entities check(context, parent, node, node.getNodeValue()); } else { NodeList children = node.getChildNodes(); for (int i = 0, n = children.getLength(); i < n; i++) { visit(context, parent, 
children.item(i)); } } } private void check(XmlContext context, Element element, Node node, String text) { int max = text.length(); int index = 0; int lastWordBegin = -1; int lastWordEnd = -1; boolean checkedTypos = false; for (; index < max; index++) { char c = text.charAt(index); if (!Character.isWhitespace(c)) { if (c == '@' || (c == '?')) { // Don't look for typos in resource references; they are not // user visible anyway return; } break; } } while (index < max) { for (; index < max; index++) { char c = text.charAt(index); if (c == '\\') { index++; } else if (Character.isLetter(c)) { break; } } if (index >= max) { return; } int begin = index; for (; index < max; index++) { char c = text.charAt(index); if (c == '\\') { index++; break; } else if (!Character.isLetter(c)) { break; } else if (text.charAt(index) >= 0x80) { // Switch to UTF-8 handling for this string if (checkedTypos) { // If we've already checked words we may have reported typos // so create a substring from the current word and on. 
byte[] utf8Text = text.substring(begin).getBytes(Charsets.UTF_8); check(context, element, node, utf8Text, 0, utf8Text.length, text, begin); } else { // If all we've done so far is skip whitespace (common scenario) // then no need to substring the text, just re-search with the // UTF-8 routines byte[] utf8Text = text.getBytes(Charsets.UTF_8); check(context, element, node, utf8Text, 0, utf8Text.length, text, 0); } return; } } int end = index; checkedTypos = true; assert mLookup != null; List replacements = mLookup.getTypos(text, begin, end); if (replacements != null && isTranslatable(element)) { reportTypo(context, node, text, begin, replacements); } checkRepeatedWords(context, element, node, text, lastWordBegin, lastWordEnd, begin, end); lastWordBegin = begin; lastWordEnd = end; index = end + 1; } } private static void checkRepeatedWords(XmlContext context, Element element, Node node, String text, int lastWordBegin, int lastWordEnd, int begin, int end) { if (lastWordBegin != -1 && end - begin == lastWordEnd - lastWordBegin && end - begin > 1) { // See whether we have a repeated word boolean different = false; for (int i = lastWordBegin, j = begin; i < lastWordEnd; i++, j++) { if (text.charAt(i) != text.charAt(j)) { different = true; break; } } if (!different && onlySpace(text, lastWordEnd, begin) && isTranslatable(element)) { reportRepeatedWord(context, node, text, lastWordBegin, begin, end); } } } private static boolean onlySpace(String text, int fromInclusive, int toExclusive) { for (int i = fromInclusive; i < toExclusive; i++) { if (!Character.isWhitespace(text.charAt(i))) { return false; } } return true; } private void check(XmlContext context, Element element, Node node, byte[] utf8Text, int byteStart, int byteEnd, String text, int charStart) { int lastWordBegin = -1; int lastWordEnd = -1; int index = byteStart; while (index < byteEnd) { // Find beginning of word while (index < byteEnd) { byte b = utf8Text[index]; if (b == '\\') { index++; charStart++; if 
(index < byteEnd) { b = utf8Text[index]; } } else if (isLetter(b)) { break; } index++; if ((b & 0x80) == 0 || (b & 0xC0) == 0xC0) { // First characters in UTF-8 are always ASCII (0 high bit) or 11XXXXXX charStart++; } } if (index >= byteEnd) { return; } int charEnd = charStart; int begin = index; // Find end of word. Unicode has the nice property that even 2nd, 3rd and 4th // bytes won't match these ASCII characters (because the high bit must be set there) while (index < byteEnd) { byte b = utf8Text[index]; if (b == '\\') { index++; charEnd++; if (index < byteEnd) { b = utf8Text[index++]; if ((b & 0x80) == 0 || (b & 0xC0) == 0xC0) { charEnd++; } } break; } else if (!isLetter(b)) { break; } index++; if ((b & 0x80) == 0 || (b & 0xC0) == 0xC0) { // First characters in UTF-8 are always ASCII (0 high bit) or 11XXXXXX charEnd++; } } int end = index; List replacements = mLookup.getTypos(utf8Text, begin, end); if (replacements != null && isTranslatable(element)) { reportTypo(context, node, text, charStart, replacements); } checkRepeatedWords(context, element, node, text, lastWordBegin, lastWordEnd, charStart, charEnd); lastWordBegin = charStart; lastWordEnd = charEnd; charStart = charEnd; } } private static boolean isTranslatable(Element element) { Attr translatable = element.getAttributeNode(ATTR_TRANSLATABLE); return translatable == null || Boolean.valueOf(translatable.getValue()); } /** Report the typo found at the given offset and suggest the given replacements */ private static void reportTypo(XmlContext context, Node node, String text, int begin, List replacements) { if (replacements.size() < 2) { return; } String typo = replacements.get(0); String word = text.substring(begin, begin + typo.length()); String first = null; String message; boolean isCapitalized = Character.isUpperCase(word.charAt(0)); StringBuilder sb = new StringBuilder(40); for (int i = 1, n = replacements.size(); i < n; i++) { String replacement = replacements.get(i); if (first == null) { first = 
replacement; } if (sb.length() > 0) { sb.append(" or "); } sb.append('"'); if (isCapitalized) { sb.append(Character.toUpperCase(replacement.charAt(0))); sb.append(replacement.substring(1)); } else { sb.append(replacement); } sb.append('"'); } if (first != null && first.equalsIgnoreCase(word)) { if (first.equals(word)) { return; } message = String.format( "\"%1$s\" is usually capitalized as \"%2$s\"", word, first); } else { message = String.format( "\"%1$s\" is a common misspelling; did you mean %2$s ?", word, sb.toString()); } int end = begin + word.length(); context.report(ISSUE, node, context.getLocation(node, begin, end), message); } /** Reports a repeated word */ private static void reportRepeatedWord(XmlContext context, Node node, String text, int lastWordBegin, int begin, int end) { String message = String.format( "Repeated word \"%1$s\" in message: possible typo", text.substring(begin, end)); Location location = context.getLocation(node, lastWordBegin, end); context.report(ISSUE, node, location, message); } /** Returns the suggested replacements, if any, for the given typo. The error * message must be one supplied by lint. 
* * @param errorMessage the error message * @param format the format of the error message * @return a list of replacement words suggested by the error message */ @Nullable public static List getSuggestions(@NonNull String errorMessage, @NonNull TextFormat format) { errorMessage = format.toText(errorMessage); // The words are all in quotes; the first word is the misspelling, // the other words are the suggested replacements List words = new ArrayList(); // Skip the typo int index = errorMessage.indexOf('"'); index = errorMessage.indexOf('"', index + 1); index++; while (true) { index = errorMessage.indexOf('"', index); if (index == -1) { break; } index++; int start = index; index = errorMessage.indexOf('"', index); if (index == -1) { index = errorMessage.length(); } words.add(errorMessage.substring(start, index)); index++; } return words; } /** * Returns the typo word in the error message from this detector * * @param errorMessage the error message produced earlier by this detector * @param format the format of the error message * @return the typo */ @Nullable public static String getTypo(@NonNull String errorMessage, @NonNull TextFormat format) { errorMessage = format.toText(errorMessage); // The words are all in quotes int index = errorMessage.indexOf('"'); int start = index + 1; index = errorMessage.indexOf('"', start); if (index != -1) { return errorMessage.substring(start, index); } return null; } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy