org.languagetool.chunking.TokenPredicate Maven / Gradle / Ivy

Go to download

Show more of this group Show more artifacts with this name
Show all versions of languagetool-core Show documentation

LanguageTool is an Open Source proofreading software for English, French, German, Polish, Romanian, and more than 20 other languages. It finds many errors that a simple spell checker cannot detect like mixing up there/their and it detects some grammar problems.

There is a newer version: 6.5

Show newest version

/* LanguageTool, a natural language style checker
 * Copyright (C) 2015 Daniel Naber (http://www.danielnaber.de)
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
 * USA
 */
package org.languagetool.chunking;

import edu.washington.cs.knowitall.logic.Expression;
import org.languagetool.AnalyzedToken;
import org.languagetool.AnalyzedTokenReadings;
import org.languagetool.rules.patterns.StringMatcher;

import java.util.function.Predicate;

final class TokenPredicate extends Expression.Arg.Pred {
  private final Predicate predicate;

  TokenPredicate(String description, boolean caseSensitive) {
    super(description);
    predicate = compile(description, caseSensitive);
  }

  private Predicate compile(String description, boolean caseSensitive) {
    String[] parts = description.split("=");
    String exprType;
    String exprValue;
    if (parts.length == 1) {
      exprType = "string";
      exprValue = unquote(parts[0]);
    } else if (parts.length == 2) {
      exprType = parts[0];
      exprValue = unquote(parts[1]);
    } else {
      throw new RuntimeException("Could not parse expression: " + getDescription());
    }

    switch (exprType) {
      case "string":
      case "regex":
      case "regexCS":
        StringMatcher matcher = StringMatcher.create(exprValue, !"string".equals(exprType), caseSensitive || "regexCS".equals(exprType));
        return analyzedToken -> matcher.matches(analyzedToken.getToken());

      case "chunk":
        StringMatcher chunkPattern = StringMatcher.regexp(exprValue);
        return analyzedToken -> {
          for (ChunkTag chunkTag : analyzedToken.getChunkTags()) {
            if (chunkPattern.matches(chunkTag.getChunkTag())) {
              return true;
            }
          }
          return false;
        };

      case "pos":
        return analyzedToken -> {
          AnalyzedTokenReadings readings = analyzedToken.getReadings();
          if (readings != null) {
            for (AnalyzedToken token : readings) {
              if (token.getPOSTag() != null && token.getPOSTag().contains(exprValue)) {
                return true;
              }
            }
          }
          return false;
        };

      case "posre":
      case "posregex":
        StringMatcher posPattern = StringMatcher.regexp(exprValue);
        return analyzedToken -> {
          AnalyzedTokenReadings readings = analyzedToken.getReadings();
          if (readings != null) {
            for (AnalyzedToken token : readings) {
              if (token.getPOSTag() != null && posPattern.matches(token.getPOSTag())) {
                return true;
              }
            }
          }
          return false;
        };

      default:
        throw new RuntimeException("Expression type not supported: '" + exprType + "'");
    }

  }

  private static String unquote(String s) {
    return s.startsWith("'") && s.endsWith("'") ? s.substring(1, s.length() - 1) : s;
  }

  @Override
  public boolean apply(ChunkTaggedToken analyzedToken) {
    return predicate.test(analyzedToken);
  }
}