package org.languagetool.rules.uk;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.languagetool.AnalyzedTokenReadings;
import org.languagetool.tagging.uk.PosTagHelper;
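/**
 * Helps to search for sequences of analyzed tokens that satisfy simple
 * conditions (token text, lemma, POS tag), optionally skipping quotes and
 * comma- or bracket-delimited inserts.
 *
 * <p>Illustrative sketch only (the {@code tokens} array and index {@code i}
 * would normally come from a rule's match logic):</p>
 *
 * <pre>{@code
 * int lastIdx = new SearchHelper.Match()
 *     .tokenLine("так, як")    // hypothetical target sequence
 *     .limit(3)
 *     .mAfter(tokens, i);      // index of the last matched token, or -1
 * }</pre>
 */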
class SearchHelper {
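  /**
   * Builder-style matcher: set target conditions (and optional skip conditions),
   * then search backwards with {@link #mBefore}, forwards with {@link #mAfter},
   * or with {@link #mNow}.
   */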
  public static class Match {
    private static final Pattern WHITESPACE = Pattern.compile("\\s");
    private boolean ignoreQuotes = true;
    private boolean ignoreInserts = false;
    // private boolean ignoreCase = true;
    // private String[] searchTokens;
    private List<Condition> targets;
    private int limit = -1;
    private List<Condition> skips = new ArrayList<>();

    // public Match tokens(String... tokens) { this.searchTokens = tokens; return this; }
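    /**
     * Sets the target conditions from a whitespace-separated token line;
     * commas are split off and matched as separate tokens.
     */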
    public Match tokenLine(String tokenLine) {
      this.targets = Stream.of(WHITESPACE.split(tokenLine.replace(",", " ,")))
          .map(s -> Condition.token(s))
          .collect(Collectors.toList());
      return this;
    }
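    // Builder-style setters: distance limit, insert skipping, skip conditions, and target conditions.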
    public Match limit(int limit) { this.limit = limit; return this; }
    public Match ignoreInserts() { this.ignoreInserts = true; return this; }
    public Match skip(Condition... conditions) { this.skips = Arrays.asList(conditions); return this; }
    public Match target(Condition... conditions) { this.targets = Arrays.asList(conditions); return this; }
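    /**
     * Searches backwards from {@code pos}, matching the target conditions right
     * to left and optionally skipping quotes and inserts.
     * @return the index of the leftmost matched token, or -1 if no match
     */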
    public int mBefore(AnalyzedTokenReadings[] tokens, int pos) {
      boolean foundFirst = false;
      int logicalDistance = 0;
      for (int iCond = targets.size() - 1; iCond >= 0; pos--) {
        if( pos - 1 < iCond )
          return -1;
        if( limit > 0 && logicalDistance > limit )
          return -1;
        logicalDistance++;
        AnalyzedTokenReadings currentToken = tokens[pos];
        if( ignoreQuotes && LemmaHelper.QUOTES_PATTERN.matcher(currentToken.getToken()).matches() ) {
          // pos--;
          continue;
        }
        if( ignoreInserts(tokens, pos, -1) ) {
          pos -= 2;
          continue;
        }
        if( ignoreInserts && tokens[pos].getToken().equals(")") ) {
          for(int i = pos - 1; i >= 1; i--) {
            if( tokens[i].getToken().equals("(") ) {
              pos = i;
              continue;
            }
          }
        }
        Context context = new Context(tokens, pos);
        if( ! targets.get(iCond).matches(currentToken, context) ) {
          if( foundFirst )
            return -1;
          if( ! canSkip(currentToken, context) )
            return -1;
          continue;
        }
        foundFirst = true;
        iCond--;
      }
      return pos + 1;
    }
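    /**
     * Searches forwards from {@code pos}, matching the target conditions left
     * to right and optionally skipping quotes and inserts.
     * @return the index of the rightmost matched token, or -1 if no match
     */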
    public int mAfter(AnalyzedTokenReadings[] tokens, int pos) {
      boolean foundFirst = false;
      int logicalDistance = 0;
      for (int iCond = 0; iCond < targets.size(); pos++) {
        if( pos + targets.size() - iCond > tokens.length )
          return -1;
        if( limit > 0 && logicalDistance > limit )
          return -1;
        logicalDistance++;
        AnalyzedTokenReadings currentToken = tokens[pos];
        if( ignoreQuotes && LemmaHelper.QUOTES_PATTERN.matcher(currentToken.getToken()).matches() ) {
          // pos++;
          continue;
        }
        if( ignoreInserts(tokens, pos, +1) ) {
          pos += 2;
          continue;
        }
        if( ignoreInserts && tokens[pos].getToken().equals("(") ) {
          for(int i = pos + 1; i <= tokens.length - 1; i++) {
            if( tokens[i].getToken().equals(")") ) {
              pos = i;
              continue;
            }
          }
        }
        Context context = new Context(tokens, pos);
        if( ! targets.get(iCond).matches(currentToken, context) ) {
          if( foundFirst )
            return -1;
          if( ! canSkip(currentToken, context) )
            return -1;
          continue;
        }
        foundFirst = true;
        iCond++;
      }
      return pos - 1;
    }
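    /** Shortcut for a forward match with the distance limit set via {@code limit(0)}. */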
    public int mNow(AnalyzedTokenReadings[] tokens, int pos) {
      return limit(0).mAfter(tokens, pos);
    }
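    /** True when there are no skip conditions or the current token matches one of them. */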
    private boolean canSkip(AnalyzedTokenReadings currentToken, Context context) {
      return skips.isEmpty()
          || skips.stream().anyMatch(s -> s.matches(currentToken, context));
    }
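    /**
     * Detects a comma-delimited insert (a comma, then an insert-tagged word or a
     * lemma like "зокрема"/"відповідно", then another comma) in the given direction.
     */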
    private boolean ignoreInserts(AnalyzedTokenReadings[] tokens, int pos, int dir) {
      return ignoreInserts
          && (dir > 0 ? pos + 3 < tokens.length : pos - 3 > 0)
          && ",".equals(tokens[pos].getToken())
          && ",".equals(tokens[pos + 2*dir].getToken())
          && (PosTagHelper.hasPosTagPart(tokens[pos + 1*dir], "insert")
              || LemmaHelper.hasLemma(tokens[pos + 1*dir], Arrays.asList("зокрема", "відповідно")));
    }
  }
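  /**
   * A condition on a single token: any combination of POS tag pattern, lemma
   * pattern, token pattern, or literal (case-insensitive) token string;
   * {@link #negate()} inverts the result.
   * E.g. (illustrative): {@code Condition.postag(Pattern.compile("noun.*")).negate()}.
   */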
  static class Condition {
    Pattern postag;
    Pattern lemma;
    Pattern tokenPattern;
    String tokenStr;
    private boolean negate;

    public Condition() {
    }

    public Condition(Pattern lemma, Pattern postag) {
      this.lemma = lemma;
      this.postag = postag;
    }

    public static Condition postag(Pattern pattern) {
      Condition condition = new Condition();
      condition.postag = pattern;
      return condition;
    }

    public static Condition lemma(Pattern pattern) {
      Condition condition = new Condition();
      condition.lemma = pattern;
      return condition;
    }

    public static Condition token(Pattern pattern) {
      Condition condition = new Condition();
      condition.tokenPattern = pattern;
      return condition;
    }

    public static Condition token(String token) {
      Condition condition = new Condition();
      condition.tokenStr = token;
      return condition;
    }

    public Condition negate() {
      this.negate = true;
      return this;
    }

    public boolean matches(AnalyzedTokenReadings analyzedTokenReadings, Context context) {
      return negate
          ^ ((postag == null || PosTagHelper.hasPosTag(analyzedTokenReadings, postag))
              && (lemma == null || LemmaHelper.hasLemma(analyzedTokenReadings, lemma))
              && (tokenPattern == null || tokenPattern.matcher(analyzedTokenReadings.getCleanToken()).matches())
              && (tokenStr == null || tokenStr.equalsIgnoreCase(analyzedTokenReadings.getCleanToken())));
    }

    @Override
    public String toString() {
      return "Condition [postag=" + postag + ", lemma=" + lemma + ", token=" + tokenPattern + ", tokenStr=" + tokenStr + "]";
    }
  }
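  /** Read-only view of the token array and the position currently being inspected. */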
  static class Context {
    final AnalyzedTokenReadings[] tokens;
    final int pos;

    public Context(AnalyzedTokenReadings[] tokens, int pos) {
      this.tokens = tokens;
      this.pos = pos;
    }
  }
}