/*
 * Copyright (C) 2020 Graylog, Inc.
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the Server Side Public License, version 1,
 * as published by MongoDB, Inc.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * Server Side Public License for more details.
 *
 * You should have received a copy of the Server Side Public License
 * along with this program. If not, see
 * <http://www.mongodb.com/licensing/server-side-public-license>.
 */
package org.graylog.plugins.views.search.validation;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.classic.QueryParserConstants;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.FuzzyQuery;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.QueryVisitor;
import org.apache.lucene.search.RegexpQuery;
import org.apache.lucene.search.TermRangeQuery;
import org.apache.lucene.search.WildcardQuery;

import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.Map;

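/**
 * A Lucene {@link QueryVisitor} that walks a parsed {@link Query} and collects every term it encounters
 * as a {@link ParsedTerm}, pairing each term with the matching parser tokens from the supplied lookup so
 * that later validation steps know which field and value a term belongs to.
 * <p>
 * Rough usage sketch (the token lookup is normally recorded while the query is parsed; the placeholders
 * below are illustrative only):
 * <pre>{@code
 * Map<Query, Collection<ImmutableToken>> tokenLookup = ...; // collected during parsing
 * TermCollectingQueryVisitor visitor = new TermCollectingQueryVisitor(analyzer, tokenLookup);
 * query.visit(visitor);
 * List<ParsedTerm> terms = visitor.getParsedTerms();
 * }</pre>
 */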
public class TermCollectingQueryVisitor extends QueryVisitor {

    private final Analyzer analyzer;
    private final List<ParsedTerm> parsedTerms = new ArrayList<>();
    private final Map<Query, Collection<ImmutableToken>> tokenLookup;

    public TermCollectingQueryVisitor(Analyzer analyzer, Map<Query, Collection<ImmutableToken>> tokenLookup) {
        this.analyzer = analyzer;
        this.tokenLookup = tokenLookup;
    }

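    /**
     * Called for queries that expose their terms directly (e.g. term and phrase queries). Looks up the
     * tokens recorded for the given query and records each term as a {@link ParsedTerm}.
     */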
    @Override
    public void consumeTerms(Query query, Term... terms) {
        super.consumeTerms(query, terms);
        final Collection<ImmutableToken> tokens = tokenLookup.getOrDefault(query, Collections.emptySet());
        processTerms(tokens, terms);
    }

    private void processTerms(Collection<ImmutableToken> tokens, Term... terms) {
        for (Term t : terms) {

            final ParsedTerm.Builder termBuilder = ParsedTerm.builder()
                    .field(t.field())
                    .value(t.text());

            if (t.field().equals(ParsedTerm.DEFAULT_FIELD) || t.field().equals(ParsedTerm.EXISTS)) {
                tokens.stream()
                        .filter(token -> token.matches(QueryParserConstants.TERM, t.text()))
                        .findFirst()
                        .ifPresent(termBuilder::keyToken);
            } else {
                tokens.stream()
                        .filter(token -> token.kind() == QueryParserConstants.TERM)
                        .filter(token -> token.image().equals(t.field()))
                        .findFirst()
                        .ifPresent(token -> {
                            termBuilder.keyToken(token);
                            final String value = t.text();
                            tokens.stream()
                                    .filter(v -> v.kind() == QueryParserConstants.TERM)
                                    .filter(v -> normalize(t.field(), v.image()).equals(value))
                                    .findFirst()
                                    .ifPresent(termBuilder::valueToken);
                        });
            }
            parsedTerms.add(termBuilder.build());
        }
    }

    /**
     * To compare token values with query values, we first have to normalize the value using the same analyzer.
     * With the StandardAnalyzer, for example, this can mean differences such as lowercase conversion.
     */
    private String normalize(String fieldName, String value) {
        return analyzer.normalize(fieldName, value).utf8ToString();
    }

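    /**
     * Handles leaf queries that do not report their terms through {@code consumeTerms}, such as regexp,
     * range, wildcard, prefix and fuzzy queries, by extracting their textual parts manually.
     */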
    @Override
    public void visitLeaf(Query query) {
        final Collection<ImmutableToken> tokens = tokenLookup.get(query);
        if (query instanceof RegexpQuery) {
            processTerms(tokens, ((RegexpQuery) query).getRegexp());
        } else if (query instanceof TermRangeQuery) { // add lower and upper term as independent values, good enough for validation
            final TermRangeQuery trq = (TermRangeQuery) query;
            processTerms(
                    tokens, new Term(trq.getField(), trq.getLowerTerm().utf8ToString()),
                    new Term(trq.getField(), trq.getUpperTerm().utf8ToString())
            );
        } else if (query instanceof WildcardQuery) {
            processTerms(tokens, ((WildcardQuery) query).getTerm());
        } else if (query instanceof PrefixQuery) {
            processTerms(tokens, ((PrefixQuery) query).getPrefix());
        } else if (query instanceof FuzzyQuery) {
            processTerms(tokens, ((FuzzyQuery) query).getTerm());
        } else {
            throw new IllegalArgumentException("Unrecognized query type: " + query.getClass().getName());
        }
    }

    @Override
    public QueryVisitor getSubVisitor(BooleanClause.Occur occur, Query parent) {
        // the default implementation skips MUST_NOT clauses; we want to collect terms from all clauses, including MUST_NOT
        return this;
    }

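    /**
     * @return all terms collected while visiting the query, in visit order
     */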
    public List<ParsedTerm> getParsedTerms() {
        return parsedTerms;
    }
}