All Downloads are FREE. Search and download functionalities are using the official Maven repository.

querqy.rewrite.contrib.replace.ReplaceRewriterParser Maven / Gradle / Ivy

There is a newer version: 3.18.1
Show newest version
package querqy.rewrite.contrib.replace;

import querqy.CompoundCharSequence;
import querqy.LowerCaseCharSequence;
import querqy.model.DisjunctionMaxQuery;
import querqy.model.Query;
import querqy.model.Term;
import querqy.parser.QuerqyParser;
import querqy.rewrite.commonrules.RuleParseException;
import querqy.trie.SequenceLookup;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.Arrays;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Set;
import java.util.stream.Collectors;

public class ReplaceRewriterParser {

    private final InputStreamReader inputStreamReader;
    private final boolean ignoreCase;
    private final String inputDelimiter;
    private final QuerqyParser querqyParser;

    private static final String ERROR_MESSAGE_IMPROPER_INPUT_TEMPLATE = "ReplaceRule not properly configured for rule %s. \n" +
            "Each non-empty line must either start with # or " +
            "contain a rule with at least one input and one output, e. g. a => b\n" +
            "For suffix and prefix rules, only one input can be defined per output, e. g. a* => b\n" +
            "The wildcard cannot be defined multiple times in the same rule, a definition like *a* => b is not allowed." +
            "The wildcard cannot be used as a standalone input, a definition like * => b is not allowed.\n" +
            "The wildcard match can be added to the output of a suffix or prefix rule using $1, e. g. a* => a $1 or " +
            "*a => $1b.";

    private static final String ERROR_MESSAGE_DUPLICATE_INPUT_TEMPLATE = "Duplicate input: %s";

    private static final String OPERATOR = "=>";
    private static final String WILDCARD = "*";

    private String errorMessageImproperInput = "";
    private String errorMessageDuplicateInput = "";

    public ReplaceRewriterParser(final InputStreamReader inputStreamReader,
                                 final boolean ignoreCase,
                                 final String inputDelimiter,
                                 final QuerqyParser querqyParser) {

        this.inputStreamReader = inputStreamReader;
        this.ignoreCase = ignoreCase;
        this.inputDelimiter = inputDelimiter;
        this.querqyParser = querqyParser;
    }

    public SequenceLookup parseConfig() throws IOException {

        final SequenceLookup sequenceLookup = new SequenceLookup<>(ignoreCase);
        final Set checkForDuplicateInput = new HashSet<>();

        try (final BufferedReader bufferedReader = new BufferedReader(inputStreamReader)) {

            String line;
            while ((line = bufferedReader.readLine()) != null) {

                line = line.trim();
                if (line.isEmpty() || line.startsWith("#")) {
                    continue;
                }

                errorMessageImproperInput = String.format(ERROR_MESSAGE_IMPROPER_INPUT_TEMPLATE, line);

                throwIfTrue(!line.contains(OPERATOR), errorMessageImproperInput);

                final String[] lineSplit = line.split(OPERATOR);
                throwIfTrue(lineSplit.length < 1 || lineSplit.length > 2, errorMessageImproperInput);

                final String fullInput = lineSplit[0].trim();
                throwIfTrue(fullInput.isEmpty(), errorMessageImproperInput);

                final String output = lineSplit.length == 2 ? lineSplit[1].trim() : "";

                errorMessageDuplicateInput = String.format(ERROR_MESSAGE_DUPLICATE_INPUT_TEMPLATE, fullInput);

                final List> inputs = parseInput(fullInput);
                final LinkedList outputList = parseOutput(output);

                for (final LinkedList input : inputs) {

                    if (fullInput.startsWith(WILDCARD) && fullInput.length() > 1) {

                        throwIfTrue(fullInput.endsWith(WILDCARD), errorMessageImproperInput);
                        throwIfTrue(input.size() != 1, errorMessageImproperInput);

                        final CharSequence seq = lc(input.get(0));
                        throwIfTrue(checkForDuplicateInput.contains(seq), errorMessageDuplicateInput);
                        checkForDuplicateInput.add(seq);

                        sequenceLookup.putSuffix(
                                seq.subSequence(1, seq.length()),
                                new WildcardReplaceInstruction(outputList)
                        );

                    } else if (fullInput.endsWith(WILDCARD) && fullInput.length() > 1) {

                        throwIfTrue(input.size() != 1, errorMessageImproperInput);

                        final CharSequence seq = lc(input.get(0));
                        throwIfTrue(checkForDuplicateInput.contains(seq), errorMessageDuplicateInput);
                        checkForDuplicateInput.add(seq);

                        sequenceLookup.putPrefix(
                                seq.subSequence(0, fullInput.length() - 1),
                                new WildcardReplaceInstruction(outputList)
                        );

                    } else {

                        throwIfTrue(input.stream().anyMatch(term ->
                                term.startsWith(WILDCARD) || term.endsWith(WILDCARD)), errorMessageImproperInput);
                        final List seqList = lc(input);

                        final CharSequence seq = new CompoundCharSequence(" ", seqList);
                        throwIfTrue(checkForDuplicateInput.contains(seq), errorMessageDuplicateInput);
                        checkForDuplicateInput.add(seq);

                        sequenceLookup.put(
                                seqList,
                                new TermsReplaceInstruction(outputList)
                        );
                    }
                }
            }

        } catch (RuleParseException e) {
            throw new IOException(e);
        }

        return sequenceLookup;
    }

    private CharSequence lc(final String seq) {
        return ignoreCase ? new LowerCaseCharSequence(seq) : seq;
    }

    private List lc(final List seq) {
        return seq.stream().map(this::lc).collect(Collectors.toList());
    }

    private void throwIfTrue(final boolean bool, final String message) throws RuleParseException {
        if (bool) {
            throw new RuleParseException(message);
        }
    }

    private LinkedList parseOutput(final String term) {
        return parseQuery(this.querqyParser.parse(term));
    }

    private List> parseInput(final String fullInput) throws RuleParseException {
        final List inputs = Arrays.stream(fullInput.split(this.inputDelimiter))
                .map(String::trim)
                .filter(term -> !term.isEmpty())
                .collect(Collectors.toList());

        throwIfTrue(inputs.isEmpty(), errorMessageImproperInput);

        return inputs.stream()
                .map(this.querqyParser::parse)
                .map(this::parseQuery)
                .collect(Collectors.toList());
    }

    private LinkedList parseQuery(final Query query) {
        return query.getClauses().stream()
                .map(booleanClause -> (DisjunctionMaxQuery) booleanClause)
                .flatMap(disjunctionMaxQuery -> disjunctionMaxQuery.getTerms().stream())
                .map(Term::getValue)
                .map(CharSequence::toString)
                .collect(Collectors.toCollection(LinkedList::new));
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy