com.exsoinn.util.epf.AbstractContext Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of element-path-finder Show documentation
Adapter to search disparate data formats
The newest version!
package com.exsoinn.util.epf;

import net.jcip.annotations.Immutable;

import java.util.*;
import java.util.concurrent.ConcurrentHashMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;


/**
 * Abstract implementation of {@link Context}. This class should be suitable for all cases, however users are free to implement
 * {@link Context} from scratch, provided the contract is upheld.
 *
 * The caller can specify the matching style/behavior either on the filter values provided in the {@link Filter} object, or
 * on the values configured on the target {@link Context} being searched. Both cannot be in effect at the same time; it's
 * either one or the other. If the caller has specified both, the code gives preference to the {@link Context} matching
 * style/behavior. This behavior is controlled by either:
 *   - Passing flag {@link AbstractContext#FOUND_ELEM_VAL_IS_REGEX}, and optionally flag {@link AbstractContext#PARTIAL_REGEX_MATCH}
 *     in the extra parameters {@code Map} argument that {@link Context#findElement(SearchPath, Filter, TargetElements, Map)}
 *     accepts. This affects behavior on the {@code Context} only. The former flag says that for filtering purposes, the
 *     relevant {@code Context} values should behave as if they were regular expressions, in which case the code will
 *     use a {@link Pattern} to make the comparison on the entire filter key value, meaning they both have to match
 *     exactly. Internally the code uses the {@link Matcher#matches()} method; read its documentation for details. But if
 *     you want to do partial matching only, pass the latter flag in addition. Internally the code will then use method
 *     {@link Matcher#find()} to do these kinds of partial matches. Refer to that method's documentation for details.
 *   - To control behavior on the filter values instead, simply use an asterisk (*) on the filter values that should match
 *     partially against the {@code Context} values in question. Currently an asterisk is supported only at the beginning
 *     of the string, at the end, or at both. Any other placement of the asterisk will throw an exception.
 *   Note that if you mix both methods above, the first one (regular expression matching) takes precedence, and wildcards (*)
 *   passed in the filter values will have no effect (they are completely ignored by the code).
 *
 *
 * Created by QuijadaJ on 5/4/2017.
 */
@Immutable
abstract class AbstractContext implements Context {
    /** Wildcard character; per the class documentation, an asterisk in a filter value requests partial matching. */
    private static final char WILD_CARD = '*';
    /**
     * Shared map backed by a {@link ConcurrentHashMap}. Presumably caches compiled regular expressions; its
     * uses are outside this part of the file, so confirm before relying on the key/value types.
     */
    private static final Map patternCache = new ConcurrentHashMap<>();
    /** Member name given to a top-level (anonymous) array when it is wrapped so it can be searched by name. */
    private static final String ANON_ARY_HANDLE = "anonymousArray";


    /*
     * Entry point of a search: starts the recursive walk on this context with no pre-existing
     * results map, then wraps whatever was collected into a SearchResult.
     */
    @Override
    public SearchResult findElement(SearchPath pSearchPath,
                                    Filter pFilter,
                                    TargetElements pTargetElements,
                                    Map pExtraParams)
            throws IllegalArgumentException {
        return SearchResult.createSearchResult(
                findElement(this, pSearchPath, pFilter, pTargetElements, null, pExtraParams));
    }

    /*
     * Convenience overload: unpacks the search path, filter and target elements carried by the
     * selection criteria and delegates to the four-argument findElement().
     */
    @Override
    public SearchResult findElement(SelectionCriteria pSelectCriteria,
                                    Map pExtraParams) throws IllegalArgumentException {
        SearchPath path = pSelectCriteria.getSearchPath();
        Filter filter = pSelectCriteria.getFilter();
        TargetElements targets = pSelectCriteria.getTargetElements();
        return findElement(path, filter, targets, pExtraParams);
    }


    /**
     * Recursive worker behind the public {@code findElement()} overloads. Advances {@code pSearchPath} by one
     * node, looks that node up in {@code pElem}, and either recurses into the matching member or, when the
     * end of the search path has been reached, hands the member to {@code processElement()} so it can be added
     * to {@code pFoundElemVals}.
     *
     * @param pElem the context to search at this level of the recursion
     * @param pSearchPath the search path; it is advanced to its next node before anything else happens
     * @param pFilter passed through untouched to {@code processElement()}
     * @param pTargetElements passed through untouched to {@code processElement()}; when non-null its size is
     *                        also used to pre-size a newly created results map
     * @param pFoundElemVals the results accumulated so far; a new {@link HashMap} is created when {@code null}
     * @param pExtraParams optional flags; the presence of key
     *                     {@code IGNORE_INCOMPATIBLE_SEARCH_PATH_PROVIDED_ERROR} turns search-path mismatches
     *                     into an early return of the current results instead of an exception. May be {@code null}.
     * @return the results map (either {@code pFoundElemVals} or the map created here when that was {@code null})
     * @throws IllegalArgumentException wrapping an {@code IncompatibleSearchPathException} when the current
     *         search path node does not exist in {@code pElem} or names an array entry that does not exist, or
     *         wrapping an {@code UnexpectedArrayNodeException} when a multi-entry array is found mid-path and
     *         the search path did not say which entry to select; also thrown (as {@link NumberFormatException})
     *         when the text between square brackets in the current node is not an integer
     */
    Map findElement(Context pElem,
                                     SearchPath pSearchPath,
                                     Filter pFilter,
                                     TargetElements pTargetElements,
                                     Map pFoundElemVals,
                                     Map pExtraParams)
            throws IllegalArgumentException {

        if (null == pFoundElemVals) {
            pFoundElemVals = new HashMap<>(pTargetElements != null ? pTargetElements.size() : 0);
        }

        /*
         * Advance to the next element/node in the search path. Because the object is immutable, it's a
         * 2-step process to do so. Read the API javadoc of SearchPath for more details.
         */
        pSearchPath = pSearchPath.advanceToNextNode();
        String curNodeInPath = pSearchPath.currentNode();

        /*
         * Deal with case where the original Context given is an anonymous array. In this scenario we expect search path
         * to be "[N]||nodeX||nodeY||nodeZ||...". The way we handle it is that we make the array non-anonymous and
         * identify it by ANON_ARY_HANDLE, then modify the current node in search path by adding ANON_ARY_HANDLE
         * in front of the "[]", and finally we let the logic further below deal with an array inside the recursible
         * we've just created. That code already does all checks, throws exception where appropriate, etc.
         */
        if (pSearchPath.currentNodeIndex() == 0 && curNodeInPath.indexOf("[") == 0 && pElem.isArray()) {
            MutableContext mc = ContextFactory.obtainMutableContext("{}");
            mc.addMember(ANON_ARY_HANDLE, ContextFactory.obtainContext(pElem.stringRepresentation()));
            curNodeInPath = ANON_ARY_HANDLE + curNodeInPath;
            pElem = mc;
        }

        /*
         * The current node does not change from here on, so parse its array index exactly once and reuse the
         * result everywhere below. A negative value means the node carries no (usable) "[N]" suffix.
         */
        final int aryIdx = arrayIndex(curNodeInPath);
        final String curNodeInPathNoBrackets = aryIdx >= 0 ? removeBrackets(curNodeInPath) : curNodeInPath;

        final boolean atEndOfSearchPath = pSearchPath.isAtEndOfSearchPath();


        /*
         * If below if() is true, then we're dealing with a complex structure. At this
         * point check if the current node in the search path we've been given exists in the current
         * element. If not, or if it designates an array node with index > 0 yet encountered node
         * is not in fact of type array, then it means the element will not be found, hence throw
         * IllegalArgumentException, unless Context#IGNORE_INCOMPATIBLE_SEARCH_PATH_PROVIDED_ERROR was
         * passed in the extra parameters map. The full search path given has to exist in order to return any results.
         */
        Set<Map.Entry<String, Context>> elemEntries = null;
        if (pElem.isRecursible()) {
            /*
             * The 'aryIdx <= 0 ...' condition is there to see if caller expects array node to be found yet actual
             * is not an array, and they specified an index greater than 0, in which case throw exception unless
             * we were specifically instructed to ignore such scenarios (via presence
             * of Context#IGNORE_INCOMPATIBLE_SEARCH_PATH_PROVIDED_ERROR).
             * We're interested in aforementioned check for non-array nodes only.
             */
            if (pElem.containsElement(curNodeInPathNoBrackets) && (aryIdx <= 0
                    || pElem.memberValue(curNodeInPathNoBrackets).isArray())) {
                /*
                 * Check inverse of "UnexpectedArrayNodeException" further below; a non-array node encountered,
                 * yet search path told to expect array here. Unless the array index is 0, throw exception. The
                 * motivation to make an exception if array index is 0 is to offer some flexibility to calling code. The
                 * same data node can sometimes be an array, and at others a non-array. This can happen when there's no
                 * schema backing things up, and in data conversion situations, the target data uses presence
                 * of multi node or single to display respectively as array or not. A concrete example:
                 * ... -> {xml: {node: [{}, {}]}}
                 *
                 * or
                 *
                 * ... -> {xml: {node: {}}}
                 *
                 * Notice in first, the node is array, in second it's not. It all depends on how original
                 * data looked. The rationale for this logic is as follows:
                 * The client just wants the first node in array if index specified is [0], therefore
                 * give it to them if it is a non-array, which obviously is a single element. However if client
                 * gave [idx > 0], then I'm confused and don't know what to do, so throw it back to client
                 * to decide what they want to do.
                 */

                elemEntries = pElem.entrySet();
            } else {

                /*
                 * Have to wrap into an IllegalArgumentException because the method signature says so. When it was
                 * decided to throw a checked exception, namely IncompatibleSearchPathException, there would have had
                 * to be a lot of changes made in dependent code to reflect an updated method signature. Hence the reason
                 * the below exception wrapping is made.
                 */
                if (null != pExtraParams && pExtraParams.containsKey(IGNORE_INCOMPATIBLE_SEARCH_PATH_PROVIDED_ERROR)) {
                    /*
                     * Handles case where caller instructed this API to ignore it if search path is not
                     * applicable for node in question. In such cases the node simply gets ignored and is excluded from
                     * search results.
                     */
                    return pFoundElemVals;
                } else {
                    IncompatibleSearchPathException ispe = new IncompatibleSearchPathException(
                            pSearchPath, curNodeInPathNoBrackets, pElem);
                    throw new IllegalArgumentException(ispe);
                }

            }
        }

        /*
         * If "elemEntries" is not NULL, it means we're dealing with a complex structure (I.e. not a primitive)
         * and the current element in the search path has been found at this location of the passed in element to search.
         * Why am I constructing if() statements like this instead of nesting them? Makes code easier to read and
         * hence maintain, less nesting which means less indentation.
         */
        if (null != elemEntries) {
            for (Map.Entry<String, Context> elemEntry : elemEntries) {
                /*
                 * If this pFoundElemVals is not empty, exit, no need to process further. It means we reached
                 * the last node in search path and found the goods. This was added here so that JVM does not
                 * continue iterating if there's more than one element in the element node that contains the element
                 * we're searching for.
                 */
                if (!pFoundElemVals.isEmpty()) {
                    return pFoundElemVals;
                }

                String curElemName = elemEntry.getKey();

                if (!curNodeInPathNoBrackets.equals(curElemName)) {
                    continue;
                }

                Context elemToProcessNext = elemEntry.getValue();
                /*
                 * If the current element is of type array, deal with it below. If we're *not* at the last node
                 * of the search path, enforce requirement that user must specify which array entry to select
                 * to continue on that path of the search.
                 * Otherwise, if we're already at last node of search path, the requirement is relaxed, and caller has
                 * option of either specifying an array entry to select, or just select the entire array.
                 */
                if (elemToProcessNext.isArray()) {
                    /*
                     * If we're not at end of search path and we encountered an array node, yet the search path
                     * did not tell us to expect an array at this spot of the search path, throw exception. If the
                     * caller does not explicitly say what array entry to select, how do we know which path to continue
                     * on? Also if we didn't enforce this, then it might result in hard to trace bugs in the callers
                     * code. This is the inverse of check further above, where error is thrown if search path said to
                     * expect an array but the actual node is not an array.
                     * Note that this rule is relaxed if the array contains only one entry; in such a case, the client
                     * code is not required to specify in the search path that the node is an array, the code will
                     * auto select the only choice, namely the only array entry.
                     * NOTE(review): in that relaxed case no entry is selected in this method; the array itself is
                     * carried forward unchanged. Confirm the "auto select" happens downstream as intended.
                     */
                    if (aryIdx < 0 && !atEndOfSearchPath && elemToProcessNext.asArray().size() > 1) {
                        UnexpectedArrayNodeException uane =
                                new UnexpectedArrayNodeException(pSearchPath, curNodeInPath, elemToProcessNext);
                        throw new IllegalArgumentException(uane);
                    }


                    /*
                     * The search path did specify what array entry to grab, deal with that logic in the if() block
                     * below. Then further below this "if()" we check if this is the last node of search path
                     * or not. These two pieces of logic combined is what allows the client to specify what array entry
                     * to grab from last node, or grab the entire last array node.
                     */
                    if (aryIdx >= 0) {
                        /*
                         * Handles scenario where a node in the search path specifies an array entry that does not
                         * exist, and caller wants to ignore node-not-found error.
                         */
                        if (aryIdx >= elemToProcessNext.asArray().size()) {
                            if (null != pExtraParams && pExtraParams.containsKey(IGNORE_INCOMPATIBLE_SEARCH_PATH_PROVIDED_ERROR)) {
                                return pFoundElemVals;
                            } else {
                                IncompatibleSearchPathException ispe = new IncompatibleSearchPathException(
                                        pSearchPath, curNodeInPath, elemToProcessNext);
                                throw new IllegalArgumentException(ispe);
                            }
                        }

                        elemToProcessNext = elemToProcessNext.entryFromArray(aryIdx);
                    }
                }


                /*
                 * If below evaluates to true, we're at the last node of our search path. Invoke helper
                 * method to add the elements to results for us.
                 * WARNING: Watch out, do not alter code below; do "atEndOfSearchPath" first. Once we have reached end
                 *   of search path, recursion does not make sense. If we didn't do this check first, because the
                 *   element to process next might be recursible, we might recurse even though we're at end of
                 *   search path!!!
                 */
                if (atEndOfSearchPath) {
                    processElement(curElemName, elemToProcessNext, pFilter, pTargetElements, pFoundElemVals, pExtraParams);
                } else if (elemToProcessNext.isRecursible()) {
                    findElement(elemToProcessNext, pSearchPath, pFilter, pTargetElements, pFoundElemVals, pExtraParams);
                }
            }
        }

        return pFoundElemVals;
    }


    /**
     * Extracts the index specified between square brackets. If the passed in string contains no
     * opening square bracket, -1 is returned.
     *
     * @param pNode the search path node to inspect, e.g. {@code "node[2]"}
     * @return the integer contained within the square brackets, or -1 if no opening bracket is found
     * @throws IllegalArgumentException if an opening bracket is present but no closing bracket follows it
     * @throws NumberFormatException if the text between the brackets is not a valid integer
     */
    private int arrayIndex(String pNode) {
        final int openIdx = pNode.indexOf('[');
        if (openIdx < 0) {
            return -1;
        }

        /*
         * Without this guard a node such as "node[3" would make substring() fail with a
         * StringIndexOutOfBoundsException, which is not an IllegalArgumentException and would therefore
         * escape the exception contract declared by the callers of this method.
         */
        final int closeIdx = pNode.indexOf(']');
        if (closeIdx < openIdx) {
            throw new IllegalArgumentException("Malformed array index in search path node '" + pNode
                    + "': expected a closing ']' after the opening '['");
        }

        return Integer.parseInt(pNode.substring(openIdx + 1, closeIdx));
    }


    private void processElement(String pElemName,
                                Context pElem,
                                Filter pFilter,
                                TargetElements pTargetElements,
                                Map pFoundElemVals,
                                Map pExtraParams) throws IllegalArgumentException {
        Context elemValToStore = null;
        /*
         * Handle case when element in last node of search path is primitive or another complex structure
         */
        if (pElem.isPrimitive() || pElem.isRecursible()) {
            /*
             * Hm, here the shouldExcludeFromResults() check might not be necessary. Why would the caller give
             * an element as last node in search path, and also give that element name in the pFilter Map?? In
             * other words, this might be a scenario that never happens, but leaving code here for now in case
             * there's something I'm missing.
             */
            if (shouldExcludeFromResults(pElemName, pElem, pFilter, pExtraParams)) {
                return;
            }

            elemValToStore = pElem;

            /*
             * The pTargetElems parameter applies only when results contain another complex structure.
             */
            if (pElem.isRecursible()) {
                elemValToStore = filterUnwantedElements(pElem, pTargetElements, pExtraParams);
            }
        } else if (pElem.isArray()) {
            Iterator itElem = pElem.asArray().iterator();
            List