org.apache.camel.support.TokenXMLExpressionIterator Maven / Gradle / Ivy

Show more of this group Show more artifacts with this name
Show all versions of camel-core Show documentation
The Core Camel Java DSL based router
There is a newer version: 4.6.0
/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.camel.support;

import java.io.Closeable;
import java.io.IOException;
import java.io.InputStream;
import java.text.MessageFormat;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Scanner;
import java.util.regex.MatchResult;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.camel.Exchange;
import org.apache.camel.InvalidPayloadException;
import org.apache.camel.language.simple.SimpleLanguage;
import org.apache.camel.util.IOHelper;
import org.apache.camel.util.ObjectHelper;

/**
 * {@link org.apache.camel.Expression} to walk a {@link org.apache.camel.Message} XML body
 * using an {@link java.util.Iterator}, which grabs the content between an XML start and end token,
 * where the end token corresponds implicitly to either the end tag or the self-closing start tag.
 * <p>
 * The message body must be convertible to {@link java.io.InputStream}, which is used as the stream
 * to access the message body.
 * <p>
 * Can be used to split big XML files.
 * <p>
 * This implementation supports inheriting namespaces from a parent/root tag.
 */
public class TokenXMLExpressionIterator extends ExpressionAdapter {
    // Matches an XML namespace declaration: "xmlns" with an optional ":prefix", optional whitespace
    // around "=", and a non-empty single- or double-quoted value. Group 1 is the ":prefix" part
    // (empty when there is no prefix); group 2 is the value including its quotes.
    private static final Pattern NAMESPACE_PATTERN = Pattern.compile("xmlns(:\\w+|)\\s*=\\s*('[^']+'|\"[^\"]+\")");
    // Regex fragment for an optional namespace prefix: either 1 to 15 characters other than
    // ':', '<' and '>' followed by ':', or nothing at all.
    private static final String SCAN_TOKEN_NS_PREFIX_REGEX = "([^:<>]{1,15}?:|)";
    // Regex template with a {0} placeholder for the tag name (presumably expanded with
    // java.text.MessageFormat, which this file imports; the expansion is not in this part of the file).
    // The first alternative matches a self-closing tag, the second a start tag plus following content.
    // NOTE(review): "(?!())" is a negative lookahead on an empty group and can never succeed, so
    // "(?:(?!()).)*" consumes nothing and no end tag is matched. This looks like the closing-tag
    // sub-pattern was lost (e.g. stripped as markup during extraction) -- verify against upstream.
    private static final String SCAN_BLOCK_TOKEN_REGEX_TEMPLATE = "<{0}(\\s+[^>]*)?/>|<{0}(\\s+[^>]*)?>(?:(?!()).)*";
    // Regex template with a {0} placeholder for the tag name; matches a tag of that name up to its
    // closing '>' with optional attribute text in between.
    private static final String SCAN_PARENT_TOKEN_REGEX_TEMPLATE = "<{0}(\\s+[^>]*\\s*)?>";
    // Special token value; its handling is not visible in this part of the file.
    // NOTE(review): presumably selects a "wrap" mode when passed as the inherit-namespace token -- confirm.
    private static final String OPTION_WRAP_TOKEN = "<*>";

    // The tag token to iterate over; may contain a simple-language function, which is evaluated
    // per exchange in createIterator.
    protected final String tagToken;
    // Optional token of the parent/root tag to inherit namespaces from; may be null and may also
    // contain a simple-language function.
    protected final String inheritNamespaceToken;

    /**
     * Creates the expression from the token to split on and an optional namespace token.
     *
     * @param tagToken              the XML tag token to iterate over; checked with
     *                              {@code ObjectHelper.notEmpty}
     * @param inheritNamespaceToken the token of the parent/root tag to inherit namespaces from;
     *                              optional, may be {@code null}
     */
    public TokenXMLExpressionIterator(String tagToken, String inheritNamespaceToken) {
        // the namespace token is optional, so it is stored as given without any check
        this.inheritNamespaceToken = inheritNamespaceToken;

        // the tag token is mandatory
        ObjectHelper.notEmpty(tagToken, "tagToken");
        this.tagToken = tagToken;
    }

    /**
     * Creates and initializes the iterator that walks the given stream.
     * <p>
     * Both the tag token and the inherit-namespace token may contain simple-language functions;
     * these are evaluated against the exchange first. Each resolved token is then normalized so
     * that it starts with {@code <} and ends with {@code >}.
     *
     * @param exchange the current exchange, used to evaluate simple functions in the tokens
     * @param in       the input stream holding the XML content
     * @param charset  the charset name passed to the iterator together with the stream
     * @return the initialized iterator
     * @throws IllegalArgumentException if the tag token evaluates to {@code null}
     */
    protected Iterator createIterator(Exchange exchange, InputStream in, String charset) {
        String tag = tagToken;
        if (SimpleLanguage.hasSimpleFunction(tag)) {
            tag = SimpleLanguage.expression(tag).evaluate(exchange, String.class);
        }
        String inherit = inheritNamespaceToken;
        if (inherit != null && SimpleLanguage.hasSimpleFunction(inherit)) {
            inherit = SimpleLanguage.expression(inherit).evaluate(exchange, String.class);
        }

        // a simple function may evaluate to null; report that clearly instead of failing
        // with a NullPointerException while normalizing the token
        if (tag == null) {
            throw new IllegalArgumentException("XML Tag token must be a valid XML tag, was: " + tag
                    + " (evaluated from: " + tagToken + ")");
        }

        // must be XML tokens; after normalization both tokens are guaranteed to be enclosed
        // in angle brackets, so no further validation of the brackets is needed
        tag = encloseInAngleBrackets(tag);
        if (inherit != null) {
            inherit = encloseInAngleBrackets(inherit);
        }

        XMLTokenIterator iterator = new XMLTokenIterator(tag, inherit, in, charset);
        iterator.init();
        return iterator;
    }

    /**
     * Ensures the token starts with {@code <} and ends with {@code >}, adding whichever is missing.
     */
    private static String encloseInAngleBrackets(String token) {
        String answer = token;
        if (!answer.startsWith("<")) {
            answer = "<" + answer;
        }
        if (!answer.endsWith(">")) {
            answer = answer + ">";
        }
        return answer;
    }

    @Override
    public boolean matches(Exchange exchange) {
        // when used as a predicate no iterator is handed back for later use on the input stream,
        // so the stream must be closed as part of the evaluation
        return ObjectHelper.evaluateValuePredicate(doEvaluate(exchange, true));
    }

    @Override
    public Object evaluate(Exchange exchange) {
        // the returned iterator is used to access the input stream afterwards,
        // so the stream must stay open
        final boolean closeStream = false;
        return doEvaluate(exchange, closeStream);
    }

    /**
     * Strategy to evaluate the exchange.
     * <p>
     * Obtains the message body as an {@link InputStream} and creates an iterator over it. If the
     * body cannot be obtained, the {@link InvalidPayloadException} is set on the exchange and
     * {@code null} is returned.
     *
     * @param exchange    the exchange
     * @param closeStream whether to close the stream before returning from this method
     * @return the iterator created for the message body, or {@code null} if the payload was invalid
     */
    protected Object doEvaluate(Exchange exchange, boolean closeStream) {
        InputStream body = null;
        try {
            body = exchange.getIn().getMandatoryBody(InputStream.class);
            // the body may come from a file, so honor a custom charset defined on the exchange
            final String charsetName = IOHelper.getCharsetName(exchange);
            final Iterator tokens = createIterator(exchange, body, charsetName);
            return tokens;
        } catch (InvalidPayloadException cause) {
            // record the failure on the exchange and release the stream
            exchange.setException(cause);
            IOHelper.close(body);
            return null;
        } finally {
            // only close here when the caller does not keep reading through the returned iterator
            if (closeStream) {
                IOHelper.close(body);
            }
        }
    }
    
    /**
     * Iterator to walk the input stream
     */
    static class XMLTokenIterator implements Iterator