com.composum.sling.core.proxy.GenericProxyReader Maven / Gradle / Ivy

Go to download
package com.composum.sling.core.proxy;

import org.apache.commons.lang3.StringUtils;
import org.jetbrains.annotations.NotNull;

import java.io.FilterReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Stack;

/**
 * a filter reader implementation to filter out HTML tags from the base reader
 */
public class GenericProxyReader extends FilterReader {

    public static final String[] DEFAULT_TO_RENAME = new String[]{"html:div class=\"proxy-html-content\""};
    public static final String[] DEFAULT_TO_STRIP = new String[]{"body"};
    public static final String[] DEFAULT_TO_DROP = new String[]{"head", "style", "script", "link"};

    private enum Debug {none, read, strip, drop} // switch debug on for unit test debugging

    private Debug debug = Debug.none;

    /**
     * the strategy to handle tokens of an 'open' tag
     */
    interface TagFilter {

        String tagName();

        /**
         * the tag start handler with the token which has triggered this handler
         */
        int start(int token) throws IOException;

        /**
         * the tag body handler
         */
        int body() throws IOException;

        /**
         * the tag end handler
         */
        int end() throws IOException;

        void debug(Character hint);
    }

    /**
     * the abstract tag handler base implementation
     */
    protected abstract class TagFilterBase implements TagFilter {

        protected final String tagName;

        protected TagFilterBase(String tagName) {
            this.tagName = tagName;
        }

        @Override
        public String tagName() {
            return tagName;
        }

        /**
         * scan and discard all tokens of the tag start up to the tags body
         *
         * @param token the first token after the tag name
         * @return the token after the tags start
         * - the first token of the body or
         * - the first token of the tag end if the body is empty or
         * - the token after the tag if the tag start is closing the tag imeadiately
         */
        @Override
        public int start(int token) throws IOException {
            Integer last = null;
            while (token != '>') {
                last = token;
                token = scan();
            }
            if (last != null && last == '/') {
                // tag ends at the end of start; no body
                openTags.pop();
                debug('#');
            } else {
                debug(null);
            }
            return trim();
        }

        /**
         * @return return the next non space token after the closed tag
         */
        @Override
        public int end() throws IOException {
            debug('/');
            return trim();
        }
    }

    /**
     * the tag handler implementation for tags to 'stripe'
     */
    protected class RenameTagFilter extends StripTagFilter {

        protected final String newTagName;
        protected final String attributes;

        protected RenameTagFilter(String tagName, String newTagName) {
            super(tagName);
            this.newTagName = StringUtils.substringBefore(newTagName, " ");
            this.attributes = StringUtils.substringAfter(newTagName, " ");
        }

        /**
         * @return the buffer() for the tag start with new new tag name
         */
        @Override
        public int start(int token) {
            StringBuilder tagName = new StringBuilder(newTagName);
            if (StringUtils.isNotBlank(attributes)) {
                tagName.append(" ").append(attributes);
            }
            tagName.append((char) token);
            return buffer(tagName);
        }

        /**
         * @return the buffer() for the end of the tag with the new tag name
         */
        @Override
        public int end() {
            StringBuilder tagName = new StringBuilder("/");
            tagName.append(newTagName).append('>');
            debug('/');
            return buffer(tagName);
        }

        @Override
        public void debug(Character hint) {
            if (debug == Debug.strip) {
                System.out.println("<" + (hint != null ? hint : "") + tagName + ":" + newTagName + ">");
            }
        }
    }

    /**
     * the tag handler implementation for tags to 'stripe'
     */
    protected class StripTagFilter extends TagFilterBase {

        protected StripTagFilter(String tagName) {
            super(tagName);
        }

        /**
         * @return each useful token of the tags body (uses 'scan' to filter out tags inside of the body)
         */
        @Override
        public int body() throws IOException {
            int token = scan();
            if (debug == Debug.strip) {
                System.out.println((char) token);
            }
            return token;
        }

        @Override
        public void debug(Character hint) {
            if (debug == Debug.strip) {
                System.out.println("<" + (hint != null ? hint : "") + tagName + ">");
            }
        }
    }

    /**
     * the tag handler implementation for tags to 'drop'
     */
    protected class DropTagFilter extends TagFilterBase {

        protected DropTagFilter(String tagName) {
            super(tagName);
        }

        /**
         * @return the token after discarding all tokens of the handlers tag and the tags body
         */
        @Override
        public int body() throws IOException {
            int token;
            while ((token = scan()) >= 0 && !openTags.isEmpty() && openTags.peek() == this) {
                if (debug == Debug.drop) {
                    System.err.println((char) token);
                }
            }
            return token;
        }

        @Override
        public void debug(Character hint) {
            if (debug == Debug.drop) {
                System.out.println("[" + (hint != null ? hint : "") + tagName + "]");
            }
        }
    }

    /**
     * the tag names configuration of the filter reader
     */
    protected final Map toRename;
    protected final List toStrip;
    protected final List toDrop;

    /**
     * the stack of handlers for open tags - the tomost handler is used for token reading
     */
    protected Stack openTags = new Stack<>();

    /**
     * the buffer filled if a 'look forward' to find a tag name has found no tag to filter out
     */
    protected char[] buffer = null;
    protected int bufferPos;

    /**
     * Creates a tag filtering reader to filter out the tags configured as sets of tag names
     *
     * @param in       the reader to filter during read
     * @param toRename the set of tag names which should be kept but renamed (e.g. 'html:div' to keep the root)
     * @param toStrip  the set of tag names to 'stripe' - remove the tags around and keep the body of the tags
     * @param toDrop   the set of tag names to 'drop' - remove the tags including their body
     */
    public GenericProxyReader(@NotNull final Reader in, @NotNull final String[] toRename,
                              @NotNull final String[] toStrip, @NotNull final String[] toDrop) {
        super(in);
        this.toRename = new HashMap<>();
        for (String rule : toRename) {
            if (StringUtils.isNotBlank(rule)) {
                String[] split = StringUtils.split(rule, ":", 2);
                this.toRename.put(split[0], split[1]);
            }
        }
        this.toStrip = Arrays.asList(toStrip);
        this.toDrop = Arrays.asList(toDrop);
    }

    public GenericProxyReader(@NotNull final InputStream in, @NotNull final String[] toRename,
                              @NotNull final String[] toStrip, @NotNull String[] toDrop) {
        this(new InputStreamReader(in, StandardCharsets.UTF_8), toRename, toStrip, toDrop);
    }

    public GenericProxyReader(@NotNull final InputStream in) {
        this(in, DEFAULT_TO_RENAME, DEFAULT_TO_STRIP, DEFAULT_TO_DROP);
    }

    public GenericProxyReader(@NotNull final Reader in) {
        this(in, DEFAULT_TO_RENAME, DEFAULT_TO_STRIP, DEFAULT_TO_DROP);
    }

    //
    // Reader...
    //

    /**
     * uses the single token read() method to fill the buffer
     */
    @Override
    public int read(char[] cbuf, int off, int len) throws IOException {
        int count;
        for (count = 0; count < len; count++) {
            int token = read();
            if (token < 0) {
                return count > 0 ? count : -1;
            }
            cbuf[off + count] = (char) token;
        }
        return count;
    }

    /**
     * delegates the read to the current tag handler or to the 'scan' if no such handler is active
     * (the handlers are also using the 'scan' method to find tags inside of open tags)
     */
    @Override
    public int read() throws IOException {
        int token;
        if (openTags.isEmpty()) {
            token = scan();
        } else {
            token = openTags.peek().body();
        }
        return token;
    }

    //
    // Filter...
    //

    /**
     * scans the token from the 'original' reader and determines tag starts / ends and the tag names
     * controls the tag handler stack if tags found which should be filtered
     *
     * @return the next token for the readers 'read' method
     */
    protected int scan() throws IOException {
        int token;
        if (buffer != null) { // a filled buffer has precendence, a scan is always done in this case
            token = buffer[bufferPos];
            bufferPos++;
            if (bufferPos >= buffer.length) {
                buffer = null;
            }
        } else {
            token = next();
            if (token >= 0) {
                if (token == '<') {
                    if ((token = next()) >= 0) {
                        StringBuilder tagName = new StringBuilder();
                        if (token == '/') {
                            // tag end...
                            while ((token = next()) >= 0) {
                                if (token == '>') {
                                    TagFilter filter;
                                    if (!openTags.isEmpty() &&
                                            (filter = openTags.peek()).tagName().equals(tagName.toString().toLowerCase())) {
                                        // this tag end closes the topmost tag of the tag handlers stack...
                                        openTags.pop();
                                        return filter.end();
                                    } else if (openTags.size() > 1 && (filter = openTags.get(openTags.size() - 2))
                                            .tagName().equals(tagName.toString().toLowerCase())) {
                                        // assuming that the topmost tag is not closed (not well formed)...
                                        openTags.pop();
                                        openTags.pop();
                                        return filter.end();
                                    } else {
                                        // the tag end is not an end of a configured tag
                                        // fill the buffer to flush this tag end
                                        tagName.insert(0, '/');
                                        tagName.append((char) token);
                                        return buffer(tagName);
                                    }
                                } else {
                                    // collect the tokens to build the tag name
                                    tagName.append((char) token);
                                }
                            }
                            // unexpected EOF - flush the last buffered tokens...
                            tagName.insert(0, '/');
                        } else {
                            // tag start...
                            do {
                                if (token == ' ' || token == '>' || token == '/') {
                                    // tag name delimiter reached...
                                    TagFilter filter = getFilter(tagName.toString().toLowerCase());
                                    if (filter != null) {
                                        // this tag has to be filtered - use the filter as the current handler
                                        openTags.push(filter);
                                        return filter.start(token);
                                    } else {
                                        // the tag start is not a start of a configured tag
                                        // fill the buffer to flush this tag start
                                        tagName.append((char) token);
                                        return buffer(tagName);
                                    }
                                } else {
                                    // collect the tokens to build the tag name
                                    tagName.append((char) token);
                                }
                            } while ((token = next()) >= 0);
                        }
                        // unexpected EOF - flush the last buffered tokens...
                        return buffer(tagName);
                    } else {
                        // unexpected EOF - return the last '<'
                        token = '<';
                    }
                }
            }
        }
        return token;
    }

    /**
     * @return the next token from the 'original' reader
     */
    protected int next() throws IOException {
        int token = in.read();
        if (debug == Debug.read) {
            System.out.print((char) token);
        }
        return token;
    }

    /**
     * @return the next non space token
     */
    protected int trim() throws IOException {
        int token;
        while (Character.isWhitespace(token = read())) ; // next non space token...
        return token;
    }

    /**
     * fills up the buffer with the string builders content and returns a 'tag start' tokone ('<')
     */
    protected int buffer(StringBuilder tagName) {
        buffer = tagName.toString().toCharArray();
        bufferPos = 0;
        return '<';
    }

    /**
     * @return the filter to handle the found tag name; 'null' if the tag name is not configured for this filter
     */
    protected TagFilter getFilter(String tagName) {
        TagFilter current = openTags.isEmpty() ? null : openTags.peek();
        if (current instanceof DropTagFilter) {
            return new DropTagFilter(tagName);
        } else {
            if (toRename.containsKey(tagName)) {
                return new RenameTagFilter(tagName, toRename.get(tagName));
            } else if (toStrip.contains(tagName)) {
                return new StripTagFilter(tagName);
            } else if (toDrop.contains(tagName)) {
                return new DropTagFilter(tagName);
            }
        }
        return null;
    }
}