All downloads are free. The search and download features use the official Maven repository.

cn.schoolwow.quickhttp.document.query.QueryParser Maven / Gradle / Ivy

There is a newer version: 3.0.1
Show newest version
package cn.schoolwow.quickhttp.document.query;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.*;
import java.util.function.Function;
import java.util.regex.Pattern;

/**
 * Parses a CSS-style selector string into a tree of {@link Evaluator}s.
 *
 * <p>The parser walks the query once. At every position each {@link Selector} is tried in
 * declaration order; the first one that recognises the input pushes an evaluator onto an
 * internal stack and reports how many characters it consumed. Characters that no selector
 * recognises are skipped. When the input is exhausted the stack content (and the pending
 * "or" group created by commas, if any) is folded into a single root evaluator.
 *
 * <p>Use {@link #parse(String)}; instances are not exposed.
 */
public class QueryParser {
    private static final Logger logger = LoggerFactory.getLogger(QueryParser.class);

    /** Characters that relate two compound selectors to each other. */
    private static final char[] combinators = {'>', '+', '~', ' '};
    /** Pseudo selectors that take no argument, keyed by their literal text. */
    private static final Map<String, Evaluator> pseudoMap = new HashMap<>();

    static {
        pseudoMap.put(":first-child", new Evaluator.IsFirstChild());
        pseudoMap.put(":last-child", new Evaluator.IsLastChild());
        pseudoMap.put(":first-of-type", new Evaluator.IsFirstOfType());
        pseudoMap.put(":last-of-type", new Evaluator.IsLastOfType());
        pseudoMap.put(":only-child", new Evaluator.IsOnlyChild());
        pseudoMap.put(":only-of-type", new Evaluator.IsOnlyOfType());
        pseudoMap.put(":empty", new Evaluator.IsEmpty());
    }

    /** The trimmed query being parsed. */
    private final char[] chars;
    /** Index of the next unread character in {@link #chars}. */
    private int pos;
    /** Result of the parse; set once at the end of the constructor. */
    private Evaluator root;
    /** Evaluators recognised so far that have not yet been combined. */
    private final Stack<Evaluator> evaluatorStack = new Stack<>();
    /** Group collecting the comma-separated alternatives; {@code null} until the first comma. */
    private CombiningEvaluator.Or or;

    /**
     * Parses {@code cssQuery} and returns the root evaluator of the resulting tree.
     *
     * @param cssQuery the selector to parse; leading and trailing whitespace is ignored
     * @return the root evaluator
     * @throws NullPointerException if {@code cssQuery} is {@code null}
     * @throws IllegalArgumentException if the query contains an unsupported or unterminated
     *         pseudo selector, or a number that cannot be parsed
     */
    public static Evaluator parse(String cssQuery) {
        return new QueryParser(cssQuery).root;
    }

    private QueryParser(String cssQuery) {
        // Surrounding whitespace carries no meaning; left in place, a trailing blank would be
        // read as a descendant combinator with no right-hand side.
        chars = Objects.requireNonNull(cssQuery, "cssQuery").trim().toCharArray();
        Selector[] selectors = Selector.values();
        while (pos < chars.length) {
            boolean find = false;
            for (Selector selector : selectors) {
                int count = selector.condition.apply(this);
                if (count > 0) {
                    pos += count;
                    find = true;
                    break;
                }
            }
            if (!find) {
                // Nothing recognised this character: skip it.
                pos++;
            }
        }
        if (evaluatorStack.size() > 1) {
            // Group the trailing simple selectors (everything above the nearest structural
            // evaluator) into one And.
            CombiningEvaluator.And lastAnd = new CombiningEvaluator.And(new ArrayList<>());
            while (!evaluatorStack.isEmpty() && !(evaluatorStack.peek() instanceof StructuralEvaluator)) {
                lastAnd.evaluatorList.add(evaluatorStack.pop());
            }
            evaluatorStack.push(lastAnd);
        }
        logger.trace("[原始选择器列表]{}", evaluatorStack);
        if (or != null) {
            // An Or group exists: wrap whatever is left on the stack into an And and add it as
            // the last alternative. Nothing is added when the stack is empty (query ended with
            // a comma), so that no empty alternative ends up in the Or.
            if (!evaluatorStack.isEmpty()) {
                CombiningEvaluator.And and = new CombiningEvaluator.And(new ArrayList<>());
                while (!evaluatorStack.isEmpty()) {
                    and.evaluatorList.add(evaluatorStack.pop());
                }
                if (and.evaluatorList.size() == 1) {
                    or.evaluatorList.add(and.evaluatorList.get(0));
                } else {
                    or.evaluatorList.add(and);
                }
            }
            root = or;
        } else if (evaluatorStack.size() == 1) {
            root = evaluatorStack.get(0);
        } else {
            List<Evaluator> evaluatorList = new ArrayList<>(evaluatorStack.size());
            while (!evaluatorStack.isEmpty()) {
                evaluatorList.add(evaluatorStack.pop());
            }
            root = new CombiningEvaluator.And(evaluatorList);
        }
        logger.trace("[最终选择器列表]{}", root);
    }

    /**
     * The token recognisers, tried in declaration order at every input position.
     *
     * <p>Each recogniser inspects {@code queryParser.chars} at {@code queryParser.pos}. It returns
     * {@code 0} when the input is not its kind of token; otherwise it pushes the matching
     * evaluator and returns the number of characters consumed.
     */
    private enum Selector {
        /** {@code #id} and {@code .class}. */
        ByIdOrClass((queryParser) -> {
            char[] chars = queryParser.chars;
            int pos = queryParser.pos;

            if (chars[pos] != '#' && chars[pos] != '.') {
                return 0;
            }
            // The name runs from just after the marker up to the next selector delimiter
            // (or the end of input).
            int last = pos + 1;
            while (last < chars.length && !isNameTerminator(chars[last])) {
                last++;
            }
            String name = new String(chars, pos + 1, last - pos - 1);
            if (name.isEmpty()) {
                // A lone '#' or '.' names nothing; let the main loop skip it.
                return 0;
            }
            if (chars[pos] == '#') {
                Evaluator.Id idEvaluator = new Evaluator.Id(name);
                queryParser.evaluatorStack.push(idEvaluator);
                logger.trace("[添加id选择器]{}", idEvaluator);
            } else {
                Evaluator.Class aClassEvaluator = new Evaluator.Class(name);
                queryParser.evaluatorStack.push(aClassEvaluator);
                logger.trace("[添加class选择器]{}", aClassEvaluator);
            }
            return last - pos;
        }),
        /** A tag name: a run of letters and digits. */
        ByTag((queryParser) -> {
            char[] chars = queryParser.chars;
            int pos = queryParser.pos;

            if (!Character.isLetterOrDigit(chars[pos])) {
                return 0;
            }
            int last = pos;
            while (last < chars.length && Character.isLetterOrDigit(chars[last])) {
                last++;
            }
            String content = new String(chars, pos, last - pos);
            Evaluator.Tag tag = new Evaluator.Tag(content);
            queryParser.evaluatorStack.push(tag);
            logger.trace("[添加tag选择器]{}", tag);
            return content.length();
        }),
        /** The universal selector {@code *}. */
        ByAll((queryParser) -> {
            char[] chars = queryParser.chars;
            int pos = queryParser.pos;

            if (chars[pos] != '*') {
                return 0;
            }
            queryParser.evaluatorStack.push(new Evaluator.AllElements());
            return 1;
        }),
        /**
         * Attribute selectors: {@code [attr]}, {@code [^prefix]}, {@code [attr=v]},
         * {@code [attr^=v]}, {@code [attr$=v]}, {@code [attr*=v]} and {@code [attr~=regex]}.
         */
        ByAttribute((queryParser) -> {
            char[] chars = queryParser.chars;
            int pos = queryParser.pos;

            if (chars[pos] != '[') {
                return 0;
            }
            int close = indexOf(chars, ']', pos);
            // A missing ']' is tolerated: the rest of the input is taken as the selector body.
            int bodyEnd = close < 0 ? chars.length : close;
            String body = new String(chars, pos + 1, bodyEnd - pos - 1);

            Evaluator evaluator;
            // The operator is identified by the character in front of the FIRST '=' so that
            // operator-like text inside the value is not mistaken for the operator.
            int eq = body.indexOf('=');
            if (body.startsWith("^")) {
                evaluator = new Evaluator.AttributeStarting(body.substring(1));
            } else if (eq < 0) {
                evaluator = new Evaluator.Attribute(body);
            } else {
                char operator = eq > 0 ? body.charAt(eq - 1) : '=';
                String value = unquote(body.substring(eq + 1));
                switch (operator) {
                    case '^':
                        evaluator = new Evaluator.AttributeWithValueStarting(body.substring(0, eq - 1), value);
                        break;
                    case '$':
                        evaluator = new Evaluator.AttributeWithValueEnding(body.substring(0, eq - 1), value);
                        break;
                    case '*':
                        evaluator = new Evaluator.AttributeWithValueContaining(body.substring(0, eq - 1), value);
                        break;
                    case '~':
                        evaluator = new Evaluator.AttributeWithValueMatching(body.substring(0, eq - 1), Pattern.compile(value));
                        break;
                    default:
                        evaluator = new Evaluator.AttributeWithValue(body.substring(0, eq), value);
                        break;
                }
            }
            queryParser.evaluatorStack.push(evaluator);
            logger.trace("[添加{}选择器]{}", evaluator.getClass().getSimpleName(), evaluator);
            return (close < 0 ? chars.length : close + 1) - pos;
        }),
        /** A run of blanks and commas containing at least one comma: separates alternatives. */
        ByOr((queryParser) -> {
            char[] chars = queryParser.chars;
            int pos = queryParser.pos;

            if (chars[pos] != ' ' && chars[pos] != ',') {
                return 0;
            }
            int last = pos;
            boolean hasComma = false;
            while (last < chars.length && (chars[last] == ' ' || chars[last] == ',')) {
                hasComma |= chars[last] == ',';
                last++;
            }
            if (!hasComma) {
                // Blanks only: this is a descendant combinator, not an alternative separator.
                return 0;
            }
            int consumed = last - pos;
            // Pop selectors until the stack is empty or an And selector is encountered.
            CombiningEvaluator.And and = new CombiningEvaluator.And(new ArrayList<>());
            while (!queryParser.evaluatorStack.isEmpty() && !(queryParser.evaluatorStack.peek() instanceof CombiningEvaluator.And)) {
                and.evaluatorList.add(queryParser.evaluatorStack.pop());
            }
            if (and.evaluatorList.isEmpty()) {
                // Nothing precedes the comma (e.g. a leading comma): consume the separator
                // without recording an empty alternative.
                return consumed;
            }
            if (queryParser.or == null) {
                queryParser.or = new CombiningEvaluator.Or(new ArrayList<>());
                logger.trace("[添加Or选择器]{}", queryParser.or);
            }
            if (and.evaluatorList.size() == 1) {
                queryParser.or.evaluatorList.add(and.evaluatorList.get(0));
            } else {
                queryParser.or.evaluatorList.add(and);
            }
            logger.trace("[Or选择器中添加And选择器]{}", and);
            return consumed;
        }),
        /** A run of combinator characters ({@code >}, {@code +}, {@code ~}, blank). */
        ByCombination((queryParser) -> {
            char[] chars = queryParser.chars;
            int pos = queryParser.pos;

            if (!isCombinators(chars[pos])) {
                return 0;
            }
            int last = pos;
            while (last < chars.length && isCombinators(chars[last])) {
                last++;
            }
            String content = new String(chars, pos, last - pos);
            // Pop all non-StructuralEvaluator entries: they form the left-hand compound selector.
            List<Evaluator> evaluatorList = new ArrayList<>();
            while (!queryParser.evaluatorStack.isEmpty() && !(queryParser.evaluatorStack.peek() instanceof StructuralEvaluator)) {
                evaluatorList.add(queryParser.evaluatorStack.pop());
            }
            Evaluator lastEvaluator;
            if (evaluatorList.size() == 1) {
                lastEvaluator = evaluatorList.get(0);
            } else {
                lastEvaluator = new CombiningEvaluator.And(evaluatorList);
            }
            if (!queryParser.evaluatorStack.isEmpty()) {
                // A structural evaluator from an earlier combinator is still on the stack:
                // the left-hand side must satisfy it as well.
                CombiningEvaluator.And and = new CombiningEvaluator.And(new ArrayList<>());
                and.evaluatorList.add(lastEvaluator);
                and.evaluatorList.add(queryParser.evaluatorStack.pop());
                lastEvaluator = and;
            }

            // The explicit combinators win over blanks that merely surround them.
            StructuralEvaluator evaluator;
            if (content.contains(">")) {
                evaluator = new StructuralEvaluator.ImmediateParent(lastEvaluator);
            } else if (content.contains("+")) {
                evaluator = new StructuralEvaluator.ImmediatePreviousSibling(lastEvaluator);
            } else if (content.contains("~")) {
                evaluator = new StructuralEvaluator.PreviousSibling(lastEvaluator);
            } else {
                // content is non-empty and made of combinator characters only, so it is blanks.
                evaluator = new StructuralEvaluator.Parent(lastEvaluator);
            }
            queryParser.evaluatorStack.push(evaluator);
            logger.trace("[添加Combination选择器]{}", evaluator);
            return content.length();
        }),
        /** Argument-less pseudo selectors registered in {@code pseudoMap}. */
        ByPseudoCommon((queryParser) -> {
            char[] chars = queryParser.chars;
            int pos = queryParser.pos;

            if (chars[pos] != ':') {
                return 0;
            }
            // ":first-of-type" is the longest registered key, so a window of that length is
            // enough to test every key with startsWith.
            int window = Math.min(":first-of-type".length(), chars.length - pos);
            String prefix = new String(chars, pos, window);
            for (Map.Entry<String, Evaluator> entry : pseudoMap.entrySet()) {
                if (prefix.startsWith(entry.getKey())) {
                    queryParser.evaluatorStack.push(entry.getValue());
                    return entry.getKey().length();
                }
            }
            return 0;
        }),
        /**
         * {@code :nth-child(..)}, {@code :nth-last-child(..)}, {@code :nth-of-type(..)} and
         * {@code :nth-last-of-type(..)}.
         */
        ByNth((queryParser) -> {
            char[] chars = queryParser.chars;
            int pos = queryParser.pos;

            if (chars[pos] != ':') {
                return 0;
            }
            int nameEnd = pseudoNameEnd(chars, pos);
            String name = new String(chars, pos + 1, nameEnd - pos - 1);
            if (!name.startsWith("nth-") || nameEnd >= chars.length || chars[nameEnd] != '(') {
                return 0;
            }
            int close = indexOf(chars, ')', nameEnd);
            if (close < 0) {
                throw new IllegalArgumentException("Unterminated pseudo selector ':" + name + "(' in query: " + new String(chars));
            }
            int[] ab = parseNthExpression(new String(chars, nameEnd + 1, close - nameEnd - 1));
            Evaluator structuralEvaluator;
            switch (name) {
                case "nth-child":
                    structuralEvaluator = new Evaluator.IsNthChild(ab[0], ab[1]);
                    break;
                case "nth-last-child":
                    structuralEvaluator = new Evaluator.IsNthLastChild(ab[0], ab[1]);
                    break;
                case "nth-of-type":
                    structuralEvaluator = new Evaluator.IsNthOfType(ab[0], ab[1]);
                    break;
                case "nth-last-of-type":
                    structuralEvaluator = new Evaluator.IsNthLastOfType(ab[0], ab[1]);
                    break;
                default:
                    // Not one of ours; ByPseudo reports unsupported names.
                    return 0;
            }
            queryParser.evaluatorStack.push(structuralEvaluator);
            logger.trace("[添加Nth选择器]{}", structuralEvaluator);
            return close - pos + 1;
        }),
        /**
         * Pseudo selectors with an argument: {@code :lt}, {@code :gt}, {@code :eq}, {@code :has},
         * {@code :not}, {@code :contains}, {@code :containsOwn}, {@code :matches} and
         * {@code :matchesOwn}. The argument is the text between the opening parenthesis and the
         * first closing parenthesis.
         */
        ByPseudo((queryParser) -> {
            char[] chars = queryParser.chars;
            int pos = queryParser.pos;

            if (chars[pos] != ':') {
                return 0;
            }
            int nameEnd = pseudoNameEnd(chars, pos);
            String name = new String(chars, pos + 1, nameEnd - pos - 1);
            if (nameEnd >= chars.length || chars[nameEnd] != '(') {
                // Every argument-less pseudo this parser knows was already tried by
                // ByPseudoCommon, so whatever reaches this point is unsupported.
                throw new IllegalArgumentException("Unsupported pseudo selector ':" + name + "' in query: " + new String(chars));
            }
            int close = indexOf(chars, ')', nameEnd);
            if (close < 0) {
                throw new IllegalArgumentException("Unterminated pseudo selector ':" + name + "(' in query: " + new String(chars));
            }
            String data = new String(chars, nameEnd + 1, close - nameEnd - 1);
            Evaluator evaluator;
            switch (name) {
                case "lt":
                    evaluator = new Evaluator.IndexLessThan(Integer.parseInt(data.trim()));
                    break;
                case "gt":
                    evaluator = new Evaluator.IndexGreaterThan(Integer.parseInt(data.trim()));
                    break;
                case "eq":
                    evaluator = new Evaluator.IndexEquals(Integer.parseInt(data.trim()));
                    break;
                // NOTE(review): :has and :not wrap the evaluator that precedes them on the stack
                // and ignore the text inside the parentheses. This is kept as found; confirm it
                // is intended. An empty stack here throws EmptyStackException.
                case "has":
                    evaluator = new StructuralEvaluator.Has(queryParser.evaluatorStack.pop());
                    break;
                case "not":
                    evaluator = new StructuralEvaluator.Not(queryParser.evaluatorStack.pop());
                    break;
                case "containsOwn":
                    evaluator = new Evaluator.ContainsOwnText(data);
                    break;
                case "matchesOwn":
                    evaluator = new Evaluator.MatchesOwn(Pattern.compile(data));
                    break;
                case "contains":
                    evaluator = new Evaluator.ContainsText(data);
                    break;
                case "matches":
                    evaluator = new Evaluator.Matches(Pattern.compile(data));
                    break;
                default:
                    throw new IllegalArgumentException("Unsupported pseudo selector ':" + name + "' in query: " + new String(chars));
            }
            queryParser.evaluatorStack.push(evaluator);
            logger.trace("[添加伪类选择器]{}", evaluator);
            return close - pos + 1;
        });

        /** Recogniser: returns the number of characters consumed, or {@code 0} for no match. */
        private final Function<QueryParser, Integer> condition;

        Selector(Function<QueryParser, Integer> condition) {
            this.condition = condition;
        }
    }

    /** Returns whether {@code c} is one of the combinator characters. */
    private static boolean isCombinators(char c) {
        for (char combinator : combinators) {
            if (c == combinator) {
                return true;
            }
        }
        return false;
    }

    /** Returns whether {@code c} ends an id or class name, i.e. starts another selector part. */
    private static boolean isNameTerminator(char c) {
        return c == ',' || c == '.' || c == '#' || c == '[' || c == ':' || isCombinators(c);
    }

    /** Returns the index of the first {@code target} at or after {@code from}, or {@code -1}. */
    private static int indexOf(char[] chars, char target, int from) {
        for (int i = from; i < chars.length; i++) {
            if (chars[i] == target) {
                return i;
            }
        }
        return -1;
    }

    /**
     * Returns the index just past the pseudo-selector name (letters, digits and hyphens) that
     * follows the colon at {@code colon}.
     */
    private static int pseudoNameEnd(char[] chars, int colon) {
        int end = colon + 1;
        while (end < chars.length && (Character.isLetterOrDigit(chars[end]) || chars[end] == '-')) {
            end++;
        }
        return end;
    }

    /** Removes one pair of matching surrounding single or double quotes, if present. */
    private static String unquote(String value) {
        int length = value.length();
        if (length >= 2) {
            char first = value.charAt(0);
            if ((first == '"' || first == '\'') && value.charAt(length - 1) == first) {
                return value.substring(1, length - 1);
            }
        }
        return value;
    }

    /**
     * Parses the argument of an {@code :nth-*} selector into the pair {@code {a, b}} of the
     * expression {@code an+b}.
     *
     * <p>Accepted forms: {@code odd} (2n+1), {@code even} (2n), a plain integer (a = 0), and
     * {@code an}, {@code an+b}, {@code an-b} where {@code a} may be omitted, {@code +} or
     * {@code -}. Blanks are ignored.
     *
     * @throws NumberFormatException if a coefficient is not an integer
     */
    private static int[] parseNthExpression(String expression) {
        String expr = expression.replace(" ", "");
        if (expr.equals("odd")) {
            return new int[]{2, 1};
        }
        if (expr.equals("even")) {
            return new int[]{2, 0};
        }
        int n = expr.indexOf('n');
        if (n < 0) {
            return new int[]{0, Integer.parseInt(expr)};
        }
        String step = expr.substring(0, n);
        String offset = expr.substring(n + 1);
        int a;
        if (step.isEmpty() || step.equals("+")) {
            a = 1;
        } else if (step.equals("-")) {
            a = -1;
        } else {
            a = Integer.parseInt(step);
        }
        int b = offset.isEmpty() ? 0 : Integer.parseInt(offset);
        return new int[]{a, b};
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy