All downloads are free. Search and download functionality uses the official Maven repository.

org.jsoup.select.Evaluator Maven / Gradle / Ivy

Go to download

SDK for dev_appserver (local development) with some of the dependencies shaded (repackaged)

There is a newer version: 2.0.31
Show newest version
package org.jsoup.select;

import org.jsoup.helper.Validate;
import org.jsoup.nodes.Comment;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.DocumentType;
import org.jsoup.nodes.Element;
import org.jsoup.nodes.Node;
import org.jsoup.nodes.PseudoTextElement;
import org.jsoup.nodes.TextNode;
import org.jsoup.nodes.XmlDeclaration;
import org.jsoup.parser.ParseSettings;

import java.util.List;
import java.util.function.Predicate;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import static org.jsoup.internal.Normalizer.lowerCase;
import static org.jsoup.internal.Normalizer.normalize;
import static org.jsoup.internal.StringUtil.normaliseWhitespace;


/**
 * Evaluates that an element matches the selector.
 */
public abstract class Evaluator {
    /**
     Constructor for use by subclasses; this base class declares no fields to initialise.
     */
    protected Evaluator() {
    }

    /**
     Provides a Predicate for this Evaluator, matching the test Element. The returned predicate delegates to
     {@link #matches(Element, Element)} with the supplied root fixed as the first argument.
     * @param root the root Element, for match evaluation
     * @return a predicate that accepts an Element to test for matches with this Evaluator
     * @since 1.17.1
     */
    public Predicate<Element> asPredicate(Element root) {
        // the type argument is required: with a raw Predicate the lambda parameter would be an Object
        return element -> matches(root, element);
    }

    /**
     * Test if the element meets the evaluator's requirements. Each concrete Evaluator supplies its own rule.
     *
     * @param root    Root of the matching subtree
     * @param element tested element
     * @return {@code true} if the requirements are met, {@code false} otherwise
     */
    public abstract boolean matches(Element root, Element element);

    /**
     Reset any internal state in this Evaluator before executing a new Collector evaluation. This base
     implementation does nothing; subclasses that hold state may override it.
     */
    protected void reset() {
    }

    /**
     A relative evaluator cost function. During evaluation, Evaluators are sorted by ascending cost as an optimization.
     Subclasses override this to report a cheaper or more expensive test than the nominal default returned here.
     * @return the relative cost of this Evaluator
     */
    protected int cost() {
        return 5; // a nominal default cost
    }

    /**
     * Evaluator that matches an element by its tag name.
     */
    public static final class Tag extends Evaluator {
        private final String tagName;

        public Tag(String tagName) {
            this.tagName = tagName;
        }

        /**
         * @return the result of {@code element.nameIs(tagName)}
         */
        @Override
        public boolean matches(Element root, Element element) {
            return element.nameIs(tagName);
        }

        @Override
        protected int cost() {
            return 1;
        }

        @Override
        public String toString() {
            return String.valueOf(tagName);
        }
    }


    /**
     * Evaluator that matches an element whose normal name ends with the given suffix.
     */
    public static final class TagEndsWith extends Evaluator {
        private final String tagName;

        public TagEndsWith(String tagName) {
            this.tagName = tagName;
        }

        @Override
        public boolean matches(Element root, Element element) {
            final String name = element.normalName();
            return name.endsWith(tagName);
        }

        @Override
        public String toString() {
            return String.valueOf(tagName);
        }
    }

    /**
     * Evaluator that matches an element by its id (css {@code #id}).
     */
    public static final class Id extends Evaluator {
        private final String id;

        public Id(String id) {
            this.id = id;
        }

        @Override
        public boolean matches(Element root, Element element) {
            final String elementId = element.id();
            return id.equals(elementId);
        }

        @Override
        protected int cost() {
            return 2;
        }

        @Override
        public String toString() {
            return "#" + id;
        }
    }

    /**
     * Evaluator that matches an element carrying the given class name (css {@code .name}).
     */
    public static final class Class extends Evaluator {
        private final String className;

        public Class(String className) {
            this.className = className;
        }

        @Override
        public boolean matches(Element root, Element element) {
            return element.hasClass(className);
        }

        @Override
        protected int cost() {
            // does whitespace scanning
            return 6;
        }

        @Override
        public String toString() {
            return "." + className;
        }
    }

    /**
     * Evaluator that matches an element having an attribute with the given key (css {@code [key]}).
     */
    public static final class Attribute extends Evaluator {
        private final String key;

        public Attribute(String key) {
            this.key = key;
        }

        @Override
        public boolean matches(Element root, Element element) {
            final boolean present = element.hasAttr(key);
            return present;
        }

        @Override
        protected int cost() {
            return 2;
        }

        @Override
        public String toString() {
            return "[" + key + "]";
        }
    }

    /**
     * Evaluator for attribute name prefix matching (css {@code [^prefix]}). The prefix and each attribute key are
     * lower-cased before comparison, so the match is case-insensitive.
     */
    public static final class AttributeStarting extends Evaluator {
        private final String keyPrefix;

        /**
         * @param keyPrefix the attribute-name prefix to look for; must not be null, but may be empty
         */
        public AttributeStarting(String keyPrefix) {
            Validate.notNull(keyPrefix); // OK to be empty - will find elements with any attributes
            this.keyPrefix = lowerCase(keyPrefix);
        }

        /**
         * @return true if any attribute of the element has a key starting with the prefix
         */
        @Override
        public boolean matches(Element root, Element element) {
            // typed list: a raw List cannot be iterated with an Attribute loop variable
            List<org.jsoup.nodes.Attribute> values = element.attributes().asList();
            for (org.jsoup.nodes.Attribute attribute : values) {
                if (lowerCase(attribute.getKey()).startsWith(keyPrefix))
                    return true;
            }
            return false;
        }

        @Override protected int cost() {
            return 6;
        }

        @Override
        public String toString() {
            return String.format("[^%s]", keyPrefix);
        }

    }

    /**
     * Evaluator for attribute name/value equality (css {@code [key=value]}). The element's attribute value is
     * trimmed and compared ignoring case.
     */
    public static final class AttributeWithValue extends AttributeKeyPair {
        public AttributeWithValue(String key, String value) {
            super(key, value);
        }

        @Override
        public boolean matches(Element root, Element element) {
            if (!element.hasAttr(key)) {
                return false;
            }
            final String actual = element.attr(key).trim();
            return value.equalsIgnoreCase(actual);
        }

        @Override
        protected int cost() {
            return 3;
        }

        @Override
        public String toString() {
            return "[" + key + "=" + value + "]";
        }
    }

    /**
     * Evaluator for attribute name != value matching (css-like {@code [key!=value]}). The comparison ignores case.
     * No {@code hasAttr} check is made, and the element's attribute value is not trimmed.
     */
    public static final class AttributeWithValueNot extends AttributeKeyPair {
        public AttributeWithValueNot(String key, String value) {
            super(key, value);
        }

        @Override
        public boolean matches(Element root, Element element) {
            final String actual = element.attr(key);
            return !value.equalsIgnoreCase(actual);
        }

        @Override
        protected int cost() {
            return 3;
        }

        @Override
        public String toString() {
            return "[" + key + "!=" + value + "]";
        }
    }

    /**
     * Evaluator for attribute value prefix matching (css {@code [key^=value]}). The element's attribute value is
     * lower-cased before the prefix test; the stored value is not trimmed.
     */
    public static final class AttributeWithValueStarting extends AttributeKeyPair {
        public AttributeWithValueStarting(String key, String value) {
            super(key, value, false);
        }

        @Override
        public boolean matches(Element root, Element element) {
            if (!element.hasAttr(key)) {
                return false;
            }
            // the stored value is lower case already
            final String actual = lowerCase(element.attr(key));
            return actual.startsWith(value);
        }

        @Override
        protected int cost() {
            return 4;
        }

        @Override
        public String toString() {
            return "[" + key + "^=" + value + "]";
        }
    }

    /**
     * Evaluator for attribute value suffix matching (css {@code [key$=value]}). The element's attribute value is
     * lower-cased before the suffix test; the stored value is not trimmed.
     */
    public static final class AttributeWithValueEnding extends AttributeKeyPair {
        public AttributeWithValueEnding(String key, String value) {
            super(key, value, false);
        }

        @Override
        public boolean matches(Element root, Element element) {
            if (!element.hasAttr(key)) {
                return false;
            }
            // the stored value is lower case
            final String actual = lowerCase(element.attr(key));
            return actual.endsWith(value);
        }

        @Override
        protected int cost() {
            return 4;
        }

        @Override
        public String toString() {
            return "[" + key + "$=" + value + "]";
        }
    }

    /**
     * Evaluator for attribute value substring matching (css {@code [key*=value]}). The element's attribute value
     * is lower-cased before the containment test.
     */
    public static final class AttributeWithValueContaining extends AttributeKeyPair {
        public AttributeWithValueContaining(String key, String value) {
            super(key, value);
        }

        @Override
        public boolean matches(Element root, Element element) {
            if (!element.hasAttr(key)) {
                return false;
            }
            // the stored value is lower case
            final String actual = lowerCase(element.attr(key));
            return actual.contains(value);
        }

        @Override
        protected int cost() {
            return 6;
        }

        @Override
        public String toString() {
            return "[" + key + "*=" + value + "]";
        }
    }

    /**
     * Evaluator for attribute value regex matching (css-like {@code [key~=regex]}). The pattern only needs to be
     * found somewhere in the attribute value; it does not have to match the whole value.
     */
    public static final class AttributeWithValueMatching extends Evaluator {
        final String key;
        final Pattern pattern;

        public AttributeWithValueMatching(String key, Pattern pattern) {
            this.key = normalize(key);
            this.pattern = pattern;
        }

        @Override
        public boolean matches(Element root, Element element) {
            if (!element.hasAttr(key)) {
                return false;
            }
            final String actual = element.attr(key);
            return pattern.matcher(actual).find();
        }

        @Override
        protected int cost() {
            return 8;
        }

        @Override
        public String toString() {
            return "[" + key + "~=" + pattern.toString() + "]";
        }
    }

    /**
     * Abstract evaluator for attribute name/value matching. Holds the normalized attribute key and the prepared
     * comparison value for subclasses.
     */
    public abstract static class AttributeKeyPair extends Evaluator {
        final String key;
        final String value;

        /**
         * Creates a key/value pair, preparing the value via {@code normalize(value)}.
         *
         * @param key   attribute key; must not be empty
         * @param value value to compare against; must not be empty
         */
        public AttributeKeyPair(String key, String value) {
            this(key, value, true);
        }

        /**
         * Creates a key/value pair. If the value is wrapped in a matching pair of single or double quotes, the
         * quotes are removed before the value is normalized.
         *
         * @param key       attribute key; must not be empty
         * @param value     value to compare against; must not be empty
         * @param trimValue if true the value is prepared with {@code normalize(value)}; otherwise with
         *                  {@code normalize(value, isStringLiteral)}
         */
        public AttributeKeyPair(String key, String value, boolean trimValue) {
            Validate.notEmpty(key);
            Validate.notEmpty(value);

            this.key = normalize(key);
            // A lone quote character both starts and ends with a quote. Require at least two characters so that
            // stripping the delimiters cannot call substring(1, 0) and throw.
            boolean isStringLiteral = value.length() >= 2
                && (value.startsWith("'") && value.endsWith("'")
                    || value.startsWith("\"") && value.endsWith("\""));
            if (isStringLiteral) {
                value = value.substring(1, value.length()-1);
            }

            this.value = trimValue ? normalize(value) : normalize(value, isStringLiteral);
        }
    }

    /**
     * Evaluator for any / all element matching (css {@code *}). Every element matches.
     */
    public static final class AllElements extends Evaluator {

        @Override
        public boolean matches(Element root, Element element) {
            return true;
        }

        // Reports a higher cost than the nominal default of 5 in Evaluator.cost().
        @Override protected int cost() {
            return 10;
        }

        @Override
        public String toString() {
            return "*";
        }
    }

    /**
     * Evaluator for matching by sibling index number (e {@literal <} idx). The root element itself never matches.
     */
    public static final class IndexLessThan extends IndexEvaluator {
        public IndexLessThan(int index) {
            super(index);
        }

        @Override
        public boolean matches(Element root, Element element) {
            if (root == element) {
                return false;
            }
            return element.elementSiblingIndex() < index;
        }

        @Override
        public String toString() {
            final String selector = String.format(":lt(%d)", index);
            return selector;
        }
    }

    /**
     * Evaluator for matching by sibling index number (e {@literal >} idx).
     */
    public static final class IndexGreaterThan extends IndexEvaluator {
        public IndexGreaterThan(int index) {
            super(index);
        }

        @Override
        public boolean matches(Element root, Element element) {
            final int siblingIndex = element.elementSiblingIndex();
            return siblingIndex > index;
        }

        @Override
        public String toString() {
            final String selector = String.format(":gt(%d)", index);
            return selector;
        }
    }

    /**
     * Evaluator for matching by sibling index number (e = idx).
     */
    public static final class IndexEquals extends IndexEvaluator {
        public IndexEquals(int index) {
            super(index);
        }

        @Override
        public boolean matches(Element root, Element element) {
            final int siblingIndex = element.elementSiblingIndex();
            return siblingIndex == index;
        }

        @Override
        public String toString() {
            final String selector = String.format(":eq(%d)", index);
            return selector;
        }
    }

    /**
     * Evaluator for matching the last sibling (css :last-child). Elements without a parent, or whose parent is
     * the Document, never match.
     */
    public static final class IsLastChild extends Evaluator {
        @Override
        public boolean matches(Element root, Element element) {
            final Element parent = element.parent();
            if (parent == null || parent instanceof Document) {
                return false;
            }
            return element == parent.lastElementChild();
        }

        @Override
        public String toString() {
            return ":last-child";
        }
    }

    /**
     * css pseudo class :first-of-type, expressed as :nth-of-type with a = 0 and b = 1.
     */
    public static final class IsFirstOfType extends IsNthOfType {
        public IsFirstOfType() {
            super(0, 1);
        }

        @Override
        public String toString() {
            return ":first-of-type";
        }
    }

    /**
     * css pseudo class :last-of-type, expressed as :nth-last-of-type with a = 0 and b = 1.
     */
    public static final class IsLastOfType extends IsNthLastOfType {
        public IsLastOfType() {
            super(0, 1);
        }

        @Override
        public String toString() {
            return ":last-of-type";
        }
    }


    /**
     * Base class for the css {@code an+b} family of positional pseudo-classes. Subclasses supply the position of
     * an element and the pseudo-class name; this class applies the {@code an+b} test and renders the selector.
     */
    public abstract static class CssNthEvaluator extends Evaluator {
        protected final int a, b;

        public CssNthEvaluator(int a, int b) {
            this.a = a;
            this.b = b;
        }

        public CssNthEvaluator(int b) {
            this(0, b);
        }

        /**
         * Matches when the element's position equals {@code b} (for a == 0), or otherwise when the offset from
         * {@code b} is a multiple of {@code a} with the same sign as {@code a} (or zero). Elements without a
         * parent, or whose parent is the Document, never match.
         */
        @Override
        public boolean matches(Element root, Element element) {
            final Element parent = element.parent();
            if (parent == null || parent instanceof Document) {
                return false;
            }

            final int pos = calculatePosition(root, element);
            if (a == 0) {
                return pos == b;
            }

            final int offset = pos - b;
            return offset * a >= 0 && offset % a == 0;
        }

        @Override
        public String toString() {
            final String pseudo = getPseudoClass();
            if (a == 0) {
                return String.format(":%s(%d)", pseudo, b);
            }
            if (b == 0) {
                return String.format(":%s(%dn)", pseudo, a);
            }
            return String.format(":%s(%dn%+d)", pseudo, a, b);
        }

        /** @return the pseudo-class name, without the leading colon */
        protected abstract String getPseudoClass();

        /** @return the position of the element that is fed into the {@code an+b} test */
        protected abstract int calculatePosition(Element root, Element element);
    }


    /**
     * css-compatible Evaluator for :eq (css :nth-child)
     *
     * @see IndexEquals
     */
    public static final class IsNthChild extends CssNthEvaluator {
        public IsNthChild(int a, int b) {
            super(a, b);
        }

        /**
         * @return the element's sibling index shifted up by one
         */
        @Override
        protected int calculatePosition(Element root, Element element) {
            final int siblingIndex = element.elementSiblingIndex();
            return siblingIndex + 1;
        }

        @Override
        protected String getPseudoClass() {
            return "nth-child";
        }
    }

    /**
     * css pseudo class :nth-last-child
     *
     * @see IndexEquals
     */
    public static final class IsNthLastChild extends CssNthEvaluator {
        public IsNthLastChild(int a, int b) {
            super(a, b);
        }

        /**
         * @return the parent's children count minus the element's sibling index, or 0 if there is no parent
         */
        @Override
        protected int calculatePosition(Element root, Element element) {
            final Element parent = element.parent();
            if (parent == null) {
                return 0;
            }
            return parent.childrenSize() - element.elementSiblingIndex();
        }

        @Override
        protected String getPseudoClass() {
            return "nth-last-child";
        }
    }

    /**
     * css pseudo class nth-of-type
     */
    public static class IsNthOfType extends CssNthEvaluator {
        public IsNthOfType(int a, int b) {
            super(a, b);
        }

        /**
         * Counts the parent's child nodes, up to and including the element, whose normal name equals the
         * element's normal name.
         *
         * @return that count, or 0 if the element has no parent
         */
        @Override
        protected int calculatePosition(Element root, Element element) {
            final Element parent = element.parent();
            if (parent == null) {
                return 0;
            }

            int count = 0;
            for (int i = 0, total = parent.childNodeSize(); i < total; i++) {
                final Node child = parent.childNode(i);
                if (child.normalName().equals(element.normalName())) {
                    count++;
                }
                if (child == element) {
                    break;
                }
            }
            return count;
        }

        @Override
        protected String getPseudoClass() {
            return "nth-of-type";
        }
    }

    /**
     * css pseudo class nth-last-of-type
     */
    public static class IsNthLastOfType extends CssNthEvaluator {
        public IsNthLastOfType(int a, int b) {
            super(a, b);
        }

        /**
         * Counts the element itself plus every following element sibling with the same normal name.
         *
         * @return that count, or 0 if the element has no parent
         */
        @Override
        protected int calculatePosition(Element root, Element element) {
            if (element.parent() == null) {
                return 0;
            }

            int count = 0;
            for (Element sibling = element; sibling != null; sibling = sibling.nextElementSibling()) {
                if (sibling.normalName().equals(element.normalName())) {
                    count++;
                }
            }
            return count;
        }

        @Override
        protected String getPseudoClass() {
            return "nth-last-of-type";
        }
    }

    /**
     * Evaluator for matching the first sibling (css :first-child). Elements without a parent, or whose parent is
     * the Document, never match.
     */
    public static final class IsFirstChild extends Evaluator {
        @Override
        public boolean matches(Element root, Element element) {
            final Element parent = element.parent();
            if (parent == null || parent instanceof Document) {
                return false;
            }
            return element == parent.firstElementChild();
        }

        @Override
        public String toString() {
            return ":first-child";
        }
    }

    /**
     * css3 pseudo-class :root. When the supplied root is a Document, the element is compared against the
     * Document's first element child; otherwise against the supplied root itself.
     *
     * @see <a href="https://www.w3.org/TR/selectors/#root-pseudo">:root selector</a>
     */
    public static final class IsRoot extends Evaluator {
        @Override
        public boolean matches(Element root, Element element) {
            final Element effectiveRoot;
            if (root instanceof Document) {
                effectiveRoot = root.firstElementChild();
            } else {
                effectiveRoot = root;
            }
            return effectiveRoot == element;
        }

        @Override
        protected int cost() {
            return 1;
        }

        @Override
        public String toString() {
            return ":root";
        }
    }

    /**
     * css pseudo class :only-child. Matches an element that has no sibling elements. Elements without a parent,
     * or whose parent is the Document, never match.
     */
    public static final class IsOnlyChild extends Evaluator {
        @Override
        public boolean matches(Element root, Element element) {
            final Element parent = element.parent();
            if (parent == null || parent instanceof Document) {
                return false;
            }
            return element.siblingElements().isEmpty();
        }

        @Override
        public String toString() {
            return ":only-child";
        }
    }

    /**
     * css pseudo class :only-of-type. Matches when exactly one element child of the parent has the same normal
     * name as the element. Elements without a parent, or whose parent is the Document, never match.
     */
    public static final class IsOnlyOfType extends Evaluator {
        @Override
        public boolean matches(Element root, Element element) {
            final Element parent = element.parent();
            if (parent == null || parent instanceof Document) {
                return false;
            }

            int sameType = 0;
            for (Element sibling = parent.firstElementChild(); sibling != null; sibling = sibling.nextElementSibling()) {
                if (sibling.normalName().equals(element.normalName())) {
                    sameType++;
                    if (sameType > 1) {
                        break; // a second match already rules the element out
                    }
                }
            }
            return sameType == 1;
        }

        @Override
        public String toString() {
            return ":only-of-type";
        }
    }

    /**
     * css pseudo class :empty. An element matches when every one of its child nodes is either a blank text node,
     * a comment, an XML declaration, or a document type. Any non-blank text or any other node kind (such as a
     * child element) means the element is not empty.
     */
    public static final class IsEmpty extends Evaluator {
        @Override
        public boolean matches(Element root, Element element) {
            List<Node> family = element.childNodes();
            for (Node n : family) {
                if (n instanceof TextNode) {
                    // Keep scanning past blank text. Returning here would wrongly report an element as empty
                    // when a leading blank text node is followed by real content.
                    if (!((TextNode) n).isBlank())
                        return false;
                } else if (!(n instanceof Comment || n instanceof XmlDeclaration || n instanceof DocumentType)) {
                    return false;
                }
            }
            return true;
        }

        @Override
        public String toString() {
            return ":empty";
        }
    }

    /**
     * Abstract evaluator for sibling index matching. Stores the index that subclasses compare an element's
     * sibling index against.
     *
     * @author ant
     */
    public abstract static class IndexEvaluator extends Evaluator {
        // the index to compare against; read directly by the subclasses in this file
        final int index;

        /**
         * @param index the sibling index to compare against
         */
        public IndexEvaluator(int index) {
            this.index = index;
        }
    }

    /**
     * Evaluator for matching Element (and its descendants) text. The search text is whitespace-normalized and
     * lower-cased up front, and the element text is lower-cased, so the test is case-insensitive.
     */
    public static final class ContainsText extends Evaluator {
        private final String searchText;

        public ContainsText(String searchText) {
            final String normalised = normaliseWhitespace(searchText);
            this.searchText = lowerCase(normalised);
        }

        @Override
        public boolean matches(Element root, Element element) {
            final String haystack = lowerCase(element.text());
            return haystack.contains(searchText);
        }

        @Override
        protected int cost() {
            return 10;
        }

        @Override
        public String toString() {
            return ":contains(" + searchText + ")";
        }
    }

    /**
     * Evaluator for matching Element (and its descendants) wholeText. Neither the input nor the element text is
     * normalized, so the test is case- and whitespace-sensitive. :containsWholeText()
     * @since 1.15.1
     */
    public static final class ContainsWholeText extends Evaluator {
        private final String searchText;

        public ContainsWholeText(String searchText) {
            this.searchText = searchText;
        }

        @Override
        public boolean matches(Element root, Element element) {
            final String haystack = element.wholeText();
            return haystack.contains(searchText);
        }

        @Override
        protected int cost() {
            return 10;
        }

        @Override
        public String toString() {
            return ":containsWholeText(" + searchText + ")";
        }
    }

    /**
     * Evaluator for matching Element (but not its descendants) wholeText. Neither the input nor the element text
     * is normalized, so the test is case- and whitespace-sensitive. :containsWholeOwnText()
     * @since 1.15.1
     */
    public static final class ContainsWholeOwnText extends Evaluator {
        private final String searchText;

        public ContainsWholeOwnText(String searchText) {
            this.searchText = searchText;
        }

        @Override
        public boolean matches(Element root, Element element) {
            final String haystack = element.wholeOwnText();
            return haystack.contains(searchText);
        }

        @Override
        public String toString() {
            return ":containsWholeOwnText(" + searchText + ")";
        }
    }

    /**
     * Evaluator for matching Element (and its descendants) data. Both sides are lower-cased; whitespace is left
     * as it is.
     */
    public static final class ContainsData extends Evaluator {
        private final String searchText;

        public ContainsData(String searchText) {
            this.searchText = lowerCase(searchText);
        }

        @Override
        public boolean matches(Element root, Element element) {
            // not whitespace normalized
            final String haystack = lowerCase(element.data());
            return haystack.contains(searchText);
        }

        @Override
        public String toString() {
            return ":containsData(" + searchText + ")";
        }
    }

    /**
     * Evaluator for matching Element's own text. The search text is whitespace-normalized and lower-cased up
     * front, and the element's own text is lower-cased, so the test is case-insensitive.
     */
    public static final class ContainsOwnText extends Evaluator {
        private final String searchText;

        public ContainsOwnText(String searchText) {
            final String normalised = normaliseWhitespace(searchText);
            this.searchText = lowerCase(normalised);
        }

        @Override
        public boolean matches(Element root, Element element) {
            final String haystack = lowerCase(element.ownText());
            return haystack.contains(searchText);
        }

        @Override
        public String toString() {
            return ":containsOwn(" + searchText + ")";
        }
    }

    /**
     * Evaluator for matching Element (and its descendants) text with regex. The pattern only needs to be found
     * somewhere in the text.
     */
    public static final class Matches extends Evaluator {
        private final Pattern pattern;

        public Matches(Pattern pattern) {
            this.pattern = pattern;
        }

        @Override
        public boolean matches(Element root, Element element) {
            return pattern.matcher(element.text()).find();
        }

        @Override
        protected int cost() {
            return 8;
        }

        @Override
        public String toString() {
            return ":matches(" + pattern + ")";
        }
    }

    /**
     * Evaluator for matching Element's own text with regex. The pattern only needs to be found somewhere in the
     * text.
     */
    public static final class MatchesOwn extends Evaluator {
        private final Pattern pattern;

        public MatchesOwn(Pattern pattern) {
            this.pattern = pattern;
        }

        @Override
        public boolean matches(Element root, Element element) {
            return pattern.matcher(element.ownText()).find();
        }

        @Override
        protected int cost() {
            return 7;
        }

        @Override
        public String toString() {
            return ":matchesOwn(" + pattern + ")";
        }
    }

    /**
     * Evaluator for matching Element (and its descendants) whole text with regex. The pattern only needs to be
     * found somewhere in the text.
     * @since 1.15.1
     */
    public static final class MatchesWholeText extends Evaluator {
        private final Pattern pattern;

        public MatchesWholeText(Pattern pattern) {
            this.pattern = pattern;
        }

        @Override
        public boolean matches(Element root, Element element) {
            return pattern.matcher(element.wholeText()).find();
        }

        @Override
        protected int cost() {
            return 8;
        }

        @Override
        public String toString() {
            return ":matchesWholeText(" + pattern + ")";
        }
    }

    /**
     * Evaluator for matching Element's own whole text with regex. The pattern only needs to be found somewhere in
     * the text.
     * @since 1.15.1
     */
    public static final class MatchesWholeOwnText extends Evaluator {
        private final Pattern pattern;

        public MatchesWholeOwnText(Pattern pattern) {
            this.pattern = pattern;
        }

        @Override
        public boolean matches(Element root, Element element) {
            return pattern.matcher(element.wholeOwnText()).find();
        }

        @Override
        protected int cost() {
            return 7;
        }

        @Override
        public String toString() {
            return ":matchesWholeOwnText(" + pattern + ")";
        }
    }

    /**
     * Evaluator for the :matchText pseudo selector. Matches only {@link PseudoTextElement}s. As a side effect of
     * testing any other element, each of that element's text nodes is replaced in the DOM by a new
     * PseudoTextElement (built from the element's tag name, namespace, base URI and attributes) that wraps the
     * text node, so that the wrappers can be matched later.
     */
    public static final class MatchText extends Evaluator {

        @Override
        public boolean matches(Element root, Element element) {
            if (element instanceof PseudoTextElement)
                return true;

            // typed list: a raw List cannot be iterated with a TextNode loop variable
            List<TextNode> textNodes = element.textNodes();
            for (TextNode textNode : textNodes) {
                PseudoTextElement pel = new PseudoTextElement(
                    org.jsoup.parser.Tag.valueOf(element.tagName(), element.tag().namespace(), ParseSettings.preserveCase), element.baseUri(), element.attributes());
                // swap the wrapper into the text node's place, then re-attach the text node beneath it
                textNode.replaceWith(pel);
                pel.appendChild(textNode);
            }
            return false;
        }

        @Override protected int cost() {
            return -1; // forces first evaluation, which prepares the DOM for later evaluator matches
        }

        @Override
        public String toString() {
            return ":matchText";
        }
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy