All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.jsoup.nodes.Attributes Maven / Gradle / Ivy

Go to download

SDK for dev_appserver (local development) with some of the dependencies shaded (repackaged)

There is a newer version: 2.0.31
Show newest version
package org.jsoup.nodes;

import org.jsoup.SerializationException;
import org.jsoup.helper.Validate;
import org.jsoup.internal.SharedConstants;
import org.jsoup.internal.StringUtil;
import org.jsoup.parser.ParseSettings;
import org.jspecify.annotations.Nullable;

import java.io.IOException;
import java.util.AbstractMap;
import java.util.AbstractSet;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.ConcurrentModificationException;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.Set;

import static org.jsoup.internal.Normalizer.lowerCase;
import static org.jsoup.internal.SharedConstants.AttrRangeKey;
import static org.jsoup.nodes.Range.AttributeRange.UntrackedAttr;

/**
 * The attributes of an Element.
 * 

* During parsing, attributes in with the same name in an element are deduplicated, according to the configured parser's * attribute case-sensitive setting. It is possible to have duplicate attributes subsequently if * {@link #add(String, String)} vs {@link #put(String, String)} is used. *

*

* Attribute name and value comparisons are generally case sensitive. By default for HTML, attribute names are * normalized to lower-case on parsing. That means you should use lower-case strings when referring to attributes by * name. *

* * @author Jonathan Hedley, [email protected] */ public class Attributes implements Iterable, Cloneable { // Indicates an internal key. Can't be set via HTML. (It could be set via accessor, but not too worried about // that. Suppressed from list, iter.) static final char InternalPrefix = '/'; // The Attributes object is only created on the first use of an attribute; the Element will just have a null // Attribute slot otherwise protected static final String dataPrefix = "data-"; private static final int InitialCapacity = 3; // sampling found mean count when attrs present = 1.49; 1.08 overall. 2.6:1 don't have any attrs. // manages the key/val arrays private static final int GrowthFactor = 2; static final int NotFound = -1; private static final String EmptyString = ""; // the number of instance fields is kept as low as possible giving an object size of 24 bytes private int size = 0; // number of slots used (not total capacity, which is keys.length) String[] keys = new String[InitialCapacity]; Object[] vals = new Object[InitialCapacity]; // Genericish: all non-internal attribute values must be Strings and are cast on access. // todo - make keys iterable without creating Attribute objects // check there's room for more private void checkCapacity(int minNewSize) { Validate.isTrue(minNewSize >= size); int curCap = keys.length; if (curCap >= minNewSize) return; int newCap = curCap >= InitialCapacity ? size * GrowthFactor : InitialCapacity; if (minNewSize > newCap) newCap = minNewSize; keys = Arrays.copyOf(keys, newCap); vals = Arrays.copyOf(vals, newCap); } int indexOfKey(String key) { Validate.notNull(key); for (int i = 0; i < size; i++) { if (key.equals(keys[i])) return i; } return NotFound; } private int indexOfKeyIgnoreCase(String key) { Validate.notNull(key); for (int i = 0; i < size; i++) { if (key.equalsIgnoreCase(keys[i])) return i; } return NotFound; } // we track boolean attributes as null in values - they're just keys. so returns empty for consumers // casts to String, so only for non-internal attributes static String checkNotNull(@Nullable Object val) { return val == null ? EmptyString : (String) val; } /** Get an attribute value by key. @param key the (case-sensitive) attribute key @return the attribute value if set; or empty string if not set (or a boolean attribute). @see #hasKey(String) */ public String get(String key) { int i = indexOfKey(key); return i == NotFound ? EmptyString : checkNotNull(vals[i]); } /** Get an Attribute by key. The Attribute will remain connected to these Attributes, so changes made via {@link Attribute#setKey(String)}, {@link Attribute#setValue(String)} etc will cascade back to these Attributes and their owning Element. @param key the (case-sensitive) attribute key @return the Attribute for this key, or null if not present. @since 1.17.2 */ public Attribute attribute(String key) { int i = indexOfKey(key); return i == NotFound ? null : new Attribute(key, checkNotNull(vals[i]), this); } /** * Get an attribute's value by case-insensitive key * @param key the attribute name * @return the first matching attribute value if set; or empty string if not set (ora boolean attribute). */ public String getIgnoreCase(String key) { int i = indexOfKeyIgnoreCase(key); return i == NotFound ? EmptyString : checkNotNull(vals[i]); } /** * Adds a new attribute. Will produce duplicates if the key already exists. * @see Attributes#put(String, String) */ public Attributes add(String key, @Nullable String value) { addObject(key, value); return this; } private void addObject(String key, @Nullable Object value) { checkCapacity(size + 1); keys[size] = key; vals[size] = value; size++; } /** * Set a new attribute, or replace an existing one by key. * @param key case sensitive attribute key (not null) * @param value attribute value (which can be null, to set a true boolean attribute) * @return these attributes, for chaining */ public Attributes put(String key, @Nullable String value) { Validate.notNull(key); int i = indexOfKey(key); if (i != NotFound) vals[i] = value; else add(key, value); return this; } /** Get the map holding any user-data associated with these Attributes. Will be created empty on first use. Held as an internal attribute, not a field member, to reduce the memory footprint of Attributes when not used. Can hold arbitrary objects; use for source ranges, connecting W3C nodes to Elements, etc. * @return the map holding user-data */ Map userData() { final Map userData; int i = indexOfKey(SharedConstants.UserDataKey); if (i == NotFound) { userData = new HashMap<>(); addObject(SharedConstants.UserDataKey, userData); } else { //noinspection unchecked userData = (Map) vals[i]; } return userData; } /** Get an arbitrary user-data object by key. * @param key case-sensitive key to the object. * @return the object associated to this key, or {@code null} if not found. * @see #userData(String key, Object val) * @since 1.17.1 */ @Nullable public Object userData(String key) { Validate.notNull(key); if (!hasKey(SharedConstants.UserDataKey)) return null; // no user data exists Map userData = userData(); return userData.get(key); } /** Set an arbitrary user-data object by key. Will be treated as an internal attribute, so will not be emitted in HTML. * @param key case-sensitive key * @param value object value * @return these attributes * @see #userData(String key) * @since 1.17.1 */ public Attributes userData(String key, Object value) { Validate.notNull(key); userData().put(key, value); return this; } void putIgnoreCase(String key, @Nullable String value) { int i = indexOfKeyIgnoreCase(key); if (i != NotFound) { vals[i] = value; if (!keys[i].equals(key)) // case changed, update keys[i] = key; } else add(key, value); } /** * Set a new boolean attribute. Removes the attribute if the value is false. * @param key case insensitive attribute key * @param value attribute value * @return these attributes, for chaining */ public Attributes put(String key, boolean value) { if (value) putIgnoreCase(key, null); else remove(key); return this; } /** Set a new attribute, or replace an existing one by key. @param attribute attribute with case-sensitive key @return these attributes, for chaining */ public Attributes put(Attribute attribute) { Validate.notNull(attribute); put(attribute.getKey(), attribute.getValue()); attribute.parent = this; return this; } // removes and shifts up @SuppressWarnings("AssignmentToNull") private void remove(int index) { Validate.isFalse(index >= size); int shifted = size - index - 1; if (shifted > 0) { System.arraycopy(keys, index + 1, keys, index, shifted); System.arraycopy(vals, index + 1, vals, index, shifted); } size--; keys[size] = null; // release hold vals[size] = null; } /** Remove an attribute by key. Case sensitive. @param key attribute key to remove */ public void remove(String key) { int i = indexOfKey(key); if (i != NotFound) remove(i); } /** Remove an attribute by key. Case insensitive. @param key attribute key to remove */ public void removeIgnoreCase(String key) { int i = indexOfKeyIgnoreCase(key); if (i != NotFound) remove(i); } /** Tests if these attributes contain an attribute with this key. @param key case-sensitive key to check for @return true if key exists, false otherwise */ public boolean hasKey(String key) { return indexOfKey(key) != NotFound; } /** Tests if these attributes contain an attribute with this key. @param key key to check for @return true if key exists, false otherwise */ public boolean hasKeyIgnoreCase(String key) { return indexOfKeyIgnoreCase(key) != NotFound; } /** * Check if these attributes contain an attribute with a value for this key. * @param key key to check for * @return true if key exists, and it has a value */ public boolean hasDeclaredValueForKey(String key) { int i = indexOfKey(key); return i != NotFound && vals[i] != null; } /** * Check if these attributes contain an attribute with a value for this key. * @param key case-insensitive key to check for * @return true if key exists, and it has a value */ public boolean hasDeclaredValueForKeyIgnoreCase(String key) { int i = indexOfKeyIgnoreCase(key); return i != NotFound && vals[i] != null; } /** Get the number of attributes in this set, including any jsoup internal-only attributes. Internal attributes are excluded from the {@link #html()}, {@link #asList()}, and {@link #iterator()} methods. @return size */ public int size() { return size; // todo - exclude internal attributes from this count - maintain size, count of internals } /** * Test if this Attributes list is empty (size==0). */ public boolean isEmpty() { return size == 0; } /** Add all the attributes from the incoming set to this set. @param incoming attributes to add to these attributes. */ public void addAll(Attributes incoming) { if (incoming.size() == 0) return; checkCapacity(size + incoming.size); boolean needsPut = size != 0; // if this set is empty, no need to check existing set, so can add() vs put() // (and save bashing on the indexOfKey() for (Attribute attr : incoming) { if (needsPut) put(attr); else add(attr.getKey(), attr.getValue()); } } /** Get the source ranges (start to end position) in the original input source from which this attribute's name and value were parsed.

Position tracking must be enabled prior to parsing the content.

@param key the attribute name @return the ranges for the attribute's name and value, or {@code untracked} if the attribute does not exist or its range was not tracked. @see org.jsoup.parser.Parser#setTrackPosition(boolean) @see Attribute#sourceRange() @see Node#sourceRange() @see Element#endSourceRange() @since 1.17.1 */ public Range.AttributeRange sourceRange(String key) { if (!hasKey(key)) return UntrackedAttr; Map ranges = getRanges(); if (ranges == null) return Range.AttributeRange.UntrackedAttr; Range.AttributeRange range = ranges.get(key); return range != null ? range : Range.AttributeRange.UntrackedAttr; } /** Get the Ranges, if tracking is enabled; null otherwise. */ @Nullable Map getRanges() { //noinspection unchecked return (Map) userData(AttrRangeKey); } @Override public Iterator iterator() { return new Iterator() { int expectedSize = size; int i = 0; @Override public boolean hasNext() { checkModified(); while (i < size) { if (isInternalKey(keys[i])) // skip over internal keys i++; else break; } return i < size; } @Override public Attribute next() { checkModified(); if (i >= size) throw new NoSuchElementException(); final Attribute attr = new Attribute(keys[i], (String) vals[i], Attributes.this); i++; return attr; } private void checkModified() { if (size != expectedSize) throw new ConcurrentModificationException("Use Iterator#remove() instead to remove attributes while iterating."); } @Override public void remove() { Attributes.this.remove(--i); // next() advanced, so rewind expectedSize--; } }; } /** Get the attributes as a List, for iteration. @return a view of the attributes as an unmodifiable List. */ public List asList() { ArrayList list = new ArrayList<>(size); for (int i = 0; i < size; i++) { if (isInternalKey(keys[i])) continue; // skip internal keys Attribute attr = new Attribute(keys[i], (String) vals[i], Attributes.this); list.add(attr); } return Collections.unmodifiableList(list); } /** * Retrieves a filtered view of attributes that are HTML5 custom data attributes; that is, attributes with keys * starting with {@code data-}. * @return map of custom data attributes. */ public Map dataset() { return new Dataset(this); } /** Get the HTML representation of these attributes. @return HTML */ public String html() { StringBuilder sb = StringUtil.borrowBuilder(); try { html(sb, (new Document("")).outputSettings()); // output settings a bit funky, but this html() seldom used } catch (IOException e) { // ought never happen throw new SerializationException(e); } return StringUtil.releaseBuilder(sb); } final void html(final Appendable accum, final Document.OutputSettings out) throws IOException { final int sz = size; for (int i = 0; i < sz; i++) { if (isInternalKey(keys[i])) continue; final String key = Attribute.getValidKey(keys[i], out.syntax()); if (key != null) Attribute.htmlNoValidate(key, (String) vals[i], accum.append(' '), out); } } @Override public String toString() { return html(); } /** * Checks if these attributes are equal to another set of attributes, by comparing the two sets. Note that the order * of the attributes does not impact this equality (as per the Map interface equals()). * @param o attributes to compare with * @return if both sets of attributes have the same content */ @Override public boolean equals(@Nullable Object o) { if (this == o) return true; if (o == null || getClass() != o.getClass()) return false; Attributes that = (Attributes) o; if (size != that.size) return false; for (int i = 0; i < size; i++) { String key = keys[i]; int thatI = that.indexOfKey(key); if (thatI == NotFound) return false; Object val = vals[i]; Object thatVal = that.vals[thatI]; if (val == null) { if (thatVal != null) return false; } else if (!val.equals(thatVal)) return false; } return true; } /** * Calculates the hashcode of these attributes, by iterating all attributes and summing their hashcodes. * @return calculated hashcode */ @Override public int hashCode() { int result = size; result = 31 * result + Arrays.hashCode(keys); result = 31 * result + Arrays.hashCode(vals); return result; } @Override public Attributes clone() { Attributes clone; try { clone = (Attributes) super.clone(); } catch (CloneNotSupportedException e) { throw new RuntimeException(e); } clone.size = size; clone.keys = Arrays.copyOf(keys, size); clone.vals = Arrays.copyOf(vals, size); return clone; } /** * Internal method. Lowercases all (non-internal) keys. */ public void normalize() { for (int i = 0; i < size; i++) { if (!isInternalKey(keys[i])) keys[i] = lowerCase(keys[i]); } } /** * Internal method. Removes duplicate attribute by name. Settings for case sensitivity of key names. * @param settings case sensitivity * @return number of removed dupes */ public int deduplicate(ParseSettings settings) { if (isEmpty()) return 0; boolean preserve = settings.preserveAttributeCase(); int dupes = 0; OUTER: for (int i = 0; i < keys.length; i++) { for (int j = i + 1; j < keys.length; j++) { if (keys[j] == null) continue OUTER; // keys.length doesn't shrink when removing, so re-test if ((preserve && keys[i].equals(keys[j])) || (!preserve && keys[i].equalsIgnoreCase(keys[j]))) { dupes++; remove(j); j--; } } } return dupes; } private static class Dataset extends AbstractMap { private final Attributes attributes; private Dataset(Attributes attributes) { this.attributes = attributes; } @Override public Set> entrySet() { return new EntrySet(); } @Override public String put(String key, String value) { String dataKey = dataKey(key); String oldValue = attributes.hasKey(dataKey) ? attributes.get(dataKey) : null; attributes.put(dataKey, value); return oldValue; } private class EntrySet extends AbstractSet> { @Override public Iterator> iterator() { return new DatasetIterator(); } @Override public int size() { int count = 0; Iterator iter = new DatasetIterator(); while (iter.hasNext()) count++; return count; } } private class DatasetIterator implements Iterator> { private Iterator attrIter = attributes.iterator(); private Attribute attr; public boolean hasNext() { while (attrIter.hasNext()) { attr = attrIter.next(); if (attr.isDataAttribute()) return true; } return false; } public Entry next() { return new Attribute(attr.getKey().substring(dataPrefix.length()), attr.getValue()); } public void remove() { attributes.remove(attr.getKey()); } } } private static String dataKey(String key) { return dataPrefix + key; } static String internalKey(String key) { return InternalPrefix + key; } static boolean isInternalKey(String key) { return key != null && key.length() > 1 && key.charAt(0) == InternalPrefix; } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy