// de.schlichtherle.util.CanonicalStringSet (Maven / Gradle / Ivy)
// Artifact: truezip
/*
* Copyright (C) 2007-2010 Schlichtherle IT Services
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package de.schlichtherle.util;
import java.util.AbstractSet;
import java.util.Iterator;
import java.util.NoSuchElementException;
import java.util.SortedMap;
import java.util.TreeMap;
/**
 * A set of canonicalized strings in natural sort order.
 * A string is canonicalized by the template method {@link #canonicalize},
 * which should get overridden by subclasses.
 * <p>
 * String sets can be converted from and to string lists by using
 * {@link #addAll(String)} and {@link #toString()}.
 * A string list is a string which consists of zero or more elements
 * which are separated by the separator character provided to the
 * constructor.
 * Note that in general, a string list is just a sequence of string elements.
 * In particular, a string list may be empty (but not {@code null}) and
 * its elements don't have to be in canonical form, may be duplicated in the
 * list and may be listed in arbitrary order.
 * However, string lists have a canonical form, too:
 * A string list in canonical form (or canonical string list for short)
 * is a string list which contains only canonical strings in natural sort order
 * and does not contain any duplicates (so it's actually a set).
 * <p>
 * Unless otherwise documented, all {@link java.util.Set} methods work on the
 * canonical form of the string elements in this set.
 * <p>
 * The separator character is always matched literally when a string list is
 * split into its elements, so any character (including letters, digits and
 * regular expression meta characters) may be used as the separator.
 * <p>
 * Null elements are not permitted in this set.
 *
 * @author Christian Schlichtherle
 * @version $Id$
 * @since TrueZIP 6.5
 */
public class CanonicalStringSet extends AbstractSet {

    /** The separator for string lists. */
    private final char separator;

    /**
     * The sorted map which implements the behaviour of this class.
     * Each key is a canonical string and its value is the original string
     * which has most recently been added for this canonical form.
     */
    private final SortedMap map = new TreeMap();

    /**
     * Constructs a new, empty set of canonical strings.
     *
     * @param separator The separator character to use in string lists.
     */
    public CanonicalStringSet(final char separator) {
        this.separator = separator;
    }

    /**
     * Constructs a new set of canonical strings from the given set of
     * canonical strings.
     * <p>
     * Note that the elements are added via {@link #addAll(CanonicalStringSet)},
     * which is final, but which canonicalizes the elements again by calling
     * {@link #canonicalize}.
     *
     * @param separator The separator character to use in string lists.
     * @param set A set of canonical strings - may be {@code null} to
     *        construct an empty set.
     */
    public CanonicalStringSet(final char separator, final CanonicalStringSet set) {
        this.separator = separator;
        if (set != null) {
            addAll(set); // final method, but it still calls canonicalize()
        }
    }

    /**
     * Constructs a new set of canonical strings from the given string list.
     *
     * @deprecated This constructor is dangerous: It may call
     *             {@link #canonicalize}, which may result in a call from a
     *             superclass if this constructor is called from a subclass.
     * @param separator The separator character to use in string lists.
     * @param list A string list - may be {@code null} to
     *        construct an empty set.
     */
    public CanonicalStringSet(final char separator, final String list) {
        this.separator = separator;
        if (list != null) {
            addAll(list);
        }
    }

    /** @deprecated Override and use {@link #canonicalize} instead. */
    protected String canonical(final String s) {
        return canonicalize(s);
    }

    /**
     * A template method which returns the canonical form of {@code s} or
     * {@code null} if the given string does not have a canonical form.
     * <p>
     * The implementation in {@link CanonicalStringSet} simply returns the
     * parameter.
     *
     * @param s The string to get canonicalized.
     *        Never {@code null} and never contains the separator.
     * @return The canonical form of {@code s} or {@code null} if
     *         {@code s} does not have a canonical form.
     */
    protected String canonicalize(final String s) {
        assert s != null;
        assert s.indexOf(separator) < 0 : "separator in string is illegal";
        return s;
    }

    /**
     * Tests if this set does not contain any canonical strings.
     *
     * @return {@code true} Iff this set is empty.
     */
    public final boolean isEmpty() {
        return map.isEmpty();
    }

    /**
     * Returns the number of canonical strings in this set.
     *
     * @return The number of canonical strings in this set.
     */
    public final int size() {
        return map.size();
    }

    /**
     * Tests if the canonical form of all strings in the given string list
     * is contained in this set.
     * If a string in the list does not have a canonical form, it's skipped.
     * This implies that if the list is empty or entirely consists of strings
     * which do not have a canonical form, {@code true} is returned.
     * In other words, an empty set is considered to be a true subset of this
     * set.
     *
     * @param list A non-null string list.
     * @return {@code true} Iff the canonical form of all strings in the
     *         given string list is contained in this set.
     * @throws NullPointerException If {@code list} is {@code null}.
     * @throws ClassCastException If {@code list} is not a {@code String}.
     */
    public final boolean contains(Object list) {
        return containsAll((String) list);
    }

    /**
     * Returns a new iterator for all canonical string elements in this set.
     *
     * @return A new iterator for all canonical string elements.
     */
    public final Iterator iterator() {
        return map.keySet().iterator();
    }

    /**
     * Returns a new iterator for all original string elements in this set.
     * Note that strings which don't have a canonical form cannot get added
     * to this class and hence cannot get returned by the iterator.
     *
     * @return A new iterator for all original string elements.
     */
    public final Iterator originalIterator() {
        return map.values().iterator();
    }

    /**
     * Returns a new array with all canonical string elements in this set.
     *
     * @return A new array with all canonical string elements.
     */
    public final Object[] toArray() {
        return map.keySet().toArray();
    }

    /**
     * Copies all canonical string elements in this set to an array,
     * following the contract of {@link java.util.Set#toArray(Object[])}.
     *
     * @param array The array to fill if it is large enough.
     * @return An array with all canonical string elements.
     */
    public final Object[] toArray(Object[] array) {
        return map.keySet().toArray(array);
    }

    //
    // Modification operations.
    //

    /**
     * Adds the canonical form of all strings in the given list to this set.
     * If a string in the list does not have a canonical form or its canonical
     * form is already contained in this set, it's ignored.
     *
     * @param list A non-null string list.
     * @return {@code true} Iff this set changed as a result of the call.
     * @throws NullPointerException If {@code list} is {@code null}.
     * @throws ClassCastException If {@code list} is not a {@code String}.
     */
    public final boolean add(Object list) {
        return addAll((String) list);
    }

    /**
     * Removes the canonical form of all strings in the given list from this set.
     * If a string in the list does not have a canonical form, it's ignored.
     *
     * @param list A non-null string list.
     * @return {@code true} Iff this set changed as a result of the call.
     * @throws NullPointerException If {@code list} is {@code null}.
     * @throws ClassCastException If {@code list} is not a {@code String}.
     */
    public final boolean remove(Object list) {
        return removeAll((String) list);
    }

    //
    // Bulk operations.
    //

    /**
     * Tests if all canonical strings in the given set are contained in this
     * set.
     * An empty set is considered to be a true subset of this set.
     *
     * @param set A non-null set of canonical strings.
     * @return {@code true} Iff all strings in the given set are contained
     *         in this set.
     * @throws NullPointerException If {@code set} is {@code null}.
     */
    public final boolean containsAll(final CanonicalStringSet set) {
        return map.keySet().containsAll(set.map.keySet());
    }

    /**
     * Tests if the canonical form of all strings in the given string list
     * is contained in this set.
     * If a string in the list does not have a canonical form, it's skipped.
     * This implies that if the list is empty or entirely consists of strings
     * which do not have a canonical form, {@code true} is returned.
     * In other words, an empty set is considered to be a true subset of this
     * set.
     *
     * @param list A non-null string list.
     * @return {@code true} Iff the canonical form of all strings in the
     *         given string list is contained in this set.
     * @throws NullPointerException If {@code list} is {@code null}.
     */
    public final boolean containsAll(final String list) {
        final Iterator i = new CanonicalStringIterator(list);
        while (i.hasNext()) {
            if (!map.containsKey(i.next())) {
                return false;
            }
        }
        return true;
    }

    /**
     * Adds all canonical strings in the given set to this set after they have
     * been canonicalized by this set again.
     * The original strings of the given set are used as the input for the
     * canonicalization.
     *
     * @param set A non-null set of canonical strings.
     * @return {@code true} Iff this set of canonicalized strings has
     *         changed as a result of the call.
     * @throws NullPointerException If {@code set} is {@code null}.
     */
    public final boolean addAll(final CanonicalStringSet set) {
        boolean changed = false;
        final Iterator e = set.map.values().iterator();
        while (e.hasNext()) {
            changed |= add(e.next());
        }
        return changed;
    }

    /**
     * Adds the canonical form of all strings in the given list to this set.
     * If a string in the list does not have a canonical form, it's skipped.
     * If the canonical form of a string is already contained in this set,
     * then its associated original string gets replaced, but this is not
     * considered to be a change of this set.
     *
     * @param list A non-null string list.
     * @return {@code true} Iff this set of canonicalized strings has
     *         changed as a result of the call.
     * @throws NullPointerException If {@code list} is {@code null}.
     */
    public final boolean addAll(final String list) {
        boolean changed = false;
        final Iterator i = new StringIterator(list);
        while (i.hasNext()) {
            final String element = (String) i.next();
            final String canonical = canonicalize(element);
            if (canonical != null) {
                // Only a new canonical key counts as a change.
                final String previous = (String) map.put(canonical, element);
                changed |= previous == null;
            }
        }
        return changed;
    }

    /**
     * Retains all canonical strings in the given set in this set.
     *
     * @param set A non-null set of canonical strings.
     * @return {@code true} Iff this set of canonicalized strings has
     *         changed as a result of the call.
     * @throws NullPointerException If {@code set} is {@code null}.
     */
    public final boolean retainAll(CanonicalStringSet set) {
        return map.keySet().retainAll(set.map.keySet());
    }

    /**
     * Retains the canonical form of all strings in the given list in this set.
     * If a string in the list does not have a canonical form, it's skipped.
     *
     * @param list A non-null string list.
     * @return {@code true} Iff this set of canonicalized strings has
     *         changed as a result of the call.
     * @throws NullPointerException If {@code list} is {@code null}.
     */
    public final boolean retainAll(final String list) {
        // A temporary set which shares the separator and the canonicalization
        // of this set, so that the list gets canonicalized exactly like the
        // elements of this set.
        class CustomSet extends CanonicalStringSet {
            CustomSet() {
                super(separator);
                super.addAll(list);
            }

            protected String canonicalize(String s) {
                return CanonicalStringSet.this.canonicalize(s);
            }
        }

        return map.keySet().retainAll(new CustomSet());
    }

    /**
     * Removes all canonical strings in the given set from this set.
     *
     * @param set A non-null set of strings.
     * @return {@code true} Iff this set of canonicalized strings has
     *         changed as a result of the call.
     * @throws NullPointerException If {@code set} is {@code null}.
     */
    public final boolean removeAll(CanonicalStringSet set) {
        return map.keySet().removeAll(set.map.keySet());
    }

    /**
     * Removes the canonical form of all strings in the given list from this set.
     * If a string in the list does not have a canonical form, it's skipped.
     *
     * @param list A non-null string list.
     * @return {@code true} Iff this set of canonicalized strings has
     *         changed as a result of the call.
     * @throws NullPointerException If {@code list} is {@code null}.
     */
    public final boolean removeAll(final String list) {
        boolean changed = false;
        final Iterator i = new CanonicalStringIterator(list);
        while (i.hasNext()) {
            changed |= (map.remove(i.next()) != null);
        }
        return changed;
    }

    /** Removes all canonical strings from this set. */
    public final void clear() {
        map.clear();
    }

    //
    // Miscellaneous.
    //

    /**
     * Returns the canonical string representation of this set, i.e. all
     * canonical strings in natural sort order, separated by the separator
     * character.
     * If this string set is empty, an empty string is returned.
     */
    public final String toString() {
        final Iterator i = iterator();
        if (!i.hasNext()) {
            return "";
        }
        // TODO: JSE 5: Use StringBuilder
        final StringBuffer sb = new StringBuffer((String) i.next());
        while (i.hasNext()) {
            sb.append(separator).append((String) i.next());
        }
        return sb.toString();
    }

    //
    // Inner classes.
    //

    /**
     * Iterates the canonical forms of the elements of a string list.
     * Elements which do not have a canonical form are skipped.
     */
    private class CanonicalStringIterator implements Iterator {
        private final Iterator i;

        /** The next canonical string to return or {@code null} if exhausted. */
        private String canonical;

        private CanonicalStringIterator(final String list) {
            i = new StringIterator(list);
            advance();
        }

        public boolean hasNext() {
            return canonical != null;
        }

        public Object next() {
            if (canonical == null) {
                throw new NoSuchElementException();
            }
            final String c = canonical;
            advance();
            return c;
        }

        /** Looks ahead for the next element which has a canonical form. */
        private void advance() {
            while (i.hasNext()) {
                canonical = canonicalize((String) i.next());
                if (canonical != null) {
                    return;
                }
            }
            canonical = null; // no such element
        }

        public void remove() {
            throw new UnsupportedOperationException();
        }
    } // class CanonicalStringIterator

    /**
     * Iterates the elements of a string list as they are, i.e. without
     * canonicalizing them.
     */
    private class StringIterator implements Iterator {
        private final String[] split;
        private int i = 0;

        private StringIterator(final String list) {
            // Quote the separator with \Q...\E so that it is always matched
            // literally. Just prefixing it with a backslash is only correct
            // for non-alphanumeric characters: for letters and digits it
            // forms a different regex construct or an illegal escape.
            split = list.split("\\Q" + separator + "\\E"); // NOI18N
        }

        public boolean hasNext() {
            return i < split.length;
        }

        public Object next() {
            if (i >= split.length) {
                throw new NoSuchElementException();
            }
            return split[i++];
        }

        public void remove() {
            throw new UnsupportedOperationException();
        }
    } // class StringIterator
}