All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.terasoluna.gfw.common.codepoints.CodePoints Maven / Gradle / Ivy

There is a newer version: 5.9.0.RELEASE
Show newest version
/*
 * Copyright(c) 2013 NTT DATA Corporation.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
 * either express or implied. See the License for the specific language
 * governing permissions and limitations under the License.
 */
package org.terasoluna.gfw.common.codepoints;

import java.io.Serializable;
import java.lang.reflect.InvocationTargetException;
import java.util.*;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;

/**
 * Represents a collection of code points. This class holds immutable code points as a {@link java.util.Set} and provides
 * <ul>
 * <li>check methods to test whether the code points in a given string are included</li>
 * <li>set operations (union, subtract, intersect)</li>
 * </ul>
 * <h3>How to create an instance</h3>
 * <p>
 * Use the factory method to create a cached instance:
 * </p>
 *
 * <pre>
 * <code>
 * CodePoints cp = CodePoints.of(ASCIIPrintableChars.class);
 * </code>
 * </pre>
 * <p>
 * The constructor can also be used. In this case the set of code points is not cached and is created every time.
 * </p>
 *
 * <pre>
 * <code>
 * CodePoints cp = new ASCIIPrintableChars();
 * </code>
 * </pre>
 * <p>
 * There are four types of constructor:
 * </p>
 * <ol>
 * <li>Pass {@code Integer} varargs ({@code int} literals are boxed automatically)
 *
 * <pre>
 * <code>
 * CodePoints cp = new CodePoints(0x0061, 0x0062); // a b
 * </code>
 * </pre>
 *
 * </li>
 * <li>Pass a {@link java.util.Collection} of {@link java.lang.Integer}
 *
 * <pre>
 * <code>
 * {@literal Set<Integer>} set = new {@literal HashSet<>}();
 * set.add(0x0061); // a
 * set.add(0x0062); // b
 * CodePoints cp = new CodePoints(set);
 * </code>
 * </pre>
 *
 * </li>
 * <li>Pass {@link java.lang.String} varargs including the target code points
 *
 * <pre>
 * <code>
 * CodePoints cp = new CodePoints("ab");
 * CodePoints cp = new CodePoints("a", "b"); // is same
 * </code>
 * </pre>
 *
 * </li>
 * <li>Pass an existing {@link CodePoints}. This type is intended to be used for the definition of new code points. The
 * set inside the {@link CodePoints} is shared.
 *
 * <pre>
 * <code>
 * CodePoints cp = ...;
 * CodePoints newCp = new CodePoints(cp);
 * </code>
 * </pre>
 *
 * </li>
 * </ol>
 * <h3>How to check strings</h3>
 * <p>
 * {@link #containsAll(String)} returns {@code true} if all code points in the given string are included in the target
 * code points. Otherwise {@code false} is returned.
 * </p>
 *
 * <pre>
 * <code>
 * CodePoints cp = new CodePoints(0x0061, 0x0062); // a b
 * cp.containsAll("a"); // true
 * cp.containsAll("b"); // true
 * cp.containsAll("ab"); // true
 * cp.containsAll("c"); // false
 * cp.containsAll("abc"); // false
 * </code>
 * </pre>
 * <p>
 * {@link #firstExcludedCodePoint(String)} returns the first code point in the given string which is not included in the
 * target code points.
 * </p>
 *
 * <pre>
 * <code>
 * CodePoints cp = new CodePoints(0x0061, 0x0062); // a b
 * cp.firstExcludedCodePoint("abc"); // 0x0063 (c)
 * cp.firstExcludedCodePoint("abcad"); // 0x0063 (c)
 * cp.firstExcludedCodePoint("ab"); // CodePoints#NOT_FOUND
 * </code>
 * </pre>
 * <p>
 * {@link #allExcludedCodePoints(String)} returns the set of code points in the given string which are not included in
 * the target.
 * </p>
 *
 * <pre>
 * <code>
 * CodePoints cp = new CodePoints(0x0061, 0x0062); // a b
 * cp.allExcludedCodePoints("abc"); // [0x0063 (c)]
 * cp.allExcludedCodePoints("abcad"); // [0x0063 (c), 0x0064 (d)]
 * cp.allExcludedCodePoints("ab"); // []
 * </code>
 * </pre>
 *
 * <h3>How to compose code points</h3>
 * <p>
 * {@code CodePoints} provides composable APIs. Since a {@code CodePoints} instance is immutable, these APIs do not
 * affect the state of existing {@code CodePoints} instances.
 * </p>
 * <h4>Union</h4>
 * <p>
 * Use {@link #union(CodePoints)}
 * </p>
 *
 * <pre>
 * <code>
 * CodePoints ab = new CodePoints(0x0061, 0x0062); // a b
 * CodePoints cd = new CodePoints(0x0063, 0x0064); // c d
 * CodePoints abcd = ab.union(cd); // a b c d
 * </code>
 * </pre>
 *
 * <h4>Subtract</h4>
 * <p>
 * Use {@link #subtract(CodePoints)}
 * </p>
 *
 * <pre>
 * <code>
 * CodePoints abcd = new CodePoints(0x0061, 0x0062, 0x0063, 0x0064); // a b c d
 * CodePoints cd = new CodePoints(0x0063, 0x0064); // c d
 * CodePoints ab = abcd.subtract(cd); // a b
 * </code>
 * </pre>
 *
 * <h4>Intersect</h4>
 * <p>
 * Use {@link #intersect(CodePoints)}
 * </p>
 *
 * <pre>
 * <code>
 * CodePoints abcd = new CodePoints(0x0061, 0x0062, 0x0063, 0x0064); // a b c d
 * CodePoints cde = new CodePoints(0x0063, 0x0064, 0x0065); // c d e
 * CodePoints cd = abcd.intersect(cde); // c d
 * </code>
 * </pre>
 *
 * <h3>How to define new code points</h3>
 * <p>
 * Extend {@link CodePoints} to define new code points. Following is a simple example:
 * </p>
 *
 * <pre>
 * <code>
 * public class ABCD extends CodePoints {
 *   public ABCD() {
 *     super(0x0061, 0x0062, 0x0063, 0x0064); // a b c d
 *   }
 * }
 * </code>
 * </pre>
 * <p>
 * New code points can be created using a combination of existing code points.
 * </p>
 *
 * <pre>
 * <code>
 * public class X_JIS_0208_Hiragana_Katakana extends CodePoints {
 *   public X_JIS_0208_Hiragana_Katakana() {
 *     super(new X_JIS_0208_Hiragana().union(new X_JIS_0208_Katakana()));
 *   }
 * }
 * </code>
 * </pre>
 * <p>
 * Note that {@code new} is used here so that the temporary code points are not cached. If {@code X_JIS_0208_Hiragana}
 * and {@code X_JIS_0208_Katakana} are also intended to be used on their own, use {@link #of(Class)} instead of
 * {@code new} so that these are cached:
 * </p>
 *
 * <pre>
 * <code>
 * public class X_JIS_0208_Hiragana_Katakana extends CodePoints {
 *   public X_JIS_0208_Hiragana_Katakana() {
 *     super(CodePoints.of(X_JIS_0208_Hiragana.class).union(CodePoints.of(X_JIS_0208_Katakana.class)));
 *   }
 * }
 * </code>
 * </pre>
 *
 * @since 5.1.0
 */
public class CodePoints implements Serializable {

    private static final long serialVersionUID = 1L;

    /**
     * Returned by {@link #firstExcludedCodePoint(String)} when the given string contains no code point that is missing
     * from the target code points.
     */
    public static final int NOT_FOUND = Integer.MIN_VALUE;

    /**
     * {@code CodePoints} cache, keyed by the concrete class passed to {@link #of(Class)}.
     */
    private static final ConcurrentMap<Class<? extends CodePoints>, CodePoints> cache = new ConcurrentHashMap<>();

    /**
     * Unmodifiable set of the code points held by this instance.
     */
    private final Set<Integer> set;

    /**
     * Constructor with the given {@code java.lang.Integer} code points.
     * @param codePoints array of actual code points
     */
    public CodePoints(Integer... codePoints) {
        Set<Integer> s = new HashSet<>(codePoints.length);
        Collections.addAll(s, codePoints);
        this.set = Collections.unmodifiableSet(s);
    }

    /**
     * Constructor with the given {@code java.lang.String}s. Every code point found in any of the strings is added.
     * @param strings array of strings which include target code points
     */
    public CodePoints(String... strings) {
        Set<Integer> s = new HashSet<>();
        for (String str : strings) {
            int len = str.length();
            int codePoint;
            // advance by 1 or 2 chars so that surrogate pairs are read as a single code point
            for (int i = 0; i < len; i += Character.charCount(codePoint)) {
                codePoint = str.codePointAt(i);
                s.add(codePoint);
            }
        }
        this.set = Collections.unmodifiableSet(s);
    }

    /**
     * Constructor with the given collection of {@code java.lang.Integer} code points. The collection is copied.
     * @param codePoints collection of actual code points
     */
    public CodePoints(Collection<Integer> codePoints) {
        Set<Integer> s = new HashSet<>(codePoints);
        this.set = Collections.unmodifiableSet(s);
    }

    /**
     * Constructor with the given {@code CodePoints}. The {@code java.util.Set} object inside {@code CodePoints} is
     * shared, not copied.
     * @param codePoints actual code points
     */
    public CodePoints(CodePoints codePoints) {
        this.set = codePoints.set;
    }

    /**
     * Returns whether all code points in the given string are included in the target code points.
     * @param s target string
     * @return {@code true} if all code points in the given string are included in the target code points (this is also
     *         the case for a {@code null} or empty string). Otherwise {@code false} is returned.
     */
    public boolean containsAll(String s) {
        return this.firstExcludedCodePoint(s) == NOT_FOUND;
    }

    /**
     * Returns the first code point in the given string which is not included in the target code points.
     * @param s target string
     * @return first code point in the given string which is not included in the target code points. {@link #NOT_FOUND}
     *         is returned if all code points in the given string are included in the target code points, or if the
     *         string is {@code null} or empty.
     */
    public int firstExcludedCodePoint(String s) {
        if (s == null || s.isEmpty()) {
            return NOT_FOUND;
        }
        // iterate by code point, not by char, so that surrogate pairs are handled;
        // see http://www.ibm.com/developerworks/jp/ysl/library/java/j-unicode_surrogate/
        int len = s.length();
        int codePoint;
        for (int i = 0; i < len; i += Character.charCount(codePoint)) {
            codePoint = s.codePointAt(i);
            if (!set.contains(codePoint)) {
                return codePoint;
            }
        }
        return NOT_FOUND;
    }

    /**
     * Returns the set of code points in the given string which are not included in the target.
     * @param s target string
     * @return set of code points in the given string which are not included in the target, in order of first
     *         occurrence. An empty set is returned if all code points in the given string are included in the target
     *         code points, or if the string is {@code null} or empty.
     */
    public Set<Integer> allExcludedCodePoints(String s) {
        if (s == null || s.isEmpty()) {
            return Collections.emptySet();
        }
        // LinkedHashSet keeps the order in which excluded code points first appear
        Set<Integer> excludedCodePoints = new LinkedHashSet<>();
        // iterate by code point, not by char, so that surrogate pairs are handled;
        // see http://www.ibm.com/developerworks/jp/ysl/library/java/j-unicode_surrogate/
        int len = s.length();
        int codePoint;
        for (int i = 0; i < len; i += Character.charCount(codePoint)) {
            codePoint = s.codePointAt(i);
            if (!set.contains(codePoint)) {
                excludedCodePoints.add(codePoint);
            }
        }
        return excludedCodePoints;
    }

    /**
     * Unites two sets of code points. Neither operand is modified.
     * @param codePoints code points to unite
     * @return united code points
     */
    public CodePoints union(CodePoints codePoints) {
        Set<Integer> setTmp = new HashSet<>(this.set);
        setTmp.addAll(codePoints.set);
        return new CodePoints(setTmp);
    }

    /**
     * Subtracts the given code points from this set of code points. Neither operand is modified.
     * @param codePoints code points to subtract
     * @return subtracted code points
     */
    public CodePoints subtract(CodePoints codePoints) {
        Set<Integer> setTmp = new HashSet<>(this.set);
        setTmp.removeAll(codePoints.set);
        return new CodePoints(setTmp);
    }

    /**
     * Intersects two sets of code points. Neither operand is modified.
     * @param codePoints code points to intersect
     * @return intersected code points
     */
    public CodePoints intersect(CodePoints codePoints) {
        Set<Integer> setTmp = new HashSet<>(this.set);
        setTmp.retainAll(codePoints.set);
        return new CodePoints(setTmp);
    }

    /**
     * Produces a cached {@link CodePoints}. The first time, a new {@link CodePoints} is created through the no-argument
     * constructor of the given class. From the second time on, the same instance is returned.
     * @param clazz {@link CodePoints} class to create
     * @param <T> {@link CodePoints} class
     * @return cached instance
     * @throws IllegalArgumentException if the class has no accessible no-argument constructor or if the constructor
     *             fails
     */
    @SuppressWarnings("unchecked")
    public static <T extends CodePoints> T of(Class<T> clazz) {
        CodePoints cached = cache.get(clazz);
        if (cached != null) {
            return (T) cached;
        }
        try {
            // The instance is built outside of any map operation on purpose: a subclass constructor may itself call
            // of(...), and ConcurrentHashMap.computeIfAbsent must not be re-entered from its mapping function.
            T created = clazz.getDeclaredConstructor().newInstance();
            // putIfAbsent makes sure every caller sees the same instance even if two threads raced to create it
            CodePoints winner = cache.putIfAbsent(clazz, created);
            return winner != null ? (T) winner : created;
        } catch (NoSuchMethodException | SecurityException | IllegalAccessException | IllegalArgumentException e) {
            throw new IllegalArgumentException("public default constructor not found", e);
        } catch (InstantiationException | InvocationTargetException e) {
            throw new IllegalArgumentException("exception occurred while initializing", e);
        }
    }

    /**
     * Helper method to check whether every code point in the given string is included in at least one of the given
     * code points. Different code points of the string may be covered by different {@code CodePoints}.
     * <p>
     * Note that {@code true} is returned when {@code codePointsList} is empty.
     * </p>
     * @param s target string
     * @param codePointsList array of code points
     * @return {@code true} if every code point in the given string is included in some element of the code points
     *         list. Otherwise {@code false} is returned.
     */
    public static boolean containsAllInAnyCodePoints(String s, final CodePoints... codePointsList) {
        // code point -> number of CodePoints in the list which do not contain it
        Map<Integer, Integer> excludedCounts = new HashMap<>();
        for (CodePoints codePoints : codePointsList) {
            Set<Integer> excluded = codePoints.allExcludedCodePoints(s);
            if (excluded.isEmpty()) {
                // return immediately if a single CodePoints covers the whole string
                return true;
            }
            for (Integer codePoint : excluded) {
                Integer count = excludedCounts.get(codePoint);
                excludedCounts.put(codePoint, count == null ? 1 : count + 1);
            }
        }
        for (Integer count : excludedCounts.values()) {
            if (count.intValue() == codePointsList.length) {
                // every CodePoints excluded this code point, so it is not covered by any of them
                return false;
            }
        }
        // OK if each code point is included in some CodePoints of the list
        return true;
    }

    /**
     * Two instances are equal when they are of exactly the same class and hold the same set of code points.
     * @param o object to check
     * @return {@code true} if the given object equals this instance. {@code false} otherwise.
     */
    @Override
    public boolean equals(Object o) {
        if (this == o) {
            return true;
        }
        if (o == null || getClass() != o.getClass()) {
            return false;
        }
        CodePoints that = (CodePoints) o;
        return set.equals(that.set);
    }

    /**
     * Hash code of the instance, derived from the held set of code points.
     * @return hash code
     */
    @Override
    public int hashCode() {
        return set.hashCode();
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy