All Downloads are FREE. Search and download functionalities are using the official Maven repository.

jodd.util.NaturalOrderComparator Maven / Gradle / Ivy

// Copyright (c) 2003-present, Jodd Team (http://jodd.org)
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// 1. Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.

package jodd.util;

import java.io.Serializable;
import java.util.Comparator;

/**
 * Probably the best natural strings comparator.
 * <p>
 * Compares the {@code toString()} values of two objects. Runs of digits
 * are compared as numbers instead of char-by-char, so that e.g.
 * {@code "file2"} sorts before {@code "file10"}. Optionally letter case,
 * a fixed set of accented Latin characters and space characters may be
 * ignored.
 *
 * @param <T> type of the compared objects; only {@code toString()} is used
 */
public class NaturalOrderComparator<T> implements Comparator<T>, Serializable {

	/* copied from Perl6 code */
	// Flat lookup table of pairs: an accented char at an even index is
	// followed by its plain replacement at the next (odd) index.
	private static final char[] ACCENT_CHARS = new char[]{
		'À', 'A', 'Á', 'A', 'Â', 'A', 'Ã', 'A', 'Ä', 'A', 'Å', 'A',
		'à', 'a', 'á', 'a', 'â', 'a', 'ã', 'a', 'ä', 'a', 'å', 'a',
		'Ç', 'C', 'ç', 'c',
		'È', 'E', 'É', 'E', 'Ê', 'E', 'Ë', 'E',
		'è', 'e', 'é', 'e', 'ê', 'e', 'ë', 'e',
		'Ì', 'I', 'Í', 'I', 'Î', 'I', 'Ï', 'I',
		'ì', 'i', 'í', 'i', 'î', 'i', 'ï', 'i',
		'Ò', 'O', 'Ó', 'O', 'Ô', 'O', 'Õ', 'O', 'Ö', 'O',
		'Ø', 'O', 'ò', 'o', 'ó', 'o', 'ô', 'o', 'õ', 'o', 'ö', 'o', 'ø', 'o',
		'Ñ', 'N', 'ñ', 'n',
		'Ù', 'U', 'Ú', 'U', 'Û', 'U', 'Ü', 'U', 'ù', 'u', 'ú', 'u', 'û', 'u', 'ü', 'u',
		'Ý', 'Y', 'ÿ', 'y', 'ý', 'y',
	};

	/** When {@code true}, chars are lower-cased before being compared. */
	protected final boolean ignoreCase;
	/** When {@code true}, accented chars are replaced by their plain counterparts before being compared. */
	protected final boolean ignoreAccents;
	/** When {@code true}, space chars are skipped in both strings. */
	protected final boolean skipSpaces;

	/**
	 * Creates a case-sensitive comparator that ignores accents and skips spaces.
	 */
	public NaturalOrderComparator() {
		this(false, true, true);
	}

	/**
	 * Creates a comparator with explicit settings.
	 *
	 * @param ignoreCase    if {@code true}, chars are compared in lower case
	 * @param ignoreAccents if {@code true}, accented chars (and the ligatures
	 *                      {@code ß}, {@code æ}, {@code Æ}) are replaced by plain letters
	 * @param skipSpaces    if {@code true}, space chars are skipped
	 */
	public NaturalOrderComparator(final boolean ignoreCase, final boolean ignoreAccents, final boolean skipSpaces) {
		this.ignoreCase = ignoreCase;
		this.ignoreAccents = ignoreAccents;
		this.skipSpaces = skipSpaces;
	}

	/**
	 * Compare digits at certain position in two strings.
	 * Leading zeros are skipped first. After that, the longest run of
	 * digits wins. That aside, the greatest value wins.
	 *
	 * @param str1 first string
	 * @param ndx1 index in the first string where the digits start
	 * @param str2 second string
	 * @param ndx2 index in the second string where the digits start
	 * @return if numbers are different, only 1 element is returned: the
	 * (non-zero) comparison result. If numbers are equal, 4 elements are
	 * returned: {@code 0}, the difference in the number of leading zeros,
	 * and the indexes just after the digits in the first and the second string.
	 */
	protected int[] compareDigits(final String str1, int ndx1, final String str2, int ndx2) {
		// iterate all digits in the first string

		int zeroCount1 = 0;
		while (charAt(str1, ndx1) == '0') {
			zeroCount1++;
			ndx1++;
		}

		int len1 = 0;
		while (true) {
			final char char1 = charAt(str1, ndx1);
			final boolean isDigitChar1 = CharUtil.isDigit(char1);
			if (!isDigitChar1) {
				break;
			}
			len1++;
			ndx1++;
		}

		// iterate all digits in the second string and compare with the first

		int zeroCount2 = 0;
		while (charAt(str2, ndx2) == '0') {
			zeroCount2++;
			ndx2++;
		}

		int len2 = 0;

		// walks again over the significant digits of the first string
		int ndx1_new = ndx1 - len1;
		// difference of the first pair of digits that are not the same
		int equalNumbers = 0;

		while (true) {
			final char char2 = charAt(str2, ndx2);
			final boolean isDigitChar2 = CharUtil.isDigit(char2);
			if (!isDigitChar2) {
				break;
			}
			if (equalNumbers == 0 && (ndx1_new < ndx1)) {
				equalNumbers = charAt(str1, ndx1_new++) - char2;
			}
			len2++;
			ndx2++;
		}

		// compare

		if (len1 != len2) {
			// numbers are not equals size
			return new int[] {len1 - len2};
		}

		if (equalNumbers != 0) {
			return new int[] {equalNumbers};
		}

		// numbers are equal, but number of zeros is different
		return new int[] {0, zeroCount1 - zeroCount2, ndx1, ndx2};
	}

	/**
	 * Compares string representations of two objects in natural order.
	 * When strings are otherwise equal, the difference in the number of
	 * leading zeros of the first number that has such difference decides.
	 *
	 * @param o1 first object, its {@code toString()} value is compared
	 * @param o2 second object, its {@code toString()} value is compared
	 * @return negative value, zero or positive value if the first argument
	 * is less than, equal to or greater than the second
	 * @throws NullPointerException if any argument is {@code null}
	 */
	@Override
	public int compare(final T o1, final T o2) {
		String str1 = o1.toString();
		String str2 = o2.toString();

		if (ignoreAccents) {
			// ligatures expand to two chars, so they can't be part of the accents table
			str1 = StringUtil.replace(str1, "ß", "ss");
			str2 = StringUtil.replace(str2, "ß", "ss");

			str1 = StringUtil.replace(str1, "æ", "ae");
			str2 = StringUtil.replace(str2, "æ", "ae");

			str1 = StringUtil.replace(str1, "Æ", "AE");
			str2 = StringUtil.replace(str2, "Æ", "AE");
		}

		int ndx1 = 0, ndx2 = 0;
		char char1, char2;
		int lastZeroDifference = 0;

		while (true) {
			char1 = charAt(str1, ndx1);
			char2 = charAt(str2, ndx2);

			// skip over spaces in both strings
			if (skipSpaces) {
				while (Character.isSpaceChar(char1)) {
					ndx1++;
					char1 = charAt(str1, ndx1);
				}

				while (Character.isSpaceChar(char2)) {
					ndx2++;
					char2 = charAt(str2, ndx2);
				}
			}

			// check for numbers

			final boolean isDigitChar1 = CharUtil.isDigit(char1);
			final boolean isDigitChar2 = CharUtil.isDigit(char2);

			if (isDigitChar1 && isDigitChar2) {
				// numbers detected!

				final int[] result = compareDigits(str1, ndx1, str2, ndx2);

				if (result[0] != 0) {
					// not equals, return
					return result[0];
				}

				// equals, save zero difference if not already saved
				if (lastZeroDifference == 0) {
					lastZeroDifference = result[1];
				}

				ndx1 = result[2];
				ndx2 = result[3];
				continue;
			}

			// detect the end of input using indexes and not the 0 sentinel of
			// charAt(), so a real NUL char inside a string is not mistaken for the end
			final boolean atEnd1 = ndx1 >= str1.length();
			final boolean atEnd2 = ndx2 >= str2.length();

			if (atEnd1 && atEnd2) {
				// both strings end; the strings are the same
				return lastZeroDifference;
			}
			if (atEnd1) {
				// first string is a prefix of the second one
				return -1;
			}
			if (atEnd2) {
				// second string is a prefix of the first one
				return 1;
			}

			// compare chars
			if (ignoreCase) {
				char1 = Character.toLowerCase(char1);
				char2 = Character.toLowerCase(char2);
			}

			if (ignoreAccents) {
				char1 = fixAccent(char1);
				char2 = fixAccent(char2);
			}

			if (char1 < char2) {
				return -1;
			}
			if (char1 > char2) {
				return 1;
			}

			ndx1++;
			ndx2++;
		}
	}

	/**
	 * Fixes accent char: returns the plain replacement of the given char
	 * if it is listed in the accents table, otherwise the same char.
	 */
	private char fixAccent(final char c) {
		// step by 2: only even indexes hold accented chars
		for (int i = 0; i < ACCENT_CHARS.length; i+=2) {
			final char accentChar = ACCENT_CHARS[i];
			if (accentChar == c) {
				return ACCENT_CHARS[i + 1];
			}
		}
		return c;
	}

	/**
	 * Safe {@code charAt} that returns 0 when ndx is out of boundaries.
	 */
	private static char charAt(final String string, final int ndx) {
		if (ndx >= string.length()) {
			return 0;
		}
		return string.charAt(ndx);
	}
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy