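/*
 * cn.hutool.core.codec.Hashids -- source listing taken from a Maven / Gradle / Ivy artifact page.
 *
 * Page navigation (not part of the source): Go to download; Show more of this group;
 * Show more artifacts with this name; Show all versions of hutool-core; Show documentation.
 *
 * Artifact description: Hutool core module -- utilities for collections, strings, beans, etc.
 * Note: there is a newer version of this artifact: 5.8.33
 */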
package cn.hutool.core.codec;

import java.math.BigInteger;
import java.util.Arrays;
import java.util.LinkedHashSet;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
import java.util.stream.LongStream;

/**
 * Implementation of the Hashids protocol, which:
 * <ul>
 *     <li>generates short, unique, case-sensitive and non-sequential hash values</li>
 *     <li>encodes natural (non-negative) numbers</li>
 *     <li>accepts a salt, so that the mapping is not guessable without it</li>
 *     <li>supports a configurable minimum hash length</li>
 *     <li>produces unpredictable output for incrementing input</li>
 * </ul>
 *
 * <p>
 * Ported from: https://github.com/davidafsilva/java-hashids
 * </p>
 *
 * <p>
 * {@code Hashids} converts numbers or hexadecimal strings into short, unique, non-sequential strings.
 * The encoding is reversible: a number such as 347 becomes a string such as yr8, and a string such
 * as yr8 can be decoded back into 347.
 * Its main purpose is to stop crawlers from enumerating sequential IDs: ordered IDs are mapped
 * one-to-one onto unordered hashes.
 * </p>
 *
 * @author david
 */
public class Hashids implements Encoder<long[], String>, Decoder<String, long[]> {

	private static final int LOTTERY_MOD = 100;
	private static final double GUARD_THRESHOLD = 12;
	private static final double SEPARATOR_THRESHOLD = 3.5;
	// minimum number of characters an alphabet must contain
	private static final int MIN_ALPHABET_LENGTH = 16;
	// splits a hex string into chunks of at most 12 characters (any character is matched;
	// non-hex characters are rejected later when the chunk is parsed)
	private static final Pattern HEX_VALUES_PATTERN = Pattern.compile("[\\w\\W]{1,12}");

	// default alphabet
	public static final char[] DEFAULT_ALPHABET = {
			'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
			'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
			'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
			'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
			'1', '2', '3', '4', '5', '6', '7', '8', '9', '0'
	};
	// default separators
	private static final char[] DEFAULT_SEPARATORS = {
			'c', 'f', 'h', 'i', 's', 't', 'u', 'C', 'F', 'H', 'I', 'S', 'T', 'U'
	};

	// algorithm properties
	private final char[] alphabet;
	// characters placed between the encodings of consecutive numbers
	private final char[] separators;
	private final Set<Character> separatorsSet;
	private final char[] salt;
	// characters added around the hash when it is shorter than minLength
	private final char[] guards;
	// minimum length of an encoded hash
	private final int minLength;

	// region create

	/**
	 * Creates a {@code Hashids} using {@link #DEFAULT_ALPHABET} and no minimum length.
	 *
	 * @param salt the salt
	 * @return a new {@code Hashids}
	 */
	public static Hashids create(final char[] salt) {
		return create(salt, DEFAULT_ALPHABET, -1);
	}

	/**
	 * Creates a {@code Hashids} using {@link #DEFAULT_ALPHABET}.
	 *
	 * @param salt      the salt
	 * @param minLength minimum hash length, -1 for no minimum
	 * @return a new {@code Hashids}
	 */
	public static Hashids create(final char[] salt, final int minLength) {
		return create(salt, DEFAULT_ALPHABET, minLength);
	}

	/**
	 * Creates a {@code Hashids} from the given parameters.
	 *
	 * @param salt      the salt
	 * @param alphabet  the hash alphabet
	 * @param minLength minimum hash length, -1 for no minimum
	 * @return a new {@code Hashids}
	 */
	public static Hashids create(final char[] salt, final char[] alphabet, final int minLength) {
		return new Hashids(salt, alphabet, minLength);
	}
	// endregion

	/**
	 * Constructor. Derives the working alphabet, the separators and the guards from the given
	 * alphabet and salt.
	 *
	 * @param salt      the salt (copied, the caller's array is not retained)
	 * @param alphabet  the hash alphabet; must have at least {@value #MIN_ALPHABET_LENGTH} characters
	 *                  and must not contain spaces
	 * @param minLength minimum hash length, -1 for no minimum
	 * @throws IllegalArgumentException if the alphabet is too short or contains a space
	 */
	public Hashids(final char[] salt, final char[] alphabet, final int minLength) {
		this.minLength = minLength;
		this.salt = Arrays.copyOf(salt, salt.length);

		// filter and shuffle separators
		char[] tmpSeparators = shuffle(filterSeparators(DEFAULT_SEPARATORS, alphabet), this.salt);

		// validate and filter the alphabet
		char[] tmpAlphabet = validateAndFilterAlphabet(alphabet, tmpSeparators);

		// check separator threshold
		// NOTE(review): the division below is an int division, so the ratio is truncated before it
		// is compared with the threshold. It is left untouched on purpose: changing it could alter
		// the hashes generated for some custom alphabets.
		if (tmpSeparators.length == 0 ||
				((double) (tmpAlphabet.length / tmpSeparators.length)) > SEPARATOR_THRESHOLD) {
			final int minSeparatorsSize = (int) Math.ceil(tmpAlphabet.length / SEPARATOR_THRESHOLD);
			// check minimum size of separators
			if (minSeparatorsSize > tmpSeparators.length) {
				// fill separators from alphabet: move the first characters of the alphabet
				// to the end of the separators
				final int missingSeparators = minSeparatorsSize - tmpSeparators.length;
				tmpSeparators = Arrays.copyOf(tmpSeparators, tmpSeparators.length + missingSeparators);
				System.arraycopy(tmpAlphabet, 0, tmpSeparators,
						tmpSeparators.length - missingSeparators, missingSeparators);
				tmpAlphabet = Arrays.copyOfRange(tmpAlphabet, missingSeparators, tmpAlphabet.length);
			}
		}

		// shuffle the current alphabet (in place)
		shuffle(tmpAlphabet, this.salt);

		// check guards
		this.guards = new char[(int) Math.ceil(tmpAlphabet.length / GUARD_THRESHOLD)];
		// NOTE(review): "alphabet" is the constructor parameter here, which has already been
		// validated to hold at least MIN_ALPHABET_LENGTH characters, so this first branch is not
		// taken for any input that passes validation. Kept as-is to preserve generated hashes.
		if (alphabet.length < 3) {
			System.arraycopy(tmpSeparators, 0, guards, 0, guards.length);
			this.separators = Arrays.copyOfRange(tmpSeparators, guards.length, tmpSeparators.length);
			this.alphabet = tmpAlphabet;
		} else {
			// the guards are taken from the head of the alphabet and removed from it
			System.arraycopy(tmpAlphabet, 0, guards, 0, guards.length);
			this.separators = tmpSeparators;
			this.alphabet = Arrays.copyOfRange(tmpAlphabet, guards.length, tmpAlphabet.length);
		}

		// create the separators set
		separatorsSet = IntStream.range(0, separators.length)
				.mapToObj(idx -> separators[idx])
				.collect(Collectors.toSet());
	}

	/**
	 * Encodes the given hexadecimal string. The string (without an optional {@code 0x}/{@code 0X}
	 * prefix) is split into chunks of at most 12 characters; each chunk is prefixed with {@code 1},
	 * parsed as a hexadecimal number and the resulting numbers are encoded with
	 * {@link #encode(long...)}.
	 *
	 * @param hexNumbers the hexadecimal string
	 * @return the encoded hash, or {@code null} if {@code hexNumbers} is {@code null}
	 * @throws IllegalArgumentException if a number is not supported
	 *                                  (a chunk that is not valid hex raises {@link NumberFormatException})
	 */
	public String encodeFromHex(final String hexNumbers) {
		if (hexNumbers == null) {
			return null;
		}

		// remove the prefix, if present
		final String hex = hexNumbers.startsWith("0x") || hexNumbers.startsWith("0X") ?
				hexNumbers.substring(2) : hexNumbers;

		// get the associated long value and encode it
		LongStream values = LongStream.empty();
		final Matcher matcher = HEX_VALUES_PATTERN.matcher(hex);
		while (matcher.find()) {
			// the leading "1" preserves leading zeros of the chunk; decodeToHex strips it again
			final long value = new BigInteger("1" + matcher.group(), 16).longValue();
			values = LongStream.concat(values, LongStream.of(value));
		}

		return encode(values.toArray());
	}

	/**
	 * Encodes the given numbers into a single hash.
	 *
	 * @param numbers the numbers to encode, each must be non-negative
	 * @return the encoded hash, {@code null} if {@code numbers} is {@code null},
	 * or an empty string if {@code numbers} is empty
	 * @throws IllegalArgumentException if any number is negative
	 */
	@Override
	public String encode(final long... numbers) {
		if (numbers == null) {
			return null;
		}
		if (numbers.length == 0) {
			// nothing to encode: without this guard the padding logic below would index past the
			// end of the (empty) result when a minimum length is configured
			return "";
		}

		// copy alphabet
		final char[] currentAlphabet = Arrays.copyOf(alphabet, alphabet.length);

		// determine the lottery number
		final long lotteryId = LongStream.range(0, numbers.length)
				.reduce(0, (state, i) -> {
					final long number = numbers[(int) i];
					if (number < 0) {
						throw new IllegalArgumentException("invalid number: " + number);
					}
					return state + number % (i + LOTTERY_MOD);
				});
		final char lottery = currentAlphabet[(int) (lotteryId % currentAlphabet.length)];

		// encode each number
		final StringBuilder global = new StringBuilder();
		IntStream.range(0, numbers.length)
				.forEach(idx -> {
					// derive alphabet (currentAlphabet is re-shuffled in place for every number)
					deriveNewAlphabet(currentAlphabet, salt, lottery);

					// encode
					final int initialLength = global.length();
					translate(numbers[idx], currentAlphabet, global, initialLength);

					// prepend the lottery
					if (idx == 0) {
						global.insert(0, lottery);
					}

					// append the separator, if more numbers are pending encoding
					if (idx + 1 < numbers.length) {
						long n = numbers[idx] % (global.charAt(initialLength) + 1);
						global.append(separators[(int) (n % separators.length)]);
					}
				});

		// add the guards, if there's any space left
		if (minLength > global.length()) {
			int guardIdx = (int) ((lotteryId + lottery) % guards.length);
			global.insert(0, guards[guardIdx]);
			if (minLength > global.length()) {
				guardIdx = (int) ((lotteryId + global.charAt(2)) % guards.length);
				global.append(guards[guardIdx]);
			}
		}

		// add the necessary padding
		int paddingLeft = minLength - global.length();
		while (paddingLeft > 0) {
			// shuffle the alphabet using a snapshot of itself as the salt
			shuffle(currentAlphabet, Arrays.copyOf(currentAlphabet, currentAlphabet.length));

			final int alphabetHalfSize = currentAlphabet.length / 2;
			final int initialSize = global.length();
			if (paddingLeft > currentAlphabet.length) {
				// entire alphabet with the current encoding in the middle of it
				int offset = alphabetHalfSize + (currentAlphabet.length % 2 == 0 ? 0 : 1);

				global.insert(0, currentAlphabet, alphabetHalfSize, offset);
				global.insert(offset + initialSize, currentAlphabet, 0, alphabetHalfSize);
				// decrease the padding left
				paddingLeft -= currentAlphabet.length;
			} else {
				// calculate the excess
				final int excess = currentAlphabet.length + global.length() - minLength;
				final int secondHalfStartOffset = alphabetHalfSize + Math.floorDiv(excess, 2);
				final int secondHalfLength = currentAlphabet.length - secondHalfStartOffset;
				final int firstHalfLength = paddingLeft - secondHalfLength;

				global.insert(0, currentAlphabet, secondHalfStartOffset, secondHalfLength);
				global.insert(secondHalfLength + initialSize, currentAlphabet, 0, firstHalfLength);

				paddingLeft = 0;
			}
		}

		return global.toString();
	}

	//-------------------------
	// Decode
	//-------------------------

	/**
	 * Decodes a hash into a hexadecimal string. Each decoded number is rendered as hex and its
	 * first character (the marker added by {@link #encodeFromHex(String)}) is dropped.
	 *
	 * @param hash the hash
	 * @return the decoded hexadecimal string, or {@code null} if {@code hash} is {@code null}
	 * @throws IllegalArgumentException if the hash is invalid.
	 */
	public String decodeToHex(final String hash) {
		if (hash == null) {
			return null;
		}

		final StringBuilder sb = new StringBuilder();
		Arrays.stream(decode(hash))
				.mapToObj(Long::toHexString)
				.forEach(hex -> sb.append(hex, 1, hex.length()));
		return sb.toString();
	}

	/**
	 * Decodes a hash into the numbers it was generated from. The result is validated by encoding
	 * it again and comparing with the given hash.
	 *
	 * @param hash the hash
	 * @return the decoded numbers, or {@code null} if {@code hash} is {@code null}
	 * @throws IllegalArgumentException if the hash is invalid.
	 */
	@Override
	public long[] decode(final String hash) {
		if (hash == null) {
			return null;
		}

		// create a set of the guards
		final Set<Character> guardsSet = IntStream.range(0, guards.length)
				.mapToObj(idx -> guards[idx])
				.collect(Collectors.toSet());
		// count the total guards used
		final int[] guardsIdx = IntStream.range(0, hash.length())
				.filter(idx -> guardsSet.contains(hash.charAt(idx)))
				.toArray();
		// get the start/end index base on the guards count
		final int startIdx, endIdx;
		if (guardsIdx.length > 0) {
			startIdx = guardsIdx[0] + 1;
			endIdx = guardsIdx.length > 1 ? guardsIdx[1] : hash.length();
		} else {
			startIdx = 0;
			endIdx = hash.length();
		}

		LongStream decoded = LongStream.empty();
		// parse the hash
		if (hash.length() > 0) {
			if (startIdx >= hash.length()) {
				// the hash ends right after its first guard, so there is no lottery character
				throw new IllegalArgumentException("invalid hash: " + hash);
			}
			final char lottery = hash.charAt(startIdx);

			// create the initial accumulation string
			// (only a capacity hint; clamped because a hash made of guards only would make it negative)
			final int length = Math.max(hash.length() - guardsIdx.length - 1, 0);
			StringBuilder block = new StringBuilder(length);

			// create the base salt: lottery + salt, the remainder is filled from the alphabet below
			final char[] decodeSalt = new char[alphabet.length];
			decodeSalt[0] = lottery;
			final int saltLength = salt.length >= alphabet.length ? alphabet.length - 1 : salt.length;
			System.arraycopy(salt, 0, decodeSalt, 1, saltLength);
			final int saltLeft = alphabet.length - saltLength - 1;

			// copy alphabet
			final char[] currentAlphabet = Arrays.copyOf(alphabet, alphabet.length);

			for (int i = startIdx + 1; i < endIdx; i++) {
				if (false == separatorsSet.contains(hash.charAt(i))) {
					block.append(hash.charAt(i));
					// continue if we have not reached the end, yet
					if (i < endIdx - 1) {
						continue;
					}
				}

				if (block.length() > 0) {
					// create the salt
					if (saltLeft > 0) {
						System.arraycopy(currentAlphabet, 0, decodeSalt,
								alphabet.length - saltLeft, saltLeft);
					}

					// shuffle the alphabet
					shuffle(currentAlphabet, decodeSalt);

					// append the decoded value
					final long n = translate(block.toString().toCharArray(), currentAlphabet);
					decoded = LongStream.concat(decoded, LongStream.of(n));

					// create a new block
					block = new StringBuilder(length);
				}
			}
		}

		// validate the hash
		final long[] decodedValue = decoded.toArray();
		if (!Objects.equals(hash, encode(decodedValue))) {
			throw new IllegalArgumentException("invalid hash: " + hash);
		}

		return decodedValue;
	}

	/**
	 * Writes {@code n} in base {@code alphabet.length}, using the alphabet characters as digits,
	 * into {@code sb} starting at position {@code start}.
	 *
	 * @param n        the number to translate
	 * @param alphabet the digits to use
	 * @param sb       the target builder
	 * @param start    the position in {@code sb} at which each digit is inserted
	 * @return {@code sb}
	 */
	private StringBuilder translate(final long n, final char[] alphabet,
									final StringBuilder sb, final int start) {
		long input = n;
		do {
			// prepend the chosen char
			sb.insert(start, alphabet[(int) (input % alphabet.length)]);

			// trim the input
			input = input / alphabet.length;
		} while (input > 0);

		return sb;
	}

	/**
	 * Reads {@code hash} as a number in base {@code alphabet.length}, where the value of each
	 * character is its (first) index in {@code alphabet}.
	 *
	 * @param hash     the characters to translate
	 * @param alphabet the digits in use
	 * @return the translated number
	 * @throws IllegalArgumentException if {@code hash} contains a character that is not in {@code alphabet}
	 */
	private long translate(final char[] hash, final char[] alphabet) {
		// map every alphabet character to the first index it appears at
		final Map<Character, Integer> alphabetMapping = IntStream.range(0, alphabet.length)
				.boxed()
				.collect(Collectors.toMap(idx -> alphabet[idx], idx -> idx, (first, second) -> first));

		// accumulate with integer arithmetic only: floating-point powers are not exact for large
		// place values, which would corrupt the decoded number
		long number = 0;
		for (final char c : hash) {
			final Integer digit = alphabetMapping.get(c);
			if (digit == null) {
				throw new IllegalArgumentException("Invalid alphabet for hash");
			}
			number = number * alphabet.length + digit;
		}

		return number;
	}

	/**
	 * Shuffles {@code alphabet} in place, using a salt built from the lottery character, the
	 * configured salt and the current alphabet (in that order, truncated to the alphabet length).
	 *
	 * @param alphabet the alphabet to shuffle (modified in place)
	 * @param salt     the configured salt
	 * @param lottery  the lottery character
	 * @return {@code alphabet}
	 */
	private char[] deriveNewAlphabet(final char[] alphabet, final char[] salt, final char lottery) {
		// create the new salt
		final char[] newSalt = new char[alphabet.length];

		// 1. lottery
		newSalt[0] = lottery;
		int spaceLeft = newSalt.length - 1;
		int offset = 1;
		// 2. salt
		if (salt.length > 0 && spaceLeft > 0) {
			int length = Math.min(salt.length, spaceLeft);
			System.arraycopy(salt, 0, newSalt, offset, length);
			spaceLeft -= length;
			offset += length;
		}
		// 3. alphabet
		if (spaceLeft > 0) {
			System.arraycopy(alphabet, 0, newSalt, offset, spaceLeft);
		}

		// shuffle
		return shuffle(alphabet, newSalt);
	}

	/**
	 * Validates the alphabet and returns a copy of it without duplicates and without any of the
	 * given separators, preserving the original order.
	 *
	 * @param alphabet   the alphabet to validate
	 * @param separators the separators to exclude
	 * @return the filtered alphabet
	 * @throws IllegalArgumentException if the alphabet has fewer than {@value #MIN_ALPHABET_LENGTH}
	 *                                  characters or contains a space
	 */
	private char[] validateAndFilterAlphabet(final char[] alphabet, final char[] separators) {
		// validate size
		if (alphabet.length < MIN_ALPHABET_LENGTH) {
			throw new IllegalArgumentException(String.format("alphabet must contain at least %d unique " +
					"characters: %d", MIN_ALPHABET_LENGTH, alphabet.length));
		}

		// insertion-ordered so that the filtered alphabet keeps the caller's order
		final Set<Character> seen = new LinkedHashSet<>(alphabet.length);
		final Set<Character> invalid = IntStream.range(0, separators.length)
				.mapToObj(idx -> separators[idx])
				.collect(Collectors.toSet());

		// add to seen set (without duplicates)
		IntStream.range(0, alphabet.length)
				.forEach(i -> {
					if (alphabet[i] == ' ') {
						throw new IllegalArgumentException(String.format("alphabet must not contain spaces: " +
								"index %d", i));
					}
					final Character c = alphabet[i];
					if (!invalid.contains(c)) {
						seen.add(c);
					}
				});

		// create a new alphabet without the duplicates
		final char[] uniqueAlphabet = new char[seen.size()];
		int idx = 0;
		for (char c : seen) {
			uniqueAlphabet[idx++] = c;
		}
		return uniqueAlphabet;
	}

	/**
	 * Returns the separators that are also present in the alphabet, in their original order.
	 *
	 * @param separators the candidate separators
	 * @param alphabet   the alphabet
	 * @return the separators contained in the alphabet
	 */
	@SuppressWarnings("SameParameterValue")
	private char[] filterSeparators(final char[] separators, final char[] alphabet) {
		final Set<Character> valid = IntStream.range(0, alphabet.length)
				.mapToObj(idx -> alphabet[idx])
				.collect(Collectors.toSet());

		return IntStream.range(0, separators.length)
				.mapToObj(idx -> (separators[idx]))
				.filter(valid::contains)
				// ugly way to convert back to char[]
				.map(c -> Character.toString(c))
				.collect(Collectors.joining())
				.toCharArray();
	}

	/**
	 * Shuffles {@code alphabet} in place, deterministically, driven by the characters of
	 * {@code salt}. An empty salt leaves the alphabet untouched.
	 *
	 * @param alphabet the characters to shuffle (modified in place)
	 * @param salt     the salt driving the shuffle
	 * @return {@code alphabet}
	 */
	private char[] shuffle(final char[] alphabet, final char[] salt) {
		// i: position being swapped (walks from the end), v: index into the salt (wraps around),
		// p: running sum of the salt characters used so far, j: swap partner, z: current salt char
		for (int i = alphabet.length - 1, v = 0, p = 0, j, z; salt.length > 0 && i > 0; i--, v++) {
			v %= salt.length;
			p += z = salt[v];
			j = (z + v + p) % i;
			final char tmp = alphabet[j];
			alphabet[j] = alphabet[i];
			alphabet[i] = tmp;
		}
		return alphabet;
	}
}