// org.apache.flink.table.dataformat.BinaryStringUtil
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.	See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.	You may obtain a copy of the License at
 *
 *		http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.flink.table.dataformat;

import org.apache.flink.core.memory.MemorySegment;
import org.apache.flink.table.runtime.util.SegmentsUtil;
import org.apache.flink.table.runtime.util.StringUtf8Utils;
import org.apache.flink.table.utils.EncodingUtils;

import java.math.BigDecimal;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.stream.Collectors;
import java.util.stream.Stream;

import static org.apache.flink.table.dataformat.BinaryString.EMPTY_UTF8;
import static org.apache.flink.table.dataformat.BinaryString.fromAddress;
import static org.apache.flink.table.dataformat.BinaryString.fromBytes;
import static org.apache.flink.table.dataformat.BinaryString.fromString;
import static org.apache.flink.table.dataformat.BinaryString.numBytesForFirstByte;

/**
 * Util for {@link BinaryString}.
 */
public class BinaryStringUtil {

	/** Shared zero-length array, returned by the split method when the input is empty. */
	public static final BinaryString[] EMPTY_STRING_ARRAY = new BinaryString[0];

	/** Lower-case spellings that {@code toBooleanSQL} maps to {@code TRUE}. */
	private static final List<BinaryString> TRUE_STRINGS =
			Stream.of("t", "true", "y", "yes", "1")
					.map(BinaryString::fromString)
					// materialize once up front so later lookups compare ready-made strings
					.peek(BinaryString::ensureMaterialized)
					.collect(Collectors.toList());

	/** Lower-case spellings that {@code toBooleanSQL} maps to {@code FALSE}. */
	private static final List<BinaryString> FALSE_STRINGS =
			Stream.of("f", "false", "n", "no", "0")
					.map(BinaryString::fromString)
					// materialize once up front so later lookups compare ready-made strings
					.peek(BinaryString::ensureMaterialized)
					.collect(Collectors.toList());

	/**
	 * Copies the first {@code sizeInBytes} bytes of {@code str} into a buffer obtained from
	 * {@code SegmentsUtil.allocateReuseBytes}.
	 *
	 * <p>NOTE(review): the allocator's name suggests the buffer is shared and reused, so the
	 * result should presumably not be retained across calls; confirm against SegmentsUtil.
	 */
	private static byte[] getTmpBytes(BinaryString str, int sizeInBytes) {
		final byte[] buffer = SegmentsUtil.allocateReuseBytes(sizeInBytes);
		final MemorySegment[] source = str.getSegments();
		final int sourceOffset = str.getOffset();
		SegmentsUtil.copyToBytes(source, sourceOffset, buffer, 0, sizeInBytes);
		return buffer;
	}

	/**
	 * Splits {@code str} around every occurrence of the whole {@code separator} string.
	 *
	 * <p>The separator is not included in the returned array. Adjacent separators produce
	 * empty tokens.
	 *
	 * <p>A {@code null} or empty separator is replaced by the single-space string {@code " "}.
	 *
	 * <pre>
	 * "".splitByWholeSeparatorPreserveAllTokens(*)                 = []
	 * "ab de fg".splitByWholeSeparatorPreserveAllTokens(null)      = ["ab", "de", "fg"]
	 * "ab   de fg".splitByWholeSeparatorPreserveAllTokens(null)    = ["ab", "", "", "de", "fg"]
	 * "ab:cd:ef".splitByWholeSeparatorPreserveAllTokens(":")       = ["ab", "cd", "ef"]
	 * "ab-!-cd-!-ef".splitByWholeSeparatorPreserveAllTokens("-!-") = ["ab", "cd", "ef"]
	 * </pre>
	 *
	 * <p>Note: returned binary strings reuse memory segments from the input str.
	 *
	 * @param str the string to split; must not be {@code null}
	 * @param separator the delimiter; {@code null} or empty means a single space
	 * @return the tokens in order, or {@link #EMPTY_STRING_ARRAY} when {@code str} is empty
	 */
	public static BinaryString[] splitByWholeSeparatorPreserveAllTokens(BinaryString str, BinaryString separator) {
		str.ensureMaterialized();
		final int sizeInBytes = str.getSizeInBytes();
		MemorySegment[] segments = str.getSegments();
		int offset = str.getOffset();

		if (sizeInBytes == 0) {
			return EMPTY_STRING_ARRAY;
		}

		if (separator == null || EMPTY_UTF8.equals(separator)) {
			// No usable separator: fall back to splitting on a single space.
			return splitByWholeSeparatorPreserveAllTokens(str, fromString(" "));
		}
		separator.ensureMaterialized();

		int sepSize = separator.getSizeInBytes();
		MemorySegment[] sepSegs = separator.getSegments();
		int sepOffset = separator.getOffset();

		// Typed list: a raw list would make toArray(T[]) return Object[].
		final List<BinaryString> substrings = new ArrayList<>();
		// beg: byte index (relative to the string start) where the current token begins.
		int beg = 0;
		// end: relative byte index of the next separator, or sizeInBytes once the tail is emitted.
		int end = 0;
		while (end < sizeInBytes) {
			// find() works on absolute segment offsets; subtracting offset makes the hit relative.
			// A miss stays negative after the subtraction, which the check below relies on.
			end = SegmentsUtil.find(
					segments, offset + beg, sizeInBytes - beg,
					sepSegs, sepOffset, sepSize) - offset;

			if (end > -1) {
				// Separator found: emit [beg, end), or an empty token when two separators touch.
				substrings.add(end > beg ? fromAddress(segments, offset + beg, end - beg) : EMPTY_UTF8);
				// Continue right after the separator.
				beg = end + sepSize;
			} else {
				// No further separator: the rest of the string is the last token.
				substrings.add(fromAddress(segments, offset + beg, sizeInBytes - beg));
				end = sizeInBytes;
			}
		}

		return substrings.toArray(new BinaryString[0]);
	}

	/**
	 * Maps a string to its SQL boolean meaning.
	 *
	 * <p>The input is lower-cased and looked up in {@code TRUE_STRINGS} and
	 * {@code FALSE_STRINGS}.
	 *
	 * @return {@code TRUE}, {@code FALSE}, or {@code null} when the text is in neither list
	 */
	public static Boolean toBooleanSQL(BinaryString str) {
		final BinaryString normalized = str.toLowerCase();
		if (TRUE_STRINGS.contains(normalized)) {
			return Boolean.TRUE;
		}
		if (FALSE_STRINGS.contains(normalized)) {
			return Boolean.FALSE;
		}
		return null;
	}

	/**
	 * Digests the bytes of {@code str} with the given {@link MessageDigest} and returns the
	 * result of {@code EncodingUtils.hex} on the digest as a new string.
	 */
	public static BinaryString hash(BinaryString str, MessageDigest md) {
		final byte[] digest = md.digest(str.getBytes());
		final String hex = EncodingUtils.hex(digest);
		return fromString(hex);
	}

	/**
	 * Looks up a {@link MessageDigest} by algorithm name and hashes {@code str} with it.
	 *
	 * @throws NoSuchAlgorithmException if no provider supports {@code algorithm}
	 */
	public static BinaryString hash(BinaryString str, String algorithm) throws NoSuchAlgorithmException {
		final MessageDigest digest = MessageDigest.getInstance(algorithm);
		return hash(str, digest);
	}

	/**
	 * Parses this BinaryString to Decimal.
	 *
	 * <p>Uses the long-backed parser when both {@code precision} and the byte length of the
	 * text are at most {@code Decimal.MAX_LONG_DIGITS}, and the BigDecimal-backed parser
	 * otherwise.
	 *
	 * @return the parsed value, or {@code null} when the chosen parser rejects the text or the
	 *     value does not fit (both helpers in this class signal failure with {@code null})
	 */
	public static Decimal toDecimal(BinaryString str, int precision, int scale) {
		str.ensureMaterialized();

		final int sizeInBytes = str.getSizeInBytes();
		final boolean fitsLongParser =
				precision <= Decimal.MAX_LONG_DIGITS && sizeInBytes <= Decimal.MAX_LONG_DIGITS;
		if (!fitsLongParser) {
			return toBigPrecisionDecimal(str, precision, scale);
		}

		final byte[] bytes = getTmpBytes(str, sizeInBytes);
		return toDecimalFromBytes(precision, scale, bytes, 0, sizeInBytes);
	}

	/**
	 * Parses a decimal literal from raw bytes into a long-backed {@code Decimal}.
	 *
	 * <p>Accepted shape: optional leading white space (space, newline, tab), optional sign,
	 * digits with at most one '.', optional exponent ('e' or 'E', optional sign, digits),
	 * optional trailing white space. The caller only takes this path when both
	 * {@code precision} and {@code sizeInBytes} are at most {@code Decimal.MAX_LONG_DIGITS}.
	 *
	 * <p>Every byte must belong to one of those parts. In particular, the byte that ends the
	 * digit scan is validated rather than skipped, so inputs such as "12x", "12e" or "1e5x"
	 * yield {@code null}, matching the BigDecimal-based parser used for larger values.
	 *
	 * @param precision target precision
	 * @param scale target scale; the value is rounded half-up (away from zero) to it
	 * @param bytes buffer holding the text
	 * @param offset index of the first byte of the text in {@code bytes}
	 * @param sizeInBytes number of bytes of text
	 * @return the parsed value, or {@code null} if the text is malformed or overflows
	 *     {@code precision}
	 */
	private static Decimal toDecimalFromBytes(
			int precision, int scale, byte[] bytes, int offset, int sizeInBytes) {
		int i = 0;

		// Skip white space at the beginning.
		byte b = 0;
		while (i < sizeInBytes) {
			b = bytes[offset + i];
			if (b != ' ' && b != '\n' && b != '\t') {
				break;
			}
			i++;
		}
		if (i == sizeInBytes) {
			// all white space
			return null;
		}

		// ======= begin significand part =======
		final boolean negative = b == '-';
		if (negative || b == '+') {
			i++;
			if (i == sizeInBytes) {
				// only a sign, no digits
				return null;
			}
		}

		long significand = 0;
		int exp = 0;
		int significandLen = 0, pointPos = -1;

		while (i < sizeInBytes) {
			b = bytes[offset + i];
			if (b >= '0' && b <= '9') {
				// No overflow concern: the caller limits sizeInBytes to Decimal.MAX_LONG_DIGITS.
				significand = significand * 10 + (b - '0');
				significandLen++;
			} else if (b == '.') {
				if (pointPos >= 0) {
					// More than one decimal point
					return null;
				}
				pointPos = significandLen;
			} else {
				// Not part of the significand: leave i ON this byte so that it is
				// still examined by the exponent / trailing checks below.
				break;
			}
			i++;
		}

		if (pointPos < 0) {
			pointPos = significandLen;
		}
		if (negative) {
			significand = -significand;
		}
		// ======= end significand part =======

		// ======= begin exponential part =======
		// If i < sizeInBytes the loop above stopped on b == bytes[offset + i].
		if (i < sizeInBytes && (b == 'e' || b == 'E')) {
			i++;
			if (i == sizeInBytes) {
				// exponent marker with nothing after it
				return null;
			}
			b = bytes[offset + i];
			final boolean expNegative = b == '-';
			if (expNegative || b == '+') {
				i++;
				if (i == sizeInBytes) {
					return null;
				}
			}

			int expDigits = 0;
			// Once the exponent reaches this magnitude the result over- or underflows anyway,
			// so further digits are read but no longer accumulated.
			final int expStopValue = 40;

			while (i < sizeInBytes) {
				b = bytes[offset + i];
				if (b < '0' || b > '9') {
					// Leave i on this byte for the trailing check.
					break;
				}
				if (expDigits < expStopValue) {
					expDigits = expDigits * 10 + (b - '0');
				}
				i++;
			}

			if (expNegative) {
				expDigits = -expDigits;
			}
			exp += expDigits;
		}
		// Digits after the decimal point shift the exponent down.
		exp -= significandLen - pointPos;
		// ======= end exponential part =======

		// Everything left must be white space.
		while (i < sizeInBytes) {
			b = bytes[offset + i];
			i++;
			if (b != ' ' && b != '\n' && b != '\t') {
				return null;
			}
		}

		// Round exp to scale
		int change = exp + scale;
		if (significandLen + change > precision) {
			// Overflow
			return null;
		}
		if (change >= 0) {
			significand *= Decimal.POW10[change];
		} else if (-change > significandLen) {
			// |significand| < 10^significandLen <= 10^(-change - 1), i.e. less than half a unit
			// at the target scale, so the rounded result is zero. Handling it here also keeps
			// the POW10 index below bounded by significandLen instead of by the exponent.
			// NOTE(review): POW10 presumably only covers powers that fit in a long - confirm.
			significand = 0;
		} else {
			// Round half away from zero: add half a unit, then truncate.
			int k = negative ? -5 : 5;
			significand = (significand + k * Decimal.POW10[-change - 1]) / Decimal.POW10[-change];
		}
		return Decimal.fromLong(significand, precision, scale);
	}

	/**
	 * Parses {@code str} through {@link BigDecimal}; used when the long-backed parser does not
	 * apply.
	 *
	 * <p>The UTF-8 bytes are decoded into a char array (directly from the segment when there
	 * is exactly one, via a byte copy otherwise), surrounding white space (space, newline,
	 * tab) is trimmed, and the remaining range is handed to
	 * {@code BigDecimal(char[], int, int)}.
	 *
	 * @return the decimal, or {@code null} if decoding reports a negative length or a
	 *     {@link NumberFormatException} is raised while building the value
	 */
	private static Decimal toBigPrecisionDecimal(BinaryString str, int precision, int scale) {
		final int sizeInBytes = str.getSizeInBytes();
		final int offset = str.getOffset();
		final MemorySegment[] segments = str.getSegments();
		final char[] chars = SegmentsUtil.allocateReuseChars(sizeInBytes);

		final int len;
		if (segments.length == 1) {
			len = StringUtf8Utils.decodeUTF8Strict(segments[0], offset, sizeInBytes, chars);
		} else {
			// Spread over several segments: gather the bytes first, then decode.
			final byte[] bytes = SegmentsUtil.allocateReuseBytes(sizeInBytes);
			SegmentsUtil.copyToBytes(segments, offset, bytes, 0, sizeInBytes);
			len = StringUtf8Utils.decodeUTF8Strict(bytes, 0, sizeInBytes, chars);
		}
		if (len < 0) {
			return null;
		}

		// First non-blank char; stays 0 when every char is blank.
		int start = 0;
		int scan = 0;
		while (scan < len) {
			final char c = chars[scan];
			if (c != ' ' && c != '\n' && c != '\t') {
				start = scan;
				break;
			}
			scan++;
		}

		// One past the last non-blank char; stays len when every char is blank.
		int end = len;
		scan = len - 1;
		while (scan >= 0) {
			final char c = chars[scan];
			if (c != ' ' && c != '\n' && c != '\t') {
				end = scan + 1;
				break;
			}
			scan--;
		}

		try {
			final BigDecimal parsed = new BigDecimal(chars, start, end - start);
			return Decimal.fromBigDecimal(parsed, precision, scale);
		} catch (NumberFormatException malformed) {
			return null;
		}
	}

	/**
	 * Parses this BinaryString to Long.
	 *
	 * <p>The value is accumulated as a negative number and negated at the end when the text
	 * does not start with '-'. The negative range is one larger than the positive one
	 * (Long.MIN_VALUE is '-9223372036854775808', Long.MAX_VALUE is '9223372036854775807'), so
	 * accumulating negatively covers every representable value.
	 *
	 * <p>A fractional part is allowed and truncated, but it must consist of digits only.
	 *
	 * <p>This code is mostly copied from LazyLong.parseLong in Hive.
	 *
	 * @return Long value if the parsing was successful else null.
	 */
	public static Long toLong(BinaryString str) {
		final int numBytes = str.getSizeInBytes();
		final byte[] text = getTmpBytes(str, numBytes);
		if (numBytes == 0) {
			return null;
		}

		final byte first = text[0];
		final boolean negative = first == '-';
		int pos = 0;
		if (negative || first == '+') {
			if (numBytes == 1) {
				// a sign with nothing after it
				return null;
			}
			pos = 1;
		}

		final int radix = 10;
		// Below this value, multiplying by radix is guaranteed to leave the long range.
		final long stopValue = Long.MIN_VALUE / radix;
		long acc = 0;

		// Integral part: stops right after a '.', if there is one.
		for (; pos < numBytes; pos++) {
			final byte cur = text[pos];
			if (cur == '.') {
				pos++;
				break;
			}
			if (cur < '0' || cur > '9') {
				return null;
			}
			if (acc < stopValue) {
				return null;
			}
			acc = acc * radix - (cur - '0');
			// acc was >= stopValue, so the only possible overflow wraps to a positive value.
			if (acc > 0) {
				return null;
			}
		}

		// Fractional part: does not change the result but must be well formed.
		for (; pos < numBytes; pos++) {
			final byte cur = text[pos];
			if (cur < '0' || cur > '9') {
				return null;
			}
		}

		if (negative) {
			return acc;
		}
		final long positive = -acc;
		if (positive < 0) {
			// -Long.MIN_VALUE is not representable
			return null;
		}
		return positive;
	}

	/**
	 * Parses this BinaryString to Int.
	 *
	 * <p>The value is accumulated as a negative number and negated at the end when the text
	 * does not start with '-'. The negative range is one larger than the positive one
	 * (Integer.MIN_VALUE is '-2147483648', Integer.MAX_VALUE is '2147483647'), so accumulating
	 * negatively covers every representable value.
	 *
	 * <p>A fractional part is allowed and truncated, but it must consist of digits only.
	 *
	 * <p>This code is mostly copied from LazyInt.parseInt in Hive.
	 *
	 * <p>This method is almost the same as {@code toLong}; it is kept duplicated for
	 * performance reasons, like Hive does.
	 *
	 * @return Integer value if the parsing was successful else null.
	 */
	public static Integer toInt(BinaryString str) {
		final int numBytes = str.getSizeInBytes();
		final byte[] text = getTmpBytes(str, numBytes);
		if (numBytes == 0) {
			return null;
		}

		final byte first = text[0];
		final boolean negative = first == '-';
		int pos = 0;
		if (negative || first == '+') {
			if (numBytes == 1) {
				// a sign with nothing after it
				return null;
			}
			pos = 1;
		}

		final int radix = 10;
		// Below this value, multiplying by radix is guaranteed to leave the int range.
		final int stopValue = Integer.MIN_VALUE / radix;
		int acc = 0;

		// Integral part: stops right after a '.', if there is one.
		for (; pos < numBytes; pos++) {
			final byte cur = text[pos];
			if (cur == '.') {
				pos++;
				break;
			}
			if (cur < '0' || cur > '9') {
				return null;
			}
			if (acc < stopValue) {
				return null;
			}
			acc = acc * radix - (cur - '0');
			// acc was >= stopValue, so the only possible overflow wraps to a positive value.
			if (acc > 0) {
				return null;
			}
		}

		// Fractional part: does not change the result but must be well formed.
		for (; pos < numBytes; pos++) {
			final byte cur = text[pos];
			if (cur < '0' || cur > '9') {
				return null;
			}
		}

		if (negative) {
			return acc;
		}
		final int positive = -acc;
		if (positive < 0) {
			// -Integer.MIN_VALUE is not representable
			return null;
		}
		return positive;
	}

	/**
	 * Parses this BinaryString to Short via {@code toInt}.
	 *
	 * @return the value, or {@code null} if parsing fails or the value is outside the short
	 *     range
	 */
	public static Short toShort(BinaryString str) {
		final Integer parsed = toInt(str);
		if (parsed == null) {
			return null;
		}
		final int value = parsed;
		if (value < Short.MIN_VALUE || value > Short.MAX_VALUE) {
			return null;
		}
		return (short) value;
	}

	/**
	 * Parses this BinaryString to Byte via {@code toInt}.
	 *
	 * @return the value, or {@code null} if parsing fails or the value is outside the byte
	 *     range
	 */
	public static Byte toByte(BinaryString str) {
		final Integer parsed = toInt(str);
		if (parsed == null) {
			return null;
		}
		final int value = parsed;
		if (value < Byte.MIN_VALUE || value > Byte.MAX_VALUE) {
			return null;
		}
		return (byte) value;
	}

	/**
	 * Parses the string form of {@code str} with {@link Double#valueOf(String)}.
	 *
	 * @return the value, or {@code null} if the text is not a valid double
	 */
	public static Double toDouble(BinaryString str) {
		final String text = str.toString();
		try {
			return Double.valueOf(text);
		} catch (NumberFormatException notANumber) {
			return null;
		}
	}

	/**
	 * Parses the string form of {@code str} with {@link Float#valueOf(String)}.
	 *
	 * @return the value, or {@code null} if the text is not a valid float
	 */
	public static Float toFloat(BinaryString str) {
		final String text = str.toString();
		try {
			return Float.valueOf(text);
		} catch (NumberFormatException notANumber) {
			return null;
		}
	}

	/**
	 * Treats {@code str} as a list of key-value entries and returns the value stored under
	 * {@code keyName}.
	 *
	 * <pre>
	 * keyvalue('k1=v1;k2=v2', ';', '=', 'k2') = 'v2'
	 * keyvalue('k1:v1,k2:v2', ',', ':', 'k3') = NULL
	 * </pre>
	 *
	 * <p>Entries are scanned left to right and the first matching one wins. When either
	 * string is not held in its first segment the work is delegated to {@code keyValueSlow}.
	 *
	 * @param split1  separator between key-value tuple.
	 * @param split2  separator between key and value.
	 * @param keyName name of the key whose value you want return.
	 *
	 * @return the value, or {@code null} if {@code keyName} is {@code null} or empty, or no
	 *     entry matches
	 */
	public static BinaryString keyValue(BinaryString str, byte split1, byte split2, BinaryString keyName) {
		str.ensureMaterialized();
		if (keyName == null || keyName.getSizeInBytes() == 0) {
			return null;
		}
		if (!str.inFirstSegment() || !keyName.inFirstSegment()) {
			return keyValueSlow(str, split1, split2, keyName);
		}

		final MemorySegment segment = str.getSegments()[0];
		final int base = str.getOffset();
		final int size = str.getSizeInBytes();

		// Byte index where the entry currently being scanned begins.
		int entryStart = 0;
		for (int pos = 0; pos < size; pos++) {
			if (segment.get(base + pos) != split1) {
				continue;
			}
			// Reached the end of an entry: see whether it is the requested key.
			final BinaryString value = findValueOfKey(str, split2, keyName, entryStart, pos);
			if (value != null) {
				return value;
			}
			entryStart = pos + 1;
		}
		// Last entry, which is not terminated by split1.
		return findValueOfKey(str, split2, keyName, entryStart, size);
	}

	/**
	 * Examines the single entry occupying bytes {@code [start, end)} of {@code str}; reads
	 * only the first segment of both strings.
	 *
	 * <p>Only the first occurrence of {@code split} inside the entry is considered. The entry
	 * matches when the bytes before it equal {@code keyName}.
	 *
	 * @return a copy of the bytes after the split byte when the key matches; {@code null}
	 *     when the entry has no split byte or a different key
	 */
	private static BinaryString findValueOfKey(
			BinaryString str,
			byte split,
			BinaryString keyName,
			int start,
			int end) {
		final int keyNameLen = keyName.getSizeInBytes();
		final MemorySegment segment = str.getSegments()[0];
		final int base = str.getOffset();

		// Locate the first split byte of the entry.
		int splitPos = start;
		while (splitPos < end && segment.get(base + splitPos) != split) {
			splitPos++;
		}
		if (splitPos >= end) {
			return null;
		}

		final boolean keyMatches = splitPos == start + keyNameLen
				&& segment.equalTo(keyName.getSegments()[0], base + start, keyName.getOffset(), keyNameLen);
		if (!keyMatches) {
			return null;
		}

		final int valueStart = splitPos + 1;
		final int valueLen = end - valueStart;
		final byte[] value = new byte[valueLen];
		segment.get(base + valueStart, value, 0, valueLen);
		return fromBytes(value, 0, valueLen);
	}

	/**
	 * Variant of {@code keyValue} that reads bytes through {@code str.byteAt}, used when the
	 * strings are not confined to their first segment.
	 *
	 * @return the value of the first entry whose key equals {@code keyName}, else {@code null}
	 */
	private static BinaryString keyValueSlow(
			BinaryString str,
			byte split1,
			byte split2,
			BinaryString keyName) {
		final int size = str.getSizeInBytes();
		// Byte index where the entry currently being scanned begins.
		int entryStart = 0;
		for (int pos = 0; pos < size; pos++) {
			if (str.byteAt(pos) != split1) {
				continue;
			}
			// Reached the end of an entry: see whether it is the requested key.
			final BinaryString value = findValueOfKeySlow(str, split2, keyName, entryStart, pos);
			if (value != null) {
				return value;
			}
			entryStart = pos + 1;
		}
		// Last entry, which is not terminated by split1.
		return findValueOfKeySlow(str, split2, keyName, entryStart, size);
	}

	/**
	 * Multi-segment counterpart of {@code findValueOfKey}: examines the entry occupying bytes
	 * {@code [start, end)} of {@code str}.
	 *
	 * <p>Only the first occurrence of {@code split} inside the entry is considered. The entry
	 * matches when the bytes before it equal {@code keyName}.
	 *
	 * @return a copy of the bytes after the split byte when the key matches; {@code null}
	 *     when the entry has no split byte or a different key
	 */
	private static BinaryString findValueOfKeySlow(
			BinaryString str,
			byte split,
			BinaryString keyName,
			int start,
			int end) {
		final int keyNameLen = keyName.getSizeInBytes();

		// Locate the first split byte of the entry.
		int splitPos = start;
		while (splitPos < end && str.byteAt(splitPos) != split) {
			splitPos++;
		}
		if (splitPos >= end) {
			return null;
		}

		final boolean keyMatches = splitPos == start + keyNameLen
				&& SegmentsUtil.equals(str.getSegments(), str.getOffset() + start, keyName.getSegments(),
						keyName.getOffset(), keyNameLen);
		if (!keyMatches) {
			return null;
		}

		final int valueStart = splitPos + 1;
		final byte[] value = SegmentsUtil.copyToBytes(
				str.getSegments(), str.getOffset() + valueStart, end - valueStart);
		return fromBytes(value);
	}

	/**
	 * SQL substring from {@code pos} to the end of the string; equivalent to calling the
	 * three-argument overload with a length of {@link Integer#MAX_VALUE}.
	 */
	public static BinaryString substringSQL(BinaryString str, int pos) {
		final int unbounded = Integer.MAX_VALUE;
		return substringSQL(str, pos, unbounded);
	}

	/**
	 * SQL-style substring, counted in characters.
	 *
	 * <p>A positive {@code pos} is 1-based from the left, a negative {@code pos} counts back
	 * from the end, and 0 is treated as the start of the string.
	 *
	 * @param pos start position as described above
	 * @param length maximum number of characters to return
	 * @return {@code null} if {@code length} is negative; the empty string if the input is
	 *     empty or the start position lies outside it; otherwise the selected characters,
	 *     cut off at the end of the string
	 */
	public static BinaryString substringSQL(BinaryString str, int pos, int length) {
		if (length < 0) {
			return null;
		}
		str.ensureMaterialized();
		if (str.equals(EMPTY_UTF8)) {
			return EMPTY_UTF8;
		}

		final int numChars = str.numChars();

		// Translate pos into a 0-based character index.
		int start = 0;
		if (pos > 0) {
			start = pos - 1;
			if (start >= numChars) {
				return EMPTY_UTF8;
			}
		} else if (pos < 0) {
			start = numChars + pos;
			if (start < 0) {
				return EMPTY_UTF8;
			}
		}

		// Clamp to the end of the string; checking the remainder first avoids start + length overflow.
		final int remaining = numChars - start;
		final int end = remaining < length ? numChars : start + length;
		return str.substring(start, end);
	}

	/**
	 * Joins the given strings, in order, into one string.
	 *
	 * @return the concatenation, or {@code null} if any argument is {@code null}
	 */
	public static BinaryString concat(BinaryString... inputs) {
		final List<BinaryString> asList = Arrays.asList(inputs);
		return concat(asList);
	}

	/**
	 * Joins the given strings, in iteration order, into one string.
	 *
	 * <p>{@code inputs} is iterated twice: once to validate and size the result, once to copy.
	 *
	 * @param inputs the strings to join
	 * @return the concatenation, or {@code null} if any element is {@code null}
	 */
	public static BinaryString concat(Iterable<BinaryString> inputs) {
		// First pass: reject nulls and compute the total length of the result.
		int totalLength = 0;
		for (BinaryString input : inputs) {
			if (input == null) {
				return null;
			}

			input.ensureMaterialized();
			totalLength += input.getSizeInBytes();
		}

		// Second pass: copy the inputs one after another into a fresh array.
		// No null check needed here; the first pass already returned on a null element.
		final byte[] result = new byte[totalLength];
		int offset = 0;
		for (BinaryString input : inputs) {
			int len = input.getSizeInBytes();
			SegmentsUtil.copyToBytes(input.getSegments(), input.getOffset(), result, offset, len);
			offset += len;
		}
		return fromBytes(result);
	}

	/**
	 * Concatenates input strings together into a single string using the separator.
	 * Returns NULL if the separator is NULL.
	 *
	 * <p>Note: CONCAT_WS() does not skip any empty strings, however it does skip any NULL
	 * values after the separator. For example, concat_ws(",", "a", null, "c") would yield
	 * "a,c".
	 */
	public static BinaryString concatWs(BinaryString separator, BinaryString... inputs) {
		final List<BinaryString> asList = Arrays.asList(inputs);
		return concatWs(separator, asList);
	}

	/**
	 * Joins the non-null elements of {@code inputs} with {@code separator} between them.
	 *
	 * <p>Empty strings are kept; {@code null} elements are skipped. {@code inputs} is
	 * iterated twice: once to size the result, once to copy.
	 *
	 * @param separator text placed between consecutive non-null inputs
	 * @param inputs the strings to join
	 * @return {@code null} if {@code separator} is {@code null}; the empty string if there is
	 *     no non-null input; otherwise the joined string
	 */
	public static BinaryString concatWs(BinaryString separator, Iterable<BinaryString> inputs) {
		if (null == separator) {
			return null;
		}

		separator.ensureMaterialized();
		final int sepSize = separator.getSizeInBytes();

		int numInputBytes = 0;  // total number of bytes from the inputs
		int numInputs = 0;      // number of non-null inputs
		for (BinaryString input : inputs) {
			if (input != null) {
				input.ensureMaterialized();
				numInputBytes += input.getSizeInBytes();
				numInputs++;
			}
		}

		if (numInputs == 0) {
			// Return an empty string if there is no input, or all the inputs are null.
			return EMPTY_UTF8;
		}

		// The result holds every input plus one separator between each adjacent pair.
		final byte[] result = new byte[numInputBytes + (numInputs - 1) * sepSize];
		int offset = 0;

		// Number of non-null inputs copied so far.
		int copied = 0;
		for (BinaryString input : inputs) {
			if (input == null) {
				continue;
			}
			int len = input.getSizeInBytes();
			SegmentsUtil.copyToBytes(input.getSegments(), input.getOffset(), result, offset, len);
			offset += len;

			copied++;
			// Add separator if this is not the last input.
			if (copied < numInputs) {
				SegmentsUtil.copyToBytes(
					separator.getSegments(),
					separator.getOffset(),
					result,
					offset,
					sepSize);
				offset += sepSize;
			}
		}
		return fromBytes(result);
	}

	/**
	 * Reverses the order of the characters in {@code str}.
	 *
	 * <p>The bytes of each character are moved as a group (group length taken from
	 * {@code numBytesForFirstByte}), so the bytes inside a character keep their order.
	 * Strings not confined to their first segment are handled by {@code reverseMultiSegs}.
	 *
	 * @return a new string whose character order is the reverse of {@code str}
	 */
	public static BinaryString reverse(BinaryString str) {
		str.ensureMaterialized();
		if (!str.inFirstSegment()) {
			return reverseMultiSegs(str);
		}

		final int size = str.getSizeInBytes();
		final byte[] reversed = new byte[size];
		final MemorySegment segment = str.getSegments()[0];
		final int base = str.getOffset();

		int read = 0;
		while (read < size) {
			final int charBytes = numBytesForFirstByte(str.getByteOneSegment(read));
			// A character starting at byte `read` ends up mirrored at the far end.
			segment.get(base + read, reversed, size - read - charBytes, charBytes);
			read += charBytes;
		}
		return BinaryString.fromBytes(reversed);
	}

	/**
	 * Multi-segment variant of {@code reverse}: walks the string with a
	 * {@code SegmentAndOffset} cursor and copies each character with
	 * {@code SegmentsUtil.copyMultiSegmentsToBytes}.
	 */
	private static BinaryString reverseMultiSegs(BinaryString str) {
		final int size = str.getSizeInBytes();
		final byte[] reversed = new byte[size];
		final int segSize = str.getSegments()[0].size();
		final BinaryString.SegmentAndOffset cursor = str.firstSegmentAndOffset(segSize);

		int read = 0;
		while (read < size) {
			final int charBytes = numBytesForFirstByte(cursor.value());
			// A character starting at byte `read` ends up mirrored at the far end.
			SegmentsUtil.copyMultiSegmentsToBytes(
					str.getSegments(),
					str.getOffset() + read,
					reversed,
					size - read - charBytes,
					charBytes);
			read += charBytes;
			cursor.skipBytes(charBytes, segSize);
		}
		return BinaryString.fromBytes(reversed);
	}

	/**
	 * Removes, from both ends of {@code str}, every character that occurs in
	 * {@code trimStr}; the left side is trimmed first, then the right.
	 *
	 * @param trimStr the trim string
	 * @return the trimmed string, or {@code null} if {@code trimStr} is {@code null}
	 */
	public static BinaryString trim(BinaryString str, BinaryString trimStr) {
		if (trimStr == null) {
			return null;
		}
		final BinaryString leftTrimmed = trimLeft(str, trimStr);
		return trimRight(leftTrimmed, trimStr);
	}

	/**
	 * Removes leading space (0x20) bytes from {@code str}.
	 *
	 * @return the empty string when only spaces remain, otherwise a copy of the rest
	 */
	public static BinaryString trimLeft(BinaryString str) {
		str.ensureMaterialized();
		if (!str.inFirstSegment()) {
			return trimLeftSlow(str);
		}

		final int size = str.getSizeInBytes();
		// Index of the first byte that is kept.
		int firstKept = 0;
		while (firstKept < size && str.getByteOneSegment(firstKept) == 0x20) {
			firstKept++;
		}
		if (firstKept == size) {
			return EMPTY_UTF8;
		}
		return str.copyBinaryStringInOneSeg(firstKept, size - firstKept);
	}

	/**
	 * Multi-segment variant of {@code trimLeft(BinaryString)}: skips leading space (0x20)
	 * bytes using a {@code SegmentAndOffset} cursor.
	 */
	private static BinaryString trimLeftSlow(BinaryString str) {
		final int segSize = str.getSegments()[0].size();
		final BinaryString.SegmentAndOffset cursor = str.firstSegmentAndOffset(segSize);
		final int size = str.getSizeInBytes();

		// Number of leading space bytes seen so far.
		int skipped = 0;
		while (skipped < size && cursor.value() == 0x20) {
			cursor.nextByte(segSize);
			skipped++;
		}
		if (skipped == size) {
			return EMPTY_UTF8;
		}
		// copyBinaryString is called with the last byte index here, not a length.
		return str.copyBinaryString(skipped, size - 1);
	}

	/**
	 * Tells whether {@code str} is exactly the one-character string {@code " "}.
	 *
	 * <p>Both representations are held to the same rule: the String form must equal
	 * {@code " "}, and the binary form must be exactly one byte long with that byte being a
	 * space. Checking the length first means a longer string that merely starts with a space
	 * is not mistaken for the space string, and an empty string is never indexed.
	 *
	 * @param str the string to test
	 * @return {@code true} only for the single-space string
	 */
	public static boolean isSpaceString(BinaryString str) {
		if (str.javaObject != null) {
			return str.javaObject.equals(" ");
		}
		return str.getSizeInBytes() == 1 && str.byteAt(0) == ' ';
	}

	/**
	 * Removes characters from the left end of {@code str} for as long as they occur in
	 * {@code trimStr}, stopping at the first character that does not.
	 *
	 * <p>When {@code trimStr} is the space string this delegates to
	 * {@code trimLeft(BinaryString)}; strings not confined to their first segment are handled
	 * by {@code trimLeftSlow}.
	 *
	 * @param trimStr the trim string
	 * @return the trimmed string, or {@code null} if {@code trimStr} is {@code null}
	 */
	public static BinaryString trimLeft(BinaryString str, BinaryString trimStr) {
		str.ensureMaterialized();
		if (trimStr == null) {
			return null;
		}
		trimStr.ensureMaterialized();
		if (isSpaceString(trimStr)) {
			return trimLeft(str);
		}
		if (!str.inFirstSegment()) {
			return trimLeftSlow(str, trimStr);
		}

		final int size = str.getSizeInBytes();
		// Byte index of the first character that is kept.
		int keepFrom = 0;
		while (keepFrom < size) {
			final int charBytes = numBytesForFirstByte(str.getByteOneSegment(keepFrom));
			final BinaryString currentChar = str.copyBinaryStringInOneSeg(keepFrom, charBytes);
			if (!trimStr.contains(currentChar)) {
				break;
			}
			keepFrom += charBytes;
		}
		if (keepFrom >= size) {
			// everything was trimmed
			return EMPTY_UTF8;
		}
		return str.copyBinaryStringInOneSeg(keepFrom, size - keepFrom);
	}

	/**
	 * Multi-segment variant of {@code trimLeft(BinaryString, BinaryString)}: walks the string
	 * with a {@code SegmentAndOffset} cursor and drops leading characters found in
	 * {@code trimStr}.
	 */
	private static BinaryString trimLeftSlow(BinaryString str, BinaryString trimStr) {
		final int segSize = str.getSegments()[0].size();
		final BinaryString.SegmentAndOffset cursor = str.firstSegmentAndOffset(segSize);
		final int size = str.getSizeInBytes();

		// Byte index of the first character that is kept.
		int keepFrom = 0;
		while (keepFrom < size) {
			final int charBytes = numBytesForFirstByte(cursor.value());
			// copyBinaryString is called with the last byte index here, not a length.
			final BinaryString currentChar = str.copyBinaryString(keepFrom, keepFrom + charBytes - 1);
			if (!trimStr.contains(currentChar)) {
				break;
			}
			keepFrom += charBytes;
			cursor.skipBytes(charBytes, segSize);
		}
		if (keepFrom == size) {
			// everything was trimmed
			return EMPTY_UTF8;
		}
		return str.copyBinaryString(keepFrom, size - 1);
	}

	/**
	 * Removes trailing space (0x20) bytes from {@code str}.
	 *
	 * @return the empty string when only spaces remain, otherwise a copy of the rest
	 */
	public static BinaryString trimRight(BinaryString str) {
		str.ensureMaterialized();
		if (!str.inFirstSegment()) {
			return trimRightSlow(str);
		}

		// Index of the last byte that is kept; -1 once everything has been skipped.
		int lastKept = str.getSizeInBytes() - 1;
		while (lastKept >= 0 && str.getByteOneSegment(lastKept) == 0x20) {
			lastKept--;
		}
		if (lastKept < 0) {
			return EMPTY_UTF8;
		}
		return str.copyBinaryStringInOneSeg(0, lastKept + 1);
	}

	/**
	 * Multi-segment variant of {@code trimRight(BinaryString)}: skips trailing space (0x20)
	 * bytes using a {@code SegmentAndOffset} cursor that moves backwards.
	 */
	private static BinaryString trimRightSlow(BinaryString str) {
		// Index of the last byte that is kept; -1 once everything has been skipped.
		int lastKept = str.getSizeInBytes() - 1;
		final int segSize = str.getSegments()[0].size();
		final BinaryString.SegmentAndOffset cursor = str.lastSegmentAndOffset(segSize);

		while (lastKept >= 0 && cursor.value() == 0x20) {
			cursor.previousByte(segSize);
			lastKept--;
		}
		if (lastKept < 0) {
			return EMPTY_UTF8;
		}
		// copyBinaryString is called with the last byte index here, not a length.
		return str.copyBinaryString(0, lastKept);
	}

	/**
	 * Removes characters from the right end of {@code str} for as long as they occur in
	 * {@code trimStr}, stopping at the first character that does not.
	 *
	 * <p>The string is first scanned left to right to record where each character starts and
	 * how many bytes it has, and then walked backwards over that index.
	 *
	 * @param trimStr the trim string
	 * @return the trimmed string, or {@code null} if {@code trimStr} is {@code null}
	 */
	public static BinaryString trimRight(BinaryString str, BinaryString trimStr) {
		str.ensureMaterialized();
		if (trimStr == null) {
			return null;
		}
		trimStr.ensureMaterialized();
		if (isSpaceString(trimStr)) {
			return trimRight(str);
		}
		if (!str.inFirstSegment()) {
			return trimRightSlow(str, trimStr);
		}

		final int size = str.getSizeInBytes();
		// Per-character index; sized by byte count, the upper bound on the character count.
		final int[] charLen = new int[size];
		final int[] charStart = new int[size];
		int charCount = 0;
		for (int bytePos = 0; bytePos < size; charCount++) {
			charStart[charCount] = bytePos;
			charLen[charCount] = numBytesForFirstByte(str.getByteOneSegment(bytePos));
			bytePos += charLen[charCount];
		}

		// Number of leading bytes that survive the trim.
		int keepBytes = size;
		for (int c = charCount - 1; c >= 0; c--) {
			final BinaryString currentChar = str.copyBinaryStringInOneSeg(charStart[c], charLen[c]);
			if (!trimStr.contains(currentChar)) {
				break;
			}
			keepBytes -= charLen[c];
		}
		if (keepBytes <= 0) {
			// everything was trimmed
			return EMPTY_UTF8;
		}
		return str.copyBinaryStringInOneSeg(0, keepBytes);
	}

	/**
	 * Multi-segment variant of {@code trimRight(BinaryString, BinaryString)}: builds the
	 * per-character index with a {@code SegmentAndOffset} cursor, then drops trailing
	 * characters found in {@code trimStr}.
	 */
	private static BinaryString trimRightSlow(BinaryString str, BinaryString trimStr) {
		final int segSize = str.getSegments()[0].size();
		final BinaryString.SegmentAndOffset cursor = str.firstSegmentAndOffset(segSize);
		final int size = str.getSizeInBytes();

		// Per-character index; sized by byte count, the upper bound on the character count.
		final int[] charLen = new int[size];
		final int[] charStart = new int[size];
		int charCount = 0;
		for (int bytePos = 0; bytePos < size; charCount++) {
			final int charBytes = numBytesForFirstByte(cursor.value());
			charStart[charCount] = bytePos;
			charLen[charCount] = charBytes;
			bytePos += charBytes;
			cursor.skipBytes(charBytes, segSize);
		}

		// Number of leading bytes that survive the trim.
		int keepBytes = size;
		for (int c = charCount - 1; c >= 0; c--) {
			// copyBinaryString is called with the last byte index here, not a length.
			final BinaryString currentChar = str.copyBinaryString(
					charStart[c],
					charStart[c] + charLen[c] - 1);
			if (!trimStr.contains(currentChar)) {
				break;
			}
			keepBytes -= charLen[c];
		}
		if (keepBytes <= 0) {
			// everything was trimmed
			return EMPTY_UTF8;
		}
		return str.copyBinaryString(0, keepBytes - 1);
	}

	/**
	 * Trims characters contained in {@code seek} from the selected ends of {@code str}.
	 *
	 * @param leading whether to trim the left end
	 * @param trailing whether to trim the right end
	 * @param seek the characters to remove
	 * @return {@code null} if {@code seek} is {@code null}; {@code str} itself when neither
	 *     end is selected; otherwise the trimmed string
	 */
	public static BinaryString trim(BinaryString str, boolean leading, boolean trailing, BinaryString seek) {
		str.ensureMaterialized();
		if (seek == null) {
			return null;
		}
		if (!leading && !trailing) {
			return str;
		}
		if (!leading) {
			return trimRight(str, seek);
		}
		if (!trailing) {
			return trimLeft(str, seek);
		}
		return trim(str, seek);
	}

	/**
	 * Null-tolerant {@code toString}.
	 *
	 * @return {@code null} for a {@code null} input, otherwise {@code str.toString()}
	 */
	public static String safeToString(BinaryString str) {
		return str == null ? null : str.toString();
	}
}