All downloads are free. The search and download functionality uses the official Maven repository.

org.rapidoidx.bytes.BytesUtil Maven / Gradle / Ivy

There is a newer version: 5.0.3
Show newest version
package org.rapidoidx.bytes;

/*
 * #%L
 * rapidoid-x-buffer
 * %%
 * Copyright (C) 2014 - 2015 Nikolche Mihajlovski and contributors
 * %%
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 * 
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU Affero General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 * #L%
 */

import org.rapidoid.annotation.Authors;
import org.rapidoid.annotation.Since;
import org.rapidoid.util.Constants;
import org.rapidoid.util.U;
import org.rapidoid.util.UTILS;
import org.rapidoid.wrap.LongWrap;
import org.rapidoidx.data.Range;
import org.rapidoidx.data.Ranges;

@Authors("Nikolche Mihajlovski")
@Since("3.0.0")
public class BytesUtil implements Constants {

	/**
	 * Lookup table over the 7-bit ASCII range: every letter maps to the same letter in the opposite case, every
	 * other code maps to itself.
	 */
	public static final byte[] CHARS_SWITCH_CASE = new byte[128];

	static {
		// start from the identity mapping...
		for (int code = 0; code < CHARS_SWITCH_CASE.length; code++) {
			CHARS_SWITCH_CASE[code] = (byte) code;
		}

		// ...then cross-link each lower-case letter with its upper-case twin (32 positions apart in ASCII)
		for (char lower = 'a'; lower <= 'z'; lower++) {
			char upper = (char) (lower - 32);
			CHARS_SWITCH_CASE[lower] = (byte) upper;
			CHARS_SWITCH_CASE[upper] = (byte) lower;
		}
	}

	/**
	 * Exposes a byte array through the {@link Bytes} abstraction by wrapping it in a {@code ByteArrayBytes}.
	 *
	 * @param bytes
	 *            the array to wrap (NOTE(review): whether the wrapper copies or shares the array is decided by
	 *            {@code ByteArrayBytes} - confirm there)
	 * @return a new {@code ByteArrayBytes} instance constructed from the array
	 */
	public static Bytes from(byte[] bytes) {
		return new ByteArrayBytes(bytes);
	}

	/**
	 * Exposes a string through the {@link Bytes} abstraction by wrapping it in a {@code StringBytes}.
	 *
	 * @param s
	 *            the string to wrap (NOTE(review): how characters are mapped to bytes is decided by
	 *            {@code StringBytes} - confirm there)
	 * @return a new {@code StringBytes} instance constructed from the string
	 */
	public static Bytes from(String s) {
		return new StringBytes(s);
	}

	/**
	 * Splits the interval [start, limit) into lines terminated by LF or CRLF, adding each non-empty line to
	 * {@code lines}, and stops at the first empty line.
	 * <p>
	 * While scanning, {@code res.value} is set to the index (the value of {@code lines.count} before the line is
	 * added) of the most recent non-empty line whose last two bytes before the terminator are {@code end1} followed
	 * by {@code end2}; it is {@code NOT_FOUND} if no line qualified.
	 *
	 * @return the position right after the terminator of the first empty line, or -1 if the limit was reached
	 *         without encountering an empty line
	 */
	public static long parseLines(Bytes bytes, Ranges lines, LongWrap res, long start, long limit, byte end1, byte end2) {
		// sliding window over the four most recently read bytes, oldest first
		byte prev3 = 0, prev2 = 0, prev1 = 0, current = 0;
		res.value = NOT_FOUND;

		long lineStart = start;

		for (long pos = start; pos < limit; pos++) {
			prev3 = prev2;
			prev2 = prev1;
			prev1 = current;
			current = bytes.get(pos);

			if (current != LF) {
				continue;
			}

			// with a CRLF terminator the line content ends one byte earlier
			boolean crlf = prev1 == CR;
			long len = crlf ? pos - lineStart - 1 : pos - lineStart;
			byte tail1 = crlf ? prev3 : prev2;
			byte tail2 = crlf ? prev2 : prev1;

			if (tail1 == end1 && tail2 == end2 && len > 0) {
				res.value = lines.count;
			}

			if (len == 0) {
				// an empty line ends the parsing
				return pos + 1;
			}

			lines.add(lineStart, len);
			lineStart = pos + 1;
		}

		return -1;
	}

	/**
	 * Splits the interval [start, limit) into lines terminated by LF or CRLF, adding each non-empty line to
	 * {@code lines}, and stops at the first empty line.
	 *
	 * @return the position right after the terminator of the first empty line, or -1 if the limit was reached
	 *         without encountering an empty line
	 */
	public static long parseLines(Bytes bytes, Ranges lines, long start, long limit) {
		byte previous = 0, current = 0;
		long lineStart = start;

		for (long pos = start; pos < limit; pos++) {
			previous = current;
			current = bytes.get(pos);

			if (current != LF) {
				continue;
			}

			long len = pos - lineStart;
			if (previous == CR) {
				// the CR belongs to the terminator, not to the line
				len--;
			}

			if (len == 0) {
				// an empty line ends the parsing
				return pos + 1;
			}

			lines.add(lineStart, len);
			lineStart = pos + 1;
		}

		return -1;
	}

	/**
	 * Reads a single line, terminated by LF or CRLF, starting at {@code start} and looking no further than
	 * {@code limit} (exclusive). On success {@code line} is set to the line content without its terminator.
	 *
	 * @return the position right after the line terminator, or -1 if no LF was found before the limit (in which case
	 *         {@code line} is left untouched)
	 */
	public static long parseLine(Bytes bytes, Range line, long start, long limit) {
		byte previous = 0;

		for (long pos = start; pos < limit; pos++) {
			byte current = bytes.get(pos);

			if (current == LF) {
				// a CR right before the LF is part of the terminator
				long len = (previous == CR) ? pos - start - 1 : pos - start;
				line.set(start, len);
				return pos + 1;
			}

			previous = current;
		}

		return -1;
	}

	/**
	 * Returns the first of the ranges whose content starts with the given prefix.
	 *
	 * @return the first matching range, or {@code null} if none of the ranges starts with the prefix
	 */
	public static Range getByPrefix(Bytes bytes, Ranges ranges, byte[] prefix, boolean caseSensitive) {
		int index = 0;

		while (index < ranges.count) {
			Range candidate = ranges.ranges[index];

			if (startsWith(bytes, candidate, prefix, caseSensitive)) {
				return candidate;
			}

			index++;
		}

		return null;
	}

	/**
	 * Copies the bytes covered by the range (see {@link #getBytes(Bytes, Range)}) and turns them into a string.
	 * <p>
	 * The bytes are decoded with the single-argument {@code String(byte[])} constructor, which uses the platform's
	 * default charset, so the result for non-ASCII content can differ between environments.
	 *
	 * @return a new string built from the bytes in the range
	 */
	public static String get(Bytes bytes, Range range) {
		return new String(getBytes(bytes, range));
	}

	/**
	 * Copies the bytes covered by the range into a freshly allocated array.
	 * <p>
	 * The range length is narrowed to {@code int}, because that is the maximum size of a Java array.
	 *
	 * @return a new array holding the bytes at positions {@code range.start .. range.start + length - 1}
	 */
	public static byte[] getBytes(Bytes bytes, Range range) {
		int size = (int) range.length;
		byte[] copy = new byte[size];

		int idx = 0;
		while (idx < size) {
			copy[idx] = bytes.get(range.start + idx);
			idx++;
		}

		return copy;
	}

	/**
	 * Searches the closed interval [from, to] for the first byte equal to {@code value}.
	 *
	 * @return the position of the first occurrence, or -1 if the value does not occur in the interval
	 */
	public static long scan(Bytes bytes, long from, long to, byte value) {
		long pos = from;

		while (pos <= to) {
			if (bytes.get(pos) == value) {
				return pos;
			}
			pos++;
		}

		return -1;
	}

	/**
	 * Searches the closed interval [from, to] for the first byte equal to {@code value}, ignoring the case of ASCII
	 * letters.
	 *
	 * @return the position of the first occurrence, or -1 if the value does not occur in the interval
	 */
	public static long scanNoCase(Bytes bytes, long from, long to, byte value) {
		for (long pos = from; pos <= to; pos++) {
			byte current = bytes.get(pos);

			if (current == value) {
				return pos;
			}

			// bytes below 'A' (which includes negative, i.e. non-ASCII, bytes) have no case to switch
			if (current >= 'A' && CHARS_SWITCH_CASE[current] == value) {
				return pos;
			}
		}

		return -1;
	}

	/**
	 * Checks whether the bytes starting at {@code start} are equal to {@code length} bytes of {@code match}, taken
	 * from {@code offset}. Delegates to {@link #matchSensitive} or {@link #matchNoCase}, depending on the
	 * {@code caseSensitive} flag.
	 *
	 * @return {@code true} if all the compared bytes match
	 */
	public static boolean match(Bytes bytes, long start, byte[] match, int offset, int length, boolean caseSensitive) {
		return caseSensitive
				? matchSensitive(bytes, start, match, offset, length)
				: matchNoCase(bytes, start, match, offset, length);
	}

	/**
	 * Checks whether the bytes starting at {@code start} are equal to {@code length} bytes of {@code match}, taken
	 * from {@code offset}, ignoring the case of ASCII letters.
	 *
	 * @return {@code true} if all the compared bytes match (trivially {@code true} when {@code length} is not
	 *         positive)
	 */
	public static boolean matchNoCase(Bytes bytes, long start, byte[] match, int offset, int length) {
		for (int i = 0; i < length; i++) {
			byte actual = bytes.get(start + i);
			byte expected = match[offset + i];

			if (actual == expected) {
				continue;
			}

			// bytes below 'A' (which includes negative, i.e. non-ASCII, bytes) have no case to switch
			boolean hasOtherCase = actual >= 'A';

			if (!hasOtherCase || CHARS_SWITCH_CASE[actual] != expected) {
				return false;
			}
		}

		return true;
	}

	/**
	 * Checks whether the bytes starting at {@code start} are exactly equal to {@code length} bytes of {@code match},
	 * taken from {@code offset}.
	 *
	 * @return {@code true} if all the compared bytes are equal (trivially {@code true} when {@code length} is not
	 *         positive)
	 */
	public static boolean matchSensitive(Bytes bytes, long start, byte[] match, int offset, int length) {
		int i = 0;

		while (i < length) {
			if (bytes.get(start + i) != match[offset + i]) {
				return false;
			}
			i++;
		}

		return true;
	}

	/**
	 * Checks whether the bytes starting at {@code start} are equal to the whole {@code match} array. Shortcut for the
	 * six-argument {@code match} overload, called with offset 0 and length {@code match.length}.
	 *
	 * @return {@code true} if all the compared bytes match
	 */
	public static boolean match(Bytes bytes, long start, byte[] match, boolean caseSensitive) {
		return match(bytes, start, match, 0, match.length, caseSensitive);
	}

	/**
	 * Finds the first occurrence of a single byte inside the half-open interval [start, limit).
	 *
	 * @return the position of the first occurrence, or -1 if the interval is empty or the byte does not occur in it
	 */
	public static long find(Bytes bytes, long start, long limit, byte match, boolean caseSensitive) {

		assert start >= 0;
		assert limit >= 0;

		boolean emptyInterval = limit - start < 1;
		if (emptyInterval) {
			return -1;
		}

		// the scan methods work on closed intervals, hence the inclusive last position
		long last = limit - 1;

		return caseSensitive ? scan(bytes, start, last, match) : scanNoCase(bytes, start, last, match);
	}

	/**
	 * Finds the first occurrence of the whole {@code match} array inside the interval delimited by {@code start} and
	 * {@code limit}. Shortcut for the seven-argument {@code find} overload, called with offset 0 and length
	 * {@code match.length}.
	 *
	 * @return whatever the delegated {@code find} call returns
	 */
	public static long find(Bytes bytes, long start, long limit, byte[] match, boolean caseSensitive) {
		return find(bytes, start, limit, match, 0, match.length, caseSensitive);
	}

	/**
	 * Finds the first occurrence of a byte pattern ({@code length} bytes of {@code match}, taken from
	 * {@code offset}) inside the interval delimited by {@code start} and {@code limit}. Delegates to the
	 * case-sensitive or to the case-insensitive search, depending on the {@code caseSensitive} flag.
	 *
	 * @return whatever the delegated search returns
	 */
	public static long find(Bytes bytes, long start, long limit, byte[] match, int offset, int length,
			boolean caseSensitive) {

		assert start >= 0;
		assert limit >= 0;
		assert offset >= 0;
		assert length >= 0;

		return caseSensitive
				? findSensitive(bytes, start, limit, match, offset, length)
				: findNoCase(bytes, start, limit, match, offset, length);
	}

	/**
	 * Finds the first occurrence of a byte pattern inside the half-open interval [start, limit), ignoring the case
	 * of ASCII letters. Mirrors the case-sensitive search, but is built on {@link #scanNoCase} and
	 * {@link #matchNoCase}.
	 *
	 * @param bytes
	 *            the bytes to search in
	 * @param start
	 *            first position to consider (inclusive)
	 * @param limit
	 *            end of the searched interval (exclusive)
	 * @param match
	 *            array containing the pattern
	 * @param offset
	 *            index of the first pattern byte inside {@code match}
	 * @param length
	 *            number of pattern bytes
	 * @return the position of the first occurrence, {@code start} for an empty pattern, or -1 if the pattern does not
	 *         fit into the interval or does not occur in it
	 */
	private static long findNoCase(Bytes bytes, long start, long limit, byte[] match, long offset, long length) {
		if (limit - start < length) {
			return -1;
		}

		if (length == 0) {
			// an empty pattern matches right away (and there is no first byte to scan for)
			return start;
		}

		// array indices are ints; the public find(...) overload passes int values, so nothing is lost here
		int patternOffset = (int) offset;
		int patternLength = (int) length;

		long pos = start;
		long last = limit - length; // last position where the whole pattern still fits

		// jump from one candidate (occurrence of the first pattern byte) to the next, verifying each
		while ((pos = scanNoCase(bytes, pos, last, match[patternOffset])) >= 0) {
			if (matchNoCase(bytes, pos, match, patternOffset, patternLength)) {
				return pos;
			}
			pos++;
		}

		return -1;
	}

	/**
	 * Finds the first exact occurrence of a byte pattern inside the half-open interval [start, limit).
	 *
	 * @param bytes
	 *            the bytes to search in
	 * @param start
	 *            first position to consider (inclusive)
	 * @param limit
	 *            end of the searched interval (exclusive)
	 * @param match
	 *            array containing the pattern
	 * @param offset
	 *            index of the first pattern byte inside {@code match}
	 * @param length
	 *            number of pattern bytes
	 * @return the position of the first occurrence, {@code start} for an empty pattern, or -1 if the pattern does not
	 *         fit into the interval or does not occur in it
	 */
	private static long findSensitive(Bytes bytes, long start, long limit, byte[] match, int offset, int length) {
		if (limit - start < length) {
			return -1;
		}

		if (length == 0) {
			// an empty pattern matches right away (and there is no first byte to scan for)
			return start;
		}

		long pos = start;
		long last = limit - length; // last position where the whole pattern still fits

		// the pattern begins at match[offset], so that is the byte the candidates are located by
		byte first = match[offset];

		while ((pos = scan(bytes, pos, last, first)) >= 0) {
			if (matchSensitive(bytes, pos, match, offset, length)) {
				return pos;
			}
			pos++;
		}

		return -1;
	}

	/**
	 * Checks whether the content of the target range is equal to the whole {@code match} array.
	 *
	 * @return {@code false} if the lengths differ, the range starts before position 0 or reaches beyond the limit of
	 *         the bytes; otherwise the result of comparing the bytes
	 */
	public static boolean matches(Bytes bytes, Range target, byte[] match, boolean caseSensitive) {

		if (target.length != match.length) {
			return false;
		}

		if (target.start < 0) {
			return false;
		}

		if (target.last() >= bytes.limit()) {
			return false;
		}

		return match(bytes, target.start, match, caseSensitive);
	}

	/**
	 * Checks whether the content of the target range begins with the whole {@code match} array.
	 *
	 * @return {@code false} if the range is shorter than the prefix, starts before position 0 or reaches beyond the
	 *         limit of the bytes; otherwise the result of comparing the bytes
	 */
	public static boolean startsWith(Bytes bytes, Range target, byte[] match, boolean caseSensitive) {

		if (target.length < match.length) {
			return false;
		}

		if (target.start < 0) {
			return false;
		}

		if (target.last() >= bytes.limit()) {
			return false;
		}

		return match(bytes, target.start, match, caseSensitive);
	}

	/**
	 * Checks whether the target range contains the whole {@code match} array at the given offset, relative to the
	 * start of the range.
	 *
	 * @return {@code false} if the offset is negative, the match does not fit into the range at that offset, the
	 *         range starts before position 0 or reaches beyond the limit of the bytes; otherwise the result of
	 *         comparing the bytes
	 */
	public static boolean containsAt(Bytes bytes, Range target, long offset, byte[] match, boolean caseSensitive) {

		if (offset < 0) {
			return false;
		}

		if (target.length < offset + match.length) {
			return false;
		}

		if (target.start < 0) {
			return false;
		}

		if (target.last() >= bytes.limit()) {
			return false;
		}

		return match(bytes, target.start + offset, match, caseSensitive);
	}

	/**
	 * Shrinks the target range in place, so that it no longer covers leading and trailing space characters
	 * ({@code ' '} only, other whitespace is left alone).
	 * <p>
	 * A range consisting solely of spaces becomes empty (length 0). Ranges with a negative start or with zero length
	 * are left untouched.
	 *
	 * @param bytes
	 *            the bytes that the range refers to
	 * @param target
	 *            the range to trim; its {@code start} and {@code length} are updated
	 */
	public static void trim(Bytes bytes, Range target) {

		long start = target.start;
		long len = target.length;

		if (start < 0 || len == 0) {
			return;
		}

		long finish = start + len - 1;

		// "<=" (rather than "<") lets the scan consume the very last byte too, when everything is spaces
		while (start <= finish && bytes.get(start) == ' ') {
			start++;
		}

		// not entered if the first loop already consumed the whole range (start == finish + 1)
		while (finish >= start && bytes.get(finish) == ' ') {
			finish--;
		}

		target.start = start;
		target.length = finish - start + 1;
	}

	/**
	 * Splits the target range around the first occurrence of the separator byte.
	 * <p>
	 * If the separator is found, {@code before} receives the interval from the start of the target up to the
	 * separator position and {@code after} receives the interval from the position following the separator up to
	 * the limit of the target. Otherwise {@code before} is assigned the whole target and {@code after} is reset.
	 * With {@code trimParts} set, the assigned parts are trimmed.
	 *
	 * @return {@code true} if the separator was found
	 */
	public static boolean split(Bytes bytes, Range target, byte sep, Range before, Range after, boolean trimParts) {

		long sepPos = find(bytes, target.start, target.limit(), sep, true);
		boolean found = sepPos >= 0;

		if (found) {
			before.setInterval(target.start, sepPos);
			after.setInterval(sepPos + 1, target.limit());
		} else {
			before.assign(target);
			after.reset();
		}

		if (trimParts) {
			trim(bytes, before);

			// "after" has content only when the separator was found
			if (found) {
				trim(bytes, after);
			}
		}

		return found;
	}

	/**
	 * Scans the buffer from {@code fromPos} to {@code toPos} (both inclusive) until the specified separator is found,
	 * and matches the 4-byte prefix of the scanned selection against the specified search prefix.
	 * <p>
	 * When the separator is found, {@code result} is set to the selection preceding it and the position right after
	 * the separator is returned. If additionally the first four bytes of the selection (combined by
	 * {@code UTILS.intFrom}) are equal to {@code searchPrefix}, the negative of that position is returned instead, to
	 * mark the prefix match. A separator found within the first four bytes never reports a prefix match.
	 * <p>
	 * When {@code toPos} is passed without finding the separator, {@code result} is reset and {@code NOT_FOUND} is
	 * returned (NOTE(review): the original documentation states this is -1 - confirm against {@code Constants}).
	 * <p>
	 * Duplicated code for performance reasons.
	 */
	public static long scanUntilAndMatchPrefix(Bytes bytes, Range result, byte separator, long fromPos, long toPos,
			long searchPrefix) {

		byte b0, b1, b2, b3;

		// the first four bytes are read one by one (unrolled), as they make up the prefix
		long p = fromPos;
		if (p <= toPos) {
			b0 = bytes.get(p);
			if (b0 == separator) {
				result.set(fromPos, 0);
				return p + 1;
			}
		} else {
			result.reset();
			return NOT_FOUND;
		}

		p++;
		if (p <= toPos) {
			b1 = bytes.get(p);
			if (b1 == separator) {
				result.set(fromPos, 1);
				return p + 1;
			}
		} else {
			result.reset();
			return NOT_FOUND;
		}

		p++;
		if (p <= toPos) {
			b2 = bytes.get(p);
			if (b2 == separator) {
				result.set(fromPos, 2);
				return p + 1;
			}
		} else {
			result.reset();
			return NOT_FOUND;
		}

		p++;
		if (p <= toPos) {
			b3 = bytes.get(p);
			if (b3 == separator) {
				result.set(fromPos, 3);
				return p + 1;
			}
		} else {
			result.reset();
			return NOT_FOUND;
		}

		// none of the first four bytes is the separator, so they form a complete prefix
		long prefix = UTILS.intFrom(b0, b1, b2, b3);

		boolean matchedPrefix = prefix == searchPrefix;

		// the scan resumes at p (the position of b3, already known not to be the separator)
		for (long i = p; i <= toPos; i++) {
			if (bytes.get(i) == separator) {
				result.setInterval(fromPos, i);
				long nextPos = i + 1;
				return matchedPrefix ? -nextPos : nextPos;
			}
		}

		result.reset();
		return NOT_FOUND;
	}

	/**
	 * Scans the buffer from {@code fromPos} to {@code toPos} (both inclusive) until a line separator (CRLF or LF) is
	 * found, and matches the 4-byte prefix of the scanned selection against the specified search prefix.
	 * <p>
	 * When an LF is found, {@code result} is set to the line preceding it (without the CR of a CRLF separator) and the
	 * position right after the LF is returned. If additionally the first four bytes of the selection (combined by
	 * {@code UTILS.intFrom}) are equal to {@code searchPrefix}, the negative of that position is returned instead, to
	 * mark the prefix match. An LF found within the first four bytes never reports a prefix match.
	 * <p>
	 * When {@code toPos} is passed without finding an LF, {@code result} is reset and {@code NOT_FOUND} is returned
	 * (NOTE(review): the original documentation states this is -1 - confirm against {@code Constants}).
	 * <p>
	 * Duplicated code for performance reasons.
	 */
	public static long scanLnAndMatchPrefix(Bytes bytes, Range result, long fromPos, long toPos, long searchPrefix) {

		byte b0, b1, b2, b3;

		// the first four bytes are read one by one (unrolled), as they make up the prefix
		long p = fromPos;
		if (p <= toPos) {
			b0 = bytes.get(p);
			if (b0 == LF) {
				result.set(fromPos, 0);
				return p + 1;
			}
		} else {
			result.reset();
			return NOT_FOUND;
		}

		p++;
		if (p <= toPos) {
			b1 = bytes.get(p);
			if (b1 == LF) {
				// a CR right before the LF belongs to the separator, so the line is one byte shorter
				if (b0 == CR) {
					result.set(fromPos, 0);
				} else {
					result.set(fromPos, 1);
				}
				return p + 1;
			}
		} else {
			result.reset();
			return NOT_FOUND;
		}

		p++;
		if (p <= toPos) {
			b2 = bytes.get(p);
			if (b2 == LF) {
				if (b1 == CR) {
					result.set(fromPos, 1);
				} else {
					result.set(fromPos, 2);
				}
				return p + 1;
			}
		} else {
			result.reset();
			return NOT_FOUND;
		}

		p++;
		if (p <= toPos) {
			b3 = bytes.get(p);
			if (b3 == LF) {
				if (b2 == CR) {
					result.set(fromPos, 2);
				} else {
					result.set(fromPos, 3);
				}
				return p + 1;
			}
		} else {
			result.reset();
			return NOT_FOUND;
		}

		// none of the first four bytes is an LF, so they form a complete prefix
		long prefix = UTILS.intFrom(b0, b1, b2, b3);

		boolean matchedPrefix = prefix == searchPrefix;

		// the scan resumes at p (the position of b3, already known not to be an LF)
		for (long i = p; i <= toPos; i++) {
			if (bytes.get(i) == LF) {

				// i > p here, so i - 1 is still inside the scanned selection
				if (bytes.get(i - 1) == CR) {
					result.setInterval(fromPos, i - 1);
				} else {
					result.setInterval(fromPos, i);
				}

				long nextPos = i + 1;
				return matchedPrefix ? -nextPos : nextPos;
			}
		}

		result.reset();
		return NOT_FOUND;
	}

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy