All downloads are free. Search and download functionality uses the official Maven repository.

org.eclipse.jetty.util.SearchPattern Maven / Gradle / Ivy

There is a newer version: 12.1.0.alpha0
Show newest version
//
// ========================================================================
// Copyright (c) 1995 Mort Bay Consulting Pty Ltd and others.
//
// This program and the accompanying materials are made available under the
// terms of the Eclipse Public License v. 2.0 which is available at
// https://www.eclipse.org/legal/epl-2.0, or the Apache License, Version 2.0
// which is available at https://www.apache.org/licenses/LICENSE-2.0.
//
// SPDX-License-Identifier: EPL-2.0 OR Apache-2.0
// ========================================================================
//

package org.eclipse.jetty.util;

import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;

/**
 * <p>Fast search for patterns within strings, arrays of
 * bytes and {@code ByteBuffer}s.</p>
 * <p>Uses an implementation of the Boyer–Moore–Horspool
 * algorithm with a 256 character alphabet.</p>
 * <p>The algorithm has an average-case complexity of O(n)
 * on random text and O(nm) in the worst case, where
 * {@code m = pattern length} and {@code n = length of data to search}.</p>
 */
public class SearchPattern
{
    private static final int ALPHABET_SIZE = 256;

    // Bad-character shift table: for each possible byte value, how far the
    // search window may advance when that byte is aligned with the last
    // position of the pattern.
    private final int[] table = new int[ALPHABET_SIZE];
    private final byte[] pattern;

    /**
     * <p>Creates a {@code SearchPattern} instance which can be used
     * to find matches of the pattern in data.</p>
     * <p>The given array is copied, so later changes to it do not
     * affect the returned instance.</p>
     *
     * @param pattern byte array containing the pattern to search
     * @return a new SearchPattern instance using the given pattern
     * @throws IllegalArgumentException if the pattern is empty
     */
    public static SearchPattern compile(byte[] pattern)
    {
        return new SearchPattern(Arrays.copyOf(pattern, pattern.length));
    }

    /**
     * <p>Creates a {@code SearchPattern} instance which can be used
     * to find matches of the pattern in data.</p>
     * <p>The pattern string must only contain ASCII characters.</p>
     *
     * @param pattern string containing the pattern to search
     * @return a new SearchPattern instance using the given pattern
     * @throws IllegalArgumentException if the pattern is empty
     */
    public static SearchPattern compile(String pattern)
    {
        return compile(pattern.getBytes(StandardCharsets.US_ASCII));
    }

    /**
     * @param pattern byte array containing the pattern used for matching
     */
    private SearchPattern(byte[] pattern)
    {
        this.pattern = pattern;
        if (pattern.length == 0)
            throw new IllegalArgumentException("Empty Pattern");

        // Build up the pre-processed table for this pattern.
        // Bytes that do not occur in the pattern (or occur only as its last
        // byte) allow a shift by the whole pattern length; every other byte
        // shifts by the distance from its last occurrence to the pattern end.
        Arrays.fill(table, pattern.length);
        for (int i = 0; i < pattern.length - 1; ++i)
        {
            table[0xff & pattern[i]] = pattern.length - 1 - i;
        }
    }

    /**
     * Returns the internal pattern array without copying it. Callers must not
     * modify the returned array, because the pre-processed shift table was
     * derived from its contents at construction time.
     *
     * @return the pattern to search.
     */
    public byte[] getPattern()
    {
        return pattern;
    }

    /**
     * Search for a complete match of the pattern within the data
     *
     * @param data The data in which to search for. The data may be arbitrary binary data,
     * but the pattern will always be {@link StandardCharsets#US_ASCII} encoded.
     * @param offset The offset within the data to start the search
     * @param length The length of the data to search
     * @return The index within the data array at which the first instance of the pattern
     * was found, or -1 if not found
     * @throws IllegalArgumentException if {@code offset} or {@code length} is negative,
     * or if {@code offset + length} exceeds the length of {@code data}
     */
    public int match(byte[] data, int offset, int length)
    {
        validateArgs(data, offset, length);

        int skip = offset;
        while (skip <= offset + length - pattern.length)
        {
            // Compare right-to-left; the loop ends on the first mismatch.
            for (int i = pattern.length - 1; data[skip + i] == pattern[i]; i--)
            {
                if (i == 0)
                    return skip;
            }

            // Shift according to the byte aligned with the end of the pattern.
            skip += table[0xff & data[skip + pattern.length - 1]];
        }

        return -1;
    }

    /**
     * <p>Searches for a full match of the pattern in the {@code ByteBuffer}.</p>
     * <p>The {@code ByteBuffer} may contain arbitrary binary data, but the pattern will
     * always be {@link StandardCharsets#US_ASCII} encoded.</p>
     * <p>The position and limit of the {@code ByteBuffer} are not changed.</p>
     *
     * @param buffer the {@code ByteBuffer} to search into
     * @return the number of bytes after the buffer's position at which the full pattern
     * was found, or -1 if the full pattern was not found
     */
    public int match(ByteBuffer buffer)
    {
        int remaining = buffer.remaining();
        int cursor = 0;
        while (remaining - cursor >= getLength())
        {
            int i = getLength() - 1;
            // Absolute reads only, so the buffer's position is left untouched.
            while (buffer.get(buffer.position() + cursor + i) == pattern[i])
            {
                if (i == 0)
                    return cursor;
                --i;
            }
            cursor += table[buffer.get(buffer.position() + cursor + getLength() - 1) & 0xFF];
        }
        return -1;
    }

    /**
     * Search for a partial match of the pattern at the end of the data.
     *
     * @param data The data in which to search for. The data may be arbitrary binary data,
     * but the pattern will always be {@link StandardCharsets#US_ASCII} encoded.
     * @param offset The offset within the data to start the search
     * @param length The length of the data to search
     * @return the length of the partial pattern matched and 0 for no match.
     * @throws IllegalArgumentException if {@code offset} or {@code length} is negative,
     * or if {@code offset + length} exceeds the length of {@code data}
     */
    public int endsWith(byte[] data, int offset, int length)
    {
        validateArgs(data, offset, length);

        // Start at the earliest index from which a pattern prefix could still
        // reach the end of the searched region.
        int skip = (pattern.length <= length) ? (offset + length - pattern.length) : offset;
        while (skip < offset + length)
        {
            for (int i = (offset + length - 1) - skip; data[skip + i] == pattern[i]; --i)
            {
                if (i == 0)
                    return (offset + length - skip);
            }

            // We can't use the pre-processed table as we are not matching on the full pattern.
            skip++;
        }

        return 0;
    }

    /**
     * <p>Searches for a partial match of the pattern at the end of the {@code ByteBuffer}.</p>
     * <p>The {@code ByteBuffer} may contain arbitrary binary data, but the pattern will
     * always be {@link StandardCharsets#US_ASCII} encoded.</p>
     * <p>The position and limit of the {@code ByteBuffer} are not changed.</p>
     *
     * @param buffer the {@code ByteBuffer} to search into
     * @return how many bytes of the pattern were matched at the end of the {@code ByteBuffer},
     * or 0 for no match
     */
    public int endsWith(ByteBuffer buffer)
    {
        int limit = buffer.limit();
        int cursor = getLength() <= buffer.remaining() ? limit - getLength() : buffer.position();
        while (cursor < limit)
        {
            int i = limit - 1 - cursor;
            while (buffer.get(cursor + i) == pattern[i])
            {
                if (i == 0)
                    return limit - cursor;
                --i;
            }

            // Cannot use the pre-processed table as we are not matching on the full pattern.
            ++cursor;
        }
        return 0;
    }

    /**
     * Search for a possibly partial match of the pattern at the start of the data.
     *
     * @param data The data in which to search for. The data may be arbitrary binary data,
     * but the pattern will always be {@link StandardCharsets#US_ASCII} encoded.
     * @param offset The offset within the data to start the search
     * @param length The length of the data to search
     * @param matched The length of the partial pattern already matched
     * @return the length of the partial pattern matched and 0 for no match.
     * @throws IllegalArgumentException if {@code offset} or {@code length} is negative,
     * or if {@code offset + length} exceeds the length of {@code data}
     */
    public int startsWith(byte[] data, int offset, int length, int matched)
    {
        validateArgs(data, offset, length);

        int matchedCount = 0;
        for (int i = 0; i < pattern.length - matched && i < length; i++)
        {
            if (data[offset + i] == pattern[i + matched])
                matchedCount++;
            else
                return 0;
        }

        return matched + matchedCount;
    }

    /**
     * <p>Searches for a partial match of the pattern at the beginning of the {@code ByteBuffer}.</p>
     * <p>The {@code ByteBuffer} may contain arbitrary binary data, but the pattern will
     * always be {@link StandardCharsets#US_ASCII} encoded.</p>
     * <p>The position and limit of the {@code ByteBuffer} are not changed.</p>
     * <p>If the buffer has no remaining bytes, {@code matched} is returned unchanged,
     * mirroring the {@code byte[]} variant when called with a length of zero.</p>
     *
     * @param buffer the {@code ByteBuffer} to search into
     * @param matched how many bytes of the pattern were already matched
     * @return how many bytes of the pattern were matched (including those already matched)
     * at the beginning of the {@code ByteBuffer}, or 0 for no match
     */
    public int startsWith(ByteBuffer buffer, int matched)
    {
        int limit = buffer.limit();
        int position = buffer.position();
        int total = matched;

        // Check the limit BEFORE reading: an absolute get() at or beyond the
        // limit throws IndexOutOfBoundsException, which an empty buffer would
        // otherwise trigger on the very first iteration.
        while (total < pattern.length && position < limit)
        {
            if (buffer.get(position) != pattern[total])
                return 0;
            ++total;
            ++position;
        }
        return total;
    }

    /**
     * Performs legality checks for standard arguments input into SearchPattern methods.
     *
     * @param data The data in which to search for. The data may be arbitrary binary data,
     * but the pattern will always be {@link StandardCharsets#US_ASCII} encoded.
     * @param offset The offset within the data to start the search
     * @param length The length of the data to search
     * @throws IllegalArgumentException if {@code offset} or {@code length} is negative,
     * or if the region {@code [offset, offset + length)} does not fit within {@code data}
     */
    private void validateArgs(byte[] data, int offset, int length)
    {
        if (offset < 0)
            throw new IllegalArgumentException("offset was negative");
        else if (length < 0)
            throw new IllegalArgumentException("length was negative");
        // Written as a subtraction so that large offset/length values cannot
        // overflow int and slip past the bounds check; offset is known to be
        // non-negative here, so (data.length - offset) itself cannot overflow.
        else if (length > data.length - offset)
            throw new IllegalArgumentException("(offset+length) out of bounds of data[]");
    }

    /**
     * @return The length of the pattern in bytes.
     */
    public int getLength()
    {
        return pattern.length;
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy