All downloads are free. Search and download functionality uses the official Maven repository.

org.apache.flink.table.codegen.util.StringLikeChainChecker Maven / Gradle / Ivy

The newest version!
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.	See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.	You may obtain a copy of the License at
 *
 *		http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.flink.table.codegen.util;

import org.apache.flink.core.memory.MemorySegment;
import org.apache.flink.table.dataformat.BinaryString;
import org.apache.flink.table.dataformat.util.BinaryRowUtil;

import org.apache.commons.lang3.ArrayUtils;

import java.util.ArrayList;
import java.util.List;
import java.util.StringTokenizer;

import static org.apache.flink.table.dataformat.BinaryString.fromString;

/**
 * String Like util:
 * Matches a chained sequence of checkers.
 *
 * 

This has 4 chain scenarios cases in it (has no escaping or single char wildcards) * 1) anchored left "abc%def%" * 2) anchored right "%abc%def" * 3) unanchored "%abc%def%" * 4) anchored on both sides "abc%def" */ public class StringLikeChainChecker { private final int minLen; private final BinaryString beginPattern; private final BinaryString endPattern; private final BinaryString[] middlePatterns; private final int[] midLens; private final int beginLen; private final int endLen; public StringLikeChainChecker(String pattern) { final StringTokenizer tokens = new StringTokenizer(pattern, "%"); final boolean leftAnchor = !pattern.startsWith("%"); final boolean rightAnchor = !pattern.endsWith("%"); int len = 0; // at least 2 checkers always BinaryString leftPattern = null; BinaryString rightPattern = null; int leftLen = 0; // not -1 int rightLen = 0; // not -1 final List middleCheckers = new ArrayList<>(2); final List lengths = new ArrayList<>(2); for (int i = 0; tokens.hasMoreTokens(); i++) { String chunk = tokens.nextToken(); if (chunk.length() == 0) { // %% is folded in the .*?.*? regex usually into .*? 
continue; } len += utf8Length(chunk); if (leftAnchor && i == 0) { // first item leftPattern = fromString(chunk); leftLen = utf8Length(chunk); } else if (rightAnchor && !tokens.hasMoreTokens()) { // last item rightPattern = fromString(chunk); rightLen = utf8Length(chunk); } else { // middle items in order middleCheckers.add(fromString(chunk)); lengths.add(utf8Length(chunk)); } } midLens = ArrayUtils.toPrimitive(lengths.toArray(ArrayUtils.EMPTY_INTEGER_OBJECT_ARRAY)); middlePatterns = middleCheckers.toArray(new BinaryString[0]); minLen = len; beginPattern = leftPattern; endPattern = rightPattern; beginLen = leftLen; endLen = rightLen; } public boolean check(BinaryString str) { MemorySegment[] segments = str.getSegments(); int pos = str.getOffset(); int mark = str.numBytes(); if (str.numBytes() < minLen) { return false; } // prefix, extend start if (beginPattern != null && !checkBegin(beginPattern, segments, pos, mark)) { // no match return false; } else { pos += beginLen; mark -= beginLen; } // suffix, reduce len if (endPattern != null && !checkEnd(endPattern, segments, pos, mark)) { // no match return false; } else { // no pos change - no need since we've shrunk the string with same pos mark -= endLen; } // loop for middles for (int i = 0; i < middlePatterns.length; i++) { int index = indexMiddle(middlePatterns[i], segments, pos, mark); if (index == -1) { // no match return false; } else { mark -= index - pos + midLens[i]; pos = index + midLens[i]; } } // if all is good return true; } private static int utf8Length(String chunk) { return fromString(chunk).numBytes(); } /** * Matches the beginning of each string to a pattern. */ private static boolean checkBegin( BinaryString pattern, MemorySegment[] segments, int start, int len) { int lenSub = pattern.numBytes(); return len >= lenSub && BinaryRowUtil.equals(pattern.getSegments(), 0, segments, start, lenSub); } /** * Matches the ending of each string to its pattern. 
*/ private static boolean checkEnd( BinaryString pattern, MemorySegment[] segments, int start, int len) { int lenSub = pattern.numBytes(); return len >= lenSub && BinaryRowUtil.equals( pattern.getSegments(), 0, segments, start + len - lenSub, lenSub); } /** * Matches the middle of each string to its pattern. * @return Returns absolute offset of the match. */ private static int indexMiddle( BinaryString pattern, MemorySegment[] segments, int start, int len) { return BinaryRowUtil.find( segments, start, len, pattern.getSegments(), pattern.getOffset(), pattern.numBytes()); } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy