
dev.cel.extensions.CelStringExtensions Maven / Gradle / Ivy
// Copyright 2022 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package dev.cel.extensions;
import static java.lang.Math.max;
import static java.lang.Math.min;
import com.google.common.base.Ascii;
import com.google.common.base.Joiner;
import com.google.common.base.Splitter;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Lists;
import com.google.errorprone.annotations.Immutable;
import dev.cel.checker.CelCheckerBuilder;
import dev.cel.common.CelFunctionDecl;
import dev.cel.common.CelOverloadDecl;
import dev.cel.common.internal.CelCodePointArray;
import dev.cel.common.types.ListType;
import dev.cel.common.types.SimpleType;
import dev.cel.compiler.CelCompilerLibrary;
import dev.cel.runtime.CelEvaluationException;
import dev.cel.runtime.CelRuntime;
import dev.cel.runtime.CelRuntimeBuilder;
import dev.cel.runtime.CelRuntimeLibrary;
import java.util.ArrayList;
import java.util.List;
import java.util.Set;
/** Internal implementation of CEL string extensions. */
@Immutable
public final class CelStringExtensions implements CelCompilerLibrary, CelRuntimeLibrary {
/** Denotes the string extension function */
@SuppressWarnings({"unchecked"}) // Unchecked: Type-checker guarantees casting safety.
public enum Function {
CHAR_AT(
CelFunctionDecl.newFunctionDeclaration(
"charAt",
CelOverloadDecl.newMemberOverload(
"string_char_at_int",
"Returns the character at the given position. If the position is negative, or"
+ " greater than the length of the string, the function will produce an error.",
SimpleType.STRING,
ImmutableList.of(SimpleType.STRING, SimpleType.INT))),
CelRuntime.CelFunctionBinding.from(
"string_char_at_int", String.class, Long.class, CelStringExtensions::charAt)),
INDEX_OF(
CelFunctionDecl.newFunctionDeclaration(
"indexOf",
CelOverloadDecl.newMemberOverload(
"string_index_of_string",
"Returns the integer index of the first occurrence of the search string. If the"
+ " search string is not found the function returns -1.",
SimpleType.INT,
ImmutableList.of(SimpleType.STRING, SimpleType.STRING)),
CelOverloadDecl.newMemberOverload(
"string_index_of_string_int",
"Returns the integer index of the first occurrence of the search string from the"
+ " given offset. If the search string is not found the function returns"
+ " -1. If the substring is the empty string, the index where the search starts"
+ " is returned (zero or custom).",
SimpleType.INT,
ImmutableList.of(SimpleType.STRING, SimpleType.STRING, SimpleType.INT))),
CelRuntime.CelFunctionBinding.from(
"string_index_of_string", String.class, String.class, CelStringExtensions::indexOf),
CelRuntime.CelFunctionBinding.from(
"string_index_of_string_int",
ImmutableList.of(String.class, String.class, Long.class),
CelStringExtensions::indexOf)),
JOIN(
CelFunctionDecl.newFunctionDeclaration(
"join",
CelOverloadDecl.newMemberOverload(
"list_join",
"Returns a new string where the elements of string list are concatenated.",
SimpleType.STRING,
ListType.create(SimpleType.STRING)),
CelOverloadDecl.newMemberOverload(
"list_join_string",
"Returns a new string where the elements of string list are concatenated using the"
+ " separator.",
SimpleType.STRING,
ImmutableList.of(ListType.create(SimpleType.STRING), SimpleType.STRING))),
CelRuntime.CelFunctionBinding.from("list_join", List.class, CelStringExtensions::join),
CelRuntime.CelFunctionBinding.from(
"list_join_string", List.class, String.class, CelStringExtensions::join)),
LAST_INDEX_OF(
CelFunctionDecl.newFunctionDeclaration(
"lastIndexOf",
CelOverloadDecl.newMemberOverload(
"string_last_index_of_string",
"Returns the integer index of the last occurrence of the search string. If the"
+ " search string is not found the function returns -1.",
SimpleType.INT,
ImmutableList.of(SimpleType.STRING, SimpleType.STRING)),
CelOverloadDecl.newMemberOverload(
"string_last_index_of_string_int",
"Returns the integer index of the last occurrence of the search string from the"
+ " given offset. If the search string is not found the function returns -1. If"
+ " the substring is the empty string, the index where the search starts is"
+ " returned (string length or custom).",
SimpleType.INT,
ImmutableList.of(SimpleType.STRING, SimpleType.STRING, SimpleType.INT))),
CelRuntime.CelFunctionBinding.from(
"string_last_index_of_string",
String.class,
String.class,
CelStringExtensions::lastIndexOf),
CelRuntime.CelFunctionBinding.from(
"string_last_index_of_string_int",
ImmutableList.of(String.class, String.class, Long.class),
CelStringExtensions::lastIndexOf)),
LOWER_ASCII(
CelFunctionDecl.newFunctionDeclaration(
"lowerAscii",
CelOverloadDecl.newMemberOverload(
"string_lower_ascii",
"Returns a new string where all ASCII characters are lower-cased. This function"
+ " does not perform Unicode case-mapping for characters outside the ASCII"
+ " range.",
SimpleType.STRING,
SimpleType.STRING)),
CelRuntime.CelFunctionBinding.from("string_lower_ascii", String.class, Ascii::toLowerCase)),
REPLACE(
CelFunctionDecl.newFunctionDeclaration(
"replace",
CelOverloadDecl.newMemberOverload(
"string_replace_string_string",
"Returns a new string based on the target, which replaces the occurrences of a"
+ " search string with a replacement string if present.",
SimpleType.STRING,
ImmutableList.of(SimpleType.STRING, SimpleType.STRING, SimpleType.STRING)),
CelOverloadDecl.newMemberOverload(
"string_replace_string_string_int",
"Returns a new string based on the target, which replaces the occurrences of a"
+ " search string with a replacement string if present. The function accepts a"
+ " limit on the number of substring replacements to be made. When the"
+ " replacement limit is 0, the result is the original string. When the limit"
+ " is a negative number, the function behaves the same as replace all.",
SimpleType.STRING,
ImmutableList.of(
SimpleType.STRING, SimpleType.STRING, SimpleType.STRING, SimpleType.INT))),
CelRuntime.CelFunctionBinding.from(
"string_replace_string_string",
ImmutableList.of(String.class, String.class, String.class),
CelStringExtensions::replaceAll),
CelRuntime.CelFunctionBinding.from(
"string_replace_string_string_int",
ImmutableList.of(String.class, String.class, String.class, Long.class),
CelStringExtensions::replace)),
SPLIT(
CelFunctionDecl.newFunctionDeclaration(
"split",
CelOverloadDecl.newMemberOverload(
"string_split_string",
"Returns a mutable list of strings split from the input by the given separator.",
ListType.create(SimpleType.STRING),
ImmutableList.of(SimpleType.STRING, SimpleType.STRING)),
CelOverloadDecl.newMemberOverload(
"string_split_string_int",
"Returns a mutable list of strings split from the input by the given separator with"
+ " the specified limit on the number of substrings produced by the split.",
ListType.create(SimpleType.STRING),
ImmutableList.of(SimpleType.STRING, SimpleType.STRING, SimpleType.INT))),
CelRuntime.CelFunctionBinding.from(
"string_split_string", String.class, String.class, CelStringExtensions::split),
CelRuntime.CelFunctionBinding.from(
"string_split_string_int",
ImmutableList.of(String.class, String.class, Long.class),
CelStringExtensions::split)),
SUBSTRING(
CelFunctionDecl.newFunctionDeclaration(
"substring",
CelOverloadDecl.newMemberOverload(
"string_substring_int",
"returns a string that is a substring of this string. The substring begins with the"
+ " character at the specified index and extends to the end of this string.",
SimpleType.STRING,
ImmutableList.of(SimpleType.STRING, SimpleType.INT)),
CelOverloadDecl.newMemberOverload(
"string_substring_int_int",
"returns a string that is a substring of this string. The substring begins at the"
+ " specified beginIndex and extends to the character at index endIndex - 1."
+ " Thus the length of the substring is {@code endIndex-beginIndex}.",
SimpleType.STRING,
ImmutableList.of(SimpleType.STRING, SimpleType.INT, SimpleType.INT))),
CelRuntime.CelFunctionBinding.from(
"string_substring_int", String.class, Long.class, CelStringExtensions::substring),
CelRuntime.CelFunctionBinding.from(
"string_substring_int_int",
ImmutableList.of(String.class, Long.class, Long.class),
CelStringExtensions::substring)),
TRIM(
CelFunctionDecl.newFunctionDeclaration(
"trim",
CelOverloadDecl.newMemberOverload(
"string_trim",
"Returns a new string which removes the leading and trailing whitespace in the"
+ " target string. The trim function uses the Unicode definition of whitespace"
+ " which does not include the zero-width spaces. ",
SimpleType.STRING,
SimpleType.STRING)),
CelRuntime.CelFunctionBinding.from("string_trim", String.class, CelStringExtensions::trim)),
UPPER_ASCII(
CelFunctionDecl.newFunctionDeclaration(
"upperAscii",
CelOverloadDecl.newMemberOverload(
"string_upper_ascii",
"Returns a new string where all ASCII characters are upper-cased. This function"
+ " does not perform Unicode case-mapping for characters outside the ASCII"
+ " range.",
SimpleType.STRING,
SimpleType.STRING)),
CelRuntime.CelFunctionBinding.from("string_upper_ascii", String.class, Ascii::toUpperCase));
private final CelFunctionDecl functionDecl;
private final ImmutableSet functionBindings;
Function(CelFunctionDecl functionDecl, CelRuntime.CelFunctionBinding... functionBindings) {
this.functionDecl = functionDecl;
this.functionBindings = ImmutableSet.copyOf(functionBindings);
}
}
private final ImmutableSet functions;
CelStringExtensions() {
this(ImmutableSet.copyOf(Function.values()));
}
CelStringExtensions(Set functions) {
this.functions = ImmutableSet.copyOf(functions);
}
@Override
public void setCheckerOptions(CelCheckerBuilder checkerBuilder) {
functions.forEach(function -> checkerBuilder.addFunctionDeclarations(function.functionDecl));
}
@Override
public void setRuntimeOptions(CelRuntimeBuilder runtimeBuilder) {
functions.forEach(function -> runtimeBuilder.addFunctionBindings(function.functionBindings));
}
private static String charAt(String s, long i) throws CelEvaluationException {
int index;
try {
index = Math.toIntExact(i);
} catch (ArithmeticException e) {
throw new CelEvaluationException(
String.format("charAt failure: Index must not exceed the int32 range: %d", i), e);
}
CelCodePointArray codePointArray = CelCodePointArray.fromString(s);
if (index == codePointArray.length()) {
return "";
}
if (index < 0 || index > codePointArray.length()) {
throw new CelEvaluationException(
String.format("charAt failure: Index out of range: %d", index));
}
return codePointArray.slice(index, index + 1).toString();
}
private static Long indexOf(String str, String substr) throws CelEvaluationException {
Object[] params = {str, substr, 0L};
return indexOf(params);
}
/**
* @param args Object array with indices of: [0: string], [1: substring], [2: offset]
*/
private static Long indexOf(Object[] args) throws CelEvaluationException {
String str = (String) args[0];
String substr = (String) args[1];
long offsetInLong = (Long) args[2];
int offset;
try {
offset = Math.toIntExact(offsetInLong);
} catch (ArithmeticException e) {
throw new CelEvaluationException(
String.format(
"indexOf failure: Offset must not exceed the int32 range: %d", offsetInLong),
e);
}
return indexOf(str, substr, offset);
}
private static Long indexOf(String str, String substr, int offset) throws CelEvaluationException {
if (substr.isEmpty()) {
return (long) offset;
}
CelCodePointArray strCpa = CelCodePointArray.fromString(str);
CelCodePointArray substrCpa = CelCodePointArray.fromString(substr);
if (offset < 0 || offset >= strCpa.length()) {
throw new CelEvaluationException(
String.format("indexOf failure: Offset out of range: %d", offset));
}
return safeIndexOf(strCpa, substrCpa, offset);
}
/** Retrieves the index of the substring in a given string without throwing. */
private static Long safeIndexOf(CelCodePointArray str, CelCodePointArray substr, int offset) {
for (int i = offset; i < str.length() - (substr.length() - 1); i++) {
int j;
for (j = 0; j < substr.length(); j++) {
if (str.get(i + j) != substr.get(j)) {
break;
}
}
if (j == substr.length()) {
return (long) i;
}
}
// Offset is out of bound.
return -1L;
}
private static String join(List stringList) {
return join(stringList, "");
}
private static String join(List stringList, String separator) {
return Joiner.on(separator).join(stringList);
}
private static Long lastIndexOf(String str, String substr) throws CelEvaluationException {
CelCodePointArray strCpa = CelCodePointArray.fromString(str);
CelCodePointArray substrCpa = CelCodePointArray.fromString(substr);
if (substrCpa.isEmpty()) {
return (long) strCpa.length();
}
return lastIndexOf(strCpa, substrCpa, (long) strCpa.length() - 1);
}
private static Long lastIndexOf(Object[] args) throws CelEvaluationException {
CelCodePointArray strCpa = CelCodePointArray.fromString((String) args[0]);
CelCodePointArray substrCpa = CelCodePointArray.fromString((String) args[1]);
long offset = (long) args[2];
return lastIndexOf(strCpa, substrCpa, offset);
}
private static Long lastIndexOf(CelCodePointArray str, CelCodePointArray substr, long offset)
throws CelEvaluationException {
if (substr.isEmpty()) {
return offset;
}
int off;
try {
off = Math.toIntExact(offset);
} catch (ArithmeticException e) {
throw new CelEvaluationException(
String.format("lastIndexOf failure: Offset must not exceed the int32 range: %d", offset),
e);
}
if (off < 0 || off >= str.length()) {
throw new CelEvaluationException(
String.format("lastIndexOf failure: Offset out of range: %d", offset));
}
if (off > str.length() - substr.length()) {
off = str.length() - substr.length();
}
for (int i = off; i >= 0; i--) {
int j;
for (j = 0; j < substr.length(); j++) {
if (str.get(i + j) != substr.get(j)) {
break;
}
}
if (j == substr.length()) {
return (long) i;
}
}
return -1L;
}
private static String replaceAll(Object[] objects) {
return replace((String) objects[0], (String) objects[1], (String) objects[2], -1);
}
private static String replace(Object[] objects) throws CelEvaluationException {
Long indexInLong = (Long) objects[3];
int index;
try {
index = Math.toIntExact(indexInLong);
} catch (ArithmeticException e) {
throw new CelEvaluationException(
String.format("replace failure: Index must not exceed the int32 range: %d", indexInLong),
e);
}
return replace((String) objects[0], (String) objects[1], (String) objects[2], index);
}
private static String replace(String text, String searchString, String replacement, int limit) {
if (searchString.equals(replacement) || limit == 0) {
return text;
}
if (text.isEmpty()) {
return searchString.isEmpty() ? replacement : "";
}
CelCodePointArray textCpa = CelCodePointArray.fromString(text);
CelCodePointArray searchCpa = CelCodePointArray.fromString(searchString);
CelCodePointArray replaceCpa = CelCodePointArray.fromString(replacement);
int start = 0;
int end = Math.toIntExact(safeIndexOf(textCpa, searchCpa, 0));
if (end < 0) {
return text;
}
// The minimum length of 1 handles the case of searchString being empty, where every character
// would be matched. This ensures the window is always moved forward to continue the search.
int minSearchLength = max(searchCpa.length(), 1);
StringBuilder sb =
new StringBuilder(textCpa.length() - searchCpa.length() + replaceCpa.length());
do {
CelCodePointArray sliced = textCpa.slice(start, end);
sb.append(sliced).append(replaceCpa);
start = end + searchCpa.length();
limit--;
} while (limit != 0
&& (end = Math.toIntExact(safeIndexOf(textCpa, searchCpa, end + minSearchLength))) > 0);
return sb.append(textCpa.slice(start, textCpa.length())).toString();
}
private static List split(String str, String separator) {
return split(str, separator, Integer.MAX_VALUE);
}
/**
* @param args Object array with indices of: [0: string], [1: separator], [2: limit]
*/
private static List split(Object[] args) throws CelEvaluationException {
long limitInLong = (Long) args[2];
int limit;
try {
limit = Math.toIntExact(limitInLong);
} catch (ArithmeticException e) {
throw new CelEvaluationException(
String.format("split failure: Limit must not exceed the int32 range: %d", limitInLong),
e);
}
return split((String) args[0], (String) args[1], limit);
}
/** Returns a **mutable** list of strings split on the separator */
private static List split(String str, String separator, int limit) {
if (limit == 0) {
return new ArrayList<>();
}
if (limit == 1) {
List singleElementList = new ArrayList<>();
singleElementList.add(str);
return singleElementList;
}
if (limit < 0) {
limit = str.length();
}
if (separator.isEmpty()) {
return explode(str, limit);
}
Iterable splitString = Splitter.on(separator).limit(limit).split(str);
return Lists.newArrayList(splitString);
}
/**
* Explodes a given string up to a limit
*
* Example 1: "aź°bš" (no limit or negative limit) -> ["a", "ź°", "b", "š"]
*
*
Example 2: "aź°bš" (limit 2) -> ["a", "ź°", "bš"]
*
*
This exists because neither the built-in String.split nor Guava's splitter is able to deal
* with separating single printable characters.
*/
private static List explode(String str, int limit) {
List exploded = new ArrayList<>();
CelCodePointArray codePointArray = CelCodePointArray.fromString(str);
if (limit > 0) {
limit -= 1;
}
int charCount = min(codePointArray.length(), limit);
for (int i = 0; i < charCount; i++) {
exploded.add(codePointArray.slice(i, i + 1).toString());
}
if (codePointArray.length() > limit) {
exploded.add(codePointArray.slice(limit, codePointArray.length()).toString());
}
return exploded;
}
private static Object substring(String s, long i) throws CelEvaluationException {
int beginIndex;
try {
beginIndex = Math.toIntExact(i);
} catch (ArithmeticException e) {
throw new CelEvaluationException(
String.format("substring failure: Index must not exceed the int32 range: %d", i), e);
}
CelCodePointArray codePointArray = CelCodePointArray.fromString(s);
boolean indexIsInRange = beginIndex <= codePointArray.length() && beginIndex >= 0;
if (!indexIsInRange) {
throw new CelEvaluationException(
String.format(
"substring failure: Range [%d, %d) out of bounds",
beginIndex, codePointArray.length()));
}
if (beginIndex == codePointArray.length()) {
return "";
}
return codePointArray.slice(beginIndex, codePointArray.length()).toString();
}
/**
* @param args Object array with indices of [0: string], [1: beginIndex], [2: endIndex]
*/
private static String substring(Object[] args) throws CelEvaluationException {
Long beginIndexInLong = (Long) args[1];
Long endIndexInLong = (Long) args[2];
int beginIndex;
int endIndex;
try {
beginIndex = Math.toIntExact(beginIndexInLong);
endIndex = Math.toIntExact(endIndexInLong);
} catch (ArithmeticException e) {
throw new CelEvaluationException(
String.format(
"substring failure: Indices must not exceed the int32 range: [%d, %d)",
beginIndexInLong, endIndexInLong),
e);
}
String s = (String) args[0];
CelCodePointArray codePointArray = CelCodePointArray.fromString(s);
boolean indicesIsInRange =
beginIndex <= endIndex
&& beginIndex >= 0
&& beginIndex <= codePointArray.length()
&& endIndex <= codePointArray.length();
if (!indicesIsInRange) {
throw new CelEvaluationException(
String.format("substring failure: Range [%d, %d) out of bounds", beginIndex, endIndex));
}
if (beginIndex == endIndex) {
return "";
}
return codePointArray.slice(beginIndex, endIndex).toString();
}
private static String trim(String text) {
CelCodePointArray textCpa = CelCodePointArray.fromString(text);
int left = indexOfNonWhitespace(textCpa);
if (left == textCpa.length()) {
return "";
}
int right = lastIndexOfNonWhitespace(textCpa);
return textCpa.slice(left, right + 1).toString();
}
/**
* Finds the first index of the non-whitespace character found in the string. See {@link
* #isWhitespace} for definition of a whitespace char.
*
* @return index of first non-whitespace character found (ex: " test " -> 0). Length of the string
* is returned instead if a non-whitespace character is not found.
*/
private static int indexOfNonWhitespace(CelCodePointArray textCpa) {
for (int i = 0; i < textCpa.length(); i++) {
if (!isWhitespace(textCpa.get(i))) {
return i;
}
}
return textCpa.length();
}
/**
* Finds the last index of the non-whitespace character found in the string. See {@link
* #isWhitespace} for definition of a whitespace char.
*
* @return index of last non-whitespace character found. (ex: " test " -> 5). 0 is returned
* instead if a non-whitespace char is not found. -1 is returned for an empty string ("").
*/
private static int lastIndexOfNonWhitespace(CelCodePointArray textCpa) {
if (textCpa.isEmpty()) {
return -1;
}
for (int i = textCpa.length() - 1; i >= 0; i--) {
if (!isWhitespace(textCpa.get(i))) {
return i;
}
}
return 0;
}
/**
* Checks if a provided codepoint is a whitespace according to Unicode's standard
* (White_Space=yes).
*
* This exists because Java's native Character.isWhitespace does not follow the Unicode's
* standard of whitespace definition.
*
*
See link for the full list.
*/
private static boolean isWhitespace(int codePoint) {
return (codePoint >= 0x0009 && codePoint <= 0x000D)
|| codePoint == 0x0020
|| codePoint == 0x0085
|| codePoint == 0x00A0
|| codePoint == 0x1680
|| (codePoint >= 0x2000 && codePoint <= 0x200A)
|| codePoint == 0x2028
|| codePoint == 0x2029
|| codePoint == 0x202F
|| codePoint == 0x205F
|| codePoint == 0x3000;
}
}