All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.couchbase.connect.kafka.util.DocumentPathExtractor Maven / Gradle / Ivy

/*
 * Copyright 2017 Couchbase, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.couchbase.connect.kafka.util;

import com.couchbase.client.core.deps.com.fasterxml.jackson.core.JsonFactory;
import com.couchbase.client.core.deps.com.fasterxml.jackson.core.JsonParser;
import com.couchbase.client.core.deps.com.fasterxml.jackson.core.JsonPointer;
import com.couchbase.client.core.deps.com.fasterxml.jackson.core.filter.FilteringParserDelegate;
import com.couchbase.client.core.deps.com.fasterxml.jackson.core.filter.JsonPointerBasedFilter;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import static com.couchbase.client.core.deps.com.fasterxml.jackson.core.filter.TokenFilter.Inclusion.ONLY_INCLUDE_ALL;
import static java.util.Collections.unmodifiableMap;
import static java.util.Objects.requireNonNull;

/**
 * Locates a document ID using a JSON pointer, optionally removing the ID from the document.
 * 

* Immutable. */ public class DocumentPathExtractor { public static class DocumentPathNotFoundException extends Exception { DocumentPathNotFoundException(String message) { super(message); } } public static class DocumentExtraction { private final String pathValue; private final byte[] data; DocumentExtraction(String pathValue, byte[] data) { this.pathValue = pathValue; this.data = requireNonNull(data); } public String getPathValue() { return pathValue; } public byte[] getData() { return data; } } private static class ByteRange { private final byte[] bytes; private int startOffset; private int pastEndOffset; private ByteRange(byte[] bytes, int startOffset, int pastEndOffset) { this.bytes = bytes; this.startOffset = startOffset; this.pastEndOffset = pastEndOffset; } private static ByteRange forCurrentToken(byte[] bytes, JsonParser parser) { return new ByteRange(bytes, (int) parser.getTokenLocation().getByteOffset(), (int) parser.getCurrentLocation().getByteOffset()); } @Override public String toString() { return "[" + startOffset + "," + pastEndOffset + ") = |" + new String(bytes, startOffset, pastEndOffset - startOffset) + "|"; } void fill(byte[] bytes, byte fillByte) { Arrays.fill(bytes, startOffset, pastEndOffset, fillByte); } } private static final JsonFactory factory = new JsonFactory(); private final String documentPathFormat; private final Map placeholderToJsonPointer; private final boolean removeDocumentPath; private static final Pattern PLACEHOLDER_PATTERN = Pattern.compile("\\$\\{(.+?)}"); public DocumentPathExtractor(String documentPathFormat, boolean removeDocumentPath) { if (documentPathFormat.isEmpty()) { throw new IllegalArgumentException("Document ID format must not be empty"); } Matcher m = PLACEHOLDER_PATTERN.matcher(documentPathFormat); if (!m.find()) { // For backwards compatibility, treat the whole thing as a single JSON pointer documentPathFormat = "${" + documentPathFormat + "}"; m = PLACEHOLDER_PATTERN.matcher(documentPathFormat); if (!m.find()) { // Shouldn't happen, since we just added the placeholder delimiters throw new AssertionError("invalid document ID format string"); } } Map placeholderToJsonPointerTemp = new HashMap<>(); do { final String placeholder = m.group(); final String jsonPointer = m.group(1); placeholderToJsonPointerTemp.put(placeholder, JsonPointer.compile(jsonPointer)); } while (m.find()); this.placeholderToJsonPointer = unmodifiableMap(placeholderToJsonPointerTemp); this.documentPathFormat = documentPathFormat; this.removeDocumentPath = removeDocumentPath; } /** * @param json The document content encoded as UTF-8. If this method returns normally, * it may modify the contents of the array to remove the fields used by the document ID. */ public DocumentExtraction extractDocumentPath(final byte[] json) throws IOException, DocumentPathNotFoundException { final List rangesToRemove = new ArrayList<>(placeholderToJsonPointer.size()); String documentId = documentPathFormat; for (Map.Entry idComponent : placeholderToJsonPointer.entrySet()) { final String placeholder = idComponent.getKey(); final JsonPointer documentIdPointer = idComponent.getValue(); final JsonParser parser = new FilteringParserDelegate( factory.createParser(json), new JsonPointerBasedFilter(documentIdPointer), ONLY_INCLUDE_ALL, false); if (parser.nextToken() == null) { throw new DocumentPathNotFoundException("Document has no value matching JSON pointer '" + documentIdPointer + "'"); } final String component = parser.getValueAsString(); if (component == null) { throw new DocumentPathNotFoundException("The value matching JSON pointer '" + documentIdPointer + "' is null or non-scalar."); } documentId = documentId.replace(placeholder, component); if (removeDocumentPath) { rangesToRemove.add(ByteRange.forCurrentToken(json, parser)); } } // At this point we're sure DocumentIdNotFoundException wasn't thrown, and we can expect // this method to return normally. It is finally safe to modify the document content. for (ByteRange range : rangesToRemove) { swallowFieldName(range); swallowOneComma(range); range.fill(json, (byte) ' '); } return new DocumentExtraction(documentId, json); } private static void swallowOneComma(ByteRange range) { swallowWhitespace(range); if (range.bytes[range.pastEndOffset] == ',') { range.pastEndOffset++; } else if (range.bytes[range.startOffset - 1] == ',') { range.startOffset--; } } private static void swallowWhitespace(ByteRange range) { swallowWhitespaceLeft(range); swallowWhitespaceRight(range); } private static void swallowWhitespaceLeft(ByteRange range) { while (isJsonWhitespace(range.bytes[range.startOffset - 1])) { range.startOffset--; } } private static void swallowWhitespaceRight(ByteRange range) { while (isJsonWhitespace(range.bytes[range.pastEndOffset])) { range.pastEndOffset++; } } private static void swallowFieldName(ByteRange range) { swallowWhitespaceLeft(range); // If the target was a field, then prevChar will be the colon (:) that separates the field name from value. // If the target was an array element, then prevChar will be the array start token ([) or the comma (,) // separating the target from the previous array element. byte prevChar = range.bytes[range.startOffset - 1]; if (prevChar == ':') { range.startOffset--; // swallow colon swallowWhitespaceLeft(range); range.startOffset--; // swallow field name closing quote // swallow left to include field name opening quote (guaranteed to not be preceded by backslash) do { range.startOffset--; } while (!(range.bytes[range.startOffset] == '"' && range.bytes[range.startOffset - 1] != '\\')); } } private static boolean isJsonWhitespace(byte b) { switch (b) { case 0x20: // Space case 0x09: // Horizontal tab case 0x0A: // LF case 0x0D: // CR return true; default: return false; } } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy