com.cognite.client.SequenceRows (cdf-sdk-java)
Java SDK for reading and writing from/to CDF resources.
/*
* Copyright (c) 2020 Cognite AS
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.cognite.client;
import com.cognite.client.config.ResourceType;
import com.cognite.client.dto.*;
import com.cognite.client.servicesV1.ConnectorServiceV1;
import com.cognite.client.servicesV1.ResponseItems;
import com.cognite.client.servicesV1.parser.ItemParser;
import com.cognite.client.servicesV1.parser.SequenceParser;
import com.cognite.client.util.Items;
import com.cognite.client.util.Partition;
import com.google.auto.value.AutoValue;
import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableList;
import com.google.protobuf.Value;
import org.apache.commons.lang3.RandomStringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.time.Duration;
import java.time.Instant;
import java.util.*;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ThreadLocalRandom;
import static com.cognite.client.servicesV1.ConnectorConstants.*;
/**
* This class represents the Cognite sequence body/rows api endpoint.
*
* It provides methods for reading and writing {@link SequenceBody}.
*/
@AutoValue
public abstract class SequenceRows extends ApiBase {
private static final SequenceMetadata DEFAULT_SEQ_METADATA = SequenceMetadata.newBuilder()
.setExternalId("SDK_default")
.setName("SDK_default")
.setDescription("Default Sequence metadata created by the Java SDK.")
.build();
private static Builder builder() {
return new AutoValue_SequenceRows.Builder();
}
protected static final Logger LOG = LoggerFactory.getLogger(SequenceRows.class);
/**
* Construct a new {@link SequenceRows} object using the provided configuration.
*
* This method is intended for internal use--SDK clients should always use {@link CogniteClient}
* as the entry point to this class.
*
* @param client The {@link CogniteClient} to use for configuration settings.
* @return the sequence rows api object.
*/
public static SequenceRows of(CogniteClient client) {
return SequenceRows.builder()
.setClient(client)
.build();
}
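/*
 * Usage sketch (illustrative, not part of this class): SDK clients reach this class
 * through {@link CogniteClient} rather than calling of() directly. The accessor chain
 * below assumes the SDK's standard wiring via the sequences api object:
 *
 *     CogniteClient client = ...;  // configured elsewhere
 *     SequenceRows sequenceRows = client.sequences().rows();
 */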
/**
* Returns all {@link SequenceBody} objects (i.e. sequence rows x columns) that match the
* specification set in the {@link Request}.
*
* The results are paged through / iterated over via an {@link Iterator}--the entire results set is not buffered in
* memory, but streamed in "pages" from the Cognite api. If you need to buffer the entire results set, then you
* have to stream these results into your own data structure.
*
* The sequence bodies are retrieved using multiple, parallel request streams towards the Cognite api. The number of
* parallel streams is set in the {@link com.cognite.client.config.ClientConfig}.
*
* @param requestParameters the filters to use for retrieving sequence bodies.
* @return an {@link Iterator} to page through the results set.
* @throws Exception
*/
public Iterator<List<SequenceBody>> retrieve(Request requestParameters) throws Exception {
return this.retrieve(ImmutableList.of(requestParameters));
}
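/*
 * Usage sketch (illustrative; the externalId and limit values are hypothetical):
 * build a Request targeting one sequence and stream its rows page by page.
 *
 *     Request request = Request.create()
 *             .withRootParameter("externalId", "my-sequence")
 *             .withRootParameter("limit", 10000);
 *     Iterator<List<SequenceBody>> pages = client.sequences().rows().retrieve(request);
 *     while (pages.hasNext()) {
 *         for (SequenceBody body : pages.next()) {
 *             // each SequenceBody carries one page of rows x columns for a sequence
 *         }
 *     }
 */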
/**
* Returns all {@link SequenceBody} objects (i.e. sequence rows x columns) that match the
* specification set in the collection of {@link Request}.
*
* By submitting a collection of {@link Request}, the requests will be submitted in parallel to
* Cognite Data Fusion, potentially increasing the overall I/O performance.
*
* The results are paged through / iterated over via an {@link Iterator}--the entire results set is not buffered in
* memory, but streamed in "pages" from the Cognite api. If you need to buffer the entire results set, then you
* have to stream these results into your own data structure.
*
* The sequence bodies are retrieved using multiple, parallel request streams towards the Cognite api. The number of
* parallel streams is set in the {@link com.cognite.client.config.ClientConfig}.
*
* @param requestParametersList the filters to use for retrieving sequence bodies.
* @return an {@link Iterator} to page through the results set.
* @throws Exception
*/
public Iterator<List<SequenceBody>> retrieve(List<Request> requestParametersList) throws Exception {
// Build the api iterators.
List<Iterator<CompletableFuture<ResponseItems<String>>>> iterators = new ArrayList<>();
for (Request requestParameters : requestParametersList) {
iterators.add(getListResponseIterator(ResourceType.SEQUENCE_BODY, addAuthInfo(requestParameters)));
}
// The iterator that will collect results across multiple results streams
FanOutIterator fanOutIterator = FanOutIterator.of(iterators);
// Add results object parsing
return AdapterIterator.of(fanOutIterator, this::parseSequenceBody);
}
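/*
 * Usage sketch (illustrative externalIds): one Request per sequence, submitted as
 * parallel request streams as described above.
 *
 *     List<Request> requests = List.of(
 *             Request.create().withRootParameter("externalId", "seq-A"),
 *             Request.create().withRootParameter("externalId", "seq-B"));
 *     Iterator<List<SequenceBody>> pages = client.sequences().rows().retrieve(requests);
 */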
/**
* Retrieves {@link SequenceBody} by {@code externalId}.
* Refer to {@link #retrieveComplete(List)} for more information.
*
* @param externalId The {@code externalIds} to retrieve
* @return The retrieved sequence bodies.
* @throws Exception
*/
public Iterator<List<SequenceBody>> retrieveComplete(String... externalId) throws Exception {
return retrieveComplete(Items.parseItems(externalId));
}
/**
* Retrieves {@link SequenceBody} by {@code internal id}.
* Refer to {@link #retrieveComplete(List)} for more information.
*
* @param id The {@code ids} to retrieve
* @return The retrieved sequence bodies.
* @throws Exception
*/
public Iterator<List<SequenceBody>> retrieveComplete(long... id) throws Exception {
return retrieveComplete(Items.parseItems(id));
}
/**
* Retrieves {@link SequenceBody} by {@code externalId / id}.
*
* The entire Sequence body (i.e. all rows and columns) will be retrieved.
*
* The sequence bodies are retrieved using multiple, parallel request streams towards the Cognite api. The number of
* parallel streams is set in the {@link com.cognite.client.config.ClientConfig}.
*
* @param items The sequences {@code externalId / id} to retrieve rows for.
* @return The retrieved sequence rows / bodies.
* @throws Exception
*/
public Iterator<List<SequenceBody>> retrieveComplete(List<Item> items) throws Exception {
List<Request> requestParametersList = new ArrayList<>(items.size());
// Build the request objects representing the items
for (Item item : items) {
Request requestParameters = Request.create()
.withRootParameter("limit", DEFAULT_MAX_BATCH_SIZE_SEQUENCES_ROWS);
if (item.getIdTypeCase() == Item.IdTypeCase.EXTERNAL_ID) {
requestParameters = requestParameters.withRootParameter("externalId", item.getExternalId());
} else if (item.getIdTypeCase() == Item.IdTypeCase.ID) {
requestParameters = requestParameters.withRootParameter("id", item.getId());
} else {
throw new Exception("Item does not contain externalId or id: " + item.toString());
}
requestParametersList.add(requestParameters);
}
return this.retrieve(requestParametersList);
}
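/*
 * Usage sketch (illustrative externalIds): retrieve the complete bodies (all rows
 * and columns) of two sequences.
 *
 *     Iterator<List<SequenceBody>> pages =
 *             client.sequences().rows().retrieveComplete("seq-A", "seq-B");
 *     pages.forEachRemaining(batch -> batch.forEach(body ->
 *             LOG.info("Retrieved {} rows", body.getRowsCount())));
 */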
/**
* Creates or updates a set of {@link SequenceBody} objects.
*
* A {@link SequenceBody} carries the data cells (columns x rows) to be upserted to a sequence. If the
* main sequence object hasn't been created in Cognite Data Fusion yet (maybe because of a large job where
* both sequence headers and bodies are upserted in parallel), this method will create the sequence objects
* based on the information carried in the {@link SequenceBody}.
*
* The algorithm runs as follows:
* 1. Write all {@link SequenceBody} objects to the Cognite API.
* 2. If one (or more) of the objects fail, check if it is because of missing sequence objects--create temp headers.
* 3. Retry the failed {@link SequenceBody} objects.
*
* @param sequenceBodies The sequence rows to upsert
* @return The upserted sequence rows
* @throws Exception
*/
public List<SequenceBody> upsert(List<SequenceBody> sequenceBodies) throws Exception {
Instant startInstant = Instant.now();
String batchLogPrefix =
"upsert() - batch " + RandomStringUtils.randomAlphanumeric(5) + " - ";
Preconditions.checkArgument(sequenceBodies.stream().allMatch(sequenceBody -> getSequenceId(sequenceBody).isPresent()),
batchLogPrefix + "All items must have externalId or id.");
int inputRowsCounter = 0;
int inputCellsCounter = 0;
for (SequenceBody sequenceBody : sequenceBodies) {
inputRowsCounter += sequenceBody.getRowsCount();
inputCellsCounter += sequenceBody.getRowsCount() * sequenceBody.getColumnsCount();
}
LOG.debug(batchLogPrefix + "Received {} sequence body objects with {} cells across {} rows to upsert",
sequenceBodies.size(),
inputCellsCounter,
inputRowsCounter);
// Should not happen--but need to guard against empty input
if (sequenceBodies.isEmpty()) {
LOG.debug(batchLogPrefix + "Received an empty input list. Will just output an empty list.");
return Collections.emptyList();
}
ConnectorServiceV1 connector = getClient().getConnectorService();
ConnectorServiceV1.ItemWriter createItemWriter = connector.writeSequencesRows();
/*
Start the upsert:
1. Write all sequences to the Cognite API.
2. If one (or more) of the sequences fail, it is most likely because of missing headers. Add temp headers.
3. Retry the failed sequences
*/
Map<ResponseItems<String>, List<SequenceBody>> responseMap = splitAndUpsertSeqBody(sequenceBodies, createItemWriter);
LOG.debug(batchLogPrefix + "Completed create items requests for {} input items across {} batches at duration {}",
sequenceBodies.size(),
responseMap.size(),
Duration.between(startInstant, Instant.now()).toString());
// Check for unsuccessful request
List<Item> missingItems = new ArrayList<>();
List<SequenceBody> retrySequenceBodyList = new ArrayList<>(sequenceBodies.size());
List<ResponseItems<String>> successfulBatches = new ArrayList<>(sequenceBodies.size());
boolean requestsAreSuccessful = true;
for (ResponseItems<String> responseItems : responseMap.keySet()) {
requestsAreSuccessful = requestsAreSuccessful && responseItems.isSuccessful();
if (!responseItems.isSuccessful()) {
// Check for duplicates. Duplicates should not happen, so fire off an exception.
if (!responseItems.getDuplicateItems().isEmpty()) {
String message = String.format(batchLogPrefix + "Duplicates reported: %d %n "
+ "Response body: %s",
responseItems.getDuplicateItems().size(),
responseItems.getResponseBodyAsString()
.substring(0, Math.min(1000, responseItems.getResponseBodyAsString().length())));
LOG.error(message);
throw new Exception(message);
}
// Get the missing items and add the original sequence bodies to the retry list
missingItems.addAll(parseItems(responseItems.getMissingItems()));
retrySequenceBodyList.addAll(responseMap.get(responseItems));
} else {
successfulBatches.add(responseItems);
}
}
if (!requestsAreSuccessful) {
LOG.warn(batchLogPrefix + "Write sequence rows failed. Most likely due to missing sequence header / metadata. "
+ "Will add minimum sequence metadata and retry the sequence rows insert.");
LOG.info(batchLogPrefix + "Number of missing entries reported by CDF: {}", missingItems.size());
// check if the missing items are based on internal id--not supported
List<SequenceBody> missingSequences = new ArrayList<>(missingItems.size());
for (Item item : missingItems) {
if (item.getIdTypeCase() != Item.IdTypeCase.EXTERNAL_ID) {
String message = batchLogPrefix + "Sequence with internal id refers to a non-existing sequence. "
+ "Only externalId is supported. Item specification: " + item.toString();
LOG.error(message);
throw new Exception(message);
}
// add the corresponding sequence body to a list for later processing
sequenceBodies.stream()
.filter(sequence -> sequence.getExternalId().equals(item.getExternalId()))
.forEach(missingSequences::add);
}
LOG.debug(batchLogPrefix + "All missing items are based on externalId");
// If we have missing items, add default sequence header
if (missingSequences.isEmpty()) {
LOG.warn(batchLogPrefix + "Write sequences rows failed, but cannot identify missing sequences headers");
} else {
LOG.debug(batchLogPrefix + "Start writing default sequence headers for {} items",
missingSequences.size());
writeSeqHeaderForRows(missingSequences);
}
// Retry the failed sequence body upsert
LOG.debug(batchLogPrefix + "Finished writing default headers. Will retry {} sequence body items.",
retrySequenceBodyList.size());
if (retrySequenceBodyList.isEmpty()) {
LOG.warn(batchLogPrefix + "Write sequences rows failed, but cannot identify sequences to retry.");
} else {
Map<ResponseItems<String>, List<SequenceBody>> retryResponseMap =
splitAndUpsertSeqBody(retrySequenceBodyList, createItemWriter);
// Check status of the requests
requestsAreSuccessful = true;
for (ResponseItems<String> responseItems : retryResponseMap.keySet()) {
requestsAreSuccessful = requestsAreSuccessful && responseItems.isSuccessful();
}
}
}
if (!requestsAreSuccessful) {
String message = batchLogPrefix + "Failed to write sequences rows.";
LOG.error(message);
throw new Exception(message);
}
LOG.info(batchLogPrefix + "Completed writing {} sequence items with {} total rows and {} cells "
+ "across {} requests within a duration of {}.",
sequenceBodies.size(),
inputRowsCounter,
inputCellsCounter,
responseMap.size(),
Duration.between(startInstant, Instant.now()).toString());
return ImmutableList.copyOf(sequenceBodies);
}
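/*
 * Usage sketch (illustrative identifiers and values): build a one-column, one-row
 * body and upsert it. If the sequence header "my-sequence" does not exist yet,
 * upsert() writes a default header and retries, per the algorithm described above.
 *
 *     SequenceBody body = SequenceBody.newBuilder()
 *             .setExternalId("my-sequence")
 *             .addColumns(SequenceColumn.newBuilder().setExternalId("value").build())
 *             .addRows(SequenceRow.newBuilder()
 *                     .setRowNumber(1)
 *                     .addValues(Value.newBuilder().setNumberValue(42.0).build())
 *                     .build())
 *             .build();
 *     List<SequenceBody> upserted = client.sequences().rows().upsert(List.of(body));
 */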
/**
* Deletes the given rows of the sequence(s).
*
* This method will delete the rows specified via the sequence externalId/id + row number list in the input
* {@link SequenceBody} objects. You don't need to specify columns or values. All columns will always be removed
* from the listed row numbers.
*
* @param sequenceRows The sequence rows (sequence id/externalId + row numbers) to delete
* @return The deleted rows
* @throws Exception
*/
public List<SequenceBody> delete(List<SequenceBody> sequenceRows) throws Exception {
String loggingPrefix = "delete() - " + RandomStringUtils.randomAlphanumeric(5) + " - ";
Instant startInstant = Instant.now();
int maxDeleteLoopIterations = 3;
// should not happen, but need to guard against empty input
if (sequenceRows.isEmpty()) {
LOG.warn(loggingPrefix + "No items in the input. Returning without deleting any rows.");
return Collections.emptyList();
}
ConnectorServiceV1 connector = getClient().getConnectorService();
ConnectorServiceV1.ItemWriter deleteItemWriter = connector.deleteSequencesRows();
// Delete and completed lists
List<SequenceBody> elementListDelete = new ArrayList<>(sequenceRows); // defensive copy--this list is cleared and refilled below
List<SequenceBody> elementListCompleted = new ArrayList<>(elementListDelete.size());
/*
The delete loop. If there are items left to delete:
1. Delete items
2. If conflict, remove duplicates and missing items.
*/
ThreadLocalRandom random = ThreadLocalRandom.current();
String exceptionMessage = "";
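// Each retry iteration pauses before the next attempt: the for-loop update clause
// below sleeps for roughly 10 * e^i ms (exponential in the iteration count) plus
// 0-4 ms of random jitter, capped at 500 ms.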
for (int i = 0; i < maxDeleteLoopIterations && elementListDelete.size() > 0;
i++, Thread.sleep(Math.min(500L, (10L * (long) Math.exp(i)) + random.nextLong(5)))) {
LOG.debug(loggingPrefix + "Start delete loop {} with {} sequence body items to delete and "
+ "{} completed items at duration {}",
i,
elementListDelete.size(),
elementListCompleted.size(),
Duration.between(startInstant, Instant.now()).toString());
/*
Delete items
*/
Map<ResponseItems<String>, List<SequenceBody>> deleteResponseMap =
splitAndDeleteItems(elementListDelete, deleteItemWriter);
LOG.debug(loggingPrefix + "Completed delete items requests for {} items across {} batches at duration {}",
elementListDelete.size(),
deleteResponseMap.size(),
Duration.between(startInstant, Instant.now()).toString());
elementListDelete.clear(); // Must prepare the list for possible new entries.
for (ResponseItems<String> response : deleteResponseMap.keySet()) {
if (response.isSuccessful()) {
elementListCompleted.addAll(deleteResponseMap.get(response));
LOG.debug(loggingPrefix + "Delete items request success. Adding {} delete result items to result collection.",
deleteResponseMap.get(response).size());
} else {
exceptionMessage = response.getResponseBodyAsString();
LOG.debug(loggingPrefix + "Delete items request failed: {}", response.getResponseBodyAsString());
if (i == maxDeleteLoopIterations - 1) {
// Add the error message to std logging
LOG.error(loggingPrefix + "Delete items request failed. {}", response.getResponseBodyAsString());
}
LOG.debug(loggingPrefix + "Delete items request failed. "
+ "Removing duplicates and missing items and retrying the request");
List<Item> duplicates = ItemParser.parseItems(response.getDuplicateItems());
List<Item> missing = ItemParser.parseItems(response.getMissingItems());
LOG.debug(loggingPrefix + "No of duplicate entries reported by CDF: {}", duplicates.size());
LOG.debug(loggingPrefix + "No of missing items reported by CDF: {}", missing.size());
// Remove missing items from the delete request
Map<String, SequenceBody> itemsMap = mapToId(deleteResponseMap.get(response));
for (Item value : missing) {
if (value.getIdTypeCase() == Item.IdTypeCase.EXTERNAL_ID) {
itemsMap.remove(value.getExternalId());
} else if (value.getIdTypeCase() == Item.IdTypeCase.ID) {
itemsMap.remove(String.valueOf(value.getId()));
}
}
// Remove duplicate items from the delete request
for (Item value : duplicates) {
if (value.getIdTypeCase() == Item.IdTypeCase.EXTERNAL_ID) {
itemsMap.remove(value.getExternalId());
} else if (value.getIdTypeCase() == Item.IdTypeCase.ID) {
itemsMap.remove(String.valueOf(value.getId()));
}
}
elementListDelete.addAll(itemsMap.values()); // Add remaining items to be re-deleted
}
}
}
// Check if all elements completed the upsert requests
if (elementListDelete.isEmpty()) {
LOG.info(loggingPrefix + "Successfully deleted {} items within a duration of {}.",
elementListCompleted.size(),
Duration.between(startInstant, Instant.now()).toString());
} else {
LOG.error(loggingPrefix + "Failed to delete items. {} items remaining. {} items completed delete."
+ System.lineSeparator() + "{}",
elementListDelete.size(),
elementListCompleted.size(),
exceptionMessage);
throw new Exception(String.format(loggingPrefix + "Failed to delete items. %d items remaining. "
+ " %d items completed delete. %n " + exceptionMessage,
elementListDelete.size(),
elementListCompleted.size()));
}
return elementListCompleted;
}
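/*
 * Usage sketch (illustrative identifiers): delete rows 1-3 of one sequence. Only
 * the sequence id/externalId and row numbers are required; columns and values are
 * ignored by the delete endpoint.
 *
 *     SequenceBody deleteSpec = SequenceBody.newBuilder()
 *             .setExternalId("my-sequence")
 *             .addRows(SequenceRow.newBuilder().setRowNumber(1).build())
 *             .addRows(SequenceRow.newBuilder().setRowNumber(2).build())
 *             .addRows(SequenceRow.newBuilder().setRowNumber(3).build())
 *             .build();
 *     List<SequenceBody> deleted = client.sequences().rows().delete(List.of(deleteSpec));
 */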
/**
* Deletes sequence rows.
*
* Submits a (large) batch of sequence body / row items by splitting it up into multiple, parallel delete requests.
* The response from each request is returned along with the items used as input.
*
* This method will:
* 1. Check all input for valid ids.
* 2. Deduplicate the items, including row numbers.
* 3. Split the input into request batches (if necessary).
*
* @param sequenceRows The sequence rows to delete
* @param deleteWriter The {@link com.cognite.client.servicesV1.ConnectorServiceV1.ItemWriter} to use for the delete requests.
* @return A {@link Map} with the responses and request inputs.
* @throws Exception
*/
private Map<ResponseItems<String>, List<SequenceBody>> splitAndDeleteItems(List<SequenceBody> sequenceRows,
ConnectorServiceV1.ItemWriter deleteWriter) throws Exception {
String loggingPrefix = "splitAndDeleteItems() - " + RandomStringUtils.randomAlphanumeric(5) + " - ";
Instant startInstant = Instant.now();
int maxItemsPerBatch = 1000;
int maxRowsPerItem = 10_000;
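// Illustration of the two limits above: a single sequence with 25,000 rows to delete
// is split into three delete items of 10,000, 10,000 and 5,000 rows, and a request
// batch holds at most 1,000 sequence items.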
List<SequenceBody> deleteItemsList = new ArrayList<>(sequenceRows.size());
/*
check that ids are provided + remove duplicate rows.
1. Map all input objects to id.
2. Consolidate all rows per id.
*/
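// Example: two input bodies for externalId "seq-A" with row numbers [1, 2] and
// [2, 3] are consolidated into a single delete item covering rows [1, 2, 3].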
Map<String, List<SequenceBody>> itemMap = new HashMap<>();
long rowCounter = 0;
for (SequenceBody value : sequenceRows) {
if (getSequenceId(value).isPresent()) {
List<SequenceBody> rows = itemMap.getOrDefault(getSequenceId(value).get(), new ArrayList<>());
rows.add(value);
itemMap.put(getSequenceId(value).get(), rows);
rowCounter += value.getRowsCount();
} else {
String message = loggingPrefix + "Sequence contains neither id nor externalId: " + value.toString();
LOG.error(message);
throw new Exception(message);
}
}
LOG.debug(loggingPrefix + "Received {} rows to remove from {} sequences, with {} unique sequence ids. Duration of: {}",
rowCounter,
sequenceRows.size(),
itemMap.size(),
Duration.between(startInstant, Instant.now()).toString());
int dedupeRowCounter = 0;
for (List<SequenceBody> elements : itemMap.values()) {
List<Long> sequenceRowNumbers = new ArrayList<>();
SequenceBody sequenceBody = elements.get(0).toBuilder()
.clearRows()
.clearColumns()
.build();
for (SequenceBody item : elements) {
// deduplicate row numbers
Set<Long> uniqueRowNos = new HashSet<>(sequenceRowNumbers.size() + item.getRowsCount());
uniqueRowNos.addAll(sequenceRowNumbers);
item.getRowsList().forEach(row -> uniqueRowNos.add(row.getRowNumber()));
sequenceRowNumbers = new ArrayList<>(uniqueRowNos);
}
List<SequenceRow> sequenceRowsDeduplicated = new ArrayList<>();
sequenceRowNumbers.forEach(rowNumber ->
sequenceRowsDeduplicated.add(SequenceRow.newBuilder().setRowNumber(rowNumber).build()));
deleteItemsList.add(sequenceBody.toBuilder()
.addAllRows(sequenceRowsDeduplicated)
.build());
dedupeRowCounter += sequenceRowsDeduplicated.size();
}
LOG.debug(loggingPrefix + "Finished deduplication. Result: {} rows across {} sequences. Duration of: {}",
dedupeRowCounter,
deleteItemsList.size(),
Duration.between(startInstant, Instant.now()).toString());
// Split into batches and submit delete requests
Map<CompletableFuture<ResponseItems<String>>, List<SequenceBody>> responseMap = new HashMap<>();
List<SequenceBody> deleteBatch = new ArrayList<>(maxItemsPerBatch);
int submitDeleteRowCounter = 0;
int submitItemsCounter = 0;
for (SequenceBody deleteItem : deleteItemsList) {
// Check if there are too many rows per item
if (deleteItem.getRowsCount() > maxRowsPerItem) {
List<List<SequenceRow>> rowBatches = Partition.ofSize(deleteItem.getRowsList(), maxRowsPerItem);
for (List<SequenceRow> rowBatch : rowBatches) {
deleteBatch.add(deleteItem.toBuilder()
.clearRows()
.addAllRows(rowBatch)
.build());
submitDeleteRowCounter += rowBatch.size();
// Always submit a request when splitting up the rows for a single sequence id.
// Because we cannot have multiple items with the same id in the same batch.
responseMap.put(deleteItems(deleteBatch, deleteWriter), deleteBatch);
submitItemsCounter += deleteBatch.size();
deleteBatch = new ArrayList<>();
}
} else {
deleteBatch.add(deleteItem);
submitDeleteRowCounter += deleteItem.getRowsCount();
if (deleteBatch.size() >= maxItemsPerBatch) {
responseMap.put(deleteItems(deleteBatch, deleteWriter), deleteBatch);
submitItemsCounter += deleteBatch.size();
deleteBatch = new ArrayList<>();
}
}
}
if (!deleteBatch.isEmpty()) {
responseMap.put(deleteItems(deleteBatch, deleteWriter), deleteBatch);
submitItemsCounter += deleteBatch.size();
}
LOG.debug(loggingPrefix + "Finished submitting delete requests for {} rows across {} sequences items via {} batches. "
+ "Duration of: {}",
submitDeleteRowCounter,
submitItemsCounter,
responseMap.size(),
Duration.between(startInstant, Instant.now()).toString());
// Wait for all requests futures to complete
List<CompletableFuture<ResponseItems<String>>> futureList = new ArrayList<>();
responseMap.keySet().forEach(futureList::add);
CompletableFuture<Void> allFutures =
CompletableFuture.allOf(futureList.toArray(new CompletableFuture[futureList.size()]));
allFutures.join(); // Wait for all futures to complete
// Collect the responses from the futures
Map<ResponseItems<String>, List<SequenceBody>> resultsMap = new HashMap<>(responseMap.size());
for (Map.Entry<CompletableFuture<ResponseItems<String>>, List<SequenceBody>> entry : responseMap.entrySet()) {
resultsMap.put(entry.getKey().join(), entry.getValue());
}
return resultsMap;
}
/**
* Submits a set of items as a delete sequence rows request to the Cognite API.
*
* @param sequenceRows the objects to delete.
* @return a {@link CompletableFuture} representing the response from the create request.
* @throws Exception
*/
private CompletableFuture<ResponseItems<String>> deleteItems(List<SequenceBody> sequenceRows,
ConnectorServiceV1.ItemWriter deleteWriter) throws Exception {
ImmutableList.Builder<SequenceBody>