org.projectnessie.versioned.transfer.ExportPersist

/*
 * Copyright (C) 2022 Dremio
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.projectnessie.versioned.transfer;

import static java.util.Collections.emptyMap;
import static java.util.Spliterators.spliteratorUnknownSize;
import static org.projectnessie.versioned.storage.common.logic.CommitLogQuery.commitLogQuery;
import static org.projectnessie.versioned.storage.common.logic.ReferencesQuery.referencesQuery;

import com.fasterxml.jackson.core.JsonProcessingException;
import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Deque;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.function.Consumer;
import java.util.function.Function;
import java.util.stream.Collectors;
import java.util.stream.StreamSupport;
import org.projectnessie.model.Content;
import org.projectnessie.nessie.relocated.protobuf.ByteString;
import org.projectnessie.versioned.storage.common.exceptions.ObjNotFoundException;
import org.projectnessie.versioned.storage.common.logic.HeadsAndForkPoints;
import org.projectnessie.versioned.storage.common.logic.IdentifyHeadsAndForkPoints;
import org.projectnessie.versioned.storage.common.objtypes.CommitObj;
import org.projectnessie.versioned.storage.common.objtypes.CommitOp;
import org.projectnessie.versioned.storage.common.objtypes.ContentValueObj;
import org.projectnessie.versioned.storage.common.persist.Obj;
import org.projectnessie.versioned.storage.common.persist.ObjId;
import org.projectnessie.versioned.storage.common.persist.Reference;
import org.projectnessie.versioned.transfer.files.ExportFileSupplier;
import org.projectnessie.versioned.transfer.serialize.TransferTypes.Commit;
import org.projectnessie.versioned.transfer.serialize.TransferTypes.ExportVersion;
import org.projectnessie.versioned.transfer.serialize.TransferTypes.HeadsAndForks;
import org.projectnessie.versioned.transfer.serialize.TransferTypes.Operation;
import org.projectnessie.versioned.transfer.serialize.TransferTypes.OperationType;
import org.projectnessie.versioned.transfer.serialize.TransferTypes.Ref;

final class ExportPersist extends ExportCommon {
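  // Exports named references, commits and the content values they reference from a
  // "persist" based Nessie repository into the transfer/export file format.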

  private final List<Reference> references = new ArrayList<>();

  ExportPersist(
      ExportFileSupplier exportFiles, NessieExporter exporter, ExportVersion exportVersion) {
    super(exportFiles, exporter, exportVersion);
  }

  @Override
  void prepare(ExportContext exportContext) {
    // Load reference info before processing commits to make sure the exported ref HEADs are
    // included in the commit data, even if HEADs advance during the export (i.e. new commits
    // are added while the export is running).
    exporter.referenceLogic().queryReferences(referencesQuery()).forEachRemaining(references::add);
  }

  @Override
  HeadsAndForks exportCommits(ExportContext exportContext) {
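    // Stream all relevant commits in batches of commitBatchSize(): either via a full database
    // scan or by walking the commit logs of all known references, depending on the fullScan
    // option. The Batcher flushes each completed batch to mapCommitObjs().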

    HeadsAndForkPoints headsAndForkPoints;
    try (Batcher<CommitObj> commitObjBatcher =
        new Batcher<>(
            exporter.commitBatchSize(), commits -> mapCommitObjs(commits, exportContext))) {
      headsAndForkPoints =
          exporter.fullScan()
              ? scanDatabase(commitObjBatcher::add)
              : scanAllReferences(commitObjBatcher::add);
    }

    HeadsAndForks.Builder hf =
        HeadsAndForks.newBuilder()
            .setScanStartedAtInMicros(headsAndForkPoints.getScanStartedAtInMicros());
    headsAndForkPoints.getHeads().forEach(h -> hf.addHeads(h.asBytes()));
    headsAndForkPoints.getForkPoints().forEach(h -> hf.addForkPoints(h.asBytes()));
    return hf.build();
  }

  private HeadsAndForkPoints scanAllReferences(Consumer<CommitObj> commitHandler) {
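    // Walk the commit log starting at each reference HEAD, stopping at commits that have
    // already been seen; secondary (merge) parents are enqueued so merged histories are
    // traversed as well.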
    IdentifyHeadsAndForkPoints identify =
        new IdentifyHeadsAndForkPoints(
            exporter.expectedCommitCount(), exporter.persist().config().currentTimeMicros());

    references.forEach(
        ref -> {
          Deque<ObjId> commitsToProcess = new ArrayDeque<>();
          commitsToProcess.offerFirst(ref.pointer());
          while (!commitsToProcess.isEmpty()) {
            ObjId id = commitsToProcess.removeFirst();
            if (identify.isCommitNew(id)) {
              Iterator<CommitObj> commitIter =
                  exporter.commitLogic().commitLog(commitLogQuery(id));
              while (commitIter.hasNext()) {
                CommitObj commit = commitIter.next();
                if (!identify.handleCommit(commit)) {
                  break;
                }
                commitHandler.accept(commit);
                for (ObjId parentId : commit.secondaryParents()) {
                  if (identify.isCommitNew(parentId)) {
                    commitsToProcess.addLast(parentId);
                  }
                }
              }
            }
          }
        });

    return identify.finish();
  }

  private HeadsAndForkPoints scanDatabase(Consumer<CommitObj> commitHandler) {
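    // Full scan: let the commit logic identify all heads and fork points across the whole
    // repository, passing every commit to the handler.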
    return exporter
        .commitLogic()
        .identifyAllHeadsAndForkPoints(exporter.expectedCommitCount(), commitHandler);
  }

  @Override
  void exportReferences(ExportContext exportContext) {
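    // Write one Ref entry per named reference collected in prepare().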
    for (Reference reference : references) {
      handleGenericObjs(transferRelatedObjects.referenceRelatedObjects(reference));

      ObjId extendedInfoObj = reference.extendedInfoObj();
      Ref.Builder refBuilder =
          Ref.newBuilder().setName(reference.name()).setPointer(reference.pointer().asBytes());
      if (extendedInfoObj != null) {
        refBuilder.setExtendedInfoObj(extendedInfoObj.asBytes());
      }
      refBuilder.setCreatedAtMicros(reference.createdAtMicros());
      exportContext.writeRef(refBuilder.build());
      exporter.progressListener().progress(ProgressEvent.NAMED_REFERENCE_WRITTEN);
    }
  }

  private void mapCommitObjs(List<CommitObj> commitObjs, ExportContext exportContext) {
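    // Bulk-fetch the content values referenced by this batch of commits, then write each commit
    // together with its related generic objects.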
    Map<ObjId, Obj> objs = fetchReferencedObjs(commitObjs);

    for (CommitObj c : commitObjs) {
      Commit commit = mapCommitObj(c, objs);
      exportContext.writeCommit(commit);

      handleGenericObjs(transferRelatedObjects.commitRelatedObjects(c));

      exporter.progressListener().progress(ProgressEvent.COMMIT_WRITTEN);
    }
  }

  private Map<ObjId, Obj> fetchReferencedObjs(List<CommitObj> commitObjs) {
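    // Collect the distinct content-value object IDs of all operations in the given commits and
    // fetch them with a single fetchObjs() call.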
    try {
      ObjId[] valueIds =
          commitObjs.stream()
              .flatMap(
                  c ->
                      StreamSupport.stream(
                              spliteratorUnknownSize(
                                  exporter.indexesLogic().commitOperations(c).iterator(), 0),
                              false)
                          .map(op -> op.content().value())
                          .filter(Objects::nonNull))
              .distinct()
              .toArray(ObjId[]::new);

      return valueIds.length > 0
          ? Arrays.stream(exporter.persist().fetchObjs(valueIds))
              .filter(Objects::nonNull)
              .collect(Collectors.toMap(Obj::id, Function.identity()))
          : emptyMap();
    } catch (ObjNotFoundException e) {
      throw new RuntimeException(e);
    }
  }

  private Commit mapCommitObj(CommitObj c, Map<ObjId, Obj> objs) {
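    // Map the CommitObj to its transfer representation: headers, parents and operations,
    // re-serializing each operation's content value via the store worker and the object mapper.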
    Commit.Builder b =
        Commit.newBuilder()
            .setCommitId(c.id().asBytes())
            .setParentCommitId(c.tail().get(0).asBytes())
            .setMessage(c.message())
            .setCommitSequence(c.seq())
            .setCreatedTimeMicros(c.created());

    c.headers()
        .keySet()
        .forEach(h -> b.addHeadersBuilder().setName(h).addAllValues(c.headers().getAll(h)));
    c.secondaryParents().forEach(p -> b.addAdditionalParents(p.asBytes()));
    exporter
        .indexesLogic()
        .commitOperations(c)
        .forEach(
            op -> {
              CommitOp content = op.content();
              Operation.Builder opBuilder = b.addOperationsBuilder().setPayload(content.payload());

              opBuilder.addContentKey(op.key().rawString());

              ObjId valueId = content.value();
              if (valueId != null) {
                try {
                  ContentValueObj value = (ContentValueObj) objs.get(valueId);
                  Content modelContent =
                      exporter.storeWorker().valueFromStore((byte) content.payload(), value.data());
                  byte[] modelContentBytes =
                      exporter.objectMapper().writeValueAsBytes(modelContent);
                  opBuilder
                      .setContentId(value.contentId())
                      .setValue(ByteString.copyFrom(modelContentBytes));

                  handleGenericObjs(transferRelatedObjects.contentRelatedObjects(modelContent));
                } catch (JsonProcessingException e) {
                  throw new RuntimeException(e);
                }
              }
              if (content.action().exists()) {
                opBuilder.setOperationType(OperationType.Put);
              } else {
                opBuilder.setOperationType(OperationType.Delete);
              }
            });
    return b.build();
  }
}



