All downloads are free. Search and download functionality uses the official Maven repository.

overflowdb.Graph Maven / Gradle / Ivy

package overflowdb;

import gnu.trove.iterator.TObjectIntIterator;
import gnu.trove.map.hash.TObjectIntHashMap;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import overflowdb.storage.NodeDeserializer;
import overflowdb.storage.NodeSerializer;
import overflowdb.storage.NodesWriter;
import overflowdb.storage.OdbStorage;
import overflowdb.util.*;

import java.io.IOException;
import java.util.*;
import java.util.concurrent.atomic.AtomicLong;
import java.util.function.Function;
import java.util.function.Predicate;

public final class Graph implements AutoCloseable {
  // Class-wide SLF4J logger.
  private static final Logger logger = LoggerFactory.getLogger(Graph.class);

  // Id counter used by addNode(label, ...), which hands out currentId.incrementAndGet(); -1 on a fresh graph.
  protected final AtomicLong currentId = new AtomicLong(-1L);
  // Holds all nodes of this graph; queried by id and by label throughout this class.
  final NodesList nodes = new NodesList();
  // Index bookkeeping; indexes are stored on shutdown and re-initialised when opening existing storage.
  public final IndexManager indexManager = new IndexManager(this);
  // Configuration passed to the constructor; consulted again on shutdown for the storage location.
  private final Config config;
  // Set to true by close(); read without synchronization via isClosed().
  private volatile boolean closed = false;

  // Node factories keyed by the label they are registered for (see open).
  protected final Map nodeFactoryByLabel;
  // Edge factories keyed by the label they are registered for (see open).
  protected final Map edgeFactoryByLabel;

  // Backing storage: created at the configured location if there is one, otherwise backed by a temp file.
  protected final OdbStorage storage;
  // Serializer built with the property conversion function handed to open.
  public final NodeSerializer nodeSerializer;
  // Deserializer used to load node references from existing storage.
  protected final NodeDeserializer nodeDeserializer;
  // Interner shared with the storage; cleared when the graph is closed.
  protected final StringInterner stringInterner;
  // Present only when overflow is enabled in the config.
  protected final Optional heapUsageMonitor;
  // Snapshot of config.isOverflowEnabled() taken in the constructor.
  protected final boolean overflowEnabled;
  // null when overflow is disabled - callers in this class null-check it before use.
  protected final ReferenceManager referenceManager;
  // Writes nodes to storage; used directly on shutdown when no ReferenceManager exists.
  protected final NodesWriter nodesWriter;

  /**
   * @param convertPropertyForPersistence applied to all element property values by @{@link NodeSerializer} prior
   *                                      to persisting nodes/edges. That's useful if your runtime types are not
   *                                      supported by plain java, e.g. because you're using Scala Seq etc.
   */
  public static Graph open(Config configuration,
                           List> nodeFactories,
                           List> edgeFactories,
                           Function convertPropertyForPersistence) {
    Map nodeFactoryByLabel = new HashMap<>(nodeFactories.size());
    Map edgeFactoryByLabel = new HashMap<>(edgeFactories.size());
    nodeFactories.forEach(factory -> nodeFactoryByLabel.put(factory.forLabel(), factory));
    edgeFactories.forEach(factory -> edgeFactoryByLabel.put(factory.forLabel(), factory));
    return new Graph(configuration, nodeFactoryByLabel, edgeFactoryByLabel, convertPropertyForPersistence);
  }

  public static Graph open(Config configuration,
                           List> nodeFactories,
                           List> edgeFactories) {
    return open(configuration, nodeFactories, edgeFactories, Function.identity());
  }

  /**
   * Wires up storage, serializer/deserializer and - if enabled in the config - the overflow machinery.
   * When a storage location is configured, the nodes and indices already stored there are loaded
   * before the overflow components are created.
   *
   * @param config graph configuration
   * @param nodeFactoryByLabel node factories keyed by label
   * @param edgeFactoryByLabel edge factories keyed by label
   * @param convertPropertyForPersistence conversion handed to the {@link NodeSerializer}
   */
  private Graph(Config config,
                Map nodeFactoryByLabel,
                Map edgeFactoryByLabel,
                Function convertPropertyForPersistence) {
    this.config = config;
    this.nodeFactoryByLabel = nodeFactoryByLabel;
    this.edgeFactoryByLabel = edgeFactoryByLabel;
    this.stringInterner = new StringInterner();

    // explicit location if one is configured, temp file otherwise
    if (config.getStorageLocation().isPresent()) {
      this.storage = OdbStorage.createWithSpecificLocation(config.getStorageLocation().get().toFile(), stringInterner);
    } else {
      this.storage = OdbStorage.createWithTempFile(stringInterner);
    }
    this.nodeDeserializer = new NodeDeserializer(this, nodeFactoryByLabel, config.isSerializationStatsEnabled(), storage);
    this.nodeSerializer = new NodeSerializer(config.isSerializationStatsEnabled(), storage, convertPropertyForPersistence);
    this.nodesWriter = new NodesWriter(nodeSerializer, storage);

    // only a configured location can hold previously persisted elements
    if (config.getStorageLocation().isPresent()) {
      initElementCollections(storage);
    }

    this.overflowEnabled = config.isOverflowEnabled();
    if (!this.overflowEnabled) {
      this.referenceManager = null; // not using Optional only due to performance reasons - it's invoked *a lot*
      this.heapUsageMonitor = Optional.empty();
    } else {
      // use the caller-supplied executor service if there is one
      this.referenceManager = config.getExecutorService().isPresent()
          ? new ReferenceManager(storage, nodesWriter, config.getExecutorService().get())
          : new ReferenceManager(storage, nodesWriter);
      this.heapUsageMonitor = Optional.of(new HeapUsageMonitor(config.getHeapPercentageThreshold(), this.referenceManager));
    }
  }

  private void initElementCollections(OdbStorage storage) {
    long start = System.currentTimeMillis();
    final Set> serializedNodes = storage.allNodes();
    final int serializedNodesCount = serializedNodes.size();
    if (serializedNodesCount > 0) {
      logger.info(String.format("initializing %d nodes from existing storage", serializedNodesCount));
    }
    int importCount = 0;
    long maxId = currentId.get();

    final Iterator> serializedVertexIter = serializedNodes.iterator();
    while (serializedVertexIter.hasNext()) {
      final Map.Entry entry = serializedVertexIter.next();
      try {
        final NodeRef nodeRef = nodeDeserializer.deserializeRef(entry.getValue());
        nodes.add(nodeRef);
        importCount++;
        if (importCount % 131072 == 0) { // some random magic number that allows for quick division
          logger.debug("imported " + importCount + " elements - still running...");
        }
        if (nodeRef.id > maxId) maxId = nodeRef.id;
      } catch (IOException e) {
        throw new RuntimeException("error while initializing vertex from storage: id=" + entry.getKey(), e);
      }
    }

    currentId.set(maxId + 1);
    indexManager.initializeStoredIndices(storage);
    long elapsedMillis = System.currentTimeMillis() - start;
    logger.debug(String.format("initialized %s from existing storage in %sms", this, elapsedMillis));
  }


  ////////////// STRUCTURE API METHODS //////////////////

  /**
   * Adds a node with the given label and properties, using the next value of the internal id counter as its id.
   * Letting the graph pick the id is the safest way to avoid id clashes.
   *
   * @param label label of the new node; a node factory must be registered for it
   * @param keyValues properties of the new node, as handed to {@code PropertyHelper.attachProperties}
   * @return the newly added node
   */
  public Node addNode(final String label, final Object... keyValues) {
    final long nextId = currentId.incrementAndGet();
    return addNodeInternal(nextId, label, keyValues);
  }

  /**
   * Add a node with given id, label and properties.
   * The internal id counter is raised to {@code id} if it is currently lower, so that ids assigned automatically
   * afterwards are larger than {@code id}.
   *
   * @param id id of the new node
   * @param label label of the new node; a node factory must be registered for it
   * @param keyValues properties of the new node
   * @return the newly added node
   * @throws IllegalArgumentException if a node with the given ID already exists
   */
  public Node addNode(final long id, final String label, final Object... keyValues) {
    if (nodes.contains(id)) {
      throw new IllegalArgumentException(String.format("Node with id already exists: %s", id));
    }

    // atomic "set to max(current, id)": retries internally if another thread changes the counter,
    // without the recursion the hand-written compare-and-set needed
    currentId.accumulateAndGet(id, Long::max);
    return addNodeInternal(id, label, keyValues);
  }

  /**
   * Creates the node via {@link #createNode} and adds it to {@link #nodes}.
   *
   * @throws AssertionError if the graph has been closed
   */
  private Node addNodeInternal(long id, String label, Object... keyValues) {
    if (!isClosed()) {
      final NodeRef created = createNode(id, label, keyValues);
      nodes.add(created);
      return created;
    }
    throw new AssertionError("graph is closed - no more mutation allowed");
  }

  /**
   * Asks the node factory registered for the given label to create detached node data.
   *
   * @param label label to look the node factory up by
   * @return whatever the factory's {@code createDetached()} returns
   * @throws IllegalArgumentException if no node factory is registered for the label
   */
  public DetachedNodeData createDetached(String label){
    if (nodeFactoryByLabel.containsKey(label)) {
      final NodeFactory factoryForLabel = nodeFactoryByLabel.get(label);
      return factoryForLabel.createDetached();
    }
    throw new IllegalArgumentException("No NodeFactory for label=" + label + " available.");
  }

  /**
   * Instantiates a node through the factory registered for the label, attaches the given properties and
   * registers the node's reference via {@link #registerNodeRef}.
   *
   * @return the reference of the newly created node
   * @throws AssertionError if the graph has been closed
   * @throws IllegalArgumentException if no node factory is registered for the label
   */
  private NodeRef createNode(final long idValue, final String label, final Object... keyValues) {
    if (isClosed()) {
      throw new AssertionError("graph is closed - no more mutation allowed");
    }
    final boolean factoryKnown = nodeFactoryByLabel.containsKey(label);
    if (!factoryKnown) {
      throw new IllegalArgumentException("No NodeFactory for label=" + label + " available.");
    }

    final NodeFactory factoryForLabel = nodeFactoryByLabel.get(label);
    final NodeDb created = factoryForLabel.createNode(this, idValue, null);
    PropertyHelper.attachProperties(created, keyValues);
    registerNodeRef(created.ref);
    return created.ref;
  }

  /**
   * Delegates to the reference manager's {@code applyBackpressureMaybe}; does nothing when there is no
   * reference manager (overflow disabled).
   * Background: when running low on heap memory, some elements are serialized to disk. Backpressure on newly
   * created elements ensures they are not created faster than old ones are serialized away.
   */
  public void applyBackpressureMaybe() {
    if (referenceManager == null) {
      return;
    }
    referenceManager.applyBackpressureMaybe();
  }

  /**
   * Registers the given NodeRef at the ReferenceManager, so it can be cleared on low memory.
   * Does nothing if there is no ReferenceManager or the graph is already closed.
   */
  public void registerNodeRef(NodeRef ref) {
    if (referenceManager == null || isClosed()) {
      return;
    }
    referenceManager.registerRef(ref);
  }

  /** Renders the simple class name followed by the current node count, e.g. {@code Graph [42 nodes]}. */
  @Override
  public String toString() {
    final String simpleName = getClass().getSimpleName();
    final int numberOfNodes = nodeCount();
    return String.format("%s [%d nodes]", simpleName, numberOfNodes);
  }

  /**
   * Closes the graph. If a storage location is configured, the graph's data is persisted to it during shutdown.
   *
   * The method is synchronized on purpose: when several threads call it concurrently, only the first performs
   * the shutdown, and the others block until it is done rather than returning early and wrongly assuming the
   * graph has finished closing. Calling it on an already closed graph only logs a message.
   */
  @Override
  public synchronized void close() {
    if (isClosed()) {
      logger.info("graph is already closed");
      return;
    }
    // flip the flag first so that mutations are rejected while shutting down
    this.closed = true;
    shutdownNow();
    stringInterner.clear();
  }

  /**
   * Performs the actual shutdown: closes the heap usage monitor (if any), persists indexes and nodes when a
   * storage location is configured, then closes the reference manager (if any) and the storage.
   * The storage is closed even if persisting or closing the reference manager fails.
   */
  private void shutdownNow() {
    logger.info("shutdown: start");
    try {
      heapUsageMonitor.ifPresent(monitor -> monitor.close());
      if (config.getStorageLocation().isPresent()) {

        /* persist to disk: if overflow is enabled, ReferenceManager takes care of that
         * otherwise: persist all nodes here */
        indexManager.storeIndexes(storage);
        if (referenceManager != null) {
          referenceManager.clearAllReferences();
        } else {
          nodes.persistAll(nodesWriter);
        }
      }
    } finally {
      // nested try/finally: a failure while closing the reference manager must not leave the storage open
      try {
        if (referenceManager != null) {
          referenceManager.close();
        }
      } finally {
        storage.close();
      }
    }
    logger.info("shutdown finished");
  }

  /**
   * Overall number of nodes.
   *
   * @return the size of the node list
   */
  public int nodeCount() {
    return this.nodes.size();
  }

  /**
   * Number of nodes for the given label.
   *
   * @param label the node label to count
   * @return the cardinality the node list reports for that label
   */
  public int nodeCount(String label) {
    return this.nodes.cardinality(label);
  }

  /**
   * Number of nodes grouped by label.
   *
   * @return a new map with one entry per label known to the node list, mapping the label to the number of
   *         nodes carrying it
   */
  public Map<String, Integer> nodeCountByLabel() {
    // type arguments restored: labels are iterated as String, counts come from Collection.size()
    final Set<String> nodeLabels = nodes.nodeLabels();
    final Map<String, Integer> counts = new HashMap<>(nodeLabels.size());
    for (String label : nodeLabels) {
      counts.put(label, nodes.nodesByLabel(label).size());
    }
    return counts;
  }

  /**
   * Calculates the number of edges in the graph by summing the outgoing edge count of every node.
   * Note: this is an expensive operation, because edges are stored as part of the nodes.
   *
   * @return the total number of outgoing edges over all nodes
   */
  public int edgeCount() {
    int total = 0;
    // typed iterator (elements are handed to getNodeDb(Node)); named so that it no longer shadows the `nodes` field
    final Iterator<Node> nodeIterator = nodes();
    while (nodeIterator.hasNext()) {
      final NodeDb node = getNodeDb(nodeIterator.next());
      total += node.outEdgeCount();
    }
    return total;
  }

  /**
   * Number of edges grouped by label.
   * Counts are accumulated in a primitive-valued Trove map and copied into a regular map at the end.
   *
   * @return a new map from edge label to the number of edges with that label
   */
  public Map<String, Integer> edgeCountByLabel() {
    // type arguments restored: keys are edge labels, values are int counts
    final TObjectIntHashMap<String> counts = new TObjectIntHashMap<>();
    edges().forEachRemaining(edge ->
      // add 1 to an existing count, or start the label at 1
      counts.adjustOrPutValue(edge.label(), 1, 1)
    );

    final Map<String, Integer> ret = new HashMap<>(counts.size());
    final TObjectIntIterator<String> iterator = counts.iterator();
    while (iterator.hasNext()) {
      iterator.advance();
      ret.put(iterator.key(), iterator.value());
    }
    return ret;
  }

  /**
   * Iterator over all edges - alias for {@link #edges()}.
   *
   * @return the iterator returned by {@code edges()}
   */
  public Iterator E() {
    return this.edges();
  }

  /**
   * Iterator over all edges, built by flat-mapping every node to its {@code outE()} iterator.
   *
   * @return an iterator over the outgoing edges of all nodes
   */
  public Iterator edges() {
    return IteratorUtils.flatMap(nodes(), n -> n.outE());
  }

  /**
   * Iterator over edges with the given label, built by flat-mapping every node to its {@code outE(label)} iterator.
   *
   * @param label the edge label to select
   * @return an iterator over the outgoing edges of all nodes that carry the label
   */
  public Iterator edges(String label) {
    return IteratorUtils.flatMap(nodes(), n -> n.outE(label));
  }

  /**
   * Iterator over all nodes - alias for {@link #nodes()}.
   *
   * @return the iterator returned by {@code nodes()}
   */
  public Iterator V() {
    return this.nodes();
  }

  /**
   * Iterator over all nodes.
   *
   * @return the iterator of the underlying node list
   */
  public final Iterator nodes() {
    return this.nodes.iterator();
  }

  /**
   * Iterator over nodes with the provided ids - alias for {@code nodes(ids...)}.
   * Note: does not return any nodes if no ids are provided.
   *
   * @param ids ids of the nodes to look up
   * @return the iterator returned by {@code nodes(ids)}
   */
  public Iterator V(long... ids) {
    return this.nodes(ids);
  }

  /**
   * Looks a node up by id in the node list.
   *
   * @param id id of the requested node
   * @return the node with the given {@code id}, or {@code null} if there is no such node
   */
  public Node node(long id) {
    return this.nodes.nodeById(id);
  }

  /**
   * Iterator over nodes with the provided ids, each resolved via {@link #node(long)}.
   * Empty if no ids are provided.
   *
   * @param ids ids of the nodes to look up
   * @return an iterator yielding the lookup result for each id, in the order given
   */
  public Iterator nodes(long... ids) {
    switch (ids.length) {
      case 0:
        return Collections.emptyIterator();
      case 1:
        // optimization for common case where only one id is requested
        return IteratorUtils.from(node(ids[0]));
      default:
        return IteratorUtils.map(Arrays.stream(ids).iterator(), this::node);
    }
  }

  /**
   * Iterator over nodes with the given label.
   *
   * @param label the node label to select
   * @return an iterator over the nodes the node list holds for that label; empty if it holds none
   */
  public Iterator nodes(final String label) {
    // same null guard as addNodesToMultiIterator applies to the very same lookup
    final Collection nodesWithLabel = nodes.nodesByLabel(label);
    if (nodesWithLabel == null) {
      return Collections.emptyIterator();
    }
    return nodesWithLabel.iterator();
  }

  /**
   * Iterator over nodes carrying any of the given labels, visiting the labels in the order given.
   *
   * @param labels the node labels to select
   * @return one iterator chaining the per-label node iterators
   */
  public Iterator nodes(final String... labels) {
    final MultiIterator combined = new MultiIterator<>();
    for (int i = 0; i < labels.length; i++) {
      addNodesToMultiIterator(combined, labels[i]);
    }
    return combined;
  }

  /**
   * Iterator over nodes carrying any of the given labels, visiting the labels in the set's iteration order.
   *
   * @param labels the node labels to select
   * @return one iterator chaining the per-label node iterators
   */
  public Iterator nodes(final Set labels) {
    final MultiIterator combined = new MultiIterator<>();
    for (String wantedLabel : labels) {
      addNodesToMultiIterator(combined, wantedLabel);
    }
    return combined;
  }

  /**
   * Iterator over nodes whose label is accepted by the given predicate. The predicate is tested once per label
   * known to the node list, not once per node.
   *
   * @param labelPredicate decides which labels to include
   * @return one iterator chaining the node iterators of all accepted labels
   */
  public Iterator nodes(final Predicate labelPredicate) {
    final MultiIterator combined = new MultiIterator<>();
    for (String candidate : nodes.nodeLabels()) {
      if (!labelPredicate.test(candidate)) {
        continue;
      }
      addNodesToMultiIterator(combined, candidate);
    }
    return combined;
  }

  /**
   * Appends the iterator over all nodes with the given label to the multi iterator; does nothing if the node
   * list returns {@code null} for the label.
   */
  private final void addNodesToMultiIterator(final MultiIterator multiIterator, final String label) {
    final Collection nodesWithLabel = nodes.nodesByLabel(label);
    if (nodesWithLabel == null) {
      return;
    }
    multiIterator.addIterator(nodesWithLabel.iterator());
  }

  /**
   * @return {@code true} once {@link #close()} has started shutting the graph down
   */
  public boolean isClosed() {
    return this.closed;
  }

  /**
   * @return the storage backing this graph
   */
  public OdbStorage getStorage() {
    return this.storage;
  }

  /**
   * Copies all nodes/edges into the given empty graph, preserving their ids and properties.
   * Works in two passes: first every node is added to the destination, then - once all nodes exist there -
   * each node's outgoing and incoming edges are stored on its counterpart in the destination.
   *
   * @param destination the graph to copy into; must not contain any nodes
   * @throws AssertionError if the destination already contains nodes
   */
  public void copyTo(Graph destination) {
    if (destination.nodeCount() > 0) {
      throw new AssertionError("destination graph must be empty, but isn't");
    }

    // pass 1: nodes, with their original ids
    nodes().forEachRemaining(source -> {
      destination.addNode(source.id(), source.label(), PropertyHelper.toKeyValueArray(source.propertiesMap()));
    });

    // pass 2: adjacency, recorded on the copied node for both directions
    nodes().forEachRemaining(source -> {
      NodeDb copy = ((NodeRef) destination.node(source.id())).get();

      source.outE().forEachRemaining(outEdge -> {
        NodeRef target = (NodeRef) destination.node(outEdge.inNode().id());
        copy.storeAdjacentNode(Direction.OUT, outEdge.label(), target, PropertyHelper.toKeyValueArray(outEdge.propertiesMap()));
      });
      source.inE().forEachRemaining(inEdge -> {
        NodeRef origin = (NodeRef) destination.node(inEdge.outNode().id());
        copy.storeAdjacentNode(Direction.IN, inEdge.label(), origin, PropertyHelper.toKeyValueArray(inEdge.propertiesMap()));
      });
    });
  }

  /**
   * Removes the given node from the node list, the index manager and the storage.
   * NOTE(review): adjacent edges are not touched in this method - presumably handled by the caller; confirm.
   *
   * @param node the node to remove; either a {@code NodeRef} or a {@code NodeDb}
   */
  public void remove(Node node) {
    final NodeRef ref = getNodeRef(node);
    this.nodes.remove(ref);
    this.indexManager.removeElement(ref);
    this.storage.removeNode(node.id());
  }

  /**
   * Returns the reference for the given node: the node itself if it already is a {@code NodeRef}, otherwise
   * the {@code ref} of the {@code NodeDb} it is cast to.
   */
  private NodeRef getNodeRef(Node node) {
    if (!(node instanceof NodeRef)) {
      return ((NodeDb) node).ref;
    }
    return (NodeRef) node;
  }

  /**
   * Returns the {@code NodeDb} for the given node: the node itself if it already is one, otherwise the result
   * of {@code get()} on the {@code NodeRef} it is cast to.
   */
  private NodeDb getNodeDb(Node node) {
    if (!(node instanceof NodeDb)) {
      return ((NodeRef) node).get();
    }
    return (NodeDb) node;
  }

  /**
   * Hands the given library name and version to the storage's {@code persistLibraryVersion}.
   *
   * @param name name of the library
   * @param version version of the library
   */
  public void persistLibraryVersion(String name, String version) {
    this.storage.persistLibraryVersion(name, version);
  }

  public ArrayList> getAllLibraryVersions() {
    return storage.getAllLibraryVersions();
  }

  /**
   * @return the string interner created by the constructor and shared with the storage
   */
  public StringInterner getStringInterner() {
    return stringInterner;
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy