Please wait. This can take a few minutes ...
Many resources are needed to download a project. Please understand that we have to compensate our server costs. Thank you in advance.
Project price only 1 $
You can buy this project and download/modify it as often as you want.
apoc.meta.Meta Maven / Gradle / Ivy
/*
* Copyright (c) "Neo4j"
* Neo4j Sweden AB [http://neo4j.com]
*
* This file is part of Neo4j.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package apoc.meta;
import static apoc.util.MapUtil.map;
import static java.lang.String.format;
import static org.neo4j.token.api.TokenConstants.ANY_LABEL;
import static org.neo4j.token.api.TokenConstants.ANY_RELATIONSHIP_TYPE;
import apoc.export.util.NodesAndRelsSubGraph;
import apoc.result.GraphResult;
import apoc.result.VirtualGraph;
import apoc.result.VirtualNode;
import apoc.result.VirtualRelationship;
import apoc.util.CollectionUtils;
import apoc.util.MapUtil;
import apoc.util.collection.Iterables;
import com.google.common.collect.Sets;
import java.util.AbstractMap;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.TreeMap;
import java.util.concurrent.ThreadLocalRandom;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import java.util.stream.StreamSupport;
import org.apache.commons.lang3.tuple.Pair;
import org.neo4j.cypher.export.CypherResultSubGraph;
import org.neo4j.cypher.export.DatabaseSubGraph;
import org.neo4j.cypher.export.SubGraph;
import org.neo4j.graphdb.Direction;
import org.neo4j.graphdb.Entity;
import org.neo4j.graphdb.Label;
import org.neo4j.graphdb.Node;
import org.neo4j.graphdb.Relationship;
import org.neo4j.graphdb.RelationshipType;
import org.neo4j.graphdb.ResourceIterator;
import org.neo4j.graphdb.Result;
import org.neo4j.graphdb.Transaction;
import org.neo4j.graphdb.schema.ConstraintDefinition;
import org.neo4j.graphdb.schema.ConstraintType;
import org.neo4j.graphdb.schema.IndexDefinition;
import org.neo4j.graphdb.schema.Schema;
import org.neo4j.internal.kernel.api.Read;
import org.neo4j.internal.kernel.api.TokenRead;
import org.neo4j.kernel.api.KernelTransaction;
import org.neo4j.logging.Log;
import org.neo4j.procedure.Context;
import org.neo4j.procedure.Description;
import org.neo4j.procedure.Name;
import org.neo4j.procedure.NotThreadSafe;
import org.neo4j.procedure.Procedure;
import org.neo4j.procedure.UserFunction;
/**
* The Meta class provides metadata-related operations and functions for working with Neo4j graph database.
* It is part of the APOC (Awesome Procedures on Cypher) library.
*/
public class Meta {
/** Key of the metadata map: an entity kind ({@code Types.NODE} or {@code Types.RELATIONSHIP} in this file) plus a label or relationship-type name. */
private record MetadataKey(Types type, String key) {}
// Fields marked with org.neo4j.procedure.Context; presumably populated by the procedure framework — confirm.
// Transaction used for Cypher execution, schema access and label/type enumeration in this class.
@Context
public Transaction tx;
// Kernel transaction; token and count-store reads go through it.
@Context
public KernelTransaction kernelTx;
// NOTE(review): a second Transaction field next to `tx`; presumably the same transaction — confirm before merging the two.
@Context
public Transaction transaction;
// Logger used for debug output when a procedure fails.
@Context
public Log log;
/**
* One output row of a metadata operation.
* <p>
* All fields are public and mutable; {@code MetaItem} in this file fills them in while sampling the graph.
*/
public static class MetaResult {
@Description("The label or type name.")
public String label;
@Description("The property name.")
public String property;
@Description("The count of seen values.")
public long count;
@Description("If all seen values are unique.")
public boolean unique;
@Description("If an index exists for this property.")
public boolean index;
@Description("If an existence constraint exists for this property.")
public boolean existence;
@Description("The type represented by this row.")
public String type;
@Description(
"Indicates whether the property is an array. If the type column is \"RELATIONSHIP,\" this will be true if there is at least one node with two outgoing relationships of the type specified by the label or property column.")
public boolean array;
@Description("This is always null.")
public List sample;
@Description(
"The ratio (rounded down) of the count of outgoing relationships for a specific label and relationship type relative to the total count of those patterns.")
public long left; // MetaItem.rel() sets this to leftCount / count (integer division)
@Description(
"The ratio (rounded down) of the count of incoming relationships for a specific label and relationship type relative to the total count of those patterns.")
public long right; // MetaItem.rel() sets this to rightCount / count (integer division)
@Description("The labels of connect nodes.")
public List other = new ArrayList<>();
@Description(
"For uniqueness constraints, this field shows other labels present on nodes that also contain the uniqueness constraint.")
public List otherLabels = new ArrayList<>();
@Description("Whether this refers to a node or a relationship.")
public String elementType;
}
/**
 * A mutable {@link MetaResult} row that also accumulates relationship degree totals.
 * Every mutator returns {@code this} so that calls can be chained.
 */
public static class MetaItem extends MetaResult {
    /** Running sum of the {@code out} values passed to {@link #rel(long, long)}. */
    public long leftCount;
    /** Running sum of the {@code in} values passed to {@link #rel(long, long)}. */
    public long rightCount;

    /**
     * @param label the label or relationship-type name stored in {@code label}
     * @param name the name stored in {@code property}
     */
    public MetaItem(String label, String name) {
        this.label = label;
        this.property = name;
    }

    /** Appends the given name to {@code otherLabels} (duplicates are not filtered). */
    public MetaItem addLabel(String label) {
        this.otherLabels.add(label);
        return this;
    }

    /** Increments {@code count} by one. */
    public MetaItem inc() {
        count += 1;
        return this;
    }

    /**
     * Marks this row as a relationship row and folds one observation into the degree totals.
     * {@code left} and {@code right} are recomputed as totals divided by {@code count},
     * so {@link #inc()} must have been called at least once beforehand.
     */
    public MetaItem rel(long out, long in) {
        this.type = Types.RELATIONSHIP.name();
        this.array = this.array || out > 1;
        this.leftCount = this.leftCount + out;
        this.rightCount = this.rightCount + in;
        // integer division: both averages are rounded down
        this.left = this.leftCount / this.count;
        this.right = this.rightCount / this.count;
        return this;
    }

    /** Adds every label not yet present in {@code other}, keeping first-seen order. */
    public MetaItem other(List labels) {
        for (String candidate : labels) {
            if (this.other.contains(candidate)) {
                continue;
            }
            this.other.add(candidate);
        }
        return this;
    }

    /** Sets the {@code type} column. */
    public MetaItem type(String type) {
        this.type = type;
        return this;
    }

    /** Sets the {@code array} flag. */
    public MetaItem array(boolean array) {
        this.array = array;
        return this;
    }

    /**
     * Maps {@code "NODE"} to {@code "node"} and {@code "RELATIONSHIP"} to {@code "relationship"};
     * any other value leaves {@code elementType} unchanged.
     */
    public MetaItem elementType(String elementType) {
        switch (elementType) {
            case "NODE" -> this.elementType = "node";
            case "RELATIONSHIP" -> this.elementType = "relationship";
            default -> {
                // unrecognised kinds are deliberately ignored
            }
        }
        return this;
    }
}
/**
 * Tells whether the type name computed by {@link #typeCypher(Object)} for {@code value}
 * equals {@code type}, ignoring case.
 *
 * @param value the value to inspect
 * @param type the expected type name; {@code null} never matches
 * @return {@code true} when the names match case-insensitively, {@code false} otherwise
 */
@UserFunction("apoc.meta.cypher.isType")
@Description("Returns true if the given value matches the given type.")
public boolean isTypeCypher(
        @Name(value = "value", description = "An object to check the type of.") Object value,
        @Name(value = "type", description = "The verification type.") String type) {
    // a null type name used to fail with NullPointerException; treat it as "no match"
    return type != null && type.equalsIgnoreCase(typeCypher(value));
}
/**
 * Resolves the type name of a value through {@code Types.of}.
 * For {@code Types.ANY} the simple class name of the value is returned instead of the enum name.
 *
 * @param value the value to classify
 * @return the type name
 */
@UserFunction("apoc.meta.cypher.type")
@Description("Returns the type name of the given value.")
public String typeCypher(@Name(value = "value", description = "An object to get the type of.") Object value) {
    final Types detected = Types.of(value);
    // TODO Check if it's necessary
    if (detected == Types.ANY) {
        return value.getClass().getSimpleName();
    }
    return detected.toString();
}
/**
 * Builds a map from property key to type name for a node, a relationship or a plain map.
 * Any other kind of argument produces an empty result.
 *
 * @param target the node, relationship or map whose entries are classified
 * @return an insertion-ordered map of key to the name returned by {@link #typeCypher(Object)}
 */
@UserFunction("apoc.meta.cypher.types")
@Description("Returns a `MAP` containing the type names of the given values.")
public Map typesCypher(
        @Name(value = "props", description = "A relationship, node or map to get the property types from.")
        Object target) {
    final Map properties;
    // checked in reverse order of the original sequential ifs, so the last match still wins
    if (target instanceof Map asMap) {
        //noinspection unchecked
        properties = asMap;
    } else if (target instanceof Relationship relationship) {
        properties = relationship.getAllProperties();
    } else if (target instanceof Node node) {
        properties = node.getAllProperties();
    } else {
        properties = Collections.emptyMap();
    }
    final Map typeNames = new LinkedHashMap<>(properties.size());
    properties.forEach((name, propertyValue) -> typeNames.put(name, typeCypher(propertyValue)));
    return typeNames;
}
/**
* The MetaStats class represents metadata statistics collected from the transactional database.
* It includes counts for labels, relationship types, property keys, nodes, relationships, and various maps for stats.
* All fields are final and assigned once in the constructor.
*/
public static class MetaStats {
@Description("The total number of distinct node labels.")
public final long labelCount;
@Description("The total number of distinct relationship types.")
public final long relTypeCount;
@Description("The count of property keys.")
public final long propertyKeyCount;
@Description("The total number of nodes.")
public final long nodeCount;
@Description("The total number of relationships.")
public final long relCount;
@Description("A map of labels to their count.")
public final Map labels;
@Description("A map of relationship types per start or end node label.")
public final Map relTypes;
@Description("A map of relationship types to their count.")
public final Map relTypesCount;
@Description("A map containing all the given return fields from this procedure.")
public final Map stats;
/**
* Constructs a MetaStats object with the provided metadata statistics.
*
* @param labelCount The count of labels in the database.
* @param relTypeCount The count of relationship types in the database.
* @param propertyKeyCount The count of property keys in the database.
* @param nodeCount The count of nodes in the database.
* @param relCount The count of relationships in the database.
* @param labels A map of label names and their corresponding counts.
* @param relTypes A map of relationship patterns (optionally qualified by a start or end label) and their counts.
* @param relTypesCount A map of relationship type names and their total count.
*/
public MetaStats(
long labelCount,
long relTypeCount,
long propertyKeyCount,
long nodeCount,
long relCount,
Map labels,
Map relTypes,
Map relTypesCount) {
this.labelCount = labelCount;
this.relTypeCount = relTypeCount;
this.propertyKeyCount = propertyKeyCount;
this.nodeCount = nodeCount;
this.relCount = relCount;
this.labels = labels;
this.relTypes = relTypes;
this.relTypesCount = relTypesCount;
// Summary map of the values above; note that relTypesCount is not copied into it.
this.stats = map(
"labelCount",
labelCount,
"relTypeCount",
relTypeCount,
"propertyKeyCount",
propertyKeyCount,
"nodeCount",
nodeCount,
"relCount",
relCount,
"labels",
labels,
"relTypes",
relTypes);
}
}
/**
* The StatsCallback interface defines callback methods for collecting label and relationship statistics.
*/
interface StatsCallback {
// Reports the node count for one label.
void label(String labelName, long count);
// Reports the total relationship count for one relationship type.
void rel(String typeName, long count);
// Reports, for one relationship type and one label, the counts of relationships
// starting at (out) and ending at (in) nodes with that label.
void rel(String typeName, String labelName, long out, long in);
}
/**
 * Procedure entry point that emits exactly one row holding the statistics gathered by {@code collectStats()}.
 *
 * @return a single-element stream
 */
@NotThreadSafe
@Procedure("apoc.meta.stats")
@Description("Returns the metadata stored in the transactional database statistics.")
public Stream stats() {
    final MetaStats snapshot = collectStats();
    return Stream.of(snapshot);
}
/**
 * Counts the nodes that carry the given labels.
 * <p>
 * With an empty {@code nodes} list every label in use is considered. When {@code includeRels} is
 * empty the result is the sum of the per-label node counts. Otherwise each node is counted at most
 * once (tracked by element id), and only if it has a relationship matching one of the
 * {@code includeRels} entries: an entry ending in {@code >} matches outgoing relationships, one
 * ending in {@code <} matches incoming ones, anything else matches both directions. Null and blank
 * entries are ignored.
 *
 * @param nodes label names; null elements are skipped and names are trimmed
 * @param config configuration map, read through {@code MetaConfig}
 * @return the number of matching nodes
 * @throws ArithmeticException if the running sum overflows a {@code long}
 */
@NotThreadSafe
@UserFunction(name = "apoc.meta.nodes.count")
@Description("Returns the sum of the `NODE` values with the given labels in the `LIST`.")
public long count(
        @Name(value = "nodes", defaultValue = "[]", description = "A list of node labels.") List nodes,
        @Name(
                value = "config",
                defaultValue = "{}",
                description =
                        "A relationship, node or map to get the property types from. { includeRels = [] :: LIST }")
        Map config) {
    MetaConfig conf = new MetaConfig(config);
    final var subGraph = DatabaseSubGraph.optimizedForCount(transaction, kernelTx);
    Stream labels = CollectionUtils.isEmpty(nodes)
            ? StreamSupport.stream(subGraph.getAllLabelsInUse().spliterator(), false)
            : nodes.stream().filter(Objects::nonNull).map(String::trim).map(Label::label);
    // true when no relationship filter is configured, so the plain per-label counts are enough
    final boolean noRelFilter = CollectionUtils.isEmpty(conf.getIncludeRels());
    // element ids of nodes already counted, so a node matching several labels/rels counts once
    Set visitedNodes = new HashSet<>();
    return labels.flatMap(label -> noRelFilter
                    ? Stream.of(subGraph.countsForNode(label))
                    : conf.getIncludeRels().stream()
                            .filter(Objects::nonNull)
                            .map(String::trim)
                            // a blank entry has no last character to inspect; skip it instead of
                            // failing with StringIndexOutOfBoundsException
                            .filter(rel -> !rel.isEmpty())
                            .map(rel -> {
                                final int lastCharIdx = rel.length() - 1;
                                final Direction direction;
                                // a trailing '>' or '<' selects the direction and is stripped from the name
                                switch (rel.charAt(lastCharIdx)) {
                                    case '>':
                                        direction = Direction.OUTGOING;
                                        rel = rel.substring(0, lastCharIdx);
                                        break;
                                    case '<':
                                        direction = Direction.INCOMING;
                                        rel = rel.substring(0, lastCharIdx);
                                        break;
                                    default:
                                        direction = Direction.BOTH;
                                }
                                return Pair.of(direction, rel);
                            })
                            .flatMap(pair -> transaction
                                    .findNodes(label)
                                    .map(node -> {
                                        if (!visitedNodes.contains(node.getElementId())
                                                && node.hasRelationship(
                                                        pair.getLeft(),
                                                        RelationshipType.withName(pair.getRight()))) {
                                            visitedNodes.add(node.getElementId());
                                            return 1L;
                                        } else {
                                            return 0L;
                                        }
                                    })
                                    .stream()))
            .reduce(0L, Math::addExact);
}
/**
* Collects statistics about the graph database.
* <p>
* Label and relationship-type totals are counted from the labels/types in use on {@code tx};
* per-label and per-pattern counts are gathered through {@code collectStats(SubGraph, StatsCallback)};
* property-key, node and relationship totals are read from the kernel transaction.
*
* @return The collected statistics about the graph database.
*/
private MetaStats collectStats() {
// total count per relationship type name
Map relStatsCount = new LinkedHashMap<>();
TokenRead tokenRead = kernelTx.tokenRead();
Read read = kernelTx.dataRead();
long relTypeCount = Iterables.count(tx.getAllRelationshipTypesInUse());
long labelCount = Iterables.count(tx.getAllLabelsInUse());
// the counts are only used as initial-capacity hints for the maps below
Map labelStats = new LinkedHashMap<>((int) labelCount);
Map relStats = new LinkedHashMap<>(2 * (int) relTypeCount);
collectStats(DatabaseSubGraph.optimizedForCount(transaction, kernelTx), new StatsCallback() {
@Override
public void label(String labelName, long count) {
// labels without nodes are left out
if (count > 0) labelStats.put(labelName, count);
}
@Override
public void rel(String typeName, long count) {
if (count > 0) {
relStatsCount.merge(typeName, count, Long::sum);
// key is the unqualified pattern for this type
relStats.put("()-[:" + typeName + "]->()", count);
}
}
@Override
public void rel(String typeName, String labelName, long out, long in) {
// keys are patterns qualified by the start label (out) or the end label (in)
if (out > 0) {
relStats.put("(:" + labelName + ")-[:" + typeName + "]->()", out);
}
if (in > 0) {
relStats.put("()-[:" + typeName + "]->(:" + labelName + ")", in);
}
}
});
return new MetaStats(
labelCount,
relTypeCount,
tokenRead.propertyKeyCount(),
read.countsForNode(ANY_LABEL),
read.countsForRelationship(ANY_LABEL, ANY_RELATIONSHIP_TYPE, ANY_LABEL),
labelStats,
relStats,
relStatsCount);
}
/**
 * Walks the labels and relationship types of a subgraph and reports their counts to a callback.
 * <p>
 * For every label with at least one node the callback receives the node count, followed by the
 * outgoing/incoming relationship counts for each relationship type. Afterwards the total count of
 * every relationship type is reported.
 *
 * @param subGraph The subgraph to collect statistics for.
 * @param cb The callback to receive the collected statistics.
 */
private void collectStats(SubGraph subGraph, StatsCallback cb) {
    final var nodeLabels = subGraph.getAllLabelsInUse();
    final var relTypes = subGraph.getAllRelationshipTypesInUse();
    for (var label : nodeLabels) {
        final long nodeCount = subGraph.countsForNode(label);
        if (nodeCount <= 0) {
            // labels without nodes are not reported at all
            continue;
        }
        final String labelName = label.name();
        cb.label(labelName, nodeCount);
        for (var relType : relTypes) {
            final long outgoing = subGraph.countsForRelationship(label, relType);
            final long incoming = subGraph.countsForRelationship(relType, label);
            cb.rel(relType.name(), labelName, outgoing, incoming);
        }
    }
    for (var relType : relTypes) {
        final String typeName = relType.name();
        cb.rel(typeName, subGraph.countsForRelationship(relType));
    }
}
/**
 * Builds a subgraph from the given input and returns the metadata rows collected from it.
 * <p>
 * Accepted inputs: a Cypher query string (executed on {@code tx}), a map with a {@code nodes} entry
 * and an optional {@code relationships} entry, or a {@code VirtualGraph}.
 *
 * @param graph the subgraph description
 * @param config configuration map, read through {@code MetaConfig}
 * @return a stream of the metadata items of every label and relationship type
 * @throws IllegalArgumentException for an unsupported input type or a map without {@code nodes}
 */
@NotThreadSafe
@Procedure("apoc.meta.data.of")
@Description("Examines the given sub-graph and returns a table of metadata.")
public Stream dataOf(
        @Name(value = "graph", description = "The graph to extract metadata from.") Object graph,
        @Name(
                value = "config",
                defaultValue = "{}",
                description =
                        "Number of nodes to sample, setting sample to `-1` will remove sampling; { sample = 1000 :: INTEGER }")
        Map config) {
    final MetaConfig metaConfig = new MetaConfig(config);
    final SubGraph subGraph;
    if (graph instanceof String query) {
        final Result queryResult = tx.execute(query);
        subGraph = CypherResultSubGraph.from(tx, queryResult, metaConfig.isAddRelationshipsBetweenNodes());
    } else if (graph instanceof Map mGraph) {
        if (!mGraph.containsKey("nodes")) {
            throw new IllegalArgumentException(
                    "Graph Map must contains `nodes` field and `relationships` optionally");
        }
        final Collection nodeItems = (Collection) mGraph.get("nodes");
        final Collection relItems = (Collection) mGraph.get("relationships");
        subGraph = new NodesAndRelsSubGraph(tx, nodeItems, relItems);
    } else if (graph instanceof VirtualGraph vGraph) {
        subGraph = new NodesAndRelsSubGraph(tx, vGraph.nodes(), vGraph.relationships());
    } else {
        throw new IllegalArgumentException("Supported inputs are String, VirtualGraph, Map");
    }
    final var metaData = collectMetaData(subGraph, metaConfig.getSampleMetaConfig());
    return metaData.values().stream().flatMap(itemsByName -> itemsByName.values().stream());
}
// todo ask index for distinct values if index size < 10 or so
// todo put index sizes for indexed properties
/**
 * Collects metadata over the database subgraph and flattens it into one row per metadata item.
 *
 * @param config configuration map, read through {@code SampleMetaConfig}
 * @return a stream of the metadata items of every label and relationship type
 */
@NotThreadSafe
@Procedure("apoc.meta.data")
@Description("Examines the full graph and returns a table of metadata.")
public Stream data(
        @Name(
                value = "config",
                defaultValue = "{}",
                description =
                        "Number of nodes to sample, setting sample to `-1` will remove sampling; { sample = 1000 :: INTEGER }")
        Map config) {
    final SampleMetaConfig metaConfig = new SampleMetaConfig(config);
    final var wholeGraph = DatabaseSubGraph.optimizedForCount(transaction, kernelTx);
    final var metaData = collectMetaData(wholeGraph, metaConfig);
    return metaData.values().stream().flatMap(itemsByName -> itemsByName.values().stream());
}
/** Single-column result row wrapping a map; {@code schema} in this file returns one such row. */
public record MetaMapResult(@Description("Meta information represented as a map.") Map value) {}
/**
* Builds one map describing every node label and relationship type of the database.
* <p>
* Node and relationship descriptions are computed separately and then merged. A relationship type
* whose name equals a node label is stored under {@code "<name> (RELATIONSHIP)"} so that it does
* not overwrite the node entry.
*
* @param config configuration map, read through {@code SampleMetaConfig}
* @return a single-row stream holding the merged map
*/
@NotThreadSafe
@Procedure("apoc.meta.schema")
@Description("Examines the given sub-graph and returns metadata as a `MAP`.")
public Stream schema(
@Name(
value = "config",
defaultValue = "{}",
description =
"Number of nodes to sample, setting sample to `-1` will remove sampling; { sample = 1000 :: INTEGER }")
Map config) {
MetaStats metaStats = collectStats();
SampleMetaConfig metaConfig = new SampleMetaConfig(config);
Map> metaData =
collectMetaData(DatabaseSubGraph.optimizedForCount(transaction, kernelTx), metaConfig);
// relationships are computed first because the node entries embed their property maps
Map relationships = collectRelationshipsMetaData(metaStats, metaData);
Map nodes = collectNodesMetaData(metaStats, metaData, relationships);
// names used both as a node label and as a relationship type
final Collection commonKeys = Sets.intersection(nodes.keySet(), relationships.keySet());
if (!commonKeys.isEmpty()) {
// NOTE(review): Collectors.toMap gives no ordering guarantee, so the insertion order of
// `relationships` may be lost in this branch — confirm that is acceptable
relationships = relationships.entrySet().stream()
.map(e -> {
final String key = e.getKey();
return commonKeys.contains(key)
? new AbstractMap.SimpleEntry<>(
format("%s (%s)", key, Types.RELATIONSHIP.name()), e.getValue())
: e;
})
.collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue));
}
// merge relationship entries into the node map, which becomes the result
nodes.putAll(relationships);
return Stream.of(new MetaMapResult(nodes));
}
/**
* This procedure is intended to replicate what's in the core Neo4j product, but with the crucial difference that it
* supports flexible sampling options, and does not scan the entire database. The result is producing a table of
* metadata that is useful for generating "Tables 4 Labels" schema designs for RDBMSs, but in a more performant way.
* <p>
* Delegates to {@code collectTables4LabelsProfile} and returns the node view of the resulting profile.
*
* @param config configuration map, read through {@code MetaConfig}
* @return the profile's node stream
* @throws RuntimeException wrapping any exception raised while building the profile
*/
@NotThreadSafe
@Procedure("apoc.meta.nodeTypeProperties")
@Description(
"Examines the full graph and returns a table of metadata with information about the `NODE` values therein.")
public Stream nodeTypeProperties(
@Name(
value = "config",
defaultValue = "{}",
description =
"""
{
includeLabels = [] :: LIST,
includeRels = [] :: LIST,
excludeLabels = [] :: LIST,
excludeRels = [] :: LIST,
sample = 1000 :: INTEGER,
maxRels = 100 :: INTEGER
}
""")
Map config) {
MetaConfig metaConfig = new MetaConfig(config);
try {
return collectTables4LabelsProfile(metaConfig).asNodeStream();
} catch (Exception e) {
// logged at debug level only; the caller still sees the failure through the rethrow
log.debug("apoc.meta.nodeTypeProperties(): Failed to return stream", e);
throw new RuntimeException(e);
}
}
/**
* This procedure is intended to replicate what's in the core Neo4j product, but with the crucial difference that it supports flexible sampling options, and
* does not scan the entire database. The result is producing a table of metadata that is useful for generating "Tables 4 Labels" schema designs for
* RDBMSs, but in a more performant way.
* <p>
* Delegates to {@code collectTables4LabelsProfile} and returns the relationship view of the resulting profile.
*
* @param config configuration map, read through {@code MetaConfig}
* @return the profile's relationship stream
* @throws RuntimeException wrapping any exception raised while building the profile
*/
@NotThreadSafe
@Procedure("apoc.meta.relTypeProperties")
@Description(
"Examines the full graph and returns a table of metadata with information about the `RELATIONSHIP` values therein.")
public Stream relTypeProperties(
@Name(
value = "config",
defaultValue = "{}",
description =
"""
{
includeLabels = [] :: LIST,
includeRels = [] :: LIST,
excludeLabels = [] :: LIST,
excludeRels = [] :: LIST,
sample = 1000 :: INTEGER,
maxRels = 100 :: INTEGER
}
""")
Map config) {
MetaConfig metaConfig = new MetaConfig(config);
try {
return collectTables4LabelsProfile(metaConfig).asRelStream();
} catch (Exception e) {
// logged at debug level only; the caller still sees the failure through the rethrow
log.debug("apoc.meta.relTypeProperties(): Failed to return stream", e);
throw new RuntimeException(e);
}
}
/**
* Collects metadata for generating a "Tables 4 Labels" profile based on the provided configuration. This method
* examines the graph schema and counts the occurrences of labels and relationships. It also observes nodes and their
* relationships, applying sampling and filtering based on the configuration. The collected metadata is stored in a
* Tables4LabelsProfile object.
*
* @param config label/relationship include and exclude sets plus the sample size
* @return the profile after {@code finished()} has been called on it
*/
private Tables4LabelsProfile collectTables4LabelsProfile(MetaConfig config) {
Tables4LabelsProfile profile = new Tables4LabelsProfile();
// Collect constraints from the schema
Schema schema = tx.schema();
for (ConstraintDefinition cd : schema.getConstraints()) {
if (cd.isConstraintType(ConstraintType.NODE_PROPERTY_EXISTENCE)) {
// NOTE(review): property keys are appended to whatever list ConstraintTracker already holds;
// if that map outlives a single call the lists would accumulate duplicates — confirm
List props = new ArrayList<>(10);
if (ConstraintTracker.nodeConstraints.containsKey(cd.getLabel().name())) {
props = ConstraintTracker.nodeConstraints.get(cd.getLabel().name());
}
cd.getPropertyKeys().forEach(props::add);
ConstraintTracker.nodeConstraints.put(cd.getLabel().name(), props);
} else if (cd.isConstraintType(ConstraintType.RELATIONSHIP_PROPERTY_EXISTENCE)) {
List props = new ArrayList<>(10);
if (ConstraintTracker.relConstraints.containsKey(
cd.getRelationshipType().name())) {
props = ConstraintTracker.relConstraints.get(
cd.getRelationshipType().name());
}
cd.getPropertyKeys().forEach(props::add);
ConstraintTracker.relConstraints.put(cd.getRelationshipType().name(), props);
}
}
// Get label counts from the label count store
Map countStore = getLabelCountStore(transaction, kernelTx);
Set includeLabels = config.getIncludeLabels();
Set excludeLabels = config.getExcludeLabels();
Set includeRels = config.getIncludeRels();
Set excludeRels = config.getExcludeRels();
// Iterate over labels in the graph
for (Label label : tx.getAllLabelsInUse()) {
String labelName = label.name();
if (!excludeLabels.contains(labelName) && (includeLabels.isEmpty() || includeLabels.contains(labelName))) {
// Skip if explicitly excluded or at least 1 include specified and not included
long labelCount = countStore.get(labelName);
// step between observed nodes; -1 makes the modulo test below true for every node
long sample = getSampleForLabelCount(labelCount, config.getSample());
try (ResourceIterator nodes = tx.findNodes(label)) {
int count = 1;
while (nodes.hasNext()) {
Node node = nodes.next();
if (count++ % sample == 0) {
final Set skips = StreamSupport
// we analyze the node for each its relationship type
.stream(node.getRelationshipTypes().spliterator(), false)
.map(rel -> excludeRels.contains(
rel.name()) // we skip a node when the user said that must be
// excluded
// or when the user provided and inclusion list, but it's not in the
// provided list
|| (!includeRels.isEmpty() && !includeRels.contains(rel.name())))
.collect(Collectors.toSet());
// if the Set has just one element and is true we skip the node
// if there are two elements [true, false] we don't skip it as give it means that
// it have a relationship that satisfies the condition provided
// by the configuration
if (skips.size() == 1 && skips.iterator().next()) continue;
profile.observe(node, config);
}
}
}
}
}
return profile.finished();
}
// End new code
/**
* Collects metadata for generating a metadata map based on the provided subgraph and configuration. This method iterates
* over the labels and relationships in the subgraph, collects various metadata information, and stores it in the
* metadata map.
*
* @param graph the subgraph whose labels, relationship types, constraints and indexes are read
* @param config supplies the sample size
* @return an insertion-ordered map with one entry per relationship type followed by one entry per label
*/
private Map> collectMetaData(SubGraph graph, SampleMetaConfig config) {
Map> metaData = new LinkedHashMap<>(100);
Set types = Iterables.asSet(graph.getAllRelationshipTypesInUse());
Map> relConstraints = new HashMap<>(20);
Map> relIndexes = new HashMap<>();
// register every relationship type up front so addRelationships can look its entry up
for (RelationshipType type : graph.getAllRelationshipTypesInUse()) {
metaData.put(new MetadataKey(Types.RELATIONSHIP, type.name()), new LinkedHashMap<>(10));
relConstraints.put(type.name(), graph.getConstraints(type));
relIndexes.put(type.name(), getIndexedProperties(graph.getIndexes(type)));
}
for (Label label : graph.getAllLabelsInUse()) {
Map nodeMeta = new LinkedHashMap<>(50);
String labelName = label.name();
// workaround in case of duplicated keys
metaData.put(new MetadataKey(Types.NODE, labelName), nodeMeta);
Iterable constraints = graph.getConstraints(label);
Set indexed = getIndexedProperties(graph.getIndexes(label));
long labelCount = graph.countsForNode(label);
// step between inspected nodes; -1 makes the modulo test below true for every node
long sample = getSampleForLabelCount(labelCount, config.getSample());
Iterator nodes = graph.findNodes(label);
int count = 1;
while (nodes.hasNext()) {
Node node = nodes.next();
if (count++ % sample == 0) {
addRelationships(metaData, nodeMeta, labelName, node, relConstraints, types, relIndexes);
addProperties(nodeMeta, labelName, constraints, indexed, node, node);
}
}
}
return metaData;
}
/**
 * Gathers the property keys of all given index definitions into one set.
 *
 * @param indexes the index definitions to read
 * @return the distinct property keys
 */
private Set getIndexedProperties(Iterable indexes) {
    final var definitions = Iterables.stream(indexes);
    final var keyGroups = definitions.map(IndexDefinition::getPropertyKeys);
    return keyGroups.flatMap(Iterables::stream).collect(Collectors.toSet());
}
/**
 * Looks up the node count of every label in use.
 *
 * @param tx transaction used to enumerate the labels in use
 * @param kernelTx kernel transaction used for the token lookup and the count read
 * @return a map from label name to node count
 */
private static Map getLabelCountStore(Transaction tx, KernelTransaction kernelTx) {
    // materialise the names first, exactly as before, so label enumeration finishes before counting
    final List<String> labelNames =
            Iterables.stream(tx.getAllLabelsInUse()).map(Label::name).collect(Collectors.toList());
    final TokenRead tokens = kernelTx.tokenRead();
    final Map<String, Long> countsByLabel = new HashMap<>();
    for (String labelName : labelNames) {
        final int labelId = tokens.nodeLabel(labelName);
        countsByLabel.put(labelName, kernelTx.dataRead().countsForNode(labelId));
    }
    return countsByLabel;
}
/**
 * Computes the step used to pick every n-th node of a label while sampling.
 * <p>
 * The step is drawn at random from roughly ±10% around {@code labelCount / sample}. The value
 * {@code -1} is returned whenever no usable step exists; callers test {@code i % step == 0}, which
 * holds for every {@code i} when the step is {@code -1}, so all nodes are then inspected.
 *
 * @param labelCount number of nodes carrying the label
 * @param sample requested sample size; {@code -1} disables sampling, and {@code 0} is treated the same way
 * @return a step that is never zero, or {@code -1} for "inspect every node"
 */
public static long getSampleForLabelCount(long labelCount, long sample) {
    // 0 would divide by zero below; handle it like the explicit "no sampling" marker
    if (sample == -1L || sample == 0L) {
        return -1L;
    }
    long skipCount = labelCount / sample;
    long min = (long) Math.floor(skipCount - (skipCount * 0.1D));
    long max = (long) Math.ceil(skipCount + (skipCount * 0.1D));
    if (min >= max) {
        // empty range, e.g. when fewer nodes exist than the requested sample size
        return -1L;
    }
    long randomValue = ThreadLocalRandom.current().nextLong(min, max);
    return randomValue == 0L ? -1L : randomValue; // it can't return zero as it's used in % ops
}
/**
* Builds the node part of the schema map.
* <p>
* For every metadata entry whose key matches a known label and whose items are all node items, an entry
* with type, count, labels, properties and outgoing relationships is created. Incoming relationships are
* collected per target label while iterating and attached afterwards by {@code setIncomingRelationships}.
*
* @param metaStats supplies the known labels and their counts
* @param metaData the sampled metadata items, grouped by metadata key
* @param relationships the relationship part of the schema; its {@code properties} maps are embedded
* @return an insertion-ordered map from label name to its description
*/
private Map collectNodesMetaData(
MetaStats metaStats, Map> metaData, Map relationships) {
Map nodes = new LinkedHashMap<>();
// target label -> incoming relationship descriptions gathered from other labels
Map>> startNodeNameToRelationshipsMap = new HashMap<>();
for (MetadataKey metadataKey : metaData.keySet()) {
Map entityData = metaData.get(metadataKey);
Map entityProperties = new LinkedHashMap<>();
Map entityRelationships = new LinkedHashMap<>();
List labels = new LinkedList<>();
boolean isNode = metaStats.labels.keySet().stream().anyMatch((label) -> metadataKey.key.equals(label));
for (String entityDataKey : entityData.keySet()) {
MetaItem metaItem = entityData.get(entityDataKey);
// a single relationship item means this entry describes a relationship type, not a label
if (metaItem.elementType.equals("relationship")) {
isNode = false;
break;
} else {
if (metaItem.unique) labels = metaItem.otherLabels;
if (!metaItem.type.equals("RELATIONSHIP")) { // NODE PROPERTY
entityProperties.put(
entityDataKey,
MapUtil.map(
"type",
metaItem.type,
"indexed",
metaItem.index,
"unique",
metaItem.unique,
"existence",
metaItem.existence));
} else {
// NOTE(review): the "out" entry reports rightCount, which MetaItem.rel() accumulates from its
// `in` argument (and the "in" entry below reports leftCount) — confirm this is intended
entityRelationships.put(
metaItem.property,
MapUtil.map(
"direction",
"out",
"count",
metaItem.rightCount,
"labels",
metaItem.other,
"properties",
((Map) relationships.getOrDefault(metaItem.property, Map.of()))
.get("properties")));
// record the mirrored ("in") relationship for every label seen at the other end
metaItem.other.forEach(o -> {
Map mirroredRelationship = new LinkedHashMap<>();
mirroredRelationship.put(
metaItem.property,
MapUtil.map(
"direction",
"in",
"count",
metaItem.leftCount,
"labels",
new LinkedList<>(Arrays.asList(metaItem.label)),
"properties",
((Map)
relationships.getOrDefault(metaItem.property, Map.of()))
.get("properties")));
if (startNodeNameToRelationshipsMap.containsKey(o))
startNodeNameToRelationshipsMap.get(o).add(mirroredRelationship);
else {
List> relList = new LinkedList<>();
relList.add(mirroredRelationship);
startNodeNameToRelationshipsMap.put(o, relList);
}
});
}
}
}
if (isNode) {
String key = metadataKey.key;
nodes.put(
key,
MapUtil.map(
"type", "node",
"count", metaStats.labels.get(key),
"labels", labels,
"properties", entityProperties,
"relationships", entityRelationships));
}
}
setIncomingRelationships(nodes, startNodeNameToRelationshipsMap);
return nodes;
}
/**
* Attaches the collected incoming relationships to the node descriptions, in place.
* <p>
* When a node already has a relationship entry with the same name, only the {@code labels} list of the
* existing entry is extended; otherwise the incoming description is added as a new entry.
*
* @param nodes node descriptions keyed by label name; their {@code relationships} maps are mutated
* @param nodeNameToRelationshipsMap incoming relationship descriptions keyed by target label name
*/
private void setIncomingRelationships(
Map nodes, Map>> nodeNameToRelationshipsMap) {
nodes.keySet().forEach(k -> {
if (nodeNameToRelationshipsMap.containsKey(k)) {
Map node = (Map) nodes.get(k);
List> relationshipsToAddList = nodeNameToRelationshipsMap.get(k);
relationshipsToAddList.forEach(relationshipNameToRelationshipMap -> {
Map actualRelationshipsList = (Map) node.get("relationships");
relationshipNameToRelationshipMap.keySet().forEach(relationshipName -> {
if (actualRelationshipsList.containsKey(relationshipName)) {
// same relationship name already present: merge the label lists, keep the existing direction and count
Map relToAdd =
(Map) relationshipNameToRelationshipMap.get(relationshipName);
Map existingRel =
(Map) actualRelationshipsList.get(relationshipName);
List labels = (List) existingRel.get("labels");
labels.addAll((List) relToAdd.get("labels"));
} else
actualRelationshipsList.put(
relationshipName, relationshipNameToRelationshipMap.get(relationshipName));
});
});
}
});
}
/**
* Builds the relationship part of the schema map.
* <p>
* A metadata entry is kept when its key matches a known relationship type and every item in it is a
* relationship item. Items whose type is not {@code "RELATIONSHIP"} are reported as properties.
*
* @param metaStats supplies the known relationship types and their total counts
* @param metaData the sampled metadata items, grouped by metadata key
* @return an insertion-ordered map from relationship type name to its description
*/
private Map collectRelationshipsMetaData(
MetaStats metaStats, Map> metaData) {
Map relationships = new LinkedHashMap<>();
for (MetadataKey metadataKey : metaData.keySet()) {
Map entityData = metaData.get(metadataKey);
Map entityProperties = new LinkedHashMap<>();
boolean isRelationship =
metaStats.relTypesCount.keySet().stream().anyMatch((rel) -> metadataKey.key.equals(rel));
for (String entityDataKey : entityData.keySet()) {
MetaItem metaItem = entityData.get(entityDataKey);
// a single non-relationship item disqualifies the whole entry
if (!metaItem.elementType.equals("relationship")) {
isRelationship = false;
break;
}
if (!metaItem.type.equals("RELATIONSHIP")) { // RELATIONSHIP PROPERTY
entityProperties.put(
entityDataKey,
MapUtil.map(
"type", metaItem.type,
"array", metaItem.array,
"existence", metaItem.existence,
"indexed", metaItem.index));
}
}
if (isRelationship) {
String key = metadataKey.key;
relationships.put(
key,
MapUtil.map(
"type",
"relationship",
"count",
metaStats.relTypesCount.get(key),
"properties",
entityProperties));
}
}
return relationships;
}
/**
 * Registers a metadata item for every property of an entity that has not been recorded yet.
 * Properties already present in {@code properties} are left untouched, so the first sampled
 * entity determines the recorded type.
 *
 * @param properties target map from property key to metadata item; mutated in place
 * @param labelName label or relationship-type name stored on new items
 * @param constraints constraints to consult for uniqueness/existence flags; may be null
 * @param indexed property keys covered by an index
 * @param pc the entity whose properties are read
 * @param node node whose labels are recorded for uniqueness constraints
 */
private void addProperties(
        Map properties,
        String labelName,
        Iterable constraints,
        Set indexed,
        Entity pc,
        Node node) {
    for (String propertyKey : pc.getPropertyKeys()) {
        if (!properties.containsKey(propertyKey)) {
            final MetaItem item = metaResultForProp(pc, labelName, propertyKey);
            item.elementType(Types.of(pc).name());
            addSchemaInfo(item, propertyKey, constraints, indexed, node);
            properties.put(propertyKey, item);
        }
    }
}
/**
* Records the outgoing relationships of one sampled node.
* <p>
* For every relationship type of the node that is contained in {@code types} and has at least one outgoing
* relationship, a metadata item is ensured both in the node's map (keyed by type name) and in the
* relationship type's map (keyed by label name); both are then updated by {@code addOtherNodeInfo}.
*
* @param metaData all metadata maps; the entry of each relationship type is looked up here
* @param nodeMeta metadata items of the node's label; mutated in place
* @param labelName the label currently being processed
* @param node the sampled node
* @param relConstraints constraints per relationship type name
* @param types relationship types to consider
* @param relIndexes indexed property keys per relationship type name
*/
private void addRelationships(
Map> metaData,
Map nodeMeta,
String labelName,
Node node,
Map> relConstraints,
Set types,
Map> relIndexes) {
StreamSupport.stream(node.getRelationshipTypes().spliterator(), false)
.filter(type -> types.contains(type))
.forEach(type -> {
int out = node.getDegree(type, Direction.OUTGOING);
// only nodes that start at least one relationship of this type contribute
if (out == 0) return;
String typeName = type.name();
// workaround in case of duplicated keys
Iterable constraints = relConstraints.get(typeName);
Set indexes = relIndexes.get(typeName);
if (!nodeMeta.containsKey(typeName)) nodeMeta.put(typeName, new MetaItem(labelName, typeName));
int in = node.getDegree(type, Direction.INCOMING);
Map typeMeta = metaData.get(new MetadataKey(Types.RELATIONSHIP, typeName));
if (!typeMeta.containsKey(labelName)) typeMeta.put(labelName, new MetaItem(typeName, labelName));
MetaItem relMeta = nodeMeta.get(typeName);
addOtherNodeInfo(node, labelName, out, in, type, relMeta, typeMeta, constraints, indexes);
});
}
/**
* Updates the metadata items of one node/relationship-type pair with degree counts, the labels of the
* nodes at the other end, and the properties of the outgoing relationships.
*
* @param node the sampled node
* @param labelName the label currently being processed; key of the item inside {@code typeMeta}
* @param out outgoing degree of {@code node} for {@code type}
* @param in incoming degree of {@code node} for {@code type}
* @param type the relationship type
* @param relMeta the item stored under the type name in the label's map
* @param typeMeta the relationship type's map; also receives the relationship property items
* @param relConstraints constraints of the relationship type; may be null
* @param indexes indexed property keys of the relationship type
*/
private void addOtherNodeInfo(
Node node,
String labelName,
int out,
int in,
RelationshipType type,
MetaItem relMeta,
Map typeMeta,
Iterable relConstraints,
Set indexes) {
MetaItem relNodeMeta = typeMeta.get(labelName);
relMeta.elementType(Types.of(node).name());
// count this observation on both items before folding in the degrees (rel() divides by count)
relMeta.inc().rel(out, in);
relNodeMeta.inc().rel(out, in);
for (Relationship rel : node.getRelationships(Direction.OUTGOING, type)) {
Node endNode = rel.getEndNode();
List labels = toStrings(endNode.getLabels());
relMeta.other(labels);
relNodeMeta.other(labels);
// relationship property items share the map that holds the label-keyed items
addProperties(typeMeta, type.name(), relConstraints, indexes, rel, node);
relNodeMeta.elementType(Types.RELATIONSHIP.name());
}
}
/**
 * Copies index and constraint information for one property onto its metadata item.
 * <p>
 * For a node uniqueness constraint the labels of {@code node} other than the item's own label are
 * appended to the item's {@code otherLabels}.
 *
 * @param res the metadata item to update
 * @param prop the property key the item describes
 * @param constraints constraints to inspect; {@code null} means there are none
 * @param indexed property keys covered by an index
 * @param node node whose labels are recorded for uniqueness constraints
 */
private void addSchemaInfo(
        MetaItem res, String prop, Iterable constraints, Set indexed, Node node) {
    if (indexed.contains(prop)) {
        res.index = true;
    }
    if (constraints == null) return;
    for (ConstraintDefinition constraint : constraints) {
        for (String key : constraint.getPropertyKeys()) {
            if (key.equals(prop)) {
                switch (constraint.getConstraintType()) {
                    case UNIQUENESS -> {
                        res.unique = true;
                        node.getLabels().forEach(l -> {
                            // compare by value: equal label names are not guaranteed to be the
                            // same String instance, so `!=` could record the item's own label
                            if (!Objects.equals(res.label, l.name())) res.addLabel(l.name());
                        });
                    }
                    case RELATIONSHIP_UNIQUENESS -> res.unique = true;
                    case NODE_PROPERTY_EXISTENCE, RELATIONSHIP_PROPERTY_EXISTENCE -> res.existence = true;
                }
            }
        }
    }
}
/**
 * Builds the meta item for one property of an entity.
 * <p>
 * The item's type is derived from the property value, its element type from the entity itself,
 * and the array flag is set when the value is a Java array.
 *
 * @param pc        the entity that owns the property
 * @param labelName the label (or type) name the item is created for
 * @param prop      the property key to read from {@code pc}
 * @return a new meta item describing the property
 */
private MetaItem metaResultForProp(Entity pc, String labelName, String prop) {
    final MetaItem item = new MetaItem(labelName, prop);
    final Object propertyValue = pc.getProperty(prop);

    final String valueTypeName = Types.of(propertyValue).name();
    item.type(valueTypeName);

    final String entityTypeName = Types.of(pc).name();
    item.elementType(entityTypeName);

    final Class<?> valueClass = propertyValue.getClass();
    if (valueClass.isArray()) {
        item.array = true;
    }
    return item;
}
/**
 * Collects the names of the given labels, in iteration order.
 *
 * @param labels the labels to convert
 * @return a new mutable list holding one name per label
 */
private List<String> toStrings(Iterable<Label> labels) {
    List<String> names = new ArrayList<>(10);
    for (Label label : labels) {
        names.add(label.name());
    }
    return names;
}
/**
 * Value object for a meta-graph relationship pattern: a start label name, a relationship type
 * name and an end label name. Two patterns are equal when all three names are equal.
 */
static class Pattern {
    private final String from;
    private final String type;
    private final String to;

    private Pattern(String from, String type, String to) {
        this.from = from;
        this.type = type;
        this.to = to;
    }

    /** Creates the pattern {@code (labelFrom)-[type]->(labelTo)}. */
    public static Pattern of(String labelFrom, String type, String labelTo) {
        return new Pattern(labelFrom, type, labelTo);
    }

    @Override
    public boolean equals(Object o) {
        if (this == o) {
            return true;
        }
        if (!(o instanceof Pattern)) {
            return false;
        }
        final Pattern other = (Pattern) o;
        if (!from.equals(other.from)) {
            return false;
        }
        if (!type.equals(other.type)) {
            return false;
        }
        return to.equals(other.to);
    }

    @Override
    public int hashCode() {
        // Same polynomial as 31 * (31 * from + type) + to, built step by step.
        int hash = from.hashCode();
        hash = 31 * hash + type.hashCode();
        hash = 31 * hash + to.hashCode();
        return hash;
    }

    /** @return the end label of this pattern */
    public Label labelTo() {
        return Label.label(to);
    }

    /** @return the start label of this pattern */
    public Label labelFrom() {
        return Label.label(from);
    }

    /** @return the relationship type of this pattern */
    public RelationshipType relationshipType() {
        return RelationshipType.withName(type);
    }
}
/**
 * Procedure {@code apoc.meta.graph}: builds the meta-graph of the whole database.
 * <p>
 * Delegates to {@code metaGraph} over a count-optimized database sub-graph with no label or
 * relationship-type restriction and with removal of non-existing relationships enabled.
 *
 * @param config sampling configuration used to build the {@code SampleMetaConfig}
 * @return a stream of graph results produced by {@code metaGraph}
 */
@NotThreadSafe
@Procedure("apoc.meta.graph")
@Description("Examines the full graph and returns a meta-graph.")
public Stream<GraphResult> graph(
        @Name(
                        value = "config",
                        defaultValue = "{}",
                        description =
                                "The number of nodes whose relationships are checked to remove false positives and the number of relationships to read per sampled node. A value of -1 will read all; { sample = 1 :: INTEGER, maxRels = -1 :: INTEGER }")
                Map<String, Object> config) {
    SampleMetaConfig metaConfig = new SampleMetaConfig(config, false);
    return metaGraph(DatabaseSubGraph.optimizedForCount(transaction, kernelTx), null, null, true, metaConfig);
}
/**
 * Procedure {@code apoc.meta.graph.of}: builds the meta-graph of a caller-supplied sub-graph.
 * <p>
 * The {@code graph} argument may be:
 * <ul>
 *   <li>a {@code String}, executed as a Cypher query whose result forms the sub-graph;</li>
 *   <li>a {@code Map} with a mandatory {@code nodes} entry and a {@code relationships} entry;</li>
 *   <li>a {@code VirtualGraph}.</li>
 * </ul>
 *
 * @param graph  the sub-graph description in one of the supported forms
 * @param config configuration used to build the {@code MetaConfig}
 * @return a stream of graph results produced by {@code metaGraph}
 * @throws IllegalArgumentException if {@code graph} is of an unsupported type, or is a map
 *                                  without a {@code nodes} key
 */
@NotThreadSafe
@Procedure("apoc.meta.graph.of")
@Description("Examines the given sub-graph and returns a meta-graph.")
public Stream<GraphResult> graphOf(
        @Name(value = "graph", defaultValue = "{}", description = "The graph to extract metadata from.")
                Object graph,
        @Name(
                        value = "config",
                        defaultValue = "{}",
                        description =
                                "The number of nodes whose relationships are checked to remove false positives and the number of relationships to read per sampled node. A value of -1 will read all; { sample = 1 :: INTEGER, maxRels = -1 :: INTEGER, addRelationshipsBetweenNodes = true :: BOOLEAN }")
                Map<String, Object> config) {
    MetaConfig metaConfig = new MetaConfig(config, false);
    final SubGraph subGraph;
    if (graph instanceof String query) {
        Result result = tx.execute("CYPHER runtime=pipelined " + query);
        subGraph = CypherResultSubGraph.from(tx, result, metaConfig.isAddRelationshipsBetweenNodes());
    } else if (graph instanceof Map<?, ?> mGraph) {
        if (!mGraph.containsKey("nodes")) {
            throw new IllegalArgumentException(
                    "Graph Map must contains `nodes` field and `relationships` optionally");
        }
        // The map comes straight from the caller, so element types cannot be verified here.
        @SuppressWarnings("unchecked")
        Collection<Node> nodes = (Collection<Node>) mGraph.get("nodes");
        // NOTE(review): this is null when the key is absent - confirm NodesAndRelsSubGraph accepts null.
        @SuppressWarnings("unchecked")
        Collection<Relationship> relationships = (Collection<Relationship>) mGraph.get("relationships");
        subGraph = new NodesAndRelsSubGraph(tx, nodes, relationships);
    } else if (graph instanceof VirtualGraph vGraph) {
        subGraph = new NodesAndRelsSubGraph(tx, vGraph.nodes(), vGraph.relationships());
    } else {
        throw new IllegalArgumentException("Supported inputs are String, VirtualGraph, Map");
    }
    return metaGraph(subGraph, null, null, true, metaConfig.getSampleMetaConfig());
}
/**
 * Generates a meta-graph based on the provided sub-graph and configuration.
 * <p>
 * One virtual node is created per label with a positive node count. For every combination of
 * start label, relationship type and end label where both the outgoing count (start label, type)
 * and the incoming count (type, end label) are non-zero, a virtual relationship is created with
 * the properties {@code type}, {@code out}, {@code in} and {@code count}.
 *
 * @param subGraph The sub-graph to generate the meta-graph from.
 * @param labelNames (optional) A collection of label names to consider. If not specified, all labels in the sub-graph will be used.
 * @param relTypeNames (optional) A collection of relationship type names to consider. If not specified, all relationship types in the sub-graph will be used.
 * @param removeMissing A boolean value indicating whether non-existing relationships should be filtered out from the meta-graph.
 * @param metaConfig Configuration options for the meta-graph generation.
 * @return A stream of GraphResult objects representing the generated meta-graph.
 */
private Stream<GraphResult> metaGraph(
        SubGraph subGraph,
        Collection<String> labelNames,
        Collection<String> relTypeNames,
        boolean removeMissing,
        SampleMetaConfig metaConfig) {
    TokenRead tokenRead = kernelTx.tokenRead();
    // Only the number of types in use is needed, to pre-size the relationship map.
    int typesInUse = subGraph.relTypesInUse(tokenRead, relTypeNames).size();

    Iterable<Label> labels = CollectionUtils.isNotEmpty(labelNames)
            ? labelNames.stream().map(Label::label).collect(Collectors.toList())
            : subGraph.getAllLabelsInUse();
    Iterable<RelationshipType> types = CollectionUtils.isNotEmpty(relTypeNames)
            ? relTypeNames.stream().map(RelationshipType::withName).collect(Collectors.toList())
            : subGraph.getAllRelationshipTypesInUse();

    Map<String, Node> vNodes = new TreeMap<>();
    Map<Pattern, Relationship> vRels = new HashMap<>(typesInUse * 2);

    for (Label label : labels) {
        long count = subGraph.countsForNode(label);
        if (count > 0) {
            mergeMetaNode(label, vNodes, count);
        }
    }

    for (RelationshipType type : types) {
        String relType = type.name();
        for (Label start : labels) {
            // The outgoing count does not depend on the end label, so it is looked up once
            // per (start, type) instead of once per (start, type, end).
            long relCountOut = subGraph.countsForRelationship(start, type);
            if (relCountOut == 0) continue;
            String startLabel = start.name();
            for (Label end : labels) {
                String endLabel = end.name();
                Pattern pattern = Pattern.of(startLabel, relType, endLabel);
                if (vRels.containsKey(pattern)) continue;
                long relCountIn = subGraph.countsForRelationship(type, end);
                if (relCountIn <= 0) continue;
                Node startNode = vNodes.get(startLabel);
                Node endNode = vNodes.get(endLabel);
                long global = subGraph.countsForRelationship(type);
                Relationship vRel = new VirtualRelationship(startNode, endNode, type)
                        .withProperties(
                                map("type", relType, "out", relCountOut, "in", relCountIn, "count", global));
                vRels.put(pattern, vRel);
            }
        }
    }

    if (removeMissing) filterNonExistingRelationships(vRels, metaConfig);
    GraphResult graphResult = new GraphResult(new ArrayList<>(vNodes.values()), new ArrayList<>(vRels.values()));
    return Stream.of(graphResult);
}
/**
* Filters out non-existing relationships from the provided map of patterns to relationships in the meta-graph.
*/
private void filterNonExistingRelationships(Map vRels, SampleMetaConfig metaConfig) {
Set rels = vRels.keySet();
Map, Set> aggregated = new HashMap<>();
for (Pattern rel : rels) {
combine(aggregated, Pair.of(rel.from, rel.type), rel);
combine(aggregated, Pair.of(rel.type, rel.to), rel);
}
aggregated.values().stream()
.filter(c -> c.size() > 1)
.flatMap(Collection::stream)
.filter(p -> !relationshipExistsWithDegreeCheck(p, vRels.get(p), metaConfig))
.forEach(vRels::remove);
}
/**
 * Verifies that a meta-graph relationship is backed by at least one real relationship.
 * <p>
 * The ratio of the relationship's {@code out} property to the start node's {@code count} is
 * compared with the ratio of its {@code in} property to the end node's {@code count}. When the
 * former is strictly smaller the check starts from the start label following outgoing
 * relationships, otherwise from the end label following incoming relationships.
 *
 * @param p The pattern representing the relationship.
 * @param relationship The relationship to check existence for; {@code null} yields {@code false}.
 * @param metaConfig Configuration options for the meta-graph generation.
 * @return A boolean value indicating whether the relationship exists in the database.
 */
private boolean relationshipExistsWithDegreeCheck(
        Pattern p, Relationship relationship, SampleMetaConfig metaConfig) {
    if (relationship == null) {
        return false;
    }

    final long outCount = (long) relationship.getProperty("out");
    final long startNodeCount = (long) relationship.getStartNode().getProperty("count");
    final double degreeFrom = (double) outCount / startNodeCount;

    final long inCount = (long) relationship.getProperty("in");
    final long endNodeCount = (long) relationship.getEndNode().getProperty("count");
    final double degreeTo = (double) inCount / endNodeCount;

    final boolean scanFromStart = degreeFrom < degreeTo;
    final Label scanLabel = scanFromStart ? p.labelFrom() : p.labelTo();
    final Label otherLabel = scanFromStart ? p.labelTo() : p.labelFrom();
    final Direction direction = scanFromStart ? Direction.OUTGOING : Direction.INCOMING;
    return relationshipExists(tx, scanLabel, otherLabel, p.relationshipType(), direction, metaConfig);
}
/**
 * relationshipExists uses sampling to check if the relationships added in previous steps exist.
 * The sample count is the skip count; e.g. if set to 1000 this means every 1000th node will be checked.
 * A high sample count means that only one node will be checked each time.
 * Note; Each node is still fetched, but the relationships on that node will not be checked
 * if skipped, which should make it faster.
 *
 * @param tx               transaction used to find the nodes carrying {@code labelFromLabel}
 * @param labelFromLabel   label of the nodes that are scanned
 * @param labelToLabel     label the node on the other side of a relationship must carry
 * @param relationshipType relationship type to follow
 * @param direction        direction in which relationships are followed from the scanned nodes
 * @param metaConfig       provides the sample (skip) count and the per-node relationship limit
 * @return {@code true} as soon as a matching relationship is found, {@code false} if none was
 *         found among the sampled nodes
 */
static boolean relationshipExists(
        Transaction tx,
        Label labelFromLabel,
        Label labelToLabel,
        RelationshipType relationshipType,
        Direction direction,
        SampleMetaConfig metaConfig) {
    try (ResourceIterator<Node> nodes = tx.findNodes(labelFromLabel)) {
        long count = 0L;
        // A sample size below or equal to 0 means we should check every node.
        long skipCount = metaConfig.getSample() > 0 ? metaConfig.getSample() : 1;
        while (nodes.hasNext()) {
            Node node = nodes.next();
            if (count % skipCount == 0) {
                long maxRels = metaConfig.getMaxRels();
                // Close the relationship iterable explicitly: the loop can exit early through
                // return or break, which would otherwise leave it open.
                try (var rels = node.getRelationships(direction, relationshipType)) {
                    for (Relationship rel : rels) {
                        Node otherNode = direction == Direction.OUTGOING ? rel.getEndNode() : rel.getStartNode();
                        // We have found the rel, we are confident the relationship exists.
                        if (otherNode.hasLabel(labelToLabel)) return true;
                        // NOTE(review): the countdown is tested after the label check, so up to
                        // maxRels + 1 relationships are read per node - confirm this is intended.
                        if (maxRels != -1 && maxRels-- == 0) break;
                    }
                }
            }
            count++;
        }
    }
    // Our sampling (or full scan if skipCount == 1) did not find the relationship
    // So we assume it doesn't exist and remove it from the schema, may result in false negatives!
    return false;
}
private void combine(Map, Set> aggregated, Pair p, Pattern rel) {
if (!aggregated.containsKey(p)) aggregated.put(p, new HashSet<>());
aggregated.get(p).add(rel);
}
/**
 * Procedure {@code apoc.meta.graphSample}: builds the meta-graph of the whole database without
 * removing non-existing relationships.
 * <p>
 * Delegates to {@code metaGraph} with {@code removeMissing = false}. The {@code config} argument
 * is not read; a {@code SampleMetaConfig} built from {@code null} is used instead.
 *
 * @param config deprecated and ignored
 * @return a stream of graph results produced by {@code metaGraph}
 */
@NotThreadSafe
@Procedure("apoc.meta.graphSample")
@Description("Examines the full graph and returns a meta-graph.\n"
        + "Unlike `apoc.meta.graph`, this procedure does not filter away non-existing paths.")
public Stream<GraphResult> graphSample(
        @Name(value = "config", defaultValue = "{}", description = "Empty map (deprecated).") @Deprecated
                Map<String, Object> config) {
    return metaGraph(
            DatabaseSubGraph.optimizedForCount(transaction, kernelTx),
            null,
            null,
            false,
            new SampleMetaConfig(null));
}
/**
 * Procedure {@code apoc.meta.subGraph}: builds the meta-graph of the database restricted by the
 * configuration.
 * <p>
 * The include-labels and include-rels settings are passed to {@code metaGraph}. The resulting
 * graph is then post-filtered by {@code filterResultStream} using the exclude-labels setting.
 *
 * @param config configuration used to build the {@code MetaConfig}
 * @return a stream of filtered graph results
 */
@NotThreadSafe
@Procedure("apoc.meta.subGraph")
@Description("Examines the given sub-graph and returns a meta-graph.")
public Stream<GraphResult> subGraph(
        @Name(
                        value = "config",
                        // The text block is kept exactly as it was (left-aligned), because its
                        // whitespace is part of the runtime string.
                        description =
"""
{
excludeLabels :: LIST,
includeLabels :: LIST,
includeRels :: LIST,
maxRels = -1 :: INTEGER,
sample = 1 :: INTEGER
}
""")
                Map<String, Object> config) {
    MetaConfig metaConfig = new MetaConfig(config, false);
    return filterResultStream(
            metaConfig.getExcludeLabels(),
            metaGraph(
                    DatabaseSubGraph.optimizedForCount(transaction, kernelTx),
                    metaConfig.getIncludeLabels(),
                    metaConfig.getIncludeRels(),
                    true,
                    metaConfig.getSampleMetaConfig()));
}
/**
 * Removes excluded elements from every graph result of the stream.
 * <p>
 * A node is removed when one of its labels is in {@code excludes}. A relationship is removed when
 * its type name is in {@code excludes} or when its start or end node carries an excluded label.
 * The graph results are modified in place.
 *
 * @param excludes          names to exclude; {@code null} or empty leaves the stream untouched
 * @param graphResultStream the graph results to filter
 * @return the same stream when nothing is excluded, otherwise a stream of the filtered results
 */
private Stream<GraphResult> filterResultStream(Set<String> excludes, Stream<GraphResult> graphResultStream) {
    if (excludes == null || excludes.isEmpty()) return graphResultStream;
    return graphResultStream.map(gr -> {
        gr.nodes.removeIf(node -> containsLabelName(excludes, node));
        gr.relationships.removeIf(relationship -> excludes.contains(relationship.getType().name())
                || containsLabelName(excludes, relationship.getStartNode())
                || containsLabelName(excludes, relationship.getEndNode()));
        return gr;
    });
}
/**
 * Tells whether the node carries at least one label whose name is in {@code excludes}.
 *
 * @param excludes the label names to look for
 * @param node     the node whose labels are checked
 * @return {@code true} if any label name of {@code node} is contained in {@code excludes}
 */
private boolean containsLabelName(Set<String> excludes, Node node) {
    for (Label label : node.getLabels()) {
        if (excludes.contains(label.name())) {
            return true;
        }
    }
    return false;
}
/**
 * Returns the virtual meta node for a label, creating and registering it on first use, and adds
 * {@code increment} to its {@code count} property when the increment is positive.
 *
 * @param label     the label the meta node represents
 * @param labels    meta nodes keyed by label name, modified in place
 * @param increment amount to add to the node's {@code count}; ignored when not positive
 * @return the meta node registered for {@code label}
 */
private Node mergeMetaNode(Label label, Map<String, Node> labels, long increment) {
    String name = label.name();
    Node vNode = labels.computeIfAbsent(
            name, key -> new VirtualNode(new Label[] {label}, Collections.singletonMap("name", name)));
    if (increment > 0) {
        long current = ((Number) vNode.getProperty("count", 0L)).longValue();
        vNode.setProperty("count", current + increment);
    }
    return vNode;
}
}