apoc.hashing.Fingerprinting Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of apoc-core Show documentation
Show all versions of apoc-core Show documentation
Core package for Neo4j Procedures
/*
* Copyright (c) "Neo4j"
* Neo4j Sweden AB [http://neo4j.com]
*
* This file is part of Neo4j.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package apoc.hashing;
import apoc.util.Util;
import java.lang.reflect.Array;
import java.nio.charset.StandardCharsets;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Formatter;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.TreeMap;
import java.util.function.Consumer;
import java.util.stream.Collectors;
import java.util.stream.StreamSupport;
import org.neo4j.graphdb.Direction;
import org.neo4j.graphdb.Entity;
import org.neo4j.graphdb.Label;
import org.neo4j.graphdb.Node;
import org.neo4j.graphdb.Path;
import org.neo4j.graphdb.Relationship;
import org.neo4j.graphdb.Transaction;
import org.neo4j.logging.Log;
import org.neo4j.procedure.Context;
import org.neo4j.procedure.Description;
import org.neo4j.procedure.Name;
import org.neo4j.procedure.UserFunction;
public class Fingerprinting {
@Context
public Transaction tx;
@Context
public Log log;
/**
 * Cypher user function {@code apoc.hashing.fingerprint}.
 *
 * <p>Builds a {@link FingerprintingConfig} that uses the EAGER strategy and applies the given
 * keys as the disallow list for nodes, relationships and maps alike, then hashes {@code thing}
 * with it.
 *
 * @param thing the value to hash; nodes, relationships, paths, maps, lists and plain values
 *     are all accepted by the underlying dispatcher
 * @param excludedPropertyKeys property keys that must not contribute to the hash
 * @return the digest rendered as an upper-case hexadecimal string
 */
@UserFunction("apoc.hashing.fingerprint")
@Description(
        "Calculates a MD5 checksum over a `NODE` or `RELATIONSHIP` (identical entities share the same checksum).\n"
                + "Unsuitable for cryptographic use-cases.")
public String fingerprint(
        @Name(value = "object", description = "A node or relationship to hash.") Object thing,
        @Name(
                        value = "excludedPropertyKeys",
                        defaultValue = "[]",
                        description = "Property keys to exclude from the hashing.")
                List<String> excludedPropertyKeys) {
    // The same exclusion list is registered under all three disallow keys so that it applies
    // regardless of whether the hashed value is a node, a relationship or a plain map.
    FingerprintingConfig config = new FingerprintingConfig(Util.map(
            "allNodesDisallowList",
            excludedPropertyKeys,
            "allRelsDisallowList",
            excludedPropertyKeys,
            "mapDisallowList",
            excludedPropertyKeys,
            "strategy",
            FingerprintingConfig.FingerprintStrategy.EAGER.toString()));
    return fingerprint(thing, config);
}
/**
 * Cypher user function {@code apoc.hashing.fingerprinting}.
 *
 * <p>Same hashing as {@code apoc.hashing.fingerprint}, but every aspect of the hashing is taken
 * from the supplied configuration map, which is handed unchanged to {@link FingerprintingConfig}.
 *
 * @param thing the value to hash
 * @param conf configuration map; the recognised keys are listed in the parameter description
 * @return the digest rendered as an upper-case hexadecimal string
 */
@UserFunction("apoc.hashing.fingerprinting")
@Description(
        """
        Calculates a MD5 checksum over a `NODE` or `RELATIONSHIP` (identical entities share the same checksum).
        Unlike `apoc.hashing.fingerprint()`, this function supports a number of config parameters.
        Unsuitable for cryptographic use-cases.""")
public String fingerprinting(
        @Name(value = "object", description = "A node or relationship to hash.") Object thing,
        @Name(
                        value = "config",
                        defaultValue = "{}",
                        description =
                                """
                                {
                                digestAlgorithm = "MD5" :: STRING,
                                strategy = "LAZY" :: STRING,
                                nodeAllowMap = [] :: MAP<STRING, LIST<STRING>>,
                                relAllowMap = [] :: MAP<STRING, LIST<STRING>>,
                                relDisallowMap = [] :: MAP<STRING, LIST<STRING>>,
                                mapAllowList = [] :: LIST<STRING>,
                                mapDisallowList = [] :: LIST<STRING>,
                                allNodesAllowList = [] :: LIST<STRING>,
                                allNodesDisallowList = [] :: LIST<STRING>,
                                allRelsAllowList = [] :: LIST<STRING>,
                                allRelsDisallowList = [] :: LIST<STRING>
                                }""")
                Map<String, Object> conf) {
    FingerprintingConfig config = new FingerprintingConfig(conf);
    return fingerprint(thing, config);
}
/**
 * Hashes {@code thing} with a fresh message digest and returns the digest as a hex string.
 *
 * @param thing the value to hash
 * @param config settings controlling the digest algorithm and which parts are hashed
 * @return the hexadecimal digest produced by {@code withMessageDigest}
 */
private String fingerprint(Object thing, FingerprintingConfig config) {
    final Consumer<DiagnosingMessageDigestDecorator> hasher = digest -> fingerprint(digest, thing, config);
    return withMessageDigest(config, hasher);
}
/**
 * Feeds {@code thing} into the digest, dispatching on its runtime type.
 *
 * <p>Nodes, relationships, paths, maps and lists are handed to their dedicated routines; any
 * other value is converted to a string and hashed as UTF-8 bytes. The charset is fixed so
 * that the fingerprint does not depend on the JVM's default encoding.
 *
 * @param md digest accumulator that receives the bytes
 * @param thing the value to hash; {@code null} contributes no bytes (it is rendered as "")
 * @param conf settings forwarded to the type-specific routines
 */
private void fingerprint(DiagnosingMessageDigestDecorator md, Object thing, FingerprintingConfig conf) {
    if (thing instanceof Node) {
        fingerprintNode(md, (Node) thing, conf);
    } else if (thing instanceof Relationship) {
        fingerprintRelationship(md, (Relationship) thing, conf);
    } else if (thing instanceof Path) {
        fingerprintPath(md, (Path) thing, conf);
    } else if (thing instanceof Map) {
        // Raw cast: the key/value types of an arbitrary incoming map cannot be checked here.
        fingerprintMap(md, conf, (Map) thing);
    } else if (thing instanceof List) {
        fingerprintList(md, conf, (List) thing);
    } else {
        md.update(convertValueToString(thing).getBytes(StandardCharsets.UTF_8));
    }
}
/**
 * Hashes every element of {@code list}, in list order, into the given digest.
 *
 * @param md digest accumulator that receives the bytes
 * @param conf settings forwarded to the per-element hashing
 * @param list the elements to hash
 */
private void fingerprintList(DiagnosingMessageDigestDecorator md, FingerprintingConfig conf, List list) {
    for (Object element : list) {
        fingerprint(md, element, conf);
    }
}
/**
 * Hashes a path: first all of its nodes, then all of its relationships, each in the order
 * the path yields them.
 *
 * @param md digest accumulator that receives the bytes
 * @param thing the path to hash
 * @param conf settings forwarded to the node and relationship hashing
 */
private void fingerprintPath(DiagnosingMessageDigestDecorator md, Path thing, FingerprintingConfig conf) {
    for (Node pathNode : thing.nodes()) {
        fingerprint(md, pathNode, conf);
    }
    for (Relationship pathRelationship : thing.relationships()) {
        fingerprint(md, pathRelationship, conf);
    }
}
/**
 * Hashes the entries of {@code map} in ascending key order.
 *
 * <p>If the configured map allow list is non-empty only keys on it are hashed; otherwise every
 * key that is not on the map disallow list is hashed. For each surviving entry the key bytes
 * are added, followed by the hex fingerprint of the value (computed with its own digest).
 * Strings are encoded as UTF-8 so the result is independent of the platform default charset.
 *
 * @param md digest accumulator that receives the bytes
 * @param conf supplies the allow/disallow lists and is forwarded to the value hashing
 * @param map the entries to hash
 */
private void fingerprintMap(
        DiagnosingMessageDigestDecorator md, FingerprintingConfig conf, Map<String, Object> map) {
    // Looked up once: the lists do not change while the entries are being filtered.
    final var allowList = conf.getMapAllowList();
    final var disallowList = conf.getMapDisallowList();
    final boolean allowListActive = !allowList.isEmpty();

    map.entrySet().stream()
            .filter(entry -> allowListActive
                    ? allowList.contains(entry.getKey())
                    : !disallowList.contains(entry.getKey()))
            // Sorting makes the hash independent of the map's iteration order.
            .sorted(Map.Entry.comparingByKey())
            .forEachOrdered(entry -> {
                md.update(entry.getKey().getBytes(StandardCharsets.UTF_8));
                md.update(fingerprint(entry.getValue(), conf).getBytes(StandardCharsets.UTF_8));
            });
}
@UserFunction("apoc.hashing.fingerprintGraph")
@Description(
"""
Calculates a MD5 checksum over the full graph.
This function uses in-memory data structures.
Unsuitable for cryptographic use-cases.""")
public String fingerprintGraph(
@Name(
value = "propertyExcludes",
defaultValue = "[]",
description = "Property keys to exclude from the hashing.")
List excludedPropertyKeys) {
FingerprintingConfig config = new FingerprintingConfig(Util.map(
"allNodesDisallowList",
excludedPropertyKeys,
"allRelsDisallowList",
excludedPropertyKeys,
"mapDisallowList",
excludedPropertyKeys,
"strategy",
FingerprintingConfig.FingerprintStrategy.EAGER.toString()));
return withMessageDigest(config, messageDigest -> {
// step 1: load all nodes, calc their hash and map them to id
Map idToNodeHash = tx.getAllNodes().stream()
.collect(Collectors.toMap(
Node::getId,
node -> fingerprint(node, config),
(aLong, aLong2) -> {
throw new RuntimeException();
},
() -> new TreeMap<>()));
// step 2: build inverse map
final Map> nodeHashToId = idToNodeHash.entrySet().stream()
.collect(Collectors.groupingBy(
Map.Entry::getValue,
TreeMap::new,
Collectors.mapping(Map.Entry::getKey, Collectors.toList())));
// step 3: iterate nodes in order of their hash (we cannot rely on internal ids)
nodeHashToId.forEach((hash, ids) -> ids.forEach(id -> {
messageDigest.update(hash.getBytes());
Node node = tx.getNodeById(id);
List endNodeRelationshipHashTuples = StreamSupport.stream(
node.getRelationships(Direction.OUTGOING).spliterator(), false)
.map(relationship -> {
String endNodeHash = idToNodeHash.get(relationship.getEndNodeId());
String relationshipHash = fingerprint(relationship, excludedPropertyKeys);
return new EndNodeRelationshipHashTuple(endNodeHash, relationshipHash);
})
.collect(Collectors.toList());
endNodeRelationshipHashTuples.stream().sorted().forEach(endNodeRelationshipHashTuple -> {
messageDigest.update(
endNodeRelationshipHashTuple.getEndNodeHash().getBytes());
messageDigest.update(
endNodeRelationshipHashTuple.getRelationshipHash().getBytes());
});
}));
});
}
/**
 * Immutable pair of (hash of a relationship's end node, hash of the relationship itself).
 *
 * <p>Instances order by end-node hash first and relationship hash second, which gives the
 * outgoing relationships of a node a stable order that does not depend on storage order.
 */
private static class EndNodeRelationshipHashTuple implements Comparable<EndNodeRelationshipHashTuple> {
    private final String endNodeHash;
    private final String relationshipHash;

    public EndNodeRelationshipHashTuple(String endNodeHash, String relationshipHash) {
        this.endNodeHash = endNodeHash;
        this.relationshipHash = relationshipHash;
    }

    /**
     * Compares by end-node hash, then by relationship hash.
     *
     * @throws NullPointerException if a compared hash is {@code null}
     */
    @Override
    public int compareTo(EndNodeRelationshipHashTuple other) {
        int res = endNodeHash.compareTo(other.endNodeHash);
        if (res == 0) {
            res = relationshipHash.compareTo(other.relationshipHash);
        }
        return res;
    }

    @Override
    public boolean equals(Object o) {
        if (this == o) {
            return true;
        }
        if (o == null || getClass() != o.getClass()) {
            return false;
        }
        EndNodeRelationshipHashTuple that = (EndNodeRelationshipHashTuple) o;
        return Objects.equals(endNodeHash, that.endNodeHash)
                && Objects.equals(relationshipHash, that.relationshipHash);
    }

    @Override
    public int hashCode() {
        // Objects.hashCode maps null to 0, which keeps the hash values of the previous
        // hand-written implementation.
        return 31 * Objects.hashCode(endNodeHash) + Objects.hashCode(relationshipHash);
    }

    public String getEndNodeHash() {
        return endNodeHash;
    }

    public String getRelationshipHash() {
        return relationshipHash;
    }
}
/**
 * Hashes a node: first its label names in sorted order, then its (filtered) properties.
 *
 * <p>With the EAGER strategy every label is hashed; with the LAZY strategy only labels
 * contained in {@code config.getAllLabels()} are hashed. Which properties are hashed is
 * decided by the global node allow/disallow lists, the per-label allow/disallow maps and the
 * map disallow list. Label names are encoded as UTF-8 so the result does not depend on the
 * platform default charset.
 *
 * @param md digest accumulator that receives the bytes
 * @param node the node to hash
 * @param config strategy and property filters
 */
private void fingerprintNode(DiagnosingMessageDigestDecorator md, Node node, FingerprintingConfig config) {
    // Read the labels once (already sorted) instead of re-iterating them for every step below.
    final List<String> labelNames = StreamSupport.stream(node.getLabels().spliterator(), false)
            .map(Label::name)
            .sorted()
            .collect(Collectors.toList());

    switch (config.getStrategy()) {
        case EAGER:
            labelNames.forEach(label -> md.update(label.getBytes(StandardCharsets.UTF_8)));
            break;
        case LAZY:
            labelNames.stream()
                    .filter(label -> config.getAllLabels().contains(label))
                    .forEachOrdered(label -> md.update(label.getBytes(StandardCharsets.UTF_8)));
            break;
        default:
            // Any other strategy contributes no label information.
            break;
    }

    final List<String> keysToRetain = new ArrayList<>(config.getAllNodesAllowList());
    keysToRetain.addAll(labelNames.stream()
            .flatMap(label -> config.getNodeAllowMap().getOrDefault(label, Collections.emptyList()).stream())
            .collect(Collectors.toSet()));

    final List<String> keysToRemove = new ArrayList<>(config.getAllNodesDisallowList());
    keysToRemove.addAll(labelNames.stream()
            .flatMap(label -> config.getNodeDisallowMap().getOrDefault(label, Collections.emptyList()).stream())
            .collect(Collectors.toSet()));
    keysToRemove.addAll(config.getMapDisallowList()); // just to backwards compatibility remove it

    final Map<String, Object> allProperties = getEntityProperties(node, config, keysToRetain, keysToRemove);
    fingerprint(md, allProperties, config);
}
/**
 * Hashes a relationship: optionally its type plus the fingerprints of its start and end
 * node, then its (filtered) properties.
 *
 * <p>The type and endpoint fingerprints are always included with the EAGER strategy; with
 * the LAZY strategy they are included only if the type name is contained in
 * {@code config.getAllTypes()}. Which properties are hashed is decided by the global
 * relationship allow/disallow lists, the per-type allow/disallow maps and the map disallow
 * list. Strings are encoded as UTF-8 so the result does not depend on the platform default
 * charset.
 *
 * @param md digest accumulator that receives the bytes
 * @param rel the relationship to hash
 * @param config strategy and property filters
 */
private void fingerprintRelationship(
        DiagnosingMessageDigestDecorator md, Relationship rel, FingerprintingConfig config) {
    final String typeName = rel.getType().name();

    final boolean includeTypeAndEndpoints;
    switch (config.getStrategy()) {
        case EAGER:
            includeTypeAndEndpoints = true;
            break;
        case LAZY:
            includeTypeAndEndpoints = config.getAllTypes().contains(typeName);
            break;
        default:
            includeTypeAndEndpoints = false;
            break;
    }
    if (includeTypeAndEndpoints) {
        md.update(typeName.getBytes(StandardCharsets.UTF_8));
        // Endpoints are hashed with their own digest; only the resulting hex string is added.
        md.update(fingerprint(rel.getStartNode(), config).getBytes(StandardCharsets.UTF_8));
        md.update(fingerprint(rel.getEndNode(), config).getBytes(StandardCharsets.UTF_8));
    }

    final List<String> keysToRetain = new ArrayList<>(config.getAllRelsAllowList());
    keysToRetain.addAll(config.getRelAllowMap().getOrDefault(typeName, Collections.emptyList()));

    final List<String> keysToRemove = new ArrayList<>(config.getAllRelsDisallowList());
    keysToRemove.addAll(config.getRelDisallowMap().getOrDefault(typeName, Collections.emptyList()));
    keysToRemove.addAll(config.getMapDisallowList()); // just to backwards compatibility remove it

    final Map<String, Object> allProperties = getEntityProperties(rel, config, keysToRetain, keysToRemove);
    fingerprint(md, allProperties, config);
}
/**
 * Returns the properties of {@code entity} that should take part in the hash.
 *
 * <p>When neither list contains a key, the LAZY strategy yields no properties at all and
 * every other strategy yields all of them. Otherwise all properties are loaded, reduced to
 * {@code keysToRetain} (if non-empty) and then stripped of {@code keysToRemove} (if
 * non-empty), so a key present in both lists is removed.
 *
 * @param entity the node or relationship whose properties are read
 * @param config supplies the strategy
 * @param keysToRetain property keys to keep; empty means "no restriction"
 * @param keysToRemove property keys to drop
 * @return the selected properties keyed by property name
 */
private Map<String, Object> getEntityProperties(
        Entity entity, FingerprintingConfig config, List<String> keysToRetain, List<String> keysToRemove) {
    final Map<String, Object> allProperties;
    if (keysToRetain.isEmpty() && keysToRemove.isEmpty()) {
        switch (config.getStrategy()) {
            case LAZY:
                allProperties = Collections.emptyMap();
                break;
            default:
                allProperties = entity.getAllProperties();
        }
    } else {
        allProperties = entity.getAllProperties();
        // Filtering happens in place on the map returned by the entity.
        if (!keysToRetain.isEmpty()) {
            allProperties.keySet().retainAll(keysToRetain);
        }
        if (!keysToRemove.isEmpty()) {
            allProperties.keySet().removeAll(keysToRemove);
        }
    }
    return allProperties;
}
/**
 * Creates a message digest for the algorithm named by {@code conf}, lets {@code consumer}
 * feed data into it and returns the final digest as an upper-case hexadecimal string.
 *
 * @param conf supplies the digest algorithm name
 * @param consumer callback that adds all data to be hashed
 * @return the hexadecimal rendering of the digest
 * @throws RuntimeException wrapping {@link NoSuchAlgorithmException} if the configured
 *     algorithm is not available
 */
private String withMessageDigest(FingerprintingConfig conf, Consumer<DiagnosingMessageDigestDecorator> consumer) {
    try {
        MessageDigest md = MessageDigest.getInstance(conf.getDigestAlgorithm());
        // The decorator mirrors every update to the debug log before delegating to md.
        DiagnosingMessageDigestDecorator dmd = new DiagnosingMessageDigestDecorator(md);
        consumer.accept(dmd);
        return renderAsHex(md.digest());
    } catch (NoSuchAlgorithmException e) {
        throw new RuntimeException(e);
    }
}
/**
 * Renders each byte of {@code content} as two upper-case hexadecimal digits.
 *
 * @param content the bytes to render
 * @return a string of {@code 2 * content.length} hex digits
 */
private static String renderAsHex(byte[] content) {
    final char[] hexDigits = "0123456789ABCDEF".toCharArray();
    final StringBuilder hex = new StringBuilder(content.length * 2);
    for (byte current : content) {
        // High nibble first, then low nibble; masking treats the byte as unsigned.
        hex.append(hexDigits[(current >> 4) & 0x0F]);
        hex.append(hexDigits[current & 0x0F]);
    }
    return hex.toString();
}
/**
 * Converts a plain value to the string that is hashed for it.
 *
 * @param value the value to convert; may be {@code null}
 * @return "" for {@code null}, the concatenated elements for arrays, otherwise
 *     {@code value.toString()}
 */
private String convertValueToString(Object value) {
    if (value == null) {
        return "";
    }
    final boolean isArray = value.getClass().isArray();
    return isArray ? nativeArrayToString(value) : value.toString();
}
/**
 * Concatenates the string forms of all elements of an array, without any separator.
 *
 * <p>{@code String[]}, {@code double[]}, {@code long[]} and {@code byte[]} are handled
 * directly; every other array type (for example {@code int[]}, {@code float[]},
 * {@code boolean[]} or {@code char[]}) is read reflectively, which yields the same textual
 * form per element that {@link StringBuilder} would produce.
 *
 * <p>NOTE(review): because no separator is written, different arrays can map to the same
 * string (e.g. {@code [1, 23]} and {@code [12, 3]}). This is kept as is so that existing
 * fingerprints do not change.
 *
 * @param value the array to render
 * @return the concatenated element strings; "" for an empty array
 * @throws UnsupportedOperationException if {@code value} is not an array
 */
private String nativeArrayToString(Object value) {
    StringBuilder sb = new StringBuilder();
    if (value instanceof String[]) {
        for (String s : (String[]) value) {
            sb.append(s);
        }
    } else if (value instanceof double[]) {
        for (double d : (double[]) value) {
            sb.append(d);
        }
    } else if (value instanceof long[]) {
        for (long l : (long[]) value) {
            sb.append(l);
        }
    } else if (value instanceof byte[]) {
        for (byte b : (byte[]) value) {
            sb.append(b);
        }
    } else if (value.getClass().isArray()) {
        // Generic fallback for the remaining primitive and object array types.
        final int length = Array.getLength(value);
        for (int i = 0; i < length; i++) {
            sb.append(Array.get(value, i));
        }
    } else {
        throw new UnsupportedOperationException(
                "cannot yet deal with " + value.getClass().getName());
    }
    return sb.toString();
}
/**
 * Wrapper around a {@link MessageDigest} that, if debug log level is enabled, also sends
 * every update to the log for diagnosis.
 *
 * <p>This is an inner (non-static) class because it uses the enclosing instance's
 * {@code log}.
 */
private class DiagnosingMessageDigestDecorator {
    private final MessageDigest delegate;

    public DiagnosingMessageDigestDecorator(MessageDigest delegate) {
        this.delegate = delegate;
    }

    /**
     * Adds {@code value} to the wrapped digest, logging it first when debug is enabled.
     *
     * @param value the bytes to add
     */
    public void update(byte[] value) {
        if (log.isDebugEnabled()) {
            // Neo4j's Log takes printf-style formats, so the placeholder must be %s; the
            // bytes are decoded as UTF-8 purely for display.
            log.debug("adding to message digest %s", new String(value, StandardCharsets.UTF_8));
        }
        delegate.update(value);
    }
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy