org.neo4j.gds.embeddings.hashgnn.MinHashTask Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of algo Show documentation
Show all versions of algo Show documentation
Neo4j Graph Data Science :: Algorithms
The newest version!
/*
* Copyright (c) "Neo4j"
* Neo4j Sweden AB [http://neo4j.com]
*
* This file is part of Neo4j.
*
* Neo4j is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package org.neo4j.gds.embeddings.hashgnn;
import com.carrotsearch.hppc.BitSet;
import org.apache.commons.lang3.mutable.MutableLong;
import org.neo4j.gds.api.Graph;
import org.neo4j.gds.core.concurrency.Concurrency;
import org.neo4j.gds.core.concurrency.RunWithConcurrency;
import org.neo4j.gds.termination.TerminationFlag;
import org.neo4j.gds.core.utils.paged.HugeAtomicBitSet;
import org.neo4j.gds.collections.ha.HugeObjectArray;
import org.neo4j.gds.core.utils.partition.DegreePartition;
import org.neo4j.gds.core.utils.progress.tasks.ProgressTracker;
import java.util.List;
import java.util.function.Function;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
import static org.neo4j.gds.embeddings.hashgnn.HashGNNCompanion.hashArgMin;
class MinHashTask implements Runnable {
private final List hashes;
private final int k;
private final int embeddingDimension;
private final DegreePartition partition;
private final List concurrentGraphs;
private final HugeObjectArray currentEmbeddings;
private final HugeObjectArray previousEmbeddings;
private final TerminationFlag terminationFlag;
private final ProgressTracker progressTracker;
private long totalFeatureCount = 0;
MinHashTask(
int k,
DegreePartition partition,
List graphs,
int embeddingDimension,
HugeObjectArray currentEmbeddings,
HugeObjectArray previousEmbeddings,
List hashes,
TerminationFlag terminationFlag,
ProgressTracker progressTracker
) {
this.k = k;
this.partition = partition;
this.concurrentGraphs = graphs.stream().map(Graph::concurrentCopy).collect(Collectors.toList());
this.embeddingDimension = embeddingDimension;
this.currentEmbeddings = currentEmbeddings;
this.previousEmbeddings = previousEmbeddings;
this.hashes = hashes;
this.terminationFlag = terminationFlag;
this.progressTracker = progressTracker;
}
static void compute(
List degreePartition,
List graphs,
Concurrency concurrency,
int embeddingDensity,
int embeddingDimension,
HugeObjectArray currentEmbeddings,
HugeObjectArray previousEmbeddings,
List hashes,
ProgressTracker progressTracker,
TerminationFlag terminationFlag,
MutableLong totalFeatureCountOutput
) {
progressTracker.beginSubTask("Perform min-hashing");
progressTracker.setSteps(embeddingDensity * graphs.get(0).nodeCount());
var tasks = IntStream.range(0, embeddingDensity)
.mapToObj(k -> degreePartition.stream().map(p ->
new MinHashTask(
k,
p,
graphs,
embeddingDimension,
currentEmbeddings,
previousEmbeddings,
hashes,
terminationFlag,
progressTracker
)))
.flatMap(Function.identity())
.collect(Collectors.toList());
RunWithConcurrency.builder()
.concurrency(concurrency)
.tasks(tasks)
.terminationFlag(terminationFlag)
.run();
totalFeatureCountOutput.add(tasks.stream().mapToLong(MinHashTask::totalFeatureCount).sum());
progressTracker.endSubTask("Perform min-hashing");
}
@Override
public void run() {
var neighborsVector = new BitSet(embeddingDimension);
var selfMinAndArgMin = new HashGNN.MinAndArgmin();
var neighborsMinAndArgMin = new HashGNN.MinAndArgmin();
var tempMinAndArgMin = new HashGNN.MinAndArgmin();
terminationFlag.assertRunning();
var hashesForK = hashes.get(k);
var neighborsAggregationHashes = hashesForK.neighborsAggregationHashes();
var selfAggregationHashes = hashesForK.selfAggregationHashes();
var preAggregationHashes = hashesForK.preAggregationHashes();
partition.consume(nodeId -> {
var currentEmbedding = currentEmbeddings.get(nodeId);
hashArgMin(previousEmbeddings.get(nodeId), selfAggregationHashes, selfMinAndArgMin, tempMinAndArgMin);
neighborsVector.clear();
for (int i = 0; i < concurrentGraphs.size(); i++) {
var preAggregationHashesForRel = preAggregationHashes.get(i);
var currentGraph = concurrentGraphs.get(i);
currentGraph.forEachRelationship(nodeId, (src, trg) -> {
var prevTargetEmbedding = previousEmbeddings.get(trg);
hashArgMin(
prevTargetEmbedding,
preAggregationHashesForRel,
neighborsMinAndArgMin,
tempMinAndArgMin
);
int argMin = neighborsMinAndArgMin.argMin;
if (argMin != -1) {
neighborsVector.set(argMin);
}
return true;
});
}
hashArgMin(neighborsVector, neighborsAggregationHashes, neighborsMinAndArgMin);
int argMin = (neighborsMinAndArgMin.min < selfMinAndArgMin.min) ? neighborsMinAndArgMin.argMin : selfMinAndArgMin.argMin;
if (argMin != -1) {
if (!currentEmbedding.getAndSet(argMin)) {
totalFeatureCount++;
}
}
});
progressTracker.logSteps(partition.nodeCount());
}
public long totalFeatureCount() {
return totalFeatureCount;
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy