All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.neo4j.gds.embeddings.hashgnn.MinHashTask Maven / Gradle / Ivy

The newest version!
/*
 * Copyright (c) "Neo4j"
 * Neo4j Sweden AB [http://neo4j.com]
 *
 * This file is part of Neo4j.
 *
 * Neo4j is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
package org.neo4j.gds.embeddings.hashgnn;

import com.carrotsearch.hppc.BitSet;
import org.apache.commons.lang3.mutable.MutableLong;
import org.neo4j.gds.api.Graph;
import org.neo4j.gds.core.concurrency.Concurrency;
import org.neo4j.gds.core.concurrency.RunWithConcurrency;
import org.neo4j.gds.termination.TerminationFlag;
import org.neo4j.gds.core.utils.paged.HugeAtomicBitSet;
import org.neo4j.gds.collections.ha.HugeObjectArray;
import org.neo4j.gds.core.utils.partition.DegreePartition;
import org.neo4j.gds.core.utils.progress.tasks.ProgressTracker;

import java.util.List;
import java.util.function.Function;
import java.util.stream.Collectors;
import java.util.stream.IntStream;

import static org.neo4j.gds.embeddings.hashgnn.HashGNNCompanion.hashArgMin;

/**
 * One unit of the "Perform min-hashing" step: handles a single hash index {@code k}
 * for the nodes of a single {@link DegreePartition}.
 *
 * <p>For each node of the partition the task
 * <ol>
 *   <li>runs {@code hashArgMin} on the node's own previous embedding,</li>
 *   <li>runs {@code hashArgMin} on the previous embedding of every relationship target, in every
 *       graph, and records each reported {@code argMin} in a scratch bit set,</li>
 *   <li>runs {@code hashArgMin} on that scratch bit set, and</li>
 *   <li>sets, in the node's current embedding, the {@code argMin} belonging to whichever of the
 *       two results (self / neighbours) has the smaller {@code min}; the self result wins ties.</li>
 * </ol>
 *
 * <p>An {@code argMin} of {@code -1} is skipped everywhere (presumably it means "no feature
 * found" - confirm against {@code HashGNNCompanion.hashArgMin}).
 *
 * <p>NOTE(review): the {@code List} and {@code HugeObjectArray} declarations in this copy carry
 * no type arguments, so element accesses such as {@code hashes.get(k)} are untyped here. It
 * looks like the generic parameters were lost from this copy - confirm against the upstream file.
 */
class MinHashTask implements Runnable {
    private final int k;
    private final DegreePartition partition;
    private final List concurrentGraphs;
    private final int embeddingDimension;
    private final HugeObjectArray currentEmbeddings;
    private final HugeObjectArray previousEmbeddings;
    private final List hashes;
    private final TerminationFlag terminationFlag;
    private final ProgressTracker progressTracker;
    // Incremented by run() each time getAndSet on a current embedding returns false.
    private long totalFeatureCount = 0;

    /**
     * @param k                  index into {@code hashes} selecting the hash set used by this task
     * @param partition          nodes this task iterates over
     * @param graphs             graphs to traverse; a {@code concurrentCopy()} of each is kept
     * @param embeddingDimension size passed to the scratch neighbour bit set
     * @param currentEmbeddings  per-node embeddings written by this task
     * @param previousEmbeddings per-node embeddings read by this task
     * @param hashes             hash sets, indexed by {@code k}
     * @param terminationFlag    checked once at the start of {@link #run()}
     * @param progressTracker    receives the partition's node count when the task finishes
     */
    MinHashTask(
        int k,
        DegreePartition partition,
        List graphs,
        int embeddingDimension,
        HugeObjectArray currentEmbeddings,
        HugeObjectArray previousEmbeddings,
        List hashes,
        TerminationFlag terminationFlag,
        ProgressTracker progressTracker
    ) {
        this.k = k;
        this.partition = partition;
        this.embeddingDimension = embeddingDimension;
        this.hashes = hashes;
        this.currentEmbeddings = currentEmbeddings;
        this.previousEmbeddings = previousEmbeddings;
        this.terminationFlag = terminationFlag;
        this.progressTracker = progressTracker;
        // Each task works on its own concurrent copy of every graph.
        this.concurrentGraphs = graphs.stream().map(Graph::concurrentCopy).collect(Collectors.toList());
    }

    /**
     * Creates one task per (hash index, partition) pair, runs them all with the given
     * concurrency, and adds the summed per-task feature counts to {@code totalFeatureCountOutput}.
     *
     * <p>The whole call is wrapped in the "Perform min-hashing" progress sub task, whose step
     * count is {@code embeddingDensity} times the node count of the first graph.
     *
     * @param degreePartition         partitions; every one is paired with every hash index
     * @param graphs                  graphs handed to each task; element 0 supplies the node count
     * @param concurrency             concurrency passed to {@code RunWithConcurrency}
     * @param embeddingDensity        number of hash indices, i.e. tasks created per partition
     * @param embeddingDimension      forwarded to each task
     * @param currentEmbeddings       forwarded to each task
     * @param previousEmbeddings      forwarded to each task
     * @param hashes                  forwarded to each task
     * @param progressTracker         progress sink for the sub task and its steps
     * @param terminationFlag         forwarded to each task and to {@code RunWithConcurrency}
     * @param totalFeatureCountOutput accumulator that receives the sum of all task counts
     */
    static void compute(
        List degreePartition,
        List graphs,
        Concurrency concurrency,
        int embeddingDensity,
        int embeddingDimension,
        HugeObjectArray currentEmbeddings,
        HugeObjectArray previousEmbeddings,
        List hashes,
        ProgressTracker progressTracker,
        TerminationFlag terminationFlag,
        MutableLong totalFeatureCountOutput
    ) {
        progressTracker.beginSubTask("Perform min-hashing");
        progressTracker.setSteps(embeddingDensity * graphs.get(0).nodeCount());

        // One stream of tasks per hash index, each covering every partition.
        var tasksPerHashIndex = IntStream.range(0, embeddingDensity)
            .mapToObj(hashIndex -> degreePartition.stream().map(nodePartition ->
                new MinHashTask(
                    hashIndex,
                    nodePartition,
                    graphs,
                    embeddingDimension,
                    currentEmbeddings,
                    previousEmbeddings,
                    hashes,
                    terminationFlag,
                    progressTracker
                )));
        var tasks = tasksPerHashIndex
            .flatMap(Function.identity())
            .collect(Collectors.toList());

        RunWithConcurrency.builder()
            .concurrency(concurrency)
            .tasks(tasks)
            .terminationFlag(terminationFlag)
            .run();

        long featuresAdded = tasks.stream().mapToLong(MinHashTask::totalFeatureCount).sum();
        totalFeatureCountOutput.add(featuresAdded);

        progressTracker.endSubTask("Perform min-hashing");
    }

    /**
     * Processes every node of the partition as described on the class, then logs the
     * partition's node count as completed steps.
     */
    @Override
    public void run() {
        // Scratch state allocated once per task and reused for every node.
        var neighborFeatures = new BitSet(embeddingDimension);
        var selfResult = new HashGNN.MinAndArgmin();
        var neighborResult = new HashGNN.MinAndArgmin();
        var scratchResult = new HashGNN.MinAndArgmin();

        terminationFlag.assertRunning();

        var taskHashes = hashes.get(k);
        var neighborHashes = taskHashes.neighborsAggregationHashes();
        var selfHashes = taskHashes.selfAggregationHashes();
        var perGraphHashes = taskHashes.preAggregationHashes();
        int graphCount = concurrentGraphs.size();

        partition.consume(nodeId -> {
            var nodeEmbedding = currentEmbeddings.get(nodeId);
            hashArgMin(previousEmbeddings.get(nodeId), selfHashes, selfResult, scratchResult);

            neighborFeatures.clear();

            for (int graphIndex = 0; graphIndex < graphCount; graphIndex++) {
                // The per-graph hashes are looked up with the same index as the graph.
                var relationshipHashes = perGraphHashes.get(graphIndex);
                concurrentGraphs.get(graphIndex).forEachRelationship(nodeId, (source, target) -> {
                    hashArgMin(
                        previousEmbeddings.get(target),
                        relationshipHashes,
                        neighborResult,
                        scratchResult
                    );

                    int pickedFeature = neighborResult.argMin;
                    if (pickedFeature != -1) {
                        neighborFeatures.set(pickedFeature);
                    }

                    // Always continue with the next relationship.
                    return true;
                });
            }

            // neighborResult is reused: it now receives the result for the collected features.
            hashArgMin(neighborFeatures, neighborHashes, neighborResult);

            int winningFeature;
            if (neighborResult.min < selfResult.min) {
                winningFeature = neighborResult.argMin;
            } else {
                winningFeature = selfResult.argMin;
            }

            // Count only when getAndSet reports false (presumably: the bit was not set before).
            if (winningFeature != -1 && !nodeEmbedding.getAndSet(winningFeature)) {
                totalFeatureCount++;
            }
        });

        progressTracker.logSteps(partition.nodeCount());
    }

    /**
     * @return the value of this task's feature counter, as incremented by {@link #run()}
     */
    public long totalFeatureCount() {
        return this.totalFeatureCount;
    }

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy