All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.neo4j.gds.wcc.Wcc Maven / Gradle / Ivy

There is a newer version: 2.15.0
Show newest version
/*
 * Copyright (c) "Neo4j"
 * Neo4j Sweden AB [http://neo4j.com]
 *
 * This file is part of Neo4j.
 *
 * Neo4j is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see .
 */
package org.neo4j.gds.wcc;

import org.neo4j.gds.Algorithm;
import org.neo4j.gds.algorithms.community.CommunityCompanion;
import org.neo4j.gds.api.Graph;
import org.neo4j.gds.core.concurrency.ParallelUtil;
import org.neo4j.gds.core.utils.paged.dss.DisjointSetStruct;
import org.neo4j.gds.core.utils.paged.dss.HugeAtomicDisjointSetStruct;
import org.neo4j.gds.core.utils.progress.tasks.ProgressTracker;
import org.neo4j.gds.termination.TerminationFlag;

import java.util.Optional;
import java.util.concurrent.ExecutorService;

import static org.neo4j.gds.utils.StringFormatting.formatWithLocale;

/**
 * Parallel Union-Find Algorithm based on the
 * "Wait-free Parallel Algorithms for the Union-Find Problem" paper.
 *
 * @see HugeAtomicDisjointSetStruct
 * @see the paper
 * 

* For undirected graphs and directed graphs with index, we use a sampling based approach, as introduced in [1]. *

* The idea is to identify the largest component using a sampled subgraph. * Relationships of nodes that are already contained in the largest component are * not iterated. The compression step described in [1], is contained in * {@link DisjointSetStruct#setIdOf}. *

* [1] Michael Sutton, Tal Ben-Nun, and Amnon Barak. "Optimizing Parallel * Graph Connectivity Computation via Subgraph Sampling" Symposium on * Parallel and Distributed Processing, IPDPS 2018. */ public class Wcc extends Algorithm { private final WccParameters parameters; private final ExecutorService executorService; private final long batchSize; private final Graph graph; public Wcc( Graph graph, ExecutorService executor, int minBatchSize, WccParameters parameters, ProgressTracker progressTracker, TerminationFlag terminationFlag ) { super(progressTracker); this.terminationFlag = terminationFlag; this.graph = graph; this.parameters = parameters; this.executorService = executor; this.batchSize = ParallelUtil.adjustedBatchSize( graph.nodeCount(), parameters.concurrency(), minBatchSize, Integer.MAX_VALUE ); if (ParallelUtil.threadCount(batchSize, graph.nodeCount()) > Integer.MAX_VALUE) { throw new IllegalArgumentException(formatWithLocale( "Too many nodes (%d) to run WCC with the given concurrency (%d) and batchSize (%d)", graph.nodeCount(), parameters.concurrency().value(), batchSize )); } } @Override public DisjointSetStruct compute() { progressTracker.beginSubTask(); long nodeCount = graph.nodeCount(); var disjointSetStruct = parameters.seedProperty() .map(seedProperty -> { var initialComponents = CommunityCompanion.extractSeedingNodePropertyValues(graph, seedProperty); return new HugeAtomicDisjointSetStruct(nodeCount, initialComponents, parameters.concurrency()); }) .orElseGet(() -> new HugeAtomicDisjointSetStruct(nodeCount, parameters.concurrency())); if (graph.characteristics().isUndirected() || graph.characteristics().isInverseIndexed()) { new SampledStrategyBuilder() .graph(graph) .disjointSetStruct(disjointSetStruct) .threshold(threshold()) .concurrency(parameters.concurrency()) .terminationFlag(terminationFlag) .progressTracker(progressTracker) .executorService(executorService) .build() .compute(); } else { new UnsampledStrategyBuilder() .graph(graph) .disjointSetStruct(disjointSetStruct) .threshold(threshold()) .batchSize(batchSize) .terminationFlag(terminationFlag) .progressTracker(progressTracker) .executorService(executorService) .build() .compute(); } progressTracker.endSubTask(); return disjointSetStruct; } static double defaultWeight(double threshold) { return threshold + 1; } private Optional threshold() { return parameters.hasThreshold() ? Optional.of(parameters.threshold()) : Optional.empty(); } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy