From caf819e42d0b8d93bf540e962ca3b336ba5ed585 Mon Sep 17 00:00:00 2001 From: Lasse Westh-Nielsen Date: Tue, 3 Dec 2024 16:50:30 +0100 Subject: [PATCH] migrate estimation cli leiden to application layer --- .../gds/leiden/LeidenAlgorithmFactory.java | 96 ------------------- .../leiden/LeidenAlgorithmFactoryTest.java | 22 ++--- .../java/org/neo4j/gds/leiden/Constants.java | 27 ------ .../neo4j/gds/leiden/LeidenMutateSpec.java | 57 ----------- .../org/neo4j/gds/leiden/LeidenStatsSpec.java | 56 ----------- .../neo4j/gds/leiden/LeidenStreamSpec.java | 56 ----------- .../org/neo4j/gds/leiden/LeidenWriteSpec.java | 57 ----------- .../community/CommunityAlgorithms.java | 17 +--- .../algorithms/community/LeidenTask.java | 49 ++++++++++ 9 files changed, 61 insertions(+), 376 deletions(-) delete mode 100644 algo/src/main/java/org/neo4j/gds/leiden/LeidenAlgorithmFactory.java delete mode 100644 algorithm-specifications/src/main/java/org/neo4j/gds/leiden/Constants.java delete mode 100644 algorithm-specifications/src/main/java/org/neo4j/gds/leiden/LeidenMutateSpec.java delete mode 100644 algorithm-specifications/src/main/java/org/neo4j/gds/leiden/LeidenStatsSpec.java delete mode 100644 algorithm-specifications/src/main/java/org/neo4j/gds/leiden/LeidenStreamSpec.java delete mode 100644 algorithm-specifications/src/main/java/org/neo4j/gds/leiden/LeidenWriteSpec.java create mode 100644 applications/algorithms/community/src/main/java/org/neo4j/gds/applications/algorithms/community/LeidenTask.java diff --git a/algo/src/main/java/org/neo4j/gds/leiden/LeidenAlgorithmFactory.java b/algo/src/main/java/org/neo4j/gds/leiden/LeidenAlgorithmFactory.java deleted file mode 100644 index 068a6e2efe..0000000000 --- a/algo/src/main/java/org/neo4j/gds/leiden/LeidenAlgorithmFactory.java +++ /dev/null @@ -1,96 +0,0 @@ -/* - * Copyright (c) "Neo4j" - * Neo4j Sweden AB [http://neo4j.com] - * - * This file is part of Neo4j. 
- * - * Neo4j is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ -package org.neo4j.gds.leiden; - -import org.neo4j.gds.GraphAlgorithmFactory; -import org.neo4j.gds.algorithms.community.CommunityCompanion; -import org.neo4j.gds.api.Graph; -import org.neo4j.gds.mem.MemoryEstimation; -import org.neo4j.gds.core.utils.progress.tasks.IterativeTask; -import org.neo4j.gds.core.utils.progress.tasks.ProgressTracker; -import org.neo4j.gds.core.utils.progress.tasks.Task; -import org.neo4j.gds.core.utils.progress.tasks.Tasks; -import org.neo4j.gds.termination.TerminationFlag; - -import java.util.List; -import java.util.Optional; - -public class LeidenAlgorithmFactory extends GraphAlgorithmFactory { - - public Leiden build(Graph graph, LeidenParameters parameters, ProgressTracker progressTracker) { - if (!graph.schema().isUndirected()) { - throw new IllegalArgumentException( - "The Leiden algorithm works only with undirected graphs. 
Please orient the edges properly"); - } - var seedValues = Optional.ofNullable(parameters.seedProperty()) - .map(seedParameter -> CommunityCompanion.extractSeedingNodePropertyValues(graph, seedParameter)) - .orElse(null); - - return new Leiden( - graph, - parameters.maxLevels(), - parameters.gamma(), - parameters.theta(), - parameters.includeIntermediateCommunities(), - parameters.randomSeed().orElse(0L), - seedValues, - parameters.tolerance(), - parameters.concurrency(), - progressTracker, - TerminationFlag.RUNNING_TRUE - ); - } - - @Override - public Leiden build(Graph graph, CONFIG configuration, ProgressTracker progressTracker) { - return build(graph, configuration.toParameters(), progressTracker); - } - - @Override - public String taskName() { - return "Leiden"; - } - - @Override - public Task progressTask(Graph graph, CONFIG config) { - - var iterations = config.maxLevels(); - - IterativeTask iterativeTasks = Tasks.iterativeDynamic( - "Iteration", - () -> - List.of( - Tasks.leaf("Local Move", 1), - Tasks.leaf("Modularity Computation", graph.nodeCount()), - Tasks.leaf("Refinement", graph.nodeCount()), - Tasks.leaf("Aggregation", graph.nodeCount()) - ), - iterations - ); - var initilizationTask = Tasks.leaf("Initialization", graph.nodeCount()); - - return Tasks.task("Leiden", initilizationTask, iterativeTasks); - } - - @Override - public MemoryEstimation memoryEstimation(CONFIG config) { - return new LeidenMemoryEstimateDefinition(config.toMemoryEstimationParameters()).memoryEstimation(); - } -} diff --git a/algo/src/test/java/org/neo4j/gds/leiden/LeidenAlgorithmFactoryTest.java b/algo/src/test/java/org/neo4j/gds/leiden/LeidenAlgorithmFactoryTest.java index 0b05b05c50..ebc39b67ab 100644 --- a/algo/src/test/java/org/neo4j/gds/leiden/LeidenAlgorithmFactoryTest.java +++ b/algo/src/test/java/org/neo4j/gds/leiden/LeidenAlgorithmFactoryTest.java @@ -20,14 +20,15 @@ package org.neo4j.gds.leiden; import org.junit.jupiter.api.Test; -import 
org.neo4j.gds.core.utils.progress.tasks.ProgressTracker; +import org.neo4j.gds.applications.algorithms.community.CommunityAlgorithms; +import org.neo4j.gds.applications.algorithms.community.LeidenTask; import org.neo4j.gds.core.utils.progress.tasks.Tasks; import org.neo4j.gds.gdl.GdlFactory; import java.util.List; import static org.assertj.core.api.Assertions.assertThat; -import static org.assertj.core.api.Assertions.assertThatThrownBy; +import static org.assertj.core.api.Assertions.assertThatIllegalArgumentException; class LeidenAlgorithmFactoryTest { @@ -38,8 +39,7 @@ void shouldProduceProgressTask() { var graph = GdlFactory.of(" CREATE (a:NODE), (b:NODE) ").build().getUnion(); - var task = new LeidenAlgorithmFactory<>().progressTask(graph, config); - + var task = LeidenTask.create(graph, config); var initialization = Tasks.leaf("Initialization", 2); var iteration = Tasks.iterativeDynamic("Iteration", () -> @@ -59,13 +59,11 @@ void shouldProduceProgressTask() { @Test void shouldThrowIfNotUndirected() { var graph = GdlFactory.of("(a)-->(b)").build().getUnion(); - var config = LeidenStatsConfigImpl.builder().maxLevels(3).build(); - var leidenFactory = new LeidenAlgorithmFactory<>(); - assertThatThrownBy(() -> leidenFactory.build( - graph, - config, - ProgressTracker.NULL_TRACKER - )).hasMessageContaining( - "undirected"); + + var communityAlgorithms = new CommunityAlgorithms(null, null); + + assertThatIllegalArgumentException() + .isThrownBy(() -> communityAlgorithms.leiden(graph, null)) + .withMessage("The Leiden algorithm works only with undirected graphs. 
Please orient the edges properly"); } } diff --git a/algorithm-specifications/src/main/java/org/neo4j/gds/leiden/Constants.java b/algorithm-specifications/src/main/java/org/neo4j/gds/leiden/Constants.java deleted file mode 100644 index 362ad85e24..0000000000 --- a/algorithm-specifications/src/main/java/org/neo4j/gds/leiden/Constants.java +++ /dev/null @@ -1,27 +0,0 @@ -/* - * Copyright (c) "Neo4j" - * Neo4j Sweden AB [http://neo4j.com] - * - * This file is part of Neo4j. - * - * Neo4j is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ -package org.neo4j.gds.leiden; - -final class Constants { - static final String LEIDEN_DESCRIPTION = - "Leiden is a community detection algorithm, which guarantees that communities are well connected"; - - private Constants() {} -} diff --git a/algorithm-specifications/src/main/java/org/neo4j/gds/leiden/LeidenMutateSpec.java b/algorithm-specifications/src/main/java/org/neo4j/gds/leiden/LeidenMutateSpec.java deleted file mode 100644 index 408aa11bfe..0000000000 --- a/algorithm-specifications/src/main/java/org/neo4j/gds/leiden/LeidenMutateSpec.java +++ /dev/null @@ -1,57 +0,0 @@ -/* - * Copyright (c) "Neo4j" - * Neo4j Sweden AB [http://neo4j.com] - * - * This file is part of Neo4j. 
- * - * Neo4j is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ -package org.neo4j.gds.leiden; - -import org.neo4j.gds.NullComputationResultConsumer; -import org.neo4j.gds.executor.AlgorithmSpec; -import org.neo4j.gds.executor.ComputationResultConsumer; -import org.neo4j.gds.executor.ExecutionContext; -import org.neo4j.gds.executor.ExecutionMode; -import org.neo4j.gds.executor.GdsCallable; -import org.neo4j.gds.procedures.algorithms.community.LeidenMutateResult; -import org.neo4j.gds.procedures.algorithms.configuration.NewConfigFunction; - -import java.util.stream.Stream; - -import static org.neo4j.gds.leiden.Constants.LEIDEN_DESCRIPTION; - - -@GdsCallable(name = "gds.leiden.mutate", aliases = {"gds.beta.leiden.mutate"}, description = LEIDEN_DESCRIPTION, executionMode = ExecutionMode.MUTATE_NODE_PROPERTY) -public class LeidenMutateSpec implements AlgorithmSpec, LeidenAlgorithmFactory> { - @Override - public String name() { - return "LeidenMutate"; - } - - @Override - public LeidenAlgorithmFactory algorithmFactory(ExecutionContext executionContext) { - return new LeidenAlgorithmFactory<>(); - } - - @Override - public NewConfigFunction newConfigFunction() { - return (__, config) -> LeidenMutateConfig.of(config); - } - - @Override - public ComputationResultConsumer> computationResultConsumer() { - return new NullComputationResultConsumer<>(); - } -} diff --git 
a/algorithm-specifications/src/main/java/org/neo4j/gds/leiden/LeidenStatsSpec.java b/algorithm-specifications/src/main/java/org/neo4j/gds/leiden/LeidenStatsSpec.java deleted file mode 100644 index 1d92ad29ed..0000000000 --- a/algorithm-specifications/src/main/java/org/neo4j/gds/leiden/LeidenStatsSpec.java +++ /dev/null @@ -1,56 +0,0 @@ -/* - * Copyright (c) "Neo4j" - * Neo4j Sweden AB [http://neo4j.com] - * - * This file is part of Neo4j. - * - * Neo4j is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . 
- */ -package org.neo4j.gds.leiden; - -import org.neo4j.gds.NullComputationResultConsumer; -import org.neo4j.gds.executor.AlgorithmSpec; -import org.neo4j.gds.executor.ComputationResultConsumer; -import org.neo4j.gds.executor.ExecutionContext; -import org.neo4j.gds.executor.GdsCallable; -import org.neo4j.gds.procedures.algorithms.community.LeidenStatsResult; -import org.neo4j.gds.procedures.algorithms.configuration.NewConfigFunction; - -import java.util.stream.Stream; - -import static org.neo4j.gds.executor.ExecutionMode.STREAM; -import static org.neo4j.gds.leiden.Constants.LEIDEN_DESCRIPTION; - -@GdsCallable(name = "gds.leiden.stats", aliases = {"gds.beta.leiden.stats"}, description = LEIDEN_DESCRIPTION, executionMode = STREAM) -public class LeidenStatsSpec implements AlgorithmSpec, LeidenAlgorithmFactory> { - @Override - public String name() { - return "LeidenStats"; - } - - @Override - public LeidenAlgorithmFactory algorithmFactory(ExecutionContext executionContext) { - return new LeidenAlgorithmFactory<>(); - } - - @Override - public NewConfigFunction newConfigFunction() { - return (__, config) -> LeidenStatsConfig.of(config); - } - - @Override - public ComputationResultConsumer> computationResultConsumer() { - return new NullComputationResultConsumer<>(); - } -} diff --git a/algorithm-specifications/src/main/java/org/neo4j/gds/leiden/LeidenStreamSpec.java b/algorithm-specifications/src/main/java/org/neo4j/gds/leiden/LeidenStreamSpec.java deleted file mode 100644 index 3cbf529699..0000000000 --- a/algorithm-specifications/src/main/java/org/neo4j/gds/leiden/LeidenStreamSpec.java +++ /dev/null @@ -1,56 +0,0 @@ -/* - * Copyright (c) "Neo4j" - * Neo4j Sweden AB [http://neo4j.com] - * - * This file is part of Neo4j. - * - * Neo4j is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. 
- * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ -package org.neo4j.gds.leiden; - -import org.neo4j.gds.NullComputationResultConsumer; -import org.neo4j.gds.executor.AlgorithmSpec; -import org.neo4j.gds.executor.ComputationResultConsumer; -import org.neo4j.gds.executor.ExecutionContext; -import org.neo4j.gds.executor.GdsCallable; -import org.neo4j.gds.procedures.algorithms.community.LeidenStreamResult; -import org.neo4j.gds.procedures.algorithms.configuration.NewConfigFunction; - -import java.util.stream.Stream; - -import static org.neo4j.gds.executor.ExecutionMode.STREAM; -import static org.neo4j.gds.leiden.Constants.LEIDEN_DESCRIPTION; - -@GdsCallable(name = "gds.leiden.stream", aliases = {"gds.beta.leiden.stream"}, description = LEIDEN_DESCRIPTION, executionMode = STREAM) -public class LeidenStreamSpec implements AlgorithmSpec, LeidenAlgorithmFactory> { - @Override - public String name() { - return "LeidenStream"; - } - - @Override - public LeidenAlgorithmFactory algorithmFactory(ExecutionContext executionContext) { - return new LeidenAlgorithmFactory<>(); - } - - @Override - public NewConfigFunction newConfigFunction() { - return (__, config) -> LeidenStreamConfig.of(config); - } - - @Override - public ComputationResultConsumer> computationResultConsumer() { - return new NullComputationResultConsumer<>(); - } -} diff --git a/algorithm-specifications/src/main/java/org/neo4j/gds/leiden/LeidenWriteSpec.java b/algorithm-specifications/src/main/java/org/neo4j/gds/leiden/LeidenWriteSpec.java deleted file mode 100644 index 21af8bbb79..0000000000 --- a/algorithm-specifications/src/main/java/org/neo4j/gds/leiden/LeidenWriteSpec.java +++ /dev/null @@ 
-1,57 +0,0 @@ -/* - * Copyright (c) "Neo4j" - * Neo4j Sweden AB [http://neo4j.com] - * - * This file is part of Neo4j. - * - * Neo4j is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ -package org.neo4j.gds.leiden; - -import org.neo4j.gds.NullComputationResultConsumer; -import org.neo4j.gds.executor.AlgorithmSpec; -import org.neo4j.gds.executor.ComputationResultConsumer; -import org.neo4j.gds.executor.ExecutionContext; -import org.neo4j.gds.executor.ExecutionMode; -import org.neo4j.gds.executor.GdsCallable; -import org.neo4j.gds.procedures.algorithms.community.LeidenWriteResult; -import org.neo4j.gds.procedures.algorithms.configuration.NewConfigFunction; - -import java.util.stream.Stream; - -import static org.neo4j.gds.leiden.Constants.LEIDEN_DESCRIPTION; - - -@GdsCallable(name = "gds.leiden.write", aliases = {"gds.beta.leiden.write"}, description = LEIDEN_DESCRIPTION, executionMode = ExecutionMode.WRITE_NODE_PROPERTY) -public class LeidenWriteSpec implements AlgorithmSpec, LeidenAlgorithmFactory> { - @Override - public String name() { - return "LeidenWrite"; - } - - @Override - public LeidenAlgorithmFactory algorithmFactory(ExecutionContext executionContext) { - return new LeidenAlgorithmFactory<>(); - } - - @Override - public NewConfigFunction newConfigFunction() { - return (__, config) -> LeidenWriteConfig.of(config); - } - - @Override - public ComputationResultConsumer> computationResultConsumer() { - return 
new NullComputationResultConsumer<>(); - } -} diff --git a/applications/algorithms/community/src/main/java/org/neo4j/gds/applications/algorithms/community/CommunityAlgorithms.java b/applications/algorithms/community/src/main/java/org/neo4j/gds/applications/algorithms/community/CommunityAlgorithms.java index 6c50b5ad55..ee0938fa0a 100644 --- a/applications/algorithms/community/src/main/java/org/neo4j/gds/applications/algorithms/community/CommunityAlgorithms.java +++ b/applications/algorithms/community/src/main/java/org/neo4j/gds/applications/algorithms/community/CommunityAlgorithms.java @@ -270,26 +270,13 @@ LocalClusteringCoefficientResult lcc(Graph graph, LocalClusteringCoefficientBase ); } - LeidenResult leiden(Graph graph, LeidenBaseConfig configuration) { + public LeidenResult leiden(Graph graph, LeidenBaseConfig configuration) { if (!graph.schema().isUndirected()) { throw new IllegalArgumentException( "The Leiden algorithm works only with undirected graphs. Please orient the edges properly"); } - var iterations = configuration.maxLevels(); - var iterativeTasks = Tasks.iterativeDynamic( - "Iteration", - () -> - List.of( - Tasks.leaf("Local Move", 1), - Tasks.leaf("Modularity Computation", graph.nodeCount()), - Tasks.leaf("Refinement", graph.nodeCount()), - Tasks.leaf("Aggregation", graph.nodeCount()) - ), - iterations - ); - var initializationTask = Tasks.leaf("Initialization", graph.nodeCount()); - var task = Tasks.task(AlgorithmLabel.Leiden.asString(), initializationTask, iterativeTasks); + var task = LeidenTask.create(graph, configuration); var progressTracker = progressTrackerCreator.createProgressTracker(configuration, task); var parameters = configuration.toParameters(); diff --git a/applications/algorithms/community/src/main/java/org/neo4j/gds/applications/algorithms/community/LeidenTask.java b/applications/algorithms/community/src/main/java/org/neo4j/gds/applications/algorithms/community/LeidenTask.java new file mode 100644 index 0000000000..5e3679e478 
--- /dev/null +++ b/applications/algorithms/community/src/main/java/org/neo4j/gds/applications/algorithms/community/LeidenTask.java @@ -0,0 +1,49 @@ +/* + * Copyright (c) "Neo4j" + * Neo4j Sweden AB [http://neo4j.com] + * + * This file is part of Neo4j. + * + * Neo4j is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +package org.neo4j.gds.applications.algorithms.community; + +import org.neo4j.gds.api.IdMap; +import org.neo4j.gds.applications.algorithms.machinery.AlgorithmLabel; +import org.neo4j.gds.core.utils.progress.tasks.Task; +import org.neo4j.gds.core.utils.progress.tasks.Tasks; +import org.neo4j.gds.leiden.LeidenBaseConfig; + +import java.util.List; + +public final class LeidenTask { + private LeidenTask() {} + + public static Task create(IdMap idMap, LeidenBaseConfig configuration) { + var iterations = configuration.maxLevels(); + var iterativeTasks = Tasks.iterativeDynamic( + "Iteration", + () -> + List.of( + Tasks.leaf("Local Move", 1), + Tasks.leaf("Modularity Computation", idMap.nodeCount()), + Tasks.leaf("Refinement", idMap.nodeCount()), + Tasks.leaf("Aggregation", idMap.nodeCount()) + ), + iterations + ); + var initializationTask = Tasks.leaf("Initialization", idMap.nodeCount()); + return Tasks.task(AlgorithmLabel.Leiden.asString(), initializationTask, iterativeTasks); + } +}