Skip to content

Commit

Permalink
Merge pull request #8208 from IoannisPanagiotas/scc-write-onto-facade
Browse files Browse the repository at this point in the history
Add Scc Write facades
  • Loading branch information
IoannisPanagiotas authored Oct 4, 2023
2 parents 02ad542 + a7e7ad2 commit 7c7c48e
Show file tree
Hide file tree
Showing 32 changed files with 410 additions and 113 deletions.
25 changes: 15 additions & 10 deletions algo-common/src/main/java/org/neo4j/gds/result/HistogramUtils.java
Original file line number Diff line number Diff line change
Expand Up @@ -50,16 +50,21 @@ public static Map<String, Object> similaritySummary(DoubleHistogram histogram) {
}

/**
 * Builds a map of named summary statistics read from the given histogram.
 *
 * The {@code Map.ofEntries} variant below reports {@code min}, {@code max}, {@code mean}
 * and the percentiles {@code p1}, {@code p5}, {@code p10}, {@code p25}, {@code p50},
 * {@code p75}, {@code p90}, {@code p95}, {@code p99} and {@code p999} (the 99.9th percentile).
 *
 * @param histogram the histogram to read the statistics from
 * @return a map from statistic name to the value read from {@code histogram}
 */
// NOTE(review): this span looks like a rendered diff — the Map.of(...) lines appear to be the
// removed body (no p1/p5/p10/p25) and the Map.ofEntries(...) lines the replacement; confirm
// against the actual file before relying on either form.
public static Map<String, Object> communitySummary(AbstractHistogram histogram) {
return Map.of(
"min", histogram.getMinValue(),
"mean", histogram.getMean(),
"max", histogram.getMaxValue(),
"p50", histogram.getValueAtPercentile(50),
"p75", histogram.getValueAtPercentile(75),
"p90", histogram.getValueAtPercentile(90),
"p95", histogram.getValueAtPercentile(95),
"p99", histogram.getValueAtPercentile(99),
"p999", histogram.getValueAtPercentile(99.9)
return Map.ofEntries(
entry("min", histogram.getMinValue()),
entry("max", histogram.getMaxValue()),
entry("mean", histogram.getMean()),
entry("p1", histogram.getValueAtPercentile(1)),
entry("p5", histogram.getValueAtPercentile(5)),
entry("p10", histogram.getValueAtPercentile(10)),
entry("p25", histogram.getValueAtPercentile(25)),
entry("p50", histogram.getValueAtPercentile(50)),
entry("p75", histogram.getValueAtPercentile(75)),
entry("p90", histogram.getValueAtPercentile(90)),
entry("p95", histogram.getValueAtPercentile(95)),
entry("p99", histogram.getValueAtPercentile(99)),
entry("p999", histogram.getValueAtPercentile(99.9))

);
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
/*
* Copyright (c) "Neo4j"
* Neo4j Sweden AB [http://neo4j.com]
*
* This file is part of Neo4j.
*
* Neo4j is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package org.neo4j.gds.algorithms;

import java.util.Map;

/**
 * Holder for three result values: a node count, a community count and a
 * community-distribution map.
 *
 * All fields are {@code final}; the distribution map is stored as the reference passed in
 * (no defensive copy is made).
 */
public class AlphaSccSpecificFields implements CommunityStatisticsSpecificFields {

    private final long nodes;
    private final long communityCount;
    private final Map<String, Object> communityDistribution;

    /** Shared instance with zero nodes, zero communities and an empty distribution map. */
    public static final AlphaSccSpecificFields EMPTY = new AlphaSccSpecificFields(0, 0, Map.of());

    /**
     * @param nodes                 value returned by {@link #nodes()}
     * @param communityCount        value returned by {@link #communityCount()}
     * @param communityDistribution map returned by {@link #communityDistribution()}
     */
    public AlphaSccSpecificFields(
        long nodes,
        long communityCount,
        Map<String, Object> communityDistribution
    ) {
        this.nodes = nodes;
        this.communityCount = communityCount;
        this.communityDistribution = communityDistribution;
    }

    /** @return the node count given at construction */
    public long nodes() {
        return this.nodes;
    }

    /** @return the community count given at construction */
    public long communityCount() {
        return this.communityCount;
    }

    /** @return the distribution map given at construction (same reference) */
    public Map<String, Object> communityDistribution() {
        return this.communityDistribution;
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@
import org.neo4j.gds.modularityoptimization.ModularityOptimizationFactory;
import org.neo4j.gds.modularityoptimization.ModularityOptimizationResult;
import org.neo4j.gds.scc.SccAlgorithmFactory;
import org.neo4j.gds.scc.SccBaseConfig;
import org.neo4j.gds.scc.SccCommonBaseConfig;
import org.neo4j.gds.triangle.IntersectingTriangleCountFactory;
import org.neo4j.gds.triangle.LocalClusteringCoefficientBaseConfig;
import org.neo4j.gds.triangle.LocalClusteringCoefficientFactory;
Expand Down Expand Up @@ -198,7 +198,7 @@ AlgorithmComputationResult<LabelPropagationResult> labelPropagation(

AlgorithmComputationResult<HugeLongArray> scc(
String graphName,
SccBaseConfig config,
SccCommonBaseConfig config,
User user,
DatabaseId databaseId
) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
package org.neo4j.gds.algorithms.community;

import org.neo4j.gds.algorithms.AlgorithmComputationResult;
import org.neo4j.gds.algorithms.AlphaSccSpecificFields;
import org.neo4j.gds.algorithms.CommunityStatisticsSpecificFields;
import org.neo4j.gds.algorithms.KCoreSpecificFields;
import org.neo4j.gds.algorithms.NodePropertyWriteResult;
Expand All @@ -33,6 +34,8 @@
import org.neo4j.gds.kcore.KCoreDecompositionWriteConfig;
import org.neo4j.gds.result.CommunityStatistics;
import org.neo4j.gds.result.StatisticsComputationInstructions;
import org.neo4j.gds.scc.SccAlphaWriteConfig;
import org.neo4j.gds.scc.SccWriteConfig;
import org.neo4j.gds.wcc.WccWriteConfig;

import java.util.Optional;
Expand Down Expand Up @@ -124,6 +127,84 @@ public NodePropertyWriteResult<KCoreSpecificFields> kcore(

}

/**
 * Runs SCC through {@code communityAlgorithmsFacade}, timing the computation, and hands the
 * result to {@code writeToDatabase} under the label {@code "SccWrite"}.
 *
 * @param graphName                         name of the graph passed to the algorithm facade
 * @param configuration                     write configuration; supplies {@code consecutiveIds},
 *                                          {@code concurrency}, {@code writeConcurrency},
 *                                          {@code writeProperty} and {@code arrowConnectionInfo}
 * @param user                              user passed to the algorithm facade
 * @param databaseId                        database passed to the algorithm facade
 * @param statisticsComputationInstructions forwarded unchanged to {@code writeToDatabase}
 * @return whatever {@code writeToDatabase} produces for this run
 */
public NodePropertyWriteResult<StandardCommunityStatisticsSpecificFields> scc(
    String graphName,
    SccWriteConfig configuration,
    User user,
    DatabaseId databaseId,
    StatisticsComputationInstructions statisticsComputationInstructions
) {
    // Execute the algorithm via the facade and capture how long the computation took.
    var timedComputation = AlgorithmRunner.runWithTiming(
        () -> communityAlgorithmsFacade.scc(graphName, configuration, user, databaseId)
    );

    return writeToDatabase(
        timedComputation.algorithmResult,
        configuration,
        // Node property values built from the adapted result, honouring consecutiveIds.
        (components, cfg) -> CommunityResultCompanion.nodePropertyValues(
            cfg.consecutiveIds(),
            NodePropertyValuesAdapter.adapt(components),
            Optional.empty(),
            cfg.concurrency()
        ),
        components -> components::get,
        (components, componentCount, communitySummary) ->
            new StandardCommunityStatisticsSpecificFields(componentCount, communitySummary),
        statisticsComputationInstructions,
        timedComputation.computeMilliseconds,
        () -> StandardCommunityStatisticsSpecificFields.EMPTY,
        "SccWrite",
        configuration.writeConcurrency(),
        configuration.writeProperty(),
        configuration.arrowConnectionInfo()
    );
}

/**
 * Runs SCC through {@code communityAlgorithmsFacade}, timing the computation, and hands the
 * result to {@code writeToDatabase} under the label {@code "SccWrite"}.
 *
 * The specific fields are built as {@link AlphaSccSpecificFields}, whose node count is taken
 * from {@code result.size()}.
 *
 * @param graphName                         name of the graph passed to the algorithm facade
 * @param configuration                     write configuration; supplies {@code writeConcurrency},
 *                                          {@code writeProperty} and {@code arrowConnectionInfo}
 * @param user                              user passed to the algorithm facade
 * @param databaseId                        database passed to the algorithm facade
 * @param statisticsComputationInstructions forwarded unchanged to {@code writeToDatabase}
 * @return whatever {@code writeToDatabase} produces for this run
 */
public NodePropertyWriteResult<AlphaSccSpecificFields> alphaScc(
    String graphName,
    SccAlphaWriteConfig configuration,
    User user,
    DatabaseId databaseId,
    StatisticsComputationInstructions statisticsComputationInstructions
) {

    // Run the algorithm and time the execution
    var intermediateResult = AlgorithmRunner.runWithTiming(
        () -> communityAlgorithmsFacade.scc(graphName, configuration, user, databaseId)
    );
    var algorithmResult = intermediateResult.algorithmResult;

    return writeToDatabase(
        algorithmResult,
        configuration,
        // The result is adapted directly into node property values.
        (result, config) -> NodePropertyValuesAdapter.adapt(result),
        (result -> result::get),
        (result, componentCount, communitySummary) -> {
            return new AlphaSccSpecificFields(
                result.size(),
                componentCount,
                communitySummary
            );
        },
        statisticsComputationInstructions,
        intermediateResult.computeMilliseconds,
        // Reference the static constant through the class, not through another instance.
        () -> AlphaSccSpecificFields.EMPTY,
        "SccWrite",
        configuration.writeConcurrency(),
        configuration.writeProperty(),
        configuration.arrowConnectionInfo()
    );

}


<RESULT, CONFIG extends AlgoBaseConfig, ASF extends CommunityStatisticsSpecificFields> NodePropertyWriteResult<ASF> writeToDatabase(
AlgorithmComputationResult<RESULT> algorithmResult,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@
import org.neo4j.gds.core.utils.progress.tasks.Tasks;
import org.neo4j.gds.mem.MemoryUsage;

public class SccAlgorithmFactory<CONFIG extends SccBaseConfig> extends GraphAlgorithmFactory<Scc, CONFIG> {
public class SccAlgorithmFactory<CONFIG extends SccCommonBaseConfig> extends GraphAlgorithmFactory<Scc, CONFIG> {

@Override
public Scc build(Graph graph, CONFIG configuration, ProgressTracker progressTracker) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
import org.neo4j.gds.core.CypherMapWrapper;

@Configuration
public interface SccAlphaWriteConfig extends SccBaseConfig, WritePropertyConfig {
public interface SccAlphaWriteConfig extends SccCommonBaseConfig, WritePropertyConfig {

@Value.Default
@Override
Expand Down
3 changes: 1 addition & 2 deletions algo/src/main/java/org/neo4j/gds/scc/SccBaseConfig.java
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,8 @@
*/
package org.neo4j.gds.scc;

import org.neo4j.gds.config.AlgoBaseConfig;
import org.neo4j.gds.config.ConsecutiveIdsConfig;

public interface SccBaseConfig extends AlgoBaseConfig, ConsecutiveIdsConfig {
/**
 * SCC configuration that combines {@code SccCommonBaseConfig} with {@code ConsecutiveIdsConfig}.
 * Declares no members of its own.
 */
public interface SccBaseConfig extends SccCommonBaseConfig, ConsecutiveIdsConfig {

}
26 changes: 26 additions & 0 deletions algo/src/main/java/org/neo4j/gds/scc/SccCommonBaseConfig.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
/*
* Copyright (c) "Neo4j"
* Neo4j Sweden AB [http://neo4j.com]
*
* This file is part of Neo4j.
*
* Neo4j is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package org.neo4j.gds.scc;

import org.neo4j.gds.config.AlgoBaseConfig;

/**
 * Common base type for SCC configurations. Extends {@link AlgoBaseConfig} and declares no
 * members of its own.
 */
// NOTE(review): presumably exists so SCC configs that do not expose consecutiveIds can share
// the same algorithm factory and facade entry point — confirm against the callers.
public interface SccCommonBaseConfig extends AlgoBaseConfig {

}
10 changes: 5 additions & 5 deletions doc/modules/ROOT/pages/algorithms/kmeans.adoc
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,7 @@ include::partial$/algorithms/kmeans/specific-configuration.adoc[]
| preProcessingMillis | Integer | Milliseconds for preprocessing the data.
| computeMillis | Integer | Milliseconds for running the algorithm.
| postProcessingMillis | Integer | Milliseconds for computing percentiles and community count.
| communityDistribution | Map | Map containing min, max, mean as well as p50, p75, p90, p95, p99 and p999 percentile values of community size for the last level.
| communityDistribution | Map | Map containing min, max, mean as well as p1, p5, p10, p25, p50, p75, p90, p95, p99 and p999 percentile values of community size for the last level.
| centroids | List of List of Float | List of centroid coordinates. Each item is a list containing the coordinates of one centroid.
| averageDistanceToCentroid | Float| Average distance between node and centroid.
| averageSilhouette | Float| Average silhouette score over all nodes.
Expand Down Expand Up @@ -193,7 +193,7 @@ include::partial$/algorithms/kmeans/specific-configuration.adoc[]
| mutateMillis | Integer | Milliseconds for adding properties to the projected graph.
| postProcessingMillis | Integer | Milliseconds for computing percentiles and community count.
| nodePropertiesWritten | Integer | Number of properties added to the projected graph.
| communityDistribution | Map | Map containing min, max, mean as well as p50, p75, p90, p95, p99 and p999 percentile values of community size for the last level.
| communityDistribution | Map | Map containing min, max, mean as well as p1, p5, p10, p25, p50, p75, p90, p95, p99 and p999 percentile values of community size for the last level.
| centroids | List of List of Float | List of centroid coordinates. Each item is a list containing the coordinates of one centroid.
| averageDistanceToCentroid | Float | Average distance between node and centroid.
| averageSilhouette | Float| Average silhouette score over all nodes.
Expand Down Expand Up @@ -243,7 +243,7 @@ include::partial$/algorithms/kmeans/specific-configuration.adoc[]
| writeMillis | Integer | Milliseconds for adding properties to the Neo4j database.
| postProcessingMillis | Integer | Milliseconds for computing percentiles and community count.
| nodePropertiesWritten | Integer | Number of properties added to the Neo4j database.
| communityDistribution | Map | Map containing min, max, mean as well as p50, p75, p90, p95, p99 and p999 percentile values of community size for the last level.
| communityDistribution | Map | Map containing min, max, mean as well as p1, p5, p10, p25, p50, p75, p90, p95, p99 and p999 percentile values of community size for the last level.
| centroids | List of List of Float | List of centroid coordinates. Each item is a list containing the coordinates of one centroid.
| averageDistanceToCentroid | Float | Average distance between node and centroid.
| averageSilhouette | Float| Average silhouette score over all nodes.
Expand Down Expand Up @@ -388,7 +388,7 @@ YIELD communityDistribution
[opts="header",cols="1"]
|===
| communityDistribution
| { "p99": 3, "min": 2, "max": 3, "mean": 2.6666666666666665, "p90": 3, "p50": 3, "p999": 3, "p95": 3, "p75": 3 }
| {max=3, mean=2.6666666666666665, min=2, p1=2, p10=2, p25=2, p5=2, p50=3, p75=3, p90=3, p95=3, p99=3, p999=3}
|===
--

Expand Down Expand Up @@ -416,7 +416,7 @@ YIELD communityDistribution
[opts="header"]
|===
| communityDistribution
| { "p99": 3, "min": 2, "max": 3, "mean": 2.6666666666666665, "p90": 3, "p50": 3, "p999": 3, "p95": 3, "p75": 3 }
| {max=3, mean=2.6666666666666665, min=2, p1=2, p10=2, p25=2, p5=2, p50=3, p75=3, p90=3, p95=3, p99=3, p999=3}
|===
--

Expand Down
6 changes: 3 additions & 3 deletions doc/modules/ROOT/pages/algorithms/label-propagation.adoc
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,7 @@ include::partial$/algorithms/label-propagation/specific-configuration.adoc[]
| communityCount | Integer | The number of communities found.
| ranIterations | Integer | The number of iterations that were executed.
| didConverge | Boolean | True if the algorithm did converge to a stable labelling within the provided number of maximum iterations.
| communityDistribution | Map | Map containing min, max, mean as well as p50, p75, p90, p95, p99 and p999 percentile values of community size.
| communityDistribution | Map | Map containing min, max, mean as well as p1, p5, p10, p25, p50, p75, p90, p95, p99 and p999 percentile values of community size.
| configuration | Map | The configuration used for running the algorithm.
|===

Expand Down Expand Up @@ -192,7 +192,7 @@ include::partial$/algorithms/label-propagation/specific-configuration.adoc[]
| communityCount | Integer | The number of communities found.
| ranIterations | Integer | The number of iterations that were executed.
| didConverge | Boolean | True if the algorithm did converge to a stable labelling within the provided number of maximum iterations.
| communityDistribution | Map | Map containing min, max, mean as well as p50, p75, p90, p95, p99 and p999 percentile values of community size.
| communityDistribution | Map | Map containing min, max, mean as well as p1, p5, p10, p25, p50, p75, p90, p95, p99 and p999 percentile values of community size.
| configuration | Map | The configuration used for running the algorithm.
|===

Expand Down Expand Up @@ -243,7 +243,7 @@ include::partial$/algorithms/label-propagation/specific-configuration-write.adoc
| communityCount | Integer | The number of communities found.
| ranIterations | Integer | The number of iterations that were executed.
| didConverge | Boolean | True if the algorithm did converge to a stable labelling within the provided number of maximum iterations.
| communityDistribution | Map | Map containing min, max, mean as well as p50, p75, p90, p95, p99 and p999 percentile values of community size.
| communityDistribution | Map | Map containing min, max, mean as well as p1, p5, p10, p25, p50, p75, p90, p95, p99 and p999 percentile values of community size.
| configuration | Map | The configuration used for running the algorithm.
|===

Expand Down
6 changes: 3 additions & 3 deletions doc/modules/ROOT/pages/algorithms/leiden.adoc
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ include::partial$/algorithms/leiden/specific-configuration.adoc[]
| modularities | List of Float | The modularity scores for each level.
| nodeCount | Integer | The number of nodes in the graph.
| didConverge | Boolean | Indicates if the algorithm converged.
| communityDistribution | Map | Map containing min, max, mean as well as p50, p75, p90, p95, p99 and p999 percentile values of community size for the last level.
| communityDistribution | Map | Map containing min, max, mean as well as p1, p5, p10, p25, p50, p75, p90, p95, p99 and p999 percentile values of community size for the last level.
| configuration | Map | The configuration used for running the algorithm.
|===
======
Expand Down Expand Up @@ -179,7 +179,7 @@ include::partial$/algorithms/leiden/specific-configuration.adoc[]
| nodeCount | Integer | Number of nodes in the graph.
| didConverge | Boolean | Indicates if the algorithm converged.
| nodePropertiesWritten | Integer | Number of properties added to the projected graph.
| communityDistribution | Map | Map containing min, max, mean as well as p50, p75, p90, p95, p99 and p999 percentile values of community size for the last level.
| communityDistribution | Map | Map containing min, max, mean as well as p1, p5, p10, p25, p50, p75, p90, p95, p99 and p999 percentile values of community size for the last level.
| configuration | Map | The configuration used for running the algorithm.
|===
======
Expand Down Expand Up @@ -235,7 +235,7 @@ include::partial$/algorithms/leiden/specific-configuration-write.adoc[]
| nodeCount | Integer | Number of nodes in the graph.
| didConverge | Boolean | Indicates if the algorithm converged.
| nodePropertiesWritten | Integer | Number of properties added to the Neo4j database.
| communityDistribution | Map | Map containing min, max, mean as well as p50, p75, p90, p95, p99 and p999 percentile values of community size for the last level.
| communityDistribution | Map | Map containing min, max, mean as well as p1, p5, p10, p25, p50, p75, p90, p95, p99 and p999 percentile values of community size for the last level.
| configuration | Map | The configuration used for running the algorithm.
|===
======
Expand Down
Loading

0 comments on commit 7c7c48e

Please sign in to comment.