From 48b2a2f9a9b532d6b9182381ad7f258f96cc6c93 Mon Sep 17 00:00:00 2001 From: Andrew Stahlman Date: Wed, 8 Apr 2020 13:39:34 -0700 Subject: [PATCH] Include S3 counters in heuristics (#670) The Tez heuristics assume that all of the S3-related counters are prefixed with either "S3A_" or "S3N_". In our case, it's simply "S3_", so our Mapper heuristics all show 0 bytes read per task. This updates the heuristics to include the "S3_*" counters (the MapReduce heuristics already do this). It also corrects the third constructor argument of S3A_WRITE_OPS in TezCounterData, which was "S3_WRITE_OPS" instead of "S3A_WRITE_OPS". --- app/com/linkedin/drelephant/tez/data/TezCounterData.java | 7 ++++++- .../drelephant/tez/heuristics/MapperDataSkewHeuristic.java | 1 + .../drelephant/tez/heuristics/MapperSpeedHeuristic.java | 1 + .../drelephant/tez/heuristics/MapperTimeHeuristic.java | 1 + .../tez/heuristics/TezScopeDataSkewHeuristic.java | 1 + .../drelephant/tez/heuristics/TezScopeSpeedHeuristic.java | 1 + .../drelephant/tez/heuristics/TezScopeTimeHeuristic.java | 1 + 7 files changed, 12 insertions(+), 1 deletion(-) diff --git a/app/com/linkedin/drelephant/tez/data/TezCounterData.java b/app/com/linkedin/drelephant/tez/data/TezCounterData.java index 9aac00117..4571aaad7 100644 --- a/app/com/linkedin/drelephant/tez/data/TezCounterData.java +++ b/app/com/linkedin/drelephant/tez/data/TezCounterData.java @@ -100,11 +100,16 @@ public static enum CounterName { HDFS_READ_OPS(GroupName.FileSystemCounters, "HDFS_READ_OPS", "HDFS_READ_OPS"), HDFS_LARGE_READ_OPS(GroupName.FileSystemCounters, "HDFS_LARGE_READ_OPS", "HDFS_LARGE_READ_OPS"), HDFS_WRITE_OPS(GroupName.FileSystemCounters, "HDFS_WRITE_OPS", "HDFS_WRITE_OPS"), + S3_BYTES_READ(GroupName.FileSystemCounters, "S3_BYTES_READ", "S3_BYTES_READ"), + S3_BYTES_WRITTEN(GroupName.FileSystemCounters, "S3_BYTES_WRITTEN", "S3_BYTES_WRITTEN"), + S3_READ_OPS(GroupName.FileSystemCounters, "S3_READ_OPS", "S3_READ_OPS"), + S3_LARGE_READ_OPS(GroupName.FileSystemCounters, "S3_LARGE_READ_OPS", "S3_LARGE_READ_OPS"), + S3_WRITE_OPS(GroupName.FileSystemCounters, "S3_WRITE_OPS", "S3_WRITE_OPS"), 
S3A_BYTES_READ(GroupName.FileSystemCounters, "S3A_BYTES_READ", "S3A_BYTES_READ"), S3A_BYTES_WRITTEN(GroupName.FileSystemCounters, "S3A_BYTES_WRITTEN", "S3A_BYTES_WRITTEN"), S3A_READ_OPS(GroupName.FileSystemCounters, "S3A_READ_OPS", "S3A_READ_OPS"), S3A_LARGE_READ_OPS(GroupName.FileSystemCounters, "S3A_LARGE_READ_OPS", "S3A_LARGE_READ_OPS"), - S3A_WRITE_OPS(GroupName.FileSystemCounters, "S3A_WRITE_OPS", "S3_WRITE_OPS"), + S3A_WRITE_OPS(GroupName.FileSystemCounters, "S3A_WRITE_OPS", "S3A_WRITE_OPS"), S3N_BYTES_READ(GroupName.FileSystemCounters, "S3N_BYTES_READ", "S3N_BYTES_READ"), S3N_BYTES_WRITTEN(GroupName.FileSystemCounters, "S3N_BYTES_WRITTEN", "S3N_BYTES_WRITTEN"), S3N_READ_OPS(GroupName.FileSystemCounters, "S3N_READ_OPS", "S3N_READ_OPS"), diff --git a/app/com/linkedin/drelephant/tez/heuristics/MapperDataSkewHeuristic.java b/app/com/linkedin/drelephant/tez/heuristics/MapperDataSkewHeuristic.java index b17ebe3b2..1e5395313 100644 --- a/app/com/linkedin/drelephant/tez/heuristics/MapperDataSkewHeuristic.java +++ b/app/com/linkedin/drelephant/tez/heuristics/MapperDataSkewHeuristic.java @@ -35,6 +35,7 @@ public class MapperDataSkewHeuristic extends GenericDataSkewHeuristic { public MapperDataSkewHeuristic(HeuristicConfigurationData heuristicConfData) { super(Arrays.asList( TezCounterData.CounterName.HDFS_BYTES_READ, + TezCounterData.CounterName.S3_BYTES_READ, TezCounterData.CounterName.S3A_BYTES_READ, TezCounterData.CounterName.S3N_BYTES_READ ), heuristicConfData); diff --git a/app/com/linkedin/drelephant/tez/heuristics/MapperSpeedHeuristic.java b/app/com/linkedin/drelephant/tez/heuristics/MapperSpeedHeuristic.java index 1694e32ea..07c3ee051 100644 --- a/app/com/linkedin/drelephant/tez/heuristics/MapperSpeedHeuristic.java +++ b/app/com/linkedin/drelephant/tez/heuristics/MapperSpeedHeuristic.java @@ -53,6 +53,7 @@ public class MapperSpeedHeuristic implements Heuristic { private List _counterNames = Arrays.asList( TezCounterData.CounterName.HDFS_BYTES_READ, + 
TezCounterData.CounterName.S3_BYTES_READ, TezCounterData.CounterName.S3A_BYTES_READ, TezCounterData.CounterName.S3N_BYTES_READ ); diff --git a/app/com/linkedin/drelephant/tez/heuristics/MapperTimeHeuristic.java b/app/com/linkedin/drelephant/tez/heuristics/MapperTimeHeuristic.java index 838bd7a59..bbf0d0639 100644 --- a/app/com/linkedin/drelephant/tez/heuristics/MapperTimeHeuristic.java +++ b/app/com/linkedin/drelephant/tez/heuristics/MapperTimeHeuristic.java @@ -51,6 +51,7 @@ public class MapperTimeHeuristic implements Heuristic { private List _counterNames = Arrays.asList( TezCounterData.CounterName.HDFS_BYTES_READ, + TezCounterData.CounterName.S3_BYTES_READ, TezCounterData.CounterName.S3A_BYTES_READ, TezCounterData.CounterName.S3N_BYTES_READ ); diff --git a/app/com/linkedin/drelephant/tez/heuristics/TezScopeDataSkewHeuristic.java b/app/com/linkedin/drelephant/tez/heuristics/TezScopeDataSkewHeuristic.java index 3486328d7..3d125b417 100644 --- a/app/com/linkedin/drelephant/tez/heuristics/TezScopeDataSkewHeuristic.java +++ b/app/com/linkedin/drelephant/tez/heuristics/TezScopeDataSkewHeuristic.java @@ -35,6 +35,7 @@ public class TezScopeDataSkewHeuristic extends GenericDataSkewHeuristic { public TezScopeDataSkewHeuristic(HeuristicConfigurationData heuristicConfData) { super(Arrays.asList( TezCounterData.CounterName.HDFS_BYTES_READ, + TezCounterData.CounterName.S3_BYTES_READ, TezCounterData.CounterName.S3A_BYTES_READ, TezCounterData.CounterName.S3N_BYTES_READ ), heuristicConfData); diff --git a/app/com/linkedin/drelephant/tez/heuristics/TezScopeSpeedHeuristic.java b/app/com/linkedin/drelephant/tez/heuristics/TezScopeSpeedHeuristic.java index 2ed0e833c..90b22e5cd 100644 --- a/app/com/linkedin/drelephant/tez/heuristics/TezScopeSpeedHeuristic.java +++ b/app/com/linkedin/drelephant/tez/heuristics/TezScopeSpeedHeuristic.java @@ -53,6 +53,7 @@ public class TezScopeSpeedHeuristic implements Heuristic { private List _counterNames = Arrays.asList( 
TezCounterData.CounterName.HDFS_BYTES_READ, + TezCounterData.CounterName.S3_BYTES_READ, TezCounterData.CounterName.S3A_BYTES_READ, TezCounterData.CounterName.S3N_BYTES_READ ); diff --git a/app/com/linkedin/drelephant/tez/heuristics/TezScopeTimeHeuristic.java b/app/com/linkedin/drelephant/tez/heuristics/TezScopeTimeHeuristic.java index 0a6f16283..c89bd666a 100644 --- a/app/com/linkedin/drelephant/tez/heuristics/TezScopeTimeHeuristic.java +++ b/app/com/linkedin/drelephant/tez/heuristics/TezScopeTimeHeuristic.java @@ -51,6 +51,7 @@ public class TezScopeTimeHeuristic implements Heuristic { private List _counterNames = Arrays.asList( TezCounterData.CounterName.HDFS_BYTES_READ, + TezCounterData.CounterName.S3_BYTES_READ, TezCounterData.CounterName.S3A_BYTES_READ, TezCounterData.CounterName.S3N_BYTES_READ );