From 58d9d69f5887fa1335f4e435e8a6ac349779a02c Mon Sep 17 00:00:00 2001 From: "J. Kalyana Sundaram" Date: Fri, 21 Apr 2023 10:47:03 -0700 Subject: [PATCH] Example to show how to achieve more consistent sampling across linked traces (#4346) --- OpenTelemetry.sln | 7 + .../LinksAndParentBasedSampler.cs | 53 +++++ .../links-based-sampler/LinksBasedSampler.cs | 49 +++++ docs/trace/links-based-sampler/Program.cs | 68 ++++++ docs/trace/links-based-sampler/README.md | 197 ++++++++++++++++++ .../links-based-sampler/links-sampler.csproj | 5 + 6 files changed, 379 insertions(+) create mode 100644 docs/trace/links-based-sampler/LinksAndParentBasedSampler.cs create mode 100644 docs/trace/links-based-sampler/LinksBasedSampler.cs create mode 100644 docs/trace/links-based-sampler/Program.cs create mode 100644 docs/trace/links-based-sampler/README.md create mode 100644 docs/trace/links-based-sampler/links-sampler.csproj diff --git a/OpenTelemetry.sln b/OpenTelemetry.sln index 0e694c421f1..251abf668bd 100644 --- a/OpenTelemetry.sln +++ b/OpenTelemetry.sln @@ -255,6 +255,8 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "tail-based-sampling-example EndProject Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "stratified-sampling-example", "docs\trace\stratified-sampling-example\stratified-sampling-example.csproj", "{9C99621C-343E-479C-A943-332DB6129B71}" EndProject +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "links-sampler", "docs\trace\links-based-sampler\links-sampler.csproj", "{62AF4BD3-DCAE-4D44-AA5B-991C1071166B}" +EndProject Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "OpenTelemetry.Api.Tests", "test\OpenTelemetry.Api.Tests\OpenTelemetry.Api.Tests.csproj", "{FD8433F4-EDCF-475C-9B4A-625D3DE11671}" EndProject Global @@ -543,6 +545,10 @@ Global {9C99621C-343E-479C-A943-332DB6129B71}.Debug|Any CPU.Build.0 = Debug|Any CPU {9C99621C-343E-479C-A943-332DB6129B71}.Release|Any CPU.ActiveCfg = Release|Any CPU {9C99621C-343E-479C-A943-332DB6129B71}.Release|Any 
CPU.Build.0 = Release|Any CPU + {62AF4BD3-DCAE-4D44-AA5B-991C1071166B}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {62AF4BD3-DCAE-4D44-AA5B-991C1071166B}.Debug|Any CPU.Build.0 = Debug|Any CPU + {62AF4BD3-DCAE-4D44-AA5B-991C1071166B}.Release|Any CPU.ActiveCfg = Release|Any CPU + {62AF4BD3-DCAE-4D44-AA5B-991C1071166B}.Release|Any CPU.Build.0 = Release|Any CPU {FD8433F4-EDCF-475C-9B4A-625D3DE11671}.Debug|Any CPU.ActiveCfg = Debug|Any CPU {FD8433F4-EDCF-475C-9B4A-625D3DE11671}.Debug|Any CPU.Build.0 = Debug|Any CPU {FD8433F4-EDCF-475C-9B4A-625D3DE11671}.Release|Any CPU.ActiveCfg = Release|Any CPU @@ -589,6 +595,7 @@ Global {A0C0B77C-6C7B-4EC2-AC61-EA1F489811B9} = {5B7FB835-3FFF-4BC2-99C5-A5B5FAE3C818} {800DB925-6014-4136-AC01-3356CF7CADD3} = {5B7FB835-3FFF-4BC2-99C5-A5B5FAE3C818} {9C99621C-343E-479C-A943-332DB6129B71} = {5B7FB835-3FFF-4BC2-99C5-A5B5FAE3C818} + {62AF4BD3-DCAE-4D44-AA5B-991C1071166B} = {5B7FB835-3FFF-4BC2-99C5-A5B5FAE3C818} EndGlobalSection GlobalSection(ExtensibilityGlobals) = postSolution SolutionGuid = {55639B5C-0770-4A22-AB56-859604650521} diff --git a/docs/trace/links-based-sampler/LinksAndParentBasedSampler.cs b/docs/trace/links-based-sampler/LinksAndParentBasedSampler.cs new file mode 100644 index 00000000000..2ea3ec78239 --- /dev/null +++ b/docs/trace/links-based-sampler/LinksAndParentBasedSampler.cs @@ -0,0 +1,53 @@ +// +// Copyright The OpenTelemetry Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+//
+
+using OpenTelemetry.Trace;
+
+namespace LinksAndParentBasedSamplerExample;
+
+/// <summary>
+/// An example of a composite sampler that has:
+/// 1. A parent based sampler.
+/// 2. A links based sampler.
+/// The composite sampler first delegates to the parent based sampler. If that
+/// sampler's decision is anything other than Drop, its result is returned as-is.
+/// Otherwise the decision is delegated to the links based sampler.
+/// </summary>
+internal class LinksAndParentBasedSampler : Sampler
+{
+    private readonly ParentBasedSampler parentBasedSampler;
+    private readonly LinksBasedSampler linksBasedSampler;
+
+    /// <summary>
+    /// Initializes a new instance of the <see cref="LinksAndParentBasedSampler"/> class.
+    /// </summary>
+    /// <param name="parentBasedSampler">The parent based sampler that is consulted first.</param>
+    /// <exception cref="ArgumentNullException">
+    /// Thrown when <paramref name="parentBasedSampler"/> is null.
+    /// </exception>
+    public LinksAndParentBasedSampler(ParentBasedSampler parentBasedSampler)
+    {
+        this.parentBasedSampler = parentBasedSampler ?? throw new ArgumentNullException(nameof(parentBasedSampler));
+        this.linksBasedSampler = new LinksBasedSampler();
+    }
+
+    /// <summary>
+    /// Asks the parent based sampler first and falls back to the links based sampler
+    /// only when the parent based sampler decides to drop.
+    /// </summary>
+    /// <param name="samplingParameters">The parameters forwarded unchanged to both samplers.</param>
+    /// <returns>
+    /// The parent based sampler's result when its decision is not Drop; otherwise the
+    /// result of the links based sampler.
+    /// </returns>
+    public override SamplingResult ShouldSample(in SamplingParameters samplingParameters)
+    {
+        var samplingResult = this.parentBasedSampler.ShouldSample(samplingParameters);
+
+        // Log the decision that was actually made rather than a hard-coded label:
+        // every non-Drop decision short-circuits below, not only RecordAndSample.
+        Console.WriteLine($"{samplingParameters.TraceId}: ParentBasedSampler decision: {samplingResult.Decision}");
+
+        if (samplingResult.Decision != SamplingDecision.Drop)
+        {
+            return samplingResult;
+        }
+
+        return this.linksBasedSampler.ShouldSample(samplingParameters);
+    }
+}
diff --git a/docs/trace/links-based-sampler/LinksBasedSampler.cs b/docs/trace/links-based-sampler/LinksBasedSampler.cs
new file mode 100644
index 00000000000..7d6342c756a
--- /dev/null
+++ b/docs/trace/links-based-sampler/LinksBasedSampler.cs
@@ -0,0 +1,49 @@
+//
+// Copyright The OpenTelemetry Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+using System.Diagnostics;
+using OpenTelemetry.Trace;
+
+namespace LinksAndParentBasedSamplerExample;
+
+/// <summary>
+/// A non-probabilistic sampler that samples an activity if ANY of the linked activities
+/// is sampled.
+/// </summary>
+internal class LinksBasedSampler : Sampler
+{
+    /// <summary>
+    /// Decides to sample when at least one link in the sampling parameters has the
+    /// <see cref="ActivityTraceFlags.Recorded"/> flag set; otherwise decides to drop.
+    /// </summary>
+    /// <param name="samplingParameters">The parameters whose links are inspected.</param>
+    /// <returns>
+    /// A RecordAndSample result for the first link found with the Recorded flag, or a
+    /// Drop result when there are no links or none of them has that flag.
+    /// </returns>
+    public override SamplingResult ShouldSample(in SamplingParameters samplingParameters)
+    {
+        if (samplingParameters.Links != null)
+        {
+            foreach (var activityLink in samplingParameters.Links)
+            {
+                if ((activityLink.Context.TraceFlags &
+                    ActivityTraceFlags.Recorded) != 0)
+                {
+                    // If any linked activity is sampled, we will include this activity as well.
+                    Console.WriteLine($"{samplingParameters.TraceId}: At least one linked activity (TraceID: {activityLink.Context.TraceId}, SpanID: {activityLink.Context.SpanId}) is sampled. Hence, LinksBasedSampler decision is RecordAndSample");
+                    return new SamplingResult(SamplingDecision.RecordAndSample);
+                }
+            }
+        }
+
+        // There are either no linked activities or none of them are sampled.
+        // Hence, we will drop this activity.
+        Console.WriteLine($"{samplingParameters.TraceId}: No linked span is sampled. Hence, LinksBasedSampler decision is Drop.");
+        return new SamplingResult(SamplingDecision.Drop);
+    }
+}
diff --git a/docs/trace/links-based-sampler/Program.cs b/docs/trace/links-based-sampler/Program.cs
new file mode 100644
index 00000000000..85ae36acac2
--- /dev/null
+++ b/docs/trace/links-based-sampler/Program.cs
@@ -0,0 +1,68 @@
+//
+// Copyright The OpenTelemetry Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+using System.Diagnostics;
+using OpenTelemetry;
+using OpenTelemetry.Trace;
+
+namespace LinksAndParentBasedSamplerExample;
+
+/// <summary>
+/// Console example that starts ten activities, each linked to five generated contexts,
+/// with the composite links-and-parent based sampler installed.
+/// </summary>
+internal class Program
+{
+    private static readonly ActivitySource MyActivitySource = new("LinksAndParentBasedSampler.Example");
+
+    /// <summary>
+    /// Builds the tracer provider and starts one linked activity per loop iteration.
+    /// </summary>
+    /// <param name="args">Command line arguments; not used.</param>
+    public static void Main(string[] args)
+    {
+        using var tracerProvider = Sdk.CreateTracerProviderBuilder()
+            .SetSampler(new LinksAndParentBasedSampler(new ParentBasedSampler(new TraceIdRatioBasedSampler(0.2))))
+            // Subscribe using the source's own name so the two can never drift apart.
+            .AddSource(MyActivitySource.Name)
+            .AddConsoleExporter()
+            .Build();
+
+        for (var i = 0; i < 10; i++)
+        {
+            // The loop index doubles as the random seed for this iteration's links.
+            var links = GetActivityLinks(i);
+
+            // Create a new activity that links to the activities in the list of activity links.
+            using (var activity = MyActivitySource.StartActivity(ActivityKind.Internal, parentContext: default, tags: default, links: links))
+            {
+                activity?.SetTag("foo", "bar");
+            }
+
+            Console.WriteLine();
+        }
+    }
+
+    /// <summary>
+    /// Generates a list of activity links. A linked activity is sampled with a probability of 0.1.
+    /// </summary>
+    /// <param name="seed">Seed for the random number generator that picks which links are marked as sampled.</param>
+    /// <returns>A list of links.</returns>
+    private static IEnumerable<ActivityLink> GetActivityLinks(int seed)
+    {
+        var random = new Random(seed);
+        var linkedActivitiesList = new List<ActivityLink>();
+
+        for (var i = 0; i < 5; i++)
+        {
+            // Next(10) yields 0..9, so only the value 0 (one in ten) marks the link as sampled.
+            int randomValue = random.Next(10);
+            var traceFlags = (randomValue == 0) ? ActivityTraceFlags.Recorded : ActivityTraceFlags.None;
+            var context = new ActivityContext(ActivityTraceId.CreateRandom(), ActivitySpanId.CreateRandom(), traceFlags);
+            linkedActivitiesList.Add(new ActivityLink(context));
+        }
+
+        return linkedActivitiesList;
+    }
+}
diff --git a/docs/trace/links-based-sampler/README.md b/docs/trace/links-based-sampler/README.md
new file mode 100644
index 00000000000..8b15167f9fa
--- /dev/null
+++ b/docs/trace/links-based-sampler/README.md
@@ -0,0 +1,197 @@
+# Links Based Sampling: An Example
+
+Certain scenarios such as a producer consumer scenario can be modelled using
+"span links" to express causality between activities. An activity (span) in a trace
+can link to any number of activities in other traces. When using a Parent Based
+sampler, the sampling decision is made at the level of a single trace. This implies
+that the sampling decision across such linked traces is taken independently without
+any consideration to the links. This can result in incomplete information to reason
+about a system. Ideally, it would be desirable to sample all linked traces together.
+
+As one possible way to address this, this example shows how we can increase the
+likelihood of having complete traces across linked traces.
+
+## How does this sampling example work?
+
+We use a composite sampler that makes use of two samplers:
+
+1. A parent based sampler.
+2. A links based sampler.
+
+This composite sampler first delegates to the parent based sampler. If the
+parent based sampler decides to sample, then the composite sampler decides
+to sample. However, if the parent based sampler decides to drop, the composite
+sampler delegates to the links based sampler.
The links based sampler decides +to sample if the activity has any linked activities and if at least ONE of those +linked activities is sampled. + +The links based sampler is not a probabilistic sampler. It is a biased sampler +that decides to sample an activity if any of the linked contexts are sampled. + +## When should you consider such an option? What are the tradeoffs? + +This may be a good option to consider if you want to get more complete traces +across linked traces. However, there are a few tradeoffs to consider: + +- **Not guaranteed to give consistent sampling in all situations**: This +approach doesn't guarantee that you will get complete traces across linked +traces in all situations. + +Let's look at a couple of cases using the same producer-consumer example +scenario. Let's say we have a producer activity (say with ID S1 in Trace T1) that +produces a message and a consumer activity (say with ID S2 in Trace T2) that +consumes the message. + +Now, let's say that the producing activity S1 in trace T1 is sampled, say using the +decision of a parent based sampler. Now, let's say that the activity S2 in trace +T2 is not sampled based on the parent based sampler decision for T2. However, +since this activity S2 in T2 is linked to the producing activity (S1 in T1) that +is sampled, this mechanism ensures that the consuming activity (S2 in T2) will +also get sampled. + +Alternatively, let's consider what happens if the producing activity S1 in +trace T1 is not sampled, say using the decision of a parent based sampler. +Now, let's say that the consuming activity S2 in trace T2 is sampled, based +on the decision of a parent based sampler. In this case, we can see that +activity S2 in trace T2 is sampled even though activity S1 in trace T1 is not +sampled. This is an example of a situation where this approach is not helpful. 
+
+Another example of a situation where you would get a partial trace is if the
+consuming activity S2 in trace T2 is not the root activity in trace T2. In this
+case, let's say there's a different activity S3 in trace T2 that is the root
+activity. Let's say that the sampling decision for activity S3 was to drop it.
+Now, since S2 in trace T2 links to S1 in trace T1, with this approach S2 will
+be sampled (based on the linked context). Hence, the produced trace T2 will be
+a partial trace as it will not include activity S3 but will include activity S2.
+
+- **Can lead to higher volume of data**: Since this approach will sample an
+activity even if only one of its linked activities is sampled, it can lead to higher
+volumes of data, as compared to regular head based sampling. This is because
+we are making a non-probabilistic sampling decision here based on the sampling
+decisions of linked activities. For example, if there are 20 linked activities and
+only one of them is sampled, the linking activity will still be sampled.
+
+## Sample Output
+
+You should see output similar to the following when you run this example.
+
+```text
+af448bc1cb3e5be4e4b56a8b6650785c: ParentBasedSampler decision: Drop
+af448bc1cb3e5be4e4b56a8b6650785c: No linked span is sampled. Hence,
+LinksBasedSampler decision is Drop.
+
+1b08120fa35c3f4a37e0b6326dc7688c: ParentBasedSampler decision: Drop
+1b08120fa35c3f4a37e0b6326dc7688c: No linked span is sampled. Hence,
+LinksBasedSampler decision is Drop.
+ +ff710bd70baf2e8e843e7b38d1fc4cc1: ParentBasedSampler decision: RecordAndSample +Activity.TraceId: ff710bd70baf2e8e843e7b38d1fc4cc1 +Activity.SpanId: 620d9b218afbf926 +Activity.TraceFlags: Recorded +Activity.ActivitySourceName: LinksAndParentBasedSampler.Example +Activity.DisplayName: Main +Activity.Kind: Internal +Activity.StartTime: 2023-04-18T16:52:16.0373932Z +Activity.Duration: 00:00:00.0022481 +Activity.Tags: + foo: bar +Activity.Links: + f7464f714b23713c9008f8fc884fc391 7d1c96a6f2c95556 + 6660db8951e10644f63cd385e7b9549e 526e615b7a70121a + 4c94df8e520b32ff25fc44e0c8063c81 8080d0aaafa641af + 70d8ba08181b5ec073ec8b5db778c41f 99ea6162257046ab + d96954e9e76835f442f62eece3066be4 ae9332547b80f50f +Resource associated with Activity: + service.name: unknown_service:links-sampler + + +68121534d69b2248c4816c0c5281f908: ParentBasedSampler decision: Drop +68121534d69b2248c4816c0c5281f908: No linked span is sampled. Hence, +LinksBasedSampler decision is Drop. + +5042f2c52a08143f5f42be3818eb41fa: ParentBasedSampler decision: Drop +5042f2c52a08143f5f42be3818eb41fa: At least one linked activity +(TraceID: 5c1185c94f56ebe3c2ccb4b9880afb17, SpanID: 1f77abf0bded4ab9) is sampled. 
+Hence, LinksBasedSampler decision is RecordAndSample + +Activity.TraceId: 5042f2c52a08143f5f42be3818eb41fa +Activity.SpanId: 0f8a9bfa9d7770e6 +Activity.TraceFlags: Recorded +Activity.ActivitySourceName: LinksAndParentBasedSampler.Example +Activity.DisplayName: Main +Activity.Kind: Internal +Activity.StartTime: 2023-04-18T16:52:16.0806081Z +Activity.Duration: 00:00:00.0018874 +Activity.Tags: + foo: bar +Activity.Links: + ed77487f4a646399aea5effc818d8bfa fcdde951f29a13e0 + f79860fdfb949f2c1f1698d1ed8036b9 e422cb771057bf7c + 6326338d0c0cf3afe7c5946d648b94dc affc7a6c013ea273 + c0750a9fa146062083b55227ac965ad4 b09d59ed3129779d + 5c1185c94f56ebe3c2ccb4b9880afb17 1f77abf0bded4ab9 +Resource associated with Activity: + service.name: unknown_service:links-sampler + + +568a2b9489c58e7a5a769d264a9ddf28: ParentBasedSampler decision: Drop +568a2b9489c58e7a5a769d264a9ddf28: No linked span is sampled. Hence, +LinksBasedSampler decision is Drop. + +4f8d972b0d7727821ce4a307a7be8e8f: ParentBasedSampler decision: Drop +4f8d972b0d7727821ce4a307a7be8e8f: No linked span is sampled. Hence, +LinksBasedSampler decision is Drop. + +ce940241ed33e1a030da3e9d201101d3: ParentBasedSampler decision: Drop +ce940241ed33e1a030da3e9d201101d3: At least one linked activity +(TraceID: ba0d91887309399029719e2a71a12f62, SpanID: 61aafe295913080f) is sampled. 
+Hence, LinksBasedSampler decision is RecordAndSample + +Activity.TraceId: ce940241ed33e1a030da3e9d201101d3 +Activity.SpanId: 5cf3d63926ce4fd5 +Activity.TraceFlags: Recorded +Activity.ActivitySourceName: LinksAndParentBasedSampler.Example +Activity.DisplayName: Main +Activity.Kind: Internal +Activity.StartTime: 2023-04-18T16:52:16.1127688Z +Activity.Duration: 00:00:00.0021072 +Activity.Tags: + foo: bar +Activity.Links: + 5223cff39311c741ef50aca58e4270c3 e401b6840acebf43 + 398b43fee8a75b068cdd11018ef528b0 24cfa4d5fb310b9d + 34351a0f492d65ef92ca0db3238f5146 5c0a56a16291d765 + ba0d91887309399029719e2a71a12f62 61aafe295913080f + de18a8af2d20972cd4f9439fcd51e909 4c40bc6037e58bf9 +Resource associated with Activity: + service.name: unknown_service:links-sampler + + +ac46618da4495897bacd7d399e6fc6d8: ParentBasedSampler decision: Drop +ac46618da4495897bacd7d399e6fc6d8: No linked span is sampled. Hence, +LinksBasedSampler decision is Drop. + +68a3a05e0348d2a2c1c3db34bc3fd2f5: ParentBasedSampler decision: Drop +68a3a05e0348d2a2c1c3db34bc3fd2f5: At least one linked activity +(TraceID: 87773d89fba942b0109d6ce0876bb67e, SpanID: 2aaac98d4e48c261) is sampled. 
+Hence, LinksBasedSampler decision is RecordAndSample + +Activity.TraceId: 68a3a05e0348d2a2c1c3db34bc3fd2f5 +Activity.SpanId: 3d0222f56b0e1e5d +Activity.TraceFlags: Recorded +Activity.ActivitySourceName: LinksAndParentBasedSampler.Example +Activity.DisplayName: Main +Activity.Kind: Internal +Activity.StartTime: 2023-04-18T16:52:16.1553354Z +Activity.Duration: 00:00:00.0049821 +Activity.Tags: + foo: bar +Activity.Links: + 7175fbd18da2783dc594d1e8f3260c74 13019d9a06a5505b + 59c9bdd52eb5cf23eae9001006743fcf 25573e0f1b290b8d + 87773d89fba942b0109d6ce0876bb67e 2aaac98d4e48c261 + 0a1f65c47f556336b4028b515d363810 0816a2a2b7d4ea0b + 7602375d3eae7e849a9dc27e858dc1c2 b918787b895b1374 +Resource associated with Activity: + service.name: unknown_service:links-sampler +``` diff --git a/docs/trace/links-based-sampler/links-sampler.csproj b/docs/trace/links-based-sampler/links-sampler.csproj new file mode 100644 index 00000000000..19aa9791432 --- /dev/null +++ b/docs/trace/links-based-sampler/links-sampler.csproj @@ -0,0 +1,5 @@ + + + + +