From f2d8d4101ec60b359a442da447fdc0b91c933127 Mon Sep 17 00:00:00 2001 From: Maurizio Branca Date: Fri, 10 May 2024 12:08:25 +0200 Subject: [PATCH 01/41] Initial POC import --- go.mod | 19 +- go.sum | 50 +- x-pack/filebeat/input/azureeventhub/config.go | 13 +- x-pack/filebeat/input/azureeventhub/eph.go | 114 ----- x-pack/filebeat/input/azureeventhub/input.go | 478 +++++++++--------- .../filebeat/input/azureeventhub/metrics.go | 1 - x-pack/filebeat/input/azureeventhub/v1_eph.go | 118 +++++ .../{eph_test.go => v1_eph_test.go} | 0 .../filebeat/input/azureeventhub/v1_input.go | 308 +++++++++++ .../filebeat/input/azureeventhub/v2_input.go | 302 +++++++++++ .../input/default-inputs/inputs_other.go | 2 + 11 files changed, 1011 insertions(+), 394 deletions(-) delete mode 100644 x-pack/filebeat/input/azureeventhub/eph.go create mode 100644 x-pack/filebeat/input/azureeventhub/v1_eph.go rename x-pack/filebeat/input/azureeventhub/{eph_test.go => v1_eph_test.go} (100%) create mode 100644 x-pack/filebeat/input/azureeventhub/v1_input.go create mode 100644 x-pack/filebeat/input/azureeventhub/v2_input.go diff --git a/go.mod b/go.mod index 8d278dae027..b34cdd651fe 100644 --- a/go.mod +++ b/go.mod @@ -102,7 +102,7 @@ require ( github.com/google/flatbuffers v23.5.26+incompatible github.com/google/go-cmp v0.6.0 github.com/google/gopacket v1.1.19 - github.com/google/uuid v1.3.1 + github.com/google/uuid v1.6.0 github.com/gorhill/cronexpr v0.0.0-20180427100037-88b0669f7d75 github.com/h2non/filetype v1.1.1 github.com/hashicorp/go-multierror v1.1.1 @@ -186,14 +186,15 @@ require ( cloud.google.com/go v0.110.8 cloud.google.com/go/compute v1.23.0 cloud.google.com/go/redis v1.13.1 - github.com/Azure/azure-sdk-for-go/sdk/azcore v1.9.0 - github.com/Azure/azure-sdk-for-go/sdk/azidentity v1.4.0 + github.com/Azure/azure-sdk-for-go/sdk/azcore v1.11.1 + github.com/Azure/azure-sdk-for-go/sdk/azidentity v1.5.2 + github.com/Azure/azure-sdk-for-go/sdk/messaging/azeventhubs v1.2.0 github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/consumption/armconsumption v1.1.0 github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/containerservice/armcontainerservice/v4 v4.6.0 github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/costmanagement/armcostmanagement v1.1.0 github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/monitor/armmonitor v0.8.0 github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/resources/armresources v1.1.1 - github.com/Azure/azure-sdk-for-go/sdk/storage/azblob v1.0.0 + github.com/Azure/azure-sdk-for-go/sdk/storage/azblob v1.3.2 github.com/Azure/go-autorest/autorest/adal v0.9.21 github.com/apache/arrow/go/v14 v14.0.2 github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.16.1 @@ -247,15 +248,15 @@ require ( github.com/AdaLogics/go-fuzz-headers v0.0.0-20230811130428-ced1acdcaa24 // indirect github.com/Azure/azure-amqp-common-go/v4 v4.2.0 // indirect github.com/Azure/azure-pipeline-go v0.2.3 // indirect - github.com/Azure/azure-sdk-for-go/sdk/internal v1.5.0 // indirect - github.com/Azure/go-amqp v1.0.0 // indirect + github.com/Azure/azure-sdk-for-go/sdk/internal v1.7.0 // indirect + github.com/Azure/go-amqp v1.0.5 // indirect github.com/Azure/go-autorest v14.2.0+incompatible // indirect github.com/Azure/go-autorest/autorest/to v0.4.0 // indirect github.com/Azure/go-autorest/autorest/validation v0.3.1 // indirect github.com/Azure/go-autorest/logger v0.2.1 // indirect github.com/Azure/go-autorest/tracing v0.6.0 // indirect github.com/Azure/go-ntlmssp v0.0.0-20221128193559-754e69321358 // indirect - 
github.com/AzureAD/microsoft-authentication-library-for-go v1.1.1 // indirect + github.com/AzureAD/microsoft-authentication-library-for-go v1.2.2 // indirect github.com/JohnCGriffin/overflow v0.0.0-20211019200055-46fa312c352c // indirect github.com/andybalholm/brotli v1.0.5 // indirect github.com/antlr4-go/antlr/v4 v4.13.0 // indirect @@ -303,7 +304,7 @@ require ( github.com/goccy/go-json v0.10.2 // indirect github.com/godror/knownpb v0.1.0 // indirect github.com/golang-jwt/jwt/v4 v4.5.0 // indirect - github.com/golang-jwt/jwt/v5 v5.0.0 // indirect + github.com/golang-jwt/jwt/v5 v5.2.1 // indirect github.com/golang-sql/civil v0.0.0-20190719163853-cb61b32ac6fe // indirect github.com/golang-sql/sqlexp v0.1.0 // indirect github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect @@ -358,7 +359,7 @@ require ( github.com/opencontainers/go-digest v1.0.0 // indirect github.com/opencontainers/image-spec v1.0.2 // indirect github.com/pierrec/lz4 v2.6.0+incompatible // indirect - github.com/pkg/browser v0.0.0-20210911075715-681adbf594b8 // indirect + github.com/pkg/browser v0.0.0-20240102092130-5ac0b6a4141c // indirect github.com/power-devops/perfstat v0.0.0-20210106213030-5aafc221ea8c // indirect github.com/prometheus/client_golang v1.11.1 // indirect github.com/rootless-containers/rootlesskit v1.1.0 // indirect diff --git a/go.sum b/go.sum index e91e46ce2c6..470f4bd5a78 100644 --- a/go.sum +++ b/go.sum @@ -96,22 +96,26 @@ github.com/Azure/azure-sdk-for-go/sdk/azcore v0.19.0/go.mod h1:h6H6c8enJmmocHUbL github.com/Azure/azure-sdk-for-go/sdk/azcore v1.0.0/go.mod h1:uGG2W01BaETf0Ozp+QxxKJdMBNRWPdstHG0Fmdwn1/U= github.com/Azure/azure-sdk-for-go/sdk/azcore v1.3.0/go.mod h1:tZoQYdDZNOiIjdSn0dVWVfl0NEPGOJqVLzSrcFk4Is0= github.com/Azure/azure-sdk-for-go/sdk/azcore v1.4.0/go.mod h1:ON4tFdPTwRcgWEaVDrN3584Ef+b7GgSJaXxe5fW9t4M= -github.com/Azure/azure-sdk-for-go/sdk/azcore v1.9.0 h1:fb8kj/Dh4CSwgsOzHeZY4Xh68cFVbzXx+ONXGMY//4w= -github.com/Azure/azure-sdk-for-go/sdk/azcore v1.9.0/go.mod h1:uReU2sSxZExRPBAg3qKzmAucSi51+SP1OhohieR821Q= +github.com/Azure/azure-sdk-for-go/sdk/azcore v1.11.1 h1:E+OJmp2tPvt1W+amx48v1eqbjDYsgN+RzP4q16yV5eM= +github.com/Azure/azure-sdk-for-go/sdk/azcore v1.11.1/go.mod h1:a6xsAQUZg+VsS3TJ05SRp524Hs4pZ/AeFSr5ENf0Yjo= github.com/Azure/azure-sdk-for-go/sdk/azidentity v0.11.0/go.mod h1:HcM1YX14R7CJcghJGOYCgdezslRSVzqwLf/q+4Y2r/0= github.com/Azure/azure-sdk-for-go/sdk/azidentity v1.1.0/go.mod h1:bhXu1AjYL+wutSL/kpSq6s7733q2Rb0yuot9Zgfqa/0= -github.com/Azure/azure-sdk-for-go/sdk/azidentity v1.4.0 h1:BMAjVKJM0U/CYF27gA0ZMmXGkOcvfFtD0oHVZ1TIPRI= -github.com/Azure/azure-sdk-for-go/sdk/azidentity v1.4.0/go.mod h1:1fXstnBMas5kzG+S3q8UoJcmyU6nUeunJcMDHcRYHhs= +github.com/Azure/azure-sdk-for-go/sdk/azidentity v1.5.2 h1:FDif4R1+UUR+00q6wquyX90K7A8dN+R5E8GEadoP7sU= +github.com/Azure/azure-sdk-for-go/sdk/azidentity v1.5.2/go.mod h1:aiYBYui4BJ/BJCAIKs92XiPyQfTaBWqvHujDwKb6CBU= github.com/Azure/azure-sdk-for-go/sdk/internal v0.7.0/go.mod h1:yqy467j36fJxcRV2TzfVZ1pCb5vxm4BtZPUdYWe/Xo8= github.com/Azure/azure-sdk-for-go/sdk/internal v1.0.0/go.mod h1:eWRD7oawr1Mu1sLCawqVc0CUiF43ia3qQMxLscsKQ9w= github.com/Azure/azure-sdk-for-go/sdk/internal v1.1.1/go.mod h1:eWRD7oawr1Mu1sLCawqVc0CUiF43ia3qQMxLscsKQ9w= github.com/Azure/azure-sdk-for-go/sdk/internal v1.1.2/go.mod h1:eWRD7oawr1Mu1sLCawqVc0CUiF43ia3qQMxLscsKQ9w= -github.com/Azure/azure-sdk-for-go/sdk/internal v1.5.0 h1:d81/ng9rET2YqdVkVwkb6EXeRrLJIwyGnJcAlAWKwhs= -github.com/Azure/azure-sdk-for-go/sdk/internal v1.5.0/go.mod 
h1:s4kgfzA0covAXNicZHDMN58jExvcng2mC/DepXiF1EI= +github.com/Azure/azure-sdk-for-go/sdk/internal v1.7.0 h1:rTfKOCZGy5ViVrlA74ZPE99a+SgoEE2K/yg3RyW9dFA= +github.com/Azure/azure-sdk-for-go/sdk/internal v1.7.0/go.mod h1:4OG6tQ9EOP/MT0NMjDlRzWoVFxfu9rN9B2X+tlSVktg= +github.com/Azure/azure-sdk-for-go/sdk/messaging/azeventhubs v1.2.0 h1:NYd6adRnLdeTwr1QWeiL83Fgqg7clkPLm4DCz4jYywE= +github.com/Azure/azure-sdk-for-go/sdk/messaging/azeventhubs v1.2.0/go.mod h1:vMGz6NOUGJ9h5ONl2kkyaqq5E0g7s4CHNSrXN5fl8UY= github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/containerservice/armcontainerservice/v4 v4.6.0 h1:AAIdAyPkFff6XTct2lQCxOWN/+LnA41S7kIkzKaMbyE= github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/containerservice/armcontainerservice/v4 v4.6.0/go.mod h1:noQIdW75SiQFB3mSFJBr4iRRH83S9skaFiBv4C0uEs0= github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/costmanagement/armcostmanagement v1.1.0 h1:1MRED2aeLx/BPHC23XRtr8Mk6zcc70HNRYPQ73R0gHw= github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/costmanagement/armcostmanagement v1.1.0/go.mod h1:Am1cUioOk0HdZIsjpXJkQ4RIeQbwYsW6LkNIc5z/5XY= +github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/eventhub/armeventhub v1.2.0 h1:+dggnR89/BIIlRlQ6d19dkhhdd/mQUiQbXhyHUFiB4w= +github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/eventhub/armeventhub v1.2.0/go.mod h1:tI9M2Q/ueFi287QRkdrhb9LHm6ZnXgkVYLRC3FhYkPw= github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/internal v1.1.2 h1:mLY+pNLjCUeKhgnAJWAKhEUQM+RJQo2H1fuGSw1Ky1E= github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/internal v1.1.2/go.mod h1:FbdwsQ2EzwvXxOPcMFYO8ogEc9uMMIj3YkmCdXdAFmk= github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/internal/v2 v2.0.0 h1:PTFGRSlMKCQelWwxUyYVEUqseBJVemLyqWJjvMyt0do= @@ -122,12 +126,15 @@ github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/monitor/armmonitor v0.8.0 github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/monitor/armmonitor v0.8.0/go.mod h1:kzRLpzzlw6eBUXE7eBw3oqfmKR/kxaHOk4+h9sAe6Yo= github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/resources/armresources v1.1.1 h1:7CBQ+Ei8SP2c6ydQTGCCrS35bDxgTMfoP2miAwK++OU= github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/resources/armresources v1.1.1/go.mod h1:c/wcGeGx5FUPbM/JltUYHZcKmigwyVLJlDq+4HdtXaw= -github.com/Azure/azure-sdk-for-go/sdk/storage/azblob v1.0.0 h1:u/LLAOFgsMv7HmNL4Qufg58y+qElGOt5qv0z1mURkRY= +github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/storage/armstorage v1.5.0 h1:AifHbc4mg0x9zW52WOpKbsHaDKuRhlI7TVl47thgQ70= +github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/storage/armstorage v1.5.0/go.mod h1:T5RfihdXtBDxt1Ch2wobif3TvzTdumDy29kahv6AV9A= github.com/Azure/azure-sdk-for-go/sdk/storage/azblob v1.0.0/go.mod h1:2e8rMJtl2+2j+HXbTBwnyGpm5Nou7KhvSfxOq8JpTag= +github.com/Azure/azure-sdk-for-go/sdk/storage/azblob v1.3.2 h1:YUUxeiOWgdAQE3pXt2H7QXzZs0q8UBjgRbl56qo8GYM= +github.com/Azure/azure-sdk-for-go/sdk/storage/azblob v1.3.2/go.mod h1:dmXQgZuiSubAecswZE+Sm8jkvEa7kQgTPVRvwL/nd0E= github.com/Azure/azure-storage-blob-go v0.15.0 h1:rXtgp8tN1p29GvpGgfJetavIG0V7OgcSXPpwp3tx6qk= github.com/Azure/azure-storage-blob-go v0.15.0/go.mod h1:vbjsVbX0dlxnRc4FFMPsS9BsJWPcne7GB7onqlPvz58= -github.com/Azure/go-amqp v1.0.0 h1:QfCugi1M+4F2JDTRgVnRw7PYXLXZ9hmqk3+9+oJh3OA= -github.com/Azure/go-amqp v1.0.0/go.mod h1:+bg0x3ce5+Q3ahCEXnCsGG3ETpDQe3MEVnOuT2ywPwc= +github.com/Azure/go-amqp v1.0.5 h1:po5+ljlcNSU8xtapHTe8gIc8yHxCzC03E8afH2g1ftU= +github.com/Azure/go-amqp v1.0.5/go.mod h1:vZAogwdrkbyK3Mla8m/CxSc/aKdnTZ4IbPxl51Y5WZE= github.com/Azure/go-ansiterm 
v0.0.0-20170929234023-d6e3b3328b78/go.mod h1:LmzpDX56iTiv29bbRTIsUNlaFfuhWRQBWjQdVyAevI8= github.com/Azure/go-ansiterm v0.0.0-20210617225240-d185dfc1b5a1 h1:UQHMgLO+TxOElx5B5HZ4hJQsoJ/PvUvKRhJHDQXO8P8= github.com/Azure/go-ansiterm v0.0.0-20210617225240-d185dfc1b5a1/go.mod h1:xomTg63KZ2rFqZQzSB4Vz2SUXa1BpHTVz9L5PTmPC4E= @@ -183,8 +190,8 @@ github.com/Azure/go-autorest/tracing v0.6.0/go.mod h1:+vhtPC754Xsa23ID7GlGsrdKBp github.com/Azure/go-ntlmssp v0.0.0-20221128193559-754e69321358 h1:mFRzDkZVAjdal+s7s0MwaRv9igoPqLRdzOLzw/8Xvq8= github.com/Azure/go-ntlmssp v0.0.0-20221128193559-754e69321358/go.mod h1:chxPXzSsl7ZWRAuOIE23GDNzjWuZquvFlgA8xmpunjU= github.com/AzureAD/microsoft-authentication-library-for-go v0.5.1/go.mod h1:Vt9sXTKwMyGcOxSmLDMnGPgqsUg7m8pe215qMLrDXw4= -github.com/AzureAD/microsoft-authentication-library-for-go v1.1.1 h1:WpB/QDNLpMw72xHJc34BNNykqSOeEJDAWkhf0u12/Jk= -github.com/AzureAD/microsoft-authentication-library-for-go v1.1.1/go.mod h1:wP83P5OoQ5p6ip3ScPr0BAq0BvuPAvacpEuSzyouqAI= +github.com/AzureAD/microsoft-authentication-library-for-go v1.2.2 h1:XHOnouVk1mxXfQidrMEnLlPk9UMeRtyBTnEFtxkV0kU= +github.com/AzureAD/microsoft-authentication-library-for-go v1.2.2/go.mod h1:wP83P5OoQ5p6ip3ScPr0BAq0BvuPAvacpEuSzyouqAI= github.com/BurntSushi/toml v0.3.1 h1:WXkYYl6Yr3qBf1K79EBnL4mak0OimBfB0XUf9Vl28OQ= github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo= @@ -500,7 +507,6 @@ github.com/dlclark/regexp2 v1.4.0 h1:F1rxgk7p4uKjwIQxBs9oAXe5CqrXlCduYEJvrF4u93E github.com/dlclark/regexp2 v1.4.0/go.mod h1:2pZnwuY/m+8K6iRw6wQdMtk+rH5tNGR1i55kozfMjCc= github.com/dnaeon/go-vcr v1.0.1/go.mod h1:aBB1+wY4s93YsC3HHjMBMrwTj2R9FHDzUr9KyGc8n1E= github.com/dnaeon/go-vcr v1.1.0/go.mod h1:M7tiix8f0r6mKKJ3Yq/kqU1OYf3MnfmBWVbPx/yU9ko= -github.com/dnaeon/go-vcr v1.2.0 h1:zHCHvJYTMh1N7xnV7zf1m1GPBF9Ad0Jk/whtQ1663qI= github.com/dnaeon/go-vcr v1.2.0/go.mod h1:R4UdLID7HZT3taECzJs4YgbbH6PIGXB6W/sc5OLb6RQ= github.com/dnephin/pflag v1.0.7 h1:oxONGlWxhmUct0YzKTgrpQv9AUA1wtPBn7zuSjJqptk= github.com/dnephin/pflag v1.0.7/go.mod h1:uxE91IoWURlOiTUIA8Mq5ZZkAv3dPUfZNaT80Zm7OQE= @@ -866,8 +872,8 @@ github.com/golang-jwt/jwt/v4 v4.0.0/go.mod h1:/xlHOz8bRuivTWchD4jCa+NbatV+wEUSzw github.com/golang-jwt/jwt/v4 v4.2.0/go.mod h1:/xlHOz8bRuivTWchD4jCa+NbatV+wEUSzwAxVc6locg= github.com/golang-jwt/jwt/v4 v4.5.0 h1:7cYmW1XlMY7h7ii7UhUyChSgS5wUJEnm9uZVTGqOWzg= github.com/golang-jwt/jwt/v4 v4.5.0/go.mod h1:m21LjoU+eqJr34lmDMbreY2eSTRJ1cv77w39/MY0Ch0= -github.com/golang-jwt/jwt/v5 v5.0.0 h1:1n1XNM9hk7O9mnQoNBGolZvzebBQ7p93ULHRc28XJUE= -github.com/golang-jwt/jwt/v5 v5.0.0/go.mod h1:pqrtFR0X4osieyHYxtmOUWsAWrfe1Q5UVIyoH402zdk= +github.com/golang-jwt/jwt/v5 v5.2.1 h1:OuVbFODueb089Lh128TAcimifWaLhJwVflnrgM17wHk= +github.com/golang-jwt/jwt/v5 v5.2.1/go.mod h1:pqrtFR0X4osieyHYxtmOUWsAWrfe1Q5UVIyoH402zdk= github.com/golang-sql/civil v0.0.0-20190719163853-cb61b32ac6fe h1:lXe2qZdvpiX5WZkZR4hgp4KJVfY3nMkvmwbVkpv1rVY= github.com/golang-sql/civil v0.0.0-20190719163853-cb61b32ac6fe/go.mod h1:8vg3r2VgvsThLBIFL93Qb5yWzgyZWhEmBwUJWevAkK0= github.com/golang-sql/sqlexp v0.1.0 h1:ZCD6MBpcuOVfGVqsEmY5/4FtYiKz6tSyUv9LPEDei6A= @@ -986,8 +992,9 @@ github.com/google/uuid v1.1.1/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+ github.com/google/uuid v1.1.2/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/google/uuid v1.2.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= 
github.com/google/uuid v1.3.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= -github.com/google/uuid v1.3.1 h1:KjJaJ9iWZ3jOFZIf1Lqf4laDRCasjl0BCmnEGxkdLb4= github.com/google/uuid v1.3.1/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= +github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/googleapis/enterprise-certificate-proxy v0.2.4 h1:uGy6JWR/uMIILU8wbf+OkstIrNiMjGpEIyhx8f6W7s4= github.com/googleapis/enterprise-certificate-proxy v0.2.4/go.mod h1:AwSRAtLfXpU5Nm3pW+v7rGDHp09LsPtGY9MduiEsR9k= github.com/googleapis/gax-go/v2 v2.0.4/go.mod h1:0Wqv26UfaUD9n4G6kQubkQ+KchISgw+vpHVxEJEs9eg= @@ -1150,8 +1157,9 @@ github.com/jmoiron/sqlx v1.3.1 h1:aLN7YINNZ7cYOPK3QC83dbM6KT0NMqVMw961TqrejlE= github.com/jmoiron/sqlx v1.3.1/go.mod h1:2BljVx/86SuTyjE+aPYlHCTNvZrnJXghYGpNiXLBMCQ= github.com/joeshaw/multierror v0.0.0-20140124173710-69b34d4ec901 h1:rp+c0RAYOWj8l6qbCUTSiRLG/iKnW3K3/QfPPuSsBt4= github.com/joeshaw/multierror v0.0.0-20140124173710-69b34d4ec901/go.mod h1:Z86h9688Y0wesXCyonoVr47MasHilkuLMqGhRZ4Hpak= -github.com/joho/godotenv v1.3.0 h1:Zjp+RcGpHhGlrMbJzXTrZZPrWj+1vfm90La1wgB6Bhc= github.com/joho/godotenv v1.3.0/go.mod h1:7hK45KPybAkOC6peb+G5yklZfMxEjkZhHbwpqxOKXbg= +github.com/joho/godotenv v1.5.1 h1:7eLL/+HRGLY0ldzfGMeQkb7vMd0as4CfYvUVzLqw0N0= +github.com/joho/godotenv v1.5.1/go.mod h1:f4LDr5Voq0i2e/R5DDNOoa2zzDfwtkZa6DnEwAbqwq4= github.com/jonboulle/clockwork v0.1.0/go.mod h1:Ii8DK3G1RaLaWxj9trq07+26W01tbo22gdxWY5EU2bo= github.com/jonboulle/clockwork v0.2.2 h1:UOGuzwb1PwsrDAObMuhUnj0p5ULPj8V/xJ7Kx9qUBdQ= github.com/jonboulle/clockwork v0.2.2/go.mod h1:Pkfl5aHPm1nk2H9h0bjmnJD/BcgbGXUBGnn1kMkgxc8= @@ -1445,8 +1453,9 @@ github.com/pierrre/gotestcover v0.0.0-20160517101806-924dca7d15f0 h1:i5VIxp6QB8o github.com/pierrre/gotestcover v0.0.0-20160517101806-924dca7d15f0/go.mod h1:4xpMLz7RBWyB+ElzHu8Llua96TRCB3YwX+l5EP1wmHk= github.com/pkg/browser v0.0.0-20180916011732-0a3d74bf9ce4/go.mod h1:4OwLy04Bl9Ef3GJJCoec+30X3LQs/0/m4HFRt/2LUSA= github.com/pkg/browser v0.0.0-20210115035449-ce105d075bb4/go.mod h1:N6UoU20jOqggOuDwUaBQpluzLNDqif3kq9z2wpdYEfQ= -github.com/pkg/browser v0.0.0-20210911075715-681adbf594b8 h1:KoWmjvw+nsYOo29YJK9vDA65RGE3NrOnUtO7a+RF9HU= github.com/pkg/browser v0.0.0-20210911075715-681adbf594b8/go.mod h1:HKlIX3XHQyzLZPlr7++PzdhaXEj94dEiJgZDTsxEqUI= +github.com/pkg/browser v0.0.0-20240102092130-5ac0b6a4141c h1:+mdjkGKdHQG3305AYmdv1U2eRNDiU2ErMBj1gwrq8eQ= +github.com/pkg/browser v0.0.0-20240102092130-5ac0b6a4141c/go.mod h1:7rwL4CYBLnjLxUqIJNnCWiEdr3bn6IUYi15bNlnbCCU= github.com/pkg/diff v0.0.0-20210226163009-20ebb0f2a09e/go.mod h1:pJLUxLENpZxwdsKMEsNbx1VGcRFpLqf3715MtcvvzbA= github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= @@ -1527,8 +1536,8 @@ github.com/rogpeppe/go-internal v1.2.2/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFR github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4= github.com/rogpeppe/go-internal v1.6.1/go.mod h1:xXDCJY+GAPziupqXw64V24skbSoqbTEfhy4qGm1nDQc= github.com/rogpeppe/go-internal v1.9.0/go.mod h1:WtVeX8xhTBvf0smdhujwtBcq4Qrzq/fJaraNFVN+nFs= -github.com/rogpeppe/go-internal v1.11.0 h1:cWPaGQEPrBb5/AsnsZesgZZ9yb1OQ+GOISoDNXVBh4M= -github.com/rogpeppe/go-internal v1.11.0/go.mod h1:ddIwULY96R17DhadqLgMfk9H9tvdUzkipdSkR5nkCZA= +github.com/rogpeppe/go-internal v1.12.0 
h1:exVL4IDcn6na9z1rAb56Vxr+CgyK3nn3O+epU5NdKM8= +github.com/rogpeppe/go-internal v1.12.0/go.mod h1:E+RYuTGaKKdloAfM02xzb0FW3Paa99yedzYV+kq4uf4= github.com/rootless-containers/rootlesskit v1.1.0 h1:cRaRIYxY8oce4eE/zeAUZhgKu/4tU1p9YHN4+suwV7M= github.com/rootless-containers/rootlesskit v1.1.0/go.mod h1:H+o9ndNe7tS91WqU0/+vpvc+VaCd7TCIWaJjnV0ujUo= github.com/rs/cors v1.6.0/go.mod h1:gFx+x8UowdsKA9AchylcLynDq+nNFfI8FkUZdN/jGCU= @@ -2108,6 +2117,7 @@ golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBc golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220829200755-d48e67d00261/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.1.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= @@ -2551,6 +2561,8 @@ modernc.org/token v1.0.0/go.mod h1:UGzOrNV1mAFSEB63lOFHIpNRUVMvYTc6yu1SMY/XTDM= modernc.org/z v1.5.1/go.mod h1:eWFB510QWW5Th9YGZT81s+LwvaAs3Q2yr4sP0rmLkv8= mvdan.cc/garble v0.7.1 h1:9Qffp7HzKLBfQxYZ8mBF/EoYefV54ooY8v9UR4ByTPw= mvdan.cc/garble v0.7.1/go.mod h1:7F2EWpOklhK2qWzv1Hbin8sP2TYBO+EALIx4kFTmtu8= +nhooyr.io/websocket v1.8.11 h1:f/qXNc2/3DpoSZkHt1DQu6rj4zGC8JmkkLkWss0MgN0= +nhooyr.io/websocket v1.8.11/go.mod h1:rN9OFWIUwuxg4fR5tELlYC04bXYowCP9GX47ivo2l+c= rsc.io/binaryregexp v0.2.0/go.mod h1:qTv7/COck+e2FymRvadv62gMdZztPaShugOCi3I+8D8= rsc.io/pdf v0.1.1/go.mod h1:n8OzWcQ6Sp37PL01nO98y4iUCRdTGarVfzxY20ICaU4= rsc.io/quote/v3 v3.1.0/go.mod h1:yEA65RcK8LyAZtP9Kv3t0HmxON59tX3rD+tICJqUlj0= diff --git a/x-pack/filebeat/input/azureeventhub/config.go b/x-pack/filebeat/input/azureeventhub/config.go index 80c2a905162..c6b376268ce 100644 --- a/x-pack/filebeat/input/azureeventhub/config.go +++ b/x-pack/filebeat/input/azureeventhub/config.go @@ -20,13 +20,16 @@ type azureInputConfig struct { EventHubName string `config:"eventhub" validate:"required"` ConsumerGroup string `config:"consumer_group"` // Azure Storage container to store leases and checkpoints - SAName string `config:"storage_account"` - SAKey string `config:"storage_account_key"` - SAContainer string `config:"storage_account_container"` + SAName string `config:"storage_account"` + SAConnectionString string `config:"storage_account_connection_string"` // engine v2 only + SAKey string `config:"storage_account_key"` + SAContainer string `config:"storage_account_container"` // by default the azure public environment is used, to override, users can provide a specific resource manager endpoint OverrideEnvironment string `config:"resource_manager_endpoint"` // cleanup the log JSON input for known issues, options: SINGLE_QUOTES, NEW_LINES SanitizeOptions []string `config:"sanitize_options"` + // Engine version to use (v1 or v2). Default is v1. 
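+	//
+	// A hypothetical YAML sketch of how these options combine (field names
+	// follow the `config` tags in this struct; all values are illustrative):
+	//
+	//   - type: azure-eventhub
+	//     connection_string: "Endpoint=sb://..."
+	//     eventhub: "insights-operational-logs"
+	//     consumer_group: "$Default"
+	//     storage_account: "mystorageaccount"
+	//     storage_account_container: "filebeat"
+	//     storage_account_connection_string: "DefaultEndpointsProtocol=https;..."
+	//     engine_version: "v2"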
+ EngineVersion string `config:"engine_version"` } const ephContainerName = "filebeat" @@ -73,6 +76,10 @@ func (conf *azureInputConfig) Validate() error { } } + if conf.EngineVersion == "" { + conf.EngineVersion = "v1" + } + return nil } diff --git a/x-pack/filebeat/input/azureeventhub/eph.go b/x-pack/filebeat/input/azureeventhub/eph.go deleted file mode 100644 index f6981b4882e..00000000000 --- a/x-pack/filebeat/input/azureeventhub/eph.go +++ /dev/null @@ -1,114 +0,0 @@ -// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one -// or more contributor license agreements. Licensed under the Elastic License; -// you may not use this file except in compliance with the Elastic License. - -//go:build !aix - -package azureeventhub - -import ( - "context" - "errors" - "fmt" - - eventhub "github.com/Azure/azure-event-hubs-go/v3" - "github.com/Azure/azure-event-hubs-go/v3/eph" - "github.com/Azure/azure-event-hubs-go/v3/storage" - "github.com/Azure/azure-storage-blob-go/azblob" - "github.com/Azure/go-autorest/autorest/azure" -) - -// users can select from one of the already defined azure cloud envs -var environments = map[string]azure.Environment{ - azure.ChinaCloud.ResourceManagerEndpoint: azure.ChinaCloud, - azure.GermanCloud.ResourceManagerEndpoint: azure.GermanCloud, - azure.PublicCloud.ResourceManagerEndpoint: azure.PublicCloud, - azure.USGovernmentCloud.ResourceManagerEndpoint: azure.USGovernmentCloud, -} - -// runWithEPH will consume ingested events using the Event Processor Host (EPH). -// -// To learn more, check the following resources: -// - https://github.com/Azure/azure-event-hubs-go#event-processor-host -// - https://docs.microsoft.com/en-us/azure/event-hubs/event-hubs-event-processor-host -func (a *azureInput) runWithEPH() error { - // create a new Azure Storage Leaser / Checkpointer - cred, err := azblob.NewSharedKeyCredential(a.config.SAName, a.config.SAKey) - if err != nil { - return err - } - env, err := getAzureEnvironment(a.config.OverrideEnvironment) - if err != nil { - return err - } - leaserCheckpointer, err := storage.NewStorageLeaserCheckpointer(cred, a.config.SAName, a.config.SAContainer, env) - if err != nil { - a.log.Errorw("error creating storage leaser checkpointer", "error", err) - return err - } - - // adding a nil EventProcessorHostOption will break the code, - // this is why a condition is added and a.processor is assigned. - if a.config.ConsumerGroup != "" { - a.processor, err = eph.NewFromConnectionString( - a.workerCtx, - fmt.Sprintf("%s%s%s", a.config.ConnectionString, eventHubConnector, a.config.EventHubName), - leaserCheckpointer, - leaserCheckpointer, - eph.WithConsumerGroup(a.config.ConsumerGroup), - eph.WithNoBanner()) - } else { - a.processor, err = eph.NewFromConnectionString( - a.workerCtx, - fmt.Sprintf("%s%s%s", a.config.ConnectionString, eventHubConnector, a.config.EventHubName), - leaserCheckpointer, - leaserCheckpointer, - eph.WithNoBanner()) - } - if err != nil { - a.log.Errorw("error creating processor", "error", err) - return err - } - - // register a message handler -- many can be registered - handlerID, err := a.processor.RegisterHandler(a.workerCtx, - func(c context.Context, e *eventhub.Event) error { - var onEventErr error - // partitionID is not yet mapped in the azure-eventhub sdk - ok := a.processEvents(e, "") - if !ok { - onEventErr = errors.New("OnEvent function returned false. 
Stopping input worker") - a.log.Error(onEventErr.Error()) - a.Stop() - } - return onEventErr - }) - if err != nil { - a.log.Errorw("error registering handler", "error", err) - return err - } - a.log.Infof("handler id: %q is registered\n", handlerID) - - // Start handling messages from all of the partitions balancing across - // multiple consumers. - // The processor can be stopped by calling `Close()` on the processor. - err = a.processor.StartNonBlocking(a.workerCtx) - if err != nil { - a.log.Errorw("error starting the processor", "error", err) - return err - } - - return nil -} - -func getAzureEnvironment(overrideResManager string) (azure.Environment, error) { - // if no override is set then the azure public cloud is used - if overrideResManager == "" || overrideResManager == "" { - return azure.PublicCloud, nil - } - if env, ok := environments[overrideResManager]; ok { - return env, nil - } - // can retrieve hybrid env from the resource manager endpoint - return azure.EnvironmentFromURL(overrideResManager) -} diff --git a/x-pack/filebeat/input/azureeventhub/input.go b/x-pack/filebeat/input/azureeventhub/input.go index bc2244925e6..618db29b1a5 100644 --- a/x-pack/filebeat/input/azureeventhub/input.go +++ b/x-pack/filebeat/input/azureeventhub/input.go @@ -7,23 +7,15 @@ package azureeventhub import ( - "context" - "encoding/json" "fmt" - "strings" - "sync" - "time" - - eventhub "github.com/Azure/azure-event-hubs-go/v3" - "github.com/Azure/azure-event-hubs-go/v3/eph" - "github.com/mitchellh/hashstructure" - - "github.com/elastic/beats/v7/filebeat/channel" - "github.com/elastic/beats/v7/filebeat/input" + "github.com/Azure/azure-sdk-for-go/sdk/azcore/to" + v2 "github.com/elastic/beats/v7/filebeat/input/v2" "github.com/elastic/beats/v7/libbeat/beat" + "github.com/elastic/beats/v7/libbeat/feature" conf "github.com/elastic/elastic-agent-libs/config" "github.com/elastic/elastic-agent-libs/logp" - "github.com/elastic/elastic-agent-libs/mapstr" + "github.com/elastic/go-concert/unison" + "strings" ) const ( @@ -32,262 +24,252 @@ const ( inputName = "azure-eventhub" ) -func init() { - err := input.Register(inputName, NewInput) - if err != nil { - panic(fmt.Errorf("failed to register %v input: %w", inputName, err)) +func Plugin(log *logp.Logger) v2.Plugin { + return v2.Plugin{ + Name: inputName, + Stability: feature.Stable, + Deprecated: false, + Info: "Collect logs from Azure Event Hub", + Manager: &eventHubInputManager{ + log: log, + }, } } -// configID computes a unique ID for the input configuration. -// -// It is used to identify the input in the registry and to detect -// changes in the configuration. -// -// We will remove this function as we upgrade the input to the -// v2 API (there is an ID in the v2 context). 
-func configID(config *conf.C) (string, error) { - var tmp struct { - ID string `config:"id"` - } - if err := config.Unpack(&tmp); err != nil { - return "", fmt.Errorf("error extracting ID: %w", err) - } - if tmp.ID != "" { - return tmp.ID, nil - } - - var h map[string]interface{} - _ = config.Unpack(&h) - id, err := hashstructure.Hash(h, nil) - if err != nil { - return "", fmt.Errorf("can not compute ID from configuration: %w", err) - } - - return fmt.Sprintf("%16X", id), nil +type eventHubInputManager struct { + log *logp.Logger } -// azureInput struct for the azure-eventhub input -type azureInput struct { - config azureInputConfig // azure-eventhub configuration - context input.Context - outlet channel.Outleter - log *logp.Logger // logging info and error messages - workerCtx context.Context // worker goroutine context. It's cancelled when the input stops or the worker exits. - workerCancel context.CancelFunc // used to signal that the worker should stop. - workerOnce sync.Once // guarantees that the worker goroutine is only started once. - processor *eph.EventProcessorHost // eph will be assigned if users have enabled the option - id string // ID of the input; used to identify the input in the input metrics registry only, and will be removed once the input is migrated to v2. - metrics *inputMetrics // Metrics for the input. +func (m *eventHubInputManager) Init(unison.Group) error { + return nil } -// NewInput creates a new azure-eventhub input -func NewInput( - cfg *conf.C, - connector channel.Connector, - inputContext input.Context, -) (input.Input, error) { +func (m *eventHubInputManager) Create(cfg *conf.C) (v2.Input, error) { var config azureInputConfig if err := cfg.Unpack(&config); err != nil { return nil, fmt.Errorf("reading %s input config: %w", inputName, err) } - // Since this is a v1 input, we need to set the ID manually. - // - // We need an ID to identify the input in the input metrics - // registry. - // - // This is a temporary workaround until we migrate the input to v2. - inputId, err := configID(cfg) - if err != nil { - return nil, err + switch config.EngineVersion { + case "v1": + return newEventHubInputV1(config, m.log) + case "v2": + return newEventHubInputV2(config, m.log) + default: + return nil, fmt.Errorf("invalid azure-eventhub engine version: %s", config.EngineVersion) } - inputCtx, cancelInputCtx := context.WithCancel(context.Background()) - go func() { - defer cancelInputCtx() - select { - case <-inputContext.Done: - case <-inputCtx.Done(): - } - }() - - // If the input ever needs to be made restartable, then context would need - // to be recreated with each restart. - workerCtx, workerCancel := context.WithCancel(inputCtx) - - in := azureInput{ - id: inputId, - config: config, - log: logp.NewLogger(fmt.Sprintf("%s input", inputName)).With("connection string", stripConnectionString(config.ConnectionString)), - context: inputContext, - workerCtx: workerCtx, - workerCancel: workerCancel, - } - out, err := connector.Connect(cfg) - if err != nil { - return nil, err - } - in.outlet = out - in.log.Infof("Initialized %s input.", inputName) - - return &in, nil + //return &azureInput{ + // config: config, + // log: logp.NewLogger(fmt.Sprintf("%s input", inputName)).With("connection string", stripConnectionString(config.ConnectionString)), + //}, nil } -// Run starts the `azure-eventhub` input and then returns. 
+// func init() { +// err := input.Register(inputName, NewInput) +// if err != nil { +// panic(fmt.Errorf("failed to register %v input: %w", inputName, err)) +// } +// } + +// // configID computes a unique ID for the input configuration. +// // +// // It is used to identify the input in the registry and to detect +// // changes in the configuration. +// // +// // We will remove this function as we upgrade the input to the +// // v2 API (there is an ID in the v2 context). +// func configID(config *conf.C) (string, error) { +// var tmp struct { +// ID string `config:"id"` +// } +// if err := config.Unpack(&tmp); err != nil { +// return "", fmt.Errorf("error extracting ID: %w", err) +// } +// if tmp.ID != "" { +// return tmp.ID, nil +// } + +// var h map[string]interface{} +// _ = config.Unpack(&h) +// id, err := hashstructure.Hash(h, nil) +// if err != nil { +// return "", fmt.Errorf("can not compute ID from configuration: %w", err) +// } + +// return fmt.Sprintf("%16X", id), nil +// } + +//// azureInput struct for the azure-eventhub input +//type azureInput struct { +// config azureInputConfig // azure-eventhub configuration +// context input.Context +// outlet channel.Outleter +// log *logp.Logger // logging info and error messages +// workerCtx context.Context // worker goroutine context. It's cancelled when the input stops or the worker exits. +// workerCancel context.CancelFunc // used to signal that the worker should stop. +// workerOnce sync.Once // guarantees that the worker goroutine is only started once. +// processor *eph.EventProcessorHost // eph will be assigned if users have enabled the option +// id string // ID of the input; used to identify the input in the input metrics registry only, and will be removed once the input is migrated to v2. +// metrics *inputMetrics // Metrics for the input. +//} + +// // NewInput creates a new azure-eventhub input +// func NewInput( +// cfg *conf.C, +// connector channel.Connector, +// inputContext input.Context, +// ) (input.Input, error) { +// var config azureInputConfig +// if err := cfg.Unpack(&config); err != nil { +// return nil, fmt.Errorf("reading %s input config: %w", inputName, err) +// } + +// // Since this is a v1 input, we need to set the ID manually. +// // +// // We need an ID to identify the input in the input metrics +// // registry. +// // +// // This is a temporary workaround until we migrate the input to v2. +// inputId, err := configID(cfg) +// if err != nil { +// return nil, err +// } + +// inputCtx, cancelInputCtx := context.WithCancel(context.Background()) +// go func() { +// defer cancelInputCtx() +// select { +// case <-inputContext.Done: +// case <-inputCtx.Done(): +// } +// }() + +// // If the input ever needs to be made restartable, then context would need +// // to be recreated with each restart. +// workerCtx, workerCancel := context.WithCancel(inputCtx) + +// in := azureInput{ +// id: inputId, +// config: config, +// log: logp.NewLogger(fmt.Sprintf("%s input", inputName)).With("connection string", stripConnectionString(config.ConnectionString)), +// context: inputContext, +// workerCtx: workerCtx, +// workerCancel: workerCancel, +// } +// out, err := connector.Connect(cfg) +// if err != nil { +// return nil, err +// } +// in.outlet = out +// in.log.Infof("Initialized %s input.", inputName) + +// return &in, nil +// } // -// The first invocation will start an input worker. All subsequent -// invocations will be no-ops. 
+//func (a *azureInput) Name() string { +// return inputName +//} // -// The input worker will continue fetching data from the event hub until -// the input Runner calls the `Stop()` method. -func (a *azureInput) Run() { - // `Run` is invoked periodically by the input Runner. The `sync.Once` - // guarantees that we only start the worker once during the first - // invocation. - a.workerOnce.Do(func() { - a.log.Infof("%s input worker is starting.", inputName) - - // We set up the metrics in the `Run()` method and tear them down - // in the `Stop()` method. - // - // The factory method `NewInput` is not a viable solution because - // the Runner invokes it during the configuration check without - // calling the `Stop()` function; this causes panics - // due to multiple metrics registrations. - a.metrics = newInputMetrics(a.id, nil) - - err := a.runWithEPH() - if err != nil { - a.log.Errorw("error starting the input worker", "error", err) - return - } - a.log.Infof("%s input worker has started.", inputName) +//func (a *azureInput) Test(v2.TestContext) error { +// return nil +//} +// +//// Run starts the `azure-eventhub` input and then returns. +//// +//// The first invocation will start an input worker. All subsequent +//// invocations will be no-ops. +//// +//// The input worker will continue fetching data from the event hub until +//// the input Runner calls the `Stop()` method. +//func (a *azureInput) Run(inputContext v2.Context, pipeline beat.Pipeline) error { +// ctx := v2.GoContextFromCanceler(inputContext.Cancelation) +// +// // `Run` is invoked periodically by the input Runner. The `sync.Once` +// // guarantees that we only start the worker once during the first +// // invocation. +// // a.workerOnce.Do(func() { +// a.log.Infof("%s input worker is starting.", inputName) +// +// // We set up the metrics in the `Run()` method and tear them down +// // in the `Stop()` method. +// // +// // The factory method `NewInput` is not a viable solution because +// // the Runner invokes it during the configuration check without +// // calling the `Stop()` function; this causes panics +// // due to multiple metrics registrations. +// a.metrics = newInputMetrics(inputContext.ID, nil) +// +// err := a.runWithEPH() +// if err != nil { +// a.log.Errorw("error starting the input worker", "error", err) +// return err +// } +// a.log.Infof("%s input worker has started.", inputName) +// // }) +// +// for { +// select { +// case <-ctx.Done(): +// a.log.Infof("%s input worker is stopping.", inputName) +// if a.processor != nil { +// // Tells the processor to stop processing events and release all +// // resources (like scheduler, leaser, checkpointer, and client). +// err := a.processor.Close(context.Background()) +// if err != nil { +// a.log.Errorw("error while closing eventhostprocessor", "error", err) +// } +// } +// +// if a.metrics != nil { +// a.metrics.Close() +// } +// +// // a.workerCancel() // FIXME: is this needed? +// a.log.Infof("%s input worker has stopped.", inputName) +// } +// +// break +// } +// +// return nil +//} +// +//// // Stop stops `azure-eventhub` input. +//// func (a *azureInput) Stop() { +//// a.log.Infof("%s input worker is stopping.", inputName) +//// if a.processor != nil { +//// // Tells the processor to stop processing events and release all +//// // resources (like scheduler, leaser, checkpointer, and client). 
+//// err := a.processor.Close(context.Background()) +//// if err != nil { +//// a.log.Errorw("error while closing eventhostprocessor", "error", err) +//// } +//// } +// +//// if a.metrics != nil { +//// a.metrics.Close() +//// } +// +//// a.workerCancel() +//// a.log.Infof("%s input worker has stopped.", inputName) +//// } +// +//// // Wait stop the current server +//// func (a *azureInput) Wait() { +//// a.Stop() +//// } + +func createPipelineClient(pipeline beat.Pipeline) (beat.Client, error) { + return pipeline.ConnectWith(beat.ClientConfig{ + Processing: beat.ProcessingConfig{ + // This input only produces events with basic types so normalization + // is not required. + EventNormalization: to.Ptr(false), + }, }) } -// Stop stops `azure-eventhub` input. -func (a *azureInput) Stop() { - a.log.Infof("%s input worker is stopping.", inputName) - if a.processor != nil { - // Tells the processor to stop processing events and release all - // resources (like scheduler, leaser, checkpointer, and client). - err := a.processor.Close(context.Background()) - if err != nil { - a.log.Errorw("error while closing eventhostprocessor", "error", err) - } - } - - if a.metrics != nil { - a.metrics.Close() - } - - a.workerCancel() - a.log.Infof("%s input worker has stopped.", inputName) -} - -// Wait stop the current server -func (a *azureInput) Wait() { - a.Stop() -} - -func (a *azureInput) processEvents(event *eventhub.Event, partitionID string) bool { - processingStartTime := time.Now() - azure := mapstr.M{ - // partitionID is only mapped in the non-eph option which is not available yet, this field will be temporary unavailable - //"partition_id": partitionID, - "eventhub": a.config.EventHubName, - "consumer_group": a.config.ConsumerGroup, - } - - // update the input metrics - a.metrics.receivedMessages.Inc() - a.metrics.receivedBytes.Add(uint64(len(event.Data))) - - records := a.parseMultipleRecords(event.Data) - - for _, record := range records { - _, _ = azure.Put("offset", event.SystemProperties.Offset) - _, _ = azure.Put("sequence_number", event.SystemProperties.SequenceNumber) - _, _ = azure.Put("enqueued_time", event.SystemProperties.EnqueuedTime) - ok := a.outlet.OnEvent(beat.Event{ - // this is the default value for the @timestamp field; usually the ingest - // pipeline replaces it with a value in the payload. - Timestamp: processingStartTime, - Fields: mapstr.M{ - "message": record, - "azure": azure, - }, - Private: event.Data, - }) - if !ok { - a.metrics.processingTime.Update(time.Since(processingStartTime).Nanoseconds()) - return ok - } - - a.metrics.sentEvents.Inc() - } - - a.metrics.processedMessages.Inc() - a.metrics.processingTime.Update(time.Since(processingStartTime).Nanoseconds()) - - return true -} - -// parseMultipleRecords will try to split the message into multiple ones based on the group field provided by the configuration -func (a *azureInput) parseMultipleRecords(bMessage []byte) []string { - var mapObject map[string][]interface{} - var messages []string - - // Clean up the message for known issues [1] where Azure services produce malformed JSON documents. - // Sanitization occurs if options are available and the message contains an invalid JSON. - // - // [1]: https://learn.microsoft.com/en-us/answers/questions/1001797/invalid-json-logs-produced-for-function-apps - if len(a.config.SanitizeOptions) != 0 && !json.Valid(bMessage) { - bMessage = sanitize(bMessage, a.config.SanitizeOptions...) 
- a.metrics.sanitizedMessages.Inc() - } - - // check if the message is a "records" object containing a list of events - err := json.Unmarshal(bMessage, &mapObject) - if err == nil { - if len(mapObject[expandEventListFromField]) > 0 { - for _, ms := range mapObject[expandEventListFromField] { - js, err := json.Marshal(ms) - if err == nil { - messages = append(messages, string(js)) - a.metrics.receivedEvents.Inc() - } else { - a.log.Errorw(fmt.Sprintf("serializing message %s", ms), "error", err) - } - } - } - } else { - a.log.Debugf("deserializing multiple messages to a `records` object returning error: %s", err) - // in some cases the message is an array - var arrayObject []interface{} - err = json.Unmarshal(bMessage, &arrayObject) - if err != nil { - // return entire message - a.log.Debugf("deserializing multiple messages to an array returning error: %s", err) - a.metrics.decodeErrors.Inc() - return []string{string(bMessage)} - } - - for _, ms := range arrayObject { - js, err := json.Marshal(ms) - if err == nil { - messages = append(messages, string(js)) - a.metrics.receivedEvents.Inc() - } else { - a.log.Errorw(fmt.Sprintf("serializing message %s", ms), "error", err) - } - } - } - - return messages -} - // Strip connection string to remove sensitive information // A connection string should look like this: // Endpoint=sb://dummynamespace.servicebus.windows.net/;SharedAccessKeyName=DummyAccessKeyName;SharedAccessKey=5dOntTRytoC24opYThisAsit3is2B+OGY1US/fuL3ly= diff --git a/x-pack/filebeat/input/azureeventhub/metrics.go b/x-pack/filebeat/input/azureeventhub/metrics.go index 8aeabc57265..70efb1bdb9c 100644 --- a/x-pack/filebeat/input/azureeventhub/metrics.go +++ b/x-pack/filebeat/input/azureeventhub/metrics.go @@ -3,7 +3,6 @@ // you may not use this file except in compliance with the Elastic License. //go:build !aix -// +build !aix package azureeventhub diff --git a/x-pack/filebeat/input/azureeventhub/v1_eph.go b/x-pack/filebeat/input/azureeventhub/v1_eph.go new file mode 100644 index 00000000000..c771dbbb463 --- /dev/null +++ b/x-pack/filebeat/input/azureeventhub/v1_eph.go @@ -0,0 +1,118 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. + +//go:build !aix + +package azureeventhub + +import "github.com/Azure/go-autorest/autorest/azure" + +// import ( +// +// "context" +// "errors" +// "fmt" +// +// eventhub "github.com/Azure/azure-event-hubs-go/v3" +// "github.com/Azure/azure-event-hubs-go/v3/eph" +// "github.com/Azure/azure-event-hubs-go/v3/storage" +// "github.com/Azure/azure-storage-blob-go/azblob" +// "github.com/Azure/go-autorest/autorest/azure" +// +// ) +// +// // users can select from one of the already defined azure cloud envs +var environments = map[string]azure.Environment{ + azure.ChinaCloud.ResourceManagerEndpoint: azure.ChinaCloud, + azure.GermanCloud.ResourceManagerEndpoint: azure.GermanCloud, + azure.PublicCloud.ResourceManagerEndpoint: azure.PublicCloud, + azure.USGovernmentCloud.ResourceManagerEndpoint: azure.USGovernmentCloud, +} + +// // runWithEPH will consume ingested events using the Event Processor Host (EPH). 
+// // +// // To learn more, check the following resources: +// // - https://github.com/Azure/azure-event-hubs-go#event-processor-host +// // - https://docs.microsoft.com/en-us/azure/event-hubs/event-hubs-event-processor-host +// +// func (in *eventHubInputV1) runWithEPH() error { +// // create a new Azure Storage Leaser / Checkpointer +// cred, err := azblob.NewSharedKeyCredential(in.config.SAName, in.config.SAKey) +// if err != nil { +// return err +// } +// env, err := getAzureEnvironment(in.config.OverrideEnvironment) +// if err != nil { +// return err +// } +// leaserCheckpointer, err := storage.NewStorageLeaserCheckpointer(cred, in.config.SAName, in.config.SAContainer, env) +// if err != nil { +// in.log.Errorw("error creating storage leaser checkpointer", "error", err) +// return err +// } +// +// // adding a nil EventProcessorHostOption will break the code, +// // this is why a condition is added and a.processor is assigned. +// if in.config.ConsumerGroup != "" { +// in.processor, err = eph.NewFromConnectionString( +// in.workerCtx, +// fmt.Sprintf("%s%s%s", in.config.ConnectionString, eventHubConnector, in.config.EventHubName), +// leaserCheckpointer, +// leaserCheckpointer, +// eph.WithConsumerGroup(in.config.ConsumerGroup), +// eph.WithNoBanner()) +// } else { +// in.processor, err = eph.NewFromConnectionString( +// in.workerCtx, +// fmt.Sprintf("%s%s%s", in.config.ConnectionString, eventHubConnector, in.config.EventHubName), +// leaserCheckpointer, +// leaserCheckpointer, +// eph.WithNoBanner()) +// } +// if err != nil { +// in.log.Errorw("error creating processor", "error", err) +// return err +// } +// +// // register a message handler -- many can be registered +// handlerID, err := in.processor.RegisterHandler(in.workerCtx, +// func(c context.Context, e *eventhub.Event) error { +// var onEventErr error +// // partitionID is not yet mapped in the azure-eventhub sdk +// ok := in.processEvents(e, "") +// if !ok { +// onEventErr = errors.New("OnEvent function returned false. Stopping input worker") +// in.log.Error(onEventErr.Error()) +// in.Stop() +// } +// return onEventErr +// }) +// if err != nil { +// in.log.Errorw("error registering handler", "error", err) +// return err +// } +// in.log.Infof("handler id: %q is registered\n", handlerID) +// +// // Start handling messages from all of the partitions balancing across +// // multiple consumers. +// // The processor can be stopped by calling `Close()` on the processor. 
+//	err = in.processor.StartNonBlocking(in.workerCtx)
+//	if err != nil {
+//		in.log.Errorw("error starting the processor", "error", err)
+//		return err
+//	}
+//
+//	return nil
+// }
+func getAzureEnvironment(overrideResManager string) (azure.Environment, error) {
+	// if no override is set then the azure public cloud is used
+	if overrideResManager == "" {
+		return azure.PublicCloud, nil
+	}
+	if env, ok := environments[overrideResManager]; ok {
+		return env, nil
+	}
+	// can retrieve hybrid env from the resource manager endpoint
+	return azure.EnvironmentFromURL(overrideResManager)
+}
diff --git a/x-pack/filebeat/input/azureeventhub/eph_test.go b/x-pack/filebeat/input/azureeventhub/v1_eph_test.go
similarity index 100%
rename from x-pack/filebeat/input/azureeventhub/eph_test.go
rename to x-pack/filebeat/input/azureeventhub/v1_eph_test.go
diff --git a/x-pack/filebeat/input/azureeventhub/v1_input.go b/x-pack/filebeat/input/azureeventhub/v1_input.go
new file mode 100644
index 00000000000..91903217907
--- /dev/null
+++ b/x-pack/filebeat/input/azureeventhub/v1_input.go
@@ -0,0 +1,308 @@
+// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+// or more contributor license agreements. Licensed under the Elastic License;
+// you may not use this file except in compliance with the Elastic License.
+
+//go:build !aix
+
+package azureeventhub
+
+import (
+	"context"
+	"encoding/json"
+	"errors"
+	"fmt"
+	"time"
+
+	eventhub "github.com/Azure/azure-event-hubs-go/v3"
+	"github.com/Azure/azure-event-hubs-go/v3/eph"
+	"github.com/Azure/azure-event-hubs-go/v3/storage"
+	"github.com/Azure/azure-storage-blob-go/azblob"
+	v2 "github.com/elastic/beats/v7/filebeat/input/v2"
+	"github.com/elastic/beats/v7/libbeat/beat"
+	"github.com/elastic/elastic-agent-libs/logp"
+	"github.com/elastic/elastic-agent-libs/mapstr"
+)
+
+type eventHubInputV1 struct {
+	config    azureInputConfig
+	log       *logp.Logger
+	metrics   *inputMetrics
+	processor *eph.EventProcessorHost
+	client    beat.Client
+}
+
+func newEventHubInputV1(config azureInputConfig, log *logp.Logger) (v2.Input, error) {
+	//log := logp.NewLogger(fmt.Sprintf("%s input", inputName)).With("connection string", stripConnectionString(config.ConnectionString))
+	return &eventHubInputV1{
+		config: config,
+		log:    log.Named(inputName),
+	}, nil
+}
+
+func (in *eventHubInputV1) Name() string {
+	return inputName
+}
+
+func (in *eventHubInputV1) Test(v2.TestContext) error {
+	return nil
+}
+
+func (in *eventHubInputV1) Run(
+	inputContext v2.Context,
+	pipeline beat.Pipeline,
+) error {
+	var err error
+
+	// Create a client for publishing events and receiving notifications of their ACKs.
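+	// createPipelineClient (defined in input.go) connects with event
+	// normalization disabled, since this input only produces events with
+	// basic types.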
+	in.client, err = createPipelineClient(pipeline)
+	if err != nil {
+		return fmt.Errorf("failed to create pipeline client: %w", err)
+	}
+	defer in.client.Close()
+
+	// Set up the input metrics
+	inputMetrics := newInputMetrics(inputContext.ID, nil)
+	defer inputMetrics.Close()
+	in.metrics = inputMetrics
+
+	ctx := v2.GoContextFromCanceler(inputContext.Cancelation)
+
+	// Initialize everything for this run
+	err = in.setup(ctx)
+	if err != nil {
+		return err
+	}
+
+	// Start the main run loop
+	err = in.run(ctx)
+	if err != nil {
+		return err
+	}
+
+	return nil
+}
+
+func (in *eventHubInputV1) setup(ctx context.Context) error {
+
+	// ----------------------------------------------------
+	// 1 — Create a new Azure Storage Leaser / Checkpointer
+	// ----------------------------------------------------
+
+	cred, err := azblob.NewSharedKeyCredential(in.config.SAName, in.config.SAKey)
+	if err != nil {
+		return err
+	}
+
+	env, err := getAzureEnvironment(in.config.OverrideEnvironment)
+	if err != nil {
+		return err
+	}
+
+	leaserCheckpointer, err := storage.NewStorageLeaserCheckpointer(cred, in.config.SAName, in.config.SAContainer, env)
+	if err != nil {
+		in.log.Errorw("error creating storage leaser checkpointer", "error", err)
+		return err
+	}
+
+	// ------------------------------------------------
+	// 2 — Create a new event processor host
+	// ------------------------------------------------
+
+	// adding a nil EventProcessorHostOption will break the code,
+	// this is why a condition is added and in.processor is assigned.
+	if in.config.ConsumerGroup != "" {
+		in.processor, err = eph.NewFromConnectionString(
+			ctx,
+			fmt.Sprintf("%s%s%s", in.config.ConnectionString, eventHubConnector, in.config.EventHubName),
+			leaserCheckpointer,
+			leaserCheckpointer,
+			eph.WithConsumerGroup(in.config.ConsumerGroup),
+			eph.WithNoBanner())
+	} else {
+		in.processor, err = eph.NewFromConnectionString(
+			ctx,
+			fmt.Sprintf("%s%s%s", in.config.ConnectionString, eventHubConnector, in.config.EventHubName),
+			leaserCheckpointer,
+			leaserCheckpointer,
+			eph.WithNoBanner())
+	}
+	if err != nil {
+		in.log.Errorw("error creating processor", "error", err)
+		return err
+	}
+
+	// ------------------------------------------------
+	// 3 — Register a message handler
+	// ------------------------------------------------
+
+	// register a message handler -- many can be registered
+	handlerID, err := in.processor.RegisterHandler(ctx,
+		func(c context.Context, e *eventhub.Event) error {
+			var onEventErr error
+			// partitionID is not yet mapped in the azure-eventhub sdk
+			ok := in.processEvents(e, "")
+			if !ok {
+				onEventErr = errors.New("OnEvent function returned false. Stopping input worker")
+				in.log.Error(onEventErr.Error())
+
+				// FIXME: should we stop the processor here?
+				// in.Stop()
+			}
+
+			return onEventErr
+		})
+	if err != nil {
+		in.log.Errorw("error registering handler", "error", err)
+		return err
+	}
+
+	in.log.Infof("handler id: %q is registered\n", handlerID)
+
+	return nil
+}
+
+func (in *eventHubInputV1) run(ctx context.Context) error {
+	// Start handling messages from all the partitions, balancing across
+	// multiple consumers.
+	// The processor can be stopped by calling `Close()` on the processor.
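+	// StartNonBlocking returns as soon as the processor is running; the
+	// deferred function below closes the processor once the run loop exits.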
+	err := in.processor.StartNonBlocking(ctx)
+	if err != nil {
+		in.log.Errorw("error starting the processor", "error", err)
+		return err
+	}
+	defer func() {
+		in.log.Infof("%s input worker is stopping.", inputName)
+		err := in.processor.Close(context.Background())
+		if err != nil {
+			in.log.Errorw("error while closing eventhostprocessor", "error", err)
+		}
+		in.log.Infof("%s input worker has stopped.", inputName)
+	}()
+
+	in.log.Infof("%s input worker has started.", inputName)
+
+	if ctx.Err() == nil {
+		// Block until the input is cancelled.
+		<-ctx.Done()
+		return nil
+	}
+
+	in.log.Errorw("error during processing", "error", ctx.Err())
+
+	return ctx.Err()
+}
+
+func (in *eventHubInputV1) processEvents(event *eventhub.Event, partitionID string) bool {
+	processingStartTime := time.Now()
+	azure := mapstr.M{
+		// partitionID is only mapped in the non-eph option which is not available yet, so this field will be temporarily unavailable
+		//"partition_id": partitionID,
+		"eventhub":       in.config.EventHubName,
+		"consumer_group": in.config.ConsumerGroup,
+	}
+
+	// update the input metrics
+	in.metrics.receivedMessages.Inc()
+	in.metrics.receivedBytes.Add(uint64(len(event.Data)))
+
+	records := in.parseMultipleRecords(event.Data)
+
+	for _, record := range records {
+		_, _ = azure.Put("offset", event.SystemProperties.Offset)
+		_, _ = azure.Put("sequence_number", event.SystemProperties.SequenceNumber)
+		_, _ = azure.Put("enqueued_time", event.SystemProperties.EnqueuedTime)
+
+		//ok := in.outlet.OnEvent(beat.Event{
+		//	// this is the default value for the @timestamp field; usually the ingest
+		//	// pipeline replaces it with a value in the payload.
+		//	Timestamp: processingStartTime,
+		//	Fields: mapstr.M{
+		//		"message": record,
+		//		"azure":   azure,
+		//	},
+		//	Private: event.Data,
+		//})
+		//if !ok {
+		//	in.metrics.processingTime.Update(time.Since(processingStartTime).Nanoseconds())
+		//	return ok
+		//}
+
+		beatEvent := beat.Event{
+			// this is the default value for the @timestamp field; usually the ingest
+			// pipeline replaces it with a value in the payload.
+			Timestamp: processingStartTime,
+			Fields: mapstr.M{
+				"message": record,
+				"azure":   azure,
+			},
+			Private: event.Data,
+		}
+
+		in.client.Publish(beatEvent)
+
+		in.metrics.sentEvents.Inc()
+	}
+
+	in.metrics.processedMessages.Inc()
+	in.metrics.processingTime.Update(time.Since(processingStartTime).Nanoseconds())
+
+	return true
+}
+
+// parseMultipleRecords will try to split the message into multiple ones based on the group field provided by the configuration
+func (in *eventHubInputV1) parseMultipleRecords(bMessage []byte) []string {
+	var mapObject map[string][]interface{}
+	var messages []string
+
+	// Clean up the message for known issues [1] where Azure services produce malformed JSON documents.
+	// Sanitization occurs if options are available and the message contains an invalid JSON.
+	//
+	// [1]: https://learn.microsoft.com/en-us/answers/questions/1001797/invalid-json-logs-produced-for-function-apps
+	if len(in.config.SanitizeOptions) != 0 && !json.Valid(bMessage) {
+		bMessage = sanitize(bMessage, in.config.SanitizeOptions...)
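+		// For example, with the SINGLE_QUOTES option a malformed payload such as
+		//   {'records': [{'level': 'Informational'}]}
+		// is rewritten with double quotes before decoding (illustrative only;
+		// see sanitize() for the exact rules applied).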
+ in.metrics.sanitizedMessages.Inc() + } + + // check if the message is a "records" object containing a list of events + err := json.Unmarshal(bMessage, &mapObject) + if err == nil { + if len(mapObject[expandEventListFromField]) > 0 { + for _, ms := range mapObject[expandEventListFromField] { + js, err := json.Marshal(ms) + if err == nil { + messages = append(messages, string(js)) + in.metrics.receivedEvents.Inc() + } else { + in.log.Errorw(fmt.Sprintf("serializing message %s", ms), "error", err) + } + } + } + } else { + in.log.Debugf("deserializing multiple messages to a `records` object returning error: %s", err) + // in some cases the message is an array + var arrayObject []interface{} + err = json.Unmarshal(bMessage, &arrayObject) + if err != nil { + // return entire message + in.log.Debugf("deserializing multiple messages to an array returning error: %s", err) + in.metrics.decodeErrors.Inc() + return []string{string(bMessage)} + } + + for _, ms := range arrayObject { + js, err := json.Marshal(ms) + if err == nil { + messages = append(messages, string(js)) + in.metrics.receivedEvents.Inc() + } else { + in.log.Errorw(fmt.Sprintf("serializing message %s", ms), "error", err) + } + } + } + + return messages +} diff --git a/x-pack/filebeat/input/azureeventhub/v2_input.go b/x-pack/filebeat/input/azureeventhub/v2_input.go new file mode 100644 index 00000000000..162be4df23e --- /dev/null +++ b/x-pack/filebeat/input/azureeventhub/v2_input.go @@ -0,0 +1,302 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. + +//go:build !aix + +package azureeventhub + +import ( + "context" + "encoding/json" + "errors" + "fmt" + "github.com/Azure/azure-sdk-for-go/sdk/messaging/azeventhubs" + "github.com/Azure/azure-sdk-for-go/sdk/messaging/azeventhubs/checkpoints" + "github.com/Azure/azure-sdk-for-go/sdk/storage/azblob/container" + v2 "github.com/elastic/beats/v7/filebeat/input/v2" + "github.com/elastic/beats/v7/libbeat/beat" + "github.com/elastic/elastic-agent-libs/logp" + "github.com/elastic/elastic-agent-libs/mapstr" + "time" +) + +type eventHubInputV2 struct { + config azureInputConfig + log *logp.Logger + metrics *inputMetrics + checkpointStore *checkpoints.BlobStore + consumerClient *azeventhubs.ConsumerClient + client beat.Client +} + +func newEventHubInputV2(config azureInputConfig, log *logp.Logger) (v2.Input, error) { + return &eventHubInputV2{ + config: config, + log: log.Named(inputName), + }, nil +} + +func (in *eventHubInputV2) Name() string { + return inputName +} + +func (in *eventHubInputV2) Test(v2.TestContext) error { + return nil +} + +func (in *eventHubInputV2) Run( + inputContext v2.Context, + pipeline beat.Pipeline, +) error { + var err error + + // Create client for publishing events and receive notification of their ACKs. 
+	in.client, err = createPipelineClient(pipeline)
+	if err != nil {
+		return fmt.Errorf("failed to create pipeline client: %w", err)
+	}
+	defer in.client.Close()
+
+	// Setup input metrics
+	in.metrics = newInputMetrics(inputContext.ID, nil)
+	defer in.metrics.Close()
+
+	ctx := v2.GoContextFromCanceler(inputContext.Cancelation)
+
+	// Initialize everything for this run
+	err = in.setup(ctx)
+	if err != nil {
+		return err
+	}
+	defer in.consumerClient.Close(context.Background())
+
+	// Start the main run loop
+	in.run(ctx)
+
+	return nil
+}
+
+func (in *eventHubInputV2) setup(ctx context.Context) error {
+	// FIXME: check more client creation options.
+	blobContainerClient, err := container.NewClientFromConnectionString(
+		in.config.SAConnectionString,
+		in.config.SAContainer,
+		nil,
+	)
+	if err != nil {
+		return fmt.Errorf("failed to create blob container client: %w", err)
+	}
+
+	checkpointStore, err := checkpoints.NewBlobStore(blobContainerClient, nil)
+	if err != nil {
+		return fmt.Errorf("failed to create checkpoint store: %w", err)
+	}
+	in.checkpointStore = checkpointStore
+
+	consumerClient, err := azeventhubs.NewConsumerClientFromConnectionString(
+		in.config.ConnectionString,
+		in.config.EventHubName,
+		in.config.ConsumerGroup,
+		nil,
+	)
+	if err != nil {
+		return fmt.Errorf("failed to create consumer client: %w", err)
+	}
+	in.consumerClient = consumerClient
+
+	return nil
+}
+
+func (in *eventHubInputV2) run(ctx context.Context) {
+	processor, err := azeventhubs.NewProcessor(
+		in.consumerClient,
+		in.checkpointStore,
+		nil,
+	)
+	if err != nil {
+		in.log.Errorw("error creating event processor", "error", err)
+		return
+	}
+
+	// Run in the background, launching goroutines to process each partition
+	go in.workersLoop(processor)
+
+	if err := processor.Run(ctx); err != nil {
+		in.log.Errorw("error running processor", "error", err)
+	}
+}
+
+func (in *eventHubInputV2) workersLoop(processor *azeventhubs.Processor) {
+	for {
+		processorPartitionClient := processor.NextPartitionClient(context.TODO())
+
+		if processorPartitionClient == nil {
+			// Processor has stopped
+			break
+		}
+
+		go func() {
+			if err := in.processEventsForPartition(processorPartitionClient); err != nil {
+				//panic(err)
+				logp.Info("error processing events for partition: %v", err)
+			}
+		}()
+	}
+}
+
+// processEventsForPartition shows the typical pattern for processing a partition.
+func (in *eventHubInputV2) processEventsForPartition(partitionClient *azeventhubs.ProcessorPartitionClient) error {
+	// 1. [BEGIN] Initialize any partition specific resources for your application.
+	// 2. [CONTINUOUS] Loop, calling ReceiveEvents() and UpdateCheckpoint().
+	// 3. [END] Cleanup any resources.
+
+	defer func() {
+		// 3/3 [END] Do cleanup here, like shutting down database clients
+		// or other resources used for processing this partition.
+		shutdownPartitionResources(partitionClient)
+	}()
+
+	// 1/3 [BEGIN] Initialize any partition specific resources for your application.
+	if err := initializePartitionResources(partitionClient.PartitionID()); err != nil {
+		return err
+	}
+
+	// 2/3 [CONTINUOUS] Receive events, checkpointing as needed using UpdateCheckpoint.
+	for {
+		// Wait up to 10 seconds for 100 events, otherwise return whatever was collected during that time.
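+		// The context deadline below bounds the wait: context.DeadlineExceeded
+		// is an expected outcome here (it just means fewer than 100 events
+		// arrived in the window), which is why it is filtered out of the
+		// error handling after the call.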
+ receiveCtx, cancelReceive := context.WithTimeout(context.TODO(), 10*time.Second) + events, err := partitionClient.ReceiveEvents(receiveCtx, 100, nil) + cancelReceive() + + if err != nil && !errors.Is(err, context.DeadlineExceeded) { + var eventHubError *azeventhubs.Error + + if errors.As(err, &eventHubError) && eventHubError.Code == azeventhubs.ErrorCodeOwnershipLost { + return nil + } + + return err + } + + if len(events) == 0 { + continue + } + + err = in.processReceivedEvents(events) + if err != nil { + return fmt.Errorf("error processing received events: %w", err) + } + + // Updates the checkpoint with the latest event received. If processing needs to restart + // it will restart from this point, automatically. + if err := partitionClient.UpdateCheckpoint(context.TODO(), events[len(events)-1], nil); err != nil { + return err + } + } +} + +func (in *eventHubInputV2) processReceivedEvents(receivedEvents []*azeventhubs.ReceivedEventData) error { + processingStartTime := time.Now() + azure := mapstr.M{ + // The partition ID is not available. + // "partition_id": partitionID, + "eventhub": in.config.EventHubName, + "consumer_group": in.config.ConsumerGroup, + } + + for _, receivedEventData := range receivedEvents { + // A single event can contain multiple records. We create a new event for each record. + records := in.parseEvent(receivedEventData.Body) + + for record := range records { + _, _ = azure.Put("offset", receivedEventData.Offset) + _, _ = azure.Put("sequence_number", receivedEventData.SequenceNumber) + _, _ = azure.Put("enqueued_time", receivedEventData.EnqueuedTime) + + event := beat.Event{ + // this is the default value for the @timestamp field; usually the ingest + // pipeline replaces it with a value in the payload. + Timestamp: processingStartTime, + Fields: mapstr.M{ + "message": record, + "azure": azure, + }, + Private: receivedEventData.Body, + } + + in.client.Publish(event) + } + } + + return nil +} + +func (in *eventHubInputV2) parseEvent(bMessage []byte) []string { + var mapObject map[string][]interface{} + var records []string + + // Clean up the message for known issues [1] where Azure services produce malformed JSON documents. + // Sanitization occurs if options are available and the message contains an invalid JSON. + // + // [1]: https://learn.microsoft.com/en-us/answers/questions/1001797/invalid-json-logs-produced-for-function-apps + if len(in.config.SanitizeOptions) != 0 && !json.Valid(bMessage) { + bMessage = sanitize(bMessage, in.config.SanitizeOptions...) 
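+		// NOTE: this sanitization (and the record unpacking below) mirrors
+		// the v1 input's parseMultipleRecords; if both inputs stay around,
+		// the shared logic could move into a common helper.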
+ in.metrics.sanitizedMessages.Inc() + } + + // check if the message is a "records" object containing a list of events + err := json.Unmarshal(bMessage, &mapObject) + if err == nil { + if len(mapObject[expandEventListFromField]) > 0 { + for _, ms := range mapObject[expandEventListFromField] { + js, err := json.Marshal(ms) + if err == nil { + records = append(records, string(js)) + in.metrics.receivedEvents.Inc() + } else { + in.log.Errorw(fmt.Sprintf("serializing message %s", ms), "error", err) + } + } + } + } else { + in.log.Debugf("deserializing multiple messages to a `records` object returning error: %s", err) + // in some cases the message is an array + var arrayObject []interface{} + err = json.Unmarshal(bMessage, &arrayObject) + if err != nil { + // return entire message + in.log.Debugf("deserializing multiple messages to an array returning error: %s", err) + in.metrics.decodeErrors.Inc() + return []string{string(bMessage)} + } + + for _, ms := range arrayObject { + js, err := json.Marshal(ms) + if err == nil { + records = append(records, string(js)) + in.metrics.receivedEvents.Inc() + } else { + in.log.Errorw(fmt.Sprintf("serializing message %s", ms), "error", err) + } + } + } + + return records +} + +func initializePartitionResources(partitionID string) error { + // initialize things that might be partition specific, like a + // database connection. + return nil +} + +func shutdownPartitionResources(partitionClient *azeventhubs.ProcessorPartitionClient) { + // Each PartitionClient holds onto an external resource and should be closed if you're + // not processing them anymore. + defer partitionClient.Close(context.TODO()) +} diff --git a/x-pack/filebeat/input/default-inputs/inputs_other.go b/x-pack/filebeat/input/default-inputs/inputs_other.go index 2fa63535dbb..6c7708d7c0f 100644 --- a/x-pack/filebeat/input/default-inputs/inputs_other.go +++ b/x-pack/filebeat/input/default-inputs/inputs_other.go @@ -13,6 +13,7 @@ import ( "github.com/elastic/beats/v7/x-pack/filebeat/input/awscloudwatch" "github.com/elastic/beats/v7/x-pack/filebeat/input/awss3" "github.com/elastic/beats/v7/x-pack/filebeat/input/azureblobstorage" + "github.com/elastic/beats/v7/x-pack/filebeat/input/azureeventhub" "github.com/elastic/beats/v7/x-pack/filebeat/input/benchmark" "github.com/elastic/beats/v7/x-pack/filebeat/input/cel" "github.com/elastic/beats/v7/x-pack/filebeat/input/cloudfoundry" @@ -31,6 +32,7 @@ import ( func xpackInputs(info beat.Info, log *logp.Logger, store beater.StateStore) []v2.Plugin { return []v2.Plugin{ azureblobstorage.Plugin(log, store), + azureeventhub.Plugin(log), cel.Plugin(log, store), cloudfoundry.Plugin(), entityanalytics.Plugin(log), From b19a460c96f8822b9c116a4e60aaf96c045429fa Mon Sep 17 00:00:00 2001 From: Maurizio Branca Date: Thu, 16 May 2024 16:34:39 +0200 Subject: [PATCH 02/41] Rename client to pipelineClient for clarity --- x-pack/filebeat/input/azureeventhub/input.go | 4 ++-- x-pack/filebeat/input/azureeventhub/metrics.go | 2 +- .../filebeat/input/azureeventhub/v1_input.go | 18 +++++++++--------- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/x-pack/filebeat/input/azureeventhub/input.go b/x-pack/filebeat/input/azureeventhub/input.go index 618db29b1a5..ff19a1dc3f4 100644 --- a/x-pack/filebeat/input/azureeventhub/input.go +++ b/x-pack/filebeat/input/azureeventhub/input.go @@ -214,7 +214,7 @@ func (m *eventHubInputManager) Create(cfg *conf.C) (v2.Input, error) { // a.log.Infof("%s input worker is stopping.", inputName) // if a.processor != nil { // // Tells the 
processor to stop processing events and release all -// // resources (like scheduler, leaser, checkpointer, and client). +// // resources (like scheduler, leaser, checkpointer, and pipelineClient). // err := a.processor.Close(context.Background()) // if err != nil { // a.log.Errorw("error while closing eventhostprocessor", "error", err) @@ -240,7 +240,7 @@ func (m *eventHubInputManager) Create(cfg *conf.C) (v2.Input, error) { //// a.log.Infof("%s input worker is stopping.", inputName) //// if a.processor != nil { //// // Tells the processor to stop processing events and release all -//// // resources (like scheduler, leaser, checkpointer, and client). +//// // resources (like scheduler, leaser, checkpointer, and pipelineClient). //// err := a.processor.Close(context.Background()) //// if err != nil { //// a.log.Errorw("error while closing eventhostprocessor", "error", err) diff --git a/x-pack/filebeat/input/azureeventhub/metrics.go b/x-pack/filebeat/input/azureeventhub/metrics.go index 70efb1bdb9c..efef262d2c0 100644 --- a/x-pack/filebeat/input/azureeventhub/metrics.go +++ b/x-pack/filebeat/input/azureeventhub/metrics.go @@ -14,7 +14,7 @@ import ( "github.com/elastic/elastic-agent-libs/monitoring/adapter" ) -// newInputMetrics creates a new `*inputMetrics` to track metrics. +// newInputMetrics creates a new `*inputMetrics` to track input metrics. func newInputMetrics(id string, parentRegistry *monitoring.Registry) *inputMetrics { reg, unregister := inputmon.NewInputRegistry(inputName, id, parentRegistry) inputMetrics := inputMetrics{ diff --git a/x-pack/filebeat/input/azureeventhub/v1_input.go b/x-pack/filebeat/input/azureeventhub/v1_input.go index 91903217907..8052cf2e0f3 100644 --- a/x-pack/filebeat/input/azureeventhub/v1_input.go +++ b/x-pack/filebeat/input/azureeventhub/v1_input.go @@ -24,11 +24,11 @@ import ( ) type eventHubInputV1 struct { - config azureInputConfig - log *logp.Logger - metrics *inputMetrics - processor *eph.EventProcessorHost - client beat.Client + config azureInputConfig + log *logp.Logger + metrics *inputMetrics + processor *eph.EventProcessorHost + pipelineClient beat.Client } func newEventHubInputV1(config azureInputConfig, log *logp.Logger) (v2.Input, error) { @@ -53,12 +53,12 @@ func (in *eventHubInputV1) Run( ) error { var err error - // Create client for publishing events and receive notification of their ACKs. - in.client, err = createPipelineClient(pipeline) + // Create pipelineClient for publishing events and receive notification of their ACKs. 
+	in.pipelineClient, err = createPipelineClient(pipeline)
 	if err != nil {
-		return fmt.Errorf("failed to create pipeline client: %w", err)
+		return fmt.Errorf("failed to create pipelineClient: %w", err)
 	}
-	defer in.client.Close()
+	defer in.pipelineClient.Close()
 
 	// Setup input metrics
 	inputMetrics := newInputMetrics(inputContext.ID, nil)

From 2a90f7eafd35fecdcfe278f11da874ab2b87d5d7 Mon Sep 17 00:00:00 2001
From: Maurizio Branca 
Date: Thu, 16 May 2024 16:35:28 +0200
Subject: [PATCH 03/41] Rename azure to eventHubMetadata for clarity

---
 x-pack/filebeat/input/azureeventhub/v1_input.go | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/x-pack/filebeat/input/azureeventhub/v1_input.go b/x-pack/filebeat/input/azureeventhub/v1_input.go
index 8052cf2e0f3..4f0299d8040 100644
--- a/x-pack/filebeat/input/azureeventhub/v1_input.go
+++ b/x-pack/filebeat/input/azureeventhub/v1_input.go
@@ -198,9 +198,9 @@ func (in *eventHubInputV1) run(ctx context.Context) error {
 
 func (in *eventHubInputV1) processEvents(event *eventhub.Event, partitionID string) bool {
 	processingStartTime := time.Now()
-	azure := mapstr.M{
-		// partitionID is only mapped in the non-eph option which is not available yet, this field will be temporary unavailable
-		//"partition_id": partitionID,
+	eventHubMetadata := mapstr.M{
+		// The `partition_id` is not available in the
+		// current version of the SDK.
 		"eventhub":       in.config.EventHubName,
 		"consumer_group": in.config.ConsumerGroup,
 	}
@@ -212,9 +212,9 @@ func (in *eventHubInputV1) processEvents(event *eventhub.Event, partitionID stri
 	records := in.parseMultipleRecords(event.Data)
 
 	for _, record := range records {
-		_, _ = azure.Put("offset", event.SystemProperties.Offset)
-		_, _ = azure.Put("sequence_number", event.SystemProperties.SequenceNumber)
-		_, _ = azure.Put("enqueued_time", event.SystemProperties.EnqueuedTime)
+		_, _ = eventHubMetadata.Put("offset", event.SystemProperties.Offset)
+		_, _ = eventHubMetadata.Put("sequence_number", event.SystemProperties.SequenceNumber)
+		_, _ = eventHubMetadata.Put("enqueued_time", event.SystemProperties.EnqueuedTime)
 
 		//ok := in.outlet.OnEvent(beat.Event{
 		//	// this is the default value for the @timestamp field; usually the ingest
@@ -237,7 +237,7 @@ func (in *eventHubInputV1) processEvents(event *eventhub.Event, partitionID stri
 			Timestamp: processingStartTime,
 			Fields: mapstr.M{
 				"message": record,
-				"azure":   azure,
+				"azure":   eventHubMetadata,
 			},
 			Private: event.Data,
 		}

From e5aa369f0b1f2896f345ce7318cc67d6f57a5c16 Mon Sep 17 00:00:00 2001
From: Maurizio Branca 
Date: Thu, 16 May 2024 16:36:21 +0200
Subject: [PATCH 04/41] Move remaining code out of eph.go

---
 x-pack/filebeat/input/azureeventhub/v1_eph.go | 36 +++++++++----------
 .../filebeat/input/azureeventhub/v1_input.go  | 20 +++++++++++
 2 files changed, 37 insertions(+), 19 deletions(-)

diff --git a/x-pack/filebeat/input/azureeventhub/v1_eph.go b/x-pack/filebeat/input/azureeventhub/v1_eph.go
index c771dbbb463..bae2b465c6e 100644
--- a/x-pack/filebeat/input/azureeventhub/v1_eph.go
+++ b/x-pack/filebeat/input/azureeventhub/v1_eph.go
@@ -6,8 +6,6 @@
 
 package azureeventhub
 
-import "github.com/Azure/go-autorest/autorest/azure"
-
 // import (
 //
 //	"context"
 //	"errors"
 //	"fmt"
 //
 //	eventhub "github.com/Azure/azure-event-hubs-go/v3"
 //	"github.com/Azure/azure-event-hubs-go/v3/eph"
 //	"github.com/Azure/azure-event-hubs-go/v3/storage"
 //	"github.com/Azure/azure-storage-blob-go/azblob"
 //	"github.com/Azure/go-autorest/autorest/azure"
 //
 // )
 //
 // // users can select from one of the already defined azure cloud envs
-var environments = map[string]azure.Environment{
-	azure.ChinaCloud.ResourceManagerEndpoint:        azure.ChinaCloud,
-	azure.GermanCloud.ResourceManagerEndpoint: 
azure.GermanCloud,
-	azure.PublicCloud.ResourceManagerEndpoint:       azure.PublicCloud,
-	azure.USGovernmentCloud.ResourceManagerEndpoint: azure.USGovernmentCloud,
-}
+//var environments = map[string]azure.Environment{
+//	azure.ChinaCloud.ResourceManagerEndpoint:        azure.ChinaCloud,
+//	azure.GermanCloud.ResourceManagerEndpoint:       azure.GermanCloud,
+//	azure.PublicCloud.ResourceManagerEndpoint:       azure.PublicCloud,
+//	azure.USGovernmentCloud.ResourceManagerEndpoint: azure.USGovernmentCloud,
+//}
 
 // // runWithEPH will consume ingested events using the Event Processor Host (EPH).
 // //
@@ -105,14 +103,14 @@
 // //	return nil
 // }
 
-func getAzureEnvironment(overrideResManager string) (azure.Environment, error) {
-	// if no override is set then the azure public cloud is used
-	if overrideResManager == "" || overrideResManager == "" {
-		return azure.PublicCloud, nil
-	}
-	if env, ok := environments[overrideResManager]; ok {
-		return env, nil
-	}
-	// can retrieve hybrid env from the resource manager endpoint
-	return azure.EnvironmentFromURL(overrideResManager)
-}
+//func getAzureEnvironment(overrideResManager string) (azure.Environment, error) {
+//	// if no override is set then the azure public cloud is used
+//	if overrideResManager == "" || overrideResManager == "" {
+//		return azure.PublicCloud, nil
+//	}
+//	if env, ok := environments[overrideResManager]; ok {
+//		return env, nil
+//	}
+//	// can retrieve hybrid env from the resource manager endpoint
+//	return azure.EnvironmentFromURL(overrideResManager)
+//}
diff --git a/x-pack/filebeat/input/azureeventhub/v1_input.go b/x-pack/filebeat/input/azureeventhub/v1_input.go
index 4f0299d8040..de52fc38e23 100644
--- a/x-pack/filebeat/input/azureeventhub/v1_input.go
+++ b/x-pack/filebeat/input/azureeventhub/v1_input.go
@@ -11,6 +11,7 @@ import (
 	"encoding/json"
 	"errors"
 	"fmt"
+	"github.com/Azure/go-autorest/autorest/azure"
 	"time"
 
 	eventhub "github.com/Azure/azure-event-hubs-go/v3"
@@ -23,6 +24,13 @@ import (
 	"github.com/elastic/elastic-agent-libs/mapstr"
 )
 
+var environments = map[string]azure.Environment{
+	azure.ChinaCloud.ResourceManagerEndpoint:        azure.ChinaCloud,
+	azure.GermanCloud.ResourceManagerEndpoint:       azure.GermanCloud,
+	azure.PublicCloud.ResourceManagerEndpoint:       azure.PublicCloud,
+	azure.USGovernmentCloud.ResourceManagerEndpoint: azure.USGovernmentCloud,
+}
+
 type eventHubInputV1 struct {
 	config         azureInputConfig
 	log            *logp.Logger
@@ -306,3 +314,15 @@ func (in *eventHubInputV1) parseMultipleRecords(bMessage []byte) []string {
 
 	return messages
 }
+
+func getAzureEnvironment(overrideResManager string) (azure.Environment, error) {
+	// if no override is set then the azure public cloud is used
+	if overrideResManager == "" || overrideResManager == "<no value>" {
+		return azure.PublicCloud, nil
+	}
+	if env, ok := environments[overrideResManager]; ok {
+		return env, nil
+	}
+	// can retrieve hybrid env from the resource manager endpoint
+	return azure.EnvironmentFromURL(overrideResManager)
+}

From f44b51e8bb20423cb00891ae8cbda1d30d29b498 Mon Sep 17 00:00:00 2001
From: Maurizio Branca 
Date: Thu, 16 May 2024 16:39:07 +0200
Subject: [PATCH 05/41] Remove erroneous error check

---
 x-pack/filebeat/input/azureeventhub/v1_input.go | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/x-pack/filebeat/input/azureeventhub/v1_input.go b/x-pack/filebeat/input/azureeventhub/v1_input.go
index de52fc38e23..7ac2bb153aa 100644
--- a/x-pack/filebeat/input/azureeventhub/v1_input.go
+++ 
b/x-pack/filebeat/input/azureeventhub/v1_input.go @@ -69,12 +69,8 @@ func (in *eventHubInputV1) Run( defer in.pipelineClient.Close() // Setup input metrics - inputMetrics := newInputMetrics(inputContext.ID, nil) - if err != nil { - return fmt.Errorf("failed to create input metrics: %w", err) - } - defer inputMetrics.Close() - in.metrics = inputMetrics + in.metrics = newInputMetrics(inputContext.ID, nil) + defer in.metrics.Close() ctx := v2.GoContextFromCanceler(inputContext.Cancelation) From 62b90b321b33012d43b488f3ee9416fd406d56e7 Mon Sep 17 00:00:00 2001 From: Maurizio Branca Date: Mon, 20 May 2024 10:38:24 +0200 Subject: [PATCH 06/41] Update tests Switch from the v1 Outlet to the v2 PipelineClient --- x-pack/filebeat/input/azureeventhub/input.go | 8 + .../input/azureeventhub/input_test.go | 172 ++++++++++++------ .../input/azureeventhub/metrics_test.go | 29 +-- .../input/azureeventhub/sanitization_test.go | 10 +- x-pack/filebeat/input/azureeventhub/v1_eph.go | 116 ------------ .../input/azureeventhub/v1_eph_test.go | 48 ----- .../filebeat/input/azureeventhub/v1_input.go | 43 +++-- .../input/azureeventhub/v1_input_test.go | 7 + .../filebeat/input/azureeventhub/v2_input.go | 26 ++- 9 files changed, 196 insertions(+), 263 deletions(-) delete mode 100644 x-pack/filebeat/input/azureeventhub/v1_eph.go delete mode 100644 x-pack/filebeat/input/azureeventhub/v1_eph_test.go create mode 100644 x-pack/filebeat/input/azureeventhub/v1_input_test.go diff --git a/x-pack/filebeat/input/azureeventhub/input.go b/x-pack/filebeat/input/azureeventhub/input.go index ff19a1dc3f4..6ccc7863091 100644 --- a/x-pack/filebeat/input/azureeventhub/input.go +++ b/x-pack/filebeat/input/azureeventhub/input.go @@ -9,6 +9,7 @@ package azureeventhub import ( "fmt" "github.com/Azure/azure-sdk-for-go/sdk/azcore/to" + "github.com/Azure/go-autorest/autorest/azure" v2 "github.com/elastic/beats/v7/filebeat/input/v2" "github.com/elastic/beats/v7/libbeat/beat" "github.com/elastic/beats/v7/libbeat/feature" @@ -24,6 +25,13 @@ const ( inputName = "azure-eventhub" ) +var environments = map[string]azure.Environment{ + azure.ChinaCloud.ResourceManagerEndpoint: azure.ChinaCloud, + azure.GermanCloud.ResourceManagerEndpoint: azure.GermanCloud, + azure.PublicCloud.ResourceManagerEndpoint: azure.PublicCloud, + azure.USGovernmentCloud.ResourceManagerEndpoint: azure.USGovernmentCloud, +} + func Plugin(log *logp.Logger) v2.Plugin { return v2.Plugin{ Name: inputName, diff --git a/x-pack/filebeat/input/azureeventhub/input_test.go b/x-pack/filebeat/input/azureeventhub/input_test.go index a5fab488dfc..0fe95b2614f 100644 --- a/x-pack/filebeat/input/azureeventhub/input_test.go +++ b/x-pack/filebeat/input/azureeventhub/input_test.go @@ -8,24 +8,21 @@ package azureeventhub import ( "fmt" + "github.com/Azure/go-autorest/autorest/azure" + "github.com/elastic/elastic-agent-libs/logp" "sync" "testing" "time" - "github.com/elastic/elastic-agent-libs/logp" - "github.com/elastic/elastic-agent-libs/mapstr" "github.com/elastic/elastic-agent-libs/monitoring" eventhub "github.com/Azure/azure-event-hubs-go/v3" "github.com/stretchr/testify/assert" - "github.com/elastic/beats/v7/filebeat/channel" - "github.com/elastic/beats/v7/filebeat/input/inputtest" "github.com/elastic/beats/v7/libbeat/beat" - conf "github.com/elastic/elastic-agent-libs/config" ) -var config = azureInputConfig{ +var defaultTestConfig = azureInputConfig{ SAKey: "", SAName: "", SAContainer: ephContainerName, @@ -33,21 +30,38 @@ var config = azureInputConfig{ ConsumerGroup: "", } +func 
TestGetAzureEnvironment(t *testing.T) {
+	resMan := ""
+	env, err := getAzureEnvironment(resMan)
+	assert.NoError(t, err)
+	assert.Equal(t, env, azure.PublicCloud)
+	resMan = "https://management.microsoftazure.de/"
+	env, err = getAzureEnvironment(resMan)
+	assert.NoError(t, err)
+	assert.Equal(t, env, azure.GermanCloud)
+	resMan = "http://management.invalidhybrid.com/"
+	_, err = getAzureEnvironment(resMan)
+	assert.Errorf(t, err, "invalid character 'F' looking for beginning of value")
+	resMan = "<no value>"
+	env, err = getAzureEnvironment(resMan)
+	assert.NoError(t, err)
+	assert.Equal(t, env, azure.PublicCloud)
+}
+
 func TestProcessEvents(t *testing.T) {
-	// Stub outlet for receiving events generated by the input.
-	o := &stubOutleter{}
-	out, err := newStubOutlet(o)
-	if err != nil {
-		t.Fatal(err)
-	}
+	log := logp.NewLogger(fmt.Sprintf("%s test for input", inputName))
+
 	reg := monitoring.NewRegistry()
 	metrics := newInputMetrics("test", reg)
 	defer metrics.Close()
-	input := azureInput{
-		config:  config,
-		metrics: metrics,
-		outlet:  out,
+	fakePipelineClient := fakeClient{}
+
+	input := eventHubInputV1{
+		config:         defaultTestConfig,
+		log:            log,
+		metrics:        metrics,
+		pipelineClient: &fakePipelineClient,
 	}
 	var sn int64 = 12
 	now := time.Now()
@@ -71,8 +85,9 @@ func TestProcessEvents(t *testing.T) {
 	if !ok {
 		t.Fatal("OnEvent function returned false")
 	}
-	assert.Equal(t, len(o.Events), 1)
-	message, err := o.Events[0].Fields.GetValue("message")
+
+	assert.Equal(t, len(fakePipelineClient.publishedEvents), 1)
+	message, err := fakePipelineClient.publishedEvents[0].Fields.GetValue("message")
 	if err != nil {
 		t.Fatal(err)
 	}
@@ -94,12 +109,16 @@ func TestParseMultipleRecords(t *testing.T) {
 	metrics := newInputMetrics("test", reg)
 	defer metrics.Close()
 
-	input := azureInput{
-		metrics: metrics,
-		log:     logp.NewLogger(fmt.Sprintf("%s test for input", inputName)),
+	fakePipelineClient := fakeClient{}
+
+	input := eventHubInputV1{
+		config:         azureInputConfig{},
+		log:            logp.NewLogger(fmt.Sprintf("%s test for input", inputName)),
+		metrics:        metrics,
+		pipelineClient: &fakePipelineClient,
 	}
 
-	messages := input.parseMultipleRecords([]byte(msg))
+	messages := input.unpackRecords([]byte(msg))
 	assert.NotNil(t, messages)
 	assert.Equal(t, len(messages), 3)
 	for _, ms := range messages {
@@ -110,7 +129,7 @@ func TestParseMultipleRecords(t *testing.T) {
 	msg1 := "[{\"test\":\"this is some message\",\"time\":\"2019-12-17T13:43:44.4946995Z\"}," +
 		"{\"test\":\"this is 2nd message\",\"time\":\"2019-12-17T13:43:44.4946995Z\"}," +
 		"{\"test\":\"this is 3rd message\",\"time\":\"2019-12-17T13:43:44.4946995Z\"}]"
-	messages = input.parseMultipleRecords([]byte(msg1))
+	messages = input.unpackRecords([]byte(msg1))
 	assert.NotNil(t, messages)
 	assert.Equal(t, len(messages), 3)
 	for _, ms := range messages {
@@ -119,7 +138,7 @@ func TestParseMultipleRecords(t *testing.T) {
 
 	// one event only
 	msg2 := "{\"test\":\"this is some message\",\"time\":\"2019-12-17T13:43:44.4946995Z\"}"
-	messages = input.parseMultipleRecords([]byte(msg2))
+	messages = input.unpackRecords([]byte(msg2))
 	assert.NotNil(t, messages)
 	assert.Equal(t, len(messages), 1)
 	for _, ms := range messages {
 	}
 }
 
-func TestNewInputDone(t *testing.T) {
-	config := mapstr.M{
-		"connection_string":   "Endpoint=sb://something",
-		"eventhub":            "insights-operational-logs",
-		"storage_account":     "someaccount",
-		"storage_account_key": "secret",
-	}
-	inputtest.AssertNotStartedInputCanBeDone(t, NewInput, &config)
-}
+//func 
TestNewInputDone(t *testing.T) { +// log := logp.NewLogger(fmt.Sprintf("%s test for input", inputName)) +// config := mapstr.M{ +// "connection_string": "Endpoint=sb://something", +// "eventhub": "insights-operational-logs", +// "storage_account": "someaccount", +// "storage_account_key": "secret", +// } +// inputtest.AssertNotStartedInputCanBeDone(t, NewInput, &config) +//} func TestStripConnectionString(t *testing.T) { tests := []struct { @@ -161,36 +181,70 @@ func TestStripConnectionString(t *testing.T) { } } -type stubOutleter struct { - sync.Mutex - cond *sync.Cond - done bool - Events []beat.Event +// fakePipeline returns new fakeClients for simple tests. +type fakePipeline struct{} + +func (c *fakePipeline) ConnectWith(clientConfig beat.ClientConfig) (beat.Client, error) { + return &fakeClient{}, nil +} + +func (c *fakePipeline) Connect() (beat.Client, error) { + return &fakeClient{}, nil } -func newStubOutlet(stub *stubOutleter) (channel.Outleter, error) { - stub.cond = sync.NewCond(stub) - defer stub.Close() +var _ beat.Client = (*fakeClient)(nil) - connector := channel.ConnectorFunc(func(_ *conf.C, _ beat.ClientConfig) (channel.Outleter, error) { - return stub, nil - }) - return connector.ConnectWith(nil, beat.ClientConfig{ - Processing: beat.ProcessingConfig{}, - }) +// ackClient is a fake beat.Client that ACKs the published messages. +type fakeClient struct { + sync.Mutex + publishedEvents []beat.Event } -func (o *stubOutleter) Close() error { - o.Lock() - defer o.Unlock() - o.done = true - return nil +func (c *fakeClient) Close() error { return nil } + +func (c *fakeClient) Publish(event beat.Event) { + c.Lock() + defer c.Unlock() + c.publishedEvents = append(c.publishedEvents, event) } -func (o *stubOutleter) Done() <-chan struct{} { return nil } -func (o *stubOutleter) OnEvent(event beat.Event) bool { - o.Lock() - defer o.Unlock() - o.Events = append(o.Events, event) - o.cond.Broadcast() - return o.done + +func (c *fakeClient) PublishAll(event []beat.Event) { + for _, e := range event { + c.Publish(e) + } } + +// +//type stubOutleter struct { +// sync.Mutex +// cond *sync.Cond +// done bool +// Events []beat.Event +//} +// +//func newStubOutlet(stub *stubOutleter) (channel.Outleter, error) { +// stub.cond = sync.NewCond(stub) +// defer stub.Close() +// +// connector := channel.ConnectorFunc(func(_ *conf.C, _ beat.ClientConfig) (channel.Outleter, error) { +// return stub, nil +// }) +// return connector.ConnectWith(nil, beat.ClientConfig{ +// Processing: beat.ProcessingConfig{}, +// }) +//} +// +//func (o *stubOutleter) Close() error { +// o.Lock() +// defer o.Unlock() +// o.done = true +// return nil +//} +//func (o *stubOutleter) Done() <-chan struct{} { return nil } +//func (o *stubOutleter) OnEvent(event beat.Event) bool { +// o.Lock() +// defer o.Unlock() +// o.Events = append(o.Events, event) +// o.cond.Broadcast() +// return o.done +//} diff --git a/x-pack/filebeat/input/azureeventhub/metrics_test.go b/x-pack/filebeat/input/azureeventhub/metrics_test.go index b6730c34956..da1a7ae84f4 100644 --- a/x-pack/filebeat/input/azureeventhub/metrics_test.go +++ b/x-pack/filebeat/input/azureeventhub/metrics_test.go @@ -117,18 +117,19 @@ func TestInputMetricsEventsReceived(t *testing.T) { reg := monitoring.NewRegistry() metrics := newInputMetrics("test", reg) - // Stub outlet for receiving events generated by the input. 
- o := &stubOutleter{} - out, err := newStubOutlet(o) - if err != nil { - t.Fatal(err) - } - - input := azureInput{ - config: inputConfig, - metrics: metrics, - outlet: out, - log: log, + //// Stub outlet for receiving events generated by the input. + //o := &stubOutleter{} + //out, err := newStubOutlet(o) + //if err != nil { + // t.Fatal(err) + //} + fakeClient := fakeClient{} + + input := eventHubInputV1{ + config: inputConfig, + metrics: metrics, + pipelineClient: &fakeClient, + log: log, } ev := eventhub.Event{ @@ -141,8 +142,8 @@ func TestInputMetricsEventsReceived(t *testing.T) { t.Fatal("OnEvent function returned false") } - if ok := assert.Equal(t, len(tc.expectedRecords), len(o.Events)); ok { - for i, e := range o.Events { + if ok := assert.Equal(t, len(tc.expectedRecords), len(fakeClient.publishedEvents)); ok { + for i, e := range fakeClient.publishedEvents { msg, err := e.Fields.GetValue("message") if err != nil { t.Fatal(err) diff --git a/x-pack/filebeat/input/azureeventhub/sanitization_test.go b/x-pack/filebeat/input/azureeventhub/sanitization_test.go index f4d072f5036..6e2645c40d7 100644 --- a/x-pack/filebeat/input/azureeventhub/sanitization_test.go +++ b/x-pack/filebeat/input/azureeventhub/sanitization_test.go @@ -3,7 +3,6 @@ // you may not use this file except in compliance with the Elastic License. //go:build !aix -// +build !aix package azureeventhub @@ -31,15 +30,16 @@ func TestParseMultipleRecordsSanitization(t *testing.T) { metrics := newInputMetrics("test", reg) defer metrics.Close() - input := azureInput{ + input := eventHubInputV1{ config: azureInputConfig{ SanitizeOptions: []string{"SINGLE_QUOTES", "NEW_LINES"}, }, - metrics: metrics, - log: logp.NewLogger(fmt.Sprintf("%s test for input", inputName)), + log: logp.NewLogger(fmt.Sprintf("%s test for input", inputName)), + metrics: metrics, + pipelineClient: &fakeClient{}, } - messages := input.parseMultipleRecords([]byte(msg)) + messages := input.unpackRecords([]byte(msg)) assert.NotNil(t, messages) assert.Equal(t, len(messages), 3) for _, ms := range messages { diff --git a/x-pack/filebeat/input/azureeventhub/v1_eph.go b/x-pack/filebeat/input/azureeventhub/v1_eph.go deleted file mode 100644 index bae2b465c6e..00000000000 --- a/x-pack/filebeat/input/azureeventhub/v1_eph.go +++ /dev/null @@ -1,116 +0,0 @@ -// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one -// or more contributor license agreements. Licensed under the Elastic License; -// you may not use this file except in compliance with the Elastic License. - -//go:build !aix - -package azureeventhub - -// import ( -// -// "context" -// "errors" -// "fmt" -// -// eventhub "github.com/Azure/azure-event-hubs-go/v3" -// "github.com/Azure/azure-event-hubs-go/v3/eph" -// "github.com/Azure/azure-event-hubs-go/v3/storage" -// "github.com/Azure/azure-storage-blob-go/azblob" -// "github.com/Azure/go-autorest/autorest/azure" -// -// ) -// -// // users can select from one of the already defined azure cloud envs -//var environments = map[string]azure.Environment{ -// azure.ChinaCloud.ResourceManagerEndpoint: azure.ChinaCloud, -// azure.GermanCloud.ResourceManagerEndpoint: azure.GermanCloud, -// azure.PublicCloud.ResourceManagerEndpoint: azure.PublicCloud, -// azure.USGovernmentCloud.ResourceManagerEndpoint: azure.USGovernmentCloud, -//} - -// // runWithEPH will consume ingested events using the Event Processor Host (EPH). 
-// // -// // To learn more, check the following resources: -// // - https://github.com/Azure/azure-event-hubs-go#event-processor-host -// // - https://docs.microsoft.com/en-us/azure/event-hubs/event-hubs-event-processor-host -// -// func (in *eventHubInputV1) runWithEPH() error { -// // create a new Azure Storage Leaser / Checkpointer -// cred, err := azblob.NewSharedKeyCredential(in.config.SAName, in.config.SAKey) -// if err != nil { -// return err -// } -// env, err := getAzureEnvironment(in.config.OverrideEnvironment) -// if err != nil { -// return err -// } -// leaserCheckpointer, err := storage.NewStorageLeaserCheckpointer(cred, in.config.SAName, in.config.SAContainer, env) -// if err != nil { -// in.log.Errorw("error creating storage leaser checkpointer", "error", err) -// return err -// } -// -// // adding a nil EventProcessorHostOption will break the code, -// // this is why a condition is added and a.processor is assigned. -// if in.config.ConsumerGroup != "" { -// in.processor, err = eph.NewFromConnectionString( -// in.workerCtx, -// fmt.Sprintf("%s%s%s", in.config.ConnectionString, eventHubConnector, in.config.EventHubName), -// leaserCheckpointer, -// leaserCheckpointer, -// eph.WithConsumerGroup(in.config.ConsumerGroup), -// eph.WithNoBanner()) -// } else { -// in.processor, err = eph.NewFromConnectionString( -// in.workerCtx, -// fmt.Sprintf("%s%s%s", in.config.ConnectionString, eventHubConnector, in.config.EventHubName), -// leaserCheckpointer, -// leaserCheckpointer, -// eph.WithNoBanner()) -// } -// if err != nil { -// in.log.Errorw("error creating processor", "error", err) -// return err -// } -// -// // register a message handler -- many can be registered -// handlerID, err := in.processor.RegisterHandler(in.workerCtx, -// func(c context.Context, e *eventhub.Event) error { -// var onEventErr error -// // partitionID is not yet mapped in the azure-eventhub sdk -// ok := in.processEvents(e, "") -// if !ok { -// onEventErr = errors.New("OnEvent function returned false. Stopping input worker") -// in.log.Error(onEventErr.Error()) -// in.Stop() -// } -// return onEventErr -// }) -// if err != nil { -// in.log.Errorw("error registering handler", "error", err) -// return err -// } -// in.log.Infof("handler id: %q is registered\n", handlerID) -// -// // Start handling messages from all of the partitions balancing across -// // multiple consumers. -// // The processor can be stopped by calling `Close()` on the processor. -// err = in.processor.StartNonBlocking(in.workerCtx) -// if err != nil { -// in.log.Errorw("error starting the processor", "error", err) -// return err -// } -// -// return nil -// } -//func getAzureEnvironment(overrideResManager string) (azure.Environment, error) { -// // if no override is set then the azure public cloud is used -// if overrideResManager == "" || overrideResManager == "" { -// return azure.PublicCloud, nil -// } -// if env, ok := environments[overrideResManager]; ok { -// return env, nil -// } -// // can retrieve hybrid env from the resource manager endpoint -// return azure.EnvironmentFromURL(overrideResManager) -//} diff --git a/x-pack/filebeat/input/azureeventhub/v1_eph_test.go b/x-pack/filebeat/input/azureeventhub/v1_eph_test.go deleted file mode 100644 index 86922d29559..00000000000 --- a/x-pack/filebeat/input/azureeventhub/v1_eph_test.go +++ /dev/null @@ -1,48 +0,0 @@ -// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one -// or more contributor license agreements. 
Licensed under the Elastic License; -// you may not use this file except in compliance with the Elastic License. - -//go:build !aix - -package azureeventhub - -import ( - "testing" - - "github.com/Azure/go-autorest/autorest/azure" - - "github.com/stretchr/testify/assert" -) - -var invalidConfig = azureInputConfig{ - SAKey: "invalid_key", - SAName: "storage", - SAContainer: ephContainerName, - ConnectionString: "invalid_connection_string", - ConsumerGroup: "$Default", -} - -func TestRunWithEPH(t *testing.T) { - input := azureInput{config: invalidConfig} - // decoding error when key is invalid - err := input.runWithEPH() - assert.Error(t, err, '7') -} - -func TestGetAzureEnvironment(t *testing.T) { - resMan := "" - env, err := getAzureEnvironment(resMan) - assert.NoError(t, err) - assert.Equal(t, env, azure.PublicCloud) - resMan = "https://management.microsoftazure.de/" - env, err = getAzureEnvironment(resMan) - assert.NoError(t, err) - assert.Equal(t, env, azure.GermanCloud) - resMan = "http://management.invalidhybrid.com/" - _, err = getAzureEnvironment(resMan) - assert.Errorf(t, err, "invalid character 'F' looking for beginning of value") - resMan = "" - env, err = getAzureEnvironment(resMan) - assert.NoError(t, err) - assert.Equal(t, env, azure.PublicCloud) -} diff --git a/x-pack/filebeat/input/azureeventhub/v1_input.go b/x-pack/filebeat/input/azureeventhub/v1_input.go index 7ac2bb153aa..606ced27f94 100644 --- a/x-pack/filebeat/input/azureeventhub/v1_input.go +++ b/x-pack/filebeat/input/azureeventhub/v1_input.go @@ -24,13 +24,6 @@ import ( "github.com/elastic/elastic-agent-libs/mapstr" ) -var environments = map[string]azure.Environment{ - azure.ChinaCloud.ResourceManagerEndpoint: azure.ChinaCloud, - azure.GermanCloud.ResourceManagerEndpoint: azure.GermanCloud, - azure.PublicCloud.ResourceManagerEndpoint: azure.PublicCloud, - azure.USGovernmentCloud.ResourceManagerEndpoint: azure.USGovernmentCloud, -} - type eventHubInputV1 struct { config azureInputConfig log *logp.Logger @@ -39,11 +32,18 @@ type eventHubInputV1 struct { pipelineClient beat.Client } -func newEventHubInputV1(config azureInputConfig, log *logp.Logger) (v2.Input, error) { - //log := logp.NewLogger(fmt.Sprintf("%s input", inputName)).With("connection string", stripConnectionString(config.ConnectionString)) +// newEventHubInputV1 creates a new instance of the Azure Event Hub input V1. +// This input uses the Azure Event Hub SDK v3 (legacy). +func newEventHubInputV1(config azureInputConfig, logger *logp.Logger) (v2.Input, error) { + log := logger. + Named(inputName). + With( + "connection string", stripConnectionString(config.ConnectionString), + ) + return &eventHubInputV1{ config: config, - log: log.Named(inputName), + log: log, }, nil } @@ -145,6 +145,7 @@ func (in *eventHubInputV1) setup(ctx context.Context) error { // register a message handler -- many can be registered handlerID, err := in.processor.RegisterHandler(ctx, func(c context.Context, e *eventhub.Event) error { + in.log.Debugw("received event", "ts", time.Now().String()) var onEventErr error // partitionID is not yet mapped in the azure-eventhub sdk ok := in.processEvents(e, "") @@ -156,6 +157,8 @@ func (in *eventHubInputV1) setup(ctx context.Context) error { // in.Stop() } + //time.Sleep(5 * time.Second) + return onEventErr }) if err != nil { @@ -172,6 +175,10 @@ func (in *eventHubInputV1) run(ctx context.Context) error { // Start handling messages from all the partitions balancing across // multiple consumers. 
 	// The processor can be stopped by calling `Close()` on the processor.
+
+	// The `Start()` function is not an option because
+	// it waits for an `os.Interrupt` signal to stop
+	// the processor.
 	err := in.processor.StartNonBlocking(ctx)
 	if err != nil {
 		in.log.Errorw("error starting the processor", "error", err)
 		return err
@@ -213,7 +220,7 @@ func (in *eventHubInputV1) processEvents(event *eventhub.Event, partitionID stri
 	in.metrics.receivedMessages.Inc()
 	in.metrics.receivedBytes.Add(uint64(len(event.Data)))
 
-	records := in.parseMultipleRecords(event.Data)
+	records := in.unpackRecords(event.Data)
 
 	for _, record := range records {
 		_, _ = eventHubMetadata.Put("offset", event.SystemProperties.Offset)
@@ -246,7 +253,15 @@ func (in *eventHubInputV1) processEvents(event *eventhub.Event, partitionID stri
 			Private: event.Data,
 		}
 
-		in.client.Publish(event)
+		// FIXME: error handling on publish?
+		// The previous implementation was using an Outlet
+		// to send the event to the pipeline.
+		// The Outlet.OnEvent() function returns a `false`
+		// value if the outlet is closed.
+		//
+		// Should the new implementation, which uses the
+		// `Publish()` function, do something similar?
+		in.pipelineClient.Publish(event)
 
 		in.metrics.sentEvents.Inc()
 	}
@@ -257,8 +272,8 @@ func (in *eventHubInputV1) processEvents(event *eventhub.Event, partitionID stri
 	return true
 }
 
-// parseMultipleRecords will try to split the message into multiple ones based on the group field provided by the configuration
-func (in *eventHubInputV1) parseMultipleRecords(bMessage []byte) []string {
+// unpackRecords will try to split the message into multiple ones based on the group field provided by the configuration
+func (in *eventHubInputV1) unpackRecords(bMessage []byte) []string {
 	var mapObject map[string][]interface{}
 	var messages []string
 
diff --git a/x-pack/filebeat/input/azureeventhub/v1_input_test.go b/x-pack/filebeat/input/azureeventhub/v1_input_test.go
new file mode 100644
index 00000000000..f288a5f852f
--- /dev/null
+++ b/x-pack/filebeat/input/azureeventhub/v1_input_test.go
@@ -0,0 +1,7 @@
+// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+// or more contributor license agreements. Licensed under the Elastic License;
+// you may not use this file except in compliance with the Elastic License.
+
+//go:build !aix
+
+package azureeventhub
diff --git a/x-pack/filebeat/input/azureeventhub/v2_input.go b/x-pack/filebeat/input/azureeventhub/v2_input.go
index 162be4df23e..3b5998773af 100644
--- a/x-pack/filebeat/input/azureeventhub/v2_input.go
+++ b/x-pack/filebeat/input/azureeventhub/v2_input.go
@@ -51,10 +51,10 @@ func (in *eventHubInputV2) Run(
 ) error {
 	var err error
 
-	// Create client for publishing events and receive notification of their ACKs.
+	// Create pipelineClient for publishing events and receive notification of their ACKs.
 	in.client, err = createPipelineClient(pipeline)
 	if err != nil {
-		return fmt.Errorf("failed to create pipeline client: %w", err)
+		return fmt.Errorf("failed to create pipelineClient: %w", err)
 	}
 	defer in.client.Close()
 
@@ -82,14 +82,14 @@ func (in *eventHubInputV2) Run(
 }
 
 func (in *eventHubInputV2) setup(ctx context.Context) error {
-	// FIXME: check more client creation options.
+	// FIXME: check more blob container and consumer client creation options.
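+	//
+	// A hedged sketch of the kind of options available (both calls below
+	// currently pass nil):
+	//
+	//	blobOpts := &container.ClientOptions{}
+	//	blobOpts.Retry.MaxRetries = 5
+	//
+	//	consumerOpts := &azeventhubs.ConsumerClientOptions{
+	//		RetryOptions: azeventhubs.RetryOptions{MaxRetries: 5},
+	//	}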
 	blobContainerClient, err := container.NewClientFromConnectionString(
 		in.config.SAConnectionString,
 		in.config.SAContainer,
 		nil,
 	)
 	if err != nil {
-		return fmt.Errorf("failed to create blob container client: %w", err)
+		return fmt.Errorf("failed to create blob container client for the checkpoint store: %w", err)
 	}
 
 	checkpointStore, err := checkpoints.NewBlobStore(blobContainerClient, nil)
@@ -105,7 +105,7 @@ func (in *eventHubInputV2) setup(ctx context.Context) error {
 		nil,
 	)
 	if err != nil {
-		return fmt.Errorf("failed to create consumer client: %w", err)
+		return fmt.Errorf("failed to create event hub consumer client: %w", err)
 	}
 	in.consumerClient = consumerClient
 
@@ -127,6 +127,9 @@ func (in *eventHubInputV2) run(ctx context.Context) {
 	go in.workersLoop(processor)
 
 	if err := processor.Run(ctx); err != nil {
+		// FIXME: `Run()` returns an error when the processor thinks it's unrecoverable.
+		// We should check the error and decide if we want to retry or not. Should
+		// we add an exponential backoff and retry mechanism?
 		in.log.Errorw("error running processor", "error", err)
 	}
 }
@@ -134,7 +137,6 @@ func (in *eventHubInputV2) run(ctx context.Context) {
 func (in *eventHubInputV2) workersLoop(processor *azeventhubs.Processor) {
 	for {
 		processorPartitionClient := processor.NextPartitionClient(context.TODO())
-
 		if processorPartitionClient == nil {
 			// Processor has stopped
 			break
@@ -143,8 +145,17 @@ func (in *eventHubInputV2) workersLoop(processor *azeventhubs.Processor) {
 		go func() {
 			if err := in.processEventsForPartition(processorPartitionClient); err != nil {
 				//panic(err)
-				logp.Info("error processing events for partition: %v", err)
+				in.log.Errorw(
+					"processing events for partition failed",
+					"error", err,
+					"partition", processorPartitionClient.PartitionID(),
+				)
 			}
+
+			in.log.Infow(
+				"partition worker exited",
+				"partition", processorPartitionClient.PartitionID(),
+			)
 		}()
 	}
 }
@@ -195,6 +206,7 @@ func (in *eventHubInputV2) processEventsForPartition(partitionClient *azeventhub
 	// Updates the checkpoint with the latest event received. If processing needs to restart
 	// it will restart from this point, automatically.
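+		// The checkpoint is persisted through the checkpoints.BlobStore
+		// created in setup(), i.e. in the configured storage account
+		// container, so a restarted or rebalanced processor resumes from
+		// the last committed position instead of the start of the partition.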
if err := partitionClient.UpdateCheckpoint(context.TODO(), events[len(events)-1], nil); err != nil { + in.log.Errorw("error updating checkpoint", "error", err) return err } } From 0d80bb45c14f9f22be3643290f21dd9f2b5cf1cb Mon Sep 17 00:00:00 2001 From: Maurizio Branca Date: Mon, 20 May 2024 11:22:26 +0200 Subject: [PATCH 07/41] Address linter complaints --- x-pack/filebeat/input/azureeventhub/input.go | 4 ++- .../input/azureeventhub/input_test.go | 28 ++++++++++--------- .../filebeat/input/azureeventhub/v1_input.go | 4 ++- .../filebeat/input/azureeventhub/v2_input.go | 4 ++- 4 files changed, 24 insertions(+), 16 deletions(-) diff --git a/x-pack/filebeat/input/azureeventhub/input.go b/x-pack/filebeat/input/azureeventhub/input.go index 6ccc7863091..cb17eb02811 100644 --- a/x-pack/filebeat/input/azureeventhub/input.go +++ b/x-pack/filebeat/input/azureeventhub/input.go @@ -8,15 +8,17 @@ package azureeventhub import ( "fmt" + "strings" + "github.com/Azure/azure-sdk-for-go/sdk/azcore/to" "github.com/Azure/go-autorest/autorest/azure" + v2 "github.com/elastic/beats/v7/filebeat/input/v2" "github.com/elastic/beats/v7/libbeat/beat" "github.com/elastic/beats/v7/libbeat/feature" conf "github.com/elastic/elastic-agent-libs/config" "github.com/elastic/elastic-agent-libs/logp" "github.com/elastic/go-concert/unison" - "strings" ) const ( diff --git a/x-pack/filebeat/input/azureeventhub/input_test.go b/x-pack/filebeat/input/azureeventhub/input_test.go index 0fe95b2614f..27098763358 100644 --- a/x-pack/filebeat/input/azureeventhub/input_test.go +++ b/x-pack/filebeat/input/azureeventhub/input_test.go @@ -8,12 +8,14 @@ package azureeventhub import ( "fmt" - "github.com/Azure/go-autorest/autorest/azure" - "github.com/elastic/elastic-agent-libs/logp" "sync" "testing" "time" + "github.com/Azure/go-autorest/autorest/azure" + + "github.com/elastic/elastic-agent-libs/logp" + "github.com/elastic/elastic-agent-libs/monitoring" eventhub "github.com/Azure/azure-event-hubs-go/v3" @@ -181,18 +183,18 @@ func TestStripConnectionString(t *testing.T) { } } -// fakePipeline returns new fakeClients for simple tests. -type fakePipeline struct{} - -func (c *fakePipeline) ConnectWith(clientConfig beat.ClientConfig) (beat.Client, error) { - return &fakeClient{}, nil -} - -func (c *fakePipeline) Connect() (beat.Client, error) { - return &fakeClient{}, nil -} +//// fakePipeline returns new fakeClients for simple tests. +//type fakePipeline struct{} +// +//func (c *fakePipeline) ConnectWith(clientConfig beat.ClientConfig) (beat.Client, error) { +// return &fakeClient{}, nil +//} +// +//func (c *fakePipeline) Connect() (beat.Client, error) { +// return &fakeClient{}, nil +//} -var _ beat.Client = (*fakeClient)(nil) +//var _ beat.Client = (*fakeClient)(nil) // ackClient is a fake beat.Client that ACKs the published messages. 
type fakeClient struct { diff --git a/x-pack/filebeat/input/azureeventhub/v1_input.go b/x-pack/filebeat/input/azureeventhub/v1_input.go index 606ced27f94..1e37ec084b7 100644 --- a/x-pack/filebeat/input/azureeventhub/v1_input.go +++ b/x-pack/filebeat/input/azureeventhub/v1_input.go @@ -11,13 +11,15 @@ import ( "encoding/json" "errors" "fmt" - "github.com/Azure/go-autorest/autorest/azure" "time" + "github.com/Azure/go-autorest/autorest/azure" + eventhub "github.com/Azure/azure-event-hubs-go/v3" "github.com/Azure/azure-event-hubs-go/v3/eph" "github.com/Azure/azure-event-hubs-go/v3/storage" "github.com/Azure/azure-storage-blob-go/azblob" + v2 "github.com/elastic/beats/v7/filebeat/input/v2" "github.com/elastic/beats/v7/libbeat/beat" "github.com/elastic/elastic-agent-libs/logp" diff --git a/x-pack/filebeat/input/azureeventhub/v2_input.go b/x-pack/filebeat/input/azureeventhub/v2_input.go index 3b5998773af..e06cad2b41a 100644 --- a/x-pack/filebeat/input/azureeventhub/v2_input.go +++ b/x-pack/filebeat/input/azureeventhub/v2_input.go @@ -11,14 +11,16 @@ import ( "encoding/json" "errors" "fmt" + "time" + "github.com/Azure/azure-sdk-for-go/sdk/messaging/azeventhubs" "github.com/Azure/azure-sdk-for-go/sdk/messaging/azeventhubs/checkpoints" "github.com/Azure/azure-sdk-for-go/sdk/storage/azblob/container" + v2 "github.com/elastic/beats/v7/filebeat/input/v2" "github.com/elastic/beats/v7/libbeat/beat" "github.com/elastic/elastic-agent-libs/logp" "github.com/elastic/elastic-agent-libs/mapstr" - "time" ) type eventHubInputV2 struct { From af52eb0c28da957efcef322f186bf3f956242e31 Mon Sep 17 00:00:00 2001 From: Maurizio Branca Date: Mon, 20 May 2024 11:28:00 +0200 Subject: [PATCH 08/41] Simplify run() It seems we only need to wait for the context to be done here. --- x-pack/filebeat/input/azureeventhub/v1_input.go | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/x-pack/filebeat/input/azureeventhub/v1_input.go b/x-pack/filebeat/input/azureeventhub/v1_input.go index 1e37ec084b7..235d1f46794 100644 --- a/x-pack/filebeat/input/azureeventhub/v1_input.go +++ b/x-pack/filebeat/input/azureeventhub/v1_input.go @@ -85,6 +85,7 @@ func (in *eventHubInputV1) Run( // Start the main run loop err = in.run(ctx) if err != nil { + in.log.Errorw("error running input", "error", err) return err } @@ -197,14 +198,8 @@ func (in *eventHubInputV1) run(ctx context.Context) error { in.log.Infof("%s input worker has started.", inputName) - for ctx.Err() == nil { - select { - case <-ctx.Done(): - return nil - } - } - - in.log.Errorw("error during processing", "error", ctx.Err()) + // wait for the context to be done + <-ctx.Done() return ctx.Err() } From 525b291c53708e38ce7d7b95000a45d2cb37540e Mon Sep 17 00:00:00 2001 From: Maurizio Branca Date: Mon, 20 May 2024 11:51:01 +0200 Subject: [PATCH 09/41] Update NOTICE --- NOTICE.txt | 192 +++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 136 insertions(+), 56 deletions(-) diff --git a/NOTICE.txt b/NOTICE.txt index aad81b518ac..471ba2387a1 100644 --- a/NOTICE.txt +++ b/NOTICE.txt @@ -1742,11 +1742,11 @@ SOFTWARE. 
-------------------------------------------------------------------------------- Dependency : github.com/Azure/azure-sdk-for-go/sdk/azcore -Version: v1.9.0 +Version: v1.11.1 Licence type (autodetected): MIT -------------------------------------------------------------------------------- -Contents of probable licence file $GOMODCACHE/github.com/!azure/azure-sdk-for-go/sdk/azcore@v1.9.0/LICENSE.txt: +Contents of probable licence file $GOMODCACHE/github.com/!azure/azure-sdk-for-go/sdk/azcore@v1.11.1/LICENSE.txt: MIT License @@ -1773,11 +1773,11 @@ SOFTWARE -------------------------------------------------------------------------------- Dependency : github.com/Azure/azure-sdk-for-go/sdk/azidentity -Version: v1.4.0 +Version: v1.5.2 Licence type (autodetected): MIT -------------------------------------------------------------------------------- -Contents of probable licence file $GOMODCACHE/github.com/!azure/azure-sdk-for-go/sdk/azidentity@v1.4.0/LICENSE.txt: +Contents of probable licence file $GOMODCACHE/github.com/!azure/azure-sdk-for-go/sdk/azidentity@v1.5.2/LICENSE.txt: MIT License @@ -1802,6 +1802,37 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE +-------------------------------------------------------------------------------- +Dependency : github.com/Azure/azure-sdk-for-go/sdk/messaging/azeventhubs +Version: v1.2.0 +Licence type (autodetected): MIT +-------------------------------------------------------------------------------- + +Contents of probable licence file $GOMODCACHE/github.com/!azure/azure-sdk-for-go/sdk/messaging/azeventhubs@v1.2.0/LICENSE.txt: + +Copyright (c) Microsoft Corporation. + +MIT License + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + + -------------------------------------------------------------------------------- Dependency : github.com/elastic/azure-sdk-for-go/sdk/resourcemanager/consumption/armconsumption Version: v1.1.0-elastic @@ -1954,11 +1985,11 @@ SOFTWARE. 
-------------------------------------------------------------------------------- Dependency : github.com/Azure/azure-sdk-for-go/sdk/storage/azblob -Version: v1.0.0 +Version: v1.3.2 Licence type (autodetected): MIT -------------------------------------------------------------------------------- -Contents of probable licence file $GOMODCACHE/github.com/!azure/azure-sdk-for-go/sdk/storage/azblob@v1.0.0/LICENSE.txt: +Contents of probable licence file $GOMODCACHE/github.com/!azure/azure-sdk-for-go/sdk/storage/azblob@v1.3.2/LICENSE.txt: MIT License @@ -18527,11 +18558,11 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -------------------------------------------------------------------------------- Dependency : github.com/google/uuid -Version: v1.3.1 +Version: v1.6.0 Licence type (autodetected): BSD-3-Clause -------------------------------------------------------------------------------- -Contents of probable licence file $GOMODCACHE/github.com/google/uuid@v1.3.1/LICENSE: +Contents of probable licence file $GOMODCACHE/github.com/google/uuid@v1.6.0/LICENSE: Copyright (c) 2009,2014 Google Inc. All rights reserved. @@ -29755,11 +29786,11 @@ Contents of probable licence file $GOMODCACHE/github.com/!azure/azure-pipeline-g -------------------------------------------------------------------------------- Dependency : github.com/Azure/azure-sdk-for-go/sdk/internal -Version: v1.5.0 +Version: v1.7.0 Licence type (autodetected): MIT -------------------------------------------------------------------------------- -Contents of probable licence file $GOMODCACHE/github.com/!azure/azure-sdk-for-go/sdk/internal@v1.5.0/LICENSE.txt: +Contents of probable licence file $GOMODCACHE/github.com/!azure/azure-sdk-for-go/sdk/internal@v1.7.0/LICENSE.txt: MIT License @@ -29784,6 +29815,36 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE +-------------------------------------------------------------------------------- +Dependency : github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/eventhub/armeventhub +Version: v1.2.0 +Licence type (autodetected): MIT +-------------------------------------------------------------------------------- + +Contents of probable licence file $GOMODCACHE/github.com/!azure/azure-sdk-for-go/sdk/resourcemanager/eventhub/armeventhub@v1.2.0/LICENSE.txt: + +MIT License + +Copyright (c) Microsoft Corporation. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
+ -------------------------------------------------------------------------------- Dependency : github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/internal Version: v1.1.2 @@ -29876,13 +29937,43 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +-------------------------------------------------------------------------------- +Dependency : github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/storage/armstorage +Version: v1.5.0 +Licence type (autodetected): MIT +-------------------------------------------------------------------------------- + +Contents of probable licence file $GOMODCACHE/github.com/!azure/azure-sdk-for-go/sdk/resourcemanager/storage/armstorage@v1.5.0/LICENSE.txt: + +MIT License + +Copyright (c) Microsoft Corporation. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + -------------------------------------------------------------------------------- Dependency : github.com/Azure/go-amqp -Version: v1.0.0 +Version: v1.0.5 Licence type (autodetected): MIT -------------------------------------------------------------------------------- -Contents of probable licence file $GOMODCACHE/github.com/!azure/go-amqp@v1.0.0/LICENSE: +Contents of probable licence file $GOMODCACHE/github.com/!azure/go-amqp@v1.0.5/LICENSE: MIT License @@ -31580,11 +31671,11 @@ SOFTWARE. -------------------------------------------------------------------------------- Dependency : github.com/AzureAD/microsoft-authentication-library-for-go -Version: v1.1.1 +Version: v1.2.2 Licence type (autodetected): MIT -------------------------------------------------------------------------------- -Contents of probable licence file $GOMODCACHE/github.com/!azure!a!d/microsoft-authentication-library-for-go@v1.1.1/LICENSE: +Contents of probable licence file $GOMODCACHE/github.com/!azure!a!d/microsoft-authentication-library-for-go@v1.2.2/LICENSE: MIT License @@ -37783,40 +37874,6 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. --------------------------------------------------------------------------------- -Dependency : github.com/dnaeon/go-vcr -Version: v1.2.0 -Licence type (autodetected): BSD-2-Clause --------------------------------------------------------------------------------- - -Contents of probable licence file $GOMODCACHE/github.com/dnaeon/go-vcr@v1.2.0/LICENSE: - -Copyright (c) 2015-2016 Marin Atanasov Nikolov -All rights reserved. 
- -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: - - 1. Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer - in this position and unchanged. - 2. Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - -THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR -IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES -OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. -IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, -INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT -NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF -THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - -------------------------------------------------------------------------------- Dependency : github.com/dnephin/pflag Version: v1.0.7 @@ -40131,11 +40188,11 @@ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLI -------------------------------------------------------------------------------- Dependency : github.com/golang-jwt/jwt/v5 -Version: v5.0.0 +Version: v5.2.1 Licence type (autodetected): MIT -------------------------------------------------------------------------------- -Contents of probable licence file $GOMODCACHE/github.com/golang-jwt/jwt/v5@v5.0.0/LICENSE: +Contents of probable licence file $GOMODCACHE/github.com/golang-jwt/jwt/v5@v5.2.1/LICENSE: Copyright (c) 2012 Dave Grijalva Copyright (c) 2021 golang-jwt maintainers @@ -46065,11 +46122,11 @@ SOFTWARE. -------------------------------------------------------------------------------- Dependency : github.com/joho/godotenv -Version: v1.3.0 +Version: v1.5.1 Licence type (autodetected): MIT -------------------------------------------------------------------------------- -Contents of probable licence file $GOMODCACHE/github.com/joho/godotenv@v1.3.0/LICENCE: +Contents of probable licence file $GOMODCACHE/github.com/joho/godotenv@v1.5.1/LICENCE: Copyright (c) 2013 John Barton @@ -50118,11 +50175,11 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -------------------------------------------------------------------------------- Dependency : github.com/pkg/browser -Version: v0.0.0-20210911075715-681adbf594b8 +Version: v0.0.0-20240102092130-5ac0b6a4141c Licence type (autodetected): BSD-2-Clause -------------------------------------------------------------------------------- -Contents of probable licence file $GOMODCACHE/github.com/pkg/browser@v0.0.0-20210911075715-681adbf594b8/LICENSE: +Contents of probable licence file $GOMODCACHE/github.com/pkg/browser@v0.0.0-20240102092130-5ac0b6a4141c/LICENSE: Copyright (c) 2014, Dave Cheney All rights reserved. 
@@ -50432,11 +50489,11 @@ Contents of probable licence file $GOMODCACHE/github.com/prometheus/client_golan -------------------------------------------------------------------------------- Dependency : github.com/rogpeppe/go-internal -Version: v1.11.0 +Version: v1.12.0 Licence type (autodetected): BSD-3-Clause -------------------------------------------------------------------------------- -Contents of probable licence file $GOMODCACHE/github.com/rogpeppe/go-internal@v1.11.0/LICENSE: +Contents of probable licence file $GOMODCACHE/github.com/rogpeppe/go-internal@v1.12.0/LICENSE: Copyright (c) 2018 The Go Authors. All rights reserved. @@ -57122,6 +57179,29 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +-------------------------------------------------------------------------------- +Dependency : nhooyr.io/websocket +Version: v1.8.11 +Licence type (autodetected): ISC +-------------------------------------------------------------------------------- + +Contents of probable licence file $GOMODCACHE/nhooyr.io/websocket@v1.8.11/LICENSE.txt: + +Copyright (c) 2023 Anmol Sethi + +Permission to use, copy, modify, and distribute this software for any +purpose with or without fee is hereby granted, provided that the above +copyright notice and this permission notice appear in all copies. + +THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ + -------------------------------------------------------------------------------- Dependency : sigs.k8s.io/json Version: v0.0.0-20211020170558-c049b76a60c6 From 34206a21c61d11fd4399e559608c675d342fa5e7 Mon Sep 17 00:00:00 2001 From: Maurizio Branca Date: Mon, 20 May 2024 12:19:31 +0200 Subject: [PATCH 10/41] Remove the eventHubInputV2 POC --- x-pack/filebeat/input/azureeventhub/config.go | 13 +- x-pack/filebeat/input/azureeventhub/input.go | 209 +----------- .../filebeat/input/azureeventhub/v2_input.go | 316 ------------------ 3 files changed, 4 insertions(+), 534 deletions(-) delete mode 100644 x-pack/filebeat/input/azureeventhub/v2_input.go diff --git a/x-pack/filebeat/input/azureeventhub/config.go b/x-pack/filebeat/input/azureeventhub/config.go index c6b376268ce..80c2a905162 100644 --- a/x-pack/filebeat/input/azureeventhub/config.go +++ b/x-pack/filebeat/input/azureeventhub/config.go @@ -20,16 +20,13 @@ type azureInputConfig struct { EventHubName string `config:"eventhub" validate:"required"` ConsumerGroup string `config:"consumer_group"` // Azure Storage container to store leases and checkpoints - SAName string `config:"storage_account"` - SAConnectionString string `config:"storage_account_connection_string"` // engine v2 only - SAKey string `config:"storage_account_key"` - SAContainer string `config:"storage_account_container"` + SAName string `config:"storage_account"` + SAKey string `config:"storage_account_key"` + SAContainer string `config:"storage_account_container"` // by default the azure public environment is used, to override, users can provide a specific resource manager endpoint OverrideEnvironment string `config:"resource_manager_endpoint"` // cleanup the log JSON input for known issues, options: SINGLE_QUOTES, NEW_LINES SanitizeOptions []string `config:"sanitize_options"` - // Engine version to use (v1 or v2). Default is v1. - EngineVersion string `config:"engine_version"` } const ephContainerName = "filebeat" @@ -76,10 +73,6 @@ func (conf *azureInputConfig) Validate() error { } } - if conf.EngineVersion == "" { - conf.EngineVersion = "v1" - } - return nil } diff --git a/x-pack/filebeat/input/azureeventhub/input.go b/x-pack/filebeat/input/azureeventhub/input.go index cb17eb02811..e22b65b867b 100644 --- a/x-pack/filebeat/input/azureeventhub/input.go +++ b/x-pack/filebeat/input/azureeventhub/input.go @@ -60,216 +60,9 @@ func (m *eventHubInputManager) Create(cfg *conf.C) (v2.Input, error) { return nil, fmt.Errorf("reading %s input config: %w", inputName, err) } - switch config.EngineVersion { - case "v1": - return newEventHubInputV1(config, m.log) - case "v2": - return newEventHubInputV2(config, m.log) - default: - return nil, fmt.Errorf("invalid azure-eventhub engine version: %s", config.EngineVersion) - } - - //return &azureInput{ - // config: config, - // log: logp.NewLogger(fmt.Sprintf("%s input", inputName)).With("connection string", stripConnectionString(config.ConnectionString)), - //}, nil + return newEventHubInputV1(config, m.log) } -// func init() { -// err := input.Register(inputName, NewInput) -// if err != nil { -// panic(fmt.Errorf("failed to register %v input: %w", inputName, err)) -// } -// } - -// // configID computes a unique ID for the input configuration. -// // -// // It is used to identify the input in the registry and to detect -// // changes in the configuration. -// // -// // We will remove this function as we upgrade the input to the -// // v2 API (there is an ID in the v2 context). 
-// func configID(config *conf.C) (string, error) { -// var tmp struct { -// ID string `config:"id"` -// } -// if err := config.Unpack(&tmp); err != nil { -// return "", fmt.Errorf("error extracting ID: %w", err) -// } -// if tmp.ID != "" { -// return tmp.ID, nil -// } - -// var h map[string]interface{} -// _ = config.Unpack(&h) -// id, err := hashstructure.Hash(h, nil) -// if err != nil { -// return "", fmt.Errorf("can not compute ID from configuration: %w", err) -// } - -// return fmt.Sprintf("%16X", id), nil -// } - -//// azureInput struct for the azure-eventhub input -//type azureInput struct { -// config azureInputConfig // azure-eventhub configuration -// context input.Context -// outlet channel.Outleter -// log *logp.Logger // logging info and error messages -// workerCtx context.Context // worker goroutine context. It's cancelled when the input stops or the worker exits. -// workerCancel context.CancelFunc // used to signal that the worker should stop. -// workerOnce sync.Once // guarantees that the worker goroutine is only started once. -// processor *eph.EventProcessorHost // eph will be assigned if users have enabled the option -// id string // ID of the input; used to identify the input in the input metrics registry only, and will be removed once the input is migrated to v2. -// metrics *inputMetrics // Metrics for the input. -//} - -// // NewInput creates a new azure-eventhub input -// func NewInput( -// cfg *conf.C, -// connector channel.Connector, -// inputContext input.Context, -// ) (input.Input, error) { -// var config azureInputConfig -// if err := cfg.Unpack(&config); err != nil { -// return nil, fmt.Errorf("reading %s input config: %w", inputName, err) -// } - -// // Since this is a v1 input, we need to set the ID manually. -// // -// // We need an ID to identify the input in the input metrics -// // registry. -// // -// // This is a temporary workaround until we migrate the input to v2. -// inputId, err := configID(cfg) -// if err != nil { -// return nil, err -// } - -// inputCtx, cancelInputCtx := context.WithCancel(context.Background()) -// go func() { -// defer cancelInputCtx() -// select { -// case <-inputContext.Done: -// case <-inputCtx.Done(): -// } -// }() - -// // If the input ever needs to be made restartable, then context would need -// // to be recreated with each restart. -// workerCtx, workerCancel := context.WithCancel(inputCtx) - -// in := azureInput{ -// id: inputId, -// config: config, -// log: logp.NewLogger(fmt.Sprintf("%s input", inputName)).With("connection string", stripConnectionString(config.ConnectionString)), -// context: inputContext, -// workerCtx: workerCtx, -// workerCancel: workerCancel, -// } -// out, err := connector.Connect(cfg) -// if err != nil { -// return nil, err -// } -// in.outlet = out -// in.log.Infof("Initialized %s input.", inputName) - -// return &in, nil -// } -// -//func (a *azureInput) Name() string { -// return inputName -//} -// -//func (a *azureInput) Test(v2.TestContext) error { -// return nil -//} -// -//// Run starts the `azure-eventhub` input and then returns. -//// -//// The first invocation will start an input worker. All subsequent -//// invocations will be no-ops. -//// -//// The input worker will continue fetching data from the event hub until -//// the input Runner calls the `Stop()` method. -//func (a *azureInput) Run(inputContext v2.Context, pipeline beat.Pipeline) error { -// ctx := v2.GoContextFromCanceler(inputContext.Cancelation) -// -// // `Run` is invoked periodically by the input Runner. 
The `sync.Once` -// // guarantees that we only start the worker once during the first -// // invocation. -// // a.workerOnce.Do(func() { -// a.log.Infof("%s input worker is starting.", inputName) -// -// // We set up the metrics in the `Run()` method and tear them down -// // in the `Stop()` method. -// // -// // The factory method `NewInput` is not a viable solution because -// // the Runner invokes it during the configuration check without -// // calling the `Stop()` function; this causes panics -// // due to multiple metrics registrations. -// a.metrics = newInputMetrics(inputContext.ID, nil) -// -// err := a.runWithEPH() -// if err != nil { -// a.log.Errorw("error starting the input worker", "error", err) -// return err -// } -// a.log.Infof("%s input worker has started.", inputName) -// // }) -// -// for { -// select { -// case <-ctx.Done(): -// a.log.Infof("%s input worker is stopping.", inputName) -// if a.processor != nil { -// // Tells the processor to stop processing events and release all -// // resources (like scheduler, leaser, checkpointer, and pipelineClient). -// err := a.processor.Close(context.Background()) -// if err != nil { -// a.log.Errorw("error while closing eventhostprocessor", "error", err) -// } -// } -// -// if a.metrics != nil { -// a.metrics.Close() -// } -// -// // a.workerCancel() // FIXME: is this needed? -// a.log.Infof("%s input worker has stopped.", inputName) -// } -// -// break -// } -// -// return nil -//} -// -//// // Stop stops `azure-eventhub` input. -//// func (a *azureInput) Stop() { -//// a.log.Infof("%s input worker is stopping.", inputName) -//// if a.processor != nil { -//// // Tells the processor to stop processing events and release all -//// // resources (like scheduler, leaser, checkpointer, and pipelineClient). -//// err := a.processor.Close(context.Background()) -//// if err != nil { -//// a.log.Errorw("error while closing eventhostprocessor", "error", err) -//// } -//// } -// -//// if a.metrics != nil { -//// a.metrics.Close() -//// } -// -//// a.workerCancel() -//// a.log.Infof("%s input worker has stopped.", inputName) -//// } -// -//// // Wait stop the current server -//// func (a *azureInput) Wait() { -//// a.Stop() -//// } - func createPipelineClient(pipeline beat.Pipeline) (beat.Client, error) { return pipeline.ConnectWith(beat.ClientConfig{ Processing: beat.ProcessingConfig{ diff --git a/x-pack/filebeat/input/azureeventhub/v2_input.go b/x-pack/filebeat/input/azureeventhub/v2_input.go deleted file mode 100644 index e06cad2b41a..00000000000 --- a/x-pack/filebeat/input/azureeventhub/v2_input.go +++ /dev/null @@ -1,316 +0,0 @@ -// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one -// or more contributor license agreements. Licensed under the Elastic License; -// you may not use this file except in compliance with the Elastic License. 
- -//go:build !aix - -package azureeventhub - -import ( - "context" - "encoding/json" - "errors" - "fmt" - "time" - - "github.com/Azure/azure-sdk-for-go/sdk/messaging/azeventhubs" - "github.com/Azure/azure-sdk-for-go/sdk/messaging/azeventhubs/checkpoints" - "github.com/Azure/azure-sdk-for-go/sdk/storage/azblob/container" - - v2 "github.com/elastic/beats/v7/filebeat/input/v2" - "github.com/elastic/beats/v7/libbeat/beat" - "github.com/elastic/elastic-agent-libs/logp" - "github.com/elastic/elastic-agent-libs/mapstr" -) - -type eventHubInputV2 struct { - config azureInputConfig - log *logp.Logger - metrics *inputMetrics - checkpointStore *checkpoints.BlobStore - consumerClient *azeventhubs.ConsumerClient - client beat.Client -} - -func newEventHubInputV2(config azureInputConfig, log *logp.Logger) (v2.Input, error) { - return &eventHubInputV2{ - config: config, - log: log.Named(inputName), - }, nil -} - -func (in *eventHubInputV2) Name() string { - return inputName -} - -func (in *eventHubInputV2) Test(v2.TestContext) error { - return nil -} - -func (in *eventHubInputV2) Run( - inputContext v2.Context, - pipeline beat.Pipeline, -) error { - var err error - - // Create pipelineClient for publishing events and receive notification of their ACKs. - in.client, err = createPipelineClient(pipeline) - if err != nil { - return fmt.Errorf("failed to create pipeline pipelineClient: %w", err) - } - defer in.client.Close() - - // Setup input metrics - inputMetrics := newInputMetrics(inputContext.ID, nil) - if err != nil { - return fmt.Errorf("failed to create input metrics: %w", err) - } - defer inputMetrics.Close() - in.metrics = inputMetrics - - ctx := v2.GoContextFromCanceler(inputContext.Cancelation) - - // Initialize everything for this run - err = in.setup(ctx) - if err != nil { - return err - } - defer in.consumerClient.Close(context.Background()) - - // Start the main run loop - in.run(ctx) - - return nil -} - -func (in *eventHubInputV2) setup(ctx context.Context) error { - // FIXME: check more pipelineClient creation options. - blobContainerClient, err := container.NewClientFromConnectionString( - in.config.SAConnectionString, - in.config.SAContainer, - nil, - ) - if err != nil { - return fmt.Errorf("failed to create blob container pipelineClient: %w", err) - } - - checkpointStore, err := checkpoints.NewBlobStore(blobContainerClient, nil) - if err != nil { - return fmt.Errorf("failed to create checkpoint store: %w", err) - } - in.checkpointStore = checkpointStore - - consumerClient, err := azeventhubs.NewConsumerClientFromConnectionString( - in.config.ConnectionString, - in.config.EventHubName, - in.config.ConsumerGroup, - nil, - ) - if err != nil { - return fmt.Errorf("failed to create consumer pipelineClient: %w", err) - } - in.consumerClient = consumerClient - - return nil -} - -func (in *eventHubInputV2) run(ctx context.Context) { - processor, err := azeventhubs.NewProcessor( - in.consumerClient, - in.checkpointStore, - nil, - ) - if err != nil { - in.log.Errorw("error creating event processor", "error", err) - return - } - - // Run in the background, launching goroutines to process each partition - go in.workersLoop(processor) - - if err := processor.Run(ctx); err != nil { - // FIXME: `Run()` returns an error when the processor thinks it's unrecoverable. - // We should check the error and decide if we want to retry or not. Should - // we add an exponential backoff and retry mechanism? 
- in.log.Errorw("error running processor", "error", err) - } -} - -func (in *eventHubInputV2) workersLoop(processor *azeventhubs.Processor) { - for { - processorPartitionClient := processor.NextPartitionClient(context.TODO()) - if processorPartitionClient == nil { - // Processor has stopped - break - } - - go func() { - if err := in.processEventsForPartition(processorPartitionClient); err != nil { - //panic(err) - in.log.Errorw( - "processing events for partition failed", - "error", err, - "partition", processorPartitionClient.PartitionID(), - ) - } - - in.log.Infow( - "partition worker exited", - "partition", processorPartitionClient.PartitionID(), - ) - }() - } -} - -// processEventsForPartition shows the typical pattern for processing a partition. -func (in *eventHubInputV2) processEventsForPartition(partitionClient *azeventhubs.ProcessorPartitionClient) error { - // 1. [BEGIN] Initialize any partition specific resources for your application. - // 2. [CONTINUOUS] Loop, calling ReceiveEvents() and UpdateCheckpoint(). - // 3. [END] Cleanup any resources. - - defer func() { - // 3/3 [END] Do cleanup here, like shutting down database clients - // or other resources used for processing this partition. - shutdownPartitionResources(partitionClient) - }() - - // 1/3 [BEGIN] Initialize any partition specific resources for your application. - if err := initializePartitionResources(partitionClient.PartitionID()); err != nil { - return err - } - - // 2/3 [CONTINUOUS] Receive events, checkpointing as needed using UpdateCheckpoint. - for { - // Wait up to a minute for 100 events, otherwise returns whatever we collected during that time. - receiveCtx, cancelReceive := context.WithTimeout(context.TODO(), 10*time.Second) - events, err := partitionClient.ReceiveEvents(receiveCtx, 100, nil) - cancelReceive() - - if err != nil && !errors.Is(err, context.DeadlineExceeded) { - var eventHubError *azeventhubs.Error - - if errors.As(err, &eventHubError) && eventHubError.Code == azeventhubs.ErrorCodeOwnershipLost { - return nil - } - - return err - } - - if len(events) == 0 { - continue - } - - err = in.processReceivedEvents(events) - if err != nil { - return fmt.Errorf("error processing received events: %w", err) - } - - // Updates the checkpoint with the latest event received. If processing needs to restart - // it will restart from this point, automatically. - if err := partitionClient.UpdateCheckpoint(context.TODO(), events[len(events)-1], nil); err != nil { - in.log.Errorw("error updating checkpoint", "error", err) - return err - } - } -} - -func (in *eventHubInputV2) processReceivedEvents(receivedEvents []*azeventhubs.ReceivedEventData) error { - processingStartTime := time.Now() - azure := mapstr.M{ - // The partition ID is not available. - // "partition_id": partitionID, - "eventhub": in.config.EventHubName, - "consumer_group": in.config.ConsumerGroup, - } - - for _, receivedEventData := range receivedEvents { - // A single event can contain multiple records. We create a new event for each record. - records := in.parseEvent(receivedEventData.Body) - - for record := range records { - _, _ = azure.Put("offset", receivedEventData.Offset) - _, _ = azure.Put("sequence_number", receivedEventData.SequenceNumber) - _, _ = azure.Put("enqueued_time", receivedEventData.EnqueuedTime) - - event := beat.Event{ - // this is the default value for the @timestamp field; usually the ingest - // pipeline replaces it with a value in the payload. 
- Timestamp: processingStartTime, - Fields: mapstr.M{ - "message": record, - "azure": azure, - }, - Private: receivedEventData.Body, - } - - in.client.Publish(event) - } - } - - return nil -} - -func (in *eventHubInputV2) parseEvent(bMessage []byte) []string { - var mapObject map[string][]interface{} - var records []string - - // Clean up the message for known issues [1] where Azure services produce malformed JSON documents. - // Sanitization occurs if options are available and the message contains an invalid JSON. - // - // [1]: https://learn.microsoft.com/en-us/answers/questions/1001797/invalid-json-logs-produced-for-function-apps - if len(in.config.SanitizeOptions) != 0 && !json.Valid(bMessage) { - bMessage = sanitize(bMessage, in.config.SanitizeOptions...) - in.metrics.sanitizedMessages.Inc() - } - - // check if the message is a "records" object containing a list of events - err := json.Unmarshal(bMessage, &mapObject) - if err == nil { - if len(mapObject[expandEventListFromField]) > 0 { - for _, ms := range mapObject[expandEventListFromField] { - js, err := json.Marshal(ms) - if err == nil { - records = append(records, string(js)) - in.metrics.receivedEvents.Inc() - } else { - in.log.Errorw(fmt.Sprintf("serializing message %s", ms), "error", err) - } - } - } - } else { - in.log.Debugf("deserializing multiple messages to a `records` object returning error: %s", err) - // in some cases the message is an array - var arrayObject []interface{} - err = json.Unmarshal(bMessage, &arrayObject) - if err != nil { - // return entire message - in.log.Debugf("deserializing multiple messages to an array returning error: %s", err) - in.metrics.decodeErrors.Inc() - return []string{string(bMessage)} - } - - for _, ms := range arrayObject { - js, err := json.Marshal(ms) - if err == nil { - records = append(records, string(js)) - in.metrics.receivedEvents.Inc() - } else { - in.log.Errorw(fmt.Sprintf("serializing message %s", ms), "error", err) - } - } - } - - return records -} - -func initializePartitionResources(partitionID string) error { - // initialize things that might be partition specific, like a - // database connection. - return nil -} - -func shutdownPartitionResources(partitionClient *azeventhubs.ProcessorPartitionClient) { - // Each PartitionClient holds onto an external resource and should be closed if you're - // not processing them anymore. - defer partitionClient.Close(context.TODO()) -} From e0a73a042922dece7f97747fa9bedf7451c95f1f Mon Sep 17 00:00:00 2001 From: Maurizio Branca Date: Mon, 20 May 2024 12:23:20 +0200 Subject: [PATCH 11/41] Cleanup --- .../input/azureeventhub/input_test.go | 48 ------------------- 1 file changed, 48 deletions(-) diff --git a/x-pack/filebeat/input/azureeventhub/input_test.go b/x-pack/filebeat/input/azureeventhub/input_test.go index 27098763358..21d66b4522e 100644 --- a/x-pack/filebeat/input/azureeventhub/input_test.go +++ b/x-pack/filebeat/input/azureeventhub/input_test.go @@ -183,19 +183,6 @@ func TestStripConnectionString(t *testing.T) { } } -//// fakePipeline returns new fakeClients for simple tests. -//type fakePipeline struct{} -// -//func (c *fakePipeline) ConnectWith(clientConfig beat.ClientConfig) (beat.Client, error) { -// return &fakeClient{}, nil -//} -// -//func (c *fakePipeline) Connect() (beat.Client, error) { -// return &fakeClient{}, nil -//} - -//var _ beat.Client = (*fakeClient)(nil) - // ackClient is a fake beat.Client that ACKs the published messages. 
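 //
 // Illustrative usage, mirroring the tests in this file (a sketch, not
 // part of this change; msg and properties are as in the tests above):
 //
 //	fakePipelineClient := fakeClient{}
 //	input := eventHubInputV1{
 //		log:            logp.NewLogger("test"),
 //		metrics:        newInputMetrics("test", nil),
 //		pipelineClient: &fakePipelineClient,
 //	}
 //	input.processEvents(&eventhub.Event{Data: []byte(msg), SystemProperties: &properties})
 //	// fakePipelineClient.publishedEvents now holds the generated events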
type fakeClient struct { sync.Mutex @@ -215,38 +202,3 @@ func (c *fakeClient) PublishAll(event []beat.Event) { c.Publish(e) } } - -// -//type stubOutleter struct { -// sync.Mutex -// cond *sync.Cond -// done bool -// Events []beat.Event -//} -// -//func newStubOutlet(stub *stubOutleter) (channel.Outleter, error) { -// stub.cond = sync.NewCond(stub) -// defer stub.Close() -// -// connector := channel.ConnectorFunc(func(_ *conf.C, _ beat.ClientConfig) (channel.Outleter, error) { -// return stub, nil -// }) -// return connector.ConnectWith(nil, beat.ClientConfig{ -// Processing: beat.ProcessingConfig{}, -// }) -//} -// -//func (o *stubOutleter) Close() error { -// o.Lock() -// defer o.Unlock() -// o.done = true -// return nil -//} -//func (o *stubOutleter) Done() <-chan struct{} { return nil } -//func (o *stubOutleter) OnEvent(event beat.Event) bool { -// o.Lock() -// defer o.Unlock() -// o.Events = append(o.Events, event) -// o.cond.Broadcast() -// return o.done -//} From b0efa53623d88a4605ec1cef67b51e12d441e88a Mon Sep 17 00:00:00 2001 From: Maurizio Branca Date: Mon, 20 May 2024 12:31:56 +0200 Subject: [PATCH 12/41] Cleanup --- NOTICE.txt | 84 --------------------------------- go.mod | 2 +- go.sum | 6 --- x-pack/filebeat/include/list.go | 1 - 4 files changed, 1 insertion(+), 92 deletions(-) diff --git a/NOTICE.txt b/NOTICE.txt index 471ba2387a1..1e23bb121c1 100644 --- a/NOTICE.txt +++ b/NOTICE.txt @@ -1802,37 +1802,6 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE --------------------------------------------------------------------------------- -Dependency : github.com/Azure/azure-sdk-for-go/sdk/messaging/azeventhubs -Version: v1.2.0 -Licence type (autodetected): MIT --------------------------------------------------------------------------------- - -Contents of probable licence file $GOMODCACHE/github.com/!azure/azure-sdk-for-go/sdk/messaging/azeventhubs@v1.2.0/LICENSE.txt: - -Copyright (c) Microsoft Corporation. - -MIT License - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. 
- - -------------------------------------------------------------------------------- Dependency : github.com/elastic/azure-sdk-for-go/sdk/resourcemanager/consumption/armconsumption Version: v1.1.0-elastic @@ -29815,36 +29784,6 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE --------------------------------------------------------------------------------- -Dependency : github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/eventhub/armeventhub -Version: v1.2.0 -Licence type (autodetected): MIT --------------------------------------------------------------------------------- - -Contents of probable licence file $GOMODCACHE/github.com/!azure/azure-sdk-for-go/sdk/resourcemanager/eventhub/armeventhub@v1.2.0/LICENSE.txt: - -MIT License - -Copyright (c) Microsoft Corporation. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. - -------------------------------------------------------------------------------- Dependency : github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/internal Version: v1.1.2 @@ -57179,29 +57118,6 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. --------------------------------------------------------------------------------- -Dependency : nhooyr.io/websocket -Version: v1.8.11 -Licence type (autodetected): ISC --------------------------------------------------------------------------------- - -Contents of probable licence file $GOMODCACHE/nhooyr.io/websocket@v1.8.11/LICENSE.txt: - -Copyright (c) 2023 Anmol Sethi - -Permission to use, copy, modify, and distribute this software for any -purpose with or without fee is hereby granted, provided that the above -copyright notice and this permission notice appear in all copies. - -THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES -WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF -MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR -ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES -WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN -ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF -OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
- - -------------------------------------------------------------------------------- Dependency : sigs.k8s.io/json Version: v0.0.0-20211020170558-c049b76a60c6 diff --git a/go.mod b/go.mod index b34cdd651fe..7111770dd70 100644 --- a/go.mod +++ b/go.mod @@ -188,7 +188,6 @@ require ( cloud.google.com/go/redis v1.13.1 github.com/Azure/azure-sdk-for-go/sdk/azcore v1.11.1 github.com/Azure/azure-sdk-for-go/sdk/azidentity v1.5.2 - github.com/Azure/azure-sdk-for-go/sdk/messaging/azeventhubs v1.2.0 github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/consumption/armconsumption v1.1.0 github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/containerservice/armcontainerservice/v4 v4.6.0 github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/costmanagement/armcostmanagement v1.1.0 @@ -328,6 +327,7 @@ require ( github.com/jcmturner/gokrb5/v8 v8.4.2 // indirect github.com/jcmturner/rpc/v2 v2.0.3 // indirect github.com/jmespath/go-jmespath v0.4.0 // indirect + github.com/joho/godotenv v1.5.1 // indirect github.com/josharian/intern v1.0.0 // indirect github.com/json-iterator/go v1.1.12 // indirect github.com/karrick/godirwalk v1.17.0 // indirect diff --git a/go.sum b/go.sum index 470f4bd5a78..53c571c3523 100644 --- a/go.sum +++ b/go.sum @@ -108,14 +108,10 @@ github.com/Azure/azure-sdk-for-go/sdk/internal v1.1.1/go.mod h1:eWRD7oawr1Mu1sLC github.com/Azure/azure-sdk-for-go/sdk/internal v1.1.2/go.mod h1:eWRD7oawr1Mu1sLCawqVc0CUiF43ia3qQMxLscsKQ9w= github.com/Azure/azure-sdk-for-go/sdk/internal v1.7.0 h1:rTfKOCZGy5ViVrlA74ZPE99a+SgoEE2K/yg3RyW9dFA= github.com/Azure/azure-sdk-for-go/sdk/internal v1.7.0/go.mod h1:4OG6tQ9EOP/MT0NMjDlRzWoVFxfu9rN9B2X+tlSVktg= -github.com/Azure/azure-sdk-for-go/sdk/messaging/azeventhubs v1.2.0 h1:NYd6adRnLdeTwr1QWeiL83Fgqg7clkPLm4DCz4jYywE= -github.com/Azure/azure-sdk-for-go/sdk/messaging/azeventhubs v1.2.0/go.mod h1:vMGz6NOUGJ9h5ONl2kkyaqq5E0g7s4CHNSrXN5fl8UY= github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/containerservice/armcontainerservice/v4 v4.6.0 h1:AAIdAyPkFff6XTct2lQCxOWN/+LnA41S7kIkzKaMbyE= github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/containerservice/armcontainerservice/v4 v4.6.0/go.mod h1:noQIdW75SiQFB3mSFJBr4iRRH83S9skaFiBv4C0uEs0= github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/costmanagement/armcostmanagement v1.1.0 h1:1MRED2aeLx/BPHC23XRtr8Mk6zcc70HNRYPQ73R0gHw= github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/costmanagement/armcostmanagement v1.1.0/go.mod h1:Am1cUioOk0HdZIsjpXJkQ4RIeQbwYsW6LkNIc5z/5XY= -github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/eventhub/armeventhub v1.2.0 h1:+dggnR89/BIIlRlQ6d19dkhhdd/mQUiQbXhyHUFiB4w= -github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/eventhub/armeventhub v1.2.0/go.mod h1:tI9M2Q/ueFi287QRkdrhb9LHm6ZnXgkVYLRC3FhYkPw= github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/internal v1.1.2 h1:mLY+pNLjCUeKhgnAJWAKhEUQM+RJQo2H1fuGSw1Ky1E= github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/internal v1.1.2/go.mod h1:FbdwsQ2EzwvXxOPcMFYO8ogEc9uMMIj3YkmCdXdAFmk= github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/internal/v2 v2.0.0 h1:PTFGRSlMKCQelWwxUyYVEUqseBJVemLyqWJjvMyt0do= @@ -2561,8 +2557,6 @@ modernc.org/token v1.0.0/go.mod h1:UGzOrNV1mAFSEB63lOFHIpNRUVMvYTc6yu1SMY/XTDM= modernc.org/z v1.5.1/go.mod h1:eWFB510QWW5Th9YGZT81s+LwvaAs3Q2yr4sP0rmLkv8= mvdan.cc/garble v0.7.1 h1:9Qffp7HzKLBfQxYZ8mBF/EoYefV54ooY8v9UR4ByTPw= mvdan.cc/garble v0.7.1/go.mod h1:7F2EWpOklhK2qWzv1Hbin8sP2TYBO+EALIx4kFTmtu8= -nhooyr.io/websocket v1.8.11 h1:f/qXNc2/3DpoSZkHt1DQu6rj4zGC8JmkkLkWss0MgN0= -nhooyr.io/websocket 
v1.8.11/go.mod h1:rN9OFWIUwuxg4fR5tELlYC04bXYowCP9GX47ivo2l+c=
rsc.io/binaryregexp v0.2.0/go.mod h1:qTv7/COck+e2FymRvadv62gMdZztPaShugOCi3I+8D8=
rsc.io/pdf v0.1.1/go.mod h1:n8OzWcQ6Sp37PL01nO98y4iUCRdTGarVfzxY20ICaU4=
rsc.io/quote/v3 v3.1.0/go.mod h1:yEA65RcK8LyAZtP9Kv3t0HmxON59tX3rD+tICJqUlj0=
diff --git a/x-pack/filebeat/include/list.go b/x-pack/filebeat/include/list.go
index fcca6f27de8..5e2cc02a4c9 100644
--- a/x-pack/filebeat/include/list.go
+++ b/x-pack/filebeat/include/list.go
@@ -15,7 +15,6 @@ import (
 	// Import packages that perform 'func init()'.
 	_ "github.com/elastic/beats/v7/x-pack/filebeat/input/awscloudwatch"
 	_ "github.com/elastic/beats/v7/x-pack/filebeat/input/awss3"
-	_ "github.com/elastic/beats/v7/x-pack/filebeat/input/azureeventhub"
 	_ "github.com/elastic/beats/v7/x-pack/filebeat/input/cometd"
 	_ "github.com/elastic/beats/v7/x-pack/filebeat/input/etw"
 	_ "github.com/elastic/beats/v7/x-pack/filebeat/input/gcppubsub"

From e3d80fd430e045ee81a06cd16de24c33b974c6ec Mon Sep 17 00:00:00 2001
From: Maurizio Branca
Date: Tue, 21 May 2024 13:31:59 +0200
Subject: [PATCH 13/41] Cleanup and add comments

---
 x-pack/filebeat/input/azureeventhub/input.go  |   8 ++
 .../filebeat/input/azureeventhub/v1_input.go  | 121 ++++++++++++------
 2 files changed, 87 insertions(+), 42 deletions(-)

diff --git a/x-pack/filebeat/input/azureeventhub/input.go b/x-pack/filebeat/input/azureeventhub/input.go
index e22b65b867b..a12083f2abe 100644
--- a/x-pack/filebeat/input/azureeventhub/input.go
+++ b/x-pack/filebeat/input/azureeventhub/input.go
@@ -34,6 +34,10 @@ var environments = map[string]azure.Environment{
 	azure.USGovernmentCloud.ResourceManagerEndpoint: azure.USGovernmentCloud,
 }
 
+// Plugin returns the Azure Event Hub input plugin.
+//
+// It is required to register the input with the
+// input API v2 plugin loader.
 func Plugin(log *logp.Logger) v2.Plugin {
 	return v2.Plugin{
 		Name:       inputName,
@@ -46,6 +50,10 @@ func Plugin(log *logp.Logger) v2.Plugin {
 	}
 }
 
+// eventHubInputManager is the manager for the Azure Event Hub input.
+//
+// It is responsible for creating new instances of the input, according
+// to the configuration provided.
 type eventHubInputManager struct {
 	log *logp.Logger
 }
diff --git a/x-pack/filebeat/input/azureeventhub/v1_input.go b/x-pack/filebeat/input/azureeventhub/v1_input.go
index 235d1f46794..5ccd5a6e704 100644
--- a/x-pack/filebeat/input/azureeventhub/v1_input.go
+++ b/x-pack/filebeat/input/azureeventhub/v1_input.go
@@ -9,7 +9,6 @@ package azureeventhub
 import (
 	"context"
 	"encoding/json"
-	"errors"
 	"fmt"
 	"time"
 
@@ -26,6 +25,9 @@ import (
 	"github.com/elastic/elastic-agent-libs/mapstr"
 )
 
+// eventHubInputV1 is the Azure Event Hub input V1.
+//
+// This input uses the Azure Event Hub SDK v3 (legacy).
 type eventHubInputV1 struct {
 	config         azureInputConfig
 	log            *logp.Logger
@@ -63,7 +65,7 @@ func (in *eventHubInputV1) Run(
 ) error {
 	var err error
 
-	// Create pipelineClient for publishing events and receive notification of their ACKs.
+	// Create pipelineClient for publishing events.
 	in.pipelineClient, err = createPipelineClient(pipeline)
 	if err != nil {
 		return fmt.Errorf("failed to create pipeline pipelineClient: %w", err)
@@ -76,7 +78,8 @@ func (in *eventHubInputV1) Run(
 
 	ctx := v2.GoContextFromCanceler(inputContext.Cancelation)
 
-	// Initialize everything for this run
+	// Initialize the input components
+	// in preparation for the main run loop.
 	err = in.setup(ctx)
 	if err != nil {
 		return err
@@ -92,6 +95,12 @@ func (in *eventHubInputV1) Run(
 	return nil
 }
 
+// setup initializes the input components.
+// +// The main components are: +// 1. Azure Storage Leaser / Checkpointer +// 2. Event Processor Host +// 3. Message handler func (in *eventHubInputV1) setup(ctx context.Context) error { // ---------------------------------------------------- @@ -114,6 +123,8 @@ func (in *eventHubInputV1) setup(ctx context.Context) error { return err } + in.log.Infof("storage leaser checkpointer created for container %q", in.config.SAContainer) + // ------------------------------------------------ // 2 — Create a new event processor host // ------------------------------------------------ @@ -141,6 +152,8 @@ func (in *eventHubInputV1) setup(ctx context.Context) error { return err } + in.log.Infof("event processor host created for event hub %q", in.config.EventHubName) + // ------------------------------------------------ // 3 — Register a message handler // ------------------------------------------------ @@ -148,21 +161,25 @@ func (in *eventHubInputV1) setup(ctx context.Context) error { // register a message handler -- many can be registered handlerID, err := in.processor.RegisterHandler(ctx, func(c context.Context, e *eventhub.Event) error { - in.log.Debugw("received event", "ts", time.Now().String()) - var onEventErr error - // partitionID is not yet mapped in the azure-eventhub sdk - ok := in.processEvents(e, "") - if !ok { - onEventErr = errors.New("OnEvent function returned false. Stopping input worker") - in.log.Error(onEventErr.Error()) - - // FIXME: should we stop the processor here? - // in.Stop() - } - - //time.Sleep(5 * time.Second) - - return onEventErr + in.log.Debugw("received event") + //var onEventErr error + //ok := in.processEvents(e) + //if !ok { + // onEventErr = errors.New("OnEvent function returned false. Stopping input worker") + // in.log.Error(onEventErr.Error()) + // + // // FIXME: should we stop the processor here? + // // in.Stop() + //} + // + //return onEventErr + + // FIXME: + // No function in `processEvents()` returns errors: + // can we safely ignore the return value? + in.processEvents(e) + + return nil }) if err != nil { in.log.Errorw("error registering handler", "error", err) @@ -204,7 +221,7 @@ func (in *eventHubInputV1) run(ctx context.Context) error { return ctx.Err() } -func (in *eventHubInputV1) processEvents(event *eventhub.Event, partitionID string) bool { +func (in *eventHubInputV1) processEvents(event *eventhub.Event) { processingStartTime := time.Now() eventHubMetadata := mapstr.M{ // The `partition_id` is not available in the @@ -224,21 +241,6 @@ func (in *eventHubInputV1) processEvents(event *eventhub.Event, partitionID stri _, _ = eventHubMetadata.Put("sequence_number", event.SystemProperties.SequenceNumber) _, _ = eventHubMetadata.Put("enqueued_time", event.SystemProperties.EnqueuedTime) - //ok := in.outlet.OnEvent(beat.Event{ - // // this is the default value for the @timestamp field; usually the ingest - // // pipeline replaces it with a value in the payload. - // Timestamp: processingStartTime, - // Fields: mapstr.M{ - // "message": record, - // "azure": azure, - // }, - // Private: event.Data, - //}) - //if !ok { - // in.metrics.processingTime.Update(time.Since(processingStartTime).Nanoseconds()) - // return ok - //} - event := beat.Event{ // this is the default value for the @timestamp field; usually the ingest // pipeline replaces it with a value in the payload. @@ -250,14 +252,21 @@ func (in *eventHubInputV1) processEvents(event *eventhub.Event, partitionID stri Private: event.Data, } - // FIXME: error handling on publish? 
+ // FIXME: // The previous implementation was using an Outlet - // to send the event to the pipeline. + // to send the event to the pipeline (an input v1 + // thing). + // + // The input v2 equivalent is to use the `Publish()` + // function on a `beat.Client` to publish the event + // to the pipeline. + // // The Outlet.OnEvent() function returns a `false` - // value if the outlet is closed. + // value if the outlet is closed. When this happens, + // the input worker should stop processing events. + // + // Is there a v2 equivalent for this? // - // Should the new implementation use the `Publish()` - // function do something? in.pipelineClient.Publish(event) in.metrics.sentEvents.Inc() @@ -265,11 +274,39 @@ func (in *eventHubInputV1) processEvents(event *eventhub.Event, partitionID stri in.metrics.processedMessages.Inc() in.metrics.processingTime.Update(time.Since(processingStartTime).Nanoseconds()) - - return true } -// unpackRecords will try to split the message into multiple ones based on the group field provided by the configuration +// unpackRecords will try to split the message into multiple ones based on +// the group field provided by the configuration. +// +// `unpackRecords()` supports two types of messages: +// +// 1. A message with an object with a `records` +// field containing a list of events. +// 2. A message with a single event. +// +// (1) Here is an example of a message containing an object with +// a `records` field: +// +// { +// "records": [ +// { +// "time": "2019-12-17T13:43:44.4946995Z", +// "test": "this is some message" +// } +// ] +// } +// +// (2) Here is an example of a message with a single event: +// +// { +// "time": "2019-12-17T13:43:44.4946995Z", +// "test": "this is some message" +// } +// +// The Diagnostic Settings uses the single object with `records` +// fields (1) when exporting data from an Azure service to an +// event hub. This is the most common case. 
func (in *eventHubInputV1) unpackRecords(bMessage []byte) []string { var mapObject map[string][]interface{} var messages []string From f91f7da78781ea138359611d9f59f572544387a5 Mon Sep 17 00:00:00 2001 From: Maurizio Branca Date: Tue, 21 May 2024 13:32:58 +0200 Subject: [PATCH 14/41] Update tests --- x-pack/filebeat/input/azureeventhub/input_test.go | 5 +---- x-pack/filebeat/input/azureeventhub/metrics_test.go | 5 +---- 2 files changed, 2 insertions(+), 8 deletions(-) diff --git a/x-pack/filebeat/input/azureeventhub/input_test.go b/x-pack/filebeat/input/azureeventhub/input_test.go index 21d66b4522e..64f0a1b68e6 100644 --- a/x-pack/filebeat/input/azureeventhub/input_test.go +++ b/x-pack/filebeat/input/azureeventhub/input_test.go @@ -83,10 +83,7 @@ func TestProcessEvents(t *testing.T) { Data: []byte(msg), SystemProperties: &properties, } - ok := input.processEvents(&ev, "0") - if !ok { - t.Fatal("OnEvent function returned false") - } + input.processEvents(&ev) assert.Equal(t, len(fakePipelineClient.publishedEvents), 1) message, err := fakePipelineClient.publishedEvents[0].Fields.GetValue("message") diff --git a/x-pack/filebeat/input/azureeventhub/metrics_test.go b/x-pack/filebeat/input/azureeventhub/metrics_test.go index da1a7ae84f4..7ae3f863da0 100644 --- a/x-pack/filebeat/input/azureeventhub/metrics_test.go +++ b/x-pack/filebeat/input/azureeventhub/metrics_test.go @@ -137,10 +137,7 @@ func TestInputMetricsEventsReceived(t *testing.T) { SystemProperties: &properties, } - ok := input.processEvents(&ev, "0") - if !ok { - t.Fatal("OnEvent function returned false") - } + input.processEvents(&ev) if ok := assert.Equal(t, len(tc.expectedRecords), len(fakeClient.publishedEvents)); ok { for i, e := range fakeClient.publishedEvents { From 9246f36df7921790c1f2af5d112e190aa5f5af88 Mon Sep 17 00:00:00 2001 From: Maurizio Branca Date: Tue, 21 May 2024 17:26:04 +0200 Subject: [PATCH 15/41] Cleanup --- .../filebeat/input/azureeventhub/v1_input.go | 49 ++++--------------- 1 file changed, 10 insertions(+), 39 deletions(-) diff --git a/x-pack/filebeat/input/azureeventhub/v1_input.go b/x-pack/filebeat/input/azureeventhub/v1_input.go index 5ccd5a6e704..e73d72ff0da 100644 --- a/x-pack/filebeat/input/azureeventhub/v1_input.go +++ b/x-pack/filebeat/input/azureeventhub/v1_input.go @@ -159,28 +159,11 @@ func (in *eventHubInputV1) setup(ctx context.Context) error { // ------------------------------------------------ // register a message handler -- many can be registered - handlerID, err := in.processor.RegisterHandler(ctx, - func(c context.Context, e *eventhub.Event) error { - in.log.Debugw("received event") - //var onEventErr error - //ok := in.processEvents(e) - //if !ok { - // onEventErr = errors.New("OnEvent function returned false. Stopping input worker") - // in.log.Error(onEventErr.Error()) - // - // // FIXME: should we stop the processor here? - // // in.Stop() - //} - // - //return onEventErr - - // FIXME: - // No function in `processEvents()` returns errors: - // can we safely ignore the return value? 
- in.processEvents(e) - - return nil - }) + handlerID, err := in.processor.RegisterHandler(ctx, func(c context.Context, e *eventhub.Event) error { + in.log.Debugw("received event") + in.processEvents(e) + return nil + }) if err != nil { in.log.Errorw("error registering handler", "error", err) return err @@ -242,8 +225,11 @@ func (in *eventHubInputV1) processEvents(event *eventhub.Event) { _, _ = eventHubMetadata.Put("enqueued_time", event.SystemProperties.EnqueuedTime) event := beat.Event{ - // this is the default value for the @timestamp field; usually the ingest - // pipeline replaces it with a value in the payload. + // We set the timestamp to the processing + // start time as default value. + // + // Usually, the ingest pipeline replaces it + // with a value in the payload. Timestamp: processingStartTime, Fields: mapstr.M{ "message": record, @@ -252,21 +238,6 @@ func (in *eventHubInputV1) processEvents(event *eventhub.Event) { Private: event.Data, } - // FIXME: - // The previous implementation was using an Outlet - // to send the event to the pipeline (an input v1 - // thing). - // - // The input v2 equivalent is to use the `Publish()` - // function on a `beat.Client` to publish the event - // to the pipeline. - // - // The Outlet.OnEvent() function returns a `false` - // value if the outlet is closed. When this happens, - // the input worker should stop processing events. - // - // Is there a v2 equivalent for this? - // in.pipelineClient.Publish(event) in.metrics.sentEvents.Inc() From a9d1ad314a27e63189a0045592c65a47f379d6aa Mon Sep 17 00:00:00 2001 From: Maurizio Branca Date: Wed, 22 May 2024 10:09:36 +0200 Subject: [PATCH 16/41] Clarify handler tradeoffs The existing input version does not handle publishing acks from the Beats pipeline. The input API v1 does not seem to offer this feature. With the transition to the input API v2, we have acks management. However, the legacy event hub SDK internally updates the checkpoint info after a successful handler call, and does not seem to offer hooks for acks management. Since the new modern SDK offers better checkpoint management, we keep the current behavior intact, and we'll implement ACKs in the event hub input v2. --- x-pack/filebeat/input/azureeventhub/v1_input.go | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/x-pack/filebeat/input/azureeventhub/v1_input.go b/x-pack/filebeat/input/azureeventhub/v1_input.go index e73d72ff0da..cf01eb0b8e7 100644 --- a/x-pack/filebeat/input/azureeventhub/v1_input.go +++ b/x-pack/filebeat/input/azureeventhub/v1_input.go @@ -160,8 +160,23 @@ func (in *eventHubInputV1) setup(ctx context.Context) error { // register a message handler -- many can be registered handlerID, err := in.processor.RegisterHandler(ctx, func(c context.Context, e *eventhub.Event) error { - in.log.Debugw("received event") + + // Take the event message from the event hub, + // creates and publishes one (or more) events + // to the beats pipeline. in.processEvents(e) + + // Why is this function always returning no error? + // + // The legacy SDK does not offer hooks to control + // checkpointing (it internally updates the checkpoint + // info after a successful handler execution). + // + // So we are keeping the existing behaviour (do not + // handle publish acks). + // + // On shutdown, Filebeat stops the input, waits for + // the output to process all the events in the queue. 
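+		//
+		// For reference, a hedged sketch of what ACK-aware publishing
+		// could look like once checkpointing moves to the modern SDK.
+		// The helper names below are assumptions (the exact acker API
+		// varies across Beats versions) and this is not part of this
+		// change:
+		//
+		//	client, err := pipeline.ConnectWith(beat.ClientConfig{
+		//		EventListener: acker.LastEventPrivateReporter(
+		//			func(acked int, private interface{}) {
+		//				// private carries the Private field of the last
+		//				// ACKed event; the v2 processor could update its
+		//				// checkpoint here.
+		//			}),
+		//	})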
return nil }) if err != nil { From 18455fd6aa70bea5686b66e0e645e8cc7be60634 Mon Sep 17 00:00:00 2001 From: Maurizio Branca Date: Wed, 22 May 2024 10:28:06 +0200 Subject: [PATCH 17/41] Cleanup --- x-pack/filebeat/input/azureeventhub/metrics_test.go | 6 ------ x-pack/filebeat/input/azureeventhub/v1_input.go | 6 +++--- 2 files changed, 3 insertions(+), 9 deletions(-) diff --git a/x-pack/filebeat/input/azureeventhub/metrics_test.go b/x-pack/filebeat/input/azureeventhub/metrics_test.go index 7ae3f863da0..52b9f008f5c 100644 --- a/x-pack/filebeat/input/azureeventhub/metrics_test.go +++ b/x-pack/filebeat/input/azureeventhub/metrics_test.go @@ -117,12 +117,6 @@ func TestInputMetricsEventsReceived(t *testing.T) { reg := monitoring.NewRegistry() metrics := newInputMetrics("test", reg) - //// Stub outlet for receiving events generated by the input. - //o := &stubOutleter{} - //out, err := newStubOutlet(o) - //if err != nil { - // t.Fatal(err) - //} fakeClient := fakeClient{} input := eventHubInputV1{ diff --git a/x-pack/filebeat/input/azureeventhub/v1_input.go b/x-pack/filebeat/input/azureeventhub/v1_input.go index cf01eb0b8e7..43573c812b6 100644 --- a/x-pack/filebeat/input/azureeventhub/v1_input.go +++ b/x-pack/filebeat/input/azureeventhub/v1_input.go @@ -160,9 +160,9 @@ func (in *eventHubInputV1) setup(ctx context.Context) error { // register a message handler -- many can be registered handlerID, err := in.processor.RegisterHandler(ctx, func(c context.Context, e *eventhub.Event) error { - + // Take the event message from the event hub, - // creates and publishes one (or more) events + // creates and publishes one (or more) events // to the beats pipeline. in.processEvents(e) @@ -171,7 +171,7 @@ func (in *eventHubInputV1) setup(ctx context.Context) error { // The legacy SDK does not offer hooks to control // checkpointing (it internally updates the checkpoint // info after a successful handler execution). - // + // // So we are keeping the existing behaviour (do not // handle publish acks). // From 5cd781f6b606d5262e2501aa187b94b84e343132 Mon Sep 17 00:00:00 2001 From: Maurizio Branca Date: Wed, 22 May 2024 17:37:29 +0200 Subject: [PATCH 18/41] Update x-pack/filebeat/input/azureeventhub/v1_input_test.go Co-authored-by: Tiago Queiroz --- x-pack/filebeat/input/azureeventhub/v1_input_test.go | 7 ------- 1 file changed, 7 deletions(-) diff --git a/x-pack/filebeat/input/azureeventhub/v1_input_test.go b/x-pack/filebeat/input/azureeventhub/v1_input_test.go index f288a5f852f..e69de29bb2d 100644 --- a/x-pack/filebeat/input/azureeventhub/v1_input_test.go +++ b/x-pack/filebeat/input/azureeventhub/v1_input_test.go @@ -1,7 +0,0 @@ -// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one -// or more contributor license agreements. Licensed under the Elastic License; -// you may not use this file except in compliance with the Elastic License. 
- -//go:build !aix - -package azureeventhub From cc1d24512dad1a0d8f09055c12cb6e4d49c18ced Mon Sep 17 00:00:00 2001 From: Maurizio Branca Date: Wed, 22 May 2024 17:59:59 +0200 Subject: [PATCH 19/41] Remove a leftover empty file --- x-pack/filebeat/input/azureeventhub/v1_input_test.go | 0 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 x-pack/filebeat/input/azureeventhub/v1_input_test.go diff --git a/x-pack/filebeat/input/azureeventhub/v1_input_test.go b/x-pack/filebeat/input/azureeventhub/v1_input_test.go deleted file mode 100644 index e69de29bb2d..00000000000 From cfbf6bb6f467e65eac78bc7bac9fb061db4a7ee0 Mon Sep 17 00:00:00 2001 From: Maurizio Branca Date: Mon, 20 May 2024 17:19:27 +0200 Subject: [PATCH 20/41] Rename engine to processor --- x-pack/filebeat/input/azureeventhub/config.go | 7 + x-pack/filebeat/input/azureeventhub/input.go | 14 +- .../filebeat/input/azureeventhub/v2_input.go | 323 ++++++++++++++++++ 3 files changed, 343 insertions(+), 1 deletion(-) create mode 100644 x-pack/filebeat/input/azureeventhub/v2_input.go diff --git a/x-pack/filebeat/input/azureeventhub/config.go b/x-pack/filebeat/input/azureeventhub/config.go index 80c2a905162..c5bb3316428 100644 --- a/x-pack/filebeat/input/azureeventhub/config.go +++ b/x-pack/filebeat/input/azureeventhub/config.go @@ -27,6 +27,8 @@ type azureInputConfig struct { OverrideEnvironment string `config:"resource_manager_endpoint"` // cleanup the log JSON input for known issues, options: SINGLE_QUOTES, NEW_LINES SanitizeOptions []string `config:"sanitize_options"` + // Processor version to use (v1 or v2). Default is v1. + ProcessorVersion string `config:"processor_version"` } const ephContainerName = "filebeat" @@ -40,6 +42,7 @@ func (conf *azureInputConfig) Validate() error { if conf.EventHubName == "" { return errors.New("no event hub name configured") } + // FIXME: this check applies only to processor v1 if conf.SAName == "" || conf.SAKey == "" { return errors.New("no storage account or storage account key configured") } @@ -73,6 +76,10 @@ func (conf *azureInputConfig) Validate() error { } } + if conf.ProcessorVersion == "" { + conf.ProcessorVersion = "v1" + } + return nil } diff --git a/x-pack/filebeat/input/azureeventhub/input.go b/x-pack/filebeat/input/azureeventhub/input.go index a12083f2abe..301c7062191 100644 --- a/x-pack/filebeat/input/azureeventhub/input.go +++ b/x-pack/filebeat/input/azureeventhub/input.go @@ -68,7 +68,19 @@ func (m *eventHubInputManager) Create(cfg *conf.C) (v2.Input, error) { return nil, fmt.Errorf("reading %s input config: %w", inputName, err) } - return newEventHubInputV1(config, m.log) + switch config.ProcessorVersion { + case "v1": + return newEventHubInputV1(config, m.log) + case "v2": + return newEventHubInputV2(config, m.log) + default: + return nil, fmt.Errorf("invalid azure-eventhub processor version: %s", config.ProcessorVersion) + } + + //return &azureInput{ + // config: config, + // log: logp.NewLogger(fmt.Sprintf("%s input", inputName)).With("connection string", stripConnectionString(config.ConnectionString)), + //}, nil } func createPipelineClient(pipeline beat.Pipeline) (beat.Client, error) { diff --git a/x-pack/filebeat/input/azureeventhub/v2_input.go b/x-pack/filebeat/input/azureeventhub/v2_input.go new file mode 100644 index 00000000000..889eef9f606 --- /dev/null +++ b/x-pack/filebeat/input/azureeventhub/v2_input.go @@ -0,0 +1,323 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. 
Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. + +//go:build !aix + +package azureeventhub + +import ( + "context" + "encoding/json" + "errors" + "fmt" + "time" + + "github.com/Azure/azure-sdk-for-go/sdk/messaging/azeventhubs" + "github.com/Azure/azure-sdk-for-go/sdk/messaging/azeventhubs/checkpoints" + "github.com/Azure/azure-sdk-for-go/sdk/storage/azblob/container" + + v2 "github.com/elastic/beats/v7/filebeat/input/v2" + "github.com/elastic/beats/v7/libbeat/beat" + "github.com/elastic/elastic-agent-libs/logp" + "github.com/elastic/elastic-agent-libs/mapstr" +) + +type eventHubInputV2 struct { + config azureInputConfig + log *logp.Logger + metrics *inputMetrics + checkpointStore *checkpoints.BlobStore + consumerClient *azeventhubs.ConsumerClient + pipelineClient beat.Client +} + +func newEventHubInputV2(config azureInputConfig, log *logp.Logger) (v2.Input, error) { + return &eventHubInputV2{ + config: config, + log: log.Named(inputName), + }, nil +} + +func (in *eventHubInputV2) Name() string { + return inputName +} + +func (in *eventHubInputV2) Test(v2.TestContext) error { + return nil +} + +func (in *eventHubInputV2) Run( + inputContext v2.Context, + pipeline beat.Pipeline, +) error { + var err error + + ctx := v2.GoContextFromCanceler(inputContext.Cancelation) + + // Create pipelineClient for publishing events and receive notification of their ACKs. + in.pipelineClient, err = createPipelineClient(pipeline) + if err != nil { + return fmt.Errorf("failed to create pipeline pipelineClient: %w", err) + } + defer in.pipelineClient.Close() + + // Setup input metrics + inputMetrics := newInputMetrics(inputContext.ID, nil) + defer inputMetrics.Close() + in.metrics = inputMetrics + + // Initialize the components needed to process events, in particular + // the consumerClient. + err = in.setup(ctx) + if err != nil { + return err + } + defer in.consumerClient.Close(context.Background()) + + // Start the main run loop + in.run(ctx) + + return nil +} + +func (in *eventHubInputV2) setup(ctx context.Context) error { + // FIXME: check more pipelineClient creation options. + blobContainerClient, err := container.NewClientFromConnectionString( + in.config.SAConnectionString, + in.config.SAContainer, + nil, + ) + if err != nil { + return fmt.Errorf("failed to create blob container pipelineClient: %w", err) + } + + checkpointStore, err := checkpoints.NewBlobStore(blobContainerClient, nil) + if err != nil { + return fmt.Errorf("failed to create checkpoint store: %w", err) + } + in.checkpointStore = checkpointStore + + consumerClient, err := azeventhubs.NewConsumerClientFromConnectionString( + in.config.ConnectionString, + in.config.EventHubName, + in.config.ConsumerGroup, + nil, + ) + if err != nil { + return fmt.Errorf("failed to create consumer pipelineClient: %w", err) + } + in.consumerClient = consumerClient + + return nil +} + +func (in *eventHubInputV2) run(ctx context.Context) { + processor, err := azeventhubs.NewProcessor( + in.consumerClient, + in.checkpointStore, + nil, + ) + if err != nil { + in.log.Errorw("error creating processor", "error", err) + return + } + + // Run in the background, launching goroutines to process each partition + go in.workersLoop(processor) + + if err := processor.Run(ctx); err != nil { + // FIXME: `Run()` returns an error when the processor thinks it's unrecoverable. + // We should check the error and decide if we want to retry or not. Should + // we add an exponential backoff and retry mechanism? 
+
+		in.log.Errorw("error running processor", "error", err)
+	}
+}
+
+func (in *eventHubInputV2) workersLoop(processor *azeventhubs.Processor) {
+	for {
+		processorPartitionClient := processor.NextPartitionClient(context.TODO())
+		if processorPartitionClient == nil {
+			// Processor has stopped
+			break
+		}
+
+		go func() {
+			in.log.Infow("starting a partition worker", "partition", processorPartitionClient.PartitionID())
+
+			if err := in.processEventsForPartition(processorPartitionClient); err != nil {
+				// FIXME: it seems we always get an error, even when the processor is stopped.
+				in.log.Infow(
+					"stopping processing events for partition",
+					"reason", err,
+					"partition", processorPartitionClient.PartitionID(),
+				)
+			}
+
+			in.log.Infow(
+				"partition worker exited",
+				"partition", processorPartitionClient.PartitionID(),
+			)
+		}()
+	}
+}
+
+// processEventsForPartition shows the typical pattern for processing a partition.
+func (in *eventHubInputV2) processEventsForPartition(partitionClient *azeventhubs.ProcessorPartitionClient) error {
+	// 1. [BEGIN] Initialize any partition specific resources for your application.
+	// 2. [CONTINUOUS] Loop, calling ReceiveEvents() and UpdateCheckpoint().
+	// 3. [END] Cleanup any resources.
+	partitionID := partitionClient.PartitionID()
+
+	defer func() {
+		// 3/3 [END] Do cleanup here, like shutting down database clients
+		// or other resources used for processing this partition.
+		shutdownPartitionResources(partitionClient)
+	}()
+
+	// 1/3 [BEGIN] Initialize any partition specific resources for your application.
+	if err := initializePartitionResources(partitionID); err != nil {
+		return err
+	}
+
+	// 2/3 [CONTINUOUS] Receive events, checkpointing as needed using UpdateCheckpoint.
+	for {
+		// Wait for up to 100 events or until the receive timeout expires, then return whatever we collected during that time.
+		receiveCtx, cancelReceive := context.WithTimeout(context.TODO(), 10*time.Second)
+		events, err := partitionClient.ReceiveEvents(receiveCtx, 100, nil)
+		cancelReceive()
+
+		if err != nil && !errors.Is(err, context.DeadlineExceeded) {
+			var eventHubError *azeventhubs.Error
+
+			if errors.As(err, &eventHubError) && eventHubError.Code == azeventhubs.ErrorCodeOwnershipLost {
+				return nil
+			}
+
+			return err
+		}
+
+		if len(events) == 0 {
+			continue
+		}
+
+		in.log.Debugw("received events", "partition", partitionID)
+
+		err = in.processReceivedEvents(events)
+		if err != nil {
+			return fmt.Errorf("error processing received events: %w", err)
+		}
+
+		in.log.Debugw("updating checkpoint information", "partition", partitionID)
+
+		// Updates the checkpoint with the latest event received. If processing needs to restart
+		// it will restart from this point, automatically.
+		if err := partitionClient.UpdateCheckpoint(context.TODO(), events[len(events)-1], nil); err != nil {
+			in.log.Errorw("error updating checkpoint", "error", err)
+			return err
+		}
+
+		in.log.Debugw("checkpoint updated", "partition", partitionID)
+	}
+}
+
+func (in *eventHubInputV2) processReceivedEvents(receivedEvents []*azeventhubs.ReceivedEventData) error {
+	processingStartTime := time.Now()
+	azure := mapstr.M{
+		// The partition ID is not available.
+		// "partition_id":   partitionID,
+		"eventhub":       in.config.EventHubName,
+		"consumer_group": in.config.ConsumerGroup,
+	}
+
+	for _, receivedEventData := range receivedEvents {
+		// A single event can contain multiple records. We create a new event for each record.
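+		// For example, the Diagnostic Settings export wraps multiple
+		// log records in a single message: {"records": [...]}.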
+		records := in.unpackRecords(receivedEventData.Body)
+
+		for _, record := range records {
+			_, _ = azure.Put("offset", receivedEventData.Offset)
+			_, _ = azure.Put("sequence_number", receivedEventData.SequenceNumber)
+			_, _ = azure.Put("enqueued_time", receivedEventData.EnqueuedTime)
+
+			event := beat.Event{
+				// this is the default value for the @timestamp field; usually the ingest
+				// pipeline replaces it with a value in the payload.
+				Timestamp: processingStartTime,
+				Fields: mapstr.M{
+					"message": record,
+					"azure":   azure,
+				},
+				Private: receivedEventData.Body,
+			}
+
+			in.pipelineClient.Publish(event)
+		}
+	}
+
+	return nil
+}
+
+func (in *eventHubInputV2) unpackRecords(bMessage []byte) []string {
+	var mapObject map[string][]interface{}
+	var records []string
+
+	// Clean up the message for known issues [1] where Azure services produce malformed JSON documents.
+	// Sanitization occurs if options are available and the message contains an invalid JSON.
+	//
+	// [1]: https://learn.microsoft.com/en-us/answers/questions/1001797/invalid-json-logs-produced-for-function-apps
+	if len(in.config.SanitizeOptions) != 0 && !json.Valid(bMessage) {
+		bMessage = sanitize(bMessage, in.config.SanitizeOptions...)
+		in.metrics.sanitizedMessages.Inc()
+	}
+
+	// check if the message is a "records" object containing a list of events
+	err := json.Unmarshal(bMessage, &mapObject)
+	if err == nil {
+		if len(mapObject[expandEventListFromField]) > 0 {
+			for _, ms := range mapObject[expandEventListFromField] {
+				js, err := json.Marshal(ms)
+				if err == nil {
+					records = append(records, string(js))
+					in.metrics.receivedEvents.Inc()
+				} else {
+					in.log.Errorw(fmt.Sprintf("serializing message %s", ms), "error", err)
+				}
+			}
+		}
+	} else {
+		in.log.Debugf("deserializing multiple messages to a `records` object returning error: %s", err)
+		// in some cases the message is an array
+		var arrayObject []interface{}
+		err = json.Unmarshal(bMessage, &arrayObject)
+		if err != nil {
+			// return entire message
+			in.log.Debugf("deserializing multiple messages to an array returning error: %s", err)
+			in.metrics.decodeErrors.Inc()
+			return []string{string(bMessage)}
+		}
+
+		for _, ms := range arrayObject {
+			js, err := json.Marshal(ms)
+			if err == nil {
+				records = append(records, string(js))
+				in.metrics.receivedEvents.Inc()
+			} else {
+				in.log.Errorw(fmt.Sprintf("serializing message %s", ms), "error", err)
+			}
+		}
+	}
+
+	return records
+}
+
+func initializePartitionResources(partitionID string) error {
+	// initialize things that might be partition specific, like a
+	// database connection.
+	return nil
+}
+
+func shutdownPartitionResources(partitionClient *azeventhubs.ProcessorPartitionClient) {
+	// Each PartitionClient holds onto an external resource and should be closed if you're
+	// not processing them anymore.
+	defer partitionClient.Close(context.TODO())
+}

From 20db610f786523cbc56edd49c42f9d37a240a06a Mon Sep 17 00:00:00 2001
From: Maurizio Branca
Date: Mon, 20 May 2024 17:20:21 +0200
Subject: [PATCH 21/41] Enable v2 config option in the Filebeat module

---
 .../module/azure/activitylogs/config/azure-eventhub.yml   | 7 +++++++
 x-pack/filebeat/module/azure/activitylogs/manifest.yml    | 4 +++-
 x-pack/filebeat/module/azure/auditlogs/manifest.yml       | 3 +++
 x-pack/filebeat/module/azure/platformlogs/manifest.yml    | 3 +++
 x-pack/filebeat/module/azure/signinlogs/manifest.yml      | 4 +++-
 5 files changed, 19 insertions(+), 2 deletions(-)

diff --git a/x-pack/filebeat/module/azure/activitylogs/config/azure-eventhub.yml b/x-pack/filebeat/module/azure/activitylogs/config/azure-eventhub.yml
index c76ce0872f4..b87bb81a0c1 100644
--- a/x-pack/filebeat/module/azure/activitylogs/config/azure-eventhub.yml
+++ b/x-pack/filebeat/module/azure/activitylogs/config/azure-eventhub.yml
@@ -18,6 +18,9 @@ storage_account: {{ .storage_account }}
 {{ if .storage_account_key }}
 storage_account_key: {{ .storage_account_key }}
 {{ end }}
+{{ if .storage_account_connection_string }}
+storage_account_connection_string: {{ .storage_account_connection_string }}
+{{ end }}
 
 {{ if .storage_account_container }}
 storage_account_container: {{ .storage_account_container }}
@@ -34,6 +37,10 @@ resource_manager_endpoint: {{ .resource_manager_endpoint }}
 tags: {{.tags | tojson}}
 publisher_pipeline.disable_host: {{ inList .tags "forwarded" }}
 
+{{ if .processor_version }}
+processor_version: {{ .processor_version }}
+{{ end }}
+
 processors:
   - add_fields:
       target: ''
diff --git a/x-pack/filebeat/module/azure/activitylogs/manifest.yml b/x-pack/filebeat/module/azure/activitylogs/manifest.yml
index 709b70edc63..ee65033c6f4 100644
--- a/x-pack/filebeat/module/azure/activitylogs/manifest.yml
+++ b/x-pack/filebeat/module/azure/activitylogs/manifest.yml
@@ -10,11 +10,13 @@ var:
   - name: connection_string
   - name: storage_account
   - name: storage_account_key
+  - name: storage_account_connection_string
   - name: storage_account_container
   - name: resource_manager_endpoint
  - name: tags
     default: [forwarded]
-
+  - name: processor_version
+    default: "v1"
 ingest_pipeline:
   - ingest/pipeline.yml
   - ../azure-shared-pipeline.yml
diff --git a/x-pack/filebeat/module/azure/auditlogs/manifest.yml b/x-pack/filebeat/module/azure/auditlogs/manifest.yml
index 6727ce8a33f..28c322ccf70 100644
--- a/x-pack/filebeat/module/azure/auditlogs/manifest.yml
+++ b/x-pack/filebeat/module/azure/auditlogs/manifest.yml
@@ -10,10 +10,13 @@ var:
   - name: connection_string
   - name: storage_account
   - name: storage_account_key
+  - name: storage_account_connection_string
   - name: storage_account_container
   - name: resource_manager_endpoint
   - name: tags
     default: [forwarded]
+  - name: processor_version
+    default: "v1"
 
 ingest_pipeline:
   - ingest/pipeline.yml
diff --git a/x-pack/filebeat/module/azure/platformlogs/manifest.yml b/x-pack/filebeat/module/azure/platformlogs/manifest.yml
index f2a4864a90d..a0abd4ffc62 100644
--- a/x-pack/filebeat/module/azure/platformlogs/manifest.yml
+++ b/x-pack/filebeat/module/azure/platformlogs/manifest.yml
@@ -9,10 +9,13 @@ var:
   - name: connection_string
   - name: storage_account
   - name: storage_account_key
+  - name: storage_account_connection_string
   - name: storage_account_container
   - name: resource_manager_endpoint
   - name: tags
     default: [forwarded]
+  - name: processor_version
+    default: "v1"
 
 ingest_pipeline:
   - ingest/pipeline.yml
diff --git 
a/x-pack/filebeat/module/azure/signinlogs/manifest.yml b/x-pack/filebeat/module/azure/signinlogs/manifest.yml
index 01b6bc36593..b512fa4677a 100644
--- a/x-pack/filebeat/module/azure/signinlogs/manifest.yml
+++ b/x-pack/filebeat/module/azure/signinlogs/manifest.yml
@@ -10,11 +10,13 @@ var:
   - name: connection_string
   - name: storage_account
   - name: storage_account_key
+  - name: storage_account_connection_string
   - name: storage_account_container
   - name: resource_manager_endpoint
   - name: tags
     default: [forwarded]
-
+  - name: processor_version
+    default: "v1"
 ingest_pipeline:
   - ingest/pipeline.yml
   - ../azure-shared-pipeline.yml

From 9743e1d09ed0188a26fb3acd4fe57714fb4e10d0 Mon Sep 17 00:00:00 2001
From: Maurizio Branca
Date: Wed, 22 May 2024 14:59:27 +0200
Subject: [PATCH 22/41] Move message unpacking into the `messageDecoder`

---
 .../filebeat/input/azureeventhub/decoder.go   | 108 ++++++++++++++
 .../filebeat/input/azureeventhub/v1_input.go  | 134 +++++++-----------
 .../filebeat/input/azureeventhub/v2_input.go  |  13 +-
 3 files changed, 175 insertions(+), 80 deletions(-)
 create mode 100644 x-pack/filebeat/input/azureeventhub/decoder.go

diff --git a/x-pack/filebeat/input/azureeventhub/decoder.go b/x-pack/filebeat/input/azureeventhub/decoder.go
new file mode 100644
index 00000000000..c71cb45bfc0
--- /dev/null
+++ b/x-pack/filebeat/input/azureeventhub/decoder.go
@@ -0,0 +1,108 @@
+// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+// or more contributor license agreements. Licensed under the Elastic License;
+// you may not use this file except in compliance with the Elastic License.
+
+//go:build !aix
+
+package azureeventhub
+
+import (
+	"encoding/json"
+	"fmt"
+
+	"github.com/elastic/elastic-agent-libs/logp"
+)
+
+// messageDecoder decodes event hub messages into a list of records.
+type messageDecoder struct {
+	config  *azureInputConfig
+	log     *logp.Logger
+	metrics *inputMetrics
+}
+
+// Decode splits the message into multiple ones based on
+// the group field provided by the configuration.
+//
+// `messageDecoder` supports two types of messages:
+//
+// 1. A message with an object with a `records`
+// field containing a list of events.
+// 2. A message with a single event.
+//
+// (1) Here is an example of a message containing an object with
+// a `records` field:
+//
+//	{
+//	  "records": [
+//	    {
+//	      "time": "2019-12-17T13:43:44.4946995Z",
+//	      "test": "this is some message"
+//	    }
+//	  ]
+//	}
+//
+// (2) Here is an example of a message with a single event:
+//
+//	{
+//	  "time": "2019-12-17T13:43:44.4946995Z",
+//	  "test": "this is some message"
+//	}
+//
+// The Diagnostic Settings [^1] usually produces an object with a
+// `records` field (1) when exporting data to an
+// event hub. This is the most common case.
+//
+// [^1]: the Diagnostic Settings is the Azure component used
+// to export logs and metrics from an Azure service.
+func (u *messageDecoder) Decode(bMessage []byte) []string {
+	var mapObject map[string][]interface{}
+	var records []string
+
+	// Clean up the message for known issues [1] where Azure services produce malformed JSON documents.
+	// Sanitization occurs if options are available and the message contains an invalid JSON.
+	//
+	// [1]: https://learn.microsoft.com/en-us/answers/questions/1001797/invalid-json-logs-produced-for-function-apps
+	if len(u.config.SanitizeOptions) != 0 && !json.Valid(bMessage) {
+		bMessage = sanitize(bMessage, u.config.SanitizeOptions...)
+ u.metrics.sanitizedMessages.Inc() + } + + // check if the message is a "records" object containing a list of events + err := json.Unmarshal(bMessage, &mapObject) + if err == nil { + if len(mapObject[expandEventListFromField]) > 0 { + for _, ms := range mapObject[expandEventListFromField] { + js, err := json.Marshal(ms) + if err == nil { + records = append(records, string(js)) + u.metrics.receivedEvents.Inc() + } else { + u.log.Errorw(fmt.Sprintf("serializing message %s", ms), "error", err) + } + } + } + } else { + u.log.Debugf("deserializing multiple messages to a `records` object returning error: %s", err) + // in some cases the message is an array + var arrayObject []interface{} + err = json.Unmarshal(bMessage, &arrayObject) + if err != nil { + // return entire message + u.log.Debugf("deserializing multiple messages to an array returning error: %s", err) + u.metrics.decodeErrors.Inc() + return []string{string(bMessage)} + } + + for _, ms := range arrayObject { + js, err := json.Marshal(ms) + if err == nil { + records = append(records, string(js)) + u.metrics.receivedEvents.Inc() + } else { + u.log.Errorw(fmt.Sprintf("serializing message %s", ms), "error", err) + } + } + } + + return records +} diff --git a/x-pack/filebeat/input/azureeventhub/v1_input.go b/x-pack/filebeat/input/azureeventhub/v1_input.go index 43573c812b6..2e45507c0ca 100644 --- a/x-pack/filebeat/input/azureeventhub/v1_input.go +++ b/x-pack/filebeat/input/azureeventhub/v1_input.go @@ -8,7 +8,6 @@ package azureeventhub import ( "context" - "encoding/json" "fmt" "time" @@ -34,6 +33,7 @@ type eventHubInputV1 struct { metrics *inputMetrics processor *eph.EventProcessorHost pipelineClient beat.Client + messageDecoder messageDecoder } // newEventHubInputV1 creates a new instance of the Azure Event Hub input V1. @@ -76,6 +76,12 @@ func (in *eventHubInputV1) Run( in.metrics = newInputMetrics(inputContext.ID, nil) defer in.metrics.Close() + in.messageDecoder = messageDecoder{ + config: &in.config, + log: in.log, + metrics: in.metrics, + } + ctx := v2.GoContextFromCanceler(inputContext.Cancelation) // Initialize the input components @@ -232,7 +238,7 @@ func (in *eventHubInputV1) processEvents(event *eventhub.Event) { in.metrics.receivedMessages.Inc() in.metrics.receivedBytes.Add(uint64(len(event.Data))) - records := in.unpackRecords(event.Data) + records := in.messageDecoder.Decode(event.Data) for _, record := range records { _, _ = eventHubMetadata.Put("offset", event.SystemProperties.Offset) @@ -262,89 +268,59 @@ func (in *eventHubInputV1) processEvents(event *eventhub.Event) { in.metrics.processingTime.Update(time.Since(processingStartTime).Nanoseconds()) } -// unpackRecords will try to split the message into multiple ones based on -// the group field provided by the configuration. -// -// `unpackRecords()` supports two types of messages: -// -// 1. A message with an object with a `records` -// field containing a list of events. -// 2. A message with a single event. 
+//// unpackRecords will try to split the message into multiple ones based on the group field provided by the configuration +//func (in *eventHubInputV1) unpackRecords(bMessage []byte) []string { +// var mapObject map[string][]interface{} +// var messages []string // -// (1) Here is an example of a message containing an object with -// a `records` field: -// -// { -// "records": [ -// { -// "time": "2019-12-17T13:43:44.4946995Z", -// "test": "this is some message" -// } -// ] +// // Clean up the message for known issues [1] where Azure services produce malformed JSON documents. +// // Sanitization occurs if options are available and the message contains an invalid JSON. +// // +// // [1]: https://learn.microsoft.com/en-us/answers/questions/1001797/invalid-json-logs-produced-for-function-apps +// if len(in.config.SanitizeOptions) != 0 && !json.Valid(bMessage) { +// bMessage = sanitize(bMessage, in.config.SanitizeOptions...) +// in.metrics.sanitizedMessages.Inc() // } // -// (2) Here is an example of a message with a single event: +// // check if the message is a "records" object containing a list of events +// err := json.Unmarshal(bMessage, &mapObject) +// if err == nil { +// if len(mapObject[expandEventListFromField]) > 0 { +// for _, ms := range mapObject[expandEventListFromField] { +// js, err := json.Marshal(ms) +// if err == nil { +// messages = append(messages, string(js)) +// in.metrics.receivedEvents.Inc() +// } else { +// in.log.Errorw(fmt.Sprintf("serializing message %s", ms), "error", err) +// } +// } +// } +// } else { +// in.log.Debugf("deserializing multiple messages to a `records` object returning error: %s", err) +// // in some cases the message is an array +// var arrayObject []interface{} +// err = json.Unmarshal(bMessage, &arrayObject) +// if err != nil { +// // return entire message +// in.log.Debugf("deserializing multiple messages to an array returning error: %s", err) +// in.metrics.decodeErrors.Inc() +// return []string{string(bMessage)} +// } // -// { -// "time": "2019-12-17T13:43:44.4946995Z", -// "test": "this is some message" +// for _, ms := range arrayObject { +// js, err := json.Marshal(ms) +// if err == nil { +// messages = append(messages, string(js)) +// in.metrics.receivedEvents.Inc() +// } else { +// in.log.Errorw(fmt.Sprintf("serializing message %s", ms), "error", err) +// } +// } // } // -// The Diagnostic Settings uses the single object with `records` -// fields (1) when exporting data from an Azure service to an -// event hub. This is the most common case. -func (in *eventHubInputV1) unpackRecords(bMessage []byte) []string { - var mapObject map[string][]interface{} - var messages []string - - // Clean up the message for known issues [1] where Azure services produce malformed JSON documents. - // Sanitization occurs if options are available and the message contains an invalid JSON. - // - // [1]: https://learn.microsoft.com/en-us/answers/questions/1001797/invalid-json-logs-produced-for-function-apps - if len(in.config.SanitizeOptions) != 0 && !json.Valid(bMessage) { - bMessage = sanitize(bMessage, in.config.SanitizeOptions...) 
-		in.metrics.sanitizedMessages.Inc()
-	}
-
-	// check if the message is a "records" object containing a list of events
-	err := json.Unmarshal(bMessage, &mapObject)
-	if err == nil {
-		if len(mapObject[expandEventListFromField]) > 0 {
-			for _, ms := range mapObject[expandEventListFromField] {
-				js, err := json.Marshal(ms)
-				if err == nil {
-					messages = append(messages, string(js))
-					in.metrics.receivedEvents.Inc()
-				} else {
-					in.log.Errorw(fmt.Sprintf("serializing message %s", ms), "error", err)
-				}
-			}
-		}
-	} else {
-		in.log.Debugf("deserializing multiple messages to a `records` object returning error: %s", err)
-		// in some cases the message is an array
-		var arrayObject []interface{}
-		err = json.Unmarshal(bMessage, &arrayObject)
-		if err != nil {
-			// return entire message
-			in.log.Debugf("deserializing multiple messages to an array returning error: %s", err)
-			in.metrics.decodeErrors.Inc()
-			return []string{string(bMessage)}
-		}
-
-		for _, ms := range arrayObject {
-			js, err := json.Marshal(ms)
-			if err == nil {
-				messages = append(messages, string(js))
-				in.metrics.receivedEvents.Inc()
-			} else {
-				in.log.Errorw(fmt.Sprintf("serializing message %s", ms), "error", err)
-			}
-		}
-	}
-
-	return messages
-}
+
+//	return messages
+//}
 
 func getAzureEnvironment(overrideResManager string) (azure.Environment, error) {
 	// if no override is set then the azure public cloud is used
diff --git a/x-pack/filebeat/input/azureeventhub/v2_input.go b/x-pack/filebeat/input/azureeventhub/v2_input.go
index 889eef9f606..83c54412b21 100644
--- a/x-pack/filebeat/input/azureeventhub/v2_input.go
+++ b/x-pack/filebeat/input/azureeventhub/v2_input.go
@@ -30,6 +30,7 @@ type eventHubInputV2 struct {
 	checkpointStore *checkpoints.BlobStore
 	consumerClient  *azeventhubs.ConsumerClient
 	pipelineClient  beat.Client
+	messageDecoder  messageDecoder
 }
 
 func newEventHubInputV2(config azureInputConfig, log *logp.Logger) (v2.Input, error) {
@@ -67,6 +68,14 @@ func (in *eventHubInputV2) Run(
 	defer inputMetrics.Close()
 	in.metrics = inputMetrics
 
+	// Decode the messages from event hub into
+	// a `[]string`.
+	in.messageDecoder = messageDecoder{
+		config:  &in.config,
+		log:     in.log,
+		metrics: in.metrics,
+	}
+
 	// Initialize the components needed to process events, in particular
 	// the consumerClient.
 	err = in.setup(ctx)
@@ -221,6 +230,7 @@ func (in *eventHubInputV2) processEventsForPartition(partitionClient *azeventhub
 	}
 }
 
+// processReceivedEvents processes a batch of events received from the event hub.
 func (in *eventHubInputV2) processReceivedEvents(receivedEvents []*azeventhubs.ReceivedEventData) error {
 	processingStartTime := time.Now()
 	azure := mapstr.M{
@@ -232,7 +242,8 @@ func (in *eventHubInputV2) processReceivedEvents(receivedEvents []*azeventhubs.R
 
 	for _, receivedEventData := range receivedEvents {
 		// A single event can contain multiple records. We create a new event for each record.
-		records := in.unpackRecords(receivedEventData.Body)
+		//records := in.unpackRecords(receivedEventData.Body)
+		records := in.messageDecoder.Decode(receivedEventData.Body)
 
 		for _, record := range records {

From 0368febd221537bab70ca1db7b924a3d49b0dfa2 Mon Sep 17 00:00:00 2001
From: Maurizio Branca
Date: Wed, 22 May 2024 15:01:06 +0200
Subject: [PATCH 23/41] Retry starting the processor 30s after an error

I don't want to use `time.Sleep`; this is just a test.
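
The direction I have in mind is to lean on the libbeat backoff helper
instead of a fixed sleep. A rough sketch (the exact wiring into run()
is still to be decided):

    // Back off between processor restarts; jitter avoids
    // synchronized retries.
    processorRunBackoff := backoff.NewEqualJitterBackoff(
        ctx.Done(),
        10*time.Second,  // initial backoff
        120*time.Second, // max backoff
    )

    for ctx.Err() == nil {
        // ...create the processor...
        if err := processor.Run(ctx); err != nil {
            processorRunBackoff.Wait()
        }
    }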
---
 .../filebeat/input/azureeventhub/v2_input.go  | 41 +++++++++++--------
 1 file changed, 25 insertions(+), 16 deletions(-)

diff --git a/x-pack/filebeat/input/azureeventhub/v2_input.go b/x-pack/filebeat/input/azureeventhub/v2_input.go
index 83c54412b21..4468e83feff 100644
--- a/x-pack/filebeat/input/azureeventhub/v2_input.go
+++ b/x-pack/filebeat/input/azureeventhub/v2_input.go
@@ -122,25 +122,34 @@ func (in *eventHubInputV2) setup(ctx context.Context) error {
 }
 
 func (in *eventHubInputV2) run(ctx context.Context) {
-	processor, err := azeventhubs.NewProcessor(
-		in.consumerClient,
-		in.checkpointStore,
-		nil,
-	)
-	if err != nil {
-		in.log.Errorw("error creating processor", "error", err)
-		return
-	}
 
-	// Run in the background, launching goroutines to process each partition
-	go in.workersLoop(processor)
+	for ctx.Err() == nil {
+
+		processor, err := azeventhubs.NewProcessor(
+			in.consumerClient,
+			in.checkpointStore,
+			nil,
+		)
+		if err != nil {
+			in.log.Errorw("error creating processor", "error", err)
+			return
+		}
+
+		// Run in the background, launching goroutines to process each partition
+		go in.workersLoop(processor)
+
+		if err := processor.Run(ctx); err != nil {
+			// FIXME: `Run()` returns an error when the processor thinks it's unrecoverable.
+			// We should check the error and decide if we want to retry or not. Should
+			// we add a retry mechanism with exponential backoff?
+			in.log.Errorw("processor completed with an error", "error", err)
 
-	if err := processor.Run(ctx); err != nil {
-		// FIXME: `Run()` returns an error when the processor thinks it's unrecoverable.
-		// We should check the error and decide if we want to retry or not. Should
-		// we add an exponential backoff and retry mechanism?
-		in.log.Errorw("error running processor", "error", err)
+			time.Sleep(30 * time.Second)
+		}
+
+		in.log.Infow("run completed", "error", err)
 	}
+
 }

From f1f80c62e9c9603f072c1f7f31c67dc43eb3ca12 Mon Sep 17 00:00:00 2001
From: Maurizio Branca
Date: Wed, 22 May 2024 17:56:56 +0200
Subject: [PATCH 24/41] Add input metrics to input v2

---
 .../filebeat/input/azureeventhub/v2_input.go  | 127 ++++++++----------
 1 file changed, 54 insertions(+), 73 deletions(-)

diff --git a/x-pack/filebeat/input/azureeventhub/v2_input.go b/x-pack/filebeat/input/azureeventhub/v2_input.go
index 4468e83feff..2f51b641880 100644
--- a/x-pack/filebeat/input/azureeventhub/v2_input.go
+++ b/x-pack/filebeat/input/azureeventhub/v2_input.go
@@ -8,7 +8,6 @@ package azureeventhub
 
 import (
 	"context"
-	"encoding/json"
 	"errors"
 	"fmt"
 	"time"
@@ -23,6 +22,8 @@ import (
 	"github.com/elastic/elastic-agent-libs/mapstr"
 )
 
+// eventHubInputV2 is the Azure Event Hub input v2,
+// that uses the modern Azure Event Hub SDK for Go.
 type eventHubInputV2 struct {
 	config          azureInputConfig
 	log             *logp.Logger
@@ -33,6 +34,8 @@ type eventHubInputV2 struct {
 	messageDecoder  messageDecoder
 }
 
+// newEventHubInputV2 creates a new instance of the Azure Event Hub input v2,
+// that uses the modern Azure Event Hub SDK for Go.
 func newEventHubInputV2(config azureInputConfig, log *logp.Logger) (v2.Input, error) {
 	return &eventHubInputV2{
 		config: config,
@@ -48,6 +51,7 @@ func (in *eventHubInputV2) Test(v2.TestContext) error {
 	return nil
 }
 
+// Run starts the Azure Event Hub input v2.
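+//
+// It sets up the input metrics and the message decoder, initializes
+// the consumer client, connects to the publishing pipeline, and then
+// runs the processing loop until the context is cancelled.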
func (in *eventHubInputV2) Run( inputContext v2.Context, pipeline beat.Pipeline, @@ -56,13 +60,6 @@ func (in *eventHubInputV2) Run( ctx := v2.GoContextFromCanceler(inputContext.Cancelation) - // Create pipelineClient for publishing events and receive notification of their ACKs. - in.pipelineClient, err = createPipelineClient(pipeline) - if err != nil { - return fmt.Errorf("failed to create pipeline pipelineClient: %w", err) - } - defer in.pipelineClient.Close() - // Setup input metrics inputMetrics := newInputMetrics(inputContext.ID, nil) defer inputMetrics.Close() @@ -76,20 +73,29 @@ func (in *eventHubInputV2) Run( metrics: in.metrics, } - // Initialize the components needed to process events, in particular - // the consumerClient. + // Initialize the components needed to process events, + // in particular the consumerClient. err = in.setup(ctx) if err != nil { return err } defer in.consumerClient.Close(context.Background()) + // Create pipelineClient for publishing events and receive + // notification of their ACKs. + in.pipelineClient, err = createPipelineClient(pipeline) + if err != nil { + return fmt.Errorf("failed to create pipeline pipelineClient: %w", err) + } + defer in.pipelineClient.Close() + // Start the main run loop in.run(ctx) return nil } +// setup initializes the components needed to process events. func (in *eventHubInputV2) setup(ctx context.Context) error { // FIXME: check more pipelineClient creation options. blobContainerClient, err := container.NewClientFromConnectionString( @@ -121,10 +127,13 @@ func (in *eventHubInputV2) setup(ctx context.Context) error { return nil } +// run starts the main loop for processing events. func (in *eventHubInputV2) run(ctx context.Context) { - for ctx.Err() == nil { - + // Create a new processor for each run. + // + // The docs explicitly say that the processor + // is not reusable. processor, err := azeventhubs.NewProcessor( in.consumerClient, in.checkpointStore, @@ -135,15 +144,20 @@ func (in *eventHubInputV2) run(ctx context.Context) { return } - // Run in the background, launching goroutines to process each partition + // Launch one goroutines for each partition + // to process events. go in.workersLoop(processor) + // Run the processor to start processing events. + // This is a blocking call. if err := processor.Run(ctx); err != nil { // FIXME: `Run()` returns an error when the processor thinks it's unrecoverable. // We should check the error and decide if we want to retry or not. Should // we add an and retry mechanism with exponential backoff? in.log.Errorw("processor completed with an error", "error", err) + // FIXME: `time.Sleep()` is not the best way to handle this. + // Using it for testing purposes. time.Sleep(30 * time.Second) } @@ -152,35 +166,41 @@ func (in *eventHubInputV2) run(ctx context.Context) { } +// workersLoop starts a goroutine for each partition to process events. func (in *eventHubInputV2) workersLoop(processor *azeventhubs.Processor) { for { processorPartitionClient := processor.NextPartitionClient(context.TODO()) if processorPartitionClient == nil { - // Processor has stopped + // We break out from the for loop when `NextPartitionClient` + // return `nil` (signals the processor has stopped). 
break } + partitionID := processorPartitionClient.PartitionID() go func() { - in.log.Infow("starting a partition worker", "partition", processorPartitionClient.PartitionID()) + in.log.Infow( + "starting a partition worker", + "partition", partitionID, + ) if err := in.processEventsForPartition(processorPartitionClient); err != nil { // FIXME: it seems we always get an error, even when the processor is stopped. in.log.Infow( "stopping processing events for partition", "reason", err, - "partition", processorPartitionClient.PartitionID(), + "partition", partitionID, ) } in.log.Infow( "partition worker exited", - "partition", processorPartitionClient.PartitionID(), + "partition", partitionID, ) }() } } -// processEventsForPartition shows the typical pattern for processing a partition. +// processEventsForPartition receives events from a partition and processes them. func (in *eventHubInputV2) processEventsForPartition(partitionClient *azeventhubs.ProcessorPartitionClient) error { // 1. [BEGIN] Initialize any partition specific resources for your application. // 2. [CONTINUOUS] Loop, calling ReceiveEvents() and UpdateCheckpoint(). @@ -228,8 +248,10 @@ func (in *eventHubInputV2) processEventsForPartition(partitionClient *azeventhub in.log.Debugw("updating checkpoint information", "partition", partitionID) - // Updates the checkpoint with the latest event received. If processing needs to restart - // it will restart from this point, automatically. + // Updates the checkpoint with the latest event received. + // + // If processing needs to restart it will restart from this + // point, automatically. if err := partitionClient.UpdateCheckpoint(context.TODO(), events[len(events)-1], nil); err != nil { in.log.Errorw("error updating checkpoint", "error", err) return err @@ -250,6 +272,10 @@ func (in *eventHubInputV2) processReceivedEvents(receivedEvents []*azeventhubs.R } for _, receivedEventData := range receivedEvents { + // Update input metrics. + in.metrics.receivedMessages.Inc() + in.metrics.receivedBytes.Add(uint64(len(receivedEventData.Body))) + // A single event can contain multiple records. We create a new event for each record. //records := in.unpackRecords(receivedEventData.Body) records := in.messageDecoder.Decode(receivedEventData.Body) @@ -267,67 +293,22 @@ func (in *eventHubInputV2) processReceivedEvents(receivedEvents []*azeventhubs.R "message": record, "azure": azure, }, - Private: receivedEventData.Body, + Private: receivedEventData, } + // Publish the event to the Beats pipeline. in.pipelineClient.Publish(event) - } - } - return nil -} - -func (in *eventHubInputV2) unpackRecords(bMessage []byte) []string { - var mapObject map[string][]interface{} - var records []string - - // Clean up the message for known issues [1] where Azure services produce malformed JSON documents. - // Sanitization occurs if options are available and the message contains an invalid JSON. - // - // [1]: https://learn.microsoft.com/en-us/answers/questions/1001797/invalid-json-logs-produced-for-function-apps - if len(in.config.SanitizeOptions) != 0 && !json.Valid(bMessage) { - bMessage = sanitize(bMessage, in.config.SanitizeOptions...) 
- in.metrics.sanitizedMessages.Inc() - } - - // check if the message is a "records" object containing a list of events - err := json.Unmarshal(bMessage, &mapObject) - if err == nil { - if len(mapObject[expandEventListFromField]) > 0 { - for _, ms := range mapObject[expandEventListFromField] { - js, err := json.Marshal(ms) - if err == nil { - records = append(records, string(js)) - in.metrics.receivedEvents.Inc() - } else { - in.log.Errorw(fmt.Sprintf("serializing message %s", ms), "error", err) - } - } - } - } else { - in.log.Debugf("deserializing multiple messages to a `records` object returning error: %s", err) - // in some cases the message is an array - var arrayObject []interface{} - err = json.Unmarshal(bMessage, &arrayObject) - if err != nil { - // return entire message - in.log.Debugf("deserializing multiple messages to an array returning error: %s", err) - in.metrics.decodeErrors.Inc() - return []string{string(bMessage)} + // Update input metrics. + in.metrics.sentEvents.Inc() } - for _, ms := range arrayObject { - js, err := json.Marshal(ms) - if err == nil { - records = append(records, string(js)) - in.metrics.receivedEvents.Inc() - } else { - in.log.Errorw(fmt.Sprintf("serializing message %s", ms), "error", err) - } - } + // Update input metrics. + in.metrics.processedMessages.Inc() + in.metrics.processingTime.Update(time.Since(processingStartTime).Nanoseconds()) } - return records + return nil } func initializePartitionResources(partitionID string) error { From 61421b9427d118a09adea1497025f5e9ea52ec73 Mon Sep 17 00:00:00 2001 From: Maurizio Branca Date: Thu, 23 May 2024 15:28:52 +0200 Subject: [PATCH 25/41] Try the backoff package to restart the processor --- .../filebeat/input/azureeventhub/v2_input.go | 55 +++++++++++++------ 1 file changed, 38 insertions(+), 17 deletions(-) diff --git a/x-pack/filebeat/input/azureeventhub/v2_input.go b/x-pack/filebeat/input/azureeventhub/v2_input.go index 2f51b641880..a8a349745db 100644 --- a/x-pack/filebeat/input/azureeventhub/v2_input.go +++ b/x-pack/filebeat/input/azureeventhub/v2_input.go @@ -18,6 +18,7 @@ import ( v2 "github.com/elastic/beats/v7/filebeat/input/v2" "github.com/elastic/beats/v7/libbeat/beat" + "github.com/elastic/beats/v7/libbeat/common/backoff" "github.com/elastic/elastic-agent-libs/logp" "github.com/elastic/elastic-agent-libs/mapstr" ) @@ -129,6 +130,16 @@ func (in *eventHubInputV2) setup(ctx context.Context) error { // run starts the main loop for processing events. func (in *eventHubInputV2) run(ctx context.Context) { + + // Handle the case when the processor stops due to + // transient errors (network failures) and we need to + // restart. + processorRunBackoff := backoff.NewEqualJitterBackoff( + ctx.Done(), + 10*time.Second, // initial backoff + 120*time.Second, // max backoff + ) + for ctx.Err() == nil { // Create a new processor for each run. // @@ -137,7 +148,7 @@ func (in *eventHubInputV2) run(ctx context.Context) { processor, err := azeventhubs.NewProcessor( in.consumerClient, in.checkpointStore, - nil, + nil, // default options ) if err != nil { in.log.Errorw("error creating processor", "error", err) @@ -146,30 +157,39 @@ func (in *eventHubInputV2) run(ctx context.Context) { // Launch one goroutines for each partition // to process events. - go in.workersLoop(processor) + go in.workersLoop(ctx, processor) // Run the processor to start processing events. - // This is a blocking call. + // + // This is a blocking call. 
It will return when the processor
+		// stops due to an error or when the context is cancelled.
 		if err := processor.Run(ctx); err != nil {
+			in.log.Errorw("processor exited with a non-nil error", "error", err)
+
+			// FIXME: `time.Sleep()` is not the best way to handle this.
+			// Using it for testing purposes.
+			// time.Sleep(30 * time.Second)
+			in.log.Infow("waiting before retrying starting the processor")
+
 			// FIXME: `Run()` returns an error when the processor thinks it's unrecoverable.
 			// We should check the error and decide if we want to retry or not. Should
 			// we add a retry mechanism with exponential backoff?
-			in.log.Errorw("processor completed with an error", "error", err)
+			processorRunBackoff.Wait()
 
-			// FIXME: `time.Sleep()` is not the best way to handle this.
-			// Using it for testing purposes.
-			time.Sleep(30 * time.Second)
+			in.log.Infow("ready to try to start the processor again")
 		}
 
-		in.log.Infow("run completed", "error", err)
+		in.log.Infow(
+			"run completed; continue if context error is nil",
+			"context_error", ctx.Err(),
+		)
 	}
-
 }
 
 // workersLoop starts a goroutine for each partition to process events.
-func (in *eventHubInputV2) workersLoop(processor *azeventhubs.Processor) {
+func (in *eventHubInputV2) workersLoop(ctx context.Context, processor *azeventhubs.Processor) {
 	for {
-		processorPartitionClient := processor.NextPartitionClient(context.TODO())
+		processorPartitionClient := processor.NextPartitionClient(ctx)
 		if processorPartitionClient == nil {
 			// We break out from the for loop when `NextPartitionClient`
 			// return `nil` (signals the processor has stopped).
 			break
 		}
 
 		partitionID := processorPartitionClient.PartitionID()
+
 		go func() {
 			in.log.Infow(
 				"starting a partition worker",
 				"partition", partitionID,
 			)
 
-			if err := in.processEventsForPartition(processorPartitionClient); err != nil {
+			if err := in.processEventsForPartition(ctx, processorPartitionClient); err != nil {
 				// FIXME: it seems we always get an error, even when the processor is stopped.
 				in.log.Infow(
 					"stopping processing events for partition",
 					"reason", err,
 					"partition", partitionID,
 				)
 			}
 
 			in.log.Infow(
 				"partition worker exited",
 				"partition", partitionID,
 			)
 		}()
 	}
 }
 
 // processEventsForPartition receives events from a partition and processes them.
-func (in *eventHubInputV2) processEventsForPartition(partitionClient *azeventhubs.ProcessorPartitionClient) error {
+func (in *eventHubInputV2) processEventsForPartition(ctx context.Context, partitionClient *azeventhubs.ProcessorPartitionClient) error {
 	// 1. [BEGIN] Initialize any partition specific resources for your application.
 	// 2. [CONTINUOUS] Loop, calling ReceiveEvents() and UpdateCheckpoint().
 	// 3. [END] Cleanup any resources.
-	partitionID := partitionClient.PartitionID()
-
 	defer func() {
 		// 3/3 [END] Do cleanup here, like shutting down database clients
 		// or other resources used for processing this partition.
 		shutdownPartitionResources(partitionClient)
 	}()
 
+	partitionID := partitionClient.PartitionID()
+
 	// 1/3 [BEGIN] Initialize any partition specific resources for your application.
 	if err := initializePartitionResources(partitionID); err != nil {
 		return err
 	}
 
 	// 2/3 [CONTINUOUS] Receive events, checkpointing as needed using UpdateCheckpoint.
 	for {
 		// Wait for up to 100 events or until the receive timeout expires, then return whatever we collected during that time.
-		receiveCtx, cancelReceive := context.WithTimeout(context.TODO(), 10*time.Second)
+		receiveCtx, cancelReceive := context.WithTimeout(ctx, 5*time.Second)
 		events, err := partitionClient.ReceiveEvents(receiveCtx, 100, nil)
 		cancelReceive()
 
@@ -252,7 +273,7 @@ func (in *eventHubInputV2) processEventsForPartition(partitionClient *azeventhub
 		//
 		// If processing needs to restart it will restart from this
 		// point, automatically.
-		if err := partitionClient.UpdateCheckpoint(context.TODO(), events[len(events)-1], nil); err != nil {
+		if err := partitionClient.UpdateCheckpoint(ctx, events[len(events)-1], nil); err != nil {
 			in.log.Errorw("error updating checkpoint", "error", err)
 			return err
 		}

From 56cfb7f05af7b4788adf2f34b9b322866ea99cfe Mon Sep 17 00:00:00 2001
From: Maurizio Branca
Date: Thu, 23 May 2024 16:03:59 +0200
Subject: [PATCH 26/41] Add acker.LastEventPrivateReporter()

Listen for the ack of the last event, and store its checkpoint
information.
---
 x-pack/filebeat/input/azureeventhub/input.go | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/x-pack/filebeat/input/azureeventhub/input.go b/x-pack/filebeat/input/azureeventhub/input.go
index 301c7062191..800fc44aaf9 100644
--- a/x-pack/filebeat/input/azureeventhub/input.go
+++ b/x-pack/filebeat/input/azureeventhub/input.go
@@ -15,6 +15,7 @@ import (
 
 	v2 "github.com/elastic/beats/v7/filebeat/input/v2"
 	"github.com/elastic/beats/v7/libbeat/beat"
+	"github.com/elastic/beats/v7/libbeat/common/acker"
 	"github.com/elastic/beats/v7/libbeat/feature"
 	conf "github.com/elastic/elastic-agent-libs/config"
 	"github.com/elastic/elastic-agent-libs/logp"
@@ -85,6 +86,9 @@ func (m *eventHubInputManager) Create(cfg *conf.C) (v2.Input, error) {
 
 func createPipelineClient(pipeline beat.Pipeline) (beat.Client, error) {
 	return pipeline.ConnectWith(beat.ClientConfig{
+		EventListener: acker.LastEventPrivateReporter(func(acked int, data interface{}) {
+			// fmt.Println(acked, data)
+		}),
 		Processing: beat.ProcessingConfig{
 			// This input only produces events with basic types so normalization
 			// is not required.

From 358d2dce34c2550aaefd7f3413b03d2e3998139a Mon Sep 17 00:00:00 2001
From: Maurizio Branca
Date: Mon, 27 May 2024 14:57:39 +0200
Subject: [PATCH 27/41] Ensure container exists and add migration assistant

---
 .../filebeat/input/azureeventhub/v2_input.go  | 148 ++++++--
 .../input/azureeventhub/v2_migration.go       | 320 ++++++++++++++++++
 2 files changed, 443 insertions(+), 25 deletions(-)
 create mode 100644 x-pack/filebeat/input/azureeventhub/v2_migration.go

diff --git a/x-pack/filebeat/input/azureeventhub/v2_input.go b/x-pack/filebeat/input/azureeventhub/v2_input.go
index a8a349745db..2dd83b73fe6 100644
--- a/x-pack/filebeat/input/azureeventhub/v2_input.go
+++ b/x-pack/filebeat/input/azureeventhub/v2_input.go
@@ -12,6 +12,9 @@ import (
 	"fmt"
 	"time"
 
+	"github.com/Azure/azure-sdk-for-go/sdk/azcore"
+	"github.com/Azure/azure-sdk-for-go/sdk/storage/azblob/bloberror"
+
 	"github.com/Azure/azure-sdk-for-go/sdk/messaging/azeventhubs"
 	"github.com/Azure/azure-sdk-for-go/sdk/messaging/azeventhubs/checkpoints"
 	"github.com/Azure/azure-sdk-for-go/sdk/storage/azblob/container"
@@ -26,13 +29,14 @@ import (
 
 // eventHubInputV2 is the Azure Event Hub input v2,
 // that uses the modern Azure Event Hub SDK for Go.
type eventHubInputV2 struct { - config azureInputConfig - log *logp.Logger - metrics *inputMetrics - checkpointStore *checkpoints.BlobStore - consumerClient *azeventhubs.ConsumerClient - pipelineClient beat.Client - messageDecoder messageDecoder + config azureInputConfig + log *logp.Logger + metrics *inputMetrics + checkpointStore *checkpoints.BlobStore + consumerClient *azeventhubs.ConsumerClient + pipelineClient beat.Client + messageDecoder messageDecoder + migrationAssistant *migrationAssistant } // newEventHubInputV2 creates a new instance of the Azure Event Hub input v2, @@ -66,14 +70,6 @@ func (in *eventHubInputV2) Run( defer inputMetrics.Close() in.metrics = inputMetrics - // Decode the messages from event hub into - // a `[]string`. - in.messageDecoder = messageDecoder{ - config: &in.config, - log: in.log, - metrics: in.metrics, - } - // Initialize the components needed to process events, // in particular the consumerClient. err = in.setup(ctx) @@ -98,6 +94,15 @@ func (in *eventHubInputV2) Run( // setup initializes the components needed to process events. func (in *eventHubInputV2) setup(ctx context.Context) error { + + // Decode the messages from event hub into + // a `[]string`. + in.messageDecoder = messageDecoder{ + config: &in.config, + log: in.log, + metrics: in.metrics, + } + // FIXME: check more pipelineClient creation options. blobContainerClient, err := container.NewClientFromConnectionString( in.config.SAConnectionString, @@ -108,12 +113,27 @@ func (in *eventHubInputV2) setup(ctx context.Context) error { return fmt.Errorf("failed to create blob container pipelineClient: %w", err) } + // The modern event hub SDK does not create the container + // automatically like the old SDK. + // + // The new `BlobStore` explicitly says: + // "the container must exist before the checkpoint store can be used." + // + // We need to ensure it exists before we can use it. + err = in.ensureContainerExists(ctx, blobContainerClient) + if err != nil { + return fmt.Errorf("failed to ensure blob container exists: %w", err) + } + + // The checkpoint store is used to store the checkpoint information + // in the blob container. checkpointStore, err := checkpoints.NewBlobStore(blobContainerClient, nil) if err != nil { return fmt.Errorf("failed to create checkpoint store: %w", err) } in.checkpointStore = checkpointStore + // Create the event hub consumerClient to receive events. consumerClient, err := azeventhubs.NewConsumerClientFromConnectionString( in.config.ConnectionString, in.config.EventHubName, @@ -125,11 +145,30 @@ func (in *eventHubInputV2) setup(ctx context.Context) error { } in.consumerClient = consumerClient + // FIXME: add migration assistant. + in.migrationAssistant = newMigrationAssistant( + in.log, + consumerClient, + blobContainerClient, + checkpointStore, + ) + return nil } // run starts the main loop for processing events. func (in *eventHubInputV2) run(ctx context.Context) { + // Check if we need to migrate the checkpoint store. + err := in.migrationAssistant.checkAndMigrate( + ctx, + in.config.ConnectionString, + in.config.EventHubName, + in.config.ConsumerGroup, + ) + if err != nil { + in.log.Errorw("error migrating checkpoint store", "error", err) + // FIXME: should we return here? 
+	}
 
 	// Handle the case when the processor stops due to
 	// transient errors (network failures) and we need to
 	// restart.
@@ -140,6 +179,10 @@ func (in *eventHubInputV2) run(ctx context.Context) {
 		120*time.Second, // max backoff
 	)
 
+	processorOptions := azeventhubs.ProcessorOptions{
+		LoadBalancingStrategy: azeventhubs.ProcessorStrategyBalanced,
+	}
+
 	for ctx.Err() == nil {
 		// Create a new processor for each run.
 		//
@@ -148,7 +191,7 @@ func (in *eventHubInputV2) run(ctx context.Context) {
 		processor, err := azeventhubs.NewProcessor(
 			in.consumerClient,
 			in.checkpointStore,
-			nil, // default options
+			&processorOptions,
 		)
 		if err != nil {
 			in.log.Errorw("error creating processor", "error", err)
@@ -161,14 +204,16 @@ func (in *eventHubInputV2) run(ctx context.Context) {
 
 		// Run the processor to start processing events.
 		//
-		// This is a blocking call. It will return when the processor
-		// stops due to an error or when the context is cancelled.
+		// This is a blocking call.
+		//
+		// It will return when the processor stops due to:
+		// - an error
+		// - when the context is cancelled.
+		//
+		// On cancellation, it will return a nil error.
 		if err := processor.Run(ctx); err != nil {
 			in.log.Errorw("processor exited with a non-nil error", "error", err)
 
 			in.log.Infow("waiting before retrying starting the processor")
 
 			// FIXME: `Run()` returns an error when the processor thinks it's unrecoverable.
 			// We should check the error and decide if we want to retry or not. Should
 			// we add a retry mechanism with exponential backoff?
 			processorRunBackoff.Wait()
 
-			in.log.Infow("ready to try to start the processor again")
+			// Update input metrics.
+			in.metrics.processorRestarts.Inc()
+
+			in.log.Infow("restarting the processor")
 		}
 
 		in.log.Infow(
 			"run completed; continue if context error is nil",
 			"context_error", ctx.Err(),
 		)
 	}
 }
 
+// ensureContainerExists ensures the blob container exists.
+func (in *eventHubInputV2) ensureContainerExists(ctx context.Context, blobContainerClient *container.Client) error {
+	exists, err := in.containerExists(ctx, blobContainerClient)
+	if err != nil {
+		return fmt.Errorf("failed to check if blob container exists: %w", err)
+	}
+	if exists {
+		return nil
+	}
+
+	// Since the container does not exist, we create it.
+	r, err := blobContainerClient.Create(ctx, nil)
+	if err != nil {
+		// If the container already exists, we ignore the error.
+		var responseError *azcore.ResponseError
+		if !errors.As(err, &responseError) || responseError.ErrorCode != string(bloberror.ContainerAlreadyExists) {
+			return fmt.Errorf("failed to create blob container: %w", err)
+		}
+
+		in.log.Debugw("blob container already exists, no need to create a new one", "container", in.config.SAContainer)
+
+		return nil
+	}
+
+	in.log.Infow("blob container created successfully", "response", r)
+
+	return nil
+}
+
+// containerExists checks if the blob container exists.
+func (in *eventHubInputV2) containerExists(ctx context.Context, blobContainerClient *container.Client) (bool, error) {
+	// Try to access the container to see if it exists.
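+	// GetProperties is a lightweight probe: it succeeds when the
+	// container exists and fails with ContainerNotFound when it
+	// does not.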
+ _, err := blobContainerClient.GetProperties(ctx, &container.GetPropertiesOptions{}) + if err == nil { + in.log.Debugw("blob container already exists, no need to create a new one", "container", in.config.SAContainer) + return true, nil + } + + var responseError *azcore.ResponseError + if errors.As(err, &responseError) && responseError.ErrorCode == string(bloberror.ContainerNotFound) { + return false, nil + } + + return false, fmt.Errorf("failed to check if blob container exists: %w", err) +} + // workersLoop starts a goroutine for each partition to process events. func (in *eventHubInputV2) workersLoop(ctx context.Context, processor *azeventhubs.Processor) { for { + // The call blocks until an owned partition is available or the + // context is cancelled. processorPartitionClient := processor.NextPartitionClient(ctx) if processorPartitionClient == nil { // We break out from the for loop when `NextPartitionClient` @@ -198,6 +292,7 @@ func (in *eventHubInputV2) workersLoop(ctx context.Context, processor *azeventhu partitionID := processorPartitionClient.PartitionID() + // Start a goroutine to process events for the partition. go func() { in.log.Infow( "starting a partition worker", @@ -223,6 +318,9 @@ func (in *eventHubInputV2) workersLoop(ctx context.Context, processor *azeventhu // processEventsForPartition receives events from a partition and processes them. func (in *eventHubInputV2) processEventsForPartition(ctx context.Context, partitionClient *azeventhubs.ProcessorPartitionClient) error { + + // pipelineClient := createPipelineClient() + // 1. [BEGIN] Initialize any partition specific resources for your application. // 2. [CONTINUOUS] Loop, calling ReceiveEvents() and UpdateCheckpoint(). // 3. [END] Cleanup any resources. @@ -235,7 +333,7 @@ func (in *eventHubInputV2) processEventsForPartition(ctx context.Context, partit partitionID := partitionClient.PartitionID() // 1/3 [BEGIN] Initialize any partition specific resources for your application. - if err := initializePartitionResources(partitionID); err != nil { + if err := initializePartitionResources(partitionID, partitionClient); err != nil { return err } @@ -260,7 +358,7 @@ func (in *eventHubInputV2) processEventsForPartition(ctx context.Context, partit continue } - in.log.Debugw("received events", "partition", partitionID) + in.log.Debugw("received events", "count", len(events), "partition", partitionID) err = in.processReceivedEvents(events) if err != nil { @@ -332,7 +430,7 @@ func (in *eventHubInputV2) processReceivedEvents(receivedEvents []*azeventhubs.R return nil } -func initializePartitionResources(partitionID string) error { +func initializePartitionResources(partitionID string, partitionClient *azeventhubs.ProcessorPartitionClient) error { // initialize things that might be partition specific, like a // database connection. return nil diff --git a/x-pack/filebeat/input/azureeventhub/v2_migration.go b/x-pack/filebeat/input/azureeventhub/v2_migration.go new file mode 100644 index 00000000000..f0f0945d717 --- /dev/null +++ b/x-pack/filebeat/input/azureeventhub/v2_migration.go @@ -0,0 +1,320 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. 
+ +//go:build !aix + +package azureeventhub + +import ( + "context" + "encoding/json" + "errors" + "fmt" + "net/url" + "strconv" + "strings" + + "github.com/Azure/azure-sdk-for-go/sdk/messaging/azeventhubs" + "github.com/Azure/azure-sdk-for-go/sdk/messaging/azeventhubs/checkpoints" + "github.com/Azure/azure-sdk-for-go/sdk/storage/azblob/container" + "github.com/elastic/elastic-agent-libs/logp" +) + +// migrationAssistant assists the input in migrating +// checkpoint data from v1 to v2. +type migrationAssistant struct { + log *logp.Logger + consumerClient *azeventhubs.ConsumerClient + blobContainerClient *container.Client + checkpointStore *checkpoints.BlobStore +} + +func newMigrationAssistant(log *logp.Logger, consumerClient *azeventhubs.ConsumerClient, blobContainerClient *container.Client, checkpointStore *checkpoints.BlobStore) *migrationAssistant { + return &migrationAssistant{ + log: log, + consumerClient: consumerClient, + blobContainerClient: blobContainerClient, + checkpointStore: checkpointStore, + } +} + +func (m *migrationAssistant) checkAndMigrate(ctx context.Context, eventHubConnectionString, eventHubName, consumerGroup string) error { + + // Fetching event hub information + eventHubProperties, err := m.consumerClient.GetEventHubProperties(ctx, nil) + if err != nil { + return fmt.Errorf("failed to get event hub properties: %w", err) + } + + m.log.Infof("Event Hub properties: %v", eventHubProperties) + + // Parse the connection string to get FQDN. + props, err := parseConnectionString(eventHubConnectionString) + if err != nil { + return fmt.Errorf("failed to parse connection string: %w", err) + } + + err = m.checkAndMigratePartition(ctx, eventHubProperties, props, eventHubName, consumerGroup) + if err != nil { + return fmt.Errorf("failed to check and migrate partition: %w", err) + } + + // blobClient := m.blobContainerClient.NewBlobClient("") + // blobClient.BlobExists(ctx) + + // blobPager := m.blobContainerClient.NewListBlobsFlatPager(nil) + + // for blobPager.More() { + // page, err := blobPager.NextPage(ctx) + // if err != nil { + // return fmt.Errorf("failed to list blobs: %w", err) + // } + + // } + + // Fetching the list of blobs in the container. + + // Search for the checkpoint blobs in the container. 
+	// The blobs are named as `<fully-qualified namespace>/<event hub name>/<consumer group>/checkpoint/<partition ID>`.
+
+	// blobPager := m.blobContainerClient.NewListBlobsFlatPager(nil)
+
+	// r, err := blobPager.NextPage(ctx)
+	// if err != nil {
+	// 	return fmt.Errorf("failed to list blobs: %w", err)
+	// }
+
+	// props.FullyQualifiedNamespace
+
+	// // Fetching event hub information
+	// eventHubProperties, err := m.consumerClient.GetEventHubProperties(ctx, nil)
+	// if err != nil {
+	// 	return fmt.Errorf("failed to get event hub properties: %w", err)
+	// }
+
+	// // v2 checkpoint information path
+	// // mbranca-general.servicebus.windows.net/sdh4552/$Default/checkpoint/0
+
+	// eventHubProperties.PartitionIDs
+
+	return nil
+}
+
+func (m *migrationAssistant) checkAndMigratePartition(
+	ctx context.Context,
+	eventHubProperties azeventhubs.EventHubProperties,
+	props ConnectionStringProperties,
+	eventHubName,
+	consumerGroup string) error {
+
+	blobs := map[string]bool{}
+
+	c := m.blobContainerClient.NewListBlobsFlatPager(nil)
+
+	for c.More() {
+		page, err := c.NextPage(ctx)
+		if err != nil {
+			return fmt.Errorf("failed to list blobs: %w", err)
+		}
+
+		for _, blob := range page.Segment.BlobItems {
+			blobs[*blob.Name] = true
+		}
+	}
+
+	for _, partitionID := range eventHubProperties.PartitionIDs {
+		// v2 checkpoint information path
+		// mbranca-general.servicebus.windows.net/sdh4552/$Default/checkpoint/0
+		blob := fmt.Sprintf("%s/%s/%s/checkpoint/%s", props.FullyQualifiedNamespace, eventHubName, consumerGroup, partitionID)
+
+		if _, ok := blobs[blob]; ok {
+			m.log.Infow(
+				"checkpoint v2 information for partition already exists, no migration needed",
+				"partitionID", partitionID,
+			)
+			continue
+		}
+
+		// check if checkpoint v1 information exists for the partition
+		if _, ok := blobs[partitionID]; !ok {
+			m.log.Infow(
+				"checkpoint v1 information for partition doesn't exist, no migration needed",
+				"partitionID", partitionID,
+			)
+			continue
+		}
+
+		// v1 checkpoint information path is the partition ID itself
+		cln := m.blobContainerClient.NewBlobClient(partitionID)
+
+		buff := [4000]byte{}
+		size, err := cln.DownloadBuffer(ctx, buff[:], nil)
+		if err != nil {
+			return fmt.Errorf("failed to download checkpoint v1 information for partition %s: %w", partitionID, err)
+		}
+
+		m.log.Infow("downloaded checkpoint v1 information for partition", "partitionID", partitionID, "size", size)
+
+		var checkpointV1 *LegacyCheckpoint
+
+		if err := json.Unmarshal(buff[0:size], &checkpointV1); err != nil {
+			return fmt.Errorf("failed to unmarshal checkpoint v1 information for partition %s: %w", partitionID, err)
+		}
+
+		// migrate the checkpoint v1 information to v2
+		m.log.Infow("migrating checkpoint v1 information to v2", "partitionID", partitionID)
+
+		checkpointV2 := azeventhubs.Checkpoint{
+			ConsumerGroup:           consumerGroup,
+			EventHubName:            eventHubName,
+			FullyQualifiedNamespace: props.FullyQualifiedNamespace,
+			PartitionID:             partitionID,
+		}
+
+		offset, err := strconv.ParseInt(checkpointV1.Checkpoint.Offset, 10, 64)
+		if err != nil {
+			return fmt.Errorf("failed to parse offset: %w", err)
+		}
+
+		checkpointV2.Offset = &offset
+		checkpointV2.SequenceNumber = &checkpointV1.Checkpoint.SequenceNumber
+
+		if err := m.checkpointStore.SetCheckpoint(ctx, checkpointV2, nil); err != nil {
+			return fmt.Errorf("failed to update checkpoint v2 information for partition %s: %w", partitionID, err)
+		}
+
+		m.log.Infow("migrated checkpoint v1 information to v2", "partitionID", partitionID)
+	}
+
+	return nil
+}
+
+type LegacyCheckpoint struct {
+	PartitionID string `json:"partitionID"`
+	Epoch       int    `json:"epoch"`
+	Owner       string `json:"owner"`
+	Checkpoint  struct {
+		Offset         string `json:"offset"`
+		SequenceNumber int64  `json:"sequenceNumber"`
+		EnqueueTime    string `json:"enqueueTime"` // ": "0001-01-01T00:00:00Z"
+	} `json:"checkpoint"`
+}
+
+// ConnectionStringProperties are the properties of a connection string
+// as returned by [ParseConnectionString].
+type ConnectionStringProperties struct {
+	// Endpoint is the Endpoint value in the connection string.
+	// Ex: sb://example.servicebus.windows.net
+	Endpoint string
+
+	// EntityPath is the EntityPath value in the connection string.
+	EntityPath *string
+
+	// FullyQualifiedNamespace is the Endpoint value without the protocol scheme.
+	// Ex: example.servicebus.windows.net
+	FullyQualifiedNamespace string
+
+	// SharedAccessKey is the SharedAccessKey value in the connection string.
+	SharedAccessKey *string
+
+	// SharedAccessKeyName is the SharedAccessKeyName value in the connection string.
+	SharedAccessKeyName *string
+
+	// SharedAccessSignature is the SharedAccessSignature value in the connection string.
+	SharedAccessSignature *string
+
+	// Emulator indicates that the connection string is for an emulator:
+	// ex: Endpoint=localhost:6765;SharedAccessKeyName=<< REDACTED >>;SharedAccessKey=<< REDACTED >>;UseDevelopmentEmulator=true
+	Emulator bool
+}
+
+// ParseConnectionString takes a connection string from the Azure portal and returns the
+// parsed representation.
+//
+// There are two supported formats:
+//
+//  1. Connection strings generated from the portal (or elsewhere) that contain an embedded key and keyname.
+//
+//  2. A connection string with an embedded SharedAccessSignature:
+//     Endpoint=sb://<namespace>.servicebus.windows.net;SharedAccessSignature=SharedAccessSignature sr=<namespace>.servicebus.windows.net&sig=<base64-sig>&se=<expiry>&skn=<keyname>
+func parseConnectionString(connStr string) (ConnectionStringProperties, error) {
+	const (
+		endpointKey              = "Endpoint"
+		sharedAccessKeyNameKey   = "SharedAccessKeyName"
+		sharedAccessKeyKey       = "SharedAccessKey"
+		entityPathKey            = "EntityPath"
+		sharedAccessSignatureKey = "SharedAccessSignature"
+		useEmulator              = "UseDevelopmentEmulator"
+	)
+
+	csp := ConnectionStringProperties{}
+
+	splits := strings.Split(connStr, ";")
+
+	for _, split := range splits {
+		if split == "" {
+			continue
+		}
+
+		keyAndValue := strings.SplitN(split, "=", 2)
+		if len(keyAndValue) < 2 {
+			return ConnectionStringProperties{}, errors.New("failed parsing connection string due to unmatched key value separated by '='")
+		}
+
+		// if a key value pair has `=` in the value, recombine them
+		key := keyAndValue[0]
+		value := strings.Join(keyAndValue[1:], "=")
+		switch {
+		case strings.EqualFold(endpointKey, key):
+			u, err := url.Parse(value)
+			if err != nil {
+				return ConnectionStringProperties{}, errors.New("failed parsing connection string due to an incorrectly formatted Endpoint value")
+			}
+			csp.Endpoint = value
+			csp.FullyQualifiedNamespace = u.Host
+		case strings.EqualFold(sharedAccessKeyNameKey, key):
+			csp.SharedAccessKeyName = &value
+		case strings.EqualFold(sharedAccessKeyKey, key):
+			csp.SharedAccessKey = &value
+		case strings.EqualFold(entityPathKey, key):
+			csp.EntityPath = &value
+		case strings.EqualFold(sharedAccessSignatureKey, key):
+			csp.SharedAccessSignature = &value
+		case strings.EqualFold(useEmulator, key):
+			v, err := strconv.ParseBool(value)
+			if err != nil {
+				return ConnectionStringProperties{}, err
+			}
+
+			csp.Emulator = v
+		}
+	}
+
+	if csp.Emulator {
+		// check that they're only connecting to localhost
+		endpointParts := strings.SplitN(csp.Endpoint, ":", 3) // allow for a port, if it exists.
+
+		if len(endpointParts) < 2 || endpointParts[0] != "sb" || endpointParts[1] != "//localhost" {
+			// there should always be at least two parts "sb:" and "//localhost"
+			// with an optional 3rd piece that's the port "1111".
+			// (we don't need to validate it's a valid host since it's been through url.Parse() above)
+			return ConnectionStringProperties{}, fmt.Errorf("UseDevelopmentEmulator=true can only be used with sb://localhost or sb://localhost:<port>, not %s", csp.Endpoint)
+		}
+	}
+
+	if csp.FullyQualifiedNamespace == "" {
+		return ConnectionStringProperties{}, fmt.Errorf("key %q must not be empty", endpointKey)
+	}
+
+	if csp.SharedAccessSignature == nil && csp.SharedAccessKeyName == nil {
+		return ConnectionStringProperties{}, fmt.Errorf("key %q must not be empty", sharedAccessKeyNameKey)
+	}
+
+	if csp.SharedAccessKey == nil && csp.SharedAccessSignature == nil {
+		return ConnectionStringProperties{}, fmt.Errorf("key %q or %q cannot both be empty", sharedAccessKeyKey, sharedAccessSignatureKey)
+	}
+
+	return csp, nil
+}

From 71fd5dbdbdeeb1d30240e486ea952ab639ee07d6 Mon Sep 17 00:00:00 2001
From: Maurizio Branca
Date: Mon, 27 May 2024 14:58:03 +0200
Subject: [PATCH 28/41] Add processor restarts metric

---
 x-pack/filebeat/input/azureeventhub/metrics.go      | 6 ++++++
 x-pack/filebeat/input/azureeventhub/metrics_test.go | 8 ++++++++
 2 files changed, 14 insertions(+)

diff --git a/x-pack/filebeat/input/azureeventhub/metrics.go b/x-pack/filebeat/input/azureeventhub/metrics.go
index efef262d2c0..e0a8eeb08ef 100644
--- a/x-pack/filebeat/input/azureeventhub/metrics.go
+++ b/x-pack/filebeat/input/azureeventhub/metrics.go
@@ -33,6 +33,9 @@ func newInputMetrics(id string, parentRegistry *monitoring.Registry) *inputMetri
 		// General
 		processingTime: metrics.NewUniformSample(1024), // TODO: set a reasonable value for the sample size.
 		decodeErrors:   monitoring.NewUint(reg, "decode_errors_total"),
+
+		// Processor
+		processorRestarts: monitoring.NewUint(reg, "processor_restarts_total"),
 	}
 	_ = adapter.
 		NewGoMetrics(reg, "processing_time", adapter.Accept).
@@ -82,6 +85,9 @@ type inputMetrics struct {
 	// General
 	processingTime metrics.Sample   // processingTime tracks the time it takes to process a message.
 	decodeErrors   *monitoring.Uint // decodeErrors tracks the number of errors that occurred while decoding a message.
+
+	// Processor
+	processorRestarts *monitoring.Uint // processorRestarts tracks the number of times the processor has restarted.
 }
 
 // Close unregisters the metrics from the registry.
diff --git a/x-pack/filebeat/input/azureeventhub/metrics_test.go b/x-pack/filebeat/input/azureeventhub/metrics_test.go index 52b9f008f5c..ddba8e0299c 100644 --- a/x-pack/filebeat/input/azureeventhub/metrics_test.go +++ b/x-pack/filebeat/input/azureeventhub/metrics_test.go @@ -51,6 +51,7 @@ func TestInputMetricsEventsReceived(t *testing.T) { sentEvents uint64 processingTime uint64 decodeErrors uint64 + processorRestarts uint64 }{ { event: []byte("{\"records\": [{\"test\":\"this is some message\",\"time\":\"2019-12-17T13:43:44.4946995Z\"}]}"), @@ -61,6 +62,7 @@ func TestInputMetricsEventsReceived(t *testing.T) { receivedEvents: 1, sentEvents: 1, decodeErrors: 0, + processorRestarts: 0, }, { event: []byte("{\"records\": [{\"test\":\"this is some message\",\"time\":\"2019-12-17T13:43:44.4946995Z\"}, {\"test\":\"this is some message\",\"time\":\"2019-12-17T13:43:44.4946995Z\"}]}"), @@ -74,6 +76,7 @@ func TestInputMetricsEventsReceived(t *testing.T) { receivedEvents: 2, sentEvents: 2, decodeErrors: 0, + processorRestarts: 0, }, { event: []byte("{\"records\": [{'test':'this is some message','time':'2019-12-17T13:43:44.4946995Z'}]}"), // Thank you, Azure Functions logs. @@ -87,6 +90,7 @@ func TestInputMetricsEventsReceived(t *testing.T) { receivedEvents: 1, sentEvents: 1, decodeErrors: 0, + processorRestarts: 0, }, { event: []byte("{\"records\": [{'test':'this is some message','time':'2019-12-17T13:43:44.4946995Z'}]}"), @@ -100,6 +104,7 @@ func TestInputMetricsEventsReceived(t *testing.T) { decodeErrors: 1, receivedEvents: 0, // If we can't decode the message, we can't count the events in it. sentEvents: 1, // The input sends the unmodified message as a string to the outlet. + processorRestarts: 0, }, } @@ -158,6 +163,9 @@ func TestInputMetricsEventsReceived(t *testing.T) { assert.Equal(t, tc.receivedEvents, metrics.receivedEvents.Get()) assert.Equal(t, tc.sentEvents, metrics.sentEvents.Get()) + // Processor + assert.Equal(t, tc.processorRestarts, metrics.processorRestarts.Get()) + metrics.Close() // Stop the metrics collection. } } From fa326f92c89133ecac971d417bec70519450808d Mon Sep 17 00:00:00 2001 From: Maurizio Branca Date: Mon, 27 May 2024 14:58:27 +0200 Subject: [PATCH 29/41] minor: add comments --- x-pack/filebeat/input/azureeventhub/decoder.go | 7 +++++++ x-pack/filebeat/input/azureeventhub/input.go | 10 +++++----- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/x-pack/filebeat/input/azureeventhub/decoder.go b/x-pack/filebeat/input/azureeventhub/decoder.go index c71cb45bfc0..6b9c40f5c8a 100644 --- a/x-pack/filebeat/input/azureeventhub/decoder.go +++ b/x-pack/filebeat/input/azureeventhub/decoder.go @@ -1,8 +1,15 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. + +//go:build !aix + package azureeventhub import ( "encoding/json" "fmt" + "github.com/elastic/elastic-agent-libs/logp" ) diff --git a/x-pack/filebeat/input/azureeventhub/input.go b/x-pack/filebeat/input/azureeventhub/input.go index 800fc44aaf9..446c4616a16 100644 --- a/x-pack/filebeat/input/azureeventhub/input.go +++ b/x-pack/filebeat/input/azureeventhub/input.go @@ -51,18 +51,18 @@ func Plugin(log *logp.Logger) v2.Plugin { } } -// eventHubInputManager is the manager for the Azure Event Hub input. -// -// It is responsible for creating new instances of the input, according -// to the configuration provided. 
+// eventHubInputManager is responsible for creating the right azure-eventhub input
+// based on the configuration.
 type eventHubInputManager struct {
 	log *logp.Logger
 }
 
+// Init initializes the input manager.
 func (m *eventHubInputManager) Init(unison.Group) error {
 	return nil
 }
 
+// Create creates a new azure-eventhub input based on the configuration.
 func (m *eventHubInputManager) Create(cfg *conf.C) (v2.Input, error) {
 	var config azureInputConfig
 	if err := cfg.Unpack(&config); err != nil {
@@ -75,7 +75,7 @@ func (m *eventHubInputManager) Create(cfg *conf.C) (v2.Input, error) {
 	case "v2":
 		return newEventHubInputV2(config, m.log)
 	default:
-		return nil, fmt.Errorf("invalid azure-eventhub processor version: %s", config.ProcessorVersion)
+		return nil, fmt.Errorf("invalid azure-eventhub processor version: %s (available versions: v1, v2)", config.ProcessorVersion)
 	}
 
 	//return &azureInput{

From ef39901a53e29f25ac79b8edea66158a59ba8b8d Mon Sep 17 00:00:00 2001
From: Maurizio Branca
Date: Mon, 27 May 2024 17:02:10 +0200
Subject: [PATCH 30/41] Update checkpoint when pipeline acks the events

---
 .../filebeat/input/azureeventhub/v2_input.go  | 84 ++++++++++++-------
 1 file changed, 54 insertions(+), 30 deletions(-)

diff --git a/x-pack/filebeat/input/azureeventhub/v2_input.go b/x-pack/filebeat/input/azureeventhub/v2_input.go
index 2dd83b73fe6..d82a77b7809 100644
--- a/x-pack/filebeat/input/azureeventhub/v2_input.go
+++ b/x-pack/filebeat/input/azureeventhub/v2_input.go
@@ -13,6 +13,7 @@ import (
 	"time"
 
 	"github.com/Azure/azure-sdk-for-go/sdk/azcore"
+	"github.com/Azure/azure-sdk-for-go/sdk/azcore/to"
 	"github.com/Azure/azure-sdk-for-go/sdk/storage/azblob/bloberror"
 
 	"github.com/Azure/azure-sdk-for-go/sdk/messaging/azeventhubs"
@@ -21,6 +22,7 @@ import (
 
 	v2 "github.com/elastic/beats/v7/filebeat/input/v2"
 	"github.com/elastic/beats/v7/libbeat/beat"
+	"github.com/elastic/beats/v7/libbeat/common/acker"
 	"github.com/elastic/beats/v7/libbeat/common/backoff"
 	"github.com/elastic/elastic-agent-libs/logp"
 	"github.com/elastic/elastic-agent-libs/mapstr"
@@ -35,6 +37,7 @@ type eventHubInputV2 struct {
 	checkpointStore    *checkpoints.BlobStore
 	consumerClient     *azeventhubs.ConsumerClient
 	pipelineClient     beat.Client
+	pipeline           beat.Pipeline
 	messageDecoder     messageDecoder
 	migrationAssistant *migrationAssistant
 }
@@ -80,11 +83,12 @@ func (in *eventHubInputV2) Run(
 
 	// Create pipelineClient for publishing events and receive
 	// notification of their ACKs.
-	in.pipelineClient, err = createPipelineClient(pipeline)
-	if err != nil {
-		return fmt.Errorf("failed to create pipeline pipelineClient: %w", err)
-	}
-	defer in.pipelineClient.Close()
+	// in.pipelineClient, err = createPipelineClient(pipeline)
+	// if err != nil {
+	// 	return fmt.Errorf("failed to create pipeline pipelineClient: %w", err)
+	// }
+	// defer in.pipelineClient.Close()
+	in.pipeline = pipeline
 
 	// Start the main run loop
 	in.run(ctx)
@@ -319,24 +323,25 @@ func (in *eventHubInputV2) workersLoop(ctx context.Context, processor *azeventhu
 
 // processEventsForPartition receives events from a partition and processes them.
 func (in *eventHubInputV2) processEventsForPartition(ctx context.Context, partitionClient *azeventhubs.ProcessorPartitionClient) error {
-	// pipelineClient := createPipelineClient()
-
 	// 1. [BEGIN] Initialize any partition specific resources for your application.
 	// 2. [CONTINUOUS] Loop, calling ReceiveEvents() and UpdateCheckpoint().
 	// 3. [END] Cleanup any resources.
- defer func() { - // 3/3 [END] Do cleanup here, like shutting down database clients - // or other resources used for processing this partition. - shutdownPartitionResources(partitionClient) - }() partitionID := partitionClient.PartitionID() // 1/3 [BEGIN] Initialize any partition specific resources for your application. - if err := initializePartitionResources(partitionID, partitionClient); err != nil { + pipelineClient, err := initializePartitionResources(ctx, partitionClient, in.pipeline, in.log) + if err != nil { return err } + defer func() { + // 3/3 [END] Do cleanup here, like shutting down database clients + // or other resources used for processing this partition. + shutdownPartitionResources(ctx, partitionClient, pipelineClient) + in.log.Debugw("partition resources cleaned up", "partition", partitionID) + }() + // 2/3 [CONTINUOUS] Receive events, checkpointing as needed using UpdateCheckpoint. for { // Wait up to a minute for 100 events, otherwise returns whatever we collected during that time. @@ -360,28 +365,28 @@ func (in *eventHubInputV2) processEventsForPartition(ctx context.Context, partit in.log.Debugw("received events", "count", len(events), "partition", partitionID) - err = in.processReceivedEvents(events) + err = in.processReceivedEvents(events, pipelineClient) if err != nil { return fmt.Errorf("error processing received events: %w", err) } - in.log.Debugw("updating checkpoint information", "partition", partitionID) + //in.log.Debugw("updating checkpoint information", "partition", partitionID) - // Updates the checkpoint with the latest event received. - // - // If processing needs to restart it will restart from this - // point, automatically. - if err := partitionClient.UpdateCheckpoint(ctx, events[len(events)-1], nil); err != nil { - in.log.Errorw("error updating checkpoint", "error", err) - return err - } + //// Updates the checkpoint with the latest event received. + //// + //// If processing needs to restart it will restart from this + //// point, automatically. + //if err := partitionClient.UpdateCheckpoint(ctx, events[len(events)-1], nil); err != nil { + // in.log.Errorw("error updating checkpoint", "error", err) + // return err + //} - in.log.Debugw("checkpoint updated", "partition", partitionID) + //in.log.Debugw("checkpoint updated", "partition", partitionID) } } // processReceivedEvents -func (in *eventHubInputV2) processReceivedEvents(receivedEvents []*azeventhubs.ReceivedEventData) error { +func (in *eventHubInputV2) processReceivedEvents(receivedEvents []*azeventhubs.ReceivedEventData, pipelineClient beat.Client) error { processingStartTime := time.Now() azure := mapstr.M{ // The partition ID is not available. @@ -416,7 +421,7 @@ func (in *eventHubInputV2) processReceivedEvents(receivedEvents []*azeventhubs.R } // Publish the event to the Beats pipeline. - in.pipelineClient.Publish(event) + pipelineClient.Publish(event) // Update input metrics. in.metrics.sentEvents.Inc() @@ -430,14 +435,33 @@ func (in *eventHubInputV2) processReceivedEvents(receivedEvents []*azeventhubs.R return nil } -func initializePartitionResources(partitionID string, partitionClient *azeventhubs.ProcessorPartitionClient) error { +func initializePartitionResources(ctx context.Context, partitionClient *azeventhubs.ProcessorPartitionClient, pipeline beat.Pipeline, log *logp.Logger) (beat.Client, error) { // initialize things that might be partition specific, like a // database connection. 
-	return nil
+	return pipeline.ConnectWith(beat.ClientConfig{
+		EventListener: acker.LastEventPrivateReporter(func(acked int, data any) {
+			err := partitionClient.UpdateCheckpoint(ctx, data.(*azeventhubs.ReceivedEventData), nil)
+			if err != nil {
+				log.Errorw("error updating checkpoint", "error", err)
+			}
+			log.Debugw(
+				"checkpoint updated",
+				"partition", partitionClient.PartitionID(),
+				"acked", acked,
+			)
+		}),
+		Processing: beat.ProcessingConfig{
+			// This input only produces events with basic types so normalization
+			// is not required.
+			EventNormalization: to.Ptr(false),
+		},
+	})
 }
 
-func shutdownPartitionResources(partitionClient *azeventhubs.ProcessorPartitionClient) {
+func shutdownPartitionResources(ctx context.Context, partitionClient *azeventhubs.ProcessorPartitionClient, pipelineClient beat.Client) {
 	// Each PartitionClient holds onto an external resource and should be closed if you're
 	// not processing them anymore.
-	defer partitionClient.Close(context.TODO())
+	defer partitionClient.Close(ctx)
+
+	defer pipelineClient.Close()
 }

From ee6e4af1f2d538e5b0ed489155441aac1142239d Mon Sep 17 00:00:00 2001
From: Maurizio Branca
Date: Mon, 27 May 2024 17:44:43 +0200
Subject: [PATCH 31/41] Add partition ID to event hub metadata

---
 x-pack/filebeat/input/azureeventhub/v2_input.go | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/x-pack/filebeat/input/azureeventhub/v2_input.go b/x-pack/filebeat/input/azureeventhub/v2_input.go
index d82a77b7809..8f5e089caa5 100644
--- a/x-pack/filebeat/input/azureeventhub/v2_input.go
+++ b/x-pack/filebeat/input/azureeventhub/v2_input.go
@@ -36,7 +36,6 @@ type eventHubInputV2 struct {
 	metrics            *inputMetrics
 	checkpointStore    *checkpoints.BlobStore
 	consumerClient     *azeventhubs.ConsumerClient
-	pipelineClient     beat.Client
 	pipeline           beat.Pipeline
 	messageDecoder     messageDecoder
 	migrationAssistant *migrationAssistant
@@ -365,7 +364,7 @@ func (in *eventHubInputV2) processEventsForPartition(ctx context.Context, partit
 
 		in.log.Debugw("received events", "count", len(events), "partition", partitionID)
 
-		err = in.processReceivedEvents(events, pipelineClient)
+		err = in.processReceivedEvents(events, partitionID, pipelineClient)
 		if err != nil {
 			return fmt.Errorf("error processing received events: %w", err)
 		}
@@ -386,11 +385,10 @@ func (in *eventHubInputV2) processEventsForPartition(ctx context.Context, partit
 }
 
 // processReceivedEvents
-func (in *eventHubInputV2) processReceivedEvents(receivedEvents []*azeventhubs.ReceivedEventData, pipelineClient beat.Client) error {
+func (in *eventHubInputV2) processReceivedEvents(receivedEvents []*azeventhubs.ReceivedEventData, partitionID string, pipelineClient beat.Client) error {
 	processingStartTime := time.Now()
 	azure := mapstr.M{
-		// The partition ID is not available.
-		// "partition_id": partitionID,
+		"partition_id":   partitionID,
 		"eventhub":       in.config.EventHubName,
 		"consumer_group": in.config.ConsumerGroup,
 	}
@@ -435,6 +433,9 @@ func (in *eventHubInputV2) processReceivedEvents(receivedEvents []*azeventhubs.R
 	return nil
 }
 
+// initializePartitionResources initializes any partition specific resources for your application.
+//
+// Sets up a pipelineClient for publishing events and receiving notifications of their ACKs.
 func initializePartitionResources(ctx context.Context, partitionClient *azeventhubs.ProcessorPartitionClient, pipeline beat.Pipeline, log *logp.Logger) (beat.Client, error) {
 	// initialize things that might be partition specific, like a
 	// database connection.
@@ -444,6 +445,7 @@ func initializePartitionResources(ctx context.Context, partitionClient *azeventh
 		if err != nil {
 			log.Errorw("error updating checkpoint", "error", err)
 		}
+
 		log.Debugw(
 			"checkpoint updated",
 			"partition", partitionClient.PartitionID(),
 			"acked", acked,
 		)
@@ -463,5 +465,7 @@ func shutdownPartitionResources(ctx context.Context, partitionClient *azeventhub
 	// not processing them anymore.
 	defer partitionClient.Close(ctx)
 
+	// Closing the pipeline since we're done
+	// processing events for this partition.
 	defer pipelineClient.Close()
 }

From 9a496dc56fdac7397a169e6e2f7a9e47a1786d1f Mon Sep 17 00:00:00 2001
From: Maurizio Branca
Date: Mon, 27 May 2024 17:44:57 +0200
Subject: [PATCH 32/41] minor: comments

---
 x-pack/filebeat/input/azureeventhub/v2_migration.go | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/x-pack/filebeat/input/azureeventhub/v2_migration.go b/x-pack/filebeat/input/azureeventhub/v2_migration.go
index f0f0945d717..4ccbc2d270b 100644
--- a/x-pack/filebeat/input/azureeventhub/v2_migration.go
+++ b/x-pack/filebeat/input/azureeventhub/v2_migration.go
@@ -47,7 +47,12 @@ func (m *migrationAssistant) checkAndMigrate(ctx context.Context, eventHubConnec
 		return fmt.Errorf("failed to get event hub properties: %w", err)
 	}
 
-	m.log.Infof("Event Hub properties: %v", eventHubProperties)
+	m.log.Infow(
+		"Event Hub properties",
+		"name", eventHubProperties.Name,
+		"created_on", eventHubProperties.CreatedOn,
+		"partition_ids", eventHubProperties.PartitionIDs,
+	)
 
 	// Parse the connection string to get FQDN.
 	props, err := parseConnectionString(eventHubConnectionString)

From 7889599bec87fb5dcb3b0e3a3b37cfdb9dfcc7b0 Mon Sep 17 00:00:00 2001
From: Maurizio Branca
Date: Fri, 31 May 2024 15:44:36 +0200
Subject: [PATCH 33/41] Add support for partition key in eventhub metadata

Alongside the partition ID, users can optionally send events with a
partition key. Add an (optional) partition key to the event hub
metadata.

---
 x-pack/filebeat/input/azureeventhub/v2_input.go | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/x-pack/filebeat/input/azureeventhub/v2_input.go b/x-pack/filebeat/input/azureeventhub/v2_input.go
index 8f5e089caa5..dbfd3eb732b 100644
--- a/x-pack/filebeat/input/azureeventhub/v2_input.go
+++ b/x-pack/filebeat/input/azureeventhub/v2_input.go
@@ -387,7 +387,7 @@ func (in *eventHubInputV2) processEventsForPartition(ctx context.Context, partit
 // processReceivedEvents
 func (in *eventHubInputV2) processReceivedEvents(receivedEvents []*azeventhubs.ReceivedEventData, partitionID string, pipelineClient beat.Client) error {
 	processingStartTime := time.Now()
-	azure := mapstr.M{
+	eventHubMetadata := mapstr.M{
 		"partition_id":   partitionID,
 		"eventhub":       in.config.EventHubName,
 		"consumer_group": in.config.ConsumerGroup,
 	}
@@ -403,9 +403,14 @@ func (in *eventHubInputV2) processReceivedEvents(receivedEvents []*azeventhubs.R
 	records := in.messageDecoder.Decode(receivedEventData.Body)
 
 	for record := range records {
-		_, _ = azure.Put("offset", receivedEventData.Offset)
-		_, _ = azure.Put("sequence_number", receivedEventData.SequenceNumber)
-		_, _ = azure.Put("enqueued_time", receivedEventData.EnqueuedTime)
+		_, _ = eventHubMetadata.Put("offset", receivedEventData.Offset)
+		_, _ = eventHubMetadata.Put("sequence_number", receivedEventData.SequenceNumber)
+		_, _ = eventHubMetadata.Put("enqueued_time", receivedEventData.EnqueuedTime)
+
+		// The partition key is optional.
+ if receivedEventData.PartitionKey != nil { + _, _ = eventHubMetadata.Put("partition_key", *receivedEventData.PartitionKey) + } event := beat.Event{ // this is the default value for the @timestamp field; usually the ingest @@ -413,7 +418,7 @@ func (in *eventHubInputV2) processReceivedEvents(receivedEvents []*azeventhubs.R Timestamp: processingStartTime, Fields: mapstr.M{ "message": record, - "azure": azure, + "azure": eventHubMetadata, }, Private: receivedEventData, } From 883811978c215728ccdf9ecdbfecaef433626e75 Mon Sep 17 00:00:00 2001 From: Maurizio Branca Date: Fri, 31 May 2024 16:34:11 +0200 Subject: [PATCH 34/41] Add migrate_checkpoint config option The new migrate_checkpoint config option controls if the input v2 should perform a migration check on start. If migrate_checkpoint is true, the input checks and performs the migration (if v1 info exists) on the very first v2 run. If migrate_checkpoint is false, the input will skip the migration assistant and will not perform any checks or migration. --- x-pack/filebeat/input/azureeventhub/config.go | 2 ++ .../filebeat/input/azureeventhub/v2_input.go | 23 +++++++++++-------- .../input/azureeventhub/v2_migration.go | 3 +-- .../activitylogs/config/azure-eventhub.yml | 4 ++++ .../module/azure/activitylogs/manifest.yml | 4 +++- .../azure/auditlogs/config/azure-eventhub.yml | 8 +++++++ .../module/azure/auditlogs/manifest.yml | 2 ++ .../platformlogs/config/azure-eventhub.yml | 8 +++++++ .../module/azure/platformlogs/manifest.yml | 2 ++ .../signinlogs/config/azure-eventhub.yml | 8 +++++++ .../module/azure/signinlogs/manifest.yml | 2 ++ 11 files changed, 53 insertions(+), 13 deletions(-) diff --git a/x-pack/filebeat/input/azureeventhub/config.go b/x-pack/filebeat/input/azureeventhub/config.go index c5bb3316428..445b145b3e7 100644 --- a/x-pack/filebeat/input/azureeventhub/config.go +++ b/x-pack/filebeat/input/azureeventhub/config.go @@ -29,6 +29,8 @@ type azureInputConfig struct { SanitizeOptions []string `config:"sanitize_options"` // Processor version to use (v1 or v2). Default is v1. ProcessorVersion string `config:"processor_version"` + // Perform the checkpoint information migration from v1 to v2 + MigrateCheckpoint bool `config:"migrate_checkpoint"` } const ephContainerName = "filebeat" diff --git a/x-pack/filebeat/input/azureeventhub/v2_input.go b/x-pack/filebeat/input/azureeventhub/v2_input.go index dbfd3eb732b..5f14480b3f1 100644 --- a/x-pack/filebeat/input/azureeventhub/v2_input.go +++ b/x-pack/filebeat/input/azureeventhub/v2_input.go @@ -161,16 +161,19 @@ func (in *eventHubInputV2) setup(ctx context.Context) error { // run starts the main loop for processing events. func (in *eventHubInputV2) run(ctx context.Context) { - // Check if we need to migrate the checkpoint store. - err := in.migrationAssistant.checkAndMigrate( - ctx, - in.config.ConnectionString, - in.config.EventHubName, - in.config.ConsumerGroup, - ) - if err != nil { - in.log.Errorw("error migrating checkpoint store", "error", err) - // FIXME: should we return here? + if in.config.MigrateCheckpoint { + in.log.Infow("checkpoint migration is enabled") + // Check if we need to migrate the checkpoint store. + err := in.migrationAssistant.checkAndMigrate( + ctx, + in.config.ConnectionString, + in.config.EventHubName, + in.config.ConsumerGroup, + ) + if err != nil { + in.log.Errorw("error migrating checkpoint store", "error", err) + // FIXME: should we return here? 
+ } } // Handle the case when the processor stops due to diff --git a/x-pack/filebeat/input/azureeventhub/v2_migration.go b/x-pack/filebeat/input/azureeventhub/v2_migration.go index 4ccbc2d270b..e59603be015 100644 --- a/x-pack/filebeat/input/azureeventhub/v2_migration.go +++ b/x-pack/filebeat/input/azureeventhub/v2_migration.go @@ -40,7 +40,6 @@ func newMigrationAssistant(log *logp.Logger, consumerClient *azeventhubs.Consume } func (m *migrationAssistant) checkAndMigrate(ctx context.Context, eventHubConnectionString, eventHubName, consumerGroup string) error { - // Fetching event hub information eventHubProperties, err := m.consumerClient.GetEventHubProperties(ctx, nil) if err != nil { @@ -48,7 +47,7 @@ func (m *migrationAssistant) checkAndMigrate(ctx context.Context, eventHubConnec } m.log.Infow( - "Event Hub properties", + "event hub information", "name", eventHubProperties.Name, "created_on", eventHubProperties.CreatedOn, "partition_ids", eventHubProperties.PartitionIDs, diff --git a/x-pack/filebeat/module/azure/activitylogs/config/azure-eventhub.yml b/x-pack/filebeat/module/azure/activitylogs/config/azure-eventhub.yml index b87bb81a0c1..c4f00a0bd1e 100644 --- a/x-pack/filebeat/module/azure/activitylogs/config/azure-eventhub.yml +++ b/x-pack/filebeat/module/azure/activitylogs/config/azure-eventhub.yml @@ -41,6 +41,10 @@ publisher_pipeline.disable_host: {{ inList .tags "forwarded" }} processor_version: {{ .processor_version }} {{ end }} +{{ if .migrate_checkpoint }} +migrate_checkpoint: {{ .migrate_checkpoint }} +{{ end }} + processors: - add_fields: target: '' diff --git a/x-pack/filebeat/module/azure/activitylogs/manifest.yml b/x-pack/filebeat/module/azure/activitylogs/manifest.yml index ee65033c6f4..e6d16421239 100644 --- a/x-pack/filebeat/module/azure/activitylogs/manifest.yml +++ b/x-pack/filebeat/module/azure/activitylogs/manifest.yml @@ -15,8 +15,10 @@ var: - name: resource_manager_endpoint - name: tags default: [forwarded] - - name: storage_account_connection_string + - name: processor_version default: "v1" + - name: migrate_checkpoint + default: true ingest_pipeline: - ingest/pipeline.yml - ../azure-shared-pipeline.yml diff --git a/x-pack/filebeat/module/azure/auditlogs/config/azure-eventhub.yml b/x-pack/filebeat/module/azure/auditlogs/config/azure-eventhub.yml index f01ada27ba5..aa20e72ea48 100644 --- a/x-pack/filebeat/module/azure/auditlogs/config/azure-eventhub.yml +++ b/x-pack/filebeat/module/azure/auditlogs/config/azure-eventhub.yml @@ -31,6 +31,14 @@ storage_account_container: filebeat-auditlogs-{{ .eventhub }} resource_manager_endpoint: {{ .resource_manager_endpoint }} {{ end }} +{{ if .processor_version }} +processor_version: {{ .processor_version }} +{{ end }} + +{{ if .migrate_checkpoint }} +migrate_checkpoint: {{ .migrate_checkpoint }} +{{ end }} + tags: {{.tags | tojson}} publisher_pipeline.disable_host: {{ inList .tags "forwarded" }} processors: diff --git a/x-pack/filebeat/module/azure/auditlogs/manifest.yml b/x-pack/filebeat/module/azure/auditlogs/manifest.yml index 28c322ccf70..2ef6b99c1db 100644 --- a/x-pack/filebeat/module/azure/auditlogs/manifest.yml +++ b/x-pack/filebeat/module/azure/auditlogs/manifest.yml @@ -17,6 +17,8 @@ var: default: [forwarded] - name: processor_version default: "v1" + - name: migrate_checkpoint + default: true ingest_pipeline: - ingest/pipeline.yml diff --git a/x-pack/filebeat/module/azure/platformlogs/config/azure-eventhub.yml b/x-pack/filebeat/module/azure/platformlogs/config/azure-eventhub.yml index b8efd833b97..d19c1fa6384 100644 
--- a/x-pack/filebeat/module/azure/platformlogs/config/azure-eventhub.yml +++ b/x-pack/filebeat/module/azure/platformlogs/config/azure-eventhub.yml @@ -31,6 +31,14 @@ storage_account_container: filebeat-platformlogs-{{ .eventhub }} resource_manager_endpoint: {{ .resource_manager_endpoint }} {{ end }} +{{ if .processor_version }} +processor_version: {{ .processor_version }} +{{ end }} + +{{ if .migrate_checkpoint }} +migrate_checkpoint: {{ .migrate_checkpoint }} +{{ end }} + tags: {{.tags | tojson}} publisher_pipeline.disable_host: {{ inList .tags "forwarded" }} diff --git a/x-pack/filebeat/module/azure/platformlogs/manifest.yml b/x-pack/filebeat/module/azure/platformlogs/manifest.yml index a0abd4ffc62..c4d24a31e43 100644 --- a/x-pack/filebeat/module/azure/platformlogs/manifest.yml +++ b/x-pack/filebeat/module/azure/platformlogs/manifest.yml @@ -16,6 +16,8 @@ var: default: [forwarded] - name: processor_version default: "v1" + - name: migrate_checkpoint + default: true ingest_pipeline: - ingest/pipeline.yml diff --git a/x-pack/filebeat/module/azure/signinlogs/config/azure-eventhub.yml b/x-pack/filebeat/module/azure/signinlogs/config/azure-eventhub.yml index 685788589ee..940bb720afe 100644 --- a/x-pack/filebeat/module/azure/signinlogs/config/azure-eventhub.yml +++ b/x-pack/filebeat/module/azure/signinlogs/config/azure-eventhub.yml @@ -31,6 +31,14 @@ storage_account_container: filebeat-signinlogs-{{ .eventhub }} resource_manager_endpoint: {{ .resource_manager_endpoint }} {{ end }} +{{ if .processor_version }} +processor_version: {{ .processor_version }} +{{ end }} + +{{ if .migrate_checkpoint }} +migrate_checkpoint: {{ .migrate_checkpoint }} +{{ end }} + tags: {{.tags | tojson}} publisher_pipeline.disable_host: {{ inList .tags "forwarded" }} processors: diff --git a/x-pack/filebeat/module/azure/signinlogs/manifest.yml b/x-pack/filebeat/module/azure/signinlogs/manifest.yml index b512fa4677a..a053c06fe1b 100644 --- a/x-pack/filebeat/module/azure/signinlogs/manifest.yml +++ b/x-pack/filebeat/module/azure/signinlogs/manifest.yml @@ -17,6 +17,8 @@ var: default: [forwarded] - name: processor_version default: "v1" + - name: migrate_checkpoint + default: true ingest_pipeline: - ingest/pipeline.yml - ../azure-shared-pipeline.yml From 0944380cac2b05e9358e355220bc22e4c3bb2f36 Mon Sep 17 00:00:00 2001 From: Maurizio Branca Date: Mon, 3 Jun 2024 11:15:21 +0200 Subject: [PATCH 35/41] Make starting position adjustable (+validation) Expand processor options by adding a new `start_position` configuration. Possible values for `start_position` are: - "earliest" to start from the beginning of the event hub retention period. - "latest" to start from new events. The input uses the 'start_position' option when checkpoint information from the storage account container is unavailable (on the input's first start). 
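A sketch of how value validation for the new option could look
(hypothetical helper, not part of this patch; `startPositionEarliest`
and `startPositionLatest` are the constants this series adds to
`v2_input.go`):

```go
// validateStartPosition rejects unsupported start_position values at
// config load time. An empty value is allowed because Validate()
// defaults it to "earliest".
func validateStartPosition(pos string) error {
	switch pos {
	case "", startPositionEarliest, startPositionLatest:
		return nil
	default:
		return fmt.Errorf("invalid azure-eventhub start_position: %q (available positions: %s, %s)",
			pos, startPositionEarliest, startPositionLatest)
	}
}
```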
--- x-pack/filebeat/input/azureeventhub/config.go | 44 +++++++++++++---- x-pack/filebeat/input/azureeventhub/input.go | 6 ++- .../filebeat/input/azureeventhub/v2_input.go | 47 +++++++++++++++++-- .../activitylogs/config/azure-eventhub.yml | 4 ++ .../module/azure/activitylogs/manifest.yml | 4 +- .../azure/auditlogs/config/azure-eventhub.yml | 4 ++ .../module/azure/auditlogs/manifest.yml | 4 +- .../platformlogs/config/azure-eventhub.yml | 4 ++ .../module/azure/platformlogs/manifest.yml | 4 +- .../signinlogs/config/azure-eventhub.yml | 4 ++ .../module/azure/signinlogs/manifest.yml | 5 +- 11 files changed, 112 insertions(+), 18 deletions(-) diff --git a/x-pack/filebeat/input/azureeventhub/config.go b/x-pack/filebeat/input/azureeventhub/config.go index 445b145b3e7..95486e68836 100644 --- a/x-pack/filebeat/input/azureeventhub/config.go +++ b/x-pack/filebeat/input/azureeventhub/config.go @@ -20,17 +20,21 @@ type azureInputConfig struct { EventHubName string `config:"eventhub" validate:"required"` ConsumerGroup string `config:"consumer_group"` // Azure Storage container to store leases and checkpoints - SAName string `config:"storage_account"` - SAKey string `config:"storage_account_key"` - SAContainer string `config:"storage_account_container"` + SAName string `config:"storage_account" validate:"required"` + SAKey string `config:"storage_account_key"` // (processor v1 only) + SAConnectionString string `config:"storage_account_connection_string"` // (processor v2 only) + SAContainer string `config:"storage_account_container"` // by default the azure public environment is used, to override, users can provide a specific resource manager endpoint OverrideEnvironment string `config:"resource_manager_endpoint"` // cleanup the log JSON input for known issues, options: SINGLE_QUOTES, NEW_LINES SanitizeOptions []string `config:"sanitize_options"` - // Processor version to use (v1 or v2). Default is v1. + // Processor version to use (v1 or v2). Default is v1 (processor v2 only). ProcessorVersion string `config:"processor_version"` - // Perform the checkpoint information migration from v1 to v2 + // Controls if the input should perform the checkpoint information + // migration from v1 to v2 (processor v2 only). MigrateCheckpoint bool `config:"migrate_checkpoint"` + // Controls the start position for all partitions (processor v2 only). + StartPosition string `config:"start_position"` } const ephContainerName = "filebeat" @@ -44,9 +48,8 @@ func (conf *azureInputConfig) Validate() error { if conf.EventHubName == "" { return errors.New("no event hub name configured") } - // FIXME: this check applies only to processor v1 - if conf.SAName == "" || conf.SAKey == "" { - return errors.New("no storage account or storage account key configured") + if conf.SAName == "" { + return errors.New("no storage account configured (config: storage_account)") } if conf.SAContainer == "" { conf.SAContainer = fmt.Sprintf("%s-%s", ephContainerName, conf.EventHubName) @@ -79,7 +82,30 @@ func (conf *azureInputConfig) Validate() error { } if conf.ProcessorVersion == "" { - conf.ProcessorVersion = "v1" + // The default processor version is "v1". 
+ conf.ProcessorVersion = processorV1 + } + + switch conf.ProcessorVersion { + case processorV1: + if conf.SAKey == "" { + return errors.New("no storage account key configured (config: storage_account_key)") + } + case processorV2: + if conf.SAKey != "" { + logger.Warnf("storage_account_key is not used in processor v2") + } + if conf.SAConnectionString == "" { + return errors.New("no storage account connection string configured (config: storage_account_connection_string)") + } + default: + return fmt.Errorf("invalid azure-eventhub processor version: %s (available versions: v1, v2)", conf.ProcessorVersion) + } + + if conf.StartPosition == "" { + // For backward compatibility with v1, + // the default start position is "earliest". + conf.StartPosition = startPositionEarliest } return nil diff --git a/x-pack/filebeat/input/azureeventhub/input.go b/x-pack/filebeat/input/azureeventhub/input.go index 446c4616a16..4cf2a91afdc 100644 --- a/x-pack/filebeat/input/azureeventhub/input.go +++ b/x-pack/filebeat/input/azureeventhub/input.go @@ -26,6 +26,8 @@ const ( eventHubConnector = ";EntityPath=" expandEventListFromField = "records" inputName = "azure-eventhub" + processorV1 = "v1" + processorV2 = "v2" ) var environments = map[string]azure.Environment{ @@ -70,9 +72,9 @@ func (m *eventHubInputManager) Create(cfg *conf.C) (v2.Input, error) { } switch config.ProcessorVersion { - case "v1": + case processorV1: return newEventHubInputV1(config, m.log) - case "v2": + case processorV2: return newEventHubInputV2(config, m.log) default: return nil, fmt.Errorf("invalid azure-eventhub processor version: %s (available versions: v1, v2)", config.ProcessorVersion) diff --git a/x-pack/filebeat/input/azureeventhub/v2_input.go b/x-pack/filebeat/input/azureeventhub/v2_input.go index 5f14480b3f1..46fe05c322b 100644 --- a/x-pack/filebeat/input/azureeventhub/v2_input.go +++ b/x-pack/filebeat/input/azureeventhub/v2_input.go @@ -28,6 +28,11 @@ import ( "github.com/elastic/elastic-agent-libs/mapstr" ) +const ( + startPositionEarliest = "earliest" + startPositionLatest = "latest" +) + // azureInputConfig the Azure Event Hub input v2, // that uses the modern Azure Event Hub SDK for Go. type eventHubInputV2 struct { @@ -185,9 +190,7 @@ func (in *eventHubInputV2) run(ctx context.Context) { 120*time.Second, // max backoff ) - processorOptions := azeventhubs.ProcessorOptions{ - LoadBalancingStrategy: azeventhubs.ProcessorStrategyBalanced, - } + processorOptions := createProcessorOptions(in.config) for ctx.Err() == nil { // Create a new processor for each run. @@ -240,6 +243,44 @@ func (in *eventHubInputV2) run(ctx context.Context) { } } +// createProcessorOptions creates the processor options using the input configuration. +func createProcessorOptions(config azureInputConfig) azeventhubs.ProcessorOptions { + // LoadBalancingStrategy offers multiple options: + // + // - Balanced + // - Greedy + // + // As of now, we only support Balanced. + loadBalancingStrategy := azeventhubs.ProcessorStrategyBalanced + + // Start position offers multiple options: + // + // - Offset + // - SequenceNumber + // - EnqueuedTime + // + // As of now, we only support Earliest and Latest. + // + // The processor uses the default start position for + // all partitions if there is no checkpoint information + // available from the storage account container. 
+ defaultStartPosition := azeventhubs.StartPosition{} + + switch config.StartPosition { + case startPositionEarliest: + defaultStartPosition.Earliest = to.Ptr(true) + case startPositionLatest: + defaultStartPosition.Latest = to.Ptr(true) + } + + return azeventhubs.ProcessorOptions{ + LoadBalancingStrategy: loadBalancingStrategy, + StartPositions: azeventhubs.StartPositions{ + Default: defaultStartPosition, + }, + } +} + // ensureContainerExists ensures the blob container exists. func (in *eventHubInputV2) ensureContainerExists(ctx context.Context, blobContainerClient *container.Client) error { exists, err := in.containerExists(ctx, blobContainerClient) diff --git a/x-pack/filebeat/module/azure/activitylogs/config/azure-eventhub.yml b/x-pack/filebeat/module/azure/activitylogs/config/azure-eventhub.yml index c4f00a0bd1e..b69d473dd9b 100644 --- a/x-pack/filebeat/module/azure/activitylogs/config/azure-eventhub.yml +++ b/x-pack/filebeat/module/azure/activitylogs/config/azure-eventhub.yml @@ -45,6 +45,10 @@ processor_version: {{ .processor_version }} migrate_checkpoint: {{ .migrate_checkpoint }} {{ end }} +{{ if .start_position }} +start_position: {{ .start_position }} +{{ end }} + processors: - add_fields: target: '' diff --git a/x-pack/filebeat/module/azure/activitylogs/manifest.yml b/x-pack/filebeat/module/azure/activitylogs/manifest.yml index e6d16421239..59c1ef9b729 100644 --- a/x-pack/filebeat/module/azure/activitylogs/manifest.yml +++ b/x-pack/filebeat/module/azure/activitylogs/manifest.yml @@ -18,7 +18,9 @@ var: - name: processor_version default: "v1" - name: migrate_checkpoint - default: true + default: yes + - name: start_position + default: "earliest" ingest_pipeline: - ingest/pipeline.yml - ../azure-shared-pipeline.yml diff --git a/x-pack/filebeat/module/azure/auditlogs/config/azure-eventhub.yml b/x-pack/filebeat/module/azure/auditlogs/config/azure-eventhub.yml index aa20e72ea48..8b6dd0d383f 100644 --- a/x-pack/filebeat/module/azure/auditlogs/config/azure-eventhub.yml +++ b/x-pack/filebeat/module/azure/auditlogs/config/azure-eventhub.yml @@ -39,6 +39,10 @@ processor_version: {{ .processor_version }} migrate_checkpoint: {{ .migrate_checkpoint }} {{ end }} +{{ if .start_position }} +start_position: {{ .start_position }} +{{ end }} + tags: {{.tags | tojson}} publisher_pipeline.disable_host: {{ inList .tags "forwarded" }} processors: diff --git a/x-pack/filebeat/module/azure/auditlogs/manifest.yml b/x-pack/filebeat/module/azure/auditlogs/manifest.yml index 2ef6b99c1db..8da58bfc252 100644 --- a/x-pack/filebeat/module/azure/auditlogs/manifest.yml +++ b/x-pack/filebeat/module/azure/auditlogs/manifest.yml @@ -18,7 +18,9 @@ var: - name: processor_version default: "v1" - name: migrate_checkpoint - default: true + default: yes + - name: start_position + default: "earliest" ingest_pipeline: - ingest/pipeline.yml diff --git a/x-pack/filebeat/module/azure/platformlogs/config/azure-eventhub.yml b/x-pack/filebeat/module/azure/platformlogs/config/azure-eventhub.yml index d19c1fa6384..6648e40dcef 100644 --- a/x-pack/filebeat/module/azure/platformlogs/config/azure-eventhub.yml +++ b/x-pack/filebeat/module/azure/platformlogs/config/azure-eventhub.yml @@ -39,6 +39,10 @@ processor_version: {{ .processor_version }} migrate_checkpoint: {{ .migrate_checkpoint }} {{ end }} +{{ if .start_position }} +start_position: {{ .start_position }} +{{ end }} + tags: {{.tags | tojson}} publisher_pipeline.disable_host: {{ inList .tags "forwarded" }} diff --git a/x-pack/filebeat/module/azure/platformlogs/manifest.yml 
b/x-pack/filebeat/module/azure/platformlogs/manifest.yml index c4d24a31e43..36e1f438f01 100644 --- a/x-pack/filebeat/module/azure/platformlogs/manifest.yml +++ b/x-pack/filebeat/module/azure/platformlogs/manifest.yml @@ -17,7 +17,9 @@ var: - name: processor_version default: "v1" - name: migrate_checkpoint - default: true + default: yes + - name: start_position + default: "earliest" ingest_pipeline: - ingest/pipeline.yml diff --git a/x-pack/filebeat/module/azure/signinlogs/config/azure-eventhub.yml b/x-pack/filebeat/module/azure/signinlogs/config/azure-eventhub.yml index 940bb720afe..6e11a945acc 100644 --- a/x-pack/filebeat/module/azure/signinlogs/config/azure-eventhub.yml +++ b/x-pack/filebeat/module/azure/signinlogs/config/azure-eventhub.yml @@ -39,6 +39,10 @@ processor_version: {{ .processor_version }} migrate_checkpoint: {{ .migrate_checkpoint }} {{ end }} +{{ if .start_position }} +start_position: {{ .start_position }} +{{ end }} + tags: {{.tags | tojson}} publisher_pipeline.disable_host: {{ inList .tags "forwarded" }} processors: diff --git a/x-pack/filebeat/module/azure/signinlogs/manifest.yml b/x-pack/filebeat/module/azure/signinlogs/manifest.yml index a053c06fe1b..c64dff0b207 100644 --- a/x-pack/filebeat/module/azure/signinlogs/manifest.yml +++ b/x-pack/filebeat/module/azure/signinlogs/manifest.yml @@ -18,7 +18,10 @@ var: - name: processor_version default: "v1" - name: migrate_checkpoint - default: true + default: yes + - name: start_position + default: "earliest" + ingest_pipeline: - ingest/pipeline.yml - ../azure-shared-pipeline.yml From 9711c66109406fe7e943adb37c32d75d12973972 Mon Sep 17 00:00:00 2001 From: Maurizio Branca Date: Mon, 3 Jun 2024 17:51:10 +0200 Subject: [PATCH 36/41] Add README with detailed test scenarios --- x-pack/filebeat/input/azureeventhub/README.md | 567 ++++++++++++++++++ 1 file changed, 567 insertions(+) create mode 100644 x-pack/filebeat/input/azureeventhub/README.md diff --git a/x-pack/filebeat/input/azureeventhub/README.md b/x-pack/filebeat/input/azureeventhub/README.md new file mode 100644 index 00000000000..e1692afd8bf --- /dev/null +++ b/x-pack/filebeat/input/azureeventhub/README.md @@ -0,0 +1,567 @@ +# azure-eventhub input plugin for Filebeat + +## Test Scenarios + +Test event: + +```json +{ + "records": [ + { + "ReleaseVersion": "6.2023.14.3+7f34763.release_2023w14_zmoog_5", + "RoleLocation": "France South", + "callerIpAddress": "88.14.206.49", + "category": "Administrative", + "correlationId": "15e73c11-4990-43fb-abf5-755b4551e501", + "durationMs": "0", + "identity": { + "authorization": { + "action": "Microsoft.Compute/virtualMachines/deallocate/action", + "evidence": { + "principalId": "ee4d999c57f24213adac6192582b8649", + "principalType": "Group", + "role": "Owner", + "roleAssignmentId": "0b47993c5d35401cb0d75a4f00f4728c", + "roleAssignmentScope": "/subscriptions/12cabcb4-86e8-404f-a3d2-1dc9982f45ca", + "roleDefinitionId": "8e3af657a8ff443ca75c2fe8c4bcb635" + }, + "scope": "/subscriptions/12cabcb4-86e8-404f-a3d2-1dc9982f45ca/resourceGroups/tdancheva-integrations/providers/Microsoft.Compute/virtualMachines/azure-host-2" + }, + "claims": { + "aio": "AWQAm/8TAAAA6/xwhRYxDjcCZif6YoWZ+QsQMuhT5SHB+ppfzHYY+/sRZ4R2MCnsy1UgKpHzCkrKm/pd3Cou0WkwJE16A5XXl6YXvFdOEYtVvR9Rl1ICI7+s3jIsyqgAt9KnxrUJs7Vk", + "altsecid": "5::10032002612EEF9A", + "appid": "c44b4083-3bb0-49c1-b47d-974e53cbdf3c", + "appidacr": "2", + "aud": "https://management.core.windows.net/", + "exp": "1681892540", + "groups": 
"6089bd09-85f7-465c-826e-626f83b4b90c,ee4d999c-57f2-4213-adac-6192582b8649", + "http://schemas.microsoft.com/claims/authnclassreference": "1", + "http://schemas.microsoft.com/claims/authnmethodsreferences": "pwd,mfa", + "http://schemas.microsoft.com/identity/claims/identityprovider": "https://sts.windows.net/4fa94b7d-a743-486f-abcc-6c276c44cf4b/", + "http://schemas.microsoft.com/identity/claims/objectidentifier": "385b609f-6d52-48c6-839c-057d2cd5b1e9", + "http://schemas.microsoft.com/identity/claims/scope": "user_impersonation", + "http://schemas.microsoft.com/identity/claims/tenantid": "aa40685b-417d-4664-b4ec-8f7640719adb", + "http://schemas.xmlsoap.org/ws/2005/05/identity/claims/emailaddress": "tamara.dancheva@elastic.co", + "http://schemas.xmlsoap.org/ws/2005/05/identity/claims/givenname": "Tamara", + "http://schemas.xmlsoap.org/ws/2005/05/identity/claims/name": "tamara.dancheva@elastic.co", + "http://schemas.xmlsoap.org/ws/2005/05/identity/claims/nameidentifier": "vvTSrJ-rm3FoWEwZguCZGPOgbhAcYEC0aOWDbdS_w5o", + "http://schemas.xmlsoap.org/ws/2005/05/identity/claims/surname": "Dancheva", + "iat": "1681888229", + "ipaddr": "88.14.206.49", + "iss": "https://sts.windows.net/aa40685b-417d-4664-b4ec-8f7640719adb/", + "name": "Tamara Dancheva", + "nbf": "1681888229", + "puid": "1003200290727777", + "rh": "0.AUgAW2hAqn1BZEa07I92QHGa20ZIf3kAutdPukPawfj2MBNIAOE.", + "uti": "_xoydzBhcUObV3WTNcBFAA", + "ver": "1.0", + "wids": "f2ef992c-3afb-46b9-b7cf-a126ee74c451", + "xms_tcdt": "1391159646" + } + }, + "jobId": "ProxyResourceLongOperationJob:2DGRL:2DTDANCHEVA:3A2DINTEGRATIONS:2DMICROSOFT:3A2ECOMPUTE:3A2FVIRTUALMACHINES:3A|187402E12C07F52B", + "jobType": "ProxyResourceLongOperationJob", + "level": "Information", + "operationName": "MICROSOFT.COMPUTE/VIRTUALMACHINES/DEALLOCATE/ACTION", + "properties": { + "entity": "/subscriptions/12cabcb4-86e8-404f-a3d2-1dc9982f45ca/resourceGroups/tdancheva-integrations/providers/Microsoft.Compute/virtualMachines/azure-host-2", + "eventCategory": "Administrative", + "hierarchy": "aa40685b-417d-4664-b4ec-8f7640719adb/12cabcb4-86e8-404f-a3d2-1dc9982f45ca", + "message": "Microsoft.Compute/virtualMachines/deallocate/action" + }, + "resourceId": "/SUBSCRIPTIONS/12CABCB4-86E8-404F-A3D2-1DC9982F45CA/RESOURCEGROUPS/TDANCHEVA-INTEGRATIONS/PROVIDERS/MICROSOFT.COMPUTE/VIRTUALMACHINES/AZURE-HOST-2", + "resultSignature": "Succeeded.", + "resultType": "Success", + "tenantId": "aa40685b-417d-4664-b4ec-8f7640719adb", + "time": "2023-06-15:54:46.8676027Z" + } + ] +} + +``` + +### Scenario 001: Migration + +- Setup +- start with v1 +- process 10 events +- check that checkpoint info v1 have been created +- check that the 10 events are processed +- check that checkpoint info v1 have been updated +- stop v1, enable v2, and start v2 +- check that checkpoint info v2 have been created +- check that the 10 events are not processed again + +#### Setup + +- Delete the index `filebeat-8.15.0` from the test cluster. +- Set `storage_account_container` with a new container name. 
+
+#### Start with v1
+
+Using the following configuration:
+
+```yaml
+# x-pack/filebeat/modules.d/azure.yml
+
+- module: azure
+  # All logs
+  activitylogs:
+    enabled: true
+    var:
+      eventhub: "eventhubsdkupgrade"
+      consumer_group: "$Default"
+      connection_string: ""
+      storage_account: "mbrancageneral"
+      storage_account_container: "filebeat-activitylogs-zmoog-0005"
+      storage_account_key: ""
+      storage_account_connection_string: ""
+      processor_version: "v1"
+      migrate_checkpoint: yes
+      start_position: "earliest"
+```
+
+#### Check that checkpoint info v1 has been created
+
+After the input started successfully, I see four blobs in the
+`filebeat-activitylogs-zmoog-0005` container, one for each partition.
+
+Here is the content of the blob for partition `0`:
+
+```json
+{
+  "partitionID": "0",
+  "epoch": 1,
+  "owner": "382ed56f-291c-4801-a70a-13ddbe131040",
+  "checkpoint": {
+    "offset": "-1",
+    "sequenceNumber": 0,
+    "enqueueTime": "0001-01-01T00:00:00Z"
+  },
+  "state": "available",
+  "token": "33cdc5d9-7e22-443a-bd6d-197c971967b3"
+}
+```
+
+All values have their zero value because the input never processed any event.
+
+#### Process 10 events
+
+Use the https://pypi.org/project/eventhubs/ tool to send 10 events to the event hub `eventhubsdkupgrade`:
+
+```shell
+export EVENTHUB_CONNECTION_STRING=""
+export EVENTHUB_NAMESPACE="mbranca-general"
+export EVENTHUB_NAME="eventhubsdkupgrade"
+
+$ eh -v eventdata send-batch --lines-from-text-file activitylogs.ndjson --batch-size 40
+Sending 10 events to eventhubsdkupgrade
+sending batch of 10 events
+batch sent successfully
+```
+
+The `activitylogs.ndjson` file contains ten copies of the test event.
+
+#### Check that the 10 events are processed
+
+I see the `filebeat-8.15.0` index contains 10 documents.
+
+#### Check that checkpoint info v1 has been updated
+
+I see the `filebeat-activitylogs-zmoog-0005` container still contains four blobs, but one of them
+now has a different size (`235B` instead of `228B`).
+
+The content of blobs `0`, `2`, and `3` is unchanged.
+
+The content of blob `1` is:
+
+```json
+{
+  "partitionID": "1",
+  "epoch": 1,
+  "owner": "382ed56f-291c-4801-a70a-13ddbe131040",
+  "checkpoint": {
+    "offset": "31680",
+    "sequenceNumber": 9,
+    "enqueueTime": "2024-06-03T10:34:22.678Z"
+  },
+  "state": "available",
+  "token": "32cd8a2c-a8cf-4f0f-b3cd-9e13c9830beb"
+}
+```
+
+The `sequenceNumber` changed from `0` to `9`.
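+
+For reference, this v1 blob is exactly what the migration assistant will
+translate on the first v2 run. A minimal, self-contained sketch of the
+v1-to-v2 mapping (mirroring `checkAndMigratePartition` in
+`v2_migration.go`; the `main` wrapper and the hard-coded namespace and
+event hub values are illustrative only):
+
+```go
+package main
+
+import (
+	"encoding/json"
+	"fmt"
+	"strconv"
+
+	"github.com/Azure/azure-sdk-for-go/sdk/messaging/azeventhubs"
+)
+
+// legacyCheckpoint mirrors the fields of the v1 blob shown above.
+type legacyCheckpoint struct {
+	PartitionID string `json:"partitionID"`
+	Checkpoint  struct {
+		Offset         string `json:"offset"`
+		SequenceNumber int64  `json:"sequenceNumber"`
+	} `json:"checkpoint"`
+}
+
+func main() {
+	// Abridged content of the v1 blob for partition 1.
+	blob := []byte(`{"partitionID":"1","checkpoint":{"offset":"31680","sequenceNumber":9}}`)
+
+	var v1 legacyCheckpoint
+	if err := json.Unmarshal(blob, &v1); err != nil {
+		panic(err)
+	}
+
+	// v1 stores the offset as a string; v2 wants an *int64.
+	offset, err := strconv.ParseInt(v1.Checkpoint.Offset, 10, 64)
+	if err != nil {
+		panic(err)
+	}
+
+	// The v2 checkpoint the migration assistant writes to the blob at
+	// <namespace>/<event hub>/<consumer group>/checkpoint/<partition ID>.
+	v2 := azeventhubs.Checkpoint{
+		ConsumerGroup:           "$Default",
+		EventHubName:            "eventhubsdkupgrade",
+		FullyQualifiedNamespace: "mbranca-general.servicebus.windows.net",
+		PartitionID:             v1.PartitionID,
+		Offset:                  &offset,
+		SequenceNumber:          &v1.Checkpoint.SequenceNumber,
+	}
+
+	fmt.Printf("partition=%s offset=%d sequenceNumber=%d\n",
+		v2.PartitionID, *v2.Offset, *v2.SequenceNumber)
+}
+```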
+
+#### Stop v1, enable v2, and start v2
+
+Stop Filebeat and update the config with the following changes:
+
+```yaml
+# x-pack/filebeat/modules.d/azure.yml
+
+- module: azure
+  # All logs
+  activitylogs:
+    enabled: true
+    var:
+      eventhub: "eventhubsdkupgrade"
+      consumer_group: "$Default"
+      connection_string: ""
+      storage_account: "mbrancageneral"
+      storage_account_container: "filebeat-activitylogs-zmoog-0005"
+      storage_account_key: ""
+      storage_account_connection_string: "" # NOTE: make sure this is set
+      processor_version: "v2" # CHANGE: v1 > v2
+      migrate_checkpoint: yes
+      start_position: "earliest"
+```
+
+#### Check that checkpoint info v2 has been created
+
+I see the following folder:
+
+```text
+filebeat-activitylogs-zmoog-0005 / mbranca-general.servicebus.windows.net / eventhubsdkupgrade / $Default / checkpoint
+```
+
+The folder contains four blobs: `0`, `1`, `2`, and `3`.
+
+The metadata of blobs `0`, `2`, and `3` is:
+
+- `offset`: -1
+- `sequencenumber`: 0
+
+The metadata of blob `1` is:
+
+- `offset`: 31680
+- `sequencenumber`: 9
+
+#### Check that the 10 events are not processed again
+
+The index `filebeat-8.15.0` still contains 10 documents, so the input did not reprocess the same events.
+
+### Scenario 002: ingest 100 events (1 input)
+
+- Setup
+- Start v2
+- Take note of the sequence number for all partitions
+- Process 100 events
+- Check that the 100 events are processed
+- Check that checkpoint info v2 has been updated
+
+#### Setup
+
+- Delete the index `filebeat-8.15.0` from the test cluster.
+
+#### Start v2
+
+Using the following configuration:
+
+```yaml
+# x-pack/filebeat/modules.d/azure.yml
+
+- module: azure
+  # All logs
+  activitylogs:
+    enabled: true
+    var:
+      eventhub: "eventhubsdkupgrade"
+      consumer_group: "$Default"
+      connection_string: ""
+      storage_account: "mbrancageneral"
+      storage_account_container: "filebeat-activitylogs-zmoog-0005"
+      storage_account_key: ""
+      storage_account_connection_string: ""
+      processor_version: "v2"
+      migrate_checkpoint: yes
+      start_position: "earliest"
+```
+
+#### Take note of the sequence number for all partitions
+
+Here are the current sequence numbers:
+
+| Partition | Sequence number | Offset |
+| --------- | --------------- | ------ |
+| 0         | 0               | -1     |
+| 1         | 9               | 31680  |
+| 2         | 0               | -1     |
+| 3         | 0               | -1     |
+
+
+#### Process 100 events
+
+Edit the `activitylogs.ndjson` to have 100 events.
+
+Send the 100 events:
+
+```shell
+$ eh -v eventdata send-batch --lines-from-text-file activitylogs.ndjson --batch-size 40
+Sending 100 events to eventhubsdkupgrade
+sending batch of 40 events
+batch sent successfully
+sending batch of 40 events
+batch sent successfully
+sending batch of 20 events
+batch sent successfully
+```
+
+#### Check that the 100 events are processed
+
+I see the `filebeat-8.15.0` index contains 100 documents.
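+
+A quick way to double-check the document count (a sketch, reusing the placeholder `ES_HOST`/`ES_USER`/`ES_PASS` variables from the Scenario 001 setup):
+
+```shell
+curl -s -u "$ES_USER:$ES_PASS" "https://$ES_HOST/filebeat-8.15.0/_count"
+# {"count":100,...}
+```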
+
+
+#### Check that checkpoint info v2 has been updated
+
+Here are the current sequence numbers:
+
+| Partition | Sequence number | Offset |
+| --------- | --------------- | ------ |
+| 0         | 39              | 137280 |
+| 1         | 49              | 172480 |
+| 2         | 19              | 66880  |
+| 3         | 0               | -1     |
+
+Of the 100 events published,
+
+- 40 landed on partition 0 (0 > 39)
+- 40 landed on partition 1 (9 > 49)
+- 20 landed on partition 2 (0 > 19)
+- 0 landed on partition 3
+
+Here are the logs:
+
+```shell
+$ pbpaste | grep '^{' | jq -r 'select(."log.logger" == "input.azure-eventhub") | [."@timestamp",."log.level",."log.logger",.message,.partition,.count//0,.acked//0,.error.message//"na",.error] | @tsv' | sort
+
+2024-06-03T12:45:23.791+0200	info	input.azure-eventhub	Input 'azure-eventhub' starting	0	0	na
+2024-06-03T12:45:24.379+0200	debug	input.azure-eventhub	blob container already exists, no need to create a new one	0	0	na
+2024-06-03T12:45:29.629+0200	info	input.azure-eventhub	checkpoint migration is enabled	0	0	na
+2024-06-03T12:45:46.201+0200	info	input.azure-eventhub	event hub information	0	0	na
+2024-06-03T12:46:28.779+0200	info	input.azure-eventhub	downloaded checkpoint v1 information for partition	0	0	na
+2024-06-03T12:46:35.197+0200	info	input.azure-eventhub	migrating checkpoint v1 information to v2	0	0	na
+2024-06-03T12:46:42.561+0200	info	input.azure-eventhub	migrated checkpoint v1 information to v2	0	0	na
+2024-06-03T12:46:49.400+0200	info	input.azure-eventhub	downloaded checkpoint v1 information for partition	0	0	na
+2024-06-03T12:46:49.400+0200	info	input.azure-eventhub	migrating checkpoint v1 information to v2	0	0	na
+2024-06-03T12:46:49.633+0200	info	input.azure-eventhub	migrated checkpoint v1 information to v2	0	0	na
+2024-06-03T12:46:49.862+0200	info	input.azure-eventhub	downloaded checkpoint v1 information for partition	0	0	na
+2024-06-03T12:46:49.863+0200	info	input.azure-eventhub	migrating checkpoint v1 information to v2	0	0	na
+2024-06-03T12:46:50.100+0200	info	input.azure-eventhub	migrated checkpoint v1 information to v2	0	0	na
+2024-06-03T12:46:50.333+0200	info	input.azure-eventhub	downloaded checkpoint v1 information for partition	0	0	na
+2024-06-03T12:46:50.333+0200	info	input.azure-eventhub	migrating checkpoint v1 information to v2	0	0	na
+2024-06-03T12:46:50.567+0200	info	input.azure-eventhub	migrated checkpoint v1 information to v2	0	0	na
+2024-06-03T12:46:55.762+0200	info	input.azure-eventhub	starting a partition worker	2	0	0	na
+2024-06-03T12:47:08.101+0200	info	input.azure-eventhub	starting a partition worker	3	0	0	na
+2024-06-03T12:47:21.621+0200	info	input.azure-eventhub	starting a partition worker	0	0	0	na
+2024-06-03T12:47:34.559+0200	info	input.azure-eventhub	starting a partition worker	1	0	0	na
+2024-06-03T13:04:44.734+0200	debug	input.azure-eventhub	received events	1	15	0	na
+2024-06-03T13:04:45.964+0200	debug	input.azure-eventhub	received events	2	20	0	na
+2024-06-03T13:04:46.821+0200	debug	input.azure-eventhub	received events	0	40	0	na
+2024-06-03T13:04:49.746+0200	debug	input.azure-eventhub	received events	1	25	0	na
+2024-06-03T13:05:00.339+0200	debug	input.azure-eventhub	checkpoint updated	1	0	40	na
+2024-06-03T13:05:01.889+0200	debug	input.azure-eventhub	checkpoint updated	0	0	40	na
+2024-06-03T13:05:03.167+0200	debug	input.azure-eventhub	checkpoint updated	2	0	20	na
+```
+
+### Scenario 003: ingest 100 events (2 input)
+
+- Setup
+- Start two inputs
+- Take a note with the sequencenumber for all partitions
+- Process 100 events
+- Check that the 100 events are processed
+- check that 
checkpoint info v2 have been updated + +#### Setup + +- Delete the index `filebeat-8.15.0` from the test cluster. + + +#### Start two inputs + +Using the following configuration for all inputs: + +```yaml +# x-pack/filebeat/modules.d/azure.yml + +- module: azure + # All logs + activitylogs: + enabled: true + var: + eventhub: "eventhubsdkupgrade" + consumer_group: "$Default" + connection_string: "" + storage_account: "mbrancageneral" + storage_account_container: "filebeat-activitylogs-zmoog-0005" + storage_account_key: "" + storage_account_connection_string: "" + processor_version: "v2" + migrate_checkpoint: yes + start_position: "earliest" +``` + +- Started input 1 +- Input 1 is running and processing events + +```shell +$ pbpaste | grep '^{' | jq -r 'select(."log.logger" == "input.azure-eventhub") | [."@timestamp",."log.level",."log.logger",.message,.partition,.count//0,.acked//0,.error.message//"na",.error] | @tsv' | sort + +2024-06-03T12:46:55.762+0200 info input.azure-eventhub starting a partition worker 2 0 0 na +2024-06-03T12:47:08.101+0200 info input.azure-eventhub starting a partition worker 3 0 0 na +2024-06-03T12:47:21.621+0200 info input.azure-eventhub starting a partition worker 0 0 0 na +2024-06-03T12:47:34.559+0200 info input.azure-eventhub starting a partition worker 1 0 0 na +2024-06-03T13:04:44.734+0200 debug input.azure-eventhub received events 1 15 0 na +2024-06-03T13:04:45.964+0200 debug input.azure-eventhub received events 2 20 0 na +2024-06-03T13:04:46.821+0200 debug input.azure-eventhub received events 0 40 0 na +2024-06-03T13:04:49.746+0200 debug input.azure-eventhub received events 1 25 0 na +2024-06-03T13:05:00.339+0200 debug input.azure-eventhub checkpoint updated 1 0 40 na +2024-06-03T13:05:01.889+0200 debug input.azure-eventhub checkpoint updated 0 0 40 na +2024-06-03T13:05:03.167+0200 debug input.azure-eventhub checkpoint updated 2 0 20 na +``` + +- Started input 2 + +Input 2 claimed partitions `0` and `3`. + +```shell +$ pbpaste | grep '^{' | jq -r 'select(."log.logger" == "input.azure-eventhub") | [."@timestamp",."log.level",."log.logger",.message,.partition,.count//0,.acked//0,.error.message//"na",.error] | @tsv' | sort + +2024-06-03T13:51:33.748+0200 info input.azure-eventhub Input 'azure-eventhub' starting 0 0 na +2024-06-03T13:51:37.197+0200 debug input.azure-eventhub blob container already exists, no need to create a new one 0 0 na +2024-06-03T13:51:37.197+0200 info input.azure-eventhub checkpoint migration is enabled 0 0 na +2024-06-03T13:51:38.986+0200 info input.azure-eventhub event hub information 0 0 na +2024-06-03T13:51:39.234+0200 info input.azure-eventhub checkpoint v2 information for partition already exists, no migration needed 0 0 na +2024-06-03T13:51:39.234+0200 info input.azure-eventhub checkpoint v2 information for partition already exists, no migration needed 0 0 na +2024-06-03T13:51:39.234+0200 info input.azure-eventhub checkpoint v2 information for partition already exists, no migration needed 0 0 na +2024-06-03T13:51:39.234+0200 info input.azure-eventhub checkpoint v2 information for partition already exists, no migration needed 0 0 na +2024-06-03T13:51:40.728+0200 info input.azure-eventhub starting a partition worker 3 0 0 na +2024-06-03T13:52:03.777+0200 info input.azure-eventhub starting a partition worker 0 0 0 na +``` + +Input 1 released partitions `0` and `3`. 
+ +```shell +$ pbpaste | grep '^{' | jq -r 'select(."log.logger" == "input.azure-eventhub") | [."@timestamp",."log.level",."log.logger",.message,.partition,.count//0,.acked//0,.error.message//"na",.error] | @tsv' | sort + +2024-06-03T13:51:45.711+0200 debug input.azure-eventhub partition resources cleaned up 3 0 0 na +2024-06-03T13:51:45.711+0200 info input.azure-eventhub partition worker exited 3 0 0 na +2024-06-03T13:52:08.734+0200 debug input.azure-eventhub partition resources cleaned up 0 0 0 na +2024-06-03T13:52:08.734+0200 info input.azure-eventhub partition worker exited 0 0 0 na +``` + +After input 2 started successfully, the two input share 50% of the event hub partitions each: + +- input 1: partition 1, 2 +- input 2: partition 0, 3 + +#### Process 100 events + +Edit the `activitylogs.ndjson` to have 100 events. + +Send the 100 events: + +```shell +$ eh -v eventdata send-batch --lines-from-text-file activitylogs.ndjson --batch-size 40 +Sending 100 events to eventhubsdkupgrade +sending batch of 40 events +batch sent successfully +sending batch of 40 events +batch sent successfully +sending batch of 20 events +batch sent successfully +``` + + +#### Check that the 100 events are processed + +I see the `filebeat-8.15.0` contains 100 events. + + +#### Check that checkpoint info v2 have been updated + +Here are the current sequence numbers: + +Before + +| Partition | Sequence number | Offset | +| --------- | --------------- | ------ | +| 0 | 39 | 137280 | +| 1 | 49 | 172480 | +| 2 | 19 | 66880 | +| 3 | 0 | -1 | + +After + +| Partition | Sequence number | Offset | +| --------- | --------------- | ------ | +| 0 | 59 | 207680 | +| 1 | 49 | 207680 | +| 2 | 59 | 66880 | +| 3 | 39 | 137280 | + +Of the 100 events published, + +- 20 landed on partition 0 (39 > 59) +- 0 landed on partition 1 +- 40 landed on partition 2 (19 > 59) +- 40 landed on partition 3 (0 > 39) + +The total number of documents increased by 100. 
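+
+Since the v2 checkpoint information is stored as blob metadata, one way to read it is the Azure CLI (a sketch; it assumes a logged-in CLI and uses the blob layout shown in Scenario 001):
+
+```shell
+# Show the offset and sequencenumber metadata for partition 0.
+az storage blob metadata show \
+  --account-name mbrancageneral \
+  --container-name filebeat-activitylogs-zmoog-0005 \
+  --name 'mbranca-general.servicebus.windows.net/eventhubsdkupgrade/$Default/checkpoint/0'
+```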
+ +#### Check that documents come from two agents + +By running the following query: + +```json +POST /index_name/_search +{ + "size": 0, + "aggs": { + "agents": { + "terms": { + "field": "agent.id.keyword" + } + } + } +} +``` + +I get this split: + +```json +{ + "took": 2, + "timed_out": false, + "_shards": { + "total": 1, + "successful": 1, + "skipped": 0, + "failed": 0 + }, + "hits": { + "total": { + "value": 100, + "relation": "eq" + }, + "max_score": null, + "hits": [] + }, + "aggregations": { + "agents": { + "doc_count_error_upper_bound": 0, + "sum_other_doc_count": 0, + "buckets": [ + { + "key": "43928b5d-b3c6-4ad9-9a6f-d24d1c3e83bd", + "doc_count": 60 + }, + { + "key": "f5f4b7cb-fc0f-4aa2-909f-62fad44d56ff", + "doc_count": 40 + } + ] + } + } +} +``` From 5521028f7b887e048f1af40d4976c7f8d55a5a02 Mon Sep 17 00:00:00 2001 From: Maurizio Branca Date: Mon, 3 Jun 2024 23:37:48 +0200 Subject: [PATCH 37/41] Add Scenario 004: Invalid Elasticsearch endpoint --- x-pack/filebeat/input/azureeventhub/README.md | 199 +++++++++++++++++- 1 file changed, 197 insertions(+), 2 deletions(-) diff --git a/x-pack/filebeat/input/azureeventhub/README.md b/x-pack/filebeat/input/azureeventhub/README.md index e1692afd8bf..706b0a4746d 100644 --- a/x-pack/filebeat/input/azureeventhub/README.md +++ b/x-pack/filebeat/input/azureeventhub/README.md @@ -363,7 +363,7 @@ $ pbpaste | grep '^{' | jq -r 'select(."log.logger" == "input.azure-eventhub") 2024-06-03T13:05:03.167+0200 debug input.azure-eventhub checkpoint updated 2 0 20 na ``` -### Scenario 003: ingest 100 events (2 input) +### Scenario 003: ingest 100 events (2 inputs) - Setup - Start two inputs @@ -371,6 +371,8 @@ $ pbpaste | grep '^{' | jq -r 'select(."log.logger" == "input.azure-eventhub") - Process 100 events - Check that the 100 events are processed - check that checkpoint info v2 have been updated +- Stop input 2 +- Check that input 1 started two new consumer #### Setup @@ -455,7 +457,7 @@ After input 2 started successfully, the two input share 50% of the event hub par - input 1: partition 1, 2 - input 2: partition 0, 3 -#### Process 100 events +#### Send 100 events Edit the `activitylogs.ndjson` to have 100 events. @@ -565,3 +567,196 @@ I get this split: } } ``` + +#### Stop input 2 + +Just shut down the input 2. + +#### Check that input 1 started two new consumers + +After ~10s, the input 1 started two new consumer to claim the partition from input 2: + +```shell +2024-06-03T19:25:20.104+0200 info input.azure-eventhub starting a partition worker 2 0 0 na +2024-06-03T19:25:32.100+0200 info input.azure-eventhub starting a partition worker 1 0 0 na +``` + +### Scenario 004: Invalid Elasticsearch endpoint + +The goal of this scenario is to verify that if the input uses an invalid Elasticsearch endpoint, the input does not update the checkpoint data. + +- Setup +- Start one input +- Take a note with the sequencenumber for all partitions +- Send 10 events +- check that checkpoint info v2 are not updated +- check that the 10 events are stored in the in-memory queue + +#### Setup + +- Delete the index `filebeat-8.15.0` from the test cluster. 
+
+
+#### Start one input
+
+Using the following configuration:
+
+```yaml
+# x-pack/filebeat/modules.d/azure.yml
+
+- module: azure
+  # All logs
+  activitylogs:
+    enabled: true
+    var:
+      eventhub: "eventhubsdkupgrade"
+      consumer_group: "$Default"
+      connection_string: ""
+      storage_account: "mbrancageneral"
+      storage_account_container: "filebeat-activitylogs-zmoog-0005"
+      storage_account_key: ""
+      storage_account_connection_string: ""
+      processor_version: "v2"
+      migrate_checkpoint: yes
+      start_position: "earliest"
+```
+
+Important: set `cloud.id` to a deleted deployment, or set `cloud.auth` to invalid credentials.
+
+```shell
+./filebeat -e -v -d * \
+  --strict.perms=false \
+  --path.home /Users/zmoog/code/projects/elastic/beats/x-pack/filebeat \
+  -E cloud.id= \
+  -E cloud.auth= \
+  -E gc_percent=100 \
+  -E setup.ilm.enabled=false \
+  -E setup.template.enabled=false \
+  -E output.elasticsearch.allow_older_versions=true
+```
+
+The Elasticsearch output must fail to send anything to the cluster.
+
+#### Take a note with the sequencenumber for all partitions
+
+The current checkpoint info is:
+
+| Partition | Sequence number | Offset |
+| --------- | --------------- | ------ |
+| 0         | 59              | 207680 |
+| 1         | 49              | 172480 |
+| 2         | 59              | 207680 |
+| 3         | 39              | 137280 |
+
+#### Send 10 events
+
+Edit the `activitylogs.ndjson` to have 10 events.
+
+Send the 10 events:
+
+```shell
+$ eh -v eventdata send-batch --lines-from-text-file activitylogs.ndjson --batch-size 40
+
+Sending 10 events to eventhubsdkupgrade
+sending batch of 10 events
+batch sent successfully
+```
+
+#### check that checkpoint info v2 are not updated
+
+The partition `1` received 10 events:
+
+```
+2024-06-03T22:55:18.539+0200	debug	input.azure-eventhub	received events	1	10	0	na
+```
+
+The current checkpoint info is:
+
+| Partition | Sequence number | Offset |
+| --------- | --------------- | ------ |
+| 0         | 59              | 207680 |
+| 1         | 49              | 172480 |
+| 2         | 59              | 207680 |
+| 3         | 39              | 137280 |
+
+The checkpoint info, stored as blob metadata, is unchanged for partition `1` and for all other partitions.
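+
+This is the expected behavior: checkpoint progress is tied to pipeline ACKs, so when the output cannot deliver, no ACK fires and the blob metadata stays put. A simplified sketch of the wiring (illustrative, not the actual input source; it assumes the `beat` and `acker` packages from libbeat):
+
+```go
+// Sketch: report the last ACKed event back to the input, and only
+// update the partition checkpoint from inside this callback.
+func connectWithAckCheckpointing(pipeline beat.Pipeline) (beat.Client, error) {
+	return pipeline.ConnectWith(beat.ClientConfig{
+		EventListener: acker.LastEventPrivateReporter(func(acked int, private interface{}) {
+			// `private` carries the last event delivered for a partition;
+			// updating the checkpoint here means an undelivered event
+			// can never advance it.
+		}),
+	})
+}
+```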
+
+
+#### check that the 10 events are stored in the in-memory queue
+
+Checking the metrics:
+
+```shell
+$ pbpaste | grep "Non-zero" | jq -r '[.["@timestamp"],.component.id,.monitoring.metrics.filebeat.events.active,.monitoring.metrics.libbeat.pipeline.events.active,.monitoring.metrics.libbeat.output.events.total//"n/a",.monitoring.metrics.libbeat.output.events.acked//"n/a",.monitoring.metrics.libbeat.output.events.failed//0] | @tsv' | sort
+
+2024-06-03T22:54:14.956+0200	0	0	n/a	n/a	0
+2024-06-03T22:54:44.956+0200	0	0	n/a	n/a	0
+2024-06-03T22:55:14.972+0200	0	0	n/a	n/a	0
+2024-06-03T22:55:44.958+0200	10	10	n/a	n/a	0
+2024-06-03T22:56:14.956+0200	10	10	n/a	n/a	0
+2024-06-03T22:56:44.962+0200	10	10	n/a	n/a	0
+2024-06-03T22:57:14.957+0200	10	10	n/a	n/a	0
+2024-06-03T22:57:44.955+0200	10	10	n/a	n/a	0
+2024-06-03T22:58:14.957+0200	10	10	n/a	n/a	0
+2024-06-03T22:58:44.956+0200	10	10	n/a	n/a	0
+2024-06-03T22:59:14.957+0200	10	10	n/a	n/a	0
+2024-06-03T22:59:44.957+0200	10	10	n/a	n/a	0
+2024-06-03T23:00:14.957+0200	10	10	n/a	n/a	0
+2024-06-03T23:00:44.956+0200	10	10	n/a	n/a	0
+2024-06-03T23:01:14.956+0200	10	10	n/a	n/a	0
+2024-06-03T23:01:44.955+0200	10	10	n/a	n/a	0
+2024-06-03T23:02:14.961+0200	10	10	n/a	n/a	0
+2024-06-03T23:02:44.957+0200	10	10	n/a	n/a	0
+2024-06-03T23:03:14.955+0200	10	10	n/a	n/a	0
+```
+
+I see that the `.monitoring.metrics.filebeat.events.active` and `.monitoring.metrics.libbeat.pipeline.events.active` metrics are both `10`, while the `.monitoring.metrics.libbeat.output.events.total` and `.monitoring.metrics.libbeat.output.events.acked` metrics are both `n/a`.
+
+#### Check that after fixing the problem the input successfully processed the 10 events
+
+- Update `cloud.auth` with valid credentials.
+- Restart the input.
+
+After restarting the input, here are the input metrics:
+
+```shell
+$ pbpaste | grep "Non-zero" | jq -r '[.["@timestamp"],.component.id,.monitoring.metrics.filebeat.events.active,.monitoring.metrics.libbeat.pipeline.events.active,.monitoring.metrics.libbeat.output.events.total//"n/a",.monitoring.metrics.libbeat.output.events.acked//"n/a",.monitoring.metrics.libbeat.output.events.failed//0] | @tsv' | sort
+
+2024-06-03T23:25:57.052+0200	0	0	n/a	n/a	0
+2024-06-03T23:26:27.057+0200	10	10	n/a	n/a	0
+2024-06-03T23:26:57.060+0200	0	0	10	10	0
+```
+
+The 10 events have been reprocessed successfully.
+
+Here is the checkpoint info before and after the restart.
+
+Before:
+
+| Partition | Sequence number | Offset |
+| --------- | --------------- | ------ |
+| 0         | 59              | 207680 |
+| 1         | 49              | 172480 |
+| 2         | 59              | 207680 |
+| 3         | 39              | 137280 |
+
+After:
+
+| Partition | Sequence number | Offset |
+| --------- | --------------- | ------ |
+| 0         | 59              | 207680 |
+| 1         | 59              | 207680 |
+| 2         | 59              | 207680 |
+| 3         | 39              | 137280 |
+
+
+Of the 10 events published,
+
+- 0 landed on partition 0
+- 10 landed on partition 1 (49 > 59)
+- 0 landed on partition 2
+- 0 landed on partition 3

From 7c48f78b111140d2ebe7e7ec64d62fc2c7386605 Mon Sep 17 00:00:00 2001
From: Maurizio Branca
Date: Mon, 3 Jun 2024 23:44:27 +0200
Subject: [PATCH 38/41] README cleanup

---
 x-pack/filebeat/input/azureeventhub/README.md | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/x-pack/filebeat/input/azureeventhub/README.md b/x-pack/filebeat/input/azureeventhub/README.md
index 706b0a4746d..8f6bf5272d5 100644
--- a/x-pack/filebeat/input/azureeventhub/README.md
+++ b/x-pack/filebeat/input/azureeventhub/README.md
@@ -589,8 +589,9 @@ The goal of this scenario is to verify that if the input uses an invalid Elastic
 - Start one input
 - Take a note with the sequencenumber for all partitions
 - Send 10 events
-- check that checkpoint info v2 are not updated
-- check that the 10 events are stored in the in-memory queue
+- Check that checkpoint info v2 are not updated
+- Check that the 10 events are stored in the in-memory queue
+- Check that after fixing the problem the input successfully processed the 10 events
 
 #### Setup
 
@@ -663,7 +664,7 @@ sending batch of 10 events
 batch sent successfully
 ```
 
-#### check that checkpoint info v2 are not updated
+#### Check that checkpoint info v2 are not updated
 
 The partition `1` received 10 events:
 
@@ -682,7 +683,7 @@ The current checkpoint info is:
 
 | 3         | 39              | 137280 |
 
 The checkpoint info, stored as blob metadata, is unchanged for partition `1` and for all other partitions.
 
-#### check that the 10 events are stored in the in-memory queue
+#### Check that the 10 events are stored in the in-memory queue
 
 Checking the metrics:

From 335d48d894bb79db58a1dedfb3eb8c2d876b71d2 Mon Sep 17 00:00:00 2001
From: Maurizio Branca
Date: Tue, 4 Jun 2024 10:00:26 +0200
Subject: [PATCH 39/41] Add receive_timeout and receive_count to config

Makes the receive configuration settings available for customization on
the input settings.

The current default values (receive_timeout: 5s, receive_count: 100)
are probably fine, but it is better to make these options available to
users.
---
 x-pack/filebeat/input/azureeventhub/config.go | 19 +++++
 .../filebeat/input/azureeventhub/v2_input.go  | 79 ++++++-----
 2 files changed, 54 insertions(+), 44 deletions(-)

diff --git a/x-pack/filebeat/input/azureeventhub/config.go b/x-pack/filebeat/input/azureeventhub/config.go
index 95486e68836..6d1a03bff44 100644
--- a/x-pack/filebeat/input/azureeventhub/config.go
+++ b/x-pack/filebeat/input/azureeventhub/config.go
@@ -10,6 +10,7 @@ import (
 	"errors"
 	"fmt"
 	"strings"
+	"time"
 	"unicode"
 
 	"github.com/elastic/elastic-agent-libs/logp"
@@ -35,6 +36,14 @@ type azureInputConfig struct {
 	MigrateCheckpoint bool `config:"migrate_checkpoint"`
 	// Controls the start position for all partitions (processor v2 only).
 	StartPosition string `config:"start_position"`
+	// Processor receive timeout (processor v2 only).
+	// Wait up to `ReceiveTimeout` for `ReceiveCount` events,
+	// otherwise returns whatever we collected during that time. 
+ ReceiveTimeout time.Duration `config:"receive_timeout"` + // Processor receive count (processor v2 only). + // Wait up to `ReceiveTimeout` for `ReceiveCount` events, + // otherwise returns whatever we collected during that time. + ReceiveCount int `config:"receive_count"` } const ephContainerName = "filebeat" @@ -108,6 +117,16 @@ func (conf *azureInputConfig) Validate() error { conf.StartPosition = startPositionEarliest } + // Default receive timeout and count. + if conf.ReceiveTimeout == 0 { + // The default receive timeout is 5 second. + conf.ReceiveTimeout = 5 * time.Second + } + if conf.ReceiveCount == 0 { + // The default receive count is 100. + conf.ReceiveCount = 100 + } + return nil } diff --git a/x-pack/filebeat/input/azureeventhub/v2_input.go b/x-pack/filebeat/input/azureeventhub/v2_input.go index 46fe05c322b..bd973c58c13 100644 --- a/x-pack/filebeat/input/azureeventhub/v2_input.go +++ b/x-pack/filebeat/input/azureeventhub/v2_input.go @@ -29,8 +29,10 @@ import ( ) const ( - startPositionEarliest = "earliest" - startPositionLatest = "latest" + startPositionEarliest = "earliest" + startPositionLatest = "latest" + processorRestartBackoff = 10 * time.Second + processorRestartMaxBackoff = 120 * time.Second ) // azureInputConfig the Azure Event Hub input v2, @@ -85,13 +87,9 @@ func (in *eventHubInputV2) Run( } defer in.consumerClient.Close(context.Background()) - // Create pipelineClient for publishing events and receive - // notification of their ACKs. - // in.pipelineClient, err = createPipelineClient(pipeline) - // if err != nil { - // return fmt.Errorf("failed to create pipeline pipelineClient: %w", err) - // } - // defer in.pipelineClient.Close() + // Store a reference to the pipeline, so we + // can create a new pipeline client for each + // partition. in.pipeline = pipeline // Start the main run loop @@ -153,7 +151,8 @@ func (in *eventHubInputV2) setup(ctx context.Context) error { } in.consumerClient = consumerClient - // FIXME: add migration assistant. + // Manage the migration of the checkpoint information + // from the old Event Hub SDK to the new Event Hub SDK. in.migrationAssistant = newMigrationAssistant( in.log, consumerClient, @@ -183,13 +182,15 @@ func (in *eventHubInputV2) run(ctx context.Context) { // Handle the case when the processor stops due to // transient errors (network failures) and we need to - // restart. + // restart it. processorRunBackoff := backoff.NewEqualJitterBackoff( ctx.Done(), - 10*time.Second, // initial backoff - 120*time.Second, // max backoff + processorRestartBackoff, // initial backoff + processorRestartMaxBackoff, // max backoff ) + // Create the processor options using the input + // configuration. processorOptions := createProcessorOptions(in.config) for ctx.Err() == nil { @@ -200,7 +201,7 @@ func (in *eventHubInputV2) run(ctx context.Context) { processor, err := azeventhubs.NewProcessor( in.consumerClient, in.checkpointStore, - &processorOptions, + processorOptions, ) if err != nil { in.log.Errorw("error creating processor", "error", err) @@ -225,26 +226,25 @@ func (in *eventHubInputV2) run(ctx context.Context) { in.log.Infow("waiting before retrying starting the processor") - // FIXME: `Run()` returns an error when the processor thinks it's unrecoverable. - // We should check the error and decide if we want to retry or not. Should - // we add an and retry mechanism with exponential backoff? + // `Run()` returns an error when the processor thinks it's + // unrecoverable. + // + // We wait before retrying to start the processor. 
processorRunBackoff.Wait() // Update input metrics. in.metrics.processorRestarts.Inc() - - in.log.Infow("Pssor again") } in.log.Infow( - "run completed; continue if context error is nil", + "run completed; restarting the processor if context error is nil", "context_error", ctx.Err(), ) } } // createProcessorOptions creates the processor options using the input configuration. -func createProcessorOptions(config azureInputConfig) azeventhubs.ProcessorOptions { +func createProcessorOptions(config azureInputConfig) *azeventhubs.ProcessorOptions { // LoadBalancingStrategy offers multiple options: // // - Balanced @@ -273,7 +273,7 @@ func createProcessorOptions(config azureInputConfig) azeventhubs.ProcessorOption defaultStartPosition.Latest = to.Ptr(true) } - return azeventhubs.ProcessorOptions{ + return &azeventhubs.ProcessorOptions{ LoadBalancingStrategy: loadBalancingStrategy, StartPositions: azeventhubs.StartPositions{ Default: defaultStartPosition, @@ -347,7 +347,8 @@ func (in *eventHubInputV2) workersLoop(ctx context.Context, processor *azeventhu ) if err := in.processEventsForPartition(ctx, processorPartitionClient); err != nil { - // FIXME: it seems we always get an error, even when the processor is stopped. + // It seems we always get an error, + // even when the processor is stopped. in.log.Infow( "stopping processing events for partition", "reason", err, @@ -387,15 +388,20 @@ func (in *eventHubInputV2) processEventsForPartition(ctx context.Context, partit // 2/3 [CONTINUOUS] Receive events, checkpointing as needed using UpdateCheckpoint. for { - // Wait up to a minute for 100 events, otherwise returns whatever we collected during that time. - receiveCtx, cancelReceive := context.WithTimeout(ctx, 5*time.Second) - events, err := partitionClient.ReceiveEvents(receiveCtx, 100, nil) + // Wait up to `in.config.ReceiveTimeout` for `in.config.ReceiveCount` events, + // otherwise returns whatever we collected during that time. + receiveCtx, cancelReceive := context.WithTimeout(ctx, in.config.ReceiveTimeout) + events, err := partitionClient.ReceiveEvents(receiveCtx, in.config.ReceiveCount, nil) cancelReceive() if err != nil && !errors.Is(err, context.DeadlineExceeded) { var eventHubError *azeventhubs.Error - if errors.As(err, &eventHubError) && eventHubError.Code == azeventhubs.ErrorCodeOwnershipLost { + in.log.Infow( + "ownership lost for partition, stopping processing", + "partition", partitionID, + ) + return nil } @@ -406,25 +412,10 @@ func (in *eventHubInputV2) processEventsForPartition(ctx context.Context, partit continue } - in.log.Debugw("received events", "count", len(events), "partition", partitionID) - err = in.processReceivedEvents(events, partitionID, pipelineClient) if err != nil { return fmt.Errorf("error processing received events: %w", err) } - - //in.log.Debugw("updating checkpoint information", "partition", partitionID) - - //// Updates the checkpoint with the latest event received. - //// - //// If processing needs to restart it will restart from this - //// point, automatically. - //if err := partitionClient.UpdateCheckpoint(ctx, events[len(events)-1], nil); err != nil { - // in.log.Errorw("error updating checkpoint", "error", err) - // return err - //} - - //in.log.Debugw("checkpoint updated", "partition", partitionID) } } @@ -442,8 +433,8 @@ func (in *eventHubInputV2) processReceivedEvents(receivedEvents []*azeventhubs.R in.metrics.receivedMessages.Inc() in.metrics.receivedBytes.Add(uint64(len(receivedEventData.Body))) - // A single event can contain multiple records. 
We create a new event for each record. - //records := in.unpackRecords(receivedEventData.Body) + // A single event can contain multiple records. + // We create a new event for each record. records := in.messageDecoder.Decode(receivedEventData.Body) for record := range records { From bca6e6369a8dcc628e5fdcc4fd83ed846c0abca2 Mon Sep 17 00:00:00 2001 From: Maurizio Branca Date: Tue, 4 Jun 2024 17:38:00 +0200 Subject: [PATCH 40/41] Update tests --- x-pack/filebeat/input/azureeventhub/config.go | 80 ++++--- .../input/azureeventhub/config_test.go | 13 +- .../filebeat/input/azureeventhub/decoder.go | 2 +- .../input/azureeventhub/decoder_test.go | 106 +++++++++ x-pack/filebeat/input/azureeventhub/input.go | 38 +-- .../input/azureeventhub/input_test.go | 82 +------ .../input/azureeventhub/metrics_test.go | 5 + .../input/azureeventhub/sanitization_test.go | 35 --- .../filebeat/input/azureeventhub/v1_input.go | 32 ++- .../input/azureeventhub/v1_input_test.go | 37 +++ .../filebeat/input/azureeventhub/v2_input.go | 67 ++++-- .../input/azureeventhub/v2_migration.go | 221 +++++++++--------- .../activitylogs/config/azure-eventhub.yml | 20 +- .../module/azure/activitylogs/manifest.yml | 12 +- .../azure/auditlogs/config/azure-eventhub.yml | 19 +- .../module/azure/auditlogs/manifest.yml | 13 +- .../platformlogs/config/azure-eventhub.yml | 20 +- .../module/azure/platformlogs/manifest.yml | 13 +- .../signinlogs/config/azure-eventhub.yml | 20 +- .../module/azure/signinlogs/manifest.yml | 13 +- 20 files changed, 492 insertions(+), 356 deletions(-) create mode 100644 x-pack/filebeat/input/azureeventhub/decoder_test.go create mode 100644 x-pack/filebeat/input/azureeventhub/v1_input_test.go diff --git a/x-pack/filebeat/input/azureeventhub/config.go b/x-pack/filebeat/input/azureeventhub/config.go index 6d1a03bff44..76c3541ee49 100644 --- a/x-pack/filebeat/input/azureeventhub/config.go +++ b/x-pack/filebeat/input/azureeventhub/config.go @@ -16,37 +16,66 @@ import ( "github.com/elastic/elastic-agent-libs/logp" ) +const ephContainerName = "filebeat" + type azureInputConfig struct { + // EventHubName is the name of the event hub to connect to. + EventHubName string `config:"eventhub" validate:"required"` + // ConnectionString is the connection string to connect to the event hub. ConnectionString string `config:"connection_string" validate:"required"` - EventHubName string `config:"eventhub" validate:"required"` - ConsumerGroup string `config:"consumer_group"` + // ConsumerGroup is the name of the consumer group to use. + ConsumerGroup string `config:"consumer_group"` // Azure Storage container to store leases and checkpoints - SAName string `config:"storage_account" validate:"required"` - SAKey string `config:"storage_account_key"` // (processor v1 only) - SAConnectionString string `config:"storage_account_connection_string"` // (processor v2 only) - SAContainer string `config:"storage_account_container"` + SAName string `config:"storage_account" validate:"required"` + // SAKey is used to connect to the storage account (processor v1 only) + SAKey string `config:"storage_account_key"` + // SAConnectionString is used to connect to the storage account (processor v2 only) + SAConnectionString string `config:"storage_account_connection_string"` + // SAContainer is the name of the storage account container to store + // partition ownership and checkpoint information. 
+ SAContainer string `config:"storage_account_container"` // by default the azure public environment is used, to override, users can provide a specific resource manager endpoint OverrideEnvironment string `config:"resource_manager_endpoint"` // cleanup the log JSON input for known issues, options: SINGLE_QUOTES, NEW_LINES SanitizeOptions []string `config:"sanitize_options"` - // Processor version to use (v1 or v2). Default is v1 (processor v2 only). - ProcessorVersion string `config:"processor_version"` // Controls if the input should perform the checkpoint information // migration from v1 to v2 (processor v2 only). MigrateCheckpoint bool `config:"migrate_checkpoint"` + // Processor version to use (v1 or v2). Default is v1. + ProcessorVersion string `config:"processor_version"` + // + ProcessorUpdateInterval time.Duration `config:"processor_update_interval"` // Controls the start position for all partitions (processor v2 only). - StartPosition string `config:"start_position"` + ProcessorStartPosition string `config:"processor_start_position"` // Processor receive timeout (processor v2 only). - // Wait up to `ReceiveTimeout` for `ReceiveCount` events, + // Wait up to `PartitionReceiveTimeout` for `PartitionReceiveCount` events, // otherwise returns whatever we collected during that time. - ReceiveTimeout time.Duration `config:"receive_timeout"` + PartitionReceiveTimeout time.Duration `config:"partition_receive_timeout"` // Processor receive count (processor v2 only). - // Wait up to `ReceiveTimeout` for `ReceiveCount` events, + // Wait up to `PartitionReceiveTimeout` for `PartitionReceiveCount` events, // otherwise returns whatever we collected during that time. - ReceiveCount int `config:"receive_count"` + PartitionReceiveCount int `config:"partition_receive_count"` } -const ephContainerName = "filebeat" +func defaultConfig() azureInputConfig { + return azureInputConfig{ + // For this release, we continue to use + // the processor v1 as the default. + ProcessorVersion: processorV1, + // + ProcessorUpdateInterval: 10 * time.Second, + // For backward compatibility with v1, + // the default start position is "earliest". + ProcessorStartPosition: startPositionEarliest, + // Receive timeout and count control how + // many events we want to receive from + // the processor before returning. + PartitionReceiveTimeout: 5 * time.Second, + PartitionReceiveCount: 100, + // Default + SanitizeOptions: []string{}, + } +} // Validate validates the config. func (conf *azureInputConfig) Validate() error { @@ -90,11 +119,6 @@ func (conf *azureInputConfig) Validate() error { } } - if conf.ProcessorVersion == "" { - // The default processor version is "v1". 
- conf.ProcessorVersion = processorV1 - } - switch conf.ProcessorVersion { case processorV1: if conf.SAKey == "" { @@ -102,7 +126,7 @@ func (conf *azureInputConfig) Validate() error { } case processorV2: if conf.SAKey != "" { - logger.Warnf("storage_account_key is not used in processor v2") + logger.Warnf("storage_account_key is not used in processor v2, please remove it from the configuration (config: storage_account_key)") } if conf.SAConnectionString == "" { return errors.New("no storage account connection string configured (config: storage_account_connection_string)") @@ -111,22 +135,6 @@ func (conf *azureInputConfig) Validate() error { return fmt.Errorf("invalid azure-eventhub processor version: %s (available versions: v1, v2)", conf.ProcessorVersion) } - if conf.StartPosition == "" { - // For backward compatibility with v1, - // the default start position is "earliest". - conf.StartPosition = startPositionEarliest - } - - // Default receive timeout and count. - if conf.ReceiveTimeout == 0 { - // The default receive timeout is 5 second. - conf.ReceiveTimeout = 5 * time.Second - } - if conf.ReceiveCount == 0 { - // The default receive count is 100. - conf.ReceiveCount = 100 - } - return nil } diff --git a/x-pack/filebeat/input/azureeventhub/config_test.go b/x-pack/filebeat/input/azureeventhub/config_test.go index 047a4ee5626..321cf9d1ebc 100644 --- a/x-pack/filebeat/input/azureeventhub/config_test.go +++ b/x-pack/filebeat/input/azureeventhub/config_test.go @@ -36,13 +36,12 @@ func TestStorageContainerValidate(t *testing.T) { func TestValidate(t *testing.T) { t.Run("Sanitize storage account containers with underscores", func(t *testing.T) { - config := azureInputConfig{ - ConnectionString: "sb://test-ns.servicebus.windows.net/;SharedAccessKeyName=RootManageSharedAccessKey;SharedAccessKey=SECRET", - EventHubName: "event_hub_00", - SAName: "teststorageaccount", - SAKey: "secret", - SAContainer: "filebeat-activitylogs-event_hub_00", - } + config := defaultConfig() + config.ConnectionString = "sb://test-ns.servicebus.windows.net/;SharedAccessKeyName=RootManageSharedAccessKey;SharedAccessKey=SECRET" + config.EventHubName = "event_hub_00" + config.SAName = "teststorageaccount" + config.SAKey = "secret" + config.SAContainer = "filebeat-activitylogs-event_hub_00" if err := config.Validate(); err != nil { t.Fatalf("unexpected validation error: %v", err) diff --git a/x-pack/filebeat/input/azureeventhub/decoder.go b/x-pack/filebeat/input/azureeventhub/decoder.go index 6b9c40f5c8a..49f6ca3f648 100644 --- a/x-pack/filebeat/input/azureeventhub/decoder.go +++ b/x-pack/filebeat/input/azureeventhub/decoder.go @@ -14,7 +14,7 @@ import ( ) type messageDecoder struct { - config *azureInputConfig + config azureInputConfig log *logp.Logger metrics *inputMetrics } diff --git a/x-pack/filebeat/input/azureeventhub/decoder_test.go b/x-pack/filebeat/input/azureeventhub/decoder_test.go new file mode 100644 index 00000000000..7f2613493a8 --- /dev/null +++ b/x-pack/filebeat/input/azureeventhub/decoder_test.go @@ -0,0 +1,106 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. 
+ +//go:build !aix + +package azureeventhub + +import ( + "fmt" + "testing" + + "github.com/elastic/elastic-agent-libs/logp" + "github.com/elastic/elastic-agent-libs/monitoring" + "github.com/stretchr/testify/assert" +) + +func TestDecodeRecords(t *testing.T) { + config := defaultConfig() + log := logp.NewLogger(fmt.Sprintf("%s test for input", inputName)) + reg := monitoring.NewRegistry() + + decoder := messageDecoder{ + config: config, + log: log, + metrics: newInputMetrics("test", reg), + } + + msgs := []string{ + "{\"test\":\"this is some message\",\"time\":\"2019-12-17T13:43:44.4946995Z\"}", + "{\"test\":\"this is 2nd message\",\"time\":\"2019-12-17T13:43:44.4946995Z\"}", + "{\"test\":\"this is 3rd message\",\"time\":\"2019-12-17T13:43:44.4946995Z\"}", + } + + t.Run("Decode multiple records", func(t *testing.T) { + msg := "{\"records\":[{\"test\":\"this is some message\",\"time\":\"2019-12-17T13:43:44.4946995Z\"}," + + "{\"test\":\"this is 2nd message\",\"time\":\"2019-12-17T13:43:44.4946995Z\"}," + + "{\"test\":\"this is 3rd message\",\"time\":\"2019-12-17T13:43:44.4946995Z\"}]}" + + messages := decoder.Decode([]byte(msg)) + + assert.NotNil(t, messages) + assert.Equal(t, len(messages), 3) + for _, ms := range messages { + assert.Contains(t, msgs, ms) + } + }) + + t.Run("Decode array of events", func(t *testing.T) { + msg1 := "[{\"test\":\"this is some message\",\"time\":\"2019-12-17T13:43:44.4946995Z\"}," + + "{\"test\":\"this is 2nd message\",\"time\":\"2019-12-17T13:43:44.4946995Z\"}," + + "{\"test\":\"this is 3rd message\",\"time\":\"2019-12-17T13:43:44.4946995Z\"}]" + + messages := decoder.Decode([]byte(msg1)) + + assert.NotNil(t, messages) + assert.Equal(t, len(messages), 3) + for _, ms := range messages { + assert.Contains(t, msgs, ms) + } + }) + + t.Run("Decode one event only", func(t *testing.T) { + msg2 := "{\"test\":\"this is some message\",\"time\":\"2019-12-17T13:43:44.4946995Z\"}" + + messages := decoder.Decode([]byte(msg2)) + + assert.NotNil(t, messages) + assert.Equal(t, len(messages), 1) + for _, ms := range messages { + assert.Contains(t, msgs, ms) + } + }) +} + +func TestDecodeRecordsWithSanitization(t *testing.T) { + config := defaultConfig() + config.SanitizeOptions = []string{"SINGLE_QUOTES", "NEW_LINES"} + log := logp.NewLogger(fmt.Sprintf("%s test for input", inputName)) + reg := monitoring.NewRegistry() + metrics := newInputMetrics("test", reg) + defer metrics.Close() + + decoder := messageDecoder{ + config: config, + log: log, + metrics: metrics, + } + + msg := "{\"records\":[{'test':\"this is some message\",\n\n\"time\":\"2019-12-17T13:43:44.4946995Z\"}," + + "{\"test\":\"this is '2nd' message\",\"time\":\"2019-12-17T13:43:44.4946995Z\"}," + + "{\"time\": \"2023-04-11T13:35:20Z\", \"resourceId\": \"/SUBSCRIPTIONS/REDACTED/RESOURCEGROUPS/ELASTIC-FUNCTION-TEST/PROVIDERS/MICROSOFT.WEB/SITES/REDACTED\", \"category\": \"FunctionAppLogs\", \"operationName\": \"Microsoft.Web/sites/functions/log\", \"level\": \"Informational\", \"location\": \"West Europe\", \"properties\": {'appName':'REDACTED','roleInstance':'REDACTED','message':'Elastic Test Function Trigger. 
---- West Europe West Europe West Europe West Europe West Europe ','category':'Function.HttpTriggerJava.User','hostVersion':'4.16.5.5','functionInvocationId':'REDACTED','functionName':'HttpTriggerJava','hostInstanceId':'REDACTED','level':'Information','levelId':2,'processId':62}}]}" + msgs := []string{ + "{\"test\":\"this is some message\",\"time\":\"2019-12-17T13:43:44.4946995Z\"}", + "{\"test\":\"this is '2nd' message\",\"time\":\"2019-12-17T13:43:44.4946995Z\"}", + "{\"category\":\"FunctionAppLogs\",\"level\":\"Informational\",\"location\":\"West Europe\",\"operationName\":\"Microsoft.Web/sites/functions/log\",\"properties\":{\"appName\":\"REDACTED\",\"category\":\"Function.HttpTriggerJava.User\",\"functionInvocationId\":\"REDACTED\",\"functionName\":\"HttpTriggerJava\",\"hostInstanceId\":\"REDACTED\",\"hostVersion\":\"4.16.5.5\",\"level\":\"Information\",\"levelId\":2,\"message\":\"Elastic Test Function Trigger. ---- West Europe West Europe West Europe West Europe West Europe \",\"processId\":62,\"roleInstance\":\"REDACTED\"},\"resourceId\":\"/SUBSCRIPTIONS/REDACTED/RESOURCEGROUPS/ELASTIC-FUNCTION-TEST/PROVIDERS/MICROSOFT.WEB/SITES/REDACTED\",\"time\":\"2023-04-11T13:35:20Z\"}", + } + + messages := decoder.Decode([]byte(msg)) + + assert.NotNil(t, messages) + assert.Equal(t, len(messages), 3) + for _, ms := range messages { + assert.Contains(t, msgs, ms) + } +} diff --git a/x-pack/filebeat/input/azureeventhub/input.go b/x-pack/filebeat/input/azureeventhub/input.go index 4cf2a91afdc..c2ad4f9c7ad 100644 --- a/x-pack/filebeat/input/azureeventhub/input.go +++ b/x-pack/filebeat/input/azureeventhub/input.go @@ -8,14 +8,10 @@ package azureeventhub import ( "fmt" - "strings" - "github.com/Azure/azure-sdk-for-go/sdk/azcore/to" "github.com/Azure/go-autorest/autorest/azure" v2 "github.com/elastic/beats/v7/filebeat/input/v2" - "github.com/elastic/beats/v7/libbeat/beat" - "github.com/elastic/beats/v7/libbeat/common/acker" "github.com/elastic/beats/v7/libbeat/feature" conf "github.com/elastic/elastic-agent-libs/config" "github.com/elastic/elastic-agent-libs/logp" @@ -66,7 +62,7 @@ func (m *eventHubInputManager) Init(unison.Group) error { // Create creates a new azure-eventhub input based on the configuration. func (m *eventHubInputManager) Create(cfg *conf.C) (v2.Input, error) { - var config azureInputConfig + config := defaultConfig() if err := cfg.Unpack(&config); err != nil { return nil, fmt.Errorf("reading %s input config: %w", inputName, err) } @@ -79,36 +75,4 @@ func (m *eventHubInputManager) Create(cfg *conf.C) (v2.Input, error) { default: return nil, fmt.Errorf("invalid azure-eventhub processor version: %s (available versions: v1, v2)", config.ProcessorVersion) } - - //return &azureInput{ - // config: config, - // log: logp.NewLogger(fmt.Sprintf("%s input", inputName)).With("connection string", stripConnectionString(config.ConnectionString)), - //}, nil -} - -func createPipelineClient(pipeline beat.Pipeline) (beat.Client, error) { - return pipeline.ConnectWith(beat.ClientConfig{ - EventListener: acker.LastEventPrivateReporter(func(acked int, data interface{}) { - // fmt.Println(acked, data) - }), - Processing: beat.ProcessingConfig{ - // This input only produces events with basic types so normalization - // is not required. 
- EventNormalization: to.Ptr(false), - }, - }) -} - -// Strip connection string to remove sensitive information -// A connection string should look like this: -// Endpoint=sb://dummynamespace.servicebus.windows.net/;SharedAccessKeyName=DummyAccessKeyName;SharedAccessKey=5dOntTRytoC24opYThisAsit3is2B+OGY1US/fuL3ly= -// This code will remove everything after ';' so key information is stripped -func stripConnectionString(c string) string { - if parts := strings.SplitN(c, ";", 2); len(parts) == 2 { - return parts[0] - } - - // We actually expect the string to have the documented format - // if we reach here something is wrong, so let's stay on the safe side - return "(redacted)" } diff --git a/x-pack/filebeat/input/azureeventhub/input_test.go b/x-pack/filebeat/input/azureeventhub/input_test.go index 64f0a1b68e6..839e7bb1f11 100644 --- a/x-pack/filebeat/input/azureeventhub/input_test.go +++ b/x-pack/filebeat/input/azureeventhub/input_test.go @@ -64,7 +64,13 @@ func TestProcessEvents(t *testing.T) { log: log, metrics: metrics, pipelineClient: &fakePipelineClient, + messageDecoder: messageDecoder{ + config: defaultTestConfig, + log: log, + metrics: metrics, + }, } + var sn int64 = 12 now := time.Now() var off int64 = 1234 @@ -93,58 +99,6 @@ func TestProcessEvents(t *testing.T) { assert.Equal(t, message, single) } -func TestParseMultipleRecords(t *testing.T) { - // records object - msg := "{\"records\":[{\"test\":\"this is some message\",\"time\":\"2019-12-17T13:43:44.4946995Z\"}," + - "{\"test\":\"this is 2nd message\",\"time\":\"2019-12-17T13:43:44.4946995Z\"}," + - "{\"test\":\"this is 3rd message\",\"time\":\"2019-12-17T13:43:44.4946995Z\"}]}" - msgs := []string{ - "{\"test\":\"this is some message\",\"time\":\"2019-12-17T13:43:44.4946995Z\"}", - "{\"test\":\"this is 2nd message\",\"time\":\"2019-12-17T13:43:44.4946995Z\"}", - "{\"test\":\"this is 3rd message\",\"time\":\"2019-12-17T13:43:44.4946995Z\"}", - } - - reg := monitoring.NewRegistry() - metrics := newInputMetrics("test", reg) - defer metrics.Close() - - fakePipelineClient := fakeClient{} - - input := eventHubInputV1{ - config: azureInputConfig{}, - log: logp.NewLogger(fmt.Sprintf("%s test for input", inputName)), - metrics: metrics, - pipelineClient: &fakePipelineClient, - } - - messages := input.unpackRecords([]byte(msg)) - assert.NotNil(t, messages) - assert.Equal(t, len(messages), 3) - for _, ms := range messages { - assert.Contains(t, msgs, ms) - } - - // array of events - msg1 := "[{\"test\":\"this is some message\",\"time\":\"2019-12-17T13:43:44.4946995Z\"}," + - "{\"test\":\"this is 2nd message\",\"time\":\"2019-12-17T13:43:44.4946995Z\"}," + - "{\"test\":\"this is 3rd message\",\"time\":\"2019-12-17T13:43:44.4946995Z\"}]" - messages = input.unpackRecords([]byte(msg1)) - assert.NotNil(t, messages) - assert.Equal(t, len(messages), 3) - for _, ms := range messages { - assert.Contains(t, msgs, ms) - } - - // one event only - msg2 := "{\"test\":\"this is some message\",\"time\":\"2019-12-17T13:43:44.4946995Z\"}" - messages = input.unpackRecords([]byte(msg2)) - assert.NotNil(t, messages) - assert.Equal(t, len(messages), 1) - for _, ms := range messages { - assert.Contains(t, msgs, ms) - } -} - //func TestNewInputDone(t *testing.T) { // log := logp.NewLogger(fmt.Sprintf("%s test for input", inputName)) // config := mapstr.M{ @@ -156,30 +110,6 @@ func TestParseMultipleRecords(t *testing.T) { // inputtest.AssertNotStartedInputCanBeDone(t, NewInput, &config) //} -func TestStripConnectionString(t *testing.T) { - tests := []struct { - 
connectionString, expected string - }{ - { - "Endpoint=sb://something", - "(redacted)", - }, - { - "Endpoint=sb://dummynamespace.servicebus.windows.net/;SharedAccessKeyName=DummyAccessKeyName;SharedAccessKey=5dOntTRytoC24opYThisAsit3is2B+OGY1US/fuL3ly=", - "Endpoint=sb://dummynamespace.servicebus.windows.net/", - }, - { - "Endpoint=sb://dummynamespace.servicebus.windows.net/;SharedAccessKey=5dOntTRytoC24opYThisAsit3is2B+OGY1US/fuL3ly=", - "Endpoint=sb://dummynamespace.servicebus.windows.net/", - }, - } - - for _, tt := range tests { - res := stripConnectionString(tt.connectionString) - assert.Equal(t, res, tt.expected) - } -} - // ackClient is a fake beat.Client that ACKs the published messages. type fakeClient struct { sync.Mutex diff --git a/x-pack/filebeat/input/azureeventhub/metrics_test.go b/x-pack/filebeat/input/azureeventhub/metrics_test.go index ddba8e0299c..fbab4e1e122 100644 --- a/x-pack/filebeat/input/azureeventhub/metrics_test.go +++ b/x-pack/filebeat/input/azureeventhub/metrics_test.go @@ -129,6 +129,11 @@ func TestInputMetricsEventsReceived(t *testing.T) { metrics: metrics, pipelineClient: &fakeClient, log: log, + messageDecoder: messageDecoder{ + config: inputConfig, + metrics: metrics, + log: log, + }, } ev := eventhub.Event{ diff --git a/x-pack/filebeat/input/azureeventhub/sanitization_test.go b/x-pack/filebeat/input/azureeventhub/sanitization_test.go index 6e2645c40d7..3ad8928cdc3 100644 --- a/x-pack/filebeat/input/azureeventhub/sanitization_test.go +++ b/x-pack/filebeat/input/azureeventhub/sanitization_test.go @@ -7,46 +7,11 @@ package azureeventhub import ( - "fmt" "testing" "github.com/stretchr/testify/assert" - - "github.com/elastic/elastic-agent-libs/logp" - "github.com/elastic/elastic-agent-libs/monitoring" ) -func TestParseMultipleRecordsSanitization(t *testing.T) { - msg := "{\"records\":[{'test':\"this is some message\",\n\n\"time\":\"2019-12-17T13:43:44.4946995Z\"}," + - "{\"test\":\"this is '2nd' message\",\"time\":\"2019-12-17T13:43:44.4946995Z\"}," + - "{\"time\": \"2023-04-11T13:35:20Z\", \"resourceId\": \"/SUBSCRIPTIONS/REDACTED/RESOURCEGROUPS/ELASTIC-FUNCTION-TEST/PROVIDERS/MICROSOFT.WEB/SITES/REDACTED\", \"category\": \"FunctionAppLogs\", \"operationName\": \"Microsoft.Web/sites/functions/log\", \"level\": \"Informational\", \"location\": \"West Europe\", \"properties\": {'appName':'REDACTED','roleInstance':'REDACTED','message':'Elastic Test Function Trigger. ---- West Europe West Europe West Europe West Europe West Europe ','category':'Function.HttpTriggerJava.User','hostVersion':'4.16.5.5','functionInvocationId':'REDACTED','functionName':'HttpTriggerJava','hostInstanceId':'REDACTED','level':'Information','levelId':2,'processId':62}}]}" - msgs := []string{ - "{\"test\":\"this is some message\",\"time\":\"2019-12-17T13:43:44.4946995Z\"}", - "{\"test\":\"this is '2nd' message\",\"time\":\"2019-12-17T13:43:44.4946995Z\"}", - "{\"category\":\"FunctionAppLogs\",\"level\":\"Informational\",\"location\":\"West Europe\",\"operationName\":\"Microsoft.Web/sites/functions/log\",\"properties\":{\"appName\":\"REDACTED\",\"category\":\"Function.HttpTriggerJava.User\",\"functionInvocationId\":\"REDACTED\",\"functionName\":\"HttpTriggerJava\",\"hostInstanceId\":\"REDACTED\",\"hostVersion\":\"4.16.5.5\",\"level\":\"Information\",\"levelId\":2,\"message\":\"Elastic Test Function Trigger. 
---- West Europe West Europe West Europe West Europe West Europe \",\"processId\":62,\"roleInstance\":\"REDACTED\"},\"resourceId\":\"/SUBSCRIPTIONS/REDACTED/RESOURCEGROUPS/ELASTIC-FUNCTION-TEST/PROVIDERS/MICROSOFT.WEB/SITES/REDACTED\",\"time\":\"2023-04-11T13:35:20Z\"}", - } - - reg := monitoring.NewRegistry() - metrics := newInputMetrics("test", reg) - defer metrics.Close() - - input := eventHubInputV1{ - config: azureInputConfig{ - SanitizeOptions: []string{"SINGLE_QUOTES", "NEW_LINES"}, - }, - log: logp.NewLogger(fmt.Sprintf("%s test for input", inputName)), - metrics: metrics, - pipelineClient: &fakeClient{}, - } - - messages := input.unpackRecords([]byte(msg)) - assert.NotNil(t, messages) - assert.Equal(t, len(messages), 3) - for _, ms := range messages { - assert.Contains(t, msgs, ms) - } -} - func TestSanitize(t *testing.T) { jsonByte := []byte("{'test':\"this is 'some' message\n\",\n\"time\":\"2019-12-17T13:43:44.4946995Z\"}") diff --git a/x-pack/filebeat/input/azureeventhub/v1_input.go b/x-pack/filebeat/input/azureeventhub/v1_input.go index 2e45507c0ca..a5014fac305 100644 --- a/x-pack/filebeat/input/azureeventhub/v1_input.go +++ b/x-pack/filebeat/input/azureeventhub/v1_input.go @@ -9,8 +9,10 @@ package azureeventhub import ( "context" "fmt" + "strings" "time" + "github.com/Azure/azure-sdk-for-go/sdk/azcore/to" "github.com/Azure/go-autorest/autorest/azure" eventhub "github.com/Azure/azure-event-hubs-go/v3" @@ -20,6 +22,7 @@ import ( v2 "github.com/elastic/beats/v7/filebeat/input/v2" "github.com/elastic/beats/v7/libbeat/beat" + "github.com/elastic/beats/v7/libbeat/common/acker" "github.com/elastic/elastic-agent-libs/logp" "github.com/elastic/elastic-agent-libs/mapstr" ) @@ -77,7 +80,7 @@ func (in *eventHubInputV1) Run( defer in.metrics.Close() in.messageDecoder = messageDecoder{ - config: &in.config, + config: in.config, log: in.log, metrics: in.metrics, } @@ -268,6 +271,33 @@ func (in *eventHubInputV1) processEvents(event *eventhub.Event) { in.metrics.processingTime.Update(time.Since(processingStartTime).Nanoseconds()) } +func createPipelineClient(pipeline beat.Pipeline) (beat.Client, error) { + return pipeline.ConnectWith(beat.ClientConfig{ + EventListener: acker.LastEventPrivateReporter(func(acked int, data interface{}) { + // fmt.Println(acked, data) + }), + Processing: beat.ProcessingConfig{ + // This input only produces events with basic types so normalization + // is not required. 
+ EventNormalization: to.Ptr(false), + }, + }) +} + +// Strip connection string to remove sensitive information +// A connection string should look like this: +// Endpoint=sb://dummynamespace.servicebus.windows.net/;SharedAccessKeyName=DummyAccessKeyName;SharedAccessKey=5dOntTRytoC24opYThisAsit3is2B+OGY1US/fuL3ly= +// This code will remove everything after ';' so key information is stripped +func stripConnectionString(c string) string { + if parts := strings.SplitN(c, ";", 2); len(parts) == 2 { + return parts[0] + } + + // We actually expect the string to have the documented format + // if we reach here something is wrong, so let's stay on the safe side + return "(redacted)" +} + //// unpackRecords will try to split the message into multiple ones based on the group field provided by the configuration //func (in *eventHubInputV1) unpackRecords(bMessage []byte) []string { // var mapObject map[string][]interface{} diff --git a/x-pack/filebeat/input/azureeventhub/v1_input_test.go b/x-pack/filebeat/input/azureeventhub/v1_input_test.go new file mode 100644 index 00000000000..cd20ecbfdcc --- /dev/null +++ b/x-pack/filebeat/input/azureeventhub/v1_input_test.go @@ -0,0 +1,37 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. + +//go:build !aix + +package azureeventhub + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestStripConnectionString(t *testing.T) { + tests := []struct { + connectionString, expected string + }{ + { + "Endpoint=sb://something", + "(redacted)", + }, + { + "Endpoint=sb://dummynamespace.servicebus.windows.net/;SharedAccessKeyName=DummyAccessKeyName;SharedAccessKey=5dOntTRytoC24opYThisAsit3is2B+OGY1US/fuL3ly=", + "Endpoint=sb://dummynamespace.servicebus.windows.net/", + }, + { + "Endpoint=sb://dummynamespace.servicebus.windows.net/;SharedAccessKey=5dOntTRytoC24opYThisAsit3is2B+OGY1US/fuL3ly=", + "Endpoint=sb://dummynamespace.servicebus.windows.net/", + }, + } + + for _, tt := range tests { + res := stripConnectionString(tt.connectionString) + assert.Equal(t, res, tt.expected) + } +} diff --git a/x-pack/filebeat/input/azureeventhub/v2_input.go b/x-pack/filebeat/input/azureeventhub/v2_input.go index bd973c58c13..2bbbba53abf 100644 --- a/x-pack/filebeat/input/azureeventhub/v2_input.go +++ b/x-pack/filebeat/input/azureeventhub/v2_input.go @@ -12,12 +12,13 @@ import ( "fmt" "time" + "github.com/Azure/azure-sdk-for-go/sdk/azcore/cloud" + "github.com/Azure/azure-sdk-for-go/sdk/azcore" "github.com/Azure/azure-sdk-for-go/sdk/azcore/to" - "github.com/Azure/azure-sdk-for-go/sdk/storage/azblob/bloberror" - "github.com/Azure/azure-sdk-for-go/sdk/messaging/azeventhubs" "github.com/Azure/azure-sdk-for-go/sdk/messaging/azeventhubs/checkpoints" + "github.com/Azure/azure-sdk-for-go/sdk/storage/azblob/bloberror" "github.com/Azure/azure-sdk-for-go/sdk/storage/azblob/container" v2 "github.com/elastic/beats/v7/filebeat/input/v2" @@ -29,9 +30,17 @@ import ( ) const ( - startPositionEarliest = "earliest" - startPositionLatest = "latest" - processorRestartBackoff = 10 * time.Second + // startPositionEarliest lets the processor start from the earliest + // available event from the event hub retention period. + startPositionEarliest = "earliest" + // startPositionEarliest lets the processor start from the latest + // available event from the event hub retention period. 
diff --git a/x-pack/filebeat/input/azureeventhub/v2_input.go b/x-pack/filebeat/input/azureeventhub/v2_input.go
index bd973c58c13..2bbbba53abf 100644
--- a/x-pack/filebeat/input/azureeventhub/v2_input.go
+++ b/x-pack/filebeat/input/azureeventhub/v2_input.go
@@ -12,12 +12,13 @@ import (
 	"fmt"
 	"time"
 
+	"github.com/Azure/azure-sdk-for-go/sdk/azcore/cloud"
+
 	"github.com/Azure/azure-sdk-for-go/sdk/azcore"
 	"github.com/Azure/azure-sdk-for-go/sdk/azcore/to"
-	"github.com/Azure/azure-sdk-for-go/sdk/storage/azblob/bloberror"
-
 	"github.com/Azure/azure-sdk-for-go/sdk/messaging/azeventhubs"
 	"github.com/Azure/azure-sdk-for-go/sdk/messaging/azeventhubs/checkpoints"
+	"github.com/Azure/azure-sdk-for-go/sdk/storage/azblob/bloberror"
 	"github.com/Azure/azure-sdk-for-go/sdk/storage/azblob/container"
 
 	v2 "github.com/elastic/beats/v7/filebeat/input/v2"
@@ -29,9 +30,17 @@ import (
 )
 
 const (
-	startPositionEarliest = "earliest"
-	startPositionLatest = "latest"
-	processorRestartBackoff = 10 * time.Second
+	// startPositionEarliest lets the processor start from the earliest
+	// available event from the event hub retention period.
+	startPositionEarliest = "earliest"
+	// startPositionLatest lets the processor start from the latest
+	// available event from the event hub retention period.
+	startPositionLatest = "latest"
+	// processorRestartBackoff is the initial backoff time before
+	// restarting the processor.
+	processorRestartBackoff = 10 * time.Second
+	// processorRestartMaxBackoff is the maximum backoff time before
+	// restarting the processor.
+	processorRestartMaxBackoff = 120 * time.Second
 )
 
@@ -104,16 +113,20 @@ func (in *eventHubInputV2) setup(ctx context.Context) error {
 	// Decode the messages from event hub into
 	// a `[]string`.
 	in.messageDecoder = messageDecoder{
-		config: &in.config,
+		config: in.config,
 		log: in.log,
 		metrics: in.metrics,
 	}
 
 	// FIXME: check more container client creation options.
-	blobContainerClient, err := container.NewClientFromConnectionString(
+	containerClient, err := container.NewClientFromConnectionString(
 		in.config.SAConnectionString,
 		in.config.SAContainer,
-		nil,
+		&container.ClientOptions{
+			ClientOptions: azcore.ClientOptions{
+				Cloud: cloud.AzurePublic,
+			},
+		},
 	)
 	if err != nil {
 		return fmt.Errorf("failed to create blob container client: %w", err)
 	}
@@ -126,14 +139,14 @@
 	// "the container must exist before the checkpoint store can be used."
 	//
 	// We need to ensure it exists before we can use it.
-	err = in.ensureContainerExists(ctx, blobContainerClient)
+	err = in.ensureContainerExists(ctx, containerClient)
 	if err != nil {
 		return fmt.Errorf("failed to ensure blob container exists: %w", err)
 	}
 
 	// The checkpoint store is used to store the checkpoint information
 	// in the blob container.
-	checkpointStore, err := checkpoints.NewBlobStore(blobContainerClient, nil)
+	checkpointStore, err := checkpoints.NewBlobStore(containerClient, nil)
 	if err != nil {
 		return fmt.Errorf("failed to create checkpoint store: %w", err)
 	}
@@ -156,7 +169,7 @@
 	in.migrationAssistant = newMigrationAssistant(
 		in.log,
 		consumerClient,
-		blobContainerClient,
+		containerClient,
 		checkpointStore,
 	)
 
@@ -245,13 +258,6 @@ func (in *eventHubInputV2) run(ctx context.Context) {
 
 // createProcessorOptions creates the processor options using the input configuration.
 func createProcessorOptions(config azureInputConfig) *azeventhubs.ProcessorOptions {
-	// LoadBalancingStrategy offers multiple options:
-	//
-	// - Balanced
-	// - Greedy
-	//
-	// As of now, we only support Balanced.
-	loadBalancingStrategy := azeventhubs.ProcessorStrategyBalanced
 
 	// Start position offers multiple options:
 	//
@@ -266,7 +272,7 @@
 	// available from the storage account container.
 	defaultStartPosition := azeventhubs.StartPosition{}
 
-	switch config.StartPosition {
+	switch config.ProcessorStartPosition {
 	case startPositionEarliest:
 		defaultStartPosition.Earliest = to.Ptr(true)
 	case startPositionLatest:
@@ -274,7 +280,22 @@
 	}
 
 	return &azeventhubs.ProcessorOptions{
-		LoadBalancingStrategy: loadBalancingStrategy,
+		//
+		// The `LoadBalancingStrategy` controls how the
+		// processor distributes the partitions across the
+		// consumers.
+		//
+		// LoadBalancingStrategy offers multiple options:
+		//
+		// - Balanced
+		// - Greedy
+		//
+		// As of now, we only support the "balanced" load
+		// balancing strategy for backward compatibility with
+		// the old SDK.
+ // + LoadBalancingStrategy: azeventhubs.ProcessorStrategyBalanced, + UpdateInterval: config.ProcessorUpdateInterval, StartPositions: azeventhubs.StartPositions{ Default: defaultStartPosition, }, @@ -388,10 +409,10 @@ func (in *eventHubInputV2) processEventsForPartition(ctx context.Context, partit // 2/3 [CONTINUOUS] Receive events, checkpointing as needed using UpdateCheckpoint. for { - // Wait up to `in.config.ReceiveTimeout` for `in.config.ReceiveCount` events, + // Wait up to `in.config.PartitionReceiveTimeout` for `in.config.PartitionReceiveCount` events, // otherwise returns whatever we collected during that time. - receiveCtx, cancelReceive := context.WithTimeout(ctx, in.config.ReceiveTimeout) - events, err := partitionClient.ReceiveEvents(receiveCtx, in.config.ReceiveCount, nil) + receiveCtx, cancelReceive := context.WithTimeout(ctx, in.config.PartitionReceiveTimeout) + events, err := partitionClient.ReceiveEvents(receiveCtx, in.config.PartitionReceiveCount, nil) cancelReceive() if err != nil && !errors.Is(err, context.DeadlineExceeded) { diff --git a/x-pack/filebeat/input/azureeventhub/v2_migration.go b/x-pack/filebeat/input/azureeventhub/v2_migration.go index e59603be015..755cabc7b83 100644 --- a/x-pack/filebeat/input/azureeventhub/v2_migration.go +++ b/x-pack/filebeat/input/azureeventhub/v2_migration.go @@ -11,26 +11,41 @@ import ( "encoding/json" "errors" "fmt" + "github.com/Azure/azure-sdk-for-go/sdk/azcore/runtime" + "github.com/Azure/azure-sdk-for-go/sdk/storage/azblob/blob" "net/url" "strconv" "strings" "github.com/Azure/azure-sdk-for-go/sdk/messaging/azeventhubs" - "github.com/Azure/azure-sdk-for-go/sdk/messaging/azeventhubs/checkpoints" "github.com/Azure/azure-sdk-for-go/sdk/storage/azblob/container" "github.com/elastic/elastic-agent-libs/logp" ) +type consumerClient interface { + GetEventHubProperties(ctx context.Context, options *azeventhubs.GetEventHubPropertiesOptions) (azeventhubs.EventHubProperties, error) +} + +type containerClient interface { + NewBlobClient(blobName string) *blob.Client + NewListBlobsFlatPager(o *container.ListBlobsFlatOptions) *runtime.Pager[container.ListBlobsFlatResponse] +} + +type checkpointer interface { + SetCheckpoint(ctx context.Context, checkpoint azeventhubs.Checkpoint, options *azeventhubs.SetCheckpointOptions) error +} + // migrationAssistant assists the input in migrating -// checkpoint data from v1 to v2. +// v1 checkpoint information to v2. type migrationAssistant struct { log *logp.Logger - consumerClient *azeventhubs.ConsumerClient - blobContainerClient *container.Client - checkpointStore *checkpoints.BlobStore + consumerClient consumerClient + blobContainerClient containerClient + checkpointStore checkpointer } -func newMigrationAssistant(log *logp.Logger, consumerClient *azeventhubs.ConsumerClient, blobContainerClient *container.Client, checkpointStore *checkpoints.BlobStore) *migrationAssistant { +// newMigrationAssistant creates a new migration assistant. +func newMigrationAssistant(log *logp.Logger, consumerClient consumerClient, blobContainerClient containerClient, checkpointStore checkpointer) *migrationAssistant { return &migrationAssistant{ log: log, consumerClient: consumerClient, @@ -39,6 +54,8 @@ func newMigrationAssistant(log *logp.Logger, consumerClient *azeventhubs.Consume } } +// checkAndMigrate checks if the v1 checkpoint information for the partitions +// exists and migrates it to v2 if it does. 
func (m *migrationAssistant) checkAndMigrate(ctx context.Context, eventHubConnectionString, eventHubName, consumerGroup string) error {
 	// Fetching event hub information
 	eventHubProperties, err := m.consumerClient.GetEventHubProperties(ctx, nil)
 	if err != nil {
 		return fmt.Errorf("failed to get event hub properties: %w", err)
 	}
 
@@ -54,146 +71,132 @@ func (m *migrationAssistant) checkAndMigrate(ctx context.Context, eventHubConnec
 	)
 
 	// Parse the connection string to get FQDN.
-	props, err := parseConnectionString(eventHubConnectionString)
+	connectionStringInfo, err := parseConnectionString(eventHubConnectionString)
 	if err != nil {
 		return fmt.Errorf("failed to parse connection string: %w", err)
 	}
 
-	err = m.checkAndMigratePartition(ctx, eventHubProperties, props, eventHubName, consumerGroup)
+	blobs, err := m.listBlobs(ctx)
 	if err != nil {
-		return fmt.Errorf("failed to check and migrate partition: %w", err)
+		return err
 	}
 
-	// blobClient := m.blobContainerClient.NewBlobClient("")
-	// blobClient.BlobExists(ctx)
-
-	// blobPager := m.blobContainerClient.NewListBlobsFlatPager(nil)
-
-	// for blobPager.More() {
-	// page, err := blobPager.NextPage(ctx)
-	// if err != nil {
-	// return fmt.Errorf("failed to list blobs: %w", err)
-	// }
-
-	// }
-
-	// Fetching the list of blobs in the container.
-
-	// Search for the checkpoint blobs in the container.
-	// The blobs are named as <namespace>/<eventhub>/<consumer group>/checkpoint/<partition ID>
-
-	// blobPager := m.blobContainerClient.NewListBlobsFlatPager(nil)
-
-	// r, err := blobPager.NextPage(ctx)
-	// if err != nil {
-	// return fmt.Errorf("failed to list blobs: %w", err)
-	// }
-
-	// props.FullyQualifiedNamespace
-
-	// // Fetching event hub information
-	// eventHubProperties, err := m.consumerClient.GetEventHubProperties(ctx, nil)
-	// if err != nil {
-	// return fmt.Errorf("failed to get event hub properties: %w", err)
-	// }
-
-	// // v2 checkpoint information path
-	// // mbranca-general.servicebus.windows.net/sdh4552/$Default/checkpoint/0
-
-	// eventHubProperties.PartitionIDs
+	for _, partitionID := range eventHubProperties.PartitionIDs {
+		err = m.checkAndMigratePartition(ctx, blobs, partitionID, connectionStringInfo.FullyQualifiedNamespace, eventHubName, consumerGroup)
+		if err != nil {
+			return fmt.Errorf("failed to check and migrate partition: %w", err)
+		}
+	}
 
 	return nil
 }
 
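The per-partition decision that follows boils down to two membership checks against the blob listing fetched above. Here is a standalone sketch of that decision logic (the namespace, event hub name, and listing are illustrative values, not taken from a live container):

    package main

    import "fmt"

    // migrationAction reports what checkAndMigratePartition would decide for
    // one partition, given the set of blob names in the checkpoint container.
    func migrationAction(blobs map[string]bool, namespace, eventHub, consumerGroup, partitionID string) string {
        // v2 checkpoints live at <namespace>/<eventhub>/<consumer group>/checkpoint/<partition ID>.
        v2Blob := fmt.Sprintf("%s/%s/%s/checkpoint/%s", namespace, eventHub, consumerGroup, partitionID)
        if blobs[v2Blob] {
            return "skip: v2 checkpoint already exists"
        }
        // v1 checkpoints are named after the bare partition ID.
        if !blobs[partitionID] {
            return "skip: no v1 checkpoint to migrate"
        }
        return "migrate: copy the v1 checkpoint into the v2 store"
    }

    func main() {
        // Illustrative listing: partition 0 already migrated, partition 1
        // still on v1, partition 2 has no checkpoint at all.
        blobs := map[string]bool{
            "example.servicebus.windows.net/hub/$Default/checkpoint/0": true,
            "1": true,
        }
        for _, id := range []string{"0", "1", "2"} {
            fmt.Printf("partition %s -> %s\n", id, migrationAction(blobs, "example.servicebus.windows.net", "hub", "$Default", id))
        }
    }

+// checkAndMigratePartition checks whether the v1 checkpoint information for
+// the `partitionID` partition exists, and migrates it to v2 if it does.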
func (m *migrationAssistant) checkAndMigratePartition( ctx context.Context, - eventHubProperties azeventhubs.EventHubProperties, - props ConnectionStringProperties, + blobs map[string]bool, + partitionID, + fullyQualifiedNamespace, eventHubName, consumerGroup string) error { - blobs := map[string]bool{} + // v2 checkpoint information path + // mbranca-general.servicebus.windows.net/sdh4552/$Default/checkpoint/0 + blob := fmt.Sprintf("%s/%s/%s/checkpoint/%s", fullyQualifiedNamespace, eventHubName, consumerGroup, partitionID) - c := m.blobContainerClient.NewListBlobsFlatPager(nil) + // Check if v2 checkpoint information exists + if _, ok := blobs[blob]; ok { + m.log.Infow( + "checkpoint v2 information for partition already exists, no migration needed", + "partitionID", partitionID, + ) - for c.More() { - page, err := c.NextPage(ctx) - if err != nil { - return fmt.Errorf("failed to list blobs: %w", err) - } - - for _, blob := range page.Segment.BlobItems { - blobs[*blob.Name] = true - } + return nil } - for _, partitionID := range eventHubProperties.PartitionIDs { - // v2 checkpoint information path - // mbranca-general.servicebus.windows.net/sdh4552/$Default/checkpoint/0 - blob := fmt.Sprintf("%s/%s/%s/checkpoint/%s", props.FullyQualifiedNamespace, eventHubName, consumerGroup, partitionID) - - if _, ok := blobs[blob]; ok { - m.log.Infow( - "checkpoint v2 information for partition already exists, no migration needed", - "partitionID", partitionID, - ) - continue - } + // Check if v1 checkpoint information exists + if _, ok := blobs[partitionID]; !ok { + m.log.Infow( + "checkpoint v1 information for partition doesn't exist, no migration needed", + "partitionID", partitionID, + ) - // try downloading the checkpoint v1 information for the partition - if _, ok := blobs[partitionID]; !ok { - m.log.Infow( - "checkpoint v1 information for partition doesn't exist, no migration needed", - "partitionID", partitionID, - ) - continue - } + return nil + } - // v1 checkpoint information path is the partition ID itself - cln := m.blobContainerClient.NewBlobClient(partitionID) + // Try downloading the checkpoint v1 information for the partition + cln := m.blobContainerClient.NewBlobClient(partitionID) - buff := [4000]byte{} - size, err := cln.DownloadBuffer(ctx, buff[:], nil) - if err != nil { - return fmt.Errorf("failed to download checkpoint v1 information for partition %s: %w", partitionID, err) - } + // 4KB buffer should be enough to read + // the checkpoint v1 information. 
+ buff := [4000]byte{} - m.log.Infow("downloaded checkpoint v1 information for partition", "partitionID", partitionID, "size", size) + size, err := cln.DownloadBuffer(ctx, buff[:], nil) + if err != nil { + return fmt.Errorf("failed to download checkpoint v1 information for partition %s: %w", partitionID, err) + } - var checkpointV1 *LegacyCheckpoint + m.log.Infow( + "downloaded checkpoint v1 information for partition", + "partitionID", partitionID, + "size", size, + ) - if err := json.Unmarshal(buff[0:size], &checkpointV1); err != nil { - return fmt.Errorf("failed to unmarshal checkpoint v1 information for partition %s: %w", partitionID, err) - } + // Unmarshal the checkpoint v1 information + var checkpointV1 *LegacyCheckpoint - // migrate the checkpoint v1 information to v2 - m.log.Infow("migrating checkpoint v1 information to v2", "partitionID", partitionID) + if err := json.Unmarshal(buff[0:size], &checkpointV1); err != nil { + return fmt.Errorf("failed to unmarshal checkpoint v1 information for partition %s: %w", partitionID, err) + } - checkpointV2 := azeventhubs.Checkpoint{ - ConsumerGroup: consumerGroup, - EventHubName: eventHubName, - FullyQualifiedNamespace: props.FullyQualifiedNamespace, - PartitionID: partitionID, - } + // migrate the checkpoint v1 information to v2 + m.log.Infow("migrating checkpoint v1 information to v2", "partitionID", partitionID) - offset, err := strconv.ParseInt(checkpointV1.Checkpoint.Offset, 10, 64) - if err != nil { - return fmt.Errorf("failed to parse offset: %w", err) - } + // Common checkpoint information + checkpointV2 := azeventhubs.Checkpoint{ + ConsumerGroup: consumerGroup, + EventHubName: eventHubName, + FullyQualifiedNamespace: fullyQualifiedNamespace, + PartitionID: partitionID, + } - checkpointV2.Offset = &offset - checkpointV2.SequenceNumber = &checkpointV1.Checkpoint.SequenceNumber + offset, err := strconv.ParseInt(checkpointV1.Checkpoint.Offset, 10, 64) + if err != nil { + return fmt.Errorf("failed to parse offset: %w", err) + } - if err := m.checkpointStore.SetCheckpoint(ctx, checkpointV2, nil); err != nil { - return fmt.Errorf("failed to update checkpoint v2 information for partition %s: %w", partitionID, err) - } + checkpointV2.Offset = &offset + checkpointV2.SequenceNumber = &checkpointV1.Checkpoint.SequenceNumber - m.log.Infow("migrated checkpoint v1 information to v2", "partitionID", partitionID) + // Stores the checkpoint v2 information for the partition + if err := m.checkpointStore.SetCheckpoint(ctx, checkpointV2, nil); err != nil { + return fmt.Errorf("failed to update checkpoint v2 information for partition %s: %w", partitionID, err) } + m.log.Infow("migrated checkpoint v1 information to v2", "partitionID", partitionID) + return nil } +// listBlobs lists all the blobs in the container. 
+func (m *migrationAssistant) listBlobs(ctx context.Context) (map[string]bool, error) { + blobs := map[string]bool{} + + c := m.blobContainerClient.NewListBlobsFlatPager(nil) + for c.More() { + page, err := c.NextPage(ctx) + if err != nil { + return map[string]bool{}, fmt.Errorf("failed to list blobs: %w", err) + } + + for _, blob := range page.Segment.BlobItems { + blobs[*blob.Name] = true + } + } + return blobs, nil +} + type LegacyCheckpoint struct { PartitionID string `json:"partitionID"` Epoch int `json:"epoch"` diff --git a/x-pack/filebeat/module/azure/activitylogs/config/azure-eventhub.yml b/x-pack/filebeat/module/azure/activitylogs/config/azure-eventhub.yml index b69d473dd9b..ac3d2d352bb 100644 --- a/x-pack/filebeat/module/azure/activitylogs/config/azure-eventhub.yml +++ b/x-pack/filebeat/module/azure/activitylogs/config/azure-eventhub.yml @@ -37,16 +37,28 @@ resource_manager_endpoint: {{ .resource_manager_endpoint }} tags: {{.tags | tojson}} publisher_pipeline.disable_host: {{ inList .tags "forwarded" }} +{{ if .migrate_checkpoint }} +migrate_checkpoint: {{ .migrate_checkpoint }} +{{ end }} + {{ if .processor_version }} processor_version: {{ .processor_version }} {{ end }} -{{ if .migrate_checkpoint }} -migrate_checkpoint: {{ .migrate_checkpoint }} +{{ if .processor_update_interval }} +processor_update_interval: {{ .processor_update_interval }} +{{ end }} + +{{ if .processor_start_position }} +processor_start_position: {{ .processor_start_position }} +{{ end }} + +{{ if .partition_receive_timeout }} +partition_receive_timeout: {{ .partition_receive_timeout }} {{ end }} -{{ if .start_position }} -start_position: {{ .start_position }} +{{ if .partition_receive_count }} +partition_receive_count: {{ .partition_receive_count }} {{ end }} processors: diff --git a/x-pack/filebeat/module/azure/activitylogs/manifest.yml b/x-pack/filebeat/module/azure/activitylogs/manifest.yml index 59c1ef9b729..140b34a42d7 100644 --- a/x-pack/filebeat/module/azure/activitylogs/manifest.yml +++ b/x-pack/filebeat/module/azure/activitylogs/manifest.yml @@ -15,12 +15,18 @@ var: - name: resource_manager_endpoint - name: tags default: [forwarded] - - name: processor_version - default: "v1" - name: migrate_checkpoint default: yes - - name: start_position + - name: processor_version + default: "v1" + - name: processor_update_interval + default: "10s" + - name: processor_start_position default: "earliest" + - name: partition_receive_timeout + default: "5s" + - name: partition_receive_count + default: 100 ingest_pipeline: - ingest/pipeline.yml - ../azure-shared-pipeline.yml diff --git a/x-pack/filebeat/module/azure/auditlogs/config/azure-eventhub.yml b/x-pack/filebeat/module/azure/auditlogs/config/azure-eventhub.yml index 8b6dd0d383f..9e84a9e6951 100644 --- a/x-pack/filebeat/module/azure/auditlogs/config/azure-eventhub.yml +++ b/x-pack/filebeat/module/azure/auditlogs/config/azure-eventhub.yml @@ -31,18 +31,29 @@ storage_account_container: filebeat-auditlogs-{{ .eventhub }} resource_manager_endpoint: {{ .resource_manager_endpoint }} {{ end }} +{{ if .migrate_checkpoint }} +migrate_checkpoint: {{ .migrate_checkpoint }} +{{ end }} + {{ if .processor_version }} processor_version: {{ .processor_version }} {{ end }} -{{ if .migrate_checkpoint }} -migrate_checkpoint: {{ .migrate_checkpoint }} +{{ if .processor_update_interval }} +processor_update_interval: {{ .processor_update_interval }} +{{ end }} + +{{ if .processor_start_position }} +processor_start_position: {{ .processor_start_position }} {{ end }} -{{ if 
.start_position }} -start_position: {{ .start_position }} +{{ if .partition_receive_timeout }} +partition_receive_timeout: {{ .partition_receive_timeout }} {{ end }} +{{ if .partition_receive_count }} +partition_receive_count: {{ .partition_receive_count }} +{{ end }} tags: {{.tags | tojson}} publisher_pipeline.disable_host: {{ inList .tags "forwarded" }} processors: diff --git a/x-pack/filebeat/module/azure/auditlogs/manifest.yml b/x-pack/filebeat/module/azure/auditlogs/manifest.yml index 8da58bfc252..32cb1719fb5 100644 --- a/x-pack/filebeat/module/azure/auditlogs/manifest.yml +++ b/x-pack/filebeat/module/azure/auditlogs/manifest.yml @@ -15,13 +15,18 @@ var: - name: resource_manager_endpoint - name: tags default: [forwarded] - - name: processor_version - default: "v1" - name: migrate_checkpoint default: yes - - name: start_position + - name: processor_version + default: "v1" + - name: processor_update_interval + default: "10s" + - name: processor_start_position default: "earliest" - + - name: partition_receive_timeout + default: "5s" + - name: partition_receive_count + default: 100 ingest_pipeline: - ingest/pipeline.yml - ../azure-shared-pipeline.yml diff --git a/x-pack/filebeat/module/azure/platformlogs/config/azure-eventhub.yml b/x-pack/filebeat/module/azure/platformlogs/config/azure-eventhub.yml index 6648e40dcef..ee7c2727ffb 100644 --- a/x-pack/filebeat/module/azure/platformlogs/config/azure-eventhub.yml +++ b/x-pack/filebeat/module/azure/platformlogs/config/azure-eventhub.yml @@ -31,16 +31,28 @@ storage_account_container: filebeat-platformlogs-{{ .eventhub }} resource_manager_endpoint: {{ .resource_manager_endpoint }} {{ end }} +{{ if .migrate_checkpoint }} +migrate_checkpoint: {{ .migrate_checkpoint }} +{{ end }} + {{ if .processor_version }} processor_version: {{ .processor_version }} {{ end }} -{{ if .migrate_checkpoint }} -migrate_checkpoint: {{ .migrate_checkpoint }} +{{ if .processor_update_interval }} +processor_update_interval: {{ .processor_update_interval }} +{{ end }} + +{{ if .processor_start_position }} +processor_start_position: {{ .processor_start_position }} +{{ end }} + +{{ if .partition_receive_timeout }} +partition_receive_timeout: {{ .partition_receive_timeout }} {{ end }} -{{ if .start_position }} -start_position: {{ .start_position }} +{{ if .partition_receive_count }} +partition_receive_count: {{ .partition_receive_count }} {{ end }} tags: {{.tags | tojson}} diff --git a/x-pack/filebeat/module/azure/platformlogs/manifest.yml b/x-pack/filebeat/module/azure/platformlogs/manifest.yml index 36e1f438f01..345fe4cd555 100644 --- a/x-pack/filebeat/module/azure/platformlogs/manifest.yml +++ b/x-pack/filebeat/module/azure/platformlogs/manifest.yml @@ -14,13 +14,18 @@ var: - name: resource_manager_endpoint - name: tags default: [forwarded] - - name: processor_version - default: "v1" - name: migrate_checkpoint default: yes - - name: start_position + - name: processor_version + default: "v1" + - name: processor_update_interval + default: "10s" + - name: processor_start_position default: "earliest" - + - name: partition_receive_timeout + default: "5s" + - name: partition_receive_count + default: 100 ingest_pipeline: - ingest/pipeline.yml - ../azure-shared-pipeline.yml diff --git a/x-pack/filebeat/module/azure/signinlogs/config/azure-eventhub.yml b/x-pack/filebeat/module/azure/signinlogs/config/azure-eventhub.yml index 6e11a945acc..02f4bf7421d 100644 --- a/x-pack/filebeat/module/azure/signinlogs/config/azure-eventhub.yml +++ 
b/x-pack/filebeat/module/azure/signinlogs/config/azure-eventhub.yml @@ -31,16 +31,28 @@ storage_account_container: filebeat-signinlogs-{{ .eventhub }} resource_manager_endpoint: {{ .resource_manager_endpoint }} {{ end }} +{{ if .migrate_checkpoint }} +migrate_checkpoint: {{ .migrate_checkpoint }} +{{ end }} + {{ if .processor_version }} processor_version: {{ .processor_version }} {{ end }} -{{ if .migrate_checkpoint }} -migrate_checkpoint: {{ .migrate_checkpoint }} +{{ if .processor_update_interval }} +processor_update_interval: {{ .processor_update_interval }} +{{ end }} + +{{ if .processor_start_position }} +processor_start_position: {{ .processor_start_position }} +{{ end }} + +{{ if .partition_receive_timeout }} +partition_receive_timeout: {{ .partition_receive_timeout }} {{ end }} -{{ if .start_position }} -start_position: {{ .start_position }} +{{ if .partition_receive_count }} +partition_receive_count: {{ .partition_receive_count }} {{ end }} tags: {{.tags | tojson}} diff --git a/x-pack/filebeat/module/azure/signinlogs/manifest.yml b/x-pack/filebeat/module/azure/signinlogs/manifest.yml index c64dff0b207..ec9bd467d97 100644 --- a/x-pack/filebeat/module/azure/signinlogs/manifest.yml +++ b/x-pack/filebeat/module/azure/signinlogs/manifest.yml @@ -15,13 +15,18 @@ var: - name: resource_manager_endpoint - name: tags default: [forwarded] - - name: processor_version - default: "v1" - name: migrate_checkpoint default: yes - - name: start_position + - name: processor_version + default: "v1" + - name: processor_update_interval + default: "10s" + - name: processor_start_position default: "earliest" - + - name: partition_receive_timeout + default: "5s" + - name: partition_receive_count + default: 100 ingest_pipeline: - ingest/pipeline.yml - ../azure-shared-pipeline.yml From 80cbc2a639c53f23afa7323d3c92dc55b788a432 Mon Sep 17 00:00:00 2001 From: Maurizio Branca Date: Tue, 4 Jun 2024 18:24:01 +0200 Subject: [PATCH 41/41] Fix linter complaints and cleanups --- .../input/azureeventhub/decoder_test.go | 3 +- .../filebeat/input/azureeventhub/v1_input.go | 56 +------------------ .../input/azureeventhub/v2_migration.go | 6 +- 3 files changed, 7 insertions(+), 58 deletions(-) diff --git a/x-pack/filebeat/input/azureeventhub/decoder_test.go b/x-pack/filebeat/input/azureeventhub/decoder_test.go index 7f2613493a8..f1c30651ae5 100644 --- a/x-pack/filebeat/input/azureeventhub/decoder_test.go +++ b/x-pack/filebeat/input/azureeventhub/decoder_test.go @@ -10,9 +10,10 @@ import ( "fmt" "testing" + "github.com/stretchr/testify/assert" + "github.com/elastic/elastic-agent-libs/logp" "github.com/elastic/elastic-agent-libs/monitoring" - "github.com/stretchr/testify/assert" ) func TestDecodeRecords(t *testing.T) { diff --git a/x-pack/filebeat/input/azureeventhub/v1_input.go b/x-pack/filebeat/input/azureeventhub/v1_input.go index a5014fac305..3a6a21189c5 100644 --- a/x-pack/filebeat/input/azureeventhub/v1_input.go +++ b/x-pack/filebeat/input/azureeventhub/v1_input.go @@ -232,7 +232,7 @@ func (in *eventHubInputV1) processEvents(event *eventhub.Event) { processingStartTime := time.Now() eventHubMetadata := mapstr.M{ // The `partition_id` is not available in the - // current version of the SDK. + // legacy version of the SDK. 
"eventhub": in.config.EventHubName, "consumer_group": in.config.ConsumerGroup, } @@ -298,60 +298,6 @@ func stripConnectionString(c string) string { return "(redacted)" } -//// unpackRecords will try to split the message into multiple ones based on the group field provided by the configuration -//func (in *eventHubInputV1) unpackRecords(bMessage []byte) []string { -// var mapObject map[string][]interface{} -// var messages []string -// -// // Clean up the message for known issues [1] where Azure services produce malformed JSON documents. -// // Sanitization occurs if options are available and the message contains an invalid JSON. -// // -// // [1]: https://learn.microsoft.com/en-us/answers/questions/1001797/invalid-json-logs-produced-for-function-apps -// if len(in.config.SanitizeOptions) != 0 && !json.Valid(bMessage) { -// bMessage = sanitize(bMessage, in.config.SanitizeOptions...) -// in.metrics.sanitizedMessages.Inc() -// } -// -// // check if the message is a "records" object containing a list of events -// err := json.Unmarshal(bMessage, &mapObject) -// if err == nil { -// if len(mapObject[expandEventListFromField]) > 0 { -// for _, ms := range mapObject[expandEventListFromField] { -// js, err := json.Marshal(ms) -// if err == nil { -// messages = append(messages, string(js)) -// in.metrics.receivedEvents.Inc() -// } else { -// in.log.Errorw(fmt.Sprintf("serializing message %s", ms), "error", err) -// } -// } -// } -// } else { -// in.log.Debugf("deserializing multiple messages to a `records` object returning error: %s", err) -// // in some cases the message is an array -// var arrayObject []interface{} -// err = json.Unmarshal(bMessage, &arrayObject) -// if err != nil { -// // return entire message -// in.log.Debugf("deserializing multiple messages to an array returning error: %s", err) -// in.metrics.decodeErrors.Inc() -// return []string{string(bMessage)} -// } -// -// for _, ms := range arrayObject { -// js, err := json.Marshal(ms) -// if err == nil { -// messages = append(messages, string(js)) -// in.metrics.receivedEvents.Inc() -// } else { -// in.log.Errorw(fmt.Sprintf("serializing message %s", ms), "error", err) -// } -// } -// } -// -// return messages -//} - func getAzureEnvironment(overrideResManager string) (azure.Environment, error) { // if no override is set then the azure public cloud is used if overrideResManager == "" || overrideResManager == "" { diff --git a/x-pack/filebeat/input/azureeventhub/v2_migration.go b/x-pack/filebeat/input/azureeventhub/v2_migration.go index 755cabc7b83..52a91137ccc 100644 --- a/x-pack/filebeat/input/azureeventhub/v2_migration.go +++ b/x-pack/filebeat/input/azureeventhub/v2_migration.go @@ -11,14 +11,16 @@ import ( "encoding/json" "errors" "fmt" - "github.com/Azure/azure-sdk-for-go/sdk/azcore/runtime" - "github.com/Azure/azure-sdk-for-go/sdk/storage/azblob/blob" "net/url" "strconv" "strings" + "github.com/Azure/azure-sdk-for-go/sdk/azcore/runtime" + "github.com/Azure/azure-sdk-for-go/sdk/storage/azblob/blob" + "github.com/Azure/azure-sdk-for-go/sdk/messaging/azeventhubs" "github.com/Azure/azure-sdk-for-go/sdk/storage/azblob/container" + "github.com/elastic/elastic-agent-libs/logp" )