Skip to content

Commit

Permalink
Fix flint skipping index syntax issues (#1846)
Browse files Browse the repository at this point in the history
* Fix Flint-related issues for:
 - vpc flow
 - cloud trail
 - multiple records protocol support

Signed-off-by: YANGDB <[email protected]>

* update flint vega ip sankey visualization query

Signed-off-by: YANGDB <[email protected]>

* update flint vega ip sankey visualization query

Signed-off-by: YANGDB <[email protected]>

---------

Signed-off-by: YANGDB <[email protected]>
  • Loading branch information
YANG-DB authored May 28, 2024
1 parent 59f0d57 commit 0d2a1c7
Show file tree
Hide file tree
Showing 15 changed files with 335 additions and 146 deletions.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Empty file.
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,11 @@ CREATE SKIPPING INDEX ON {table_name} (
accountid BLOOM_FILTER,
region VALUE_SET,
severity_id VALUE_SET,
src_endpoint.ip BLOOM_FILTER,
dst_endpoint.ip BLOOM_FILTER,
src_endpoint.svc_name VALUE_SET,
dst_endpoint.svc_name VALUE_SET,
request_processing_time MIN_MAX,
traffic.bytes MIN_MAX
`src_endpoint.ip` BLOOM_FILTER,
`dst_endpoint.ip` BLOOM_FILTER,
`src_endpoint.svc_name` VALUE_SET,
`dst_endpoint.svc_name` VALUE_SET,
`traffic.bytes` MIN_MAX
) WITH (
auto_refresh = true,
refresh_interval = '15 Minutes',
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,5 +58,5 @@ CREATE EXTERNAL TABLE IF NOT EXISTS {table_name} (
accountid STRING,
eventday STRING
)
USING json
USING parquet
LOCATION '{s3_bucket_location}'

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -1,51 +1,50 @@
CREATE MATERIALIZED VIEW {table_name}__mview AS
SELECT
rec.userIdentity.type AS `aws.cloudtrail.userIdentity.type`,
rec.userIdentity.principalId AS `aws.cloudtrail.userIdentity.principalId`,
rec.userIdentity.arn AS `aws.cloudtrail.userIdentity.arn`,
rec.userIdentity.accountId AS `aws.cloudtrail.userIdentity.accountId`,
rec.userIdentity.invokedBy AS `aws.cloudtrail.userIdentity.invokedBy`,
rec.userIdentity.accessKeyId AS `aws.cloudtrail.userIdentity.accessKeyId`,
rec.userIdentity.userName AS `aws.cloudtrail.userIdentity.userName`,
rec.userIdentity.sessionContext.attributes.mfaAuthenticated AS `aws.cloudtrail.userIdentity.sessionContext.attributes.mfaAuthenticated`,
CAST(rec.userIdentity.sessionContext.attributes.creationDate AS TIMESTAMP) AS `aws.cloudtrail.userIdentity.sessionContext.attributes.creationDate`,
rec.userIdentity.sessionContext.sessionIssuer.type AS `aws.cloudtrail.userIdentity.sessionContext.sessionIssuer.type`,
rec.userIdentity.sessionContext.sessionIssuer.principalId AS `aws.cloudtrail.userIdentity.sessionContext.sessionIssuer.principalId`,
rec.userIdentity.sessionContext.sessionIssuer.arn AS `aws.cloudtrail.userIdentity.sessionContext.sessionIssuer.arn`,
rec.userIdentity.sessionContext.sessionIssuer.accountId AS `aws.cloudtrail.userIdentity.sessionContext.sessionIssuer.accountId`,
rec.userIdentity.sessionContext.sessionIssuer.userName AS `aws.cloudtrail.userIdentity.sessionContext.sessionIssuer.userName`,
rec.userIdentity.sessionContext.ec2RoleDelivery AS `aws.cloudtrail.userIdentity.sessionContext.ec2RoleDelivery`,
userIdentity.type AS `aws.cloudtrail.userIdentity.type`,
userIdentity.principalId AS `aws.cloudtrail.userIdentity.principalId`,
userIdentity.arn AS `aws.cloudtrail.userIdentity.arn`,
userIdentity.accountId AS `aws.cloudtrail.userIdentity.accountId`,
userIdentity.invokedBy AS `aws.cloudtrail.userIdentity.invokedBy`,
userIdentity.accessKeyId AS `aws.cloudtrail.userIdentity.accessKeyId`,
userIdentity.userName AS `aws.cloudtrail.userIdentity.userName`,
userIdentity.sessionContext.attributes.mfaAuthenticated AS `aws.cloudtrail.userIdentity.sessionContext.attributes.mfaAuthenticated`,
CAST( userIdentity.sessionContext.attributes.creationDate AS TIMESTAMP) AS `aws.cloudtrail.userIdentity.sessionContext.attributes.creationDate`,
userIdentity.sessionContext.sessionIssuer.type AS `aws.cloudtrail.userIdentity.sessionContext.sessionIssuer.type`,
userIdentity.sessionContext.sessionIssuer.principalId AS `aws.cloudtrail.userIdentity.sessionContext.sessionIssuer.principalId`,
userIdentity.sessionContext.sessionIssuer.arn AS `aws.cloudtrail.userIdentity.sessionContext.sessionIssuer.arn`,
userIdentity.sessionContext.sessionIssuer.accountId AS `aws.cloudtrail.userIdentity.sessionContext.sessionIssuer.accountId`,
userIdentity.sessionContext.sessionIssuer.userName AS `aws.cloudtrail.userIdentity.sessionContext.sessionIssuer.userName`,
userIdentity.sessionContext.ec2RoleDelivery AS `aws.cloudtrail.userIdentity.sessionContext.ec2RoleDelivery`,

rec.eventVersion AS `aws.cloudtrail.eventVersion`,
CAST(rec.eventTime AS TIMESTAMP) AS `@timestamp`,
rec.eventSource AS `aws.cloudtrail.eventSource`,
rec.eventName AS `aws.cloudtrail.eventName`,
rec.eventCategory AS `aws.cloudtrail.eventCategory`,
rec.eventType AS `aws.cloudtrail.eventType`,
rec.eventId AS `aws.cloudtrail.eventId`,
eventVersion AS `aws.cloudtrail.eventVersion`,
CAST( eventTime AS TIMESTAMP) AS `@timestamp`,
eventSource AS `aws.cloudtrail.eventSource`,
eventName AS `aws.cloudtrail.eventName`,
eventCategory AS `aws.cloudtrail.eventCategory`,
eventType AS `aws.cloudtrail.eventType`,
eventId AS `aws.cloudtrail.eventId`,

rec.awsRegion AS `aws.cloudtrail.awsRegion`,
rec.sourceIPAddress AS `aws.cloudtrail.sourceIPAddress`,
rec.userAgent AS `aws.cloudtrail.userAgent`,
rec.errorCode AS `errorCode`,
rec.errorMessage AS `errorMessage`,
rec.requestParameters AS `aws.cloudtrail.requestParameter`,
rec.responseElements AS `aws.cloudtrail.responseElements`,
rec.additionalEventData AS `aws.cloudtrail.additionalEventData`,
rec.requestId AS `aws.cloudtrail.requestId`,
rec.resources AS `aws.cloudtrail.resources`,
rec.apiVersion AS `aws.cloudtrail.apiVersion`,
rec.readOnly AS `aws.cloudtrail.readOnly`,
rec.recipientAccountId AS `aws.cloudtrail.recipientAccountId`,
rec.serviceEventDetails AS `aws.cloudtrail.serviceEventDetails`,
rec.sharedEventId AS `aws.cloudtrail.sharedEventId`,
rec.vpcEndpointId AS `aws.cloudtrail.vpcEndpointId`,
rec.tlsDetails.tlsVersion AS `aws.cloudtrail.tlsDetails.tls_version`,
rec.tlsDetails.cipherSuite AS `aws.cloudtrail.tlsDetailscipher_suite`,
rec.tlsDetails.clientProvidedHostHeader AS `aws.cloudtrail.tlsDetailsclient_provided_host_header`
awsRegion AS `aws.cloudtrail.awsRegion`,
sourceIPAddress AS `aws.cloudtrail.sourceIPAddress`,
userAgent AS `aws.cloudtrail.userAgent`,
errorCode AS `errorCode`,
errorMessage AS `errorMessage`,
requestParameters AS `aws.cloudtrail.requestParameter`,
responseElements AS `aws.cloudtrail.responseElements`,
additionalEventData AS `aws.cloudtrail.additionalEventData`,
requestId AS `aws.cloudtrail.requestId`,
resources AS `aws.cloudtrail.resources`,
apiVersion AS `aws.cloudtrail.apiVersion`,
readOnly AS `aws.cloudtrail.readOnly`,
recipientAccountId AS `aws.cloudtrail.recipientAccountId`,
serviceEventDetails AS `aws.cloudtrail.serviceEventDetails`,
sharedEventId AS `aws.cloudtrail.sharedEventId`,
vpcEndpointId AS `aws.cloudtrail.vpcEndpointId`,
tlsDetails.tlsVersion AS `aws.cloudtrail.tlsDetails.tls_version`,
tlsDetails.cipherSuite AS `aws.cloudtrail.tlsDetailscipher_suite`,
tlsDetails.clientProvidedHostHeader AS `aws.cloudtrail.tlsDetailsclient_provided_host_header`
FROM
{table_name}
LATERAL VIEW explode(Records) myTable AS rec
WITH (
auto_refresh = true,
refresh_interval = '15 Minute',
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
/*
 * Materialized view built on the table substituted for the table_name
 * placeholder. Every element of the source Records array is exploded into
 * its own row, and the nested fields of that element are projected into
 * flat, dot-named columns (most of them under the aws.cloudtrail prefix).
 *
 * NOTE(review): the aliases `aws.cloudtrail.requestParameter`,
 * `aws.cloudtrail.tlsDetailscipher_suite` and
 * `aws.cloudtrail.tlsDetailsclient_provided_host_header` look inconsistent
 * with their siblings (missing "s" / missing "."). They are kept verbatim
 * because they are the output column names of this view; confirm against
 * downstream consumers before renaming.
 */
CREATE MATERIALIZED VIEW {table_name}__mview AS
SELECT
    /* Caller identity and session context */
    ct_record.userIdentity.type                                         AS `aws.cloudtrail.userIdentity.type`,
    ct_record.userIdentity.principalId                                  AS `aws.cloudtrail.userIdentity.principalId`,
    ct_record.userIdentity.arn                                          AS `aws.cloudtrail.userIdentity.arn`,
    ct_record.userIdentity.accountId                                    AS `aws.cloudtrail.userIdentity.accountId`,
    ct_record.userIdentity.invokedBy                                    AS `aws.cloudtrail.userIdentity.invokedBy`,
    ct_record.userIdentity.accessKeyId                                  AS `aws.cloudtrail.userIdentity.accessKeyId`,
    ct_record.userIdentity.userName                                     AS `aws.cloudtrail.userIdentity.userName`,
    ct_record.userIdentity.sessionContext.attributes.mfaAuthenticated   AS `aws.cloudtrail.userIdentity.sessionContext.attributes.mfaAuthenticated`,
    CAST(ct_record.userIdentity.sessionContext.attributes.creationDate AS TIMESTAMP)
                                                                        AS `aws.cloudtrail.userIdentity.sessionContext.attributes.creationDate`,
    ct_record.userIdentity.sessionContext.sessionIssuer.type            AS `aws.cloudtrail.userIdentity.sessionContext.sessionIssuer.type`,
    ct_record.userIdentity.sessionContext.sessionIssuer.principalId     AS `aws.cloudtrail.userIdentity.sessionContext.sessionIssuer.principalId`,
    ct_record.userIdentity.sessionContext.sessionIssuer.arn             AS `aws.cloudtrail.userIdentity.sessionContext.sessionIssuer.arn`,
    ct_record.userIdentity.sessionContext.sessionIssuer.accountId       AS `aws.cloudtrail.userIdentity.sessionContext.sessionIssuer.accountId`,
    ct_record.userIdentity.sessionContext.sessionIssuer.userName        AS `aws.cloudtrail.userIdentity.sessionContext.sessionIssuer.userName`,
    ct_record.userIdentity.sessionContext.ec2RoleDelivery               AS `aws.cloudtrail.userIdentity.sessionContext.ec2RoleDelivery`,

    /* Event descriptors; eventTime (a STRING in the source) becomes the @timestamp column */
    ct_record.eventVersion                                              AS `aws.cloudtrail.eventVersion`,
    CAST(ct_record.eventTime AS TIMESTAMP)                              AS `@timestamp`,
    ct_record.eventSource                                               AS `aws.cloudtrail.eventSource`,
    ct_record.eventName                                                 AS `aws.cloudtrail.eventName`,
    ct_record.eventCategory                                             AS `aws.cloudtrail.eventCategory`,
    ct_record.eventType                                                 AS `aws.cloudtrail.eventType`,
    ct_record.eventId                                                   AS `aws.cloudtrail.eventId`,

    /* Request / response details (errorCode and errorMessage are emitted without a prefix) */
    ct_record.awsRegion                                                 AS `aws.cloudtrail.awsRegion`,
    ct_record.sourceIPAddress                                           AS `aws.cloudtrail.sourceIPAddress`,
    ct_record.userAgent                                                 AS `aws.cloudtrail.userAgent`,
    ct_record.errorCode                                                 AS `errorCode`,
    ct_record.errorMessage                                              AS `errorMessage`,
    ct_record.requestParameters                                         AS `aws.cloudtrail.requestParameter`,
    ct_record.responseElements                                          AS `aws.cloudtrail.responseElements`,
    ct_record.additionalEventData                                       AS `aws.cloudtrail.additionalEventData`,
    ct_record.requestId                                                 AS `aws.cloudtrail.requestId`,
    ct_record.resources                                                 AS `aws.cloudtrail.resources`,
    ct_record.apiVersion                                                AS `aws.cloudtrail.apiVersion`,
    ct_record.readOnly                                                  AS `aws.cloudtrail.readOnly`,
    ct_record.recipientAccountId                                        AS `aws.cloudtrail.recipientAccountId`,
    ct_record.serviceEventDetails                                       AS `aws.cloudtrail.serviceEventDetails`,
    ct_record.sharedEventId                                             AS `aws.cloudtrail.sharedEventId`,
    ct_record.vpcEndpointId                                             AS `aws.cloudtrail.vpcEndpointId`,

    /* TLS details */
    ct_record.tlsDetails.tlsVersion                                     AS `aws.cloudtrail.tlsDetails.tls_version`,
    ct_record.tlsDetails.cipherSuite                                    AS `aws.cloudtrail.tlsDetailscipher_suite`,
    ct_record.tlsDetails.clientProvidedHostHeader                       AS `aws.cloudtrail.tlsDetailsclient_provided_host_header`
FROM
    {table_name}
    /* One output row per element of the Records array */
    LATERAL VIEW explode(Records) exploded_records AS ct_record
WITH (
    auto_refresh = true,
    refresh_interval = '15 Minute',
    checkpoint_location = '{s3_checkpoint_location}',
    watermark_delay = '1 Minute',
    /* JSON map keyed by the source table name; passes maxFilesPerTrigger = 10 for that table */
    extra_options = '{ "{table_name}": { "maxFilesPerTrigger": "10" }}'
)
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
CREATE SKIPPING INDEX ON {table_name} (
rec.userIdentity.principalId BLOOM_FILTER,
rec.userIdentity.accountId BLOOM_FILTER,
rec.userIdentity.userName BLOOM_FILTER,
rec.sourceIPAddress BLOOM_FILTER,
rec.eventId BLOOM_FILTER,
rec.userIdentity.type VALUE_SET,
rec.eventName VALUE_SET,
rec.eventType VALUE_SET,
rec.awsRegion VALUE_SET
`userIdentity.principalId` BLOOM_FILTER,
`userIdentity.accountId` BLOOM_FILTER,
`userIdentity.userName` BLOOM_FILTER,
`sourceIPAddress` BLOOM_FILTER,
`eventId` BLOOM_FILTER,
`userIdentity.type` VALUE_SET,
`eventName` VALUE_SET,
`eventType` VALUE_SET,
`awsRegion` VALUE_SET
) WITH (
auto_refresh = true,
refresh_interval = '15 Minutes',
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
CREATE EXTERNAL TABLE IF NOT EXISTS {table_name} (
Records ARRAY<STRUCT<
CREATE EXTERNAL TABLE IF NOT EXISTS {table_name} (
eventVersion STRING,
userIdentity STRUCT<
type:STRING,
Expand Down Expand Up @@ -56,10 +55,10 @@ CREATE EXTERNAL TABLE IF NOT EXISTS {table_name} (
cipherSuite:STRING,
clientProvidedHostHeader:STRING
>
>>
) USING json
LOCATION '{s3_bucket_location}'
)
USING json
OPTIONS (
compression='gzip',
recursivefilelookup='true'
PATH '{s3_bucket_location}',
recursivefilelookup='true',
multiline 'true'
)
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
/*
 * External JSON table whose single column, Records, is an array of event
 * structs. The field names match those projected by the accompanying
 * CloudTrail materialized view (presumably one CloudTrail log file per
 * JSON document; confirm against the data in the bucket).
 *
 * All struct fields use the `name:TYPE` form so that the outer struct is
 * declared the same way as the nested ones, and every option uses
 * `key = 'value'`.
 */
CREATE EXTERNAL TABLE IF NOT EXISTS {table_name} (
    Records ARRAY<STRUCT<
        eventVersion:STRING,
        userIdentity:STRUCT<
            type:STRING,
            principalId:STRING,
            arn:STRING,
            accountId:STRING,
            invokedBy:STRING,
            accessKeyId:STRING,
            userName:STRING,
            sessionContext:STRUCT<
                attributes:STRUCT<
                    mfaAuthenticated:STRING,
                    creationDate:STRING
                >,
                sessionIssuer:STRUCT<
                    type:STRING,
                    principalId:STRING,
                    arn:STRING,
                    accountId:STRING,
                    userName:STRING
                >,
                ec2RoleDelivery:STRING,
                webIdFederationData:MAP<STRING,STRING>
            >
        >,
        eventTime:STRING,
        eventSource:STRING,
        eventName:STRING,
        awsRegion:STRING,
        sourceIPAddress:STRING,
        userAgent:STRING,
        errorCode:STRING,
        errorMessage:STRING,
        requestParameters:STRING,
        responseElements:STRING,
        additionalEventData:STRING,
        requestId:STRING,
        eventId:STRING,
        resources:ARRAY<STRUCT<
            arn:STRING,
            accountId:STRING,
            type:STRING
        >>,
        eventType:STRING,
        apiVersion:STRING,
        readOnly:STRING,
        recipientAccountId:STRING,
        serviceEventDetails:STRING,
        sharedEventId:STRING,
        vpcEndpointId:STRING,
        eventCategory:STRING,
        tlsDetails:STRUCT<
            tlsVersion:STRING,
            cipherSuite:STRING,
            clientProvidedHostHeader:STRING
        >
    >>
)
USING json
LOCATION '{s3_bucket_location}'
OPTIONS (
    /* Source files are gzip-compressed and may sit in nested folders under LOCATION */
    compression = 'gzip',
    recursivefilelookup = 'true',
    /* A single JSON document may span several lines */
    multiline = 'true'
)
Loading

0 comments on commit 0d2a1c7

Please sign in to comment.