Merge pull request #426 from target/S3Redundancy
Optional Redundancy logging to remote S3 location
phutelmyer authored Jan 14, 2024
2 parents cbc086a + 9704d8a commit c127c1c
Showing 12 changed files with 406 additions and 16 deletions.
4 changes: 4 additions & 0 deletions build/go/frontend/Dockerfile
@@ -30,6 +30,10 @@ RUN mkdir /var/log/strelka/ && \
chgrp -R 0 /var/log/strelka/ && \
chmod -R g=u /var/log/strelka/

# Create blank strelka.log file to make sure the watcher has something to start with
RUN touch /var/log/strelka/strelka.log
RUN chmod -R 777 /var/log/strelka/strelka.log

# Set container entrypoint. This could be set/overridden elsewhere in deployment (e.g. k8s, docker-compose, etc.)
# Currently overwritten in ./build/docker-compose.yml
ENTRYPOINT ["strelka-frontend", "-locallog=true", "-kafkalog=false"]
7 changes: 7 additions & 0 deletions configs/go/frontend/frontend.yaml
@@ -20,3 +20,10 @@ broker:
keylocation: "path to key location"
calocation: "path to target ca bundle"
topic: "topic name here"
s3redundancy: "Boolean to pipe logs to S3 if the Kafka connection is interrupted"
s3:
accesskey: "S3 Access Key"
secretkey: "S3 Secret Key"
bucketName: "S3 bucket name"
region: "Region that the S3 Bucket resides in"
endpoint: "Endpoint that the S3 bucket refers to"
14 changes: 14 additions & 0 deletions docs/README.md
@@ -479,6 +479,11 @@ For the options below, only one response setting may be configured.
* "broker.keylocation": File Path to key file to be used to authenticate to Kafka Topic (Optional)
* "broker.calocation": File Path to CA Certificate bundle to be used to authenticate to Kafka Topic (Optional)
* "broker.topic": Full topic name of the Kafka Topic to connect to (Optional)
* "s3.accesskey": Access Key of the bucket to send redundancy files to (Optional)
* "s3.secretkey": Secret Key of the bucket that you want to send redundancy files to (Optional)
* "s3.bucketName": Name of the bucket to send redundancy files to (Optional)
* "s3.region": Region in which the bucket for redundancy files resides (Optional)
* "s3.endpoint": Endpoint of the bucket to send redundancy files to (Optional)
#### manager
* "coordinator.addr": network address of the coordinator (defaults to strelka_coordinator_1:6379)
@@ -750,6 +755,15 @@ Currently this is toggled on and off in the Frontend Dockerfile, which is overwr
The Kafka producer created with the above command line options is fully configurable, and placeholder fields have already been added to the frontend.yaml configuration file. This file will need to be updated to point to an existing Kafka topic, as desired. In cases where some fields are not used (e.g. when security has not been enabled on the desired Kafka topic), the unused fields in the broker configuration section of the frontend.yaml file may simply be replaced with an empty string.
#### Optional: S3 Redundancy
S3 redundancy requires that a Kafka producer has been created and that the `s3redundancy` boolean in the Kafka (broker) config is set to true; it provides a fallback for interruptions to the Kafka connection. S3, in this case, refers to either an AWS S3 bucket or a Ceph open-source object storage bucket.
When S3 redundancy is enabled and the Kafka connection described in the Kafka logging section of this document is interrupted, the contents of the local log file are uploaded to the configurable S3 location after each update to that file. By default, logs are kept for three hours after the start of the Kafka interruption, and logs in S3 are rotated on the hour to keep the remote bucket location current.
Once the connection to the original Kafka broker is re-established, the stored logs are sent to the Kafka broker in parallel with new logs. If a restart of the frontend is required to reset the connection, the stored logs (if they are not stale) are sent to the Kafka broker at the next startup.
This option is set to false by default.
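The frontend wires this up roughly as shown below. This is a condensed, hypothetical sketch of the logic this commit adds to `src/go/cmd/strelka-frontend/main.go`; the helper name `watchAndMirrorToS3` is illustrative only (in the actual change the logic lives inline in `main()`), and the replay of stored logs back to Kafka once the broker recovers is omitted:
```go
// Condensed sketch only: parse the s3redundancy flag, then mirror every write
// to the local log file into the configured S3 (or Ceph) bucket.
package main

import (
	"log"
	"strconv"

	"github.com/fsnotify/fsnotify"

	"github.com/target/strelka/src/go/pkg/structs"
	tosss3 "github.com/target/strelka/src/go/pkg/tossS3"
)

// watchAndMirrorToS3 is a hypothetical helper name used for illustration.
func watchAndMirrorToS3(conf structs.Frontend) error {
	// S3 redundancy defaults to off when the field is missing or unparsable.
	s3Redundancy, err := strconv.ParseBool(conf.Broker.S3redundancy)
	if err != nil || !s3Redundancy {
		return err
	}

	// Watch the local log file that the frontend always writes to.
	watcher, err := fsnotify.NewWatcher()
	if err != nil {
		return err
	}
	if err := watcher.Add("/var/log/strelka/strelka.log"); err != nil {
		watcher.Close()
		return err
	}

	go func() {
		defer watcher.Close()
		for event := range watcher.Events {
			// Every write to the local log triggers an upload of its
			// contents to the redundancy bucket.
			if event.Op&fsnotify.Write == fsnotify.Write {
				tosss3.UploadToS3(conf.S3.AccessKey, conf.S3.SecretKey,
					conf.S3.BucketName, conf.S3.Region, conf.S3.Endpoint)
				log.Println("strelka.log changed, uploading to S3")
			}
		}
	}()

	return nil
}

func main() {
	// Wiring of config loading and the Kafka producer is omitted from this sketch.
}
```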
## Scanners
Each scanner parses files of a specific flavor and performs data collection and/or file extraction on them. Scanners are typically named after the type of file they are intended to scan (e.g. "ScanHtml", "ScanPe", "ScanRar") but may also be named after the type of function or tool they use to perform their tasks (e.g. "ScanExiftool", "ScanHeader", "ScanOcr").
6 changes: 6 additions & 0 deletions go.mod
@@ -13,6 +13,12 @@ require (
)

require (
github.com/fsnotify/fsnotify v1.5.4 // indirect
github.com/jmespath/go-jmespath v0.4.0 // indirect
)

require (
github.com/aws/aws-sdk-go v1.44.55
github.com/cespare/xxhash/v2 v2.2.0 // indirect
github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f // indirect
golang.org/x/net v0.17.0 // indirect
5 changes: 5 additions & 0 deletions go.sum
@@ -663,6 +663,8 @@ github.com/armon/go-metrics v0.0.0-20180917152333-f0300d1749da/go.mod h1:Q73ZrmV
github.com/armon/go-radix v0.0.0-20180808171621-7fddfc383310/go.mod h1:ufUuZ+zHj4x4TnLV4JWEpy2hxWSpsRywHrMgIH9cCH8=
github.com/asaskevich/govalidator v0.0.0-20190424111038-f61b66f89f4a/go.mod h1:lB+ZfQJz7igIIfQNfa7Ml4HSf2uFQQRzpGGRXenZAgY=
github.com/aws/aws-sdk-go v1.15.11/go.mod h1:mFuSZ37Z9YOHbQEwBWztmVzqXrEkub65tZoCYDt7FT0=
github.com/aws/aws-sdk-go v1.44.55 h1:h+p61sPEsLOpnQ2mKnGPrIe1MFUKwwA0X5eQYAcjOMU=
github.com/aws/aws-sdk-go v1.44.55/go.mod h1:y4AeaBuwd2Lk+GepC1E9v0qOiTws0MIWAX4oIKwKHZo=
github.com/benbjohnson/clock v1.0.3/go.mod h1:bGMdMPoPVvcYyt1gHDf4J2KE153Yf9BuiUKYMaxlTDM=
github.com/beorn7/perks v0.0.0-20160804104726-4c0e84591b9a/go.mod h1:Dwedo/Wpr24TaqPxmxbtue+5NUziq4I4S80YR8gNf3Q=
github.com/beorn7/perks v0.0.0-20180321164747-3a771d992973/go.mod h1:Dwedo/Wpr24TaqPxmxbtue+5NUziq4I4S80YR8gNf3Q=
@@ -1180,6 +1182,9 @@ github.com/jhump/protoreflect v1.11.0/go.mod h1:U7aMIjN0NWq9swDP7xDdoMfRHb35uiuT
github.com/jhump/protoreflect v1.14.1/go.mod h1:JytZfP5d0r8pVNLZvai7U/MCuTWITgrI4tTg7puQFKI=
github.com/jmespath/go-jmespath v0.0.0-20160202185014-0b12d6b521d8/go.mod h1:Nht3zPeWKUH0NzdCt2Blrr5ys8VGpn0CEB0cQHVjt7k=
github.com/jmespath/go-jmespath v0.0.0-20160803190731-bd40a432e4c7/go.mod h1:Nht3zPeWKUH0NzdCt2Blrr5ys8VGpn0CEB0cQHVjt7k=
github.com/jmespath/go-jmespath v0.4.0 h1:BEgLn5cpjn8UN1mAw4NjwDrS35OdebyEtFe+9YPoQUg=
github.com/jmespath/go-jmespath v0.4.0/go.mod h1:T8mJZnbsbmF+m6zOOFylbeCJqk5+pHWvzYPziyZiYoo=
github.com/jmespath/go-jmespath/internal/testify v1.5.1/go.mod h1:L3OGu8Wl2/fWfCI6z80xFu9LTZmf1ZRjMHUOPmWr69U=
github.com/joefitzgerald/rainbow-reporter v0.1.0/go.mod h1:481CNgqmVHQZzdIbN52CupLJyoVwB10FQ/IQlF1pdL8=
github.com/jonboulle/clockwork v0.1.0/go.mod h1:Ii8DK3G1RaLaWxj9trq07+26W01tbo22gdxWY5EU2bo=
github.com/jonboulle/clockwork v0.2.2/go.mod h1:Pkfl5aHPm1nk2H9h0bjmnJD/BcgbGXUBGnn1kMkgxc8=
104 changes: 102 additions & 2 deletions src/go/cmd/strelka-frontend/main.go
@@ -1,6 +1,7 @@
package main

import (
"bytes"
"context"
"crypto/sha256"
"encoding/json"
@@ -10,8 +11,11 @@ import (
"io/ioutil"
"log"
"net"
"os"
"strconv"
"time"

"github.com/fsnotify/fsnotify"
"github.com/go-redis/redis/v8"
"github.com/google/uuid"
"google.golang.org/grpc"
@@ -22,6 +26,7 @@ import (
"github.com/target/strelka/src/go/api/strelka"
"github.com/target/strelka/src/go/pkg/rpc"
"github.com/target/strelka/src/go/pkg/structs"
tosss3 "github.com/target/strelka/src/go/pkg/tossS3"

"github.com/confluentinc/confluent-kafka-go/v2/kafka"
)
@@ -279,6 +284,13 @@ func main() {
log.Fatalf("failed to listen: %v", err)
}

// Check whether S3 redundancy is toggled on for the Kafka producer; defaults to false
boolS3, err := strconv.ParseBool(conf.Broker.S3redundancy)
if err != nil {
log.Printf("failed to parse boolean for S3 redundancy, setting to default (false): %v", err)
}

responses := make(chan *strelka.ScanResponse, 100)
defer close(responses)
if conf.Response.Log != "" {
@@ -290,6 +302,9 @@
}
if !*locallog && *kafkalog {
log.Printf("Creating new Kafka producer.")

// Full kafka configuration documentation:
// https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md
p, err := kafka.NewProducer(&kafka.ConfigMap{
"bootstrap.servers": conf.Broker.Bootstrap,
"security.protocol": conf.Broker.Protocol,
@@ -321,9 +336,8 @@
}
}()

// Produce to Kafka from logs
// Produce messages to topic (asynchronously)
go func() {
// Produce messages to topic (asynchronously)
topic := conf.Broker.Topic
for r := range responses {
rawIn := json.RawMessage(r.Event)
@@ -346,6 +360,92 @@
}, nil)
}
}()

// Optional: pipe logs to S3 if a change is detected in the local log file
if boolS3 {
// Create a watcher to detect changes to the strelka.log file
watcher, err := fsnotify.NewWatcher()
if err != nil {
log.Fatal(err)
}

defer watcher.Close()

// Watch the local log file so that logs go to S3 if Kafka fails
err = watcher.Add("/var/log/strelka/strelka.log")
if err != nil {
log.Printf("An error occurred adding the strelka.log watcher")
log.Fatal(err)
}

// Additional goroutine to upload to S3 whenever a change is detected in the strelka.log file.
go func() {
for {
select {
case event, ok := <-watcher.Events:
if !ok {
return
}
if event.Op&fsnotify.Write == fsnotify.Write {
localLog, err := os.Open("/var/log/strelka/strelka.log") // For read access.
if err != nil {
log.Println("ERROR failed to open strelka.log for size verification:", err)
continue
}

logMetadata, err := localLog.Stat()
localLog.Close()
if err != nil {
log.Println("ERROR failed to retrieve strelka.log metadata:", err)
continue
}

// Make sure that the strelka.log file hasn't just been truncated before uploading
if logMetadata.Size() != 0 {
tosss3.UploadToS3(conf.S3.AccessKey, conf.S3.SecretKey, conf.S3.BucketName, conf.S3.Region, conf.S3.Endpoint)
log.Println("Change to strelka.log file detected, upload to S3 in progress.")
}
}
case err, ok := <-watcher.Errors:
if !ok {
return
}
log.Println("ERROR:", err)
}
}
}()

// Replay logs stored in S3 to the Kafka topic, then truncate the local log file
go func() {
topic := conf.Broker.Topic
s3logs := tosss3.ListS3BucketContents(conf.S3.AccessKey, conf.S3.SecretKey, conf.S3.BucketName, conf.S3.Region, conf.S3.Endpoint)
for _, item := range s3logs.Contents {
// Download each stored log object and marshal its lines as JSON messages
log.Println("Replaying stored S3 log with key: " + *item.Key)
var rawCurrData = tosss3.DownloadFromS3(conf.S3.AccessKey, conf.S3.SecretKey, conf.S3.BucketName, *item.Key, conf.S3.Region, conf.S3.Endpoint)
for _, splitLog := range bytes.Split(rawCurrData, []byte("\n")) {
// Skip empty lines (e.g. the trailing newline) so that empty messages are not produced
if len(bytes.TrimSpace(splitLog)) == 0 {
continue
}
rawIn := json.RawMessage(splitLog)
bytesMess, err := rawIn.MarshalJSON()
if err != nil {
log.Printf("Unable to marshal byte encoded event for S3 log, check error message for more details: %v", err)
}

p.Produce(&kafka.Message{
TopicPartition: kafka.TopicPartition{Topic: &topic, Partition: -1},
Value: bytesMess,
Headers: []kafka.Header{
{Key: "@timestamp", Value: []byte(time.Now().Format("2006-01-02T15:04:05-0700"))},
},
}, nil)
}

}

// Truncate the local strelka.log file once its contents have been sent to Kafka
log.Printf("Beginning to truncate local strelka log.")
err := os.Truncate("/var/log/strelka/strelka.log", 0)
if err != nil {
log.Printf("Failed to truncate strelka.log file after sending messages to Kafka: %v", err)
}
}()
}
}
} else if conf.Response.Report != 0 {
go func() {
38 changes: 24 additions & 14 deletions src/go/pkg/structs/structs.go
@@ -28,12 +28,12 @@ type ConfThroughput struct {

type ConfFiles struct {
Patterns []string // required
Mimetypes []string //optional
Minsize int //optional
Maxsize int //optional
LimitPattern int //optional
LimitTotal int //optional
Modified int //optional
Mimetypes []string // optional
Minsize int // optional
Maxsize int // optional
LimitPattern int // optional
LimitTotal int // optional
Modified int // optional
Delete bool // optional
Gatekeeper bool // required
Processed string // optional
@@ -48,20 +48,29 @@ type ConfCoordinator struct {
}

type ConfKafka struct {
Bootstrap string //required
Protocol string //required
Certlocation string //required
Keylocation string //required
Calocation string //required
Topic string //required
Bootstrap string // required
Protocol string // required
Certlocation string // required
Keylocation string // required
Calocation string // required
Topic string // required
S3redundancy string // optional, defaults to false
}

type ConfS3 struct {
AccessKey string // optional, can be left blank if S3redundancy set to false in ConfKafka
SecretKey string // optional, can be left blank if S3redundancy set to false in ConfKafka
BucketName string // optional, can be left blank if S3redundancy set to false in ConfKafka
Region string // optional, can be left blank if S3redundancy set to false in ConfKafka
Endpoint string // optional, can be left blank if S3redundancy set to false in ConfKafka
}

type ConfGatekeeper struct {
Addr string // required
DB int // required
Pool int // required
Read time.Duration // required
TTL time.Duration //required
TTL time.Duration // required
}

// determines what action the client takes with responses, defaults to discarding messages
@@ -95,7 +104,8 @@ type Frontend struct {
Coordinator ConfCoordinator // required
Gatekeeper ConfGatekeeper // required
Response ConfResponse // optional
Broker ConfKafka //required
Broker ConfKafka // required
S3 ConfS3 // optional
}

type Manager struct {
40 changes: 40 additions & 0 deletions src/go/pkg/tossS3/tossS3Delete.go
@@ -0,0 +1,40 @@
package tosss3

import (
"log"

"github.com/aws/aws-sdk-go/aws"
"github.com/aws/aws-sdk-go/aws/credentials"
"github.com/aws/aws-sdk-go/aws/session"
"github.com/aws/aws-sdk-go/service/s3"
)

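// tossS3Delete removes a replayed log object from the redundancy bucket and
// waits until S3 reports that the object no longer exists.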
func tossS3Delete(AccessKey string, AccessSecret string, myBucket string, filename string, region string, endpoint string) {

// Create a session with custom credentials
var awsConfig = &aws.Config{
Region: aws.String(region),
Endpoint: aws.String(endpoint),
Credentials: credentials.NewStaticCredentials(string(AccessKey), string(AccessSecret), ""),
}

// The session the S3 service client will use
sess := session.Must(session.NewSession(awsConfig))

// Create S3 service client
svc := s3.New(sess)

// Delete log from S3 now that it's been read
_, err := svc.DeleteObject(&s3.DeleteObjectInput{Bucket: aws.String(string(myBucket)), Key: aws.String(filename)})
if err != nil {
log.Printf("Unable to delete object %q from bucket %q, %v", filename, myBucket, err)
}

err = svc.WaitUntilObjectNotExists(&s3.HeadObjectInput{
Bucket: aws.String(string(myBucket)),
Key: aws.String(filename),
})
if err != nil {
log.Printf("Failed while waiting for object %q to be deleted from S3, %v", filename, err)
}
}
46 changes: 46 additions & 0 deletions src/go/pkg/tossS3/tossS3Download.go
@@ -0,0 +1,46 @@
package tosss3

import (
"log"

"github.com/aws/aws-sdk-go/aws"
"github.com/aws/aws-sdk-go/aws/credentials"
"github.com/aws/aws-sdk-go/aws/session"
"github.com/aws/aws-sdk-go/service/s3"
"github.com/aws/aws-sdk-go/service/s3/s3manager"
)

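// DownloadFromS3 prunes stale log objects from the redundancy bucket, downloads
// the named object into memory, deletes it from the bucket once read, and returns
// its contents so they can be replayed to Kafka.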
func DownloadFromS3(AccessKey string, AccessSecret string, myBucket string, filename string, region string, endpoint string) []byte {

// Create a session with custom credentials
var awsConfig = &aws.Config{
Region: aws.String(region),
Endpoint: aws.String(endpoint),
Credentials: credentials.NewStaticCredentials(string(AccessKey), string(AccessSecret), ""),
}

// The session the S3 downloader will use
sess := session.Must(session.NewSession(awsConfig))

// Create a downloader to retrieve the log file(s); there should normally be only one
downloader := s3manager.NewDownloader(sess)

// Prune out old logs before downloading to reduce time to catch up
tossS3PruneLogs(AccessKey, AccessSecret, myBucket, region, endpoint)

buff := &aws.WriteAtBuffer{}

// Download the log object into the in-memory buffer
numBytes, err := downloader.Download(buff, &s3.GetObjectInput{
Bucket: aws.String(string(myBucket)),
Key: aws.String(filename),
})
if err != nil {
log.Printf("failed to download file, %v", err)
}
log.Printf("Persistence log downloaded from S3, %d bytes\n", numBytes)

tossS3Delete(AccessKey, AccessSecret, myBucket, filename, region, endpoint)

return buff.Bytes()
}
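The remaining helpers this commit adds to the tossS3 package (`UploadToS3`, `ListS3BucketContents`, and `tossS3PruneLogs`) are called from `main.go` but are not shown in the diff above. Below is a minimal, speculative sketch of what `UploadToS3` could look like, assuming the same aws-sdk-go session pattern as the helpers above; the object key naming and hourly rotation are assumptions, not the actual implementation:
```go
package tosss3

import (
	"log"
	"os"
	"time"

	"github.com/aws/aws-sdk-go/aws"
	"github.com/aws/aws-sdk-go/aws/credentials"
	"github.com/aws/aws-sdk-go/aws/session"
	"github.com/aws/aws-sdk-go/service/s3/s3manager"
)

// UploadToS3 (speculative sketch): uploads the local strelka.log to the
// redundancy bucket using the same session pattern as the helpers above.
func UploadToS3(AccessKey string, AccessSecret string, myBucket string, region string, endpoint string) {

	// Create a session with custom credentials
	var awsConfig = &aws.Config{
		Region:      aws.String(region),
		Endpoint:    aws.String(endpoint),
		Credentials: credentials.NewStaticCredentials(AccessKey, AccessSecret, ""),
	}

	// The session the S3 uploader will use
	sess := session.Must(session.NewSession(awsConfig))
	uploader := s3manager.NewUploader(sess)

	// Open the local log file for reading
	localLog, err := os.Open("/var/log/strelka/strelka.log")
	if err != nil {
		log.Printf("failed to open strelka.log for upload, %v", err)
		return
	}
	defer localLog.Close()

	// Key the object by the current hour so stored logs rotate hourly (assumption)
	key := "strelka-" + time.Now().UTC().Format("2006-01-02T15") + ".log"

	// Upload the log contents to the redundancy bucket
	_, err = uploader.Upload(&s3manager.UploadInput{
		Bucket: aws.String(myBucket),
		Key:    aws.String(key),
		Body:   localLog,
	})
	if err != nil {
		log.Printf("failed to upload strelka.log to S3, %v", err)
	}
}
```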