diff --git a/.gitignore b/.gitignore index 18d4830..9193832 100644 --- a/.gitignore +++ b/.gitignore @@ -6,4 +6,5 @@ config.yml goskyr.class goskyr.model .release-env -.local-env \ No newline at end of file +.local-env +config-test-dir \ No newline at end of file diff --git a/main.go b/main.go index 59a1ef6..4480e5e 100644 --- a/main.go +++ b/main.go @@ -118,11 +118,11 @@ func main() { writer = &output.StdoutWriter{} } else { switch config.Writer.Type { - case "stdout": + case output.STDOUT_WRITER_TYPE: writer = &output.StdoutWriter{} - case "api": + case output.API_WRITER_TYPE: writer = output.NewAPIWriter(&config.Writer) - case "file": + case output.FILE_WRITER_TYPE: writer = output.NewFileWriter(&config.Writer) default: log.Fatalf("writer of type %s not implemented", config.Writer.Type) diff --git a/output/api.go b/output/api.go index 27198ad..25705fd 100644 --- a/output/api.go +++ b/output/api.go @@ -33,13 +33,12 @@ func (f *APIWriter) Write(items chan map[string]interface{}) { apiPassword := f.writerConfig.Password deletedSources := map[string]bool{} - nrItems := 0 + nrItemsWritten := 0 batch := []map[string]interface{}{} // This code assumes that within one source, items are ordered // by date ascending. for item := range items { - nrItems++ currentSrc := item["sourceUrl"].(string) if _, found := deletedSources[currentSrc]; !found { deletedSources[currentSrc] = true @@ -55,7 +54,7 @@ func (f *APIWriter) Write(items chan map[string]interface{}) { req.SetBasicAuth(apiUser, apiPassword) resp, err := client.Do(req) if err != nil { - log.Printf("error while sending event %+v to the api: %v", item, err) + log.Printf("error while deleting items from the api: %v\n", err) continue } if resp.StatusCode != 200 { @@ -63,25 +62,33 @@ func (f *APIWriter) Write(items chan map[string]interface{}) { if err != nil { log.Fatal(err) } - log.Fatalf("error while deleting items. Status Code: %d\nUrl: %s Response: %s", resp.StatusCode, deleteURL, body) + log.Fatalf("error while deleting items. Status Code: %d\nUrl: %s Response: %s\n", resp.StatusCode, deleteURL, body) } resp.Body.Close() } batch = append(batch, item) if len(batch) == 100 { - postBatch(client, batch, apiURL, apiUser, apiPassword) + if err := postBatch(client, batch, apiURL, apiUser, apiPassword); err != nil { + fmt.Printf("%v\n", err) + } else { + nrItemsWritten = nrItemsWritten + 100 + } batch = []map[string]interface{}{} } } - postBatch(client, batch, apiURL, apiUser, apiPassword) + if err := postBatch(client, batch, apiURL, apiUser, apiPassword); err != nil { + fmt.Printf("%v\n", err) + } else { + nrItemsWritten = nrItemsWritten + len(batch) + } - log.Printf("wrote %d items from %d sources to the api", nrItems, len(deletedSources)) + log.Printf("wrote %d items from %d sources to the api", nrItemsWritten, len(deletedSources)) } -func postBatch(client *http.Client, batch []map[string]interface{}, apiURL, apiUser, apiPassword string) { +func postBatch(client *http.Client, batch []map[string]interface{}, apiURL, apiUser, apiPassword string) error { concertJSON, err := json.Marshal(batch) if err != nil { - log.Fatal(err) + return err } req, _ := http.NewRequest("POST", apiURL, bytes.NewBuffer(concertJSON)) req.Header = map[string][]string{ @@ -90,16 +97,16 @@ func postBatch(client *http.Client, batch []map[string]interface{}, apiURL, apiU req.SetBasicAuth(apiUser, apiPassword) resp, err := client.Do(req) if err != nil { - log.Printf("error while sending post request: %v", err) - return + return fmt.Errorf("error while sending post request: %v", err) } defer resp.Body.Close() if resp.StatusCode != 201 { body, err := io.ReadAll(resp.Body) if err != nil { - log.Printf("error while reading post request respones: %v", err) + return fmt.Errorf("error while reading post request respones: %v", err) } else { - log.Printf("error while adding new events. Status Code: %d Response: %s", resp.StatusCode, body) + return fmt.Errorf("error while adding new events. Status Code: %d Response: %s", resp.StatusCode, body) } } + return nil } diff --git a/output/writer.go b/output/writer.go index 0c540f0..e9fea94 100644 --- a/output/writer.go +++ b/output/writer.go @@ -10,9 +10,15 @@ type Writer interface { // which is responsible for writing the scraped data to a specific output // eg. stdout. type WriterConfig struct { - Type string `yaml:"type" env:"WRITER_TYPE" env-default:"stdout"` + Type string `yaml:"type" env:"WRITER_TYPE"` Uri string `yaml:"uri" env:"WRITER_URI"` User string `yaml:"user" env:"WRITER_USER"` Password string `yaml:"password" env:"WRITER_PASSWORD"` FilePath string `yaml:"filepath" env:"WRITER_FILEPATH"` } + +const ( + STDOUT_WRITER_TYPE = "stdout" + FILE_WRITER_TYPE = "file" + API_WRITER_TYPE = "api" +) diff --git a/scraper/scraper.go b/scraper/scraper.go index 753408a..48d54ee 100644 --- a/scraper/scraper.go +++ b/scraper/scraper.go @@ -52,21 +52,32 @@ func NewConfig(configPath string) (*Config, error) { err := filepath.WalkDir(configPath, func(path string, d fs.DirEntry, err error) error { if !d.IsDir() { var configTmp Config - if err := cleanenv.ReadConfig(path, configTmp); err != nil { + if err := cleanenv.ReadConfig(path, &configTmp); err != nil { return err } config.Scrapers = append(config.Scrapers, configTmp.Scrapers...) if configTmp.Writer.Type != "" { if config.Writer.Type == "" { + config.Writer = configTmp.Writer + } else { + return fmt.Errorf("ERROR: config files must only contain max. one writer") } } } return nil // skipping everything that is not a file }) + if err != nil { + return nil, err + } } else { - err := cleanenv.ReadConfig(configPath, &config) - return &config, err + if err := cleanenv.ReadConfig(configPath, &config); err != nil { + return nil, err + } + } + if config.Writer.Type == "" { + config.Writer.Type = output.STDOUT_WRITER_TYPE } + return &config, nil } // RegexConfig is used for extracting a substring from a string based on the