Skip to content

Commit

Permalink
feat: parse manifesti by degreeType (multiple files) and by courseNam…
Browse files Browse the repository at this point in the history
…e (one file)

NOTE: see docs/main_process.pdf
NOTE: byCourseName file wasn't originally considered, added during
construction
  • Loading branch information
lorenzocorallo committed Oct 16, 2024
1 parent d659346 commit 8377f35
Show file tree
Hide file tree
Showing 5 changed files with 97 additions and 17 deletions.
48 changes: 48 additions & 0 deletions cmd/parser/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,61 @@ package main

import (
"log/slog"
"path"

"github.com/PoliNetworkOrg/rankings-backend-go/pkg/constants"
"github.com/PoliNetworkOrg/rankings-backend-go/pkg/logger"
"github.com/PoliNetworkOrg/rankings-backend-go/pkg/parser"
"github.com/PoliNetworkOrg/rankings-backend-go/pkg/scraper"
"github.com/PoliNetworkOrg/rankings-backend-go/pkg/utils"
"github.com/PoliNetworkOrg/rankings-backend-go/pkg/writer"
)

// main reads the scraped manifesti list from the data directory and writes
// the parsed output into the parsed-manifesti output folder: one JSON file per
// degree type, plus a single file with every manifesto grouped by course.
//
// Any failure while creating a writer, reading the input or writing an output
// file aborts the program with a panic.
func main() {
	slog.SetDefault(logger.GetDefaultLogger())
	opts := ParseOpts()
	outDir := path.Join(opts.dataDir, constants.OutputBaseFolder, constants.OutputParsedManifestiFolder) // abs path

	slog.Info("argv validation", "data_dir", opts.dataDir)

	// Input: the flat manifesti list, read relative to the data directory.
	smWriter, err := writer.NewWriter[[]scraper.Manifesto](opts.dataDir)
	if err != nil {
		panic(err)
	}

	inputMans, err := smWriter.JsonRead(constants.OutputManifestiListFilename)
	if err != nil {
		panic(err)
	}

	// Output 1: one file per degree type, named after the degree type.
	byDegTypeMans := parser.ParseManifestiByDegreeType(inputMans)
	dtmWriter, err := writer.NewWriter[parser.ManifestiByDegreeType](outDir)
	if err != nil {
		panic(err)
	}
	for _, m := range byDegTypeMans {
		fn := utils.MakeFilename(m.DegreeType, ".json")
		if err := dtmWriter.JsonWrite(fn, m, false); err != nil {
			slog.Error("error while writing parsed manifesti byDegreeType (grouped)", "filename", fn)
			panic(err)
		}

		slog.Info("manifesti parser: successful write", "filename", fn)
	}

	// Output 2: a single file with all manifesti grouped by course.
	byCourseMans := parser.ParseManifestiByCourse(inputMans)
	cmWriter, err := writer.NewWriter[parser.ManifestiByCourse](outDir)
	if err != nil {
		panic(err)
	}

	// Write to the same name that is logged, so the two cannot drift apart if
	// the constant is ever changed.
	cmFn := constants.OutputParsedManifestiAllFilename
	if err := cmWriter.JsonWrite(cmFn, byCourseMans, false); err != nil {
		slog.Error("error while writing parsed manifesti byCourse (all)", "filename", cmFn)
		panic(err)
	}

	slog.Info("manifesti parser: successful write", "filename", cmFn)
}
4 changes: 2 additions & 2 deletions cmd/scraper/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -109,13 +109,13 @@ func GetRemoteManifesti() ([]byte, []scraper.Manifesto, error) {
return nil, nil, err
}

out := parser.ManifestiJson{}
out := parser.ManifestiByDegreeType{}
err = json.Unmarshal(bytes, &out.Data)
if err != nil {
return bytes, nil, err
}

return bytes, out.GetSlice(), err
return bytes, out.GetAll(), err
}

func DoLocalEqualsRemoteManifesti(w *writer.Writer[[]scraper.Manifesto]) (bool, error) {
Expand Down
5 changes: 4 additions & 1 deletion pkg/constants/output.go
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
package constants

// File and folder names used for the artifacts produced by the tools.
// OutputBaseFolder is declared exactly once: a second declaration in the same
// block is a redeclaration error.
const (
	OutputHtmlFolder    = "html"
	OutputLinksFilename = "links.txt"
	// NOTE: the identifier is spelled "Filname"; it is kept unchanged because
	// renaming it would break any code referencing it.
	OutputStatsFilname          = "stats.json"
	OutputManifestiListFilename = "manifesti_list.json"

	OutputBaseFolder                 = "output"
	OutputParsedManifestiFolder      = "manifesti"
	OutputParsedManifestiAllFilename = "all.json"

	TmpDirectoryName = "tmp"
)
48 changes: 35 additions & 13 deletions pkg/parser/manifesti.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,32 @@ type (
courseMap = map[string]locationMap
)

type ManifestiJson struct {
Data map[string]courseMap // json output structure
// ManifestiByDegreeType is the JSON shape for the manifesti of one degree
// type, grouped by course.
type ManifestiByDegreeType struct {
// DegreeType is serialized under the "degreeType" key.
DegreeType string `json:"degreeType"`
// Data is the course grouping (a courseMap, i.e. map[string]locationMap),
// serialized under the "data" key.
Data courseMap `json:"data"`
}

// ManifestiByCourse is the JSON shape for a set of manifesti grouped by
// course only.
type ManifestiByCourse struct {
// Data is the course grouping (a courseMap, i.e. map[string]locationMap),
// serialized under the "data" key.
Data courseMap `json:"data"`
}

// ParseManifestiByDegreeType groups mans by degree type and returns one
// ManifestiByDegreeType per group, each holding that group's manifesti
// grouped by course.
//
// The returned slice is built by ranging over the degree-type grouping, so
// the order of its elements is unspecified; callers that need a stable order
// must sort the result themselves.
func ParseManifestiByDegreeType(mans []scraper.Manifesto) []ManifestiByDegreeType {
	// Group once and reuse the result for both the capacity hint and the loop.
	byDegType := groupByDegreeType(mans)
	out := make([]ManifestiByDegreeType, 0, len(byDegType))
	for dt, all := range byDegType {
		out = append(out, ManifestiByDegreeType{
			DegreeType: dt,
			Data:       groupByCourse(all),
		})
	}

	return out
}

// ParseManifestiByCourse groups every manifesto in mans by course and wraps
// the grouping in a ManifestiByCourse value.
func ParseManifestiByCourse(mans []scraper.Manifesto) ManifestiByCourse {
	var result ManifestiByCourse
	result.Data = groupByCourse(mans)
	return result
}

func groupByDegreeType(mans []scraper.Manifesto) degreeMap {
Expand Down Expand Up @@ -40,18 +64,16 @@ func groupByCourse(mans []scraper.Manifesto) courseMap {
return out
}

func (m *ManifestiJson) GetSlice() []scraper.Manifesto {
func (m *ManifestiByDegreeType) GetAll() []scraper.Manifesto {
out := make([]scraper.Manifesto, 0)
for dtk, m1 := range m.Data {
for ck, m2 := range m1 {
for lk, url := range m2 {
out = append(out, scraper.Manifesto {
Name: ck,
Location: lk,
DegreeType: dtk,
Url: url,
})
}
for ck, m2 := range m.Data {
for lk, url := range m2 {
out = append(out, scraper.Manifesto{
Name: ck,
Location: lk,
DegreeType: m.DegreeType,
Url: url,
})
}
}

Expand Down
9 changes: 8 additions & 1 deletion pkg/utils/fs.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@ import (
"io/fs"
"log/slog"
"os"
"path"
"path/filepath"
"strings"

"github.com/PoliNetworkOrg/rankings-backend-go/pkg/constants"
)
Expand Down Expand Up @@ -63,3 +63,10 @@ func TmpDirectory() (string, error) {

return tmpPath, nil
}

// MakeFilename builds a file name from str: surrounding whitespace is
// trimmed, the text is lowercased, every remaining space becomes an
// underscore, and ext is appended verbatim (include the leading dot in ext).
func MakeFilename(str string, ext string) string {
	base := strings.ToLower(strings.TrimSpace(str))
	return strings.ReplaceAll(base, " ", "_") + ext
}

0 comments on commit 8377f35

Please sign in to comment.