Migrate rules add and edit pages to HTMX
1. Replace JavaScript-based rule management with HTMX-powered interactions
2. Update server-side handlers to support HTMX requests
3. Modify HTML templates to use HTMX attributes
4. Remove unnecessary JavaScript code
5. Update tests to cover the new functionality
paskal authored and umputun committed Sep 23, 2024
1 parent f77739b commit aece0cd
Showing 15 changed files with 478 additions and 565 deletions.
2 changes: 1 addition & 1 deletion backend/datastore/mongo.go
@@ -22,7 +22,7 @@ type MongoServer struct {
func New(connectionURI, dbName string, delay time.Duration) (*MongoServer, error) {
log.Printf("[INFO] connect to mongo server with db=%s, delay=%s", dbName, delay)
if delay > 0 {
log.Printf("[DEBUG] initial mongo delay=%d", delay)
log.Printf("[DEBUG] initial mongo delay=%s", delay)
time.Sleep(delay)
}
if connectionURI == "" {
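The one-character fix above matters because time.Duration implements fmt.Stringer: the %s verb prints the value via Duration.String() (for example "5s"), while %d falls back to the raw int64 nanosecond count. A minimal standalone illustration:

package main

import (
	"fmt"
	"time"
)

func main() {
	delay := 5 * time.Second
	fmt.Printf("delay=%d\n", delay) // old verb: delay=5000000000 (raw nanoseconds)
	fmt.Printf("delay=%s\n", delay) // new verb: delay=5s
}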
27 changes: 22 additions & 5 deletions backend/extractor/readability.go
@@ -59,9 +59,19 @@ var (
const userAgent = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.4 Safari/605.1.15"

// Extract fetches page and retrieves article
func (f UReadability) Extract(ctx context.Context, reqURL string) (rb *Response, err error) {
func (f UReadability) Extract(ctx context.Context, reqURL string) (*Response, error) {
return f.extractWithRules(ctx, reqURL, nil)
}

// ExtractByRule fetches page and retrieves article using a specific rule
func (f UReadability) ExtractByRule(ctx context.Context, reqURL string, rule *datastore.Rule) (*Response, error) {
return f.extractWithRules(ctx, reqURL, rule)
}

// extractWithRules is the core function that handles extraction with or without a specific rule
func (f UReadability) extractWithRules(ctx context.Context, reqURL string, rule *datastore.Rule) (*Response, error) {
log.Printf("[INFO] extract %s", reqURL)
rb = &Response{}
rb := &Response{}

httpClient := &http.Client{Timeout: time.Second * f.TimeOut}
req, err := http.NewRequestWithContext(ctx, "GET", reqURL, nil)
@@ -93,7 +103,7 @@ func (f UReadability) Extract(ctx context.Context, reqURL string) (rb *Response,

var body string
rb.ContentType, rb.Charset, body = f.toUtf8(dataBytes, resp.Header)
rb.Content, rb.Rich, err = f.getContent(ctx, body, reqURL)
rb.Content, rb.Rich, err = f.getContent(ctx, body, reqURL, rule)
if err != nil {
log.Printf("[WARN] failed to parse %s, error=%v", reqURL, err)
return nil, err
@@ -127,8 +137,10 @@ func (f UReadability) Extract(ctx context.Context, reqURL string) (rb *Response,
return rb, nil
}

// gets content from raw body string, both content (text only) and rich (with html tags)
func (f UReadability) getContent(ctx context.Context, body, reqURL string) (content, rich string, err error) {
// getContent retrieves content from raw body string, both content (text only) and rich (with html tags)
// if rule is provided, it uses custom rule, otherwise tries to retrieve one from the storage,
// and at last tries to use general readability parser
func (f UReadability) getContent(ctx context.Context, body, reqURL string, rule *datastore.Rule) (content, rich string, err error) {
// general parser
genParser := func(body, _ string) (content, rich string, err error) {
doc, err := readability.NewDocument(body)
@@ -159,6 +171,11 @@ func (f UReadability) getContent(ctx context.Context, body, reqURL string) (cont
return f.getText(res, ""), res, nil
}

if rule != nil {
log.Printf("[DEBUG] custom rule provided for %s: %v", reqURL, rule)
return customParser(body, reqURL, *rule)
}

if f.Rules != nil {
r := f.Rules
if rule, found := r.Get(ctx, reqURL); found {
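The new ExtractByRule entry point lets a caller supply a rule directly and skip the rules storage, which is what the preview handler in server.go below relies on. A hedged usage sketch (project import paths are omitted; the URL and CSS selector are illustrative):

// previewOne extracts a single page with an ad-hoc, never-persisted rule,
// mirroring what handlePreview does for each submitted test URL.
func previewOne(ctx context.Context, f extractor.UReadability, pageURL string) {
	rule := &datastore.Rule{
		Enabled: true,
		Content: "div.article-body", // illustrative CSS selector for the article body
	}
	resp, err := f.ExtractByRule(ctx, pageURL, rule)
	if err != nil {
		log.Printf("[WARN] failed to extract %s: %v", pageURL, err)
		return
	}
	log.Printf("[INFO] extracted %q: %d chars of plain text", resp.Title, len(resp.Content))
}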
2 changes: 1 addition & 1 deletion backend/extractor/readability_test.go
@@ -171,7 +171,7 @@ func TestGetContentCustom(t *testing.T) {
assert.NoError(t, err)
body := string(dataBytes)

content, rich, err := lr.getContent(context.Background(), body, ts.URL+"/2015/09/25/poiezdka-s-apple-maps/")
content, rich, err := lr.getContent(context.Background(), body, ts.URL+"/2015/09/25/poiezdka-s-apple-maps/", nil)
assert.NoError(t, err)
assert.Equal(t, 6988, len(content))
assert.Equal(t, 7169, len(rich))
163 changes: 124 additions & 39 deletions backend/rest/server.go
@@ -9,6 +9,7 @@ import (
"net/http"
"os"
"path/filepath"
"strings"
"time"

"github.com/didip/tollbooth/v7"
@@ -33,6 +34,7 @@ type Server struct {
Credentials map[string]string

indexPage *template.Template
rulePage *template.Template
}

// JSON is a map alias, just for convenience
@@ -44,6 +46,7 @@ func (s *Server) Run(ctx context.Context, address string, port int, frontendDir

_ = os.Mkdir(filepath.Join(frontendDir, "components"), 0o700)
t := template.Must(template.ParseGlob(filepath.Join(frontendDir, "components", "*.gohtml")))
s.rulePage = template.Must(template.Must(t.Clone()).ParseFiles(filepath.Join(frontendDir, "rule.gohtml")))
s.indexPage = template.Must(template.Must(t.Clone()).ParseFiles(filepath.Join(frontendDir, "index.gohtml")))
httpServer := &http.Server{
Addr: fmt.Sprintf("%s:%d", address, port),
@@ -77,20 +80,19 @@ func (s *Server) routes(frontendDir string) chi.Router {
router.Route("/api", func(r chi.Router) {
r.Get("/content/v1/parser", s.extractArticleEmulateReadability)
r.Post("/extract", s.extractArticle)

r.Get("/rule", s.getRule)
r.Get("/rule/{id}", s.getRuleByID)
r.Get("/rules", s.getAllRules)
r.Post("/auth", s.authFake)

r.Group(func(protected chi.Router) {
protected.Use(basicAuth("ureadability", s.Credentials))
protected.Post("/rule", s.saveRule)
protected.Post("/toggle-rule/{id}", s.toggleRule)
protected.Post("/preview", s.handlePreview)
})
})

router.Get("/", s.handleIndex)
router.Get("/add/", s.handleAdd)
router.Get("/edit/{id}", s.handleEdit)

_ = os.Mkdir(filepath.Join(frontendDir, "static"), 0o700)
fs, err := UM.NewFileServer("/", filepath.Join(frontendDir, "static"), UM.FsOptSPA)
@@ -119,6 +121,42 @@ func (s *Server) handleIndex(w http.ResponseWriter, r *http.Request) {
}
}

func (s *Server) handleAdd(w http.ResponseWriter, _ *http.Request) {
data := struct {
Title string
Rule datastore.Rule
}{
Title: "Добавление правила",
Rule: datastore.Rule{}, // Empty rule for the form
}
err := s.rulePage.ExecuteTemplate(w, "base.gohtml", data)
if err != nil {
log.Printf("[WARN] failed to render add template, %v", err)
http.Error(w, err.Error(), http.StatusInternalServerError)
}
}

func (s *Server) handleEdit(w http.ResponseWriter, r *http.Request) {
id := getBid(chi.URLParam(r, "id"))
rule, found := s.Readability.Rules.GetByID(r.Context(), id)
if !found {
http.Error(w, "Rule not found", http.StatusNotFound)
return
}
data := struct {
Title string
Rule datastore.Rule
}{
Title: "Редактирование правила",
Rule: rule,
}
err := s.rulePage.ExecuteTemplate(w, "base.gohtml", data)
if err != nil {
log.Printf("[WARN] failed to render edit template, %v", err)
http.Error(w, err.Error(), http.StatusInternalServerError)
}
}
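// A minimal standalone sketch of the Clone-and-layer template setup these handlers rely on,
// with inline strings standing in for the shared components and the page .gohtml files
// (the layout markup and block names below are illustrative); it shows why ExecuteTemplate
// targets "base.gohtml" even though the page-specific content lives in rule.gohtml.
// Assumes "html/template" and "io" imports.
func renderSketch(w io.Writer) error {
	shared := template.Must(template.New("base.gohtml").Parse(
		`<html><body>{{block "content" .}}{{end}}</body></html>`))
	rulePage := template.Must(template.Must(shared.Clone()).Parse(
		`{{define "content"}}<h1>{{.Title}}</h1>{{end}}`))
	// executing the shared layout picks up the page-specific "content" block defined above
	return rulePage.ExecuteTemplate(w, "base.gohtml", struct{ Title string }{Title: "Добавление правила"})
}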

func (s *Server) extractArticle(w http.ResponseWriter, r *http.Request) {
artRequest := extractor.Response{}
if err := render.DecodeJSON(r.Body, &artRequest); err != nil {
@@ -176,61 +214,108 @@ func (s *Server) extractArticleEmulateReadability(w http.ResponseWriter, r *http
render.JSON(w, r, &res)
}

// getRule find rule matching url param (domain portion only)
func (s *Server) getRule(w http.ResponseWriter, r *http.Request) {
url := r.URL.Query().Get("url")
if url == "" {
render.Status(r, http.StatusExpectationFailed)
render.JSON(w, r, JSON{"error": "no url passed"})
// generates previews for the provided test URLs
func (s *Server) handlePreview(w http.ResponseWriter, r *http.Request) {
err := r.ParseForm()
if err != nil {
http.Error(w, "Failed to parse form", http.StatusBadRequest)
return
}

rule, found := s.Readability.Rules.Get(r.Context(), url)
if !found {
render.Status(r, http.StatusBadRequest)
render.JSON(w, r, JSON{"error": "not found"})
return
testURLs := strings.Split(r.FormValue("test_urls"), "\n")
content := strings.TrimSpace(r.FormValue("content"))
log.Printf("[INFO] test urls: %v", testURLs)
log.Printf("[INFO] custom rule: %v", content)

// Create a temporary rule for extraction
var tempRule *datastore.Rule
if content != "" {
tempRule = &datastore.Rule{
Enabled: true,
Content: content,
}
}

log.Printf("[DEBUG] rule for %s found, %v", url, rule)
render.JSON(w, r, rule)
}
var responses []extractor.Response
for _, url := range testURLs {
url = strings.TrimSpace(url)
if url == "" {
continue
}

// getRuleByID returns rule by id - GET /rule/:id"
func (s *Server) getRuleByID(w http.ResponseWriter, r *http.Request) {
id := getBid(chi.URLParam(r, "id"))
rule, found := s.Readability.Rules.GetByID(r.Context(), id)
if !found {
render.Status(r, http.StatusBadRequest)
render.JSON(w, r, JSON{"error": "not found"})
return
log.Printf("[DEBUG] custom rule provided for %s: %v", url, tempRule)
result, e := s.Readability.ExtractByRule(r.Context(), url, tempRule)
if e != nil {
log.Printf("[WARN] failed to extract content for %s: %v", url, e)
continue
}

responses = append(responses, *result)
}

// create a new type where Rich would be type template.HTML instead of string,
// to avoid escaping in the template
type result struct {
Title string
Excerpt string
Rich template.HTML
Content string
}

var results []result
for _, r := range responses {
results = append(results, result{
Title: r.Title,
Excerpt: r.Excerpt,
//nolint: gosec // this content is escaped by Extractor, so it's safe to use it as is
Rich: template.HTML(r.Rich),
Content: r.Content,
})
}

data := struct {
Results []result
}{
Results: results,
}
log.Printf("[DEBUG] rule for %s found, %v", id.Hex(), rule)
render.JSON(w, r, &rule)
}

// getAllRules returns list of all rules, including disabled
func (s *Server) getAllRules(w http.ResponseWriter, r *http.Request) {
render.JSON(w, r, s.Readability.Rules.All(r.Context()))
err = s.rulePage.ExecuteTemplate(w, "preview.gohtml", data)
if err != nil {
http.Error(w, err.Error(), http.StatusInternalServerError)
}
}

// saveRule upsert rule, forcing enabled=true
func (s *Server) saveRule(w http.ResponseWriter, r *http.Request) {
rule := datastore.Rule{}
err := r.ParseForm()
if err != nil {
http.Error(w, "Failed to parse form", http.StatusBadRequest)
return
}
rule := datastore.Rule{
Enabled: true,
ID: getBid(r.FormValue("id")),
Domain: r.FormValue("domain"),
Author: r.FormValue("author"),
Content: r.FormValue("content"),
MatchURLs: strings.Split(r.FormValue("match_url"), "\n"),
Excludes: strings.Split(r.FormValue("excludes"), "\n"),
TestURLs: strings.Split(r.FormValue("test_urls"), "\n"),
}

if err := render.DecodeJSON(r.Body, &rule); err != nil {
render.Status(r, http.StatusInternalServerError)
render.JSON(w, r, JSON{"error": err.Error()})
// return error in case domain is not set
if rule.Domain == "" {
http.Error(w, "Domain is required", http.StatusBadRequest)
return
}

rule.Enabled = true
srule, err := s.Readability.Rules.Save(r.Context(), rule)
if err != nil {
render.Status(r, http.StatusBadRequest)
render.JSON(w, r, JSON{"error": err.Error()})
http.Error(w, err.Error(), http.StatusInternalServerError)
return
}

w.Header().Set("HX-Redirect", "/")
render.JSON(w, r, &srule)
}

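Taken together, the new handlers replace the old JSON endpoints with a form-driven flow: HTMX posts the rule form to /api/rule as a regular urlencoded request, and on success saveRule answers with an HX-Redirect header that the htmx client follows back to the index page. A hedged end-to-end sketch against a locally running instance (host, port, and the basic-auth credentials are placeholders; the selector and URL values are illustrative):

package main

import (
	"fmt"
	"log"
	"net/http"
	"net/url"
	"strings"
)

func main() {
	form := url.Values{
		"domain":    {"example.com"}, // required; saveRule rejects an empty domain
		"author":    {"div.author"},  // illustrative selectors and patterns
		"content":   {"div.article-body"},
		"match_url": {"https://example.com/post/.*"},
		"test_urls": {"https://example.com/post/1\nhttps://example.com/post/2"},
	}
	req, err := http.NewRequest(http.MethodPost, "http://localhost:8080/api/rule", strings.NewReader(form.Encode()))
	if err != nil {
		log.Fatal(err)
	}
	req.Header.Set("Content-Type", "application/x-www-form-urlencoded")
	req.SetBasicAuth("admin", "secret") // placeholder credentials for the protected group
	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		log.Fatal(err)
	}
	defer resp.Body.Close()
	fmt.Println("status:", resp.Status)
	fmt.Println("HX-Redirect:", resp.Header.Get("HX-Redirect")) // "/" when the rule was saved
}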
(diffs for the remaining 11 changed files are not shown here)
