
Merge pull request #305 from Southclaws/node-fill-via-link
implement a fill rule for links->nodes
Southclaws authored Nov 22, 2024
2 parents e341378 + f9f46b8 commit 49a28af
Showing 31 changed files with 909 additions and 346 deletions.
4 changes: 4 additions & 0 deletions api/openapi.yaml
@@ -1670,6 +1670,9 @@ paths:
When a link is submitted, it is first "cleaned" to remove any fragments.
tags: [links]
parameters:
- $ref: "#/components/parameters/NodeContentFillRuleQuery"
- $ref: "#/components/parameters/NodeContentFillTargetQuery"
requestBody: { $ref: "#/components/requestBodies/LinkCreate" }
responses:
default: { $ref: "#/components/responses/InternalServerError" }
@@ -3079,6 +3082,7 @@ components:
ContentFillRule:
type: string
enum:
- create
- replace
# NOTE: Prepend and append are not implemented yet.
# - prepend
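
The two parameter refs added above plug the fill rule into the link-submission endpoint. As an illustration only, the sketch below shows how a client might pass a fill rule and a target node when submitting a link. The endpoint path, the concrete query parameter names behind NodeContentFillRuleQuery and NodeContentFillTargetQuery, and the LinkCreate body shape are defined elsewhere in the spec and are not part of this diff, so every name below is a placeholder.

```go
// Illustrative sketch only: endpoint path, query parameter names, and the
// LinkCreate body shape are assumptions, not taken from this diff.
package linkexample

import (
	"net/http"
	"net/url"
	"strings"
)

func submitLinkWithFill(apiBase, link, targetNodeID string) (*http.Response, error) {
	q := url.Values{}
	q.Set("node_content_fill_rule", "create")       // placeholder name; value would be one of the ContentFillRule enum members
	q.Set("node_content_fill_target", targetNodeID) // placeholder name; the node to fill from the submitted link

	body := strings.NewReader(`{"url":"` + link + `"}`) // assumed LinkCreate shape

	req, err := http.NewRequest(http.MethodPost, apiBase+"/links?"+q.Encode(), body)
	if err != nil {
		return nil, err
	}
	req.Header.Set("Content-Type", "application/json")

	return http.DefaultClient.Do(req)
}
```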
12 changes: 6 additions & 6 deletions app/resources/asset/asset_enum_gen.go

Some generated files are not rendered by default.

8 changes: 4 additions & 4 deletions app/resources/asset/fillrule.go
@@ -7,10 +7,10 @@ import "github.com/rs/xid"
type contentFillRuleEnum string

const (
contentFillRuleEnumNone contentFillRuleEnum = "none"
contentFillRulePrepend contentFillRuleEnum = "prepend"
contentFillRuleAppend contentFillRuleEnum = "append"
contentFillRuleReplace contentFillRuleEnum = "replace"
contentFillRuleCreate contentFillRuleEnum = "create"
contentFillRulePrepend contentFillRuleEnum = "prepend"
contentFillRuleAppend contentFillRuleEnum = "append"
contentFillRuleReplace contentFillRuleEnum = "replace"
)

type ContentFillCommand struct {
53 changes: 48 additions & 5 deletions app/resources/datagraph/content.go
@@ -53,6 +53,7 @@ type Content struct {
short string
plain string
links []string
media []string
sdrs RefList
}

@@ -110,6 +111,10 @@ func (r Content) Links() []string {
return r.links
}

func (r Content) Media() []string {
return r.media
}

func (r Content) References() RefList {
return r.sdrs
}
@@ -123,18 +128,35 @@ type options struct {
}
type option func(*options)

func WithBaseURL(url string) option {
return func(o *options) {
o.baseURL = url
}
}

// NewRichText will pull out any meaningful structured information from markdown
// document this includes a summary of the text and all link URLs for hydrating.
func NewRichText(raw string) (Content, error) {
return NewRichTextFromReader(strings.NewReader(raw))
}

// NewRichText will pull out any meaningful structured information from markdown
// document this includes a summary of the text and all link URLs for hydrating.
func NewRichTextWithOptions(raw string, opts ...option) (Content, error) {
return NewRichTextFromReader(strings.NewReader(raw), opts...)
}

func NewRichTextFromReader(r io.Reader, opts ...option) (Content, error) {
o := options{baseURL: "ignore:"}
for _, opt := range opts {
opt(&o)
}

baseURL, err := url.Parse(o.baseURL)
if err != nil {
return Content{}, fault.Wrap(err)
}

buf, err := io.ReadAll(r)
if err != nil {
return Content{}, fault.Wrap(err)
@@ -154,20 +176,22 @@ func NewRichTextFromReader(r io.Reader, opts ...option) (Content, error) {

short := getSummary(result)

bodyTree, links, refs := extractReferences(htmlTree)
bodyTree, links, media, refs := extractReferences(htmlTree, baseURL)

return Content{
html: bodyTree,
short: short,
plain: result.TextContent,
links: links,
media: media,
sdrs: refs,
}, nil
}

func extractReferences(htmlTree *html.Node) (*html.Node, []string, RefList) {
func extractReferences(htmlTree *html.Node, baseURL *url.URL) (*html.Node, []string, []string, RefList) {
bodyTree := &html.Node{}
links := []string{}
media := []string{}
sdrs := []url.URL{}

if htmlTree.DataAtom == atom.Body {
@@ -177,9 +201,9 @@ func extractReferences(htmlTree *html.Node) (*html.Node, []string, RefList) {
var walk func(n *html.Node)
walk = func(n *html.Node) {
if n.Parent != nil {
switch n.Parent.DataAtom {
switch n.DataAtom {
case atom.A:
href, hasHref := lo.Find(n.Parent.Attr, func(a html.Attribute) bool {
href, hasHref := lo.Find(n.Attr, func(a html.Attribute) bool {
return strings.ToLower(a.Key) == "href"
})

@@ -193,6 +217,25 @@
}
}
}

case atom.Img:
src, hasSrc := lo.Find(n.Attr, func(a html.Attribute) bool {
return strings.ToLower(a.Key) == "src"
})

if hasSrc {
if parsed, err := url.Parse(src.Val); err == nil {
switch parsed.Scheme {
case "":
abs := baseURL.ResolveReference(parsed).String()
media = append(media, abs)
case "http", "https":
media = append(media, parsed.String())
case RefScheme:
sdrs = append(sdrs, *parsed)
}
}
}
}
}

@@ -217,7 +260,7 @@ func extractReferences(htmlTree *html.Node) (*html.Node, []string, RefList) {
refs = append(refs, r)
}

return bodyTree, links, refs
return bodyTree, links, media, refs
}

func getSummary(article readability.Article) string {
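
The new img handling in extractReferences resolves scheme-less src values against the base URL configured via WithBaseURL (defaulting to the "ignore:" placeholder), keeps absolute http/https URLs as-is, and routes RefScheme URLs into the reference list. A minimal standard-library sketch of just the resolution step, using one of the relative srcs from the tests below:

```go
// Minimal sketch of the relative-src resolution performed in extractReferences:
// a scheme-less image src is resolved against the configured base URL.
package main

import (
	"fmt"
	"net/url"
)

func main() {
	base, _ := url.Parse("https://barney.is")
	src, _ := url.Parse("/_next/image?url=%2Fphotography%2Fcity-of-london%2F30.jpg&w=3840&q=75")

	// ResolveReference joins the relative reference with the base URL,
	// producing the absolute URL that ends up in Content.Media().
	fmt.Println(base.ResolveReference(src).String())
	// https://barney.is/_next/image?url=%2Fphotography%2Fcity-of-london%2F30.jpg&w=3840&q=75
}
```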
42 changes: 42 additions & 0 deletions app/resources/datagraph/content_test.go
@@ -15,6 +15,7 @@ func check(t *testing.T, want Content) func(got Content, err error) {
require.NoError(t, err)
assert.Equal(t, want.short, got.short)
assert.Equal(t, want.links, got.links)
assert.Equal(t, want.media, got.media)
}
}

@@ -23,6 +24,7 @@ func TestNewRichText(t *testing.T) {
check(t, Content{
short: `Here's a paragraph. It's pretty neat. Here's the rest of the text. neat photo right? This is quite a long post, the summary...`,
links: []string{},
media: []string{"http://image.com"},
})(NewRichText(`<h1>heading</h1>
<p>Here's a paragraph. It's pretty neat.</p>
@@ -40,6 +42,7 @@ func TestNewRichText(t *testing.T) {
check(t, Content{
short: `Here's a paragraph. It's pretty neat. here are my favourite ovens here are my favourite trees`,
links: []string{"https://ao.com/cooking/ovens", "https://tre.ee/trees/favs"},
media: []string{},
})(NewRichText(`<h1>heading</h1>
<p>Here's a paragraph. It's pretty neat.</p>
@@ -49,12 +52,51 @@ func TestNewRichText(t *testing.T) {
`))
})

t.Run("pull_images", func(t *testing.T) {
check(t, Content{
short: `Here are some cool photos.`,
links: []string{},
media: []string{
"https://barney.is/_next/image?url=%2Fphotography%2Fcity-of-london%2Fcarters-halt.jpg&w=3840&q=75",
"https://barney.is/_next/image?url=%2Fphotography%2Fcity-of-london%2F30.jpg&w=3840&q=75",
"https://barney.is/_next/image?url=%2Fphotography%2Fcity-of-london%2Fboxes.jpg&w=2048&q=75",
},
})(NewRichText(`<h1>heading</h1>
<p>Here are some cool photos.</p>
<img src="https://barney.is/_next/image?url=%2Fphotography%2Fcity-of-london%2Fcarters-halt.jpg&w=3840&q=75" />
<img src="https://barney.is/_next/image?url=%2Fphotography%2Fcity-of-london%2F30.jpg&w=3840&q=75" />
<img src="https://barney.is/_next/image?url=%2Fphotography%2Fcity-of-london%2Fboxes.jpg&w=2048&q=75" />
`))
})

t.Run("pull_images_relative", func(t *testing.T) {
check(t, Content{
short: `Here are some cool photos.`,
links: []string{},
media: []string{
"https://barney.is/_next/image?url=%2Fphotography%2Fcity-of-london%2Fcarters-halt.jpg&w=3840&q=75",
"https://barney.is/_next/image?url=%2Fphotography%2Fcity-of-london%2F30.jpg&w=3840&q=75",
"https://barney.is/_next/image?url=%2Fphotography%2Fcity-of-london%2Fboxes.jpg&w=2048&q=75",
},
})(NewRichTextWithOptions(`<h1>heading</h1>
<p>Here are some cool photos.</p>
<img src="/_next/image?url=%2Fphotography%2Fcity-of-london%2Fcarters-halt.jpg&w=3840&q=75" />
<img src="/_next/image?url=%2Fphotography%2Fcity-of-london%2F30.jpg&w=3840&q=75" />
<img src="/_next/image?url=%2Fphotography%2Fcity-of-london%2Fboxes.jpg&w=2048&q=75" />
`, WithBaseURL("https://barney.is")))
})

t.Run("with_uris", func(t *testing.T) {
mention := utils.Must(xid.FromString("cn2h3gfljatbqvjqctdg"))

check(t, Content{
short: `hey @southclaws!`,
links: []string{},
media: []string{},
sdrs: RefList{
{Kind: KindProfile, ID: mention},
},
9 changes: 8 additions & 1 deletion app/resources/mq/message_types.go
@@ -17,7 +17,9 @@ import (
)

type IndexNode struct {
ID library.NodeID
ID library.NodeID
SummariseContent bool
AutoTag bool
}

type DeleteNode struct {
@@ -40,6 +42,11 @@ type IndexProfile struct {
ID account.AccountID
}

type DownloadAsset struct {
URL string
ContentFillRule opt.Optional[asset.ContentFillCommand]
}

type AnalyseAsset struct {
AssetID xid.ID
ContentFillRule opt.Optional[asset.ContentFillCommand]
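
DownloadAsset is the new queue message that carries a remote media URL to the download job later in this diff, optionally tagged with the fill command that tells the consumer which node should receive the uploaded asset. A rough sketch of constructing one follows; ContentFillCommand's full field set is not shown in this diff (only TargetNodeID appears, in download.go), so its field type here is an assumption.

```go
// Rough sketch, not taken from the diff: building a DownloadAsset message for a
// media URL extracted from rich-text content. The type of TargetNodeID is an
// assumption based on the library.NewID(fr.TargetNodeID) call in download.go.
package mqexample

import (
	"github.com/Southclaws/opt"
	"github.com/rs/xid"

	"github.com/Southclaws/storyden/app/resources/asset"
	"github.com/Southclaws/storyden/app/resources/mq"
)

func newDownloadMessage(mediaURL string, target xid.ID) mq.DownloadAsset {
	return mq.DownloadAsset{
		URL: mediaURL,
		ContentFillRule: opt.New(asset.ContentFillCommand{
			TargetNodeID: target, // node that should be updated with the downloaded asset
		}),
	}
}
```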
17 changes: 10 additions & 7 deletions app/services/asset/analyse_job/analyse.go
@@ -5,23 +5,26 @@ import (

"github.com/Southclaws/opt"
"github.com/Southclaws/storyden/app/resources/asset"
"github.com/Southclaws/storyden/app/resources/mq"
"github.com/Southclaws/storyden/app/resources/library/node_writer"
"github.com/Southclaws/storyden/app/services/asset/analyse"
"github.com/Southclaws/storyden/internal/infrastructure/pubsub"
"github.com/Southclaws/storyden/app/services/asset/asset_upload"
)

type analyseConsumer struct {
queue pubsub.Topic[mq.AnalyseAsset]
analyser *analyse.Analyser
analyser *analyse.Analyser
uploader *asset_upload.Uploader
nodeWriter *node_writer.Writer
}

func newAnalyseConsumer(
queue pubsub.Topic[mq.AnalyseAsset],
analyser *analyse.Analyser,
uploader *asset_upload.Uploader,
nodeWriter *node_writer.Writer,
) *analyseConsumer {
return &analyseConsumer{
queue: queue,
analyser: analyser,
analyser: analyser,
uploader: uploader,
nodeWriter: nodeWriter,
}
}

56 changes: 56 additions & 0 deletions app/services/asset/analyse_job/download.go
@@ -0,0 +1,56 @@
package analyse_job

import (
"context"
"net/http"
"time"

"github.com/Southclaws/fault"
"github.com/Southclaws/fault/fctx"
"github.com/Southclaws/opt"
"github.com/gosimple/slug"

"github.com/Southclaws/storyden/app/resources/asset"
"github.com/Southclaws/storyden/app/resources/library"
"github.com/Southclaws/storyden/app/resources/library/node_writer"
"github.com/Southclaws/storyden/app/services/asset/asset_upload"
)

func (c *analyseConsumer) downloadAsset(ctx context.Context, src string, fillrule opt.Optional[asset.ContentFillCommand]) error {
req, err := http.NewRequestWithContext(ctx, http.MethodGet, src, nil)
if err != nil {
return fault.Wrap(err, fctx.With(ctx))
}

client := &http.Client{
Timeout: 30 * time.Second,
}

resp, err := client.Do(req)
if err != nil {
return fault.Wrap(err, fctx.With(ctx))
}
defer resp.Body.Close()

if resp.StatusCode != http.StatusOK {
ctx = fctx.WithMeta(ctx, "status", resp.Status)
return fault.Wrap(fault.New("failed to get asset"), fctx.With(ctx))
}

// TODO: Better naming???
name := slug.Make(src)

a, err := c.uploader.Upload(ctx, resp.Body, resp.ContentLength, asset.NewFilename(name), asset_upload.Options{})
if err != nil {
return fault.Wrap(err, fctx.With(ctx))
}

if fr, ok := fillrule.Get(); ok {
_, err = c.nodeWriter.Update(ctx, library.NewID(fr.TargetNodeID), node_writer.WithAssets([]asset.AssetID{a.ID}))
if err != nil {
return fault.Wrap(err, fctx.With(ctx))
}
}

return nil
}