
Merge pull request #305 from Southclaws/node-fill-via-link
implement a fill rule for links->nodes
Southclaws authored Nov 22, 2024
2 parents e341378 + f9f46b8 commit 49a28af
Showing 31 changed files with 909 additions and 346 deletions.
4 changes: 4 additions & 0 deletions api/openapi.yaml
@@ -1670,6 +1670,9 @@ paths:
When a link is submitted, it is first "cleaned" to remove any fragments.
tags: [links]
parameters:
- $ref: "#/components/parameters/NodeContentFillRuleQuery"
- $ref: "#/components/parameters/NodeContentFillTargetQuery"
requestBody: { $ref: "#/components/requestBodies/LinkCreate" }
responses:
default: { $ref: "#/components/responses/InternalServerError" }
@@ -3079,6 +3082,7 @@ components:
ContentFillRule:
type: string
enum:
- create
- replace
# NOTE: Prepend and append are not implemented yet.
# - prepend
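
The two parameter refs added above plug the fill rule into the link-submission endpoint. As an illustration only, the sketch below shows how a client might pass a fill rule and a target node when submitting a link. The endpoint path, the concrete query parameter names behind NodeContentFillRuleQuery and NodeContentFillTargetQuery, and the LinkCreate body shape are defined elsewhere in the spec and are not part of this diff, so every name below is a placeholder.

```go
// Illustrative sketch only: endpoint path, query parameter names, and the
// LinkCreate body shape are assumptions, not taken from this diff.
package linkexample

import (
	"net/http"
	"net/url"
	"strings"
)

func submitLinkWithFill(apiBase, link, targetNodeID string) (*http.Response, error) {
	q := url.Values{}
	q.Set("node_content_fill_rule", "create")       // placeholder name; value would be one of the ContentFillRule enum members
	q.Set("node_content_fill_target", targetNodeID) // placeholder name; the node to fill from the submitted link

	body := strings.NewReader(`{"url":"` + link + `"}`) // assumed LinkCreate shape

	req, err := http.NewRequest(http.MethodPost, apiBase+"/links?"+q.Encode(), body)
	if err != nil {
		return nil, err
	}
	req.Header.Set("Content-Type", "application/json")

	return http.DefaultClient.Do(req)
}
```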
12 changes: 6 additions & 6 deletions app/resources/asset/asset_enum_gen.go

Some generated files are not rendered by default.

8 changes: 4 additions & 4 deletions app/resources/asset/fillrule.go
@@ -7,10 +7,10 @@ import "github.com/rs/xid"
type contentFillRuleEnum string

const (
contentFillRuleEnumNone contentFillRuleEnum = "none"
contentFillRulePrepend contentFillRuleEnum = "prepend"
contentFillRuleAppend contentFillRuleEnum = "append"
contentFillRuleReplace contentFillRuleEnum = "replace"
contentFillRuleCreate contentFillRuleEnum = "create"
contentFillRulePrepend contentFillRuleEnum = "prepend"
contentFillRuleAppend contentFillRuleEnum = "append"
contentFillRuleReplace contentFillRuleEnum = "replace"
)

type ContentFillCommand struct {
53 changes: 48 additions & 5 deletions app/resources/datagraph/content.go
@@ -53,6 +53,7 @@ type Content struct {
short string
plain string
links []string
media []string
sdrs RefList
}

@@ -110,6 +111,10 @@ func (r Content) Links() []string {
return r.links
}

func (r Content) Media() []string {
return r.media
}

func (r Content) References() RefList {
return r.sdrs
}
@@ -123,18 +128,35 @@ type options struct {
}
type option func(*options)

func WithBaseURL(url string) option {
return func(o *options) {
o.baseURL = url
}
}

// NewRichText will pull out any meaningful structured information from markdown
// document this includes a summary of the text and all link URLs for hydrating.
func NewRichText(raw string) (Content, error) {
return NewRichTextFromReader(strings.NewReader(raw))
}

// NewRichText will pull out any meaningful structured information from markdown
// document this includes a summary of the text and all link URLs for hydrating.
func NewRichTextWithOptions(raw string, opts ...option) (Content, error) {
return NewRichTextFromReader(strings.NewReader(raw), opts...)
}

func NewRichTextFromReader(r io.Reader, opts ...option) (Content, error) {
o := options{baseURL: "ignore:"}
for _, opt := range opts {
opt(&o)
}

baseURL, err := url.Parse(o.baseURL)
if err != nil {
return Content{}, fault.Wrap(err)
}

buf, err := io.ReadAll(r)
if err != nil {
return Content{}, fault.Wrap(err)
@@ -154,20 +176,22 @@ func NewRichTextFromReader(r io.Reader, opts ...option) (Content, error) {

short := getSummary(result)

bodyTree, links, refs := extractReferences(htmlTree)
bodyTree, links, media, refs := extractReferences(htmlTree, baseURL)

return Content{
html: bodyTree,
short: short,
plain: result.TextContent,
links: links,
media: media,
sdrs: refs,
}, nil
}

func extractReferences(htmlTree *html.Node) (*html.Node, []string, RefList) {
func extractReferences(htmlTree *html.Node, baseURL *url.URL) (*html.Node, []string, []string, RefList) {
bodyTree := &html.Node{}
links := []string{}
media := []string{}
sdrs := []url.URL{}

if htmlTree.DataAtom == atom.Body {
@@ -177,9 +201,9 @@ func extractReferences(htmlTree *html.Node) (*html.Node, []string, RefList) {
var walk func(n *html.Node)
walk = func(n *html.Node) {
if n.Parent != nil {
switch n.Parent.DataAtom {
switch n.DataAtom {
case atom.A:
href, hasHref := lo.Find(n.Parent.Attr, func(a html.Attribute) bool {
href, hasHref := lo.Find(n.Attr, func(a html.Attribute) bool {
return strings.ToLower(a.Key) == "href"
})

@@ -193,6 +217,25 @@
}
}
}

case atom.Img:
src, hasSrc := lo.Find(n.Attr, func(a html.Attribute) bool {
return strings.ToLower(a.Key) == "src"
})

if hasSrc {
if parsed, err := url.Parse(src.Val); err == nil {
switch parsed.Scheme {
case "":
abs := baseURL.ResolveReference(parsed).String()
media = append(media, abs)
case "http", "https":
media = append(media, parsed.String())
case RefScheme:
sdrs = append(sdrs, *parsed)
}
}
}
}
}

@@ -217,7 +260,7 @@ func extractReferences(htmlTree *html.Node) (*html.Node, []string, RefList) {
refs = append(refs, r)
}

return bodyTree, links, refs
return bodyTree, links, media, refs
}

func getSummary(article readability.Article) string {
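
The new img handling in extractReferences resolves scheme-less src values against the base URL configured via WithBaseURL (defaulting to the "ignore:" placeholder), keeps absolute http/https URLs as-is, and routes RefScheme URLs into the reference list. A minimal standard-library sketch of just the resolution step, using one of the relative srcs from the tests below:

```go
// Minimal sketch of the relative-src resolution performed in extractReferences:
// a scheme-less image src is resolved against the configured base URL.
package main

import (
	"fmt"
	"net/url"
)

func main() {
	base, _ := url.Parse("https://barney.is")
	src, _ := url.Parse("/_next/image?url=%2Fphotography%2Fcity-of-london%2F30.jpg&w=3840&q=75")

	// ResolveReference joins the relative reference with the base URL,
	// producing the absolute URL that ends up in Content.Media().
	fmt.Println(base.ResolveReference(src).String())
	// https://barney.is/_next/image?url=%2Fphotography%2Fcity-of-london%2F30.jpg&w=3840&q=75
}
```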
42 changes: 42 additions & 0 deletions app/resources/datagraph/content_test.go
@@ -15,6 +15,7 @@ func check(t *testing.T, want Content) func(got Content, err error) {
require.NoError(t, err)
assert.Equal(t, want.short, got.short)
assert.Equal(t, want.links, got.links)
assert.Equal(t, want.media, got.media)
}
}

@@ -23,6 +24,7 @@ func TestNewRichText(t *testing.T) {
check(t, Content{
short: `Here's a paragraph. It's pretty neat. Here's the rest of the text. neat photo right? This is quite a long post, the summary...`,
links: []string{},
media: []string{"http://image.com"},
})(NewRichText(`<h1>heading</h1>
<p>Here's a paragraph. It's pretty neat.</p>
@@ -40,6 +42,7 @@ func TestNewRichText(t *testing.T) {
check(t, Content{
short: `Here's a paragraph. It's pretty neat. here are my favourite ovens here are my favourite trees`,
links: []string{"https://ao.com/cooking/ovens", "https://tre.ee/trees/favs"},
media: []string{},
})(NewRichText(`<h1>heading</h1>
<p>Here's a paragraph. It's pretty neat.</p>
@@ -49,12 +52,51 @@ func TestNewRichText(t *testing.T) {
`))
})

t.Run("pull_images", func(t *testing.T) {
check(t, Content{
short: `Here are some cool photos.`,
links: []string{},
media: []string{
"https://barney.is/_next/image?url=%2Fphotography%2Fcity-of-london%2Fcarters-halt.jpg&w=3840&q=75",
"https://barney.is/_next/image?url=%2Fphotography%2Fcity-of-london%2F30.jpg&w=3840&q=75",
"https://barney.is/_next/image?url=%2Fphotography%2Fcity-of-london%2Fboxes.jpg&w=2048&q=75",
},
})(NewRichText(`<h1>heading</h1>
<p>Here are some cool photos.</p>
<img src="https://barney.is/_next/image?url=%2Fphotography%2Fcity-of-london%2Fcarters-halt.jpg&w=3840&q=75" />
<img src="https://barney.is/_next/image?url=%2Fphotography%2Fcity-of-london%2F30.jpg&w=3840&q=75" />
<img src="https://barney.is/_next/image?url=%2Fphotography%2Fcity-of-london%2Fboxes.jpg&w=2048&q=75" />
`))
})

t.Run("pull_images_relative", func(t *testing.T) {
check(t, Content{
short: `Here are some cool photos.`,
links: []string{},
media: []string{
"https://barney.is/_next/image?url=%2Fphotography%2Fcity-of-london%2Fcarters-halt.jpg&w=3840&q=75",
"https://barney.is/_next/image?url=%2Fphotography%2Fcity-of-london%2F30.jpg&w=3840&q=75",
"https://barney.is/_next/image?url=%2Fphotography%2Fcity-of-london%2Fboxes.jpg&w=2048&q=75",
},
})(NewRichTextWithOptions(`<h1>heading</h1>
<p>Here are some cool photos.</p>
<img src="/_next/image?url=%2Fphotography%2Fcity-of-london%2Fcarters-halt.jpg&w=3840&q=75" />
<img src="/_next/image?url=%2Fphotography%2Fcity-of-london%2F30.jpg&w=3840&q=75" />
<img src="/_next/image?url=%2Fphotography%2Fcity-of-london%2Fboxes.jpg&w=2048&q=75" />
`, WithBaseURL("https://barney.is")))
})

t.Run("with_uris", func(t *testing.T) {
mention := utils.Must(xid.FromString("cn2h3gfljatbqvjqctdg"))

check(t, Content{
short: `hey @southclaws!`,
links: []string{},
media: []string{},
sdrs: RefList{
{Kind: KindProfile, ID: mention},
},
9 changes: 8 additions & 1 deletion app/resources/mq/message_types.go
@@ -17,7 +17,9 @@ import (
)

type IndexNode struct {
ID library.NodeID
ID library.NodeID
SummariseContent bool
AutoTag bool
}

type DeleteNode struct {
@@ -40,6 +42,11 @@ type IndexProfile struct {
ID account.AccountID
}

type DownloadAsset struct {
URL string
ContentFillRule opt.Optional[asset.ContentFillCommand]
}

type AnalyseAsset struct {
AssetID xid.ID
ContentFillRule opt.Optional[asset.ContentFillCommand]
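
DownloadAsset is the new queue message that carries a remote media URL to the download job later in this diff, optionally tagged with the fill command that tells the consumer which node should receive the uploaded asset. A rough sketch of constructing one follows; ContentFillCommand's full field set is not shown in this diff (only TargetNodeID appears, in download.go), so its field type here is an assumption.

```go
// Rough sketch, not taken from the diff: building a DownloadAsset message for a
// media URL extracted from rich-text content. The type of TargetNodeID is an
// assumption based on the library.NewID(fr.TargetNodeID) call in download.go.
package mqexample

import (
	"github.com/Southclaws/opt"
	"github.com/rs/xid"

	"github.com/Southclaws/storyden/app/resources/asset"
	"github.com/Southclaws/storyden/app/resources/mq"
)

func newDownloadMessage(mediaURL string, target xid.ID) mq.DownloadAsset {
	return mq.DownloadAsset{
		URL: mediaURL,
		ContentFillRule: opt.New(asset.ContentFillCommand{
			TargetNodeID: target, // node that should be updated with the downloaded asset
		}),
	}
}
```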
17 changes: 10 additions & 7 deletions app/services/asset/analyse_job/analyse.go
@@ -5,23 +5,26 @@ import (

"github.com/Southclaws/opt"
"github.com/Southclaws/storyden/app/resources/asset"
"github.com/Southclaws/storyden/app/resources/mq"
"github.com/Southclaws/storyden/app/resources/library/node_writer"
"github.com/Southclaws/storyden/app/services/asset/analyse"
"github.com/Southclaws/storyden/internal/infrastructure/pubsub"
"github.com/Southclaws/storyden/app/services/asset/asset_upload"
)

type analyseConsumer struct {
queue pubsub.Topic[mq.AnalyseAsset]
analyser *analyse.Analyser
analyser *analyse.Analyser
uploader *asset_upload.Uploader
nodeWriter *node_writer.Writer
}

func newAnalyseConsumer(
queue pubsub.Topic[mq.AnalyseAsset],
analyser *analyse.Analyser,
uploader *asset_upload.Uploader,
nodeWriter *node_writer.Writer,
) *analyseConsumer {
return &analyseConsumer{
queue: queue,
analyser: analyser,
analyser: analyser,
uploader: uploader,
nodeWriter: nodeWriter,
}
}

56 changes: 56 additions & 0 deletions app/services/asset/analyse_job/download.go
@@ -0,0 +1,56 @@
package analyse_job

import (
"context"
"net/http"
"time"

"github.com/Southclaws/fault"
"github.com/Southclaws/fault/fctx"
"github.com/Southclaws/opt"
"github.com/gosimple/slug"

"github.com/Southclaws/storyden/app/resources/asset"
"github.com/Southclaws/storyden/app/resources/library"
"github.com/Southclaws/storyden/app/resources/library/node_writer"
"github.com/Southclaws/storyden/app/services/asset/asset_upload"
)

func (c *analyseConsumer) downloadAsset(ctx context.Context, src string, fillrule opt.Optional[asset.ContentFillCommand]) error {
req, err := http.NewRequestWithContext(ctx, http.MethodGet, src, nil)
if err != nil {
return fault.Wrap(err, fctx.With(ctx))
}

client := &http.Client{
Timeout: 30 * time.Second,
}

resp, err := client.Do(req)
if err != nil {
return fault.Wrap(err, fctx.With(ctx))
}
defer resp.Body.Close()

if resp.StatusCode != http.StatusOK {
ctx = fctx.WithMeta(ctx, "status", resp.Status)
return fault.Wrap(fault.New("failed to get asset"), fctx.With(ctx))
}

// TODO: Better naming???
name := slug.Make(src)

a, err := c.uploader.Upload(ctx, resp.Body, resp.ContentLength, asset.NewFilename(name), asset_upload.Options{})
if err != nil {
return fault.Wrap(err, fctx.With(ctx))
}

if fr, ok := fillrule.Get(); ok {
_, err = c.nodeWriter.Update(ctx, library.NewID(fr.TargetNodeID), node_writer.WithAssets([]asset.AssetID{a.ID}))
if err != nil {
return fault.Wrap(err, fctx.With(ctx))
}
}

return nil
}