From b997dc72bcf7b6338883024f4bd02df51266c066 Mon Sep 17 00:00:00 2001 From: Henry Date: Mon, 16 Dec 2024 05:07:44 -0800 Subject: [PATCH] Add mobile toolkit v3 URL helper (#139) --- cmd/rwp/cmd/serve/api.go | 64 ++- cmd/rwp/cmd/serve/helpers.go | 16 +- pkg/asset/asset_file.go | 6 +- pkg/content/element/element.go | 3 - pkg/content/iterator/html.go | 8 +- pkg/content/iterator/html_converter.go | 44 +- pkg/fetcher/fetcher_archive.go | 12 +- pkg/fetcher/fetcher_archive_test.go | 44 +- pkg/fetcher/fetcher_file.go | 18 +- pkg/fetcher/fetcher_file_test.go | 49 +- pkg/fetcher/resource.go | 6 +- pkg/fetcher/resource_bytes.go | 8 +- pkg/manifest/collection.go | 23 +- pkg/manifest/collection_test.go | 45 +- pkg/manifest/contributor.go | 14 +- pkg/manifest/contributor_test.go | 10 +- pkg/manifest/guided_navigation.go | 3 +- pkg/manifest/href.go | 108 +++++ pkg/manifest/href_normalizer.go | 62 +++ pkg/manifest/href_test.go | 70 +++ pkg/manifest/link.go | 213 ++++----- pkg/manifest/link_test.go | 107 ++--- pkg/manifest/locator.go | 68 ++- pkg/manifest/locator_test.go | 77 +++- pkg/manifest/manifest.go | 212 +++++---- pkg/manifest/manifest_test.go | 194 ++++---- pkg/manifest/manifest_transformer.go | 93 ++++ pkg/manifest/metadata.go | 34 +- pkg/manifest/metadata_test.go | 2 +- pkg/manifest/profile.go | 2 +- pkg/manifest/subject.go | 14 +- pkg/manifest/subject_test.go | 16 +- pkg/mediatype/mediatype.go | 29 +- pkg/mediatype/mediatype_of.go | 16 +- pkg/mediatype/sniffer.go | 19 +- pkg/mediatype/sniffer_context.go | 13 + pkg/mediatype/sniffer_mimes.go | 10 - pkg/mediatype/types.go | 3 +- pkg/mediatype/types_matcher.go | 70 +++ pkg/parser/epub/deobfuscator_test.go | 18 +- pkg/parser/epub/factory.go | 13 +- pkg/parser/epub/media_overlay_service.go | 24 +- pkg/parser/epub/metadata.go | 25 +- pkg/parser/epub/metadata_test.go | 10 +- pkg/parser/epub/parser.go | 33 +- pkg/parser/epub/parser_encryption.go | 18 +- pkg/parser/epub/parser_encryption_test.go | 21 +- 
pkg/parser/epub/parser_navdoc.go | 20 +- pkg/parser/epub/parser_navdoc_test.go | 37 +- pkg/parser/epub/parser_ncx.go | 35 +- pkg/parser/epub/parser_ncx_test.go | 33 +- pkg/parser/epub/parser_packagedoc.go | 20 +- pkg/parser/epub/parser_packagedoc_test.go | 54 ++- pkg/parser/epub/parser_smil.go | 30 +- pkg/parser/epub/parser_smil_test.go | 3 +- pkg/parser/epub/positions_service.go | 14 +- pkg/parser/epub/utils.go | 18 +- pkg/parser/parser_audio.go | 16 +- pkg/parser/parser_image.go | 16 +- pkg/parser/parser_image_test.go | 10 +- pkg/parser/parser_readium_webpub.go | 2 +- pkg/parser/pdf/parser_metadata.go | 20 +- pkg/parser/pdf/positions_service.go | 15 +- pkg/parser/utils.go | 4 +- pkg/parser/utils_test.go | 18 +- pkg/pub/publication.go | 31 +- pkg/pub/service_content.go | 4 +- pkg/pub/service_guided_navigation.go | 44 +- pkg/pub/service_positions.go | 22 +- pkg/pub/service_positions_test.go | 38 +- pkg/streamer/a11y_infer.go | 8 +- pkg/streamer/a11y_infer_test.go | 4 +- pkg/util/href.go | 112 ----- pkg/util/href_test.go | 212 --------- pkg/util/uri_template.go | 112 ----- pkg/util/uri_template_test.go | 55 --- pkg/util/url/scheme.go | 57 +++ pkg/util/url/uritemplates/README.txt | 2 + pkg/util/url/uritemplates/uritemplates.go | 248 ++++++++++ .../url/uritemplates/uritemplates_test.go | 280 ++++++++++++ pkg/util/url/uritemplates/utils.go | 32 ++ pkg/util/url/url.go | 421 +++++++++++++++++ pkg/util/url/url_test.go | 427 ++++++++++++++++++ 83 files changed, 2986 insertions(+), 1455 deletions(-) create mode 100644 pkg/manifest/href.go create mode 100644 pkg/manifest/href_normalizer.go create mode 100644 pkg/manifest/href_test.go create mode 100644 pkg/manifest/manifest_transformer.go delete mode 100644 pkg/mediatype/sniffer_mimes.go create mode 100644 pkg/mediatype/types_matcher.go delete mode 100644 pkg/util/href.go delete mode 100644 pkg/util/href_test.go delete mode 100644 pkg/util/uri_template.go delete mode 100644 pkg/util/uri_template_test.go create mode 100644 
pkg/util/url/scheme.go create mode 100644 pkg/util/url/uritemplates/README.txt create mode 100644 pkg/util/url/uritemplates/uritemplates.go create mode 100644 pkg/util/url/uritemplates/uritemplates_test.go create mode 100644 pkg/util/url/uritemplates/utils.go create mode 100644 pkg/util/url/url.go create mode 100644 pkg/util/url/url_test.go diff --git a/cmd/rwp/cmd/serve/api.go b/cmd/rwp/cmd/serve/api.go index 57834991..dfcae9de 100644 --- a/cmd/rwp/cmd/serve/api.go +++ b/cmd/rwp/cmd/serve/api.go @@ -24,6 +24,7 @@ import ( "github.com/readium/go-toolkit/pkg/manifest" "github.com/readium/go-toolkit/pkg/pub" "github.com/readium/go-toolkit/pkg/streamer" + "github.com/readium/go-toolkit/pkg/util/url" "github.com/zeebo/xxh3" ) @@ -67,32 +68,6 @@ func (s *Server) getPublication(filename string) (*pub.Publication, error) { return nil, errors.Wrap(err, "failed opening "+cp) } - // TODO: Remove this after we make links relative in the go-toolkit - for i, link := range pub.Manifest.Links { - pub.Manifest.Links[i] = makeRelative(link) - } - for i, link := range pub.Manifest.Resources { - pub.Manifest.Resources[i] = makeRelative(link) - } - for i, link := range pub.Manifest.ReadingOrder { - pub.Manifest.ReadingOrder[i] = makeRelative(link) - } - for i, link := range pub.Manifest.TableOfContents { - pub.Manifest.TableOfContents[i] = makeRelative(link) - } - var makeCollectionRelative func(mp manifest.PublicationCollectionMap) - makeCollectionRelative = func(mp manifest.PublicationCollectionMap) { - for i := range mp { - for j := range mp[i] { - for k := range mp[i][j].Links { - mp[i][j].Links[k] = makeRelative(mp[i][j].Links[k]) - } - makeCollectionRelative(mp[i][j].Subcollections) - } - } - } - makeCollectionRelative(pub.Manifest.Subcollections) - // Cache the publication encPub := &cache.CachedPublication{Publication: pub} s.lfu.Set(cp, encPub) @@ -122,10 +97,19 @@ func (s *Server) getManifest(w http.ResponseWriter, req *http.Request) { scheme = "https://" } rPath, _ := 
s.router.Get("manifest").URLPath("path", vars["path"]) + conformsTo := conformsToAsMimetype(publication.Manifest.Metadata.ConformsTo) + + selfUrl, err := url.AbsoluteURLFromString(scheme + req.Host + rPath.String()) + if err != nil { + slog.Error("failed creating self URL", "error", err) + w.WriteHeader(500) + return + } + selfLink := &manifest.Link{ - Rels: manifest.Strings{"self"}, - Type: conformsToAsMimetype(publication.Manifest.Metadata.ConformsTo), - Href: scheme + req.Host + rPath.String(), + Rels: manifest.Strings{"self"}, + MediaType: &conformsTo, + Href: manifest.NewHREF(selfUrl), } // Marshal the manifest @@ -155,7 +139,7 @@ func (s *Server) getManifest(w http.ResponseWriter, req *http.Request) { } // Add headers - w.Header().Set("content-type", conformsToAsMimetype(publication.Manifest.Metadata.ConformsTo)+"; charset=utf-8") + w.Header().Set("content-type", conformsTo.String()+"; charset=utf-8") w.Header().Set("cache-control", "private, must-revalidate") w.Header().Set("access-control-allow-origin", "*") // TODO: provide options? 
@@ -190,9 +174,19 @@ func (s *Server) getAsset(w http.ResponseWriter, r *http.Request) { return } + // Parse asset path from mux vars + href, err := url.URLFromDecodedPath(path.Clean(vars["asset"])) + if err != nil { + slog.Error("failed parsing asset path as URL", "error", err) + w.WriteHeader(400) + return + } + rawHref := href.Raw() + rawHref.RawQuery = r.URL.Query().Encode() // Add the query parameters of the URL + href, _ = url.RelativeURLFromGo(rawHref) // Turn it back into a go-toolkit relative URL + // Make sure the asset exists in the publication - href := path.Clean(vars["asset"]) - link := publication.Find(href) + link := publication.LinkWithHref(href) if link == nil { w.WriteHeader(http.StatusNotFound) return @@ -200,8 +194,8 @@ func (s *Server) getAsset(w http.ResponseWriter, r *http.Request) { finalLink := *link // Expand templated links to include URL query parameters - if finalLink.Templated { - finalLink = finalLink.ExpandTemplate(convertURLValuesToMap(r.URL.Query())) + if finalLink.Href.IsTemplated() { + finalLink.Href = manifest.NewHREF(finalLink.URL(nil, convertURLValuesToMap(r.URL.Query()))) } // Get the asset from the publication @@ -217,7 +211,7 @@ func (s *Server) getAsset(w http.ResponseWriter, r *http.Request) { } // Patch mimetype where necessary - contentType := link.MediaType().String() + contentType := link.MediaType.String() if sub, ok := mimeSubstitutions[contentType]; ok { contentType = sub } diff --git a/cmd/rwp/cmd/serve/helpers.go b/cmd/rwp/cmd/serve/helpers.go index 998a9253..f7a1c04e 100644 --- a/cmd/rwp/cmd/serve/helpers.go +++ b/cmd/rwp/cmd/serve/helpers.go @@ -51,21 +51,13 @@ var compressableMimes = []string{ "application/vnd.ms-fontobject", } -func makeRelative(link manifest.Link) manifest.Link { - link.Href = strings.TrimPrefix(link.Href, "/") - for i, alt := range link.Alternates { - link.Alternates[i].Href = strings.TrimPrefix(alt.Href, "/") - } - return link -} - -func conformsToAsMimetype(conformsTo manifest.Profiles) 
string { - mime := mediatype.ReadiumWebpubManifest.String() +func conformsToAsMimetype(conformsTo manifest.Profiles) mediatype.MediaType { + mime := mediatype.ReadiumWebpubManifest for _, profile := range conformsTo { if profile == manifest.ProfileDivina { - mime = mediatype.ReadiumDivinaManifest.String() + mime = mediatype.ReadiumDivinaManifest } else if profile == manifest.ProfileAudiobook { - mime = mediatype.ReadiumAudiobookManifest.String() + mime = mediatype.ReadiumAudiobookManifest } else { continue } diff --git a/pkg/asset/asset_file.go b/pkg/asset/asset_file.go index f26cb7ff..8814b08e 100644 --- a/pkg/asset/asset_file.go +++ b/pkg/asset/asset_file.go @@ -70,13 +70,13 @@ func (a *FileAsset) CreateFetcher(dependencies Dependencies, credentials string) return nil, err } if stat.IsDir() { - return fetcher.NewFileFetcher("/", a.filepath), nil + return fetcher.NewFileFetcher("", a.filepath), nil } else { af, err := fetcher.NewArchiveFetcherFromPathWithFactory(a.filepath, dependencies.ArchiveFactory) if err == nil { return af, nil } - // logrus.Warnf("couldn't open %s as archive: %v", a.filepath, err) - return fetcher.NewFileFetcher("/"+a.Name(), a.filepath), nil + + return fetcher.NewFileFetcher(a.Name(), a.filepath), nil } } diff --git a/pkg/content/element/element.go b/pkg/content/element/element.go index 1f0ebc82..6ad7a007 100644 --- a/pkg/content/element/element.go +++ b/pkg/content/element/element.go @@ -74,7 +74,6 @@ func (e AudioElement) Locator() manifest.Locator { // Implements EmbeddedElement func (e AudioElement) EmbeddedLink() manifest.Link { - e.embeddedLink.Href = strings.TrimPrefix(e.embeddedLink.Href, "/") return e.embeddedLink } @@ -115,7 +114,6 @@ func (e VideoElement) Locator() manifest.Locator { // Implements EmbeddedElement func (e VideoElement) EmbeddedLink() manifest.Link { - e.embeddedLink.Href = strings.TrimPrefix(e.embeddedLink.Href, "/") return e.embeddedLink } @@ -158,7 +156,6 @@ func (e ImageElement) Locator() manifest.Locator { // 
Implements EmbeddedElement func (e ImageElement) EmbeddedLink() manifest.Link { - e.embeddedLink.Href = strings.TrimPrefix(e.embeddedLink.Href, "/") return e.embeddedLink } diff --git a/pkg/content/iterator/html.go b/pkg/content/iterator/html.go index bf388fab..98c13750 100644 --- a/pkg/content/iterator/html.go +++ b/pkg/content/iterator/html.go @@ -36,7 +36,7 @@ func NewHTML(resource fetcher.Resource, locator manifest.Locator) *HTMLContentIt func HTMLFactory() ResourceContentIteratorFactory { return func(resource fetcher.Resource, locator manifest.Locator) Iterator { - if resource.Link().MediaType().Matches(&mediatype.HTML, &mediatype.XHTML) { + if resource.Link().MediaType.Matches(&mediatype.HTML, &mediatype.XHTML) { return NewHTML(resource, locator) } return nil @@ -129,7 +129,7 @@ func (it *HTMLContentIterator) elements() (*ParsedElements, error) { func (it *HTMLContentIterator) parseElements() (*ParsedElements, error) { raw, rerr := it.resource.ReadAsString() if rerr != nil { - return nil, errors.Wrap(rerr, "failed reading HTML string of "+it.resource.Link().Href) + return nil, errors.Wrap(rerr, "failed reading HTML string of "+it.resource.Link().Href.String()) } document, err := html.ParseWithOptions( @@ -137,12 +137,12 @@ func (it *HTMLContentIterator) parseElements() (*ParsedElements, error) { html.ParseOptionEnableScripting(false), ) if err != nil { - return nil, errors.Wrap(err, "failed parsing HTML of "+it.resource.Link().Href) + return nil, errors.Wrap(err, "failed parsing HTML of "+it.resource.Link().Href.String()) } body := childOfType(document, atom.Body, true) if body == nil { - return nil, errors.New("HTML of " + it.resource.Link().Href + " doesn't have a ") + return nil, errors.New("HTML of " + it.resource.Link().Href.String() + " doesn't have a ") } contentConverter := HTMLConverter{ diff --git a/pkg/content/iterator/html_converter.go b/pkg/content/iterator/html_converter.go index d7c92636..73662b0e 100644 --- 
a/pkg/content/iterator/html_converter.go +++ b/pkg/content/iterator/html_converter.go @@ -1,7 +1,7 @@ package iterator import ( - "net/url" + nurl "net/url" "strings" "unicode" "unicode/utf8" @@ -9,7 +9,8 @@ import ( "github.com/readium/go-toolkit/pkg/content/element" iutil "github.com/readium/go-toolkit/pkg/internal/util" "github.com/readium/go-toolkit/pkg/manifest" - "github.com/readium/go-toolkit/pkg/util" + "github.com/readium/go-toolkit/pkg/mediatype" + "github.com/readium/go-toolkit/pkg/util/url" "golang.org/x/net/html" "golang.org/x/net/html/atom" ) @@ -72,14 +73,15 @@ func getAttr(n *html.Node, key string) string { return "" } -func srcRelativeToHref(n *html.Node, base string) *string { +func srcRelativeToHref(n *html.Node, base url.URL) url.URL { if n == nil { return nil } if v := getAttr(n, "src"); v != "" { - h, _ := util.NewHREF(v, base).String() - return &h + if u, _ := url.URLFromString(v); u != nil { + return base.Resolve(u) + } } return nil } @@ -336,10 +338,10 @@ func (c *HTMLConverter) Head(n *html.Node, depth int) { cssSelector = &cs } elementLocator := manifest.Locator{ - Href: c.baseLocator.Href, - Type: c.baseLocator.Type, - Title: c.baseLocator.Title, - Text: c.baseLocator.Text, + Href: c.baseLocator.Href, + MediaType: c.baseLocator.MediaType, + Title: c.baseLocator.Title, + Text: c.baseLocator.Text, Locations: manifest.Locations{ OtherLocations: map[string]interface{}{ "cssSelector": cssSelector, @@ -361,7 +363,7 @@ func (c *HTMLConverter) Head(n *html.Node, depth int) { c.elements = append(c.elements, element.NewImageElement( elementLocator, manifest.Link{ - Href: *href, + Href: manifest.NewHREF(href), }, "", // FIXME: Get the caption from figcaption atlist, @@ -372,7 +374,7 @@ func (c *HTMLConverter) Head(n *html.Node, depth int) { var link *manifest.Link if href != nil { link = &manifest.Link{ - Href: *href, + Href: manifest.NewHREF(href), } } else { sourceNodes := childrenOfType(n, atom.Source, 1) @@ -380,10 +382,12 @@ func (c 
*HTMLConverter) Head(n *html.Node, depth int) { for _, source := range sourceNodes { if src := srcRelativeToHref(source, c.baseLocator.Href); src != nil { l := manifest.Link{ - Href: *src, + Href: manifest.NewHREF(href), } if typ := getAttr(source, "type"); typ != "" { - l.Type = typ + if mt, err := mediatype.NewOfString(typ); err == nil { + l.MediaType = &mt + } } sources = append(sources, l) } @@ -495,7 +499,7 @@ func (c *HTMLConverter) flushText() { quote := element.Quote{} for _, at := range el.Attr { if at.Key == "cite" { - quote.ReferenceURL, _ = url.Parse(at.Val) + quote.ReferenceURL, _ = nurl.Parse(at.Val) } if at.Key == "title" { quote.ReferenceTitle = at.Val @@ -512,9 +516,9 @@ func (c *HTMLConverter) flushText() { } el := element.NewTextElement( manifest.Locator{ - Href: c.baseLocator.Href, - Type: c.baseLocator.Type, - Title: c.baseLocator.Title, + Href: c.baseLocator.Href, + MediaType: c.baseLocator.MediaType, + Title: c.baseLocator.Title, Locations: manifest.Locations{ OtherLocations: map[string]interface{}{}, }, @@ -563,9 +567,9 @@ func (c *HTMLConverter) flushSegment() { } seg := element.TextSegment{ Locator: manifest.Locator{ - Href: c.baseLocator.Href, - Type: c.baseLocator.Type, - Title: c.baseLocator.Title, + Href: c.baseLocator.Href, + MediaType: c.baseLocator.MediaType, + Title: c.baseLocator.Title, Locations: manifest.Locations{ // TODO fix: needs to use baseLocator locations too! 
OtherLocations: map[string]interface{}{}, diff --git a/pkg/fetcher/fetcher_archive.go b/pkg/fetcher/fetcher_archive.go index f9b40963..cee368a6 100644 --- a/pkg/fetcher/fetcher_archive.go +++ b/pkg/fetcher/fetcher_archive.go @@ -4,7 +4,6 @@ import ( "errors" "io" "path" - "strings" "github.com/readium/go-toolkit/pkg/archive" "github.com/readium/go-toolkit/pkg/manifest" @@ -23,17 +22,18 @@ func (f *ArchiveFetcher) Links() (manifest.LinkList, error) { links := make(manifest.LinkList, 0, len(entries)) for _, af := range entries { fp := path.Clean(af.Path()) - if !strings.HasPrefix(fp, "/") { - fp = "/" + fp + href, err := manifest.NewHREFFromString(fp, false) + if err != nil { + return nil, err } link := manifest.Link{ - Href: fp, + Href: href, } ext := path.Ext(fp) if ext != "" { mt := mediatype.OfExtension(ext[1:]) // Remove leading "." if mt != nil { - link.Type = mt.String() + link.MediaType = mt } } links = append(links, link) @@ -43,7 +43,7 @@ func (f *ArchiveFetcher) Links() (manifest.LinkList, error) { // Get implements Fetcher func (f *ArchiveFetcher) Get(link manifest.Link) Resource { - entry, err := f.archive.Entry(strings.TrimPrefix(link.Href, "/")) + entry, err := f.archive.Entry(link.Href.String()) if err != nil { return NewFailureResource(link, NotFound(err)) } diff --git a/pkg/fetcher/fetcher_archive_test.go b/pkg/fetcher/fetcher_archive_test.go index e3024df5..5e00aa51 100644 --- a/pkg/fetcher/fetcher_archive_test.go +++ b/pkg/fetcher/fetcher_archive_test.go @@ -5,6 +5,7 @@ import ( "testing" "github.com/readium/go-toolkit/pkg/manifest" + "github.com/readium/go-toolkit/pkg/mediatype" "github.com/stretchr/testify/assert" ) @@ -15,13 +16,14 @@ func withArchiveFetcher(t *testing.T, callback func(a *ArchiveFetcher)) { } func TestArchiveFetcherLinks(t *testing.T) { - makeTestLink := func(href string, typ string, entryLength uint64, isCompressed bool) struct { + makeTestLink := func(href string, typ *mediatype.MediaType, entryLength uint64, isCompressed 
bool) struct { manifest.Link manifest.Properties } { + l := manifest.Link{ - Href: href, - Type: typ, + Href: manifest.MustNewHREFFromString(href, false), + MediaType: typ, } p := manifest.Properties{ "https://readium.org/webpub-manifest/properties#archive": map[string]interface{}{ @@ -39,16 +41,16 @@ func TestArchiveFetcherLinks(t *testing.T) { manifest.Link manifest.Properties }{ - makeTestLink("/mimetype", "", 20, false), - makeTestLink("/EPUB/cover.xhtml", "application/xhtml+xml", 259, true), - makeTestLink("/EPUB/css/epub.css", "text/css", 595, true), - makeTestLink("/EPUB/css/nav.css", "text/css", 306, true), - makeTestLink("/EPUB/images/cover.png", "image/png", 35809, true), - makeTestLink("/EPUB/nav.xhtml", "application/xhtml+xml", 2293, true), - makeTestLink("/EPUB/package.opf", "application/oebps-package+xml", 773, true), - makeTestLink("/EPUB/s04.xhtml", "application/xhtml+xml", 118269, true), - makeTestLink("/EPUB/toc.ncx", "application/x-dtbncx+xml", 1697, true), - makeTestLink("/META-INF/container.xml", "application/xml", 176, true), + makeTestLink("mimetype", nil, 20, false), + makeTestLink("EPUB/cover.xhtml", &mediatype.XHTML, 259, true), + makeTestLink("EPUB/css/epub.css", &mediatype.CSS, 595, true), + makeTestLink("EPUB/css/nav.css", &mediatype.CSS, 306, true), + makeTestLink("EPUB/images/cover.png", &mediatype.PNG, 35809, true), + makeTestLink("EPUB/nav.xhtml", &mediatype.XHTML, 2293, true), + makeTestLink("EPUB/package.opf", &mediatype.OPF, 773, true), + makeTestLink("EPUB/s04.xhtml", &mediatype.XHTML, 118269, true), + makeTestLink("EPUB/toc.ncx", &mediatype.NCX, 1697, true), + makeTestLink("META-INF/container.xml", &mediatype.XML, 176, true), } withArchiveFetcher(t, func(a *ArchiveFetcher) { @@ -66,7 +68,7 @@ func TestArchiveFetcherLinks(t *testing.T) { func TestArchiveFetcherLengthNotFound(t *testing.T) { withArchiveFetcher(t, func(a *ArchiveFetcher) { - resource := a.Get(manifest.Link{Href: "/unknown"}) + resource := a.Get(manifest.Link{Href: 
manifest.MustNewHREFFromString("unknown", false)}) _, err := resource.Length() assert.Equal(t, NotFound(err.Cause), err) }) @@ -74,7 +76,7 @@ func TestArchiveFetcherLengthNotFound(t *testing.T) { func TestArchiveFetcherReadNotFound(t *testing.T) { withArchiveFetcher(t, func(a *ArchiveFetcher) { - resource := a.Get(manifest.Link{Href: "/unknown"}) + resource := a.Get(manifest.Link{Href: manifest.MustNewHREFFromString("unknown", false)}) _, err := resource.Read(0, 0) assert.Equal(t, NotFound(err.Cause), err) _, err = resource.Stream(&bytes.Buffer{}, 0, 0) @@ -84,7 +86,7 @@ func TestArchiveFetcherReadNotFound(t *testing.T) { func TestArchiveFetcherRead(t *testing.T) { withArchiveFetcher(t, func(a *ArchiveFetcher) { - resource := a.Get(manifest.Link{Href: "/mimetype"}) + resource := a.Get(manifest.Link{Href: manifest.MustNewHREFFromString("mimetype", false)}) bin, err := resource.Read(0, 0) if assert.Nil(t, err) { assert.Equal(t, "application/epub+zip", string(bin)) @@ -100,7 +102,7 @@ func TestArchiveFetcherRead(t *testing.T) { func TestArchiveFetcherReadRange(t *testing.T) { withArchiveFetcher(t, func(a *ArchiveFetcher) { - resource := a.Get(manifest.Link{Href: "/mimetype"}) + resource := a.Get(manifest.Link{Href: manifest.MustNewHREFFromString("mimetype", false)}) bin, err := resource.Read(0, 10) if assert.Nil(t, err) { assert.Equal(t, "application", string(bin)) @@ -116,7 +118,7 @@ func TestArchiveFetcherReadRange(t *testing.T) { func TestArchiveFetcherComputingLength(t *testing.T) { withArchiveFetcher(t, func(a *ArchiveFetcher) { - resource := a.Get(manifest.Link{Href: "/mimetype"}) + resource := a.Get(manifest.Link{Href: manifest.MustNewHREFFromString("mimetype", false)}) length, err := resource.Length() assert.Nil(t, err) assert.EqualValues(t, 20, length) @@ -125,7 +127,7 @@ func TestArchiveFetcherComputingLength(t *testing.T) { func TestArchiveFetcherDirectoryLengthNotFound(t *testing.T) { withArchiveFetcher(t, func(a *ArchiveFetcher) { - resource := 
a.Get(manifest.Link{Href: "/EPUB"}) + resource := a.Get(manifest.Link{Href: manifest.MustNewHREFFromString("EPUB", false)}) _, err := resource.Length() assert.Equal(t, NotFound(err.Cause), err) }) @@ -133,7 +135,7 @@ func TestArchiveFetcherDirectoryLengthNotFound(t *testing.T) { func TestArchiveFetcherFileNotFoundLength(t *testing.T) { withArchiveFetcher(t, func(a *ArchiveFetcher) { - resource := a.Get(manifest.Link{Href: "/unknown"}) + resource := a.Get(manifest.Link{Href: manifest.MustNewHREFFromString("unknown", false)}) _, err := resource.Length() assert.Equal(t, NotFound(err.Cause), err) }) @@ -141,7 +143,7 @@ func TestArchiveFetcherFileNotFoundLength(t *testing.T) { func TestArchiveFetcherAddsProperties(t *testing.T) { withArchiveFetcher(t, func(a *ArchiveFetcher) { - resource := a.Get(manifest.Link{Href: "/EPUB/css/epub.css"}) + resource := a.Get(manifest.Link{Href: manifest.MustNewHREFFromString("EPUB/css/epub.css", false)}) assert.Equal(t, manifest.Properties{ "https://readium.org/webpub-manifest/properties#archive": map[string]interface{}{ "entryLength": uint64(595), diff --git a/pkg/fetcher/fetcher_file.go b/pkg/fetcher/fetcher_file.go index 6c57339c..54c93dbd 100644 --- a/pkg/fetcher/fetcher_file.go +++ b/pkg/fetcher/fetcher_file.go @@ -41,8 +41,12 @@ func (f *FileFetcher) Links() (manifest.LinkList, error) { return err } + href, err := manifest.NewHREFFromString(filepath.ToSlash(filepath.Join(href, strings.TrimPrefix(apath, xpath))), false) + if err != nil { + return err + } link := manifest.Link{ - Href: filepath.ToSlash(filepath.Join(href, strings.TrimPrefix(apath, xpath))), + Href: href, } f, err := os.Open(apath) @@ -50,14 +54,14 @@ func (f *FileFetcher) Links() (manifest.LinkList, error) { defer f.Close() mt := mediatype.OfFileOnly(f) if mt != nil { - link.Type = mt.String() + link.MediaType = mt } } else { ext := filepath.Ext(apath) if ext != "" { mt := mediatype.OfExtension(ext[1:]) if mt != nil { - link.Type = mt.String() + link.MediaType = mt 
} } } @@ -73,14 +77,8 @@ func (f *FileFetcher) Links() (manifest.LinkList, error) { // Get implements Fetcher func (f *FileFetcher) Get(link manifest.Link) Resource { - linkHref := link.Href - if !strings.HasPrefix(linkHref, "/") { - linkHref = "/" + linkHref - } + linkHref := link.Href.String() for itemHref, itemFile := range f.paths { - if !strings.HasPrefix(itemHref, "/") { - itemHref = "/" + itemHref - } if strings.HasPrefix(linkHref, itemHref) { resourceFile := filepath.Join(itemFile, strings.TrimPrefix(linkHref, itemHref)) // Make sure that the requested resource is [path] or one of its descendant. diff --git a/pkg/fetcher/fetcher_file_test.go b/pkg/fetcher/fetcher_file_test.go index c90c0caa..dfa02f64 100644 --- a/pkg/fetcher/fetcher_file_test.go +++ b/pkg/fetcher/fetcher_file_test.go @@ -5,24 +5,25 @@ import ( "testing" "github.com/readium/go-toolkit/pkg/manifest" + "github.com/readium/go-toolkit/pkg/mediatype" "github.com/stretchr/testify/assert" ) var testFileFetcher = &FileFetcher{ paths: map[string]string{ - "/file_href": "./testdata/text.txt", - "/dir_href": "./testdata/directory", + "file_href": "./testdata/text.txt", + "dir_href": "./testdata/directory", }, } func TestFileFetcherLengthNotFound(t *testing.T) { - resource := testFileFetcher.Get(manifest.Link{Href: "/unknown"}) + resource := testFileFetcher.Get(manifest.Link{Href: manifest.MustNewHREFFromString("unknown", false)}) _, err := resource.Length() assert.Equal(t, NotFound(err.Cause), err) } func TestFileFetcherReadNotFound(t *testing.T) { - resource := testFileFetcher.Get(manifest.Link{Href: "/unknown"}) + resource := testFileFetcher.Get(manifest.Link{Href: manifest.MustNewHREFFromString("unknown", false)}) _, err := resource.Read(0, 0) assert.Equal(t, NotFound(err.Cause), err) _, err = resource.Stream(&bytes.Buffer{}, 0, 0) @@ -30,7 +31,7 @@ func TestFileFetcherReadNotFound(t *testing.T) { } func TestFileFetcherHrefInMap(t *testing.T) { - resource := testFileFetcher.Get(manifest.Link{Href: 
"/file_href"}) + resource := testFileFetcher.Get(manifest.Link{Href: manifest.MustNewHREFFromString("file_href", false)}) bin, err := resource.Read(0, 0) if assert.Nil(t, err) { assert.Equal(t, "text", string(bin)) @@ -44,7 +45,7 @@ func TestFileFetcherHrefInMap(t *testing.T) { } func TestFileFetcherDirectoryFile(t *testing.T) { - resource := testFileFetcher.Get(manifest.Link{Href: "/dir_href/text1.txt"}) + resource := testFileFetcher.Get(manifest.Link{Href: manifest.MustNewHREFFromString("dir_href/text1.txt", false)}) bin, err := resource.Read(0, 0) if assert.Nil(t, err) { assert.Equal(t, "text1", string(bin)) @@ -58,7 +59,7 @@ func TestFileFetcherDirectoryFile(t *testing.T) { } func TestFileFetcherSubdirectoryFile(t *testing.T) { - resource := testFileFetcher.Get(manifest.Link{Href: "/dir_href/subdirectory/text2.txt"}) + resource := testFileFetcher.Get(manifest.Link{Href: manifest.MustNewHREFFromString("dir_href/subdirectory/text2.txt", false)}) bin, err := resource.Read(0, 0) assert.Nil(t, err) assert.Equal(t, "text2", string(bin)) @@ -71,7 +72,7 @@ func TestFileFetcherSubdirectoryFile(t *testing.T) { } func TestFileFetcherDirectoryNotFound(t *testing.T) { - resource := testFileFetcher.Get(manifest.Link{Href: "/dir_href/subdirectory"}) + resource := testFileFetcher.Get(manifest.Link{Href: manifest.MustNewHREFFromString("dir_href/subdirectory", false)}) _, err := resource.Read(0, 0) assert.Equal(t, NotFound(err.Cause), err) _, err = resource.Stream(&bytes.Buffer{}, 0, 0) @@ -79,7 +80,7 @@ func TestFileFetcherDirectoryNotFound(t *testing.T) { } func TestFileFetcherDirectoryTraversalNotFound(t *testing.T) { - resource := testFileFetcher.Get(manifest.Link{Href: "/dir_href/../text.txt"}) + resource := testFileFetcher.Get(manifest.Link{Href: manifest.MustNewHREFFromString("dir_href/../text.txt", false)}) _, err := resource.Read(0, 0) assert.Equal(t, NotFound(err.Cause), err, "cannot traverse up a directory using '..'") _, err = resource.Stream(&bytes.Buffer{}, 0, 0) 
@@ -87,7 +88,7 @@ func TestFileFetcherDirectoryTraversalNotFound(t *testing.T) { } func TestFileFetcherReadRange(t *testing.T) { - resource := testFileFetcher.Get(manifest.Link{Href: "/file_href"}) + resource := testFileFetcher.Get(manifest.Link{Href: manifest.MustNewHREFFromString("file_href", false)}) bin, err := resource.Read(0, 2) if assert.Nil(t, err) { assert.Equal(t, "tex", string(bin), "read data should be the first three bytes of the file") @@ -102,7 +103,7 @@ func TestFileFetcherReadRange(t *testing.T) { } func TestFileFetcherTwoRangesSameResource(t *testing.T) { - resource := testFileFetcher.Get(manifest.Link{Href: "/file_href"}) + resource := testFileFetcher.Get(manifest.Link{Href: manifest.MustNewHREFFromString("file_href", false)}) bin, err := resource.Read(0, 1) if assert.Nil(t, err) { assert.Equal(t, "te", string(bin)) @@ -127,7 +128,7 @@ func TestFileFetcherTwoRangesSameResource(t *testing.T) { } func TestFileFetcherOutOfRangeClamping(t *testing.T) { - resource := testFileFetcher.Get(manifest.Link{Href: "/file_href"}) + resource := testFileFetcher.Get(manifest.Link{Href: manifest.MustNewHREFFromString("file_href", false)}) bin, err := resource.Read(-5, 60) if assert.Nil(t, err) { assert.Equal(t, "text", string(bin)) @@ -141,7 +142,7 @@ func TestFileFetcherOutOfRangeClamping(t *testing.T) { } func TestFileFetcherDecreasingRange(t *testing.T) { - resource := testFileFetcher.Get(manifest.Link{Href: "/file_href"}) + resource := testFileFetcher.Get(manifest.Link{Href: manifest.MustNewHREFFromString("file_href", false)}) _, err := resource.Read(60, 20) if assert.Error(t, err) { assert.Equal(t, RangeNotSatisfiable(err.Cause), err, "range isn't satisfiable") @@ -153,20 +154,20 @@ func TestFileFetcherDecreasingRange(t *testing.T) { } func TestFileFetcherComputingLength(t *testing.T) { - resource := testFileFetcher.Get(manifest.Link{Href: "/file_href"}) + resource := testFileFetcher.Get(manifest.Link{Href: manifest.MustNewHREFFromString("file_href", false)}) 
length, err := resource.Length() assert.Nil(t, err) assert.EqualValues(t, 4, length) } func TestFileFetcherDirectoryLengthNotFound(t *testing.T) { - resource := testFileFetcher.Get(manifest.Link{Href: "/dir_href/subdirectory"}) + resource := testFileFetcher.Get(manifest.Link{Href: manifest.MustNewHREFFromString("dir_href/subdirectory", false)}) _, err := resource.Length() assert.Equal(t, NotFound(err.Cause), err) } func TestFileFetcherFileNotFoundLength(t *testing.T) { - resource := testFileFetcher.Get(manifest.Link{Href: "/unknown"}) + resource := testFileFetcher.Get(manifest.Link{Href: manifest.MustNewHREFFromString("unknown", false)}) _, err := resource.Length() assert.Equal(t, NotFound(err.Cause), err) } @@ -176,17 +177,17 @@ func TestFileFetcherLinks(t *testing.T) { assert.Nil(t, err) mustContain := manifest.LinkList{{ - Href: "/dir_href/subdirectory/hello.mp3", - Type: "audio/mpeg", + Href: manifest.MustNewHREFFromString("dir_href/subdirectory/hello.mp3", false), + MediaType: &mediatype.MP3, }, { - Href: "/dir_href/subdirectory/text2.txt", - Type: "text/plain", + Href: manifest.MustNewHREFFromString("dir_href/subdirectory/text2.txt", false), + MediaType: &mediatype.Text, }, { - Href: "/dir_href/text1.txt", - Type: "text/plain", + Href: manifest.MustNewHREFFromString("dir_href/text1.txt", false), + MediaType: &mediatype.Text, }, { - Href: "/file_href", - Type: "text/plain", + Href: manifest.MustNewHREFFromString("file_href", false), + MediaType: &mediatype.Text, }} assert.ElementsMatch(t, mustContain, links) diff --git a/pkg/fetcher/resource.go b/pkg/fetcher/resource.go index 01f18948..656654ba 100644 --- a/pkg/fetcher/resource.go +++ b/pkg/fetcher/resource.go @@ -13,6 +13,7 @@ import ( "github.com/readium/go-toolkit/pkg/archive" "github.com/readium/go-toolkit/pkg/manifest" "github.com/readium/xmlquery" + "golang.org/x/text/encoding" "golang.org/x/text/encoding/unicode" ) @@ -72,7 +73,10 @@ func ReadResourceAsString(r Resource) (string, *ResourceError) { if ex 
!= nil { return "", ex } - cs := r.Link().MediaType().Charset() + var cs encoding.Encoding + if r.Link().MediaType != nil { + cs = r.Link().MediaType.Charset() + } if cs == nil { cs = unicode.UTF8 } diff --git a/pkg/fetcher/resource_bytes.go b/pkg/fetcher/resource_bytes.go index ddbfd59b..9b0b592e 100644 --- a/pkg/fetcher/resource_bytes.go +++ b/pkg/fetcher/resource_bytes.go @@ -61,11 +61,11 @@ func (r *BytesResource) Read(start int64, end int64) ([]byte, *ResourceError) { // Bounds check length := int64(len(r._bytes)) - if start > (length - 1) { - start = length - 1 + if start > length { + start = length } - if end > length { - end = length + if end > (length - 1) { + end = length - 1 } return r._bytes[start : end+1], nil diff --git a/pkg/manifest/collection.go b/pkg/manifest/collection.go index 5394c101..68c6adb7 100644 --- a/pkg/manifest/collection.go +++ b/pkg/manifest/collection.go @@ -16,7 +16,7 @@ type PublicationCollectionMap map[string][]PublicationCollection // https://readium.org/webpub-manifest/schema/subcollection.schema.json type PublicationCollection struct { Metadata map[string]interface{} `json:"metadata,omitempty"` - Links []Link `json:"links,omitempty"` + Links LinkList `json:"links,omitempty"` Subcollections PublicationCollectionMap `json:"-"` } @@ -36,8 +36,9 @@ func appendPublicationCollectionToJSON(pc PublicationCollectionMap, obj map[stri // Parses a [PublicationCollection] from its RWPM JSON representation. // // TODO log [warnings] ? -// The [links]' href and their children's will be normalized recursively using the provided [normalizeHref] closure. -func PublicationCollectionFromJSON(rawJson interface{}, normalizeHref LinkHrefNormalizer) (*PublicationCollection, error) { +// +// The [links]' href and their children's will be normalized recursively using the provided [normalizeHref] closure. 
+func PublicationCollectionFromJSON(rawJson interface{}) (*PublicationCollection, error) { if rawJson == nil { return nil, nil } @@ -53,7 +54,7 @@ func PublicationCollectionFromJSON(rawJson interface{}, normalizeHref LinkHrefNo case map[string]interface{}: lkz, ok := dd["links"].([]interface{}) if ok { - links, err = LinksFromJSONArray(lkz, normalizeHref) + links, err = LinksFromJSONArray(lkz) if err != nil { return nil, errors.Wrap(err, "failed unmarshalling 'links'") } @@ -67,12 +68,12 @@ func PublicationCollectionFromJSON(rawJson interface{}, normalizeHref LinkHrefNo delete(dd, "links") delete(dd, "metadata") - subcollections, err = PublicationCollectionsFromJSON(dd, normalizeHref) + subcollections, err = PublicationCollectionsFromJSON(dd) if err != nil { return nil, errors.Wrap(err, "failed unmarshalling subcollections") } case []interface{}: - links, err = LinksFromJSONArray(dd, normalizeHref) + links, err = LinksFromJSONArray(dd) if err != nil { return nil, errors.Wrap(err, "failed unmarshalling as Link array") } @@ -97,7 +98,7 @@ func (pc *PublicationCollection) UnmarshalJSON(b []byte) error { if err != nil { return err } - fpc, err := PublicationCollectionFromJSON(object, LinkHrefNormalizerIdentity) + fpc, err := PublicationCollectionFromJSON(object) if err != nil { return err } @@ -120,7 +121,7 @@ func (pc PublicationCollection) MarshalJSON() ([]byte, error) { // Parses a map of [PublicationCollection] indexed by their roles from its RWPM JSON representation. // // The [Links]' href and their children's will be normalized recursively using the provided [normalizeHref] closure. 
-func PublicationCollectionsFromJSON(rawJson map[string]interface{}, normalizeHref LinkHrefNormalizer) (PublicationCollectionMap, error) { +func PublicationCollectionsFromJSON(rawJson map[string]interface{}) (PublicationCollectionMap, error) { if rawJson == nil { return nil, nil } @@ -136,7 +137,7 @@ func PublicationCollectionsFromJSON(rawJson map[string]interface{}, normalizeHre sub := rawJson[role] // Parses a list of links or a single collection object. - collection, err := PublicationCollectionFromJSON(sub, normalizeHref) + collection, err := PublicationCollectionFromJSON(sub) if collection != nil { if _, ok := collections[role]; ok { collections[role] = append(collections[role], *collection) @@ -147,7 +148,7 @@ func PublicationCollectionsFromJSON(rawJson map[string]interface{}, normalizeHre // Parses a list of collection objects. var newCollections []PublicationCollection for j, v := range subArr { - c, err := PublicationCollectionFromJSON(v, normalizeHref) + c, err := PublicationCollectionFromJSON(v) if err != nil { return nil, errors.Wrapf(err, "failed unmarshalling PublicationCollection for role %s at position %d", role, j) } @@ -178,7 +179,7 @@ func (pcm *PublicationCollectionMap) UnmarshalJSON(b []byte) error { if err != nil { return err } - fpc, err := PublicationCollectionsFromJSON(object, LinkHrefNormalizerIdentity) + fpc, err := PublicationCollectionsFromJSON(object) if err != nil { return err } diff --git a/pkg/manifest/collection_test.go b/pkg/manifest/collection_test.go index 173050dc..0194a2fb 100644 --- a/pkg/manifest/collection_test.go +++ b/pkg/manifest/collection_test.go @@ -4,6 +4,7 @@ import ( "encoding/json" "testing" + "github.com/readium/go-toolkit/pkg/util/url" "github.com/stretchr/testify/assert" ) @@ -15,7 +16,7 @@ func TestPubCollectionUnmarshalMinimalJSON(t *testing.T) { }`), &pc)) assert.Equal(t, PublicationCollection{ - Links: []Link{{Href: "/link"}}, + Links: []Link{{Href: NewHREF(url.MustURLFromString("/link"))}}, Metadata: 
map[string]interface{}{}, }, pc, "unmarshalled JSON object should be equal to PublicationCollection object") } @@ -53,29 +54,29 @@ func TestPubCollectionUnmarshalFullJSON(t *testing.T) { }`), &pc)) assert.Equal(t, PublicationCollection{ - Links: []Link{{Href: "/link"}}, + Links: []Link{{Href: NewHREF(url.MustURLFromString("/link"))}}, Metadata: map[string]interface{}{ "metadata1": "value", }, Subcollections: PublicationCollectionMap{ "sub1": {{ Metadata: map[string]interface{}{}, - Links: []Link{{Href: "/sublink"}}, + Links: []Link{{Href: NewHREF(url.MustURLFromString("/sublink"))}}, }}, "sub2": {{ Metadata: map[string]interface{}{}, - Links: []Link{{Href: "/sublink1"}, {Href: "/sublink2"}}, + Links: []Link{{Href: NewHREF(url.MustURLFromString("/sublink1"))}, {Href: NewHREF(url.MustURLFromString("/sublink2"))}}, }}, "sub3": { - {Metadata: map[string]interface{}{}, Links: []Link{{Href: "/sublink3"}}}, - {Metadata: map[string]interface{}{}, Links: []Link{{Href: "/sublink4"}}}, + {Metadata: map[string]interface{}{}, Links: []Link{{Href: NewHREF(url.MustURLFromString("/sublink3"))}}}, + {Metadata: map[string]interface{}{}, Links: []Link{{Href: NewHREF(url.MustURLFromString("/sublink4"))}}}, }, }, }, pc, "unmarshalled JSON object should be equal to PublicationCollection object") } func TestPubCollectionUnmarshalNilJSON(t *testing.T) { - pc, err := PublicationCollectionFromJSON(nil, nil) + pc, err := PublicationCollectionFromJSON(nil) assert.NoError(t, err) assert.Nil(t, pc) } @@ -105,21 +106,21 @@ func TestPubCollectionUnmarshalJSONMultipleCollections(t *testing.T) { } ] }`), &pcsr)) - pcs, err := PublicationCollectionsFromJSON(pcsr, LinkHrefNormalizerIdentity) + pcs, err := PublicationCollectionsFromJSON(pcsr) assert.NoError(t, err) assert.Equal(t, PublicationCollectionMap{ "sub1": {{ Metadata: map[string]interface{}{}, - Links: []Link{{Href: "/sublink"}}, + Links: []Link{{Href: NewHREF(url.MustURLFromString("/sublink"))}}, }}, "sub2": {{ Metadata: 
map[string]interface{}{}, - Links: []Link{{Href: "/sublink1"}, {Href: "/sublink2"}}, + Links: []Link{{Href: NewHREF(url.MustURLFromString("/sublink1"))}, {Href: NewHREF(url.MustURLFromString("/sublink2"))}}, }}, "sub3": { - {Metadata: map[string]interface{}{}, Links: []Link{{Href: "/sublink3"}}}, - {Metadata: map[string]interface{}{}, Links: []Link{{Href: "/sublink4"}}}, + {Metadata: map[string]interface{}{}, Links: []Link{{Href: NewHREF(url.MustURLFromString("/sublink3"))}}}, + {Metadata: map[string]interface{}{}, Links: []Link{{Href: NewHREF(url.MustURLFromString("/sublink4"))}}}, }, }, pcs, "unmarshalled JSON object should be equal to map of PublicationCollection to role") } @@ -127,7 +128,7 @@ func TestPubCollectionUnmarshalJSONMultipleCollections(t *testing.T) { func TestPubCollectionMinimalJSON(t *testing.T) { bin, err := json.Marshal(&PublicationCollection{ Metadata: map[string]interface{}{}, - Links: []Link{{Href: "/link"}}, + Links: []Link{{Href: NewHREF(url.MustURLFromString("/link"))}}, }) assert.NoError(t, err) assert.JSONEq(t, `{ @@ -141,19 +142,19 @@ func TestPubCollectionFullJSON(t *testing.T) { Metadata: map[string]interface{}{ "metadata1": "value", }, - Links: []Link{{Href: "/link"}}, + Links: []Link{{Href: NewHREF(url.MustURLFromString("/link"))}}, Subcollections: PublicationCollectionMap{ "sub1": {{ Metadata: map[string]interface{}{}, - Links: []Link{{Href: "/sublink"}}, + Links: []Link{{Href: NewHREF(url.MustURLFromString("/sublink"))}}, }}, "sub2": {{ Metadata: map[string]interface{}{}, - Links: []Link{{Href: "/sublink1"}, {Href: "/sublink2"}}, + Links: []Link{{Href: NewHREF(url.MustURLFromString("/sublink1"))}, {Href: NewHREF(url.MustURLFromString("/sublink2"))}}, }}, "sub3": { - {Metadata: map[string]interface{}{}, Links: []Link{{Href: "/sublink3"}}}, - {Metadata: map[string]interface{}{}, Links: []Link{{Href: "/sublink4"}}}, + {Metadata: map[string]interface{}{}, Links: []Link{{Href: NewHREF(url.MustURLFromString("/sublink3"))}}}, + 
{Metadata: map[string]interface{}{}, Links: []Link{{Href: NewHREF(url.MustURLFromString("/sublink4"))}}}, }, }, }) @@ -199,15 +200,15 @@ func TestPubCollectionMultipleCollectionsJSON(t *testing.T) { bin, err := json.Marshal(PublicationCollectionMap{ "sub1": {{ Metadata: map[string]interface{}{}, - Links: []Link{{Href: "/sublink"}}, + Links: []Link{{Href: NewHREF(url.MustURLFromString("/sublink"))}}, }}, "sub2": {{ Metadata: map[string]interface{}{}, - Links: []Link{{Href: "/sublink1"}, {Href: "/sublink2"}}, + Links: []Link{{Href: NewHREF(url.MustURLFromString("/sublink1"))}, {Href: NewHREF(url.MustURLFromString("/sublink2"))}}, }}, "sub3": { - {Metadata: map[string]interface{}{}, Links: []Link{{Href: "/sublink3"}}}, - {Metadata: map[string]interface{}{}, Links: []Link{{Href: "/sublink4"}}}, + {Metadata: map[string]interface{}{}, Links: []Link{{Href: NewHREF(url.MustURLFromString("/sublink3"))}}}, + {Metadata: map[string]interface{}{}, Links: []Link{{Href: NewHREF(url.MustURLFromString("/sublink4"))}}}, }, }) assert.NoError(t, err) diff --git a/pkg/manifest/contributor.go b/pkg/manifest/contributor.go index 3e78852b..92968353 100644 --- a/pkg/manifest/contributor.go +++ b/pkg/manifest/contributor.go @@ -16,7 +16,7 @@ type Contributor struct { Identifier string `json:"identifier,omitempty"` // An unambiguous reference to this contributor. Roles Strings `json:"role,omitempty"` // The roles of the contributor in the making of the publication. Position *float64 `json:"position,omitempty"` // The position of the publication in this collection/series, when the contributor represents a collection. TODO validator - Links []Link `json:"links,omitempty"` // Used to retrieve similar publications for the given contributor. + Links LinkList `json:"links,omitempty"` // Used to retrieve similar publications for the given contributor. 
} func (c Contributor) Name() string { @@ -33,7 +33,7 @@ func (c Contributor) SortAs() string { // Parses a [Contributor] from its RWPM JSON representation. // A contributor can be parsed from a single string, or a full-fledged object. // The [links]' href and their children's will be normalized recursively using the provided [normalizeHref] closure. -func ContributorFromJSON(rawJson interface{}, normalizeHref LinkHrefNormalizer) (*Contributor, error) { +func ContributorFromJSON(rawJson interface{}) (*Contributor, error) { if rawJson == nil { return nil, nil } @@ -75,7 +75,7 @@ func ContributorFromJSON(rawJson interface{}, normalizeHref LinkHrefNormalizer) // Links rawLinks, ok := dd["links"].([]interface{}) if ok { - links, err := LinksFromJSONArray(rawLinks, normalizeHref) + links, err := LinksFromJSONArray(rawLinks) if err != nil { return nil, errors.Wrap(err, "failed unmarshalling 'links'") } @@ -97,13 +97,13 @@ func ContributorFromJSON(rawJson interface{}, normalizeHref LinkHrefNormalizer) return c, nil } -func ContributorFromJSONArray(rawJsonArray interface{}, normalizeHref LinkHrefNormalizer) ([]Contributor, error) { +func ContributorFromJSONArray(rawJsonArray interface{}) ([]Contributor, error) { var contributors []Contributor switch rjx := rawJsonArray.(type) { case []interface{}: contributors = make([]Contributor, 0, len(rjx)) for i, entry := range rjx { - rc, err := ContributorFromJSON(entry, normalizeHref) + rc, err := ContributorFromJSON(entry) if err != nil { return nil, errors.Wrapf(err, "failed unmarshalling Contributor at position %d", i) } @@ -113,7 +113,7 @@ func ContributorFromJSONArray(rawJsonArray interface{}, normalizeHref LinkHrefNo contributors = append(contributors, *rc) } default: - c, err := ContributorFromJSON(rjx, normalizeHref) + c, err := ContributorFromJSON(rjx) if err != nil { return nil, err } @@ -139,7 +139,7 @@ func (c *Contributor) UnmarshalJSON(data []byte) error { if err != nil { return err } - fc, err := 
ContributorFromJSON(d, LinkHrefNormalizerIdentity) + fc, err := ContributorFromJSON(d) if err != nil { return err } diff --git a/pkg/manifest/contributor_test.go b/pkg/manifest/contributor_test.go index f2db8439..1e0b9b5e 100644 --- a/pkg/manifest/contributor_test.go +++ b/pkg/manifest/contributor_test.go @@ -36,10 +36,10 @@ func TestContributorUnmarshalFullJSON(t *testing.T) { Position: &position, Links: []Link{ { - Href: "http://link1", + Href: MustNewHREFFromString("http://link1", false), }, { - Href: "http://link2", + Href: MustNewHREFFromString("http://link2", false), }, }, } @@ -120,12 +120,10 @@ func TestContributorFullJSON(t *testing.T) { Position: &pos, Links: []Link{ { - Href: "http://link1", - Templated: true, + Href: MustNewHREFFromString("http://link1", true), }, { - Href: "http://link2", - Templated: false, + Href: MustNewHREFFromString("http://link2", false), }, }, } diff --git a/pkg/manifest/guided_navigation.go b/pkg/manifest/guided_navigation.go index c3ae76e0..b3abeb17 100644 --- a/pkg/manifest/guided_navigation.go +++ b/pkg/manifest/guided_navigation.go @@ -3,13 +3,14 @@ package manifest // Readium Guided Navigation Document // https://readium.org/guided-navigation/schema/document.schema.json type GuidedNavigationDocument struct { - Links []Link `json:"links,omitempty"` // References to other resources that are related to the current Guided Navigation Document. + Links LinkList `json:"links,omitempty"` // References to other resources that are related to the current Guided Navigation Document. Guided []GuidedNavigationObject `json:"guided"` // A sequence of resources and/or media fragments into these resources, meant to be presented sequentially to the user. 
} // Readium Guided Navigation Object // https://readium.org/guided-navigation/schema/object.schema.json // TODO: Role should be typed +// TODO: all refs should be url.URL type GuidedNavigationObject struct { AudioRef string `json:"audioref,omitempty"` // References an audio resource or a fragment of it. ImgRef string `json:"imgref,omitempty"` // References an image or a fragment of it. diff --git a/pkg/manifest/href.go b/pkg/manifest/href.go new file mode 100644 index 00000000..d3ca7cfa --- /dev/null +++ b/pkg/manifest/href.go @@ -0,0 +1,108 @@ +package manifest + +import ( + "github.com/readium/go-toolkit/pkg/util/url" + "github.com/readium/go-toolkit/pkg/util/url/uritemplates" +) + +// An hypertext reference points to a resource in a [Publication]. +// It is potentially templated, use [Resolve] to get the actual URL. +type HREF struct { + // Only one of these two is set in an instance. + href url.URL + template string +} + +// Creates an [HREF] from a valid URL. +func NewHREF(href url.URL) HREF { + return HREF{href: href} +} + +// Proxy for NewHREFFromString which panics if the URL is invalid. +func MustNewHREFFromString(href string, templated bool) HREF { + h, err := NewHREFFromString(href, templated) + if err != nil { + panic(err) + } + return h +} + +// Creates an [HREF] from a valid URL or URL template (RFC 6570). +// `templated` Indicates whether [href] is a URL template. +func NewHREFFromString(href string, templated bool) (HREF, error) { + if templated { + // Check that the produced URL is valid + eurl, _, err := uritemplates.Expand(href, nil) + if err != nil { + return HREF{}, err + } + _, err = url.URLFromString(eurl) + if err != nil { + return HREF{}, err + } + return HREF{ + template: href, + }, err + } else { + u, err := url.URLFromString(href) + if err != nil { + return HREF{}, err + } + return NewHREF(u), nil + } +} + +// Returns the URL represented by this HREF, resolved to the given [base] URL. 
+// If the HREF is a template, the [parameters] are used to expand it according to RFC 6570. +func (h HREF) Resolve(base url.URL, parameters map[string]string) url.URL { + if h.IsTemplated() { + exp, _, err := uritemplates.Expand(h.template, parameters) + if err != nil { + panic("Invalid URL template expansion: " + err.Error()) + } + u, err := url.URLFromString(exp) + if err != nil { + panic("Invalid URL template expansion: " + err.Error()) + } + if base == nil { + return u + } + return base.Resolve(u) + } else { + if base == nil { + return h.href + } + return base.Resolve(h.href) + } +} + +// Indicates whether this HREF is templated. +func (h HREF) IsTemplated() bool { + return h.template != "" +} + +// List of URI template parameter keys, if the HREF is templated. +func (h HREF) Parameters() []string { + if h.IsTemplated() { + v, _ := uritemplates.Values(h.template) + return v + } + return []string{} +} + +// Resolves the receiver HREF to the given [baseUrl]. +func (h HREF) ResolveTo(baseURL url.URL) HREF { + if h.IsTemplated() { + // WARNING: Cannot safely resolve a URI template to a base URL before expanding it + } else { + h.href = baseURL.Resolve(h.href) + } + return h +} + +func (h HREF) String() string { + if h.IsTemplated() { + return h.template + } + return h.href.String() +} diff --git a/pkg/manifest/href_normalizer.go b/pkg/manifest/href_normalizer.go new file mode 100644 index 00000000..3bdb24dd --- /dev/null +++ b/pkg/manifest/href_normalizer.go @@ -0,0 +1,62 @@ +package manifest + +import "github.com/readium/go-toolkit/pkg/util/url" + +// Returns a copy of the receiver after normalizing its HREFs to the link with `rel="self"`. +func (m Manifest) NormalizeHREFsToSelf() Manifest { + self := m.LinkWithRel("self") + if self == nil { + return m + } + + return m.NormalizeHREFsToBase(self.URL(nil, nil)) +} + +// Returns a copy of the receiver after normalizing its HREFs to the given [baseUrl]. 
+func (m Manifest) NormalizeHREFsToBase(baseURL url.URL) Manifest { + if baseURL == nil { + return m + } + + return m.Copy(NewHREFNormalizer(baseURL)) +} + +// Returns a copy of the receiver after normalizing its HREFs to the given [baseUrl]. +func (l Link) NormalizeHREFsToBase(baseURL url.URL) Link { + if baseURL == nil { + return l + } + + return l.Copy(NewHREFNormalizer(baseURL)) +} + +type HREFNormalizer struct { + baseURL url.URL +} + +func NewHREFNormalizer(baseURL url.URL) HREFNormalizer { + return HREFNormalizer{baseURL: baseURL} +} + +// TransformHREF implements ManifestTransformer +func (n HREFNormalizer) TransformHREF(href HREF) HREF { + return href.ResolveTo(n.baseURL) +} + +// TransformLink implements ManifestTransformer +func (n HREFNormalizer) TransformLink(link Link) Link { + // Identity + return link +} + +// TransformManifest implements ManifestTransformer +func (n HREFNormalizer) TransformManifest(manifest Manifest) Manifest { + // Identity + return manifest +} + +// TransformMetadata implements ManifestTransformer +func (n HREFNormalizer) TransformMetadata(metadata Metadata) Metadata { + // Identity + return metadata +} diff --git a/pkg/manifest/href_test.go b/pkg/manifest/href_test.go new file mode 100644 index 00000000..d053c626 --- /dev/null +++ b/pkg/manifest/href_test.go @@ -0,0 +1,70 @@ +package manifest + +import ( + "testing" + + "github.com/readium/go-toolkit/pkg/util/url" + "github.com/stretchr/testify/assert" +) + +var base, _ = url.URLFromString("http://readium/publication/") + +func TestConvertStaticHREFToURL(t *testing.T) { + u, _ := url.URLFromString("folder/chapter.xhtml") + assert.Equal(t, u, NewHREF(u).Resolve(nil, nil)) + u2, _ := url.URLFromString("http://readium/publication/folder/chapter.xhtml") + assert.Equal(t, u2, NewHREF(u).Resolve(base, nil)) + + // Parameters are ignored + assert.Equal(t, u, NewHREF(u).Resolve(nil, map[string]string{"a": "b"})) +} + +func TestConvertTemplatedHREFToURL(t *testing.T) { + template, _ := 
NewHREFFromString("url{?x,hello,y}name", true) + + parameters := map[string]string{ + "x": "aaa", + "hello": "Hello, world", + "y": "b", + "foo": "bar", + } + + u, _ := url.URLFromString("urlname") + assert.Equal(t, u, template.Resolve(nil, nil)) + + u, _ = url.URLFromString("http://readium/publication/urlname") + assert.Equal(t, u, template.Resolve(base, nil)) + + u, _ = url.URLFromString("http://readium/publication/url?x=aaa&hello=Hello%2C%20world&y=bname") + assert.Equal(t, u, template.Resolve(base, parameters)) +} + +func TestHREFIsTemplated(t *testing.T) { + h, _ := NewHREFFromString("folder/chapter.xhtml", false) + assert.False(t, h.IsTemplated()) + + h, _ = NewHREFFromString("url", true) + assert.True(t, h.IsTemplated()) + + h, _ = NewHREFFromString("url{?x,hello,y}name", true) + assert.True(t, h.IsTemplated()) +} + +func TestHREFParameters(t *testing.T) { + h, _ := NewHREFFromString("url", false) + assert.Equal(t, []string{}, h.Parameters()) + + h, _ = NewHREFFromString("url", true) + assert.Equal(t, []string{}, h.Parameters()) + + h, _ = NewHREFFromString("url{?x,hello,y}name", true) + assert.Equal(t, []string{"x", "hello", "y"}, h.Parameters()) +} + +func TestHREFToString(t *testing.T) { + h, _ := NewHREFFromString("folder/chapter.xhtml", false) + assert.Equal(t, "folder/chapter.xhtml", h.String()) + + h, _ = NewHREFFromString("url{?x,hello,y}name", true) + assert.Equal(t, "url{?x,hello,y}name", h.String()) +} diff --git a/pkg/manifest/link.go b/pkg/manifest/link.go index 58a2075d..011233d3 100644 --- a/pkg/manifest/link.go +++ b/pkg/manifest/link.go @@ -2,106 +2,84 @@ package manifest import ( "encoding/json" - "strings" "github.com/pkg/errors" "github.com/readium/go-toolkit/pkg/mediatype" - "github.com/readium/go-toolkit/pkg/util" + "github.com/readium/go-toolkit/pkg/util/url" ) -// Function used to recursively transform the href of a [Link] when parsing its JSON representation. 
-type LinkHrefNormalizer func(href string) (string, error) - -// Default href normalizer for [Link], doing nothing. -func LinkHrefNormalizerIdentity(href string) (string, error) { - return href, nil -} - // Link // https://github.com/readium/webpub-manifest/blob/master/README.md#24-the-link-object // https://github.com/readium/webpub-manifest/blob/master/schema/link.schema.json type Link struct { - Href string `json:"href"` // URI or URI template of the linked resource. - Type string `json:"type,omitempty"` // MIME type of the linked resource. - Templated bool `json:"templated,omitempty"` // Indicates that a URI template is used in href. - Title string `json:"title,omitempty"` // Title of the linked resource. - Rels Strings `json:"rel,omitempty"` // Relation between the linked resource and its containing collection. - Properties Properties `json:"properties,omitempty"` // Properties associated to the linked resource. - Height uint `json:"height,omitempty"` // Height of the linked resource in pixels. - Width uint `json:"width,omitempty"` // Width of the linked resource in pixels. - Bitrate float64 `json:"bitrate,omitempty"` // Bitrate of the linked resource in kbps. - Duration float64 `json:"duration,omitempty"` // Length of the linked resource in seconds. - Languages Strings `json:"language,omitempty"` // Expected language of the linked resource (BCP 47 tag). - Alternates LinkList `json:"alternate,omitempty"` // Alternate resources for the linked resource. - Children LinkList `json:"children,omitempty"` // Resources that are children of the linked resource, in the context of a given collection role. -} - -func (l Link) MediaType() mediatype.MediaType { - mt := mediatype.OfString(l.Type) - if mt == nil { - return mediatype.Binary - } - return *mt -} - -// List of URI template parameter keys, if the [Link] is templated. 
-func (l Link) TemplateParameters() []string { - if !l.Templated { - return nil - } - return util.NewURITemplate(l.Href).Parameters() + Href HREF `json:"href"` // URI or URI template of the linked resource. + MediaType *mediatype.MediaType `json:"type,omitempty"` // MIME type of the linked resource. + Title string `json:"title,omitempty"` // Title of the linked resource. + Rels Strings `json:"rel,omitempty"` // Relation between the linked resource and its containing collection. + Properties Properties `json:"properties,omitempty"` // Properties associated to the linked resource. + Height uint `json:"height,omitempty"` // Height of the linked resource in pixels. + Width uint `json:"width,omitempty"` // Width of the linked resource in pixels. + Bitrate float64 `json:"bitrate,omitempty"` // Bitrate of the linked resource in kbps. + Duration float64 `json:"duration,omitempty"` // Length of the linked resource in seconds. + Languages Strings `json:"language,omitempty"` // Expected language of the linked resource (BCP 47 tag). + Alternates LinkList `json:"alternate,omitempty"` // Alternate resources for the linked resource. + Children LinkList `json:"children,omitempty"` // Resources that are children of the linked resource, in the context of a given collection role. } -// Expands the HREF by replacing URI template variables by the given parameters. -func (l Link) ExpandTemplate(parameters map[string]string) Link { - l.Href = util.NewURITemplate(l.Href).Expand(parameters) - l.Templated = false - return l -} - -// Computes an absolute URL to the link, relative to the given [baseUrl]. -// If the link's [href] is already absolute, the [baseUrl] is ignored. -func (l Link) ToURL(baseURL string) string { - href := strings.TrimPrefix(l.Href, "/") - if href == "" { - return "" - } - if baseURL == "" { - baseURL = "/" - } - h, _ := util.NewHREF(href, baseURL).PercentEncodedString() - return h +// Returns the URL represented by this link's HREF, resolved to the given [base] URL. 
+// If the HREF is a template, the [parameters] are used to expand it according to RFC 6570. +func (l Link) URL(base url.URL, parameters map[string]string) url.URL { + return l.Href.Resolve(base, parameters) } // Creates an [Link] from its RWPM JSON representation. -func LinkFromJSON(rawJson map[string]interface{}, normalizeHref LinkHrefNormalizer) (*Link, error) { +func LinkFromJSON(rawJson map[string]interface{}) (*Link, error) { if rawJson == nil { return nil, nil } - href, ok := rawJson["href"].(string) + rawHref, ok := rawJson["href"].(string) if !ok { // Warning: [href] is required return nil, errors.New("'href' is required in link") } - if normalizeHref == nil { - normalizeHref = LinkHrefNormalizerIdentity - } - href, err := normalizeHref(href) - if err != nil { - return nil, err + templated := parseOptBool(rawJson["templated"]) + var href HREF + var err error + if templated { + href, err = NewHREFFromString(rawHref, templated) + if err != nil { + return nil, errors.Wrap(err, "failed unmarshalling 'href' as URL template") + } + } else { + u, err := url.URLFromString(rawHref) + if err != nil { + u, err = url.URLFromDecodedPath(rawHref) + if err != nil { + return nil, errors.Wrap(err, "failed unmarshalling 'href' as URL") + } + } + href = NewHREF(u) } link := &Link{ - Href: href, - Type: parseOptString(rawJson["type"]), - Templated: parseOptBool(rawJson["templated"]), - Title: parseOptString(rawJson["title"]), - Height: float64ToUint(parseOptFloat64(rawJson["height"])), - Width: float64ToUint(parseOptFloat64(rawJson["width"])), - Bitrate: float64Positive(parseOptFloat64(rawJson["bitrate"])), - Duration: float64Positive(parseOptFloat64(rawJson["duration"])), + Href: href, + Title: parseOptString(rawJson["title"]), + Height: float64ToUint(parseOptFloat64(rawJson["height"])), + Width: float64ToUint(parseOptFloat64(rawJson["width"])), + Bitrate: float64Positive(parseOptFloat64(rawJson["bitrate"])), + Duration: float64Positive(parseOptFloat64(rawJson["duration"])), 
+ } + + // Media Type + rawType := parseOptString(rawJson["type"]) + if rawType != "" { + mediaType, err := mediatype.NewOfString(rawType) + if err != nil { + return nil, errors.Wrap(err, "failed unmarshalling 'type' as valid mimetype") + } + link.MediaType = &mediaType } // Properties @@ -127,7 +105,7 @@ func LinkFromJSON(rawJson map[string]interface{}, normalizeHref LinkHrefNormaliz // Alternates rawAlternates, ok := rawJson["alternate"].([]interface{}) if ok { - alternates, err := LinksFromJSONArray(rawAlternates, normalizeHref) + alternates, err := LinksFromJSONArray(rawAlternates) if err != nil { return nil, errors.Wrap(err, "failed unmarshalling 'alternate'") } @@ -137,7 +115,7 @@ func LinkFromJSON(rawJson map[string]interface{}, normalizeHref LinkHrefNormaliz // Children rawChildren, ok := rawJson["children"].([]interface{}) if ok { - children, err := LinksFromJSONArray(rawChildren, normalizeHref) + children, err := LinksFromJSONArray(rawChildren) if err != nil { return nil, errors.Wrap(err, "failed unmarshalling 'children'") } @@ -147,7 +125,7 @@ func LinkFromJSON(rawJson map[string]interface{}, normalizeHref LinkHrefNormaliz return link, nil } -func LinksFromJSONArray(rawJsonArray []interface{}, normalizeHref LinkHrefNormalizer) ([]Link, error) { +func LinksFromJSONArray(rawJsonArray []interface{}) ([]Link, error) { links := make([]Link, 0, len(rawJsonArray)) for i, entry := range rawJsonArray { entry, ok := entry.(map[string]interface{}) @@ -155,7 +133,7 @@ func LinksFromJSONArray(rawJsonArray []interface{}, normalizeHref LinkHrefNormal // TODO: Should this be a "warning", an error, or completely ignored? 
continue } - rl, err := LinkFromJSON(entry, normalizeHref) + rl, err := LinkFromJSON(entry) if err != nil { return nil, errors.Wrapf(err, "failed unmarshalling Link at position %d", i) } @@ -173,7 +151,7 @@ func (l *Link) UnmarshalJSON(b []byte) error { if err != nil { return err } - fl, err := LinkFromJSON(object, LinkHrefNormalizerIdentity) + fl, err := LinkFromJSON(object) if err != nil { return err } @@ -183,12 +161,12 @@ func (l *Link) UnmarshalJSON(b []byte) error { func (l Link) MarshalJSON() ([]byte, error) { res := make(map[string]interface{}) - res["href"] = l.Href - if l.Type != "" { - res["type"] = l.Type + res["href"] = l.Href.String() + if l.MediaType != nil { + res["type"] = l.MediaType.String() } - if l.Templated { - res["templated"] = l.Templated + if l.Href.IsTemplated() { + res["templated"] = true } if l.Title != "" { res["title"] = l.Title @@ -227,43 +205,21 @@ func (l Link) MarshalJSON() ([]byte, error) { type LinkList []Link // Returns the first [Link] with the given [href], or null if not found. -func (ll LinkList) IndexOfFirstWithHref(href string) int { +func (ll LinkList) IndexOfFirstWithHref(href url.URL) int { for i, link := range ll { - if link.Templated { - if strings.TrimPrefix(link.ExpandTemplate(nil).Href, "/") == href { - // TODO: remove trimming when href utils are updated - return i - } - } - if link.Href == href { + if link.URL(nil, nil).Equivalent(href) { return i } - for _, alt := range link.Alternates { - if alt.Href == href { - return i - } - } } return -1 } // Finds the first link matching the given HREF. 
-func (ll LinkList) FirstWithHref(href string) *Link { +func (ll LinkList) FirstWithHref(href url.URL) *Link { for _, link := range ll { - if link.Templated { - if strings.TrimPrefix(link.ExpandTemplate(nil).Href, "/") == href { - // TODO: remove trimming when href utils are updated - return &link - } - } - if link.Href == href { + if link.URL(nil, nil).Equivalent(href) { return &link } - for _, alt := range link.Alternates { - if alt.Href == href { - return &alt - } - } } return nil } @@ -284,7 +240,6 @@ func (ll LinkList) FirstWithRel(rel string) *Link { func (ll LinkList) FilterByRel(rel string) LinkList { flinks := make([]Link, 0) for _, link := range ll { - // TODO should we check alternates? for _, r := range link.Rels { if r == rel { flinks = append(flinks, link) @@ -297,10 +252,9 @@ func (ll LinkList) FilterByRel(rel string) LinkList { // Finds the first link matching the given media type. func (ll LinkList) FirstWithMediaType(mt *mediatype.MediaType) *Link { for _, link := range ll { - if link.MediaType().Matches(mt) { + if link.MediaType.Matches(mt) { return &link } - // TODO should we check alternates? } return nil } @@ -309,10 +263,9 @@ func (ll LinkList) FirstWithMediaType(mt *mediatype.MediaType) *Link { func (ll LinkList) FilterByMediaType(mt ...*mediatype.MediaType) LinkList { flinks := make([]Link, 0) for _, link := range ll { - if link.MediaType().Matches(mt...) { + if link.MediaType.Matches(mt...) { flinks = append(flinks, link) } - // TODO should we check alternates? } return flinks } @@ -320,10 +273,9 @@ func (ll LinkList) FilterByMediaType(mt ...*mediatype.MediaType) LinkList { // Returns whether all the resources in the collection are bitmaps. func (ll LinkList) AllAreBitmap() bool { for _, link := range ll { - if !link.MediaType().IsBitmap() { + if !link.MediaType.IsBitmap() { return false } - // TODO should we check alternates? 
} return true } @@ -331,10 +283,9 @@ func (ll LinkList) AllAreBitmap() bool { // Returns whether all the resources in the collection are audio clips. func (ll LinkList) AllAreAudio() bool { for _, link := range ll { - if !link.MediaType().IsAudio() { + if !link.MediaType.IsAudio() { return false } - // TODO should we check alternates? } return true } @@ -342,10 +293,9 @@ func (ll LinkList) AllAreAudio() bool { // Returns whether all the resources in the collection are video clips. func (ll LinkList) AllAreVideo() bool { for _, link := range ll { - if !link.MediaType().IsVideo() { + if !link.MediaType.IsVideo() { return false } - // TODO should we check alternates? } return true } @@ -353,11 +303,9 @@ func (ll LinkList) AllAreVideo() bool { // Returns whether all the resources in the collection are bitmaps or video clips. func (ll LinkList) AllAreVisual() bool { for _, link := range ll { - mt := link.MediaType() - if !mt.IsBitmap() && !mt.IsVideo() { + if !link.MediaType.IsBitmap() && !link.MediaType.IsVideo() { return false } - // TODO should we check alternates? } return true } @@ -365,10 +313,9 @@ func (ll LinkList) AllAreVisual() bool { // Returns whether all the resources in the collection are HTML documents. func (ll LinkList) AllAreHTML() bool { for _, link := range ll { - if !link.MediaType().IsHTML() { + if !link.MediaType.IsHTML() { return false } - // TODO should we check alternates? } return true } @@ -376,10 +323,20 @@ func (ll LinkList) AllAreHTML() bool { // Returns whether all the resources in the collection are matching the given media type. func (ll LinkList) AllMatchMediaType(mt ...*mediatype.MediaType) bool { for _, link := range ll { - if !link.MediaType().Matches(mt...) { + if !link.MediaType.Matches(mt...) { return false } - // TODO should we check alternates? } return true } + +// Returns a list of `Link` after flattening the `children` and `alternates` links of the receiver. 
+func (ll LinkList) Flatten() LinkList { + links := make(LinkList, 0, len(ll)) + for _, link := range ll { + links = append(links, link) + links = append(links, link.Alternates.Flatten()...) + links = append(links, link.Children.Flatten()...) + } + return links +} diff --git a/pkg/manifest/link_test.go b/pkg/manifest/link_test.go index f34a84b6..b3e044ef 100644 --- a/pkg/manifest/link_test.go +++ b/pkg/manifest/link_test.go @@ -5,14 +5,15 @@ import ( "testing" "github.com/readium/go-toolkit/pkg/mediatype" + "github.com/readium/go-toolkit/pkg/util/url" "github.com/stretchr/testify/assert" ) -func TestLinkTemplateParameters(t *testing.T) { +/*func TestLinkTemplateParameters(t *testing.T) { assert.Equal( t, []string{"x", "hello", "y", "z", "w"}, - Link{Href: "/url{?x,hello,y}name{z,y,w}", Templated: true}.TemplateParameters(), + Link{Href: "url{?x,hello,y}name{z,y,w}", Templated: true}.TemplateParameters(), ) } @@ -20,11 +21,11 @@ func TestLinkTemplateExpand(t *testing.T) { assert.Equal( t, Link{ - Href: "/url?x=aaa&hello=Hello,%20world&y=bname", + Href: "url?x=aaa&hello=Hello,%20world&y=bname", Templated: false, }, Link{ - Href: "/url{?x,hello,y}name", + Href: "url{?x,hello,y}name", Templated: true, }.ExpandTemplate(map[string]string{ "x": "aaa", @@ -32,12 +33,13 @@ func TestLinkTemplateExpand(t *testing.T) { "y": "b", }), ) -} +}*/ func TestLinkUnmarshalMinimalJSON(t *testing.T) { var l Link assert.NoError(t, json.Unmarshal([]byte(`{"href": "http://href"}`), &l)) - assert.Equal(t, Link{Href: "http://href"}, l, "parsed JSON object should be equal to Link object") + u, _ := url.URLFromString("http://href") + assert.Equal(t, Link{Href: NewHREF(u)}, l, "parsed JSON object should be equal to Link object") } func TestLinkUnmarshalFullJSON(t *testing.T) { @@ -57,18 +59,18 @@ func TestLinkUnmarshalFullJSON(t *testing.T) { "duration": 45.6, "language": "fr", "alternate": [ - {"href": "/alternate1"}, - {"href": "/alternate2"} + {"href": "alternate1"}, + {"href": "alternate2"} 
], "children": [ {"href": "http://child1"}, {"href": "http://child2"} ] }`), &l)) + h, _ := NewHREFFromString("http://href", true) assert.Equal(t, Link{ - Href: "http://href", - Type: "application/pdf", - Templated: true, + Href: h, + MediaType: &mediatype.PDF, Title: "Link Title", Rels: []string{"publication", "cover"}, Properties: Properties{ @@ -80,18 +82,18 @@ func TestLinkUnmarshalFullJSON(t *testing.T) { Duration: 45.6, Languages: []string{"fr"}, Alternates: []Link{ - {Href: "/alternate1"}, - {Href: "/alternate2"}, + {Href: MustNewHREFFromString("alternate1", false)}, + {Href: MustNewHREFFromString("alternate2", false)}, }, Children: []Link{ - {Href: "http://child1"}, - {Href: "http://child2"}, + {Href: MustNewHREFFromString("http://child1", false)}, + {Href: MustNewHREFFromString("http://child2", false)}, }, }, l, "parsed JSON object should be equal to Link object") } func TestLinkUnmarshalNilJSON(t *testing.T) { - s, err := LinkFromJSON(nil, nil) + s, err := LinkFromJSON(nil) assert.NoError(t, err) assert.Nil(t, s) } @@ -99,25 +101,25 @@ func TestLinkUnmarshalNilJSON(t *testing.T) { func TestLinkUnmarshalJSONRelString(t *testing.T) { var l Link assert.NoError(t, json.Unmarshal([]byte(`{"href": "a", "rel": "publication"}`), &l)) - assert.Equal(t, Link{Href: "a", Rels: []string{"publication"}}, l) + assert.Equal(t, Link{Href: MustNewHREFFromString("a", false), Rels: []string{"publication"}}, l) } func TestLinkUnmarshalJSONTemplatedDefaultFalse(t *testing.T) { var l Link assert.NoError(t, json.Unmarshal([]byte(`{"href": "a"}`), &l)) - assert.False(t, l.Templated) + assert.False(t, l.Href.IsTemplated()) } func TestLinkUnmarshalJSONTemplatedNilFalse(t *testing.T) { var l Link assert.NoError(t, json.Unmarshal([]byte(`{"href": "a", "templated": null}`), &l)) - assert.False(t, l.Templated) + assert.False(t, l.Href.IsTemplated()) } func TestLinkUnmarshalJSONMultipleLanguages(t *testing.T) { var l Link assert.NoError(t, json.Unmarshal([]byte(`{"href": "a", 
"language": ["fr", "en"]}`), &l)) - assert.Equal(t, Link{Href: "a", Languages: []string{"fr", "en"}}, l) + assert.Equal(t, Link{Href: MustNewHREFFromString("a", false), Languages: []string{"fr", "en"}}, l) } func TestLinkUnmarshalJSONRequiresHref(t *testing.T) { @@ -156,13 +158,13 @@ func TestLinkUnmarshalJSONArray(t *testing.T) { {"href": "http://child2"} ]`), &ll)) assert.Equal(t, []Link{ - {Href: "http://child1"}, - {Href: "http://child2"}, + {Href: MustNewHREFFromString("http://child1", false)}, + {Href: MustNewHREFFromString("http://child2", false)}, }, ll, "parsed JSON array should be equal to Link slice") } func TestLinkUnmarshalJSONNilArray(t *testing.T) { - ll, err := LinksFromJSONArray(nil, nil) + ll, err := LinksFromJSONArray(nil) assert.NoError(t, err) assert.Equal(t, []Link{}, ll) } @@ -176,16 +178,15 @@ func TestLinkUnmarshalJSONArrayRefusesInvalidLinks(t *testing.T) { } func TestLinkMinimalJSON(t *testing.T) { - b, err := json.Marshal(Link{Href: "http://href"}) + b, err := json.Marshal(Link{Href: MustNewHREFFromString("http://href", false)}) assert.NoError(t, err) assert.JSONEq(t, `{"href": "http://href"}`, string(b)) } func TestLinkFullJSON(t *testing.T) { b, err := json.Marshal(Link{ - Href: "http://href", - Type: "application/pdf", - Templated: true, + Href: MustNewHREFFromString("http://href", true), + MediaType: &mediatype.PDF, Title: "Link Title", Rels: []string{"publication", "cover"}, Properties: Properties{ @@ -197,12 +198,12 @@ func TestLinkFullJSON(t *testing.T) { Duration: 45.6, Languages: []string{"fr"}, Alternates: []Link{ - {Href: "/alternate1"}, - {Href: "/alternate2"}, + {Href: MustNewHREFFromString("alternate1", false)}, + {Href: MustNewHREFFromString("alternate2", false)}, }, Children: []Link{ - {Href: "http://child1"}, - {Href: "http://child2"}, + {Href: MustNewHREFFromString("http://child1", false)}, + {Href: MustNewHREFFromString("http://child2", false)}, }, }) assert.NoError(t, err) @@ -221,8 +222,8 @@ func TestLinkFullJSON(t 
*testing.T) { "duration": 45.6, "language": "fr", "alternate": [ - {"href": "/alternate1"}, - {"href": "/alternate2"} + {"href": "alternate1"}, + {"href": "alternate2"} ], "children": [ {"href": "http://child1"}, @@ -233,8 +234,8 @@ func TestLinkFullJSON(t *testing.T) { func TestLinkJSONArray(t *testing.T) { b, err := json.Marshal([]Link{ - {Href: "http://child1"}, - {Href: "http://child2"}, + {Href: MustNewHREFFromString("http://child1", false)}, + {Href: MustNewHREFFromString("http://child2", false)}, }) assert.NoError(t, err) assert.JSONEq(t, `[ @@ -243,50 +244,50 @@ func TestLinkJSONArray(t *testing.T) { ]`, string(b)) } -func TestLinkUnknownMediaType(t *testing.T) { - assert.Equal(t, mediatype.Binary, Link{Href: "file"}.MediaType()) -} +/*func TestLinkUnknownMediaType(t *testing.T) { + assert.Equal(t, &mediatype.Binary, Link{Href: MustNewHREFFromString("file", false)}.MediaType) +}*/ -func TestLinkMediaTypeFromType(t *testing.T) { +/*func TestLinkMediaTypeFromType(t *testing.T) { assert.Equal(t, mediatype.EPUB, Link{Href: "file", Type: "application/epub+zip"}.MediaType()) assert.Equal(t, mediatype.PDF, Link{Href: "file", Type: "application/pdf"}.MediaType()) -} +}*/ func TestLinkToURLRelativeToBase(t *testing.T) { - assert.Equal(t, "http://host/folder/file.html", Link{Href: "folder/file.html"}.ToURL("http://host/")) + assert.Equal(t, "http://host/folder/file.html", Link{Href: MustNewHREFFromString("folder/file.html", false)}.URL(url.MustURLFromString("http://host/"), nil).String()) } func TestLinkToURLRelativeToBaseWithRootPrefix(t *testing.T) { - assert.Equal(t, "http://host/folder/file.html", Link{Href: "/file.html"}.ToURL("http://host/folder/")) + assert.Equal(t, "http://host/folder/file.html", Link{Href: MustNewHREFFromString("folder/file.html", false)}.URL(url.MustURLFromString("http://host/"), nil).String()) } func TestLinkToURLRelativeToNothing(t *testing.T) { - assert.Equal(t, "/folder/file.html", Link{Href: "folder/file.html"}.ToURL("")) + 
assert.Equal(t, "folder/file.html", Link{Href: MustNewHREFFromString("folder/file.html", false)}.URL(nil, nil).String()) } -func TestLinkToURLWithInvalidHref(t *testing.T) { - assert.Empty(t, Link{Href: ""}.ToURL("http://test.com")) -} +/*func TestLinkToURLWithInvalidHref(t *testing.T) { + assert.Empty(t, Link{Href: MustNewHREFFromString("", false)}.URL(url.MustURLFromString("http://test.com"), nil).String()) +}*/ func TestLinkToURLWithAbsoluteHref(t *testing.T) { - assert.Equal(t, "http://test.com/folder/file.html", Link{Href: "http://test.com/folder/file.html"}.ToURL("http://host/")) + assert.Equal(t, "http://test.com/folder/file.html", Link{Href: MustNewHREFFromString("http://test.com/folder/file.html", false)}.URL(url.MustURLFromString("http://host/"), nil).String()) } func TestLinkToURLWithHrefContainingInvalidChars(t *testing.T) { // Original expected: "http://host/folder/Cory%20Doctorow's/a-fc.jpg". TODO: is it not good that the ' got escaped? - assert.Equal(t, "http://host/folder/Cory%20Doctorow%27s/a-fc.jpg", Link{Href: "/Cory Doctorow's/a-fc.jpg"}.ToURL("http://host/folder/")) + assert.Equal(t, "http://host/folder/Cory%20Doctorow%27s/a-fc.jpg", Link{Href: MustNewHREFFromString("Cory Doctorow's/a-fc.jpg", false)}.URL(url.MustURLFromString("http://host/folder/"), nil).String()) } func TestLinkFirstIndexLinkWithHrefInList(t *testing.T) { - assert.Equal(t, -1, LinkList{Link{Href: "href"}}.IndexOfFirstWithHref("foobar")) + assert.Equal(t, -1, LinkList{Link{Href: MustNewHREFFromString("href", false)}}.IndexOfFirstWithHref(url.MustURLFromString("foobar"))) assert.Equal( t, 1, LinkList{ - Link{Href: "href1"}, - Link{Href: "href2"}, - Link{Href: "href2"}, // duplicated on purpose - }.IndexOfFirstWithHref("href2"), + Link{Href: MustNewHREFFromString("href1", false)}, + Link{Href: MustNewHREFFromString("href2", false)}, + Link{Href: MustNewHREFFromString("href2", false)}, // duplicated on purpose + }.IndexOfFirstWithHref(url.MustURLFromString("href2")), ) } diff 
--git a/pkg/manifest/locator.go b/pkg/manifest/locator.go index 9225bab8..3aae4db4 100644 --- a/pkg/manifest/locator.go +++ b/pkg/manifest/locator.go @@ -4,6 +4,8 @@ import ( "encoding/json" "github.com/pkg/errors" + "github.com/readium/go-toolkit/pkg/mediatype" + "github.com/readium/go-toolkit/pkg/util/url" ) // One or more alternative expressions of the location. @@ -149,6 +151,32 @@ func TextFromJSON(rawJson map[string]interface{}) (t Text) { return } +func (t Text) Substring(start, end int64) Text { + if t.Highlight == "" { + return t + } + + length := int64(len(t.Highlight)) + if start > length-1 { + start = length + } + if start < 0 { + start = 0 + } + if end > length-1 { + end = length - 1 + } + if end < 0 { + end = 0 + } + + t.Before += t.Highlight[:start] + t.After = t.Highlight[end+1:] + t.After + t.Highlight = t.Highlight[start : end+1] + + return t +} + // Locator provides a precise location in a publication in a format that can be stored and shared. // // There are many different use cases for locators: @@ -160,11 +188,11 @@ func TextFromJSON(rawJson map[string]interface{}) (t Text) { // // https://github.com/readium/architecture/tree/master/locators type Locator struct { - Href string `json:"href"` - Type string `json:"type"` - Title string `json:"title,omitempty"` - Locations Locations `json:"locations,omitempty"` - Text Text `json:"text,omitempty"` + Href url.URL `json:"href"` + MediaType mediatype.MediaType `json:"type"` + Title string `json:"title,omitempty"` + Locations Locations `json:"locations,omitempty"` + Text Text `json:"text,omitempty"` } func LocatorFromJSON(rawJson map[string]interface{}) (Locator, error) { @@ -172,13 +200,25 @@ func LocatorFromJSON(rawJson map[string]interface{}) (Locator, error) { return Locator{}, nil } + rawHref := parseOptString(rawJson["href"]) + rawType := parseOptString(rawJson["type"]) + if rawHref == "" || rawType == "" { + return Locator{}, errors.New("'href' and 'type' are required") + } + locator := Locator{ 
- Href: parseOptString(rawJson["href"]), - Type: parseOptString(rawJson["type"]), Title: parseOptString(rawJson["title"]), } - if locator.Href == "" || locator.Type == "" { - return Locator{}, errors.New("'href' and 'type' are required") + + url, err := url.URLFromString(rawHref) + if err != nil { + return Locator{}, errors.Wrap(err, "failed unmarshalling 'href' as URL") + } + locator.Href = url + + locator.MediaType, err = mediatype.NewOfString(rawType) + if err != nil { + return Locator{}, errors.Wrap(err, "failed unmarshalling 'type' as valid mimetype") } if rawLocations, ok := rawJson["locations"].(map[string]interface{}); ok { @@ -212,8 +252,14 @@ func (l *Locator) UnmarshalJSON(b []byte) error { func (l Locator) MarshalJSON() ([]byte, error) { j := make(map[string]interface{}) - j["href"] = l.Href - j["type"] = l.Type + + if l.Href == nil { + return nil, errors.New("href is required in Locator") + } + j["href"] = l.Href.String() + + j["type"] = l.MediaType.String() + if l.Title != "" { j["title"] = l.Title } diff --git a/pkg/manifest/locator_test.go b/pkg/manifest/locator_test.go index 1756275a..b852fa39 100644 --- a/pkg/manifest/locator_test.go +++ b/pkg/manifest/locator_test.go @@ -5,6 +5,8 @@ import ( "testing" "github.com/readium/go-toolkit/pkg/internal/extensions" + "github.com/readium/go-toolkit/pkg/mediatype" + "github.com/readium/go-toolkit/pkg/util/url" "github.com/stretchr/testify/assert" ) @@ -15,8 +17,8 @@ func TestLocatorUnmarshalMinimalJSON(t *testing.T) { "type": "text/html" }`), &l)) assert.Equal(t, Locator{ - Href: "http://locator", - Type: "text/html", + Href: url.MustURLFromString("http://locator"), + MediaType: mediatype.HTML, }, l) } @@ -34,8 +36,8 @@ func TestLocatorUnmarshalJSON(t *testing.T) { } }`), &l)) assert.Equal(t, Locator{ - Href: "http://locator", - Type: "text/html", + Href: url.MustURLFromString("http://locator"), + MediaType: mediatype.HTML, Title: "My Locator", Locations: Locations{Position: extensions.Pointer[uint](42)}, 
Text: Text{Highlight: "Excerpt"}, @@ -49,8 +51,8 @@ func TestLocatorUnmarshalInvalidJSON(t *testing.T) { func TestLocatorMinimalJSON(t *testing.T) { s, err := json.Marshal(&Locator{ - Href: "http://locator", - Type: "text/html", + Href: url.MustURLFromString("http://locator"), + MediaType: mediatype.HTML, }) assert.NoError(t, err) assert.JSONEq(t, `{ @@ -61,9 +63,9 @@ func TestLocatorMinimalJSON(t *testing.T) { func TestLocatorJSON(t *testing.T) { s, err := json.Marshal(&Locator{ - Href: "http://locator", - Type: "text/html", - Title: "My Locator", + Href: url.MustURLFromString("http://locator"), + MediaType: mediatype.HTML, + Title: "My Locator", Locations: Locations{ Position: extensions.Pointer[uint](42), }, @@ -178,10 +180,13 @@ func TestLocationsUnmarshalIgnoresTotalProgressionOutOfRange(t *testing.T) { } func TestLocationsMinimalJSON(t *testing.T) { - s, err := json.Marshal(Locator{}) + s, err := json.Marshal(Locator{ + Href: url.MustURLFromString("http://locator"), + MediaType: mediatype.HTML, + }) assert.NoError(t, err) // Note: href and type are not omitted because they are required! 
- assert.JSONEq(t, `{"href":"", "type":""}`, string(s), "JSON objects should be equal") + assert.JSONEq(t, `{"href": "http://locator", "type": "text/html"}`, string(s), "JSON objects should be equal") } func TestLocationsJSON(t *testing.T) { @@ -243,3 +248,53 @@ func TestTextJSON(t *testing.T) { "after": "Text after" }`, string(s), "JSON objects should be equal") } + +func TestSubstringFromRange(t *testing.T) { + text := Text{ + Before: "before", + Highlight: "highlight", + After: "after", + } + + assert.Equal(t, Text{ + Before: "before", + Highlight: "h", + After: "ighlightafter", + }, text.Substring(0, -1)) + + assert.Equal(t, Text{ + Before: "before", + Highlight: "h", + After: "ighlightafter", + }, text.Substring(0, 0)) + + assert.Equal(t, Text{ + Before: "beforehigh", + Highlight: "lig", + After: "htafter", + }, text.Substring(4, 6)) + + assert.Equal(t, Text{ + Before: "before", + Highlight: "highlight", + After: "after", + }, text.Substring(0, 8)) + + assert.Equal(t, Text{ + Before: "beforehighli", + Highlight: "ght", + After: "after", + }, text.Substring(6, 12)) + + assert.Equal(t, Text{ + Before: "beforehighligh", + Highlight: "t", + After: "after", + }, text.Substring(8, 12)) + + assert.Equal(t, Text{ + Before: "beforehighlight", + Highlight: "", + After: "after", + }, text.Substring(9, 12)) +} diff --git a/pkg/manifest/manifest.go b/pkg/manifest/manifest.go index 529717f8..8e69fd41 100644 --- a/pkg/manifest/manifest.go +++ b/pkg/manifest/manifest.go @@ -2,28 +2,25 @@ package manifest import ( "encoding/json" - "path" - "strings" + "slices" "github.com/pkg/errors" "github.com/readium/go-toolkit/pkg/internal/extensions" "github.com/readium/go-toolkit/pkg/mediatype" - "github.com/readium/go-toolkit/pkg/util" + "github.com/readium/go-toolkit/pkg/util/url" ) const WebpubManifestContext = "https://readium.org/webpub-manifest/context.jsonld" // Manifest Main structure for a publication type Manifest struct { - Context Strings `json:"@context,omitempty"` - 
Metadata Metadata `json:"metadata"` - Links LinkList `json:"links"` - ReadingOrder LinkList `json:"readingOrder,omitempty"` - Resources LinkList `json:"resources,omitempty"` //Replaces the manifest but less redundant - TableOfContents LinkList `json:"toc,omitempty"` - - Subcollections PublicationCollectionMap `json:"-"` //Extension point for collections that shouldn't show up in the manifest - // Internal []Internal `json:"-"` // TODO remove + Context Strings `json:"@context,omitempty"` + Metadata Metadata `json:"metadata"` + Links LinkList `json:"links"` + ReadingOrder LinkList `json:"readingOrder,omitempty"` + Resources LinkList `json:"resources,omitempty"` //Replaces the manifest but less redundant + TableOfContents LinkList `json:"toc,omitempty"` + Subcollections PublicationCollectionMap `json:"-"` //Extension point for collections that shouldn't show up in the manifest } // Returns whether this manifest conforms to the given Readium Web Publication Profile. @@ -59,13 +56,76 @@ func (m Manifest) ConformsTo(profile Profile) bool { // Finds the first [Link] with the given href in the manifest's links. // Searches through (in order) the reading order, resources and links recursively following alternate and children links. // If there's no match, tries again after removing any query parameter and anchor from the given href. 
-func (m Manifest) LinkWithHref(href string) *Link { - var deepLinkWithHref func(ll LinkList, href string) *Link - deepLinkWithHref = func(ll LinkList, href string) *Link { +func (m Manifest) LinkWithHref(href url.URL) *Link { + href = href.Normalize() // Normalize HREF here instead of in the loop + + var deepLinkWithHref func(ll LinkList, href url.URL) *Link + deepLinkWithHref = func(ll LinkList, href url.URL) *Link { for _, l := range ll { - if l.Href == href { + nu := l.URL(nil, nil).Normalize() // Normalized version of the href + + if nu.Equivalent(href) { + // Exactly equivalent after normalization return &l } else { + // Check if they have the same relative path after resolving, + // and no fragment, meaning only the query could be different + if nu.Path() == href.Path() && href.Fragment() == "" { + // Check for a possible fit in a templated href + // This is a special fast path for web services accepting arbitrary query parameters in the URL + if l.Href.IsTemplated() { // Templated URI + if params := l.Href.Parameters(); len(params) > 0 { + // At least one parameter in the URI template + matches := true + + // Check that every parameter in the URI template is present by key in the query + for _, p := range params { + if !href.Raw().Query().Has(p) { + matches = false + break + } + } + if matches { + // All template parameters are present in the query parameters + return &l + } + } + } else { + // Check for a possible fit in an href with query parameters + // This is a special fast path for web services accepting arbitrary query parameters in the URL + if len(nu.Raw().Query()) > 0 && len(href.Raw().Query()) > 0 { + // Both the give href and the one we're checking have query parameters + // If the given href has all the key/value pairs in the query that the + // one we're checking has, then they're equivalent! 
+ matches := true + q := href.Raw().Query() + for k, v := range nu.Raw().Query() { + slices.Sort(v) + if qv, ok := q[k]; ok { + if len(qv) > 1 { + slices.Sort(qv) + if !slices.Equal(qv, v) { + matches = false + break + } + } else { + if qv[0] != v[0] { + matches = false + break + } + } + } else { + matches = false + break + } + } + if matches { + return &l + } + } + } + } + if link := deepLinkWithHref(l.Alternates, href); link != nil { return link } @@ -77,7 +137,7 @@ func (m Manifest) LinkWithHref(href string) *Link { return nil } - find := func(href string) *Link { + find := func(href url.URL) *Link { if l := deepLinkWithHref(m.ReadingOrder, href); l != nil { return l } @@ -93,36 +153,28 @@ func (m Manifest) LinkWithHref(href string) *Link { if l := find(href); l != nil { return l } - if l := find(strings.SplitN(strings.SplitN(href, "#", 2)[0], "?", 2)[0]); l != nil { - return l + + broaderHref := href.RemoveFragment().RemoveQuery() + if !broaderHref.Equivalent(href) { + if l := find(broaderHref); l != nil { + return l + } } return nil } // Finds the first [Link] with the given relation in the manifest's links. func (m Manifest) LinkWithRel(rel string) *Link { - for _, resource := range m.Resources { - for _, resRel := range resource.Rels { - if resRel == rel { - return &resource - } - } + if rel := m.ReadingOrder.FirstWithRel(rel); rel != nil { + return rel } - for _, item := range m.ReadingOrder { - for _, spineRel := range item.Rels { - if spineRel == rel { - return &item - } - } + if rel := m.Resources.FirstWithRel(rel); rel != nil { + return rel } - for _, link := range m.Links { - for _, linkRel := range link.Rels { - if linkRel == rel { - return &link - } - } + if rel := m.Links.FirstWithRel(rel); rel != nil { + return rel } return nil @@ -130,31 +182,14 @@ func (m Manifest) LinkWithRel(rel string) *Link { // Finds all [Link]s having the given [rel] in the manifest's links. 
func (m Manifest) LinksWithRel(rel string) []Link { - var res []Link + r1 := m.ReadingOrder.FilterByRel(rel) + r2 := m.Resources.FilterByRel(rel) + r3 := m.Links.FilterByRel(rel) - for _, resource := range m.Resources { - for _, resRel := range resource.Rels { - if resRel == rel { - res = append(res, resource) - } - } - } - - for _, item := range m.ReadingOrder { - for _, spineRel := range item.Rels { - if spineRel == rel { - res = append(res, item) - } - } - } - - for _, link := range m.Links { - for _, linkRel := range link.Rels { - if linkRel == rel { - res = append(res, link) - } - } - } + res := make([]Link, 0, len(r1)+len(r2)+len(r3)) + res = append(res, r1...) + res = append(res, r2...) + res = append(res, r3...) return res } @@ -162,24 +197,23 @@ func (m Manifest) LinksWithRel(rel string) []Link { // Creates a new [Locator] object from a [Link] to a resource of this manifest. // Returns nil if the resource is not found in this manifest. func (m Manifest) LocatorFromLink(link Link) *Locator { - components := strings.SplitN(link.Href, "#", 2) - href := components[0] - resourceLink := m.LinkWithHref(href) + url := link.URL(nil, nil) + fragment := url.Fragment() + url = url.RemoveFragment() + + resourceLink := m.LinkWithHref(url) if resourceLink == nil { return nil } - if resourceLink.Type == "" { + mediaType := resourceLink.MediaType + if mediaType == nil { return nil } - var fragment string - if len(components) > 1 { - fragment = components[1] - } l := &Locator{ - Href: href, - Type: resourceLink.Type, - Title: resourceLink.Title, + Href: url, + MediaType: *mediaType, + Title: resourceLink.Title, } if l.Title == "" { @@ -211,28 +245,12 @@ func ManifestFromJSON(rawJson map[string]interface{}, packaged bool) (*Manifest, var links []Link var err error if ok { - links, err = LinksFromJSONArray(rawLinks, LinkHrefNormalizerIdentity) + links, err = LinksFromJSONArray(rawLinks) if err != nil { return nil, errors.Wrap(err, "failed unmarshalling 'links'") } } - baseURL 
:= "/" - if !packaged { - self := firstLinkWithRel(links, "self") - if self != nil { - url := extensions.ToUrlOrNull(self.Href) - if url != nil { - url.Path = path.Dir(url.Path) - baseURL = url.String() + "/" - } - } - } - - normalizeHref := func(href string) (string, error) { - return util.NewHREF(href, baseURL).String() - } - manifest := new(Manifest) // Context @@ -252,14 +270,14 @@ func ManifestFromJSON(rawJson map[string]interface{}, packaged bool) (*Manifest, if rmt == nil { return nil, errors.New("'metadata' is required") } - metadata, err := MetadataFromJSON(rmt, normalizeHref) + metadata, err := MetadataFromJSON(rmt) if err != nil { return nil, errors.Wrap(err, "failed unmarshalling 'metadata'") } manifest.Metadata = *metadata // Links - links, err = LinksFromJSONArray(rawLinks, normalizeHref) + links, err = LinksFromJSONArray(rawLinks) if err != nil { return nil, errors.Wrap(err, "failed unmarshalling 'links'") } @@ -287,13 +305,13 @@ func ManifestFromJSON(rawJson map[string]interface{}, packaged bool) (*Manifest, return nil, errors.New("Manifest has no valid 'readingOrder' or 'spine'") } } - readingOrder, err := LinksFromJSONArray(readingOrderRaw, normalizeHref) + readingOrder, err := LinksFromJSONArray(readingOrderRaw) if err != nil { return nil, errors.Wrap(err, "failed unmarshalling reading order") } manifest.ReadingOrder = make(LinkList, 0, len(readingOrder)) // More links with than without mimetypes for _, link := range readingOrder { - if link.Type == "" { + if link.MediaType == nil { continue } manifest.ReadingOrder = append(manifest.ReadingOrder, link) @@ -302,13 +320,13 @@ func ManifestFromJSON(rawJson map[string]interface{}, packaged bool) (*Manifest, // Resources resourcesRaw, ok := rawJson["resources"].([]interface{}) if ok { - resources, err := LinksFromJSONArray(resourcesRaw, normalizeHref) + resources, err := LinksFromJSONArray(resourcesRaw) if err != nil { return nil, errors.Wrap(err, "failed unmarshalling 'resources'") } 
manifest.Resources = make(LinkList, 0, len(resources)) // More resources with than without mimetypes for _, link := range resources { - if link.Type == "" { + if link.MediaType == nil { continue } manifest.Resources = append(manifest.Resources, link) @@ -318,7 +336,7 @@ func ManifestFromJSON(rawJson map[string]interface{}, packaged bool) (*Manifest, // TOC tocRaw, ok := rawJson["toc"].([]interface{}) if ok { - toc, err := LinksFromJSONArray(tocRaw, normalizeHref) + toc, err := LinksFromJSONArray(tocRaw) if err != nil { return nil, errors.Wrap(err, "failed unmarshalling 'toc'") } @@ -333,7 +351,7 @@ func ManifestFromJSON(rawJson map[string]interface{}, packaged bool) (*Manifest, } // Parses subcollections from the remaining JSON properties. - pcm, err := PublicationCollectionsFromJSON(rawJson, normalizeHref) + pcm, err := PublicationCollectionsFromJSON(rawJson) if err != nil { return nil, errors.Wrap(err, "failed unmarshalling remaining manifest data as subcollections of type PublicationCollectionMap") } diff --git a/pkg/manifest/manifest_test.go b/pkg/manifest/manifest_test.go index 8b78ee72..24c4a71a 100644 --- a/pkg/manifest/manifest_test.go +++ b/pkg/manifest/manifest_test.go @@ -4,6 +4,8 @@ import ( "encoding/json" "testing" + "github.com/readium/go-toolkit/pkg/mediatype" + "github.com/readium/go-toolkit/pkg/util/url" "github.com/stretchr/testify/assert" ) @@ -30,21 +32,21 @@ func TestManifestUnmarshalFullJSON(t *testing.T) { "@context": "https://readium.org/webpub-manifest/context.jsonld", "metadata": {"title": "Title"}, "links": [ - {"href": "/manifest.json", "rel": "self"} + {"href": "manifest.json", "rel": "self"} ], "readingOrder": [ - {"href": "/chap1.html", "type": "text/html"} + {"href": "chap1.html", "type": "text/html"} ], "resources": [ - {"href": "/image.png", "type": "image/png"} + {"href": "image.png", "type": "image/png"} ], "toc": [ - {"href": "/cover.html"}, - {"href": "/chap1.html"} + {"href": "cover.html"}, + {"href": "chap1.html"} ], "sub": 
{ "links": [ - {"href": "/sublink"} + {"href": "sublink"} ] } }`), &m)) @@ -55,22 +57,22 @@ func TestManifestUnmarshalFullJSON(t *testing.T) { LocalizedTitle: NewLocalizedStringFromString("Title"), }, Links: LinkList{ - Link{Href: "/manifest.json", Rels: Strings{"self"}}, + Link{Href: MustNewHREFFromString("manifest.json", false), Rels: Strings{"self"}}, }, ReadingOrder: LinkList{ - Link{Href: "/chap1.html", Type: "text/html"}, + Link{Href: MustNewHREFFromString("chap1.html", false), MediaType: &mediatype.HTML}, }, Resources: LinkList{ - Link{Href: "/image.png", Type: "image/png"}, + Link{Href: MustNewHREFFromString("image.png", false), MediaType: &mediatype.PNG}, }, TableOfContents: LinkList{ - Link{Href: "/cover.html"}, - Link{Href: "/chap1.html"}, + Link{Href: MustNewHREFFromString("cover.html", false)}, + Link{Href: MustNewHREFFromString("chap1.html", false)}, }, Subcollections: PublicationCollectionMap{ "sub": {{ Metadata: map[string]interface{}{}, - Links: []Link{{Href: "/sublink"}}, + Links: []Link{{Href: MustNewHREFFromString("sublink", false)}}, }}, }, }, m, "unmarshalled JSON object should be equal to Manifest object") @@ -82,10 +84,10 @@ func TestManifestUnmarshalJSONContextAsArray(t *testing.T) { "@context": ["context1", "context2"], "metadata": {"title": "Title"}, "links": [ - {"href": "/manifest.json", "rel": "self"} + {"href": "manifest.json", "rel": "self"} ], "readingOrder": [ - {"href": "/chap1.html", "type": "text/html"} + {"href": "chap1.html", "type": "text/html"} ] }`), &m)) @@ -95,10 +97,10 @@ func TestManifestUnmarshalJSONContextAsArray(t *testing.T) { LocalizedTitle: NewLocalizedStringFromString("Title"), }, Links: LinkList{ - Link{Href: "/manifest.json", Rels: Strings{"self"}}, + Link{Href: MustNewHREFFromString("manifest.json", false), Rels: Strings{"self"}}, }, ReadingOrder: LinkList{ - Link{Href: "/chap1.html", Type: "text/html"}, + Link{Href: MustNewHREFFromString("chap1.html", false), MediaType: &mediatype.HTML}, }, }, m, 
"unmarshalled JSON object should be equal to Manifest object with @context array") } @@ -107,10 +109,10 @@ func TestManifestUnmarshalJSONRequiresMetadata(t *testing.T) { var m Manifest assert.Error(t, json.Unmarshal([]byte(`{ "links": [ - {"href": "/manifest.json", "rel": "self"} + {"href": "manifest.json", "rel": "self"} ], "readingOrder": [ - {"href": "/chap1.html", "type": "text/html"} + {"href": "chap1.html", "type": "text/html"} ] }`), &m)) } @@ -121,10 +123,10 @@ func TestManifestUnmarshalJSONSpinFallback(t *testing.T) { assert.NoError(t, json.Unmarshal([]byte(`{ "metadata": {"title": "Title"}, "links": [ - {"href": "/manifest.json", "rel": "self"} + {"href": "manifest.json", "rel": "self"} ], "spine": [ - {"href": "/chap1.html", "type": "text/html"} + {"href": "chap1.html", "type": "text/html"} ] }`), &m)) @@ -133,24 +135,24 @@ func TestManifestUnmarshalJSONSpinFallback(t *testing.T) { LocalizedTitle: NewLocalizedStringFromString("Title"), }, Links: LinkList{ - Link{Href: "/manifest.json", Rels: Strings{"self"}}, + Link{Href: MustNewHREFFromString("manifest.json", false), Rels: Strings{"self"}}, }, ReadingOrder: LinkList{ - Link{Href: "/chap1.html", Type: "text/html"}, + Link{Href: MustNewHREFFromString("chap1.html", false), MediaType: &mediatype.HTML}, }, }, m) } -func TestManifestUnmarshalJSONIgnoresMissingReadingOrderType(t *testing.T) { +/*func TestManifestUnmarshalJSONIgnoresMissingReadingOrderType(t *testing.T) { var m Manifest assert.NoError(t, json.Unmarshal([]byte(`{ "metadata": {"title": "Title"}, "links": [ - {"href": "/manifest.json", "rel": "self"} + {"href": "manifest.json", "rel": "self"} ], "readingOrder": [ - {"href": "/chap1.html", "type": "text/html"}, - {"href": "/chap2.html"} + {"href": "chap1.html", "type": "text/html"}, + {"href": "chap2.html"} ] }`), &m)) @@ -159,10 +161,10 @@ func TestManifestUnmarshalJSONIgnoresMissingReadingOrderType(t *testing.T) { LocalizedTitle: NewLocalizedStringFromString("Title"), }, Links: LinkList{ - 
Link{Href: "/manifest.json", Rels: Strings{"self"}}, + Link{Href: MustNewHREFFromString( "manifest.json", false), Rels: Strings{"self"}}, }, ReadingOrder: LinkList{ - Link{Href: "/chap1.html", Type: "text/html"}, + Link{Href: MustNewHREFFromString( "chap1.html", false), MediaType: &mediatype.HTML}, }, }, m) } @@ -172,14 +174,14 @@ func TestManifestUnmarshalJSONIgnoresResourceWithoutType(t *testing.T) { assert.NoError(t, json.Unmarshal([]byte(`{ "metadata": {"title": "Title"}, "links": [ - {"href": "/manifest.json", "rel": "self"} + {"href": "manifest.json", "rel": "self"} ], "readingOrder": [ - {"href": "/chap1.html", "type": "text/html"} + {"href": "chap1.html", "type": "text/html"} ], "resources": [ - {"href": "/withtype", "type": "text/html"}, - {"href": "/withouttype"} + {"href": "withtype", "type": "text/html"}, + {"href": "withouttype"} ] }`), &m)) @@ -188,16 +190,16 @@ func TestManifestUnmarshalJSONIgnoresResourceWithoutType(t *testing.T) { LocalizedTitle: NewLocalizedStringFromString("Title"), }, Links: LinkList{ - Link{Href: "/manifest.json", Rels: Strings{"self"}}, + Link{Href: MustNewHREFFromString( "manifest.json", false), Rels: Strings{"self"}}, }, ReadingOrder: LinkList{ - Link{Href: "/chap1.html", Type: "text/html"}, + Link{Href: MustNewHREFFromString( "chap1.html", false), MediaType: &mediatype.HTML}, }, Resources: LinkList{ - Link{Href: "/withtype", Type: "text/html"}, + Link{Href: MustNewHREFFromString( "withtype", false), MediaType: &mediatype.HTML}, }, }, m) -} +}*/ func TestManifestMinimalJSON(t *testing.T) { bin, err := json.Marshal(Manifest{ @@ -224,21 +226,21 @@ func TestManifestFullJSON(t *testing.T) { LocalizedTitle: NewLocalizedStringFromString("Title"), }, Links: LinkList{ - Link{Href: "/manifest.json", Rels: Strings{"self"}}, + Link{Href: MustNewHREFFromString("manifest.json", false), Rels: Strings{"self"}}, }, ReadingOrder: LinkList{ - Link{Href: "/chap1.html", Type: "text/html"}, + Link{Href: MustNewHREFFromString("chap1.html", 
false), MediaType: &mediatype.HTML}, }, Resources: LinkList{ - Link{Href: "/image.png", Type: "image/png"}, + Link{Href: MustNewHREFFromString("image.png", false), MediaType: &mediatype.PNG}, }, TableOfContents: LinkList{ - Link{Href: "/cover.html"}, Link{Href: "/chap1.html"}, + Link{Href: MustNewHREFFromString("cover.html", false)}, Link{Href: MustNewHREFFromString("chap1.html", false)}, }, Subcollections: PublicationCollectionMap{ "sub": {{ Metadata: map[string]interface{}{}, - Links: []Link{{Href: "/sublink"}}, + Links: []Link{{Href: MustNewHREFFromString("sublink", false)}}, }}, }, }) @@ -248,22 +250,22 @@ func TestManifestFullJSON(t *testing.T) { "@context": "https://readium.org/webpub-manifest/context.jsonld", "metadata": {"title": "Title"}, "links": [ - {"href": "/manifest.json", "rel": "self"} + {"href": "manifest.json", "rel": "self"} ], "readingOrder": [ - {"href": "/chap1.html", "type": "text/html"} + {"href": "chap1.html", "type": "text/html"} ], "resources": [ - {"href": "/image.png", "type": "image/png"} + {"href": "image.png", "type": "image/png"} ], "toc": [ - {"href": "/cover.html"}, - {"href": "/chap1.html"} + {"href": "cover.html"}, + {"href": "chap1.html"} ], "sub": { "metadata": {}, "links": [ - {"href": "/sublink"} + {"href": "sublink"} ] } }`, string(bin)) @@ -274,7 +276,7 @@ func TestManifestSelfLinkReplacedWhenPackaged(t *testing.T) { assert.NoError(t, json.Unmarshal([]byte(`{ "metadata": {"title": "Title"}, "links": [ - {"href": "/manifest.json", "rel": ["self"], "templated": false} + {"href": "manifest.json", "rel": ["self"], "templated": false} ], "readingOrder": [] }`), &rm)) @@ -286,7 +288,7 @@ func TestManifestSelfLinkReplacedWhenPackaged(t *testing.T) { LocalizedTitle: NewLocalizedStringFromString("Title"), }, Links: LinkList{ - Link{Href: "/manifest.json", Rels: Strings{"alternate"}}, + Link{Href: MustNewHREFFromString("manifest.json", false), Rels: Strings{"alternate"}}, }, ReadingOrder: LinkList{}, }, *m) @@ -297,7 +299,7 @@ func 
TestManifestSelfLinkKeptWhenRemote(t *testing.T) { assert.NoError(t, json.Unmarshal([]byte(`{ "metadata": {"title": "Title"}, "links": [ - {"href": "/manifest.json", "rel": ["self"], "templated": false} + {"href": "manifest.json", "rel": ["self"], "templated": false} ], "readingOrder": [] }`), &rm)) @@ -309,7 +311,7 @@ func TestManifestSelfLinkKeptWhenRemote(t *testing.T) { LocalizedTitle: NewLocalizedStringFromString("Title"), }, Links: LinkList{ - Link{Href: "/manifest.json", Rels: Strings{"self"}}, + Link{Href: MustNewHREFFromString("manifest.json", false), Rels: Strings{"self"}}, }, ReadingOrder: LinkList{}, }, *m) @@ -329,8 +331,9 @@ func TestManifestHrefResolvedToRoot(t *testing.T) { m, err := ManifestFromJSON(rm, true) assert.NoError(t, err) + m2 := m.NormalizeHREFsToSelf() - assert.Equal(t, "/chap1.html", m.ReadingOrder[0].Href) + assert.Equal(t, "chap1.html", m2.ReadingOrder[0].Href.String()) } func TestManifestHrefResolvedToRootRemotePackage(t *testing.T) { @@ -347,8 +350,9 @@ func TestManifestHrefResolvedToRootRemotePackage(t *testing.T) { m, err := ManifestFromJSON(rm, false) assert.NoError(t, err) + m2 := m.NormalizeHREFsToSelf() - assert.Equal(t, "http://example.com/directory/chap1.html", m.ReadingOrder[0].Href) + assert.Equal(t, "http://example.com/directory/chap1.html", m2.ReadingOrder[0].Href.String()) } func TestManifestLocatorFromMinimalLink(t *testing.T) { @@ -357,22 +361,22 @@ func TestManifestLocatorFromMinimalLink(t *testing.T) { LocalizedTitle: NewLocalizedStringFromString(""), }, ReadingOrder: LinkList{{ - Href: "/href", - Type: "text/html", - Title: "Resource", + Href: MustNewHREFFromString("href", false), + MediaType: &mediatype.HTML, + Title: "Resource", }}, } var z float64 assert.Equal(t, &Locator{ - Href: "/href", - Type: "text/html", - Title: "Resource", + Href: url.MustURLFromString("href"), + MediaType: mediatype.HTML, + Title: "Resource", Locations: Locations{ Progression: &z, }, }, manifest.LocatorFromLink(Link{ - Href: "/href", + 
Href: MustNewHREFFromString("href", false), })) } @@ -382,46 +386,46 @@ func TestManifestLocatorFromInside(t *testing.T) { LocalizedTitle: NewLocalizedStringFromString(""), }, ReadingOrder: LinkList{{ - Href: "/href1", - Type: "text/html", + Href: MustNewHREFFromString("href1", false), + MediaType: &mediatype.HTML, }}, Resources: LinkList{{ - Href: "/href2", - Type: "text/html", + Href: MustNewHREFFromString("href2", false), + MediaType: &mediatype.HTML, }}, Links: LinkList{{ - Href: "/href3", - Type: "text/html", + Href: MustNewHREFFromString("href3", false), + MediaType: &mediatype.HTML, }}, } var z float64 assert.Equal(t, &Locator{ - Href: "/href1", - Type: "text/html", + Href: url.MustURLFromString("href1"), + MediaType: mediatype.HTML, Locations: Locations{ Progression: &z, }, }, manifest.LocatorFromLink(Link{ - Href: "/href1", + Href: MustNewHREFFromString("href1", false), })) assert.Equal(t, &Locator{ - Href: "/href2", - Type: "text/html", + Href: url.MustURLFromString("href2"), + MediaType: mediatype.HTML, Locations: Locations{ Progression: &z, }, }, manifest.LocatorFromLink(Link{ - Href: "/href2", + Href: MustNewHREFFromString("href2", false), })) assert.Equal(t, &Locator{ - Href: "/href3", - Type: "text/html", + Href: url.MustURLFromString("href3"), + MediaType: mediatype.HTML, Locations: Locations{ Progression: &z, }, }, manifest.LocatorFromLink(Link{ - Href: "/href3", + Href: MustNewHREFFromString("href3", false), })) } @@ -431,23 +435,23 @@ func TestManifestLocatorFromFullLinkWithFragment(t *testing.T) { LocalizedTitle: NewLocalizedStringFromString(""), }, ReadingOrder: LinkList{{ - Href: "/href", - Type: "text/html", - Title: "Resource", + Href: MustNewHREFFromString("href", false), + MediaType: &mediatype.HTML, + Title: "Resource", }}, } assert.Equal(t, &Locator{ - Href: "/href", - Type: "text/html", - Title: "Resource", + Href: url.MustURLFromString("href"), + MediaType: mediatype.HTML, + Title: "Resource", Locations: Locations{ Fragments: 
[]string{"page=42"}, }, }, manifest.LocatorFromLink(Link{ - Href: "/href#page=42", - Type: "text/xml", - Title: "My link", + Href: MustNewHREFFromString("href#page=42", false), + MediaType: &mediatype.XML, + Title: "My link", })) } @@ -457,21 +461,21 @@ func TestManifestLocatorFallbackTitle(t *testing.T) { LocalizedTitle: NewLocalizedStringFromString(""), }, ReadingOrder: LinkList{{ - Href: "/href", - Type: "text/html", + Href: MustNewHREFFromString("href", false), + MediaType: &mediatype.HTML, }}, } assert.Equal(t, &Locator{ - Href: "/href", - Type: "text/html", - Title: "My link", + Href: url.MustURLFromString("href"), + MediaType: mediatype.HTML, + Title: "My link", Locations: Locations{ Fragments: []string{"page=42"}, }, }, manifest.LocatorFromLink(Link{ - Href: "/href#page=42", - Type: "text/xml", - Title: "My link", + Href: MustNewHREFFromString("href#page=42", false), + MediaType: &mediatype.HTML, + Title: "My link", })) } @@ -481,11 +485,11 @@ func TestManifestLocatorLinkNotFound(t *testing.T) { LocalizedTitle: NewLocalizedStringFromString(""), }, ReadingOrder: LinkList{{ - Href: "/href", - Type: "text/html", + Href: MustNewHREFFromString("href", false), + MediaType: &mediatype.HTML, }}, } assert.Nil(t, manifest.LocatorFromLink(Link{ - Href: "/notfound", + Href: MustNewHREFFromString("notfound", false), })) } diff --git a/pkg/manifest/manifest_transformer.go b/pkg/manifest/manifest_transformer.go new file mode 100644 index 00000000..f4da16e2 --- /dev/null +++ b/pkg/manifest/manifest_transformer.go @@ -0,0 +1,93 @@ +package manifest + +// Transforms a manifest's components. +type ManifestTransformer interface { + TransformManifest(manifest Manifest) Manifest + TransformMetadata(metadata Metadata) Metadata + TransformLink(link Link) Link + TransformHREF(href HREF) HREF +} + +// Creates a copy of the receiver [Manifest], applying the given [transformer] to each component. 
+func (m Manifest) Copy(transformer ManifestTransformer) Manifest { + m.Metadata = m.Metadata.Copy(transformer) + m.Links = m.Links.Copy(transformer) + m.ReadingOrder = m.ReadingOrder.Copy(transformer) + m.Resources = m.Resources.Copy(transformer) + m.TableOfContents = m.TableOfContents.Copy(transformer) + m.Subcollections = m.Subcollections.Copy(transformer) + return transformer.TransformManifest(m) +} + +func (m Metadata) Copy(transformer ManifestTransformer) Metadata { + for i, subject := range m.Subjects { + m.Subjects[i] = subject.Copy(transformer) + } + m.Authors = m.Authors.Copy(transformer) + m.Translators = m.Translators.Copy(transformer) + m.Editors = m.Editors.Copy(transformer) + m.Artists = m.Artists.Copy(transformer) + m.Illustrators = m.Illustrators.Copy(transformer) + m.Letterers = m.Letterers.Copy(transformer) + m.Pencilers = m.Pencilers.Copy(transformer) + m.Colorists = m.Colorists.Copy(transformer) + m.Inkers = m.Inkers.Copy(transformer) + m.Narrators = m.Narrators.Copy(transformer) + m.Contributors = m.Contributors.Copy(transformer) + m.Publishers = m.Publishers.Copy(transformer) + m.Imprints = m.Imprints.Copy(transformer) + for k, v := range m.BelongsTo { + m.BelongsTo[k] = v.Copy(transformer) + } + return transformer.TransformMetadata(m) +} + +func (p PublicationCollection) Copy(transformer ManifestTransformer) PublicationCollection { + p.Links = p.Links.Copy(transformer) + p.Subcollections = p.Subcollections.Copy(transformer) + return p +} + +func (p PublicationCollectionMap) Copy(transformer ManifestTransformer) PublicationCollectionMap { + for k, v := range p { + for i, c := range v { + p[k][i] = c.Copy(transformer) + } + } + return p +} + +func (c Contributors) Copy(transformer ManifestTransformer) Contributors { + for i, contributor := range c { + c[i] = contributor.Copy(transformer) + } + return c +} + +func (c Contributor) Copy(transformer ManifestTransformer) Contributor { + c.Links = c.Links.Copy(transformer) + return c +} + +func (s 
Subject) Copy(transformer ManifestTransformer) Subject { + s.Links = s.Links.Copy(transformer) + return s +} + +func (ll LinkList) Copy(transformer ManifestTransformer) LinkList { + for i, link := range ll { + ll[i] = link.Copy(transformer) + } + return ll +} + +func (l Link) Copy(transformer ManifestTransformer) Link { + l.Href = l.Href.Copy(transformer) + l.Alternates = l.Alternates.Copy(transformer) + l.Children = l.Children.Copy(transformer) + return transformer.TransformLink(l) +} + +func (h HREF) Copy(transformer ManifestTransformer) HREF { + return transformer.TransformHREF(h) +} diff --git a/pkg/manifest/metadata.go b/pkg/manifest/metadata.go index 9b38e8ff..ebe968fe 100644 --- a/pkg/manifest/metadata.go +++ b/pkg/manifest/metadata.go @@ -155,7 +155,7 @@ func toJSONMap(value interface{}) (map[string]interface{}, error) { return object, nil } -func MetadataFromJSON(rawJson map[string]interface{}, normalizeHref LinkHrefNormalizer) (*Metadata, error) { +func MetadataFromJSON(rawJson map[string]interface{}) (*Metadata, error) { if rawJson == nil { return nil, nil } @@ -222,98 +222,98 @@ func MetadataFromJSON(rawJson map[string]interface{}, normalizeHref LinkHrefNorm metadata.Languages = languages // Subjects - subjects, err := SubjectFromJSONArray(rawJson["subject"], normalizeHref) + subjects, err := SubjectFromJSONArray(rawJson["subject"]) if err != nil { return nil, errors.Wrap(err, "failed parsing 'subject'") } metadata.Subjects = subjects // Contributors - contributors, err := ContributorFromJSONArray(rawJson["contributor"], normalizeHref) + contributors, err := ContributorFromJSONArray(rawJson["contributor"]) if err != nil { return nil, errors.Wrap(err, "failed parsing 'contributor'") } metadata.Contributors = contributors // Publishers - contributors, err = ContributorFromJSONArray(rawJson["publisher"], normalizeHref) + contributors, err = ContributorFromJSONArray(rawJson["publisher"]) if err != nil { return nil, errors.Wrap(err, "failed parsing 
'publisher'") } metadata.Publishers = contributors // Imprints - contributors, err = ContributorFromJSONArray(rawJson["imprint"], normalizeHref) + contributors, err = ContributorFromJSONArray(rawJson["imprint"]) if err != nil { return nil, errors.Wrap(err, "failed parsing 'imprint'") } metadata.Imprints = contributors // Authors - contributors, err = ContributorFromJSONArray(rawJson["author"], normalizeHref) + contributors, err = ContributorFromJSONArray(rawJson["author"]) if err != nil { return nil, errors.Wrap(err, "failed parsing 'author'") } metadata.Authors = contributors // Translators - contributors, err = ContributorFromJSONArray(rawJson["translator"], normalizeHref) + contributors, err = ContributorFromJSONArray(rawJson["translator"]) if err != nil { return nil, errors.Wrap(err, "failed parsing 'translator'") } metadata.Translators = contributors // Editors - contributors, err = ContributorFromJSONArray(rawJson["editor"], normalizeHref) + contributors, err = ContributorFromJSONArray(rawJson["editor"]) if err != nil { return nil, errors.Wrap(err, "failed parsing 'editor'") } metadata.Editors = contributors // Artists - contributors, err = ContributorFromJSONArray(rawJson["artist"], normalizeHref) + contributors, err = ContributorFromJSONArray(rawJson["artist"]) if err != nil { return nil, errors.Wrap(err, "failed parsing 'artist'") } metadata.Artists = contributors // Illustrators - contributors, err = ContributorFromJSONArray(rawJson["illustrator"], normalizeHref) + contributors, err = ContributorFromJSONArray(rawJson["illustrator"]) if err != nil { return nil, errors.Wrap(err, "failed parsing 'illustrator'") } metadata.Illustrators = contributors // Letterers - contributors, err = ContributorFromJSONArray(rawJson["letterer"], normalizeHref) + contributors, err = ContributorFromJSONArray(rawJson["letterer"]) if err != nil { return nil, errors.Wrap(err, "failed parsing 'letterer'") } metadata.Letterers = contributors // Pencilers - contributors, err = 
ContributorFromJSONArray(rawJson["penciler"], normalizeHref) + contributors, err = ContributorFromJSONArray(rawJson["penciler"]) if err != nil { return nil, errors.Wrap(err, "failed parsing 'penciler'") } metadata.Pencilers = contributors // Colorists - contributors, err = ContributorFromJSONArray(rawJson["colorist"], normalizeHref) + contributors, err = ContributorFromJSONArray(rawJson["colorist"]) if err != nil { return nil, errors.Wrap(err, "failed parsing 'colorist'") } metadata.Colorists = contributors // Inkers - contributors, err = ContributorFromJSONArray(rawJson["inker"], normalizeHref) + contributors, err = ContributorFromJSONArray(rawJson["inker"]) if err != nil { return nil, errors.Wrap(err, "failed parsing 'inker'") } metadata.Inkers = contributors // Narrators - contributors, err = ContributorFromJSONArray(rawJson["narrator"], normalizeHref) + contributors, err = ContributorFromJSONArray(rawJson["narrator"]) if err != nil { return nil, errors.Wrap(err, "failed parsing 'narrator'") } @@ -343,7 +343,7 @@ func MetadataFromJSON(rawJson map[string]interface{}, normalizeHref LinkHrefNorm if v == nil { continue } - cl, err := ContributorFromJSONArray(v, normalizeHref) + cl, err := ContributorFromJSONArray(v) if err != nil { return nil, errors.Wrapf(err, "failed parsing 'belongsTo.%s'", k) } @@ -431,7 +431,7 @@ func (m *Metadata) UnmarshalJSON(b []byte) error { if err != nil { return err } - fm, err := MetadataFromJSON(object, LinkHrefNormalizerIdentity) + fm, err := MetadataFromJSON(object) if err != nil { return err } diff --git a/pkg/manifest/metadata_test.go b/pkg/manifest/metadata_test.go index ff93694d..773caf2e 100644 --- a/pkg/manifest/metadata_test.go +++ b/pkg/manifest/metadata_test.go @@ -121,7 +121,7 @@ func TestMetadataUnmarshalFullJSON(t *testing.T) { } func TestMetadataUnmarshalNilJSON(t *testing.T) { - s, err := MetadataFromJSON(nil, nil) + s, err := MetadataFromJSON(nil) assert.NoError(t, err) assert.Nil(t, s) } diff --git 
a/pkg/manifest/profile.go b/pkg/manifest/profile.go index e3fd4802..a07533fc 100644 --- a/pkg/manifest/profile.go +++ b/pkg/manifest/profile.go @@ -16,7 +16,7 @@ const ProfileEPUB = Profile("https://readium.org/webpub-manifest/profiles/epub") // Profile for audiobooks const ProfileAudiobook = Profile("https://readium.org/webpub-manifest/profiles/audiobook") -// Profile for visual narratives (comics, manga and bandes dessinées) +// Profile for visual narratives (comics, manga, webtoons and bandes dessinées) const ProfileDivina = Profile("https://readium.org/webpub-manifest/profiles/divina") // Profile for PDF documents diff --git a/pkg/manifest/subject.go b/pkg/manifest/subject.go index e569a645..9ae9e518 100644 --- a/pkg/manifest/subject.go +++ b/pkg/manifest/subject.go @@ -14,7 +14,7 @@ type Subject struct { LocalizedSortAs *LocalizedString `json:"sortAs,omitempty"` Scheme string `json:"scheme,omitempty"` Code string `json:"code,omitempty"` - Links []Link `json:"links,omitempty"` + Links LinkList `json:"links,omitempty"` } func (s Subject) Name() string { @@ -31,7 +31,7 @@ func (s Subject) SortAs() string { // Parses a [Subject] from its RWPM JSON representation. // A subject can be parsed from a single string, or a full-fledged object. // The [links]' href and their children's will be normalized recursively using the provided [normalizeHref] closure. 
-func SubjectFromJSON(rawJson interface{}, normalizeHref LinkHrefNormalizer) (*Subject, error) { +func SubjectFromJSON(rawJson interface{}) (*Subject, error) { if rawJson == nil { return nil, nil } @@ -67,7 +67,7 @@ func SubjectFromJSON(rawJson interface{}, normalizeHref LinkHrefNormalizer) (*Su // links lln, ok := rjs["links"].([]interface{}) if ok { - links, err := LinksFromJSONArray(lln, normalizeHref) + links, err := LinksFromJSONArray(lln) if err != nil { return nil, errors.Wrap(err, "failed parsing Subject 'links'") } @@ -82,13 +82,13 @@ func SubjectFromJSON(rawJson interface{}, normalizeHref LinkHrefNormalizer) (*Su // Creates a list of [Subject] from its RWPM JSON representation. // The [links]' href and their children's will be normalized recursively using the provided [normalizeHref] closure. -func SubjectFromJSONArray(rawJsonArray interface{}, normalizeHref LinkHrefNormalizer) ([]Subject, error) { +func SubjectFromJSONArray(rawJsonArray interface{}) ([]Subject, error) { var subjects []Subject switch rjx := rawJsonArray.(type) { case []interface{}: subjects = make([]Subject, 0, len(rjx)) for i, entry := range rjx { - rs, err := SubjectFromJSON(entry, normalizeHref) + rs, err := SubjectFromJSON(entry) if err != nil { return nil, errors.Wrapf(err, "failed unmarshalling Subject at position %d", i) } @@ -98,7 +98,7 @@ func SubjectFromJSONArray(rawJsonArray interface{}, normalizeHref LinkHrefNormal subjects = append(subjects, *rs) } default: - s, err := SubjectFromJSON(rjx, normalizeHref) + s, err := SubjectFromJSON(rjx) if err != nil { return nil, err } @@ -115,7 +115,7 @@ func (s *Subject) UnmarshalJSON(data []byte) error { if err != nil { return err } - fs, err := SubjectFromJSON(object, LinkHrefNormalizerIdentity) + fs, err := SubjectFromJSON(object) if err != nil { return err } diff --git a/pkg/manifest/subject_test.go b/pkg/manifest/subject_test.go index 99391bca..9a2878d1 100644 --- a/pkg/manifest/subject_test.go +++ b/pkg/manifest/subject_test.go @@ 
-8,7 +8,7 @@ import ( ) func TestSubjectUnmarshalJSONString(t *testing.T) { - s, err := SubjectFromJSON("Fantasy", nil) + s, err := SubjectFromJSON("Fantasy") assert.NoError(t, err) assert.Equal(t, &Subject{ @@ -45,14 +45,14 @@ func TestSubjectUnmarshalFullJSON(t *testing.T) { Scheme: "http://scheme", Code: "CODE", Links: []Link{ - {Href: "pub1"}, - {Href: "pub2"}, + {Href: MustNewHREFFromString("pub1", false)}, + {Href: MustNewHREFFromString("pub2", false)}, }, }, &s, "parsed JSON object should be equal to Subject object") } func TestSubjectUnmarshalNilJSON(t *testing.T) { - s, err := SubjectFromJSON(nil, nil) + s, err := SubjectFromJSON(nil) assert.NoError(t, err) assert.Nil(t, s) } @@ -82,13 +82,13 @@ func TestSubjectUnmarshalJSONArray(t *testing.T) { } func TestSubjectUnmarshalNilJSONArray(t *testing.T) { - ss, err := SubjectFromJSONArray(nil, nil) + ss, err := SubjectFromJSONArray(nil) assert.NoError(t, err) assert.Equal(t, 0, len(ss)) } func TestSubjectUnmarshalJSONArrayString(t *testing.T) { - ss, err := SubjectFromJSONArray("Fantasy", nil) + ss, err := SubjectFromJSONArray("Fantasy") assert.NoError(t, err) assert.Equal(t, []Subject{ {LocalizedName: NewLocalizedStringFromString("Fantasy")}, @@ -122,8 +122,8 @@ func TestSubjectFullJSON(t *testing.T) { Scheme: "http://scheme", Code: "CODE", Links: []Link{ - {Href: "pub1"}, - {Href: "pub2"}, + {Href: MustNewHREFFromString("pub1", false)}, + {Href: MustNewHREFFromString("pub2", false)}, }, }) assert.NoError(t, err) diff --git a/pkg/mediatype/mediatype.go b/pkg/mediatype/mediatype.go index c3b1bc7d..5299c7e1 100644 --- a/pkg/mediatype/mediatype.go +++ b/pkg/mediatype/mediatype.go @@ -43,7 +43,7 @@ func New(str string, name string, extension string) (mt MediaType, err error) { mt.fileExtension = extension mtype, params, merr := mime.ParseMediaType(str) - if err != nil { + if merr != nil { err = merr return } @@ -107,9 +107,36 @@ func New(str string, name string, extension string) (mt MediaType, err error) { // 
Create a new MediaType solely from a mime string. // When an error is returned, do not use the resulting MediaType, as it will be incomplete/invalid func NewOfString(str string) (MediaType, error) { + if knownMatch, ok := knownMatches[str]; ok { + // The string was recognized as a known mimetype. + // This not only shortcuts building of the MediaType, + // but also ensures that the resulting MediaType is identical + // to an expected one in tests, and provides back nice + // names and file extensions as properties. We might want + // to match more roughly in the future, but for now this works. + return *knownMatch, nil + } return New(str, "", "") } +// Proxy for NewOfString, but panics on error. +func MustNewOfString(str string) MediaType { + mt, err := NewOfString(str) + if err != nil { + panic(err) + } + return mt +} + +// Proxy for NewOfString, but returns nil on error. +func MaybeNewOfString(str string) *MediaType { + mt, err := NewOfString(str) + if err != nil { + return nil + } + return &mt +} + // Structured syntax suffix, e.g. `+zip` in `application/epub+zip`. // // Gives a hint on the underlying structure of this media type. diff --git a/pkg/mediatype/mediatype_of.go b/pkg/mediatype/mediatype_of.go index 4f124bf2..eb3640c9 100644 --- a/pkg/mediatype/mediatype_of.go +++ b/pkg/mediatype/mediatype_of.go @@ -21,7 +21,7 @@ var Sniffers = []Sniffer{ SniffLCPLicense, SniffW3CWPUB, SniffWebpub, - // Note SniffSystem isn't here! + // Note SniffKnown and SniffSystem aren't here! } // Resolves a media type from a sniffer context. @@ -73,15 +73,21 @@ func of(content SnifferContent, mediaTypes []string, fileExtensions []string, sn } } - // Falls back on the system-wide registered media types. - // Note: This is done after the heavy sniffing of the provided [sniffers], because - // otherwise it will detect JSON, XML or ZIP formats before we have a chance of sniffing - // their content (for example, for RWPM). 
context := SnifferContext{ content: content, mediaTypes: mediaTypes, fileExtensions: fileExtensions, } + + // Check if the media type is within the well-known list + // Note: This is done after the heavy sniffing of the provided [sniffers], because + // otherwise it will detect JSON, XML or ZIP formats before we have a chance of sniffing + // their content (for example, for RWPM). + if c := SniffKnown(context); c != nil { + return c + } + + // Falls back on the system-wide registered media types. if c := SniffSystem(context); c != nil { return c } diff --git a/pkg/mediatype/sniffer.go b/pkg/mediatype/sniffer.go index c392192f..4019bd20 100644 --- a/pkg/mediatype/sniffer.go +++ b/pkg/mediatype/sniffer.go @@ -286,7 +286,7 @@ var cbz_extensions = map[string]struct{}{ "jpeg": {}, "png": {}, "tif": {}, "tiff": {}, "webp": {}, "avif": {}, "jxl": {}, // Metadata - "acbf": {}, "xml": {}, "txt": {}, + "acbf": {}, "xml": {}, "txt": {}, "json": {}, } // Authorized extensions for resources in a ZAB archive (Zipped Audio Book). 
@@ -348,6 +348,20 @@ func SniffPDF(context SnifferContext) *MediaType { return nil } +func SniffKnown(context SnifferContext) *MediaType { + for k, v := range knownMatches { + if context.HasMediaType(k) { + return v + } + if v.fileExtension != "" { + if v.fileExtension != "json" && context.HasFileExtension(v.fileExtension) { + return v + } + } + } + return nil +} + func SniffSystem(context SnifferContext) *MediaType { for _, mt := range context.MediaTypes() { mts := mt.String() @@ -379,9 +393,6 @@ func SniffSystem(context SnifferContext) *MediaType { continue } exr := exts[0] - if exr == ".htm" { - exr = ".html" // Fix for Go's first html extension being .htm - } nm = strings.TrimSuffix(nm, "; charset=utf-8") // Fix for Go assuming file's content is UTF-8 if nmt, err := New(nm, "", exr[1:]); err == nil { return &nmt diff --git a/pkg/mediatype/sniffer_context.go b/pkg/mediatype/sniffer_context.go index ce849e38..53da2d15 100644 --- a/pkg/mediatype/sniffer_context.go +++ b/pkg/mediatype/sniffer_context.go @@ -95,6 +95,19 @@ func (s SnifferContext) HasMediaType(mediaTypes ...string) bool { return false } +// Returns whether this context has any media type with a one of the provided types (the first component of the media type). +func (s SnifferContext) HasMediaTypeWithType(types ...string) bool { + selfMediaTypes := s.MediaTypes() + for _, typ := range types { + for _, mt := range selfMediaTypes { + if mt.Type == typ { + return true + } + } + } + return false +} + // Content as plain text. // Extracts the charset parameter from the media type hints to figure out an encoding. Otherwise, UTF-8 is assumed. 
func (s SnifferContext) ContentAsString() (string, error) { diff --git a/pkg/mediatype/sniffer_mimes.go b/pkg/mediatype/sniffer_mimes.go deleted file mode 100644 index 1f64dc64..00000000 --- a/pkg/mediatype/sniffer_mimes.go +++ /dev/null @@ -1,10 +0,0 @@ -package mediatype - -import "mime" - -// Explicitly add certain mimetypes to the system sniffer to work around OS differences -func init() { - mime.AddExtensionType(".xml", "application/xml") - mime.AddExtensionType(".ncx", "application/x-dtbncx+xml") - mime.AddExtensionType(".opf", "application/oebps-package+xml") -} diff --git a/pkg/mediatype/types.go b/pkg/mediatype/types.go index 7968286a..c5e6e841 100644 --- a/pkg/mediatype/types.go +++ b/pkg/mediatype/types.go @@ -33,7 +33,8 @@ var OPDS1Entry, _ = New("application/atom+xml;type=entry;profile=opds-catalog", var OPDS2, _ = New("application/opds+json", "OPDS 2 Catalog", "") var OPDS2Publication, _ = New("application/opds-publication+json", "OPDS 2 Publication", "") var OPDSAuthentication, _ = New("application/opds-authentication+json", "OPDS 2 Authentication Document", "") -var OPUS, _ = New("audio/opus", "OPUS Audio", "opus") +var OPF, _ = New("application/oebps-package+xml", "EPUB Package Document", "opf") +var OPUS, _ = New("audio/ogg; codecs=opus", "OPUS Audio", "opus") var OTF, _ = New("font/otf", "OpenType Font", "otf") var PDF, _ = New("application/pdf", "PDF", "pdf") var PNG, _ = New("image/png", "Portable Network Graphics", "png") diff --git a/pkg/mediatype/types_matcher.go b/pkg/mediatype/types_matcher.go new file mode 100644 index 00000000..d2fe54db --- /dev/null +++ b/pkg/mediatype/types_matcher.go @@ -0,0 +1,70 @@ +package mediatype + +// To generate, search and replace `types.go` with the following: +// var (\w+)[^\n]+New\(("[^"]+"),[^\n]+ +// $2: &$1, +var knownMatches = map[string]*MediaType{ + "audio/aac": &AAC, + "application/vnd.adobe.adept+xml": &ACSM, + "audio/aiff": &AIFF, + "video/x-msvideo": &AVI, + "image/avif": &AVIF, + 
"application/octet-stream": &Binary, + "image/bmp": &BMP, + "application/vnd.comicbook+zip": &CBZ, + "application/vnd.comicbook-rar": &CBR, + "text/css": &CSS, + "application/epub+zip": &EPUB, + "image/gif": &GIF, + "application/gzip": &GZ, + "text/html": &HTML, + "text/javascript": &JavaScript, + "image/jpeg": &JPEG, + "application/json": &JSON, + "image/jxl": &JXL, + "application/vnd.readium.lcp.license.v1.0+json": &LCPLicenseDocument, + "application/audiobook+lcp": &LCPProtectedAudiobook, + "application/pdf+lcp": &LCPProtectedPDF, + "application/vnd.readium.license.status.v1.0+json": &LCPStatusDocument, + "application/lpf+zip": &LPF, + "audio/mpeg": &MP3, + "video/mpeg": &MPEG, + "application/x-dtbncx+xml": &NCX, + "audio/ogg": &OGG, + "video/ogg": &OGV, + "application/atom+xml;profile=opds-catalog": &OPDS1, + "application/atom+xml;type=entry;profile=opds-catalog": &OPDS1Entry, + "application/opds+json": &OPDS2, + "application/opds-publication+json": &OPDS2Publication, + "application/opds-authentication+json": &OPDSAuthentication, + "application/oebps-package+xml": &OPF, + "audio/opus": &OPUS, + "font/otf": &OTF, + "application/pdf": &PDF, + "image/png": &PNG, + "application/audiobook+zip": &ReadiumAudiobook, + "application/audiobook+json": &ReadiumAudiobookManifest, + "application/vnd.readium.content+json": &ReadiumContentDocument, + "application/divina+zip": &ReadiumDivina, + "application/divina+json": &ReadiumDivinaManifest, + "application/guided-navigation+json": &ReadiumGuidedNavigationDocument, + "application/vnd.readium.position-list+json": &ReadiumPositionList, + "application/webpub+zip": &ReadiumWebpub, + "application/webpub+json": &ReadiumWebpubManifest, + "application/smil+xml": &SMIL, + "image/svg+xml": &SVG, + "text/plain": &Text, + "image/tiff": &TIFF, + "font/ttf": &TTF, + "application/x.readium.w3c.wpub+json": &W3CWPUBManifest, + "audio/wav": &WAV, + "audio/webm": &WEBMAudio, + "video/webm": &WEBMVideo, + "image/webp": &WEBP, + "font/woff": 
&WOFF, + "font/woff2": &WOFF2, + "application/xhtml+xml": &XHTML, + "application/xml": &XML, + "application/x.readium.zab+zip": &ZAB, + "application/zip": &ZIP, +} diff --git a/pkg/parser/epub/deobfuscator_test.go b/pkg/parser/epub/deobfuscator_test.go index ba30d42f..ddbd8579 100644 --- a/pkg/parser/epub/deobfuscator_test.go +++ b/pkg/parser/epub/deobfuscator_test.go @@ -12,11 +12,11 @@ import ( const identifier = "urn:uuid:36d5078e-ff7d-468e-a5f3-f47c14b91f2f" func withDeobfuscator(t *testing.T, href string, algorithm string, start, end int64, f func([]byte, []byte)) { - ft := fetcher.NewFileFetcher("/deobfuscation", "./testdata/deobfuscation") + ft := fetcher.NewFileFetcher("deobfuscation", "./testdata/deobfuscation") t.Log(href) // Cleartext font - clean, err := ft.Get(manifest.Link{Href: "/deobfuscation/cut-cut.woff"}).Read(start, end) + clean, err := ft.Get(manifest.Link{Href: manifest.MustNewHREFFromString("deobfuscation/cut-cut.woff", false)}).Read(start, end) if !assert.Nil(t, err) { assert.NoError(t, err.Cause) f(nil, nil) @@ -25,7 +25,7 @@ func withDeobfuscator(t *testing.T, href string, algorithm string, start, end in // Obfuscated font link := manifest.Link{ - Href: href, + Href: manifest.MustNewHREFFromString(href, false), } if algorithm != "" { link.Properties = manifest.Properties{ @@ -53,37 +53,37 @@ func withDeobfuscator(t *testing.T, href string, algorithm string, start, end in } func TestDeobfuscatorIDPF(t *testing.T) { - withDeobfuscator(t, "/deobfuscation/cut-cut.obf.woff", "http://www.idpf.org/2008/embedding", 0, 0, func(clean, obfu []byte) { + withDeobfuscator(t, "deobfuscation/cut-cut.obf.woff", "http://www.idpf.org/2008/embedding", 0, 0, func(clean, obfu []byte) { assert.Equal(t, clean, obfu) }) } func TestDeobfuscatorIDPFRangeIn(t *testing.T) { - withDeobfuscator(t, "/deobfuscation/cut-cut.obf.woff", "http://www.idpf.org/2008/embedding", 20, 40, func(clean, obfu []byte) { + withDeobfuscator(t, "deobfuscation/cut-cut.obf.woff", 
"http://www.idpf.org/2008/embedding", 20, 40, func(clean, obfu []byte) { assert.Equal(t, clean, obfu) }) } func TestDeobfuscatorIDPFRangeOut(t *testing.T) { - withDeobfuscator(t, "/deobfuscation/cut-cut.obf.woff", "http://www.idpf.org/2008/embedding", 60, 2000, func(clean, obfu []byte) { + withDeobfuscator(t, "deobfuscation/cut-cut.obf.woff", "http://www.idpf.org/2008/embedding", 60, 2000, func(clean, obfu []byte) { assert.Equal(t, clean, obfu) }) } func TestDeobfuscatorAdobe(t *testing.T) { - withDeobfuscator(t, "/deobfuscation/cut-cut.adb.woff", "http://ns.adobe.com/pdf/enc#RC", 0, 0, func(clean, obfu []byte) { + withDeobfuscator(t, "deobfuscation/cut-cut.adb.woff", "http://ns.adobe.com/pdf/enc#RC", 0, 0, func(clean, obfu []byte) { assert.Equal(t, clean, obfu) }) } func TestDeobfuscatorNoAlgorithm(t *testing.T) { - withDeobfuscator(t, "/deobfuscation/cut-cut.woff", "", 0, 0, func(clean, obfu []byte) { + withDeobfuscator(t, "deobfuscation/cut-cut.woff", "", 0, 0, func(clean, obfu []byte) { assert.Equal(t, clean, obfu) }) } func TestDeobfuscatorUnknownAlgorithm(t *testing.T) { - withDeobfuscator(t, "/deobfuscation/cut-cut.woff", "unknown algorithm", 0, 0, func(clean, obfu []byte) { + withDeobfuscator(t, "deobfuscation/cut-cut.woff", "unknown algorithm", 0, 0, func(clean, obfu []byte) { assert.Equal(t, clean, obfu) }) } diff --git a/pkg/parser/epub/factory.go b/pkg/parser/epub/factory.go index b5e33da5..ee8a2c40 100644 --- a/pkg/parser/epub/factory.go +++ b/pkg/parser/epub/factory.go @@ -3,13 +3,14 @@ package epub import ( "github.com/readium/go-toolkit/pkg/internal/extensions" "github.com/readium/go-toolkit/pkg/manifest" + "github.com/readium/go-toolkit/pkg/util/url" ) type PublicationFactory struct { FallbackTitle string PackageDocument PackageDocument NavigationData map[string]manifest.LinkList - EncryptionData map[string]manifest.Encryption + EncryptionData map[url.URL]manifest.Encryption DisplayOptions map[string]string itemById map[string]Item @@ -119,9 +120,9 
@@ func (f PublicationFactory) Create() manifest.Manifest { // Compute a Publication [Link] from an EPUB metadata link func mapEPUBLink(link EPUBLink) manifest.Link { l := manifest.Link{ - Href: link.href, - Type: link.mediaType, - Rels: link.rels, + Href: manifest.NewHREF(link.href), + MediaType: link.mediaType, + Rels: link.rels, } var contains []string @@ -149,8 +150,8 @@ func (f PublicationFactory) computeLink(item Item, fallbackChain []string) manif rels, properties := f.computePropertiesAndRels(item, &itemref) ret := manifest.Link{ - Href: item.Href, - Type: item.MediaType, + Href: manifest.NewHREF(item.Href), + MediaType: item.MediaType, Rels: rels, Alternates: f.computeAlternates(item, fallbackChain), } diff --git a/pkg/parser/epub/media_overlay_service.go b/pkg/parser/epub/media_overlay_service.go index cc7f1f1a..067e08d7 100644 --- a/pkg/parser/epub/media_overlay_service.go +++ b/pkg/parser/epub/media_overlay_service.go @@ -2,7 +2,6 @@ package epub import ( "slices" - "strings" "github.com/readium/go-toolkit/pkg/fetcher" "github.com/readium/go-toolkit/pkg/manifest" @@ -13,24 +12,23 @@ import ( func MediaOverlayFactory() pub.ServiceFactory { return func(context pub.Context) pub.Service { // Process reading order to find and replace SMIL alternates - smilMediatype := mediatype.SMIL.String() smilMap := make(map[string]manifest.Link) var smilIndexes []string for i := range context.Manifest.ReadingOrder { alts := context.Manifest.ReadingOrder[i].Alternates for j := range alts { alt := context.Manifest.ReadingOrder[i].Alternates[j] - if alt.Type == smilMediatype { + if alt.MediaType.Equal(&mediatype.SMIL) { // SMIL alternate for reading order item found // Create a guided navigation link for the SMIL alt - // TODO: remove prefix trim when url utils are updated - href := strings.TrimPrefix(context.Manifest.ReadingOrder[i].Href, "/") - gnLink := pub.GuidedNavigationLink.ExpandTemplate( + href := context.Manifest.ReadingOrder[i].Href.String() + gnLink := 
pub.GuidedNavigationLink + gnLink.Href = manifest.NewHREF(gnLink.URL(nil, map[string]string{ "ref": href, }, - ) + )) // Store the original SMIL alt in an internal map smilMap[href] = alt @@ -91,7 +89,7 @@ func (s *MediaOverlayService) GuideForResource(href string) (*manifest.GuidedNav } // Convert SMIL to guided navigation document - doc, err := ParseSMILDocument(n, link.Href) + doc, err := ParseSMILDocument(n, link.URL(nil, nil)) if err != nil { return nil, err } @@ -100,16 +98,18 @@ func (s *MediaOverlayService) GuideForResource(href string) (*manifest.GuidedNav // Then enhance the document with additional next/prev links idx := slices.Index(s.originalSmilIndexes, href) if idx > 0 { - l := pub.GuidedNavigationLink.ExpandTemplate(map[string]string{ + l := pub.GuidedNavigationLink + l.Href = manifest.NewHREF(l.Href.Resolve(nil, map[string]string{ "ref": s.originalSmilIndexes[idx-1], - }) + })) l.Rels = append(l.Rels, "prev") doc.Links = append(doc.Links, l) } if idx < len(s.originalSmilIndexes)-1 { - l := pub.GuidedNavigationLink.ExpandTemplate(map[string]string{ + l := pub.GuidedNavigationLink + l.Href = manifest.NewHREF(l.Href.Resolve(nil, map[string]string{ "ref": s.originalSmilIndexes[idx+1], - }) + })) l.Rels = append(l.Rels, "next") doc.Links = append(doc.Links, l) } diff --git a/pkg/parser/epub/metadata.go b/pkg/parser/epub/metadata.go index e38a59fc..85ffc7d2 100644 --- a/pkg/parser/epub/metadata.go +++ b/pkg/parser/epub/metadata.go @@ -8,7 +8,8 @@ import ( "github.com/readium/go-toolkit/pkg/internal/extensions" "github.com/readium/go-toolkit/pkg/manifest" - "github.com/readium/go-toolkit/pkg/util" + "github.com/readium/go-toolkit/pkg/mediatype" + "github.com/readium/go-toolkit/pkg/util/url" "github.com/readium/xmlquery" ) @@ -20,9 +21,9 @@ type Title struct { } type EPUBLink struct { - href string + href url.URL rels []string // set - mediaType string + mediaType *mediatype.MediaType refines string properties []string } @@ -49,7 +50,7 @@ func 
NewMetadataParser(epubVersion float64, prefixMap map[string]string) Metadat } } -func (m MetadataParser) Parse(document *xmlquery.Node, filePath string) *EPUBMetadata { +func (m MetadataParser) Parse(document *xmlquery.Node, filePath url.URL) *EPUBMetadata { // Init lang if l := document.SelectElement("/" + NSSelect(NamespaceOPF, "package")); l != nil { for _, attr := range l.Attr { @@ -123,7 +124,7 @@ func (m MetadataParser) language(element *xmlquery.Node) string { return m.metaLanguage } -func (m MetadataParser) parseElements(metadataElement *xmlquery.Node, filePath string) ([]MetadataItem, []EPUBLink) { +func (m MetadataParser) parseElements(metadataElement *xmlquery.Node, filePath url.URL) ([]MetadataItem, []EPUBLink) { var metas []MetadataItem var links []EPUBLink @@ -149,7 +150,7 @@ func (m MetadataParser) parseElements(metadataElement *xmlquery.Node, filePath s return metas, links } -func (m MetadataParser) parseLinkElement(element *xmlquery.Node, filePath string) *EPUBLink { +func (m MetadataParser) parseLinkElement(element *xmlquery.Node, filePath url.URL) *EPUBLink { if element == nil { return nil } @@ -157,15 +158,15 @@ func (m MetadataParser) parseLinkElement(element *xmlquery.Node, filePath string if href == "" { return nil } - - hr, err := util.NewHREF(href, filePath).String() + u, err := url.FromEPUBHref(href) if err != nil { return nil } + hr := filePath.Resolve(u) link := &EPUBLink{ href: hr, - mediaType: element.SelectAttr("media-type"), + mediaType: mediatype.MaybeNewOfString(element.SelectAttr("media-type")), refines: strings.TrimPrefix(element.SelectAttr("refines"), "#"), } @@ -664,7 +665,7 @@ func (m PubMetadataAdapter) a11yConformsTo() []manifest.A11yProfile { } for _, link := range m.Links(VocabularyDCTerms + "conformsTo") { - if profile := a11yProfile(link.href); profile != "" { + if profile := a11yProfile(link.href.String()); profile != "" { profiles = append(profiles, profile) } } @@ -711,14 +712,14 @@ func (m PubMetadataAdapter) 
a11yCertification() *manifest.A11yCertification { c.Credential = items[0].value } if link, ok := m.FirstLinkRefining(VocabularyA11Y+"certifierReport", certifierItem.id); ok { - c.Report = link.href + c.Report = link.href.String() } } else { c.Credential = m.FirstValue(VocabularyA11Y + "certifierCredential") c.Report = m.FirstValue(VocabularyA11Y + "certifierReport") if c.Report == "" { if link, ok := m.FirstLink(VocabularyA11Y + "certifierReport"); ok { - c.Report = link.href + c.Report = link.href.String() } } } diff --git a/pkg/parser/epub/metadata_test.go b/pkg/parser/epub/metadata_test.go index f96c5a2a..d04de26f 100644 --- a/pkg/parser/epub/metadata_test.go +++ b/pkg/parser/epub/metadata_test.go @@ -6,6 +6,8 @@ import ( "github.com/readium/go-toolkit/pkg/fetcher" "github.com/readium/go-toolkit/pkg/manifest" + "github.com/readium/go-toolkit/pkg/mediatype" + "github.com/readium/go-toolkit/pkg/util/url" "github.com/stretchr/testify/assert" ) @@ -20,7 +22,7 @@ func loadMetadata(name string) (*manifest.Metadata, error) { return nil, rerr.Cause } - d, err := ParsePackageDocument(n, "") + d, err := ParsePackageDocument(n, url.MustURLFromString("")) if err != nil { return nil, err } @@ -500,9 +502,9 @@ func TestMetadataCoverLink(t *testing.T) { assert.NoError(t, err) expected := &manifest.Link{ - Href: "/OEBPS/cover.jpg", - Type: "image/jpeg", - Rels: []string{"cover"}, + Href: manifest.MustNewHREFFromString("OEBPS/cover.jpg", false), + MediaType: &mediatype.JPEG, + Rels: []string{"cover"}, } assert.Equal(t, m2.Resources.FirstWithRel("cover"), expected) assert.Equal(t, m3.Resources.FirstWithRel("cover"), expected) diff --git a/pkg/parser/epub/parser.go b/pkg/parser/epub/parser.go index b7a8037c..283f6d27 100644 --- a/pkg/parser/epub/parser.go +++ b/pkg/parser/epub/parser.go @@ -8,7 +8,7 @@ import ( "github.com/readium/go-toolkit/pkg/manifest" "github.com/readium/go-toolkit/pkg/mediatype" "github.com/readium/go-toolkit/pkg/pub" - 
"github.com/readium/go-toolkit/pkg/util" + "github.com/readium/go-toolkit/pkg/util/url" ) type Parser struct { @@ -36,11 +36,10 @@ func (p Parser) Parse(asset asset.PublicationAsset, f fetcher.Fetcher) (*pub.Bui if err != nil { return nil, err } - if opfPath[0] != '/' { - opfPath = "/" + opfPath - } - opfXmlDocument, errx := f.Get(manifest.Link{Href: opfPath}).ReadAsXML(map[string]string{ + // Detect DRM + + opfXmlDocument, errx := f.Get(manifest.Link{Href: manifest.NewHREF(opfPath)}).ReadAsXML(map[string]string{ NamespaceOPF: "opf", NamespaceDC: "dc", VocabularyDCTerms: "dcterms", @@ -78,8 +77,8 @@ func (p Parser) Parse(asset asset.PublicationAsset, f fetcher.Fetcher) (*pub.Bui return pub.NewBuilder(manifest, ffetcher, builder), nil } -func parseEncryptionData(fetcher fetcher.Fetcher) (ret map[string]manifest.Encryption) { - n, err := fetcher.Get(manifest.Link{Href: "/META-INF/encryption.xml"}).ReadAsXML(map[string]string{ +func parseEncryptionData(fetcher fetcher.Fetcher) (ret map[url.URL]manifest.Encryption) { + n, err := fetcher.Get(manifest.Link{Href: manifest.MustNewHREFFromString("META-INF/encryption.xml", false)}).ReadAsXML(map[string]string{ NamespaceENC: "enc", NamespaceSIG: "ds", NamespaceCOMP: "comp", @@ -103,7 +102,7 @@ func parseNavigationData(packageDocument PackageDocument, fetcher fetcher.Fetche } } else { for _, v := range packageDocument.Manifest { - if mediatype.NCX.ContainsFromString(v.MediaType) { + if mediatype.NCX.Contains(v.MediaType) { ncxItem = &v break } @@ -112,11 +111,8 @@ func parseNavigationData(packageDocument PackageDocument, fetcher fetcher.Fetche if ncxItem == nil { return } - ncxPath, err := util.NewHREF(ncxItem.Href, packageDocument.Path).String() - if err != nil { - return - } - n, nerr := fetcher.Get(manifest.Link{Href: ncxPath}).ReadAsXML(map[string]string{ + ncxPath := packageDocument.Path.Resolve(ncxItem.Href) + n, nerr := fetcher.Get(manifest.Link{Href: manifest.NewHREF(ncxPath)}).ReadAsXML(map[string]string{ 
NamespaceNCX: "ncx", }) if nerr != nil { @@ -139,11 +135,8 @@ func parseNavigationData(packageDocument PackageDocument, fetcher fetcher.Fetche if navItem == nil { return } - navPath, err := util.NewHREF(navItem.Href, packageDocument.Path).String() - if err != nil { - return - } - n, errx := fetcher.Get(manifest.Link{Href: navPath}).ReadAsXML(map[string]string{ + navPath := packageDocument.Path.Resolve(navItem.Href) + n, errx := fetcher.Get(manifest.Link{Href: manifest.NewHREF(navPath)}).ReadAsXML(map[string]string{ NamespaceXHTML: "html", NamespaceOPS: "epub", }) @@ -157,9 +150,9 @@ func parseNavigationData(packageDocument PackageDocument, fetcher fetcher.Fetche func parseDisplayOptions(fetcher fetcher.Fetcher) (ret map[string]string) { ret = make(map[string]string) - displayOptionsXml, err := fetcher.Get(manifest.Link{Href: "/META-INF/com.apple.ibooks.display-options.xml"}).ReadAsXML(nil) + displayOptionsXml, err := fetcher.Get(manifest.Link{Href: manifest.MustNewHREFFromString("META-INF/com.apple.ibooks.display-options.xml", false)}).ReadAsXML(nil) if err != nil { - displayOptionsXml, err = fetcher.Get(manifest.Link{Href: "/META-INF/com.kobobooks.display-options.xml"}).ReadAsXML(nil) + displayOptionsXml, err = fetcher.Get(manifest.Link{Href: manifest.MustNewHREFFromString("META-INF/com.kobobooks.display-options.xml", false)}).ReadAsXML(nil) if err != nil { return } diff --git a/pkg/parser/epub/parser_encryption.go b/pkg/parser/epub/parser_encryption.go index 1f936aca..5fd18d86 100644 --- a/pkg/parser/epub/parser_encryption.go +++ b/pkg/parser/epub/parser_encryption.go @@ -5,16 +5,16 @@ import ( "github.com/readium/go-toolkit/pkg/drm" "github.com/readium/go-toolkit/pkg/manifest" - "github.com/readium/go-toolkit/pkg/util" + "github.com/readium/go-toolkit/pkg/util/url" "github.com/readium/xmlquery" ) -func ParseEncryption(document *xmlquery.Node) (ret map[string]manifest.Encryption) { +func ParseEncryption(document *xmlquery.Node) (ret 
map[url.URL]manifest.Encryption) { for _, node := range document.SelectElements("//" + NSSelect(NamespaceENC, "EncryptedData")) { u, e := parseEncryptedData(node) if e != nil { if ret == nil { - ret = make(map[string]manifest.Encryption) + ret = make(map[url.URL]manifest.Encryption) } ret[u] = *e } @@ -22,14 +22,14 @@ func ParseEncryption(document *xmlquery.Node) (ret map[string]manifest.Encryptio return } -func parseEncryptedData(node *xmlquery.Node) (string, *manifest.Encryption) { +func parseEncryptedData(node *xmlquery.Node) (url.URL, *manifest.Encryption) { cdat := node.SelectElement(NSSelect(NamespaceENC, "CipherData")) if cdat == nil { - return "", nil + return nil, nil } cipherref := cdat.SelectElement(NSSelect(NamespaceENC, "CipherReference")) if cipherref == nil { - return "", nil + return nil, nil } resourceURI := cipherref.SelectAttr("URI") @@ -60,7 +60,11 @@ func parseEncryptedData(node *xmlquery.Node) (string, *manifest.Encryption) { } } - ru, _ := util.NewHREF(resourceURI, "").String() + ru, err := url.FromEPUBHref(resourceURI) + if err != nil { + return nil, nil + } + return ru, ret } diff --git a/pkg/parser/epub/parser_encryption_test.go b/pkg/parser/epub/parser_encryption_test.go index b56886e8..dc893230 100644 --- a/pkg/parser/epub/parser_encryption_test.go +++ b/pkg/parser/epub/parser_encryption_test.go @@ -5,6 +5,7 @@ import ( "github.com/readium/go-toolkit/pkg/fetcher" "github.com/readium/go-toolkit/pkg/manifest" + "github.com/readium/go-toolkit/pkg/util/url" "github.com/stretchr/testify/assert" ) @@ -18,17 +19,23 @@ func loadEncryption(name string) (map[string]manifest.Encryption, error) { return nil, rerr.Cause } - return ParseEncryption(n), nil + enc := ParseEncryption(n) + ret := make(map[string]manifest.Encryption) + for k, v := range enc { + ret[k.String()] = v + } + + return ret, nil } var testEncMap = map[string]manifest.Encryption{ - "/OEBPS/xhtml/chapter01.xhtml": { + url.MustURLFromString("OEBPS/xhtml/chapter01.xhtml").String(): { 
Scheme: "http://readium.org/2014/01/lcp", OriginalLength: 13291, Algorithm: "http://www.w3.org/2001/04/xmlenc#aes256-cbc", Compression: "deflate", }, - "/OEBPS/xhtml/chapter02.xhtml": { + url.MustURLFromString("OEBPS/xhtml/chapter02.xhtml").String(): { Scheme: "http://readium.org/2014/01/lcp", OriginalLength: 12914, Algorithm: "http://www.w3.org/2001/04/xmlenc#aes256-cbc", @@ -52,13 +59,13 @@ func TestEncryptionParserUnknownRetrievalMethod(t *testing.T) { e, err := loadEncryption("unknown-method") assert.NoError(t, err) assert.Equal(t, map[string]manifest.Encryption{ - "/OEBPS/xhtml/chapter.xhtml": { + url.MustURLFromString("OEBPS/images/image.jpeg").String(): { + Algorithm: "http://www.w3.org/2001/04/xmlenc#kw-aes128", + }, + url.MustURLFromString("OEBPS/xhtml/chapter.xhtml").String(): { Algorithm: "http://www.w3.org/2001/04/xmlenc#kw-aes128", Compression: "deflate", OriginalLength: 12914, }, - "/OEBPS/images/image.jpeg": { - Algorithm: "http://www.w3.org/2001/04/xmlenc#kw-aes128", - }, }, e) } diff --git a/pkg/parser/epub/parser_navdoc.go b/pkg/parser/epub/parser_navdoc.go index 1cc2815c..21d9bc5e 100644 --- a/pkg/parser/epub/parser_navdoc.go +++ b/pkg/parser/epub/parser_navdoc.go @@ -4,11 +4,11 @@ import ( "strings" "github.com/readium/go-toolkit/pkg/manifest" - "github.com/readium/go-toolkit/pkg/util" + "github.com/readium/go-toolkit/pkg/util/url" "github.com/readium/xmlquery" ) -func ParseNavDoc(document *xmlquery.Node, filePath string) map[string]manifest.LinkList { +func ParseNavDoc(document *xmlquery.Node, filePath url.URL) map[string]manifest.LinkList { ret := make(map[string]manifest.LinkList) docPrefixes := parsePrefixes(SelectNodeAttrNs(document, NamespaceOPS, "prefix")) for k, v := range ContentReservedPrefixes { @@ -41,7 +41,7 @@ func ParseNavDoc(document *xmlquery.Node, filePath string) map[string]manifest.L return ret } -func parseNavElement(nav *xmlquery.Node, filePath string, prefixMap map[string]string) ([]string, manifest.LinkList) { +func 
parseNavElement(nav *xmlquery.Node, filePath url.URL, prefixMap map[string]string) ([]string, manifest.LinkList) { typeAttr := SelectNodeAttrNs(nav, NamespaceOPS, "type") if typeAttr == "" { return nil, nil @@ -60,7 +60,7 @@ func parseNavElement(nav *xmlquery.Node, filePath string, prefixMap map[string]s return nil, nil } -func parseOlElement(ol *xmlquery.Node, filePath string) manifest.LinkList { +func parseOlElement(ol *xmlquery.Node, filePath url.URL) manifest.LinkList { if ol == nil { return nil } @@ -75,7 +75,7 @@ func parseOlElement(ol *xmlquery.Node, filePath string) manifest.LinkList { return links } -func parseLiElement(li *xmlquery.Node, filePath string) (link *manifest.Link) { +func parseLiElement(li *xmlquery.Node, filePath url.URL) (link *manifest.Link) { if li == nil { return nil } @@ -88,21 +88,21 @@ func parseLiElement(li *xmlquery.Node, filePath string) (link *manifest.Link) { title = strings.TrimSpace(muchSpaceSuchWowMatcher.ReplaceAllString(first.InnerText(), " ")) } rawHref := first.SelectAttr("href") - href := "#" + href := url.MustURLFromString("#") if first.Data == "a" && rawHref != "" { - s, err := util.NewHREF(rawHref, filePath).String() + s, err := url.FromEPUBHref(rawHref) if err == nil { - href = s + href = filePath.Resolve(s) } } children := parseOlElement(li.SelectElement(NSSelect(NamespaceXHTML, "ol")), filePath) - if len(children) == 0 && (href == "#" || title == "") { + if len(children) == 0 && (href.String() == "" || title == "") { return nil } return &manifest.Link{ Title: title, - Href: href, + Href: manifest.NewHREF(href), Children: children, } } diff --git a/pkg/parser/epub/parser_navdoc_test.go b/pkg/parser/epub/parser_navdoc_test.go index 6140fc3a..a38fe7be 100644 --- a/pkg/parser/epub/parser_navdoc_test.go +++ b/pkg/parser/epub/parser_navdoc_test.go @@ -5,6 +5,7 @@ import ( "github.com/readium/go-toolkit/pkg/fetcher" "github.com/readium/go-toolkit/pkg/manifest" + "github.com/readium/go-toolkit/pkg/util/url" 
"github.com/stretchr/testify/assert" ) @@ -17,7 +18,7 @@ func loadNavDoc(name string) (map[string]manifest.LinkList, error) { return nil, rerr.Cause } - return ParseNavDoc(n, "/OEBPS/xhtml/nav.xhtml"), nil + return ParseNavDoc(n, url.MustURLFromString("OEBPS/xhtml/nav.xhtml")), nil } func TestNavDocParserNondirectDescendantOfBody(t *testing.T) { @@ -26,7 +27,7 @@ func TestNavDocParserNondirectDescendantOfBody(t *testing.T) { assert.Equal(t, manifest.LinkList{ { Title: "Chapter 1", - Href: "/OEBPS/xhtml/chapter1.xhtml", + Href: manifest.MustNewHREFFromString("OEBPS/xhtml/chapter1.xhtml", false), }, }, n["toc"]) } @@ -36,7 +37,7 @@ func TestNavDocParserNewlinesTrimmedFromTitle(t *testing.T) { assert.NoError(t, err) assert.Contains(t, n["toc"], manifest.Link{ Title: "A link with new lines splitting the text", - Href: "/OEBPS/xhtml/chapter1.xhtml", + Href: manifest.MustNewHREFFromString("OEBPS/xhtml/chapter1.xhtml", false), }) } @@ -45,7 +46,7 @@ func TestNavDocParserSpacesTrimmedFromTitle(t *testing.T) { assert.NoError(t, err) assert.Contains(t, n["toc"], manifest.Link{ Title: "A link with ignorable spaces", - Href: "/OEBPS/xhtml/chapter2.xhtml", + Href: manifest.MustNewHREFFromString("OEBPS/xhtml/chapter2.xhtml", false), }) } @@ -54,7 +55,7 @@ func TestNavDocParserNestestHTMLElementsAllowedInTitle(t *testing.T) { assert.NoError(t, err) assert.Contains(t, n["toc"], manifest.Link{ Title: "A link with nested HTML elements", - Href: "/OEBPS/xhtml/chapter3.xhtml", + Href: manifest.MustNewHREFFromString("OEBPS/xhtml/chapter3.xhtml", false), }) } @@ -63,7 +64,7 @@ func TestNavDocParserEntryWithoutTitleOrChildrenIgnored(t *testing.T) { assert.NoError(t, err) assert.NotContains(t, n["toc"], manifest.Link{ Title: "", - Href: "/OEBPS/xhtml/chapter4.xhtml", + Href: manifest.MustNewHREFFromString("OEBPS/xhtml/chapter4.xhtml", false), }) } @@ -72,7 +73,7 @@ func TestNavDocParserEntryWithoutLinkOrChildrenIgnored(t *testing.T) { assert.NoError(t, err) assert.NotContains(t, n["toc"], 
manifest.Link{ Title: "An unlinked element without children must be ignored", - Href: "#", + Href: manifest.MustNewHREFFromString("#", false), }) } @@ -80,21 +81,21 @@ func TestNavDocParserHierarchicalItemsNotAllowed(t *testing.T) { n, err := loadNavDoc("nav-children") assert.NoError(t, err) assert.Equal(t, manifest.LinkList{ - {Title: "Introduction", Href: "/OEBPS/xhtml/introduction.xhtml"}, + {Title: "Introduction", Href: manifest.MustNewHREFFromString("OEBPS/xhtml/introduction.xhtml", false)}, { Title: "Part I", - Href: "#", + Href: manifest.MustNewHREFFromString("#", false), Children: manifest.LinkList{ - {Title: "Chapter 1", Href: "/OEBPS/xhtml/part1/chapter1.xhtml"}, - {Title: "Chapter 2", Href: "/OEBPS/xhtml/part1/chapter2.xhtml"}, + {Title: "Chapter 1", Href: manifest.MustNewHREFFromString("OEBPS/xhtml/part1/chapter1.xhtml", false)}, + {Title: "Chapter 2", Href: manifest.MustNewHREFFromString("OEBPS/xhtml/part1/chapter2.xhtml", false)}, }, }, { Title: "Part II", - Href: "/OEBPS/xhtml/part2/chapter1.xhtml", + Href: manifest.MustNewHREFFromString("OEBPS/xhtml/part2/chapter1.xhtml", false), Children: manifest.LinkList{ - {Title: "Chapter 1", Href: "/OEBPS/xhtml/part2/chapter1.xhtml"}, - {Title: "Chapter 2", Href: "/OEBPS/xhtml/part2/chapter2.xhtml"}, + {Title: "Chapter 1", Href: manifest.MustNewHREFFromString("OEBPS/xhtml/part2/chapter1.xhtml", false)}, + {Title: "Chapter 2", Href: manifest.MustNewHREFFromString("OEBPS/xhtml/part2/chapter2.xhtml", false)}, }, }, }, n["toc"]) @@ -110,8 +111,8 @@ func TestNavDocParserTOC(t *testing.T) { n, err := loadNavDoc("nav-complex") assert.NoError(t, err) assert.Equal(t, manifest.LinkList{ - {Title: "Chapter 1", Href: "/OEBPS/xhtml/chapter1.xhtml"}, - {Title: "Chapter 2", Href: "/OEBPS/xhtml/chapter2.xhtml"}, + {Title: "Chapter 1", Href: manifest.MustNewHREFFromString("OEBPS/xhtml/chapter1.xhtml", false)}, + {Title: "Chapter 2", Href: manifest.MustNewHREFFromString("OEBPS/xhtml/chapter2.xhtml", false)}, }, n["toc"]) } @@ 
-119,7 +120,7 @@ func TestNavDocParserPageList(t *testing.T) { n, err := loadNavDoc("nav-complex") assert.NoError(t, err) assert.Equal(t, manifest.LinkList{ - {Title: "1", Href: "/OEBPS/xhtml/chapter1.xhtml#page1"}, - {Title: "2", Href: "/OEBPS/xhtml/chapter1.xhtml#page2"}, + {Title: "1", Href: manifest.MustNewHREFFromString("OEBPS/xhtml/chapter1.xhtml#page1", false)}, + {Title: "2", Href: manifest.MustNewHREFFromString("OEBPS/xhtml/chapter1.xhtml#page2", false)}, }, n["page-list"]) } diff --git a/pkg/parser/epub/parser_ncx.go b/pkg/parser/epub/parser_ncx.go index 634d1553..b21ac03d 100644 --- a/pkg/parser/epub/parser_ncx.go +++ b/pkg/parser/epub/parser_ncx.go @@ -4,11 +4,11 @@ import ( "strings" "github.com/readium/go-toolkit/pkg/manifest" - "github.com/readium/go-toolkit/pkg/util" + "github.com/readium/go-toolkit/pkg/util/url" "github.com/readium/xmlquery" ) -func ParseNCX(document *xmlquery.Node, filePath string) map[string]manifest.LinkList { +func ParseNCX(document *xmlquery.Node, filePath url.URL) map[string]manifest.LinkList { toc := document.SelectElement("//" + NSSelect(NamespaceNCX, "navMap")) pageList := document.SelectElement("//" + NSSelect(NamespaceNCX, "pageList")) @@ -29,7 +29,7 @@ func ParseNCX(document *xmlquery.Node, filePath string) map[string]manifest.Link return ret } -func parseNavMapElement(element *xmlquery.Node, filePath string) manifest.LinkList { +func parseNavMapElement(element *xmlquery.Node, filePath url.URL) manifest.LinkList { var links manifest.LinkList for _, el := range element.SelectElements(NSSelect(NamespaceNCX, "navPoint")) { if p := parseNavPointElement(el, filePath); p != nil { @@ -39,24 +39,24 @@ func parseNavMapElement(element *xmlquery.Node, filePath string) manifest.LinkLi return links } -func parsePageListElement(element *xmlquery.Node, filePath string) manifest.LinkList { +func parsePageListElement(element *xmlquery.Node, filePath url.URL) manifest.LinkList { selectedElements := 
element.SelectElements(NSSelect(NamespaceNCX, "pageTarget")) links := make([]manifest.Link, 0, len(selectedElements)) for _, el := range selectedElements { href := extractHref(el, filePath) title := extractTitle(el) - if href == "" || title == "" { + if href == nil || title == "" { continue } links = append(links, manifest.Link{ Title: title, - Href: href, + Href: manifest.NewHREF(href), }) } return links } -func parseNavPointElement(element *xmlquery.Node, filePath string) *manifest.Link { +func parseNavPointElement(element *xmlquery.Node, filePath url.URL) *manifest.Link { title := extractTitle(element) href := extractHref(element, filePath) var children manifest.LinkList @@ -65,15 +65,15 @@ func parseNavPointElement(element *xmlquery.Node, filePath string) *manifest.Lin children = append(children, *p) } } - if len(children) == 0 && (href == "" || title == "") { + if len(children) == 0 && (href == nil || title == "") { return nil } - if href == "" { - href = "#" + if href == nil { + href = url.MustURLFromString("#") } return &manifest.Link{ Title: title, - Href: href, + Href: manifest.NewHREF(href), Children: children, } } @@ -86,15 +86,18 @@ func extractTitle(element *xmlquery.Node) string { return strings.TrimSpace(muchSpaceSuchWowMatcher.ReplaceAllString(tel.InnerText(), " ")) } -func extractHref(element *xmlquery.Node, filePath string) string { +func extractHref(element *xmlquery.Node, filePath url.URL) url.URL { el := element.SelectElement(NSSelect(NamespaceNCX, "content")) if el == nil { - return "" + return nil } src := el.SelectAttr("src") if src == "" { - return "" + return nil + } + + if s, err := url.FromEPUBHref(src); err == nil { + return filePath.Resolve(s) } - s, _ := util.NewHREF(src, filePath).String() - return s + return nil } diff --git a/pkg/parser/epub/parser_ncx_test.go b/pkg/parser/epub/parser_ncx_test.go index 2cbf9302..3d669866 100644 --- a/pkg/parser/epub/parser_ncx_test.go +++ b/pkg/parser/epub/parser_ncx_test.go @@ -5,6 +5,7 @@ import 
( "github.com/readium/go-toolkit/pkg/fetcher" "github.com/readium/go-toolkit/pkg/manifest" + "github.com/readium/go-toolkit/pkg/util/url" "github.com/stretchr/testify/assert" ) @@ -16,7 +17,7 @@ func loadNcx(name string) (map[string]manifest.LinkList, error) { return nil, rerr.Cause } - return ParseNCX(n, "OEBPS/ncx.ncx"), nil + return ParseNCX(n, url.MustURLFromString("OEBPS/ncx.ncx")), nil } func TestNCXParserNewlinesTrimmedFromTitle(t *testing.T) { @@ -24,7 +25,7 @@ func TestNCXParserNewlinesTrimmedFromTitle(t *testing.T) { assert.NoError(t, err) assert.Contains(t, n["toc"], manifest.Link{ Title: "A link with new lines splitting the text", - Href: "/OEBPS/xhtml/chapter1.xhtml", + Href: manifest.MustNewHREFFromString("OEBPS/xhtml/chapter1.xhtml", false), }) } @@ -33,7 +34,7 @@ func TestNCXParserSpacesTrimmedFromTitle(t *testing.T) { assert.NoError(t, err) assert.Contains(t, n["toc"], manifest.Link{ Title: "A link with ignorable spaces", - Href: "/OEBPS/xhtml/chapter2.xhtml", + Href: manifest.MustNewHREFFromString("OEBPS/xhtml/chapter2.xhtml", false), }) } @@ -42,7 +43,7 @@ func TestNCXParserEntryWithNoTitleOrChildrenIgnored(t *testing.T) { assert.NoError(t, err) assert.NotContains(t, n["toc"], manifest.Link{ Title: "", - Href: "/OEBPS/xhtml/chapter3.xhtml", + Href: manifest.MustNewHREFFromString("OEBPS/xhtml/chapter3.xhtml", false), }) } @@ -51,7 +52,7 @@ func TestNCXParserUnlinkedEntriesWithoutChildrenIgnored(t *testing.T) { assert.NoError(t, err) assert.NotContains(t, n["toc"], manifest.Link{ Title: "An unlinked element without children must be ignored", - Href: "#", + Href: manifest.MustNewHREFFromString("#", false), }) } @@ -59,21 +60,21 @@ func TestNCXParserHierarchicalItemsAllowed(t *testing.T) { n, err := loadNcx("ncx-children") assert.NoError(t, err) assert.Equal(t, manifest.LinkList{ - {Title: "Introduction", Href: "/OEBPS/xhtml/introduction.xhtml"}, + {Title: "Introduction", Href: manifest.MustNewHREFFromString("OEBPS/xhtml/introduction.xhtml", false)}, 
{ Title: "Part I", - Href: "#", + Href: manifest.MustNewHREFFromString("#", false), Children: manifest.LinkList{ - {Title: "Chapter 1", Href: "/OEBPS/xhtml/part1/chapter1.xhtml"}, - {Title: "Chapter 2", Href: "/OEBPS/xhtml/part1/chapter2.xhtml"}, + {Title: "Chapter 1", Href: manifest.MustNewHREFFromString("OEBPS/xhtml/part1/chapter1.xhtml", false)}, + {Title: "Chapter 2", Href: manifest.MustNewHREFFromString("OEBPS/xhtml/part1/chapter2.xhtml", false)}, }, }, { Title: "Part II", - Href: "/OEBPS/xhtml/part2/chapter1.xhtml", + Href: manifest.MustNewHREFFromString("OEBPS/xhtml/part2/chapter1.xhtml", false), Children: manifest.LinkList{ - {Title: "Chapter 1", Href: "/OEBPS/xhtml/part2/chapter1.xhtml"}, - {Title: "Chapter 2", Href: "/OEBPS/xhtml/part2/chapter2.xhtml"}, + {Title: "Chapter 1", Href: manifest.MustNewHREFFromString("OEBPS/xhtml/part2/chapter1.xhtml", false)}, + {Title: "Chapter 2", Href: manifest.MustNewHREFFromString("OEBPS/xhtml/part2/chapter2.xhtml", false)}, }, }, }, n["toc"]) @@ -89,8 +90,8 @@ func TestNCXParserTOC(t *testing.T) { n, err := loadNcx("ncx-complex") assert.NoError(t, err) assert.Equal(t, manifest.LinkList{ - {Title: "Chapter 1", Href: "/OEBPS/xhtml/chapter1.xhtml"}, - {Title: "Chapter 2", Href: "/OEBPS/xhtml/chapter2.xhtml"}, + {Title: "Chapter 1", Href: manifest.MustNewHREFFromString("OEBPS/xhtml/chapter1.xhtml", false)}, + {Title: "Chapter 2", Href: manifest.MustNewHREFFromString("OEBPS/xhtml/chapter2.xhtml", false)}, }, n["toc"]) } @@ -98,7 +99,7 @@ func TestNCXParserPageList(t *testing.T) { n, err := loadNcx("ncx-complex") assert.NoError(t, err) assert.Equal(t, manifest.LinkList{ - {Title: "1", Href: "/OEBPS/xhtml/chapter1.xhtml#page1"}, - {Title: "2", Href: "/OEBPS/xhtml/chapter1.xhtml#page2"}, + {Title: "1", Href: manifest.MustNewHREFFromString("OEBPS/xhtml/chapter1.xhtml#page1", false)}, + {Title: "2", Href: manifest.MustNewHREFFromString("OEBPS/xhtml/chapter1.xhtml#page2", false)}, }, n["page-list"]) } diff --git 
a/pkg/parser/epub/parser_packagedoc.go b/pkg/parser/epub/parser_packagedoc.go index fec2599c..dec2037a 100644 --- a/pkg/parser/epub/parser_packagedoc.go +++ b/pkg/parser/epub/parser_packagedoc.go @@ -5,12 +5,13 @@ import ( "github.com/pkg/errors" "github.com/readium/go-toolkit/pkg/manifest" - "github.com/readium/go-toolkit/pkg/util" + "github.com/readium/go-toolkit/pkg/mediatype" + "github.com/readium/go-toolkit/pkg/util/url" "github.com/readium/xmlquery" ) type PackageDocument struct { - Path string + Path url.URL EPUBVersion float64 EPUBVersionString string uniqueIdentifierID string @@ -19,7 +20,7 @@ type PackageDocument struct { Spine Spine } -func ParsePackageDocument(document *xmlquery.Node, filePath string) (*PackageDocument, error) { +func ParsePackageDocument(document *xmlquery.Node, filePath url.URL) (*PackageDocument, error) { pkg := document.SelectElement("/" + NSSelect(NamespaceOPF, "package")) if pkg == nil { return nil, errors.New("package root element not found") @@ -80,29 +81,30 @@ func ParsePackageDocument(document *xmlquery.Node, filePath string) (*PackageDoc } type Item struct { - Href string + Href url.URL ID string fallback string mediaOverlay string - MediaType string + MediaType *mediatype.MediaType Properties []string } -func ParseItem(element *xmlquery.Node, filePath string, prefixMap map[string]string) *Item { +func ParseItem(element *xmlquery.Node, filePath url.URL, prefixMap map[string]string) *Item { rawHref := element.SelectAttr("href") if rawHref == "" { return nil } - href, err := util.NewHREF(rawHref, filePath).String() + u, err := url.FromEPUBHref(rawHref) if err != nil { return nil } + u = filePath.Resolve(u) item := &Item{ - Href: href, + Href: u, ID: element.SelectAttr("id"), fallback: element.SelectAttr("fallback"), mediaOverlay: element.SelectAttr("media-overlay"), - MediaType: element.SelectAttr("media-type"), + MediaType: mediatype.MaybeNewOfString(element.SelectAttr("media-type")), } pp := 
parseProperties(element.SelectAttr("properties")) if len(pp) > 0 { diff --git a/pkg/parser/epub/parser_packagedoc_test.go b/pkg/parser/epub/parser_packagedoc_test.go index 36b55cb3..8a233554 100644 --- a/pkg/parser/epub/parser_packagedoc_test.go +++ b/pkg/parser/epub/parser_packagedoc_test.go @@ -5,6 +5,8 @@ import ( "github.com/readium/go-toolkit/pkg/fetcher" "github.com/readium/go-toolkit/pkg/manifest" + "github.com/readium/go-toolkit/pkg/mediatype" + "github.com/readium/go-toolkit/pkg/util/url" "github.com/stretchr/testify/assert" ) @@ -19,7 +21,7 @@ func loadPackageDoc(name string) (*manifest.Manifest, error) { return nil, rerr.Cause } - d, err := ParsePackageDocument(n, "OEBPS/content.opf") + d, err := ParsePackageDocument(n, url.MustURLFromString("OEBPS/content.opf")) if err != nil { return nil, err } @@ -114,12 +116,12 @@ func TestPackageDocLinkReadingOrder(t *testing.T) { assert.Equal(t, manifest.LinkList{ { - Href: "/titlepage.xhtml", - Type: "application/xhtml+xml", + Href: manifest.MustNewHREFFromString("titlepage.xhtml", false), + MediaType: &mediatype.XHTML, }, { - Href: "/OEBPS/chapter01.xhtml", - Type: "application/xhtml+xml", + Href: manifest.MustNewHREFFromString("OEBPS/chapter01.xhtml", false), + MediaType: &mediatype.XHTML, }, }, p.ReadingOrder) } @@ -128,44 +130,46 @@ func TestPackageDocLinkResources(t *testing.T) { p, err := loadPackageDoc("links") assert.NoError(t, err) + ft := mediatype.OfString("application/vnd.ms-opentype") + assert.Equal(t, manifest.LinkList{ { - Href: "/OEBPS/fonts/MinionPro.otf", - Type: "application/vnd.ms-opentype", + Href: manifest.MustNewHREFFromString("OEBPS/fonts/MinionPro.otf", false), + MediaType: ft, }, { - Href: "/OEBPS/nav.xhtml", - Type: "application/xhtml+xml", - Rels: manifest.Strings{"contents"}, + Href: manifest.MustNewHREFFromString("OEBPS/nav.xhtml", false), + MediaType: &mediatype.XHTML, + Rels: manifest.Strings{"contents"}, }, { - Href: "/style.css", - Type: "text/css", + Href: 
manifest.MustNewHREFFromString("style.css", false), + MediaType: &mediatype.CSS, }, { - Href: "/OEBPS/chapter02.xhtml", - Type: "application/xhtml+xml", + Href: manifest.MustNewHREFFromString("OEBPS/chapter02.xhtml", false), + MediaType: &mediatype.XHTML, }, { - Href: "/OEBPS/chapter01.smil", - Type: "application/smil+xml", + Href: manifest.MustNewHREFFromString("OEBPS/chapter01.smil", false), + MediaType: &mediatype.SMIL, }, { - Href: "/OEBPS/chapter02.smil", - Type: "application/smil+xml", - Duration: 1949.0, + Href: manifest.MustNewHREFFromString("OEBPS/chapter02.smil", false), + MediaType: &mediatype.SMIL, + Duration: 1949.0, }, { - Href: "/OEBPS/images/alice01a.png", - Type: "image/png", - Rels: manifest.Strings{"cover"}, + Href: manifest.MustNewHREFFromString("OEBPS/images/alice01a.png", false), + MediaType: &mediatype.PNG, + Rels: manifest.Strings{"cover"}, }, { - Href: "/OEBPS/images/alice02a.gif", - Type: "image/gif", + Href: manifest.MustNewHREFFromString("OEBPS/images/alice02a.gif", false), + MediaType: &mediatype.GIF, }, { - Href: "/OEBPS/nomediatype.txt", + Href: manifest.MustNewHREFFromString("OEBPS/nomediatype.txt", false), }, }, p.Resources) } diff --git a/pkg/parser/epub/parser_smil.go b/pkg/parser/epub/parser_smil.go index 4ada0335..1a38aca3 100644 --- a/pkg/parser/epub/parser_smil.go +++ b/pkg/parser/epub/parser_smil.go @@ -2,15 +2,14 @@ package epub import ( "strconv" - "strings" "github.com/pkg/errors" "github.com/readium/go-toolkit/pkg/manifest" - "github.com/readium/go-toolkit/pkg/util" + "github.com/readium/go-toolkit/pkg/util/url" "github.com/readium/xmlquery" ) -func ParseSMILDocument(document *xmlquery.Node, filePath string) (*manifest.GuidedNavigationDocument, error) { +func ParseSMILDocument(document *xmlquery.Node, filePath url.URL) (*manifest.GuidedNavigationDocument, error) { smil := document.SelectElement("/" + DualNSSelect(NamespaceSMIL, NamespaceSMIL2, "smil")) if smil == nil { return nil, errors.New("SMIL root element not found") 
@@ -32,7 +31,7 @@ func ParseSMILDocument(document *xmlquery.Node, filePath string) (*manifest.Guid }, nil } -func ParseSMILSeq(seq *xmlquery.Node, filePath string) ([]manifest.GuidedNavigationObject, error) { +func ParseSMILSeq(seq *xmlquery.Node, filePath url.URL) ([]manifest.GuidedNavigationObject, error) { childElements := seq.SelectElements(ManyNSSelectMany([]string{NamespaceSMIL, NamespaceSMIL2}, []string{"par", "seq"})) if len(childElements) == 0 && seq.Data == "body" { return nil, errors.New("SMIL body is empty") @@ -54,8 +53,11 @@ func ParseSMILSeq(seq *xmlquery.Node, filePath string) ([]manifest.GuidedNavigat if o.TextRef == "" { return nil, errors.New("SMIL seq has no textref") } - o.TextRef, _ = util.NewHREF(o.TextRef, filePath).String() - o.TextRef = strings.TrimPrefix(o.TextRef, "/") + u, err := url.URLFromString(o.TextRef) + if err != nil { + return nil, errors.Wrap(err, "failed parsing SMIL seq textref") + } + o.TextRef = filePath.Resolve(u).String() // epub:type pp := parseProperties(SelectNodeAttrNs(el, NamespaceOPS, "type")) @@ -81,7 +83,7 @@ func ParseSMILSeq(seq *xmlquery.Node, filePath string) ([]manifest.GuidedNavigat return objects, nil } -func ParseSMILPar(par *xmlquery.Node, filePath string) (*manifest.GuidedNavigationObject, error) { +func ParseSMILPar(par *xmlquery.Node, filePath url.URL) (*manifest.GuidedNavigationObject, error) { text := par.SelectElement(DualNSSelect(NamespaceSMIL, NamespaceSMIL2, "text")) if text == nil { return nil, errors.New("SMIL par has no text element") @@ -92,8 +94,11 @@ func ParseSMILPar(par *xmlquery.Node, filePath string) (*manifest.GuidedNavigati if o.TextRef == "" { return nil, errors.New("SMIL par text element has empty src attribute") } - o.TextRef, _ = util.NewHREF(o.TextRef, filePath).String() - o.TextRef = strings.TrimPrefix(o.TextRef, "/") + u, err := url.URLFromString(o.TextRef) + if err != nil { + return nil, errors.Wrap(err, "failed parsing SMIL par text element textref") + } + o.TextRef = 
filePath.Resolve(u).String() // Audio is optional if audio := par.SelectElement(DualNSSelect(NamespaceSMIL, NamespaceSMIL2, "audio")); audio != nil { @@ -113,8 +118,11 @@ func ParseSMILPar(par *xmlquery.Node, filePath string) (*manifest.GuidedNavigati o.AudioRef += "," + strconv.FormatFloat(*end, 'f', -1, 64) } - o.AudioRef, _ = util.NewHREF(o.AudioRef, filePath).String() - o.AudioRef = strings.TrimPrefix(o.AudioRef, "/") + u, err := url.URLFromString(o.AudioRef) + if err != nil { + return nil, errors.Wrap(err, "failed parsing SMIL par audio element textref") + } + o.AudioRef = filePath.Resolve(u).String() } // epub:type diff --git a/pkg/parser/epub/parser_smil_test.go b/pkg/parser/epub/parser_smil_test.go index 6efbbeda..ab9b9b33 100644 --- a/pkg/parser/epub/parser_smil_test.go +++ b/pkg/parser/epub/parser_smil_test.go @@ -5,6 +5,7 @@ import ( "github.com/readium/go-toolkit/pkg/fetcher" "github.com/readium/go-toolkit/pkg/manifest" + "github.com/readium/go-toolkit/pkg/util/url" "github.com/stretchr/testify/assert" ) @@ -18,7 +19,7 @@ func loadSmil(name string) (*manifest.GuidedNavigationDocument, error) { return nil, rerr.Cause } - return ParseSMILDocument(n, "OEBPS/page1.smil") + return ParseSMILDocument(n, url.MustURLFromString("OEBPS/page1.smil")) } func TestSMILDocTypicalAudio(t *testing.T) { diff --git a/pkg/parser/epub/positions_service.go b/pkg/parser/epub/positions_service.go index 08bc13aa..48bf17e8 100644 --- a/pkg/parser/epub/positions_service.go +++ b/pkg/parser/epub/positions_service.go @@ -6,6 +6,7 @@ import ( "github.com/readium/go-toolkit/pkg/fetcher" "github.com/readium/go-toolkit/pkg/internal/extensions" "github.com/readium/go-toolkit/pkg/manifest" + "github.com/readium/go-toolkit/pkg/mediatype" "github.com/readium/go-toolkit/pkg/pub" ) @@ -109,18 +110,19 @@ func (s *PositionsService) createReflowable(link manifest.Link, startPosition ui } func (s *PositionsService) createLocator(link manifest.Link, progression float64, position uint) 
manifest.Locator { + mt := link.MediaType + if mt == nil { + mt = &mediatype.HTML + } loc := manifest.Locator{ - Href: link.Href, - Type: link.Type, - Title: link.Title, + Href: link.URL(nil, nil), + MediaType: *mt, + Title: link.Title, Locations: manifest.Locations{ Progression: extensions.Pointer(progression), Position: extensions.Pointer(position), }, } - if loc.Type == "" { - loc.Type = "text/html" - } return loc } diff --git a/pkg/parser/epub/utils.go b/pkg/parser/epub/utils.go index 2276b6a2..09e9b182 100644 --- a/pkg/parser/epub/utils.go +++ b/pkg/parser/epub/utils.go @@ -7,26 +7,32 @@ import ( "github.com/pkg/errors" "github.com/readium/go-toolkit/pkg/fetcher" "github.com/readium/go-toolkit/pkg/manifest" + "github.com/readium/go-toolkit/pkg/util/url" "github.com/readium/xmlquery" ) -func GetRootFilePath(fetcher fetcher.Fetcher) (string, error) { - res := fetcher.Get(manifest.Link{Href: "/META-INF/container.xml"}) +func GetRootFilePath(fetcher fetcher.Fetcher) (url.URL, error) { + res := fetcher.Get(manifest.Link{Href: manifest.MustNewHREFFromString("META-INF/container.xml", false)}) xml, err := res.ReadAsXML(map[string]string{ "urn:oasis:names:tc:opendocument:xmlns:container": "cn", }) if err != nil { - return "", errors.Wrap(err, "failed loading container.xml") + return nil, errors.Wrap(err, "failed loading container.xml") } n := xml.SelectElement("/container/rootfiles/rootfile") if n == nil { - return "", errors.New("rootfile not found in container") + return nil, errors.New("rootfile not found in container") } p := n.SelectAttr("full-path") if p == "" { - return "", errors.New("no full-path in rootfile") + return nil, errors.New("no full-path in rootfile") } - return p, nil + u, merr := url.FromEPUBHref(p) + if merr != nil { + return nil, errors.Wrap(err, "failed parsing rootfile full-path") + } + + return u, nil } // TODO: Use updated xpath/xmlquery functions diff --git a/pkg/parser/parser_audio.go b/pkg/parser/parser_audio.go index 661d2ee6..d8809a28 
100644 --- a/pkg/parser/parser_audio.go +++ b/pkg/parser/parser_audio.go @@ -30,13 +30,15 @@ func (p AudioParser) Parse(asset asset.PublicationAsset, fetcher fetcher.Fetcher } readingOrder := make(manifest.LinkList, 0, len(links)) for _, link := range links { + path := link.URL(nil, nil).Path() + // Filter out all irrelevant files - fext := filepath.Ext(strings.ToLower(link.Href)) + fext := filepath.Ext(strings.ToLower(path)) if len(fext) > 1 { fext = fext[1:] // Remove "." from extension } _, contains := allowed_extensions_audio[fext] - if extensions.IsHiddenOrThumbs(link.Href) || !contains { + if extensions.IsHiddenOrThumbs(path) || !contains { continue } readingOrder = append(readingOrder, link) @@ -48,7 +50,7 @@ func (p AudioParser) Parse(asset asset.PublicationAsset, fetcher fetcher.Fetcher // Sort in alphabetical order sort.Slice(readingOrder, func(i, j int) bool { - return readingOrder[i].Href < readingOrder[j].Href + return readingOrder[i].Href.String() < readingOrder[j].Href.String() }) // Try to figure out the publication's title @@ -88,13 +90,15 @@ func (p AudioParser) accepts(asset asset.PublicationAsset, fetcher fetcher.Fetch return false } for _, link := range links { - if extensions.IsHiddenOrThumbs(link.Href) { + path := link.URL(nil, nil).Path() + + if extensions.IsHiddenOrThumbs(path) { continue } - if link.MediaType().IsBitmap() { + if link.MediaType.IsBitmap() { continue } - fext := filepath.Ext(strings.ToLower(link.Href)) + fext := filepath.Ext(strings.ToLower(path)) if len(fext) > 1 { fext = fext[1:] // Remove "." 
from extension } diff --git a/pkg/parser/parser_image.go b/pkg/parser/parser_image.go index dbd22a22..2dd2f271 100644 --- a/pkg/parser/parser_image.go +++ b/pkg/parser/parser_image.go @@ -30,8 +30,10 @@ func (p ImageParser) Parse(asset asset.PublicationAsset, fetcher fetcher.Fetcher } readingOrder := make(manifest.LinkList, 0, len(links)) for _, link := range links { + path := link.URL(nil, nil).Path() + // Filter out all irrelevant files - if extensions.IsHiddenOrThumbs(link.Href) || !link.MediaType().IsBitmap() { + if extensions.IsHiddenOrThumbs(path) || !link.MediaType.IsBitmap() { continue } readingOrder = append(readingOrder, link) @@ -43,7 +45,7 @@ func (p ImageParser) Parse(asset asset.PublicationAsset, fetcher fetcher.Fetcher // Sort in alphabetical order sort.Slice(readingOrder, func(i, j int) bool { - return readingOrder[i].Href < readingOrder[j].Href + return readingOrder[i].Href.String() < readingOrder[j].Href.String() }) // Try to figure out the publication's title @@ -65,7 +67,7 @@ func (p ImageParser) Parse(asset asset.PublicationAsset, fetcher fetcher.Fetcher } builder := pub.NewServicesBuilder(map[string]pub.ServiceFactory{ - pub.PositionsService_Name: pub.PerResourcePositionsServiceFactory("image/*"), + pub.PositionsService_Name: pub.PerResourcePositionsServiceFactory(mediatype.MustNewOfString("image/*")), }) return pub.NewBuilder(manifest, fetcher, builder), nil } @@ -81,13 +83,15 @@ func (p ImageParser) accepts(asset asset.PublicationAsset, fetcher fetcher.Fetch return false, err } for _, link := range links { - if extensions.IsHiddenOrThumbs(link.Href) { + path := link.URL(nil, nil).Path() + + if extensions.IsHiddenOrThumbs(path) { continue } - if link.MediaType().IsBitmap() { + if link.MediaType.IsBitmap() { continue } - fext := filepath.Ext(strings.ToLower(link.Href)) + fext := filepath.Ext(strings.ToLower(path)) if len(fext) > 1 { fext = fext[1:] // Remove "." 
from extension } diff --git a/pkg/parser/parser_image_test.go b/pkg/parser/parser_image_test.go index e391f31a..66f81c6b 100644 --- a/pkg/parser/parser_image_test.go +++ b/pkg/parser/parser_image_test.go @@ -1,13 +1,13 @@ package parser import ( - "strings" "testing" "github.com/readium/go-toolkit/pkg/archive" "github.com/readium/go-toolkit/pkg/asset" "github.com/readium/go-toolkit/pkg/manifest" "github.com/readium/go-toolkit/pkg/pub" + "github.com/readium/go-toolkit/pkg/util/url" "github.com/stretchr/testify/assert" ) @@ -49,13 +49,14 @@ func TestImageReadingOrderAlphabetical(t *testing.T) { assert.NotNil(t, p) pub := p.Build() assert.NotNil(t, pub) + base, _ := url.URLFromDecodedPath("Cory Doctorow's Futuristic Tales of the Here and Now/") hrefs := make([]string, 0, len(pub.Manifest.ReadingOrder)) for _, roi := range pub.Manifest.ReadingOrder { - hrefs = append(hrefs, strings.TrimPrefix(roi.Href, "/Cory Doctorow's Futuristic Tales of the Here and Now")) + hrefs = append(hrefs, base.Relativize(roi.URL(nil, nil)).String()) } assert.Exactly(t, []string{ - "/a-fc.jpg", "/x-002.jpg", "/x-003.jpg", "/x-004.jpg", + "a-fc.jpg", "x-002.jpg", "x-003.jpg", "x-004.jpg", }, hrefs, "readingOrder should be sorted alphabetically") }) } @@ -69,7 +70,8 @@ func TestImageCoverFirstItem(t *testing.T) { coverItem := pub.Manifest.ReadingOrder.FirstWithRel("cover") assert.NotNil(t, coverItem, "readingOrder should have an item with rel=cover") - assert.Equal(t, "/Cory Doctorow's Futuristic Tales of the Here and Now/a-fc.jpg", coverItem.Href) + u, _ := url.URLFromDecodedPath("Cory Doctorow's Futuristic Tales of the Here and Now/a-fc.jpg") + assert.Equal(t, manifest.NewHREF(u).String(), coverItem.Href.String()) }) } diff --git a/pkg/parser/parser_readium_webpub.go b/pkg/parser/parser_readium_webpub.go index e3278792..bd1cb673 100644 --- a/pkg/parser/parser_readium_webpub.go +++ b/pkg/parser/parser_readium_webpub.go @@ -35,7 +35,7 @@ func (p WebPubParser) Parse(asset asset.PublicationAsset, 
fetcher fetcher.Fetche var manifestJSON map[string]interface{} if isPackage { - res := lFetcher.Get(manifest.Link{Href: "/manifest.json"}) + res := lFetcher.Get(manifest.Link{Href: manifest.MustNewHREFFromString("manifest.json", false)}) mjr, err := res.ReadAsJSON() if err != nil { return nil, err diff --git a/pkg/parser/pdf/parser_metadata.go b/pkg/parser/pdf/parser_metadata.go index 653daaf4..96545af8 100644 --- a/pkg/parser/pdf/parser_metadata.go +++ b/pkg/parser/pdf/parser_metadata.go @@ -4,7 +4,6 @@ import ( "encoding/hex" "fmt" "io" - "strings" "github.com/pdfcpu/pdfcpu/pkg/pdfcpu" "github.com/pdfcpu/pdfcpu/pkg/pdfcpu/model" @@ -12,6 +11,7 @@ import ( "github.com/readium/go-toolkit/pkg/internal/extensions" "github.com/readium/go-toolkit/pkg/manifest" "github.com/readium/go-toolkit/pkg/mediatype" + "github.com/readium/go-toolkit/pkg/util/url" "github.com/trimmer-io/go-xmp/xmp" ) @@ -33,8 +33,8 @@ func loadDecoder(meta pdfcpu.Metadata) (*xmp.Document, []byte, error) { func ParseMetadata(ctx *model.Context, link *manifest.Link) (m manifest.Manifest, err error) { if link != nil { m.ReadingOrder = manifest.LinkList{{ - Href: strings.TrimPrefix(link.Href, "/"), - Type: mediatype.PDF.String(), + Href: link.Href, + MediaType: &mediatype.PDF, Title: link.Title, Rels: link.Rels, Properties: link.Properties, @@ -185,17 +185,17 @@ func ParsePDFMetadata(ctx *model.Context, m *manifest.Manifest) error { // Bookmarks (TOC) if bookmarks, err := pdfcpu.Bookmarks(ctx); err == nil { rootLink := m.ReadingOrder.FirstWithMediaType(&mediatype.PDF) - root := "" - if rootLink != nil { - root = rootLink.Href - } var bf func(toc manifest.LinkList, bookmarks []pdfcpu.Bookmark) bf = func(toc manifest.LinkList, bookmarks []pdfcpu.Bookmark) { for _, b := range bookmarks { lnk := manifest.Link{ - Href: fmt.Sprintf("%s#page=%d", root, b.PageFrom), - Title: b.Title, - Type: mediatype.PDF.String(), + Title: b.Title, + MediaType: &mediatype.PDF, + } + if rootLink == nil { + lnk.Href = 
manifest.NewHREF(url.MustURLFromString(fmt.Sprintf("#page=%d", b.PageFrom))) + } else { + lnk.Href = manifest.NewHREF(url.MustURLFromString(fmt.Sprintf("%s#page=%d", rootLink.URL(nil, nil).String(), b.PageFrom))) } if len(b.Kids) > 0 { bf(lnk.Children, b.Kids) diff --git a/pkg/parser/pdf/positions_service.go b/pkg/parser/pdf/positions_service.go index 4a220288..4b2d7ac0 100644 --- a/pkg/parser/pdf/positions_service.go +++ b/pkg/parser/pdf/positions_service.go @@ -7,6 +7,7 @@ import ( "github.com/readium/go-toolkit/pkg/manifest" "github.com/readium/go-toolkit/pkg/mediatype" "github.com/readium/go-toolkit/pkg/pub" + "github.com/readium/go-toolkit/pkg/util/url" ) // Positions Service for an PDF. @@ -54,21 +55,23 @@ func (s *PositionsService) computePositions() [][]manifest.Locator { positions := make([][]manifest.Locator, s.pageCount) for i := uint(0); i < s.pageCount; i++ { progression := float64(i) / float64(s.pageCount) - typ := s.link.Type - if typ == "" { - typ = mediatype.PDF.String() + typ := s.link.MediaType + if typ == nil { + typ = &mediatype.PDF } position := i + 1 fragment := fmt.Sprintf("page=%d", i+1) + u := s.link.URL(nil, nil) + var title string - if link := s.tableOfContents.FirstWithHref(s.link.Href + "#" + fragment); link != nil { + if link := s.tableOfContents.FirstWithHref(url.MustURLFromString(u.String() + "#" + fragment)); link != nil { title = link.Title } positions[i] = []manifest.Locator{{ - Href: s.link.Href, - Type: s.link.Type, + Href: u, + MediaType: *typ, Locations: manifest.Locations{ Fragments: []string{fragment}, Progression: &progression, diff --git a/pkg/parser/utils.go b/pkg/parser/utils.go index 00439227..7a0b360b 100644 --- a/pkg/parser/utils.go +++ b/pkg/parser/utils.go @@ -11,7 +11,7 @@ import ( func hrefCommonFirstComponent(links manifest.LinkList) string { latest := "" for _, link := range links { - normalized := strings.SplitN(strings.TrimPrefix(link.Href, "/"), "/", 2)[0] + normalized := strings.SplitN(link.URL(nil, 
nil).Path(), "/", 2)[0] if latest != "" { if latest != normalized { latest = "" // No distinct prefix @@ -32,7 +32,7 @@ func guessPublicationTitleFromFileStructure(fetcher fetcher.Fetcher) string { // if commonFirstComponent == "" { return "" } - if commonFirstComponent == strings.TrimPrefix("/", links[0].Href) { + if commonFirstComponent == links[0].Href.String() { return "" } diff --git a/pkg/parser/utils_test.go b/pkg/parser/utils_test.go index b85d8af4..02f36294 100644 --- a/pkg/parser/utils_test.go +++ b/pkg/parser/utils_test.go @@ -11,24 +11,24 @@ import ( func TestHCFCEmptyWhenFilesInRoot(t *testing.T) { assert.Equal(t, "", hrefCommonFirstComponent(manifest.LinkList{ - {Href: "/im1.jpg"}, - {Href: "/im2.jpg"}, - {Href: "/toc.xml"}, + {Href: manifest.MustNewHREFFromString("im1.jpg", false)}, + {Href: manifest.MustNewHREFFromString("im2.jpg", false)}, + {Href: manifest.MustNewHREFFromString("toc.xml", false)}, }), "hrefCommonFirstComponent is empty when files are in the root") } func TestHCFCEmptyWhenFilesInDifferentDirs(t *testing.T) { assert.Equal(t, "", hrefCommonFirstComponent(manifest.LinkList{ - {Href: "/dir1/im1.jpg"}, - {Href: "/dir2/im2.jpg"}, - {Href: "/toc.xml"}, + {Href: manifest.MustNewHREFFromString("dir1/im1.jpg", false)}, + {Href: manifest.MustNewHREFFromString("dir2/im2.jpg", false)}, + {Href: manifest.MustNewHREFFromString("toc.xml", false)}, }), "hrefCommonFirstComponent is empty when files are in different directories") } func TestHCFCCorrectWhenSameDir(t *testing.T) { assert.Equal(t, "root", hrefCommonFirstComponent(manifest.LinkList{ - {Href: "/root/im1.jpg"}, - {Href: "/root/im2.jpg"}, - {Href: "/root/xml/toc.xml"}, + {Href: manifest.MustNewHREFFromString("root/im1.jpg", false)}, + {Href: manifest.MustNewHREFFromString("root/im2.jpg", false)}, + {Href: manifest.MustNewHREFFromString("root/xml/toc.xml", false)}, }), "hrefCommonFirstComponent is empty when files are in different directories") } diff --git a/pkg/pub/publication.go 
b/pkg/pub/publication.go index 9e08ebdd..c6bec3c4 100644 --- a/pkg/pub/publication.go +++ b/pkg/pub/publication.go @@ -2,11 +2,10 @@ package pub import ( "encoding/json" - "path" - "strings" "github.com/readium/go-toolkit/pkg/fetcher" "github.com/readium/go-toolkit/pkg/manifest" + "github.com/readium/go-toolkit/pkg/util/url" ) // The Publication shared model is the entrypoint for all the metadata and services related to a Readium publication. @@ -14,7 +13,6 @@ type Publication struct { Manifest manifest.Manifest // The manifest holding the publication metadata extracted from the publication file. Fetcher fetcher.Fetcher // The underlying fetcher used to read publication resources. // TODO servicesBuilder - // TODO positionsFactory services map[string]Service } @@ -26,7 +24,7 @@ func (p Publication) ConformsTo(profile manifest.Profile) bool { // Finds the first [Link] with the given href in the publication's links. // Searches through (in order) the reading order, resources and links recursively following alternate and children links. // If there's no match, tries again after removing any query parameter and anchor from the given href. -func (p Publication) LinkWithHref(href string) *manifest.Link { +func (p Publication) LinkWithHref(href url.URL) *manifest.Link { return p.Manifest.LinkWithHref(href) } @@ -91,32 +89,15 @@ func (p *Publication) Positions() []manifest.Locator { } // The URL where this publication is served, computed from the [Link] with `self` relation. -func (p Publication) BaseURL() *string { +func (p Publication) BaseURL() url.URL { lnk := p.Manifest.Links.FirstWithRel("self") if lnk == nil { return nil } - dir := path.Dir(lnk.Href) - return &dir -} - -// Returns the first existing link matching the given [path]. 
-func (p Publication) Find(path string) *manifest.Link { - link := p.Manifest.Links.FirstWithHref(path) - if link == nil { - link = p.Manifest.ReadingOrder.FirstWithHref(path) - if link == nil { - link = p.Manifest.Resources.FirstWithHref(path) - if link == nil { - return nil - } - } + if !lnk.Href.IsTemplated() { + return lnk.URL(nil, nil) } - - if !strings.HasPrefix(link.Href, "/") { - link.Href = "/" + link.Href - } - return link + return nil } func (p Publication) FindService(serviceName string) Service { diff --git a/pkg/pub/service_content.go b/pkg/pub/service_content.go index 853c563d..672e731c 100644 --- a/pkg/pub/service_content.go +++ b/pkg/pub/service_content.go @@ -14,8 +14,8 @@ import ( // TODO content iterator special ~readium link var ContentLink = manifest.Link{ - Href: "/~readium/content.json", - Type: mediatype.ReadiumContentDocument.String(), + Href: manifest.MustNewHREFFromString("~readium/content.json", false), + MediaType: &mediatype.ReadiumContentDocument, } // TODO uri template or something so we're not just dumping entire content diff --git a/pkg/pub/service_guided_navigation.go b/pkg/pub/service_guided_navigation.go index 5be76432..bf879364 100644 --- a/pkg/pub/service_guided_navigation.go +++ b/pkg/pub/service_guided_navigation.go @@ -2,19 +2,24 @@ package pub import ( "encoding/json" - "strings" "github.com/pkg/errors" "github.com/readium/go-toolkit/pkg/fetcher" "github.com/readium/go-toolkit/pkg/manifest" "github.com/readium/go-toolkit/pkg/mediatype" - "github.com/readium/go-toolkit/pkg/util" + "github.com/readium/go-toolkit/pkg/util/url" ) var GuidedNavigationLink = manifest.Link{ - Href: "/~readium/guided-navigation.json{?ref}", - Type: mediatype.ReadiumGuidedNavigationDocument.String(), - Templated: true, + Href: manifest.MustNewHREFFromString("~readium/guided-navigation.json{?ref}", true), + MediaType: &mediatype.ReadiumGuidedNavigationDocument, +} + +// Pre-cached value of the guided navigation link's path +var 
resolvedGuidedNavigation url.URL + +func init() { + resolvedGuidedNavigation = GuidedNavigationLink.URL(nil, nil) } // GuidedNavigationService implements Service @@ -26,21 +31,14 @@ type GuidedNavigationService interface { } func GetForGuidedNavigationService(service GuidedNavigationService, link manifest.Link) (fetcher.Resource, bool) { - // TODO: this is a shortcut to avoid full href parsing and template expansion - // just just to check if the link is the guided navigation link. It should - // probably be replaced by something better after the url utilities are updated. - link.Href = strings.TrimPrefix(link.Href, "/") - if !strings.HasPrefix(link.Href, "~readium/guided-navigation.json") { - return nil, false - } + u := link.URL(nil, nil) - href := util.NewHREF(link.Href, "") - params, err := href.QueryParameters() - if err != nil { - // Failed parsing query parameters + if u.Path() != resolvedGuidedNavigation.Path() { + // Not the guided navigation link return nil, false } - ref := params.Get("ref") + + ref := u.Raw().Query().Get("ref") if ref == "" { // No ref parameter // TODO: support omission of ref to generate entire doc. 
@@ -48,24 +46,22 @@ func GetForGuidedNavigationService(service GuidedNavigationService, link manifes return nil, false } - // Check if the provided link's href matches the guided navigation link in expanded form - expandedLink := GuidedNavigationLink.ExpandTemplate(map[string]string{ + // Overrride the link's href with the expanded guided navigation link + expandedHref := GuidedNavigationLink.URL(nil, map[string]string{ "ref": ref, }) - if link.Href != strings.TrimPrefix(expandedLink.Href, "/") { - return nil, false - } + link.Href = manifest.NewHREF(expandedHref) // Check if the referenced resource has a guided navigation document if !service.HasGuideForResource(ref) { return fetcher.NewFailureResource( - expandedLink, fetcher.NotFound( + link, fetcher.NotFound( errors.New("referenced resource has no associated guided navigation document"), ), ), true } - return fetcher.NewBytesResource(expandedLink, func() []byte { + return fetcher.NewBytesResource(link, func() []byte { doc, err := service.GuideForResource(ref) if err != nil { // TODO: handle error somehow diff --git a/pkg/pub/service_positions.go b/pkg/pub/service_positions.go index a239c3ee..8e06f510 100644 --- a/pkg/pub/service_positions.go +++ b/pkg/pub/service_positions.go @@ -10,8 +10,8 @@ import ( ) var PositionsLink = manifest.Link{ - Href: "/~readium/positions.json", - Type: mediatype.ReadiumPositionList.String(), + Href: manifest.MustNewHREFFromString("~readium/positions.json", false), + MediaType: &mediatype.ReadiumPositionList, } // PositionsService implements Service @@ -26,11 +26,11 @@ type PositionsService interface { // Simple [PositionsService] which generates one position per [readingOrder] resource. 
type PerResourcePositionsService struct { readingOrder manifest.LinkList - fallbackMediaType string + fallbackMediaType mediatype.MediaType } func GetForPositionsService(service PositionsService, link manifest.Link) (fetcher.Resource, bool) { - if link.Href != PositionsLink.Href { + if !link.URL(nil, nil).Equivalent(PositionsLink.URL(nil, nil)) { return nil, false } @@ -67,14 +67,14 @@ func (s PerResourcePositionsService) PositionsByReadingOrder() [][]manifest.Loca positions := make([][]manifest.Locator, len(s.readingOrder)) pageCount := len(s.readingOrder) for i, v := range s.readingOrder { - typ := v.Type - if typ == "" { - typ = s.fallbackMediaType + typ := v.MediaType + if typ == nil { + typ = &s.fallbackMediaType } positions[i] = []manifest.Locator{{ - Href: v.Href, - Type: typ, - Title: v.Title, + Href: v.Href.Resolve(nil, nil), + MediaType: *typ, + Title: v.Title, Locations: manifest.Locations{ Position: extensions.Pointer(uint(i) + 1), TotalProgression: extensions.Pointer(float64(i) / float64(pageCount)), @@ -84,7 +84,7 @@ func (s PerResourcePositionsService) PositionsByReadingOrder() [][]manifest.Loca return positions } -func PerResourcePositionsServiceFactory(fallbackMediaType string) ServiceFactory { +func PerResourcePositionsServiceFactory(fallbackMediaType mediatype.MediaType) ServiceFactory { return func(context Context) Service { return PerResourcePositionsService{ readingOrder: context.Manifest.ReadingOrder, diff --git a/pkg/pub/service_positions_test.go b/pkg/pub/service_positions_test.go index f1e1767e..0b3813a3 100644 --- a/pkg/pub/service_positions_test.go +++ b/pkg/pub/service_positions_test.go @@ -5,6 +5,8 @@ import ( "github.com/readium/go-toolkit/pkg/internal/extensions" "github.com/readium/go-toolkit/pkg/manifest" + "github.com/readium/go-toolkit/pkg/mediatype" + "github.com/readium/go-toolkit/pkg/util/url" "github.com/stretchr/testify/assert" ) @@ -15,12 +17,12 @@ func TestPerResourcePositionsServiceEmptyReadingOrder(t *testing.T) { func 
TestPerResourcePositionsServiceSingleReadingOrder(t *testing.T) { service := PerResourcePositionsService{ - readingOrder: manifest.LinkList{{Href: "res", Type: "image/png"}}, + readingOrder: manifest.LinkList{{Href: manifest.MustNewHREFFromString("res", false), MediaType: &mediatype.PNG}}, } assert.Equal(t, []manifest.Locator{{ - Href: "res", - Type: "image/png", + Href: url.MustURLFromString("res"), + MediaType: mediatype.PNG, Locations: manifest.Locations{ Position: extensions.Pointer(uint(1)), TotalProgression: extensions.Pointer(float64(0.0)), @@ -31,33 +33,34 @@ func TestPerResourcePositionsServiceSingleReadingOrder(t *testing.T) { func TestPerResourcePositionsServiceMultiReadingOrder(t *testing.T) { service := PerResourcePositionsService{ readingOrder: manifest.LinkList{ - {Href: "res"}, - {Href: "chap1", Type: "image/png"}, - {Href: "chap2", Type: "image/png", Title: "Chapter 2"}, + {Href: manifest.MustNewHREFFromString("res", false)}, + {Href: manifest.MustNewHREFFromString("chap1", false), MediaType: &mediatype.PNG}, + {Href: manifest.MustNewHREFFromString("chap2", false), MediaType: &mediatype.PNG, Title: "Chapter 2"}, }, + fallbackMediaType: mediatype.Binary, } assert.Equal(t, []manifest.Locator{ { - Href: "res", - Type: "", + Href: url.MustURLFromString("res"), + MediaType: mediatype.Binary, Locations: manifest.Locations{ Position: extensions.Pointer(uint(1)), TotalProgression: extensions.Pointer(float64(0.0)), }, }, { - Href: "chap1", - Type: "image/png", + Href: url.MustURLFromString("chap1"), + MediaType: mediatype.PNG, Locations: manifest.Locations{ Position: extensions.Pointer(uint(2)), TotalProgression: extensions.Pointer(float64(1.0 / 3.0)), }, }, { - Href: "chap2", - Type: "image/png", - Title: "Chapter 2", + Href: url.MustURLFromString("chap2"), + MediaType: mediatype.PNG, + Title: "Chapter 2", Locations: manifest.Locations{ Position: extensions.Pointer(uint(3)), TotalProgression: extensions.Pointer(float64(2.0 / 3.0)), @@ -68,13 +71,14 @@ func 
TestPerResourcePositionsServiceMultiReadingOrder(t *testing.T) { func TestPerResourcePositionsServiceMediaTypeFallback(t *testing.T) { service := PerResourcePositionsService{ - readingOrder: manifest.LinkList{{Href: "res"}}, - fallbackMediaType: "image/*", + readingOrder: manifest.LinkList{{Href: manifest.MustNewHREFFromString("res", false)}}, + fallbackMediaType: mediatype.MustNewOfString("image/*"), } + mt, _ := mediatype.NewOfString("image/*") assert.Equal(t, []manifest.Locator{{ - Href: "res", - Type: "image/*", + Href: url.MustURLFromString("res"), + MediaType: mt, Locations: manifest.Locations{ Position: extensions.Pointer(uint(1)), TotalProgression: extensions.Pointer(float64(0.0)), diff --git a/pkg/streamer/a11y_infer.go b/pkg/streamer/a11y_infer.go index 4d3d2068..b6da5d6e 100644 --- a/pkg/streamer/a11y_infer.go +++ b/pkg/streamer/a11y_infer.go @@ -49,7 +49,7 @@ func inferA11yMetadataFromManifest(mf manifest.Manifest) *manifest.A11y { *mf.Metadata.Presentation.Layout == manifest.EPUBLayoutReflowable { isTextual = true for _, link := range allResources { - mt := link.MediaType() + mt := link.MediaType if mt.IsAudio() || mt.IsVideo() || (mt.IsBitmap() && !extensions.Contains(link.Rels, "cover")) || @@ -70,7 +70,7 @@ func inferA11yMetadataFromManifest(mf manifest.Manifest) *manifest.A11y { // audio or video resource (inspect "resources" and "readingOrder" in // RWPM). for _, link := range allResources { - if link.MediaType().IsAudio() || link.MediaType().IsVideo() { + if link.MediaType.IsAudio() || link.MediaType.IsVideo() { inferredA11y.AccessModes = append(inferredA11y.AccessModes, manifest.A11yAccessModeAuditory) break } @@ -80,7 +80,7 @@ func inferA11yMetadataFromManifest(mf manifest.Manifest) *manifest.A11y { // or a video resource (inspect "resources" and "readingOrder" in // RWPM). 
for _, link := range allResources { - if link.MediaType().IsBitmap() || link.MediaType().IsVideo() { + if link.MediaType.IsBitmap() || link.MediaType.IsVideo() { inferredA11y.AccessModes = append(inferredA11y.AccessModes, manifest.A11yAccessModeVisual) break } @@ -131,7 +131,7 @@ func inferA11yMetadataFromManifest(mf manifest.Manifest) *manifest.A11y { } for _, link := range mf.Resources { - if link.MediaType().Matches(&mediatype.SMIL) { + if link.MediaType.Matches(&mediatype.SMIL) { addFeature(manifest.A11yFeatureSynchronizedAudioText) break } diff --git a/pkg/streamer/a11y_infer_test.go b/pkg/streamer/a11y_infer_test.go index 440b7b42..be20c5c5 100644 --- a/pkg/streamer/a11y_infer_test.go +++ b/pkg/streamer/a11y_infer_test.go @@ -36,8 +36,8 @@ func TestReturnsAdditionalInferredA11yMetadata(t *testing.T) { func newLink(mt mediatype.MediaType, extension string) manifest.Link { return manifest.Link{ - Href: "file." + extension, - Type: mt.String(), + Href: manifest.MustNewHREFFromString("file."+extension, false), + MediaType: &mt, } } diff --git a/pkg/util/href.go b/pkg/util/href.go deleted file mode 100644 index 2316f30a..00000000 --- a/pkg/util/href.go +++ /dev/null @@ -1,112 +0,0 @@ -package util - -import ( - "net/url" - "strings" - - "github.com/readium/go-toolkit/pkg/internal/extensions" - "golang.org/x/net/idna" -) - -type QueryParameter struct { - name string - value string -} - -type HREF struct { - href string - baseHref string -} - -func NewHREF(href string, base string) HREF { - if base == "" { - base = "/" - } - return HREF{href: href, baseHref: base} -} - -// Returns the normalized string representation for this HREF. -func (h HREF) String() (string, error) { - baseHref := extensions.RemovePercentEncoding(h.baseHref) - href := extensions.RemovePercentEncoding(h.href) - - // HREF is just an anchor inside the base. - if strings.TrimSpace(href) == "" || strings.HasPrefix(href, "#") { - return baseHref + href, nil - } - - // HREF is already absolute. 
- uri, err := url.Parse(extensions.AddPercentEncodingPath(href)) - if err != nil { - return "", err - } - if uri.IsAbs() { - return href, nil - } - - baseuri, err := url.Parse(extensions.AddPercentEncodingPath(baseHref)) - if err != nil { - return "", err - } - - uri = baseuri.ResolveReference(uri) - var url string - if uri.Scheme == "https" || uri.Scheme == "http" { - url = uri.String() - } else { - url = uri.String() - if !strings.HasPrefix(url, "/") { - url = "/" + url - } - } - return extensions.RemovePercentEncoding(url), nil -} - -// Returns the normalized string representation for this HREF, encoded for URL uses. -func (h HREF) PercentEncodedString() (string, error) { - str, err := h.String() - if err != nil { - return "", err - } - str = extensions.AddPercentEncodingPath(str) - if strings.HasPrefix(str, "/") { - str = "file://" + str - } - - ul, err := url.Parse(str) - if err != nil { - return "", err - } - - idh, err := idna.ToASCII(ul.Hostname()) - if err != nil { - idh = ul.Hostname() - } - if ul.Port() != "" { - idh = idh + ":" + ul.Port() - } - - ui := url.URL{ - Scheme: ul.Scheme, - Opaque: ul.Opaque, - User: ul.User, - Host: idh, - Path: ul.Path, - Fragment: ul.Fragment, - RawQuery: ul.RawQuery, - } - return strings.TrimPrefix(ui.String(), "file://"), nil // TODO: why (or why not) does this need forced ASCII? -} - -// Returns the query parameters present in this HREF, in the order they appear. 
-func (h HREF) QueryParameters() (url.Values, error) { - ul, err := h.PercentEncodedString() - if err != nil { - return nil, err - } - ulx, err := url.Parse(ul) - if err != nil { - return nil, err - } - return ulx.Query(), nil -} diff --git a/pkg/util/href_test.go b/pkg/util/href_test.go deleted file mode 100644 index fba0cd08..00000000 --- a/pkg/util/href_test.go +++ /dev/null @@ -1,212 +0,0 @@ -package util - -import ( - "net/url" - "testing" - - "github.com/stretchr/testify/assert" -) - -func hrefString(t *testing.T, href string, base string) string { - h, err := NewHREF(href, base).String() - assert.NoError(t, err) - return h -} - -func TestHrefNormalizeToBase(t *testing.T) { - assert.Equal(t, "/folder/", hrefString(t, "", "/folder/")) - assert.Equal(t, "/", hrefString(t, "/", "/folder/")) - - assert.Equal(t, "/foo/bar.txt", hrefString(t, "foo/bar.txt", "")) - assert.Equal(t, "/foo/bar.txt", hrefString(t, "foo/bar.txt", "/")) - assert.Equal(t, "/foo/bar.txt", hrefString(t, "foo/bar.txt", "/file.txt")) - assert.Equal(t, "/foo/bar.txt", hrefString(t, "foo/bar.txt", "/folder")) - assert.Equal(t, "/folder/foo/bar.txt", hrefString(t, "foo/bar.txt", "/folder/")) - assert.Equal(t, "http://example.com/folder/foo/bar.txt", hrefString(t, "foo/bar.txt", "http://example.com/folder/file.txt")) - assert.Equal(t, "http://example.com/foo/bar.txt", hrefString(t, "foo/bar.txt", "http://example.com/folder")) - assert.Equal(t, "http://example.com/folder/foo/bar.txt", hrefString(t, "foo/bar.txt", "http://example.com/folder/")) - - assert.Equal(t, "/foo/bar.txt", hrefString(t, "/foo/bar.txt", "")) - assert.Equal(t, "/foo/bar.txt", hrefString(t, "/foo/bar.txt", "/")) - assert.Equal(t, "/foo/bar.txt", hrefString(t, "/foo/bar.txt", "/file.txt")) - assert.Equal(t, "/foo/bar.txt", hrefString(t, "/foo/bar.txt", "/folder")) - assert.Equal(t, "/foo/bar.txt", hrefString(t, "/foo/bar.txt", "/folder/")) - assert.Equal(t, "http://example.com/foo/bar.txt", hrefString(t, "/foo/bar.txt", 
"http://example.com/folder/file.txt")) - assert.Equal(t, "http://example.com/foo/bar.txt", hrefString(t, "/foo/bar.txt", "http://example.com/folder")) - assert.Equal(t, "http://example.com/foo/bar.txt", hrefString(t, "/foo/bar.txt", "http://example.com/folder/")) - - assert.Equal(t, "/foo/bar.txt", hrefString(t, "../foo/bar.txt", "")) - assert.Equal(t, "/foo/bar.txt", hrefString(t, "../foo/bar.txt", "/")) - assert.Equal(t, "/foo/bar.txt", hrefString(t, "../foo/bar.txt", "/file.txt")) - assert.Equal(t, "/foo/bar.txt", hrefString(t, "../foo/bar.txt", "/folder")) - assert.Equal(t, "/foo/bar.txt", hrefString(t, "../foo/bar.txt", "/folder/")) - assert.Equal(t, "http://example.com/foo/bar.txt", hrefString(t, "../foo/bar.txt", "http://example.com/folder/file.txt")) - assert.Equal(t, "http://example.com/foo/bar.txt", hrefString(t, "../foo/bar.txt", "http://example.com/folder")) - assert.Equal(t, "http://example.com/foo/bar.txt", hrefString(t, "../foo/bar.txt", "http://example.com/folder/")) - - assert.Equal(t, "/bar.txt", hrefString(t, "foo/../bar.txt", "")) - assert.Equal(t, "/bar.txt", hrefString(t, "foo/../bar.txt", "/")) - assert.Equal(t, "/bar.txt", hrefString(t, "foo/../bar.txt", "/file.txt")) - assert.Equal(t, "/bar.txt", hrefString(t, "foo/../bar.txt", "/folder")) - assert.Equal(t, "/folder/bar.txt", hrefString(t, "foo/../bar.txt", "/folder/")) - assert.Equal(t, "http://example.com/folder/bar.txt", hrefString(t, "foo/../bar.txt", "http://example.com/folder/file.txt")) - assert.Equal(t, "http://example.com/bar.txt", hrefString(t, "foo/../bar.txt", "http://example.com/folder")) - assert.Equal(t, "http://example.com/folder/bar.txt", hrefString(t, "foo/../bar.txt", "http://example.com/folder/")) - - assert.Equal(t, "http://absolute.com/foo/bar.txt", hrefString(t, "http://absolute.com/foo/bar.txt", "/")) - assert.Equal(t, "http://absolute.com/foo/bar.txt", hrefString(t, "http://absolute.com/foo/bar.txt", "https://example.com/")) - - // Anchor and query parameters are 
preserved - assert.Equal(t, "/foo/bar.txt#anchor", hrefString(t, "foo/bar.txt#anchor", "/")) - assert.Equal(t, "/foo/bar.txt?query=param#anchor", hrefString(t, "foo/bar.txt?query=param#anchor", "/")) - assert.Equal(t, "/foo/bar.txt?query=param#anchor", hrefString(t, "/foo/bar.txt?query=param#anchor", "/")) - assert.Equal(t, "http://absolute.com/foo/bar.txt?query=param#anchor", hrefString(t, "http://absolute.com/foo/bar.txt?query=param#anchor", "/")) - - assert.Equal(t, "/foo/bar.txt#anchor", hrefString(t, "foo/bar.txt#anchor", "/")) - assert.Equal(t, "/foo/bar.txt?query=param#anchor", hrefString(t, "foo/bar.txt?query=param#anchor", "/")) - assert.Equal(t, "/foo/bar.txt?query=param#anchor", hrefString(t, "/foo/bar.txt?query=param#anchor", "/")) - assert.Equal(t, "http://absolute.com/foo/bar.txt?query=param#anchor", hrefString(t, "http://absolute.com/foo/bar.txt?query=param#anchor", "/")) - - // HREF that is just an anchor - assert.Equal(t, "/#anchor", hrefString(t, "#anchor", "")) - assert.Equal(t, "/#anchor", hrefString(t, "#anchor", "/")) - assert.Equal(t, "/file.txt#anchor", hrefString(t, "#anchor", "/file.txt")) - assert.Equal(t, "/folder#anchor", hrefString(t, "#anchor", "/folder")) - assert.Equal(t, "/folder/#anchor", hrefString(t, "#anchor", "/folder/")) - assert.Equal(t, "http://example.com/folder/file.txt#anchor", hrefString(t, "#anchor", "http://example.com/folder/file.txt")) - assert.Equal(t, "http://example.com/folder#anchor", hrefString(t, "#anchor", "http://example.com/folder")) - assert.Equal(t, "http://example.com/folder/#anchor", hrefString(t, "#anchor", "http://example.com/folder/")) - - // HREF containing spaces. 
- assert.Equal(t, "/foo bar.txt", hrefString(t, "foo bar.txt", "")) - assert.Equal(t, "/foo bar.txt", hrefString(t, "foo bar.txt", "/")) - assert.Equal(t, "/foo bar.txt", hrefString(t, "foo bar.txt", "/file.txt")) - assert.Equal(t, "/foo bar.txt", hrefString(t, "foo bar.txt", "/base folder")) - assert.Equal(t, "/base folder/foo bar.txt", hrefString(t, "foo bar.txt", "/base folder/")) - assert.Equal(t, "/base folder/foo bar.txt", hrefString(t, "foo bar.txt", "/base folder/file.txt")) - assert.Equal(t, "/base folder/foo bar.txt", hrefString(t, "foo bar.txt", "base folder/file.txt")) - - // HREF containing special characters - assert.Equal(t, "/base%folder/foo bar/baz%qux.txt", hrefString(t, "foo bar/baz%qux.txt", "/base%folder/")) - assert.Equal(t, "/base folder/foo bar/baz%qux.txt", hrefString(t, "foo%20bar/baz%25qux.txt", "/base%20folder/")) - assert.Equal(t, "http://example.com/foo bar/baz qux.txt", hrefString(t, "foo bar/baz qux.txt", "http://example.com/base%20folder")) - assert.Equal(t, "http://example.com/base folder/foo bar/baz qux.txt", hrefString(t, "foo bar/baz qux.txt", "http://example.com/base%20folder/")) - assert.Equal(t, "http://example.com/base folder/foo bar/baz%qux.txt", hrefString(t, "foo bar/baz%qux.txt", "http://example.com/base%20folder/")) - assert.Equal(t, "/foo bar.txt?query=param#anchor", hrefString(t, "/foo bar.txt?query=param#anchor", "/")) - assert.Equal(t, "http://example.com/foo bar.txt?query=param#anchor", hrefString(t, "/foo bar.txt?query=param#anchor", "http://example.com/")) - assert.Equal(t, "http://example.com/foo bar.txt?query=param#anchor", hrefString(t, "/foo%20bar.txt?query=param#anchor", "http://example.com/")) - assert.Equal(t, "http://absolute.com/foo bar.txt?query=param#Hello world £500", hrefString(t, "http://absolute.com/foo%20bar.txt?query=param#Hello%20world%20%C2%A3500", "/")) - assert.Equal(t, "http://absolute.com/foo bar.txt?query=param#Hello world £500", hrefString(t, "http://absolute.com/foo 
bar.txt?query=param#Hello world £500", "/")) -} - -func hrefPEString(t *testing.T, href string, base string) string { - h, err := NewHREF(href, base).PercentEncodedString() - assert.NoError(t, err) - return h -} - -func TestHrefPercentEncodedString(t *testing.T) { - assert.Equal(t, "/folder/", hrefPEString(t, "", "/folder/")) - assert.Equal(t, "/", hrefPEString(t, "/", "/folder/")) - - assert.Equal(t, "/foo/bar.txt", hrefPEString(t, "foo/bar.txt", "")) - assert.Equal(t, "/foo/bar.txt", hrefPEString(t, "foo/bar.txt", "/")) - assert.Equal(t, "/foo/bar.txt", hrefPEString(t, "foo/bar.txt", "/file.txt")) - assert.Equal(t, "/foo/bar.txt", hrefPEString(t, "foo/bar.txt", "/folder")) - assert.Equal(t, "/folder/foo/bar.txt", hrefPEString(t, "foo/bar.txt", "/folder/")) - assert.Equal(t, "http://example.com/folder/foo/bar.txt", hrefPEString(t, "foo/bar.txt", "http://example.com/folder/file.txt")) - assert.Equal(t, "http://example.com/foo/bar.txt", hrefPEString(t, "foo/bar.txt", "http://example.com/folder")) - assert.Equal(t, "http://example.com/folder/foo/bar.txt", hrefPEString(t, "foo/bar.txt", "http://example.com/folder/")) - - assert.Equal(t, "/foo/bar.txt", hrefPEString(t, "/foo/bar.txt", "")) - assert.Equal(t, "/foo/bar.txt", hrefPEString(t, "/foo/bar.txt", "/")) - assert.Equal(t, "/foo/bar.txt", hrefPEString(t, "/foo/bar.txt", "/file.txt")) - assert.Equal(t, "/foo/bar.txt", hrefPEString(t, "/foo/bar.txt", "/folder")) - assert.Equal(t, "/foo/bar.txt", hrefPEString(t, "/foo/bar.txt", "/folder/")) - assert.Equal(t, "http://example.com/foo/bar.txt", hrefPEString(t, "/foo/bar.txt", "http://example.com/folder/file.txt")) - assert.Equal(t, "http://example.com/foo/bar.txt", hrefPEString(t, "/foo/bar.txt", "http://example.com/folder")) - assert.Equal(t, "http://example.com/foo/bar.txt", hrefPEString(t, "/foo/bar.txt", "http://example.com/folder/")) - - assert.Equal(t, "/foo/bar.txt", hrefPEString(t, "../foo/bar.txt", "")) - assert.Equal(t, "/foo/bar.txt", hrefPEString(t, 
"../foo/bar.txt", "/")) - assert.Equal(t, "/foo/bar.txt", hrefPEString(t, "../foo/bar.txt", "/file.txt")) - assert.Equal(t, "/foo/bar.txt", hrefPEString(t, "../foo/bar.txt", "/folder")) - assert.Equal(t, "/foo/bar.txt", hrefPEString(t, "../foo/bar.txt", "/folder/")) - assert.Equal(t, "http://example.com/foo/bar.txt", hrefPEString(t, "../foo/bar.txt", "http://example.com/folder/file.txt")) - assert.Equal(t, "http://example.com/foo/bar.txt", hrefPEString(t, "../foo/bar.txt", "http://example.com/folder")) - assert.Equal(t, "http://example.com/foo/bar.txt", hrefPEString(t, "../foo/bar.txt", "http://example.com/folder/")) - - assert.Equal(t, "/bar.txt", hrefPEString(t, "foo/../bar.txt", "")) - assert.Equal(t, "/bar.txt", hrefPEString(t, "foo/../bar.txt", "/")) - assert.Equal(t, "/bar.txt", hrefPEString(t, "foo/../bar.txt", "/file.txt")) - assert.Equal(t, "/bar.txt", hrefPEString(t, "foo/../bar.txt", "/folder")) - assert.Equal(t, "/folder/bar.txt", hrefPEString(t, "foo/../bar.txt", "/folder/")) - assert.Equal(t, "http://example.com/folder/bar.txt", hrefPEString(t, "foo/../bar.txt", "http://example.com/folder/file.txt")) - assert.Equal(t, "http://example.com/bar.txt", hrefPEString(t, "foo/../bar.txt", "http://example.com/folder")) - assert.Equal(t, "http://example.com/folder/bar.txt", hrefPEString(t, "foo/../bar.txt", "http://example.com/folder/")) - - assert.Equal(t, "http://absolute.com/foo/bar.txt", hrefPEString(t, "http://absolute.com/foo/bar.txt", "/")) - assert.Equal(t, "http://absolute.com/foo/bar.txt", hrefPEString(t, "http://absolute.com/foo/bar.txt", "https://example.com/")) - - // Anchor and query parameters are preserved - assert.Equal(t, "/foo/bar.txt#anchor", hrefPEString(t, "foo/bar.txt#anchor", "/")) - assert.Equal(t, "/foo/bar.txt?query=param#anchor", hrefPEString(t, "foo/bar.txt?query=param#anchor", "/")) - assert.Equal(t, "/foo/bar.txt?query=param#anchor", hrefPEString(t, "/foo/bar.txt?query=param#anchor", "/")) - assert.Equal(t, 
"http://absolute.com/foo/bar.txt?query=param#anchor", hrefPEString(t, "http://absolute.com/foo/bar.txt?query=param#anchor", "/")) - - assert.Equal(t, "/foo/bar.txt#anchor", hrefPEString(t, "foo/bar.txt#anchor", "/")) - assert.Equal(t, "/foo/bar.txt?query=param#anchor", hrefPEString(t, "foo/bar.txt?query=param#anchor", "/")) - assert.Equal(t, "/foo/bar.txt?query=param#anchor", hrefPEString(t, "/foo/bar.txt?query=param#anchor", "/")) - assert.Equal(t, "http://absolute.com/foo/bar.txt?query=param#anchor", hrefPEString(t, "http://absolute.com/foo/bar.txt?query=param#anchor", "/")) - - // HREF that is just an anchor - assert.Equal(t, "/#anchor", hrefPEString(t, "#anchor", "")) - assert.Equal(t, "/#anchor", hrefPEString(t, "#anchor", "/")) - assert.Equal(t, "/file.txt#anchor", hrefPEString(t, "#anchor", "/file.txt")) - assert.Equal(t, "/folder#anchor", hrefPEString(t, "#anchor", "/folder")) - assert.Equal(t, "/folder/#anchor", hrefPEString(t, "#anchor", "/folder/")) - assert.Equal(t, "http://example.com/folder/file.txt#anchor", hrefPEString(t, "#anchor", "http://example.com/folder/file.txt")) - assert.Equal(t, "http://example.com/folder#anchor", hrefPEString(t, "#anchor", "http://example.com/folder")) - assert.Equal(t, "http://example.com/folder/#anchor", hrefPEString(t, "#anchor", "http://example.com/folder/")) - - // HREF containing spaces. 
- assert.Equal(t, "/foo%20bar.txt", hrefPEString(t, "foo bar.txt", "")) - assert.Equal(t, "/foo%20bar.txt", hrefPEString(t, "foo bar.txt", "/")) - assert.Equal(t, "/foo%20bar.txt", hrefPEString(t, "foo bar.txt", "/file.txt")) - assert.Equal(t, "/foo%20bar.txt", hrefPEString(t, "foo bar.txt", "/base folder")) - assert.Equal(t, "/base%20folder/foo%20bar.txt", hrefPEString(t, "foo bar.txt", "/base folder/")) - assert.Equal(t, "/base%20folder/foo%20bar.txt", hrefPEString(t, "foo bar.txt", "/base folder/file.txt")) - assert.Equal(t, "/base%20folder/foo%20bar.txt", hrefPEString(t, "foo bar.txt", "base folder/file.txt")) - - // HREF containing special characters - assert.Equal(t, "/base%25folder/foo%20bar/baz%25qux.txt", hrefPEString(t, "foo bar/baz%qux.txt", "/base%folder/")) - assert.Equal(t, "/base%20folder/foo%20bar/baz%25qux.txt", hrefPEString(t, "foo%20bar/baz%25qux.txt", "/base%20folder/")) - assert.Equal(t, "http://example.com/foo%20bar/baz%20qux.txt", hrefPEString(t, "foo bar/baz qux.txt", "http://example.com/base%20folder")) - assert.Equal(t, "http://example.com/base%20folder/foo%20bar/baz%20qux.txt", hrefPEString(t, "foo bar/baz qux.txt", "http://example.com/base%20folder/")) - assert.Equal(t, "http://example.com/base%20folder/foo%20bar/baz%25qux.txt", hrefPEString(t, "foo bar/baz%qux.txt", "http://example.com/base%20folder/")) - assert.Equal(t, "/foo%20bar.txt?query=param#anchor", hrefPEString(t, "/foo bar.txt?query=param#anchor", "/")) - assert.Equal(t, "http://example.com/foo%20bar.txt?query=param#anchor", hrefPEString(t, "/foo bar.txt?query=param#anchor", "http://example.com/")) - assert.Equal(t, "http://example.com/foo%20bar.txt?query=param#anchor", hrefPEString(t, "/foo%20bar.txt?query=param#anchor", "http://example.com/")) - assert.Equal(t, "http://absolute.com/foo%20bar.txt?query=param#Hello%20world%20%C2%A3500", hrefPEString(t, "http://absolute.com/foo%20bar.txt?query=param#Hello%20world%20%C2%A3500", "/")) - - assert.Equal(t, 
"http://absolute.com/foo%20bar.txt?query=param#Hello%20world%20%C2%A3500", hrefPEString(t, "http://absolute.com/foo bar.txt?query=param#Hello world £500", "/")) - -} - -func hrefQueryParams(t *testing.T, href string) url.Values { - h, err := NewHREF(href, "").QueryParameters() - assert.NoError(t, err) - return h -} - -func TestHrefQueryParameters(t *testing.T) { - assert.Equal(t, make(url.Values), hrefQueryParams(t, "http://domain.com/path")) - assert.Equal(t, url.Values{ - "query": []string{"param"}, - }, hrefQueryParams(t, "http://domain.com/path?query=param#anchor")) - assert.Equal(t, url.Values{ - "query": []string{"param", "other"}, - "fruit": []string{"banana"}, - "empty": []string{""}, - }, hrefQueryParams(t, "http://domain.com/path?query=param&fruit=banana&query=other&empty")) -} diff --git a/pkg/util/uri_template.go b/pkg/util/uri_template.go deleted file mode 100644 index abc6e5d9..00000000 --- a/pkg/util/uri_template.go +++ /dev/null @@ -1,112 +0,0 @@ -package util - -import ( - "strings" - - "github.com/agext/regexp" - - "github.com/readium/go-toolkit/pkg/internal/extensions" -) - -/** - * A lightweight implementation of URI Template (RFC 6570). - * - * Only handles simple cases, fitting Readium's use cases. - * See https://tools.ietf.org/html/rfc6570 - */ - -type URITemplate struct { - uri string -} - -func NewURITemplate(uri string) URITemplate { - return URITemplate{ - uri: uri, - } -} - -var paramRegex = regexp.MustCompile(`\{\??([^}]+)\}`) -var expandRegex = regexp.MustCompile(`\{(\??)([^}]+)\}`) - -// List of URI template parameter keys. 
-func (u URITemplate) Parameters() []string { - params := paramRegex.FindAllStringSubmatch(u.uri, -1) - ret := make([]string, 0, len(params)) - for _, p := range params { - if len(p) != 2 { - continue - } - for _, v := range strings.Split(p[1], ",") { - ret = extensions.AddToSet(ret, v) - } - } - - return ret -} - -func expandSimpleString(s string, parameters map[string]string) string { - strs := strings.Split(s, ",") - for i, str := range strs { - v, _ := parameters[str] - strs[i] = v - } - return strings.Join(strs, ",") -} - -func expandFormStyle(s string, parameters map[string]string) string { - strs := strings.Split(s, ",") - var sb strings.Builder - sb.WriteRune('?') - var added bool - for i, str := range strs { - v, ok := parameters[str] - if !ok { - continue - } - if i != 0 { - sb.WriteRune('&') - } - added = true - sb.WriteString(str) - sb.WriteRune('=') - if v == "" { - continue - } - sb.WriteString(v) - } - if !added { - // Remove '?' if no params were actually added - s := sb.String() - return s[:len(s)-1] - } - return sb.String() -} - -// Expands the HREF by replacing URI template variables by the given parameters. -func (u URITemplate) Expand(parameters map[string]string) string { - // `+` is considered like an encoded space, and will not be properly encoded in parameters. - // This is an issue for ISO 8601 date for example. - // As a workaround, we encode manually this character. We don't do it in the full URI, - // because it could contain some legitimate +-as-space characters. 
- for k, v := range parameters { - parameters[k] = strings.Replace(v, "+", "~~+~~", -1) - } - - href, _ := NewHREF(expandRegex.ReplaceAllStringSubmatchFunc(u.uri, func(s []string) string { - if len(s) != 3 { - return "" - } - if s[1] == "" { - return expandSimpleString(s[2], parameters) - } else { - return expandFormStyle(s[2], parameters) - } - }), "").PercentEncodedString() - - return strings.ReplaceAll(strings.ReplaceAll(href, "~~%20~~", "%2B"), "~~+~~", "%2B") - -} - -func (u URITemplate) Description() string { - return u.uri -} diff --git a/pkg/util/uri_template_test.go b/pkg/util/uri_template_test.go deleted file mode 100644 index e2874708..00000000 --- a/pkg/util/uri_template_test.go +++ /dev/null @@ -1,55 +0,0 @@ -package util - -import ( - "testing" - - "github.com/stretchr/testify/assert" -) - -func TestUTParameters(t *testing.T) { - assert.Equal( - t, - []string{"x", "hello", "y", "z", "w"}, - NewURITemplate("/url{?x,hello,y}name{z,y,w}").Parameters(), - ) -} - -func TestUTExpandSimpleStringTemplates(t *testing.T) { - parameters := map[string]string{ - "x": "aaa", - "hello": "Hello, world", - "y": "b", - "z": "45", - "w": "w", - } - assert.Equal( - t, - "/urlaaa,Hello,%20world,bname45,b,w", - NewURITemplate("/url{x,hello,y}name{z,y,w}").Expand(parameters), - ) -} - -func TestUTExpandComplicatedTemplated(t *testing.T) { // form-style ampersand-separated templates - parameters := map[string]string{ - "x": "aaa", - "hello": "Hello, world", - "y": "b", - } - assert.Equal( - t, - "/url?x=aaa&hello=Hello,%20world&y=bname", - NewURITemplate("/url{?x,hello,y,z}name").Expand(parameters), - ) - - assert.Equal( - t, - "https://lsd-test.edrlab.org/licenses/39ef1ff2-cda2-4219-a26a-d504fbb24c17/renew?end=2020-11-12T16:02:00.000%2B01:00&id=38dfd7ba-a80b-4253-a047-e6aa9c21d6f0&name=Pixel%203a", - NewURITemplate( - "https://lsd-test.edrlab.org/licenses/39ef1ff2-cda2-4219-a26a-d504fbb24c17/renew{?end,id,name}", - ).Expand(map[string]string{ - "id": 
// Scheme identifies the protocol portion of a URL (e.g. "https").
type Scheme string

const (
	SchemeHTTP  Scheme = "http"
	SchemeHTTPS Scheme = "https"
	SchemeData  Scheme = "data"
	SchemeFTP   Scheme = "ftp"
	SchemeS3    Scheme = "s3" // Amazon S3-compatible
	SchemeGS    Scheme = "gs" // Google Cloud Storage
	SchemeOPDS  Scheme = "opds"
	SchemeFile  Scheme = "file"
)

// SchemeFromString lowercases s and returns the matching known [Scheme],
// or the empty Scheme ("") when s is not a recognized scheme.
func SchemeFromString(s string) Scheme {
	switch ls := strings.ToLower(s); ls {
	case "http", "https", "data", "ftp", "s3", "gs", "opds", "file":
		return Scheme(ls)
	default:
		// Not a known scheme.
		return ""
	}
}

// String returns the scheme as a plain string.
func (s Scheme) String() string {
	return string(s)
}

// IsHTTP indicates whether the scheme is HTTP or HTTPS.
func (s Scheme) IsHTTP() bool {
	return s == SchemeHTTP || s == SchemeHTTPS
}

// IsFile indicates whether the scheme refers to a local file.
func (s Scheme) IsFile() bool {
	return s == SchemeFile
}

// IsCloud indicates whether the scheme refers to a cloud object store
// (Amazon S3-compatible or Google Cloud Storage).
func (s Scheme) IsCloud() bool {
	return s == SchemeS3 || s == SchemeGS
}
+// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package uritemplates is a level 3 implementation of RFC 6570 (URI +// Template, http://tools.ietf.org/html/rfc6570). +// uritemplates does not support composite values (in Go: slices or maps) +// and so does not qualify as a level 4 implementation. +package uritemplates + +import ( + "bytes" + "errors" + "regexp" + "strconv" + "strings" +) + +var ( + unreserved = regexp.MustCompile("[^A-Za-z0-9\\-._~]") + reserved = regexp.MustCompile("[^A-Za-z0-9\\-._~:/?#[\\]@!$&'()*+,;=]") + validname = regexp.MustCompile("^([A-Za-z0-9_\\.]|%[0-9A-Fa-f][0-9A-Fa-f])+$") + hex = []byte("0123456789ABCDEF") +) + +func pctEncode(src []byte) []byte { + dst := make([]byte, len(src)*3) + for i, b := range src { + buf := dst[i*3 : i*3+3] + buf[0] = 0x25 + buf[1] = hex[b/16] + buf[2] = hex[b%16] + } + return dst +} + +// pairWriter is a convenience struct which allows escaped and unescaped +// versions of the template to be written in parallel. +type pairWriter struct { + escaped, unescaped bytes.Buffer +} + +// Write writes the provided string directly without any escaping. +func (w *pairWriter) Write(s string) { + w.escaped.WriteString(s) + w.unescaped.WriteString(s) +} + +// Escape writes the provided string, escaping the string for the +// escaped output. +func (w *pairWriter) Escape(s string, allowReserved bool) { + w.unescaped.WriteString(s) + if allowReserved { + w.escaped.Write(reserved.ReplaceAllFunc([]byte(s), pctEncode)) + } else { + w.escaped.Write(unreserved.ReplaceAllFunc([]byte(s), pctEncode)) + } +} + +// Escaped returns the escaped string. +func (w *pairWriter) Escaped() string { + return w.escaped.String() +} + +// Unescaped returns the unescaped string. +func (w *pairWriter) Unescaped() string { + return w.unescaped.String() +} + +// A uriTemplate is a parsed representation of a URI template. 
+type uriTemplate struct { + raw string + parts []templatePart +} + +// parse parses a URI template string into a uriTemplate object. +func parse(rawTemplate string) (*uriTemplate, error) { + split := strings.Split(rawTemplate, "{") + parts := make([]templatePart, len(split)*2-1) + for i, s := range split { + if i == 0 { + if strings.Contains(s, "}") { + return nil, errors.New("unexpected }") + } + parts[i].raw = s + continue + } + subsplit := strings.Split(s, "}") + if len(subsplit) != 2 { + return nil, errors.New("malformed template") + } + expression := subsplit[0] + var err error + parts[i*2-1], err = parseExpression(expression) + if err != nil { + return nil, err + } + parts[i*2].raw = subsplit[1] + } + return &uriTemplate{ + raw: rawTemplate, + parts: parts, + }, nil +} + +type templatePart struct { + raw string + terms []templateTerm + first string + sep string + named bool + ifemp string + allowReserved bool +} + +type templateTerm struct { + name string + explode bool + truncate int +} + +func parseExpression(expression string) (result templatePart, err error) { + switch expression[0] { + case '+': + result.sep = "," + result.allowReserved = true + expression = expression[1:] + case '.': + result.first = "." + result.sep = "." + expression = expression[1:] + case '/': + result.first = "/" + result.sep = "/" + expression = expression[1:] + case ';': + result.first = ";" + result.sep = ";" + result.named = true + expression = expression[1:] + case '?': + result.first = "?" 
+ result.sep = "&" + result.named = true + result.ifemp = "=" + expression = expression[1:] + case '&': + result.first = "&" + result.sep = "&" + result.named = true + result.ifemp = "=" + expression = expression[1:] + case '#': + result.first = "#" + result.sep = "," + result.allowReserved = true + expression = expression[1:] + default: + result.sep = "," + } + rawterms := strings.Split(expression, ",") + result.terms = make([]templateTerm, len(rawterms)) + for i, raw := range rawterms { + result.terms[i], err = parseTerm(raw) + if err != nil { + break + } + } + return result, err +} + +func parseTerm(term string) (result templateTerm, err error) { + // TODO(djd): Remove "*" suffix parsing once we check that no APIs have + // mistakenly used that attribute. + if strings.HasSuffix(term, "*") { + result.explode = true + term = term[:len(term)-1] + } + split := strings.Split(term, ":") + if len(split) == 1 { + result.name = term + } else if len(split) == 2 { + result.name = split[0] + var parsed int64 + parsed, err = strconv.ParseInt(split[1], 10, 0) + result.truncate = int(parsed) + } else { + err = errors.New("multiple colons in same term") + } + if !validname.MatchString(result.name) { + err = errors.New("not a valid name: " + result.name) + } + if result.explode && result.truncate > 0 { + err = errors.New("both explode and prefix modifiers on same term") + } + return result, err +} + +// Expand expands a URI template with a set of values to produce the +// resultant URI. Two forms of the result are returned: one with all the +// elements escaped, and one with the elements unescaped. 
+func (t *uriTemplate) Expand(values map[string]string) (escaped, unescaped string) { + var w pairWriter + for _, p := range t.parts { + p.expand(&w, values) + } + return w.Escaped(), w.Unescaped() +} + +func (tp *templatePart) expand(w *pairWriter, values map[string]string) { + if len(tp.raw) > 0 { + w.Write(tp.raw) + return + } + var first = true + for _, term := range tp.terms { + value, exists := values[term.name] + if !exists { + continue + } + if first { + w.Write(tp.first) + first = false + } else { + w.Write(tp.sep) + } + tp.expandString(w, term, value) + } +} + +func (tp *templatePart) expandName(w *pairWriter, name string, empty bool) { + if tp.named { + w.Write(name) + if empty { + w.Write(tp.ifemp) + } else { + w.Write("=") + } + } +} + +func (tp *templatePart) expandString(w *pairWriter, t templateTerm, s string) { + if len(s) > t.truncate && t.truncate > 0 { + s = s[:t.truncate] + } + tp.expandName(w, t.name, len(s) == 0) + w.Escape(s, tp.allowReserved) +} diff --git a/pkg/util/url/uritemplates/uritemplates_test.go b/pkg/util/url/uritemplates/uritemplates_test.go new file mode 100644 index 00000000..1296d374 --- /dev/null +++ b/pkg/util/url/uritemplates/uritemplates_test.go @@ -0,0 +1,280 @@ +// Copyright 2013 Joshua Tacoma. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +package uritemplates + +import ( + "fmt" + "log" + "net/url" + "testing" +) + +func ExampleExpand() { + values := map[string]string{ + "user": "golang", + "repo": "go", + } + expanded, _, err := Expand("https://api.github.com/repos{/user,repo}", values) + if err != nil { + log.Fatalf("Error expanding template: %v", err) + } + fmt.Println(expanded) + // Output: + // https://api.github.com/repos/golang/go +} + +func TestExpand(t *testing.T) { + testCases := []struct { + tmpl string + values map[string]string + want string + }{ + // These examples come from the RFC: + // http://tools.ietf.org/html/rfc6570 + { + tmpl: "http://www.example.com/foo{?query,number}", + values: map[string]string{"query": "mycelium", "number": "100"}, + want: "http://www.example.com/foo?query=mycelium&number=100", + }, + { + tmpl: "http://www.example.com/foo{?query,number}", + values: map[string]string{"query": "mycelium"}, + want: "http://www.example.com/foo?query=mycelium", + }, + { + tmpl: "http://www.example.com/foo{?query,number}", + values: map[string]string{}, + want: "http://www.example.com/foo", + }, + } + + for _, tt := range testCases { + exp, _, err := Expand(tt.tmpl, tt.values) + if err != nil { + t.Errorf("Expand(%q, %v) error: %v", tt.tmpl, tt.values, err) + continue + } + if exp != tt.want { + t.Errorf("Expand(%q, %v)\ngot %q\nwant %q", tt.tmpl, tt.values, exp, tt.want) + } + } +} + +func TestExpandRFCLevels(t *testing.T) { + values := map[string]string{ + "dub": "me/too", + "hello": "Hello World!", + "half": "50%", + "var": "value", + "who": "fred", + "base": "http://example.com/home/", + "path": "/foo/bar", + "semi": ";", + "v": "6", + "x": "1024", + "y": "768", + "empty": "", + // undef not mapped. + } + testCases := []struct { + tmpl, want string + }{ + // These examples come from the RFC levels specification. + // http://tools.ietf.org/html/rfc6570 + // Level 1 examples. 
+ {tmpl: "{var}", want: "value"}, + {tmpl: "{hello}", want: "Hello%20World%21"}, + + // Level 2 examples. + {tmpl: "{+var}", want: "value"}, + {tmpl: "{+hello}", want: "Hello%20World!"}, + {tmpl: "{+path}/here", want: "/foo/bar/here"}, + {tmpl: "here?ref={+path}", want: "here?ref=/foo/bar"}, + {tmpl: "X{#var}", want: "X#value"}, + {tmpl: "X{#hello}", want: "X#Hello%20World!"}, + + // Level 3 examples. + {tmpl: "map?{x,y}", want: "map?1024,768"}, + {tmpl: "{x,hello,y}", want: "1024,Hello%20World%21,768"}, + {tmpl: "{+x,hello,y}", want: "1024,Hello%20World!,768"}, + {tmpl: "{+path,x}/here", want: "/foo/bar,1024/here"}, + {tmpl: "{#x,hello,y}", want: "#1024,Hello%20World!,768"}, + {tmpl: "{#path,x}/here", want: "#/foo/bar,1024/here"}, + {tmpl: "X{.var}", want: "X.value"}, + {tmpl: "X{.x,y}", want: "X.1024.768"}, + {tmpl: "{/var}", want: "/value"}, + {tmpl: "{/var,x}/here", want: "/value/1024/here"}, + {tmpl: "{;x,y}", want: ";x=1024;y=768"}, + {tmpl: "{;x,y,empty}", want: ";x=1024;y=768;empty"}, + {tmpl: "{?x,y}", want: "?x=1024&y=768"}, + {tmpl: "{?x,y,empty}", want: "?x=1024&y=768&empty="}, + {tmpl: "?fixed=yes{&x}", want: "?fixed=yes&x=1024"}, + {tmpl: "{&x,y,empty}", want: "&x=1024&y=768&empty="}, + + {tmpl: "{var:3}", want: "val"}, + {tmpl: "{var:30}", want: "value"}, + {tmpl: "{+path:6}/here", want: "/foo/b/here"}, + {tmpl: "{#path:6}/here", want: "#/foo/b/here"}, + {tmpl: "X{.var:3}", want: "X.val"}, + {tmpl: "{/var:1,var}", want: "/v/value"}, + {tmpl: "{;hello:5}", want: ";hello=Hello"}, + {tmpl: "{?var:3}", want: "?var=val"}, + {tmpl: "{&var:3}", want: "&var=val"}, + + // 2.4.1 Prefix values. + {tmpl: "{var}", want: "value"}, + {tmpl: "{var:20}", want: "value"}, + {tmpl: "{var:3}", want: "val"}, + {tmpl: "{semi}", want: "%3B"}, + {tmpl: "{semi:2}", want: "%3B"}, + // 3.2.2. 
Simple String Expansion: {var} + {tmpl: "{var}", want: "value"}, + {tmpl: "{hello}", want: "Hello%20World%21"}, + {tmpl: "{half}", want: "50%25"}, + {tmpl: "O{empty}X", want: "OX"}, + {tmpl: "O{undef}X", want: "OX"}, + {tmpl: "{x,y}", want: "1024,768"}, + {tmpl: "{x,hello,y}", want: "1024,Hello%20World%21,768"}, + {tmpl: "?{x,empty}", want: "?1024,"}, + {tmpl: "?{x,undef}", want: "?1024"}, + {tmpl: "?{undef,y}", want: "?768"}, + {tmpl: "{var:3}", want: "val"}, + {tmpl: "{var:30}", want: "value"}, + // 3.2.3. Reserved Expansion: {+var} + {tmpl: "{+var}", want: "value"}, + {tmpl: "{+hello}", want: "Hello%20World!"}, + {tmpl: "{+half}", want: "50%25"}, + {tmpl: "{base}index", want: "http%3A%2F%2Fexample.com%2Fhome%2Findex"}, + {tmpl: "{+base}index", want: "http://example.com/home/index"}, + {tmpl: "O{+empty}X", want: "OX"}, + {tmpl: "O{+undef}X", want: "OX"}, + {tmpl: "{+path}/here", want: "/foo/bar/here"}, + {tmpl: "here?ref={+path}", want: "here?ref=/foo/bar"}, + {tmpl: "up{+path}{var}/here", want: "up/foo/barvalue/here"}, + {tmpl: "{+x,hello,y}", want: "1024,Hello%20World!,768"}, + {tmpl: "{+path,x}/here", want: "/foo/bar,1024/here"}, + {tmpl: "{+path:6}/here", want: "/foo/b/here"}, + // 3.2.4. Fragment Expansion: {#var} + {tmpl: "{#var}", want: "#value"}, + {tmpl: "{#hello}", want: "#Hello%20World!"}, + {tmpl: "{#half}", want: "#50%25"}, + {tmpl: "foo{#empty}", want: "foo#"}, + {tmpl: "foo{#undef}", want: "foo"}, + {tmpl: "{#x,hello,y}", want: "#1024,Hello%20World!,768"}, + {tmpl: "{#path,x}/here", want: "#/foo/bar,1024/here"}, + {tmpl: "{#path:6}/here", want: "#/foo/b/here"}, + // 3.2.5. Label Expansion with Dot-Prefix: {.var} + {tmpl: "{.who}", want: ".fred"}, + {tmpl: "{.who,who}", want: ".fred.fred"}, + {tmpl: "{.half,who}", want: ".50%25.fred"}, + {tmpl: "X{.var}", want: "X.value"}, + {tmpl: "X{.empty}", want: "X."}, + {tmpl: "X{.undef}", want: "X"}, + {tmpl: "X{.var:3}", want: "X.val"}, + // 3.2.6. 
Path Segment Expansion: {/var} + {tmpl: "{/who}", want: "/fred"}, + {tmpl: "{/who,who}", want: "/fred/fred"}, + {tmpl: "{/half,who}", want: "/50%25/fred"}, + {tmpl: "{/who,dub}", want: "/fred/me%2Ftoo"}, + {tmpl: "{/var}", want: "/value"}, + {tmpl: "{/var,empty}", want: "/value/"}, + {tmpl: "{/var,undef}", want: "/value"}, + {tmpl: "{/var,x}/here", want: "/value/1024/here"}, + {tmpl: "{/var:1,var}", want: "/v/value"}, + // 3.2.7. Path-Style Parameter Expansion: {;var} + {tmpl: "{;who}", want: ";who=fred"}, + {tmpl: "{;half}", want: ";half=50%25"}, + {tmpl: "{;empty}", want: ";empty"}, + {tmpl: "{;v,empty,who}", want: ";v=6;empty;who=fred"}, + {tmpl: "{;v,bar,who}", want: ";v=6;who=fred"}, + {tmpl: "{;x,y}", want: ";x=1024;y=768"}, + {tmpl: "{;x,y,empty}", want: ";x=1024;y=768;empty"}, + {tmpl: "{;x,y,undef}", want: ";x=1024;y=768"}, + {tmpl: "{;hello:5}", want: ";hello=Hello"}, + // 3.2.8. Form-Style Query Expansion: {?var} + {tmpl: "{?who}", want: "?who=fred"}, + {tmpl: "{?half}", want: "?half=50%25"}, + {tmpl: "{?x,y}", want: "?x=1024&y=768"}, + {tmpl: "{?x,y,empty}", want: "?x=1024&y=768&empty="}, + {tmpl: "{?x,y,undef}", want: "?x=1024&y=768"}, + {tmpl: "{?var:3}", want: "?var=val"}, + // 3.2.9. Form-Style Query Continuation: {&var} + {tmpl: "{&who}", want: "&who=fred"}, + {tmpl: "{&half}", want: "&half=50%25"}, + {tmpl: "?fixed=yes{&x}", want: "?fixed=yes&x=1024"}, + {tmpl: "{&x,y,empty}", want: "&x=1024&y=768&empty="}, + {tmpl: "{&x,y,undef}", want: "&x=1024&y=768"}, + {tmpl: "{&var:3}", want: "&var=val"}, + } + for _, tt := range testCases { + esc, unesc, err := Expand(tt.tmpl, values) + if err != nil { + t.Errorf("Expand(%q) error: %v", tt.tmpl, err) + continue + } + if esc != tt.want { + t.Errorf("Expand(%q)\ngot %q\nwant %q", tt.tmpl, esc, tt.want) + } + // Check that the escaped form is equivalent to unescaped. 
+ urlUnesc, err := url.QueryUnescape(esc) + if err != nil { + t.Errorf("Expand(%q) gave invalid escaping %q: %v", tt.tmpl, esc, err) + continue + } + if urlUnesc != unesc { + t.Errorf("Expand(%q) gave inconsistent escaped/unescaped\nunescaped %q\nescaped %q\nwhich is %q", tt.tmpl, unesc, esc, urlUnesc) + } + } +} + +func TestExpandUnescaped(t *testing.T) { + testCases := []struct { + tmpl, wantEsc, wantUnesc string + values map[string]string + }{ + { + tmpl: "/foo/{bucket}/bar", + values: map[string]string{ + "bucket": "simple", + }, + wantEsc: "/foo/simple/bar", + wantUnesc: "/foo/simple/bar", + }, + { + tmpl: "/foo/{bucket}/bar", + values: map[string]string{ + "bucket": "path/with/slash", + }, + wantEsc: "/foo/path%2Fwith%2Fslash/bar", + wantUnesc: "/foo/path/with/slash/bar", + }, + { + tmpl: "/foo/{+bucket}/bar", + values: map[string]string{ + "bucket": "path/with/slash", + }, + wantEsc: "/foo/path/with/slash/bar", + wantUnesc: "/foo/path/with/slash/bar", + }, + { + tmpl: "/foo/{bucket}/bar", + values: map[string]string{ + "bucket": "double%2Fescaped", + }, + wantEsc: "/foo/double%252Fescaped/bar", + wantUnesc: "/foo/double%2Fescaped/bar", + }, + } + for _, tt := range testCases { + esc, unesc, err := Expand(tt.tmpl, tt.values) + if err != nil { + t.Errorf("Expand(%q) error: %v", tt.tmpl, err) + continue + } + if esc != tt.wantEsc || unesc != tt.wantUnesc { + t.Errorf("Expand(%q)\ngot esc=%q, unesc=%q\nwant esc=%q, unesc=%q", tt.tmpl, esc, unesc, tt.wantEsc, tt.wantUnesc) + } + } +} diff --git a/pkg/util/url/uritemplates/utils.go b/pkg/util/url/uritemplates/utils.go new file mode 100644 index 00000000..7d44f372 --- /dev/null +++ b/pkg/util/url/uritemplates/utils.go @@ -0,0 +1,32 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +package uritemplates + +// Expand parses then expands a URI template with a set of values to produce +// the resultant URI. Two forms of the result are returned: one with all the +// elements escaped, and one with the elements unescaped. +func Expand(path string, values map[string]string) (escaped, unescaped string, err error) { + template, err := parse(path) + if err != nil { + return "", "", err + } + escaped, unescaped = template.Expand(values) + return escaped, unescaped, nil +} + +// Values returns the names of the variables in the URI template. +func Values(path string) ([]string, error) { + template, err := parse(path) + if err != nil { + return []string{}, err + } + parts := []string{} + for _, part := range template.parts { + for _, term := range part.terms { + parts = append(parts, term.name) + } + } + return parts, nil +} diff --git a/pkg/util/url/url.go b/pkg/util/url/url.go new file mode 100644 index 00000000..cda4c22c --- /dev/null +++ b/pkg/util/url/url.go @@ -0,0 +1,421 @@ +package url + +import ( + "errors" + gurl "net/url" + "os" + "path" + "path/filepath" + "strings" + + "github.com/readium/go-toolkit/pkg/internal/extensions" + "golang.org/x/net/idna" +) + +// A Uniform Resource Locator. +// +// https://url.spec.whatwg.org/ +type URL interface { + Path() string // Decoded path segments identifying a location. + Filename() string // Decoded filename portion of the URL path. + Extension() string // Extension of the filename portion of the URL path. + RemoveQuery() URL // Returns a copy of this URL after dropping its query. + Fragment() string // Returns the decoded fragment present in this URL, if any. + RemoveFragment() URL // Returns a copy of this URL after dropping its fragment. + Resolve(url URL) URL // Resolves the given [url] to this URL. + Relativize(url URL) URL // Relativizes the given [url] against this URL. 
+ Normalize() URL // Normalizes the URL using a subset of the RFC-3986 rules (https://datatracker.ietf.org/doc/html/rfc3986#section-6). + String() string // Encodes the URL to a string. + Raw() *gurl.URL // Returns the underlying Go URL. + Equivalent(url URL) bool // Returns whether the receiver is equivalent to the given `url` after normalization. +} + +// Creates a [RelativeURL] from a percent-decoded path. +func URLFromDecodedPath(path string) (RelativeURL, error) { + return RelativeURLFromString(extensions.AddPercentEncodingPath(path)) +} + +// A proxy for [URLFromString] that panics on error. +func MustURLFromString(url string) URL { + u, err := URLFromString(url) + if err != nil { + panic(err) + } + return u +} + +// Creates a [URL] from its encoded string representation. +func URLFromString(url string) (URL, error) { + u, err := gurl.Parse(url) + if err != nil { + return nil, err + } + return URLFromGo(u) +} + +// Create a [URL] from a Go net/url URL. +func URLFromGo(url *gurl.URL) (URL, error) { + if url.IsAbs() { + return AbsoluteURLFromGo(url) + } else { + return RelativeURLFromGo(url) + } +} + +// Represents a relative Uniform Resource Locator. 
+// RelativeURL implements URL +type RelativeURL struct { + url *gurl.URL + normalized bool +} + +func (u RelativeURL) Path() string { + return u.url.Path +} + +// Filename implements URL +func (u RelativeURL) Filename() string { + if strings.HasSuffix(u.url.Path, "/") { + return "" + } + return path.Base(u.url.Path) +} + +// Extension implements URL +func (u RelativeURL) Extension() string { + return strings.TrimPrefix(path.Ext(u.Filename()), ".") +} + +// RemoveQuery implements URL +func (u RelativeURL) RemoveQuery() URL { + u.url.RawQuery = "" + return RelativeURL{url: u.url, normalized: u.normalized} +} + +// Fragment implements URL +func (u RelativeURL) Fragment() string { + return u.url.Fragment +} + +// RemoveFragment implements URL +func (u RelativeURL) RemoveFragment() URL { + u.url.Fragment = "" + return RelativeURL{url: u.url, normalized: u.normalized} +} + +// Resolve implements URL +func (u RelativeURL) Resolve(url URL) URL { + if _, ok := url.(AbsoluteURL); ok { + return url + } else if rel, ok := url.(RelativeURL); ok { + res := u.url.ResolveReference(rel.url) + + // ResolveReference always adds a fowards slash to the path, even if the given URL has no slash prefix. + // To match the other toolkits, we remove the slash if the URL and the given URL have no slash. + if strings.HasPrefix(res.Path, "/") { + if len(rel.url.Path) == 0 { + res.Path = res.Path[1:] + } else if !strings.HasPrefix(rel.url.Path, "/") && !strings.HasPrefix(u.url.Path, "/") { + res.Path = res.Path[1:] + } + } + + return RelativeURL{url: res} + } else { + panic("URL type not supported") + } +} + +// Relativize implements URL +// Note that unlike other functions, this can return nil! 
+// Logic copied from Java: https://github.com/openjdk/jdk/blob/de90204b60c408ef258a2d2515ad252de4b23536/src/java.base/share/classes/java/net/URI.java#L2269 +func (u RelativeURL) Relativize(url URL) URL { + if url, ok := url.(RelativeURL); ok { + if len(u.url.Opaque) > 0 || len(url.url.Opaque) > 0 { + return url + } + if u.url.Scheme != url.url.Scheme && u.url.Host != url.url.Host { + return url + } + + bp := path.Clean(u.url.Path) + cp := path.Clean(url.url.Path) + if bp != cp { + if !strings.HasSuffix(bp, "/") { + bp = bp + "/" + } + if !strings.HasPrefix(cp, bp) { + return url + } + } + + return RelativeURL{url: &gurl.URL{ + Path: cp[len(bp):], + Fragment: url.url.Fragment, + RawQuery: url.url.RawQuery, + ForceQuery: url.url.ForceQuery, + }} + } + + // Cannot relativize a relative URL against an non-relative URL. + return url +} + +// Normalize implements URL +func (u RelativeURL) Normalize() URL { + if u.normalized { + // Already normalized + return u + } + + var hadSlash bool + if strings.HasSuffix(u.url.Path, "/") { + hadSlash = true + } + u.url.Path = path.Clean(u.url.Path) + if hadSlash { + u.url.Path += "/" + } + + return RelativeURL{url: u.url, normalized: true} +} + +// String implements URL +func (u RelativeURL) String() string { + return u.url.String() +} + +// Raw implements URL +func (u RelativeURL) Raw() *gurl.URL { + return u.url +} + +// Equivalent implements URL +func (u RelativeURL) Equivalent(url URL) bool { + return u.Normalize().String() == url.Normalize().String() +} + +// Creates a [RelativeURL] from its encoded string representation. +func RelativeURLFromString(url string) (RelativeURL, error) { + u, err := gurl.Parse(url) + if err != nil { + return RelativeURL{}, err + } + return RelativeURLFromGo(u) +} + +// Create a [RelativeURL] from a Go net/url URL. 
+func RelativeURLFromGo(url *gurl.URL) (RelativeURL, error) { + if url.IsAbs() { + return RelativeURL{}, errors.New("URL is not relative") + } + return RelativeURL{url: url}, nil +} + +type AbsoluteURL struct { + url *gurl.URL + scheme Scheme + normalized bool +} + +// Path implements URL +func (u AbsoluteURL) Path() string { + return u.url.Path +} + +// Filename implements URL +func (u AbsoluteURL) Filename() string { + if strings.HasSuffix(u.url.Path, "/") { + return "" + } + return path.Base(u.url.Path) +} + +// Extension implements URL +func (u AbsoluteURL) Extension() string { + return strings.TrimPrefix(path.Ext(u.Filename()), ".") +} + +// RemoveQuery implements URL +func (u AbsoluteURL) RemoveQuery() URL { + u.url.RawQuery = "" + return AbsoluteURL{url: u.url, scheme: u.scheme, normalized: u.normalized} +} + +// Fragment implements URL +func (u AbsoluteURL) Fragment() string { + return u.url.Fragment +} + +// RemoveFragment implements URL +func (u AbsoluteURL) RemoveFragment() URL { + u.url.Fragment = "" + return AbsoluteURL{url: u.url, scheme: u.scheme, normalized: u.normalized} +} + +// Resolve implements URL +func (u AbsoluteURL) Resolve(url URL) URL { + if _, ok := url.(AbsoluteURL); ok { + return url + } else if rel, ok := url.(RelativeURL); ok { + res := u.url.ResolveReference(rel.url) + return AbsoluteURL{url: res, scheme: u.scheme} + } else { + panic("URL type not supported") + } +} + +// Relativize implements URL +// Note that unlike other functions, this can return nil! 
+// Logic copied from Java: https://github.com/openjdk/jdk/blob/de90204b60c408ef258a2d2515ad252de4b23536/src/java.base/share/classes/java/net/URI.java#L2269 +func (u AbsoluteURL) Relativize(url URL) URL { + if url, ok := url.(AbsoluteURL); ok { + if len(u.url.Opaque) > 0 || len(url.url.Opaque) > 0 { + return url + } + if u.url.Scheme != url.url.Scheme && u.url.Host != url.url.Host { + return url + } + + bp := path.Clean(u.url.Path) + cp := path.Clean(url.url.Path) + if bp != cp { + if !strings.HasSuffix(bp, "/") { + bp = bp + "/" + } + if !strings.HasPrefix(cp, bp) { + return url + } + } + + return RelativeURL{url: &gurl.URL{ + Path: cp[len(bp):], + Fragment: url.url.Fragment, + RawQuery: url.url.RawQuery, + ForceQuery: url.url.ForceQuery, + }} + } + + // Cannot relativize an absolute URL against a relative URL. + return url +} + +// Normalize implements URL +func (u AbsoluteURL) Normalize() URL { + if u.normalized { + // Already normalized + return u + } + + var hadSlash bool + if strings.HasSuffix(u.url.Path, "/") { + hadSlash = true + } + u.url.Path = path.Clean(u.url.Path) + if hadSlash { + u.url.Path += "/" + } + + u.url.Scheme = SchemeFromString(u.url.Scheme).String() + asciiHost, err := idna.ToASCII(u.url.Host) + if err == nil { + u.url.Host = asciiHost + } + + return AbsoluteURL{url: u.url, scheme: Scheme(u.url.Scheme), normalized: true} +} + +// String implements URL +func (u AbsoluteURL) String() string { + return u.url.String() +} + +// Raw implements URL +func (u AbsoluteURL) Raw() *gurl.URL { + return u.url +} + +// Equivalent implements URL +func (u AbsoluteURL) Equivalent(url URL) bool { + return u.Normalize().String() == url.Normalize().String() +} + +// Identifies the type of URL. +func (u AbsoluteURL) Scheme() Scheme { + return u.scheme +} + +// Indicates whether this URL points to a HTTP resource. +func (u AbsoluteURL) IsHTTP() bool { + return u.scheme.IsHTTP() +} + +// Indicates whether this URL points to a file. 
+func (u AbsoluteURL) IsFile() bool { + return u.scheme.IsFile() +} + +// Converts the URL to a filepath, if it's a file URL. +func (u AbsoluteURL) ToFilepath() string { + if !u.IsFile() { + return "" + } + return filepath.FromSlash(u.url.Path) +} + +// Creates a [AbsoluteURL] from its encoded string representation. +func AbsoluteURLFromString(url string) (AbsoluteURL, error) { + u, err := gurl.Parse(url) + if err != nil { + return AbsoluteURL{}, err + } + return AbsoluteURLFromGo(u) +} + +// Create a [AbsoluteURL] from a Go net/url URL. +func AbsoluteURLFromGo(url *gurl.URL) (AbsoluteURL, error) { + if !url.IsAbs() { + return AbsoluteURL{}, errors.New("URL is not absolute") + } + scheme := SchemeFromString(url.Scheme) + if scheme == "" { + if url.Scheme == "" { + return AbsoluteURL{}, errors.New("URL has no scheme") + } else { + return AbsoluteURL{}, errors.New("URL has an unsupported scheme") + } + } + + return AbsoluteURL{url: url, scheme: scheme}, nil +} + +/* +According to the EPUB specification, the HREFs in the EPUB package must be valid URLs (so +percent-encoded). Unfortunately, many EPUBs don't follow this rule, and use invalid HREFs such +as `my chapter.html` or `/dir/my chapter.html`. + +As a workaround, we assume the HREFs are valid percent-encoded URLs, and fallback to decoded paths +if we can't parse the URL. 
+*/ +func FromEPUBHref(href string) (URL, error) { + u, err := URLFromString(href) + if err != nil { + return URLFromDecodedPath(href) + } + return u, nil +} + +func FromFilepath(path string) (URL, error) { + return AbsoluteURLFromGo(&gurl.URL{ + Path: filepath.ToSlash(path), + Scheme: SchemeFile.String(), + }) +} + +func FromLocalFile(file *os.File) (URL, error) { + apath, err := filepath.Abs(file.Name()) + if err != nil { + return nil, err + } + return FromFilepath(apath) +} diff --git a/pkg/util/url/url_test.go b/pkg/util/url/url_test.go new file mode 100644 index 00000000..0e0c2b49 --- /dev/null +++ b/pkg/util/url/url_test.go @@ -0,0 +1,427 @@ +package url + +import ( + gurl "net/url" + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestCreateFromInvalidURL(t *testing.T) { + urlTests := []string{ + "f:///////f", + ":C", + } + for _, urlTest := range urlTests { + _, err := URLFromString(urlTest) + assert.Error(t, err, "Expected error parsing URL '%s'", urlTest) + } +} + +func TestCreateFromRelativePath(t *testing.T) { + for _, urlTest := range []string{ + "/foo/bar", + "foo/bar", + "../bar", + } { + a, err := RelativeURLFromString(urlTest) + if assert.NoError(t, err) { + b, err := URLFromString(urlTest) + if assert.NoError(t, err) { + assert.Equal(t, a, b) + } + } + } + + // Special characters valid in a path. 
+ u, err := RelativeURLFromString("$&+,/=@") + if assert.NoError(t, err) { + assert.Equal(t, "$&+,/=@", u.Path()) + } + + // Used in the EPUB parser + uu, err := URLFromString("#") + if assert.NoError(t, err) { + assert.Empty(t, uu.Path()) + assert.Empty(t, uu.Fragment()) + } +} + +func TestCreateFromFragmentOnly(t *testing.T) { + u, err := URLFromString("#fragment") + if assert.NoError(t, err) { + guu, err := gurl.Parse("#fragment") + if assert.NoError(t, err) { + uu, err := RelativeURLFromGo(guu) + if assert.NoError(t, err) { + assert.Equal(t, uu, u) + } + } + } +} + +func TestCreateFromQueryOnly(t *testing.T) { + u, err := URLFromString("?query=param") + if assert.NoError(t, err) { + guu, err := gurl.Parse("?query=param") + if assert.NoError(t, err) { + uu, err := RelativeURLFromGo(guu) + if assert.NoError(t, err) { + assert.Equal(t, uu, u) + } + } + } +} + +func TestCreateFromAbsoluteURL(t *testing.T) { + u, err := URLFromString("http://example.com/foo") + if assert.NoError(t, err) { + guu, err := gurl.Parse("http://example.com/foo") + if assert.NoError(t, err) { + uu, err := AbsoluteURLFromGo(guu) + if assert.NoError(t, err) { + assert.Equal(t, uu, u) + } + } + } + + u, err = URLFromString("file:///foo/bar") + if assert.NoError(t, err) { + guu, err := gurl.Parse("file:///foo/bar") + if assert.NoError(t, err) { + uu, err := AbsoluteURLFromGo(guu) + if assert.NoError(t, err) { + assert.Equal(t, uu, u) + } + } + } +} + +func TestString(t *testing.T) { + for _, urlTest := range []string{ + "foo/bar?query#fragment", + "http://example.com/foo/bar?query#fragment", + "file:///foo/bar?query#fragment", + } { + u, err := URLFromString(urlTest) + if assert.NoError(t, err) { + assert.Equal(t, urlTest, u.String()) + } + } +} + +func TestPath(t *testing.T) { + for k, v := range map[string]string{ + "foo/bar?query#fragment": "foo/bar", + "http://example.com/foo/bar/": "/foo/bar/", + "http://example.com/foo/bar?query#fragment": "/foo/bar", + "file:///foo/bar/": "/foo/bar/", + 
"file:///foo/bar?query#fragment": "/foo/bar", + } { + u, err := URLFromString(k) + if assert.NoError(t, err) { + assert.Equal(t, v, u.Path()) + } + } +} + +func TestPathFromEmptyRelativeURL(t *testing.T) { + u, err := RelativeURLFromString("#fragment") + if assert.NoError(t, err) { + assert.Empty(t, u.Path()) + } +} + +func TestPathIsPercentDecoded(t *testing.T) { + for k, v := range map[string]string{ + "foo/%25bar%20quz": "foo/%bar quz", + "http://example.com/foo/%25bar%20quz": "/foo/%bar quz", + } { + u, err := URLFromString(k) + if assert.NoError(t, err) { + assert.Equal(t, v, u.Path()) + } + } +} + +func TestFilename(t *testing.T) { + for k, v := range map[string]string{ + "foo/bar?query#fragment": "bar", + "foo/bar/?query#fragment": "", + "http://example.com/foo/bar?query#fragment": "bar", + "http://example.com/foo/bar/": "", + "file:///foo/bar?query#fragment": "bar", + "file:///foo/bar/": "", + } { + u, err := URLFromString(k) + if assert.NoError(t, err) { + assert.Equal(t, v, u.Filename()) + } + } +} + +func TestFilenameIsPercentDecoded(t *testing.T) { + for k, v := range map[string]string{ + "foo/%25bar%20quz": "%bar quz", + "http://example.com/foo/%25bar%20quz": "%bar quz", + } { + u, err := URLFromString(k) + if assert.NoError(t, err) { + assert.Equal(t, v, u.Filename()) + } + } +} + +func TestExtension(t *testing.T) { + for k, v := range map[string]string{ + "foo/bar.txt?query#fragment": "txt", + "foo/bar?query#fragment": "", + "foo/bar/?query#fragment": "", + "http://example.com/foo/bar.txt?query#fragment": "txt", + "http://example.com/foo/bar?query#fragment": "", + "http://example.com/foo/bar/": "", + "file:///foo/bar.txt?query#fragment": "txt", + "file:///foo/bar?query#fragment": "", + "file:///foo/bar/": "", + } { + u, err := URLFromString(k) + if assert.NoError(t, err) { + assert.Equal(t, v, u.Extension()) + } + } +} + +func TestExtensionIsPercentDecoded(t *testing.T) { + for k, v := range map[string]string{ + "foo.%25bar": "%bar", + 
"http://example.com/foo.%25bar": "%bar", + } { + u, err := URLFromString(k) + if assert.NoError(t, err) { + assert.Equal(t, v, u.Extension()) + } + } +} + +func TestScheme(t *testing.T) { + for k, v := range map[string]Scheme{ + "file:///foo/bar": SchemeFromString("file"), + "FILE:///foo/bar": SchemeFromString("file"), + "http://example.com/foo": SchemeFromString("http"), + "https://example.com/foo": SchemeFromString("https"), + } { + u, err := URLFromString(k) + if assert.NoError(t, err) { + assert.Equal(t, v, u.(AbsoluteURL).Scheme()) + } + } + + u, _ := URLFromString("file:///foo/bar") + assert.True(t, u.(AbsoluteURL).Scheme().IsFile()) + assert.False(t, u.(AbsoluteURL).Scheme().IsHTTP()) + + u, _ = URLFromString("http://example.com/foo") + assert.True(t, u.(AbsoluteURL).Scheme().IsHTTP()) + assert.False(t, u.(AbsoluteURL).Scheme().IsFile()) + + u, _ = URLFromString("https://example.com/foo") + assert.True(t, u.(AbsoluteURL).Scheme().IsHTTP()) +} + +func TestResolveHttpURL(t *testing.T) { + base, _ := URLFromString("http://example.com/foo/bar") + for k, v := range map[string]string{ + "quz/baz": "http://example.com/foo/quz/baz", + "../quz/baz": "http://example.com/quz/baz", + "/quz/baz": "http://example.com/quz/baz", + "#fragment": "http://example.com/foo/bar#fragment", + "file:///foo/bar": "file:///foo/bar", + } { + u, _ := URLFromString(v) + ur, _ := URLFromString(k) + assert.Equal(t, u, base.Resolve(ur)) + } + + // With trailing slash + base, _ = URLFromString("http://example.com/foo/bar/") + for k, v := range map[string]string{ + "quz/baz": "http://example.com/foo/bar/quz/baz", + "../quz/baz": "http://example.com/foo/quz/baz", + } { + u, _ := URLFromString(v) + ur, _ := URLFromString(k) + assert.Equal(t, u, base.Resolve(ur)) + } +} + +func TestResolveFileURL(t *testing.T) { + base, _ := URLFromString("file:///root/foo/bar") + for k, v := range map[string]string{ + "quz": "file:///root/foo/quz", + "quz/baz": "file:///root/foo/quz/baz", + "../quz": 
"file:///root/quz", + "/quz/baz": "file:///quz/baz", + "http://example.com/foo/bar": "http://example.com/foo/bar", + } { + u, _ := URLFromString(v) + ur, _ := URLFromString(k) + assert.Equal(t, u, base.Resolve(ur)) + } + + // With trailing slash + base, _ = URLFromString("file:///root/foo/bar/") + for k, v := range map[string]string{ + "quz/baz": "file:///root/foo/bar/quz/baz", + "../quz": "file:///root/foo/quz", + } { + u, _ := URLFromString(v) + ur, _ := URLFromString(k) + assert.Equal(t, u, base.Resolve(ur)) + } +} + +func TestResolveTwoRelativeURLs(t *testing.T) { + base, _ := URLFromString("foo/bar") + for k, v := range map[string]string{ + "quz/baz": "foo/quz/baz", + "../quz/baz": "quz/baz", + "/quz/baz": "/quz/baz", + "#fragment": "foo/bar#fragment", + "http://example.com/foo/bar": "http://example.com/foo/bar", + } { + u, _ := URLFromString(v) + ur, _ := URLFromString(k) + assert.Equal(t, u, base.Resolve(ur)) + } + + // With trailing slash + base, _ = URLFromString("foo/bar/") + for k, v := range map[string]string{ + "quz/baz": "foo/bar/quz/baz", + "../quz/baz": "foo/quz/baz", + } { + u, _ := URLFromString(v) + ur, _ := URLFromString(k) + assert.Equal(t, u, base.Resolve(ur)) + } + + // With starting slash + base, _ = URLFromString("/foo/bar") + for k, v := range map[string]string{ + "quz/baz": "/foo/quz/baz", + "/quz/baz": "/quz/baz", + } { + u, _ := URLFromString(v) + ur, _ := URLFromString(k) + assert.Equal(t, u, base.Resolve(ur)) + } +} + +func TestRelativizeHttpURL(t *testing.T) { + base, _ := URLFromString("http://example.com/foo") + for k, v := range map[string]string{ + "http://example.com/foo/quz/baz": "quz/baz", + "http://example.com/foo#fragment": "#fragment", + "http://example.com/foo/#fragment": "#fragment", + "file:///foo/bar": "file:///foo/bar", + } { + u, _ := URLFromString(k) + ur, _ := URLFromString(v) + assert.Equal(t, ur, base.Relativize(u)) + } + + // With trailing slash + base, _ = URLFromString("http://example.com/foo/") + u, _ := 
URLFromString("http://example.com/foo/quz/baz") + ur, _ := URLFromString("quz/baz") + assert.Equal(t, ur, base.Relativize(u)) +} + +func TestRelativizeFileURL(t *testing.T) { + base, _ := URLFromString("file:///root/foo") + for k, v := range map[string]string{ + "file:///root/foo/quz/baz": "quz/baz", + "http://example.com/foo/bar": "http://example.com/foo/bar", + } { + u, _ := URLFromString(k) + ur, _ := URLFromString(v) + assert.Equal(t, ur, base.Relativize(u)) + } + + // With trailing slash + base, _ = URLFromString("file:///root/foo/") + u, _ := URLFromString("file:///root/foo/quz/baz") + ur, _ := URLFromString("quz/baz") + assert.Equal(t, ur, base.Relativize(u)) +} + +func TestRelativizeTwoRelativeURLs(t *testing.T) { + base, _ := URLFromString("foo") + for k, v := range map[string]string{ + "foo/quz/baz": "quz/baz", + "quz/baz": "quz/baz", + "/quz/baz": "/quz/baz", + "foo#fragment": "#fragment", + "foo/#fragment": "#fragment", + "http://example.com/foo/bar": "http://example.com/foo/bar", + } { + u, _ := URLFromString(k) + ur, _ := URLFromString(v) + assert.Equal(t, ur, base.Relativize(u)) + } + + // With trailing slash + base, _ = URLFromString("foo/") + u, _ := URLFromString("foo/quz/baz") + ur, _ := URLFromString("quz/baz") + assert.Equal(t, ur, base.Relativize(u)) + + // With starting slash + base, _ = URLFromString("/foo") + u, _ = URLFromString("/foo/quz/baz") + ur, _ = URLFromString("quz/baz") + assert.Equal(t, ur, base.Relativize(u)) +} + +func TestFromFile(t *testing.T) { + u, _ := AbsoluteURLFromString("file:///tmp/test.txt") + f, _ := FromFilepath("/tmp/test.txt") + assert.Equal(t, u, f) +} + +func TestToFile(t *testing.T) { + u, _ := AbsoluteURLFromString("file:///tmp/test.txt") + assert.Equal(t, "/tmp/test.txt", u.ToFilepath()) +} + +func TestNormalize(t *testing.T) { + // Scheme is lower case. 
+ u, _ := URLFromString("HTTP://example.com/foo") + assert.Equal(t, "http://example.com/foo", u.Normalize().String()) + + // Host becomes punycode equivalent. + u, _ = URLFromString("http://도메인.com/foo") + assert.Equal(t, "http://xn--hq1bm8jm9l.com/foo", u.Normalize().String()) + + // Percent encoding of path is normalized. + u, _ = URLFromString("HTTP://example.com/c'est%20valide") + assert.Equal(t, "http://example.com/c'est%20valide", u.Normalize().String()) + u, _ = URLFromString("c'est%20valide") + assert.Equal(t, "c'est%20valide", u.Normalize().String()) + + // Relative paths are resolved. + u, _ = URLFromString("http://example.com/foo/./bar//../baz") + assert.Equal(t, "http://example.com/foo/baz", u.Normalize().String()) + u, _ = URLFromString("foo/./bar//../baz") + assert.Equal(t, "foo/baz", u.Normalize().String()) + u, _ = URLFromString("foo/./bar/../../../baz") + assert.Equal(t, "../baz", u.Normalize().String()) + + // Trailing slash is kept. + u, _ = URLFromString("http://example.com/foo/") + assert.Equal(t, "http://example.com/foo/", u.Normalize().String()) + + // The other components are left as-is. + u, _ = URLFromString("http://user:password@example.com:443/foo?b=b&a=a#fragment") + assert.Equal(t, "http://user:password@example.com:443/foo?b=b&a=a#fragment", u.Normalize().String()) +}