Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[shaping] Add an input segmenter #110

Merged
merged 7 commits into from
Dec 2, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 20 additions & 0 deletions fontscan/fontmap.go
Original file line number Diff line number Diff line change
Expand Up @@ -314,6 +314,26 @@ func (fm *FontMap) FontMetadata(ft font.Font) (family string, aspect meta.Aspect
return item.Family, item.Aspect
}

// FindSystemFont looks for a system font with the given [family],
// returning the location of the first match, or false if none is found.
//
// User added fonts are ignored, and the [FontMap] must have been
// initialized with [UseSystemFonts] or this method will always return false.
//
// The query is normalized with [meta.NormalizeFamily] and then compared
// for equality with the family names stored in the font map.
func (fm *FontMap) FindSystemFont(family string) (Location, bool) {
	wanted := meta.NormalizeFamily(family)
	for _, candidate := range fm.database {
		// only fonts which were not provided by the user are eligible
		if !candidate.isUserProvided && candidate.Family == wanted {
			return candidate.Location, true
		}
	}
	return Location{}, false
}

// SetQuery sets the families and aspect required, influencing subsequent
// `ResolveFace` calls.
func (fm *FontMap) SetQuery(query Query) {
Expand Down
30 changes: 30 additions & 0 deletions fontscan/fontmap_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -283,3 +283,33 @@ func TestQueryHelveticaLinux(t *testing.T) {
family, _ := fm.FontMetadata(fm.ResolveFace('x').Font)
tu.Assert(t, family == meta.NormalizeFamily("Nimbus Sans")) // prefered Helvetica replacement
}

// TestFindSytemFont checks that FindSystemFont only reports fonts which
// are not user provided, and that family names are normalized before lookup.
func TestFindSytemFont(t *testing.T) {
	fm := NewFontMap(log.New(io.Discard, "", 0))

	// nothing can match on an empty fontmap
	_, found := fm.FindSystemFont("Nimbus")
	tu.Assert(t, !found)

	// simulate one system font and one user provided font
	system := footprint{
		Family:   meta.NormalizeFamily("Nimbus"),
		Location: Location{File: "nimbus.ttf"},
	}
	user := footprint{
		Family:         meta.NormalizeFamily("Noto Sans"),
		Location:       Location{File: "noto.ttf"},
		isUserProvided: true,
	}
	fm.appendFootprints(system, user)

	location, found := fm.FindSystemFont("Nimbus")
	tu.Assert(t, found && location.File == "nimbus.ttf")

	lookups := []struct {
		family string
		found  bool
	}{
		{"nimbus ", true},    // names are normalized before comparison
		{"Arial", false},     // unknown family
		{"Noto Sans", false}, // user provided fonts are ignored
	}
	for _, lookup := range lookups {
		_, found = fm.FindSystemFont(lookup.family)
		tu.Assert(t, found == lookup.found)
	}
}
1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,5 @@ go 1.17
require (
github.com/go-text/typesetting-utils v0.0.0-20230616150549-2a7df14b6a22
golang.org/x/image v0.3.0
golang.org/x/text v0.9.0
)
1 change: 1 addition & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
golang.org/x/text v0.6.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8=
golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8=
golang.org/x/text v0.9.0 h1:2sjJmO8cDvYveuX97RDLsxlyUxLl+GHoLxBiRdHllBE=
golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
Expand Down
223 changes: 218 additions & 5 deletions shaping/input.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import (
"github.com/go-text/typesetting/language"
"github.com/go-text/typesetting/opentype/loader"
"golang.org/x/image/math/fixed"
"golang.org/x/text/unicode/bidi"
)

type Input struct {
Expand Down Expand Up @@ -103,18 +104,230 @@ func SplitByFontGlyphs(input Input, availableFaces []font.Face) []Input {
// the return value of the `Fontmap.ResolveFace` call.
// The 'Face' field of 'input' is ignored: only 'availableFaces' is used to select the face.
func SplitByFace(input Input, availableFaces Fontmap) []Input {
	// The splitting logic lives in splitByFace, which appends to the
	// buffer it is given: passing nil yields a freshly allocated slice.
	return splitByFace(input, availableFaces, nil)
}

// Segmenter holds the state used to split an input
// according to three characteristics: text direction (bidi),
// script, and face.
type Segmenter struct {
	// Pools of inputs, kept between calls to reduce allocations.
	// They are alternatively used as input and output
	// of the successive segmentation steps.
	buffer1 []Input
	buffer2 []Input

	// stack of paired delimiters, used to handle Common script
	delimStack []delimEntry

	// reusable state for the bidi segmentation
	bidiParagraph bidi.Paragraph
}

// delimEntry is one element of the stack of opening paired delimiters
// maintained by the script segmentation.
type delimEntry struct {
index int // index of the opening delimiter in the [pairedDelims] list
script language.Script // script of the run when pushed, updated once the run script is resolved
}

// Split cuts the given pre-configured input into runs, successively
// segmenting it by:
//   - text direction
//   - script
//   - face, as defined by [faces]
//
// Only the input runes in the range [text.RunStart] to [text.RunEnd] are split.
//
// As a consequence, the following fields of the returned runs are set:
//   - Text, RunStart, RunEnd
//   - Direction
//   - Script
//   - Face
//
// [text.Direction] is used during bidi ordering, and should refer to the general
// context [text] is used in (typically the user system preference for GUI apps).
//
// The returned slice is owned by the [Segmenter] and is only valid until
// the next call to [Split].
func (seg *Segmenter) Split(text Input, faces Fontmap) []Input {
	seg.reset()

	// each step consumes the output of the previous one
	seg.splitByBidi(text)  // text -> buffer1
	seg.splitByScript()    // buffer1 -> buffer2
	seg.splitByFace(faces) // buffer2 -> buffer1

	return seg.buffer1
}

// reset empties the buffers and the delimiter stack of the segmenter,
// keeping their storage so that it can be reused by the next split.
func (seg *Segmenter) reset() {
	// Zero the retired inputs entirely before truncating, to avoid a
	// 'memory leak': the retained storage would otherwise keep alive whatever
	// the inputs reference. Clearing only Text and FontFeatures is not enough,
	// since an input also references its Face.
	for i := range seg.buffer1 {
		seg.buffer1[i] = Input{}
	}
	for i := range seg.buffer2 {
		seg.buffer2[i] = Input{}
	}
	seg.buffer1 = seg.buffer1[:0]
	seg.buffer2 = seg.buffer2[:0]

	// bidiParagraph is reset when using SetString

	seg.delimStack = seg.delimStack[:0]
}

// splitByBidi splits [text] into runs with the same direction,
// and appends them to buffer1.
//
// The input is appended unchanged if it is not horizontal, if its
// run is empty, or if the bidi ordering fails.
func (seg *Segmenter) splitByBidi(text Input) {
	if text.Direction.Axis() != di.Horizontal || text.RunStart >= text.RunEnd {
		seg.buffer1 = append(seg.buffer1, text)
		return
	}

	// the direction of the input is used as the paragraph default direction
	paragraphDir := bidi.LeftToRight
	if text.Direction.Progression() == di.TowardTopLeft {
		paragraphDir = bidi.RightToLeft
	}
	content := string(text.Text[text.RunStart:text.RunEnd])
	seg.bidiParagraph.SetString(content, bidi.DefaultDirection(paragraphDir))
	ordering, err := seg.bidiParagraph.Order()
	if err != nil {
		seg.buffer1 = append(seg.buffer1, text)
		return
	}

	start := text.RunStart // the first run starts at rune 0 of the input run
	for i := 0; i < ordering.NumRuns(); i++ {
		bidiRun := ordering.Run(i)
		_, last := bidiRun.Pos()

		segment := text
		segment.RunStart = start
		// shift by the input run position; the +1 treats the reported
		// end position as inclusive
		segment.RunEnd = last + text.RunStart + 1

		// override the direction
		segment.Direction = di.DirectionLTR
		if bidiRun.Direction() == bidi.RightToLeft {
			segment.Direction = di.DirectionRTL
		}

		seg.buffer1 = append(seg.buffer1, segment)
		start = segment.RunEnd
	}
}

// lookupDelimIndex performs a binary search of `ch` in the list of
// paired delimiters, returning its index, or -1 if `ch` is not found.
func lookupDelimIndex(ch rune) int {
	lo, hi := 0, len(pairedDelims)-1
	for lo <= hi {
		mid := (lo + hi) / 2
		switch candidate := pairedDelims[mid]; {
		case ch < candidate:
			hi = mid - 1
		case ch > candidate:
			lo = mid + 1
		default:
			return mid
		}
	}
	return -1
}

// splitByScript reads the runs stored in buffer1 and appends to buffer2
// runs split at each change of script.
//
// Runes with Common script never trigger a split, and a closing paired
// delimiter takes the script recorded for its opening counterpart.
//
// See https://unicode.org/reports/tr24/#Common for reference
func (seg *Segmenter) splitByScript() {
	for _, source := range seg.buffer1 {
		run := source
		run.Script = language.Common // unresolved until a 'real' script is met

		for pos := source.RunStart; pos < source.RunEnd; pos++ {
			char := source.Text[pos]
			script := language.LookupScript(char)
			// NOTE(review): only Common is special-cased here; confirm how
			// LookupScript reports other non-specific scripts, if any.

			// to properly handle Common script, paired delimiters are tracked
			if script == language.Common {
				if delim := lookupDelimIndex(char); delim >= 0 {
					if delim%2 == 0 {
						// open character: push it onto the stack
						seg.delimStack = append(seg.delimStack, delimEntry{index: delim, script: run.Script})
					} else {
						// close character: look backward in the stack for its counterpart
						opener := delim - 1
						top := len(seg.delimStack) - 1
						for top >= 0 && seg.delimStack[top].index != opener {
							top--
						}
						if top >= 0 {
							// found a match: use its script and pop it,
							// along with the entries above it
							script = seg.delimStack[top].script
							seg.delimStack = seg.delimStack[:top]
						} else {
							// no match: all the open characters are popped
							seg.delimStack = seg.delimStack[:0]
						}
					}
				}
			}

			// check if we have a 'real' change of script, or not
			switch {
			case script == language.Common || script == run.Script:
				// no change
			case run.Script == language.Common:
				// attribute the resolved script to the pending delimiters ...
				for k := range seg.delimStack {
					seg.delimStack[k].script = script
				}
				// ... and to the current run, but do NOT create a new run
				run.Script = script
			default:
				// split to a new run
				if pos != source.RunStart { // push the existing one
					run.RunEnd = pos
					seg.buffer2 = append(seg.buffer2, run)
				}
				run.RunStart = pos
				run.Script = script
			}
		}

		// close and add the last run
		run.RunEnd = source.RunEnd
		seg.buffer2 = append(seg.buffer2, run)
	}
}

// splitByFace reads the runs stored in buffer2 and splits them by face,
// storing the result in buffer1, whose previous content is discarded
// (its storage is reused).
func (seg *Segmenter) splitByFace(faces Fontmap) {
	out := seg.buffer1[:0]
	for i := range seg.buffer2 {
		out = splitByFace(seg.buffer2[i], faces, out)
	}
	seg.buffer1 = out
}

func splitByFace(input Input, availableFaces Fontmap, buffer []Input) []Input {
currentInput := input
for i := input.RunStart; i < input.RunEnd; i++ {
r := input.Text[i]
if currentInput.Face != nil && ignoreFaceChange(r) {
if ignoreFaceChange(r) {
// add the rune to the current input
continue
}

// select the first font supporting r
selectedFace := availableFaces.ResolveFace(r)

// now that we have a font, apply it back,
// but do NOT create a new run
if currentInput.Face == nil {
currentInput.Face = selectedFace
}

if currentInput.Face == selectedFace {
// add the rune to the current input
continue
Expand All @@ -126,7 +339,7 @@ func SplitByFace(input Input, availableFaces Fontmap) []Input {
// close the current input ...
currentInput.RunEnd = i
// ... add it to the output ...
splitInputs = append(splitInputs, currentInput)
buffer = append(buffer, currentInput)
}

// ... and create a new one
Expand All @@ -137,8 +350,8 @@ func SplitByFace(input Input, availableFaces Fontmap) []Input {

// close and add the last input
currentInput.RunEnd = input.RunEnd
splitInputs = append(splitInputs, currentInput)
return splitInputs
buffer = append(buffer, currentInput)
return buffer
}

// ignoreFaceChange returns `true` if the given rune should not trigger
Expand Down
Loading
Loading