ref: a82d2700fcc772aada15d65b8f76913ca23f7404
parent: ae816452b171b6b6aabca6a7423ed28a653baaa2
author: Bjørn Erik Pedersen <bjorn.erik.pedersen@gmail.com>
date: Sat Jan 4 06:28:19 EST 2020
markup/goldmark: Make auto IDs GitHub compatible. You can turn off this behaviour and get ASCII-only IDs instead: ```toml [markup] [markup.goldmark] [markup.goldmark.parser] autoHeadingIDAsciiOnly = true ``` Note that the `anchorize` template function now adapts its behaviour depending on the default Markdown handler. Fixes #6616
--- /dev/null
+++ b/common/text/transform.go
@@ -1,0 +1,47 @@
+// Copyright 2019 The Hugo Authors. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package text
+
+import (
+ "sync"
+ "unicode"
+
+ "golang.org/x/text/runes"
+ "golang.org/x/text/transform"
+ "golang.org/x/text/unicode/norm"
+)
+
+// accentTransformerPool hands out reusable accent-removing transformers so
+// that callers do not have to build a new chain on every call.
+var accentTransformerPool = &sync.Pool{New: newAccentTransformer}
+
+// newAccentTransformer builds the chain used to strip accents: decompose
+// (NFD), drop all nonspacing marks (Mn), then recompose (NFC).
+func newAccentTransformer() interface{} {
+	return transform.Chain(
+		norm.NFD,
+		runes.Remove(runes.In(unicode.Mn)),
+		norm.NFC,
+	)
+}
+
+// RemoveAccents removes all accents (Unicode nonspacing marks) from b and
+// returns the result as a new slice. If the transformation fails, b itself
+// is returned unchanged rather than a partially transformed slice.
+func RemoveAccents(b []byte) []byte {
+	t := accentTransformerPool.Get().(transform.Transformer)
+	// Deferred so the transformer is reset and returned to the pool on
+	// every exit path.
+	defer func() {
+		t.Reset()
+		accentTransformerPool.Put(t)
+	}()
+
+	out, _, err := transform.Bytes(t, b)
+	if err != nil {
+		// out only holds what was converted before the failure.
+		return b
+	}
+	return out
+}
+
+// RemoveAccentsString removes all accents (Unicode nonspacing marks) from s.
+// If the transformation fails, s is returned unchanged rather than a
+// partially transformed string.
+func RemoveAccentsString(s string) string {
+	t := accentTransformerPool.Get().(transform.Transformer)
+	// Deferred so the transformer is reset and returned to the pool on
+	// every exit path.
+	defer func() {
+		t.Reset()
+		accentTransformerPool.Put(t)
+	}()
+
+	out, _, err := transform.String(t, s)
+	if err != nil {
+		// out only holds what was converted before the failure.
+		return s
+	}
+	return out
+}
--- /dev/null
+++ b/common/text/transform_test.go
@@ -1,0 +1,29 @@
+// Copyright 2019 The Hugo Authors. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package text
+
+import (
+ "testing"
+
+ qt "github.com/frankban/quicktest"
+)
+
+// TestRemoveAccents checks both the []byte and the string variant.
+func TestRemoveAccents(t *testing.T) {
+	c := qt.New(t)
+
+	for _, tc := range []struct{ in, want string }{
+		{"Resumé", "Resume"},
+		{"Hugo Rocks!", "Hugo Rocks!"},
+	} {
+		c.Assert(string(RemoveAccents([]byte(tc.in))), qt.Equals, tc.want)
+	}
+
+	c.Assert(RemoveAccentsString("Resumé"), qt.Equals, "Resume")
+}
--- a/helpers/content.go
+++ b/helpers/content.go
@@ -48,8 +48,9 @@
// ContentSpec provides functionality to render markdown content.
type ContentSpec struct {
- Converters markup.ConverterProvider
- MardownConverter converter.Converter // Markdown converter with no document context
+ Converters markup.ConverterProvider
+ MardownConverter converter.Converter // Markdown converter with no document context
+ anchorNameSanitizer converter.AnchorNameSanitizer
// SummaryLength is the length of the summary that Hugo extracts from a content.
summaryLength int
@@ -91,6 +92,17 @@
return nil, err
}
spec.MardownConverter = conv
+ if as, ok := conv.(converter.AnchorNameSanitizer); ok {
+ spec.anchorNameSanitizer = as
+ } else {
+ // Use Goldmark's sanitizer
+ p := converterProvider.Get("goldmark")
+ conv, err := p.New(converter.DocumentContext{})
+ if err != nil {
+ return nil, err
+ }
+ spec.anchorNameSanitizer = conv.(converter.AnchorNameSanitizer)
+ }
return spec, nil
}
@@ -190,6 +202,10 @@
return nil, err
}
return b.Bytes(), nil
+}
+
+// SanitizeAnchorName turns s into an anchor name by delegating to the
+// sanitizer stored on the ContentSpec: the Markdown converter's own sanitizer
+// when it implements converter.AnchorNameSanitizer, otherwise Goldmark's.
+func (c *ContentSpec) SanitizeAnchorName(s string) string {
+ return c.anchorNameSanitizer.SanitizeAnchorName(s)
}
func (c *ContentSpec) ResolveMarkup(in string) string {
--- a/helpers/path.go
+++ b/helpers/path.go
@@ -24,6 +24,8 @@
"strings"
"unicode"
+ "github.com/gohugoio/hugo/common/text"
+
"github.com/gohugoio/hugo/config"
"github.com/gohugoio/hugo/hugofs"
@@ -31,9 +33,6 @@
"github.com/gohugoio/hugo/common/hugio"
_errors "github.com/pkg/errors"
"github.com/spf13/afero"
- "golang.org/x/text/runes"
- "golang.org/x/text/transform"
- "golang.org/x/text/unicode/norm"
)
var (
@@ -134,6 +133,10 @@
// are also removed.
// Spaces will be replaced with a single hyphen, and sequential hyphens will be reduced to one.
func (p *PathSpec) UnicodeSanitize(s string) string {
+ if p.RemovePathAccents {
+ s = text.RemoveAccentsString(s)
+ }
+
source := []rune(s)
target := make([]rune, 0, len(source))
var prependHyphen bool
@@ -154,17 +157,7 @@
}
}
- var result string
-
- if p.RemovePathAccents {
- // remove accents - see https://blog.golang.org/normalization
- t := transform.Chain(norm.NFD, runes.Remove(runes.In(unicode.Mn)), norm.NFC)
- result, _, _ = transform.String(t, string(target))
- } else {
- result = string(target)
- }
-
- return result
+ return string(target)
}
// ReplaceExtension takes a path and an extension, strips the old extension
--- a/markup/blackfriday/convert.go
+++ b/markup/blackfriday/convert.go
@@ -60,6 +60,10 @@
cfg converter.ProviderConfig
}
+// SanitizeAnchorName returns the anchor name for s as produced by
+// blackfriday.SanitizedAnchorName.
+func (c *blackfridayConverter) SanitizeAnchorName(s string) string {
+ return blackfriday.SanitizedAnchorName(s)
+}
+
func (c *blackfridayConverter) AnchorSuffix() string {
if c.bf.PlainIDAnchors {
return ""
@@ -204,5 +208,6 @@
}
var (
- _ converter.DocumentInfo = (*blackfridayConverter)(nil)
+ _ converter.DocumentInfo = (*blackfridayConverter)(nil)
+ _ converter.AnchorNameSanitizer = (*blackfridayConverter)(nil)
)
--- a/markup/converter/converter.go
+++ b/markup/converter/converter.go
@@ -87,6 +87,11 @@
TableOfContents() tableofcontents.Root
}
+// AnchorNameSanitizer is implemented by converters that can tell how they
+// sanitize anchor names, so that other code can produce anchors matching
+// the IDs that converter generates.
+type AnchorNameSanitizer interface {
+ // SanitizeAnchorName returns the anchor name for s.
+ SanitizeAnchorName(s string) string
+}
+
// Bytes holds a byte slice and implements the Result interface.
type Bytes []byte
--- /dev/null
+++ b/markup/goldmark/autoid.go
@@ -1,0 +1,125 @@
+// Copyright 2019 The Hugo Authors. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package goldmark
+
+import (
+ "bytes"
+ "strconv"
+ "unicode"
+ "unicode/utf8"
+
+ "github.com/gohugoio/hugo/common/text"
+
+ "github.com/yuin/goldmark/ast"
+ "github.com/yuin/goldmark/parser"
+ "github.com/yuin/goldmark/util"
+
+ bp "github.com/gohugoio/hugo/bufferpool"
+)
+
+// sanitizeAnchorNameString is the string counterpart of sanitizeAnchorName.
+func sanitizeAnchorNameString(s string, asciiOnly bool) string {
+	sanitized := sanitizeAnchorNameWithHook([]byte(s), asciiOnly, nil)
+	return string(sanitized)
+}
+
+// sanitizeAnchorName returns the anchor name for b. It is
+// sanitizeAnchorNameWithHook without a hook.
+func sanitizeAnchorName(b []byte, asciiOnly bool) []byte {
+ return sanitizeAnchorNameWithHook(b, asciiOnly, nil)
+}
+
+// sanitizeAnchorNameWithHook converts b into an anchor name and returns it
+// as a newly allocated slice.
+//
+// Spaces and tabs become hyphens; hyphens, underscores, letters and digits
+// are kept in lower case; every other rune is dropped. With asciiOnly set,
+// accents are stripped first and any remaining multi-byte rune is dropped.
+//
+// If hook is non-nil it is called with the buffer holding the sanitized name
+// and may modify it before the result is copied out.
+func sanitizeAnchorNameWithHook(b []byte, asciiOnly bool, hook func(buf *bytes.Buffer)) []byte {
+	buf := bp.GetBuffer()
+	// Deferred so the buffer goes back to the pool even if hook panics.
+	defer bp.PutBuffer(buf)
+
+	if asciiOnly {
+		// Strip accents first so that an accented letter survives as its
+		// base letter instead of being dropped as non-ASCII below.
+		b = text.RemoveAccents(b)
+	}
+
+	for len(b) > 0 {
+		r, size := utf8.DecodeRune(b)
+		b = b[size:]
+
+		switch {
+		case asciiOnly && size != 1:
+			// Multi-byte rune, i.e. not ASCII: drop it.
+		case isSpace(r):
+			buf.WriteString("-")
+		case r == '-' || isAlphaNumeric(r):
+			buf.WriteRune(unicode.ToLower(r))
+		}
+	}
+
+	if hook != nil {
+		hook(buf)
+	}
+
+	// The buffer is pooled and will be reused, so hand out a copy.
+	result := make([]byte, buf.Len())
+	copy(result, buf.Bytes())
+
+	return result
+}
+
+// isAlphaNumeric reports whether r is an underscore, a Unicode letter or a
+// Unicode digit.
+func isAlphaNumeric(r rune) bool {
+	switch {
+	case r == '_', unicode.IsLetter(r), unicode.IsDigit(r):
+		return true
+	}
+	return false
+}
+
+// isSpace reports whether r is a space or a tab. Other whitespace, such as
+// newlines, does not count.
+func isSpace(r rune) bool {
+	switch r {
+	case ' ', '\t':
+		return true
+	}
+	return false
+}
+
+var _ parser.IDs = (*idFactory)(nil)
+
+// idFactory generates element IDs and remembers every ID it has handed out
+// or been told about, so that later IDs can be made unique.
+type idFactory struct {
+	// asciiOnly is passed on to the anchor name sanitizer by Generate.
+	asciiOnly bool
+	// vals is the set of IDs already in use.
+	vals map[string]struct{}
+}
+
+// newIDFactory returns an idFactory with an empty set of used IDs.
+func newIDFactory(asciiOnly bool) *idFactory {
+	f := new(idFactory)
+	f.asciiOnly = asciiOnly
+	f.vals = map[string]struct{}{}
+	return f
+}
+
+// Generate returns a unique ID derived from value. The sanitized value is
+// used as is when it is still free; otherwise a hyphen and the lowest free
+// number, starting with 1, are appended. An empty sanitized value falls back
+// to "heading" for headings and "id" for any other node kind. The returned
+// ID is recorded as used.
+func (ids *idFactory) Generate(value []byte, kind ast.NodeKind) []byte {
+	makeUnique := func(buf *bytes.Buffer) {
+		if buf.Len() == 0 {
+			fallback := "id"
+			if kind == ast.KindHeading {
+				fallback = "heading"
+			}
+			buf.WriteString(fallback)
+		}
+
+		if _, taken := ids.vals[util.BytesToReadOnlyString(buf.Bytes())]; taken {
+			// Append a hyphen and a number, starting with 1.
+			buf.WriteRune('-')
+			base := buf.Len()
+			for n := 1; ; n++ {
+				// Drop the previous candidate number, if any.
+				buf.Truncate(base)
+				buf.WriteString(strconv.Itoa(n))
+				if _, taken := ids.vals[util.BytesToReadOnlyString(buf.Bytes())]; !taken {
+					break
+				}
+			}
+		}
+
+		// buf.String() copies, so the key stays valid once buf is reused.
+		ids.vals[buf.String()] = struct{}{}
+	}
+
+	return sanitizeAnchorNameWithHook(value, ids.asciiOnly, makeUnique)
+}
+
+// Put records value as an ID that is already in use, so that Generate will
+// not hand out the same ID again.
+func (ids *idFactory) Put(value []byte) {
+	// Copy the bytes into the key. A string that merely aliases value would
+	// change underneath the map if the caller later modified or reused the
+	// slice.
+	ids.vals[string(value)] = struct{}{}
+}
--- /dev/null
+++ b/markup/goldmark/autoid_test.go
@@ -1,0 +1,121 @@
+// Copyright 2019 The Hugo Authors. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package goldmark
+
+import (
+ "strings"
+ "testing"
+
+ qt "github.com/frankban/quicktest"
+)
+
+// TestSanitizeAnchorName checks the default (non ASCII-only) sanitizer
+// against IDs produced by github.com. The two raw strings below are paired
+// line by line: line i of tests must sanitize to line i of expect.
+func TestSanitizeAnchorName(t *testing.T) {
+	c := qt.New(t)
+
+	// Tests generated manually on github.com
+	// Note: every single space maps to one hyphen, so the runs of spaces in
+	// the "Many spaces" line are significant.
+	tests := `
+God is good: 神真美好
+Number 32
+Question?
+1+2=3
+Special !"#$%&(parens)=?´* chars
+Resumé
+One-Hyphen
+Multiple--Hyphens
+Trailing hyphen-
+Many   spaces  here
+Forward/slash
+Backward\slash
+Under_score
+`
+
+	expect := `
+god-is-good-神真美好
+number-32
+question
+123
+special-parens-chars
+resumé
+one-hyphen
+multiple--hyphens
+trailing-hyphen-
+many---spaces--here
+forwardslash
+backwardslash
+under_score
+`
+
+	tests, expect = strings.TrimSpace(tests), strings.TrimSpace(expect)
+
+	testlines, expectlines := strings.Split(tests, "\n"), strings.Split(expect, "\n")
+
+	// Fail this test instead of panicking and taking the whole test binary
+	// down with it.
+	c.Assert(len(testlines), qt.Equals, len(expectlines), qt.Commentf("test setup failed"))
+
+	for i, input := range testlines {
+		input := input
+		expect := expectlines[i]
+		c.Run(input, func(c *qt.C) {
+			b := []byte(input)
+			got := string(sanitizeAnchorName(b, false))
+			c.Assert(got, qt.Equals, expect)
+			c.Assert(sanitizeAnchorNameString(input, false), qt.Equals, expect)
+			// The input slice must be left untouched.
+			c.Assert(string(b), qt.Equals, input)
+		})
+	}
+}
+
+// TestSanitizeAnchorNameAsciiOnly checks that ASCII-only mode drops non-ASCII
+// runes and reduces accented letters to their base letter.
+func TestSanitizeAnchorNameAsciiOnly(t *testing.T) {
+	c := qt.New(t)
+
+	for _, tc := range []struct{ in, want string }{
+		{"god is神真美好 good", "god-is-good"},
+		{"Resumé", "resume"},
+	} {
+		c.Assert(sanitizeAnchorNameString(tc.in, true), qt.Equals, tc.want)
+	}
+}
+
+func BenchmarkSanitizeAnchorName(b *testing.B) {
+ input := []byte("God is good: 神真美好")
+ b.ResetTimer()
+ for i := 0; i < b.N; i++ {
+ result := sanitizeAnchorName(input, false)
+ if len(result) != 24 {
+ b.Fatalf("got %d", len(result))
+
+ }
+ }
+}
+
+func BenchmarkSanitizeAnchorNameAsciiOnly(b *testing.B) {
+ input := []byte("God is good: 神真美好")
+ b.ResetTimer()
+ for i := 0; i < b.N; i++ {
+ result := sanitizeAnchorName(input, true)
+ if len(result) != 12 {
+ b.Fatalf("got %d", len(result))
+
+ }
+ }
+}
+
+func BenchmarkSanitizeAnchorNameString(b *testing.B) {
+ input := "God is good: 神真美好"
+ b.ResetTimer()
+ for i := 0; i < b.N; i++ {
+ result := sanitizeAnchorNameString(input, false)
+ if len(result) != 24 {
+ b.Fatalf("got %d", len(result))
+ }
+ }
+}
--- a/markup/goldmark/convert.go
+++ b/markup/goldmark/convert.go
@@ -50,21 +50,35 @@
+// New builds the Goldmark Markdown instance once and returns a provider
+// whose converters all share it. Each converter also gets an anchor name
+// sanitizer that follows the AutoHeadingIDAsciiOnly parser setting in cfg.
func (p provide) New(cfg converter.ProviderConfig) (converter.Provider, error) {
md := newMarkdown(cfg)
+
return converter.NewProvider("goldmark", func(ctx converter.DocumentContext) (converter.Converter, error) {
return &goldmarkConverter{
ctx: ctx,
cfg: cfg,
md: md,
+ // Sanitizes anchor names using the ASCII-only setting from cfg.
+ sanitizeAnchorName: func(s string) string {
+ return sanitizeAnchorNameString(s, cfg.MarkupConfig.Goldmark.Parser.AutoHeadingIDAsciiOnly)
+ },
}, nil
}), nil
}
+// Compile-time check that goldmarkConverter implements
+// converter.AnchorNameSanitizer.
+var (
+ _ converter.AnchorNameSanitizer = (*goldmarkConverter)(nil)
+)
+
+// goldmarkConverter holds the Goldmark Markdown instance together with the
+// document context and provider configuration it was created with.
type goldmarkConverter struct {
md goldmark.Markdown
ctx converter.DocumentContext
cfg converter.ProviderConfig
+
+ // sanitizeAnchorName is the function SanitizeAnchorName delegates to.
+ sanitizeAnchorName func(s string) string
}
+// SanitizeAnchorName returns the anchor name for s as produced by the
+// converter's sanitizeAnchorName function.
+func (c *goldmarkConverter) SanitizeAnchorName(s string) string {
+ return c.sanitizeAnchorName(s)
+}
+
func newMarkdown(pcfg converter.ProviderConfig) goldmark.Markdown {
mcfg := pcfg.MarkupConfig
cfg := pcfg.MarkupConfig.Goldmark
@@ -226,7 +240,7 @@
buf := &bufWriter{Buffer: &bytes.Buffer{}}
result = buf
- pctx := newParserContext(ctx)
+ pctx := c.newParserContext(ctx)
reader := text.NewReader(ctx.Src)
doc := c.md.Parser().Parse(
@@ -265,8 +279,8 @@
return featureSet[feature.GetIdentity()]
}
-func newParserContext(rctx converter.RenderContext) *parserContext {
- ctx := parser.NewContext()
+func (c *goldmarkConverter) newParserContext(rctx converter.RenderContext) *parserContext {
+ ctx := parser.NewContext(parser.WithIDs(newIDFactory(c.cfg.MarkupConfig.Goldmark.Parser.AutoHeadingIDAsciiOnly)))
ctx.Set(tocEnableKey, rctx.RenderTOC)
return &parserContext{
Context: ctx,
--- a/markup/goldmark/convert_test.go
+++ b/markup/goldmark/convert_test.go
@@ -28,6 +28,23 @@
qt "github.com/frankban/quicktest"
)
+// convert renders content with a Goldmark converter built from mconf, with
+// TOC rendering enabled, and fails the test on any error along the way.
+func convert(c *qt.C, mconf markup_config.Config, content string) converter.Result {
+	providerCfg := converter.ProviderConfig{
+		MarkupConfig: mconf,
+		Logger:       loggers.NewErrorLogger(),
+	}
+	provider, err := Provider.New(providerCfg)
+	c.Assert(err, qt.IsNil)
+
+	docCtx := converter.DocumentContext{DocumentID: "thedoc"}
+	conv, err := provider.New(docCtx)
+	c.Assert(err, qt.IsNil)
+
+	renderCtx := converter.RenderContext{RenderTOC: true, Src: []byte(content)}
+	result, err := conv.Convert(renderCtx)
+	c.Assert(err, qt.IsNil)
+
+	return result
+}
+
func TestConvert(t *testing.T) {
c := qt.New(t)
@@ -92,6 +109,12 @@
: the description for the content.
+## 神真美好
+
+## 神真美好
+
+## 神真美好
+
[^1]: And that's the footnote.
`
@@ -98,23 +121,11 @@
// Code fences
content = strings.Replace(content, "§§§", "```", -1)
-
mconf := markup_config.Default
mconf.Highlight.NoClasses = false
mconf.Goldmark.Renderer.Unsafe = true
- p, err := Provider.New(
- converter.ProviderConfig{
- MarkupConfig: mconf,
- Logger: loggers.NewErrorLogger(),
- },
- )
- c.Assert(err, qt.IsNil)
- conv, err := p.New(converter.DocumentContext{DocumentID: "thedoc"})
- c.Assert(err, qt.IsNil)
- b, err := conv.Convert(converter.RenderContext{RenderTOC: true, Src: []byte(content)})
- c.Assert(err, qt.IsNil)
-
+ b := convert(c, mconf, content)
got := string(b.Bytes())
// Links
@@ -123,6 +134,9 @@
// Header IDs
c.Assert(got, qt.Contains, `<h2 id="custom">Custom ID</h2>`, qt.Commentf(got))
c.Assert(got, qt.Contains, `<h2 id="auto-id">Auto ID</h2>`, qt.Commentf(got))
+ c.Assert(got, qt.Contains, `<h2 id="神真美好">神真美好</h2>`, qt.Commentf(got))
+ c.Assert(got, qt.Contains, `<h2 id="神真美好-1">神真美好</h2>`, qt.Commentf(got))
+ c.Assert(got, qt.Contains, `<h2 id="神真美好-2">神真美好</h2>`, qt.Commentf(got))
// Code fences
c.Assert(got, qt.Contains, "<div class=\"highlight\"><pre class=\"chroma\"><code class=\"language-bash\" data-lang=\"bash\">LINE1\n</code></pre></div>")
@@ -146,6 +160,20 @@
tocHTML := toc.TableOfContents().ToHTML(1, 2, false)
c.Assert(tocHTML, qt.Contains, "TableOfContents")
+}
+
+func TestConvertAutoIDAsciiOnly(t *testing.T) {
+ c := qt.New(t)
+
+ content := `
+## God is Good: 神真美好
+`
+ mconf := markup_config.Default
+ mconf.Goldmark.Parser.AutoHeadingIDAsciiOnly = true
+ b := convert(c, mconf, content)
+ got := string(b.Bytes())
+
+ c.Assert(got, qt.Contains, "<h2 id=\"god-is-good-\">")
}
func TestCodeFence(t *testing.T) {
--- a/markup/goldmark/goldmark_config/config.go
+++ b/markup/goldmark/goldmark_config/config.go
@@ -69,6 +69,10 @@
// auto generated heading ids.
AutoHeadingID bool
+ // When AutoHeadingID is enabled this will generate IDs with ASCII
+ // characters only.
+ AutoHeadingIDAsciiOnly bool
+
// Enables custom attributes.
Attribute bool
}
--- a/tpl/urls/urls.go
+++ b/tpl/urls/urls.go
@@ -25,7 +25,6 @@
"github.com/gohugoio/hugo/common/urls"
"github.com/gohugoio/hugo/deps"
_errors "github.com/pkg/errors"
- "github.com/russross/blackfriday"
"github.com/spf13/cast"
)
@@ -90,7 +89,7 @@
if err != nil {
return "", nil
}
- return blackfriday.SanitizedAnchorName(s), nil
+ return ns.deps.ContentSpec.SanitizeAnchorName(s), nil
}
// Ref returns the absolute URL path to a given content item.