ref: dd45e6d7e5406991d8df3a2f9ba4c7e5ae039c34
parent: 4abaec5c045e92ae5f8b3a2dc66606b080ef6ea5
author: Bjørn Erik Pedersen <bjorn.erik.pedersen@gmail.com>
date: Wed Aug 17 09:41:48 EDT 2016
Lazy calculate WordCount, ReadingTime and FuzzyWordCount

This avoids having to execute these expensive operations for sites that do not use these values. The commit also sums up a set of word-counting and auto-summary related performance improvements.

The effect depends on which features your site uses, but a benchmark of 4 Hugo sites in the wild shows promise:

```
benchmark          old ns/op      new ns/op      delta
BenchmarkHugo-4    21293005843    20032857342    -5.92%

benchmark          old allocs     new allocs     delta
BenchmarkHugo-4    65290922       65186032       -0.16%

benchmark          old bytes      new bytes      delta
BenchmarkHugo-4    9771213416     9681866464     -0.91%
```

Closes #2378
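The heart of the change is a lazy-initialization pattern: the exported fields become accessor methods, and the expensive counting runs once, on first access, guarded by a `sync.Once`. A minimal, self-contained sketch of that pattern (the simplified `page` type, its field names and the word-count heuristic are illustrative, not Hugo's actual implementation):

```go
package main

import (
	"fmt"
	"strings"
	"sync"
)

// page is a stand-in for Hugo's Page; only the lazy-init mechanics are shown.
type page struct {
	plain string

	wordCount   int
	readingTime int
	metaInit    sync.Once
}

// analyze performs the (potentially expensive) counting exactly once,
// no matter how many accessors ask for the values.
func (p *page) analyze() {
	p.metaInit.Do(func() {
		p.wordCount = len(strings.Fields(p.plain))
		p.readingTime = (p.wordCount + 212) / 213
	})
}

// WordCount and ReadingTime replace the old exported struct fields;
// nothing is computed until one of them is called.
func (p *page) WordCount() int   { p.analyze(); return p.wordCount }
func (p *page) ReadingTime() int { p.analyze(); return p.readingTime }

func main() {
	p := &page{plain: "a handful of words of plain content"}
	fmt.Println(p.WordCount(), p.ReadingTime()) // counted on first access, cached after
}
```

Pages that never ask for these values never pay for the counting, which is where the benchmark gains above come from.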
--- a/helpers/content.go
+++ b/helpers/content.go
@@ -138,19 +138,28 @@
// Walk through the string removing all tags
b := bp.GetBuffer()
defer bp.PutBuffer(b)
-
- inTag := false
+ var inTag, isSpace, wasSpace bool
for _, r := range s {
- switch r {
- case '<':
+ if !inTag {
+ isSpace = false
+ }
+
+ switch {
+ case r == '<':
inTag = true
- case '>':
+ case r == '>':
inTag = false
+ case unicode.IsSpace(r):
+ isSpace = true
+ fallthrough
default:
- if !inTag {
+ if !inTag && (!isSpace || (isSpace && !wasSpace)) {
b.WriteRune(r)
}
}
+
+ wasSpace = isSpace
+
}
return b.String()
}
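For illustration, a minimal usage sketch of the new whitespace handling: consecutive whitespace runes outside of tags now collapse to a single rune, so the old trailing-newline expectation in the test below goes away. The sketch assumes the 2016 import path `github.com/spf13/hugo/helpers`, and the expected output is taken from the updated test table, not derived independently:

```go
package main

import (
	"fmt"

	"github.com/spf13/hugo/helpers"
)

func main() {
	// Runs of whitespace between tags collapse to a single rune.
	out := helpers.StripHTML("<p> strip p tag </p>")
	fmt.Printf("%q\n", out) // " strip p tag " per the updated test expectation
}
```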
--- a/helpers/content_test.go
+++ b/helpers/content_test.go
@@ -34,11 +34,22 @@
}
data := []test{
{"<h1>strip h1 tag <h1>", "strip h1 tag "},
- {"<p> strip p tag </p>", " strip p tag \n"},
+ {"<p> strip p tag </p>", " strip p tag "},
{"</br> strip br<br>", " strip br\n"},
{"</br> strip br2<br />", " strip br2\n"},
{"This <strong>is</strong> a\nnewline", "This is a newline"},
{"No Tags", "No Tags"},
+ {`<p>Summary Next Line. 
+<figure >
+
+ <img src="/not/real" />
+
+
+</figure>
+.
+More text here.</p>
+
+<p>Some more text</p>`, "Summary Next Line. . More text here.\nSome more text\n"},
}
for i, d := range data {
output := StripHTML(d.input)
--- a/hugolib/page.go
+++ b/hugolib/page.go
@@ -107,9 +107,10 @@
source.File
}
type PageMeta struct {
- WordCount int
- FuzzyWordCount int
- ReadingTime int
+ wordCount int
+ fuzzyWordCount int
+ readingTime int
+ pageMetaInit sync.Once
Weight int
}
@@ -485,28 +486,48 @@
return int64(len(p.rawContent)), nil
}
+func (p *Page) WordCount() int {
+ p.analyzePage()
+ return p.wordCount
+}
+
+func (p *Page) ReadingTime() int {
+ p.analyzePage()
+ return p.readingTime
+}
+
+func (p *Page) FuzzyWordCount() int {
+ p.analyzePage()
+ return p.fuzzyWordCount
+}
+
func (p *Page) analyzePage() {
- if p.isCJKLanguage {
- p.WordCount = 0
- for _, word := range p.PlainWords() {
- runeCount := utf8.RuneCountInString(word)
- if len(word) == runeCount {
- p.WordCount++
- } else {
- p.WordCount += runeCount
+ p.pageMetaInit.Do(func() {
+ if p.isCJKLanguage {
+ p.wordCount = 0
+ for _, word := range p.PlainWords() {
+ runeCount := utf8.RuneCountInString(word)
+ if len(word) == runeCount {
+ p.wordCount++
+ } else {
+ p.wordCount += runeCount
+ }
}
+ } else {
+ p.wordCount = helpers.TotalWords(p.Plain())
}
- } else {
- p.WordCount = len(p.PlainWords())
- }
- p.FuzzyWordCount = (p.WordCount + 100) / 100 * 100
+ // TODO(bep) is set in a test. Fix that.
+ if p.fuzzyWordCount == 0 {
+ p.fuzzyWordCount = (p.wordCount + 100) / 100 * 100
+ }
- if p.isCJKLanguage {
- p.ReadingTime = (p.WordCount + 500) / 501
- } else {
- p.ReadingTime = (p.WordCount + 212) / 213
- }
+ if p.isCJKLanguage {
+ p.readingTime = (p.wordCount + 500) / 501
+ } else {
+ p.readingTime = (p.wordCount + 212) / 213
+ }
+ })
}
func (p *Page) permalink() (*url.URL, error) {
--- a/hugolib/pageSort_test.go
+++ b/hugolib/pageSort_test.go
@@ -95,11 +95,11 @@
func TestPageSortReverse(t *testing.T) {
p1 := createSortTestPages(10)
- assert.Equal(t, 0, p1[0].FuzzyWordCount)
- assert.Equal(t, 9, p1[9].FuzzyWordCount)
+ assert.Equal(t, 0, p1[0].fuzzyWordCount)
+ assert.Equal(t, 9, p1[9].fuzzyWordCount)
p2 := p1.Reverse()
- assert.Equal(t, 9, p2[0].FuzzyWordCount)
- assert.Equal(t, 0, p2[9].FuzzyWordCount)
+ assert.Equal(t, 9, p2[0].fuzzyWordCount)
+ assert.Equal(t, 0, p2[9].fuzzyWordCount)
// cached
assert.True(t, probablyEqualPages(p2, p1.Reverse()))
}
@@ -149,7 +149,7 @@
if i%2 == 0 {
w = 10
}
- pages[i].FuzzyWordCount = i
+ pages[i].fuzzyWordCount = i
pages[i].Weight = w
pages[i].Description = "initial"
}
--- a/hugolib/page_test.go
+++ b/hugolib/page_test.go
@@ -504,10 +504,13 @@
}
func normalizeContent(c string) string {
- norm := strings.Replace(c, "\n", "", -1)
+ norm := c
+ norm = strings.Replace(norm, "\n", " ", -1)
norm = strings.Replace(norm, "    ", " ", -1)
norm = strings.Replace(norm, "   ", " ", -1)
norm = strings.Replace(norm, "  ", " ", -1)
+ norm = strings.Replace(norm, "p> ", "p>", -1)
+ norm = strings.Replace(norm, ">  <", "> <", -1)
return strings.TrimSpace(norm)
}
@@ -710,8 +713,8 @@
assertFunc := func(t *testing.T, ext string, p *Page) {
checkPageTitle(t, p, "Simple")
- checkPageContent(t, p, normalizeExpected(ext, "<p>Summary Next Line. <figure > <img src=\"/not/real\" /> </figure>.\nMore text here.</p><p>Some more text</p>"), ext)
- checkPageSummary(t, p, "Summary Next Line. . More text here. Some more text", ext)
+ checkPageContent(t, p, normalizeExpected(ext, "<p>Summary Next Line. \n<figure >\n \n <img src=\"/not/real\" />\n \n \n</figure>\n.\nMore text here.</p>\n\n<p>Some more text</p>\n"))
+ checkPageSummary(t, p, "Summary Next Line. . More text here. Some more text")
checkPageType(t, p, "page")
checkPageLayout(t, p, "page/single.html", "_default/single.html", "theme/page/single.html", "theme/_default/single.html")
}
@@ -793,8 +796,8 @@
testCommonResetState()
assertFunc := func(t *testing.T, ext string, p *Page) {
- if p.WordCount != 8 {
- t.Fatalf("[%s] incorrect word count for content '%s'. expected %v, got %v", ext, p.plain, 8, p.WordCount)
+ if p.WordCount() != 8 {
+ t.Fatalf("[%s] incorrect word count for content '%s'. expected %v, got %v", ext, p.plain, 8, p.WordCount())
}
}
@@ -806,11 +809,10 @@
viper.Set("HasCJKLanguage", true) assertFunc := func(t *testing.T, ext string, p *Page) {- if p.WordCount != 15 {- t.Fatalf("[%s] incorrect word count for content '%s'. expected %v, got %v", ext, p.plain, 15, p.WordCount)+ if p.WordCount() != 15 {+ t.Fatalf("[%s] incorrect word count for content '%s'. expected %v, got %v", ext, p.plain, 15, p.WordCount())}
}
-
testAllMarkdownEnginesForPage(t, assertFunc, "simple", simplePageWithAllCJKRunes)
}
@@ -820,8 +822,8 @@
viper.Set("HasCJKLanguage", true) assertFunc := func(t *testing.T, ext string, p *Page) {- if p.WordCount != 74 {- t.Fatalf("[%s] incorrect word count for content '%s'. expected %v, got %v", ext, p.plain, 74, p.WordCount)+ if p.WordCount() != 74 {+ t.Fatalf("[%s] incorrect word count for content '%s'. expected %v, got %v", ext, p.plain, 74, p.WordCount())}
if p.Summary != simplePageWithMainEnglishWithCJKRunesSummary {@@ -828,7 +830,6 @@
t.Fatalf("[%s] incorrect Summary for content '%s'. expected %v, got %v", ext, p.plain,simplePageWithMainEnglishWithCJKRunesSummary, p.Summary)
}
-
}
testAllMarkdownEnginesForPage(t, assertFunc, "simple", simplePageWithMainEnglishWithCJKRunes)
@@ -839,8 +840,8 @@
viper.Set("HasCJKLanguage", true) assertFunc := func(t *testing.T, ext string, p *Page) {- if p.WordCount != 75 {- t.Fatalf("[%s] incorrect word count for content '%s'. expected %v, got %v", ext, p.plain, 74, p.WordCount)+ if p.WordCount() != 75 {+ t.Fatalf("[%s] incorrect word count for content '%s'. expected %v, got %v", ext, p.plain, 74, p.WordCount())}
if p.Summary != simplePageWithIsCJKLanguageFalseSummary {@@ -847,7 +848,6 @@
t.Fatalf("[%s] incorrect Summary for content '%s'. expected %v, got %v", ext, p.plain,simplePageWithIsCJKLanguageFalseSummary, p.Summary)
}
-
}
testAllMarkdownEnginesForPage(t, assertFunc, "simple", simplePageWithIsCJKLanguageFalse)
@@ -857,16 +857,16 @@
func TestWordCount(t *testing.T) {

assertFunc := func(t *testing.T, ext string, p *Page) {
- if p.WordCount != 483 {
- t.Fatalf("[%s] incorrect word count. expected %v, got %v", ext, 483, p.WordCount)
+ if p.WordCount() != 483 {
+ t.Fatalf("[%s] incorrect word count. expected %v, got %v", ext, 483, p.WordCount())
}

- if p.FuzzyWordCount != 500 {
- t.Fatalf("[%s] incorrect word count. expected %v, got %v", ext, 500, p.WordCount)
+ if p.FuzzyWordCount() != 500 {
+ t.Fatalf("[%s] incorrect word count. expected %v, got %v", ext, 500, p.WordCount())
}

- if p.ReadingTime != 3 {
- t.Fatalf("[%s] incorrect min read. expected %v, got %v", ext, 3, p.ReadingTime)
+ if p.ReadingTime() != 3 {
+ t.Fatalf("[%s] incorrect min read. expected %v, got %v", ext, 3, p.ReadingTime())
}
checkTruncation(t, p, true, "long page")
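One point worth noting for template authors, since the tests above switch from `p.WordCount` to `p.WordCount()`: Go's text/template (and html/template) resolves `.WordCount` to either an exported field or a niladic method, so existing site templates keep working unchanged after this refactor. A small sketch demonstrating that property (the two struct types are illustrative, not Hugo's):

```go
package main

import (
	"os"
	"text/template"
)

// byField mimics the old API (exported struct field),
// byMethod mimics the new one (accessor method).
type byField struct{ WordCount int }

type byMethod struct{ words int }

func (p byMethod) WordCount() int { return p.words }

func main() {
	// The same template text resolves both the field and the method.
	tpl := template.Must(template.New("wc").Parse("{{ .WordCount }} words\n"))
	tpl.Execute(os.Stdout, byField{WordCount: 483}) // 483 words
	tpl.Execute(os.Stdout, byMethod{words: 483})    // 483 words
}
```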
--- a/hugolib/pagination_test.go
+++ b/hugolib/pagination_test.go
@@ -55,7 +55,7 @@
// first group 10 in weight
assert.Equal(t, 10, pg.Key)
for _, p := range pg.Pages {
- assert.True(t, p.FuzzyWordCount%2 == 0) // magic test
+ assert.True(t, p.fuzzyWordCount%2 == 0) // magic test
}
}
} else {
@@ -70,7 +70,7 @@
// last should have 5 in weight
assert.Equal(t, 5, pg.Key)
for _, p := range pg.Pages {
- assert.True(t, p.FuzzyWordCount%2 != 0) // magic test
+ assert.True(t, p.fuzzyWordCount%2 != 0) // magic test
}
}
} else {
@@ -443,10 +443,10 @@
page21, _ := f2.page(1)
page2Nil, _ := f2.page(3)
- assert.Equal(t, 1, page11.FuzzyWordCount)
+ assert.Equal(t, 3, page11.fuzzyWordCount)
assert.Nil(t, page1Nil)
- assert.Equal(t, 1, page21.FuzzyWordCount)
+ assert.Equal(t, 3, page21.fuzzyWordCount)
assert.Nil(t, page2Nil)
}
@@ -468,7 +468,7 @@
if i%2 == 0 {
w = 10
}
- pages[i].FuzzyWordCount = i
+ pages[i].fuzzyWordCount = i + 2
pages[i].Weight = w
}
--- a/hugolib/site_test.go
+++ b/hugolib/site_test.go
@@ -33,6 +33,11 @@
"github.com/stretchr/testify/require"
)
+func init() {
+ //There are expected ERROR logging in tests that produces a lot of noise.
+ jww.SetStdoutThreshold(jww.LevelCritical)
+}
+
const (
pageSimpleTitle = `---
title: simple template