ref: dd45e6d7e5406991d8df3a2f9ba4c7e5ae039c34
parent: 4abaec5c045e92ae5f8b3a2dc66606b080ef6ea5
author: Bjørn Erik Pedersen <bjorn.erik.pedersen@gmail.com>
date: Wed Aug 17 09:41:48 EDT 2016
Lazy calculate WordCount, ReadingTime and FuzzyWordCount

This avoids having to execute these expensive operations for sites that do not use these values. The commit also sums up a set of word-counting and auto-summary related performance improvements.

The effect depends on which features your site uses, but a benchmark of 4 Hugo sites in the wild shows promise:

```
benchmark          old ns/op      new ns/op      delta
BenchmarkHugo-4    21293005843    20032857342    -5.92%

benchmark          old allocs     new allocs     delta
BenchmarkHugo-4    65290922       65186032       -0.16%

benchmark          old bytes      new bytes      delta
BenchmarkHugo-4    9771213416     9681866464     -0.91%
```

Closes #2378
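The heart of the change is a lazy-initialization pattern: the exported fields become accessor methods, and the expensive counting runs once, on first access, guarded by a `sync.Once`. A minimal, self-contained sketch of that pattern (the simplified `page` type, its field names and the word-count heuristic are illustrative, not Hugo's actual implementation):

```go
package main

import (
	"fmt"
	"strings"
	"sync"
)

// page is a stand-in for Hugo's Page; only the lazy-init mechanics are shown.
type page struct {
	plain string

	wordCount   int
	readingTime int
	metaInit    sync.Once
}

// analyze performs the (potentially expensive) counting exactly once,
// no matter how many accessors ask for the values.
func (p *page) analyze() {
	p.metaInit.Do(func() {
		p.wordCount = len(strings.Fields(p.plain))
		p.readingTime = (p.wordCount + 212) / 213
	})
}

// WordCount and ReadingTime replace the old exported struct fields;
// nothing is computed until one of them is called.
func (p *page) WordCount() int   { p.analyze(); return p.wordCount }
func (p *page) ReadingTime() int { p.analyze(); return p.readingTime }

func main() {
	p := &page{plain: "a handful of words of plain content"}
	fmt.Println(p.WordCount(), p.ReadingTime()) // counted on first access, cached after
}
```

Pages that never ask for these values never pay for the counting, which is where the benchmark gains above come from.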
--- a/helpers/content.go
+++ b/helpers/content.go
@@ -138,19 +138,28 @@
// Walk through the string removing all tags
b := bp.GetBuffer()
defer bp.PutBuffer(b)
-
- inTag := false
+ var inTag, isSpace, wasSpace bool
for _, r := range s {
- switch r {
- case '<':
+ if !inTag {
+ isSpace = false
+ }
+
+ switch {
+ case r == '<':
inTag = true
- case '>':
+ case r == '>':
inTag = false
+ case unicode.IsSpace(r):
+ isSpace = true
+ fallthrough
default:
- if !inTag {
+ if !inTag && (!isSpace || (isSpace && !wasSpace)) {
b.WriteRune(r)
}
}
+
+ wasSpace = isSpace
+
}
return b.String()
}
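For illustration, a minimal usage sketch of the new whitespace handling: consecutive whitespace runes outside of tags now collapse to a single rune, so the old trailing-newline expectation in the test below goes away. The sketch assumes the 2016 import path `github.com/spf13/hugo/helpers`, and the expected output is taken from the updated test table, not derived independently:

```go
package main

import (
	"fmt"

	"github.com/spf13/hugo/helpers"
)

func main() {
	// Runs of whitespace between tags collapse to a single rune.
	out := helpers.StripHTML("<p> strip p tag </p>")
	fmt.Printf("%q\n", out) // " strip p tag " per the updated test expectation
}
```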
--- a/helpers/content_test.go
+++ b/helpers/content_test.go
@@ -34,11 +34,22 @@
}
data := []test{
{"<h1>strip h1 tag <h1>", "strip h1 tag "},
- {"<p> strip p tag </p>", " strip p tag \n"},
+ {"<p> strip p tag </p>", " strip p tag "},
{"</br> strip br<br>", " strip br\n"},
{"</br> strip br2<br />", " strip br2\n"},
{"This <strong>is</strong> a\nnewline", "This is a newline"},
{"No Tags", "No Tags"},
+ {`<p>Summary Next Line. 
+<figure >
+
+ <img src="/not/real" />
+
+
+</figure>
+.
+More text here.</p>
+
+<p>Some more text</p>`, "Summary Next Line. . More text here.\nSome more text\n"},
}
for i, d := range data {
output := StripHTML(d.input)
--- a/hugolib/page.go
+++ b/hugolib/page.go
@@ -107,9 +107,10 @@
source.File
}
type PageMeta struct {
- WordCount int
- FuzzyWordCount int
- ReadingTime int
+ wordCount int
+ fuzzyWordCount int
+ readingTime int
+ pageMetaInit sync.Once
Weight int
}
@@ -485,28 +486,48 @@
return int64(len(p.rawContent)), nil
}
+func (p *Page) WordCount() int {
+ p.analyzePage()
+ return p.wordCount
+}
+
+func (p *Page) ReadingTime() int {
+ p.analyzePage()
+ return p.readingTime
+}
+
+func (p *Page) FuzzyWordCount() int {
+ p.analyzePage()
+ return p.fuzzyWordCount
+}
+
func (p *Page) analyzePage() {
- if p.isCJKLanguage {
- p.WordCount = 0
- for _, word := range p.PlainWords() {
- runeCount := utf8.RuneCountInString(word)
- if len(word) == runeCount {
- p.WordCount++
- } else {
- p.WordCount += runeCount
+ p.pageMetaInit.Do(func() {
+ if p.isCJKLanguage {
+ p.wordCount = 0
+ for _, word := range p.PlainWords() {
+ runeCount := utf8.RuneCountInString(word)
+ if len(word) == runeCount {
+ p.wordCount++
+ } else {
+ p.wordCount += runeCount
+ }
}
+ } else {
+ p.wordCount = helpers.TotalWords(p.Plain())
}
- } else {
- p.WordCount = len(p.PlainWords())
- }
- p.FuzzyWordCount = (p.WordCount + 100) / 100 * 100
+ // TODO(bep) is set in a test. Fix that.
+ if p.fuzzyWordCount == 0 {
+ p.fuzzyWordCount = (p.wordCount + 100) / 100 * 100
+ }
- if p.isCJKLanguage {
- p.ReadingTime = (p.WordCount + 500) / 501
- } else {
- p.ReadingTime = (p.WordCount + 212) / 213
- }
+ if p.isCJKLanguage {
+ p.readingTime = (p.wordCount + 500) / 501
+ } else {
+ p.readingTime = (p.wordCount + 212) / 213
+ }
+ })
}
func (p *Page) permalink() (*url.URL, error) {
--- a/hugolib/pageSort_test.go
+++ b/hugolib/pageSort_test.go
@@ -95,11 +95,11 @@
func TestPageSortReverse(t *testing.T) {
p1 := createSortTestPages(10)
- assert.Equal(t, 0, p1[0].FuzzyWordCount)
- assert.Equal(t, 9, p1[9].FuzzyWordCount)
+ assert.Equal(t, 0, p1[0].fuzzyWordCount)
+ assert.Equal(t, 9, p1[9].fuzzyWordCount)
p2 := p1.Reverse()
- assert.Equal(t, 9, p2[0].FuzzyWordCount)
- assert.Equal(t, 0, p2[9].FuzzyWordCount)
+ assert.Equal(t, 9, p2[0].fuzzyWordCount)
+ assert.Equal(t, 0, p2[9].fuzzyWordCount)
// cached
assert.True(t, probablyEqualPages(p2, p1.Reverse()))
}
@@ -149,7 +149,7 @@
if i%2 == 0 {
w = 10
}
- pages[i].FuzzyWordCount = i
+ pages[i].fuzzyWordCount = i
pages[i].Weight = w
pages[i].Description = "initial"
}
--- a/hugolib/page_test.go
+++ b/hugolib/page_test.go
@@ -504,10 +504,13 @@
}
func normalizeContent(c string) string {
- norm := strings.Replace(c, "\n", "", -1)
+ norm := c
+ norm = strings.Replace(norm, "\n", " ", -1)
norm = strings.Replace(norm, "    ", " ", -1)
norm = strings.Replace(norm, "   ", " ", -1)
norm = strings.Replace(norm, "  ", " ", -1)
+ norm = strings.Replace(norm, "p> ", "p>", -1)
+ norm = strings.Replace(norm, ">  <", "> <", -1)
return strings.TrimSpace(norm)
}
@@ -710,8 +713,8 @@
assertFunc := func(t *testing.T, ext string, p *Page) {
checkPageTitle(t, p, "Simple")
- checkPageContent(t, p, normalizeExpected(ext, "<p>Summary Next Line. <figure > <img src=\"/not/real\" /> </figure>.\nMore text here.</p><p>Some more text</p>"), ext)
- checkPageSummary(t, p, "Summary Next Line. . More text here. Some more text", ext)
+ checkPageContent(t, p, normalizeExpected(ext, "<p>Summary Next Line. \n<figure >\n \n <img src=\"/not/real\" />\n \n \n</figure>\n.\nMore text here.</p>\n\n<p>Some more text</p>\n"))
+ checkPageSummary(t, p, "Summary Next Line. . More text here. Some more text")
checkPageType(t, p, "page")
checkPageLayout(t, p, "page/single.html", "_default/single.html", "theme/page/single.html", "theme/_default/single.html")
}
@@ -793,8 +796,8 @@
testCommonResetState()
assertFunc := func(t *testing.T, ext string, p *Page) {
- if p.WordCount != 8 {
- t.Fatalf("[%s] incorrect word count for content '%s'. expected %v, got %v", ext, p.plain, 8, p.WordCount)
+ if p.WordCount() != 8 {
+ t.Fatalf("[%s] incorrect word count for content '%s'. expected %v, got %v", ext, p.plain, 8, p.WordCount())
}
}
@@ -806,11 +809,10 @@
viper.Set("HasCJKLanguage", true) assertFunc := func(t *testing.T, ext string, p *Page) {- if p.WordCount != 15 {- t.Fatalf("[%s] incorrect word count for content '%s'. expected %v, got %v", ext, p.plain, 15, p.WordCount)+ if p.WordCount() != 15 {+ t.Fatalf("[%s] incorrect word count for content '%s'. expected %v, got %v", ext, p.plain, 15, p.WordCount())}
}
-
testAllMarkdownEnginesForPage(t, assertFunc, "simple", simplePageWithAllCJKRunes)
}
@@ -820,8 +822,8 @@
viper.Set("HasCJKLanguage", true) assertFunc := func(t *testing.T, ext string, p *Page) {- if p.WordCount != 74 {- t.Fatalf("[%s] incorrect word count for content '%s'. expected %v, got %v", ext, p.plain, 74, p.WordCount)+ if p.WordCount() != 74 {+ t.Fatalf("[%s] incorrect word count for content '%s'. expected %v, got %v", ext, p.plain, 74, p.WordCount())}
if p.Summary != simplePageWithMainEnglishWithCJKRunesSummary {@@ -828,7 +830,6 @@
t.Fatalf("[%s] incorrect Summary for content '%s'. expected %v, got %v", ext, p.plain,simplePageWithMainEnglishWithCJKRunesSummary, p.Summary)
}
-
}
testAllMarkdownEnginesForPage(t, assertFunc, "simple", simplePageWithMainEnglishWithCJKRunes)
@@ -839,8 +840,8 @@
viper.Set("HasCJKLanguage", true) assertFunc := func(t *testing.T, ext string, p *Page) {- if p.WordCount != 75 {- t.Fatalf("[%s] incorrect word count for content '%s'. expected %v, got %v", ext, p.plain, 74, p.WordCount)+ if p.WordCount() != 75 {+ t.Fatalf("[%s] incorrect word count for content '%s'. expected %v, got %v", ext, p.plain, 74, p.WordCount())}
if p.Summary != simplePageWithIsCJKLanguageFalseSummary {@@ -847,7 +848,6 @@
t.Fatalf("[%s] incorrect Summary for content '%s'. expected %v, got %v", ext, p.plain,simplePageWithIsCJKLanguageFalseSummary, p.Summary)
}
-
}
testAllMarkdownEnginesForPage(t, assertFunc, "simple", simplePageWithIsCJKLanguageFalse)
@@ -857,16 +857,16 @@
func TestWordCount(t *testing.T) {

assertFunc := func(t *testing.T, ext string, p *Page) {
- if p.WordCount != 483 {
- t.Fatalf("[%s] incorrect word count. expected %v, got %v", ext, 483, p.WordCount)
+ if p.WordCount() != 483 {
+ t.Fatalf("[%s] incorrect word count. expected %v, got %v", ext, 483, p.WordCount())
}

- if p.FuzzyWordCount != 500 {
- t.Fatalf("[%s] incorrect word count. expected %v, got %v", ext, 500, p.WordCount)
+ if p.FuzzyWordCount() != 500 {
+ t.Fatalf("[%s] incorrect word count. expected %v, got %v", ext, 500, p.WordCount())
}

- if p.ReadingTime != 3 {
- t.Fatalf("[%s] incorrect min read. expected %v, got %v", ext, 3, p.ReadingTime)
+ if p.ReadingTime() != 3 {
+ t.Fatalf("[%s] incorrect min read. expected %v, got %v", ext, 3, p.ReadingTime())
}
checkTruncation(t, p, true, "long page")
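One point worth noting for template authors, since the tests above switch from `p.WordCount` to `p.WordCount()`: Go's text/template (and html/template) resolves `.WordCount` to either an exported field or a niladic method, so existing site templates keep working unchanged after this refactor. A small sketch demonstrating that property (the two struct types are illustrative, not Hugo's):

```go
package main

import (
	"os"
	"text/template"
)

// byField mimics the old API (exported struct field),
// byMethod mimics the new one (accessor method).
type byField struct{ WordCount int }

type byMethod struct{ words int }

func (p byMethod) WordCount() int { return p.words }

func main() {
	// The same template text resolves both the field and the method.
	tpl := template.Must(template.New("wc").Parse("{{ .WordCount }} words\n"))
	tpl.Execute(os.Stdout, byField{WordCount: 483}) // 483 words
	tpl.Execute(os.Stdout, byMethod{words: 483})    // 483 words
}
```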
--- a/hugolib/pagination_test.go
+++ b/hugolib/pagination_test.go
@@ -55,7 +55,7 @@
// first group 10 in weight
assert.Equal(t, 10, pg.Key)
for _, p := range pg.Pages {
- assert.True(t, p.FuzzyWordCount%2 == 0) // magic test
+ assert.True(t, p.fuzzyWordCount%2 == 0) // magic test
}
}
} else {
@@ -70,7 +70,7 @@
// last should have 5 in weight
assert.Equal(t, 5, pg.Key)
for _, p := range pg.Pages {
- assert.True(t, p.FuzzyWordCount%2 != 0) // magic test
+ assert.True(t, p.fuzzyWordCount%2 != 0) // magic test
}
}
} else {
@@ -443,10 +443,10 @@
page21, _ := f2.page(1)
page2Nil, _ := f2.page(3)
- assert.Equal(t, 1, page11.FuzzyWordCount)
+ assert.Equal(t, 3, page11.fuzzyWordCount)
assert.Nil(t, page1Nil)
- assert.Equal(t, 1, page21.FuzzyWordCount)
+ assert.Equal(t, 3, page21.fuzzyWordCount)
assert.Nil(t, page2Nil)
}
@@ -468,7 +468,7 @@
if i%2 == 0 {
w = 10
}
- pages[i].FuzzyWordCount = i
+ pages[i].fuzzyWordCount = i + 2
pages[i].Weight = w
}
--- a/hugolib/site_test.go
+++ b/hugolib/site_test.go
@@ -33,6 +33,11 @@
"github.com/stretchr/testify/require"
)
+func init() {
+ //There are expected ERROR logging in tests that produces a lot of noise.
+ jww.SetStdoutThreshold(jww.LevelCritical)
+}
+
const (
pageSimpleTitle = `---
title: simple template