shithub: hugo

Download patch

ref: 653e6856ea1cfc60cc16733807d23b302dbe4bd5
parent: f4f566edf4bd6a590cf9cdbd5cfc0026ecd93b14
author: Bjørn Erik Pedersen <bjorn.erik.pedersen@gmail.com>
date: Fri Oct 11 09:55:46 EDT 2019

resources/page: Use binary search in Pages.Prev/Next if possible

This is obviously much faster for larger data sets:

```bash
name                         old time/op    new time/op    delta
SearchPage/ByWeight-100-4       267ns ± 4%     272ns ± 5%     ~     (p=0.457 n=4+4)
SearchPage/ByWeight-5000-4     10.8µs ± 3%     1.2µs ± 2%  -88.99%  (p=0.029 n=4+4)
SearchPage/ByWeight-10000-4    21.1µs ± 1%     1.4µs ±11%  -93.28%  (p=0.029 n=4+4)
```

See #4500

--- a/hugolib/pages_test.go
+++ b/hugolib/pages_test.go
@@ -63,7 +63,7 @@
 		Variant{"Pages.Shuffled.Prev", shufflePages, func(p page.Page, pages page.Pages) { pages.Prev(p) }},
 		Variant{"Pages.ByTitle.Next", func(pages page.Pages) page.Pages { return pages.ByTitle() }, func(p page.Page, pages page.Pages) { pages.Next(p) }},
 	} {
-		for _, numPages := range []int{100, 300, 900, 5000} {
+		for _, numPages := range []int{300, 5000} {
 			b.Run(fmt.Sprintf("%s-pages-%d", variant.name, numPages), func(b *testing.B) {
 				b.StopTimer()
 				builder := newPagesPrevNextTestSite(b, numPages)
--- a/resources/page/pages_prev_next.go
+++ b/resources/page/pages_prev_next.go
@@ -15,26 +15,21 @@
 
 // Next returns the next page reletive to the given
 func (p Pages) Next(cur Page) Page {
-	for x, c := range p {
-		if c.Eq(cur) {
-			if x == 0 {
-				return nil
-			}
-			return p[x-1]
-		}
+	x := searchPage(cur, p)
+	if x <= 0 {
+		return nil
 	}
-	return nil
+	return p[x-1]
 }
 
 // Prev returns the previous page reletive to the given
 func (p Pages) Prev(cur Page) Page {
-	for x, c := range p {
-		if c.Eq(cur) {
-			if x < len(p)-1 {
-				return p[x+1]
-			}
-			return nil
-		}
+	x := searchPage(cur, p)
+
+	if x == -1 || len(p)-x < 2 {
+		return nil
 	}
-	return nil
+
+	return p[x+1]
+
 }
--- a/resources/page/pages_sort.go
+++ b/resources/page/pages_sort.go
@@ -46,60 +46,79 @@
 	sort.Stable(ps)
 }
 
-// DefaultPageSort is the default sort func for pages in Hugo:
-// Order by Weight, Date, LinkTitle and then full file path.
-var DefaultPageSort = func(p1, p2 Page) bool {
-	if p1.Weight() == p2.Weight() {
-		if p1.Date().Unix() == p2.Date().Unix() {
-			c := compare.Strings(p1.LinkTitle(), p2.LinkTitle())
-			if c == 0 {
-				if p1.File().IsZero() || p2.File().IsZero() {
-					return p1.File().IsZero()
+var (
+
+	// DefaultPageSort is the default sort func for pages in Hugo:
+	// Order by Weight, Date, LinkTitle and then full file path.
+	DefaultPageSort = func(p1, p2 Page) bool {
+		if p1.Weight() == p2.Weight() {
+			if p1.Date().Unix() == p2.Date().Unix() {
+				c := compare.Strings(p1.LinkTitle(), p2.LinkTitle())
+				if c == 0 {
+					if p1.File().IsZero() || p2.File().IsZero() {
+						return p1.File().IsZero()
+					}
+					return compare.LessStrings(p1.File().Filename(), p2.File().Filename())
 				}
-				return compare.LessStrings(p1.File().Filename(), p2.File().Filename())
+				return c < 0
 			}
-			return c < 0
+			return p1.Date().Unix() > p2.Date().Unix()
 		}
-		return p1.Date().Unix() > p2.Date().Unix()
-	}
 
-	if p2.Weight() == 0 {
-		return true
-	}
+		if p2.Weight() == 0 {
+			return true
+		}
 
-	if p1.Weight() == 0 {
-		return false
+		if p1.Weight() == 0 {
+			return false
+		}
+
+		return p1.Weight() < p2.Weight()
 	}
 
-	return p1.Weight() < p2.Weight()
-}
+	lessPageLanguage = func(p1, p2 Page) bool {
 
-var languagePageSort = func(p1, p2 Page) bool {
-
-	if p1.Language().Weight == p2.Language().Weight {
-		if p1.Date().Unix() == p2.Date().Unix() {
-			c := compare.Strings(p1.LinkTitle(), p2.LinkTitle())
-			if c == 0 {
-				if !p1.File().IsZero() && !p2.File().IsZero() {
-					return compare.LessStrings(p1.File().Filename(), p2.File().Filename())
+		if p1.Language().Weight == p2.Language().Weight {
+			if p1.Date().Unix() == p2.Date().Unix() {
+				c := compare.Strings(p1.LinkTitle(), p2.LinkTitle())
+				if c == 0 {
+					if !p1.File().IsZero() && !p2.File().IsZero() {
+						return compare.LessStrings(p1.File().Filename(), p2.File().Filename())
+					}
 				}
+				return c < 0
 			}
-			return c < 0
+			return p1.Date().Unix() > p2.Date().Unix()
 		}
-		return p1.Date().Unix() > p2.Date().Unix()
+
+		if p2.Language().Weight == 0 {
+			return true
+		}
+
+		if p1.Language().Weight == 0 {
+			return false
+		}
+
+		return p1.Language().Weight < p2.Language().Weight
 	}
 
-	if p2.Language().Weight == 0 {
-		return true
+	lessPageTitle = func(p1, p2 Page) bool {
+		return compare.LessStrings(p1.Title(), p2.Title())
 	}
 
-	if p1.Language().Weight == 0 {
-		return false
+	lessPageLinkTitle = func(p1, p2 Page) bool {
+		return compare.LessStrings(p1.LinkTitle(), p2.LinkTitle())
 	}
 
-	return p1.Language().Weight < p2.Language().Weight
-}
+	lessPageDate = func(p1, p2 Page) bool {
+		return p1.Date().Unix() < p2.Date().Unix()
+	}
 
+	lessPagePubDate = func(p1, p2 Page) bool {
+		return p1.PublishDate().Unix() < p2.PublishDate().Unix()
+	}
+)
+
 func (ps *pageSorter) Len() int      { return len(ps.pages) }
 func (ps *pageSorter) Swap(i, j int) { ps.pages[i], ps.pages[j] = ps.pages[j], ps.pages[i] }
 
@@ -139,11 +158,7 @@
 
 	const key = "pageSort.ByTitle"
 
-	title := func(p1, p2 Page) bool {
-		return compare.LessStrings(p1.Title(), p2.Title())
-	}
-
-	pages, _ := spc.get(key, pageBy(title).Sort, p)
+	pages, _ := spc.get(key, pageBy(lessPageTitle).Sort, p)
 	return pages
 }
 
@@ -156,12 +171,8 @@
 
 	const key = "pageSort.ByLinkTitle"
 
-	linkTitle := func(p1, p2 Page) bool {
-		return compare.LessStrings(p1.LinkTitle(), p2.LinkTitle())
-	}
+	pages, _ := spc.get(key, pageBy(lessPageLinkTitle).Sort, p)
 
-	pages, _ := spc.get(key, pageBy(linkTitle).Sort, p)
-
 	return pages
 }
 
@@ -174,12 +185,8 @@
 
 	const key = "pageSort.ByDate"
 
-	date := func(p1, p2 Page) bool {
-		return p1.Date().Unix() < p2.Date().Unix()
-	}
+	pages, _ := spc.get(key, pageBy(lessPageDate).Sort, p)
 
-	pages, _ := spc.get(key, pageBy(date).Sort, p)
-
 	return pages
 }
 
@@ -192,12 +199,8 @@
 
 	const key = "pageSort.ByPublishDate"
 
-	pubDate := func(p1, p2 Page) bool {
-		return p1.PublishDate().Unix() < p2.PublishDate().Unix()
-	}
+	pages, _ := spc.get(key, pageBy(lessPagePubDate).Sort, p)
 
-	pages, _ := spc.get(key, pageBy(pubDate).Sort, p)
-
 	return pages
 }
 
@@ -276,7 +279,7 @@
 
 	const key = "pageSort.ByLanguage"
 
-	pages, _ := spc.get(key, pageBy(languagePageSort).Sort, p)
+	pages, _ := spc.get(key, pageBy(lessPageLanguage).Sort, p)
 
 	return pages
 }
@@ -283,7 +286,7 @@
 
 // SortByLanguage sorts the pages by language.
 func SortByLanguage(pages Pages) {
-	pageBy(languagePageSort).Sort(pages)
+	pageBy(lessPageLanguage).Sort(pages)
 }
 
 // Reverse reverses the order in Pages and returns a copy.
--- /dev/null
+++ b/resources/page/pages_sort_search.go
@@ -1,0 +1,126 @@
+// Copyright 2019 The Hugo Authors. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package page
+
+import "sort"
+
+// Used in page binary search, the most common in front.
+var pageLessFunctions = []func(p1, p2 Page) bool{
+	DefaultPageSort,
+	lessPageDate,
+	lessPagePubDate,
+	lessPageTitle,
+	lessPageLinkTitle,
+}
+
+func searchPage(p Page, pages Pages) int {
+	if len(pages) < 1000 {
+		// For smaller data sets, doing a linear search is faster.
+		return searchPageLinear(p, pages, 0)
+	}
+
+	less := isPagesProbablySorted(pages, pageLessFunctions...)
+	if less == nil {
+		return searchPageLinear(p, pages, 0)
+	}
+
+	i := searchPageBinary(p, pages, less)
+	if i != -1 {
+		return i
+	}
+
+	return searchPageLinear(p, pages, 0)
+}
+
+func searchPageLinear(p Page, pages Pages, start int) int {
+	for i := start; i < len(pages); i++ {
+		c := pages[i]
+		if c.Eq(p) {
+			return i
+		}
+	}
+	return -1
+}
+
+func searchPageBinary(p Page, pages Pages, less func(p1, p2 Page) bool) int {
+	n := len(pages)
+
+	f := func(i int) bool {
+		c := pages[i]
+		isLess := less(c, p)
+		return !isLess || c.Eq(p)
+	}
+
+	i := sort.Search(n, f)
+
+	if i == n {
+		return -1
+	}
+
+	return searchPageLinear(p, pages, i)
+
+}
+
+// isPagesProbablySorted samples the pages slice and returns the first less func (reversed if needed) it looks sorted by, or nil.
+func isPagesProbablySorted(pages Pages, lessFuncs ...func(p1, p2 Page) bool) func(p1, p2 Page) bool {
+	n := len(pages)
+	step := 1
+	if n > 500 {
+		step = 50
+	}
+
+	is := func(less func(p1, p2 Page) bool) bool {
+		samples := 0
+
+		for i := n - 1; i > 0; i = i - step {
+			if less(pages[i], pages[i-1]) {
+				return false
+			}
+			samples++
+			if samples >= 15 {
+				return true
+			}
+		}
+		return samples > 0
+	}
+
+	isReverse := func(less func(p1, p2 Page) bool) bool {
+		samples := 0
+
+		for i := 0; i < n-1; i = i + step {
+			if less(pages[i], pages[i+1]) {
+				return false
+			}
+			samples++
+
+			if samples > 15 {
+				return true
+			}
+		}
+		return samples > 0
+	}
+
+	for _, less := range lessFuncs {
+		if is(less) {
+			return less
+		}
+		if isReverse(less) {
+			return func(p1, p2 Page) bool {
+				return less(p2, p1)
+			}
+		}
+	}
+
+	return nil
+}
--- /dev/null
+++ b/resources/page/pages_sort_search_test.go
@@ -1,0 +1,124 @@
+// Copyright 2019 The Hugo Authors. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package page
+
+import (
+	"fmt"
+	"math/rand"
+	"testing"
+	"time"
+
+	qt "github.com/frankban/quicktest"
+)
+
+func TestSearchPage(t *testing.T) {
+	t.Parallel()
+	c := qt.New(t)
+	pages := createSortTestPages(10)
+	for i, p := range pages {
+		p.(*testPage).title = fmt.Sprintf("Title %d", i%2)
+	}
+
+	for _, pages := range []Pages{pages.ByTitle(), pages.ByTitle().Reverse()} {
+		less := isPagesProbablySorted(pages, lessPageTitle)
+		c.Assert(less, qt.Not(qt.IsNil))
+		for i, p := range pages {
+			idx := searchPageBinary(p, pages, less)
+			c.Assert(idx, qt.Equals, i)
+		}
+	}
+
+}
+
+func BenchmarkSearchPage(b *testing.B) {
+	type Variant struct {
+		name         string
+		preparePages func(pages Pages) Pages
+		search       func(p Page, pages Pages) int
+	}
+
+	shufflePages := func(pages Pages) Pages {
+		rand.Shuffle(len(pages), func(i, j int) { pages[i], pages[j] = pages[j], pages[i] })
+		return pages
+	}
+
+	linearSearch := func(p Page, pages Pages) int {
+		return searchPageLinear(p, pages, 0)
+	}
+
+	createPages := func(num int) Pages {
+		pages := createSortTestPages(num)
+		for _, p := range pages {
+			tp := p.(*testPage)
+			tp.weight = rand.Intn(len(pages))
+			tp.title = fmt.Sprintf("Title %d", rand.Intn(len(pages)))
+
+			tp.pubDate = time.Now().Add(time.Duration(rand.Intn(len(pages)/5)) * time.Hour)
+			tp.date = time.Now().Add(time.Duration(rand.Intn(len(pages)/5)) * time.Hour)
+		}
+
+		return pages
+	}
+
+	for _, variant := range []Variant{
+		Variant{"Shuffled", shufflePages, searchPage},
+		Variant{"ByWeight", func(pages Pages) Pages {
+			return pages.ByWeight()
+		}, searchPage},
+		Variant{"ByWeight.Reverse", func(pages Pages) Pages {
+			return pages.ByWeight().Reverse()
+		}, searchPage},
+		Variant{"ByDate", func(pages Pages) Pages {
+			return pages.ByDate()
+		}, searchPage},
+		Variant{"ByPublishDate", func(pages Pages) Pages {
+			return pages.ByPublishDate()
+		}, searchPage},
+		Variant{"ByTitle", func(pages Pages) Pages {
+			return pages.ByTitle()
+		}, searchPage},
+		Variant{"ByTitle Linear", func(pages Pages) Pages {
+			return pages.ByTitle()
+		}, linearSearch},
+	} {
+		for _, numPages := range []int{100, 500, 1000, 5000} {
+			b.Run(fmt.Sprintf("%s-%d", variant.name, numPages), func(b *testing.B) {
+				b.StopTimer()
+				pages := createPages(numPages)
+				if variant.preparePages != nil {
+					pages = variant.preparePages(pages)
+				}
+				b.StartTimer()
+				for i := 0; i < b.N; i++ {
+					j := rand.Intn(numPages)
+					k := variant.search(pages[j], pages)
+					if k != j {
+						b.Fatalf("%d != %d", k, j)
+					}
+				}
+			})
+		}
+	}
+}
+
+func TestIsPagesProbablySorted(t *testing.T) {
+	t.Parallel()
+	c := qt.New(t)
+
+	c.Assert(isPagesProbablySorted(createSortTestPages(6).ByWeight(), DefaultPageSort), qt.Not(qt.IsNil))
+	c.Assert(isPagesProbablySorted(createSortTestPages(300).ByWeight(), DefaultPageSort), qt.Not(qt.IsNil))
+	c.Assert(isPagesProbablySorted(createSortTestPages(6), DefaultPageSort), qt.IsNil)
+	c.Assert(isPagesProbablySorted(createSortTestPages(300).ByTitle(), pageLessFunctions...), qt.Not(qt.IsNil))
+
+}
--- a/resources/page/pages_sort_test.go
+++ b/resources/page/pages_sort_test.go
@@ -269,6 +269,7 @@
 	for i := 0; i < num; i++ {
 		p := newTestPage()
 		p.path = fmt.Sprintf("/x/y/p%d.md", i)
+		p.title = fmt.Sprintf("Title %d", i%(num+1/2))
 		p.params = map[string]interface{}{
 			"arbitrarily": map[string]interface{}{
 				"nested": ("xyz" + fmt.Sprintf("%v", 100-i)),
--- a/resources/page/testhelpers_test.go
+++ b/resources/page/testhelpers_test.go
@@ -310,6 +310,9 @@
 
 func (p *testPage) LinkTitle() string {
 	if p.linkTitle == "" {
+		if p.title == "" {
+			return p.path
+		}
 		return p.title
 	}
 	return p.linkTitle
--- a/resources/page/weighted.go
+++ b/resources/page/weighted.go
@@ -99,7 +99,7 @@
 // this weighted page set.
 func (wp WeightedPages) Next(cur Page) Page {
 	for x, c := range wp {
-		if c.Page == cur {
+		if c.Page.Eq(cur) {
 			if x == 0 {
 				return nil
 			}
@@ -113,7 +113,7 @@
 // this weighted page set.
 func (wp WeightedPages) Prev(cur Page) Page {
 	for x, c := range wp {
-		if c.Page == cur {
+		if c.Page.Eq(cur) {
 			if x < len(wp)-1 {
 				return wp[x+1].Page
 			}