shithub: hugo

Download patch

ref: c950c86b4e5fb93f787ec78ca823bded9ef9fa3a
parent: 915202494b140882d594e0542153531f6afada02
author: Bjørn Erik Pedersen <bjorn.erik.pedersen@gmail.com>
date: Mon May 25 17:05:59 EDT 2020

publisher: Fix tag collector for nested table elements

Fixes #7318

--- a/hugolib/site_test.go
+++ b/hugolib/site_test.go
@@ -1123,7 +1123,7 @@
 		els := stats.HTMLElements
 
 		b.Assert(els.Classes, qt.HasLen, 3606) // (4 * 900) + 4 +2
-		b.Assert(els.Tags, qt.HasLen, 8)
+		b.Assert(els.Tags, qt.HasLen, 9)
 		b.Assert(els.IDs, qt.HasLen, 1)
 	}
 
--- a/publisher/htmlElementsCollector.go
+++ b/publisher/htmlElementsCollector.go
@@ -116,7 +116,13 @@
 
 					w.buff.Reset()
 
+					if strings.HasPrefix(s, "</") {
+						continue
+					}
+
+					s, tagName := w.insertStandinHTMLElement(s)
 					el := parseHTMLElement(s)
+					el.Tag = tagName
 
 					w.collector.mu.Lock()
 					w.collector.elementSet[s] = true
@@ -130,6 +136,20 @@
 	}
 
 	return
+}
+
+// insertStandinHTMLElement swaps el's tag name for "div", since net/html cannot parse
+// lone table elements such as tbody. The name ends at the first whitespace, '/' or
+// '>'. It returns the new element and the lower-cased original name ("" if none).
+func (c *cssClassCollectorWriter) insertStandinHTMLElement(el string) (string, string) {
+	if len(el) < 2 {
+		return el, ""
+	}
+	tag := el[1:]
+	if i := strings.IndexAny(tag, " \t\r\n\f/>"); i != -1 {
+		tag = tag[:i]
+	}
+	return el[:1] + "div" + el[1+len(tag):], strings.ToLower(tag)
 }
 
 func (c *cssClassCollectorWriter) endCollecting(drop bool) {
--- a/publisher/htmlElementsCollector_test.go
+++ b/publisher/htmlElementsCollector_test.go
@@ -51,6 +51,12 @@
 		{"duplicates", `<div class="b a b"></div>`, f("div", "a b", "")},
 		{"single quote", `<body class='b a'></body>`, f("body", "a b", "")},
 		{"no quote", `<body class=b id=myelement></body>`, f("body", "b", "myelement")},
+		{"thead", `
+		https://github.com/gohugoio/hugo/issues/7318
+<table class="cl1">
+    <thead class="cl2"><tr class="cl3"><td class="cl4"></td></tr></thead>
+    <tbody class="cl5"><tr class="cl6"><td class="cl7"></td></tr></tbody>
+</table>`, f("table tbody td thead tr", "cl1 cl2 cl3 cl4 cl5 cl6 cl7", "")},
 		// https://github.com/gohugoio/hugo/issues/7161
 		{"minified a href", `<a class="b a" href=/></a>`, f("a", "a b", "")},