shithub: mycel

Download patch

ref: 96375d3675de6ca7e7cc4d3a841d0c6a61cd7d73
parent: 910bfe3e9d2177b8efde268281233d5d1ed333cd
author: Philip Silva <philip.silva@protonmail.com>
date: Mon Apr 26 13:55:12 EDT 2021

Map full width runes to canonical widths

--- a/nodes/nodes.go
+++ b/nodes/nodes.go
@@ -4,6 +4,7 @@
 	"bytes"
 	"fmt"
 	"golang.org/x/net/html"
+	"golang.org/x/text/width"
 	"github.com/chris-ramon/douceur/css"
 	"github.com/psilva261/opossum/logger"
 	"github.com/psilva261/opossum/style"
@@ -77,9 +78,10 @@
 	return
 }
 
-// filterText removes line break runes (TODO: add this later but handle properly)
-func filterText(t string) (text string) {
-	return strings.ReplaceAll(t, "­", "")
+// filterText strips soft hyphens (U+00AD; TODO: keep them once optional line breaks are handled properly) and folds runes to their canonical widths, e.g. fullwidth "１" becomes "1".
+func filterText(t string) string {
+	t = strings.ReplaceAll(t, "­", "")
+	return width.Fold.String(t)
 }
 
 func (n Node) Type() html.NodeType {
--- a/nodes/nodes_test.go
+++ b/nodes/nodes_test.go
@@ -17,6 +17,14 @@
 	}
 }
 
+func TestFilterTextFw(t *testing.T) {
+	const s = "（１９９９）"
+	f := filterText(s)
+	if f != "(1999)" {
+		t.Errorf("%v", f)
+	}
+}
+
 func TestQueryRef(t *testing.T) {
 	buf := strings.NewReader(`
 	<html>