shithub: mycel

ref: 94389da8017a2bcca3cb29a4f954c928d577a31a
dir: /browser/website.go/

View raw version
package browser

import (
	"github.com/mjl-/duit"
	"golang.org/x/net/html"
	"golang.org/x/text/encoding"
	"io/ioutil"
	"net/url"
	"github.com/psilva261/opossum"
	"github.com/psilva261/opossum/domino"
	"github.com/psilva261/opossum/nodes"
	"github.com/psilva261/opossum/style"
	"strings"
	"sync"
)

// Layout modes passed as the layouting argument of (*Website).layout.
const (
	// InitialLayout is the mode for a regular page layout. With it, layout
	// also runs its script pass when *ExperimentalJsInsecure is set.
	InitialLayout = iota
	// ClickRelayout is the mode for a relayout triggered from a click
	// handler. layout skips its script pass in this mode because the state
	// of the existing JS VM is kept.
	ClickRelayout
)

// Website holds the rendered UI and the source state of the currently
// displayed page.
type Website struct {
	// UI is the root UI element of the page; layout sets it to the scroller
	// wrapping the laid out body.
	duit.UI
	// ContentType of the page; submit uses it to pick the encoding for
	// escaping form values.
	opossum.ContentType
	// d is the JS instance created by layout when scripting is enabled.
	d *domino.Domino

	// mu guards html and js.
	mu sync.Mutex
	// html is the HTML source last laid out (after JS processing, if any).
	html string
	// js holds script sources; it is only read via Js in this file.
	js []string
}

// Html returns the HTML source most recently stored by layout. The read is
// performed while holding w.mu.
func (w *Website) Html() string {
	w.mu.Lock()
	src := w.html
	w.mu.Unlock()

	return src
}

// Js returns a copy of the stored script sources, taken while holding w.mu.
// The returned slice is never nil and does not alias the internal one, so
// callers may modify it freely.
func (w *Website) Js() []string {
	w.mu.Lock()
	defer w.mu.Unlock()

	scripts := make([]string, len(w.js))
	copy(scripts, w.js)
	return scripts
}

// layout parses htm, applies the page's CSS, optionally runs its scripts and
// builds the UI tree for the <body>, which is stored in w.UI (and in the
// package-level scroller).
//
// f resolves and downloads linked stylesheets and scripts. layouting is one
// of InitialLayout or ClickRelayout; the script pass only runs when
// *ExperimentalJsInsecure is set and layouting is not ClickRelayout.
//
// On return the HTML that was finally laid out (the JS-processed version if
// scripts changed the document) is stored in w.html under w.mu, the status
// bar message is cleared and the log is flushed. A failure to parse the HTML
// panics.
func (w *Website) layout(f opossum.Fetcher, htm string, layouting int) {
	// Clear the status bar message however this function exits.
	defer func() {
		browser.statusBarMsg("", false)
	}()

	// pass parses htm and computes the style map of its nodes for the given
	// stylesheets. A stylesheet that fails to process is logged and skipped
	// so the remaining ones still apply.
	pass := func(htm string, csss ...string) (*html.Node, map[*html.Node]style.Map) {
		if debugPrintHtml {
			log.Printf("%v\n", htm)
		}

		doc, err := html.ParseWithOptions(
			strings.NewReader(htm),
			html.ParseOptionEnableScripting(*ExperimentalJsInsecure),
		)
		if err != nil {
			panic(err.Error())
		}

		log.Printf("Retrieving CSS Rules...")
		var cssSize int
		nodeMap := make(map[*html.Node]style.Map)
		for i, css := range csss {
			// Running total of stylesheet bytes handled so far, including
			// the current one.
			cssSize += len(css)
			log.Printf("CSS size %v kB", cssSize/1024)

			// NOTE(review): 1280 is presumably the assumed window width in
			// pixels - confirm against style.FetchNodeMap.
			nm, err := style.FetchNodeMap(doc, css, 1280)
			if err != nil {
				log.Errorf("Fetch CSS Rules failed: %v", err)
				if *DebugDumpCSS {
					if werr := ioutil.WriteFile("info.css", []byte(css), 0644); werr != nil {
						log.Errorf("dump css to info.css: %v", werr)
					}
				}
				continue
			}

			log.Printf("[%v/%v] Fetch CSS Rules successful!", i+1, len(csss))
			if debugPrintHtml {
				log.Printf("%v", nm)
			}
			style.MergeNodeMaps(nodeMap, nm)
		}

		return doc, nodeMap
	}

	// The 1st pass only parses the document so that the stylesheet
	// references can be extracted from it.
	log.Printf("1st pass")
	doc, _ := pass(htm)

	log.Printf("2nd pass")
	log.Printf("Download style...")
	csss := cssSrcs(f, doc)
	doc, nodeMap := pass(htm, csss...)

	// 3rd pass is only needed initially to load the scripts and set the goja VM
	// state. During subsequent calls from click handlers that state is kept.
	if *ExperimentalJsInsecure && layouting != ClickRelayout {
		log.Printf("3rd pass")
		nt := nodes.NewNodeTree(doc, style.Map{}, nodeMap, nil)
		jsSrcs := domino.Srcs(nt)
		downloads := make(map[string]string)
		for _, src := range jsSrcs {
			u, err := f.LinkedUrl(src)
			if err != nil {
				log.Printf("error parsing %v", src)
				continue
			}
			log.Printf("Download %v", u)
			buf, _, err := f.Get(u)
			if err != nil {
				log.Printf("error downloading %v", u)
				continue
			}
			downloads[src] = string(buf)
		}
		codes := domino.Scripts(nt, downloads)
		log.Infof("JS pipeline start")
		if w.d != nil {
			log.Infof("Stop existing JS instance")
			w.d.Stop()
		}
		w.d = domino.NewDomino(htm, browser, nt)
		w.d.Start()
		jsProcessed, changed, err := processJS2(w.d, codes)
		if changed && err == nil {
			// Scripts modified the document: lay out the processed HTML
			// instead of the original.
			htm = jsProcessed
			if debugPrintHtml {
				log.Printf("%v\n", jsProcessed)
			}
			doc, nodeMap = pass(htm, csss...)
		} else if err != nil {
			log.Errorf("JS error: %v", err)
		}
		log.Infof("JS pipeline end")
	}

	// countHtmlNodes returns the number of nodes in the subtree rooted at n,
	// including n itself.
	var countHtmlNodes func(*html.Node) int
	countHtmlNodes = func(n *html.Node) (num int) {
		num++
		for c := n.FirstChild; c != nil; c = c.NextSibling {
			num += countHtmlNodes(c)
		}
		return
	}
	log.Printf("%v html nodes found...", countHtmlNodes(doc))

	body := grep(doc, "body")

	log.Printf("Layout website...")
	scroller = NewScroll(
		NodeToBox(0, browser, nodes.NewNodeTree(body, style.Map{}, nodeMap, &nodes.Node{})),
	)
	numElements := 0
	TraverseTree(scroller, func(ui duit.UI) {
		numElements++
	})
	w.UI = scroller
	log.Printf("Layouting done (%v elements created)", numElements)
	if numElements < 10 {
		// Heuristic: so few elements suggests the CSS hid or broke the page,
		// so redo the layout with an empty style map.
		log.Errorf("Less than 10 elements layouted, seems css processing failed. Will layout without css")
		scroller = NewScroll(
			NodeToBox(0, browser, nodes.NewNodeTree(body, style.Map{}, make(map[*html.Node]style.Map), nil)),
		)
		w.UI = scroller
	}

	w.mu.Lock()
	w.html = htm
	w.mu.Unlock()

	log.Flush()
}

// cssSrcs collects all stylesheet sources that apply to doc, in order:
// style.AddOnCSS first, then the contents of every <style> element, then
// every linked stylesheet downloaded through f.
//
// Linked stylesheets whose URL cannot be resolved, whose download fails, or
// whose content type is not CSS are logged and left out.
func cssSrcs(f opossum.Fetcher, doc *html.Node) (csss []string) {
	hrefs := style.Hrefs(doc)
	tree := nodes.NewNodeTree(doc, style.Map{}, make(map[*html.Node]style.Map), nil)
	styleTags := tree.FindAll("style")

	csss = make([]string, 0, len(styleTags)+len(hrefs))
	csss = append(csss, style.AddOnCSS)
	for _, tag := range styleTags {
		csss = append(csss, tag.ContentString(true))
	}

	for _, href := range hrefs {
		u, err := f.LinkedUrl(href)
		if err != nil {
			log.Printf("error parsing %v", href)
			continue
		}

		log.Printf("Download %v", u)
		data, ct, err := f.Get(u)
		switch {
		case err != nil:
			log.Printf("error downloading %v", u)
		case ct.IsCSS():
			csss = append(csss, string(data))
		default:
			log.Printf("css: unexpected %v", ct)
		}
	}

	return csss
}

// formData gathers the name/value pairs of the form controls in the subtree
// rooted at n.
//
// input and select elements contribute their "value" attribute, textarea
// elements their text content; controls without a "name" attribute are
// ignored. An input/select of type "submit" is only included when it is
// submitBtn, i.e. the button that triggered the submission. The result is
// never nil.
func formData(n, submitBtn *html.Node) (data url.Values) {
	data = make(url.Values)
	name := attr(*n, "name")

	if n.Data == "input" || n.Data == "select" {
		otherSubmit := attr(*n, "type") == "submit" && n != submitBtn
		if otherSubmit {
			// Submit buttons that were not pressed contribute nothing.
			return data
		}
		if name != "" {
			data.Set(name, attr(*n, "value"))
		}
	} else if n.Data == "textarea" {
		content := nodes.NewNodeTree(n, style.Map{}, make(map[*html.Node]style.Map), nil)

		if name != "" {
			data.Set(name, content.ContentString(false))
		}
	}

	// Merge in the values of all descendants.
	for child := n.FirstChild; child != nil; child = child.NextSibling {
		childData := formData(child, submitBtn)
		for key, values := range childData {
			for _, value := range values {
				data.Add(key, value)
			}
		}
	}

	return data
}

// escapeValues returns a copy of q in which every key and value has been
// converted to the encoding of ct. Characters the encoding cannot represent
// are replaced by HTML escapes (encoding.HTMLEscapeUnsupported). If a string
// cannot be encoded at all, the error is logged and the string is kept as is.
func escapeValues(ct opossum.ContentType, q url.Values) (qe url.Values) {
	qe = make(url.Values)
	encoder := encoding.HTMLEscapeUnsupported(ct.Encoding().NewEncoder())

	// escape encodes s, falling back to s itself on failure.
	escape := func(s string) string {
		encoded, err := encoder.String(s)
		if err != nil {
			log.Errorf("string: %v", err)
			return s
		}
		return encoded
	}

	for key, values := range q {
		escapedKey := escape(key)
		for _, value := range values {
			qe.Add(escapedKey, escape(value))
		}
	}

	return qe
}

// submit sends form as triggered by submitBtn and renders the response.
//
// The request method is taken from the form's "method" attribute (default
// GET) and the target from its "action" attribute (default: the current
// URL). For GET the form values are merged into the target's query string,
// escaped for the current page's content type; any other method is sent via
// b.PostForm. Errors and non-HTML responses are logged and nothing is
// rendered.
func (b *Browser) submit(form *html.Node, submitBtn *html.Node) {
	var err error
	var buf []byte
	var contentType opossum.ContentType

	method := "GET" // TODO
	if m := attr(*form, "method"); m != "" {
		method = strings.ToUpper(m)
	}
	// NOTE(review): if b.URL() hands out a shared pointer, setting RawQuery
	// below modifies that shared URL when the form has no action - confirm.
	uri := b.URL()
	if action := attr(*form, "action"); action != "" {
		uri, err = b.LinkedUrl(action)
		if err != nil {
			log.Printf("error parsing %v", action)
			return
		}
	}

	if method == "GET" {
		q := uri.Query()
		for k, vs := range formData(form, submitBtn) {
			// Form values replace same-named parameters already present in
			// the URL. All values of a repeated field are kept, matching
			// what the POST branch sends.
			q.Del(k)
			for _, v := range vs {
				q.Add(k, v)
			}
		}
		uri.RawQuery = escapeValues(b.Website.ContentType, q).Encode()
		buf, contentType, err = b.get(uri, true)
	} else {
		buf, contentType, err = b.PostForm(uri, formData(form, submitBtn))
	}

	if err != nil {
		log.Errorf("submit form: %v", err)
		return
	}

	if !contentType.IsHTML() {
		log.Errorf("post: unexpected %v", contentType)
		return
	}

	b.render(contentType, buf)
}