shithub: riscv

Download patch

ref: d48a0894865e810f46e1ef2f07476c4f3101698a
parent: f3b5bcffceb05ce2b9f8c19d2e0721f3e54098e8
author: cinap_lenrek <cinap_lenrek@felloff.net>
date: Thu Dec 15 18:27:01 EST 2016

pc64: implement simple write combining for framebuffers with the PAT

on some modern machines like the x250, the bios arranges the mtrrs
and the framebuffer membar in a way that doesn't allow us to mark
the framebuffer pages as write combining, leading to slow graphics.

since the pentium III, the processor interprets the page table bit
combinations of the WT, CD and bit7 bits as an index into the
page attribute table (PAT).

to not change the semantics of the WT and CD bits, we preserve
pat entries 0-3 and use the last entry, 7, for write combining
(this is set up in mmuinit() on each core).

the new patwc() function takes a virtual address range and changes
the page table entries to mark the range as write combining. no
attempt is made to invalidate the tlbs. this doesn't matter in our
case, as the following mtrr() call in screen.c does it for us.

--- a/sys/src/9/pc/fns.h
+++ b/sys/src/9/pc/fns.h
@@ -123,6 +123,7 @@
 void	outl(int, ulong);
 void	outsl(int, void*, int);
 ulong	paddr(void*);
+void	patwc(void*, int);
 ulong	pcibarsize(Pcidev*, int);
 void	pcibussize(Pcidev*, ulong*, ulong*);
 int	pcicfgr8(Pcidev*, int);
--- a/sys/src/9/pc/mmu.c
+++ b/sys/src/9/pc/mmu.c
@@ -1065,3 +1065,7 @@
 	return -KZERO - pa;
 }
 
+void
+patwc(void *, int)	/* stub: does nothing on pc; the pc64 patwc() in this change does the real work */
+{
+}
--- a/sys/src/9/pc/screen.c
+++ b/sys/src/9/pc/screen.c
@@ -492,6 +492,9 @@
 	scr->vaddr = vmap(npaddr, nsize);
 	if(scr->vaddr == 0)
 		return "cannot allocate vga frame buffer";
+
+	patwc(scr->vaddr, nsize);
+
 	scr->vaddr = (char*)scr->vaddr+x;
 	scr->paddr = paddr;
 	scr->apsize = nsize;
--- a/sys/src/9/pc64/fns.h
+++ b/sys/src/9/pc64/fns.h
@@ -117,6 +117,7 @@
 void	outl(int, ulong);
 void	outsl(int, void*, int);
 uintptr	paddr(void*);
+void	patwc(void*, int);
 ulong	pcibarsize(Pcidev*, int);
 void	pcibussize(Pcidev*, ulong*, ulong*);
 int	pcicfgr8(Pcidev*, int);
--- a/sys/src/9/pc64/mmu.c
+++ b/sys/src/9/pc64/mmu.c
@@ -31,13 +31,16 @@
 	ulong	nfree;
 } mmupool;
 
-/* level */
 enum {
+	/* level */
 	PML4E	= 2,
 	PDPE	= 1,
 	PDE	= 0,
 
 	MAPBITS	= 8*sizeof(m->mmumap[0]),
+
+	/* PAT entry used for write combining */
+	PATWC	= 7,
 };
 
 static void
@@ -130,6 +133,12 @@
 
 	/* SYSCALL flags mask */
 	wrmsr(0xc0000084, 0x200);
+
+	/* IA32_PAT write combining */
+	rdmsr(0x277, &v);
+	v &= ~(255LL<<(PATWC*8));
+	v |= 1LL<<(PATWC*8);	/* WC */
+	wrmsr(0x277, v);
 }
 
 /*
@@ -534,4 +543,29 @@
 vunmap(void *v, int)
 {
 	paddr(v);	/* will panic on error */
+}
+
+/*
+ * mark the mapped pages covering n bytes at v as write combining via pat entry PATWC (used for framebuffer)
+ */
+void
+patwc(void *v, int n)
+{
+	uintptr *pte, mask, attr, va;
+	int z, l;
+
+	/* set the bits for all pages in range */
+	for(va = (uintptr)v; n > 0; n -= z, va += z){
+		l = 0;
+		pte = mmuwalk(m->pml4, va, l, 0);
+		if(pte == 0)
+			pte = mmuwalk(m->pml4, va, ++l, 0);	/* nothing at level 0: retry one level up */
+		if(pte == 0 || (*pte & PTEVALID) == 0)
+			panic("patwc: va=%#p", va);	/* the whole range must already be mapped */
+		z = PGLSZ(l);
+		z -= va & (z-1);	/* distance from va to the next PGLSZ(l) boundary */
+		mask = l == 0 ? 3<<3 | 1<<7 : 3<<3 | 1<<12;	/* bits 3-4 plus bit 7 at level 0, bit 12 at level 1 */
+		attr = (((PATWC&3)<<3) | ((PATWC&4)<<5) | ((PATWC&4)<<10));	/* PATWC index spread over those bit positions */
+		*pte = (*pte & ~mask) | (attr & mask);	/* the tlb is not invalidated here */
+	}
+}