ref: d48a0894865e810f46e1ef2f07476c4f3101698a
parent: f3b5bcffceb05ce2b9f8c19d2e0721f3e54098e8
author: cinap_lenrek <cinap_lenrek@felloff.net>
date: Thu Dec 15 18:27:01 EST 2016
pc64: implement simple write combining for framebuffers with the PAT. on some modern machines like the x250, the BIOS arranges the MTRRs and the framebuffer membar in a way that doesn't allow us to mark the framebuffer pages as write combining, leading to slow graphics. since the Pentium III, the processor interprets the page table bit combinations of the WT, CD and bit7 bits as an index into the page attribute table (PAT). to not change the semantics of the WT and CD bits, we preserve the bit patterns 0-3 and use the last entry 7 for write combining. (done in mmuinit() for each core). the new patwc() function takes a virtual address range and changes the page table, marking the range as write combining. no attempt is made to invalidate the TLBs. this doesn't matter in our case, as the following mtrr() call in screen.c does it for us.
--- a/sys/src/9/pc/fns.h
+++ b/sys/src/9/pc/fns.h
@@ -123,6 +123,7 @@
void outl(int, ulong);
void outsl(int, void*, int);
ulong paddr(void*);
+void patwc(void*, int);
ulong pcibarsize(Pcidev*, int);
void pcibussize(Pcidev*, ulong*, ulong*);
int pcicfgr8(Pcidev*, int);
--- a/sys/src/9/pc/mmu.c
+++ b/sys/src/9/pc/mmu.c
@@ -1065,3 +1065,7 @@
return -KZERO - pa;
}
+void
+patwc(void *, int) /* empty stub: the pc kernel changes no page attributes here; pc64/mmu.c has the real one */
+{
+}
--- a/sys/src/9/pc/screen.c
+++ b/sys/src/9/pc/screen.c
@@ -492,6 +492,9 @@
scr->vaddr = vmap(npaddr, nsize);
if(scr->vaddr == 0)
return "cannot allocate vga frame buffer";
+
+ patwc(scr->vaddr, nsize);
+
scr->vaddr = (char*)scr->vaddr+x;
scr->paddr = paddr;
scr->apsize = nsize;
--- a/sys/src/9/pc64/fns.h
+++ b/sys/src/9/pc64/fns.h
@@ -117,6 +117,7 @@
void outl(int, ulong);
void outsl(int, void*, int);
uintptr paddr(void*);
+void patwc(void*, int);
ulong pcibarsize(Pcidev*, int);
void pcibussize(Pcidev*, ulong*, ulong*);
int pcicfgr8(Pcidev*, int);
--- a/sys/src/9/pc64/mmu.c
+++ b/sys/src/9/pc64/mmu.c
@@ -31,13 +31,16 @@
ulong nfree;
} mmupool;
-/* level */
enum {
+ /* level */
PML4E = 2,
PDPE = 1,
PDE = 0,
MAPBITS = 8*sizeof(m->mmumap[0]),
+
+ /* index of the IA32_PAT entry that mmuinit() reprograms to write combining; using 7 leaves the WT/CD-only patterns 0-3 untouched */
+ PATWC = 7,
};
static void
@@ -130,6 +133,12 @@
/* SYSCALL flags mask */
wrmsr(0xc0000084, 0x200);
+
+ /* IA32_PAT write combining */
+ rdmsr(0x277, &v);
+ v &= ~(255LL<<(PATWC*8));
+ v |= 1LL<<(PATWC*8); /* WC */
+ wrmsr(0x277, v);
}
/*
@@ -534,4 +543,29 @@
vunmap(void *v, int)
{
paddr(v); /* will panic on error */
+}
+
+/*
+ * mark the n bytes of mapped virtual memory starting at v as write combining (used for framebuffer) by making every covering page table entry select PAT entry PATWC; panics if no valid entry is found; no TLB invalidation is done here
+ */
+void
+patwc(void *v, int n)
+{
+ uintptr *pte, mask, attr, va;
+ int z, l;
+
+ /* set the bits for all pages in range; each pass advances by z, the bytes handled by the entry just updated */
+ for(va = (uintptr)v; n > 0; n -= z, va += z){
+ l = 0; /* look for a level 0 entry first */
+ pte = mmuwalk(m->pml4, va, l, 0);
+ if(pte == 0)
+ pte = mmuwalk(m->pml4, va, ++l, 0); /* none: retry at level 1 (presumably a large page mapping) */
+ if(pte == 0 || (*pte & PTEVALID) == 0)
+ panic("patwc: va=%#p", va);
+ z = PGLSZ(l); /* presumably the page size at level l */
+ z -= va & (z-1); /* bytes from va up to the next z-aligned boundary (z assumed a power of two) */
+ mask = l == 0 ? 3<<3 | 1<<7 : 3<<3 | 1<<12; /* bits 3 and 4, plus bit 7 at level 0 or bit 12 at level 1 */
+ attr = (((PATWC&3)<<3) | ((PATWC&4)<<5) | ((PATWC&4)<<10)); /* low two index bits go to bits 3,4; the high bit goes to both bit 7 and bit 12 */
+ *pte = (*pte & ~mask) | (attr & mask); /* mask keeps only the high-bit position that applies to this level */
+ }
}