ref: eaffa1ef55825c34e138246fe5db5bbf996a8dbb
parent: 39321d74d876356004c5314fbeb58fdaa36545aa
author: cinap_lenrek <cinap_lenrek@felloff.net>
date: Thu Jan 4 21:12:42 EST 2024
kernel: fix EDF scheduler double ready() and more robust double-ready detection Move the "double-ready" check into queueproc() function, doing it while holding the runq lock, meaning all transitions to Ready state are serialized. We do not just check for double-ready but for any "illegal" transitions: ready() on Dead, Moribund, New, Ready, Running and Waitrelease is not allowed. ready() on Queueing*, Wakeme, Broken, Stopped and Rendez is only valid when done from another process. For rescheduling, we have to go to Scheding state before calling ready(). (rebalance(), schedinit()...) The EDF scheduler had this bug where it could ready() multiple times as it was staying in Waitrelease state after releasing the edflock. Now it transitions through Scheding avoiding the issue.
--- a/sys/src/9/port/edf.c
+++ b/sys/src/9/port/edf.c
@@ -209,7 +209,6 @@
releaseintr(Ureg *u, Timer *t)
{
Proc *p;
- Schedq *rq;
if(panicking || active.exiting)
return;
@@ -224,21 +223,17 @@
return;
case Ready:
/* remove proc from current runq */
- rq = &runq[p->priority];
- if(dequeueproc(rq, p) != p){
+ if(dequeueproc(&runq[p->priority], p) != p){
DPRINT("releaseintr: can't find proc or lock race\n");
release(p); /* It'll start best effort */
edfunlock();
return;
}
- p->state = Waitrelease;
/* fall through */
case Waitrelease:
+ p->state = Scheding;
release(p);
edfunlock();
- if(p->state == Wakeme){
- iprint("releaseintr: wakeme\n");
- }
ready(p);
if(up){
up->delaysched++;
@@ -412,13 +407,13 @@
DPRINT("%lud edfadmit other %lud[%s], release at %lud\n",
now, p->pid, statename[p->state], e->t);
if(e->tt == nil){
- e->tf = releaseintr;
- e->ta = p;
tns = e->t - now;
if(tns < 20)
tns = 20;
e->tns = 1000LL * tns;
e->tmode = Trelative;
+ e->tf = releaseintr;
+ e->ta = p;
timeradd(e);
}
}
@@ -476,8 +471,8 @@
if(n < 20)
n = 20;
up->tns = 1000LL * n;
- up->tf = releaseintr;
up->tmode = Trelative;
+ up->tf = releaseintr;
up->ta = up;
up->trend = &up->sleep;
timeradd(up);
@@ -488,6 +483,8 @@
nexterror();
}
sleep(&up->sleep, yfn, nil);
+ up->trend = nil;
+ timerdel(up);
poperror();
}
@@ -495,17 +492,10 @@
edfready(Proc *p)
{
Edf *e;
- Schedq *rq;
- Proc *l, *pp;
- void (*pt)(Proc*, int, vlong);
long n;
if((e = edflock(p)) == nil)
return 0;
-
- if(p->state == Wakeme && p->r){
- iprint("edfready: wakeme\n");
- }
if(e->d - now <= 0){
/* past deadline, arrange for next release */
if((e->flags & Sporadic) == 0){
@@ -550,7 +540,7 @@
now, p->pid, statename[p->state], e->t);
p->state = Waitrelease;
edfunlock();
- return 1; /* Make runnable later */
+ return -1; /* Make runnable later */
}
DPRINT("%lud edfready %lud %s release now\n", now, p->pid, statename[p->state]);
/* release now */
@@ -558,31 +548,6 @@
}
edfunlock();
DPRINT("^");
- rq = &runq[PriEdf];
- /* insert in queue in earliest deadline order */
- lock(runq);
- l = nil;
- for(pp = rq->head; pp; pp = pp->rnext){
- if(pp->edf->d > e->d)
- break;
- l = pp;
- }
- p->rnext = pp;
- if (l == nil)
- rq->head = p;
- else
- l->rnext = p;
- if(pp == nil)
- rq->tail = p;
- rq->n++;
- nrdy++;
- runvec |= 1 << PriEdf;
- p->priority = PriEdf;
- p->readytime = m->ticks;
- p->state = Ready;
- unlock(runq);
- if(p->trace && (pt = proctrace))
- pt(p, SReady, 0);
return 1;
}
--- a/sys/src/9/port/portclock.c
+++ b/sys/src/9/port/portclock.c
@@ -136,7 +136,7 @@
/* rare, but tf can still be active on another cpu */
while(dt->tactive == mp && dt->tt == nil)
- if(up->nlocks == 0 && islo())
+ if(up->state == Running && up->nlocks == 0 && islo())
sched();
}
--- a/sys/src/9/port/portdat.h
+++ b/sys/src/9/port/portdat.h
@@ -769,7 +769,6 @@
ulong cpu; /* cpu average */
ulong lastupdate;
uchar yield; /* non-zero if the process just did a sleep(0) */
- ulong readytime; /* time process came ready */
int preempted; /* true if this process hasn't finished the interrupt
* that last preempted it
*/
--- a/sys/src/9/port/proc.c
+++ b/sys/src/9/port/proc.c
@@ -12,8 +12,8 @@
int schedgain = 30; /* units in seconds */
int nrdy;
-void updatecpu(Proc*);
-int reprioritize(Proc*);
+static void updatecpu(Proc*);
+static int reprioritize(Proc*);
ulong delayedscheds; /* statistics */
ulong skipscheds;
@@ -78,17 +78,13 @@
updatecpu(up);
break;
case Running:
+ up->state = Scheding;
ready(up);
break;
case Moribund:
mmurelease(up);
- up->state = Dead;
- edfstop(up);
- if(up->edf != nil){
- free(up->edf);
- up->edf = nil;
- }
lock(&procalloc);
+ up->state = Dead;
up->mach = nil;
up->qnext = procalloc.free;
procalloc.free = up;
@@ -95,14 +91,15 @@
/* proc is free now, make sure unlock() wont touch it */
up = procalloc.Lock.p = nil;
unlock(&procalloc);
-
- sched();
+ goto out;
}
coherence();
up->mach = nil;
up = nil;
}
+out:
sched();
+ panic("schedinit");
}
int
@@ -164,15 +161,12 @@
void
sched(void)
{
- Proc *p;
-
if(m->ilockdepth)
- panic("cpu%d: ilockdepth %d, last lock %#p at %#p, sched called from %#p",
+ panic("cpu%d: ilockdepth %d, last lock %#p at %#p",
m->machno,
m->ilockdepth,
up != nil ? up->lastilock: nil,
- (up != nil && up->lastilock != nil) ? up->lastilock->pc: 0,
- getcallerpc(&p+2));
+ (up != nil && up->lastilock != nil) ? up->lastilock->pc: 0);
if(up != nil) {
/*
* Delay the sched until the process gives up the locks
@@ -204,18 +198,15 @@
spllo();
return;
}
- p = runproc();
- if(p->edf == nil){
- updatecpu(p);
- p->priority = reprioritize(p);
- }
- if(p != m->readied)
+ up = runproc();
+ if(up->edf == nil)
+ up->priority = reprioritize(up);
+ if(up != m->readied)
m->schedticks = m->ticks + HZ/10;
m->readied = nil;
- up = p;
- up->state = Running;
- up->mach = MACHP(m->machno);
m->proc = up;
+ up->mach = up->mp = MACHP(m->machno);
+ up->state = Running;
mmuswitch(up);
gotolabel(&up->sched);
}
@@ -310,7 +301,7 @@
* to maintain accurate cpu usage statistics. It can be called
* at any time to bring the stats for a given proc up-to-date.
*/
-void
+static void
updatecpu(Proc *p)
{
ulong t, ocpu, n, D;
@@ -348,11 +339,12 @@
* of 3 means you're just right. Having a higher priority (up to p->basepri)
* means you're not using as much as you could.
*/
-int
+static int
reprioritize(Proc *p)
{
int fairshare, n, load, ratio;
+ updatecpu(p);
load = MACHP(0)->load;
if(load == 0)
return p->basepri;
@@ -378,27 +370,101 @@
/*
* add a process to a scheduling queue
*/
-void
+static int
queueproc(Schedq *rq, Proc *p)
{
- int pri;
+ int pri = rq - runq;
- pri = rq - runq;
lock(runq);
+ switch(p->state){
+ case New:
+ case Queueing:
+ case QueueingR:
+ case QueueingW:
+ case Wakeme:
+ case Broken:
+ case Stopped:
+ case Rendezvous:
+ if(p != up)
+ break;
+ /* wet floor */
+ case Dead:
+ case Moribund:
+ case Ready:
+ case Running:
+ case Waitrelease:
+ unlock(runq);
+ return -1;
+ }
+ p->state = Ready;
p->priority = pri;
- p->rnext = nil;
- if(rq->tail != nil)
- rq->tail->rnext = p;
- else
- rq->head = p;
- rq->tail = p;
+ if(pri == PriEdf){
+ Proc *pp, *l;
+
+ /* insert in queue in earliest deadline order */
+ l = nil;
+ for(pp = rq->head; pp != nil; pp = pp->rnext){
+ if(pp->edf->d > p->edf->d)
+ break;
+ l = pp;
+ }
+ p->rnext = pp;
+ if(l == nil)
+ rq->head = p;
+ else
+ l->rnext = p;
+ if(pp == nil)
+ rq->tail = p;
+ } else {
+ p->rnext = nil;
+ if(rq->tail != nil)
+ rq->tail->rnext = p;
+ else
+ rq->head = p;
+ rq->tail = p;
+ }
rq->n++;
nrdy++;
runvec |= 1<<pri;
unlock(runq);
+ return 0;
}
/*
+ * ready(p) picks a new priority for a process and sticks it in the
+ * runq for that priority.
+ */
+void
+ready(Proc *p)
+{
+ int s, pri;
+
+ s = splhi();
+ switch(edfready(p)){
+ default:
+ splx(s);
+ return;
+ case 0:
+ pri = reprioritize(p);
+ break;
+ case 1:
+ pri = PriEdf;
+ break;
+ }
+ if(queueproc(&runq[pri], p) < 0){
+ iprint("ready %s %lud %s pc %p\n",
+ p->text, p->pid, statename[p->state], getcallerpc(&p));
+ } else {
+ void (*pt)(Proc*, int, vlong);
+ pt = proctrace;
+ if(pt != nil)
+ pt(p, SReady, 0);
+ }
+ splx(s);
+}
+
+
+/*
* try to remove a process from a scheduling queue (called splhi)
*/
Proc*
@@ -437,63 +503,16 @@
runvec &= ~(1<<(rq-runq));
rq->n--;
nrdy--;
- if(p->state != Ready)
- print("dequeueproc %s %lud %s\n", p->text, p->pid, statename[p->state]);
-
+ if(p->state != Ready){
+ iprint("dequeueproc %s %lud %s pc %p\n",
+ p->text, p->pid, statename[p->state], getcallerpc(&rq));
+ p = nil;
+ }
unlock(runq);
return p;
}
/*
- * ready(p) picks a new priority for a process and sticks it in the
- * runq for that priority.
- */
-void
-ready(Proc *p)
-{
- int s, pri;
- Schedq *rq;
- void (*pt)(Proc*, int, vlong);
-
- switch(p->state){
- case Running:
- if(p == up)
- break;
- /* wet floor */
- case Dead:
- case Moribund:
- case Scheding:
- print("ready %s %s %lud pc %p\n", statename[p->state],
- p->text, p->pid, getcallerpc(&p));
- return;
- case Ready:
- print("double ready %s %lud pc %p\n",
- p->text, p->pid, getcallerpc(&p));
- return;
- }
-
- s = splhi();
- if(edfready(p)){
- splx(s);
- return;
- }
-
- if(up != p && (p->wired == nil || p->wired == MACHP(m->machno)))
- m->readied = p; /* group scheduling */
-
- updatecpu(p);
- pri = reprioritize(p);
- p->priority = pri;
- rq = &runq[pri];
- p->state = Ready;
- queueproc(rq, p);
- pt = proctrace;
- if(pt != nil)
- pt(p, SReady, 0);
- splx(s);
-}
-
-/*
* yield the processor and drop our priority
*/
void
@@ -516,7 +535,7 @@
static void
rebalance(void)
{
- int pri, npri, x;
+ int pri, npri;
Schedq *rq;
Proc *p;
ulong t;
@@ -526,6 +545,8 @@
return;
balancetime = t;
+ assert(!islo());
+
for(pri=0, rq=runq; pri<Npriq; pri++, rq++){
another:
p = rq->head;
@@ -533,15 +554,16 @@
continue;
if(pri == p->basepri)
continue;
- updatecpu(p);
npri = reprioritize(p);
if(npri != pri){
- x = splhi();
p = dequeueproc(rq, p);
- if(p != nil)
- queueproc(&runq[npri], p);
- splx(x);
- goto another;
+ if(p != nil){
+ p->state = Scheding;
+ if(queueproc(&runq[npri], p) < 0)
+ iprint("rebalance: queueproc %lud %s %s\n",
+ p->pid, p->text, statename[p->state]);
+ goto another;
+ }
}
}
}
@@ -606,10 +628,6 @@
p = dequeueproc(rq, p);
if(p == nil)
goto loop;
-
- p->state = Scheding;
- p->mp = MACHP(m->machno);
-
if(edflock(p)){
edfrun(p, rq == &runq[PriEdf]); /* start deadline timer and do admin */
edfunlock();
@@ -831,12 +849,6 @@
error(Eintr);
}
-static int
-tfn(void *arg)
-{
- return up->trend == nil || up->tfn(arg);
-}
-
void
twakeup(Ureg*, Timer *t)
{
@@ -851,6 +863,12 @@
}
}
+static int
+tfn(void *arg)
+{
+ return up->trend == nil || up->tfn(arg);
+}
+
void
tsleep(Rendez *r, int (*fn)(void*), void *arg, ulong ms)
{
@@ -860,8 +878,8 @@
timerdel(up);
}
up->tns = MS2NS(ms);
- up->tf = twakeup;
up->tmode = Trelative;
+ up->tf = twakeup;
up->ta = up;
up->trend = r;
up->tfn = fn;
@@ -1342,6 +1360,10 @@
qunlock(&up->seglock);
edfstop(up);
+ if(up->edf != nil){
+ free(up->edf);
+ up->edf = nil;
+ }
up->state = Moribund;
sched();
panic("pexit");