shithub: devlimit-patch

ref: a9f94671c16558d2155ae9c37cd8c4e4101fef46
dir: /0001-limits-added-devlimit-process-and-memory-limits.patch/

View raw version
From: kitzman <kitzman@disroot.org>
Date: Sun, 18 Feb 2024 09:36:59 +0000
Subject: [PATCH] limits: added devlimit, process and memory limits

---
diff 79a7b4ae59c2e0352b354cf719bc7ef4055f83ae c7a44c1564d2fab0b82b744927ed0f2be468fa1b
--- /dev/null
+++ b/sys/man/3/limit
@@ -1,0 +1,136 @@
+.TH LIMIT 3
+.SH NAME
+limit \- system limit interface
+.SH SYNOPSIS
+.nf
+.B bind #Λ /mnt/limit
+
+.B /mnt/limit/ctl
+.B /mnt/limit/clone
+.B /mnt/limit/switch
+.B /mnt/limit/status
+.BI /mnt/limit/groups/ \&...
+.fi
+.SH DESCRIPTION
+The
+.B #Λ
+device provides the interface to the system's limit mechanism.
+Each process has a limit group assigned. Groups are created as
+children of other groups, are represented by an id, and follow the
+restrictions imposed by their parents. A group is deleted the
+moment there are no more processes or segments referring to it.
+The group is
+.IR owned
+by the uname and gid of the creator process, and the attributes
+can be changed. The system's group, with id 1, imposes no
+restrictions and does not increase counters.
+.PP
+The root directory contains the
+.BR ctl ,
+.BR clone ,
+.BR switch ,
+.BR status
+files, and a directory named
+.BR groups .
+The current process' limit group is always presented in the root directory,
+and other groups, in the
+.B groups
+subdirectory, where they have the same files, except for the
+.BR groups .
+.PP
+The
+.B ctl
+file controls the limit group's attributes, namely, the label, and the limits.
+Reading the file returns the limit group's id. The following commands are
+supported:
+.RS
+.TF "\fLmproc number \fR"
+.PD
+.
+.TP
+.BI "label " label
+Set the group's label; the labels need not be unique.
+.TP
+.BI "mlim " number
+Set the maximum number of groups that can be
+created from this group.
+.TP
+.BI "mproc " number
+Set the maximum number of processes that can be
+forked inside this group.
+.TP
+.BI "mpage " number
+Set the maximum total number of pages that segments
+inside the limit group can have.
+.PD
+.RE
+.PP
+Reading the
+.B status
+file shows the group's attributes: the label (or an empty line
+if not set), the current limit restrictions on the left, and the counts,
+on the right.
+.PP
+To create a new group, the
+.B clone
+file should be opened. Reading from the fid returns the newly created
+group's id. This creates a new reference, and when the fid is clunked,
+the reference is destroyed. A reference is also created for the parent limit group.
+The opener's process is then switched to the new limit group.
+.PP
+To switch to an existing limit group, the
+.B switch
+file should be opened.
+.PP
+The following limits exist and are imposed: limit limits, which represent the
+maximum number of limit groups which can be "forked" from this group,
+and process limits, which represent the maximum number of processes which can
+exist in the group. The kernel already imposes limits on the number of files
+which can be opened, so implementing that should be redundant.
+.SH EXAMPLES
+To create a new limit group and restrict the number of sublimits and processes:
+.IP
+.EX
+% <>[10] /mnt/limit/clone
+% echo label mygroup >/mnt/limit/ctl
+% echo mlim 1 >/mnt/limit/ctl
+% echo mproc 20 >/mnt/limit/ctl
+.EE
+.PP
+Snippet to attach to an existing limit group, looking for a specific label:
+.IP
+.EX
+#!/bin/rc
+
+slabel=$1
+shift
+prog=$*
+
+bind '#Λ' /mnt/limit
+
+for (lgrpstat in `{walk -f /mnt/limit/groups | grep 'status$'}) {
+	lgrpdir=`{basename -d $lgrpstat}
+	lgrplabel=`{cat $lgrpstat | sed 1q}
+	if(~ $lgrplabel $slabel) {
+		<>[10] $lgrpdir/switch
+		exec $prog
+	}
+}
+
+echo lgrp $slabel not found
+exit nolgrp
+.EE
+.SH SOURCE
+.B /sys/src/9/port/limit.c
+.br
+.B /sys/src/9/port/devlimit.c
+.SH BUGS
+.PP
+Not all processes use
+.BR pexit ,
+probably the ones which are created during boot. This
+has to be checked.
+.PP
+Spawning a lot of processes inside a limit group which is not
+the root group can cause a kernel panic related
+to runlock. Needs to be investigated.
--- a/sys/src/9/arm64/mkfile
+++ b/sys/src/9/arm64/mkfile
@@ -20,6 +20,7 @@
 	dev.$O\
 	edf.$O\
 	fault.$O\
+	limit.$O\
 	mul64fract.$O\
 	page.$O\
 	parse.$O\
--- a/sys/src/9/bcm/mkfile
+++ b/sys/src/9/bcm/mkfile
@@ -21,6 +21,7 @@
 	dev.$O\
 	edf.$O\
 	fault.$O\
+	limit.$O\
 	mul64fract.$O\
 	page.$O\
 	parse.$O\
--- a/sys/src/9/bcm64/mkfile
+++ b/sys/src/9/bcm64/mkfile
@@ -19,6 +19,7 @@
 	dev.$O\
 	edf.$O\
 	fault.$O\
+	limit.$O\
 	mul64fract.$O\
 	page.$O\
 	parse.$O\
--- a/sys/src/9/cycv/mkfile
+++ b/sys/src/9/cycv/mkfile
@@ -20,6 +20,7 @@
 	dev.$O\
 	edf.$O\
 	fault.$O\
+	limit.$O\
 	mul64fract.$O\
 	rebootcmd.$O\
 	page.$O\
--- a/sys/src/9/imx8/mkfile
+++ b/sys/src/9/imx8/mkfile
@@ -20,6 +20,7 @@
 	dev.$O\
 	edf.$O\
 	fault.$O\
+	limit.$O\
 	mul64fract.$O\
 	page.$O\
 	parse.$O\
--- a/sys/src/9/kw/mkfile
+++ b/sys/src/9/kw/mkfile
@@ -21,6 +21,7 @@
 	dev.$O\
 	edf.$O\
 	fault.$O\
+	limit.$O\
 	mul64fract.$O\
 	rebootcmd.$O\
 	page.$O\
--- a/sys/src/9/mt7688/mkfile
+++ b/sys/src/9/mt7688/mkfile
@@ -24,6 +24,7 @@
 	dev.$O\
 	edf.$O\
 	fault.$O\
+	limit.$O\
 	mul64fract.$O\
 	page.$O\
 	parse.$O\
--- a/sys/src/9/mtx/mkfile
+++ b/sys/src/9/mtx/mkfile
@@ -18,6 +18,7 @@
 	edf.$O\
 	fault.$O\
 	iomap.$O\
+	limit.$O\
 	log.$O\
 	mul64fract.$O\
 	rebootcmd.$O\
--- a/sys/src/9/omap/mkfile
+++ b/sys/src/9/omap/mkfile
@@ -22,6 +22,7 @@
 	dev.$O\
 	edf.$O\
 	fault.$O\
+	limit.$O\
 	mul64fract.$O\
 	rebootcmd.$O\
 	page.$O\
--- a/sys/src/9/pc/mkfile
+++ b/sys/src/9/pc/mkfile
@@ -25,6 +25,7 @@
 	edf.$O\
 	fault.$O\
 	iomap.$O\
+	limit.$O\
 	memmap.$O\
 	page.$O\
 	parse.$O\
--- a/sys/src/9/pc64/mkfile
+++ b/sys/src/9/pc64/mkfile
@@ -23,6 +23,7 @@
 	edf.$O\
 	fault.$O\
 	iomap.$O\
+	limit.$O\
 	memmap.$O\
 	page.$O\
 	parse.$O\
--- /dev/null
+++ b/sys/src/9/port/devlimit.c
@@ -1,0 +1,415 @@
+#include	"u.h"
+#include	"../port/lib.h"
+#include	"mem.h"
+#include	"dat.h"
+#include	"fns.h"
+#include	"../port/error.h"
+
+extern ulong	kerndate;
+
+extern Lgrp *lgrptab[LIMMAX];
+extern Lock lgrptablock;
+
+Lgrp*	getlgrp(int);
+void	switchlgrp(Lgrp*);
+
+/*	filesystem	*/
+enum {
+	LimitQidPos		=	8,		/* bit position of the group id within qid.path */
+	LimitQidMask	=	0xff,	/* low bits of qid.path: index of the file in limitdir */
+	LabelSize		=	64,		/* longest accepted label, counting the terminating NUL */
+	StatusSize		=	256,	/* size of the buffer the status file is formatted into */
+};
+
+#define	QID(q)			((int)((q) & LimitQidMask))	/* file index held in a qid path */
+#define	LGID(q)			((int)((q) >> LimitQidPos))	/* group id held in a qid path; 0 means up->lgrp */
+#define	LGPATH(i, q)	(((uvlong)(i) << LimitQidPos) + (uvlong)(q))	/* qid path for file q of group i */
+
+enum
+{
+	Qroot,		/* the values index limitdir and form the low byte of qid.path */
+	Qctl,
+	Qclone,
+	Qswitch,
+	Qstatus,
+	Qgroups,
+};
+
+static Dirtab limitdir[] =
+{
+	".",			{Qroot, 0, QTDIR},				0,	DMDIR|0550,
+	"ctl",			{Qctl},							0,	0640,	/* limgen reports 0440 for group 1 */
+	"clone",		{Qclone},						0,	0440,	/* opening creates a child group (limopen) */
+	"switch",		{Qswitch},						0,	0440,	/* opening joins this group (limopen) */
+	"status",		{Qstatus},						0,	0440,
+	"groups",		{Qgroups, 0, QTDIR},			0,	DMDIR|0550,	/* left out by limgen inside group directories */
+};
+
+// ctl commands; the values are the index fields of limitcmd
+enum {
+	CMlabel,
+	CMsetmlim,
+	CMsetmproc,
+	CMsetmpage,
+};
+
+static
+Cmdtab limitcmd[] = {	/* index, command word, 2: presumably the required field count, confirm in lookupcmd */
+	CMlabel,	"label",	2,
+	CMsetmlim,	"mlim",		2,
+	CMsetmproc,	"mproc",	2,
+	CMsetmpage,	"mpage",	2,
+};
+
+/* Fill in db for the entry named n with the given qid, length, owner, group and permissions. */
+void
+limdir(Chan *c, Qid qid, char *n, vlong length, char *user, char *group, long perm, Dir *db)
+{
+	if(c->flag & CMSG)
+		qid.type |= QTMOUNT;	/* CMSG is set on the channel: mark the qid QTMOUNT */
+	db->type = devtab[c->type]->dc;
+	db->dev = c->dev;
+	db->qid = qid;
+	db->name = n;
+	db->length = length;
+	db->mode = perm | (qid.type << 24);	/* qid type bits go into the top byte of mode */
+	db->uid = user;
+	db->gid = group;
+	db->muid = user;
+	db->atime = seconds();
+	db->mtime = kerndate;
+}
+
+/* Directory generator handed to devwalk, devstat, devopen and devdirread: */
+/* a qid group id of 0 selects up->lgrp, any other id the group in lgrptab. */
+static int
+limgen(Chan *c, char *name, Dirtab* tab, int ntab, int s, Dir *dp)
+{
+	Lgrp *l;
+	Qid q;
+	long perm;
+	int i, lgid = LGID(c->qid.path);
+
+	if(lgid)
+		l = getlgrp(lgid);
+	else
+		l = up->lgrp;
+	if(l == nil)
+		return -1;
+
+	/* device root */
+	if(s == DEVDOTDOT){
+		rlock(l);
+		if(lgid == 0) {
+			c->qid.vers = 1;
+			limdir(c, c->qid, "#Λ", 0, l->uid, l->gid, (long)tab[Qroot].perm, dp);	/* same rune as limattach */
+		} else {
+			limdir(c, tab[Qgroups].qid, tab[Qgroups].name, 0, l->uid, l->gid, (long)tab[Qgroups].perm, dp);
+		}
+		runlock(l);
+		return 1;
+	}
+
+	/* tab is part of every gen due to (i) and (ii) */
+	if(QID(c->qid.path) == Qgroups) goto groupsgen;	// or not?
+	if(name) {
+		if(lgid != 0 && strcmp(name, tab[Qgroups].name) == 0)
+			return 0;
+		if(strcmp(name, tab[QID(c->qid.path)].name) == 0)
+			return -1;
+		for(i = 0; i < ntab; i++) {
+			if(strcmp(name, tab[i].name) == 0) {
+				rlock(l);
+				perm = tab[i].perm;
+				if(i == Qctl && (lgid == 1 || (lgid == 0 && up->lgrp->lgid == 1)))
+					perm = 0440;
+				mkqid(&q, LGPATH(lgid, i), 0, tab[i].qid.type);
+				limdir(c, q, name, 0, l->uid, l->gid, perm, dp);
+				runlock(l);
+				return 1;
+			}
+		}
+	} else {
+		if(s < ntab) {
+			if(lgid != 0 && s == Qgroups)
+				return 0;
+			if(QID(c->qid.path) == s)
+				return 0;
+			rlock(l);
+			perm = tab[s].perm;
+			if(s == Qctl && (lgid == 1 || (lgid == 0 && up->lgrp->lgid == 1)))
+				perm = 0440;
+			mkqid(&q, LGPATH(lgid, s), 0, tab[s].qid.type);
+			limdir(c, q, tab[s].name, 0, l->uid, l->gid, perm, dp);
+			runlock(l);
+			return 1;
+		}
+	}
+
+	/* the lgrp dirs are only part of Qgroups (i) and the dirs themselves (ii) */
+groupsgen:
+	if(QID(c->qid.path) == Qgroups && s < ntab)
+		return 0;
+	if(QID(c->qid.path) != Qgroups || (QID(c->qid.path) == Qroot && lgid == 0))
+		return -1;
+	if(name) {
+		i = atoi(name);
+		if(i > LIMMAX || i < 1)
+			return -1;
+		if(l = getlgrp(i)) {
+			rlock(l);
+			mkqid(&q, LGPATH(i, Qroot), 0, QTDIR);
+			limdir(c, q, name, 0, l->uid, l->gid, (long)tab[Qroot].perm, dp);
+			runlock(l);
+			return 1;
+		}
+	} else {
+		i = s - ntab + 1;
+		if(i > LIMMAX || i < 1)
+			return -1;
+		if(l = getlgrp(i)) {
+			rlock(l);
+			name = up->genbuf;	/* per-process scratch buffer; the malloc used here before was never freed */
+			snprint(name, NUMSIZE, "%d", i);
+			mkqid(&q, LGPATH(i, Qroot), 0, QTDIR);
+			limdir(c, q, name, 0, l->uid, l->gid, (long)tab[Qroot].perm, dp);
+			runlock(l);
+			return 1;
+		}
+		return 0;
+	}
+	return -1;
+}
+
+static Chan*
+limattach(char *spec)
+{
+	return devattach(L'Λ', spec);	/* same rune as the first field of limitdevtab */
+}
+
+static Walkqid*
+limwalk(Chan *c, Chan *nc, char **name, int nname)
+{
+	return devwalk(c, nc, name, nname, limitdir, nelem(limitdir), limgen);	/* entries come from limgen */
+}
+
+static int
+limstat(Chan *c, uchar *db, int n)
+{
+	return devstat(c, db, n, limitdir, nelem(limitdir), limgen);	/* entries come from limgen */
+}
+
+/* Open a limit file; opening clone or switch also changes the caller's limit group. */
+static Chan*
+limopen(Chan *c, int omode)
+{
+	Chan *co;
+	Lgrp *l;
+	int lgid, file;
+
+	lgid = LGID(c->qid.path);
+	l = lgid ? getlgrp(lgid) : up->lgrp;
+	if(l == nil)
+		error(Enonexist);
+
+	/* directories may only be opened for reading */
+	if((c->qid.type & QTDIR) && omode != OREAD)
+		error(Eperm);
+
+	co = devopen(c, omode, limitdir, nelem(limitdir), limgen);
+	file = QID(c->qid.path);
+	if(file == Qclone) {
+		/* a new child of l becomes the caller's group */
+		l = newlgrp(l);
+		switchlgrp(l);
+	} else if(file == Qswitch) {
+		/* the caller joins the existing group l */
+		switchlgrp(l);
+	}
+
+	return co;
+}
+
+/* Releases nothing; only raises Enonexist when the channel's group no longer exists. */
+static void
+limclose(Chan *c)
+{
+	int lgid = LGID(c->qid.path);
+	Lgrp *l;
+
+	l = lgid ? getlgrp(lgid) : up->lgrp;
+	if(l == nil)
+		error(Enonexist);
+}
+
+static void
+limremove(Chan*)
+{
+	error(Eperm);	/* no file of this device can be removed */
+}
+
+/* Read a limit file: directory listing, group id (ctl), or status text built with seprint. */
+static long
+limread(Chan *c, void *va, long n, vlong off)
+{
+	Lgrp *l = up->lgrp;
+	char *buf, *p;
+	long m;
+	int lgid = LGID(c->qid.path);
+
+	if(lgid)
+		l = getlgrp(lgid);
+	if(l == nil)
+		error(Enonexist);
+
+	switch(QID(c->qid.path)){
+	case Qroot:
+		return devdirread(c, va, n, limitdir, nelem(limitdir), limgen);
+	case Qctl:
+		rlock(l);
+		m = readnum((ulong) off, va, n, l->lgid, NUMSIZE);
+		runlock(l);
+		return m;
+	case Qclone:
+	case Qswitch:
+		return readstr((ulong) off, va, n, "");
+	case Qstatus:
+		buf = malloc(StatusSize);
+		if(buf == nil)
+			error(Enomem);
+		rlock(l);
+		p = seprint(buf, buf + StatusSize, "%s\n", l->label ? l->label : "");	/* append; buf is never a %s argument */
+		p = seprint(p, buf + StatusSize, "%*lud %*lud\n", NUMSIZE-1, l->mlim, NUMSIZE-1, l->clim);
+		p = seprint(p, buf + StatusSize, "%*lud %*lud\n", NUMSIZE-1, l->mproc, NUMSIZE-1, l->cproc);
+		seprint(p, buf + StatusSize, "%*lud %*lud\n", NUMSIZE-1, l->mpage, NUMSIZE-1, l->cpage);
+		runlock(l);
+		m = readstr((ulong) off, va, n, buf);
+		free(buf);
+		return m;
+	case Qgroups:
+		if(lgid != 0)
+			error(Eperm);
+		return devdirread(c, va, n, limitdir, nelem(limitdir), limgen);
+	default:
+		error(Eperm);
+	}
+	return 0;	/* not reached: every case above returns or raises */
+}
+
+/*
+ * Handle a write to ctl (a write to any other file raises Eperm):
+ *	label name			set the label; longer than LabelSize-1 bytes raises Eperm
+ *	mlim|mproc|mpage n	set that maximum under the group's write lock
+ * A number that atoi reads as zero or negative raises Ebadctl.
+ * Returns n, the byte count handed in.
+ */
+static long
+limwrite(Chan *c, void *va, long n, vlong)
+{
+	Lgrp *l = up->lgrp;
+	Cmdbuf *cb;
+	Cmdtab *ct;
+	char *label;
+	ulong *lim;
+	long m;
+	int lgid = LGID(c->qid.path);
+
+	if(lgid)
+		l = getlgrp(lgid);
+	if(l == nil)
+		error(Enonexist);
+
+	switch(QID(c->qid.path)){
+	case Qctl:
+		cb = parsecmd(va, n);
+		if(waserror()) {
+			free(cb);
+			nexterror();
+		}
+		ct = lookupcmd(cb, limitcmd, nelem(limitcmd));
+		if(ct == nil)
+			error(Ebadctl);
+
+		lim = nil;
+		switch(ct->index) {
+		case CMlabel:
+			label = cb->f[1];
+			if(strlen(label) > LabelSize - 1)
+				error(Eperm);
+			wlock(l);
+			kstrdup(&l->label, label);
+			wunlock(l);
+			break;
+		case CMsetmlim:
+			lim = &l->mlim;
+			break;
+		case CMsetmproc:
+			lim = &l->mproc;
+			break;
+		case CMsetmpage:
+			lim = &l->mpage;
+			break;
+		default:
+			error(Ebadctl);
+			break;
+		}
+		if(lim != nil) {
+			/* atoi yields 0 for non-numbers; a negative value would wrap to a huge ulong */
+			m = atoi(cb->f[1]);
+			if(m <= 0)
+				error(Ebadctl);
+			wlock(l);
+			*lim = m;
+			wunlock(l);
+		}
+		free(cb);
+		poperror();
+		break;
+	default:
+		error(Eperm);
+		break;
+	}
+
+	return n;
+}
+
+Dev limitdevtab = {
+	L'Λ',	/* device rune; limattach passes the same one to devattach */
+	"limit",
+
+	devreset,
+	devinit,
+	devshutdown,
+	limattach,
+	limwalk,
+	limstat,
+	limopen,
+	devcreate,
+	limclose,
+	limread,
+	devbread,
+	limwrite,
+	devbwrite,
+	limremove,
+	devwstat,
+};
+
+/*	helper functions	*/
+Lgrp*
+getlgrp(int lgid)	/* group with id lgid, or nil if unused or out of range; takes no reference */
+{
+	Lgrp *l;
+	lock(&lgrptablock);
+	l = (lgid >= 1 && lgid <= LIMMAX) ? lgrptab[lgid - 1] : nil;
+	unlock(&lgrptablock);
+	return l;
+}
+
+void
+switchlgrp(Lgrp *l)
+{
+	Lgrp *o = up->lgrp;	/* the group being left */
+	incref(l);	/* taken before the old reference is dropped, so o == l stays alive */
+	up->lgrp = l;
+	closelgrp(o);	/* frees o when this was its last reference */
+}
--- /dev/null
+++ b/sys/src/9/port/limit.c
@@ -1,0 +1,216 @@
+#include	"u.h"
+#include	"../port/lib.h"
+#include	"mem.h"
+#include	"dat.h"
+#include	"fns.h"
+#include	"../port/error.h"
+
+Lgrp *lgrptab[LIMMAX] = { nil };
+Lock lgrptablock;
+
+/*	helper functions	*/
+void
+addchild(Lgrp *parent, Lgrp *new)	/* append new to parent->subgrp and take a reference on parent */
+{
+	wlock(parent);
+	incref(parent);
+	if(parent->submax == 0 || parent->subgrp == nil) {
+		parent->submax = LIMINISUB;
+		parent->subgrp = malloc(LIMINISUB * sizeof(Lgrp*));	/* submax counts entries, not bytes */
+	}
+	if(parent->subcount + 1 > parent->submax) {
+		parent->subgrp = realloc(parent->subgrp, parent->submax * 2 * sizeof(Lgrp*));
+		parent->submax *= 2;
+	}
+	parent->subgrp[parent->subcount] = new;	/* NOTE(review): malloc/realloc results are still unchecked */
+	parent->subcount++;
+	wunlock(parent);
+}
+
+/* Unlink old from its parent's subgrp list and drop the reference addchild took; no-op if old is not listed. */
+void
+removechild(Lgrp *old)
+{
+	Lgrp *p;
+	int k;
+
+	p = old->parent;
+	if(p == nil)
+		return;
+	wlock(p);
+	for(k = 0; k < p->subcount && p->subgrp[k] != old; k++)
+		;
+	if(k == p->subcount) {
+		wunlock(p);
+		return;
+	}
+	p->subcount--;
+	for(; k < p->subcount; k++)
+		p->subgrp[k] = p->subgrp[k + 1];
+	wunlock(p);
+	closelgrp(p);
+}
+
+/*	kernel functions	*/
+/* Create a child of parent (nil for a group without parent), enter it in lgrptab and return it. */
+Lgrp*
+newlgrp(Lgrp *parent)
+{
+	Lgrp *l;
+	char *owner;
+	int lgid;
+
+	/* charge the parent chain first; inclimit raises before counting when over a limit */
+	if(parent)
+		inclimit(LTLIM, parent, 1);
+	l = malloc(sizeof(Lgrp));
+	if(waserror()) {
+		/* undo the charge and the half-built group on any failure below */
+		if(parent)
+			declimit(LTLIM, parent, 1);
+		if(l != nil) {
+			free(l->uid);
+			free(l->gid);
+			free(l);
+		}
+		nexterror();
+	}
+	if(l == nil)
+		error(Enomem);
+	memset(l, 0, sizeof(Lgrp));	/* counters, label, uid, gid and ref all start at zero */
+	owner = up->user ? up->user : eve;
+	kstrdup(&l->uid, owner);
+	kstrdup(&l->gid, owner);
+	l->parent = parent;
+	if(parent) {
+		/* start out with the parent's current maxima */
+		rlock(parent);
+		l->mlim = parent->mlim;
+		l->mproc = parent->mproc;
+		l->mpage = parent->mpage;
+		runlock(parent);
+	}
+
+	/* only the table scan and the slot store run under lgrptablock */
+	lock(&lgrptablock);
+	for(lgid = 0; lgid < LIMMAX; lgid++)
+		if(lgrptab[lgid] == nil)
+			break;
+	if(lgid == LIMMAX) {
+		unlock(&lgrptablock);
+		error("system has reached the maximum amount of limits");
+	}
+	l->lgid = lgid + 1;
+	lgrptab[lgid] = l;
+	unlock(&lgrptablock);
+	poperror();
+	if(parent)
+		addchild(parent, l);
+	return l;
+}
+
+/*
+ * Add q to the counter selected by limit (LTLIM, LTPROC or LTPAGE)
+ * in l and in every ancestor, stopping at the group with id 1 or at
+ * the top of the chain.  Raises "limit reached" before changing any
+ * counter when a non-zero maximum on the way up would be exceeded,
+ * and "unknown limit type" for any other value of limit.
+ */
+void
+inclimit(int limit, Lgrp *l, int q)
+{
+	Lgrp *g;
+	ulong used, cap;
+
+	/* pass 1: check every group under its read lock */
+	for(g = l; g != nil && g->lgid != 1; g = g->parent) {
+		rlock(g);
+		if(limit == LTLIM) {
+			used = g->clim;
+			cap = g->mlim;
+		} else if(limit == LTPROC) {
+			used = g->cproc;
+			cap = g->mproc;
+		} else if(limit == LTPAGE) {
+			used = g->cpage;
+			cap = g->mpage;
+		} else {
+			runlock(g);
+			error("unknown limit type");
+		}
+		if(cap != 0 && used + q > cap) {
+			runlock(g);
+			error("limit reached");
+		}
+		runlock(g);
+	}
+
+	/*	small amounts over the limit can't hurt	*/
+	/* pass 2: the locks were dropped in between, so check and update are not atomic */
+	for(g = l; g != nil && g->lgid != 1; g = g->parent) {
+		wlock(g);
+		if(limit == LTLIM)
+			g->clim += q;
+		else if(limit == LTPROC)
+			g->cproc += q;
+		else if(limit == LTPAGE)
+			g->cpage += q;
+		else {
+			wunlock(g);
+			error("unknown limit type");
+		}
+		wunlock(g);
+	}
+}
+
+/*
+ * Subtract q from the counter selected by limit in l and in every
+ * ancestor, stopping at the group with id 1 or the top of the chain.
+ * The counters are ulong and no underflow check is made, so callers
+ * must only give back what they charged with inclimit.
+ */
+void
+declimit(int limit, Lgrp *l, int q)
+{
+	Lgrp *g;
+
+	for(g = l; g != nil && g->lgid != 1; g = g->parent) {
+		wlock(g);
+		if(limit == LTLIM)
+			g->clim -= q;
+		else if(limit == LTPROC)
+			g->cproc -= q;
+		else if(limit == LTPAGE)
+			g->cpage -= q;
+		else {
+			wunlock(g);
+			error("unknown limit type");
+		}
+		wunlock(g);
+	}
+}
+
+void
+closelgrp(Lgrp* l)	/* drop one reference; the last one unlinks and frees the group */
+{
+	if(decref(l) != 0) return;	/* still referenced elsewhere */
+	wlock(l);
+	if(waserror()) {
+		wunlock(l);
+		nexterror();
+	}
+	if(l->parent) declimit(LTLIM, l->parent, 1);
+	removechild(l);
+	wunlock(l);
+	poperror();
+	lock(&lgrptablock);
+	lgrptab[l->lgid - 1] = nil;
+	unlock(&lgrptablock);
+	free(l->label);
+	free(l->subgrp);	/* child array allocated by addchild; it was never released before */
+	free(l->uid);
+	free(l->gid);
+	free(l);
+}
--- a/sys/src/9/port/portdat.h
+++ b/sys/src/9/port/portdat.h
@@ -14,6 +14,7 @@
 typedef struct Image	Image;
 typedef struct Log	Log;
 typedef struct Logflag	Logflag;
+typedef struct Lgrp	Lgrp;
 typedef struct Mntcache Mntcache;
 typedef struct Mount	Mount;
 typedef struct Mntrah	Mntrah;
@@ -433,6 +434,7 @@
 	Pte	*ssegmap[SSEGMAPSIZE];
 	Sema	sema;
 	ulong	mark;		/* portcountrefs */
+	Lgrp		*lgrp;
 };
 
 struct Segio
@@ -538,6 +540,31 @@
 	DELTAFD	= 20		/* incremental increase in Fgrp.fd's */
 };
 
+struct Lgrp
+{
+	Ref;				/* held by processes and segments in the group and by child groups */
+	RWlock;				/* taken around reads and updates of the counters, limits and label */
+	int		lgid;		/* group id: index into lgrptab plus one */
+	ulong	clim, mlim;	/* child groups: current count, maximum (0 is not enforced) */
+	ulong	cproc, mproc;	/* processes: current count, maximum */
+	ulong	cpage, mpage;	/* segment pages: current count, maximum */
+	char*	label;		/* set through the ctl label command; nil until then */
+	char*	uid;		/* owner, copied from up->user (or eve) by newlgrp */
+	char*	gid;		/* group; newlgrp stores the same name as uid */
+	Lgrp*	parent;		/* nil when newlgrp was called without a parent */
+	uint	subcount, submax;	/* entries used and allocated in subgrp */
+	Lgrp**	subgrp;		/* child groups, maintained by addchild and removechild */
+};
+
+enum
+{
+	LIMMAX = 4096,		/* entries in lgrptab, so also the highest group id */
+	LIMINISUB = 8,		/* initial submax of a group's subgrp array */
+	LTLIM = 0,			/* limit types for inclimit and declimit: child groups */
+	LTPROC,				/* processes */
+	LTPAGE,				/* segment pages */
+};
+
 struct Palloc
 {
 	Lock;
@@ -692,6 +719,7 @@
 	Egrp 	*egrp;		/* Environment group */
 	Fgrp	*fgrp;		/* File descriptor group */
 	Rgrp	*rgrp;		/* Rendez group */
+	Lgrp	*lgrp;		/* Limit group */
 
 	Fgrp	*closingfgrp;	/* used during teardown */
 
--- a/sys/src/9/port/portfns.h
+++ b/sys/src/9/port/portfns.h
@@ -34,6 +34,7 @@
 void		ccloseq(Chan*);
 void		closeegrp(Egrp*);
 void		closefgrp(Fgrp*);
+void		closelgrp(Lgrp*);
 void		closepgrp(Pgrp*);
 void		closergrp(Rgrp*);
 long		clrfpintr(void);
@@ -55,6 +56,7 @@
 void		cupdate(Chan*, uchar*, int, vlong);
 void		cwrite(Chan*, uchar*, int, vlong);
 uintptr		dbgpc(Proc*);
+void		declimit(int, Lgrp*, int);
 long		decref(Ref*);
 int		decrypt(void*, void*, int);
 void		delay(int);
@@ -141,6 +143,7 @@
 void		iunlock(Lock*);
 ulong		imagecached(void);
 ulong		imagereclaim(int);
+void		inclimit(int, Lgrp*, int);
 long		incref(Ref*);
 void		init0(void);
 void		initseg(void);
@@ -210,6 +213,7 @@
 int		needpages(void*);
 Chan*		newchan(void);
 int		newfd(Chan*, int);
+Lgrp*		newlgrp(Lgrp*);
 Mhead*		newmhead(Chan*);
 Mount*		newmount(Chan*, int, char*);
 Page*		newpage(int, Segment **, uintptr);
--- a/sys/src/9/port/proc.c
+++ b/sys/src/9/port/proc.c
@@ -1243,6 +1243,7 @@
 	Egrp *egrp;
 	Rgrp *rgrp;
 	Pgrp *pgrp;
+	Lgrp *lgrp;
 	Chan *dot;
 	void (*pt)(Proc*, int, vlong);
 
@@ -1262,6 +1263,8 @@
 	up->rgrp = nil;
 	pgrp = up->pgrp;
 	up->pgrp = nil;
+	lgrp = up->lgrp;
+	up->lgrp = nil;
 	dot = up->dot;
 	up->dot = nil;
 	qunlock(&up->debug);
@@ -1276,6 +1279,10 @@
 		cclose(dot);
 	if(pgrp != nil)
 		closepgrp(pgrp);
+	if(lgrp != nil) {
+		declimit(LTPROC, lgrp, 1);
+		closelgrp(lgrp);
+	}
 
 	if(up->parentpid == 0){
 		if(exitstr == nil)
--- a/sys/src/9/port/segment.c
+++ b/sys/src/9/port/segment.c
@@ -56,6 +56,10 @@
 	s = malloc(sizeof(Segment));
 	if(s == nil)
 		error(Enomem);
+	if(waserror()) {
+		if(s) free(s);
+		nexterror();
+	}
 	s->ref = 1;
 	s->type = type;
 	s->base = base;
@@ -70,13 +74,18 @@
+		poperror();	/* this early return must balance the waserror() added after malloc */
 		return s;
 	}
 
+	if(up && up->lgrp) s->lgrp = up->lgrp;
+	if(s->lgrp != nil) {
+		inclimit(LTPAGE, s->lgrp, size);
+		incref(s->lgrp);
+	}
+
 	mapsize = ROUND(size, PTEPERTAB)/PTEPERTAB;
 	if(mapsize > nelem(s->ssegmap)){
 		s->map = malloc(mapsize*sizeof(Pte*));
-		if(s->map == nil){
-			free(s);
+		if(s->map == nil)
 			error(Enomem);
-		}
 		s->mapsize = mapsize;
 	}
 	else{
@@ -84,6 +92,7 @@
 		s->mapsize = nelem(s->ssegmap);
 	}
 
+	poperror();
 	return s;
 }
 
@@ -122,6 +131,11 @@
 			free(s->map);
 	}
 
+	if(s->lgrp != nil) {
+		declimit(LTPAGE, s->lgrp, s->size);
+		closelgrp(s->lgrp);
+	}
+
 	if(s->profile != nil)
 		free(s->profile);
 
@@ -409,13 +423,15 @@
 		return s->base;
 
 	qlock(s);
+	if(waserror()) {
+		qunlock(s);
+		nexterror();
+	}
 
 	/* We may start with the bss overlapping the data */
 	if(addr < s->base) {
-		if(seg != BSEG || up->seg[DSEG] == nil || addr < up->seg[DSEG]->base) {
-			qunlock(s);
+		if(seg != BSEG || up->seg[DSEG] == nil || addr < up->seg[DSEG]->base)
 			error(Enovmem);
-		}
 		addr = s->base;
 	}
 
@@ -427,13 +443,14 @@
 		 * to-be-freed address space may have been passed to the kernel
 		 * already by another proc and is past the validaddr stage.
 		 */
-		if(s->ref > 1){
-			qunlock(s);
+		if(s->ref > 1)
 			error(Einuse);
-		}
 		mfreeseg(s, newtop, (s->top-newtop)/BY2PG);
 		s->top = newtop;
+		if(s->lgrp)	/* give the pages back while s->size still holds the old size */
+			declimit(LTPAGE, s->lgrp, s->size - newsize);
 		s->size = newsize;
 		qunlock(s);
+		poperror();	/* balance the waserror() taken after qlock(s) */
 		flushmmu();
 		return 0;
@@ -443,33 +459,39 @@
 		ns = up->seg[i];
 		if(ns == nil || ns == s)
 			continue;
-		if(newtop > ns->base && s->base < ns->top) {
-			qunlock(s);
+		if(newtop > ns->base && s->base < ns->top)
 			error(Esoverlap);
-		}
 	}
 
-	if(newsize > (SEGMAPSIZE*PTEPERTAB)) {
-		qunlock(s);
+	if(newsize > (SEGMAPSIZE*PTEPERTAB))
 		error(Enovmem);
-	}
+
 	mapsize = ROUND(newsize, PTEPERTAB)/PTEPERTAB;
 	if(mapsize > s->mapsize){
 		map = malloc(mapsize*sizeof(Pte*));
-		if(map == nil){
-			qunlock(s);
+		if(map == nil)
 			error(Enomem);
+		if(waserror()) {
+			free(map);
+			nexterror();
 		}
+		if(s->lgrp)
+			inclimit(LTPAGE, s->lgrp, newsize - s->size);
 		memmove(map, s->map, s->mapsize*sizeof(Pte*));
 		if(s->map != s->ssegmap)
 			free(s->map);
 		s->map = map;
 		s->mapsize = mapsize;
+		poperror();
+	} else {
+		if(s->lgrp)
+			inclimit(LTPAGE, s->lgrp, newsize - s->size);
 	}
 
 	s->top = newtop;
 	s->size = newsize;
 	qunlock(s);
+	poperror();
 	return 0;
 }
 
--- a/sys/src/9/port/sysproc.c
+++ b/sys/src/9/port/sysproc.c
@@ -96,6 +96,8 @@
 	if((p = newproc()) == nil)
 		error("no procs");
 
+	inclimit(LTPROC, up->lgrp, 1);
+
 	qlock(&up->debug);
 	qlock(&p->debug);
 
@@ -211,6 +213,10 @@
 		p->egrp = up->egrp;
 		incref(p->egrp);
 	}
+
+	/* Limit group */
+	p->lgrp = up->lgrp;
+	incref(p->lgrp);
 
 	procfork(p);
 
--- a/sys/src/9/port/userinit.c
+++ b/sys/src/9/port/userinit.c
@@ -33,6 +33,7 @@
 	up->egrp->ref = 1;
 	up->fgrp = dupfgrp(nil);
 	up->rgrp = newrgrp();
+	up->lgrp = newlgrp(nil);
 
 	/*
 	 * These are o.k. because rootinit is null.
--- a/sys/src/9/ppc/mkfile
+++ b/sys/src/9/ppc/mkfile
@@ -19,6 +19,7 @@
 	dev.$O\
 	edf.$O\
 	fault.$O\
+	limit.$O\
 	log.$O\
 	rebootcmd.$O\
 	page.$O\
--- a/sys/src/9/sgi/mkfile
+++ b/sys/src/9/sgi/mkfile
@@ -26,6 +26,7 @@
 	edf.$O\
 	fault.$O\
 	fptrap.$O\
+	limit.$O\
 	mul64fract.$O\
 	page.$O\
 	parse.$O\
--- a/sys/src/9/teg2/mkfile
+++ b/sys/src/9/teg2/mkfile
@@ -23,6 +23,7 @@
 	dev.$O\
 	edf.$O\
 	fault.$O\
+	limit.$O\
 	mul64fract.$O\
 	rebootcmd.$O\
 	page.$O\
--- a/sys/src/9/xen/mkfile
+++ b/sys/src/9/xen/mkfile
@@ -23,6 +23,7 @@
 	edf.$O\
 	fault.$O\
 	iomap.$O\
+	limit.$O\
 	page.$O\
 	parse.$O\
 	pgrp.$O\
--- a/sys/src/9/zynq/mkfile
+++ b/sys/src/9/zynq/mkfile
@@ -20,6 +20,7 @@
 	dev.$O\
 	edf.$O\
 	fault.$O\
+	limit.$O\
 	mul64fract.$O\
 	rebootcmd.$O\
 	page.$O\