shithub: riscv

Download patch

ref: 45f7b30244297a8bf87789793eec875246ed063c
parent: dd79854239a8c434b1d50e29b381cb21c62f713f
author: cinap_lenrek <cinap_lenrek@felloff.net>
date: Sun Oct 29 09:36:00 EDT 2023

ndb/dns: built-in tcp server, remove cruft, refactor

Remove external ndb/dnstcp (tcp53) service and serve
tcp clients directly from the ndb/dns process,
taking advantage of the cache.

This avoids the slow startup delay for tcp, as dnstcp
would need to parse the full network database for
each request. Also, for unknown reasons, dnstcp
would "refresh" the main server on exit, causing
another database reload for no reason.

Remove cruft:

- straddling server configuration (-o flag)

I don't think anyone has used this in 9front and
the logic is hard to maintain and test.

- UDP forwarding (-T flag)

Just use snoopy. Or use ipmux and aux/trampoline.

- zonerefreshprogram (-z flag)

This interface sucks. Anyone on the internet can
trigger a refresh and there is no way to authenticate
the sender.

- undocumented "dump" and "stats" messages.

This should instead be exposed in the file-system
interface. For now, one can access the stats with
acid(1).

Fix bugs:

Handling of unknown RR types was wrong. convM2RR()
would skip ahead in the message when hitting an
unknown RR type, but this gets out of sync with
rrloop()'s rr counter, causing spurious parse errors.

Instead, add a Block into the RR type RR.unknown,
where we can stuff the RR data of unknown requests
when the rr is not supported: rrsupported() == 0.

These "unknown" RR types can be printed, but are
discarded in rrattach() as the cache should never
contain unknown RR's.

Fix ip/snoopy -f 'tcp(dns)', need to skip the 2 byte
length field as dns code assumes UDP format.

The refactor:

Big sweep over dnresolve.c, keeping stuff local
on the stack (such as the reply message, fd's)
while putting common stuff in the Query (such as
the recursion depth).

Make logging consistent. Don't have ndb/dnsdebug
enable debugging, spewing crap on the console.

--- a/sys/man/8/ndb
+++ b/sys/man/8/ndb
@@ -1,6 +1,6 @@
 .TH NDB 8
 .SH NAME
-query, ipquery, mkhash, mkdb, mkhosts, cs, csquery, dns, dnstcp, dnsquery, dnsdebug, dnsgetip, inform \- network database
+query, ipquery, mkhash, mkdb, mkhosts, cs, csquery, dns, dnsquery, dnsdebug, dnsgetip, inform \- network database
 .SH SYNOPSIS
 .B ndb/query
 [
@@ -57,7 +57,7 @@
 .br
 .B ndb/dns
 [
-.B -norR
+.B -FnrR
 ] [
 .B -a
 .I maxage
@@ -71,9 +71,6 @@
 .B -x
 .I netmtpt
 ] [
-.B -z
-.I program
-] [
 .B -s
 [
 .I addrs...
@@ -80,19 +77,6 @@
 ]
 ]
 .br
-.B ndb/dnstcp
-[
-.B -arR
-] [
-.B -f
-.I dbfile
-] [
-.B -x
-.I netmtpt
-] [
-.I conn-dir
-]
-.br
 .B ndb/dnsquery
 [
 .B -x
@@ -403,31 +387,6 @@
 .I target
 rather than the default of 8,000.
 .TP
-.B -o
-used with
-.BR -s ,
-.B -o
-causes
-.I dns
-to assume that it straddles inside and outside networks
-and that the outside network is mounted on
-.BR /net.alt .
-Queries for inside addresses will be sent via
-.B /net/udp
-(or
-.B /net/tcp
-in response to truncated replies)
-and those for outside addresses via
-.B /net.alt/udp
-(or
-.BR /net.alt/tcp ).
-This makes
-.I dns
-suitable for serving non-Plan-9 systems in an organization with
-firewalls, DNS proxies, etc.,
-particularly if they don't work very well.
-See `Straddling Server' below for details.
-.TP
 .B -r
 act as a resolver only:
 send `recursive' queries, asking the other servers
@@ -447,7 +406,7 @@
 .B -s
 also answer domain requests sent to IP
 .I addrs
-on UDP port 53.
+on UDP/TCP port 53.
 If no IP
 .I addrs
 are given, listen on any interface on network mount point
@@ -454,13 +413,7 @@
 .IR netmtpt .
 .TP
 .B -x
-specifies the mount point of the
-network.
-.TP
-.B -z
-whenever we receive a UDP NOTIFY message, run
-.I program
-with the domain name of the area as its argument.
+specifies the mount point of the network.
 .PD
 .PP
 When the
@@ -655,87 +608,13 @@
 .BI www. ...
 a synonym for the canonical name
 .BI anna. ... .
-.SS "Straddling Server"
-Many companies have an inside network
-protected from outside access with firewalls.
-They usually provide internal `root' DNS servers
-(of varying reliability and correctness)
-that serve internal domains and pass on DNS queries for
-outside domains to the outside, relaying the results
-back and caching them for future use.
-Some companies don't even let DNS queries nor replies through
-their firewalls at all, in either direction.
-.PP
-In such a situation, running
-.B "dns -so"
-on a machine that imports access to the outside network via
-.B /net.alt
-from a machine that straddles the firewalls,
-or that straddles the firewalls itself,
-will let internal machines query such a machine
-and receive answers from outside nameservers for outside addresses
-and inside nameservers for inside addresses, giving the appearance
-of a unified domain name space,
-while bypassing the corporate DNS proxies or firewalls.
-This is different from running
-.B "dns -s"
-and
-.B "dns -sRx /net.alt -f /lib/ndb/external"
-on the same machine,
-which keeps the inside and outside namespaces entirely separate.
-.PP
-Under
-.BR -o ,
-several
-.I sys
-names are significant:
-.BR inside-dom ,
-.BR inside-ns ,
-and
-.BR outside-ns .
-.I Inside-dom
-should contain a series of
-.B dom
-pairs naming domains internal to the organization.
-.I Inside-ns
-should contain a series of
-.B ip
-pairs naming the internal DNS `root' servers.
-.I Outside-ns
-should contain a series of
-.B ip
-pairs naming the external DNS servers to consult.
 .SS "Zone Transfers and TCP"
-.I Dnstcp
-is invoked,
-usually from
-.BR /rc/bin/service/tcp53 ,
-to answer DNS queries with long answers via TCP,
-notably to transfer a zone within the database
-.I dbfile
-(default
-.BR /lib/ndb/local )
-to its invoker on the network at
-.I netmtpt
-(default
-.BR /net ).
-Standard input will be read for DNS requests and the DNS answers
-will appear on standard output.
-Recursion is disabled by
-.BR -R ;
-acting as a pure resolver is enabled by
-.BR -r .
-Unless the
-.B -a
-flag is provided, clients requesting DNS zone transfer must be listed
-with a
+TCP clients requesting DNS zone transfer must be listed with a
 .B dnsslave
 attribute for the relevant domain.
-If
-.I conn-dir
-is provided, it is assumed to be a directory within
-.IB netmtpt /tcp
-and is used to find the caller's address.
+A value of
+.B *
+means any client is accepted.
 .SS "DNS Queries and Debugging"
 .I Ndb/dnsquery
 can be used to query
--- a/sys/src/cmd/ip/snoopy/dns.c
+++ b/sys/src/cmd/ip/snoopy/dns.c
@@ -98,8 +98,6 @@
 		rr->owner->name, rr->ttl);
 	if(!quest)
 	switch(rr->type){
-	default:
-		break;
 	case Thinfo:
 		m->p = seprint(m->p, m->e, " cpu=%s os=%s",
 			rr->cpu->name, rr->os->name);
@@ -173,6 +171,11 @@
 		m->p = seprint(m->p, m->e, " flags=%d tag=%s caa=\"%.*s\"",
 			rr->caa->flags, rr->caa->tag->name,
 			rr->caa->dlen, (char*)rr->caa->data);
+	default:
+		if(rrsupported(rr->type))
+			break;
+		m->p = seprint(m->p, m->e, " unknown=%.*H",
+			rr->unknown->dlen, rr->unknown->data);
 	}
 	rrfree(rr);
 }
@@ -364,9 +367,6 @@
 	}
 }
 
-int debug;				/* for ndb/dns.h */
-ulong now = 0;
-
 void
 dnslog(char *fmt, ...)			/* don't log */
 {
@@ -377,6 +377,11 @@
  * Everything below here is copied from /sys/src/cmd/ndb/dn.c
  * without modification and can be recopied to update.
  */
+int
+rrsupported(int type)
+{
+	return type >= 0 && type < nelem(rrtname) && rrtname[type] != nil;
+}
 
 /*
  *  convert an integer RR type to it's ascii name
@@ -387,7 +392,7 @@
 	char *t;
 
 	t = nil;
-	if(type >= 0 && type <= Tall)
+	if(type >= 0 && type < nelem(rrtname))
 		t = rrtname[type];
 	if(t==nil){
 		snprint(buf, len, "%d", type);
@@ -409,7 +414,6 @@
 		rrfree(rp);
 	}
 }
-
 void
 freeserverlist(Server *s)
 {
@@ -464,6 +468,11 @@
 		rp->null = emalloc(sizeof(*rp->null));
 		setmalloctag(rp->null, rp->pc);
 		break;
+	default:
+		if(rrsupported(rp->type))
+			break;
+		rp->unknown = emalloc(sizeof(*rp->unknown));
+		setmalloctag(rp->unknown, rp->pc);
 	}
 	rp->ttl = 0;
 	rp->expire = 0;
@@ -532,6 +541,13 @@
 			memset(t, 0, sizeof *t);	/* cause trouble */
 			free(t);
 		}
+		break;
+	default:
+		if(rrsupported(rp->type))
+			break;
+		free(rp->unknown->data);
+		memset(rp->unknown, 0, sizeof *rp->unknown);	/* cause trouble */
+		free(rp->unknown);
 		break;
 	}
 
--- a/sys/src/cmd/ip/snoopy/tcp.c
+++ b/sys/src/cmd/ip/snoopy/tcp.c
@@ -166,6 +166,11 @@
 	sport = NetS(h->sport);
 	demux(p_mux, sport, dport, m, &dump);
 
+	/* drop the 2-byte length field as
+	 * proto dns assumes udp format */
+	if(strcmp(m->pr->name, "dns") == 0)
+		m->ps += 2;
+
 	m->p = seprint(m->p, m->e, "s=%d d=%d seq=%lud ack=%lud fl=%s win=%d ck=%4.4ux",
 			NetS(h->sport), dport,
 			(ulong)NetL(h->seq), (ulong)NetL(h->ack),
--- a/sys/src/cmd/ndb/convM2DNS.c
+++ b/sys/src/cmd/ndb/convM2DNS.c
@@ -46,8 +46,6 @@
 	/* hack to cope with servers that don't set Ftrunc when they should */
 	if (remain < Maxudp && need > Maxudp)
 		sp->trunc = 1;
-	if (debug && rp)
-		dnslog("malformed rr: %R", rp);
 	return 0;
 }
 
@@ -304,8 +302,6 @@
 {
 	if ((uchar)type == 0 && (type>>8) != 0) {
 		USED(where);
-//		dnslog("%s: byte-swapped type field in ptr rr from win2k",
-//			where);
 		if (sp->rcode == Rok)
 			sp->rcode = Rformat;
 		type >>= 8;
@@ -335,7 +331,6 @@
 	RR *rp;
 	Txt *t, **l;
 
-retry:
 	rp = nil;
 	NAME(dname);
 	USHORT(type);
@@ -371,11 +366,6 @@
 		len = left;
 
 	switch(type){
-	default:
-		/* unknown type, just ignore it */
-		sp->p = data + len;
-		rrfree(rp);
-		goto retry;
 	case Thinfo:
 		SYMBOL(rp->cpu);
 		SYMBOL(rp->os);
@@ -475,6 +465,12 @@
 		SYMBOL(rp->caa->tag);
 		BYTES(rp->caa->data, rp->caa->dlen);
 		break;
+	default:
+		if(rrsupported(type)){
+			sp->p = data + len;
+			break;
+		}
+		BYTES(rp->unknown->data, rp->unknown->dlen);
 	}
 	if(sp->p - data != len) {
 		char ptype[64];
@@ -497,7 +493,6 @@
 			rp = nil;
 		}
 	}
-	// if(rp) dnslog("convM2RR: got %R", rp);
 	return rp;
 }
 
--- a/sys/src/cmd/ndb/cs.c
+++ b/sys/src/cmd/ndb/cs.c
@@ -1414,7 +1414,7 @@
 				if(parseip(ip, nt->val) == -1)
 					continue;
 				maskip(ip, lifc->mask, tnet);
-				if(memcmp(net, tnet, IPaddrlen) == 0){
+				if(ipcmp(net, tnet) == 0){
 					qunlock(&ipifclock);
 					return ndbreorder(t, nt);
 				}
--- a/sys/src/cmd/ndb/dblookup.c
+++ b/sys/src/cmd/ndb/dblookup.c
@@ -57,9 +57,6 @@
 	[Tcaa]		1,
 };
 
-/* straddle server configuration */
-static Ndbtuple *indoms, *innmsrvs, *outnmsrvs;
-
 static void
 nstrcpy(char *to, char *from, int len)
 {
@@ -260,7 +257,8 @@
 		f = caarr;
 		break;
 	default:
-//		dnslog("dblookup1(%s) bad type", name);
+		if(debug)
+			dnslog("dblookup1(%s) bad type", name);
 		return nil;
 	}
 
@@ -659,10 +657,8 @@
 	}
 	else if(strcmp(pair->attr, "ns") == 0)
 		rp = nsrr(entry, pair);
-	else if(strcmp(pair->attr, "soa") == 0) {
+	else if(strcmp(pair->attr, "soa") == 0)
 		rp = soarr(entry, pair);
-		addarea(dp, rp, pair);
-	}
 	else if(strcmp(pair->attr, "mx") == 0)
 		rp = mxrr(entry, pair);
 	else if(strcmp(pair->attr, "srv") == 0)
@@ -681,9 +677,11 @@
 	rp->owner = dp;
 	rp->db = 1;
 	rp->ttl = intval(entry, pair, "ttl", rp->ttl);
+	if(rp->type == Tsoa)
+		addarea(rp, pair);
 	rrattach(rp, Notauthoritative);
-	dnagenever(dp);
 }
+
 static void
 dbtuple2cache(Ndbtuple *t)
 {
@@ -722,29 +720,6 @@
 	}
 }
 
-/* called with dblock held */
-static void
-loaddomsrvs(void)
-{
-	Ndbs s;
-
-	if (!cfg.inside || !cfg.straddle || !cfg.serve)
-		return;
-	if (indoms) {
-		ndbfree(indoms);
-		ndbfree(innmsrvs);
-		ndbfree(outnmsrvs);
-		indoms = innmsrvs = outnmsrvs = nil;
-	}
-	if (db == nil)
-		opendatabase();
-	free(ndbgetvalue(db, &s, "sys", "inside-dom", "dom", &indoms));
-	free(ndbgetvalue(db, &s, "sys", "inside-ns",  "ip",  &innmsrvs));
-	free(ndbgetvalue(db, &s, "sys", "outside-ns", "ip",  &outnmsrvs));
-	dnslog("[%d] ndb changed: reloaded inside-dom, inside-ns, outside-ns",
-		getpid());
-}
-
 /*
  *  get all my xxx
  *  caller ndbfrees the result
@@ -786,6 +761,7 @@
 	Ndb *ndb;
 	Dir *d;
 	static Ndbtuple *olddoms;
+	static Area *oldowned, *olddelegated;
 	static ulong lastcheck, lastyoungest;
 
 	/* no faster than once every 2 minutes */
@@ -792,8 +768,6 @@
 	if(now < lastcheck + 2*Min && !doit)
 		return;
 
-	refresh_areas(owned);
-
 	qlock(&dblock);
 	if(opendatabase() < 0){
 		qunlock(&dblock);
@@ -824,21 +798,21 @@
 			}
 		if(!doit && youngest == lastyoungest)
 			break;
+		doit = 0;
+		lastyoungest = youngest;
 
-		/* forget our area definition */
-		freearea(&owned);
-		freearea(&delegated);
-
 		/* reopen all the files (to get oldest for time stamp) */
 		for(ndb = db; ndb; ndb = ndb->next)
 			ndbreopen(ndb);
 
-		/* reload straddle-server configuration */
-		loaddomsrvs();
-
 		/* mark all db records as timed out */
 		dnagedb();
 
+		/* forget our area definition */
+		freeareas(&oldowned), freeareas(&olddelegated);
+		oldowned = owned, olddelegated = delegated;
+		owned = nil, delegated = nil;
+
 		if(cfg.cachedb){
 			/* read in new entries */
 			for(ndb = db; ndb; ndb = ndb->next)
@@ -851,8 +825,6 @@
 		 */
 		dnauthdb();
 
-		doit = 0;
-		lastyoungest = youngest;
 		createptrs();
 	}
 	qunlock(&dblock);
@@ -915,8 +887,8 @@
 		return;
 	}
 
-	/* reject our own ip addresses so we don't query ourselves via udp */
-	if(myip(ip)){
+	/* reject our own ip addresses so we don't query ourselves */
+	if(cfg.serve && myip(ip)){
 		dnslog("rejecting my ip %I as local dns server", ip);
 		return;
 	}
@@ -953,16 +925,13 @@
 	rp->db = 1;
 	rp->ttl = 10*Min;
 	rrattach(rp, Authoritative);	/* will not attach rrs in my area */
-	dnagenever(dp);
 
 	rp = rralloc(type);
 	rp->ip = ipdp;
 	rp->owner = nsdp;
-	rp->local = 1;
 	rp->db = 1;
 	rp->ttl = 10*Min;
 	rrattach(rp, Authoritative);	/* will not attach rrs in my area */
-	dnagenever(nsdp);
 
 	dnslog("added local dns server %s at %I", buf, ip);
 }
@@ -1018,7 +987,6 @@
 	rp->db = 1;
 	rp->ttl = 10*Min;
 	rrattach(rp, Authoritative);
-	dnagenever(dp);
 }
 
 /*
@@ -1212,77 +1180,4 @@
 {
 	createv4ptrs();
 	createv6ptrs();
-}
-
-/*
- * is this domain (or DOMAIN or Domain or dOMAIN)
- * internal to our organisation (behind our firewall)?
- * only inside straddling servers care, everybody else gets told `yes',
- * so they'll use mntpt for their queries.
- */
-int
-insideaddr(char *dom)
-{
-	int domlen, vallen, rv;
-	Ndbtuple *t;
-
-	if (!cfg.inside || !cfg.straddle || !cfg.serve)
-		return 1;
-	if (dom[0] == '\0' || strcmp(dom, ".") == 0)	/* dns root? */
-		return 1;			/* hack for initialisation */
-
-	qlock(&dblock);
-	if (indoms == nil)
-		loaddomsrvs();
-	if (indoms == nil) {
-		qunlock(&dblock);
-		return 1;  /* no "inside-dom" sys, try inside nameservers */
-	}
-
-	rv = 0;
-	domlen = strlen(dom);
-	for (t = indoms; t != nil; t = t->entry) {
-		if (strcmp(t->attr, "dom") != 0)
-			continue;
-		vallen = strlen(t->val);
-		if (cistrcmp(dom, t->val) == 0 ||
-		    domlen > vallen &&
-		     cistrcmp(dom + domlen - vallen, t->val) == 0 &&
-		     dom[domlen - vallen - 1] == '.') {
-			rv = 1;
-			break;
-		}
-	}
-	qunlock(&dblock);
-	return rv;
-}
-
-int
-insidens(uchar *ip)
-{
-	uchar ipa[IPaddrlen];
-	Ndbtuple *t;
-
-	for (t = innmsrvs; t != nil; t = t->entry)
-		if (strcmp(t->attr, "ip") == 0) {
-			if (parseip(ipa, t->val) != -1 && ipcmp(ipa, ip) == 0)
-				return 1;
-		}
-	return 0;
-}
-
-int
-outsidensip(int n, uchar *ip)
-{
-	int i;
-	Ndbtuple *t;
-
-	i = 0;
-	for (t = outnmsrvs; t != nil; t = t->entry)
-		if (strcmp(t->attr, "ip") == 0 && i++ == n) {
-			if (parseip(ip, t->val) == -1)
-				return -1;
-			return 0;
-		}
-	return -1;
 }
--- a/sys/src/cmd/ndb/dn.c
+++ b/sys/src/cmd/ndb/dn.c
@@ -8,30 +8,30 @@
  *  this comment used to say `our target is 4000 names cached, this should
  *  be larger on large servers'.  dns at Bell Labs starts off with
  *  about 1780 names.
- *
- * aging seems to corrupt the cache, so raise the trigger from 4000 until we
- * figure it out.
  */
 enum {
 	/* these settings will trigger frequent aging */
 	Deftarget	= 4000,
-	Minage		=  1*Min,
+	Defmaxage	= 60*Min,	/* default domain name max. age */
 	Defagefreq	= 15*Min,	/* age names this often (seconds) */
+	Minage		=  1*Min,
+
+	/* length of domain name hash table */
+	HTLEN		= 4*1024,
 };
 
 /*
- *  Hash table for domain names.  The hash is based only on the
- *  first element of the domain name.
+ *  Hash table for domain names.
  */
-DN *ht[HTLEN];
+static DN *ht[HTLEN];
 
 static struct {
 	QLock;
 	ulong	names;		/* names allocated */
 	ulong	oldest;		/* longest we'll leave a name around */
-	ulong	lastage;
+	ulong	lastage;	/* time of lask dnageall() */
 	ushort	id;		/* same size as in packet */
-	uchar	mark;		/* mark bit for gc */
+	uchar	mark;		/* current mark bit for gc */
 	int	active[2];	/* number of active processes per mark */
 } dnvars;
 
@@ -134,9 +134,13 @@
 [Oupdate]	"update",
 };
 
+int maxage = Defmaxage;
 ulong target = Deftarget;
-Lock dnlock;
+int needrefresh;
+ulong now;
+uvlong nowms;
 
+static Lock dnlock;
 static ulong agefreq = Defagefreq;
 
 static int rrequiv(RR *r1, RR *r2);
@@ -172,10 +176,13 @@
 
 	timems();
 
+	if (maxage <= 0)
+		maxage = Defmaxage;
+
 	dnvars.names = 0;
 	dnvars.oldest = maxage;
 	dnvars.lastage = now;
-	dnvars.id = truerand();	/* don't start with same id every time */
+	dnvars.id = 0;
 	dnvars.mark = 0;
 
 	notify(ding);
@@ -201,6 +208,8 @@
 static void
 dnmark(DN *dp)
 {
+	if(dp == nil)
+		return;
 	dp->mark = (dp->mark & ~1) | dnvars.mark;
 }
 
@@ -282,78 +291,6 @@
 }
 
 /*
- * dump the stats
- */
-void
-dnstats(char *file)
-{
-	int i, fd;
-
-	fd = create(file, OWRITE, 0666);
-	if(fd < 0)
-		return;
-
-	qlock(&stats);
-	fprint(fd, "# system %s\n", sysname());
-	fprint(fd, "# slave procs high-water mark\t%lud\n", stats.slavehiwat);
-	fprint(fd, "# queries received by 9p\t%lud\n", stats.qrecvd9p);
-	fprint(fd, "# queries received by udp\t%lud\n", stats.qrecvdudp);
-	fprint(fd, "# queries answered from memory\t%lud\n", stats.answinmem);
-	fprint(fd, "# queries sent by udp\t%lud\n", stats.qsent);
-	for (i = 0; i < nelem(stats.under10ths); i++)
-		if (stats.under10ths[i] || i == nelem(stats.under10ths) - 1)
-			fprint(fd, "# responses arriving within %.1f s.\t%lud\n",
-				(double)(i+1)/10, stats.under10ths[i]);
-	fprint(fd, "\n# queries sent & timed-out\t%lud\n", stats.tmout);
-	fprint(fd, "# cname queries timed-out\t%lud\n", stats.tmoutcname);
-	fprint(fd, "# ipv6  queries timed-out\t%lud\n", stats.tmoutv6);
-	fprint(fd, "\n# negative answers received\t%lud\n", stats.negans);
-	fprint(fd, "# negative answers w Rserver set\t%lud\n", stats.negserver);
-	fprint(fd, "# negative answers w bad delegation\t%lud\n",
-		stats.negbaddeleg);
-	fprint(fd, "# negative answers w bad delegation & no answers\t%lud\n",
-		stats.negbdnoans);
-	fprint(fd, "# negative answers w no Rname set\t%lud\n", stats.negnorname);
-	fprint(fd, "# negative answers cached\t%lud\n", stats.negcached);
-	qunlock(&stats);
-
-	lock(&dnlock);
-	fprint(fd, "\n# domain names %lud target %lud\n", dnvars.names, target);
-	unlock(&dnlock);
-	close(fd);
-}
-
-/*
- *  dump the cache
- */
-void
-dndump(char *file)
-{
-	int i, fd;
-	DN *dp;
-	RR *rp;
-
-	fd = create(file, OWRITE, 0666);
-	if(fd < 0)
-		return;
-
-	lock(&dnlock);
-	for(i = 0; i < HTLEN; i++)
-		for(dp = ht[i]; dp; dp = dp->next){
-			fprint(fd, "%s\n", dp->name);
-			for(rp = dp->rr; rp; rp = rp->next) {
-				fprint(fd, "\t%R %c%c %ld/%lud\n",
-					rp, rp->auth? 'A': 'U',
-					rp->db? 'D': 'N', (long)(rp->expire - now), rp->ttl);
-				if (rronlist(rp, rp->next))
-					fprint(fd, "*** duplicate:\n");
-			}
-		}
-	unlock(&dnlock);
-	close(fd);
-}
-
-/*
  *  purge all records
  */
 void
@@ -378,23 +315,22 @@
 }
 
 /*
- *  return all refernced domain names of a RR.
+ *  mark all referenced domain names of an RR.
  *  call with dnlock held.
  */
-static int
-rrnames(RR *rp, DN **dn)
+static void
+rrmark(RR *rp)
 {
-	int n = 0;
-
-	dn[n++] = rp->owner;
+	dnmark(rp->owner);
 	if(rp->negative){
-		if((dn[n] = rp->negsoaowner) != nil) n++;
-		return n;
+		dnmark(rp->negsoaowner);
+		return;
 	}
 	switch(rp->type){
 	case Thinfo:
-		if((dn[n] = rp->cpu) != nil) n++;
-		if((dn[n] = rp->os) != nil) n++;
+		dnmark(rp->cpu);
+		dnmark(rp->os);
+		break;
 	case Ttxt:
 		break;
 	case Tcname:
@@ -404,57 +340,41 @@
 	case Tns:
 	case Tmx:
 	case Tsrv:
-		if((dn[n] = rp->host) != nil) n++;
+		dnmark(rp->host);
 		break;
 	case Tmg:
 	case Tmr:
-		if((dn[n] = rp->mb) != nil) n++;
+		dnmark(rp->mb);
 		break;
 	case Tminfo:
-		if((dn[n] = rp->rmb) != nil) n++;
-		if((dn[n] = rp->mb) != nil) n++;
+		dnmark(rp->rmb);
+		dnmark(rp->mb);
 		break;
 	case Trp:
-		if((dn[n] = rp->rmb) != nil) n++;
-		if((dn[n] = rp->rp) != nil) n++;
+		dnmark(rp->rmb);
+		dnmark(rp->rp);
 		break;
 	case Ta:
 	case Taaaa:
-		if((dn[n] = rp->ip) != nil) n++;
+		dnmark(rp->ip);
 		break;
 	case Tptr:
-		if((dn[n] = rp->ptr) != nil) n++;
+		dnmark(rp->ptr);
 		break;
 	case Tsoa:
-		if((dn[n] = rp->host) != nil) n++;
-		if((dn[n] = rp->rmb) != nil) n++;
+		dnmark(rp->host);
+		dnmark(rp->rmb);
 		break;
 	case Tsig:
-		if((dn[n] = rp->sig->signer) != nil) n++;
+		dnmark(rp->sig->signer);
 		break;
 	case Tcaa:
-		if((dn[n] = rp->caa->tag) != nil) n++;
+		dnmark(rp->caa->tag);
 		break;
 	}
-	return n;	
 }
 
 /*
- *  mark all refernced domain names of an RR.
- *  call with dnlock held.
- */
-static void
-rrmark(RR *rp)
-{
-	DN *dn[RRnames];
-	int i, n;
-
-	n = rrnames(rp, dn);
-	for(i = 0; i < n; i++)
-		dnmark(dn[i]);
-}
-
-/*
  *  delete head of *l and free the old head.
  *  call with dnlock held.
  */
@@ -472,7 +392,13 @@
 }
 
 /*
- *  check the age of resource records, free any that have timed out.
+ *  check the age of resource records,
+ *  delete any that have timed out and
+ *  mark referenced domain names of the remaining records.
+ *
+ *  note that db records are handled by dnagedb()/dnauthdb()
+ *  so they are ignored here.
+ *
  *  call with dnlock held.
  */
 static void
@@ -480,42 +406,21 @@
 {
 	RR **l, *rp;
 
-	/* see dnagenever() below */
-	if(dp->mark & ~1)
-		return;
-
 	l = &dp->rr;
 	while ((rp = *l) != nil){
 		assert(rp->cached);
+		assert(rp->owner == dp);
+
 		if(!rp->db && ((long)(rp->expire - now) <= 0
 		|| (long)(now - (rp->expire - rp->ttl)) > dnvars.oldest))
 			rrdelhead(l); /* rp == *l before; *l == rp->next after */
-		else
+		else {
 			l = &rp->next;
+			rrmark(rp);
+		}
 	}
 }
 
-/* mark a domain name and those in its RRs as never to be aged */
-void
-dnagenever(DN *dp)
-{
-	DN *dn[RRnames];
-	RR *rp;
-	int i, n;
-
-	lock(&dnlock);
-
-	/* mark all referenced domain names */
-	for(rp = dp->rr; rp; rp = rp->next){
-		assert(rp->owner == dp);
-		n = rrnames(rp, dn);
-		for(i = 0; i < n; i++)
-			dn[i]->mark |= ~1;
-	}
-
-	unlock(&dnlock);
-}
-
 /*
  *  periodicly sweep for old records and remove unreferenced domain names
  *
@@ -522,9 +427,9 @@
  *  this is called once all activity ceased for the non-current
  *  mark bit (previous cycle), meaning there are no more
  *  unaccounted references to DN's with the non-current mark
- *  from other activity procs.
+ *  from other activity slaves.
  *
- *  this can run concurrently to current mark bit activity procs
+ *  this can run concurrently to current mark bit activity slaves
  *  as DN's with current mark bit are not freed in this cycle, but
  *  in the next cycle when the previously current mark bit activity
  *  has ceased.
@@ -533,7 +438,6 @@
 dnageall(int doit)
 {
 	DN *dp, **l;
-	RR *rp;
 	int i;
 
 	if(!doit){
@@ -563,19 +467,15 @@
 
 	lock(&dnlock);
 
-	/* timeout all expired records */
+	/*
+	 * delete all expired records and
+	 * mark referenced domain names
+	 * of the remaining records.
+	 */
 	for(i = 0; i < HTLEN; i++)
 		for(dp = ht[i]; dp; dp = dp->next)
 			dnage(dp);
 
-	/* mark all referenced domain names */
-	for(i = 0; i < HTLEN; i++)
-		for(dp = ht[i]; dp; dp = dp->next)
-			for(rp = dp->rr; rp; rp = rp->next){
-				assert(rp->owner == dp);
-				rrmark(rp);
-			}
-
 	/* bump mark */
 	dnvars.mark ^= 1;
 	assert(dnvars.active[dnvars.mark] == 0);
@@ -612,10 +512,8 @@
 
 	lock(&dnlock);
 
-	/* time out all database entries */
 	for(i = 0; i < HTLEN; i++)
 		for(dp = ht[i]; dp; dp = dp->next) {
-			dp->mark &= 1;
 			for(rp = dp->rr; rp; rp = rp->next)
 				if(rp->db)
 					rp->expire = 0;
@@ -632,7 +530,6 @@
 dnauthdb(void)
 {
 	int i;
-	ulong minttl;
 	Area *area;
 	DN *dp;
 	RR *rp, **l;
@@ -639,7 +536,6 @@
 
 	lock(&dnlock);
 
-	/* time out all database entries */
 	for(i = 0; i < HTLEN; i++)
 		for(dp = ht[i]; dp; dp = dp->next){
 			area = inmyarea(dp->name);
@@ -651,12 +547,16 @@
 						continue;
 					}
 					if(area){
-						minttl = area->soarr->soa->minttl;
+						ulong minttl = area->soarr->soa->minttl;
 						if(rp->ttl < minttl)
 							rp->ttl = minttl;
 						rp->auth = 1;
 					} else if(rp->type == Tns && inmyarea(rp->host->name))
 						rp->auth = 1;
+				} else if(area){
+					/* no outside spoofing */
+					rrdelhead(l);
+					continue;
 				}
 				l = &rp->next;
 			}
@@ -672,11 +572,6 @@
 void
 getactivity(Request *req)
 {
-	if(traceactivity)
-		dnslog("get: %d active by pid %d from %p",
-			dnvars.active[0] + dnvars.active[1],
-			getpid(), getcallerpc(&req));
-
 	qlock(&dnvars);
 	req->aux = nil;
 	req->id = ++dnvars.id;
@@ -688,11 +583,6 @@
 void
 putactivity(Request *req)
 {
-	if(traceactivity)
-		dnslog("put: %d active by pid %d from %p",
-			dnvars.active[0] + dnvars.active[1],
-			getpid(), getcallerpc(&req));
-
 	qlock(&dnvars);
 	dnvars.active[req->mark]--;
 	assert(dnvars.active[req->mark] >= 0);
@@ -704,17 +594,6 @@
 	qunlock(&dnvars);
 }
 
-int
-rrlistlen(RR *rp)
-{
-	int n;
-
-	n = 0;
-	for(; rp; rp = rp->next)
-		++n;
-	return n;
-}
-
 /*
  *  Attach a single resource record to a domain name (new->owner).
  *	- Avoid duplicates with already present RR's
@@ -737,7 +616,7 @@
 	dp = new->owner;
 	assert(dp != nil);
 	new->auth |= auth;
-	new->next = 0;
+	new->next = nil;
 
 	/*
 	 * try not to let responses expire before we
@@ -758,6 +637,7 @@
 	l = &dp->rr;
 	for(rp = *l; rp; rp = *l){
 		assert(rp->cached);
+		assert(rp->owner == dp);
 		if(rp->type == new->type)
 			break;
 		l = &rp->next;
@@ -774,6 +654,7 @@
 	 */
 	while ((rp = *l) != nil){
 		assert(rp->cached);
+		assert(rp->owner == dp);
 		if(rp->type != new->type)
 			break;
 
@@ -832,15 +713,14 @@
 rrattach(RR *rp, int auth)
 {
 	RR *next;
-	DN *dp;
 
 	lock(&dnlock);
 	for(; rp; rp = next){
 		next = rp->next;
 		rp->next = nil;
-		dp = rp->owner;
 		/* avoid any outside spoofing */
-		if(cfg.cachedb && !rp->db && inmyarea(dp->name))
+		if(cfg.cachedb && !rp->db && inmyarea(rp->owner->name)
+		|| !rrsupported(rp->type))
 			rrfree(rp);
 		else
 			rrattach1(rp, auth);
@@ -932,6 +812,8 @@
 		}
 		break;
 	default:
+		/* cache must only contain supported RR's */
+		assert(rrsupported(rp->type));
 		*nrp = *rp;
 		break;
 	}
@@ -939,6 +821,9 @@
 	setmalloctag(nrp, nrp->pc);
 	nrp->cached = 0;
 	nrp->next = nil;
+
+	rrmark(nrp);
+
 	*last = nrp;
 	return &nrp->next;
 }
@@ -1021,14 +906,73 @@
 		}
 
 out:
-	for(rp = first; rp; rp = rp->next)
-		rrmark(rp);
 	unlock(&dnlock);
 	unique(first);
 	return first;
 }
 
+static int
+inzone(DN *dp, char *name, int namelen, int depth)
+{
+	int n;
+
+	for(n = 0; dp->name[n]; n++)
+		if(dp->name[n] == '.')
+			depth--;
+
+	if(depth != 1 || n < namelen)
+		return 0;
+	if(cistrcmp(name, dp->name + n - namelen) != 0)
+		return 0;
+	if(n > namelen && dp->name[n - namelen - 1] != '.')
+		return 0;
+	return 1;
+}
+
 /*
+ *  return all resources (except SOA) of a zone.
+ */
+RR*
+rrgetzone(char *name)
+{	
+	int found, depth, h, n;
+	RR *rp, *first, **l;
+	DN *dp;
+
+	for(n = 0, depth = 1; name[n]; n++)
+		if(name[n] == '.')
+			depth++;
+
+	first = nil;
+	l = &first;
+	lock(&dnlock);
+	do {
+		found = 0;
+		for(h = 0; h < HTLEN; h++)
+			for(dp = ht[h]; dp; dp = dp->next)
+				if(inzone(dp, name, n, depth)){
+					for(rp = dp->rr; rp; rp = rp->next){
+						/*
+						 * there shouldn't be negatives,
+						 * but just in case.
+						 * don't send any soa's,
+						 * ns's are enough.
+						 */
+						if (rp->negative ||
+						    rp->type == Tsoa)
+							continue;
+						l = rrcopy(rp, l);
+					}
+					found = 1;
+				}
+		depth++;
+	} while(found);
+	unlock(&dnlock);
+
+	return first;
+}
+
+/*
  *  convert an ascii RR type name to its integer representation
  */
 int
@@ -1340,6 +1284,15 @@
 				rp->caa->flags, dnname(rp->caa->tag),
 				rp->caa->dlen, rp->caa->data);
 		break;
+	default:
+		if(rrsupported(rp->type))
+			break;
+		if (rp->unknown == nil)
+			fmtprint(&fstr, "\t<null>");
+		else
+			fmtprint(&fstr, "\t%.*H",
+				rp->unknown->dlen,
+				rp->unknown->data);
 	}
 out:
 	strp = fmtstrflush(&fstr);
@@ -1479,6 +1432,13 @@
 				rp->caa->flags, dnname(rp->caa->tag),
 				rp->caa->dlen, rp->caa->data);
 		break;
+	default:
+		if (rp->unknown == nil)
+			fmtprint(&fstr, " type%d=<null>", rp->type);
+		else
+			fmtprint(&fstr, " type%d=%.*H", rp->type,
+				rp->unknown->dlen,
+				rp->unknown->data);
 	}
 out:
 	strp = fmtstrflush(&fstr);
@@ -1525,8 +1485,7 @@
 
 	procs = dnvars.active[0] + dnvars.active[1];
 	if(procs >= Maxactive){
-		if(traceactivity)
-			dnslog("[%d] too much activity", getpid());
+		dnslog("%d: [%d] too much activity", req->id, getpid());
 		return;
 	}
 
@@ -1540,8 +1499,6 @@
 		break;
 	case 0:
 		procsetname("request slave of pid %d", ppid);
-		if(traceactivity)
-			dnslog("[%d] take activity from %d", getpid(), ppid);
 
 		/*
 		 * this relies on rfork producing separate, initially-identical
@@ -1626,6 +1583,9 @@
 		return txtequiv(r1->txt, r2->txt);
 	case Tcaa:
 		return r1->caa->flags == r2->caa->flags && r1->caa->tag == r2->caa->tag && blockequiv(r1->caa, r2->caa);
+	default:
+		if(!rrsupported(r1->type))
+			return 0;	/* unknown never equal */
 	}
 	return 1;
 }
@@ -1775,7 +1735,7 @@
  *  pointer records for them.
  */
 void
-dnptr(uchar *net, uchar *mask, char *dom, int forwtype, int subdoms, int ttl)
+dnptr(uchar *net, uchar *mask, char *dom, int type, int subdoms, int ttl)
 {
 	int i, j, len;
 	char *p, *e;
@@ -1783,48 +1743,54 @@
 	uchar *ipp;
 	uchar ip[IPaddrlen], nnet[IPaddrlen];
 	uchar nibip[IPaddrlen*2];
+	RR *rp, *first, **l;
 	DN *dp;
-	RR *rp, *nrp, *first, **l;
 
 	l = &first;
 	first = nil;
-	for(i = 0; i < HTLEN; i++)
-		for(dp = ht[i]; dp; dp = dp->next)
+
+	lock(&dnlock);
+	for(i = 0; i < HTLEN; i++){
+		for(dp = ht[i]; dp; dp = dp->next){
 			for(rp = dp->rr; rp; rp = rp->next){
-				if(rp->type != forwtype || rp->negative)
+				if(rp->type != type || rp->negative)
 					continue;
-				parseip(ip, rp->ip->name);
+				if(parseip(ip, rp->ip->name) == -1)
+					continue;
 				maskip(ip, mask, nnet);
 				if(ipcmp(net, nnet) != 0)
 					continue;
+				l = rrcopy(rp, l);
+			}
+		}
+	}
+	unlock(&dnlock);
 
-				ipp = ip;
-				len = IPaddrlen;
-				if (forwtype == Taaaa) {
-					bytes2nibbles(nibip, ip, IPaddrlen);
-					ipp = nibip;
-					len = 2*IPaddrlen;
-				}
+	for(rp = first; rp; rp = rp->next){
+		if(parseip(ip, rp->ip->name) == -1)
+			continue;
+		maskip(ip, mask, nnet);
+		if(ipcmp(net, nnet) != 0)
+			continue;
 
-				p = ptr;
-				e = ptr+sizeof(ptr);
-				for(j = len - 1; j >= len - subdoms; j--)
-					p = seprint(p, e, (forwtype == Ta?
-						"%d.": "%x."), ipp[j]);
-				seprint(p, e, "%s", dom);
+		ipp = ip;
+		len = IPaddrlen;
+		if (type == Taaaa) {
+			bytes2nibbles(nibip, ip, IPaddrlen);
+			ipp = nibip;
+			len = 2*IPaddrlen;
+		}
 
-				nrp = mkptr(dp, ptr, ttl);
-				*l = nrp;
-				l = &nrp->next;
-			}
+		p = ptr;
+		e = ptr+sizeof(ptr);
+		for(j = len - 1; j >= len - subdoms; j--)
+			p = seprint(p, e, (type == Ta?
+				"%d.": "%x."), ipp[j]);
+		seprint(p, e, "%s", dom);
 
-	for(rp = first; rp != nil; rp = nrp){
-		nrp = rp->next;
-		rp->next = nil;
-		dp = rp->owner;
-		rrattach(rp, Authoritative);
-		dnagenever(dp);
+		rrattach(mkptr(rp->owner, ptr, ttl), Authoritative);
 	}
+	rrfreelist(first);
 }
 
 void
@@ -1908,11 +1874,10 @@
 {
 	RR *rp;
 
+	assert((type & ~0xFFFF) == 0);
 	rp = emalloc(sizeof(*rp));
 	rp->pc = getcallerpc(&type);
 	rp->type = type;
-	if (rp->type != type)
-		dnslog("rralloc: bogus type %d", type);
 	setmalloctag(rp, rp->pc);
 	switch(type){
 	case Tsoa:
@@ -1945,10 +1910,15 @@
 		rp->null = emalloc(sizeof(*rp->null));
 		setmalloctag(rp->null, rp->pc);
 		break;
+	default:
+		if(rrsupported(type))
+			break;
+		rp->unknown = emalloc(sizeof(*rp->unknown));
+		setmalloctag(rp->unknown, rp->pc);
 	}
 	rp->ttl = 0;
 	rp->expire = 0;
-	rp->next = 0;
+	rp->next = nil;
 	return rp;
 }
 
@@ -2006,8 +1976,14 @@
 			free(t);
 		}
 		break;
+	default:
+		if(rrsupported(rp->type))
+			break;
+		free(rp->unknown->data);
+		memset(rp->unknown, 0, sizeof *rp->unknown);	/* cause trouble */
+		free(rp->unknown);
+		break;
 	}
-
 	memset(rp, 0, sizeof *rp);		/* cause trouble */
 	free(rp);
 }
--- a/sys/src/cmd/ndb/dnarea.c
+++ b/sys/src/cmd/ndb/dnarea.c
@@ -44,15 +44,16 @@
  *  we serve
  */
 void
-addarea(DN *dp, RR *rp, Ndbtuple *t)
+addarea(RR *rp, Ndbtuple *t)
 {
+	DN *dp;
 	Area *s;
 	Area **l;
 	int len;
 
+	dp = rp->owner;
 	len = strlen(dp->name);
 
-	lock(&dnlock);
 	if(t->val[0])
 		l = &delegated;
 	else
@@ -61,10 +62,8 @@
 	for (s = *l; s != nil; l = &s->next, s = s->next){
 		if(s->len < len)
 			break;
-		if(s->soarr->owner == dp) {
-			unlock(&dnlock);
+		if(s->soarr->owner == dp)
 			return;		/* we've already got one */
-		}
 	}
 
 	/*
@@ -75,9 +74,6 @@
 	s = emalloc(sizeof(*s));
 	s->len = len;
 	rrcopy(rp, &s->soarr);
-	s->soarr->owner = dp;
-	s->soarr->db = 1;
-	s->soarr->ttl = Hour;
 	s->neednotify = 1;
 	s->needrefresh = 0;
 
@@ -87,59 +83,17 @@
 
 	s->next = *l;
 	*l = s;
-	unlock(&dnlock);
 }
 
 void
-freearea(Area **l)
+freeareas(Area **l)
 {
 	Area *s;
 
-	lock(&dnlock);
 	while(s = *l){
 		*l = s->next;
 		rrfree(s->soarr);
 		memset(s, 0, sizeof *s);	/* cause trouble */
 		free(s);
-	}
-	unlock(&dnlock);
-}
-
-/*
- * refresh all areas that need it
- *  this entails running a command 'zonerefreshprogram'.  This could
- *  copy over databases from elsewhere or just do a zone transfer.
- */
-void
-refresh_areas(Area *s)
-{
-	int pid;
-	Waitmsg *w;
-
-	for(; s != nil; s = s->next){
-		if(!s->needrefresh)
-			continue;
-
-		if(zonerefreshprogram == nil){
-			s->needrefresh = 0;
-			continue;
-		}
-
-		pid = fork();
-		if (pid == -1) {
-			sleep(1000);	/* don't fork again immediately */
-			continue;
-		}
-		if (pid == 0){
-			execl(zonerefreshprogram, "zonerefresh",
-				s->soarr->owner->name, nil);
-			exits("exec zonerefresh failed");
-		}
-		while ((w = wait()) != nil && w->pid != pid)
-			free(w);
-		if (w && w->pid == pid)
-			if(w->msg == nil || *w->msg == '\0')
-				s->needrefresh = 0;
-		free(w);
 	}
 }
--- a/sys/src/cmd/ndb/dnnotify.c
+++ b/sys/src/cmd/ndb/dnnotify.c
@@ -7,13 +7,12 @@
 
 /* get a notification from another system of a changed zone */
 void
-dnnotify(DNSmsg *reqp, DNSmsg *repp, Request *)
+dnnotify(DNSmsg *reqp, DNSmsg *repp, Request *req)
 {
 	RR *tp;
 	Area *a;
 
 	/* move one question from reqp to repp */
-	memset(repp, 0, sizeof(*repp));
 	tp = reqp->qd;
 	reqp->qd = tp->next;
 	tp->next = 0;
@@ -21,27 +20,24 @@
 	repp->id = reqp->id;
 	repp->flags = Fresp  | Onotify | Fauth;
 
-	/* anything to do? */
-	if(zonerefreshprogram == nil)
-		return;
-
 	/* make sure its the right type */
 	if(repp->qd->type != Tsoa)
 		return;
 
-	dnslog("notification for %s", repp->qd->owner->name);
-
 	/* is it something we care about? */
 	a = inmyarea(repp->qd->owner->name);
 	if(a == nil)
 		return;
 
-	dnslog("serial old %lud new %lud", a->soarr->soa->serial,
-		repp->qd->soa->serial);
-
 	/* do nothing if it didn't change */
-	if(a->soarr->soa->serial != repp->qd->soa->serial)
-		a->needrefresh = 1;
+	if(a->soarr->soa->serial == repp->qd->soa->serial)
+		return;
+
+	dnslog("%d: notification for %s: serial old %lud new %lud",
+		req->id, repp->qd->owner->name,
+		a->soarr->soa->serial, repp->qd->soa->serial);
+
+	a->needrefresh++;
 }
 
 static int
@@ -53,21 +49,31 @@
 	nips = 0;
 	if(nips <= maxips)
 		return nips;
+
+	if(strcmp(name, "*") == 0)
+		return nips;
+
 	if(strcmp(ipattr(name), "ip") == 0) {
 		if(parseip(ips, name) != -1 && !myip(ips))
 			nips++;
 		return nips;
 	}
-	list = dnresolve(name, Cin, Ta, req, nil, 0, Recurse, 0, nil);
-	rrcat(&list, dnresolve(name, Cin, Taaaa, req, nil, 0, Recurse, 0, nil));
-	rp = list = randomize(list);
-	while(rp != nil && nips < maxips){
+
+	rp = dnresolve(name, Cin, Ta, req, nil, 0, Recurse, 0, nil);
+	rrfreelist(rrremneg(&rp));
+	list = rp;
+	rp = dnresolve(name, Cin, Taaaa, req, nil, 0, Recurse, 0, nil);
+	rrfreelist(rrremneg(&rp));
+	rrcat(&list, rp);
+
+	list = randomize(list);
+	for(rp = list; rp != nil && nips < maxips; rp = rp->next){
 		uchar *ip = ips + nips*IPaddrlen;
 		if(parseip(ip, rp->ip->name) != -1 && !myip(ip))
 			nips++;
-		rp = rp->next;
 	}
 	rrfreelist(list);
+
 	return nips;
 }
 
@@ -75,8 +81,8 @@
 static void
 send_notify(char *mntpt, char *slave, RR *soa, Request *req)
 {
-	int i, j, len, n, reqno, fd, nips, send;
 	uchar ips[8*IPaddrlen], ibuf[Maxudp+Udphdrsize], obuf[Maxudp+Udphdrsize];
+	int i, j, len, n, reqno, fd, nips, send;
 	Udphdr *up = (Udphdr*)obuf;
 	DNSmsg repmsg;
 	char *err;
@@ -83,7 +89,7 @@
 
 	nips = getips(slave, ips, sizeof(ips)/IPaddrlen, req);
 	if(nips <= 0){
-		dnslog("no address %s to notify", slave);
+		dnslog("%d: no address %s to notify", req->id, slave);
 		return;
 	}
 
@@ -90,6 +96,7 @@
 	/* create the request */
 	reqno = rand();
 	n = mkreq(soa->owner, Cin, obuf, Fauth | Onotify, reqno);
+	n += Udphdrsize;
 
 	fd = udpport(mntpt);
 	if(fd < 0)
@@ -96,7 +103,6 @@
 		return;
 
 	/* send 3 times or until we get anything back */
-	n += Udphdrsize;
 	for(i = 0; i < 3; i++, freeanswers(&repmsg)){
 		memset(&repmsg, 0, sizeof repmsg);
 		send = 0;
@@ -103,8 +109,9 @@
 		for(j = 0; j < nips; j++){
 			ipmove(up->raddr, ips + j*IPaddrlen);
 			if(write(fd, obuf, n) == n){
-				dnslog("send %d bytes notify to %s/%I.%d about %s", n, slave,
-					up->raddr, nhgets(up->rport), soa->owner->name);
+				dnslog("%d: send %d bytes notify to %s/%I.%d about %s",
+					req->id, n, slave, up->raddr, up->rport[0]<<8 | up->rport[1],
+					soa->owner->name);
 				send++;
 			}
 		}
@@ -166,9 +173,11 @@
 	procsetname("notify slaves");
 	memset(&req, 0, sizeof req);
 	req.isslave = 1;	/* don't fork off subprocesses */
+	req.from = "notify";
 
 	for(;;){
 		getactivity(&req);
+		req.aborttime = timems() + Maxreqtm;
 		notify_areas(mntpt, owned, &req);
 		putactivity(&req);
 		sleep(60*1000);
--- a/sys/src/cmd/ndb/dnresolve.c
+++ b/sys/src/cmd/ndb/dnresolve.c
@@ -19,7 +19,6 @@
 	Answnone,
 
 	Maxdest=	32,	/* maximum destinations for a request message */
-	Maxoutstanding=	15,	/* max. outstanding queries per domain name */
 
 	/*
 	 * these are the old values; we're trying longer timeouts now
@@ -29,8 +28,6 @@
 	Maxtrans=	5,	/* maximum transmissions to a server */
 	Maxretries=	10,	/* cname+actual resends: was 32; have pity on user */
 };
-enum { Hurry, Patient, };
-enum { Outns, Inns, };
 
 struct Dest
 {
@@ -47,22 +44,14 @@
 	Request *req;
 	Query	*prev;		/* previous query */
 
-	RR	*nsrp;		/* name servers to consult */
+	int	depth;
+	ushort	id;		/* request id */
 
-	Dest	*dest;		/* array of destinations */
-	Dest	*curdest;	/* pointer to next to fill */
-	int	ndest;		/* transmit to this many on this round */
-
-	int	udpfd;
-
-	int	tcpset;
-	int	tcpfd;		/* if Tcp, read replies from here */
-	int	tcpctlfd;
-	uchar	tcpip[IPaddrlen];
+	RR	*nsrp;		/* name servers to consult */
 };
 
 static RR*	dnresolve1(char*, int, int, Request*, int, int);
-static int	netquery(Query *, int);
+static int	netquery(Query *);
 
 /*
  * reading /proc/pid/args yields either "name args" or "name [display args]",
@@ -123,7 +112,7 @@
 	char nname[Domlen];
 
 	if(status)
-		*status = 0;
+		*status = Rok;
 
 	if(depth > 12)			/* in a recursive loop? */
 		return nil;
@@ -194,18 +183,15 @@
 }
 
 static void
-queryinit(Query *qp, DN *dp, int type, Request *req)
+initquery(Query *qp, DN *dp, int type, Request *req, int depth)
 {
 	assert(dp != nil);
 
 	memset(qp, 0, sizeof *qp);
-	qp->udpfd = qp->tcpfd = qp->tcpctlfd = -1;
 	qp->dp = dp;
 	qp->type = type;
-	if (qp->type != type)
-		dnslog("queryinit: bogus type %d", type);
 	qp->nsrp = nil;
-	qp->dest = qp->curdest = nil;
+	qp->depth = depth;
 	qp->prev = req->aux;
 	qp->req = req;
 	req->aux = qp;
@@ -212,19 +198,11 @@
 }
 
 static void
-querydestroy(Query *qp)
+exitquery(Query *qp)
 {
 	if(qp->req->aux == qp)
 		qp->req->aux = qp->prev;
-	/* leave udpfd open */
-	if (qp->tcpfd >= 0)
-		close(qp->tcpfd);
-	if (qp->tcpctlfd >= 0) {
-		hangup(qp->tcpctlfd);
-		close(qp->tcpctlfd);
-	}
 	memset(qp, 0, sizeof *qp);	/* prevent accidents */
-	qp->udpfd = qp->tcpfd = qp->tcpctlfd = -1;
 }
 
 /*
@@ -237,7 +215,6 @@
 static void
 notestats(long ms, int tmout, int type)
 {
-	qlock(&stats);
 	if (tmout) {
 		stats.tmout++;
 		if (type == Taaaa)
@@ -254,20 +231,17 @@
 		else
 			stats.under10ths[wait10ths]++;
 	}
-	qunlock(&stats);
 }
 
 static void
 noteinmem(void)
 {
-	qlock(&stats);
 	stats.answinmem++;
-	qunlock(&stats);
 }
 
 /* netquery with given name servers, free ns rrs when done */
 static int
-netqueryns(Query *qp, int depth, RR *nsrp)
+netqueryns(Query *qp, RR *nsrp)
 {
 	int rv;
 
@@ -274,7 +248,7 @@
 	if(nsrp == nil)
 		return Answnone;
 	qp->nsrp = nsrp;
-	rv = netquery(qp, depth);
+	rv = netquery(qp);
 	qp->nsrp = nil;		/* prevent accidents */
 	rrfreelist(nsrp);
 	return rv;
@@ -281,7 +255,7 @@
 }
 
 static RR*
-issuequery(Query *qp, char *name, int class, int depth, int recurse)
+issuequery(Query *qp, char *name, int class, int recurse)
 {
 	char *cp;
 	DN *nsdp;
@@ -294,7 +268,7 @@
 	if(cfg.resolver){
 		nsrp = randomize(getdnsservers(class));
 		if(nsrp != nil)
-			if(netqueryns(qp, depth+1, nsrp) > Answnone)
+			if(netqueryns(qp, nsrp) > Answnone)
 				return rrlookup(qp->dp, qp->type, OKneg);
 	}
 
@@ -342,11 +316,11 @@
 				cp = "";
 
 			/* query the name servers found in cache */
-			if(netqueryns(qp, depth+1, nsrp) > Answnone)
+			if(netqueryns(qp, nsrp) > Answnone)
 				return rrlookup(qp->dp, qp->type, OKneg);
 		} else if(dbnsrp)
 			/* try the name servers found in db */
-			if(netqueryns(qp, depth+1, dbnsrp) > Answnone)
+			if(netqueryns(qp, dbnsrp) > Answnone)
 				return rrlookup(qp->dp, qp->type, NOneg);
 	}
 	return nil;
@@ -353,8 +327,7 @@
 }
 
 static RR*
-dnresolve1(char *name, int class, int type, Request *req, int depth,
-	int recurse)
+dnresolve1(char *name, int class, int type, Request *req, int depth, int recurse)
 {
 	Area *area;
 	DN *dp;
@@ -362,7 +335,8 @@
 	Query q;
 
 	if(debug)
-		dnslog("[%d] dnresolve1 %s %d %d", getpid(), name, type, class);
+		dnslog("%d: dnresolve1 %s %d %d",
+			req->id, name, type, class);
 
 	/* only class Cin implemented so far */
 	if(class != Cin)
@@ -380,8 +354,8 @@
 			if(rp->auth) {
 				noteinmem();
 				if(debug)
-					dnslog("[%d] dnresolve1 %s %d %d: auth rr in db",
-						getpid(), name, type, class);
+					dnslog("%d: dnresolve1 %s %d %d: auth rr in db",
+						req->id, name, type, class);
 				return rp;
 			}
 		} else
@@ -391,8 +365,8 @@
 				if(type != Tall || rp->query == Tall) {
 					noteinmem();
 					if(debug)
-						dnslog("[%d] dnresolve1 %s %d %d: rr not in db",
-							getpid(), name, type, class);
+						dnslog("%d: dnresolve1 %s %d %d: rr not in db",
+							req->id, name, type, class);
 					return rp;
 				}
 	rrfreelist(rp);
@@ -409,8 +383,8 @@
 		rrfreelist(rp);
 		if(rp){
 			if(debug)
-				dnslog("[%d] dnresolve1 %s %d %d: rr from rrlookup for non-cname",
-					getpid(), name, type, class);
+				dnslog("%d: dnresolve1 %s %d %d: rr from rrlookup for non-cname",
+					req->id, name, type, class);
 			return nil;
 		}
 	}
@@ -423,14 +397,14 @@
 	if (area || strncmp(dp->name, "local#", 6) == 0)
 		return nil;
 
-	queryinit(&q, dp, type, req);
-	rp = issuequery(&q, name, class, depth, recurse);
-	querydestroy(&q);
+	initquery(&q, dp, type, req, depth);
+	rp = issuequery(&q, name, class, recurse);
+	exitquery(&q);
 
 	if(rp){
 		if(debug)
-			dnslog("[%d] dnresolve1 %s %d %d: rr from query",
-				getpid(), name, type, class);
+			dnslog("%d: dnresolve1 %s %d %d: rr from query",
+				req->id, name, type, class);
 		return rp;
 	}
 
@@ -438,8 +412,8 @@
 	rp = rrlookup(dp, type, OKneg);
 	if(rp){
 		if(debug)
-			dnslog("[%d] dnresolve1 %s %d %d: rr from rrlookup",
-				getpid(), name, type, class);
+			dnslog("%d: dnresolve1 %s %d %d: rr from rrlookup",
+				req->id, name, type, class);
 		return rp;
 	}
 
@@ -447,12 +421,12 @@
 	rp = dblookup(name, class, type, 0, 0);
 	if (rp) {
 		if(debug)
-			dnslog("[%d] dnresolve1 %s %d %d: rr from dblookup",
-				getpid(), name, type, class);
+			dnslog("%d: dnresolve1 %s %d %d: rr from dblookup",
+				req->id, name, type, class);
 	}else{
 		if(debug)
-			dnslog("[%d] dnresolve1 %s %d %d: no rr from dblookup; crapped out",
-				getpid(), name, type, class);
+			dnslog("%d: dnresolve1 %s %d %d: no rr from dblookup; crapped out",
+				req->id, name, type, class);
 	}
 	return rp;
 }
@@ -486,12 +460,12 @@
 	static char hmsg[] = "headers";
 	static char imsg[] = "ignoreadvice";
 
-	char ds[64], adir[64];
+	char adir[NETPATHLEN], buf[NETPATHLEN];
 	int fd, ctl;
 
 	/* get a udp port */
-	snprint(ds, sizeof ds, "%s/udp!*!0", mntpt);
-	ctl = announce(ds, adir);
+	snprint(buf, sizeof buf, "%s/udp!*!0", mntpt);
+	ctl = announce(buf, adir);
 	if(ctl < 0)
 		return -1;
 
@@ -506,15 +480,15 @@
 	write(ctl, imsg, sizeof(imsg)-1);
 
 	/* grab the data file */
-	snprint(ds, sizeof ds, "%s/data", adir);
-	fd = open(ds, ORDWR);
+	snprint(buf, sizeof buf, "%s/data", adir);
+	fd = open(buf, ORDWR|OCEXEC);
 	if(fd < 0)
-		warning("can't open udp port %s: %r", ds);
+		warning("can't open udp port %s: %r", buf);
 	close(ctl);
 	return fd;
 }
 
-void
+static void
 initdnsmsg(DNSmsg *mp, RR *rp, int flags, ushort reqno)
 {
 	memset(mp, 0, sizeof *mp);
@@ -525,24 +499,24 @@
 		mp->qdcount = 1;
 }
 
-/* generate a DNS UDP query packet */
+/* generate a DNS UDP query packet, return size of request (without Udphdr) */
 int
-mkreq(DN *dp, int type, uchar *buf, int flags, ushort reqno)
+mkreq(DN *dp, int type, uchar *pkt, int flags, ushort id)
 {
+	Udphdr *uh = (Udphdr*)pkt;
 	DNSmsg m;
-	int len;
-	Udphdr *uh = (Udphdr*)buf;
 	RR *rp;
+	int len;
 
 	/* stuff port number into output buffer */
-	memset(uh, 0, sizeof *uh);
-	hnputs(uh->rport, 53);
+	memset(uh, 0, Udphdrsize);
+	uh->rport[1] = 53;
 
 	/* make request and convert it to output format */
 	rp = rralloc(type);
 	rp->owner = dp;
-	initdnsmsg(&m, rp, flags, reqno);
-	len = convDNS2M(&m, &buf[Udphdrsize], Maxudp);
+	initdnsmsg(&m, rp, flags, id);
+	len = convDNS2M(&m, &pkt[Udphdrsize], Maxudp);
 	rrfreelist(rp);
 	return len;
 }
@@ -557,18 +531,17 @@
 	mp->qdcount = mp->ancount = mp->nscount = mp->arcount = 0;
 }
 
-/* timed read of reply.  sets srcip.  ibuf must be 64K to handle tcp answers. */
+/* timed read of reply. sets srcip if UDP. */
 static int
-readnet(Query *qp, int medium, uchar *ibuf, uvlong endms, uchar **replyp, uchar *srcip)
+readnet(Query *qp, int medium, int fd, uchar pkt[Maxpkt], uvlong endms,
+	uchar **replyp, uchar *srcip)
 {
-	int len, fd;
+	int len;
 	long ms;
 	uvlong startms;
 	uchar *reply;
-	uchar lenbuf[2];
 
 	*replyp = nil;
-	memset(srcip, 0, IPaddrlen);
 
 	startms = nowms;
 	ms = (long)(endms - startms);
@@ -576,47 +549,50 @@
 		return -1;		/* taking too long */
 
 	len = -1;			/* pessimism */
-	reply = ibuf;
-	if (medium == Udp)
-		if (qp->udpfd < 0)
-			dnslog("readnet: qp->udpfd closed");
-		else {
-			alarm(ms);
-			len = read(qp->udpfd, ibuf, Udphdrsize+Maxudpin);
+	reply = pkt;
+	switch (medium) {
+	case Udp:
+		alarm(ms);
+		len = read(fd, pkt, Udphdrsize + Maxudp);
+		alarm(0);
+		if(len < 0)
+			break;
+		if(len <= Udphdrsize){
+			len = -1;
+			break;
+		}
+		ipmove(srcip, pkt);
+		len   -= Udphdrsize;
+		reply += Udphdrsize;
+		break;
+	case Tcp:
+		alarm(ms);
+		len = readn(fd, pkt, 2);
+		if(len < 0){
 			alarm(0);
-			if (len >= IPaddrlen)
-				memmove(srcip, ibuf, IPaddrlen);
-			if (len >= Udphdrsize) {
-				len   -= Udphdrsize;
-				reply += Udphdrsize;
-			}
+			break;
 		}
-	else {
-		if (!qp->tcpset)
-			dnslog("readnet: tcp params not set");
-		fd = qp->tcpfd;
-		if (fd < 0)
-			dnslog("readnet: %s: tcp fd unset for dest %I",
-				qp->dp->name, qp->tcpip);
-		else {
-			alarm(ms);
-			if (readn(fd, lenbuf, 2) != 2) {
-				alarm(0);
-				dnslog("readnet: short read of 2-byte tcp msg size from %I", qp->tcpip);
-			} else {
-				len = lenbuf[0]<<8 | lenbuf[1];
-				if (readn(fd, ibuf, len) != len) {
-					alarm(0);
-					dnslog("readnet: short read of tcp data from %I", qp->tcpip);
-					len = -1;
-				} else {
-					alarm(0);
-				}
-			}
+		if(len != 2){
+			alarm(0);
+			dnslog("%d: readnet: short read of 2-byte tcp msg size from %I",
+				qp->req->id, srcip);
+			len = -1;
+			break;
 		}
-		memmove(srcip, qp->tcpip, IPaddrlen);
+		len = pkt[0]<<8 | pkt[1];
+		if(len <= 0 || len > Maxtcp || readn(fd, pkt+2, len) != len){
+			alarm(0);
+			dnslog("%d: readnet: short read of tcp data from %I",
+				qp->req->id, srcip);
+			len = -1;
+			break;
+		}
+		alarm(0);
+		reply += 2;
+		break;
 	}
 
+	/* record elapsed time and timeout statistics */
 	ms = (long)(timems() - startms);
 	notestats(ms, len < 0, qp->type);
 
@@ -626,58 +602,62 @@
 
 /*
  *  read replies to a request and remember the rrs in the answer(s).
- *  ignore any of the wrong type.
+ *  ignore any of the wrong type for UDP.
  *  wait at most until endms.
  */
 static int
-readreply(Query *qp, int medium, ushort req, uchar *ibuf, DNSmsg *mp,
-	uvlong endms)
+readreply(Query *qp, int medium, int fd, uvlong endms,
+	DNSmsg *mp, uchar *srcip)
 {
-	int len;
-	char *err;
+	uchar pkt[Maxpkt];
 	uchar *reply;
-	uchar srcip[IPaddrlen];
+	char *err;
+	int len;
 	RR *rp;
 
-	for (;; freeanswers(mp)) {
-		len = readnet(qp, medium, ibuf, endms, &reply, srcip);
+	for(;;){
+		len = readnet(qp, medium, fd, pkt, endms, &reply, srcip);
 		if (len < 0)
 			break;
+
 		/* convert into internal format  */
 		memset(mp, 0, sizeof *mp);
 		err = convM2DNS(reply, len, mp, nil);
 		if (mp->flags & Ftrunc) {
 			free(err);
-			freeanswers(mp);
-			/* notify our caller to retry the query via tcp. */
-			return -1;
-		} else if(err){
-			dnslog("readreply: %s: input err, len %d: %s: %I",
-				qp->dp->name, len, err, srcip);
-			free(err);
-			continue;
+			return 1;	/* signal truncation */
 		}
-		if(debug)
-			logreply(qp->req->id, srcip, mp);
+		if(err){
+			dnslog("%d: readreply: input err, len %d: %s from %I",
+				qp->req->id, len, err, srcip);
+			free(err);
+		} else {
+			logreply(qp->req->id, "rcvd", srcip, mp);
 
-		/* answering the right question? */
-		if(mp->id != req)
-			dnslog("%d: id %d instead of %d: %I", qp->req->id,
-				mp->id, req, srcip);
-		else if(mp->qd == 0)
-			dnslog("%d: no question RR: %I", qp->req->id, srcip);
-		else if(mp->qd->owner != qp->dp)
-			dnslog("%d: owner %s instead of %s: %I", qp->req->id,
-				mp->qd->owner->name, qp->dp->name, srcip);
-		else if(mp->qd->type != qp->type)
-			dnslog("%d: qp->type %d instead of %d: %I",
-				qp->req->id, mp->qd->type, qp->type, srcip);
-		else {
-			/* remember what request this is in answer to */
-			for(rp = mp->an; rp; rp = rp->next)
-				rp->query = qp->type;
-			return 0;
+			/* answering the right question? */
+			if(mp->id != qp->id)
+				dnslog("%d: id %d instead of %d from %I",
+					qp->req->id, mp->id, qp->id, srcip);
+			else if(mp->qd == 0)
+				dnslog("%d: no question RR from %I", qp->req->id, srcip);
+			else if(mp->qd->owner != qp->dp)
+				dnslog("%d: owner %s instead of %s from %I", qp->req->id,
+					mp->qd->owner->name, qp->dp->name, srcip);
+			else if(mp->qd->type != qp->type)
+				dnslog("%d: qp->type %d instead of %d from %I",
+					qp->req->id, mp->qd->type, qp->type, srcip);
+			else {
+				/* remember what request this is in answer to */
+				for(rp = mp->an; rp; rp = rp->next)
+					rp->query = qp->type;
+				return 0;
+			}
 		}
+		freeanswers(mp);
+
+		/* only a single reply is expected over TCP */
+		if(medium == Tcp)
+			break;
 	}
 	memset(mp, 0, sizeof *mp);
 	return -1;
@@ -703,7 +683,6 @@
 	return 1;
 }
 
-
 /*
  *  return multicast version if any
  */
@@ -743,10 +722,10 @@
 }
 
 /*
- *  Get next server type address(es) into qp->dest[nd] and beyond
+ *  Get next server type address(es) into dest[nd] and beyond
  */
 static int
-serveraddrs(Query *qp, int nd, int depth, int type)
+serveraddrs(Query *qp, Dest dest[Maxdest], int nd, int type)
 {
 	RR *rp, *arp, *trp;
 	ulong mark;
@@ -800,7 +779,7 @@
 			if(strncmp(rp->owner->name, "local#", 6) == 0)
 				continue;
 			arp = dnresolve(rp->host->name, Cin, type, qp->req, 0,
-				depth+1, Recurse, 1, 0);
+				qp->depth+1, Recurse, 1, 0);
 			rrfreelist(rrremneg(&arp));
 			if(arp)
 				break;
@@ -809,19 +788,17 @@
 
 	/* use any addresses that we found */
 	for(trp = arp; trp && nd < Maxdest; trp = trp->next){
-		p = &qp->dest[nd];
+		p = &dest[nd];
 		memset(p, 0, sizeof *p);
 		if(parseip(p->a, trp->ip->name) == -1)
 			continue;
-
-		/*
-		 * straddling servers can reject all nameservers if they are all
-		 * inside, so be sure to list at least one outside ns at
-		 * the end of the ns list in /lib/ndb for `dom='.
-		 */
-		if (ipisbm(p->a) ||
-		    cfg.straddle && !insideaddr(qp->dp->name) && insidens(p->a))
+		if(ipcmp(p->a, IPnoaddr) == 0)
 			continue;
+		if(ipisbm(p->a))
+			continue;
+		if(cfg.serve && myip(p->a))
+			continue;
+
 		p->nx = 0;
 		p->n = nil;
 		p->s = trp->owner;
@@ -848,9 +825,7 @@
 	DN *soaowner;
 	ulong ttl;
 
-	qlock(&stats);
 	stats.negcached++;
-	qunlock(&stats);
 
 	/* no cache time specified, don't make anything up */
 	if(soarr != nil){
@@ -878,170 +853,6 @@
 	rrattach(rp, Authoritative);
 }
 
-static int
-setdestoutns(Dest *p, int n)
-{
-	memset(p, 0, sizeof *p);
-	if (outsidensip(n, p->a) < 0){
-		if (n == 0)
-			dnslog("[%d] no outside-ns in ndb", getpid());
-		return -1;
-	}
-	p->s = dnlookup("outside-ns-ips", Cin, 1);
-	return 0;
-}
-
-/*
- * issue query via UDP or TCP as appropriate.
- * for TCP, returns with qp->tcpip set from udppkt header.
- */
-static int
-mydnsquery(Query *qp, int medium, uchar *udppkt, int len)
-{
-	int rv, nfd;
-	char conndir[40], addr[128];
-	uchar belen[2];
-	NetConnInfo *nci;
-
-	rv = -1;
-	if (myip(udppkt))
-		return rv;
-	switch (medium) {
-	case Udp:
-		nfd = dup(qp->udpfd, -1);
-		if (nfd < 0) {
-			warning("mydnsquery: qp->udpfd %d: %r", qp->udpfd);
-			close(qp->udpfd);	/* ensure it's closed */
-			qp->udpfd = -1;		/* poison it */
-			break;
-		}
-		close(nfd);
-
-		if (qp->udpfd < 0)
-			dnslog("mydnsquery: qp->udpfd %d closed", qp->udpfd);
-		else {
-			if (write(qp->udpfd, udppkt, len+Udphdrsize) !=
-			    len+Udphdrsize)
-				warning("sending udp msg: %r");
-			else {
-				qlock(&stats);
-				stats.qsent++;
-				qunlock(&stats);
-				rv = 0;
-			}
-		}
-		break;
-	case Tcp:
-		/* send via TCP & keep fd around for reply */
-		memmove(qp->tcpip, udppkt, sizeof qp->tcpip);
-		snprint(addr, sizeof addr, "%s/tcp!%I!dns",
-			(mntpt && *mntpt) ? mntpt : "/net", udppkt);
-		alarm(10*1000);
-		qp->tcpfd = dial(addr, nil, conndir, &qp->tcpctlfd);
-		alarm(0);
-		if (qp->tcpfd < 0) {
-			dnslog("can't dial %s: %r", addr);
-			break;
-		}
-		nci = getnetconninfo(conndir, qp->tcpfd);
-		if (nci) {
-			parseip(qp->tcpip, nci->rsys);
-			freenetconninfo(nci);
-		} else
-			dnslog("mydnsquery: getnetconninfo failed");
-		qp->tcpset = 1;
-
-		belen[0] = len >> 8;
-		belen[1] = len;
-		if (write(qp->tcpfd, belen, 2) != 2 ||
-		    write(qp->tcpfd, udppkt + Udphdrsize, len) != len)
-			warning("sending tcp msg: %r");
-		else
-			rv = 0;
-		break;
-	}
-	return rv;
-}
-
-/*
- * send query to all UDP destinations or one TCP destination,
- * taken from obuf (udp packet) header
- */
-static int
-xmitquery(Query *qp, int medium, int depth, uchar *obuf, int inns, int len)
-{
-	int n;
-	char buf[32];
-	Dest *p;
-
-	/*
-	 * if we send tcp query, we just take the dest ip address from
-	 * the udp header placed there by tcpquery().
-	 */
-	if (medium == Tcp) {
-		procsetname("tcp %sside query for %s %s", (inns? "in": "out"),
-			qp->dp->name, rrname(qp->type, buf, sizeof buf));
-		if(mydnsquery(qp, medium, obuf, len) < 0) /* sets qp->tcpip from obuf */
-			return -1;
-		if(debug)
-			logsend(qp->req->id, depth, qp->tcpip, "", qp->dp->name,
-				qp->type);
-		return 0;
-	}
-
-	/*
-	 * get a nameserver address if we need one.
-	 * we're to transmit to more destinations than we currently have,
-	 * so get another.
-	 */
-	p = qp->dest;
-	n = qp->curdest - p;
-	if (qp->ndest > n) {
-		/* populates qp->dest with v4 and v6 addresses. */
-		n = serveraddrs(qp, n, depth, Ta);
-		n = serveraddrs(qp, n, depth, Taaaa);
-		if (n == 0 && cfg.straddle && cfg.inside) {
-			/* get ips of "outside-ns-ips" */
-			while(n < Maxdest){
-				if (setdestoutns(&qp->dest[n], n) < 0)
-					break;
-				n++;
-			}
-			if(n == 0)
-				dnslog("xmitquery: %s: no outside-ns nameservers",
-					qp->dp->name);
-		}
-		qp->curdest = &qp->dest[n];
-	}
-
-	for(n = 0; p < &qp->dest[qp->ndest] && p < qp->curdest; p++){
-		/* skip destinations we've finished with */
-		if(p->nx >= Maxtrans)
-			continue;
-		/* exponential backoff of requests */
-		if((1<<p->nx) > qp->ndest)
-			continue;
-
-		if(ipcmp(p->a, IPnoaddr) == 0)
-			continue;		/* mistake */
-
-		procsetname("udp %sside query to %I/%s %s %s",
-			(inns? "in": "out"), p->a, p->s->name,
-			qp->dp->name, rrname(qp->type, buf, sizeof buf));
-		if(debug)
-			logsend(qp->req->id, depth, p->a, p->s->name,
-				qp->dp->name, qp->type);
-
-		/* fill in UDP destination addr & send it */
-		memmove(obuf, p->a, sizeof p->a);
-		if(mydnsquery(qp, medium, obuf, len) == 0)
-			n++;
-		p->nx++;
-	}
-
-	return n == 0 ? -1 : 0;
-}
-
 /* is mp a cachable negative response (with Rname set)? */
 static int
 isnegrname(DNSmsg *mp)
@@ -1109,13 +920,12 @@
 
 /* returns Answerr (-1) on errors, else number of answers, which can be zero. */
 static int
-procansw(Query *qp, DNSmsg *mp, int depth, Dest *p)
+procansw(Query *qp, Dest *p, DNSmsg *mp)
 {
-	int rv;
-	char buf[32];
-	DN *ndp;
 	Query nq;
+	DN *ndp;
 	RR *tp, *soarr;
+	int rv;
 
 	if(mp->an == nil)
 		stats.negans++;
@@ -1242,61 +1052,112 @@
 		rrfreelist(tp);
 		return Answnone;
 	}
-	procsetname("recursive query for %s %s", qp->dp->name,
-		rrname(qp->type, buf, sizeof buf));
 
-	queryinit(&nq, qp->dp, qp->type, qp->req);
-	rv = netqueryns(&nq, depth+1, tp);
-	querydestroy(&nq);
+	initquery(&nq, qp->dp, qp->type, qp->req, qp->depth+1);
+	rv = netqueryns(&nq, tp);
+	exitquery(&nq);
 
 	return rv;
 }
 
+static int
+writenet(Query *qp, int medium, int fd, uchar *pkt, int len, Dest *p)
+{
+	uchar tmp[2];
+	int rv;
+
+	logrequest(qp->req->id, qp->depth, "send", p->a, p->s->name,
+		qp->dp->name, qp->type);
+
+	rv = -1;
+	switch (medium) {
+	case Udp:
+		/* fill in UDP destination addr & send it */
+		ipmove(pkt, p->a);
+		if (write(fd, pkt, len+Udphdrsize) != len+Udphdrsize)
+			warning("sending udp msg to %I/%s: %r", p->a, p->s->name);
+		else {
+			stats.qsentudp++;
+			rv = 0;
+		}
+		break;
+	case Tcp:
+		tmp[0] = pkt[Udphdrsize-2], pkt[Udphdrsize-2] = len >> 8;
+		tmp[1] = pkt[Udphdrsize-1], pkt[Udphdrsize-1] = len;
+		len += 2;
+		if (write(fd, pkt + Udphdrsize-2, len) != len)
+			warning("sending tcp msg to %I/%s: %r", p->a, p->s->name);
+		else {
+			stats.qsenttcp++;
+			rv = 0;
+		}
+		pkt[Udphdrsize-2] = tmp[0];
+		pkt[Udphdrsize-1] = tmp[1];
+		break;
+	}
+	return rv;
+}
+
 /*
- * send a query via tcp to a single address (from ibuf's udp header)
+ * send a query via tcp to a single address
  * and read the answer(s) into mp->an.
  */
 static int
-tcpquery(Query *qp, DNSmsg *mp, int depth, uchar *ibuf, uchar *obuf, int len,
-	uvlong endms, int inns, ushort req)
+tcpquery(Query *qp, uchar *pkt, int len, Dest *p, uvlong endms, DNSmsg *mp)
 {
-	int rv = 0;
+	char buf[NETPATHLEN];
+	int fd, rv;
+	long ms;
 
-	if (0)
-		dnslog("%s: udp reply truncated; retrying query via tcp to %I",
-			qp->dp->name, qp->tcpip);
-
-	memmove(obuf, ibuf, IPaddrlen);		/* send back to respondent */
 	memset(mp, 0, sizeof *mp);
-	if (xmitquery(qp, Tcp, depth, obuf, inns, len) < 0 ||
-	    readreply(qp, Tcp, req, ibuf, mp, endms) < 0)
-		rv = -1;
-	if (qp->tcpfd >= 0) {
-		hangup(qp->tcpctlfd);
-		close(qp->tcpctlfd);
-		close(qp->tcpfd);
-	}
-	qp->tcpfd = qp->tcpctlfd = -1;
 
+	ms = (long)(endms - nowms);
+	if(ms < Minreqtm)
+		return -1;	/* takes too long */
+	if(ms > Maxtcpdialtm)
+		ms = Maxtcpdialtm;
+
+	procsetname("tcp query to %I/%s for %s %s", p->a, p->s->name,
+		qp->dp->name, rrname(qp->type, buf, sizeof buf));
+
+	snprint(buf, sizeof buf, "%s/tcp!%I!53", mntpt, p->a);
+
+	alarm(ms);
+	fd = dial(buf, nil, nil, nil);
+	alarm(0);
+	if (fd < 0) {
+		dnslog("%d: can't dial %s for %I/%s: %r",
+			qp->req->id, buf, p->a, p->s->name);
+		return -1;
+	}
+	rv = writenet(qp, Tcp, fd, pkt, len, p);
+	if(rv == 0){
+		timems();	/* account for time dialing and sending */
+		rv = readreply(qp, Tcp, fd, endms, mp, pkt);
+	}
+	close(fd);
 	return rv;
 }
 
 /*
- *  query name servers.  fill in obuf with on-the-wire representation of a
- *  DNSmsg derived from qp.  if the name server returns a pointer to another
+ *  query name servers.  fill in pkt with on-the-wire representation of a
+ *  DNSmsg derived from qp. if the name server returns a pointer to another
  *  name server, recurse.
  */
 static int
-queryns(Query *qp, int depth, uchar *ibuf, uchar *obuf, ulong waitms, int inns)
+udpqueryns(Query *qp, int fd, uchar *pkt)
 {
-	int ndest, len, replywaits, rv, flag;
-	ushort req;
-	uvlong endms;
-	char buf[32];
+	Dest dest[Maxdest], *edest, *p, *np;
+	int ndest, replywaits, len, flag, rv, n;
 	uchar srcip[IPaddrlen];
-	Dest *p, *np, dest[Maxdest];
+	char buf[32];
+	uvlong endms;
+	DNSmsg m;
+	RR *rp;
 
-	req = rand();
+	/* prepare server RR's for incremental lookup */
+	for(rp = qp->nsrp; rp; rp = rp->next)
+		rp->marker = 0;
 
 	/* request recursion only for local dns servers */
 	flag = Oquery;
@@ -1304,11 +1165,11 @@
 		flag |= Frecurse;
 
 	/* pack request into a udp message */
-	len = mkreq(qp->dp, qp->type, obuf, flag, req);
+	qp->id = rand();
+	len = mkreq(qp->dp, qp->type, pkt, flag, qp->id);
 
-	/* no server addresses yet */
-	memset(dest, 0, sizeof dest);
-	qp->curdest = qp->dest = dest;
+	/* no destination yet */
+	edest = dest;
 
 	/*
 	 *  transmit udp requests and wait for answers.
@@ -1317,80 +1178,97 @@
 	 *  retry a query via tcp if its response is truncated.
 	 */
 	for(ndest = 2; ndest < Maxdest; ndest += 2){
-		qp->ndest = ndest;
-		qp->tcpset = 0;
-
 		endms = nowms;
-		if(endms >= qp->req->aborttime)
+		if((long)(qp->req->aborttime - nowms) < Minreqtm)
 			break;
-		if (xmitquery(qp, Udp, depth, obuf, inns, len) < 0)
+
+		/*
+		 * this round transmits to at most ndest destinations.
+		 * if we hold fewer addresses than that, ask serveraddrs()
+		 * for more nameserver addresses before sending.
+		 */
+		n = edest - dest;
+		if (n < ndest) {
+			/* populates dest with v4 and v6 addresses. */
+			n = serveraddrs(qp, dest, n, Ta);
+			n = serveraddrs(qp, dest, n, Taaaa);
+			edest = dest + n;
+		}
+
+		n = 0;
+		for(p = dest; p < edest && p < &dest[ndest]; p++){
+			/* skip destinations we've finished with */
+			if(p->nx >= Maxtrans)
+				continue;
+			/* exponential backoff of requests */
+			if((1UL<<p->nx) > ndest)
+				continue;
+			if(writenet(qp, Udp, fd, pkt, len, p) == 0)
+				n++;
+			p->nx++;
+		}
+
+		/* nothing left to send to */
+		if (n == 0)
 			break;
-		endms += waitms;
+
+		/* set the timeout for replies */
+		endms += 500;
 		if(endms > qp->req->aborttime)
 			endms = qp->req->aborttime;
 
+		procsetname("reading replies from %I...: %s %s from %s",
+			pkt, qp->dp->name,
+			rrname(qp->type, buf, sizeof buf), qp->req->from);
+
 		for(replywaits = 0; replywaits < ndest; replywaits++){
-			DNSmsg m;
+			/* read udp answer into m, fill srcip */
+			if(readreply(qp, Udp, fd, endms, &m, srcip) < 0)
+				break;
 
-			procsetname("reading %sside reply from %I: %s %s from %s",
-				(inns? "in": "out"), obuf, qp->dp->name,
-				rrname(qp->type, buf, sizeof buf), qp->req->from);
+			if(debug)
+				dnslog("%d: got reply from %I", qp->req->id, srcip);
 
-			/* read udp answer into m */
-			if (readreply(qp, Udp, req, ibuf, &m, endms) >= 0)
-				memmove(srcip, ibuf, IPaddrlen);
-			else if (!(m.flags & Ftrunc)) {
-				freeanswers(&m);
-				break;		/* timed out on this dest */
-			} else {
-				/* whoops, it was truncated! ask again via tcp */
-				freeanswers(&m);
-				if(nowms >= endms)
-					break;
-				rv = tcpquery(qp, &m, depth, ibuf, obuf, len,
-					endms, inns, req);  /* answer in m */
-				if (rv < 0) {
-					freeanswers(&m);
-					break;		/* failed via tcp too */
-				}
-				memmove(srcip, qp->tcpip, IPaddrlen);
-			}
-
 			/* find responder */
-			if(debug)
-				dnslog("queryns got reply from %I", srcip);
-			for(p = qp->dest; p < qp->curdest; p++)
+			for(p = dest; p < edest; p++)
 				if(ipcmp(p->a, srcip) == 0)
 					break;
-			if(p >= qp->curdest){
-				dnslog("response from %I but no destination", srcip);
+			if(p >= edest){
+				dnslog("%d: response from %I but no destination",
+					qp->req->id, srcip);
+				freeanswers(&m);
 				continue;
 			}
 
+			/* if response was truncated, try tcp */
+			if(m.flags & Ftrunc){
+				freeanswers(&m);
+				if(tcpquery(qp, pkt, len, p, endms, &m) < 0)
+					break;	/* failed via tcp too */
+				if(m.flags & Ftrunc){
+					freeanswers(&m);
+					break;
+				}
+			}
+
 			/* remove all addrs of responding server from list */
-			for(np = qp->dest; np < qp->curdest; np++)
+			for(np = dest; np < edest; np++)
 				if(np->s == p->s)
 					np->nx = Maxtrans;
 
 			/* free or incorporate RRs in m */
-			rv = procansw(qp, &m, depth, p);
-			if (rv > Answnone) {
-				qp->dest = qp->curdest = nil; /* prevent accidents */
+			rv = procansw(qp, p, &m);
+			if(rv > Answnone)
 				return rv;
-			}
 		}
 	}
 
 	/* if all servers returned failure, propagate it */
 	qp->dp->respcode = Rserver;
-	for(p = dest; p < qp->curdest; p++)
+	for(p = dest; p < edest; p++)
 		if(p->code != Rserver)
 			qp->dp->respcode = Rok;
 
-//	if (qp->dp->respcode)
-//		dnslog("queryns setting Rserver for %s", qp->dp->name);
-
-	qp->dest = qp->curdest = nil;		/* prevent accidents */
 	return Answnone;
 }
 
@@ -1400,30 +1278,23 @@
  * but we'd have to sort out the answers by dns-query id.
  */
 static int
-udpquery(Query *qp, char *mntpt, int depth, int patient, int inns)
+udpquery(Query *qp)
 {
 	int fd, rv;
-	uchar *obuf, *ibuf;
+	uchar *pkt;
 
-	/* use alloced buffers rather than ones from the stack */
-	ibuf = emalloc(64*1024);		/* max. tcp reply size */
-	obuf = emalloc(Maxudp+Udphdrsize);
-
+	pkt = emalloc(Maxudp+Udphdrsize);
 	fd = udpport(mntpt);
 	if (fd < 0) {
-		dnslog("can't get udpport for %s query of name %s: %r",
-			mntpt, qp->dp->name);
+		dnslog("%d: can't get udpport for %s query of name %s: %r",
+			qp->req->id, mntpt, qp->dp->name);
 		rv = -1;
 		goto Out;
 	}
-	qp->udpfd = fd;
-	rv = queryns(qp, depth, ibuf, obuf, 500UL<<(patient != 0), inns);
-	qp->udpfd = -1;
+	rv = udpqueryns(qp, fd, pkt);
 	close(fd);
-
 Out:
-	free(obuf);
-	free(ibuf);
+	free(pkt);
 	return rv;
 }
 
@@ -1432,13 +1303,9 @@
  * using nameservers in qp->nsrp.
  */
 static int
-netquery(Query *qp, int depth)
+netquery(Query *qp)
 {
-	int rv, triedin, inname;
-	RR *rp;
-
-	rv = Answnone;			/* pessimism */
-	if(depth > 12)			/* in a recursive loop? */
+	if(qp->depth > 12)			/* in a recursive loop? */
 		return Answnone;
 
 	slave(qp->req);
@@ -1452,64 +1319,5 @@
 	if(!qp->req->isslave && strcmp(qp->req->from, "9p") == 0)
 		return Answnone;
 
-	procsetname("netquery: %s", qp->dp->name);
-
-	/* prepare server RR's for incremental lookup */
-	for(rp = qp->nsrp; rp; rp = rp->next)
-		rp->marker = 0;
-
-	triedin = 0;
-
-	/*
-	 * normal resolvers and servers will just use mntpt for all addresses,
-	 * even on the outside.  straddling servers will use mntpt (/net)
-	 * for inside addresses and /net.alt for outside addresses,
-	 * thus bypassing other inside nameservers.
-	 */
-	inname = insideaddr(qp->dp->name);
-	if (!cfg.straddle || inname) {
-		rv = udpquery(qp, mntpt, depth, Hurry, (cfg.inside? Inns: Outns));
-		triedin = 1;
-	}
-
-	/*
-	 * if we're still looking, are inside, and have an outside domain,
-	 * try it on our outside interface, if any.
-	 */
-	if (rv == Answnone && cfg.inside && !inname) {
-		if (triedin)
-			dnslog(
-	   "[%d] netquery: internal nameservers failed for %s; trying external",
-				getpid(), qp->dp->name);
-
-		/* prepare server RR's for incremental lookup */
-		for(rp = qp->nsrp; rp; rp = rp->next)
-			rp->marker = 0;
-
-		rv = udpquery(qp, "/net.alt", depth, Patient, Outns);
-	}
-
-	return rv;
-}
-
-int
-seerootns(void)
-{
-	int rv;
-	char root[] = "";
-	Request req;
-	RR *rr, *nsrp;
-	Query q;
-
-	memset(&req, 0, sizeof req);
-	req.aborttime = timems() + Maxreqtm;
-	req.isslave = 1;
-	req.from = "internal";
-	queryinit(&q, dnlookup(root, Cin, 1), Tns, &req);
-	nsrp = randomize(dblookup(root, Cin, Tns, 0, 0));
-	for (rr = nsrp; rr != nil; rr = rr->next)
-		dnslog("seerootns query nsrp: %R", rr);
-	rv = netqueryns(&q, 0, nsrp);		/* lookup ". ns" using nsrp */
-	querydestroy(&q);
-	return rv;
+	return udpquery(qp);
 }
--- a/sys/src/cmd/ndb/dns.c
+++ b/sys/src/cmd/ndb/dns.c
@@ -2,7 +2,6 @@
 #include <libc.h>
 #include <auth.h>
 #include <fcall.h>
-#include <bio.h>
 #include <ip.h>
 #include "dns.h"
 
@@ -13,8 +12,6 @@
 	Maxrrr=			32,		/* was 16 */
 	Maxfdata=		8192,
 
-	Defmaxage=		60*60,	/* default domain name max. age */
-
 	Qdir=			0,
 	Qdns=			1,
 };
@@ -60,15 +57,8 @@
 
 Cfg	cfg;
 int	debug;
-int	maxage = Defmaxage;
 int	mfd[2];
-int	needrefresh;
-ulong	now;
-uvlong	nowms;
 int	sendnotifies;
-char	*trace;
-int	traceactivity;
-char	*zonerefreshprogram;
 
 char	*logfile = "dns";	/* or "dns.test" */
 char	*dbfile;
@@ -75,7 +65,6 @@
 char	*dnsuser;
 char	mntpt[Maxpath];
 
-int	addforwtarg(char *);
 int	fillreply(Mfile*, int);
 void	freejob(Job*);
 void	io(void);
@@ -103,8 +92,8 @@
 void
 usage(void)
 {
-	fprint(2, "usage: %s [-FnorR] [-a maxage] [-f ndb-file] [-N target] "
-		"[-T forwip] [-x netmtpt] [-z refreshprog] [-s [addrs...]]\n", argv0);
+	fprint(2, "usage: %s [-FnrR] [-a maxage] [-f ndb-file] [-N target] "
+		"[-x netmtpt] [-s [addrs...]]\n", argv0);
 	exits("usage");
 }
 
@@ -111,7 +100,7 @@
 void
 main(int argc, char *argv[])
 {
-	char servefile[Maxpath], ext[Maxpath];
+	char ext[Maxpath], servefile[Maxpath];
 	Dir *dir;
 
 	setnetmtpt(mntpt, sizeof mntpt, nil);
@@ -119,12 +108,9 @@
 	ARGBEGIN{
 	case 'a':
 		maxage = atol(EARGF(usage()));
-		if (maxage <= 0)
-			maxage = Defmaxage;
 		break;
 	case 'd':
 		debug = 1;
-		traceactivity = 1;
 		break;
 	case 'f':
 		dbfile = EARGF(usage());
@@ -140,29 +126,20 @@
 		if (target < 1000)
 			target = 1000;
 		break;
-	case 'o':
-		cfg.straddle = 1;	/* straddle inside & outside networks */
-		break;
 	case 'r':
 		cfg.resolver = 1;
 		break;
 	case 'R':
-		norecursion = 1;
+		cfg.nonrecursive = 1;
 		break;
 	case 's':
 		cfg.serve = 1;		/* serve network */
 		cfg.cachedb = 1;
 		break;
-	case 'T':
-		addforwtarg(EARGF(usage()));
-		break;
 	case 'x':
 		setnetmtpt(mntpt, sizeof mntpt, EARGF(usage()));
 		setext(ext, sizeof ext, mntpt);
 		break;
-	case 'z':
-		zonerefreshprogram = EARGF(usage());
-		break;
 	default:
 		usage();
 		break;
@@ -171,17 +148,13 @@
 	if(argc != 0 && !cfg.serve)
 		usage();
 
-	rfork(RFREND|RFNOTEG);
-
-	cfg.inside = strcmp(mntpt, "/net") == 0;
-
 	/* start syslog before we fork */
 	fmtinstall('F', fcallfmt);
 	dninit();
-	dnslog("starting %s%sdns %s%s%son %s",
-		(cfg.straddle? "straddling ": ""),
+	dnslog("starting %s%s%sdns %s%son %s",
 		(cfg.cachedb? "caching ": ""),
-		(cfg.serve?   "udp server ": ""),
+		(cfg.nonrecursive? "non-recursive ": ""),
+		(cfg.serve?   "server ": ""),
 		(cfg.justforw? "forwarding-only ": ""),
 		(cfg.resolver? "resolver ": ""), mntpt);
 
@@ -199,15 +172,16 @@
 	srand(truerand());
 	db2cache(1);
 
-	if (cfg.straddle && !seerootns())
-		dnslog("straddle server misconfigured; can't see root name servers");
-
 	if(cfg.serve){
-		if(argc == 0)
+		if(argc == 0) {
 			dnudpserver(mntpt, "*");
-		else {
-			while(argc-- > 0)
-				dnudpserver(mntpt, *argv++);
+			dntcpserver(mntpt, "*");
+		} else {
+			while(argc-- > 0){
+				dnudpserver(mntpt, *argv);
+				dntcpserver(mntpt, *argv);
+				argv++;
+			}
 		}
 	}
 	if(sendnotifies)
@@ -258,7 +232,7 @@
 		sysfatal("write %s failed: %r", service);
 
 	/* copy namespace to avoid a deadlock */
-	switch(rfork(RFFDG|RFPROC|RFNAMEG)){
+	switch(rfork(RFFDG|RFPROC|RFNAMEG|RFREND|RFNOTEG)){
 	case 0:			/* child: start main proc */
 		close(p[1]);
 		procsetname("%s", mntpt);
@@ -686,19 +660,15 @@
 	 *  special commands
 	 */
 	if(debug)
-		dnslog("rwrite got: %s", job->request.data);
+		dnslog("%d: rwrite got: %s", req->id, job->request.data);
 	send = 1;
 	if(strcmp(job->request.data, "debug")==0)
 		debug ^= 1;
-	else if(strcmp(job->request.data, "dump")==0)
-		dndump("/lib/ndb/dnsdump");
 	else if(strcmp(job->request.data, "refresh")==0)
 		needrefresh = 1;
-	else if(strcmp(job->request.data, "stats")==0)
-		dnstats("/lib/ndb/dnsstats");
 	else if(strncmp(job->request.data, "target ", 7)==0){
 		target = atol(job->request.data + 7);
-		dnslog("target set to %ld", target);
+		dnslog("%d: target set to %ld", req->id, target);
 	} else
 		send = 0;
 	if (send)
@@ -723,19 +693,6 @@
 	} else
 		*atype++ = 0;
 
-	/*
-	 *  tracing request
-	 */
-	if(strcmp(atype, "trace") == 0){
-		if(trace)
-			free(trace);
-		if(*job->request.data)
-			trace = estrdup(job->request.data);
-		else
-			trace = 0;
-		goto send;
-	}
-
 	/* normal request: domain [type] */
 	stats.qrecvd9p++;
 	mf->type = rrtype(atype);
@@ -911,11 +868,14 @@
  *  the following varies between dnsdebug and dns
  */
 void
-logreply(int id, uchar *addr, DNSmsg *mp)
+logreply(int id, char *rcvd, uchar *addr, DNSmsg *mp)
 {
 	RR *rp;
 
-	dnslog("%d: rcvd %I flags:%s%s%s%s%s", id, addr,
+	if(!debug)
+		return;
+
+	dnslog("%d: %s %I flags:%s%s%s%s%s", id, rcvd, addr,
 		mp->flags & Fauth? " auth": "",
 		mp->flags & Ftrunc? " trunc": "",
 		mp->flags & Frecurse? " rd": "",
@@ -922,23 +882,26 @@
 		mp->flags & Fcanrec? " ra": "",
 		(mp->flags & (Fauth|Rmask)) == (Fauth|Rname)? " nx": "");
 	for(rp = mp->qd; rp != nil; rp = rp->next)
-		dnslog("%d: rcvd %I qd %s", id, addr, rp->owner->name);
+		dnslog("%d: %s %I qd %s", id, rcvd, addr, rp->owner->name);
 	for(rp = mp->an; rp != nil; rp = rp->next)
-		dnslog("%d: rcvd %I an %R", id, addr, rp);
+		dnslog("%d: %s %I an %R", id, rcvd, addr, rp);
 	for(rp = mp->ns; rp != nil; rp = rp->next)
-		dnslog("%d: rcvd %I ns %R", id, addr, rp);
+		dnslog("%d: %s %I ns %R", id, rcvd, addr, rp);
 	for(rp = mp->ar; rp != nil; rp = rp->next)
-		dnslog("%d: rcvd %I ar %R", id, addr, rp);
+		dnslog("%d: %s %I ar %R", id, rcvd, addr, rp);
 }
 
 void
-logsend(int id, int subid, uchar *addr, char *sname, char *rname, int type)
+logrequest(int id, int depth, char *send, uchar *addr, char *sname, char *rname, int type)
 {
-	char buf[12];
+	char tname[32];
 
-	dnslog("[%d] %d.%d: sending to %I/%s %s %s",
-		getpid(), id, subid, addr, sname, rname,
-		rrname(type, buf, sizeof buf));
+	if(!debug)
+		return;
+
+	dnslog("%d.%d: %s %I/%s %s %s",
+		id, depth, send, addr, sname, rname,
+		rrname(type, tname, sizeof tname));
 }
 
 RR*
--- a/sys/src/cmd/ndb/dns.h
+++ b/sys/src/cmd/ndb/dns.h
@@ -128,22 +128,20 @@
 	DEFTTL=		Day,
 
 	/* packet sizes */
-	Maxudp=		512,	/* maximum bytes per udp message sent */
-	Maxudpin=	2048,	/* maximum bytes per udp message rcv'd */
+	Maxudp=		8*1024,
+	Maxtcp=		0xfffe,
+	Maxpkt=		0x10000,
 
-	/* length of domain name hash table */
-	HTLEN= 		4*1024,
-
 	Maxpath=	128,	/* size of mntpt */
 	Maxlcks=	10,	/* max. query-type locks per domain name */
 
-	RRnames=	8,	/* # of referenced names per RR */
-
 	/* parallelism: tune; was 32; allow lots */
 	Maxactive=	250,
 
 	/* tune; was 8*1000; that was too short */
 	Maxreqtm=	15*1000,	/* max. ms to process a request */
+	Minreqtm=	100,		/* min. ms to attempt a request */
+	Maxtcpdialtm=	4000,		/* max. ms to dial() tcp connection */
 
 	Notauthoritative = 0,
 	Authoritative,
@@ -164,6 +162,7 @@
 typedef struct Srv	Srv;
 typedef struct Txt	Txt;
 typedef struct Caa	Caa;
+typedef struct Unknown	Unknown;
 
 /*
  *  a structure to track a request and any slave process handling it
@@ -173,7 +172,7 @@
 	int	isslave;	/* pid of slave */
 	uvlong	aborttime;	/* time in ms at which we give up */
 	jmp_buf	mret;		/* where master jumps to after starting a slave */
-	ushort	id;
+	ushort	id;		/* internal id of request (just for logging) */
 	uchar	mark;
 	char	*from;		/* who asked us? */
 	void	*aux;
@@ -234,6 +233,10 @@
 {
 	Block;
 };
+struct Unknown
+{
+	Block;
+};
 
 /*
  *  text strings
@@ -290,6 +293,7 @@
 		Sig	*sig;
 		Null	*null;
 		Txt	*txt;
+		Unknown	*unknown;
 	};
 };
 
@@ -357,10 +361,10 @@
 {
 	Area	*next;
 
-	int	len;		/* strlen(area->soarr->owner->name) */
 	RR	*soarr;		/* soa defining this area */
-	int	neednotify;
-	int	needrefresh;
+	int	len;		/* strlen(area->soarr->owner->name) */
+	uchar	neednotify;
+	uchar	needrefresh;
 };
 
 typedef struct Cfg Cfg;
@@ -368,18 +372,18 @@
 	int	cachedb;
 	int	resolver;
 	int	justforw;	/* flag: pure resolver, just forward queries */
-	int	serve;		/* flag: serve udp queries */
-	int	inside;
-	int	straddle;
+	int	serve;		/* flag: serve tcp and udp queries */
+	int	nonrecursive;
 };
 
-/* (udp) query stats */
+/* query stats */
 typedef struct {
-	QLock;
 	ulong	slavehiwat;	/* procs */
 	ulong	qrecvd9p;	/* query counts */
 	ulong	qrecvdudp;
-	ulong	qsent;
+	ulong	qrecvdtcp;
+	ulong	qsentudp;
+	ulong	qsenttcp;
 	ulong	qrecvd9prpc;	/* packet count */
 	/* reply times by count */
 	ulong	under10ths[3*10+2];	/* under n*0.1 seconds, n is index */
@@ -406,35 +410,29 @@
 	OKneg,
 };
 
-extern Cfg	cfg;
 extern char	*dbfile;
-extern int	debug;
-extern Area	*delegated;
 extern char	*logfile;
-extern int	maxage;		/* age of oldest entry in cache (secs) */
+extern Cfg	cfg;
+extern int	debug;
 extern char	mntpt[];
-extern int	needrefresh;	/* set to pid of the process requesting flush */
-extern int	norecursion;
-extern ulong	now;		/* time base */
-extern uvlong	nowms;
-extern Area	*owned;
-extern int	sendnotifies;
-extern ulong	target;
-extern char	*trace;
-extern int	traceactivity;
-extern char	*zonerefreshprogram;
 
 #pragma	varargck	type	"\\"	uchar*
 #pragma	varargck	type	"R"	RR*
 #pragma	varargck	type	"Q"	RR*
 
-
 /* dn.c */
+extern int	needrefresh;
+extern ulong	now;		/* time base seconds */
+extern uvlong	nowms;		/* time base milliseconds */
+extern int	maxage;		/* age of oldest entry in cache (secs) */
+extern ulong	target;
+
 extern char	*rname[];
 extern unsigned	nrname;
 extern char	*opname[];
-extern Lock	dnlock;
 
+RR*	getdnsservers(int);
+
 void	abort(); /* char*, ... */;
 void	addserver(Server**, char*);
 int	bslashfmt(Fmt*);
@@ -442,9 +440,7 @@
 void	db2cache(int);
 void	dnageall(int);
 void	dnagedb(void);
-void	dnagenever(DN *);
 void	dnauthdb(void);
-void	dndump(char*);
 void	dninit(void);
 DN*	dnlookup(char*, int, int);
 DN*	idnlookup(char*, int, int);
@@ -458,7 +454,6 @@
 void	freeanswers(DNSmsg *mp);
 void	freeserverlist(Server*);
 void	getactivity(Request*);
-Area*	inmyarea(char*);
 void	putactivity(Request*);
 RR*	randomize(RR*);
 RR*	rralloc(int);
@@ -470,6 +465,7 @@
 void	rrfree(RR*);
 void	rrfreelist(RR*);
 RR*	rrlookup(DN*, int, int);
+RR*	rrgetzone(char*);
 char*	rrname(int, char*, int);
 RR*	rrremneg(RR**);
 RR*	rrremtype(RR**, int);
@@ -485,9 +481,11 @@
 void	warning(char*, ...);
 
 /* dnarea.c */
-void	refresh_areas(Area*);
-void	freearea(Area**);
-void	addarea(DN *dp, RR *rp, Ndbtuple *t);
+extern Area	*delegated;
+extern Area	*owned;
+void	addarea(RR *rp, Ndbtuple *t);
+void	freeareas(Area**);
+Area*	inmyarea(char*);
 
 /* dblookup.c */
 int	baddelegation(RR*, RR*, uchar*);
@@ -494,29 +492,28 @@
 RR*	dblookup(char*, int, int, int, int);
 RR*	dnsservers(int);
 RR*	domainlist(int);
-int	insideaddr(char *dom);
-int	insidens(uchar *ip);
 int	myip(uchar *ip);
 int	opendatabase(void);
-int	outsidensip(int, uchar *ip);
 
 /* dns.c */
 char*	walkup(char*);
-RR*	getdnsservers(int);
-void	logreply(int, uchar*, DNSmsg*);
-void	logsend(int, int, uchar*, char*, char*, int);
+void	logreply(int, char*, uchar*, DNSmsg*);
+void	logrequest(int, int, char*, uchar*, char*, char*, int);
 
 /* dnresolve.c */
 RR*	dnresolve(char*, int, int, Request*, RR**, int, int, int, int*);
 int	udpport(char *);
-int	mkreq(DN *dp, int type, uchar *buf, int flags, ushort reqno);
-int	seerootns(void);
-void	initdnsmsg(DNSmsg *mp, RR *rp, int flags, ushort reqno);
+int	mkreq(DN *dp, int type, uchar *pkt, int flags, ushort reqno);
 
 /* dnserver.c */
 void	dnserver(DNSmsg*, DNSmsg*, Request*, uchar *, int);
+
+/* dnudpserver.c */
 void	dnudpserver(char*, char*);
 
+/* dntcpserver.c */
+void	dntcpserver(char*, char*);
+
 /* dnnotify.c */
 void	dnnotify(DNSmsg*, DNSmsg*, Request*);
 void	notifyproc(char*);
@@ -528,3 +525,5 @@
 char*	convM2DNS(uchar*, int, DNSmsg*, int*);
 
 #pragma varargck argpos dnslog 1
+#pragma varargck argpos warning 1
+#pragma varargck argpos dnsdebug 1
--- a/sys/src/cmd/ndb/dnsdebug.c
+++ b/sys/src/cmd/ndb/dnsdebug.c
@@ -6,32 +6,18 @@
 #include <ndb.h>
 #include "dns.h"
 
-enum {
-	Maxrequest=		128,
-};
-
 Cfg cfg;
 
-static char *servername;
-static RR *serveraddrs;
-
 char	*dbfile;
-int	debug;
 char	*logfile = "dnsdebug";
-int	maxage  = 60*60;
+int	debug;
 char	mntpt[Maxpath];
-int	needrefresh;
-ulong	now;
-uvlong	nowms;
-char	*trace;
-int	traceactivity;
-char	*zonerefreshprogram;
 
+static char *servername;
+
 void	docmd(int, char**);
 void	doquery(char*, char*);
-void	preloadserveraddrs(void);
 int	setserver(char*);
-void	squirrelserveraddrs(void);
 
 #pragma	varargck	type	"P"	RR*
 int	prettyrrfmt(Fmt*);
@@ -52,7 +38,6 @@
 	char *f[4];
 
 	strcpy(mntpt, "/net");
-	cfg.inside = 1;
 
 	ARGBEGIN{
 	case 'f':
@@ -66,7 +51,6 @@
 		break;
 	case 'd':
 		debug = 1;
-		traceactivity = 1;
 		break;
 	case 'x':
 		dbfile = "/lib/ndb/external";
@@ -82,11 +66,6 @@
 	srand(truerand());
 	db2cache(1);
 
-	if(cfg.resolver)
-		squirrelserveraddrs();
-
-	debug = 1;
-
 	if(argc > 0){
 		docmd(argc, argv);
 		exits(0);
@@ -160,7 +139,7 @@
 }
 
 void
-logreply(int id, uchar *addr, DNSmsg *mp)
+logreply(int id, char *rcvd, uchar *addr, DNSmsg *mp)
 {
 	RR *rp;
 	char buf[12], resp[32];
@@ -189,7 +168,7 @@
 		break;
 	}
 
-	print("%d: rcvd %s from %I (%s%s%s%s%s)\n", id, resp, addr,
+	print("%d: %s %I %s (%s%s%s%s%s)\n", id, rcvd, addr, resp,
 		mp->flags & Fauth? "authoritative": "",
 		mp->flags & Ftrunc? " truncated": "",
 		mp->flags & Frecurse? " recurse": "",
@@ -204,12 +183,12 @@
 }
 
 void
-logsend(int id, int subid, uchar *addr, char *sname, char *rname, int type)
+logrequest(int id, int depth, char *send, uchar *addr, char *sname, char *rname, int type)
 {
-	char buf[12];
+	char tname[32];
 
-	print("%d.%d: sending to %I/%s %s %s\n", id, subid,
-		addr, sname, rname, rrname(type, buf, sizeof buf));
+	print("%d.%d: %s %I/%s %s %s\n", id, depth, send,
+		addr, sname, rname, rrname(type, tname, sizeof tname));
 }
 
 RR*
@@ -223,64 +202,9 @@
 	rr = rralloc(Tns);
 	rr->owner = dnlookup("local#dns#servers", class, 1);
 	rr->host = idnlookup(servername, class, 1);
-
 	return rr;
 }
 
-void
-squirrelserveraddrs(void)
-{
-	int v4;
-	char *attr;
-	RR *rr, *rp, **l;
-	Request req;
-
-	/* look up the resolver address first */
-	cfg.resolver = 0;
-	debug = 0;
-	if(serveraddrs){
-		rrfreelist(serveraddrs);
-		serveraddrs = nil;
-	}
-	rr = getdnsservers(Cin);
-	l = &serveraddrs;
-	for(rp = rr; rp != nil; rp = rp->next){
-		attr = ipattr(rp->host->name);
-		v4 = strcmp(attr, "ip") == 0;
-		if(v4 || strcmp(attr, "ipv6") == 0){
-			*l = rralloc(v4? Ta: Taaaa);
-			(*l)->owner = rp->host;
-			(*l)->ip = rp->host;
-			l = &(*l)->next;
-			continue;
-		}
-		memset(&req, 0, sizeof req);
-		req.isslave = 1;
-		req.aborttime = timems() + Maxreqtm;
-		*l = dnresolve(rp->host->name, Cin, Ta, &req, nil, 0, Recurse, 0, nil);
-		if(*l == nil)
-			*l = dnresolve(rp->host->name, Cin, Taaaa, &req,
-				nil, 0, Recurse, 0, nil);
-		while(*l != nil)
-			l = &(*l)->next;
-	}
-	cfg.resolver = 1;
-	debug = 1;
-}
-
-void
-preloadserveraddrs(void)
-{
-	RR *rp, **l, *first;
-
-	first = nil;
-	l = &first;
-	for(rp = serveraddrs; rp != nil; rp = rp->next){
-		rrcopy(rp, l);
-		rrattach(first, Authoritative);
-	}
-}
-
 int
 setserver(char *server)
 {
@@ -291,14 +215,9 @@
 	}
 	if(server == nil || *server == 0)
 		return 0;
-	servername = strdup(server);
-	squirrelserveraddrs();
-	if(serveraddrs == nil){
-		print("can't resolve %s\n", servername);
-		cfg.resolver = 0;
-	} else
-		cfg.resolver = 1;
-	return cfg.resolver? 0: -1;
+	servername = estrdup(server);
+	cfg.resolver = 1;
+	return 0;
 }
 
 void
@@ -309,9 +228,6 @@
 	RR *rr, *rp;
 	Request req;
 
-	if(cfg.resolver)
-		preloadserveraddrs();
-
 	/* default to an "ip" request if alpha, "ptr" if numeric */
 	if(tstr == nil || *tstr == 0)
 		if(strcmp(ipattr(name), "ip") == 0)
@@ -344,6 +260,8 @@
 	getactivity(&req);
 	req.isslave = 1;
 	req.aborttime = timems() + Maxreqtm;
+	req.from = argv0;
+
 	rr = dnresolve(buf, Cin, type, &req, nil, 0, Recurse, rooted, nil);
 	if(rr){
 		print("----------------------------\n");
@@ -361,14 +279,14 @@
 	int tmpsrv;
 	char *name, *type;
 
-	name = type = nil;
-	tmpsrv = 0;
-
-	if(strcmp(f[0], "refresh") == 0){
+	if(n == 1 && strcmp(f[0], "refresh") == 0){
 		db2cache(1);
 		dnageall(1);
 		return;
 	}
+
+	name = type = nil;
+	tmpsrv = 0;
 
 	if(*f[0] == '@') {
 		if(setserver(f[0]+1) < 0)
--- a/sys/src/cmd/ndb/dnserver.c
+++ b/sys/src/cmd/ndb/dnserver.c
@@ -6,9 +6,6 @@
 static RR*	doextquery(DNSmsg*, Request*, int);
 static void	hint(RR**, RR*);
 
-/* set in dns.c */
-int	norecursion;		/* don't allow recursive requests */
-
 /*
  *  answer a dns request
  */
@@ -22,7 +19,7 @@
 	Area *myarea;
 	RR *tp, *neg, *rp;
 
-	recursionflag = norecursion? 0: Fcanrec;
+	recursionflag = cfg.nonrecursive? 0: Fcanrec;
 	memset(repp, 0, sizeof(*repp));
 	repp->id = reqp->id;
 	repp->flags = Fresp | recursionflag | Oquery;
@@ -37,22 +34,24 @@
 		errmsg = "";
 		if (rcode >= 0 && rcode < nrname)
 			errmsg = rname[rcode];
-		dnslog("server: response code 0%o (%s), req from %I",
-			rcode, errmsg, srcip);
+		dnslog("%d: server: response code 0%o (%s), req from %I",
+			req->id, rcode, errmsg, srcip);
 		/* provide feedback to clients who send us trash */
 		repp->flags = (rcode&Rmask) | Fresp | Fcanrec | Oquery;
 		return;
 	}
 	if(!rrsupported(repp->qd->type)){
-		dnslog("server: unsupported request %s from %I",
-			rrname(repp->qd->type, tname, sizeof tname), srcip);
+		if(debug)
+			dnslog("%d: server: unsupported request %s from %I",
+				req->id, rrname(repp->qd->type, tname, sizeof tname), srcip);
 		repp->flags = Runimplimented | Fresp | Fcanrec | Oquery;
 		return;
 	}
 
 	if(repp->qd->owner->class != Cin){
-		dnslog("server: unsupported class %d from %I",
-			repp->qd->owner->class, srcip);
+		if(debug)
+			dnslog("%d: server: unsupported class %d from %I",
+				req->id, repp->qd->owner->class, srcip);
 		repp->flags = Runimplimented | Fresp | Fcanrec | Oquery;
 		return;
 	}
@@ -60,15 +59,15 @@
 	myarea = inmyarea(repp->qd->owner->name);
 	if(myarea != nil) {
 		if(repp->qd->type == Tixfr || repp->qd->type == Taxfr){
-			dnslog("server: unsupported xfr request %s for %s from %I",
-				rrname(repp->qd->type, tname, sizeof tname),
-				repp->qd->owner->name, srcip);
-			repp->flags = Runimplimented | Fresp | recursionflag |
-				Oquery;
+			if(debug)
+				dnslog("%d: server: unsupported xfr request %s for %s from %I",
+					req->id, rrname(repp->qd->type, tname, sizeof tname),
+					repp->qd->owner->name, srcip);
+			repp->flags = Runimplimented | Fresp | recursionflag | Oquery;
 			return;
 		}
 	}
-	if(myarea == nil && norecursion) {
+	if(myarea == nil && cfg.nonrecursive) {
 		/* we don't recurse and we're not authoritative */
 		repp->flags = Rok | Fresp | Oquery;
 		neg = nil;
@@ -114,8 +113,9 @@
 				break;
 			}
 
-			if (strncmp(nsdp->name, "local#", 6) == 0)
-				dnslog("returning %s as nameserver", nsdp->name);
+			if(strncmp(nsdp->name, "local#", 6) == 0)
+				dnslog("%d: returning %s as nameserver",
+					req->id, nsdp->name);
 			repp->ns = dblookup(cp, repp->qd->owner->class, Tns, 0, 0);
 			if(repp->ns)
 				break;
--- a/sys/src/cmd/ndb/dnsgetip.c
+++ b/sys/src/cmd/ndb/dnsgetip.c
@@ -7,15 +7,9 @@
 
 Cfg cfg;
 char *dbfile;
-int debug		= 0;
 char *logfile		= "dnsgetip";
-int	maxage		= 60*60;
+int debug		= 0;
 char mntpt[Maxpath];
-int	needrefresh	= 0;
-ulong	now		= 0;
-uvlong	nowms		= 0;
-int	traceactivity	= 0;
-char	*zonerefreshprogram;
 
 int aflag = 0;
 int addresses = 0;
@@ -35,8 +29,9 @@
 
 	memset(&req, 0, sizeof req);
 	getactivity(&req);
-	req.isslave = 1;
 	req.aborttime = timems() + Maxreqtm;
+	req.isslave = 1;
+	req.from = argv0;
 
 	rr = dnresolve(name, Cin, type, &req, nil, 0, Recurse, 0, &status);
 	neg = rrremneg(&rr);
@@ -58,6 +53,7 @@
 	}
 
 	rrfreelist(rr);
+	putactivity(&req);
 
 	return errmsg;
 }
@@ -75,7 +71,6 @@
 	char *e4, *e6;
 
 	strcpy(mntpt, "/net");
-	cfg.inside = 1;
 	cfg.resolver = 1;
 
 	ARGBEGIN{
@@ -120,5 +115,5 @@
 
 /* stubs */
 void syslog(int, char*, char*, ...){}
-void logreply(int, uchar*, DNSmsg*){}
-void logsend(int, int, uchar*, char*, char*, int){}
+void logreply(int, char*, uchar*, DNSmsg*){}
+void logrequest(int, int, char*, uchar*, char*, char*, int){}
--- a/sys/src/cmd/ndb/dnstcp.c
+++ /dev/null
@@ -1,433 +1,0 @@
-/*
- * dnstcp - serve dns via tcp
- */
-#include <u.h>
-#include <libc.h>
-#include <bio.h>
-#include <ndb.h>
-#include <ip.h>
-#include "dns.h"
-
-Cfg cfg;
-
-char	*caller = "";
-char	*dbfile;
-int	anyone;
-int	debug;
-char	*logfile = "dns";
-int	maxage = 60*60;
-char	mntpt[Maxpath];
-int	needrefresh;
-ulong	now;
-uvlong	nowms;
-int	traceactivity;
-char	*zonerefreshprogram;
-
-static int	readmsg(int, uchar*, int);
-static void	reply(int, DNSmsg*, Request*);
-static void	dnzone(DNSmsg*, DNSmsg*, Request*, uchar*);
-static void	getcaller(char*);
-static void	refreshmain(char*);
-
-void
-usage(void)
-{
-	fprint(2, "usage: %s [-adrR] [-f ndbfile] [-x netmtpt] [conndir]\n", argv0);
-	exits("usage");
-}
-
-void
-main(int argc, char *argv[])
-{
-	volatile int len, rcode;
-	volatile char tname[32];
-	char *volatile err, *volatile ext = "";
-	volatile uchar buf[64*1024], callip[IPaddrlen];
-	volatile DNSmsg reqmsg, repmsg;
-	volatile Request req;
-
-	cfg.cachedb = 1;
-	ARGBEGIN{
-	case 'a':
-		anyone++;
-		break;
-	case 'd':
-		debug++;
-		break;
-	case 'f':
-		dbfile = EARGF(usage());
-		break;
-	case 'r':
-		cfg.resolver = 1;
-		break;
-	case 'R':
-		norecursion = 1;
-		break;
-	case 'x':
-		ext = EARGF(usage());
-		break;
-	default:
-		usage();
-		break;
-	}ARGEND
-
-	if(argc > 0)
-		getcaller(argv[0]);
-
-	cfg.inside = 1;
-	dninit();
-
-	if(*ext == '/')
-		snprint(mntpt, sizeof mntpt, "%s", ext);
-	else
-		snprint(mntpt, sizeof mntpt, "/net%s", ext);
-
-	dnslog("dnstcp call from %s", caller);
-	memset(callip, 0, sizeof callip);
-	parseip(callip, caller);
-
-	srand(truerand());
-	db2cache(1);
-
-	memset(&req, 0, sizeof req);
-	setjmp(req.mret);
-	req.isslave = 0;
-	procsetname("main loop");
-
-	alarm(10*1000);
-
-	/* loop on requests */
-	for(;; putactivity(&req)){
-		memset(&repmsg, 0, sizeof repmsg);
-		len = readmsg(0, buf, sizeof buf);
-		if(len <= 0)
-			break;
-
-		getactivity(&req);
-		req.aborttime = timems() + 15*Min*1000;
-		rcode = 0;
-		memset(&reqmsg, 0, sizeof reqmsg);
-		err = convM2DNS(buf, len, &reqmsg, &rcode);
-		if(err){
-			dnslog("server: input error: %s from %s", err, caller);
-			free(err);
-			break;
-		}
-		if (rcode == 0)
-			if(reqmsg.qdcount < 1){
-				dnslog("server: no questions from %s", caller);
-				break;
-			} else if(reqmsg.flags & Fresp){
-				dnslog("server: reply not request from %s",
-					caller);
-				break;
-			} else if((reqmsg.flags & Omask) != Oquery){
-				dnslog("server: op %d from %s",
-					reqmsg.flags & Omask, caller);
-				break;
-			}
-
-		if(reqmsg.qd == nil){
-			dnslog("server: no question RR from %s", caller);
-			break;
-		}
-
-		if(debug)
-			dnslog("[%d] %d: serve (%s) %d %s %s",
-				getpid(), req.id, caller,
-				reqmsg.id, reqmsg.qd->owner->name,
-				rrname(reqmsg.qd->type, tname, sizeof tname));
-
-		/* loop through each question */
-		while(reqmsg.qd)
-			if(reqmsg.qd->type == Taxfr)
-				dnzone(&reqmsg, &repmsg, &req, callip);
-			else {
-				dnserver(&reqmsg, &repmsg, &req, callip, rcode);
-				reply(1, &repmsg, &req);
-				rrfreelist(repmsg.qd);
-				rrfreelist(repmsg.an);
-				rrfreelist(repmsg.ns);
-				rrfreelist(repmsg.ar);
-			}
-		rrfreelist(reqmsg.qd);		/* qd will be nil */
-		rrfreelist(reqmsg.an);
-		rrfreelist(reqmsg.ns);
-		rrfreelist(reqmsg.ar);
-
-		if(req.isslave){
-			putactivity(&req);
-			_exits(0);
-		}
-	}
-	refreshmain(mntpt);
-}
-
-static int
-readmsg(int fd, uchar *buf, int max)
-{
-	int n;
-	uchar x[2];
-
-	if(readn(fd, x, 2) != 2)
-		return -1;
-	n = x[0]<<8 | x[1];
-	if(n > max)
-		return -1;
-	if(readn(fd, buf, n) != n)
-		return -1;
-	return n;
-}
-
-static void
-reply(int fd, DNSmsg *rep, Request *req)
-{
-	int len, rv;
-	char tname[32];
-	uchar buf[64*1024];
-	RR *rp;
-
-	if(debug){
-		dnslog("%d: reply (%s) %s %s %ux",
-			req->id, caller,
-			rep->qd->owner->name,
-			rrname(rep->qd->type, tname, sizeof tname),
-			rep->flags);
-		for(rp = rep->an; rp; rp = rp->next)
-			dnslog("an %R", rp);
-		for(rp = rep->ns; rp; rp = rp->next)
-			dnslog("ns %R", rp);
-		for(rp = rep->ar; rp; rp = rp->next)
-			dnslog("ar %R", rp);
-	}
-
-
-	len = convDNS2M(rep, buf+2, sizeof(buf) - 2);
-	buf[0] = len>>8;
-	buf[1] = len;
-	rv = write(fd, buf, len+2);
-	if(rv != len+2){
-		dnslog("[%d] sending reply: %d instead of %d", getpid(), rv,
-			len+2);
-		exits(0);
-	}
-}
-
-/*
- *  Hash table for domain names.  The hash is based only on the
- *  first element of the domain name.
- */
-extern DN	*ht[HTLEN];
-
-static int
-numelem(char *name)
-{
-	int i;
-
-	i = 1;
-	for(; *name; name++)
-		if(*name == '.')
-			i++;
-	return i;
-}
-
-int
-inzone(DN *dp, char *name, int namelen, int depth)
-{
-	int n;
-
-	if(dp->name == nil)
-		return 0;
-	if(numelem(dp->name) != depth)
-		return 0;
-	n = strlen(dp->name);
-	if(n < namelen)
-		return 0;
-	if(cistrcmp(name, dp->name + n - namelen) != 0)
-		return 0;
-	if(n > namelen && dp->name[n - namelen - 1] != '.')
-		return 0;
-	return 1;
-}
-
-static Server*
-findserver(uchar *srcip, Server *servers, Request *req)
-{
-	uchar ip[IPaddrlen];
-	RR *list, *rp;
-	int tmp;
-
-	for(; servers != nil; servers = servers->next){
-		if(strcmp(ipattr(servers->name), "ip") == 0){
-			if(parseip(ip, servers->name) == -1)
-				continue;
-			if(ipcmp(srcip, ip) == 0)
-				return servers;
-			continue;
-		}
-
-		tmp = cfg.resolver;
-		cfg.resolver = 1;
-		list = dnresolve(servers->name, Cin, isv4(srcip)? Ta: Taaaa,
-			req, nil, 0, Recurse, 0, nil);
-		cfg.resolver = tmp;
-
-		for(rp = list; rp != nil; rp = rp->next){
-			if(parseip(ip, rp->ip->name) == -1)
-				continue;
-			if(ipcmp(srcip, ip) == 0)
-				break;
-		}
-		rrfreelist(list);
-		if(rp != nil)
-			return servers;
-	}
-	return nil;
-}
-
-static void
-dnzone(DNSmsg *reqp, DNSmsg *repp, Request *req, uchar *srcip)
-{
-	DN *dp, *ndp;
-	RR r, *rp;
-	int h, depth, found, nlen;
-
-	memset(repp, 0, sizeof(*repp));
-	repp->id = reqp->id;
-	repp->qd = reqp->qd;
-	reqp->qd = reqp->qd->next;
-	repp->qd->next = 0;
-	repp->flags = Fauth | Fresp | Oquery;
-	if(!norecursion)
-		repp->flags |= Fcanrec;
-	dp = repp->qd->owner;
-
-	/* send the soa */
-	repp->an = rrlookup(dp, Tsoa, NOneg);
-	if(repp->an != nil && !anyone && !myip(srcip)
-	&& findserver(srcip, repp->an->soa->slaves, req) == nil){
-		dnslog("dnstcp: %I axfr %s - not a dnsslave", srcip, dp->name);
-		rrfreelist(repp->an);
-		repp->an = nil;
-	}
-	reply(1, repp, req);
-	if(repp->an == nil)
-		goto out;
-	rrfreelist(repp->an);
-	repp->an = nil;
-
-	nlen = strlen(dp->name);
-
-	/* construct a breadth-first search of the name space (hard with a hash) */
-	repp->an = &r;
-	for(depth = numelem(dp->name); ; depth++){
-		found = 0;
-		for(h = 0; h < HTLEN; h++)
-			for(ndp = ht[h]; ndp; ndp = ndp->next)
-				if(inzone(ndp, dp->name, nlen, depth)){
-					for(rp = ndp->rr; rp; rp = rp->next){
-						/*
-						 * there shouldn't be negatives,
-						 * but just in case.
-						 * don't send any soa's,
-						 * ns's are enough.
-						 */
-						if (rp->negative ||
-						    rp->type == Tsoa)
-							continue;
-						r = *rp;
-						r.next = 0;
-						reply(1, repp, req);
-					}
-					found = 1;
-				}
-		if(!found)
-			break;
-	}
-
-	/* resend the soa */
-	repp->an = rrlookup(dp, Tsoa, NOneg);
-	reply(1, repp, req);
-	rrfreelist(repp->an);
-	repp->an = nil;
-out:
-	rrfree(repp->qd);
-	repp->qd = nil;
-}
-
-static void
-getcaller(char *dir)
-{
-	int fd, n;
-	static char remote[128];
-
-	snprint(remote, sizeof(remote), "%s/remote", dir);
-	fd = open(remote, OREAD);
-	if(fd < 0)
-		return;
-	n = read(fd, remote, sizeof remote - 1);
-	close(fd);
-	if(n <= 0)
-		return;
-	if(remote[n-1] == '\n')
-		n--;
-	remote[n] = 0;
-	caller = remote;
-}
-
-static void
-refreshmain(char *net)
-{
-	int fd;
-	char file[128];
-
-	snprint(file, sizeof(file), "%s/dns", net);
-	if(debug)
-		dnslog("refreshing %s", file);
-	fd = open(file, ORDWR);
-	if(fd < 0)
-		dnslog("can't refresh %s", file);
-	else {
-		fprint(fd, "refresh");
-		close(fd);
-	}
-}
-
-/*
- *  the following varies between dnsdebug and dns
- */
-void
-logreply(int id, uchar *addr, DNSmsg *mp)
-{
-	RR *rp;
-
-	dnslog("%d: rcvd %I flags:%s%s%s%s%s", id, addr,
-		mp->flags & Fauth? " auth": "",
-		mp->flags & Ftrunc? " trunc": "",
-		mp->flags & Frecurse? " rd": "",
-		mp->flags & Fcanrec? " ra": "",
-		(mp->flags & (Fauth|Rmask)) == (Fauth|Rname)? " nx": "");
-	for(rp = mp->qd; rp != nil; rp = rp->next)
-		dnslog("%d: rcvd %I qd %s", id, addr, rp->owner->name);
-	for(rp = mp->an; rp != nil; rp = rp->next)
-		dnslog("%d: rcvd %I an %R", id, addr, rp);
-	for(rp = mp->ns; rp != nil; rp = rp->next)
-		dnslog("%d: rcvd %I ns %R", id, addr, rp);
-	for(rp = mp->ar; rp != nil; rp = rp->next)
-		dnslog("%d: rcvd %I ar %R", id, addr, rp);
-}
-
-void
-logsend(int id, int subid, uchar *addr, char *sname, char *rname, int type)
-{
-	char buf[12];
-
-	dnslog("%d.%d: sending to %I/%s %s %s",
-		id, subid, addr, sname, rname, rrname(type, buf, sizeof buf));
-}
-
-RR*
-getdnsservers(int class)
-{
-	return dnsservers(class);
-}
--- /dev/null
+++ b/sys/src/cmd/ndb/dntcpserver.c
@@ -1,0 +1,324 @@
+#include <u.h>
+#include <libc.h>
+#include <bio.h>
+#include <ndb.h>
+#include <ip.h>
+#include "dns.h"
+
+enum {
+	Maxprocs = 64,
+};
+
+static int	readmsg(int, uchar*, int);
+static int	reply(int, uchar *, DNSmsg*, Request*, uchar*);
+static int	dnzone(int, uchar *, DNSmsg*, DNSmsg*, Request*, uchar*);
+static int	tcpannounce(char *mntpt, char *addr, char caller[128]);
+
+/* serve dns requests arriving over tcp on addr of the network at mntpt */
+void
+dntcpserver(char *mntpt, char *addr)
+{
+	volatile int fd, len, rcode, rv;
+	volatile long ms;
+	volatile char caller[128];
+	volatile uchar pkt[Maxpkt], callip[IPaddrlen];
+	volatile DNSmsg reqmsg, repmsg;
+	volatile Request req;
+	char *volatile err;
+
+	/*
+	 * fork sharing text, data, and bss with parent.
+	 * stay in the same note group.
+	 */
+	switch(rfork(RFPROC|RFMEM|RFNOWAIT)){
+	case -1:
+		break;		/* rfork failed: carry on in the calling process */
+	case 0:
+		break;		/* child: becomes the tcp server */
+	default:
+		return;		/* parent: back to the caller */
+	}
+
+	procsetname("%s: tcp server %s", mntpt, addr);
+	if((fd = tcpannounce(mntpt, addr, caller)) < 0){
+		warning("can't announce %s on %s: %r", addr, mntpt);
+		_exits(0);
+	}
+	parseip(callip, caller);	/* past this point we are the process tcpannounce() forked for one call */
+	procsetname("%s: tcp server %s serving %s", mntpt, addr, caller);
+
+	memset(&req, 0, sizeof req);
+	req.isslave = 1;
+	req.from = caller;
+	req.aborttime = timems() + Maxreqtm;	/* set once: one deadline for the whole connection */
+	/* loop on requests */
+	for(;; putactivity(&req)){	/* putactivity() runs after each completed request */
+		memset(&reqmsg, 0, sizeof reqmsg);
+
+		ms = (long)(req.aborttime - nowms);	/* time left before aborttime */
+		if(ms < Minreqtm){
+		noreq:	/* also the target of the failed reads below; always before getactivity() */
+			close(fd);
+			_exits(0);
+		}
+		alarm(ms);	/* the alarm is cleared again after the reads, on every path */
+		if(readn(fd, pkt, 2) != 2){
+			alarm(0);
+			goto noreq;
+		}
+		len = pkt[0]<<8 | pkt[1];	/* message length, high byte first */
+		if(len <= 0 || len > Maxtcp || readn(fd, pkt+2, len) != len){	/* message is stored after the length */
+			alarm(0);
+			goto noreq;
+		}
+		alarm(0);
+
+		getactivity(&req);
+		if((long)(req.aborttime - timems()) < Minreqtm)	/* too little time left to answer */
+			break;
+
+		stats.qrecvdtcp++;
+
+		rcode = 0;
+		err = convM2DNS(pkt+2, len, &reqmsg, &rcode);
+		if(err){
+			dnslog("%d: server: input err, len %d: %s from %s",
+				req.id, len, err, caller);
+			free(err);
+			break;
+		}
+		if(rcode == 0)	/* these checks are skipped when parsing already set an rcode */
+			if(reqmsg.qdcount < 1){
+				dnslog("%d: server: no questions from %s",
+					req.id, caller);
+				break;
+			} else if(reqmsg.flags & Fresp){
+				dnslog("%d: server: reply not request from %s",
+					req.id, caller);
+				break;
+			} else if((reqmsg.flags & Omask) != Oquery){
+				dnslog("%d: server: op %d from %s",
+					req.id, reqmsg.flags & Omask, caller);
+				break;
+			}
+
+		if(reqmsg.qd == nil){	/* the code below dereferences reqmsg.qd */
+			dnslog("%d: server: no question RR from %s",
+				req.id, caller);
+			break;
+		}
+
+		logrequest(req.id, 0, "rcvd", callip, caller,
+			reqmsg.qd->owner->name, reqmsg.qd->type);
+
+		/* loop through each question */
+		while(reqmsg.qd){	/* dnzone() unlinks the head question; dnserver() presumably does too */
+			memset(&repmsg, 0, sizeof(repmsg));
+			if(reqmsg.qd->type == Taxfr)
+				rv = dnzone(fd, pkt, &reqmsg, &repmsg, &req, callip);
+			else {
+				dnserver(&reqmsg, &repmsg, &req, callip, rcode);
+				rv = reply(fd, pkt, &repmsg, &req, callip);
+				freeanswers(&repmsg);
+			}
+			if(rv < 0)	/* the reply could not be sent: give up on the connection */
+				goto out;
+		}
+		freeanswers(&reqmsg);
+	}
+out:	/* reached by break or a failed reply, always after getactivity() */
+	close(fd);
+	freeanswers(&reqmsg);
+	putactivity(&req);
+	_exits(0);
+}
+
+/* send rep on fd behind a 2-byte length; returns bytes written, or -1 when out of time or on error */
+static int
+reply(int fd, uchar *pkt, DNSmsg *rep, Request *req, uchar *callip)
+{
+	long left;
+	int n, wrote;
+
+	/* taking too long */
+	left = (long)(req->aborttime - nowms);
+	if(left < 1)
+		return -1;
+	logreply(req->id, "send", callip, rep);
+
+	/* the message goes after its length, high byte first */
+	n = convDNS2M(rep, pkt+2, Maxtcp);
+	pkt[0] = n>>8;
+	pkt[1] = n;
+	n += 2;
+
+	/* the alarm is armed with the time remaining and cleared after the write */
+	alarm(left);
+	wrote = write(fd, pkt, n);
+	alarm(0);
+	if(wrote == n)
+		return wrote;
+	dnslog("%d: error sending reply to %I: %r", req->id, callip);
+	return -1;
+}
+
+/* first of servers matching callip: "*", an ip literal, or a name resolving to callip; else nil */
+static Server*
+findserver(uchar *callip, Server *servers, Request *req)
+{
+	uchar addr[IPaddrlen];
+	RR *answers, *rr;
+	Server *sp;
+	int hit;
+
+	for(sp = servers; sp != nil; sp = sp->next){
+		if(strcmp(sp->name, "*") == 0)
+			return sp;
+		if(strcmp(ipattr(sp->name), "ip") == 0){
+			/* literal address: compare directly, never resolve */
+			if(parseip(addr, sp->name) != -1 && ipcmp(callip, addr) == 0)
+				return sp;
+			continue;
+		}
+		/* anything else is resolved, asking for the caller's address type */
+		answers = dnresolve(sp->name, Cin, isv4(callip)? Ta: Taaaa,
+			req, nil, 0, Recurse, 0, nil);
+		rrfreelist(rrremneg(&answers));
+		hit = 0;
+		for(rr = answers; rr != nil && !hit; rr = rr->next)
+			if(parseip(addr, rr->ip->name) != -1)
+				hit = ipcmp(callip, addr) == 0;
+		rrfreelist(answers);
+		if(hit)
+			return sp;
+	}
+	return nil;
+}
+
+/* answer an axfr question: soa, the RRs from rrgetzone(), soa again; returns the last reply() result */
+static int
+dnzone(int fd, uchar *pkt, DNSmsg *reqp, DNSmsg *repp, Request *req, uchar *callip)
+{
+	DN *dp;
+	RR *rp;
+	int rv;
+
+	repp->id = reqp->id;
+	repp->qd = reqp->qd;
+	reqp->qd = reqp->qd->next;	/* unlink the question being answered */
+	repp->qd->next = 0;
+	repp->flags = Fauth | Fresp | Oquery;
+	if(!cfg.nonrecursive)
+		repp->flags |= Fcanrec;
+	dp = repp->qd->owner;
+
+	/* send the soa */
+	repp->an = rrlookup(dp, Tsoa, NOneg);
+	if(repp->an != nil && !myip(callip)
+	&& findserver(callip, repp->an->soa->slaves, req) == nil){
+		dnslog("%d: dnzone: %I axfr %s - not a dnsslave",
+			req->id, callip, dp->name);
+		rrfreelist(repp->an);
+		repp->an = nil;
+	}
+	rv = reply(fd, pkt, repp, req, callip);
+	if(repp->an == nil || rv < 0)
+		goto out;	/* no soa, caller not allowed, or reply failed */
+	rrfreelist(repp->an);
+
+	/* send the zone, one RR per message */
+	repp->an = rrgetzone(dp->name);
+	while(repp->an != nil) {
+		rp = repp->an->next;
+		repp->an->next = nil;
+		rv = reply(fd, pkt, repp, req, callip);
+		rrfreelist(repp->an);
+		repp->an = rp;
+		if(rv < 0)
+			goto out;	/* the unsent rest of the zone is freed at out */
+	}
+
+	/* resend the soa */
+	repp->an = rrlookup(dp, Tsoa, NOneg);
+	rv = reply(fd, pkt, repp, req, callip);
+out:
+	/* every exit frees the answers still attached, so a failed transfer does not leak them */
+	rrfreelist(repp->an);
+	repp->an = nil;
+	rrfree(repp->qd);
+	repp->qd = nil;
+	return rv;
+}
+
+/* announce tcp!addr!53 on mntpt, fork a child per call; the child returns the connection fd, failure is -1 */
+static int
+tcpannounce(char *mntpt, char *addr, char caller[128])
+{
+	char adir[NETPATHLEN], ldir[NETPATHLEN], buf[128];
+	int acfd, lcfd, dfd, wfd, rfd, procs;
+
+	/* announce tcp dns port */
+	snprint(buf, sizeof(buf), "%s/tcp!%s!53", mntpt, addr);
+	acfd = announce(buf, adir);
+	if(acfd < 0)
+		return -1;
+
+	/* open wait file to maintain child process count */
+	snprint(buf, sizeof(buf), "/proc/%d/wait", getpid());
+	wfd = open(buf, OREAD|OCEXEC);
+	if(wfd < 0){
+		close(acfd);
+		return -1;
+	}
+
+	procs = 0;
+	for(;;) {
+		if(procs >= Maxprocs || (procs % 8) == 0){	/* collect exited children */
+			while(procs > 0){
+				if(procs < Maxprocs){	/* below the limit: only read if the wait file has data */
+					Dir *d = dirfstat(wfd);
+					if(d == nil || d->length == 0){
+						free(d);
+						break;
+					}
+					free(d);
+				}
+				if(read(wfd, buf, sizeof(buf)) <= 0){	/* presumably no children left */
+					procs = 0;
+					break;
+				}
+				procs--;	/* one wait message consumed */
+			}
+		}
+
+		lcfd = listen(adir, ldir);
+		if(lcfd < 0){
+			close(wfd);
+			close(acfd);
+			return -1;
+		}
+
+		switch(rfork(RFPROC|RFMEM)){
+		case -1:
+			close(lcfd);	/* no child to take this call */
+			break;
+		case 0:
+			dfd = accept(lcfd, ldir);
+			close(lcfd);
+			if(dfd < 0)
+				_exits(0);
+
+			/* get the callers ip!port, without the newline ending the remote file */
+			memset(caller, 0, 128);
+			snprint(buf, sizeof(buf), "%s/remote", ldir);
+			if((rfd = open(buf, OREAD|OCEXEC)) >= 0){
+				read(rfd, caller, 128-1);
+				close(rfd);
+				caller[strcspn(caller, "\n")] = 0;	/* in bounds: caller was zeroed, at most 127 bytes read */
+			}
+			return dfd;	/* child returns */
+		default:
+			procs++;	/* lcfd is closed by the child; fd table presumably shared (no RFFDG) */
+		}
+	}
+}
--- a/sys/src/cmd/ndb/dnudpserver.c
+++ b/sys/src/cmd/ndb/dnudpserver.c
@@ -3,10 +3,6 @@
 #include <ip.h>
 #include "dns.h"
 
-enum {
-	Logqueries = 0,
-};
-
 static int	udpannounce(char*, char*);
 static void	reply(int, uchar*, DNSmsg*, Request*);
 
@@ -19,17 +15,8 @@
 	ushort	type;
 	ushort	id;
 };
-Inprogress inprog[Maxactive+2];
 
-typedef struct Forwtarg Forwtarg;
-struct Forwtarg {
-	char	*host;
-	uchar	addr[IPaddrlen];
-	int	fd;
-	ulong	lastdial;
-};
-Forwtarg forwtarg[10];
-int forwtcount;
+static Inprogress inprog[Maxactive+2];
 
 /*
  *  record client id and ignore retransmissions.
@@ -36,12 +23,10 @@
  *  we're still single thread at this point.
  */
 static Inprogress*
-clientrxmit(DNSmsg *req, uchar *buf)
+clientrxmit(DNSmsg *mp, Udphdr *uh, Request *req)
 {
 	Inprogress *p, *empty;
-	Udphdr *uh;
 
-	uh = (Udphdr *)buf;
 	empty = nil;
 	for(p = inprog; p < &inprog[Maxactive]; p++){
 		if(p->inuse == 0){
@@ -49,9 +34,9 @@
 				empty = p;
 			continue;
 		}
-		if(req->id == p->id)
-		if(req->qd->owner == p->owner)
-		if(req->qd->type == p->type)
+		if(mp->id == p->id)
+		if(mp->qd->owner == p->owner)
+		if(mp->qd->type == p->type)
 		if(memcmp(uh, &p->uh, Udphdrsize) == 0)
 			return nil;
 	}
@@ -58,72 +43,17 @@
 	if(empty == nil)
 		return nil; /* shouldn't happen: see slave() & Maxactive def'n */
 
-	empty->id = req->id;
-	empty->owner = req->qd->owner;
-	empty->type = req->qd->type;
-	if (empty->type != req->qd->type)
-		dnslog("clientrxmit: bogus req->qd->type %d", req->qd->type);
+	empty->id = mp->id;
+	empty->owner = mp->qd->owner;
+	empty->type = mp->qd->type;
+	if(empty->type != mp->qd->type)
+		dnslog("%d: clientrxmit: bogus req->qd->type %d", req->id, mp->qd->type);
 	memmove(&empty->uh, uh, Udphdrsize);
 	empty->inuse = 1;
 	return empty;
 }
 
-int
-addforwtarg(char *host)
-{
-	Forwtarg *tp;
-
-	if (forwtcount >= nelem(forwtarg)) {
-		dnslog("too many forwarding targets");
-		return -1;
-	}
-	tp = forwtarg + forwtcount;
-	if(parseip(tp->addr, host) == -1) {
-		dnslog("can't parse ip %s", host);
-		return -1;
-	}
-	tp->lastdial = time(nil);
-	tp->fd = udpport(mntpt);
-	if (tp->fd < 0)
-		return -1;
-
-	free(tp->host);
-	tp->host = estrdup(host);
-	forwtcount++;
-	return 0;
-}
-
 /*
- * fast forwarding of incoming queries to other dns servers.
- * intended primarily for debugging.
- */
-static void
-redistrib(uchar *buf, int len)
-{
-	uchar save[Udphdrsize];
-	Forwtarg *tp;
-	Udphdr *uh;
-
-	memmove(save, buf, Udphdrsize);
-
-	uh = (Udphdr *)buf;
-	for (tp = forwtarg; tp < forwtarg + forwtcount; tp++)
-		if (tp->fd >= 0) {
-			memmove(uh->raddr, tp->addr, sizeof tp->addr);
-			hnputs(uh->rport, 53);		/* dns port */
-			if (write(tp->fd, buf, len) != len) {
-				close(tp->fd);
-				tp->fd = -1;
-			}
-		} else if (tp->host && time(nil) - tp->lastdial > 60) {
-			tp->lastdial = time(nil);
-			tp->fd = udpport(mntpt);
-		}
-
-	memmove(buf, save, Udphdrsize);
-}
-
-/*
  *  a process to act as a dns server for outside reqeusts
  */
 void
@@ -131,8 +61,8 @@
 {
 	volatile int fd, len, op, rcode, served;
 	char *volatile err;
-	volatile char tname[32], ipstr[64];
-	volatile uchar buf[Udphdrsize + Maxudp + 1024];
+	volatile char caller[64];
+	volatile uchar pkt[Udphdrsize + Maxudp];
 	volatile DNSmsg reqmsg, repmsg;
 	Inprogress *volatile p;
 	volatile Request req;
@@ -167,97 +97,78 @@
 
 	/* loop on requests */
 	for(;; putactivity(&req)){
-		procsetname("%s: udp server %s: served %d", mntpt, addr, served);
-		memset(&repmsg, 0, sizeof repmsg);
 		memset(&reqmsg, 0, sizeof reqmsg);
+		procsetname("%s: udp server %s: served %d", mntpt, addr, served);
 
-		alarm(60*1000);
-		len = read(fd, buf, sizeof buf);
-		alarm(0);
+		len = read(fd, pkt, sizeof pkt);
 		if(len <= Udphdrsize){
 			close(fd);
 			goto restart;
 		}
 
-		if(forwtcount > 0)
-			redistrib(buf, len);
-
-		uh = (Udphdr*)buf;
+		uh = (Udphdr*)pkt;
 		len -= Udphdrsize;
 
-		// dnslog("read received UDP from %I to %I", uh->raddr, uh->laddr);
-		snprint(ipstr, sizeof(ipstr), "%I", uh->raddr);
+		snprint(caller, sizeof(caller), "%I", uh->raddr);
 		getactivity(&req);
 		req.aborttime = timems() + Maxreqtm;
-		req.from = ipstr;
+		req.from = caller;
 
 		served++;
 		stats.qrecvdudp++;
 
 		rcode = 0;
-		err = convM2DNS(&buf[Udphdrsize], len, &reqmsg, &rcode);
+		err = convM2DNS(&pkt[Udphdrsize], len, &reqmsg, &rcode);
 		if(err){
 			/* first bytes in buf are source IP addr */
-			dnslog("server: input error: %s from %I", err, buf);
+			dnslog("%d: server: input err, len %d: %s from %s",
+				req.id, len, err, caller);
 			free(err);
 			goto freereq;
 		}
 		if (rcode == 0)
 			if(reqmsg.qdcount < 1){
-				dnslog("server: no questions from %I", buf);
+				dnslog("%d: server: no questions from %s",
+					req.id, caller);
 				goto freereq;
 			} else if(reqmsg.flags & Fresp){
-				dnslog("server: reply not request from %I", buf);
+				dnslog("%d: server: reply not request from %s",
+					req.id, caller);
 				goto freereq;
 			}
 		op = reqmsg.flags & Omask;
 		if(op != Oquery && op != Onotify){
-			dnslog("server: op %d from %I", reqmsg.flags & Omask, buf);
+			dnslog("%d: server: op %d from %s",
+				req.id, reqmsg.flags & Omask, caller);
 			goto freereq;
 		}
 
 		if(reqmsg.qd == nil){
-			dnslog("server: no question RR from %I", buf);
+			dnslog("%d: server: no question RR from %s",
+				req.id, caller);
 			goto freereq;
 		}
 
-		if(debug || (trace && subsume(trace, reqmsg.qd->owner->name)))
-			dnslog("%d: serve (%I/%d) %d %s %s",
-				req.id, buf, uh->rport[0]<<8 | uh->rport[1],
-				reqmsg.id, reqmsg.qd->owner->name,
-				rrname(reqmsg.qd->type, tname, sizeof tname));
-
-		p = clientrxmit(&reqmsg, buf);
-		if(p == nil){
-			if(debug)
-				dnslog("%d: duplicate", req.id);
+		p = clientrxmit(&reqmsg, uh, &req);
+		if(p == nil)
 			goto freereq;
-		}
 
-		if (Logqueries) {
-			RR *rr;
+		logrequest(req.id, 0, "rcvd", uh->raddr, caller,
+			reqmsg.qd->owner->name, reqmsg.qd->type);
 
-			for (rr = reqmsg.qd; rr; rr = rr->next)
-				syslog(0, "dnsq", "id %d: (%I/%d) %d %s %s",
-					req.id, buf, uh->rport[0]<<8 |
-					uh->rport[1], reqmsg.id,
-					reqmsg.qd->owner->name,
-					rrname(reqmsg.qd->type, tname,
-					sizeof tname));
-		}
 		/* loop through each question */
 		while(reqmsg.qd){
 			memset(&repmsg, 0, sizeof repmsg);
 			switch(op){
 			case Oquery:
-				dnserver(&reqmsg, &repmsg, &req, buf, rcode);
+				dnserver(&reqmsg, &repmsg, &req, uh->raddr, rcode);
 				break;
 			case Onotify:
 				dnnotify(&reqmsg, &repmsg, &req);
 				break;
 			}
-			/* send reply on fd to address in buf's udp hdr */
-			reply(fd, buf, &repmsg, &req);
+			/* send reply on fd to address in pkt's udp hdr */
+			reply(fd, pkt, &repmsg, &req);
 			freeanswers(&repmsg);
 		}
 
@@ -271,6 +182,20 @@
 	}
 }
 
+/* log rep, encode it after the udp header in pkt and write header plus message to fd */
+static void
+reply(int fd, uchar *pkt, DNSmsg *rep, Request *req)
+{
+	int n;
+
+	logreply(req->id, "send", pkt, rep);
+	n = Udphdrsize + convDNS2M(rep, &pkt[Udphdrsize], Maxudp);
+	if(write(fd, pkt, n) == n)
+		return;
+	/* pkt is handed to %I as the address; it begins with the udp header */
+	dnslog("%d: error sending reply to %I: %r", req->id, pkt);
+}
+
 /*
  *  announce on well-known dns udp port and set message style interface
  */
@@ -280,17 +205,17 @@
 	static char hmsg[] = "headers";
 	static char imsg[] = "ignoreadvice";
 
-	char dir[64], datafile[64+6];
+	char adir[NETPATHLEN], buf[NETPATHLEN];
 	int data, ctl;
 
-	snprint(datafile, sizeof(datafile), "%s/udp!%s!dns", mntpt, addr);
-	ctl = announce(datafile, dir);
+	snprint(buf, sizeof(buf), "%s/udp!%s!53", mntpt, addr);
+	ctl = announce(buf, adir);
 	if(ctl < 0)
 		return -1;
 
 	/* turn on header style interface */
 	if(write(ctl, hmsg, sizeof(hmsg)-1) < 0){
-		warning("can't enable %s on %s: %r", hmsg, datafile);
+		warning("can't enable %s on %s: %r", hmsg, adir);
 		close(ctl);
 		return -1;
 	}
@@ -298,29 +223,10 @@
 	/* ignore ICMP advice */
 	write(ctl, imsg, sizeof(imsg)-1);
 
-	snprint(datafile, sizeof(datafile), "%s/data", dir);
-	data = open(datafile, ORDWR);
+	snprint(buf, sizeof(buf), "%s/data", adir);
+	data = open(buf, ORDWR|OCEXEC);
 	if(data < 0)
-		warning("can't open udp port %s: %r", datafile);
+		warning("can't open udp port %s: %r", adir);
 	close(ctl);
 	return data;
-}
-
-static void
-reply(int fd, uchar *buf, DNSmsg *rep, Request *reqp)
-{
-	int len;
-	char tname[32];
-
-	if(debug || (trace && subsume(trace, rep->qd->owner->name)))
-		dnslog("%d: reply (%I/%d) %d %s %s qd %R an %R ns %R ar %R",
-			reqp->id, buf, buf[4]<<8 | buf[5],
-			rep->id, rep->qd->owner->name,
-			rrname(rep->qd->type, tname, sizeof tname),
-			rep->qd, rep->an, rep->ns, rep->ar);
-
-	len = convDNS2M(rep, &buf[Udphdrsize], Maxudp);
-	len += Udphdrsize;
-	if(write(fd, buf, len) != len)
-		dnslog("error sending reply: %r");
 }
--- a/sys/src/cmd/ndb/mkfile
+++ b/sys/src/cmd/ndb/mkfile
@@ -10,18 +10,14 @@
 	csquery\
 	dns\
 	dnsquery\
-	dnstcp\
 	dnsdebug\
 	dnsgetip\
 	ipquery\
 	inform\
 
-DNSOBJ = dns.$O dnudpserver.$O dn.$O dnresolve.$O dblookup.$O dnserver.$O dnnotify.$O\
+DNSOBJ = dns.$O dnudpserver.$O dntcpserver.$O dn.$O dnresolve.$O dblookup.$O dnserver.$O dnnotify.$O\
 	 dnarea.$O convM2DNS.$O convDNS2M.$O
 
-DNSTCPOBJ = dnstcp.$O dn.$O dnresolve.$O dblookup.$O dnserver.$O\
-	 dnarea.$O convM2DNS.$O convDNS2M.$O
-
 DNSDEBUGOBJ = dnsdebug.$O dn.$O dnresolve.$O dblookup.$O dnserver.$O\
 	 dnarea.$O convM2DNS.$O convDNS2M.$O
 
@@ -37,9 +33,6 @@
 $O.dns: $DNSOBJ
 	$LD -o $target $prereq
 
-$O.dnstcp: $DNSTCPOBJ
-	$LD -o $target $prereq
-
 $O.dnsdebug: $DNSDEBUGOBJ
 	$LD -o $target $prereq
 
@@ -48,21 +41,6 @@
 
 $O.cs: cs.$O
 	$LD -o $target $prereq
-
-$O.testipinfo: testipinfo.$O ipinfo.$O
-	$LD -o $target $prereq
-
-push: $O.dns $O.dnsdebug $O.dnstcp
-	import lookout / /n/lookout
-	cp $O.dns /n/lookout/$objtype/bin/ndb/dns
-	cp $O.dnsdebug /n/lookout/$objtype/bin/ndb/dnsdebug
-	cp $O.dnstcp /n/lookout/$objtype/bin/ndb/dnstcp
-	unmount /n/lookout
-	import boundary / /n/boundary
-	cp $O.dns /n/boundary/$objtype/bin/ndb/dns
-	cp $O.dnsdebug /n/boundary/$objtype/bin/ndb/dnsdebug
-	cp $O.dnstcp /n/boundary/$objtype/bin/ndb/dnstcp
-	unmount /n/boundary
 
 cs.safeinstall:V: $O.cs
 	if(test -e $BIN/ooocs)