ref: d1540c7f666e3c5d636b48c956b444205b50502d
dir: /appl/cmd/sed.b/
implement Sed; # # partial sed implementation borrowed from plan9 sed. # include "sys.m"; sys: Sys; include "draw.m"; include "arg.m"; arg: Arg; include "bufio.m"; bufio: Bufio; Iobuf: import bufio; include "string.m"; str: String; include "regex.m"; regex: Regex; Re: import regex; Sed : module { init: fn(ctxt: ref Draw->Context, argv: list of string); }; false, true: con iota; bool: type int; Addr: adt { pick { None => Dollar => Line => line: int; Regex => re: Re; } }; Sedcom: adt { command: fn(c: self ref Sedcom); executable: fn(c: self ref Sedcom) : int; ad1, ad2: ref Addr; negfl: bool; active: int; pick { S => gfl, pfl: int; re: Re; b: ref Iobuf; rhs: string; D or CD or P or Q or EQ or G or CG or H or CH or N or CN or X or CP or L=> A or C or I => text: string; R => filename: string; W => b: ref Iobuf; Y => map: list of (int, int); B or T or Lab => lab: string; } }; dflag := false; nflag := false; gflag := false; sflag := 0; delflag := 0; dolflag := 0; fhead := 0; files: list of string; fout: ref Iobuf; infile: ref Iobuf; jflag := 0; lastregex: Re; linebuf: string; filename := ""; lnum := 0; peekc := 0; holdsp := ""; patsp := ""; cmds: list of ref Sedcom; appendlist: list of ref Sedcom; bufioflush: list of ref Iobuf; init(nil: ref Draw->Context, args: list of string) { sys = load Sys Sys->PATH; if ((arg = load Arg Arg->PATH) == nil) fatal(sys->sprint("could not load %s: %r", Arg->PATH)); if ((bufio = load Bufio Bufio->PATH) == nil) fatal(sys->sprint("could not load %s: %r", Bufio->PATH)); if ((str = load String String->PATH) == nil) fatal(sys->sprint("could not load %s: %r", String->PATH)); if ((regex = load Regex Regex->PATH) == nil) fatal(sys->sprint("could not load %s: %r", Regex->PATH)); arg->init(args); compfl := 0; while ((c := arg->opt()) != 0) case c { 'n' => nflag = true; 'g' => gflag = true; 'e' => if ((s := arg->arg()) == nil) usage(); filename = ""; cmds = compile(bufio->sopen(s + "\n"), cmds); compfl = 1; 'f' => if ((filename = arg->arg()) == nil) usage(); b := bufio->open(filename, bufio->OREAD); if (b == nil) fatal(sys->sprint("couldn't open '%s': %r", filename)); cmds = compile(b, cmds); compfl = 1; 'd' => dflag = true; * => usage(); } args = arg->argv(); if (compfl == 0) { if (len args == 0) fatal("missing pattern"); filename = ""; cmds = compile(bufio->sopen(hd args + "\n"), cmds); args = tl args; } # reverse command list, we could compile addresses here if required l: list of ref Sedcom; for (p := cmds; p != nil; p = tl p) { l = hd p :: l; } cmds = l; # add files to file list (and reverse to get in right order) f: list of string; if (len args == 0) f = "" :: f; else for (; len args != 0; args = tl args) f = hd args :: f; for (;f != nil; f = tl f) files = hd f :: files; if ((fout = bufio->fopen(sys->fildes(1), bufio->OWRITE)) == nil) fatal(sys->sprint("couldn't buffer stdout: %r")); bufioflush = fout :: bufioflush; lnum = 0; execute(cmds); exits(nil); } depth := 0; maxdepth: con 20; cmdend := array [maxdepth] of string; cmdcnt := array [maxdepth] of int; compile(b: ref Iobuf, l: list of ref Sedcom) : list of ref Sedcom { lnum = 1; nextline: for (;;) { err: int; (err, linebuf) = getline(b); if (err < 0) break; s := linebuf; do { rep: ref Sedcom; ad1, ad2: ref Addr; negfl := 0; if (s != "") s = str->drop(s, " \t;"); if (s == "" || s[0] == '#') continue nextline; # read addresses (s, ad1) = address(s); pick a := ad1 { None => ad2 = ref Addr.None(); * => if (s != "" && (s[0] == ',' || s[0] == ';')) { (s, ad2) = address(s[1:]); } else { ad2 = ref Addr.None(); } } s = str->drop(s, " \t"); if (s != "" && str->in(s[0], "!")) { negfl = true; s = str->drop(s, "!"); } s = str->drop(s, " \t"); if (s == "") break; c := s[0]; s = s[1:]; # mop up commands that got two addresses but only want one. case c { 'a' or 'c' or 'q' or '=' or 'i' => if (tagof ad2 != tagof Addr.None) fatal(sys->sprint("only one address allowed: '%s'", linebuf)); } case c { * => fatal(sys->sprint("unrecognised command: '%s' (%c)", linebuf, c)); 'a' => if (s != "" && s[0] == '\\') s = s[1:]; if (s == "" || s[0] != '\n') fatal("unexpected characters in a command: " + s); rep = ref Sedcom.A (ad1, ad2, negfl, 0, s[1:]); s = ""; 'c' => if (s != "" && s[0] == '\\') s = s[1:]; if (s == "" || s[0] != '\n') fatal("unexpected characters in c command: " + s); rep = ref Sedcom.C (ad1, ad2, negfl, 0, s[1:]); s = ""; 'i' => if (s != "" && s[0] == '\\') s = s[1:]; if (s == "" || s[0] != '\n') fatal("unexpected characters in i command: " + s); rep = ref Sedcom.I (ad1, ad2, negfl, 0, s[1:]); s = ""; 'r' => s = str->drop(s, " \t"); rep = ref Sedcom.R (ad1, ad2, negfl, 0, s); s = ""; 'w' => if (s != "") s = str->drop(s, " \t"); if (s == "") fatal("no filename in w command: " + linebuf); bo := bufio->open(s, bufio->OWRITE); if (bo == nil) bo = bufio->create(s, bufio->OWRITE, 8r666); if (bo == nil) fatal(sys->sprint("can't create output file: '%s'", s)); bufioflush = bo :: bufioflush; rep = ref Sedcom.W (ad1, ad2, negfl, 0, bo); s = ""; 'd' => rep = ref Sedcom.D (ad1, ad2, negfl, 0); 'D' => rep = ref Sedcom.CD (ad1, ad2, negfl, 0); 'p' => rep = ref Sedcom.P (ad1, ad2, negfl, 0); 'P' => rep = ref Sedcom.CP (ad1, ad2, negfl, 0); 'q' => rep = ref Sedcom.Q (ad1, ad2, negfl, 0); '=' => rep = ref Sedcom.EQ (ad1, ad2, negfl, 0); 'g' => rep = ref Sedcom.G (ad1, ad2, negfl, 0); 'G' => rep = ref Sedcom.CG (ad1, ad2, negfl, 0); 'h' => rep = ref Sedcom.H (ad1, ad2, negfl, 0); 'H' => rep = ref Sedcom.CH (ad1, ad2, negfl, 0); 'n' => rep = ref Sedcom.N (ad1, ad2, negfl, 0); 'N' => rep = ref Sedcom.CN (ad1, ad2, negfl, 0); 'x' => rep = ref Sedcom.X (ad1, ad2, negfl, 0); 'l' => rep = ref Sedcom.L (ad1, ad2, negfl, 0); 'y' => if (s == "") fatal("expected args: " + linebuf); seof := s[0:1]; s = s[1:]; if (s == "") fatal("no lhs: " + linebuf); (lhs, s2) := str->splitl(s, seof); if (s2 == "") fatal("no lhs terminator: " + linebuf); s2 = s2[1:]; (rhs, s4) := str->splitl(s2, seof); if (s4 == "") fatal("no rhs: " + linebuf); s = s4[1:]; if (len lhs != len rhs) fatal("y command needs same length sets: " + linebuf); map: list of (int, int); for (i := 0; i < len lhs; i++) map = (lhs[i], rhs[i]) :: map; rep = ref Sedcom.Y (ad1, ad2, negfl, 0, map); 's' => seof := s[0:1]; re: Re; (re, s) = recomp(s); rhs: string; (s, rhs) = compsub(seof + s); gfl := gflag; pfl := 0; if (s != "" && s[0] == 'g') { gfl = 1; s = s[1:]; } if (s != "" && s[0] == 'p') { pfl = 1; s = s[1:]; } if (s != "" && s[0] == 'P') { pfl = 2; s = s[1:]; } b: ref Iobuf = nil; if (s != "" && s[0] == 'w') { s = s[1:]; if (s != "") s = str->drop(s, " \t"); if (s == "") fatal("no filename in s with w: " + linebuf); b = bufio->open(s, bufio->OWRITE); if (b == nil) b = bufio->create(s, bufio->OWRITE, 8r666); if (b == nil) fatal(sys->sprint("can't create output file: '%s'", s)); bufioflush = b :: bufioflush; s = ""; } rep = ref Sedcom.S (ad1, ad2, negfl, 0, gfl, pfl, re, b, rhs); ':' => if (s != "") s = str->drop(s, " \t"); (lab, s1) := str->splitl(s, " \t;#"); s = s1; if (lab == "") fatal(sys->sprint("null label: '%s'", linebuf)); if (findlabel(lab)) fatal(sys->sprint("duplicate label: '%s'", lab)); rep = ref Sedcom.Lab (ad1, ad2, negfl, 0, lab); 'b' or 't' => if (s != "") s = str->drop(s, " \t"); (lab, s1) := str->splitl(s, " \t;#"); s = s1; if (c == 'b') rep = ref Sedcom.B (ad1, ad2, negfl, 0, lab); else rep = ref Sedcom.T (ad1, ad2, negfl, 0, lab); '{' => # replace { with branch to }. lab := mklab(depth); depth++; rep = ref Sedcom.B (ad1, ad2, !negfl, 0, lab); s = ";" + s; '}' => if (tagof ad1 != tagof Addr.None) fatal("did not expect address:" + linebuf); if (--depth < 0) fatal("too many }'s: " + linebuf); lab := mklab(depth); cmdcnt[depth]++; rep = ref Sedcom.Lab ( ad1, ad2, negfl, 0, lab); s = ";" + s; } l = rep :: l; } while (s != nil && str->in(s[0], ";{}")); if (s != nil) fatal("leftover junk: " + s); } return l; } findlabel(lab: string) : bool { for (l := cmds; l != nil; l = tl l) pick x := hd l { Lab => if (x.lab == lab) return true; } return false; } mklab(depth: int): string { return "_" + string cmdcnt[depth] + "_" + string depth; } Sedcom.command(c: self ref Sedcom) { pick x := c { S => m: bool; (m, patsp) = substitute(x, patsp); if (m) { case x.pfl { 0 => ; 1 => fout.puts(patsp + "\n"); * => l: string; (l, patsp) = str->splitl(patsp, "\n"); fout.puts(l + "\n"); break; } if (x.b != nil) x.b.puts(patsp + "\n"); } P => fout.puts(patsp + "\n"); CP => (s, nil) := str->splitl(patsp, "\n"); fout.puts(s + "\n"); A => appendlist = c :: appendlist; R => appendlist = c :: appendlist; C => delflag++; if (c.active == 1) fout.puts(x.text + "\n"); I => fout.puts(x.text + "\n"); W => x.b.puts(patsp + "\n"); G => patsp = holdsp; CG => patsp += holdsp; H => holdsp = patsp; CH => holdsp += patsp; X => (holdsp, patsp) = (patsp, holdsp); Y => # yes this is O(N²). for (i := 0; i < len patsp; i++) for (h := x.map; h != nil; h = tl h) { (s, d) := hd h; if (patsp[i] == s) patsp[i] = d; } D => delflag++; CD => # loose upto \n. (s1, s2) := str->splitl(patsp, "\n"); if (s2 == nil) patsp = s1; else if (len s2 > 1) patsp = s2[1:]; else patsp = ""; jflag++; Q => if (!nflag) fout.puts(patsp + "\n"); arout(); exits(nil); N => if (!nflag) fout.puts(patsp + "\n"); arout(); n: int; (patsp, n) = gline(); if (n < 0) delflag++; CN => arout(); (ns, n) := gline(); if (n < 0) delflag++; patsp += "\n" + ns; EQ => fout.puts(sys->sprint("%d\n", lnum)); Lab => # labels don't do anything. B => jflag = true; T => if (sflag) { sflag = false; jflag = true; } L => col := 0; cc := 0; for (i := 0; i < len patsp; i++) { s := ""; cc = patsp[i]; if (cc >= 16r20 && cc < 16r7F && cc != '\n') s[len s] = cc; else s = trans(cc); for (j := 0; j < len s; j++) { fout.putc(s[j]); if (col++ > 71) { fout.puts("\\\n"); col = 0; } } } if (cc == ' ') fout.puts("\\n"); fout.putc('\n'); * => fatal("unhandled command"); } } trans(ch: int) : string { case ch { '\b' => return "\\b"; '\n' => return "\\n"; '\r' => return "\\r"; '\t' => return "\\t"; '\\' => return "\\\\"; * => return sys->sprint("\\u%.4ux", ch); } } getline(b: ref Iobuf) : (int, string) { w : string; lnum++; while ((c := b.getc()) != bufio->EOF) { r := c; if (r == '\\') { w[len w] = r; if ((c = b.getc()) == bufio->EOF) break; r = c; } else if (r == '\n') return (1, w); w[len w] = r; } return (-1, w); } address(s: string) : (string, ref Addr) { case s[0] { '$' => return (s[1:], ref Addr.Dollar()); '/' => (r, s1) := recomp(s); if (r == nil) r = lastregex; if (r == nil) fatal("First RE in address may not be null"); return (s1, ref Addr.Regex(r)); '0' to '9' => (lno, ls) := str->toint(s, 10); if (lno == 0) fatal("line no 0 is illegal address"); return (ls, ref Addr.Line(lno)); * => return (s, ref Addr.None()); } } recomp(s :string) : (Re, string) { expbuf := ""; seof := s[0]; s = s[1:]; if (s[0] == seof) return (nil, s[1:]); # // c := s[0]; s = s[1:]; do { if (c == '\0' || c == '\n') fatal("too much text: " + linebuf); if (c == '\\') { expbuf[len expbuf] = c; c = s[0]; s = s[1:]; if (c == 'n') c = '\n'; } expbuf[len expbuf] = c; c = s[0]; s = s[1:]; } while (c != seof); (r, err) := regex->compile(expbuf, 1); if (r == nil) fatal(sys->sprint("%s '%s'", err, expbuf)); lastregex = r; return (r, s); } compsub(s: string): (string, string) { seof := s[0]; rhs := ""; for (i := 1; i < len s; i++) { r := s[i]; if (r == seof) break; if (r == '\\') { rhs[len rhs] = r; if(++i >= len s) break; r = s[i]; } rhs[len rhs] = r; } if (i >= len s) fatal(sys->sprint("no closing %c in replacement text: %s", seof, linebuf)); return (s[i+1:], rhs); } execute(l: list of ref Sedcom) { for (;;) { n: int; (patsp, n) = gline(); if (n < 0) break; cmdloop: for (p := l; p != nil;) { c := hd p; if (!c.executable()) { p = tl p; continue; } c.command(); if (delflag) break; if (jflag) { jflag = 0; pick x := c { B or T => if (p == nil) break cmdloop; for (p = l; p != nil; p = tl p) { pick cc := hd p { Lab => if (cc.lab == x.lab) continue cmdloop; } } break cmdloop; # unmatched branch => end of script * => # don't branch. } } else p = tl p; } if (!nflag && !delflag) fout.puts(patsp + "\n"); arout(); delflag = 0; } } Sedcom.executable(c: self ref Sedcom) : int { if (c.active) { if (c.active == 1) c.active = 2; pick x := c.ad2 { None => c.active = 0; Dollar => return !c.negfl; Line => if (lnum <= x.line) { if (x.line == lnum) c.active = 0; return !c.negfl; } c.active = 0; return c.negfl; Regex => if (match(x.re, patsp)) c.active = false; return !c.negfl; } } pick x := c.ad1 { None => return !c.negfl; Dollar => if (dolflag) return !c.negfl; Line => if (x.line == lnum) { c.active = 1; return !c.negfl; } Regex => if (match(x.re, patsp)) { c.active = 1; return !c.negfl; } } return c.negfl; } arout() { a: list of ref Sedcom; while (appendlist != nil) { a = hd appendlist :: a; appendlist = tl appendlist; } for (; a != nil; a = tl a) pick x := hd a { A => fout.puts(x.text + "\n"); R => if ((b := bufio->open(x.filename, bufio->OREAD)) == nil) fatal(sys->sprint("couldn't open '%s'", x.filename)); while ((c := b.getc()) != bufio->EOF) fout.putc(c); b.close(); * => fatal("unexpected command on appendlist"); } } match(re: Re, s: string) : bool { return re != nil && regex->execute(re, s) != nil; } substitute(c: ref Sedcom.S, s: string) : (bool, string) { if (!match(c.re, s)) return (false, s); sflag = true; start := 0; # Beware of infinite loops: 's/$/i/g', 's/a/aa/g', 's/^/a/g' do { se := (start, len s); if ((m := regex->executese(c.re, s, se, true, true)) == nil) break; (l, r) := m[0]; rep := ""; for (i := 0; i < len c.rhs; i++){ if (c.rhs[i] != '\\' || i+1 == len c.rhs){ if (c.rhs[i] == '&') rep += s[l: r]; else rep[len rep] = c.rhs[i]; }else { i++; case c.rhs[i] { '0' to '9' => n := c.rhs[i] - '0'; # elide if too big if (n < len m) { (beg, end) := m[n]; rep += s[beg:end]; } 'n' => rep[len rep] = '\n'; * => rep[len rep] = c.rhs[i]; } } } s = s[0:l] + rep + s[r:]; start = l + len rep; if(r == l) start++; } while (c.gfl); return (true, s); } gline() : (string, int) { if (infile == nil && opendatafile() < 0) return (nil, -1); sflag = false; lnum++; s := ""; do { c := peekc; if (c == 0) c = infile.getc(); for (; c != bufio->EOF; c = infile.getc()) { if (c == '\n') { if ((peekc = infile.getc()) == bufio->EOF) if (fhead == 0) dolflag = 1; return (s, 1); } s[len s] = c; } if (len s != 0) { peekc = bufio->EOF; if (fhead == 0) dolflag = 1; return (s, 1); } peekc = 0; infile = nil; } while (opendatafile() > 0); infile = nil; return (nil, -1); } opendatafile() : int { if (files == nil) return -1; if (hd files != nil) { if ((infile = bufio->open(hd files, bufio->OREAD)) == nil) fatal(sys->sprint("can't open '%s'", hd files)); } else if ((infile = bufio->fopen(sys->fildes(0), bufio->OREAD)) == nil) fatal("can't buffer stdin"); files = tl files; return 1; } dbg(s: string) { if (dflag) sys->print("dbg: %s\n", s); } usage() { sys->fprint(stderr(), "usage: %s [-ngd] [-e expr] [-f file] [expr] [file...]\n", arg->progname()); exits("usage"); } fatal(s: string) { f := filename; if (f == nil) f = "<stdin>"; sys->fprint(stderr(), "%s:%d %s\n", f, lnum, s); exits("error"); } exits(e: string) { for(; bufioflush != nil; bufioflush = tl bufioflush) (hd bufioflush).flush(); if (e != nil) raise "fail:" + e; exit; } stderr() : ref Sys->FD { return sys->fildes(2); }