ref: ed9fdc72f519553f9c8f1f221f97bd5919847caa
dir: /sys/src/cmd/upas/smtp/rfc822.y/
%{
#include "common.h"
#include "smtp.h"
#include <ctype.h>

#define YYMAXDEPTH	500		/* was default 150 */

/*
 * Lexer state.  yyinit() points these at the text to be parsed.
 */
char	*yylp;		/* next character to be lex'd */
int	yydone;		/* tell yylex to give up */
char	*yybuffer;	/* first parsed character */
char	*yyend;		/* end of buffer to be parsed */

Node	*root;

/*
 * List of parsed fields; newfield() and missing() append to it.
 */
Field	*firstfield;
Field	*lastfield;

/*
 * Pieces of a leading unix "From " line, set by the unixfrom rule.
 */
Node	*usender;
Node	*usys;
Node	*udate;

char	*startfield, *endfield;	/* updated by newfield() */

/*
 * What has been seen so far.  The first three are set by the
 * field rule, received is incremented per Received field and
 * messageid is set when a MESSAGEID token is reduced.
 */
int	originator;
int	destination;
int	date;
int	received;
int	messageid;
%}

/*
 * Tokens returned by yylex().  Everything between DATE and MAILER
 * is looked up in key[]; any other text is a WORD.
 */
%term WORD
%term DATE
%term RESENT_DATE
%term RETURN_PATH
%term FROM
%term SENDER
%term REPLY_TO
%term RESENT_FROM
%term RESENT_SENDER
%term RESENT_REPLY_TO
%term SUBJECT
%term TO
%term CC
%term BCC
%term RESENT_TO
%term RESENT_CC
%term RESENT_BCC
%term REMOTE
%term PRECEDENCE
%term MIMEVERSION
%term CONTENTTYPE
%term MESSAGEID
%term RECEIVED
%term MAILER
%term BADTOKEN
%start msg
%%

/*
 * A message header is a list of fields, optionally preceded by a
 * unix "From " line.
 */
msg		: fields
		| unixfrom '\n' fields
		;

/*
 * A lone newline ends the header: yydone makes yylex() return 0
 * from then on.
 */
fields		: '\n'
			{ yydone = 1; }
		| field '\n'
		| field '\n' fields
		;

/*
 * On a syntax error, skip through the next newline and carry on
 * with the following field.
 */
field		: dates
			{ date = 1; }
		| originator
			{ originator = 1; }
		| destination
			{ destination = 1; }
		| subject
		| optional
		| ignored
		| received
		| precedence
		| error '\n' field
		;

/*
 * Keep the sender, date and system nodes; free the keyword nodes
 * that are not kept.
 */
unixfrom	: FROM route_addr unix_date_time REMOTE FROM word
			{ freenode($1); freenode($4); freenode($5);
			  usender = $2; udate = $3; usys = $6;
			}
		;

/*
 * Originator fields are the only ones passed to newfield() with
 * source set to 1.
 */
originator	: REPLY_TO ':' address_list
			{ newfield(link3($1, $2, $3), 1); }
		| RETURN_PATH ':' route_addr
			{ newfield(link3($1, $2, $3), 1); }
		| FROM ':' mailbox_list
			{ newfield(link3($1, $2, $3), 1); }
		| SENDER ':' mailbox
			{ newfield(link3($1, $2, $3), 1); }
		| RESENT_REPLY_TO ':' address_list
			{ newfield(link3($1, $2, $3), 1); }
		| RESENT_SENDER ':' mailbox
			{ newfield(link3($1, $2, $3), 1); }
		| RESENT_FROM ':' mailbox
			{ newfield(link3($1, $2, $3), 1); }
		;

dates		: DATE ':' date_time
			{ newfield(link3($1, $2, $3), 0); }
		| RESENT_DATE ':' date_time
			{ newfield(link3($1, $2, $3), 0); }
		;

/*
 * Each destination field may be empty or carry an address list.
 */
destination	: TO ':'
			{ newfield(link2($1, $2), 0); }
		| TO ':' address_list
			{ newfield(link3($1, $2, $3), 0); }
		| RESENT_TO ':'
			{ newfield(link2($1, $2), 0); }
		| RESENT_TO ':' address_list
			{ newfield(link3($1, $2, $3), 0); }
		| CC ':'
			{ newfield(link2($1, $2), 0); }
		| CC ':' address_list
			{ newfield(link3($1, $2, $3), 0); }
		| RESENT_CC ':'
			{ newfield(link2($1, $2), 0); }
		| RESENT_CC ':' address_list
			{ newfield(link3($1, $2, $3), 0); }
		| BCC ':'
			{ newfield(link2($1, $2), 0); }
		| BCC ':' address_list
			{ newfield(link3($1, $2, $3), 0); }
		| RESENT_BCC ':'
			{ newfield(link2($1, $2), 0); }
		| RESENT_BCC ':' address_list
			{ newfield(link3($1, $2, $3), 0); }
		;

subject		: SUBJECT ':' things
			{ newfield(link3($1, $2, $3), 0); }
		| SUBJECT ':'
			{ newfield(link2($1, $2), 0); }
		;

/*
 * Count the Received fields as they are stored.
 */
received	: RECEIVED ':' things
			{ newfield(link3($1, $2, $3), 0); received++; }
		| RECEIVED ':'
			{ newfield(link2($1, $2), 0); received++; }
		;

precedence	: PRECEDENCE ':' things
			{ newfield(link3($1, $2, $3), 0); }
		| PRECEDENCE ':'
			{ newfield(link2($1, $2), 0); }
		;

/*
 * Recognized header names whose bodies are stored without being
 * parsed any further.
 */
ignored		: ignoredhdr ':' things
			{ newfield(link3($1, $2, $3), 0); }
		| ignoredhdr ':'
			{ newfield(link2($1, $2), 0); }
		;

ignoredhdr	: MIMEVERSION | CONTENTTYPE | MESSAGEID { messageid = 1; } | MAILER
		;

/*
 * Any other field.  A bad field name frees the nodes and makes
 * the parser return 1.
 */
optional	: fieldwords ':' things
			{ /* hack to allow same lex for field names and the rest */
			 if(badfieldname($1)){
				freenode($1);
				freenode($2);
				freenode($3);
				return 1;
			 }
			 newfield(link3($1, $2, $3), 0);
			}
		| fieldwords ':'
			{ /* hack to allow same lex for field names and the rest */
			 if(badfieldname($1)){
				freenode($1);
				freenode($2);
				return 1;
			 }
			 newfield(link2($1, $2), 0);
			}
		;

address_list	: address
		| address_list ',' address
			{ $$ = link3($1, $2, $3); }
		;

address		: mailbox
		| group
		;

group		: phrase ':' address_list ';'
			{ $$ = link2($1, link3($2, $3, $4)); }
		| phrase ':' ';'
			{ $$ = link3($1, $2, $3); }
		;

mailbox_list	: mailbox
		| mailbox_list ',' mailbox
			{ $$ = link3($1, $2, $3); }
		;

mailbox		: route_addr
		| phrase brak_addr
			{ $$ = link2($1, $2); }
		| brak_addr
		;

/*
 * An empty <> keeps the '>' node, which nobody() turns into an
 * address node; the '<' node is freed.
 */
brak_addr	: '<' route_addr '>'
			{ $$ = link3($1, $2, $3); }
		| '<' '>'
			{ $$ = nobody($2); freenode($1); }
		;

/*
 * The address rules below use concat() to collapse their pieces
 * into a single node and address() to mark that node.
 */
route_addr	: route ':' at_addr
			{ $$ = address(concat($1, concat($2, $3))); }
		| addr_spec
		;

route		: '@' domain
			{ $$ = concat($1, $2); }
		| route ',' '@' domain
			{ $$ = concat($1, concat($2, concat($3, $4))); }
		;

addr_spec	: local_part
			{ $$ = address($1); }
		| at_addr
		;

at_addr		: local_part '@' domain
			{ $$ = address(concat($1, concat($2, $3)));}
		| at_addr '@' domain
			{ $$ = address(concat($1, concat($2, $3)));}
		;

local_part	: word
		;

domain		: word
		;

phrase		: word
		| phrase word
			{ $$ = link2($1, $2); }
		;

things		: thing
		| things thing
			{ $$ = link2($1, $2); }
		;

thing		: word | '<' | '>' | '@' | ':' | ';' | ','
		;

date_time	: things
		;

/*
 * The six nodes are relinked in the order $1 $3 $2 $6 $4 $5.
 */
unix_date_time	: word word word unix_time word word
			{ $$ = link3($1, $3, link3($2, $6, link2($4, $5))); }
		;

unix_time	: word
		| unix_time ':' word
			{ $$ = link3($1, $2, $3); }
		;

/*
 * Outside of a field name, every keyword token is just a word.
 */
word		: WORD | DATE | RESENT_DATE | RETURN_PATH | FROM | SENDER
		| REPLY_TO | RESENT_FROM | RESENT_SENDER | RESENT_REPLY_TO
		| TO | CC | BCC | RESENT_TO | RESENT_CC | RESENT_BCC | REMOTE | SUBJECT
		| PRECEDENCE | MIMEVERSION | CONTENTTYPE | MESSAGEID | RECEIVED | MAILER
		;

fieldwords	: fieldword
		| WORD
		| fieldwords fieldword
			{ $$ = link2($1, $2); }
		| fieldwords word
			{ $$ = link2($1, $2); }
		;

fieldword	: '<' | '>' | '@' | ';' | ','
		;
%%

/*
 * Initialize the parsing.  Done once for each header field.
*/ void yyinit(char *p, int len) { yybuffer = p; yylp = p; yyend = p + len; firstfield = lastfield = 0; received = 0; } /* * keywords identifying header fields we care about */ typedef struct Keyword Keyword; struct Keyword { char *rep; int val; }; /* field names that we need to recognize */ Keyword key[] = { { "date", DATE }, { "resent-date", RESENT_DATE }, { "return_path", RETURN_PATH }, { "from", FROM }, { "sender", SENDER }, { "reply-to", REPLY_TO }, { "resent-from", RESENT_FROM }, { "resent-sender", RESENT_SENDER }, { "resent-reply-to", RESENT_REPLY_TO }, { "to", TO }, { "cc", CC }, { "bcc", BCC }, { "resent-to", RESENT_TO }, { "resent-cc", RESENT_CC }, { "resent-bcc", RESENT_BCC }, { "remote", REMOTE }, { "subject", SUBJECT }, { "precedence", PRECEDENCE }, { "mime-version", MIMEVERSION }, { "content-type", CONTENTTYPE }, { "message-id", MESSAGEID }, { "received", RECEIVED }, { "mailer", MAILER }, { "who-the-hell-cares", WORD } }; /* * Lexical analysis for an rfc822 header field. Continuation lines * are handled in yywhite() when skipping over white space. 
* */ yylex(void) { String *t; int quoting; int escaping; char *start; Keyword *kp; int c, d; /* print("lexing\n"); /**/ if(yylp >= yyend) return 0; if(yydone) return 0; quoting = escaping = 0; start = yylp; yylval = malloc(sizeof(Node)); yylval->white = yylval->s = 0; yylval->next = 0; yylval->addr = 0; yylval->start = yylp; for(t = 0; yylp < yyend; yylp++){ c = *yylp & 0xff; /* dump nulls, they can't be in header */ if(c == 0) continue; if(escaping) { escaping = 0; } else if(quoting) { switch(c){ case '\\': escaping = 1; break; case '\n': d = (*(yylp+1))&0xff; if(d != ' ' && d != '\t'){ quoting = 0; yylp--; continue; } break; case '"': quoting = 0; break; } } else { switch(c){ case '\\': escaping = 1; break; case '(': case ' ': case '\t': case '\r': goto out; case '\n': if(yylp == start){ yylp++; /* print("lex(c %c)\n", c); /**/ yylval->end = yylp; return yylval->c = c; } goto out; case '@': case '>': case '<': case ':': case ',': case ';': if(yylp == start){ yylp++; yylval->white = yywhite(); /* print("lex(c %c)\n", c); /**/ yylval->end = yylp; return yylval->c = c; } goto out; case '"': quoting = 1; break; default: break; } } if(t == 0) t = s_new(); s_putc(t, c); } out: yylval->white = yywhite(); if(t) { s_terminate(t); } else /* message begins with white-space! 
*/ return yylval->c = '\n'; yylval->s = t; for(kp = key; kp->val != WORD; kp++) if(cistrcmp(s_to_c(t), kp->rep)==0) break; /* print("lex(%d) %s\n", kp->val-WORD, s_to_c(t)); /**/ yylval->end = yylp; return yylval->c = kp->val; } void yyerror(char *x) { USED(x); /*fprint(2, "parse err: %s\n", x);/**/ } /* * parse white space and comments */ String * yywhite(void) { String *w; int clevel; int c; int escaping; escaping = clevel = 0; for(w = 0; yylp < yyend; yylp++){ c = *yylp & 0xff; /* dump nulls, they can't be in header */ if(c == 0) continue; if(escaping){ escaping = 0; } else if(clevel) { switch(c){ case '\n': /* * look for multiline fields */ if(*(yylp+1)==' ' || *(yylp+1)=='\t') break; else goto out; case '\\': escaping = 1; break; case '(': clevel++; break; case ')': clevel--; break; } } else { switch(c){ case '\\': escaping = 1; break; case '(': clevel++; break; case ' ': case '\t': case '\r': break; case '\n': /* * look for multiline fields */ if(*(yylp+1)==' ' || *(yylp+1)=='\t') break; else goto out; default: goto out; } } if(w == 0) w = s_new(); s_putc(w, c); } out: if(w) s_terminate(w); return w; } /* * link two parsed entries together */ Node* link2(Node *p1, Node *p2) { Node *p; for(p = p1; p->next; p = p->next) ; p->next = p2; return p1; } /* * link three parsed entries together */ Node* link3(Node *p1, Node *p2, Node *p3) { Node *p; for(p = p2; p->next; p = p->next) ; p->next = p3; for(p = p1; p->next; p = p->next) ; p->next = p2; return p1; } /* * make a:b, move all white space after both */ Node* colon(Node *p1, Node *p2) { if(p1->white){ if(p2->white) s_append(p1->white, s_to_c(p2->white)); } else { p1->white = p2->white; p2->white = 0; } s_append(p1->s, ":"); if(p2->s) s_append(p1->s, s_to_c(p2->s)); if(p1->end < p2->end) p1->end = p2->end; freenode(p2); return p1; } /* * concatenate two fields, move all white space after both */ Node* concat(Node *p1, Node *p2) { char buf[2]; if(p1->white){ if(p2->white) s_append(p1->white, s_to_c(p2->white)); } 
else { p1->white = p2->white; p2->white = 0; } if(p1->s == nil){ buf[0] = p1->c; buf[1] = 0; p1->s = s_new(); s_append(p1->s, buf); } if(p2->s) s_append(p1->s, s_to_c(p2->s)); else { buf[0] = p2->c; buf[1] = 0; s_append(p1->s, buf); } if(p1->end < p2->end) p1->end = p2->end; freenode(p2); return p1; } /* * look for disallowed chars in the field name */ int badfieldname(Node *p) { for(; p; p = p->next){ /* field name can't contain white space */ if(p->white && p->next) return 1; } return 0; } /* * mark as an address */ Node * address(Node *p) { p->addr = 1; return p; } /* * case independent string compare */ int cistrcmp(char *s1, char *s2) { int c1, c2; for(; *s1; s1++, s2++){ c1 = isupper(*s1) ? tolower(*s1) : *s1; c2 = isupper(*s2) ? tolower(*s2) : *s2; if (c1 != c2) return -1; } return *s2; } /* * free a node */ void freenode(Node *p) { Node *tp; while(p){ tp = p->next; if(p->s) s_free(p->s); if(p->white) s_free(p->white); free(p); p = tp; } } /* * an anonymous user */ Node* nobody(Node *p) { if(p->s) s_free(p->s); p->s = s_copy("pOsTmAsTeR"); p->addr = 1; return p; } /* * add anything that was dropped because of a parse error */ void missing(Node *p) { Node *np; char *start, *end; Field *f; String *s; start = yybuffer; if(lastfield != nil){ for(np = lastfield->node; np; np = np->next) start = np->end+1; } end = p->start-1; if(end <= start) return; if(strncmp(start, "From ", 5) == 0) return; np = malloc(sizeof(Node)); np->start = start; np->end = end; np->white = nil; s = s_copy("BadHeader: "); np->s = s_nappend(s, start, end-start); np->next = nil; f = malloc(sizeof(Field)); f->next = 0; f->node = np; f->source = 0; if(firstfield) lastfield->next = f; else firstfield = f; lastfield = f; } /* * create a new field */ void newfield(Node *p, int source) { Field *f; missing(p); f = malloc(sizeof(Field)); f->next = 0; f->node = p; f->source = source; if(firstfield) lastfield->next = f; else firstfield = f; lastfield = f; endfield = startfield; startfield = yylp; } /* 
* fee a list of fields */ void freefield(Field *f) { Field *tf; while(f){ tf = f->next; freenode(f->node); free(f); f = tf; } } /* * add some white space to a node */ Node* whiten(Node *p) { Node *tp; for(tp = p; tp->next; tp = tp->next) ; if(tp->white == 0) tp->white = s_copy(" "); return p; } void yycleanup(void) { Field *f, *fnext; Node *np, *next; for(f = firstfield; f; f = fnext){ for(np = f->node; np; np = next){ if(np->s) s_free(np->s); if(np->white) s_free(np->white); next = np->next; free(np); } fnext = f->next; free(f); } firstfield = lastfield = 0; }