ref: 2e8f865a978cba527282aacebfc99b9a6317c051
dir: /scan.c/
#include <u.h>
#include <libc.h>
#include <thread.h>
#include "dat.h"
#include "fns.h"
/*
 * Append one token carrying the given tag to the list and return a
 * pointer to it.
 *
 * The list's count is bumped first, then the token array is re-obtained
 * from allocextra() sized for the new count.  Only the tag field of the
 * new slot is written here; the caller fills in any payload
 * (num, name, prim, ...).
 *
 * NOTE(review): presumably allocextra() may move the array, in which
 * case pointers returned by earlier newtok() calls become stale --
 * confirm against allocextra's definition.
 */
Token *
newtok(TokenList *tokens, int tag)
{
	Token *slot;

	tokens->count += 1;
	tokens->tokens = allocextra(tokens, sizeof(Token) * tokens->count);

	/* the freshly added element is the last one in the array */
	slot = &tokens->tokens[tokens->count - 1];
	slot->tag = tag;
	return slot;
}
/*
 * Split the NUL-terminated UTF-8 string buf into a TokenList.
 *
 * Each iteration decodes one rune and classifies it, in this order:
 *   1. single-rune punctuation: ( ) [ ] { } newline ⋄ ∇ ← ;
 *   2. a primitive, as recognised by primid(); the cursor advances by
 *      the byte length of primsymb(id)
 *   3. white space, which is skipped (newline was already handled in 1)
 *   4. a decimal number, parsed with strtoll()
 *   5. a name: one alphabetic rune followed by alphabetic or digit
 *      runes, copied into a freshly malloc'd NUL-terminated string
 * Anything else is reported through error(ESyntax, ...).
 *
 * A TokEnd token is always appended last.  Returns the list obtained
 * from alloc(DataTokenList).
 */
TokenList *
scan(char *buf)
{
	Rune r;
	int n, id, new;
	TokenList *tokens = alloc(DataTokenList);
	Token *tok;
	char *cp = buf;

	while(*cp){
		/* n is the number of bytes the cursor advances at next: */
		n = chartorune(&r, cp);
		new = -1;
		switch(r){
		case L'(': new = TokLparen; break;
		case L')': new = TokRparen; break;
		case L'[': new = TokLbrack; break;
		case L']': new = TokRbrack; break;
		case L'{': new = TokLbrace; break;
		case L'}': new = TokRbrace; break;
		case L'\n': new = TokNewline; break;
		case L'⋄': new = TokDiamond; break;
		case L'∇': new = TokDel; break;
		case L'←': new = TokLarrow; break;
		case L';': new = TokSemi; break;
		}
		if(new != -1){
			newtok(tokens, new);
			goto next;
		}
		if((id = primid(cp)) != -1){
			n = strlen(primsymb(id));
			tok = newtok(tokens, TokPrimitive);
			tok->prim = id;
			tok->nameclass = primclass(id);
			goto next;
		}
		if(isspacerune(r))
			goto next;
		if(isdigitrune(r)){
			char *rest;
			vlong num = strtoll(cp, &rest, 10);
			/*
			 * isdigitrune() also accepts non-ASCII digits, which
			 * strtoll() does not consume.  Without this check n
			 * would become 0 and the loop would never advance.
			 * Treat such a rune as unexpected input; n still holds
			 * its encoded length from chartorune().
			 */
			if(rest == cp)
				goto bad;
			n = rest - cp;
			tok = newtok(tokens, TokNumber);
			tok->num = num;
			goto next;
		}
		if(isalpharune(r)){
			char *start = cp;
			/* consume the leading letter, then any letters/digits */
			do{
				cp += n;
				n = chartorune(&r, cp);
			}while(isalpharune(r) || isdigitrune(r));
			tok = newtok(tokens, TokName);
			usize size = cp - start;
			/* NOTE(review): malloc result is not checked here */
			tok->name = malloc(size + 1);
			memcpy(tok->name, start, size);
			tok->name[size] = 0;
			/* cp already points past the name; skip the cp += n */
			continue;
		}
bad:
		/* if error() returns, scanning resumes after the bad rune */
		error(ESyntax, "unexpected: '%C'", r);
next:
		cp += n;
	}
	newtok(tokens, TokEnd);
	return tokens;
}
/*
 * Return a short human-readable label for token t, selected by t.tag.
 * Tags without a dedicated label yield "???".
 *
 * The result points at a string literal (static storage), so it stays
 * valid after the call; the caller must neither modify nor free it.
 * Previously the label was formatted into a local array whose address
 * was returned, leaving the caller with a dangling pointer.
 */
char *
printtok(Token t)
{
	char *label;

	switch(t.tag){
	case TokNumber:
		label = "number";
		break;
	case TokName:
		label = "name";
		break;
	case TokLparen:
		label = "(";
		break;
	case TokRparen:
		label = ")";
		break;
	case TokLbrack:
		label = "[";
		break;
	case TokRbrack:
		label = "]";
		break;
	case TokLbrace:
		label = "{";
		break;
	case TokRbrace:
		label = "}";
		break;
	case TokNewline:
		label = "newline";
		break;
	case TokDiamond:
		label = "⋄";
		break;
	case TokPrimitive:
		label = "primitive";
		break;
	case TokDel:
		label = "∇";
		break;
	case TokLarrow:
		label = "←";
		break;
	case TokSemi:
		label = ";";
		break;
	default:
		label = "???";
		break;
	}
	return label;
}