shithub: lpa

ref: 2b23d05d57743af57385cd42c0fd2d223b11d8c8
dir: /scan.c/

View raw version
#include <u.h>
#include <libc.h>
#include <thread.h>

#include "dat.h"
#include "fns.h"

/*
 * Append one token carrying the given tag to the list and return a
 * pointer to it.  Only the tag field is set here; the caller fills in
 * any payload (num, name, string, prim, ...).
 *
 * The token array is resized through allocextra() on every call.
 * NOTE(review): presumably that may move the array, so a pointer
 * returned by an earlier call should not be kept across a later
 * newtok() -- confirm against allocextra().
 */
Token *
newtok(TokenList *tokens, int tag)
{
	Token *slot;

	tokens->count += 1;
	tokens->tokens = allocextra(tokens, tokens->count * sizeof(Token));

	/* the fresh token is the last element of the resized array */
	slot = &tokens->tokens[tokens->count - 1];
	slot->tag = tag;
	return slot;
}

/*
 * Split the NUL-terminated UTF-8 text in buf into a list of tokens.
 *
 * Recognised, in order of precedence: single-rune punctuation,
 * primitives (as reported by primid()), white space (skipped),
 * decimal integers, names (a letter followed by letters/digits) and
 * '...' quoted strings.  Anything else, and a string with no closing
 * quote, is reported through error(ESyntax, ...).
 *
 * Returns the new TokenList; its final entry is always a TokEnd token.
 * Name and string payloads are allocated with malloc().
 */
TokenList *
scan(char *buf)
{
	Rune r;
	int n, id;
	TokenList *tokens = alloc(DataTokenList);
	Token *tok;
	char *cp = buf;

	while(*cp){
		/*
		 * n is the byte length of whatever is consumed at cp;
		 * it is added to cp at the next: label.
		 */
		n = chartorune(&r, cp);
		int new = -1;
		switch(r){
		case L'(': new = TokLparen; break;
		case L')': new = TokRparen; break;
		case L'[': new = TokLbrack; break;
		case L']': new = TokRbrack; break;
		case L'{': new = TokLbrace; break;
		case L'}': new = TokRbrace; break;
		case L'\n': new = TokNewline; break;
		case L'⋄': new = TokDiamond; break;
		case L'∇': new = TokDel; break;
		case L'←': new = TokLarrow; break;
		case L';': new = TokSemi; break;
		}
		if(new != -1){
			newtok(tokens, new);
			goto next;
		}
		if((id = primid(cp)) != -1){
			/* a primitive's symbol may span several bytes */
			n = strlen(primsymb(id));
			tok = newtok(tokens, TokPrimitive);
			tok->prim = id;
			tok->nameclass = primclass(id);
			goto next;
		}
		if(isspacerune(r))
			goto next;
		/*
		 * Only ASCII digits may start a number.  isdigitrune() is
		 * also true for non-ASCII digit runes, which strtoll()
		 * cannot parse: it would consume nothing, leave n == 0 and
		 * the loop would spin forever appending tokens.  Such runes
		 * now fall through to the "unexpected" error below.
		 */
		if(r >= '0' && r <= '9'){
			char *rest;
			vlong num = strtoll(cp, &rest, 10);
			n = rest - cp;
			tok = newtok(tokens, TokNumber);
			tok->num = num;
			goto next;
		}
		if(isalpharune(r)){
			char *start = cp;
			do{
				cp += n;
				n = chartorune(&r, cp);
			}while(isalpharune(r) || isdigitrune(r));
			tok = newtok(tokens, TokName);
			usize size = cp - start;
			/* NOTE(review): malloc result is not checked here or for strings below */
			tok->name = malloc(size + 1);
			memcpy(tok->name, start, size);
			tok->name[size] = 0;
			/* cp already sits on the first rune after the name */
			continue;
		}
		if(r == '\''){
			/* step over the opening quote */
			cp += n;
			n = chartorune(&r, cp);

			char *start = cp;
			while(!(r == '\'' || r == 0)){
				cp += n;
				n = chartorune(&r, cp);
			}
			if(r == 0)
				error(ESyntax, "unmatched '");

			tok = newtok(tokens, TokString);
			/* rune count of the contents plus one for the terminator */
			usize size = utfnlen(start, cp - start) + 1;
			tok->string = malloc(sizeof(Rune) * size);
			/* the size limit stops the conversion at the closing quote */
			runesnprint(tok->string, size, "%s", start);
			/* n still holds the closing quote's length; next: skips it */
			goto next;
		}
		error(ESyntax, "unexpected: '%C'", r);
next:
		cp += n;
	}
	newtok(tokens, TokEnd);
	return tokens;
}

/*
 * Return a short printable label for the token's tag.
 *
 * The result points at a string constant: it stays valid after the
 * call, and must not be modified or freed by the caller.  (Earlier
 * this function formatted into a local array and returned its
 * address, which was dangling once the function returned.)
 * Tags without a label of their own yield "???".
 */
char *
printtok(Token t)
{
	char *s;

	switch(t.tag){
	case TokNumber:
		s = "number";
		break;
	case TokName:
		s = "name";
		break;
	case TokLparen:
		s = "(";
		break;
	case TokRparen:
		s = ")";
		break;
	case TokLbrack:
		s = "[";
		break;
	case TokRbrack:
		s = "]";
		break;
	case TokLbrace:
		s = "{";
		break;
	case TokRbrace:
		s = "}";
		break;
	case TokNewline:
		s = "newline";
		break;
	case TokDiamond:
		s = "⋄";
		break;
	case TokPrimitive:
		s = "primitive";
		break;
	case TokDel:
		s = "∇";
		break;
	case TokLarrow:
		s = "←";
		break;
	case TokSemi:
		s = ";";
		break;
	default:
		s = "???";
		break;
	}

	return s;
}