ref: bdb99b496ec68880effa7df50fb4b05e7c1799bc
dir: /reader.ml/
module T = Types.Types let string_re = Str.regexp "\"\\(\\\\.\\|[^\\\\\"]\\)*\"" let registered_macros = Env.make None type reader = {form: Types.m9type; tokens: string list} type list_reader = {list_form: Types.m9type list; tokens: string list} let unescape_string token = if Str.string_match string_re token 0 then let without_quotes = String.sub token 1 (String.length token - 2) in Utils.gsub (Str.regexp "\\\\.") (function "\\n" -> "\n" | x -> String.sub x 1 1) without_quotes else raise (Utils.Syntax_error "unterminated string") let trim_end list = List.rev (List.tl (List.rev list)) let rec replace_token tokens replacement block = match tokens with | [token] -> let t = if token = "_" then replacement else token in block := !block @ [t] ; String.concat " " !block | token :: rest -> let t = if token = "_" then replacement else token in block := !block @ [t] ; replace_token rest replacement block | _ -> String.concat " " !block and fix_clause original sym clause = print_endline (">>>>> fix_clause: incoming: " ^ Printer.to_string clause) ; match clause with | T.List {T.value= [T.List {T.value= pattern; meta= _}; T.List {T.value= transform; meta= _}]; meta= _} -> let pattern = Utils.tokenize (Printer.dump pattern) in let fixed_pattern = replace_token (List.tl pattern) (Printer.to_string sym) (ref []) in let fixed_transform = replace_token (Utils.tokenize (Printer.dump transform)) (Printer.to_string original) (ref []) in ["("; "define"; Printer.to_string sym; "("; "lambda"; "("; fixed_pattern; ")"; "("; fixed_transform; ")"; ")"; ")"] | T.List {T.value= [T.List {T.value= pattern; meta= _}; atom]; meta= _} -> let pattern = Utils.tokenize (Printer.dump pattern) in let fixed_pattern = replace_token (List.tl pattern) (Printer.to_string sym) (ref []) in ["("; "define"; Printer.to_string sym; "("; "lambda"; "("; fixed_pattern; ")"; Printer.to_string atom; ")"; ")"] | _ as e -> raise (Utils.Syntax_error ("fix_clause botch: " ^ Printer.to_string e)) let read_atom token = match token with | "null" -> T.Nil | "#t" | "#true" -> T.Bool true | "#f" | "#false" -> T.Bool false | _ -> ( match token.[0] with | '0' .. '9' -> Types.number (float_of_string token) | '#' -> ( match (token.[1], token.[2]) with | '\\', '0' .. '9' | '\\', 'a' .. 'z' | '\\', 'A' .. 'Z' -> T.Char token.[2] | _ -> Types.symbol token ) | '-' -> ( match String.length token with | 1 -> Types.symbol token | _ -> ( match token.[1] with '0' .. '9' -> Types.number (float_of_string token) | _ -> Types.symbol token ) ) | '"' -> T.String (unescape_string token) | _ -> Types.symbol token ) let rec read_list eol list_reader = (* we need to replace macro calls with their variant symbols *) let tweaked_tokens = if List.length list_reader.tokens > 1 && List.hd list_reader.tokens = "(" then let symbol_str = List.nth list_reader.tokens 1 in let symbol = Types.symbol symbol_str in match try Env.get registered_macros symbol with _ -> T.Nil with | T.Macro {T.value= m; meta} -> (* print_endline "XXXX MACRO FOUND" ; *) print_endline ("XXXX MACRO: " ^ Printer.to_string m) ; (* print_endline ("XXXX META: " ^ Printer.to_string meta); *) (* print_endline ("XXXX TOKENS: " ^ String.concat " " list_reader.tokens) ; *) let args = Macro.collect_args (List.tl list_reader.tokens) [] in (* print_endline ("<><><> args: " ^ String.concat " " args) ; *) let variant = Macro.match_variant symbol_str meta args in print_endline ("<><><><>: " ^ variant) ; (* List.map (fun s -> if s = Printer.to_string symbol then variant else s) (trim_end list_reader.tokens) *) List.map (fun s -> if s = symbol_str then variant else s) (trim_end list_reader.tokens) | _ -> list_reader.tokens else list_reader.tokens in (* print_endline ("TWEAKED_TOKENS: [" ^ String.concat " " tweaked_tokens ^ "]") ; *) match tweaked_tokens with | [] -> raise (Utils.Syntax_error ("read_list botch: '" ^ Printer.dump list_reader.list_form ^ "' eol: '" ^ eol ^ "'")) | [_] -> {list_form= list_reader.list_form; tokens= [")"]} | token :: tokens -> if Str.string_match (Str.regexp eol) token 0 then {list_form= list_reader.list_form; tokens} else let reader = read_form tweaked_tokens in (* print_endline ("token: " ^ token ^ " tokens: " ^ String.concat " " tokens); *) read_list eol {list_form= list_reader.list_form @ [reader.form]; tokens= reader.tokens} and read_quote sym tokens = let reader = read_form tokens in {form= Types.list [Types.symbol sym; reader.form]; tokens= reader.tokens} and read_vector all_tokens = match all_tokens with | [] -> raise End_of_file | token :: tokens -> ( match token with | "(" -> let list_reader = read_list ")" {list_form= []; tokens} in {form= Types.vector list_reader.list_form; tokens= list_reader.tokens} | _ -> read_form tokens ) and read_macro tokens = let macro = ref [] in let list_reader = read_list ")" {list_form= []; tokens} in print_endline ("READ_MACRO: " ^ Printer.dump list_reader.list_form) ; print_endline ("READ_MACRO tokens: " ^ String.concat " " list_reader.tokens) ; ( match list_reader.list_form with | sym :: rest -> ( print_endline ("> sym: " ^ Printer.to_string sym) ; print_endline (">> rest: " ^ Printer.dump rest) ; match rest with | [T.List {T.value= T.Symbol {T.value= "syntax-rules"; meta= _} :: literals :: clauses; meta= _}] -> let sanitized_clauses = List.flatten (Macro.sanitize_clauses sym clauses) in let variants = Macro.generate_variants sym literals sanitized_clauses in let fixed_variants = ref Types.M9map.empty in let transforms = ref Types.M9map.empty in Types.M9map.iter (fun k v -> transforms := Types.M9map.add k (Utils.tokenize v) !transforms) variants ; let fixed_clauses = ref [] in Types.M9map.iter (fun k v -> let fixed_clause = fix_clause sym k (read_form (Utils.tokenize v)).form in (* TODO: is this even used? *) print_endline (">>>> registering variant: " ^ Printer.print k true ^ ": " ^ String.concat " " fixed_clause) ; macro := !macro @ fixed_clause ; let parsed = (read_form fixed_clause).form in fixed_clauses := !fixed_clauses @ [parsed] ; fixed_variants := Types.M9map.add k parsed !fixed_variants ; Env.set registered_macros k parsed ) variants ; print_endline ("trying to parse macro: " ^ String.concat " " !macro) ; let macro_entry = Types.macro sym literals T.Nil !fixed_variants in Env.set registered_macros sym macro_entry ; print_endline "finished" | _ -> raise (Utils.Syntax_error "read_macro botch") ) | _ -> raise (Utils.Syntax_error "read_macro last rest botch") ) ; print_endline ("SO HERE ARE THE MACRO VARIANTS: " ^ String.concat " " !macro) ; let trimmed_macro = List.tl !macro in let trimmed_tokens = trim_end list_reader.tokens in print_endline ("TRIMMED_MACRO: " ^ String.concat " " trimmed_macro) ; print_endline ("TRIMMED_TOKENS: " ^ String.concat " " trimmed_tokens) ; print_endline ("TRIMMED_MACRO: " ^ String.concat " " (trimmed_macro @ trimmed_tokens)) ; trimmed_macro @ trimmed_tokens and read_form all_tokens = (* print_endline ("READ_FORM: " ^ String.concat " " all_tokens); *) match all_tokens with | [] -> raise End_of_file | token :: tokens -> ( match token with | "'" -> read_quote "quote" tokens | "`" -> read_quote "quasiquote" tokens | "#" -> read_vector tokens | "#|" -> let list_reader = read_list "|#" {list_form= []; tokens} in print_endline ("block comment: " ^ Printer.dump list_reader.list_form) ; {form= T.Unspecified; tokens= list_reader.tokens} | "(" -> let list_reader = read_list ")" {list_form= []; tokens} in {form= Types.list list_reader.list_form; tokens= list_reader.tokens} | "" | "\t" | "\n" -> read_form tokens | "define-syntax" -> read_form (read_macro tokens) | _ -> if token.[0] = ';' then ( let list_reader = read_list "\\n" {list_form= []; tokens} in print_endline ("line comment: " ^ String.concat " " list_reader.tokens) ; {form= T.Unspecified; tokens= list_reader.tokens} ) else {form= read_atom token; tokens} ) let slurp filename = let chan = open_in filename in let b = Buffer.create 27 in Buffer.add_channel b chan (in_channel_length chan) ; close_in chan ; Buffer.contents b (* let read str = (read_form (tokenize str)).form *) let read str = let tokenized = Utils.tokenize str in print_endline ("TOKENIZED: " ^ String.concat " " tokenized) ; let form = (read_form tokenized).form in let retoken = Utils.tokenize (Printer.to_string form) in print_endline ("\nRETOKENIZED: " ^ String.concat " " retoken ^ "\n") ; let reform = (read_form retoken).form in print_endline ("\nFORM: " ^ Printer.to_string form) ; reform