shithub: martian9

ref: 6cec07e20602ff1a19e3179c48ad11203c61c274
dir: /reader.ml/

View raw version
module T = Types.Types

let token_re = Str.regexp "~@\\|[][{}()'`~^@]\\|\"\\(\\\\.\\|[^\"]\\)*\"?\\|;.*\\$\\|[^][  \n{}('\"`,;)]*"
let string_re = Str.regexp "\"\\(\\\\.\\|[^\\\\\"]\\)*\""
let registered_macros = Env.make None

type reader =
  { form : Types.m9type
  ; tokens : string list
  }

type list_reader =
  { list_form : Types.m9type list
  ; tokens : string list
  }

let tokenize str =
  List.map
    (function
      | Str.Delim x -> String.trim x (* move trim to regex for speed? *)
      | Str.Text x -> "tokenize botch")
    (List.filter
       (function
         | Str.Delim x -> true
         | Str.Text x -> false)
       (Str.full_split token_re str))
;;

let unescape_string token =
  if Str.string_match string_re token 0
  then (
    let without_quotes = String.sub token 1 (String.length token - 2) in
    Utils.gsub
      (Str.regexp "\\\\.")
      (function
        | "\\n" -> "\n"
        | x -> String.sub x 1 1)
      without_quotes)
  else (
    output_string stderr "expected '\"', got EOF\n";
    flush stderr;
    raise End_of_file)
;;

let fix_pattern sym pattern =
  let tokenized_pattern = tokenize pattern in
  let new_pattern = ref [] in
  let rec replace_token tokens =
    match tokens with
    | token :: [] -> let t = if token = "_" then (Printer.print sym true) else token in
                     new_pattern := !new_pattern @ [t];
                     !new_pattern
    | token :: rest -> let t = if token = "_" then (Printer.print sym true) else token in
                       new_pattern := !new_pattern @ [t];
                       replace_token rest
    | _ -> raise (Utils.Syntax_error "unable to fix pattern")
  in
  replace_token tokenized_pattern
;;

let read_atom token =
  match token with
  | "null" -> T.Nil
  | "#t" | "#true" -> T.Bool true
  | "#f" | "#false" -> T.Bool false
  | _ ->
    (match token.[0] with
    | '0' .. '9' -> Types.number (float_of_string token)
    | '#' ->
      (match token.[1], token.[2] with
      | '\\', '0' .. '9' | '\\', 'a' .. 'z' | '\\', 'A' .. 'Z' -> T.Char token.[2]
      | _ -> Types.symbol token)
    | '-' ->
      (match String.length token with
      | 1 -> Types.symbol token
      | _ ->
        (match token.[1] with
        | '0' .. '9' -> Types.number (float_of_string token)
        | _ -> Types.symbol token))
    | '"' -> T.String (unescape_string token)
    | _ -> Types.symbol token)
;;

let rec read_list eol list_reader =
  if (List.length list_reader.tokens > 1) && (List.hd list_reader.tokens) = "("
  then
    (match
       try Env.get registered_macros (Types.symbol (List.nth list_reader.tokens 1)) with
       | _ -> T.Nil
     with
     | T.Macro { T.value = sym; meta } ->
        print_endline("\nFOUND A MACRO! " ^ Printer.print sym true);
        print_endline("  tokens: " ^ String.concat " " list_reader.tokens);
        let rec collect_args tokens args =
          match tokens with
           | t :: [] -> args @ [t]
           | t :: ts -> if t = eol then args else collect_args ts args @ [t]
           | _ -> []
        in
        let args = collect_args (List.tl list_reader.tokens) [] in
        print_endline(" ### " ^ String.concat " " args);
        Macro.match_variant meta args
     | _ -> ());
  match list_reader.tokens with
  | [] -> raise (Utils.Syntax_error ("unterminated '" ^ eol ^ "'"))
  | token :: tokens ->
    if Str.string_match (Str.regexp eol) token 0
    then { list_form = list_reader.list_form; tokens }
    else (
      let reader = read_form list_reader.tokens in
      read_list eol { list_form = list_reader.list_form @ [ reader.form ]; tokens = reader.tokens })

and read_quote sym tokens =
  let reader = read_form tokens in
  { form = Types.list [ Types.symbol sym; reader.form ]; tokens = reader.tokens }

and read_vector all_tokens =
  match all_tokens with
  | [] -> raise End_of_file
  | token :: tokens ->
    (match token with
    | "(" ->
      let list_reader = read_list ")" { list_form = []; tokens } in
      { form = Types.vector list_reader.list_form; tokens = list_reader.tokens }
    | _ -> read_form tokens)

and read_macro tokens =
  let list_reader = read_list ")" { list_form = []; tokens } in
  print_endline ("MACRO: " ^ Printer.dump list_reader.list_form);
  (match list_reader.list_form with
  | sym :: rest ->
    print_endline ("  sym: " ^ Printer.print sym true);
    print_endline ("    rest: " ^ Printer.dump rest);
    (match rest with
     | [ T.List { T.value = T.Symbol { T.value = "syntax-rules" } :: literals :: clauses } ] ->
        let variants = Macro.generate_variants sym literals clauses in
        print_endline ("    variants: " ^ (Printer.print (Types.map variants) true));
        let macro_entry = Types.macro sym literals (Types.list clauses) variants in
        Env.set registered_macros sym macro_entry;
        Types.M9map.iter (fun k v ->
            print_endline ("   >>> " ^ String.concat " " (fix_pattern k (Printer.print v true)));
            print_endline ("  >> " ^ Printer.print k true ^ ":  " ^ Printer.print v true))
          variants;
        print_endline(" >>>>>> MACRO: " ^ Printer.print macro_entry true)
     | _ -> raise (Utils.Syntax_error "read_macro botch"))
  | _ as x -> print_endline ("  rest: " ^ Printer.dump x));
  { form = Types.list list_reader.list_form; tokens = list_reader.tokens }

and read_form all_tokens =
  (* print_endline ("READ_FORM: " ^ String.concat " " all_tokens); *)
  match all_tokens with
  | [] -> raise End_of_file
  | token :: tokens ->
    (match token with
    | "'" -> read_quote "quote" tokens
    | "`" -> read_quote "quasiquote" tokens
    | "#" -> read_vector tokens
    | "#|" ->
      let list_reader = read_list "|#" { list_form = []; tokens } in
      print_endline ("block comment: " ^ Printer.dump list_reader.list_form);
      { form = T.Unspecified; tokens = list_reader.tokens }
    | "(" ->
      let list_reader = read_list ")" { list_form = []; tokens } in
      { form = Types.list list_reader.list_form; tokens = list_reader.tokens }
    | "" | "\t" | "\n" -> read_form tokens
    | "define-syntax" -> read_macro tokens
    | _ ->
      if token.[0] = ';'
      then (
        let list_reader = read_list "\\n" { list_form = []; tokens } in
        print_endline ("line comment: " ^ String.concat " " list_reader.tokens);
        { form = T.Unspecified; tokens = list_reader.tokens })
      else { form = read_atom token; tokens })
;;

let slurp filename =
  let chan = open_in filename in
  let b = Buffer.create 27 in
  Buffer.add_channel b chan (in_channel_length chan);
  close_in chan;
  Buffer.contents b
;;

let read str = (read_form (tokenize str)).form