shithub: martian9

ref: 284f87648016e19394d52aca2505ef24b50bdbca
dir: /reader.ml/

View raw version
module T = Types.Types

(** Regular expression (Str syntax) matching one token. Alternatives, tried in
    order:
    - the two-character sequence [~@];
    - a single bracket, brace, parenthesis, quote, backquote, tilde, caret or
      at-sign character;
    - a double-quoted string in which a backslash escapes the next character;
      the closing quote is optional, so an unterminated string still matches;
    - a semicolon followed by the rest of the line;
    - a run of characters that are none of: brackets, braces, parentheses,
      the whitespace characters listed in the class, newline, quotes,
      backquote, comma, semicolon.
    The last alternative uses a star, so it can also match the empty string.
    NOTE(review): the class lists two blank characters before the newline;
    they may originally have been a space and a tab - confirm. *)
let token_re = Str.regexp "~@\\|[][{}()'`~^@]\\|\"\\(\\\\.\\|[^\"]\\)*\"?\\|;.*\\|[^][  \n{}('\"`,;)]*"

(** Regular expression (Str syntax) matching a complete double-quoted string:
    an opening quote, any number of backslash-escaped characters or characters
    other than backslash and double quote, then a closing quote.
    Not referenced by the other definitions visible in this file. *)
let string_re = Str.regexp "\"\\(\\\\.\\|[^\\\\\"]\\)*\""

(** Result of reading a single form: the parsed [form] together with the
    [tokens] that remain after it. *)
type reader = { form : Types.m9type; tokens : string list }

(** State used while reading a delimited sequence: [list_form] holds the forms
    collected so far (in source order) and [tokens] the tokens still to be
    consumed. *)
type list_reader = { list_form : Types.m9type list; tokens : string list }

(** [tokenize str] returns the tokens of [str], i.e. the non-empty substrings
    matched by [token_re], in source order.

    Text lying between matches (for example spaces, newlines and commas) is
    discarded. *)
let tokenize str =
  let keep_token acc = function
    (* [token_re] can match the empty string, and [Str.full_split] reports
       such matches as empty delimiters (for instance between two consecutive
       blanks). They are not tokens, and the reader indexes the first
       character of every token, so they must not be passed on. *)
    | Str.Text _ | Str.Delim "" -> acc
    | Str.Delim tok -> tok :: acc
  in
  Str.full_split token_re str |> List.fold_left keep_token [] |> List.rev

(** [read_atom token] converts a single non-list token into a value.

    - [null], [true] and [false] become [T.Nil] and the two [T.Bool] values.
    - A token starting with a digit, or with a minus sign immediately followed
      by a digit, is converted with [int_of_string] into a [T.Number].
    - A token starting with a double quote becomes a [T.String] holding the
      token text unchanged.
    - Anything else, including a lone minus sign, becomes a symbol.

    @raise Failure if a number-like token is rejected by [int_of_string].
    @raise Invalid_argument if [token] is the empty string. *)
let read_atom token =
  let is_digit c = '0' <= c && c <= '9' in
  let number () = T.Number (int_of_string token) in
  let symbol () = Types.symbol token in
  match token with
  | "null" -> T.Nil
  | "true" -> T.Bool true
  | "false" -> T.Bool false
  | _ ->
      let first = token.[0] in
      if is_digit first then number ()
      else if first = '"' then T.String token (* TODO: unescape *)
      else if first = '-' && String.length token > 1 && is_digit token.[1] then number ()
      else symbol ()

(** [is_skippable_token token] holds for tokens that carry no form: the empty
    string and [;] line comments. *)
let is_skippable_token token = token = "" || token.[0] = ';'

(** [skip_block_comment depth tokens] drops tokens up to and including the
    terminator of the current block comment and returns what follows it.
    A terminator is any token starting with [|#]. A nested [#|] token
    increases [depth], so only the matching terminator ends the comment.
    The skipped tokens are never parsed, so arbitrary text inside a comment
    cannot make the reader fail.

    Prints [unexpected EOF] and raises [End_of_file] if the tokens run out
    first. *)
let rec skip_block_comment depth = function
  | [] ->
      print_endline "unexpected EOF";
      raise End_of_file
  | "#|" :: rest -> skip_block_comment (depth + 1) rest
  | token :: rest ->
      let closes = String.length token >= 2 && token.[0] = '|' && token.[1] = '#' in
      if not closes then skip_block_comment depth rest
      else if depth = 0 then rest
      else skip_block_comment (depth - 1) rest

(** [read_list eol list_reader] reads forms from [list_reader.tokens] until a
    token matching [eol] is found. [eol] is interpreted as a Str regular
    expression matched at the start of the token.

    Returns the forms already present in [list_reader.list_form] followed by
    the forms read, in source order, together with the tokens that follow the
    terminator.

    Prints [unexpected EOF] and raises [End_of_file] if the terminator is
    missing. *)
let rec read_list eol list_reader =
  (* Compile the terminator once rather than once per token. *)
  let eol_re = Str.regexp eol in
  let { list_form; tokens } = list_reader in
  (* Forms are accumulated in reverse so that adding one is constant time. *)
  let rec collect rev_forms = function
    | [] ->
        print_endline "unexpected EOF";
        raise End_of_file
    | token :: rest when Str.string_match eol_re token 0 ->
        { list_form = List.rev rev_forms; tokens = rest }
    (* Skip comments here, before delegating to [read_form]: otherwise a
       comment placed right before the terminator would make [read_form]
       consume the terminator itself as an atom. *)
    | token :: rest when is_skippable_token token -> collect rev_forms rest
    | remaining ->
        let ({ form; tokens } : reader) = read_form remaining in
        collect (form :: rev_forms) tokens
  in
  collect (List.rev list_form) tokens

(** [read_form all_tokens] reads exactly one form from the front of
    [all_tokens] and returns it with the remaining tokens.

    - Empty tokens and [;] line comments are skipped.
    - An opening parenthesis starts a list, built with [Types.list].
    - [#|] starts a block comment, which yields [T.Comment].
    - Any other token is converted by [read_atom].

    @raise End_of_file if there is no form to read. *)
and read_form all_tokens =
  match all_tokens with
  | [] -> raise End_of_file
  | token :: tokens when is_skippable_token token -> read_form tokens
  | "(" :: tokens ->
      let { list_form; tokens } = read_list ")" { list_form = []; tokens } in
      { form = Types.list list_form; tokens }
  | "#|" :: tokens -> { form = T.Comment; tokens = skip_block_comment 0 tokens }
  | token :: tokens -> { form = read_atom token; tokens }

(** [read_str str] tokenizes [str] and returns the first form it contains.
    Tokens following that form are ignored.

    @raise End_of_file if [str] contains no form. *)
let read_str str =
  let { form; _ } = read_form (tokenize str) in
  form