shithub: desereter

Download patch

ref: 91b74e4777120478cf658bad19323e1c147b0449
author: McKay Marston <mckay.marston@greymanlabs.com>
date: Fri Feb 5 16:44:10 EST 2021

The first commit to shithub.

--- /dev/null
+++ b/desereter.ml
@@ -1,0 +1,172 @@
+let entry_rgx = Str.regexp "\\([.a-z'-]+\\)(?[0-9]?)? \\(.*\\)"
+
+type wordset = {prefix: string; word: string; suffix: string}
+
+let is_uppercase = function
+  | 'A'
+   |'B'
+   |'C'
+   |'D'
+   |'E'
+   |'F'
+   |'G'
+   |'H'
+   |'I'
+   |'J'
+   |'K'
+   |'L'
+   |'M'
+   |'N'
+   |'O'
+   |'P'
+   |'Q'
+   |'R'
+   |'S'
+   |'T'
+   |'U'
+   |'V'
+   |'W'
+   |'X'
+   |'Y'
+   |'Z' ->
+      true
+  | _ -> false
+
+let get_vowel vowel =
+  if String.length vowel != 3 then ("", "")
+  else
+    let trimmed = String.sub vowel 0 2 in
+    match trimmed with
+    | "IY" -> ("𐐀", "𐐨")
+    | "EY" -> ("𐐁", "𐐩")
+    | "AA" -> ("𐐂", "𐐪")
+    | "AO" -> ("𐐉", "𐐱")
+    | "OW" -> ("𐐄", "𐐬")
+    | "UW" -> ("𐐅", "𐐭")
+    | "IH"
+     |"IX" ->
+        ("𐐆", "𐐮")
+    | "EH" -> ("𐐇", "𐐯")
+    | "AE" -> ("𐐈", "𐐰")
+    | "AX" -> ("𐐉", "𐐱")
+    | "AH" -> ("𐐊", "𐐲")
+    | "UH" -> ("𐐋", "𐐳")
+    | "AY" -> ("𐐌", "𐐴")
+    | "AW" -> ("𐐍", "𐐵")
+    | "ER" -> ("𐐊𐐡", "𐐊𐑉")
+    | _ -> ("", "")
+
+let get_char = function
+  | "W" -> ("𐐎", "𐐶")
+  | "Y" -> ("𐐏", "𐐷")
+  | "H"
+   |"HH" ->
+      ("𐐐", "𐐸")
+  | "P" -> ("𐐑", "𐐹")
+  | "B" -> ("𐐒", "𐐺")
+  | "T" -> ("𐐓", "𐐻")
+  | "D" -> ("𐐔", "𐐼")
+  | "CH" -> ("𐐕", "𐐽")
+  | "JH" -> ("𐐖", "𐐾")
+  | "K" -> ("𐐗", "𐐿")
+  | "G" -> ("𐐘", "𐑀")
+  | "F" -> ("𐐙", "𐑁")
+  | "V" -> ("𐐚", "𐑂")
+  | "TH" -> ("𐐛", "𐑃")
+  | "DH" -> ("𐐜", "𐑄")
+  | "S" -> ("𐐝", "𐑅")
+  | "Z" -> ("𐐞", "𐑆")
+  | "SH" -> ("𐐟", "𐑇")
+  | "ZH" -> ("𐐠", "𐑈")
+  | "R" -> ("𐐡", "𐑉")
+  | "L" -> ("𐐢", "𐑊")
+  | "M" -> ("𐐣", "𐑋")
+  | "N" -> ("𐐤", "𐑌")
+  | "NX"
+   |"NG" ->
+      ("𐐥", "𐑍")
+  | v -> get_vowel v
+
+let rec parse_arpabet line des uppercase =
+  match line with
+  | hd :: tl ->
+      let u, l = get_char hd in
+      (if uppercase then u else l) ^ parse_arpabet tl des false
+  | [] -> des
+
+let unquoted word = String.sub word 1 (String.length word - 2)
+
+let consider word =
+  let wrd = ref (String.lowercase_ascii word) in
+  let prefix = ref "" in
+  let suffix = ref "" in
+  ( try
+      let pos = Str.search_forward (Str.regexp "[({\"]") !wrd 0 + 1 in
+      wrd := String.sub word pos (String.length !wrd - pos) ;
+      prefix := String.sub word 0 pos
+    with Not_found -> () ) ;
+  ( try
+      let pos = Str.search_backward (Str.regexp "[})\"\\.,!;:]") !wrd (String.length !wrd) in
+      suffix := String.sub !wrd pos (String.length !wrd - pos) ;
+      wrd := String.sub !wrd 0 pos;
+    with Not_found -> () ) ;
+  {prefix= !prefix; word= !wrd; suffix= !suffix}
+
+let parse word dictionary =
+  let uppercase = is_uppercase word.[0] in
+  let wordparts = consider word in
+  try
+    let des = parse_arpabet (String.split_on_char ' ' (Hashtbl.find dictionary wordparts.word)) "" uppercase in
+    wordparts.prefix ^ des ^ wordparts.suffix
+  with Not_found -> word
+
+let sanitize line = Str.global_replace (Str.regexp "\\.\\.\\.") " ... " line
+
+let load_dictionary extra =
+  let default =
+		try
+			let prefix = Unix.getenv "OPAM_SWITCH_PREFIX" in
+			prefix ^ "/share/desereter/cmudict.dict"
+		with Not_found -> "/lib/cmudict.dict"
+  in
+  let dictionaries = [default] @ String.split_on_char ';' extra in
+  let dictionary = Hashtbl.create 150000 in
+  let load file =
+    if String.length file > 0 then
+      let ic = open_in file in
+      try
+        while true do
+          let entry = input_line ic in
+          if Str.string_match entry_rgx entry 0 then
+            let word = Str.matched_group 1 entry in
+            let pronunciation = Str.matched_group 2 entry in
+            Hashtbl.add dictionary word pronunciation
+        done
+      with End_of_file -> close_in ic in
+  List.iter load dictionaries ; dictionary
+
+let translate dictionary line =
+  let words = String.split_on_char ' ' (sanitize line) in
+  let words =
+    List.filter
+      (fun x ->
+        let w = String.trim x in
+        String.length w > 0 )
+      words in
+  print_endline (List.fold_left (fun acc word -> acc ^ parse word dictionary ^ " ") "" words)
+
+let () =
+  let line = ref "" in
+  let extra = ref "" in
+  Arg.parse
+    [(* ("-i", Arg.Set_string line, "input"); *) ("-d", Arg.Set_string extra, "dictionary")]
+    (fun x -> line := x)
+    (Sys.argv.(0) ^ " [-d dictionary] <-i input>") ;
+  let dictionary = load_dictionary !extra in
+  if String.length !line > 0 then translate dictionary !line
+  else
+    try
+      while true do
+        read_line () |> translate dictionary
+      done
+    with End_of_file -> ()
--- /dev/null
+++ b/desereter.opam
@@ -1,0 +1,26 @@
+# This file is generated by dune, edit dune-project instead
+opam-version: "2.0"
+version: "19"
+synopsis: "Tool to convert English text to the Deseret Alphabet"
+maintainer: ["smazga@greymanlabs.com"]
+authors: ["smazga"]
+homepage: "https://shithub.us/git/zgasma/desereter/HEAD/info.html"
+bug-reports: "no"
+depends: [
+  "dune" {>= "2.7"}
+  "odoc" {with-doc}
+]
+build: [
+  ["dune" "subst"] {dev}
+  [
+    "dune"
+    "build"
+    "-p"
+    name
+    "-j"
+    jobs
+    "@install"
+    "@runtest" {with-test}
+    "@doc" {with-doc}
+  ]
+]
--- /dev/null
+++ b/dune
@@ -1,0 +1,11 @@
+(executable
+ (name desereter)
+ (libraries unix str))
+
+(install
+ (section bin)
+  (files (desereter.exe as desereter)))
+
+(install
+ (section share)
+ (files cmudict.dict))
--- /dev/null
+++ b/dune-project
@@ -1,0 +1,13 @@
+(lang dune 2.7)
+(version 19)
+(name desereter)
+(maintainers smazga@greymanlabs.com)
+(authors smazga)
+(homepage https://shithub.us/git/zgasma/desereter/HEAD/info.html)
+
+(bug_reports no)
+(generate_opam_files true)
+
+(package
+  (name desereter)
+  (synopsis "Tool to convert English text to the Deseret Alphabet"))
--- /dev/null
+++ b/mkfile
@@ -1,0 +1,14 @@
+TARG=desereter
+
+$TARG: desereter.ml
+	ocamlc str.cma unix.cma desereter.ml -o $target
+
+install:V: $TARG
+	cp $TARG $home/bin/
+	cp cmudict.dict /lib
+
+uninstall:V:
+	rm -f $home/bin/$TARG /lib/cmudict.dict
+
+clean:V:
+	rm -f $TARG *cmi *cmo