ref: 9358139dacada10f22281ef575e4b80435d9184b
dir: /hyph.c/
/* hyphenation */ #include <ctype.h> #include <stdio.h> #include <string.h> #include "roff.h" #include "hyen.h" #define HYPATLEN (NHYPHS * 16) /* hyphenation pattern length */ /* the hyphenation dictionary (.hw) */ static char hwword[HYPATLEN]; /* buffer for .hw words */ static char hwhyph[HYPATLEN]; /* buffer for .hw hyphenations */ static int hwword_len; /* used hwword[] length */ /* word lists (per starting characters) for dictionary entries */ static int hwhead[256]; /* the head of hw_*[] lists */ static int hwnext[NHYPHS]; /* the next word with the same initial */ static int hwidx[NHYPHS]; /* the offset of this word in hwword[] */ static int hwlen[NHYPHS]; /* the length of the word */ static int hw_n = 1; /* number of words in hw_*[] lists */ /* functions for the hyphenation dictionary */ static void hw_add(char *word) { char *s = word; char *d = hwword + hwword_len; int c, i; if (hw_n == LEN(hwidx) || hwword_len + 128 > sizeof(hwword)) return; i = hw_n++; while ((c = *s++)) { if (c == '-') hwhyph[d - hwword] = 1; else *d++ = c; } *d++ = '\0'; hwidx[i] = hwword_len; hwword_len = d - hwword; hwlen[i] = hwword_len - hwidx[i] - 1; hwnext[i] = hwhead[(unsigned char) word[0]]; hwhead[(unsigned char) word[0]] = i; } /* copy lower-cased s to d */ static void hw_strcpy(char *d, char *s) { while (*s) { if (*s & 0x80) *d++ = *s++; else *d++ = tolower(*s++); } *d = '\0'; } static char *hw_lookup(char *s) { char word[ILNLEN]; int i; hw_strcpy(word, s); /* finding a dictionary entry that matches a prefix of the input */ i = hwhead[(unsigned char) word[0]]; while (i > 0) { if (!strncmp(word, hwword + hwidx[i], hwlen[i])) return hwhyph + hwidx[i]; i = hwnext[i]; } return NULL; } void tr_hw(char **args) { int i; for (i = 1; i < NARGS && args[i]; i++) hw_add(args[i]); } /* the tex hyphenation algorithm */ static int hyinit; /* hyphenation data initialized */ static char hypats[HYPATLEN]; /* the patterns */ static char hynums[HYPATLEN]; /* numbers in the patterns */ static int hypats_len; /* lists (one per pair of starting characters) for storing patterns */ static int hyhead[256 * 256]; /* the head of hy_*[] lists */ static int hynext[NHYPHS]; /* the next pattern with the same initial */ static int hyoff[NHYPHS]; /* the offset of this pattern in hypats[] */ static int hy_n = 1; /* number of words in hy_*[] lists */ #define HYC_MAP(c) ((c) == '.' ? 0 : (c)) /* index of the string starting with a and b in hyhash[] */ static int hy_idx(char *s) { return (HYC_MAP((unsigned char) s[1]) << 8) | HYC_MAP((unsigned char) s[0]); } /* make s lower-case and replace its non-alphabetic characters with . */ static void hy_strcpy(char *d, char *s) { int c; *d++ = '.'; while ((c = (unsigned char) *s++)) *d++ = c & 0x80 ? c : (isalpha(c) ? tolower(c) : '.'); *d++ = '.'; *d = '\0'; } /* find the patterns matching s and update hyphenation values in n */ static void hy_find(char *s, char *n) { int plen; char *p, *np; int j; int idx = hyhead[hy_idx(s)]; while (idx > 0) { p = hypats + hyoff[idx]; np = hynums + (p - hypats); plen = strlen(p); if (!strncmp(s + 2, p + 2, plen - 2)) for (j = 0; j < plen; j++) if (n[j] < np[j]) n[j] = np[j]; idx = hynext[idx]; } } /* mark the hyphenation points of word in hyph */ static void hy_dohyph(char *hyph, char *word, int flg) { char n[ILNLEN] = {0}; char w[ILNLEN]; int c[ILNLEN]; /* start of the i-th character in w */ int nc = 0; int i, wlen; hy_strcpy(w, word); wlen = strlen(w); for (i = 0; i < wlen - 1; i += utf8len((unsigned int) w[i])) c[nc++] = i; for (i = 0; i < nc - 1; i++) hy_find(w + c[i], n + c[i]); memset(hyph, 0, wlen * sizeof(hyph[0])); for (i = 3; i < nc - 2; i++) if (n[i] % 2 && w[c[i - 1]] != '.' && w[c[i - 2]] != '.' && w[c[i + 1]] != '.') hyph[c[i - 1]] = (~flg & HY_FINAL2 || w[c[i + 2]] != '.') && (~flg & HY_FIRST2 || w[c[i - 3]] != '.'); } /* insert pattern s into hypats[] and hynums[] */ static void hy_ins(char *s) { char *p = hypats + hypats_len; char *n = hynums + hypats_len; int i = 0, idx; if (hy_n >= NHYPHS || hypats_len + 64 >= sizeof(hypats)) return; idx = hy_n++; while (*s) { if (*s >= '0' && *s <= '9') n[i] = *s++ - '0'; else p[i++] = *s++; } p[i] = '\0'; hyoff[idx] = hypats_len; hynext[idx] = hyhead[hy_idx(p)]; hyhead[hy_idx(p)] = idx; hypats_len += i + 1; } static void hyph_readpatterns(char *s) { char word[ILNLEN]; char *d; while (*s) { d = word; while (*s && !isspace((unsigned char) *s)) *d++ = *s++; *d = '\0'; hy_ins(word); while (*s && isspace((unsigned char) *s)) s++; } } static void hyph_readexceptions(char *s) { char word[ILNLEN]; char *d; while (*s) { d = word; while (*s && !isspace((unsigned char) *s)) *d++ = *s++; *d = '\0'; hw_add(word); while (*s && isspace((unsigned char) *s)) s++; } } void hyphenate(char *hyph, char *word, int flg) { char *r; if (!hyinit) { hyinit = 1; hyph_readpatterns(en_patterns); hyph_readexceptions(en_exceptions); } r = hw_lookup(word); if (r) memcpy(hyph, r, strlen(word) + 1); else hy_dohyph(hyph, word, flg); } void tr_hpfa(char **args) { char tok[ILNLEN]; FILE *filp; /* reading patterns */ if (args[1]) { hyinit = 1; filp = fopen(args[1], "r"); while (fscanf(filp, "%s", tok) == 1) hy_ins(tok); fclose(filp); } /* reading exceptions */ if (args[2]) { filp = fopen(args[1], "r"); while (fscanf(filp, "%s", tok) == 1) hw_add(tok); fclose(filp); } } void tr_hpf(char **args) { /* reseting the patterns */ hypats_len = 0; hy_n = 1; memset(hyhead, 0, sizeof(hyhead)); memset(hynext, 0, sizeof(hynext)); /* reseting the dictionary */ hwword_len = 0; hw_n = 1; memset(hwhead, 0, sizeof(hwhead)); memset(hwnext, 0, sizeof(hwnext)); /* reading */ tr_hpfa(args); }