ref: ce3a09ee03be91d042b0284ffa3bfe3bb1f675db
dir: /DoConfig/fltk/src/xutf8/utf8Utils.c/
/* "$Id: $" * * Author: Jean-Marc Lienher ( http://oksid.ch ) * Copyright 2000-2003 by O'ksi'D. * * This library is free software. Distribution and use rights are outlined in * the file "COPYING" which should have been included with this file. If this * file is missing or damaged, see the license at: * * http://www.fltk.org/COPYING.php * * Please report all bugs and problems on the following page: * * http://www.fltk.org/str.php */ /* * Unicode to UTF-8 conversion functions. */ #if !defined(WIN32) && !defined(__APPLE__) #include "../Xutf8.h" /*** NOTE : all functions are LIMITED to 24 bits Unicode values !!! ***/ /* * Converts the first char of the UTF-8 string to an Unicode value * Returns the byte length of the converted UTF-8 char * Returns -1 if the UTF-8 string is not valid */ int XConvertUtf8ToUcs(const unsigned char *buf, int len, unsigned int *ucs) { if (buf[0] & 0x80) { if (buf[0] & 0x40) { if (buf[0] & 0x20) { if (buf[0] & 0x10) { if (buf[0] & 0x08) { if (buf[0] & 0x04) { if (buf[0] & 0x02) { /* bad UTF-8 string */ } else { /* 0x04000000 - 0x7FFFFFFF */ } } else if (len > 4 && (buf[1] & 0xC0) == 0x80 && (buf[2] & 0xC0) == 0x80 && (buf[3] & 0xC0) == 0x80 && (buf[4] & 0xC0) == 0x80) { /* 0x00200000 - 0x03FFFFFF */ *ucs = ((buf[0] & ~0xF8) << 24) + ((buf[1] & ~0x80) << 18) + ((buf[2] & ~0x80) << 12) + ((buf[3] & ~0x80) << 6) + (buf[4] & ~0x80); if (*ucs > 0x001FFFFF && *ucs < 0x01000000) return 5; } } else if (len > 3 && (buf[1] & 0xC0) == 0x80 && (buf[2] & 0xC0) == 0x80 && (buf[3] & 0xC0) == 0x80) { /* 0x00010000 - 0x001FFFFF */ *ucs = ((buf[0] & ~0xF0) << 18) + ((buf[1] & ~0x80) << 12) + ((buf[2] & ~0x80) << 6) + (buf[3] & ~0x80); if (*ucs > 0x0000FFFF) return 4; } } else if (len > 2 && (buf[1] & 0xC0) == 0x80 && (buf[2] & 0xC0) == 0x80) { /* 0x00000800 - 0x0000FFFF */ *ucs = ((buf[0] & ~0xE0) << 12) + ((buf[1] & ~0x80) << 6) + (buf[2] & ~0x80); if (*ucs > 0x000007FF) return 3; } } else if (len > 1 && (buf[1] & 0xC0) == 0x80) { /* 0x00000080 - 0x000007FF */ *ucs = ((buf[0] & ~0xC0) << 6) + (buf[1] & ~0x80); if (*ucs > 0x0000007F) return 2; } } } else if (len > 0) { /* 0x00000000 - 0x0000007F */ *ucs = buf[0]; return 1; } *ucs = (unsigned int) '?'; /* bad utf-8 string */ return -1; } /* * Converts an Unicode value to an UTF-8 string * NOTE : the buffer (buf) must be at least 5 bytes long !!! */ int XConvertUcsToUtf8(unsigned int ucs, char *buf) { if (ucs < 0x000080) { buf[0] = ucs; return 1; } else if (ucs < 0x000800) { buf[0] = 0xC0 | (ucs >> 6); buf[1] = 0x80 | (ucs & 0x3F); return 2; } else if (ucs < 0x010000) { buf[0] = 0xE0 | (ucs >> 12); buf[1] = 0x80 | ((ucs >> 6) & 0x3F); buf[2] = 0x80 | (ucs & 0x3F); return 3; } else if (ucs < 0x00200000) { buf[0] = 0xF0 | (ucs >> 18); buf[1] = 0x80 | ((ucs >> 12) & 0x3F); buf[2] = 0x80 | ((ucs >> 6) & 0x3F); buf[3] = 0x80 | (ucs & 0x3F); return 4; } else if (ucs < 0x01000000) { buf[0] = 0xF8 | (ucs >> 24); buf[1] = 0x80 | ((ucs >> 18) & 0x3F); buf[2] = 0x80 | ((ucs >> 12) & 0x3F); buf[3] = 0x80 | ((ucs >> 6) & 0x3F); buf[4] = 0x80 | (ucs & 0x3F); return 5; } buf[0] = '?'; return -1; } /* * returns the byte length of the first UTF-8 char * (returns -1 if not valid) */ int XUtf8CharByteLen(const unsigned char *buf, int len) { unsigned int ucs; return XConvertUtf8ToUcs(buf, len, &ucs); } /* * returns the quantity of Unicode chars in the UTF-8 string */ int XCountUtf8Char(const unsigned char *buf, int len) { int i = 0; int nbc = 0; while (i < len) { int cl = XUtf8CharByteLen(buf + i, len - i); if (cl < 1) cl = 1; nbc++; i += cl; } return nbc; } /* * Same as XConvertUtf8ToUcs but no sanity check is done. */ int XFastConvertUtf8ToUcs(const unsigned char *buf, int len, unsigned int *ucs) { if (buf[0] & 0x80) { if (buf[0] & 0x40) { if (buf[0] & 0x20) { if (buf[0] & 0x10) { if (buf[0] & 0x08) { if (buf[0] & 0x04) { if (buf[0] & 0x02) { /* bad UTF-8 string */ } else { /* 0x04000000 - 0x7FFFFFFF */ } } else if (len > 4) { /* 0x00200000 - 0x03FFFFFF */ *ucs = ((buf[0] & ~0xF8) << 24) + ((buf[1] & ~0x80) << 18) + ((buf[2] & ~0x80) << 12) + ((buf[3] & ~0x80) << 6) + (buf[4] & ~0x80); return 5; } } else if (len > 3) { /* 0x00010000 - 0x001FFFFF */ *ucs = ((buf[0] & ~0xF0) << 18) + ((buf[1] & ~0x80) << 12) + ((buf[2] & ~0x80) << 6) + (buf[3] & ~0x80); return 4; } } else if (len > 2) { /* 0x00000800 - 0x0000FFFF */ *ucs = ((buf[0] & ~0xE0) << 12) + ((buf[1] & ~0x80) << 6) + (buf[2] & ~0x80); return 3; } } else if (len > 1) { /* 0x00000080 - 0x000007FF */ *ucs = ((buf[0] & ~0xC0) << 6) + (buf[1] & ~0x80); return 2; } } } else if (len > 0) { /* 0x00000000 - 0x0000007F */ *ucs = buf[0]; return 1; } *ucs = (unsigned int) '?'; /* bad utf-8 string */ return -1; } #endif /* X11 only */ /* * End of "$Id: $". */