/* FLTK 1.3.0 */
00001 /* $XFree86: xc/lib/X11/lcUniConv/utf8.h,v 1.3 2000/11/28 18:50:07 dawes Exp $ */ 00002 00003 /* 00004 * UTF-8 00005 */ 00006 00007 /* Specification: RFC 2279 */ 00008 00009 static int 00010 utf8_mbtowc (conv_t conv, ucs4_t *pwc, const unsigned char *s, int n) 00011 { 00012 unsigned char c = s[0]; 00013 00014 if (c < 0x80) { 00015 *pwc = c; 00016 return 1; 00017 } else if (c < 0xc2) { 00018 return RET_ILSEQ; 00019 } else if (c < 0xe0) { 00020 if (n < 2) 00021 return RET_TOOFEW(0); 00022 if (!((s[1] ^ 0x80) < 0x40)) 00023 return RET_ILSEQ; 00024 *pwc = ((ucs4_t) (c & 0x1f) << 6) 00025 | (ucs4_t) (s[1] ^ 0x80); 00026 return 2; 00027 } else if (c < 0xf0) { 00028 if (n < 3) 00029 return RET_TOOFEW(0); 00030 if (!((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40 00031 && (c >= 0xe1 || s[1] >= 0xa0))) 00032 return RET_ILSEQ; 00033 *pwc = ((ucs4_t) (c & 0x0f) << 12) 00034 | ((ucs4_t) (s[1] ^ 0x80) << 6) 00035 | (ucs4_t) (s[2] ^ 0x80); 00036 return 3; 00037 } else if (c < 0xf8) { 00038 if (n < 4) 00039 return RET_TOOFEW(0); 00040 if (!((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40 00041 && (s[3] ^ 0x80) < 0x40 00042 && (c >= 0xf1 || s[1] >= 0x90))) 00043 return RET_ILSEQ; 00044 *pwc = ((ucs4_t) (c & 0x07) << 18) 00045 | ((ucs4_t) (s[1] ^ 0x80) << 12) 00046 | ((ucs4_t) (s[2] ^ 0x80) << 6) 00047 | (ucs4_t) (s[3] ^ 0x80); 00048 return 4; 00049 } else if (c < 0xfc) { 00050 if (n < 5) 00051 return RET_TOOFEW(0); 00052 if (!((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40 00053 && (s[3] ^ 0x80) < 0x40 && (s[4] ^ 0x80) < 0x40 00054 && (c >= 0xf9 || s[1] >= 0x88))) 00055 return RET_ILSEQ; 00056 *pwc = ((ucs4_t) (c & 0x03) << 24) 00057 | ((ucs4_t) (s[1] ^ 0x80) << 18) 00058 | ((ucs4_t) (s[2] ^ 0x80) << 12) 00059 | ((ucs4_t) (s[3] ^ 0x80) << 6) 00060 | (ucs4_t) (s[4] ^ 0x80); 00061 return 5; 00062 } else if (c < 0xfe) { 00063 if (n < 6) 00064 return RET_TOOFEW(0); 00065 if (!((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40 00066 && (s[3] ^ 0x80) < 0x40 && (s[4] ^ 0x80) < 0x40 00067 && 
(s[5] ^ 0x80) < 0x40 00068 && (c >= 0xfd || s[1] >= 0x84))) 00069 return RET_ILSEQ; 00070 *pwc = ((ucs4_t) (c & 0x01) << 30) 00071 | ((ucs4_t) (s[1] ^ 0x80) << 24) 00072 | ((ucs4_t) (s[2] ^ 0x80) << 18) 00073 | ((ucs4_t) (s[3] ^ 0x80) << 12) 00074 | ((ucs4_t) (s[4] ^ 0x80) << 6) 00075 | (ucs4_t) (s[5] ^ 0x80); 00076 return 6; 00077 } else 00078 return RET_ILSEQ; 00079 } 00080 00081 static int 00082 utf8_wctomb (conv_t conv, unsigned char *r, ucs4_t wc, int n) /* n == 0 is acceptable */ 00083 { 00084 int count; 00085 if (wc < 0x80) 00086 count = 1; 00087 else if (wc < 0x800) 00088 count = 2; 00089 else if (wc < 0x10000) 00090 count = 3; 00091 else if (wc < 0x200000) 00092 count = 4; 00093 else if (wc < 0x4000000) 00094 count = 5; 00095 else if (wc <= 0x7fffffff) 00096 count = 6; 00097 else 00098 return RET_ILSEQ; 00099 if (n < count) 00100 return RET_TOOSMALL; 00101 switch (count) { /* note: code falls through cases! */ 00102 case 6: r[5] = 0x80 | (wc & 0x3f); wc = wc >> 6; wc |= 0x4000000; 00103 case 5: r[4] = 0x80 | (wc & 0x3f); wc = wc >> 6; wc |= 0x200000; 00104 case 4: r[3] = 0x80 | (wc & 0x3f); wc = wc >> 6; wc |= 0x10000; 00105 case 3: r[2] = 0x80 | (wc & 0x3f); wc = wc >> 6; wc |= 0x800; 00106 case 2: r[1] = 0x80 | (wc & 0x3f); wc = wc >> 6; wc |= 0xc0; 00107 case 1: r[0] = wc; 00108 } 00109 return count; 00110 }