ICU 4.8.1.1  4.8.1.1
utf_old.h
Go to the documentation of this file.
00001 /*
00002 *******************************************************************************
00003 *
00004 *   Copyright (C) 2002-2008, International Business Machines
00005 *   Corporation and others.  All Rights Reserved.
00006 *
00007 *******************************************************************************
00008 *   file name:  utf_old.h
00009 *   encoding:   US-ASCII
00010 *   tab size:   8 (not used)
00011 *   indentation:4
00012 *
00013 *   created on: 2002sep21
00014 *   created by: Markus W. Scherer
00015 */
00016 
00146 #ifndef __UTF_OLD_H__
00147 #define __UTF_OLD_H__
00148 
00149 #ifndef U_HIDE_DEPRECATED_API
00150 
00151 /* utf.h must be included first. */
00152 #ifndef __UTF_H__
00153 #   include "unicode/utf.h"
00154 #endif
00155 
00156 /* Formerly utf.h, part 1 --------------------------------------------------- */
00157 
/* Only when U_USE_UTF_DEPRECATES is defined: UTextOffset is a typedef for int32_t. */
00158 #ifdef U_USE_UTF_DEPRECATES
00159 
00166 typedef int32_t UTextOffset;
00167 #endif
00168 
/* NOTE(review): presumably the code unit width in bits of the default UTF; */
/* the unsuffixed UTF_* macros later in this file map to UTF-16 macros. */
00170 #define UTF_SIZE 16
00171 
/* Configuration switches: UTF_SAFE is defined (empty), UTF_UNSAFE and UTF_STRICT are undefined. */
00178 #define UTF_SAFE
00179 
00180 #undef UTF_UNSAFE
00181 
00182 #undef UTF_STRICT
00183 
/* Value assigned to c by UTF8_NEXT_CHAR_SAFE and UTF8_PREV_CHAR_SAFE when the byte read */
/* cannot start (or, backwards, end) a multi-byte sequence. */
00198 #define UTF8_ERROR_VALUE_1 0x15
00199 
/* Second UTF-8 error value; in this file it is only tested by UTF_IS_ERROR and UTF_IS_VALID. */
00205 #define UTF8_ERROR_VALUE_2 0x9f
00206 
/* Value assigned/written by the UTF-16 and UTF-32 "safe" macros on ill-formed input. */
00213 #define UTF_ERROR_VALUE 0xffff
00214 
/* True if the low 16 bits of c are 0xfffe or 0xffff, or c equals one of the UTF-8 error values. */
00221 #define UTF_IS_ERROR(c) \
00222     (((c)&0xfffe)==0xfffe || (c)==UTF8_ERROR_VALUE_1 || (c)==UTF8_ERROR_VALUE_2)
00223 
/* True if c passes UTF_IS_UNICODE_CHAR and is neither of the two UTF-8 error values. */
00229 #define UTF_IS_VALID(c) \
00230     (UTF_IS_UNICODE_CHAR(c) && \
00231      (c)!=UTF8_ERROR_VALUE_1 && (c)!=UTF8_ERROR_VALUE_2)
00232 
/* True if uchar is in 0xd800..0xdfff (all bits above the low 11 must equal 0xd800). */
00237 #define UTF_IS_SURROGATE(uchar) (((uchar)&0xfffff800)==0xd800)
00238 
/* True if c is in 0xfdd0..0xfdef, or its low 16 bits are 0xfffe/0xffff, */
/* and in either case c is at least 0xfdd0 and at most 0x10ffff. */
00244 #define UTF_IS_UNICODE_NONCHAR(c) \
00245     ((c)>=0xfdd0 && \
00246      ((uint32_t)(c)<=0xfdef || ((c)&0xfffe)==0xfffe) && \
00247      (uint32_t)(c)<=0x10ffff)
00248 
/* True if c is below 0xd800, or is in 0xe000..0x10ffff and fails UTF_IS_UNICODE_NONCHAR. */
/* Negative values fail because every comparison is done on (uint32_t)(c). */
00264 #define UTF_IS_UNICODE_CHAR(c) \
00265     ((uint32_t)(c)<0xd800 || \
00266         ((uint32_t)(c)>0xdfff && \
00267          (uint32_t)(c)<=0x10ffff && \
00268          !UTF_IS_UNICODE_NONCHAR(c)))
00269 
00270 /* Formerly utf8.h ---------------------------------------------------------- */
00271 
/**
 * Look up the number of trail bytes for a UTF-8 lead byte.
 * The argument is converted to uint8_t and used as an index into
 * utf8_countTrailBytes[], which is declared outside this header.
 * The argument is parenthesized before the cast so that an expression
 * argument (e.g. base+offset) is truncated as a whole; otherwise only its
 * first operand would be cast and the index could exceed 255.
 */
#define UTF8_COUNT_TRAIL_BYTES(leadByte) (utf8_countTrailBytes[(uint8_t)(leadByte)])
00277 
/* Modifies leadByte in place, keeping only its low (6-countTrailBytes) bits. */
00282 #define UTF8_MASK_LEAD_BYTE(leadByte, countTrailBytes) ((leadByte)&=(1<<(6-(countTrailBytes)))-1)
00283 
/* True if bit 7 of uchar is clear (0x00..0x7f for a byte value). */
00285 #define UTF8_IS_SINGLE(uchar) (((uchar)&0x80)==0)
00286 
/* True if the byte value is in 0xc0..0xfd (range test via wrap-around in uint8_t). */
00287 #define UTF8_IS_LEAD(uchar) ((uint8_t)((uchar)-0xc0)<0x3e)
00288 
/* True if the top two of the low eight bits are 10, i.e. a byte in 0x80..0xbf. */
00289 #define UTF8_IS_TRAIL(uchar) (((uchar)&0xc0)==0x80)
00290 
/* True if c, viewed as uint32_t, is above 0x7f. */
00292 #define UTF8_NEED_MULTIPLE_UCHAR(c) ((uint32_t)(c)>0x7f)
00293 
/**
 * Number of UTF-8 bytes used for code point c.
 *
 * Active variant (#if 1): 1 for c<=0x7f, 2 for c<=0x7ff, 4 for
 * 0x10000..0x10ffff, and 3 for every other value (0x800..0xffff as well as
 * anything above 0x10ffff or negative).
 *
 * The range test for 0x10000..0x10ffff subtracts in unsigned arithmetic:
 * c is converted to uint32_t first, so very negative int32_t arguments wrap
 * instead of overflowing a signed subtraction (undefined behavior).
 *
 * The disabled #else variant additionally yields 5 and 6 for values up to
 * 0x3ffffff and 0x7fffffff; it is kept unchanged for reference.
 */
#if 1
#   define UTF8_CHAR_LENGTH(c) \
        ((uint32_t)(c)<=0x7f ? 1 : \
            ((uint32_t)(c)<=0x7ff ? 2 : \
                ((uint32_t)(c)-0x10000>0xfffff ? 3 : 4) \
            ) \
        )
#else
#   define UTF8_CHAR_LENGTH(c) \
        ((uint32_t)(c)<=0x7f ? 1 : \
            ((uint32_t)(c)<=0x7ff ? 2 : \
                ((uint32_t)(c)<=0xffff ? 3 : \
                    ((uint32_t)(c)<=0x10ffff ? 4 : \
                        ((uint32_t)(c)<=0x3ffffff ? 5 : \
                            ((uint32_t)(c)<=0x7fffffff ? 6 : 3) \
                        ) \
                    ) \
                ) \
            ) \
        )
#endif
00328 
/* Largest value produced by the active UTF8_CHAR_LENGTH variant. */
00330 #define UTF8_MAX_CHAR_LENGTH 4
00331 
/* Evaluates to 5*size/2 (integer division). */
/* NOTE(review): presumably a UTF-8 buffer size estimate for `size` UTF-16 units - confirm. */
00333 #define UTF8_ARRAY_SIZE(size) ((5*(size))/2)
00334 
/* Random-access read without bounds checks: copies i into a local index, moves that copy */
/* back to the start of the sequence, then decodes into c. The caller's i is not modified. */
00336 #define UTF8_GET_CHAR_UNSAFE(s, i, c) { \
00337     int32_t _utf8_get_char_unsafe_index=(int32_t)(i); \
00338     UTF8_SET_CHAR_START_UNSAFE(s, _utf8_get_char_unsafe_index); \
00339     UTF8_NEXT_CHAR_UNSAFE(s, _utf8_get_char_unsafe_index, c); \
00340 }
00341 
/* Same as above using the bounds-aware macros, with start and length as limits and */
/* strict passed through to UTF8_NEXT_CHAR_SAFE. The caller's i is not modified. */
00343 #define UTF8_GET_CHAR_SAFE(s, start, i, length, c, strict) { \
00344     int32_t _utf8_get_char_safe_index=(int32_t)(i); \
00345     UTF8_SET_CHAR_START_SAFE(s, start, _utf8_get_char_safe_index); \
00346     UTF8_NEXT_CHAR_SAFE(s, _utf8_get_char_safe_index, length, c, strict); \
00347 }
00348 
/* Reads one code point from s at i into c and advances i past it; no bounds checks. */
/* Only when the first byte is in 0xc0..0xf4 (the <0x35 test) are trail bytes read: */
/* the count comes from UTF8_COUNT_TRAIL_BYTES, the lead byte is masked, and the switch */
/* deliberately falls through to shift in 6 bits from each of 3, 2 or 1 trail bytes. */
/* Trail bytes are not validated; a count other than 1..3 reads no trail bytes. */
00350 #define UTF8_NEXT_CHAR_UNSAFE(s, i, c) { \
00351     (c)=(s)[(i)++]; \
00352     if((uint8_t)((c)-0xc0)<0x35) { \
00353         uint8_t __count=UTF8_COUNT_TRAIL_BYTES(c); \
00354         UTF8_MASK_LEAD_BYTE(c, __count); \
00355         switch(__count) { \
00356         /* each following branch falls through to the next one */ \
00357         case 3: \
00358             (c)=((c)<<6)|((s)[(i)++]&0x3f); \
00359         case 2: \
00360             (c)=((c)<<6)|((s)[(i)++]&0x3f); \
00361         case 1: \
00362             (c)=((c)<<6)|((s)[(i)++]&0x3f); \
00363         /* no other branches to optimize switch() */ \
00364             break; \
00365         } \
00366     } \
00367 }
00368 
/* Writes c to s at i as 1 to 4 UTF-8 bytes and advances i; no capacity or range check. */
/* The nesting shares the stores: the final trail byte is written for every multi-byte */
/* form, the second-to-last for 3- and 4-byte forms, and only the 4-byte form writes */
/* the extra (c>>12) trail byte after its 0xf0 lead byte. */
00370 #define UTF8_APPEND_CHAR_UNSAFE(s, i, c) { \
00371     if((uint32_t)(c)<=0x7f) { \
00372         (s)[(i)++]=(uint8_t)(c); \
00373     } else { \
00374         if((uint32_t)(c)<=0x7ff) { \
00375             (s)[(i)++]=(uint8_t)(((c)>>6)|0xc0); \
00376         } else { \
00377             if((uint32_t)(c)<=0xffff) { \
00378                 (s)[(i)++]=(uint8_t)(((c)>>12)|0xe0); \
00379             } else { \
00380                 (s)[(i)++]=(uint8_t)(((c)>>18)|0xf0); \
00381                 (s)[(i)++]=(uint8_t)((((c)>>12)&0x3f)|0x80); \
00382             } \
00383             (s)[(i)++]=(uint8_t)((((c)>>6)&0x3f)|0x80); \
00384         } \
00385         (s)[(i)++]=(uint8_t)(((c)&0x3f)|0x80); \
00386     } \
00387 }
00388 
/* Advances i by one byte plus the trail-byte count looked up for the byte at s[i]. */
00390 #define UTF8_FWD_1_UNSAFE(s, i) { \
00391     (i)+=1+UTF8_COUNT_TRAIL_BYTES((s)[i]); \
00392 }
00393 
/* Applies UTF8_FWD_1_UNSAFE n times; n is evaluated once into a local int32_t. */
/* Does nothing when n<=0. */
00395 #define UTF8_FWD_N_UNSAFE(s, i, n) { \
00396     int32_t __N=(n); \
00397     while(__N>0) { \
00398         UTF8_FWD_1_UNSAFE(s, i); \
00399         --__N; \
00400     } \
00401 }
00402 
/* Decrements i while s[i] is a trail byte; there is no lower bound on i. */
00404 #define UTF8_SET_CHAR_START_UNSAFE(s, i) { \
00405     while(UTF8_IS_TRAIL((s)[i])) { --(i); } \
00406 }
00407 
/* Reads the byte at s[i] into c and advances i. Values below 0x80 are returned as is. */
/* For a lead byte (UTF8_IS_LEAD) the rest is delegated to utf8_nextCharSafeBody(), */
/* which receives &i, length, the lead byte and strict; any other value >=0x80 yields */
/* UTF8_ERROR_VALUE_1. */
/* NOTE(review): s[i] is stored without a cast, so this looks like it expects s to have */
/* an unsigned element type (a signed char >=0x80 would fail the (c)>=0x80 test) - confirm. */
00409 #define UTF8_NEXT_CHAR_SAFE(s, i, length, c, strict) { \
00410     (c)=(s)[(i)++]; \
00411     if((c)>=0x80) { \
00412         if(UTF8_IS_LEAD(c)) { \
00413             (c)=utf8_nextCharSafeBody(s, &(i), (int32_t)(length), c, strict); \
00414         } else { \
00415             (c)=UTF8_ERROR_VALUE_1; \
00416         } \
00417     } \
00418 }
00419 
/* Writes c<=0x7f directly as one byte; otherwise assigns i from */
/* utf8_appendCharSafeBody(s, i, length, c, NULL). NULL must be defined where this is used. */
00421 #define UTF8_APPEND_CHAR_SAFE(s, i, length, c)  { \
00422     if((uint32_t)(c)<=0x7f) { \
00423         (s)[(i)++]=(uint8_t)(c); \
00424     } else { \
00425         (i)=utf8_appendCharSafeBody(s, (int32_t)(i), (int32_t)(length), c, NULL); \
00426     } \
00427 }
00428 
/* Alias for U8_FWD_1 (defined outside this file). */
00430 #define UTF8_FWD_1_SAFE(s, i, length) U8_FWD_1(s, i, length)
00431 
/* Alias for U8_FWD_N (defined outside this file). */
00433 #define UTF8_FWD_N_SAFE(s, i, length, n) U8_FWD_N(s, i, length, n)
00434 
/* Alias for U8_SET_CP_START (defined outside this file). */
00436 #define UTF8_SET_CHAR_START_SAFE(s, start, i) U8_SET_CP_START(s, start, i)
00437 
/* Decrements i and reads the code point that ends just before the old i into c. */
/* If the byte read is a trail byte, earlier bytes are read in a loop: each further */
/* trail byte contributes 6 bits at an increasing shift, and the first byte >=0xc0 is */
/* masked as a lead byte for the number of trail bytes seen, ending the loop. */
/* There is no lower bound on i and no validation of the sequence. */
00439 #define UTF8_PREV_CHAR_UNSAFE(s, i, c) { \
00440     (c)=(s)[--(i)]; \
00441     if(UTF8_IS_TRAIL(c)) { \
00442         uint8_t __b, __count=1, __shift=6; \
00443 \
00444         /* c is a trail byte */ \
00445         (c)&=0x3f; \
00446         for(;;) { \
00447             __b=(s)[--(i)]; \
00448             if(__b>=0xc0) { \
00449                 UTF8_MASK_LEAD_BYTE(__b, __count); \
00450                 (c)|=(UChar32)__b<<__shift; \
00451                 break; \
00452             } else { \
00453                 (c)|=(UChar32)(__b&0x3f)<<__shift; \
00454                 ++__count; \
00455                 __shift+=6; \
00456             } \
00457         } \
00458     } \
00459 }
00460 
/* Pre-decrements i at least once and keeps decrementing while s[i] is a trail byte. */
00462 #define UTF8_BACK_1_UNSAFE(s, i) { \
00463     while(UTF8_IS_TRAIL((s)[--(i)])) {} \
00464 }
00465 
/* Applies UTF8_BACK_1_UNSAFE n times; n is evaluated once into a local int32_t. */
/* Does nothing when n<=0. */
00467 #define UTF8_BACK_N_UNSAFE(s, i, n) { \
00468     int32_t __N=(n); \
00469     while(__N>0) { \
00470         UTF8_BACK_1_UNSAFE(s, i); \
00471         --__N; \
00472     } \
00473 }
00474 
/* Moves i back to the start of the sequence containing s[i-1], then forward past it. */
00476 #define UTF8_SET_CHAR_LIMIT_UNSAFE(s, i) { \
00477     UTF8_BACK_1_UNSAFE(s, i); \
00478     UTF8_FWD_1_UNSAFE(s, i); \
00479 }
00480 
/* Decrements i and reads s[i] into c. Values below 0x80 are returned as is. */
/* A value in 0x80..0xbf is handed to utf8_prevCharSafeBody() together with start, &i */
/* and strict; any value above 0xbf yields UTF8_ERROR_VALUE_1. */
00482 #define UTF8_PREV_CHAR_SAFE(s, start, i, c, strict) { \
00483     (c)=(s)[--(i)]; \
00484     if((c)>=0x80) { \
00485         if((c)<=0xbf) { \
00486             (c)=utf8_prevCharSafeBody(s, start, &(i), c, strict); \
00487         } else { \
00488             (c)=UTF8_ERROR_VALUE_1; \
00489         } \
00490     } \
00491 }
00492 
/* Alias for U8_BACK_1 (defined outside this file). */
00494 #define UTF8_BACK_1_SAFE(s, start, i) U8_BACK_1(s, start, i)
00495 
/* Alias for U8_BACK_N (defined outside this file). */
00497 #define UTF8_BACK_N_SAFE(s, start, i, n) U8_BACK_N(s, start, i, n)
00498 
/* Alias for U8_SET_CP_LIMIT (defined outside this file). */
00500 #define UTF8_SET_CHAR_LIMIT_SAFE(s, start, i, length) U8_SET_CP_LIMIT(s, start, i, length)
00501 
00502 /* Formerly utf16.h --------------------------------------------------------- */
00503 
/* True if uchar is in 0xd800..0xdbff. */
00505 #define UTF_IS_FIRST_SURROGATE(uchar) (((uchar)&0xfffffc00)==0xd800)
00506 
/* True if uchar is in 0xdc00..0xdfff. */
00508 #define UTF_IS_SECOND_SURROGATE(uchar) (((uchar)&0xfffffc00)==0xdc00)
00509 
/* Tests only bit 10 of c; the macros in this file use it after UTF_IS_SURROGATE(c) */
/* has already succeeded, to tell a first surrogate (bit clear) from a second one. */
00511 #define UTF_IS_SURROGATE_FIRST(c) (((c)&0x400)==0)
00512 
/* Constant subtracted by UTF16_GET_PAIR_VALUE: (0xd800<<10)+0xdc00-0x10000. */
00514 #define UTF_SURROGATE_OFFSET ((0xd800<<10UL)+0xdc00-0x10000)
00515 
/* Combines a first and second surrogate: (first<<10)+second-UTF_SURROGATE_OFFSET. */
/* The arguments are not checked to be surrogates. */
00517 #define UTF16_GET_PAIR_VALUE(first, second) \
00518     (((first)<<10UL)+(second)-UTF_SURROGATE_OFFSET)
00519 
/* First surrogate for a supplementary code point: (supplementary>>10)+0xd7c0 as UChar. */
00521 #define UTF_FIRST_SURROGATE(supplementary) (UChar)(((supplementary)>>10)+0xd7c0)
00522 
/* Second surrogate: low 10 bits of supplementary OR 0xdc00, as UChar. */
00524 #define UTF_SECOND_SURROGATE(supplementary) (UChar)(((supplementary)&0x3ff)|0xdc00)
00525 
/* Alias for UTF_FIRST_SURROGATE. */
00527 #define UTF16_LEAD(supplementary) UTF_FIRST_SURROGATE(supplementary)
00528 
/* Alias for UTF_SECOND_SURROGATE. */
00530 #define UTF16_TRAIL(supplementary) UTF_SECOND_SURROGATE(supplementary)
00531 
/* Negation of UTF_IS_SURROGATE. */
00533 #define UTF16_IS_SINGLE(uchar) !UTF_IS_SURROGATE(uchar)
00534 
/* Alias for UTF_IS_FIRST_SURROGATE. */
00536 #define UTF16_IS_LEAD(uchar) UTF_IS_FIRST_SURROGATE(uchar)
00537 
/* Alias for UTF_IS_SECOND_SURROGATE. */
00539 #define UTF16_IS_TRAIL(uchar) UTF_IS_SECOND_SURROGATE(uchar)
00540 
/* True if c, viewed as uint32_t, is above 0xffff. */
00542 #define UTF16_NEED_MULTIPLE_UCHAR(c) ((uint32_t)(c)>0xffff)
00543 
/* 1 if c, viewed as uint32_t, is at most 0xffff, otherwise 2. */
00545 #define UTF16_CHAR_LENGTH(c) ((uint32_t)(c)<=0xffff ? 1 : 2)
00546 
/* Largest value produced by UTF16_CHAR_LENGTH. */
00548 #define UTF16_MAX_CHAR_LENGTH 2
00549 
/* Evaluates to size unchanged. */
00551 #define UTF16_ARRAY_SIZE(size) (size)
00552 
/* Random-access read of the code point containing s[i]; i is not modified. */
/* For a surrogate at s[i] the partner is taken from s[i+1] (first surrogate) or */
/* s[i-1] (second surrogate) without any bounds or pairing check. */
00564 #define UTF16_GET_CHAR_UNSAFE(s, i, c) { \
00565     (c)=(s)[i]; \
00566     if(UTF_IS_SURROGATE(c)) { \
00567         if(UTF_IS_SURROGATE_FIRST(c)) { \
00568             (c)=UTF16_GET_PAIR_VALUE((c), (s)[(i)+1]); \
00569         } else { \
00570             (c)=UTF16_GET_PAIR_VALUE((s)[(i)-1], (c)); \
00571         } \
00572     } \
00573 }
00574 
/* Bounds-checked variant; i is not modified. A surrogate is combined with its partner */
/* only if the partner index is inside [start, length) and holds the matching kind of */
/* surrogate. An unpaired surrogate is left in c as is unless strict is true, in which */
/* case c becomes UTF_ERROR_VALUE. With strict, a non-surrogate that fails */
/* UTF_IS_UNICODE_CHAR also becomes UTF_ERROR_VALUE. */
00576 #define UTF16_GET_CHAR_SAFE(s, start, i, length, c, strict) { \
00577     (c)=(s)[i]; \
00578     if(UTF_IS_SURROGATE(c)) { \
00579         uint16_t __c2; \
00580         if(UTF_IS_SURROGATE_FIRST(c)) { \
00581             if((i)+1<(length) && UTF_IS_SECOND_SURROGATE(__c2=(s)[(i)+1])) { \
00582                 (c)=UTF16_GET_PAIR_VALUE((c), __c2); \
00583                 /* strict: ((c)&0xfffe)==0xfffe is caught by UTF_IS_ERROR() and UTF_IS_UNICODE_CHAR() */ \
00584             } else if(strict) {\
00585                 /* unmatched first surrogate */ \
00586                 (c)=UTF_ERROR_VALUE; \
00587             } \
00588         } else { \
00589             if((i)-1>=(start) && UTF_IS_FIRST_SURROGATE(__c2=(s)[(i)-1])) { \
00590                 (c)=UTF16_GET_PAIR_VALUE(__c2, (c)); \
00591                 /* strict: ((c)&0xfffe)==0xfffe is caught by UTF_IS_ERROR() and UTF_IS_UNICODE_CHAR() */ \
00592             } else if(strict) {\
00593                 /* unmatched second surrogate */ \
00594                 (c)=UTF_ERROR_VALUE; \
00595             } \
00596         } \
00597     } else if((strict) && !UTF_IS_UNICODE_CHAR(c)) { \
00598         (c)=UTF_ERROR_VALUE; \
00599     } \
00600 }
00601 
/* Reads s[i] into c and advances i; after a first surrogate the next unit is consumed */
/* as its partner without checking bounds or that it is a second surrogate. */
00603 #define UTF16_NEXT_CHAR_UNSAFE(s, i, c) { \
00604     (c)=(s)[(i)++]; \
00605     if(UTF_IS_FIRST_SURROGATE(c)) { \
00606         (c)=UTF16_GET_PAIR_VALUE((c), (s)[(i)++]); \
00607     } \
00608 }
00609 
/* Writes c at s[i] as one unit (c<=0xffff) or as a surrogate pair, advancing i; */
/* no capacity or range check. */
00611 #define UTF16_APPEND_CHAR_UNSAFE(s, i, c) { \
00612     if((uint32_t)(c)<=0xffff) { \
00613         (s)[(i)++]=(uint16_t)(c); \
00614     } else { \
00615         (s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \
00616         (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \
00617     } \
00618 }
00619 
/* Advances i by one unit, or by two if the unit skipped was a first surrogate. */
00621 #define UTF16_FWD_1_UNSAFE(s, i) { \
00622     if(UTF_IS_FIRST_SURROGATE((s)[(i)++])) { \
00623         ++(i); \
00624     } \
00625 }
00626 
/* Applies UTF16_FWD_1_UNSAFE n times; n is evaluated once into a local int32_t. */
/* Does nothing when n<=0. */
00628 #define UTF16_FWD_N_UNSAFE(s, i, n) { \
00629     int32_t __N=(n); \
00630     while(__N>0) { \
00631         UTF16_FWD_1_UNSAFE(s, i); \
00632         --__N; \
00633     } \
00634 }
00635 
/* Decrements i once if s[i] is a second surrogate; the preceding unit is not checked. */
00637 #define UTF16_SET_CHAR_START_UNSAFE(s, i) { \
00638     if(UTF_IS_SECOND_SURROGATE((s)[i])) { \
00639         --(i); \
00640     } \
00641 }
00642 
/* Reads s[i] into c and advances i. A first surrogate is combined with the next unit */
/* only if i<length and that unit is a second surrogate (i then advances again). */
/* Otherwise the lone first surrogate stays in c unless strict is true, which yields */
/* UTF_ERROR_VALUE. With strict, any other unit failing UTF_IS_UNICODE_CHAR also */
/* yields UTF_ERROR_VALUE. */
00644 #define UTF16_NEXT_CHAR_SAFE(s, i, length, c, strict) { \
00645     (c)=(s)[(i)++]; \
00646     if(UTF_IS_FIRST_SURROGATE(c)) { \
00647         uint16_t __c2; \
00648         if((i)<(length) && UTF_IS_SECOND_SURROGATE(__c2=(s)[(i)])) { \
00649             ++(i); \
00650             (c)=UTF16_GET_PAIR_VALUE((c), __c2); \
00651             /* strict: ((c)&0xfffe)==0xfffe is caught by UTF_IS_ERROR() and UTF_IS_UNICODE_CHAR() */ \
00652         } else if(strict) {\
00653             /* unmatched first surrogate */ \
00654             (c)=UTF_ERROR_VALUE; \
00655         } \
00656     } else if((strict) && !UTF_IS_UNICODE_CHAR(c)) { \
00657         /* unmatched second surrogate or other non-character */ \
00658         (c)=UTF_ERROR_VALUE; \
00659     } \
00660 }
00661 
/* Writes c at s[i] and advances i. c<=0xffff is stored as one unit without comparing */
/* i against length. A supplementary c<=0x10ffff is stored as a surrogate pair only if */
/* i+1<length; if not, or if c>0x10ffff, a single UTF_ERROR_VALUE is stored instead. */
00663 #define UTF16_APPEND_CHAR_SAFE(s, i, length, c) { \
00664     if((uint32_t)(c)<=0xffff) { \
00665         (s)[(i)++]=(uint16_t)(c); \
00666     } else if((uint32_t)(c)<=0x10ffff) { \
00667         if((i)+1<(length)) { \
00668             (s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \
00669             (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \
00670         } else /* not enough space */ { \
00671             (s)[(i)++]=UTF_ERROR_VALUE; \
00672         } \
00673     } else /* c>0x10ffff, write error value */ { \
00674         (s)[(i)++]=UTF_ERROR_VALUE; \
00675     } \
00676 }
00677 
/* Alias for U16_FWD_1 (defined outside this file). */
00679 #define UTF16_FWD_1_SAFE(s, i, length) U16_FWD_1(s, i, length)
00680 
/* Alias for U16_FWD_N (defined outside this file). */
00682 #define UTF16_FWD_N_SAFE(s, i, length, n) U16_FWD_N(s, i, length, n)
00683 
/* Alias for U16_SET_CP_START (defined outside this file). */
00685 #define UTF16_SET_CHAR_START_SAFE(s, start, i) U16_SET_CP_START(s, start, i)
00686 
/* Decrements i and reads s[i] into c; after a second surrogate the unit before it is */
/* consumed as the first surrogate without checking bounds or its value. */
00688 #define UTF16_PREV_CHAR_UNSAFE(s, i, c) { \
00689     (c)=(s)[--(i)]; \
00690     if(UTF_IS_SECOND_SURROGATE(c)) { \
00691         (c)=UTF16_GET_PAIR_VALUE((s)[--(i)], (c)); \
00692     } \
00693 }
00694 
/* Moves i back by one unit, or by two if the unit stepped onto is a second surrogate. */
00696 #define UTF16_BACK_1_UNSAFE(s, i) { \
00697     if(UTF_IS_SECOND_SURROGATE((s)[--(i)])) { \
00698         --(i); \
00699     } \
00700 }
00701 
/* Applies UTF16_BACK_1_UNSAFE n times; n is evaluated once into a local int32_t. */
/* Does nothing when n<=0. */
00703 #define UTF16_BACK_N_UNSAFE(s, i, n) { \
00704     int32_t __N=(n); \
00705     while(__N>0) { \
00706         UTF16_BACK_1_UNSAFE(s, i); \
00707         --__N; \
00708     } \
00709 }
00710 
/* Increments i once if s[i-1] is a first surrogate; s[i] itself is not checked. */
00712 #define UTF16_SET_CHAR_LIMIT_UNSAFE(s, i) { \
00713     if(UTF_IS_FIRST_SURROGATE((s)[(i)-1])) { \
00714         ++(i); \
00715     } \
00716 }
00717 
/* Decrements i and reads s[i] into c. A second surrogate is combined with the unit */
/* before it only if i>start and that unit is a first surrogate (i then decrements */
/* again). Otherwise the lone second surrogate stays in c unless strict is true, which */
/* yields UTF_ERROR_VALUE. With strict, any other unit failing UTF_IS_UNICODE_CHAR */
/* also yields UTF_ERROR_VALUE. */
00719 #define UTF16_PREV_CHAR_SAFE(s, start, i, c, strict) { \
00720     (c)=(s)[--(i)]; \
00721     if(UTF_IS_SECOND_SURROGATE(c)) { \
00722         uint16_t __c2; \
00723         if((i)>(start) && UTF_IS_FIRST_SURROGATE(__c2=(s)[(i)-1])) { \
00724             --(i); \
00725             (c)=UTF16_GET_PAIR_VALUE(__c2, (c)); \
00726             /* strict: ((c)&0xfffe)==0xfffe is caught by UTF_IS_ERROR() and UTF_IS_UNICODE_CHAR() */ \
00727         } else if(strict) {\
00728             /* unmatched second surrogate */ \
00729             (c)=UTF_ERROR_VALUE; \
00730         } \
00731     } else if((strict) && !UTF_IS_UNICODE_CHAR(c)) { \
00732         /* unmatched first surrogate or other non-character */ \
00733         (c)=UTF_ERROR_VALUE; \
00734     } \
00735 }
00736 
/* Alias for U16_BACK_1 (defined outside this file). */
00738 #define UTF16_BACK_1_SAFE(s, start, i) U16_BACK_1(s, start, i)
00739 
/* Alias for U16_BACK_N (defined outside this file). */
00741 #define UTF16_BACK_N_SAFE(s, start, i, n) U16_BACK_N(s, start, i, n)
00742 
/* Alias for U16_SET_CP_LIMIT (defined outside this file). */
00744 #define UTF16_SET_CHAR_LIMIT_SAFE(s, start, i, length) U16_SET_CP_LIMIT(s, start, i, length)
00745 
00746 /* Formerly utf32.h --------------------------------------------------------- */
00747 
00748 /*
00749 * Old documentation:
00750 *
00751 *   This file defines macros to deal with UTF-32 code units and code points.
00752 *   Signatures and semantics are the same as for the similarly named macros
00753 *   in utf16.h.
00754 *   utf32.h is included by utf.h after unicode/umachine.h
00755 *   and some common definitions.
00756 *   <p><b>Usage:</b>  ICU coding guidelines for if() statements should be followed when using these macros.
00757 *                  Compound statements (curly braces {}) must be used  for if-else-while...
00758 *                  bodies and all macro statements should be terminated with semicolon.</p>
00759 */
00760 
00761 /* internal definitions ----------------------------------------------------- */
00762 
/* Acceptance test used by the UTF32_*_SAFE macros: without strict, c must be at most */
/* 0x10ffff as uint32_t; with strict, c must pass UTF_IS_UNICODE_CHAR. */
00764 #define UTF32_IS_SAFE(c, strict) \
00765     (!(strict) ? \
00766         (uint32_t)(c)<=0x10ffff : \
00767         UTF_IS_UNICODE_CHAR(c))
00768 
00769 /*
00770  * For the semantics of all of these macros, see utf16.h.
00771  * The UTF-32 versions are trivial because any code point is
00772  * encoded using exactly one code unit.
00773  */
00774 
00775 /* single-code point definitions -------------------------------------------- */
00776 
00777 /* classes of code unit values */
00778 
/* Constant answers; the argument is ignored. Every unit is "single", */
/* none is a lead or trail unit. */
00780 #define UTF32_IS_SINGLE(uchar) 1
00781 
00782 #define UTF32_IS_LEAD(uchar) 0
00783 
00784 #define UTF32_IS_TRAIL(uchar) 0
00785 
00786 /* number of code units per code point */
00787 
/* Constant answers; the argument is ignored. */
00789 #define UTF32_NEED_MULTIPLE_UCHAR(c) 0
00790 
00791 #define UTF32_CHAR_LENGTH(c) 1
00792 
00793 #define UTF32_MAX_CHAR_LENGTH 1
00794 
00795 /* average number of code units compared to UTF-16 */
00796 
/* Evaluates to size unchanged. */
00798 #define UTF32_ARRAY_SIZE(size) (size)
00799 
/* Copies s[i] into c; i is not modified. */
00801 #define UTF32_GET_CHAR_UNSAFE(s, i, c) { \
00802     (c)=(s)[i]; \
00803 }
00804 
/* Copies s[i] into c and replaces it with UTF_ERROR_VALUE if UTF32_IS_SAFE rejects it. */
/* start and length are accepted but not used. */
00806 #define UTF32_GET_CHAR_SAFE(s, start, i, length, c, strict) { \
00807     (c)=(s)[i]; \
00808     if(!UTF32_IS_SAFE(c, strict)) { \
00809         (c)=UTF_ERROR_VALUE; \
00810     } \
00811 }
00812 
00813 /* definitions with forward iteration --------------------------------------- */
00814 
/* Copies s[i] into c and increments i. */
00816 #define UTF32_NEXT_CHAR_UNSAFE(s, i, c) { \
00817     (c)=(s)[(i)++]; \
00818 }
00819 
/* Stores c at s[i] and increments i; no range check. */
00821 #define UTF32_APPEND_CHAR_UNSAFE(s, i, c) { \
00822     (s)[(i)++]=(c); \
00823 }
00824 
/* Increments i. */
00826 #define UTF32_FWD_1_UNSAFE(s, i) { \
00827     ++(i); \
00828 }
00829 
/* Adds n to i. */
00831 #define UTF32_FWD_N_UNSAFE(s, i, n) { \
00832     (i)+=(n); \
00833 }
00834 
/* Empty: expands to an empty block. */
00836 #define UTF32_SET_CHAR_START_UNSAFE(s, i) { \
00837 }
00838 
/* Copies s[i] into c, increments i, and replaces c with UTF_ERROR_VALUE if */
/* UTF32_IS_SAFE rejects it. length is accepted but not used. */
00840 #define UTF32_NEXT_CHAR_SAFE(s, i, length, c, strict) { \
00841     (c)=(s)[(i)++]; \
00842     if(!UTF32_IS_SAFE(c, strict)) { \
00843         (c)=UTF_ERROR_VALUE; \
00844     } \
00845 }
00846 
/* Stores c at s[i] if c<=0x10ffff as uint32_t, otherwise stores 0xfffd; increments i. */
/* length is accepted but not used. Note the substitute here is 0xfffd, whereas */
/* UTF16_APPEND_CHAR_SAFE stores UTF_ERROR_VALUE. */
00848 #define UTF32_APPEND_CHAR_SAFE(s, i, length, c) { \
00849     if((uint32_t)(c)<=0x10ffff) { \
00850         (s)[(i)++]=(c); \
00851     } else /* c>0x10ffff, write 0xfffd */ { \
00852         (s)[(i)++]=0xfffd; \
00853     } \
00854 }
00855 
/* Increments i; length is accepted but not compared against. */
00857 #define UTF32_FWD_1_SAFE(s, i, length) { \
00858     ++(i); \
00859 }
00860 
/* Adds n to i and clamps the result to length. */
00862 #define UTF32_FWD_N_SAFE(s, i, length, n) { \
00863     if(((i)+=(n))>(length)) { \
00864         (i)=(length); \
00865     } \
00866 }
00867 
/* Empty: expands to an empty block. */
00869 #define UTF32_SET_CHAR_START_SAFE(s, start, i) { \
00870 }
00871 
00872 /* definitions with backward iteration -------------------------------------- */
00873 
/* Decrements i and copies s[i] into c. */
00875 #define UTF32_PREV_CHAR_UNSAFE(s, i, c) { \
00876     (c)=(s)[--(i)]; \
00877 }
00878 
/* Decrements i. */
00880 #define UTF32_BACK_1_UNSAFE(s, i) { \
00881     --(i); \
00882 }
00883 
/* Subtracts n from i. */
00885 #define UTF32_BACK_N_UNSAFE(s, i, n) { \
00886     (i)-=(n); \
00887 }
00888 
/* Empty: expands to an empty block. */
00890 #define UTF32_SET_CHAR_LIMIT_UNSAFE(s, i) { \
00891 }
00892 
/* Decrements i, copies s[i] into c, and replaces c with UTF_ERROR_VALUE if */
/* UTF32_IS_SAFE rejects it. start is accepted but not used. */
00894 #define UTF32_PREV_CHAR_SAFE(s, start, i, c, strict) { \
00895     (c)=(s)[--(i)]; \
00896     if(!UTF32_IS_SAFE(c, strict)) { \
00897         (c)=UTF_ERROR_VALUE; \
00898     } \
00899 }
00900 
/* Decrements i; start is accepted but not compared against. */
00902 #define UTF32_BACK_1_SAFE(s, start, i) { \
00903     --(i); \
00904 }
00905 
/* Subtracts n from i and clamps the result to start. */
00907 #define UTF32_BACK_N_SAFE(s, start, i, n) { \
00908     (i)-=(n); \
00909     if((i)<(start)) { \
00910         (i)=(start); \
00911     } \
00912 }
00913 
/* Empty: expands to an empty block. */
/* NOTE(review): takes (s, i, length) while UTF8_SET_CHAR_LIMIT_SAFE and */
/* UTF16_SET_CHAR_LIMIT_SAFE take (s, start, i, length) - confirm whether the missing */
/* start parameter is intentional before relying on a uniform call shape. */
00915 #define UTF32_SET_CHAR_LIMIT_SAFE(s, i, length) { \
00916 }
00917 
00918 /* Formerly utf.h, part 2 --------------------------------------------------- */
00919 
/* Unsuffixed UTF_*_UNSAFE / UTF_*_SAFE names: each one forwards its arguments */
/* unchanged to the UTF16_ macro of the same name defined earlier in this file. */
00925 #define UTF_ARRAY_SIZE(size) UTF16_ARRAY_SIZE(size)
00926 
00928 #define UTF_GET_CHAR_UNSAFE(s, i, c)                 UTF16_GET_CHAR_UNSAFE(s, i, c)
00929 
00931 #define UTF_GET_CHAR_SAFE(s, start, i, length, c, strict) UTF16_GET_CHAR_SAFE(s, start, i, length, c, strict)
00932 
00933 
00935 #define UTF_NEXT_CHAR_UNSAFE(s, i, c)                UTF16_NEXT_CHAR_UNSAFE(s, i, c)
00936 
00938 #define UTF_NEXT_CHAR_SAFE(s, i, length, c, strict)  UTF16_NEXT_CHAR_SAFE(s, i, length, c, strict)
00939 
00940 
00942 #define UTF_APPEND_CHAR_UNSAFE(s, i, c)              UTF16_APPEND_CHAR_UNSAFE(s, i, c)
00943 
00945 #define UTF_APPEND_CHAR_SAFE(s, i, length, c)        UTF16_APPEND_CHAR_SAFE(s, i, length, c)
00946 
00947 
00949 #define UTF_FWD_1_UNSAFE(s, i)                       UTF16_FWD_1_UNSAFE(s, i)
00950 
00952 #define UTF_FWD_1_SAFE(s, i, length)                 UTF16_FWD_1_SAFE(s, i, length)
00953 
00954 
00956 #define UTF_FWD_N_UNSAFE(s, i, n)                    UTF16_FWD_N_UNSAFE(s, i, n)
00957 
00959 #define UTF_FWD_N_SAFE(s, i, length, n)              UTF16_FWD_N_SAFE(s, i, length, n)
00960 
00961 
00963 #define UTF_SET_CHAR_START_UNSAFE(s, i)              UTF16_SET_CHAR_START_UNSAFE(s, i)
00964 
00966 #define UTF_SET_CHAR_START_SAFE(s, start, i)         UTF16_SET_CHAR_START_SAFE(s, start, i)
00967 
00968 
00970 #define UTF_PREV_CHAR_UNSAFE(s, i, c)                UTF16_PREV_CHAR_UNSAFE(s, i, c)
00971 
00973 #define UTF_PREV_CHAR_SAFE(s, start, i, c, strict)   UTF16_PREV_CHAR_SAFE(s, start, i, c, strict)
00974 
00975 
00977 #define UTF_BACK_1_UNSAFE(s, i)                      UTF16_BACK_1_UNSAFE(s, i)
00978 
00980 #define UTF_BACK_1_SAFE(s, start, i)                 UTF16_BACK_1_SAFE(s, start, i)
00981 
00982 
00984 #define UTF_BACK_N_UNSAFE(s, i, n)                   UTF16_BACK_N_UNSAFE(s, i, n)
00985 
00987 #define UTF_BACK_N_SAFE(s, start, i, n)              UTF16_BACK_N_SAFE(s, start, i, n)
00988 
00989 
00991 #define UTF_SET_CHAR_LIMIT_UNSAFE(s, i)              UTF16_SET_CHAR_LIMIT_UNSAFE(s, i)
00992 
00994 #define UTF_SET_CHAR_LIMIT_SAFE(s, start, i, length) UTF16_SET_CHAR_LIMIT_SAFE(s, start, i, length)
00995 
00996 /* Define default macros (UTF-16 "safe") ------------------------------------ */
00997 
/* Default (no _SAFE/_UNSAFE suffix) names. Most forward to U16_ macros, which are not */
/* defined in this file; the two exceptions are noted below. */
01003 #define UTF_IS_SINGLE(uchar) U16_IS_SINGLE(uchar)
01004 
01010 #define UTF_IS_LEAD(uchar) U16_IS_LEAD(uchar)
01011 
01017 #define UTF_IS_TRAIL(uchar) U16_IS_TRAIL(uchar)
01018 
/* Forwards to this file's UTF16_NEED_MULTIPLE_UCHAR rather than to a U16_ macro. */
01024 #define UTF_NEED_MULTIPLE_UCHAR(c) UTF16_NEED_MULTIPLE_UCHAR(c)
01025 
01031 #define UTF_CHAR_LENGTH(c) U16_LENGTH(c)
01032 
01038 #define UTF_MAX_CHAR_LENGTH U16_MAX_LENGTH
01039 
01049 #define UTF_GET_CHAR(s, start, i, length, c) U16_GET(s, start, i, length, c)
01050 
01062 #define UTF_NEXT_CHAR(s, i, length, c) U16_NEXT(s, i, length, c)
01063 
/* Forwards to this file's UTF16_APPEND_CHAR_SAFE rather than to a U16_ macro. */
01075 #define UTF_APPEND_CHAR(s, i, length, c) UTF16_APPEND_CHAR_SAFE(s, i, length, c)
01076 
01086 #define UTF_FWD_1(s, i, length) U16_FWD_1(s, i, length)
01087 
01097 #define UTF_FWD_N(s, i, length, n) U16_FWD_N(s, i, length, n)
01098 
01113 #define UTF_SET_CHAR_START(s, start, i) U16_SET_CP_START(s, start, i)
01114 
01126 #define UTF_PREV_CHAR(s, start, i, c) U16_PREV(s, start, i, c)
01127 
01139 #define UTF_BACK_1(s, start, i) U16_BACK_1(s, start, i)
01140 
01152 #define UTF_BACK_N(s, start, i, n) U16_BACK_N(s, start, i, n)
01153 
01168 #define UTF_SET_CHAR_LIMIT(s, start, i, length) U16_SET_CP_LIMIT(s, start, i, length)
01169 
01170 #endif /* U_HIDE_DEPRECATED_API */
01171 
01172 #endif
01173 
 All Data Structures Files Functions Variables Typedefs Enumerations Enumerator Friends Defines