ICU 4.8.1.1
4.8.1.1
|
/*
*******************************************************************************
*
*   Copyright (C) 2002-2008, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
*******************************************************************************
*   file name:  utf.h
*   encoding:   US-ASCII
*   tab size:   8 (not used)
*   indentation:4
*
*   created on: 2002sep21
*   created by: Markus W. Scherer
*/

/*
 * Old-style UTF_..., UTF8_..., UTF16_... and UTF32_... macros.
 *
 * Everything in this header is compiled out when U_HIDE_DEPRECATED_API is
 * defined.  Several macros forward to U8_... / U16_... macros and to helper
 * functions/tables (utf8_countTrailBytes, utf8_nextCharSafeBody, ...) that
 * are not defined here; they are expected to come from "unicode/utf.h" and
 * the headers it pulls in.
 *
 * Usage notes that apply to every statement-like macro below:
 *  - they expand to a brace-enclosed compound statement, so use them only
 *    inside braced if/else/while bodies;
 *  - index arguments (i) must be modifiable lvalues;
 *  - arguments may be evaluated more than once, so do not pass expressions
 *    with side effects.
 */

#ifndef __UTF_OLD_H__
#define __UTF_OLD_H__

#ifndef U_HIDE_DEPRECATED_API

/* utf.h must be included first. */
#ifndef __UTF_H__
#   include "unicode/utf.h"
#endif

/* Formerly utf.h, part 1 --------------------------------------------------- */

#ifdef U_USE_UTF_DEPRECATES
/* Legacy index type; only declared when U_USE_UTF_DEPRECATES is defined. */
typedef int32_t UTextOffset;
#endif

/* Code unit width, in bits, of the default UTF (the UTF_... aliases in part 2 map to UTF16_...). */
#define UTF_SIZE 16

/* The "safe" macro flavor is the selected default; the other two selectors are cleared. */
#define UTF_SAFE
#undef UTF_UNSAFE
#undef UTF_STRICT

/* Error value stored by UTF8_NEXT_CHAR_SAFE/UTF8_PREV_CHAR_SAFE for a byte that cannot start/end a sequence. */
#define UTF8_ERROR_VALUE_1 0x15

/* Second reserved UTF-8 error value; recognized by UTF_IS_ERROR(), not produced by any macro in this header. */
#define UTF8_ERROR_VALUE_2 0x9f

/* Error value stored by the UTF-16 and UTF-32 "safe" macros. */
#define UTF_ERROR_VALUE 0xffff

/*
 * Is c one of the error values?
 * True when the low 16 bits of c are 0xfffe or 0xffff, or when c equals
 * UTF8_ERROR_VALUE_1 or UTF8_ERROR_VALUE_2.
 */
#define UTF_IS_ERROR(c) \
    (((c)&0xfffe)==0xfffe || (c)==UTF8_ERROR_VALUE_1 || (c)==UTF8_ERROR_VALUE_2)

/* Is c a Unicode character (see UTF_IS_UNICODE_CHAR) that is not one of the UTF-8 error values? */
#define UTF_IS_VALID(c) \
    (UTF_IS_UNICODE_CHAR(c) && \
     (c)!=UTF8_ERROR_VALUE_1 && (c)!=UTF8_ERROR_VALUE_2)

/* Is uchar a surrogate code point/unit (0xd800..0xdfff)? */
#define UTF_IS_SURROGATE(uchar) (((uchar)&0xfffff800)==0xd800)

/*
 * Is c a noncharacter: 0xfdd0..0xfdef, or a value <=0x10ffff whose low
 * 16 bits are 0xfffe or 0xffff?
 */
#define UTF_IS_UNICODE_NONCHAR(c) \
    ((c)>=0xfdd0 && \
     ((uint32_t)(c)<=0xfdef || ((c)&0xfffe)==0xfffe) && \
     (uint32_t)(c)<=0x10ffff)

/*
 * Is c a Unicode character: in 0..0x10ffff, not a surrogate and not a
 * noncharacter?  Negative values fail because of the uint32_t casts.
 */
#define UTF_IS_UNICODE_CHAR(c) \
    ((uint32_t)(c)<0xd800 || \
        ((uint32_t)(c)>0xdfff && \
         (uint32_t)(c)<=0x10ffff && \
         !UTF_IS_UNICODE_NONCHAR(c)))

/* Formerly utf8.h ---------------------------------------------------------- */

/*
 * Number of trail bytes announced by a lead byte, looked up in the external
 * table utf8_countTrailBytes.  The argument is parenthesized before the cast
 * so that an expression argument is narrowed as a whole and the index always
 * stays within 0..255.
 */
#define UTF8_COUNT_TRAIL_BYTES(leadByte) (utf8_countTrailBytes[(uint8_t)(leadByte)])

/* Strip the length-marker bits from a lead byte in place; leadByte must be an lvalue. */
#define UTF8_MASK_LEAD_BYTE(leadByte, countTrailBytes) ((leadByte)&=(1<<(6-(countTrailBytes)))-1)

/* Does this byte encode a code point by itself (high bit clear)? */
#define UTF8_IS_SINGLE(uchar) (((uchar)&0x80)==0)

/* Is this byte in the lead-byte range 0xc0..0xfd? */
#define UTF8_IS_LEAD(uchar) ((uint8_t)((uchar)-0xc0)<0x3e)

/* Is this byte a trail byte (10xxxxxx)? */
#define UTF8_IS_TRAIL(uchar) (((uchar)&0xc0)==0x80)

/* Does code point c need more than one byte in UTF-8? */
#define UTF8_NEED_MULTIPLE_UCHAR(c) ((uint32_t)(c)>0x7f)

/*
 * Number of bytes used to write code point c.
 * The active variant yields 1..4; any value outside 0x10000..0x10ffff that is
 * above 0x7ff (including values above 0x10ffff) yields 3.
 * The disabled variant also covers the 5- and 6-byte forms.
 */
#if 1
#   define UTF8_CHAR_LENGTH(c) \
        ((uint32_t)(c)<=0x7f ? 1 : \
            ((uint32_t)(c)<=0x7ff ? 2 : \
                ((uint32_t)((c)-0x10000)>0xfffff ? 3 : 4) \
            ) \
        )
#else
#   define UTF8_CHAR_LENGTH(c) \
        ((uint32_t)(c)<=0x7f ? 1 : \
            ((uint32_t)(c)<=0x7ff ? 2 : \
                ((uint32_t)(c)<=0xffff ? 3 : \
                    ((uint32_t)(c)<=0x10ffff ? 4 : \
                        ((uint32_t)(c)<=0x3ffffff ? 5 : \
                            ((uint32_t)(c)<=0x7fffffff ? 6 : 3) \
                        ) \
                    ) \
                ) \
            ) \
        )
#endif

/* Largest value returned by the active UTF8_CHAR_LENGTH variant. */
#define UTF8_MAX_CHAR_LENGTH 4

/* Array size estimate: 5/2 bytes per unit of `size`. */
#define UTF8_ARRAY_SIZE(size) ((5*(size))/2)

/* Read the code point that contains byte index i, without bounds or validity checks; i itself is not modified. */
#define UTF8_GET_CHAR_UNSAFE(s, i, c) { \
    int32_t _utf8_get_char_unsafe_index=(int32_t)(i); \
    UTF8_SET_CHAR_START_UNSAFE(s, _utf8_get_char_unsafe_index); \
    UTF8_NEXT_CHAR_UNSAFE(s, _utf8_get_char_unsafe_index, c); \
}

/* Checked counterpart of UTF8_GET_CHAR_UNSAFE, limited to [start, length); i itself is not modified. */
#define UTF8_GET_CHAR_SAFE(s, start, i, length, c, strict) { \
    int32_t _utf8_get_char_safe_index=(int32_t)(i); \
    UTF8_SET_CHAR_START_SAFE(s, start, _utf8_get_char_safe_index); \
    UTF8_NEXT_CHAR_SAFE(s, _utf8_get_char_safe_index, length, c, strict); \
}

/*
 * Read the code point starting at s[i] and advance i past it.
 * No validation: for lead bytes 0xc0..0xf4 the announced number of trail
 * bytes is consumed blindly; any other byte is returned as is.
 */
#define UTF8_NEXT_CHAR_UNSAFE(s, i, c) { \
    (c)=(s)[(i)++]; \
    if((uint8_t)((c)-0xc0)<0x35) { \
        uint8_t __count=UTF8_COUNT_TRAIL_BYTES(c); \
        UTF8_MASK_LEAD_BYTE(c, __count); \
        switch(__count) { \
        /* each following branch falls through to the next one */ \
        case 3: \
            (c)=((c)<<6)|((s)[(i)++]&0x3f); \
        case 2: \
            (c)=((c)<<6)|((s)[(i)++]&0x3f); \
        case 1: \
            (c)=((c)<<6)|((s)[(i)++]&0x3f); \
        /* no other branches to optimize switch() */ \
            break; \
        } \
    } \
}

/* Write code point c as 1..4 bytes at s[i] and advance i; neither c nor the buffer space is checked. */
#define UTF8_APPEND_CHAR_UNSAFE(s, i, c) { \
    if((uint32_t)(c)<=0x7f) { \
        (s)[(i)++]=(uint8_t)(c); \
    } else { \
        if((uint32_t)(c)<=0x7ff) { \
            (s)[(i)++]=(uint8_t)(((c)>>6)|0xc0); \
        } else { \
            if((uint32_t)(c)<=0xffff) { \
                (s)[(i)++]=(uint8_t)(((c)>>12)|0xe0); \
            } else { \
                (s)[(i)++]=(uint8_t)(((c)>>18)|0xf0); \
                (s)[(i)++]=(uint8_t)((((c)>>12)&0x3f)|0x80); \
            } \
            (s)[(i)++]=(uint8_t)((((c)>>6)&0x3f)|0x80); \
        } \
        (s)[(i)++]=(uint8_t)(((c)&0x3f)|0x80); \
    } \
}

/* Advance i over one code point, trusting the lead byte at s[i]. */
#define UTF8_FWD_1_UNSAFE(s, i) { \
    (i)+=1+UTF8_COUNT_TRAIL_BYTES((s)[i]); \
}

/* Advance i over n code points (n is evaluated once); no bounds checks. */
#define UTF8_FWD_N_UNSAFE(s, i, n) { \
    int32_t __N=(n); \
    while(__N>0) { \
        UTF8_FWD_1_UNSAFE(s, i); \
        --__N; \
    } \
}

/* Move i back while s[i] is a trail byte; there is no lower bound check. */
#define UTF8_SET_CHAR_START_UNSAFE(s, i) { \
    while(UTF8_IS_TRAIL((s)[i])) { --(i); } \
}

/*
 * Read the code point starting at s[i] and advance i.
 * Bytes below 0x80 are returned directly, lead bytes are handed to
 * utf8_nextCharSafeBody(), and any other byte yields UTF8_ERROR_VALUE_1.
 */
#define UTF8_NEXT_CHAR_SAFE(s, i, length, c, strict) { \
    (c)=(s)[(i)++]; \
    if((c)>=0x80) { \
        if(UTF8_IS_LEAD(c)) { \
            (c)=utf8_nextCharSafeBody(s, &(i), (int32_t)(length), c, strict); \
        } else { \
            (c)=UTF8_ERROR_VALUE_1; \
        } \
    } \
}

/*
 * Append code point c at s[i] and update i.
 * Values up to 0x7f are stored directly (i is not compared with length on
 * that path); everything else is delegated to utf8_appendCharSafeBody().
 */
#define UTF8_APPEND_CHAR_SAFE(s, i, length, c)  { \
    if((uint32_t)(c)<=0x7f) { \
        (s)[(i)++]=(uint8_t)(c); \
    } else { \
        (i)=utf8_appendCharSafeBody(s, (int32_t)(i), (int32_t)(length), c, NULL); \
    } \
}

/* Alias for U8_FWD_1. */
#define UTF8_FWD_1_SAFE(s, i, length) U8_FWD_1(s, i, length)

/* Alias for U8_FWD_N. */
#define UTF8_FWD_N_SAFE(s, i, length, n) U8_FWD_N(s, i, length, n)

/* Alias for U8_SET_CP_START. */
#define UTF8_SET_CHAR_START_SAFE(s, start, i) U8_SET_CP_START(s, start, i)

/*
 * Move i back over one code point and read it into c.
 * No validation and no lower bound: trail bytes are collected backwards
 * until a byte >=0xc0 is found.
 */
#define UTF8_PREV_CHAR_UNSAFE(s, i, c) { \
    (c)=(s)[--(i)]; \
    if(UTF8_IS_TRAIL(c)) { \
        uint8_t __b, __count=1, __shift=6; \
\
        /* c is a trail byte */ \
        (c)&=0x3f; \
        for(;;) { \
            __b=(s)[--(i)]; \
            if(__b>=0xc0) { \
                UTF8_MASK_LEAD_BYTE(__b, __count); \
                (c)|=(UChar32)__b<<__shift; \
                break; \
            } else { \
                (c)|=(UChar32)(__b&0x3f)<<__shift; \
                ++__count; \
                __shift+=6; \
            } \
        } \
    } \
}

/* Move i back to the nearest preceding non-trail byte; no lower bound check. */
#define UTF8_BACK_1_UNSAFE(s, i) { \
    while(UTF8_IS_TRAIL((s)[--(i)])) {} \
}

/* Move i back over n code points (n is evaluated once); no bounds checks. */
#define UTF8_BACK_N_UNSAFE(s, i, n) { \
    int32_t __N=(n); \
    while(__N>0) { \
        UTF8_BACK_1_UNSAFE(s, i); \
        --__N; \
    } \
}

/* Adjust i to a code point limit by stepping back one code point and forward again. */
#define UTF8_SET_CHAR_LIMIT_UNSAFE(s, i) { \
    UTF8_BACK_1_UNSAFE(s, i); \
    UTF8_FWD_1_UNSAFE(s, i); \
}

/*
 * Move i back by one byte and read the code point ending there.
 * Bytes below 0x80 are returned directly, trail bytes (0x80..0xbf) are
 * handed to utf8_prevCharSafeBody(), and any other byte yields
 * UTF8_ERROR_VALUE_1.
 */
#define UTF8_PREV_CHAR_SAFE(s, start, i, c, strict) { \
    (c)=(s)[--(i)]; \
    if((c)>=0x80) { \
        if((c)<=0xbf) { \
            (c)=utf8_prevCharSafeBody(s, start, &(i), c, strict); \
        } else { \
            (c)=UTF8_ERROR_VALUE_1; \
        } \
    } \
}

/* Alias for U8_BACK_1. */
#define UTF8_BACK_1_SAFE(s, start, i) U8_BACK_1(s, start, i)

/* Alias for U8_BACK_N. */
#define UTF8_BACK_N_SAFE(s, start, i, n) U8_BACK_N(s, start, i, n)

/* Alias for U8_SET_CP_LIMIT. */
#define UTF8_SET_CHAR_LIMIT_SAFE(s, start, i, length) U8_SET_CP_LIMIT(s, start, i, length)

/* Formerly utf16.h --------------------------------------------------------- */

/* Is uchar a first/lead surrogate (0xd800..0xdbff)? */
#define UTF_IS_FIRST_SURROGATE(uchar) (((uchar)&0xfffffc00)==0xd800)

/* Is uchar a second/trail surrogate (0xdc00..0xdfff)? */
#define UTF_IS_SECOND_SURROGATE(uchar) (((uchar)&0xfffffc00)==0xdc00)

/* Given that c is already known to be a surrogate, is it a first surrogate? Tests bit 0x400 only. */
#define UTF_IS_SURROGATE_FIRST(c) (((c)&0x400)==0)

/* Constant subtracted by UTF16_GET_PAIR_VALUE to turn a shifted surrogate pair into a code point. */
#define UTF_SURROGATE_OFFSET ((0xd800<<10UL)+0xdc00-0x10000)

/* Combine a first and a second surrogate into a supplementary code point; the inputs are not checked. */
#define UTF16_GET_PAIR_VALUE(first, second) \
    (((first)<<10UL)+(second)-UTF_SURROGATE_OFFSET)

/* First (lead) surrogate for a supplementary code point, as a UChar. */
#define UTF_FIRST_SURROGATE(supplementary) ((UChar)(((supplementary)>>10)+0xd7c0))

/* Second (trail) surrogate for a supplementary code point, as a UChar. */
#define UTF_SECOND_SURROGATE(supplementary) ((UChar)(((supplementary)&0x3ff)|0xdc00))

/* Alias for UTF_FIRST_SURROGATE. */
#define UTF16_LEAD(supplementary) UTF_FIRST_SURROGATE(supplementary)

/* Alias for UTF_SECOND_SURROGATE. */
#define UTF16_TRAIL(supplementary) UTF_SECOND_SURROGATE(supplementary)

/* Does this code unit encode a code point by itself, i.e. is it not a surrogate? */
#define UTF16_IS_SINGLE(uchar) (!UTF_IS_SURROGATE(uchar))

/* Alias for UTF_IS_FIRST_SURROGATE. */
#define UTF16_IS_LEAD(uchar) UTF_IS_FIRST_SURROGATE(uchar)

/* Alias for UTF_IS_SECOND_SURROGATE. */
#define UTF16_IS_TRAIL(uchar) UTF_IS_SECOND_SURROGATE(uchar)

/* Does code point c need more than one UTF-16 code unit? */
#define UTF16_NEED_MULTIPLE_UCHAR(c) ((uint32_t)(c)>0xffff)

/* Number of UTF-16 code units for code point c: 1 up to 0xffff, otherwise 2. */
#define UTF16_CHAR_LENGTH(c) ((uint32_t)(c)<=0xffff ? 1 : 2)

/* Largest value returned by UTF16_CHAR_LENGTH. */
#define UTF16_MAX_CHAR_LENGTH 2

/* Array size estimate for UTF-16: unchanged. */
#define UTF16_ARRAY_SIZE(size) (size)

/*
 * Read the code point that contains code unit index i; i is not modified.
 * No checks: a first surrogate is combined with s[i+1] and a second
 * surrogate with s[i-1], whatever those units are.
 */
#define UTF16_GET_CHAR_UNSAFE(s, i, c) { \
    (c)=(s)[i]; \
    if(UTF_IS_SURROGATE(c)) { \
        if(UTF_IS_SURROGATE_FIRST(c)) { \
            (c)=UTF16_GET_PAIR_VALUE((c), (s)[(i)+1]); \
        } else { \
            (c)=UTF16_GET_PAIR_VALUE((s)[(i)-1], (c)); \
        } \
    } \
}

/*
 * Checked counterpart of UTF16_GET_CHAR_UNSAFE, limited to [start, length).
 * An unpaired surrogate is returned unchanged unless strict is true, in
 * which case c becomes UTF_ERROR_VALUE.  In strict mode a non-surrogate
 * that fails UTF_IS_UNICODE_CHAR() also becomes UTF_ERROR_VALUE.
 */
#define UTF16_GET_CHAR_SAFE(s, start, i, length, c, strict) { \
    (c)=(s)[i]; \
    if(UTF_IS_SURROGATE(c)) { \
        uint16_t __c2; \
        if(UTF_IS_SURROGATE_FIRST(c)) { \
            if((i)+1<(length) && UTF_IS_SECOND_SURROGATE(__c2=(s)[(i)+1])) { \
                (c)=UTF16_GET_PAIR_VALUE((c), __c2); \
                /* strict: ((c)&0xfffe)==0xfffe is caught by UTF_IS_ERROR() and UTF_IS_UNICODE_CHAR() */ \
            } else if(strict) {\
                /* unmatched first surrogate */ \
                (c)=UTF_ERROR_VALUE; \
            } \
        } else { \
            if((i)-1>=(start) && UTF_IS_FIRST_SURROGATE(__c2=(s)[(i)-1])) { \
                (c)=UTF16_GET_PAIR_VALUE(__c2, (c)); \
                /* strict: ((c)&0xfffe)==0xfffe is caught by UTF_IS_ERROR() and UTF_IS_UNICODE_CHAR() */ \
            } else if(strict) {\
                /* unmatched second surrogate */ \
                (c)=UTF_ERROR_VALUE; \
            } \
        } \
    } else if((strict) && !UTF_IS_UNICODE_CHAR(c)) { \
        (c)=UTF_ERROR_VALUE; \
    } \
}

/* Read the code point starting at s[i] and advance i; a first surrogate always consumes the next unit. */
#define UTF16_NEXT_CHAR_UNSAFE(s, i, c) { \
    (c)=(s)[(i)++]; \
    if(UTF_IS_FIRST_SURROGATE(c)) { \
        (c)=UTF16_GET_PAIR_VALUE((c), (s)[(i)++]); \
    } \
}

/* Write code point c as one or two code units at s[i] and advance i; neither c nor the buffer space is checked. */
#define UTF16_APPEND_CHAR_UNSAFE(s, i, c) { \
    if((uint32_t)(c)<=0xffff) { \
        (s)[(i)++]=(uint16_t)(c); \
    } else { \
        (s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \
        (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \
    } \
}

/* Advance i over one code point: two units if s[i] is a first surrogate, otherwise one. */
#define UTF16_FWD_1_UNSAFE(s, i) { \
    if(UTF_IS_FIRST_SURROGATE((s)[(i)++])) { \
        ++(i); \
    } \
}

/* Advance i over n code points (n is evaluated once); no bounds checks. */
#define UTF16_FWD_N_UNSAFE(s, i, n) { \
    int32_t __N=(n); \
    while(__N>0) { \
        UTF16_FWD_1_UNSAFE(s, i); \
        --__N; \
    } \
}

/* If s[i] is a second surrogate, move i back by one unit. */
#define UTF16_SET_CHAR_START_UNSAFE(s, i) { \
    if(UTF_IS_SECOND_SURROGATE((s)[i])) { \
        --(i); \
    } \
}

/*
 * Read the code point starting at s[i] and advance i, never reading at or
 * beyond length for the second unit.  Unpaired surrogates are returned
 * unchanged unless strict is true; in strict mode they, and anything else
 * failing UTF_IS_UNICODE_CHAR(), become UTF_ERROR_VALUE.
 */
#define UTF16_NEXT_CHAR_SAFE(s, i, length, c, strict) { \
    (c)=(s)[(i)++]; \
    if(UTF_IS_FIRST_SURROGATE(c)) { \
        uint16_t __c2; \
        if((i)<(length) && UTF_IS_SECOND_SURROGATE(__c2=(s)[(i)])) { \
            ++(i); \
            (c)=UTF16_GET_PAIR_VALUE((c), __c2); \
            /* strict: ((c)&0xfffe)==0xfffe is caught by UTF_IS_ERROR() and UTF_IS_UNICODE_CHAR() */ \
        } else if(strict) {\
            /* unmatched first surrogate */ \
            (c)=UTF_ERROR_VALUE; \
        } \
    } else if((strict) && !UTF_IS_UNICODE_CHAR(c)) { \
        /* unmatched second surrogate or other non-character */ \
        (c)=UTF_ERROR_VALUE; \
    } \
}

/*
 * Append code point c at s[i] and advance i.
 * A supplementary code point is written as a pair only if two units fit
 * below length; otherwise, and for c>0x10ffff, a single UTF_ERROR_VALUE is
 * written.
 * NOTE(review): the single-unit stores (BMP value or error value) do not
 * compare i with length, so the caller must guarantee i<length.
 */
#define UTF16_APPEND_CHAR_SAFE(s, i, length, c) { \
    if((uint32_t)(c)<=0xffff) { \
        (s)[(i)++]=(uint16_t)(c); \
    } else if((uint32_t)(c)<=0x10ffff) { \
        if((i)+1<(length)) { \
            (s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \
            (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \
        } else /* not enough space */ { \
            (s)[(i)++]=UTF_ERROR_VALUE; \
        } \
    } else /* c>0x10ffff, write error value */ { \
        (s)[(i)++]=UTF_ERROR_VALUE; \
    } \
}

/* Alias for U16_FWD_1. */
#define UTF16_FWD_1_SAFE(s, i, length) U16_FWD_1(s, i, length)

/* Alias for U16_FWD_N. */
#define UTF16_FWD_N_SAFE(s, i, length, n) U16_FWD_N(s, i, length, n)

/* Alias for U16_SET_CP_START. */
#define UTF16_SET_CHAR_START_SAFE(s, start, i) U16_SET_CP_START(s, start, i)

/* Move i back over one code point and read it; a second surrogate always consumes the preceding unit. */
#define UTF16_PREV_CHAR_UNSAFE(s, i, c) { \
    (c)=(s)[--(i)]; \
    if(UTF_IS_SECOND_SURROGATE(c)) { \
        (c)=UTF16_GET_PAIR_VALUE((s)[--(i)], (c)); \
    } \
}

/* Move i back over one code point: two units if the unit before i is a second surrogate, otherwise one. */
#define UTF16_BACK_1_UNSAFE(s, i) { \
    if(UTF_IS_SECOND_SURROGATE((s)[--(i)])) { \
        --(i); \
    } \
}

/* Move i back over n code points (n is evaluated once); no bounds checks. */
#define UTF16_BACK_N_UNSAFE(s, i, n) { \
    int32_t __N=(n); \
    while(__N>0) { \
        UTF16_BACK_1_UNSAFE(s, i); \
        --__N; \
    } \
}

/* If the unit before i is a first surrogate, move i forward by one unit. */
#define UTF16_SET_CHAR_LIMIT_UNSAFE(s, i) { \
    if(UTF_IS_FIRST_SURROGATE((s)[(i)-1])) { \
        ++(i); \
    } \
}

/*
 * Move i back over one code point and read it, never reading below start
 * for the first unit.  Unpaired surrogates are returned unchanged unless
 * strict is true; in strict mode they, and anything else failing
 * UTF_IS_UNICODE_CHAR(), become UTF_ERROR_VALUE.
 */
#define UTF16_PREV_CHAR_SAFE(s, start, i, c, strict) { \
    (c)=(s)[--(i)]; \
    if(UTF_IS_SECOND_SURROGATE(c)) { \
        uint16_t __c2; \
        if((i)>(start) && UTF_IS_FIRST_SURROGATE(__c2=(s)[(i)-1])) { \
            --(i); \
            (c)=UTF16_GET_PAIR_VALUE(__c2, (c)); \
            /* strict: ((c)&0xfffe)==0xfffe is caught by UTF_IS_ERROR() and UTF_IS_UNICODE_CHAR() */ \
        } else if(strict) {\
            /* unmatched second surrogate */ \
            (c)=UTF_ERROR_VALUE; \
        } \
    } else if((strict) && !UTF_IS_UNICODE_CHAR(c)) { \
        /* unmatched first surrogate or other non-character */ \
        (c)=UTF_ERROR_VALUE; \
    } \
}

/* Alias for U16_BACK_1. */
#define UTF16_BACK_1_SAFE(s, start, i) U16_BACK_1(s, start, i)

/* Alias for U16_BACK_N. */
#define UTF16_BACK_N_SAFE(s, start, i, n) U16_BACK_N(s, start, i, n)

/* Alias for U16_SET_CP_LIMIT. */
#define UTF16_SET_CHAR_LIMIT_SAFE(s, start, i, length) U16_SET_CP_LIMIT(s, start, i, length)

/* Formerly utf32.h --------------------------------------------------------- */

/*
 * Old documentation:
 *
 * This file defines macros to deal with UTF-32 code units and code points.
 * Signatures and semantics are the same as for the similarly named macros
 * in utf16.h.
 * utf32.h is included by utf.h after unicode/umachine.h
 * and some common definitions.
 *
 * Usage: ICU coding guidelines for if() statements should be followed when
 * using these macros.  Compound statements (curly braces {}) must be used
 * for if-else-while... bodies and all macro statements should be terminated
 * with semicolon.
 */

/* internal definitions ----------------------------------------------------- */

/* Non-strict: is c in 0..0x10ffff?  Strict: does c pass UTF_IS_UNICODE_CHAR()? */
#define UTF32_IS_SAFE(c, strict) \
    (!(strict) ? \
        (uint32_t)(c)<=0x10ffff : \
        UTF_IS_UNICODE_CHAR(c))

/*
 * For the semantics of all of these macros, see utf16.h.
 * The UTF-32 versions are trivial because any code point is
 * encoded using exactly one code unit.
 */

/* single-code point definitions -------------------------------------------- */

/* classes of code unit values */

/* Every UTF-32 code unit is a complete code point. */
#define UTF32_IS_SINGLE(uchar) 1

/* UTF-32 has no lead units. */
#define UTF32_IS_LEAD(uchar) 0

/* UTF-32 has no trail units. */
#define UTF32_IS_TRAIL(uchar) 0

/* number of code units per code point */

/* A code point never needs more than one UTF-32 code unit. */
#define UTF32_NEED_MULTIPLE_UCHAR(c) 0

/* Always one code unit per code point. */
#define UTF32_CHAR_LENGTH(c) 1

/* Always one code unit per code point. */
#define UTF32_MAX_CHAR_LENGTH 1

/* average number of code units compared to UTF-16 */

/* Array size estimate for UTF-32: unchanged. */
#define UTF32_ARRAY_SIZE(size) (size)

/* Read s[i] into c; i is not modified. */
#define UTF32_GET_CHAR_UNSAFE(s, i, c) { \
    (c)=(s)[i]; \
}

/* Read s[i] into c, replacing it with UTF_ERROR_VALUE if it fails UTF32_IS_SAFE(); start and length are unused. */
#define UTF32_GET_CHAR_SAFE(s, start, i, length, c, strict) { \
    (c)=(s)[i]; \
    if(!UTF32_IS_SAFE(c, strict)) { \
        (c)=UTF_ERROR_VALUE; \
    } \
}

/* definitions with forward iteration --------------------------------------- */

/* Read s[i] into c and advance i. */
#define UTF32_NEXT_CHAR_UNSAFE(s, i, c) { \
    (c)=(s)[(i)++]; \
}

/* Store c at s[i] and advance i; c is not checked. */
#define UTF32_APPEND_CHAR_UNSAFE(s, i, c) { \
    (s)[(i)++]=(c); \
}

/* Advance i by one. */
#define UTF32_FWD_1_UNSAFE(s, i) { \
    ++(i); \
}

/* Advance i by n. */
#define UTF32_FWD_N_UNSAFE(s, i, n) { \
    (i)+=(n); \
}

/* No-op: every index is already a code point start. */
#define UTF32_SET_CHAR_START_UNSAFE(s, i) { \
}

/* Read s[i] into c and advance i, replacing c with UTF_ERROR_VALUE if it fails UTF32_IS_SAFE(); length is unused. */
#define UTF32_NEXT_CHAR_SAFE(s, i, length, c, strict) { \
    (c)=(s)[(i)++]; \
    if(!UTF32_IS_SAFE(c, strict)) { \
        (c)=UTF_ERROR_VALUE; \
    } \
}

/* Store c at s[i] and advance i; values above 0x10ffff are replaced with 0xfffd.  length is unused. */
#define UTF32_APPEND_CHAR_SAFE(s, i, length, c) { \
    if((uint32_t)(c)<=0x10ffff) { \
        (s)[(i)++]=(c); \
    } else /* c>0x10ffff, write 0xfffd */ { \
        (s)[(i)++]=0xfffd; \
    } \
}

/* Advance i by one; length is unused, so i is not clamped. */
#define UTF32_FWD_1_SAFE(s, i, length) { \
    ++(i); \
}

/* Advance i by n, clamping the result to length. */
#define UTF32_FWD_N_SAFE(s, i, length, n) { \
    if(((i)+=(n))>(length)) { \
        (i)=(length); \
    } \
}

/* No-op: every index is already a code point start. */
#define UTF32_SET_CHAR_START_SAFE(s, start, i) { \
}

/* definitions with backward iteration -------------------------------------- */

/* Decrement i and read s[i] into c. */
#define UTF32_PREV_CHAR_UNSAFE(s, i, c) { \
    (c)=(s)[--(i)]; \
}

/* Decrement i by one. */
#define UTF32_BACK_1_UNSAFE(s, i) { \
    --(i); \
}

/* Decrement i by n. */
#define UTF32_BACK_N_UNSAFE(s, i, n) { \
    (i)-=(n); \
}

/* No-op: every index is already a code point limit. */
#define UTF32_SET_CHAR_LIMIT_UNSAFE(s, i) { \
}

/* Decrement i and read s[i] into c, replacing c with UTF_ERROR_VALUE if it fails UTF32_IS_SAFE(); start is unused. */
#define UTF32_PREV_CHAR_SAFE(s, start, i, c, strict) { \
    (c)=(s)[--(i)]; \
    if(!UTF32_IS_SAFE(c, strict)) { \
        (c)=UTF_ERROR_VALUE; \
    } \
}

/* Decrement i by one; start is unused, so i is not clamped. */
#define UTF32_BACK_1_SAFE(s, start, i) { \
    --(i); \
}

/* Decrement i by n, clamping the result to start. */
#define UTF32_BACK_N_SAFE(s, start, i, n) { \
    (i)-=(n); \
    if((i)<(start)) { \
        (i)=(start); \
    } \
}

/*
 * No-op: every index is already a code point limit.
 * NOTE(review): this takes (s, i, length) while the UTF8_/UTF16_
 * counterparts take (s, start, i, length); the parameter list is kept
 * as is so existing callers keep compiling.
 */
#define UTF32_SET_CHAR_LIMIT_SAFE(s, i, length) { \
}

/* Formerly utf.h, part 2 --------------------------------------------------- */

/* The generic UTF_... names below are aliases for the corresponding UTF16_... macros. */

#define UTF_ARRAY_SIZE(size) UTF16_ARRAY_SIZE(size)

#define UTF_GET_CHAR_UNSAFE(s, i, c)                 UTF16_GET_CHAR_UNSAFE(s, i, c)
#define UTF_GET_CHAR_SAFE(s, start, i, length, c, strict) UTF16_GET_CHAR_SAFE(s, start, i, length, c, strict)

#define UTF_NEXT_CHAR_UNSAFE(s, i, c)                UTF16_NEXT_CHAR_UNSAFE(s, i, c)
#define UTF_NEXT_CHAR_SAFE(s, i, length, c, strict)  UTF16_NEXT_CHAR_SAFE(s, i, length, c, strict)

#define UTF_APPEND_CHAR_UNSAFE(s, i, c)              UTF16_APPEND_CHAR_UNSAFE(s, i, c)
#define UTF_APPEND_CHAR_SAFE(s, i, length, c)        UTF16_APPEND_CHAR_SAFE(s, i, length, c)

#define UTF_FWD_1_UNSAFE(s, i)                       UTF16_FWD_1_UNSAFE(s, i)
#define UTF_FWD_1_SAFE(s, i, length)                 UTF16_FWD_1_SAFE(s, i, length)

#define UTF_FWD_N_UNSAFE(s, i, n)                    UTF16_FWD_N_UNSAFE(s, i, n)
#define UTF_FWD_N_SAFE(s, i, length, n)              UTF16_FWD_N_SAFE(s, i, length, n)

#define UTF_SET_CHAR_START_UNSAFE(s, i)              UTF16_SET_CHAR_START_UNSAFE(s, i)
#define UTF_SET_CHAR_START_SAFE(s, start, i)         UTF16_SET_CHAR_START_SAFE(s, start, i)

#define UTF_PREV_CHAR_UNSAFE(s, i, c)                UTF16_PREV_CHAR_UNSAFE(s, i, c)
#define UTF_PREV_CHAR_SAFE(s, start, i, c, strict)   UTF16_PREV_CHAR_SAFE(s, start, i, c, strict)

#define UTF_BACK_1_UNSAFE(s, i)                      UTF16_BACK_1_UNSAFE(s, i)
#define UTF_BACK_1_SAFE(s, start, i)                 UTF16_BACK_1_SAFE(s, start, i)

#define UTF_BACK_N_UNSAFE(s, i, n)                   UTF16_BACK_N_UNSAFE(s, i, n)
#define UTF_BACK_N_SAFE(s, start, i, n)              UTF16_BACK_N_SAFE(s, start, i, n)

#define UTF_SET_CHAR_LIMIT_UNSAFE(s, i)              UTF16_SET_CHAR_LIMIT_UNSAFE(s, i)
#define UTF_SET_CHAR_LIMIT_SAFE(s, start, i, length) UTF16_SET_CHAR_LIMIT_SAFE(s, start, i, length)

/* Define default macros (UTF-16 "safe") ------------------------------------ */

/* The unsuffixed UTF_... names forward to the U16_... macros, except where noted. */

#define UTF_IS_SINGLE(uchar) U16_IS_SINGLE(uchar)

#define UTF_IS_LEAD(uchar) U16_IS_LEAD(uchar)

#define UTF_IS_TRAIL(uchar) U16_IS_TRAIL(uchar)

/* Forwards to the old-style macro defined above. */
#define UTF_NEED_MULTIPLE_UCHAR(c) UTF16_NEED_MULTIPLE_UCHAR(c)

#define UTF_CHAR_LENGTH(c) U16_LENGTH(c)

#define UTF_MAX_CHAR_LENGTH U16_MAX_LENGTH

#define UTF_GET_CHAR(s, start, i, length, c) U16_GET(s, start, i, length, c)

#define UTF_NEXT_CHAR(s, i, length, c) U16_NEXT(s, i, length, c)

/* Forwards to the old-style macro defined above. */
#define UTF_APPEND_CHAR(s, i, length, c) UTF16_APPEND_CHAR_SAFE(s, i, length, c)

#define UTF_FWD_1(s, i, length) U16_FWD_1(s, i, length)

#define UTF_FWD_N(s, i, length, n) U16_FWD_N(s, i, length, n)

#define UTF_SET_CHAR_START(s, start, i) U16_SET_CP_START(s, start, i)

#define UTF_PREV_CHAR(s, start, i, c) U16_PREV(s, start, i, c)

#define UTF_BACK_1(s, start, i) U16_BACK_1(s, start, i)

#define UTF_BACK_N(s, start, i, n) U16_BACK_N(s, start, i, n)

#define UTF_SET_CHAR_LIMIT(s, start, i, length) U16_SET_CP_LIMIT(s, start, i, length)

#endif  /* U_HIDE_DEPRECATED_API */

#endif  /* __UTF_OLD_H__ */