ICU 4.8.1.1
4.8.1.1
|
00001 /* 00002 ********************************************************************** 00003 * Copyright (C) 1998-2011, International Business Machines 00004 * Corporation and others. All Rights Reserved. 00005 ********************************************************************** 00006 * 00007 * File unistr.h 00008 * 00009 * Modification History: 00010 * 00011 * Date Name Description 00012 * 09/25/98 stephen Creation. 00013 * 11/11/98 stephen Changed per 11/9 code review. 00014 * 04/20/99 stephen Overhauled per 4/16 code review. 00015 * 11/18/99 aliu Made to inherit from Replaceable. Added method 00016 * handleReplaceBetween(); other methods unchanged. 00017 * 06/25/01 grhoten Remove dependency on iostream. 00018 ****************************************************************************** 00019 */ 00020 00021 #ifndef UNISTR_H 00022 #define UNISTR_H 00023 00029 #include "unicode/utypes.h" 00030 #include "unicode/rep.h" 00031 #include "unicode/std_string.h" 00032 #include "unicode/stringpiece.h" 00033 #include "unicode/bytestream.h" 00034 00035 struct UConverter; // unicode/ucnv.h 00036 class StringThreadTest; 00037 00038 #ifndef U_COMPARE_CODE_POINT_ORDER 00039 /* see also ustring.h and unorm.h */ 00045 #define U_COMPARE_CODE_POINT_ORDER 0x8000 00046 #endif 00047 00048 #ifndef USTRING_H 00049 00052 U_STABLE int32_t U_EXPORT2 00053 u_strlen(const UChar *s); 00054 #endif 00055 00056 U_NAMESPACE_BEGIN 00057 00058 class BreakIterator; // unicode/brkiter.h 00059 class Locale; // unicode/locid.h 00060 class StringCharacterIterator; 00061 class UnicodeStringAppendable; // unicode/appendable.h 00062 00063 /* The <iostream> include has been moved to unicode/ustream.h */ 00064 00075 #define US_INV U_NAMESPACE_QUALIFIER UnicodeString::kInvariant 00076 00094 #if defined(U_DECLARE_UTF16) 00095 # define UNICODE_STRING(cs, _length) U_NAMESPACE_QUALIFIER UnicodeString(TRUE, (const UChar *)U_DECLARE_UTF16(cs), _length) 00096 #elif U_SIZEOF_WCHAR_T==U_SIZEOF_UCHAR && 
(U_CHARSET_FAMILY==U_ASCII_FAMILY || (U_SIZEOF_UCHAR == 2 && defined(U_WCHAR_IS_UTF16))) 00097 # define UNICODE_STRING(cs, _length) U_NAMESPACE_QUALIFIER UnicodeString(TRUE, (const UChar *)L ## cs, _length) 00098 #elif U_SIZEOF_UCHAR==1 && U_CHARSET_FAMILY==U_ASCII_FAMILY 00099 # define UNICODE_STRING(cs, _length) U_NAMESPACE_QUALIFIER UnicodeString(TRUE, (const UChar *)cs, _length) 00100 #else 00101 # define UNICODE_STRING(cs, _length) U_NAMESPACE_QUALIFIER UnicodeString(cs, _length, US_INV) 00102 #endif 00103 00117 #define UNICODE_STRING_SIMPLE(cs) UNICODE_STRING(cs, -1) 00118 00188 class U_COMMON_API UnicodeString : public Replaceable 00189 { 00190 public: 00191 00200 enum EInvariant { 00205 kInvariant 00206 }; 00207 00208 //======================================== 00209 // Read-only operations 00210 //======================================== 00211 00212 /* Comparison - bitwise only - for international comparison use collation */ 00213 00221 inline UBool operator== (const UnicodeString& text) const; 00222 00230 inline UBool operator!= (const UnicodeString& text) const; 00231 00239 inline UBool operator> (const UnicodeString& text) const; 00240 00248 inline UBool operator< (const UnicodeString& text) const; 00249 00257 inline UBool operator>= (const UnicodeString& text) const; 00258 00266 inline UBool operator<= (const UnicodeString& text) const; 00267 00279 inline int8_t compare(const UnicodeString& text) const; 00280 00295 inline int8_t compare(int32_t start, 00296 int32_t length, 00297 const UnicodeString& text) const; 00298 00316 inline int8_t compare(int32_t start, 00317 int32_t length, 00318 const UnicodeString& srcText, 00319 int32_t srcStart, 00320 int32_t srcLength) const; 00321 00334 inline int8_t compare(const UChar *srcChars, 00335 int32_t srcLength) const; 00336 00351 inline int8_t compare(int32_t start, 00352 int32_t length, 00353 const UChar *srcChars) const; 00354 00372 inline int8_t compare(int32_t start, 00373 int32_t length, 00374 const UChar 
*srcChars, 00375 int32_t srcStart, 00376 int32_t srcLength) const; 00377 00395 inline int8_t compareBetween(int32_t start, 00396 int32_t limit, 00397 const UnicodeString& srcText, 00398 int32_t srcStart, 00399 int32_t srcLimit) const; 00400 00418 inline int8_t compareCodePointOrder(const UnicodeString& text) const; 00419 00439 inline int8_t compareCodePointOrder(int32_t start, 00440 int32_t length, 00441 const UnicodeString& srcText) const; 00442 00464 inline int8_t compareCodePointOrder(int32_t start, 00465 int32_t length, 00466 const UnicodeString& srcText, 00467 int32_t srcStart, 00468 int32_t srcLength) const; 00469 00488 inline int8_t compareCodePointOrder(const UChar *srcChars, 00489 int32_t srcLength) const; 00490 00510 inline int8_t compareCodePointOrder(int32_t start, 00511 int32_t length, 00512 const UChar *srcChars) const; 00513 00535 inline int8_t compareCodePointOrder(int32_t start, 00536 int32_t length, 00537 const UChar *srcChars, 00538 int32_t srcStart, 00539 int32_t srcLength) const; 00540 00562 inline int8_t compareCodePointOrderBetween(int32_t start, 00563 int32_t limit, 00564 const UnicodeString& srcText, 00565 int32_t srcStart, 00566 int32_t srcLimit) const; 00567 00586 inline int8_t caseCompare(const UnicodeString& text, uint32_t options) const; 00587 00608 inline int8_t caseCompare(int32_t start, 00609 int32_t length, 00610 const UnicodeString& srcText, 00611 uint32_t options) const; 00612 00635 inline int8_t caseCompare(int32_t start, 00636 int32_t length, 00637 const UnicodeString& srcText, 00638 int32_t srcStart, 00639 int32_t srcLength, 00640 uint32_t options) const; 00641 00661 inline int8_t caseCompare(const UChar *srcChars, 00662 int32_t srcLength, 00663 uint32_t options) const; 00664 00685 inline int8_t caseCompare(int32_t start, 00686 int32_t length, 00687 const UChar *srcChars, 00688 uint32_t options) const; 00689 00712 inline int8_t caseCompare(int32_t start, 00713 int32_t length, 00714 const UChar *srcChars, 00715 int32_t 
srcStart, 00716 int32_t srcLength, 00717 uint32_t options) const; 00718 00741 inline int8_t caseCompareBetween(int32_t start, 00742 int32_t limit, 00743 const UnicodeString& srcText, 00744 int32_t srcStart, 00745 int32_t srcLimit, 00746 uint32_t options) const; 00747 00755 inline UBool startsWith(const UnicodeString& text) const; 00756 00767 inline UBool startsWith(const UnicodeString& srcText, 00768 int32_t srcStart, 00769 int32_t srcLength) const; 00770 00779 inline UBool startsWith(const UChar *srcChars, 00780 int32_t srcLength) const; 00781 00791 inline UBool startsWith(const UChar *srcChars, 00792 int32_t srcStart, 00793 int32_t srcLength) const; 00794 00802 inline UBool endsWith(const UnicodeString& text) const; 00803 00814 inline UBool endsWith(const UnicodeString& srcText, 00815 int32_t srcStart, 00816 int32_t srcLength) const; 00817 00826 inline UBool endsWith(const UChar *srcChars, 00827 int32_t srcLength) const; 00828 00839 inline UBool endsWith(const UChar *srcChars, 00840 int32_t srcStart, 00841 int32_t srcLength) const; 00842 00843 00844 /* Searching - bitwise only */ 00845 00854 inline int32_t indexOf(const UnicodeString& text) const; 00855 00865 inline int32_t indexOf(const UnicodeString& text, 00866 int32_t start) const; 00867 00879 inline int32_t indexOf(const UnicodeString& text, 00880 int32_t start, 00881 int32_t length) const; 00882 00899 inline int32_t indexOf(const UnicodeString& srcText, 00900 int32_t srcStart, 00901 int32_t srcLength, 00902 int32_t start, 00903 int32_t length) const; 00904 00916 inline int32_t indexOf(const UChar *srcChars, 00917 int32_t srcLength, 00918 int32_t start) const; 00919 00932 inline int32_t indexOf(const UChar *srcChars, 00933 int32_t srcLength, 00934 int32_t start, 00935 int32_t length) const; 00936 00953 int32_t indexOf(const UChar *srcChars, 00954 int32_t srcStart, 00955 int32_t srcLength, 00956 int32_t start, 00957 int32_t length) const; 00958 00966 inline int32_t indexOf(UChar c) const; 00967 00976 inline 
int32_t indexOf(UChar32 c) const; 00977 00986 inline int32_t indexOf(UChar c, 00987 int32_t start) const; 00988 00998 inline int32_t indexOf(UChar32 c, 00999 int32_t start) const; 01000 01011 inline int32_t indexOf(UChar c, 01012 int32_t start, 01013 int32_t length) const; 01014 01026 inline int32_t indexOf(UChar32 c, 01027 int32_t start, 01028 int32_t length) const; 01029 01038 inline int32_t lastIndexOf(const UnicodeString& text) const; 01039 01049 inline int32_t lastIndexOf(const UnicodeString& text, 01050 int32_t start) const; 01051 01063 inline int32_t lastIndexOf(const UnicodeString& text, 01064 int32_t start, 01065 int32_t length) const; 01066 01083 inline int32_t lastIndexOf(const UnicodeString& srcText, 01084 int32_t srcStart, 01085 int32_t srcLength, 01086 int32_t start, 01087 int32_t length) const; 01088 01099 inline int32_t lastIndexOf(const UChar *srcChars, 01100 int32_t srcLength, 01101 int32_t start) const; 01102 01115 inline int32_t lastIndexOf(const UChar *srcChars, 01116 int32_t srcLength, 01117 int32_t start, 01118 int32_t length) const; 01119 01136 int32_t lastIndexOf(const UChar *srcChars, 01137 int32_t srcStart, 01138 int32_t srcLength, 01139 int32_t start, 01140 int32_t length) const; 01141 01149 inline int32_t lastIndexOf(UChar c) const; 01150 01159 inline int32_t lastIndexOf(UChar32 c) const; 01160 01169 inline int32_t lastIndexOf(UChar c, 01170 int32_t start) const; 01171 01181 inline int32_t lastIndexOf(UChar32 c, 01182 int32_t start) const; 01183 01194 inline int32_t lastIndexOf(UChar c, 01195 int32_t start, 01196 int32_t length) const; 01197 01209 inline int32_t lastIndexOf(UChar32 c, 01210 int32_t start, 01211 int32_t length) const; 01212 01213 01214 /* Character access */ 01215 01224 inline UChar charAt(int32_t offset) const; 01225 01233 inline UChar operator[] (int32_t offset) const; 01234 01246 inline UChar32 char32At(int32_t offset) const; 01247 01263 inline int32_t getChar32Start(int32_t offset) const; 01264 01281 inline int32_t 
getChar32Limit(int32_t offset) const; 01282 01333 int32_t moveIndex32(int32_t index, int32_t delta) const; 01334 01335 /* Substring extraction */ 01336 01352 inline void extract(int32_t start, 01353 int32_t length, 01354 UChar *dst, 01355 int32_t dstStart = 0) const; 01356 01378 int32_t 01379 extract(UChar *dest, int32_t destCapacity, 01380 UErrorCode &errorCode) const; 01381 01392 inline void extract(int32_t start, 01393 int32_t length, 01394 UnicodeString& target) const; 01395 01407 inline void extractBetween(int32_t start, 01408 int32_t limit, 01409 UChar *dst, 01410 int32_t dstStart = 0) const; 01411 01421 virtual void extractBetween(int32_t start, 01422 int32_t limit, 01423 UnicodeString& target) const; 01424 01446 int32_t extract(int32_t start, 01447 int32_t startLength, 01448 char *target, 01449 int32_t targetCapacity, 01450 enum EInvariant inv) const; 01451 01452 #if U_CHARSET_IS_UTF8 || !UCONFIG_NO_CONVERSION 01453 01473 int32_t extract(int32_t start, 01474 int32_t startLength, 01475 char *target, 01476 uint32_t targetLength) const; 01477 01478 #endif 01479 01480 #if !UCONFIG_NO_CONVERSION 01481 01507 inline int32_t extract(int32_t start, 01508 int32_t startLength, 01509 char *target, 01510 const char *codepage = 0) const; 01511 01541 int32_t extract(int32_t start, 01542 int32_t startLength, 01543 char *target, 01544 uint32_t targetLength, 01545 const char *codepage) const; 01546 01564 int32_t extract(char *dest, int32_t destCapacity, 01565 UConverter *cnv, 01566 UErrorCode &errorCode) const; 01567 01568 #endif 01569 01583 UnicodeString tempSubString(int32_t start=0, int32_t length=INT32_MAX) const; 01584 01595 inline UnicodeString tempSubStringBetween(int32_t start, int32_t limit=INT32_MAX) const; 01596 01608 void toUTF8(ByteSink &sink) const; 01609 01610 #if U_HAVE_STD_STRING 01611 01624 template<typename StringClass> 01625 StringClass &toUTF8String(StringClass &result) const { 01626 StringByteSink<StringClass> sbs(&result); 01627 toUTF8(sbs); 01628 
return result; 01629 } 01630 01631 #endif 01632 01648 int32_t toUTF32(UChar32 *utf32, int32_t capacity, UErrorCode &errorCode) const; 01649 01650 /* Length operations */ 01651 01660 inline int32_t length(void) const; 01661 01675 int32_t 01676 countChar32(int32_t start=0, int32_t length=INT32_MAX) const; 01677 01701 UBool 01702 hasMoreChar32Than(int32_t start, int32_t length, int32_t number) const; 01703 01709 inline UBool isEmpty(void) const; 01710 01720 inline int32_t getCapacity(void) const; 01721 01722 /* Other operations */ 01723 01729 inline int32_t hashCode(void) const; 01730 01743 inline UBool isBogus(void) const; 01744 01745 01746 //======================================== 01747 // Write operations 01748 //======================================== 01749 01750 /* Assignment operations */ 01751 01759 UnicodeString &operator=(const UnicodeString &srcText); 01760 01781 UnicodeString &fastCopyFrom(const UnicodeString &src); 01782 01790 inline UnicodeString& operator= (UChar ch); 01791 01799 inline UnicodeString& operator= (UChar32 ch); 01800 01812 inline UnicodeString& setTo(const UnicodeString& srcText, 01813 int32_t srcStart); 01814 01828 inline UnicodeString& setTo(const UnicodeString& srcText, 01829 int32_t srcStart, 01830 int32_t srcLength); 01831 01840 inline UnicodeString& setTo(const UnicodeString& srcText); 01841 01850 inline UnicodeString& setTo(const UChar *srcChars, 01851 int32_t srcLength); 01852 01861 UnicodeString& setTo(UChar srcChar); 01862 01871 UnicodeString& setTo(UChar32 srcChar); 01872 01893 UnicodeString &setTo(UBool isTerminated, 01894 const UChar *text, 01895 int32_t textLength); 01896 01916 UnicodeString &setTo(UChar *buffer, 01917 int32_t buffLength, 01918 int32_t buffCapacity); 01919 01960 void setToBogus(); 01961 01969 UnicodeString& setCharAt(int32_t offset, 01970 UChar ch); 01971 01972 01973 /* Append operations */ 01974 01982 inline UnicodeString& operator+= (UChar ch); 01983 01991 inline UnicodeString& operator+= (UChar32 ch); 
01992 02000 inline UnicodeString& operator+= (const UnicodeString& srcText); 02001 02016 inline UnicodeString& append(const UnicodeString& srcText, 02017 int32_t srcStart, 02018 int32_t srcLength); 02019 02027 inline UnicodeString& append(const UnicodeString& srcText); 02028 02042 inline UnicodeString& append(const UChar *srcChars, 02043 int32_t srcStart, 02044 int32_t srcLength); 02045 02055 inline UnicodeString& append(const UChar *srcChars, 02056 int32_t srcLength); 02057 02064 inline UnicodeString& append(UChar srcChar); 02065 02072 inline UnicodeString& append(UChar32 srcChar); 02073 02074 02075 /* Insert operations */ 02076 02090 inline UnicodeString& insert(int32_t start, 02091 const UnicodeString& srcText, 02092 int32_t srcStart, 02093 int32_t srcLength); 02094 02103 inline UnicodeString& insert(int32_t start, 02104 const UnicodeString& srcText); 02105 02119 inline UnicodeString& insert(int32_t start, 02120 const UChar *srcChars, 02121 int32_t srcStart, 02122 int32_t srcLength); 02123 02133 inline UnicodeString& insert(int32_t start, 02134 const UChar *srcChars, 02135 int32_t srcLength); 02136 02145 inline UnicodeString& insert(int32_t start, 02146 UChar srcChar); 02147 02156 inline UnicodeString& insert(int32_t start, 02157 UChar32 srcChar); 02158 02159 02160 /* Replace operations */ 02161 02179 UnicodeString& replace(int32_t start, 02180 int32_t length, 02181 const UnicodeString& srcText, 02182 int32_t srcStart, 02183 int32_t srcLength); 02184 02197 UnicodeString& replace(int32_t start, 02198 int32_t length, 02199 const UnicodeString& srcText); 02200 02218 UnicodeString& replace(int32_t start, 02219 int32_t length, 02220 const UChar *srcChars, 02221 int32_t srcStart, 02222 int32_t srcLength); 02223 02236 inline UnicodeString& replace(int32_t start, 02237 int32_t length, 02238 const UChar *srcChars, 02239 int32_t srcLength); 02240 02252 inline UnicodeString& replace(int32_t start, 02253 int32_t length, 02254 UChar srcChar); 02255 02267 inline 
UnicodeString& replace(int32_t start, 02268 int32_t length, 02269 UChar32 srcChar); 02270 02280 inline UnicodeString& replaceBetween(int32_t start, 02281 int32_t limit, 02282 const UnicodeString& srcText); 02283 02298 inline UnicodeString& replaceBetween(int32_t start, 02299 int32_t limit, 02300 const UnicodeString& srcText, 02301 int32_t srcStart, 02302 int32_t srcLimit); 02303 02314 virtual void handleReplaceBetween(int32_t start, 02315 int32_t limit, 02316 const UnicodeString& text); 02317 02323 virtual UBool hasMetaData() const; 02324 02340 virtual void copy(int32_t start, int32_t limit, int32_t dest); 02341 02342 /* Search and replace operations */ 02343 02352 inline UnicodeString& findAndReplace(const UnicodeString& oldText, 02353 const UnicodeString& newText); 02354 02366 inline UnicodeString& findAndReplace(int32_t start, 02367 int32_t length, 02368 const UnicodeString& oldText, 02369 const UnicodeString& newText); 02370 02388 UnicodeString& findAndReplace(int32_t start, 02389 int32_t length, 02390 const UnicodeString& oldText, 02391 int32_t oldStart, 02392 int32_t oldLength, 02393 const UnicodeString& newText, 02394 int32_t newStart, 02395 int32_t newLength); 02396 02397 02398 /* Remove operations */ 02399 02405 inline UnicodeString& remove(void); 02406 02415 inline UnicodeString& remove(int32_t start, 02416 int32_t length = (int32_t)INT32_MAX); 02417 02426 inline UnicodeString& removeBetween(int32_t start, 02427 int32_t limit = (int32_t)INT32_MAX); 02428 02438 inline UnicodeString &retainBetween(int32_t start, int32_t limit = INT32_MAX); 02439 02440 /* Length operations */ 02441 02453 UBool padLeading(int32_t targetLength, 02454 UChar padChar = 0x0020); 02455 02467 UBool padTrailing(int32_t targetLength, 02468 UChar padChar = 0x0020); 02469 02476 inline UBool truncate(int32_t targetLength); 02477 02483 UnicodeString& trim(void); 02484 02485 02486 /* Miscellaneous operations */ 02487 02493 inline UnicodeString& reverse(void); 02494 02503 inline 
UnicodeString& reverse(int32_t start, 02504 int32_t length); 02505 02512 UnicodeString& toUpper(void); 02513 02521 UnicodeString& toUpper(const Locale& locale); 02522 02529 UnicodeString& toLower(void); 02530 02538 UnicodeString& toLower(const Locale& locale); 02539 02540 #if !UCONFIG_NO_BREAK_ITERATION 02541 02568 UnicodeString &toTitle(BreakIterator *titleIter); 02569 02597 UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale); 02598 02630 UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale, uint32_t options); 02631 02632 #endif 02633 02645 UnicodeString &foldCase(uint32_t options=0 /*U_FOLD_CASE_DEFAULT*/); 02646 02647 //======================================== 02648 // Access to the internal buffer 02649 //======================================== 02650 02694 UChar *getBuffer(int32_t minCapacity); 02695 02716 void releaseBuffer(int32_t newLength=-1); 02717 02748 inline const UChar *getBuffer() const; 02749 02783 inline const UChar *getTerminatedBuffer(); 02784 02785 //======================================== 02786 // Constructors 02787 //======================================== 02788 02792 UnicodeString(); 02793 02805 UnicodeString(int32_t capacity, UChar32 c, int32_t count); 02806 02812 UnicodeString(UChar ch); 02813 02819 UnicodeString(UChar32 ch); 02820 02827 UnicodeString(const UChar *text); 02828 02836 UnicodeString(const UChar *text, 02837 int32_t textLength); 02838 02858 UnicodeString(UBool isTerminated, 02859 const UChar *text, 02860 int32_t textLength); 02861 02880 UnicodeString(UChar *buffer, int32_t buffLength, int32_t buffCapacity); 02881 02882 #if U_CHARSET_IS_UTF8 || !UCONFIG_NO_CONVERSION 02883 02890 UnicodeString(const char *codepageData); 02891 02898 UnicodeString(const char *codepageData, int32_t dataLength); 02899 02900 #endif 02901 02902 #if !UCONFIG_NO_CONVERSION 02903 02921 UnicodeString(const char *codepageData, const char *codepage); 02922 02940 UnicodeString(const char *codepageData, int32_t dataLength, 
const char *codepage); 02941 02963 UnicodeString( 02964 const char *src, int32_t srcLength, 02965 UConverter *cnv, 02966 UErrorCode &errorCode); 02967 02968 #endif 02969 02994 UnicodeString(const char *src, int32_t length, enum EInvariant inv); 02995 02996 03002 UnicodeString(const UnicodeString& that); 03003 03010 UnicodeString(const UnicodeString& src, int32_t srcStart); 03011 03019 UnicodeString(const UnicodeString& src, int32_t srcStart, int32_t srcLength); 03020 03037 virtual Replaceable *clone() const; 03038 03042 virtual ~UnicodeString(); 03043 03057 static UnicodeString fromUTF8(const StringPiece &utf8); 03058 03070 static UnicodeString fromUTF32(const UChar32 *utf32, int32_t length); 03071 03072 /* Miscellaneous operations */ 03073 03108 UnicodeString unescape() const; 03109 03129 UChar32 unescapeAt(int32_t &offset) const; 03130 03136 static UClassID U_EXPORT2 getStaticClassID(); 03137 03143 virtual UClassID getDynamicClassID() const; 03144 03145 //======================================== 03146 // Implementation methods 03147 //======================================== 03148 03149 protected: 03154 virtual int32_t getLength() const; 03155 03161 virtual UChar getCharAt(int32_t offset) const; 03162 03168 virtual UChar32 getChar32At(int32_t offset) const; 03169 03170 private: 03171 // For char* constructors. Could be made public. 03172 UnicodeString &setToUTF8(const StringPiece &utf8); 03173 // For extract(char*). 03174 // We could make a toUTF8(target, capacity, errorCode) public but not 03175 // this version: New API will be cleaner if we make callers create substrings 03176 // rather than having start+length on every method, 03177 // and it should take a UErrorCode&. 
03178 int32_t 03179 toUTF8(int32_t start, int32_t len, 03180 char *target, int32_t capacity) const; 03181 03182 03183 inline int8_t 03184 doCompare(int32_t start, 03185 int32_t length, 03186 const UnicodeString& srcText, 03187 int32_t srcStart, 03188 int32_t srcLength) const; 03189 03190 int8_t doCompare(int32_t start, 03191 int32_t length, 03192 const UChar *srcChars, 03193 int32_t srcStart, 03194 int32_t srcLength) const; 03195 03196 inline int8_t 03197 doCompareCodePointOrder(int32_t start, 03198 int32_t length, 03199 const UnicodeString& srcText, 03200 int32_t srcStart, 03201 int32_t srcLength) const; 03202 03203 int8_t doCompareCodePointOrder(int32_t start, 03204 int32_t length, 03205 const UChar *srcChars, 03206 int32_t srcStart, 03207 int32_t srcLength) const; 03208 03209 inline int8_t 03210 doCaseCompare(int32_t start, 03211 int32_t length, 03212 const UnicodeString &srcText, 03213 int32_t srcStart, 03214 int32_t srcLength, 03215 uint32_t options) const; 03216 03217 int8_t 03218 doCaseCompare(int32_t start, 03219 int32_t length, 03220 const UChar *srcChars, 03221 int32_t srcStart, 03222 int32_t srcLength, 03223 uint32_t options) const; 03224 03225 int32_t doIndexOf(UChar c, 03226 int32_t start, 03227 int32_t length) const; 03228 03229 int32_t doIndexOf(UChar32 c, 03230 int32_t start, 03231 int32_t length) const; 03232 03233 int32_t doLastIndexOf(UChar c, 03234 int32_t start, 03235 int32_t length) const; 03236 03237 int32_t doLastIndexOf(UChar32 c, 03238 int32_t start, 03239 int32_t length) const; 03240 03241 void doExtract(int32_t start, 03242 int32_t length, 03243 UChar *dst, 03244 int32_t dstStart) const; 03245 03246 inline void doExtract(int32_t start, 03247 int32_t length, 03248 UnicodeString& target) const; 03249 03250 inline UChar doCharAt(int32_t offset) const; 03251 03252 UnicodeString& doReplace(int32_t start, 03253 int32_t length, 03254 const UnicodeString& srcText, 03255 int32_t srcStart, 03256 int32_t srcLength); 03257 03258 UnicodeString& 
doReplace(int32_t start, 03259 int32_t length, 03260 const UChar *srcChars, 03261 int32_t srcStart, 03262 int32_t srcLength); 03263 03264 UnicodeString& doReverse(int32_t start, 03265 int32_t length); 03266 03267 // calculate hash code 03268 int32_t doHashCode(void) const; 03269 03270 // get pointer to start of array 03271 // these do not check for kOpenGetBuffer, unlike the public getBuffer() function 03272 inline UChar* getArrayStart(void); 03273 inline const UChar* getArrayStart(void) const; 03274 03275 // A UnicodeString object (not necessarily its current buffer) 03276 // is writable unless it isBogus() or it has an "open" getBuffer(minCapacity). 03277 inline UBool isWritable() const; 03278 03279 // Is the current buffer writable? 03280 inline UBool isBufferWritable() const; 03281 03282 // None of the following does releaseArray(). 03283 inline void setLength(int32_t len); // sets only fShortLength and fLength 03284 inline void setToEmpty(); // sets fFlags=kShortString 03285 inline void setArray(UChar *array, int32_t len, int32_t capacity); // does not set fFlags 03286 03287 // allocate the array; result may be fStackBuffer 03288 // sets refCount to 1 if appropriate 03289 // sets fArray, fCapacity, and fFlags 03290 // returns boolean for success or failure 03291 UBool allocate(int32_t capacity); 03292 03293 // release the array if owned 03294 void releaseArray(void); 03295 03296 // turn a bogus string into an empty one 03297 void unBogus(); 03298 03299 // implements assignment operator, copy constructor, and fastCopyFrom() 03300 UnicodeString &copyFrom(const UnicodeString &src, UBool fastCopy=FALSE); 03301 03302 // Pin start and limit to acceptable values. 03303 inline void pinIndex(int32_t& start) const; 03304 inline void pinIndices(int32_t& start, 03305 int32_t& length) const; 03306 03307 #if !UCONFIG_NO_CONVERSION 03308 03309 /* Internal extract() using UConverter. 
*/ 03310 int32_t doExtract(int32_t start, int32_t length, 03311 char *dest, int32_t destCapacity, 03312 UConverter *cnv, 03313 UErrorCode &errorCode) const; 03314 03315 /* 03316 * Real constructor for converting from codepage data. 03317 * It assumes that it is called with !fRefCounted. 03318 * 03319 * If <code>codepage==0</code>, then the default converter 03320 * is used for the platform encoding. 03321 * If <code>codepage</code> is an empty string (<code>""</code>), 03322 * then a simple conversion is performed on the codepage-invariant 03323 * subset ("invariant characters") of the platform encoding. See utypes.h. 03324 */ 03325 void doCodepageCreate(const char *codepageData, 03326 int32_t dataLength, 03327 const char *codepage); 03328 03329 /* 03330 * Worker function for creating a UnicodeString from 03331 * a codepage string using a UConverter. 03332 */ 03333 void 03334 doCodepageCreate(const char *codepageData, 03335 int32_t dataLength, 03336 UConverter *converter, 03337 UErrorCode &status); 03338 03339 #endif 03340 03341 /* 03342 * This function is called when write access to the array 03343 * is necessary. 03344 * 03345 * We need to make a copy of the array if 03346 * the buffer is read-only, or 03347 * the buffer is refCounted (shared), and refCount>1, or 03348 * the buffer is too small. 03349 * 03350 * Return FALSE if memory could not be allocated. 
03351 */ 03352 UBool cloneArrayIfNeeded(int32_t newCapacity = -1, 03353 int32_t growCapacity = -1, 03354 UBool doCopyArray = TRUE, 03355 int32_t **pBufferToDelete = 0, 03356 UBool forceClone = FALSE); 03357 03358 // common function for case mappings 03359 UnicodeString & 03360 caseMap(BreakIterator *titleIter, 03361 const char *locale, 03362 uint32_t options, 03363 int32_t toWhichCase); 03364 03365 // ref counting 03366 void addRef(void); 03367 int32_t removeRef(void); 03368 int32_t refCount(void) const; 03369 03370 // constants 03371 enum { 03372 // Set the stack buffer size so that sizeof(UnicodeString) is, 03373 // naturally (without padding), a multiple of sizeof(pointer). 03374 US_STACKBUF_SIZE= sizeof(void *)==4 ? 13 : 15, // Size of stack buffer for short strings 03375 kInvalidUChar=0xffff, // invalid UChar index 03376 kGrowSize=128, // grow size for this buffer 03377 kInvalidHashCode=0, // invalid hash code 03378 kEmptyHashCode=1, // hash code for empty string 03379 03380 // bit flag values for fFlags 03381 kIsBogus=1, // this string is bogus, i.e., not valid or NULL 03382 kUsingStackBuffer=2,// using fUnion.fStackBuffer instead of fUnion.fFields 03383 kRefCounted=4, // there is a refCount field before the characters in fArray 03384 kBufferIsReadonly=8,// do not write to this buffer 03385 kOpenGetBuffer=16, // getBuffer(minCapacity) was called (is "open"), 03386 // and releaseBuffer(newLength) must be called 03387 03388 // combined values for convenience 03389 kShortString=kUsingStackBuffer, 03390 kLongString=kRefCounted, 03391 kReadonlyAlias=kBufferIsReadonly, 03392 kWritableAlias=0 03393 }; 03394 03395 friend class StringThreadTest; 03396 friend class UnicodeStringAppendable; 03397 03398 union StackBufferOrFields; // forward declaration necessary before friend declaration 03399 friend union StackBufferOrFields; // make US_STACKBUF_SIZE visible inside fUnion 03400 03401 /* 03402 * The following are all the class fields that are stored 03403 * in each 
UnicodeString object. 03404 * Note that UnicodeString has virtual functions, 03405 * therefore there is an implicit vtable pointer 03406 * as the first real field. 03407 * The fields should be aligned such that no padding is necessary. 03408 * On 32-bit machines, the size should be 32 bytes, 03409 * on 64-bit machines (8-byte pointers), it should be 40 bytes. 03410 * 03411 * We use a hack to achieve this. 03412 * 03413 * With at least some compilers, each of the following is forced to 03414 * a multiple of sizeof(pointer) [the largest field base unit here is a data pointer], 03415 * rounded up with additional padding if the fields do not already fit that requirement: 03416 * - sizeof(class UnicodeString) 03417 * - offsetof(UnicodeString, fUnion) 03418 * - sizeof(fUnion) 03419 * - sizeof(fFields) 03420 * 03421 * In order to avoid padding, we make sizeof(fStackBuffer)=16 (=8 UChars) 03422 * which is at least as large as sizeof(fFields) on 32-bit and 64-bit machines. 03423 * (Padding at the end of fFields is ok: 03424 * As long as there is no padding after fStackBuffer, it is not wasted space.) 03425 * 03426 * We further assume that the compiler does not reorder the fields, 03427 * so that fRestOfStackBuffer (which holds a few more UChars) immediately follows after fUnion, 03428 * with at most some padding (but no other field) in between. 03429 * (Padding there would be wasted space, but functionally harmless.) 03430 * 03431 * We use a few more sizeof(pointer)'s chunks of space with 03432 * fRestOfStackBuffer, fShortLength and fFlags, 03433 * to get up exactly to the intended sizeof(UnicodeString). 
03434 */ 03435 // (implicit) *vtable; 03436 union StackBufferOrFields { 03437 // fStackBuffer is used iff (fFlags&kUsingStackBuffer) 03438 // else fFields is used 03439 UChar fStackBuffer[8]; // buffer for short strings, together with fRestOfStackBuffer 03440 struct { 03441 UChar *fArray; // the Unicode data 03442 int32_t fCapacity; // capacity of fArray (in UChars) 03443 int32_t fLength; // number of characters in fArray if >127; else undefined 03444 } fFields; 03445 } fUnion; 03446 UChar fRestOfStackBuffer[US_STACKBUF_SIZE-8]; 03447 int8_t fShortLength; // 0..127: length <0: real length is in fUnion.fFields.fLength 03448 uint8_t fFlags; // bit flags: see constants above 03449 }; 03450 03459 U_COMMON_API UnicodeString U_EXPORT2 03460 operator+ (const UnicodeString &s1, const UnicodeString &s2); 03461 03462 //======================================== 03463 // Inline members 03464 //======================================== 03465 03466 //======================================== 03467 // Privates 03468 //======================================== 03469 03470 inline void 03471 UnicodeString::pinIndex(int32_t& start) const 03472 { 03473 // pin index 03474 if(start < 0) { 03475 start = 0; 03476 } else if(start > length()) { 03477 start = length(); 03478 } 03479 } 03480 03481 inline void 03482 UnicodeString::pinIndices(int32_t& start, 03483 int32_t& _length) const 03484 { 03485 // pin indices 03486 int32_t len = length(); 03487 if(start < 0) { 03488 start = 0; 03489 } else if(start > len) { 03490 start = len; 03491 } 03492 if(_length < 0) { 03493 _length = 0; 03494 } else if(_length > (len - start)) { 03495 _length = (len - start); 03496 } 03497 } 03498 03499 inline UChar* 03500 UnicodeString::getArrayStart() 03501 { return (fFlags&kUsingStackBuffer) ? fUnion.fStackBuffer : fUnion.fFields.fArray; } 03502 03503 inline const UChar* 03504 UnicodeString::getArrayStart() const 03505 { return (fFlags&kUsingStackBuffer) ? 
fUnion.fStackBuffer : fUnion.fFields.fArray; } 03506 03507 //======================================== 03508 // Read-only implementation methods 03509 //======================================== 03510 inline int32_t 03511 UnicodeString::length() const 03512 { return fShortLength>=0 ? fShortLength : fUnion.fFields.fLength; } 03513 03514 inline int32_t 03515 UnicodeString::getCapacity() const 03516 { return (fFlags&kUsingStackBuffer) ? US_STACKBUF_SIZE : fUnion.fFields.fCapacity; } 03517 03518 inline int32_t 03519 UnicodeString::hashCode() const 03520 { return doHashCode(); } 03521 03522 inline UBool 03523 UnicodeString::isBogus() const 03524 { return (UBool)(fFlags & kIsBogus); } 03525 03526 inline UBool 03527 UnicodeString::isWritable() const 03528 { return (UBool)!(fFlags&(kOpenGetBuffer|kIsBogus)); } 03529 03530 inline UBool 03531 UnicodeString::isBufferWritable() const 03532 { 03533 return (UBool)( 03534 !(fFlags&(kOpenGetBuffer|kIsBogus|kBufferIsReadonly)) && 03535 (!(fFlags&kRefCounted) || refCount()==1)); 03536 } 03537 03538 inline const UChar * 03539 UnicodeString::getBuffer() const { 03540 if(fFlags&(kIsBogus|kOpenGetBuffer)) { 03541 return 0; 03542 } else if(fFlags&kUsingStackBuffer) { 03543 return fUnion.fStackBuffer; 03544 } else { 03545 return fUnion.fFields.fArray; 03546 } 03547 } 03548 03549 //======================================== 03550 // Read-only alias methods 03551 //======================================== 03552 inline int8_t 03553 UnicodeString::doCompare(int32_t start, 03554 int32_t thisLength, 03555 const UnicodeString& srcText, 03556 int32_t srcStart, 03557 int32_t srcLength) const 03558 { 03559 if(srcText.isBogus()) { 03560 return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise 03561 } else { 03562 srcText.pinIndices(srcStart, srcLength); 03563 return doCompare(start, thisLength, srcText.getArrayStart(), srcStart, srcLength); 03564 } 03565 } 03566 03567 inline UBool 03568 UnicodeString::operator== (const UnicodeString& text) const 
03569 { 03570 if(isBogus()) { 03571 return text.isBogus(); 03572 } else { 03573 int32_t len = length(), textLength = text.length(); 03574 return 03575 !text.isBogus() && 03576 len == textLength && 03577 doCompare(0, len, text, 0, textLength) == 0; 03578 } 03579 } 03580 03581 inline UBool 03582 UnicodeString::operator!= (const UnicodeString& text) const 03583 { return (! operator==(text)); } 03584 03585 inline UBool 03586 UnicodeString::operator> (const UnicodeString& text) const 03587 { return doCompare(0, length(), text, 0, text.length()) == 1; } 03588 03589 inline UBool 03590 UnicodeString::operator< (const UnicodeString& text) const 03591 { return doCompare(0, length(), text, 0, text.length()) == -1; } 03592 03593 inline UBool 03594 UnicodeString::operator>= (const UnicodeString& text) const 03595 { return doCompare(0, length(), text, 0, text.length()) != -1; } 03596 03597 inline UBool 03598 UnicodeString::operator<= (const UnicodeString& text) const 03599 { return doCompare(0, length(), text, 0, text.length()) != 1; } 03600 03601 inline int8_t 03602 UnicodeString::compare(const UnicodeString& text) const 03603 { return doCompare(0, length(), text, 0, text.length()); } 03604 03605 inline int8_t 03606 UnicodeString::compare(int32_t start, 03607 int32_t _length, 03608 const UnicodeString& srcText) const 03609 { return doCompare(start, _length, srcText, 0, srcText.length()); } 03610 03611 inline int8_t 03612 UnicodeString::compare(const UChar *srcChars, 03613 int32_t srcLength) const 03614 { return doCompare(0, length(), srcChars, 0, srcLength); } 03615 03616 inline int8_t 03617 UnicodeString::compare(int32_t start, 03618 int32_t _length, 03619 const UnicodeString& srcText, 03620 int32_t srcStart, 03621 int32_t srcLength) const 03622 { return doCompare(start, _length, srcText, srcStart, srcLength); } 03623 03624 inline int8_t 03625 UnicodeString::compare(int32_t start, 03626 int32_t _length, 03627 const UChar *srcChars) const 03628 { return doCompare(start, 
_length, srcChars, 0, _length); } 03629 03630 inline int8_t 03631 UnicodeString::compare(int32_t start, 03632 int32_t _length, 03633 const UChar *srcChars, 03634 int32_t srcStart, 03635 int32_t srcLength) const 03636 { return doCompare(start, _length, srcChars, srcStart, srcLength); } 03637 03638 inline int8_t 03639 UnicodeString::compareBetween(int32_t start, 03640 int32_t limit, 03641 const UnicodeString& srcText, 03642 int32_t srcStart, 03643 int32_t srcLimit) const 03644 { return doCompare(start, limit - start, 03645 srcText, srcStart, srcLimit - srcStart); } 03646 03647 inline int8_t 03648 UnicodeString::doCompareCodePointOrder(int32_t start, 03649 int32_t thisLength, 03650 const UnicodeString& srcText, 03651 int32_t srcStart, 03652 int32_t srcLength) const 03653 { 03654 if(srcText.isBogus()) { 03655 return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise 03656 } else { 03657 srcText.pinIndices(srcStart, srcLength); 03658 return doCompareCodePointOrder(start, thisLength, srcText.getArrayStart(), srcStart, srcLength); 03659 } 03660 } 03661 03662 inline int8_t 03663 UnicodeString::compareCodePointOrder(const UnicodeString& text) const 03664 { return doCompareCodePointOrder(0, length(), text, 0, text.length()); } 03665 03666 inline int8_t 03667 UnicodeString::compareCodePointOrder(int32_t start, 03668 int32_t _length, 03669 const UnicodeString& srcText) const 03670 { return doCompareCodePointOrder(start, _length, srcText, 0, srcText.length()); } 03671 03672 inline int8_t 03673 UnicodeString::compareCodePointOrder(const UChar *srcChars, 03674 int32_t srcLength) const 03675 { return doCompareCodePointOrder(0, length(), srcChars, 0, srcLength); } 03676 03677 inline int8_t 03678 UnicodeString::compareCodePointOrder(int32_t start, 03679 int32_t _length, 03680 const UnicodeString& srcText, 03681 int32_t srcStart, 03682 int32_t srcLength) const 03683 { return doCompareCodePointOrder(start, _length, srcText, srcStart, srcLength); } 03684 03685 inline int8_t 03686 
UnicodeString::compareCodePointOrder(int32_t start, 03687 int32_t _length, 03688 const UChar *srcChars) const 03689 { return doCompareCodePointOrder(start, _length, srcChars, 0, _length); } 03690 03691 inline int8_t 03692 UnicodeString::compareCodePointOrder(int32_t start, 03693 int32_t _length, 03694 const UChar *srcChars, 03695 int32_t srcStart, 03696 int32_t srcLength) const 03697 { return doCompareCodePointOrder(start, _length, srcChars, srcStart, srcLength); } 03698 03699 inline int8_t 03700 UnicodeString::compareCodePointOrderBetween(int32_t start, 03701 int32_t limit, 03702 const UnicodeString& srcText, 03703 int32_t srcStart, 03704 int32_t srcLimit) const 03705 { return doCompareCodePointOrder(start, limit - start, 03706 srcText, srcStart, srcLimit - srcStart); } 03707 03708 inline int8_t 03709 UnicodeString::doCaseCompare(int32_t start, 03710 int32_t thisLength, 03711 const UnicodeString &srcText, 03712 int32_t srcStart, 03713 int32_t srcLength, 03714 uint32_t options) const 03715 { 03716 if(srcText.isBogus()) { 03717 return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise 03718 } else { 03719 srcText.pinIndices(srcStart, srcLength); 03720 return doCaseCompare(start, thisLength, srcText.getArrayStart(), srcStart, srcLength, options); 03721 } 03722 } 03723 03724 inline int8_t 03725 UnicodeString::caseCompare(const UnicodeString &text, uint32_t options) const { 03726 return doCaseCompare(0, length(), text, 0, text.length(), options); 03727 } 03728 03729 inline int8_t 03730 UnicodeString::caseCompare(int32_t start, 03731 int32_t _length, 03732 const UnicodeString &srcText, 03733 uint32_t options) const { 03734 return doCaseCompare(start, _length, srcText, 0, srcText.length(), options); 03735 } 03736 03737 inline int8_t 03738 UnicodeString::caseCompare(const UChar *srcChars, 03739 int32_t srcLength, 03740 uint32_t options) const { 03741 return doCaseCompare(0, length(), srcChars, 0, srcLength, options); 03742 } 03743 03744 inline int8_t 03745 
UnicodeString::caseCompare(int32_t start, 03746 int32_t _length, 03747 const UnicodeString &srcText, 03748 int32_t srcStart, 03749 int32_t srcLength, 03750 uint32_t options) const { 03751 return doCaseCompare(start, _length, srcText, srcStart, srcLength, options); 03752 } 03753 03754 inline int8_t 03755 UnicodeString::caseCompare(int32_t start, 03756 int32_t _length, 03757 const UChar *srcChars, 03758 uint32_t options) const { 03759 return doCaseCompare(start, _length, srcChars, 0, _length, options); 03760 } 03761 03762 inline int8_t 03763 UnicodeString::caseCompare(int32_t start, 03764 int32_t _length, 03765 const UChar *srcChars, 03766 int32_t srcStart, 03767 int32_t srcLength, 03768 uint32_t options) const { 03769 return doCaseCompare(start, _length, srcChars, srcStart, srcLength, options); 03770 } 03771 03772 inline int8_t 03773 UnicodeString::caseCompareBetween(int32_t start, 03774 int32_t limit, 03775 const UnicodeString &srcText, 03776 int32_t srcStart, 03777 int32_t srcLimit, 03778 uint32_t options) const { 03779 return doCaseCompare(start, limit - start, srcText, srcStart, srcLimit - srcStart, options); 03780 } 03781 03782 inline int32_t 03783 UnicodeString::indexOf(const UnicodeString& srcText, 03784 int32_t srcStart, 03785 int32_t srcLength, 03786 int32_t start, 03787 int32_t _length) const 03788 { 03789 if(!srcText.isBogus()) { 03790 srcText.pinIndices(srcStart, srcLength); 03791 if(srcLength > 0) { 03792 return indexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length); 03793 } 03794 } 03795 return -1; 03796 } 03797 03798 inline int32_t 03799 UnicodeString::indexOf(const UnicodeString& text) const 03800 { return indexOf(text, 0, text.length(), 0, length()); } 03801 03802 inline int32_t 03803 UnicodeString::indexOf(const UnicodeString& text, 03804 int32_t start) const { 03805 pinIndex(start); 03806 return indexOf(text, 0, text.length(), start, length() - start); 03807 } 03808 03809 inline int32_t 03810 UnicodeString::indexOf(const 
UnicodeString& text, 03811 int32_t start, 03812 int32_t _length) const 03813 { return indexOf(text, 0, text.length(), start, _length); } 03814 03815 inline int32_t 03816 UnicodeString::indexOf(const UChar *srcChars, 03817 int32_t srcLength, 03818 int32_t start) const { 03819 pinIndex(start); 03820 return indexOf(srcChars, 0, srcLength, start, length() - start); 03821 } 03822 03823 inline int32_t 03824 UnicodeString::indexOf(const UChar *srcChars, 03825 int32_t srcLength, 03826 int32_t start, 03827 int32_t _length) const 03828 { return indexOf(srcChars, 0, srcLength, start, _length); } 03829 03830 inline int32_t 03831 UnicodeString::indexOf(UChar c, 03832 int32_t start, 03833 int32_t _length) const 03834 { return doIndexOf(c, start, _length); } 03835 03836 inline int32_t 03837 UnicodeString::indexOf(UChar32 c, 03838 int32_t start, 03839 int32_t _length) const 03840 { return doIndexOf(c, start, _length); } 03841 03842 inline int32_t 03843 UnicodeString::indexOf(UChar c) const 03844 { return doIndexOf(c, 0, length()); } 03845 03846 inline int32_t 03847 UnicodeString::indexOf(UChar32 c) const 03848 { return indexOf(c, 0, length()); } 03849 03850 inline int32_t 03851 UnicodeString::indexOf(UChar c, 03852 int32_t start) const { 03853 pinIndex(start); 03854 return doIndexOf(c, start, length() - start); 03855 } 03856 03857 inline int32_t 03858 UnicodeString::indexOf(UChar32 c, 03859 int32_t start) const { 03860 pinIndex(start); 03861 return indexOf(c, start, length() - start); 03862 } 03863 03864 inline int32_t 03865 UnicodeString::lastIndexOf(const UChar *srcChars, 03866 int32_t srcLength, 03867 int32_t start, 03868 int32_t _length) const 03869 { return lastIndexOf(srcChars, 0, srcLength, start, _length); } 03870 03871 inline int32_t 03872 UnicodeString::lastIndexOf(const UChar *srcChars, 03873 int32_t srcLength, 03874 int32_t start) const { 03875 pinIndex(start); 03876 return lastIndexOf(srcChars, 0, srcLength, start, length() - start); 03877 } 03878 03879 inline int32_t 
03880 UnicodeString::lastIndexOf(const UnicodeString& srcText, 03881 int32_t srcStart, 03882 int32_t srcLength, 03883 int32_t start, 03884 int32_t _length) const 03885 { 03886 if(!srcText.isBogus()) { 03887 srcText.pinIndices(srcStart, srcLength); 03888 if(srcLength > 0) { 03889 return lastIndexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length); 03890 } 03891 } 03892 return -1; 03893 } 03894 03895 inline int32_t 03896 UnicodeString::lastIndexOf(const UnicodeString& text, 03897 int32_t start, 03898 int32_t _length) const 03899 { return lastIndexOf(text, 0, text.length(), start, _length); } 03900 03901 inline int32_t 03902 UnicodeString::lastIndexOf(const UnicodeString& text, 03903 int32_t start) const { 03904 pinIndex(start); 03905 return lastIndexOf(text, 0, text.length(), start, length() - start); 03906 } 03907 03908 inline int32_t 03909 UnicodeString::lastIndexOf(const UnicodeString& text) const 03910 { return lastIndexOf(text, 0, text.length(), 0, length()); } 03911 03912 inline int32_t 03913 UnicodeString::lastIndexOf(UChar c, 03914 int32_t start, 03915 int32_t _length) const 03916 { return doLastIndexOf(c, start, _length); } 03917 03918 inline int32_t 03919 UnicodeString::lastIndexOf(UChar32 c, 03920 int32_t start, 03921 int32_t _length) const { 03922 return doLastIndexOf(c, start, _length); 03923 } 03924 03925 inline int32_t 03926 UnicodeString::lastIndexOf(UChar c) const 03927 { return doLastIndexOf(c, 0, length()); } 03928 03929 inline int32_t 03930 UnicodeString::lastIndexOf(UChar32 c) const { 03931 return lastIndexOf(c, 0, length()); 03932 } 03933 03934 inline int32_t 03935 UnicodeString::lastIndexOf(UChar c, 03936 int32_t start) const { 03937 pinIndex(start); 03938 return doLastIndexOf(c, start, length() - start); 03939 } 03940 03941 inline int32_t 03942 UnicodeString::lastIndexOf(UChar32 c, 03943 int32_t start) const { 03944 pinIndex(start); 03945 return lastIndexOf(c, start, length() - start); 03946 } 03947 03948 inline UBool 03949 
UnicodeString::startsWith(const UnicodeString& text) const 03950 { return compare(0, text.length(), text, 0, text.length()) == 0; } 03951 03952 inline UBool 03953 UnicodeString::startsWith(const UnicodeString& srcText, 03954 int32_t srcStart, 03955 int32_t srcLength) const 03956 { return doCompare(0, srcLength, srcText, srcStart, srcLength) == 0; } 03957 03958 inline UBool 03959 UnicodeString::startsWith(const UChar *srcChars, int32_t srcLength) const { 03960 if(srcLength < 0) { 03961 srcLength = u_strlen(srcChars); 03962 } 03963 return doCompare(0, srcLength, srcChars, 0, srcLength) == 0; 03964 } 03965 03966 inline UBool 03967 UnicodeString::startsWith(const UChar *srcChars, int32_t srcStart, int32_t srcLength) const { 03968 if(srcLength < 0) { 03969 srcLength = u_strlen(srcChars); 03970 } 03971 return doCompare(0, srcLength, srcChars, srcStart, srcLength) == 0; 03972 } 03973 03974 inline UBool 03975 UnicodeString::endsWith(const UnicodeString& text) const 03976 { return doCompare(length() - text.length(), text.length(), 03977 text, 0, text.length()) == 0; } 03978 03979 inline UBool 03980 UnicodeString::endsWith(const UnicodeString& srcText, 03981 int32_t srcStart, 03982 int32_t srcLength) const { 03983 srcText.pinIndices(srcStart, srcLength); 03984 return doCompare(length() - srcLength, srcLength, 03985 srcText, srcStart, srcLength) == 0; 03986 } 03987 03988 inline UBool 03989 UnicodeString::endsWith(const UChar *srcChars, 03990 int32_t srcLength) const { 03991 if(srcLength < 0) { 03992 srcLength = u_strlen(srcChars); 03993 } 03994 return doCompare(length() - srcLength, srcLength, 03995 srcChars, 0, srcLength) == 0; 03996 } 03997 03998 inline UBool 03999 UnicodeString::endsWith(const UChar *srcChars, 04000 int32_t srcStart, 04001 int32_t srcLength) const { 04002 if(srcLength < 0) { 04003 srcLength = u_strlen(srcChars + srcStart); 04004 } 04005 return doCompare(length() - srcLength, srcLength, 04006 srcChars, srcStart, srcLength) == 0; 04007 } 04008 04009 
//======================================== 04010 // replace 04011 //======================================== 04012 inline UnicodeString& 04013 UnicodeString::replace(int32_t start, 04014 int32_t _length, 04015 const UnicodeString& srcText) 04016 { return doReplace(start, _length, srcText, 0, srcText.length()); } 04017 04018 inline UnicodeString& 04019 UnicodeString::replace(int32_t start, 04020 int32_t _length, 04021 const UnicodeString& srcText, 04022 int32_t srcStart, 04023 int32_t srcLength) 04024 { return doReplace(start, _length, srcText, srcStart, srcLength); } 04025 04026 inline UnicodeString& 04027 UnicodeString::replace(int32_t start, 04028 int32_t _length, 04029 const UChar *srcChars, 04030 int32_t srcLength) 04031 { return doReplace(start, _length, srcChars, 0, srcLength); } 04032 04033 inline UnicodeString& 04034 UnicodeString::replace(int32_t start, 04035 int32_t _length, 04036 const UChar *srcChars, 04037 int32_t srcStart, 04038 int32_t srcLength) 04039 { return doReplace(start, _length, srcChars, srcStart, srcLength); } 04040 04041 inline UnicodeString& 04042 UnicodeString::replace(int32_t start, 04043 int32_t _length, 04044 UChar srcChar) 04045 { return doReplace(start, _length, &srcChar, 0, 1); } 04046 04047 inline UnicodeString& 04048 UnicodeString::replace(int32_t start, 04049 int32_t _length, 04050 UChar32 srcChar) { 04051 UChar buffer[U16_MAX_LENGTH]; 04052 int32_t count = 0; 04053 UBool isError = FALSE; 04054 U16_APPEND(buffer, count, U16_MAX_LENGTH, srcChar, isError); 04055 return doReplace(start, _length, buffer, 0, count); 04056 } 04057 04058 inline UnicodeString& 04059 UnicodeString::replaceBetween(int32_t start, 04060 int32_t limit, 04061 const UnicodeString& srcText) 04062 { return doReplace(start, limit - start, srcText, 0, srcText.length()); } 04063 04064 inline UnicodeString& 04065 UnicodeString::replaceBetween(int32_t start, 04066 int32_t limit, 04067 const UnicodeString& srcText, 04068 int32_t srcStart, 04069 int32_t srcLimit) 04070 
{ return doReplace(start, limit - start, srcText, srcStart, srcLimit - srcStart); } 04071 04072 inline UnicodeString& 04073 UnicodeString::findAndReplace(const UnicodeString& oldText, 04074 const UnicodeString& newText) 04075 { return findAndReplace(0, length(), oldText, 0, oldText.length(), 04076 newText, 0, newText.length()); } 04077 04078 inline UnicodeString& 04079 UnicodeString::findAndReplace(int32_t start, 04080 int32_t _length, 04081 const UnicodeString& oldText, 04082 const UnicodeString& newText) 04083 { return findAndReplace(start, _length, oldText, 0, oldText.length(), 04084 newText, 0, newText.length()); } 04085 04086 // ============================ 04087 // extract 04088 // ============================ 04089 inline void 04090 UnicodeString::doExtract(int32_t start, 04091 int32_t _length, 04092 UnicodeString& target) const 04093 { target.replace(0, target.length(), *this, start, _length); } 04094 04095 inline void 04096 UnicodeString::extract(int32_t start, 04097 int32_t _length, 04098 UChar *target, 04099 int32_t targetStart) const 04100 { doExtract(start, _length, target, targetStart); } 04101 04102 inline void 04103 UnicodeString::extract(int32_t start, 04104 int32_t _length, 04105 UnicodeString& target) const 04106 { doExtract(start, _length, target); } 04107 04108 #if !UCONFIG_NO_CONVERSION 04109 04110 inline int32_t 04111 UnicodeString::extract(int32_t start, 04112 int32_t _length, 04113 char *dst, 04114 const char *codepage) const 04115 04116 { 04117 // This dstSize value will be checked explicitly 04118 return extract(start, _length, dst, dst!=0 ? 
0xffffffff : 0, codepage); 04119 } 04120 04121 #endif 04122 04123 inline void 04124 UnicodeString::extractBetween(int32_t start, 04125 int32_t limit, 04126 UChar *dst, 04127 int32_t dstStart) const { 04128 pinIndex(start); 04129 pinIndex(limit); 04130 doExtract(start, limit - start, dst, dstStart); 04131 } 04132 04133 inline UnicodeString 04134 UnicodeString::tempSubStringBetween(int32_t start, int32_t limit) const { 04135 return tempSubString(start, limit - start); 04136 } 04137 04138 inline UChar 04139 UnicodeString::doCharAt(int32_t offset) const 04140 { 04141 if((uint32_t)offset < (uint32_t)length()) { 04142 return getArrayStart()[offset]; 04143 } else { 04144 return kInvalidUChar; 04145 } 04146 } 04147 04148 inline UChar 04149 UnicodeString::charAt(int32_t offset) const 04150 { return doCharAt(offset); } 04151 04152 inline UChar 04153 UnicodeString::operator[] (int32_t offset) const 04154 { return doCharAt(offset); } 04155 04156 inline UChar32 04157 UnicodeString::char32At(int32_t offset) const 04158 { 04159 int32_t len = length(); 04160 if((uint32_t)offset < (uint32_t)len) { 04161 const UChar *array = getArrayStart(); 04162 UChar32 c; 04163 U16_GET(array, 0, offset, len, c); 04164 return c; 04165 } else { 04166 return kInvalidUChar; 04167 } 04168 } 04169 04170 inline int32_t 04171 UnicodeString::getChar32Start(int32_t offset) const { 04172 if((uint32_t)offset < (uint32_t)length()) { 04173 const UChar *array = getArrayStart(); 04174 U16_SET_CP_START(array, 0, offset); 04175 return offset; 04176 } else { 04177 return 0; 04178 } 04179 } 04180 04181 inline int32_t 04182 UnicodeString::getChar32Limit(int32_t offset) const { 04183 int32_t len = length(); 04184 if((uint32_t)offset < (uint32_t)len) { 04185 const UChar *array = getArrayStart(); 04186 U16_SET_CP_LIMIT(array, 0, offset, len); 04187 return offset; 04188 } else { 04189 return len; 04190 } 04191 } 04192 04193 inline UBool 04194 UnicodeString::isEmpty() const { 04195 return fShortLength == 0; 04196 } 04197 
04198 //======================================== 04199 // Write implementation methods 04200 //======================================== 04201 inline void 04202 UnicodeString::setLength(int32_t len) { 04203 if(len <= 127) { 04204 fShortLength = (int8_t)len; 04205 } else { 04206 fShortLength = (int8_t)-1; 04207 fUnion.fFields.fLength = len; 04208 } 04209 } 04210 04211 inline void 04212 UnicodeString::setToEmpty() { 04213 fShortLength = 0; 04214 fFlags = kShortString; 04215 } 04216 04217 inline void 04218 UnicodeString::setArray(UChar *array, int32_t len, int32_t capacity) { 04219 setLength(len); 04220 fUnion.fFields.fArray = array; 04221 fUnion.fFields.fCapacity = capacity; 04222 } 04223 04224 inline const UChar * 04225 UnicodeString::getTerminatedBuffer() { 04226 if(!isWritable()) { 04227 return 0; 04228 } else { 04229 UChar *array = getArrayStart(); 04230 int32_t len = length(); 04231 if(len < getCapacity() && ((fFlags&kRefCounted) == 0 || refCount() == 1)) { 04232 /* 04233 * kRefCounted: Do not write the NUL if the buffer is shared. 04234 * That is mostly safe, except when the length of one copy was modified 04235 * without copy-on-write, e.g., via truncate(newLength) or remove(void). 04236 * Then the NUL would be written into the middle of another copy's string. 04237 */ 04238 if(!(fFlags&kBufferIsReadonly)) { 04239 /* 04240 * We must not write to a readonly buffer, but it is known to be 04241 * NUL-terminated if len<capacity. 04242 * A shared, allocated buffer (refCount()>1) must not have its contents 04243 * modified, but the NUL at [len] is beyond the string contents, 04244 * and multiple string objects and threads writing the same NUL into the 04245 * same location is harmless. 04246 * In all other cases, the buffer is fully writable and it is anyway safe 04247 * to write the NUL. 
04248 * 04249 * Note: An earlier version of this code tested whether there is a NUL 04250 * at [len] already, but, while safe, it generated lots of warnings from 04251 * tools like valgrind and Purify. 04252 */ 04253 array[len] = 0; 04254 } 04255 return array; 04256 } else if(cloneArrayIfNeeded(len+1)) { 04257 array = getArrayStart(); 04258 array[len] = 0; 04259 return array; 04260 } else { 04261 return 0; 04262 } 04263 } 04264 } 04265 04266 inline UnicodeString& 04267 UnicodeString::operator= (UChar ch) 04268 { return doReplace(0, length(), &ch, 0, 1); } 04269 04270 inline UnicodeString& 04271 UnicodeString::operator= (UChar32 ch) 04272 { return replace(0, length(), ch); } 04273 04274 inline UnicodeString& 04275 UnicodeString::setTo(const UnicodeString& srcText, 04276 int32_t srcStart, 04277 int32_t srcLength) 04278 { 04279 unBogus(); 04280 return doReplace(0, length(), srcText, srcStart, srcLength); 04281 } 04282 04283 inline UnicodeString& 04284 UnicodeString::setTo(const UnicodeString& srcText, 04285 int32_t srcStart) 04286 { 04287 unBogus(); 04288 srcText.pinIndex(srcStart); 04289 return doReplace(0, length(), srcText, srcStart, srcText.length() - srcStart); 04290 } 04291 04292 inline UnicodeString& 04293 UnicodeString::setTo(const UnicodeString& srcText) 04294 { 04295 return copyFrom(srcText); 04296 } 04297 04298 inline UnicodeString& 04299 UnicodeString::setTo(const UChar *srcChars, 04300 int32_t srcLength) 04301 { 04302 unBogus(); 04303 return doReplace(0, length(), srcChars, 0, srcLength); 04304 } 04305 04306 inline UnicodeString& 04307 UnicodeString::setTo(UChar srcChar) 04308 { 04309 unBogus(); 04310 return doReplace(0, length(), &srcChar, 0, 1); 04311 } 04312 04313 inline UnicodeString& 04314 UnicodeString::setTo(UChar32 srcChar) 04315 { 04316 unBogus(); 04317 return replace(0, length(), srcChar); 04318 } 04319 04320 inline UnicodeString& 04321 UnicodeString::append(const UnicodeString& srcText, 04322 int32_t srcStart, 04323 int32_t srcLength) 04324 { 
return doReplace(length(), 0, srcText, srcStart, srcLength); } 04325 04326 inline UnicodeString& 04327 UnicodeString::append(const UnicodeString& srcText) 04328 { return doReplace(length(), 0, srcText, 0, srcText.length()); } 04329 04330 inline UnicodeString& 04331 UnicodeString::append(const UChar *srcChars, 04332 int32_t srcStart, 04333 int32_t srcLength) 04334 { return doReplace(length(), 0, srcChars, srcStart, srcLength); } 04335 04336 inline UnicodeString& 04337 UnicodeString::append(const UChar *srcChars, 04338 int32_t srcLength) 04339 { return doReplace(length(), 0, srcChars, 0, srcLength); } 04340 04341 inline UnicodeString& 04342 UnicodeString::append(UChar srcChar) 04343 { return doReplace(length(), 0, &srcChar, 0, 1); } 04344 04345 inline UnicodeString& 04346 UnicodeString::append(UChar32 srcChar) { 04347 UChar buffer[U16_MAX_LENGTH]; 04348 int32_t _length = 0; 04349 UBool isError = FALSE; 04350 U16_APPEND(buffer, _length, U16_MAX_LENGTH, srcChar, isError); 04351 return doReplace(length(), 0, buffer, 0, _length); 04352 } 04353 04354 inline UnicodeString& 04355 UnicodeString::operator+= (UChar ch) 04356 { return doReplace(length(), 0, &ch, 0, 1); } 04357 04358 inline UnicodeString& 04359 UnicodeString::operator+= (UChar32 ch) { 04360 return append(ch); 04361 } 04362 04363 inline UnicodeString& 04364 UnicodeString::operator+= (const UnicodeString& srcText) 04365 { return doReplace(length(), 0, srcText, 0, srcText.length()); } 04366 04367 inline UnicodeString& 04368 UnicodeString::insert(int32_t start, 04369 const UnicodeString& srcText, 04370 int32_t srcStart, 04371 int32_t srcLength) 04372 { return doReplace(start, 0, srcText, srcStart, srcLength); } 04373 04374 inline UnicodeString& 04375 UnicodeString::insert(int32_t start, 04376 const UnicodeString& srcText) 04377 { return doReplace(start, 0, srcText, 0, srcText.length()); } 04378 04379 inline UnicodeString& 04380 UnicodeString::insert(int32_t start, 04381 const UChar *srcChars, 04382 int32_t srcStart, 
04383 int32_t srcLength) 04384 { return doReplace(start, 0, srcChars, srcStart, srcLength); } 04385 04386 inline UnicodeString& 04387 UnicodeString::insert(int32_t start, 04388 const UChar *srcChars, 04389 int32_t srcLength) 04390 { return doReplace(start, 0, srcChars, 0, srcLength); } 04391 04392 inline UnicodeString& 04393 UnicodeString::insert(int32_t start, 04394 UChar srcChar) 04395 { return doReplace(start, 0, &srcChar, 0, 1); } 04396 04397 inline UnicodeString& 04398 UnicodeString::insert(int32_t start, 04399 UChar32 srcChar) 04400 { return replace(start, 0, srcChar); } 04401 04402 04403 inline UnicodeString& 04404 UnicodeString::remove() 04405 { 04406 // remove() of a bogus string makes the string empty and non-bogus 04407 // we also un-alias a read-only alias to deal with NUL-termination 04408 // issues with getTerminatedBuffer() 04409 if(fFlags & (kIsBogus|kBufferIsReadonly)) { 04410 setToEmpty(); 04411 } else { 04412 fShortLength = 0; 04413 } 04414 return *this; 04415 } 04416 04417 inline UnicodeString& 04418 UnicodeString::remove(int32_t start, 04419 int32_t _length) 04420 { 04421 if(start <= 0 && _length == INT32_MAX) { 04422 // remove(guaranteed everything) of a bogus string makes the string empty and non-bogus 04423 return remove(); 04424 } 04425 return doReplace(start, _length, NULL, 0, 0); 04426 } 04427 04428 inline UnicodeString& 04429 UnicodeString::removeBetween(int32_t start, 04430 int32_t limit) 04431 { return doReplace(start, limit - start, NULL, 0, 0); } 04432 04433 inline UnicodeString & 04434 UnicodeString::retainBetween(int32_t start, int32_t limit) { 04435 truncate(limit); 04436 return doReplace(0, start, NULL, 0, 0); 04437 } 04438 04439 inline UBool 04440 UnicodeString::truncate(int32_t targetLength) 04441 { 04442 if(isBogus() && targetLength == 0) { 04443 // truncate(0) of a bogus string makes the string empty and non-bogus 04444 unBogus(); 04445 return FALSE; 04446 } else if((uint32_t)targetLength < (uint32_t)length()) { 04447 
setLength(targetLength); 04448 if(fFlags&kBufferIsReadonly) { 04449 fUnion.fFields.fCapacity = targetLength; // not NUL-terminated any more 04450 } 04451 return TRUE; 04452 } else { 04453 return FALSE; 04454 } 04455 } 04456 04457 inline UnicodeString& 04458 UnicodeString::reverse() 04459 { return doReverse(0, length()); } 04460 04461 inline UnicodeString& 04462 UnicodeString::reverse(int32_t start, 04463 int32_t _length) 04464 { return doReverse(start, _length); } 04465 04466 U_NAMESPACE_END 04467 04468 #endif