ICU 4.8.1.1  4.8.1.1
unistr.h
Go to the documentation of this file.
00001 /*
00002 **********************************************************************
00003 *   Copyright (C) 1998-2011, International Business Machines
00004 *   Corporation and others.  All Rights Reserved.
00005 **********************************************************************
00006 *
00007 * File unistr.h
00008 *
00009 * Modification History:
00010 *
00011 *   Date        Name        Description
00012 *   09/25/98    stephen     Creation.
00013 *   11/11/98    stephen     Changed per 11/9 code review.
00014 *   04/20/99    stephen     Overhauled per 4/16 code review.
00015 *   11/18/99    aliu        Made to inherit from Replaceable.  Added method
00016 *                           handleReplaceBetween(); other methods unchanged.
00017 *   06/25/01    grhoten     Remove dependency on iostream.
00018 ******************************************************************************
00019 */
00020 
00021 #ifndef UNISTR_H
00022 #define UNISTR_H
00023 
00029 #include "unicode/utypes.h"
00030 #include "unicode/rep.h"
00031 #include "unicode/std_string.h"
00032 #include "unicode/stringpiece.h"
00033 #include "unicode/bytestream.h"
00034 
00035 struct UConverter;          // unicode/ucnv.h
00036 class  StringThreadTest;
00037 
00038 #ifndef U_COMPARE_CODE_POINT_ORDER  /* define the constant only if no earlier header already did */
00039 /* see also ustring.h and unorm.h */
00045 #define U_COMPARE_CODE_POINT_ORDER  0x8000  /* single bit (bit 15); presumably an option flag for the code-point-order comparisons -- confirm in ustring.h */
00046 #endif  /* U_COMPARE_CODE_POINT_ORDER */
00047 
00048 #ifndef USTRING_H
00049 
00052 U_STABLE int32_t U_EXPORT2
00053 u_strlen(const UChar *s);
00054 #endif
00055 
00056 U_NAMESPACE_BEGIN
00057 
00058 class BreakIterator;        // unicode/brkiter.h
00059 class Locale;               // unicode/locid.h
00060 class StringCharacterIterator;
00061 class UnicodeStringAppendable;  // unicode/appendable.h
00062 
00063 /* The <iostream> include has been moved to unicode/ustream.h */
00064 
00075 #define US_INV U_NAMESPACE_QUALIFIER UnicodeString::kInvariant  /* shorthand for the namespace-qualified EInvariant enumerator, usable as the EInvariant argument of UnicodeString's const char * constructor and extract() overload */
00076 
00094 #if defined(U_DECLARE_UTF16)  /* case 1: U_DECLARE_UTF16 exists -- wrap the literal with it, cast to const UChar *, and call UnicodeString(UBool isTerminated, const UChar *, int32_t) with TRUE */
00095 #   define UNICODE_STRING(cs, _length) U_NAMESPACE_QUALIFIER UnicodeString(TRUE, (const UChar *)U_DECLARE_UTF16(cs), _length)
00096 #elif U_SIZEOF_WCHAR_T==U_SIZEOF_UCHAR && (U_CHARSET_FAMILY==U_ASCII_FAMILY || (U_SIZEOF_UCHAR == 2 && defined(U_WCHAR_IS_UTF16)))  /* case 2: wchar_t is as wide as UChar, and either the charset family is ASCII or UChar is 2 bytes with U_WCHAR_IS_UTF16 defined -- token-paste an L prefix onto the literal and cast the wide literal */
00097 #   define UNICODE_STRING(cs, _length) U_NAMESPACE_QUALIFIER UnicodeString(TRUE, (const UChar *)L ## cs, _length)
00098 #elif U_SIZEOF_UCHAR==1 && U_CHARSET_FAMILY==U_ASCII_FAMILY  /* case 3: one-byte UChar with an ASCII charset family -- cast the narrow literal directly */
00099 #   define UNICODE_STRING(cs, _length) U_NAMESPACE_QUALIFIER UnicodeString(TRUE, (const UChar *)cs, _length)
00100 #else  /* fallback: pass the char literal plus US_INV to UnicodeString(const char *, int32_t, enum EInvariant) */
00101 #   define UNICODE_STRING(cs, _length) U_NAMESPACE_QUALIFIER UnicodeString(cs, _length, US_INV)
00102 #endif  /* UNICODE_STRING: cs must be a string literal (case 2 pastes L onto it); _length is forwarded unchanged as the length argument */
00103 
00117 #define UNICODE_STRING_SIMPLE(cs) UNICODE_STRING(cs, -1)  /* convenience form of UNICODE_STRING that always passes -1 as the length; presumably -1 means "length is determined from the terminator" -- confirm against the constructor documentation */
00118 
00188 class U_COMMON_API UnicodeString : public Replaceable
00189 {
00190 public:
00191 
00200   enum EInvariant {  // tag type: the trailing "enum EInvariant inv" parameter distinguishes one extract() overload and one const char * constructor from their siblings
00205     kInvariant  // the only enumerator; the US_INV macro expands to the namespace-qualified form of this value
00206   };
00207 
00208   //========================================
00209   // Read-only operations
00210   //========================================
00211 
00212   /* Comparison - bitwise only - for international comparison use collation */
00213 
00221   inline UBool operator== (const UnicodeString& text) const;
00222 
00230   inline UBool operator!= (const UnicodeString& text) const;
00231 
00239   inline UBool operator> (const UnicodeString& text) const;
00240 
00248   inline UBool operator< (const UnicodeString& text) const;
00249 
00257   inline UBool operator>= (const UnicodeString& text) const;
00258 
00266   inline UBool operator<= (const UnicodeString& text) const;
00267 
00279   inline int8_t compare(const UnicodeString& text) const;
00280 
00295   inline int8_t compare(int32_t start,
00296          int32_t length,
00297          const UnicodeString& text) const;
00298 
00316    inline int8_t compare(int32_t start,
00317          int32_t length,
00318          const UnicodeString& srcText,
00319          int32_t srcStart,
00320          int32_t srcLength) const;
00321 
00334   inline int8_t compare(const UChar *srcChars,
00335          int32_t srcLength) const;
00336 
00351   inline int8_t compare(int32_t start,
00352          int32_t length,
00353          const UChar *srcChars) const;
00354 
00372   inline int8_t compare(int32_t start,
00373          int32_t length,
00374          const UChar *srcChars,
00375          int32_t srcStart,
00376          int32_t srcLength) const;
00377 
00395   inline int8_t compareBetween(int32_t start,
00396             int32_t limit,
00397             const UnicodeString& srcText,
00398             int32_t srcStart,
00399             int32_t srcLimit) const;
00400 
00418   inline int8_t compareCodePointOrder(const UnicodeString& text) const;
00419 
00439   inline int8_t compareCodePointOrder(int32_t start,
00440                                       int32_t length,
00441                                       const UnicodeString& srcText) const;
00442 
00464    inline int8_t compareCodePointOrder(int32_t start,
00465                                        int32_t length,
00466                                        const UnicodeString& srcText,
00467                                        int32_t srcStart,
00468                                        int32_t srcLength) const;
00469 
00488   inline int8_t compareCodePointOrder(const UChar *srcChars,
00489                                       int32_t srcLength) const;
00490 
00510   inline int8_t compareCodePointOrder(int32_t start,
00511                                       int32_t length,
00512                                       const UChar *srcChars) const;
00513 
00535   inline int8_t compareCodePointOrder(int32_t start,
00536                                       int32_t length,
00537                                       const UChar *srcChars,
00538                                       int32_t srcStart,
00539                                       int32_t srcLength) const;
00540 
00562   inline int8_t compareCodePointOrderBetween(int32_t start,
00563                                              int32_t limit,
00564                                              const UnicodeString& srcText,
00565                                              int32_t srcStart,
00566                                              int32_t srcLimit) const;
00567 
00586   inline int8_t caseCompare(const UnicodeString& text, uint32_t options) const;
00587 
00608   inline int8_t caseCompare(int32_t start,
00609          int32_t length,
00610          const UnicodeString& srcText,
00611          uint32_t options) const;
00612 
00635   inline int8_t caseCompare(int32_t start,
00636          int32_t length,
00637          const UnicodeString& srcText,
00638          int32_t srcStart,
00639          int32_t srcLength,
00640          uint32_t options) const;
00641 
00661   inline int8_t caseCompare(const UChar *srcChars,
00662          int32_t srcLength,
00663          uint32_t options) const;
00664 
00685   inline int8_t caseCompare(int32_t start,
00686          int32_t length,
00687          const UChar *srcChars,
00688          uint32_t options) const;
00689 
00712   inline int8_t caseCompare(int32_t start,
00713          int32_t length,
00714          const UChar *srcChars,
00715          int32_t srcStart,
00716          int32_t srcLength,
00717          uint32_t options) const;
00718 
00741   inline int8_t caseCompareBetween(int32_t start,
00742             int32_t limit,
00743             const UnicodeString& srcText,
00744             int32_t srcStart,
00745             int32_t srcLimit,
00746             uint32_t options) const;
00747 
00755   inline UBool startsWith(const UnicodeString& text) const;
00756 
00767   inline UBool startsWith(const UnicodeString& srcText,
00768             int32_t srcStart,
00769             int32_t srcLength) const;
00770 
00779   inline UBool startsWith(const UChar *srcChars,
00780             int32_t srcLength) const;
00781 
00791   inline UBool startsWith(const UChar *srcChars,
00792             int32_t srcStart,
00793             int32_t srcLength) const;
00794 
00802   inline UBool endsWith(const UnicodeString& text) const;
00803 
00814   inline UBool endsWith(const UnicodeString& srcText,
00815           int32_t srcStart,
00816           int32_t srcLength) const;
00817 
00826   inline UBool endsWith(const UChar *srcChars,
00827           int32_t srcLength) const;
00828 
00839   inline UBool endsWith(const UChar *srcChars,
00840           int32_t srcStart,
00841           int32_t srcLength) const;
00842 
00843 
00844   /* Searching - bitwise only */
00845 
00854   inline int32_t indexOf(const UnicodeString& text) const;
00855 
00865   inline int32_t indexOf(const UnicodeString& text,
00866               int32_t start) const;
00867 
00879   inline int32_t indexOf(const UnicodeString& text,
00880               int32_t start,
00881               int32_t length) const;
00882 
00899   inline int32_t indexOf(const UnicodeString& srcText,
00900               int32_t srcStart,
00901               int32_t srcLength,
00902               int32_t start,
00903               int32_t length) const;
00904 
00916   inline int32_t indexOf(const UChar *srcChars,
00917               int32_t srcLength,
00918               int32_t start) const;
00919 
00932   inline int32_t indexOf(const UChar *srcChars,
00933               int32_t srcLength,
00934               int32_t start,
00935               int32_t length) const;
00936 
00953   int32_t indexOf(const UChar *srcChars,
00954               int32_t srcStart,
00955               int32_t srcLength,
00956               int32_t start,
00957               int32_t length) const;
00958 
00966   inline int32_t indexOf(UChar c) const;
00967 
00976   inline int32_t indexOf(UChar32 c) const;
00977 
00986   inline int32_t indexOf(UChar c,
00987               int32_t start) const;
00988 
00998   inline int32_t indexOf(UChar32 c,
00999               int32_t start) const;
01000 
01011   inline int32_t indexOf(UChar c,
01012               int32_t start,
01013               int32_t length) const;
01014 
01026   inline int32_t indexOf(UChar32 c,
01027               int32_t start,
01028               int32_t length) const;
01029 
01038   inline int32_t lastIndexOf(const UnicodeString& text) const;
01039 
01049   inline int32_t lastIndexOf(const UnicodeString& text,
01050               int32_t start) const;
01051 
01063   inline int32_t lastIndexOf(const UnicodeString& text,
01064               int32_t start,
01065               int32_t length) const;
01066 
01083   inline int32_t lastIndexOf(const UnicodeString& srcText,
01084               int32_t srcStart,
01085               int32_t srcLength,
01086               int32_t start,
01087               int32_t length) const;
01088 
01099   inline int32_t lastIndexOf(const UChar *srcChars,
01100               int32_t srcLength,
01101               int32_t start) const;
01102 
01115   inline int32_t lastIndexOf(const UChar *srcChars,
01116               int32_t srcLength,
01117               int32_t start,
01118               int32_t length) const;
01119 
01136   int32_t lastIndexOf(const UChar *srcChars,
01137               int32_t srcStart,
01138               int32_t srcLength,
01139               int32_t start,
01140               int32_t length) const;
01141 
01149   inline int32_t lastIndexOf(UChar c) const;
01150 
01159   inline int32_t lastIndexOf(UChar32 c) const;
01160 
01169   inline int32_t lastIndexOf(UChar c,
01170               int32_t start) const;
01171 
01181   inline int32_t lastIndexOf(UChar32 c,
01182               int32_t start) const;
01183 
01194   inline int32_t lastIndexOf(UChar c,
01195               int32_t start,
01196               int32_t length) const;
01197 
01209   inline int32_t lastIndexOf(UChar32 c,
01210               int32_t start,
01211               int32_t length) const;
01212 
01213 
01214   /* Character access */
01215 
01224   inline UChar charAt(int32_t offset) const;
01225 
01233   inline UChar operator[] (int32_t offset) const;
01234 
01246   inline UChar32 char32At(int32_t offset) const;
01247 
01263   inline int32_t getChar32Start(int32_t offset) const;
01264 
01281   inline int32_t getChar32Limit(int32_t offset) const;
01282 
01333   int32_t moveIndex32(int32_t index, int32_t delta) const;
01334 
01335   /* Substring extraction */
01336 
01352   inline void extract(int32_t start,
01353            int32_t length,
01354            UChar *dst,
01355            int32_t dstStart = 0) const;
01356 
01378   int32_t
01379   extract(UChar *dest, int32_t destCapacity,
01380           UErrorCode &errorCode) const;
01381 
01392   inline void extract(int32_t start,
01393            int32_t length,
01394            UnicodeString& target) const;
01395 
01407   inline void extractBetween(int32_t start,
01408               int32_t limit,
01409               UChar *dst,
01410               int32_t dstStart = 0) const;
01411 
01421   virtual void extractBetween(int32_t start,
01422               int32_t limit,
01423               UnicodeString& target) const;
01424 
01446   int32_t extract(int32_t start,
01447            int32_t startLength,
01448            char *target,
01449            int32_t targetCapacity,
01450            enum EInvariant inv) const;
01451 
01452 #if U_CHARSET_IS_UTF8 || !UCONFIG_NO_CONVERSION
01453 
01473   int32_t extract(int32_t start,
01474            int32_t startLength,
01475            char *target,
01476            uint32_t targetLength) const;
01477 
01478 #endif
01479 
01480 #if !UCONFIG_NO_CONVERSION
01481 
01507   inline int32_t extract(int32_t start,
01508                  int32_t startLength,
01509                  char *target,
01510                  const char *codepage = 0) const;
01511 
01541   int32_t extract(int32_t start,
01542            int32_t startLength,
01543            char *target,
01544            uint32_t targetLength,
01545            const char *codepage) const;
01546 
01564   int32_t extract(char *dest, int32_t destCapacity,
01565                   UConverter *cnv,
01566                   UErrorCode &errorCode) const;
01567 
01568 #endif
01569 
01583   UnicodeString tempSubString(int32_t start=0, int32_t length=INT32_MAX) const;
01584 
01595   inline UnicodeString tempSubStringBetween(int32_t start, int32_t limit=INT32_MAX) const;
01596 
01608   void toUTF8(ByteSink &sink) const;
01609 
01610 #if U_HAVE_STD_STRING
01611 
01624   template<typename StringClass>  // StringClass: the caller's string type; it must be usable as the template argument of StringByteSink
01625   StringClass &toUTF8String(StringClass &result) const {  // writes this string into result through toUTF8(ByteSink &); does not modify *this (const)
01626     StringByteSink<StringClass> sbs(&result);  // sink bound to the caller's string object (given its address)
01627     toUTF8(sbs);  // hand the sink to toUTF8(ByteSink &), which produces the output
01628     return result;  // return the same object that was passed in, so calls can be chained; NOTE(review): whether existing content of result is kept or replaced depends on StringByteSink -- confirm
01629   }
01630 
01631 #endif
01632 
01648   int32_t toUTF32(UChar32 *utf32, int32_t capacity, UErrorCode &errorCode) const;
01649 
01650   /* Length operations */
01651 
01660   inline int32_t length(void) const;
01661 
01675   int32_t
01676   countChar32(int32_t start=0, int32_t length=INT32_MAX) const;
01677 
01701   UBool
01702   hasMoreChar32Than(int32_t start, int32_t length, int32_t number) const;
01703 
01709   inline UBool isEmpty(void) const;
01710 
01720   inline int32_t getCapacity(void) const;
01721 
01722   /* Other operations */
01723 
01729   inline int32_t hashCode(void) const;
01730 
01743   inline UBool isBogus(void) const;
01744 
01745 
01746   //========================================
01747   // Write operations
01748   //========================================
01749 
01750   /* Assignment operations */
01751 
01759   UnicodeString &operator=(const UnicodeString &srcText);
01760 
01781   UnicodeString &fastCopyFrom(const UnicodeString &src);
01782 
01790   inline UnicodeString& operator= (UChar ch);
01791 
01799   inline UnicodeString& operator= (UChar32 ch);
01800 
01812   inline UnicodeString& setTo(const UnicodeString& srcText,
01813                int32_t srcStart);
01814 
01828   inline UnicodeString& setTo(const UnicodeString& srcText,
01829                int32_t srcStart,
01830                int32_t srcLength);
01831 
01840   inline UnicodeString& setTo(const UnicodeString& srcText);
01841 
01850   inline UnicodeString& setTo(const UChar *srcChars,
01851                int32_t srcLength);
01852 
01861   UnicodeString& setTo(UChar srcChar);
01862 
01871   UnicodeString& setTo(UChar32 srcChar);
01872 
01893   UnicodeString &setTo(UBool isTerminated,
01894                        const UChar *text,
01895                        int32_t textLength);
01896 
01916   UnicodeString &setTo(UChar *buffer,
01917                        int32_t buffLength,
01918                        int32_t buffCapacity);
01919 
01960   void setToBogus();
01961 
01969   UnicodeString& setCharAt(int32_t offset,
01970                UChar ch);
01971 
01972 
01973   /* Append operations */
01974 
01982  inline  UnicodeString& operator+= (UChar ch);
01983 
01991  inline  UnicodeString& operator+= (UChar32 ch);
01992 
02000   inline UnicodeString& operator+= (const UnicodeString& srcText);
02001 
02016   inline UnicodeString& append(const UnicodeString& srcText,
02017             int32_t srcStart,
02018             int32_t srcLength);
02019 
02027   inline UnicodeString& append(const UnicodeString& srcText);
02028 
02042   inline UnicodeString& append(const UChar *srcChars,
02043             int32_t srcStart,
02044             int32_t srcLength);
02045 
02055   inline UnicodeString& append(const UChar *srcChars,
02056             int32_t srcLength);
02057 
02064   inline UnicodeString& append(UChar srcChar);
02065 
02072   inline UnicodeString& append(UChar32 srcChar);
02073 
02074 
02075   /* Insert operations */
02076 
02090   inline UnicodeString& insert(int32_t start,
02091             const UnicodeString& srcText,
02092             int32_t srcStart,
02093             int32_t srcLength);
02094 
02103   inline UnicodeString& insert(int32_t start,
02104             const UnicodeString& srcText);
02105 
02119   inline UnicodeString& insert(int32_t start,
02120             const UChar *srcChars,
02121             int32_t srcStart,
02122             int32_t srcLength);
02123 
02133   inline UnicodeString& insert(int32_t start,
02134             const UChar *srcChars,
02135             int32_t srcLength);
02136 
02145   inline UnicodeString& insert(int32_t start,
02146             UChar srcChar);
02147 
02156   inline UnicodeString& insert(int32_t start,
02157             UChar32 srcChar);
02158 
02159 
02160   /* Replace operations */
02161 
02179   UnicodeString& replace(int32_t start,
02180              int32_t length,
02181              const UnicodeString& srcText,
02182              int32_t srcStart,
02183              int32_t srcLength);
02184 
02197   UnicodeString& replace(int32_t start,
02198              int32_t length,
02199              const UnicodeString& srcText);
02200 
02218   UnicodeString& replace(int32_t start,
02219              int32_t length,
02220              const UChar *srcChars,
02221              int32_t srcStart,
02222              int32_t srcLength);
02223 
02236   inline UnicodeString& replace(int32_t start,
02237              int32_t length,
02238              const UChar *srcChars,
02239              int32_t srcLength);
02240 
02252   inline UnicodeString& replace(int32_t start,
02253              int32_t length,
02254              UChar srcChar);
02255 
02267   inline UnicodeString& replace(int32_t start,
02268              int32_t length,
02269              UChar32 srcChar);
02270 
02280   inline UnicodeString& replaceBetween(int32_t start,
02281                 int32_t limit,
02282                 const UnicodeString& srcText);
02283 
02298   inline UnicodeString& replaceBetween(int32_t start,
02299                 int32_t limit,
02300                 const UnicodeString& srcText,
02301                 int32_t srcStart,
02302                 int32_t srcLimit);
02303 
02314   virtual void handleReplaceBetween(int32_t start,
02315                                     int32_t limit,
02316                                     const UnicodeString& text);
02317 
02323   virtual UBool hasMetaData() const;
02324 
02340   virtual void copy(int32_t start, int32_t limit, int32_t dest);
02341 
02342   /* Search and replace operations */
02343 
02352   inline UnicodeString& findAndReplace(const UnicodeString& oldText,
02353                 const UnicodeString& newText);
02354 
02366   inline UnicodeString& findAndReplace(int32_t start,
02367                 int32_t length,
02368                 const UnicodeString& oldText,
02369                 const UnicodeString& newText);
02370 
02388   UnicodeString& findAndReplace(int32_t start,
02389                 int32_t length,
02390                 const UnicodeString& oldText,
02391                 int32_t oldStart,
02392                 int32_t oldLength,
02393                 const UnicodeString& newText,
02394                 int32_t newStart,
02395                 int32_t newLength);
02396 
02397 
02398   /* Remove operations */
02399 
02405   inline UnicodeString& remove(void);
02406 
02415   inline UnicodeString& remove(int32_t start,
02416                                int32_t length = (int32_t)INT32_MAX);
02417 
02426   inline UnicodeString& removeBetween(int32_t start,
02427                                       int32_t limit = (int32_t)INT32_MAX);
02428 
02438   inline UnicodeString &retainBetween(int32_t start, int32_t limit = INT32_MAX);
02439 
02440   /* Length operations */
02441 
02453   UBool padLeading(int32_t targetLength,
02454                     UChar padChar = 0x0020);
02455 
02467   UBool padTrailing(int32_t targetLength,
02468                      UChar padChar = 0x0020);
02469 
02476   inline UBool truncate(int32_t targetLength);
02477 
02483   UnicodeString& trim(void);
02484 
02485 
02486   /* Miscellaneous operations */
02487 
02493   inline UnicodeString& reverse(void);
02494 
02503   inline UnicodeString& reverse(int32_t start,
02504              int32_t length);
02505 
02512   UnicodeString& toUpper(void);
02513 
02521   UnicodeString& toUpper(const Locale& locale);
02522 
02529   UnicodeString& toLower(void);
02530 
02538   UnicodeString& toLower(const Locale& locale);
02539 
02540 #if !UCONFIG_NO_BREAK_ITERATION
02541 
02568   UnicodeString &toTitle(BreakIterator *titleIter);
02569 
02597   UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale);
02598 
02630   UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale, uint32_t options);
02631 
02632 #endif
02633 
02645   UnicodeString &foldCase(uint32_t options=0 /*U_FOLD_CASE_DEFAULT*/);
02646 
02647   //========================================
02648   // Access to the internal buffer
02649   //========================================
02650 
02694   UChar *getBuffer(int32_t minCapacity);
02695 
02716   void releaseBuffer(int32_t newLength=-1);
02717 
02748   inline const UChar *getBuffer() const;
02749 
02783   inline const UChar *getTerminatedBuffer();
02784 
02785   //========================================
02786   // Constructors
02787   //========================================
02788 
02792   UnicodeString();
02793 
02805   UnicodeString(int32_t capacity, UChar32 c, int32_t count);
02806 
02812   UnicodeString(UChar ch);
02813 
02819   UnicodeString(UChar32 ch);
02820 
02827   UnicodeString(const UChar *text);
02828 
02836   UnicodeString(const UChar *text,
02837         int32_t textLength);
02838 
02858   UnicodeString(UBool isTerminated,
02859                 const UChar *text,
02860                 int32_t textLength);
02861 
02880   UnicodeString(UChar *buffer, int32_t buffLength, int32_t buffCapacity);
02881 
02882 #if U_CHARSET_IS_UTF8 || !UCONFIG_NO_CONVERSION
02883 
02890   UnicodeString(const char *codepageData);
02891 
02898   UnicodeString(const char *codepageData, int32_t dataLength);
02899 
02900 #endif
02901 
02902 #if !UCONFIG_NO_CONVERSION
02903 
02921   UnicodeString(const char *codepageData, const char *codepage);
02922 
02940   UnicodeString(const char *codepageData, int32_t dataLength, const char *codepage);
02941 
02963   UnicodeString(
02964         const char *src, int32_t srcLength,
02965         UConverter *cnv,
02966         UErrorCode &errorCode);
02967 
02968 #endif
02969 
02994   UnicodeString(const char *src, int32_t length, enum EInvariant inv);
02995 
02996 
03002   UnicodeString(const UnicodeString& that);
03003 
03010   UnicodeString(const UnicodeString& src, int32_t srcStart);
03011 
03019   UnicodeString(const UnicodeString& src, int32_t srcStart, int32_t srcLength);
03020 
03037   virtual Replaceable *clone() const;
03038 
03042   virtual ~UnicodeString();
03043 
03057   static UnicodeString fromUTF8(const StringPiece &utf8);
03058 
03070   static UnicodeString fromUTF32(const UChar32 *utf32, int32_t length);
03071 
03072   /* Miscellaneous operations */
03073 
03108   UnicodeString unescape() const;
03109 
03129   UChar32 unescapeAt(int32_t &offset) const;
03130 
03136   static UClassID U_EXPORT2 getStaticClassID();
03137 
03143   virtual UClassID getDynamicClassID() const;
03144 
03145   //========================================
03146   // Implementation methods
03147   //========================================
03148 
03149 protected:
03154   virtual int32_t getLength() const;
03155 
03161   virtual UChar getCharAt(int32_t offset) const;
03162 
03168   virtual UChar32 getChar32At(int32_t offset) const;
03169 
03170 private:
03171   // For char* constructors. Could be made public.
03172   UnicodeString &setToUTF8(const StringPiece &utf8);
03173   // For extract(char*).
03174   // We could make a toUTF8(target, capacity, errorCode) public but not
03175   // this version: New API will be cleaner if we make callers create substrings
03176   // rather than having start+length on every method,
03177   // and it should take a UErrorCode&.
03178   int32_t
03179   toUTF8(int32_t start, int32_t len,
03180          char *target, int32_t capacity) const;
03181 
03182 
03183   inline int8_t
03184   doCompare(int32_t start,
03185            int32_t length,
03186            const UnicodeString& srcText,
03187            int32_t srcStart,
03188            int32_t srcLength) const;
03189 
03190   int8_t doCompare(int32_t start,
03191            int32_t length,
03192            const UChar *srcChars,
03193            int32_t srcStart,
03194            int32_t srcLength) const;
03195 
03196   inline int8_t
03197   doCompareCodePointOrder(int32_t start,
03198                           int32_t length,
03199                           const UnicodeString& srcText,
03200                           int32_t srcStart,
03201                           int32_t srcLength) const;
03202 
03203   int8_t doCompareCodePointOrder(int32_t start,
03204                                  int32_t length,
03205                                  const UChar *srcChars,
03206                                  int32_t srcStart,
03207                                  int32_t srcLength) const;
03208 
03209   inline int8_t
03210   doCaseCompare(int32_t start,
03211                 int32_t length,
03212                 const UnicodeString &srcText,
03213                 int32_t srcStart,
03214                 int32_t srcLength,
03215                 uint32_t options) const;
03216 
03217   int8_t
03218   doCaseCompare(int32_t start,
03219                 int32_t length,
03220                 const UChar *srcChars,
03221                 int32_t srcStart,
03222                 int32_t srcLength,
03223                 uint32_t options) const;
03224 
03225   int32_t doIndexOf(UChar c,
03226             int32_t start,
03227             int32_t length) const;
03228 
03229   int32_t doIndexOf(UChar32 c,
03230                         int32_t start,
03231                         int32_t length) const;
03232 
03233   int32_t doLastIndexOf(UChar c,
03234                 int32_t start,
03235                 int32_t length) const;
03236 
03237   int32_t doLastIndexOf(UChar32 c,
03238                             int32_t start,
03239                             int32_t length) const;
03240 
03241   void doExtract(int32_t start,
03242          int32_t length,
03243          UChar *dst,
03244          int32_t dstStart) const;
03245 
03246   inline void doExtract(int32_t start,
03247          int32_t length,
03248          UnicodeString& target) const;
03249 
03250   inline UChar doCharAt(int32_t offset)  const;
03251 
03252   UnicodeString& doReplace(int32_t start,
03253                int32_t length,
03254                const UnicodeString& srcText,
03255                int32_t srcStart,
03256                int32_t srcLength);
03257 
03258   UnicodeString& doReplace(int32_t start,
03259                int32_t length,
03260                const UChar *srcChars,
03261                int32_t srcStart,
03262                int32_t srcLength);
03263 
03264   UnicodeString& doReverse(int32_t start,
03265                int32_t length);
03266 
03267   // calculate hash code
03268   int32_t doHashCode(void) const;
03269 
03270   // get pointer to start of array
03271   // these do not check for kOpenGetBuffer, unlike the public getBuffer() function
03272   inline UChar* getArrayStart(void);
03273   inline const UChar* getArrayStart(void) const;
03274 
03275   // A UnicodeString object (not necessarily its current buffer)
03276   // is writable unless it isBogus() or it has an "open" getBuffer(minCapacity).
03277   inline UBool isWritable() const;
03278 
03279   // Is the current buffer writable?
03280   inline UBool isBufferWritable() const;
03281 
03282   // None of the following does releaseArray().
03283   inline void setLength(int32_t len);        // sets only fShortLength and fLength
03284   inline void setToEmpty();                  // sets fFlags=kShortString
03285   inline void setArray(UChar *array, int32_t len, int32_t capacity); // does not set fFlags
03286 
03287   // allocate the array; result may be fStackBuffer
03288   // sets refCount to 1 if appropriate
03289   // sets fArray, fCapacity, and fFlags
03290   // returns boolean for success or failure
03291   UBool allocate(int32_t capacity);
03292 
03293   // release the array if owned
03294   void releaseArray(void);
03295 
03296   // turn a bogus string into an empty one
03297   void unBogus();
03298 
03299   // implements assignment operator, copy constructor, and fastCopyFrom()
03300   UnicodeString &copyFrom(const UnicodeString &src, UBool fastCopy=FALSE);
03301 
03302   // Pin start and limit to acceptable values.
03303   inline void pinIndex(int32_t& start) const;
03304   inline void pinIndices(int32_t& start,
03305                          int32_t& length) const;
03306 
03307 #if !UCONFIG_NO_CONVERSION
03308 
03309   /* Internal extract() using UConverter. */
03310   int32_t doExtract(int32_t start, int32_t length,
03311                     char *dest, int32_t destCapacity,
03312                     UConverter *cnv,
03313                     UErrorCode &errorCode) const;
03314 
03315   /*
03316    * Real constructor for converting from codepage data.
03317    * It assumes that it is called with !fRefCounted.
03318    *
03319    * If <code>codepage==0</code>, then the default converter
03320    * is used for the platform encoding.
03321    * If <code>codepage</code> is an empty string (<code>""</code>),
03322    * then a simple conversion is performed on the codepage-invariant
03323    * subset ("invariant characters") of the platform encoding. See utypes.h.
03324    */
03325   void doCodepageCreate(const char *codepageData,
03326                         int32_t dataLength,
03327                         const char *codepage);
03328 
03329   /*
03330    * Worker function for creating a UnicodeString from
03331    * a codepage string using a UConverter.
03332    */
03333   void
03334   doCodepageCreate(const char *codepageData,
03335                    int32_t dataLength,
03336                    UConverter *converter,
03337                    UErrorCode &status);
03338 
03339 #endif
03340 
03341   /*
03342    * This function is called when write access to the array
03343    * is necessary.
03344    *
03345    * We need to make a copy of the array if
03346    * the buffer is read-only, or
03347    * the buffer is refCounted (shared), and refCount>1, or
03348    * the buffer is too small.
03349    *
03350    * Return FALSE if memory could not be allocated.
03351    */
03352   UBool cloneArrayIfNeeded(int32_t newCapacity = -1,
03353                             int32_t growCapacity = -1,
03354                             UBool doCopyArray = TRUE,
03355                             int32_t **pBufferToDelete = 0,
03356                             UBool forceClone = FALSE);
03357 
03358   // common function for case mappings
03359   UnicodeString &
03360   caseMap(BreakIterator *titleIter,
03361           const char *locale,
03362           uint32_t options,
03363           int32_t toWhichCase);
03364 
03365   // ref counting
03366   void addRef(void);
03367   int32_t removeRef(void);
03368   int32_t refCount(void) const;
03369 
03370   // constants
03371   enum {
03372     // Set the stack buffer size so that sizeof(UnicodeString) is,
03373     // naturally (without padding), a multiple of sizeof(pointer).
03374     US_STACKBUF_SIZE= sizeof(void *)==4 ? 13 : 15, // Size (in UChars) of the stack buffer for short strings
03375     kInvalidUChar=0xffff, // returned by doCharAt()/char32At() for an out-of-range offset
03376     kGrowSize=128, // grow size for this buffer
03377     kInvalidHashCode=0, // invalid hash code
03378     kEmptyHashCode=1, // hash code for empty string
03379 
03380     // bit flag values for fFlags
03381     kIsBogus=1,         // this string is bogus, i.e., not valid or NULL
03382     kUsingStackBuffer=2,// using fUnion.fStackBuffer instead of fUnion.fFields
03383     kRefCounted=4,      // there is a refCount field before the characters in fArray
03384     kBufferIsReadonly=8,// do not write to this buffer
03385     kOpenGetBuffer=16,  // getBuffer(minCapacity) was called (is "open"),
03386                         // and releaseBuffer(newLength) must be called
03387 
03388     // combined values for convenience
03389     kShortString=kUsingStackBuffer, // also the fFlags value stored by setToEmpty()
03390     kLongString=kRefCounted,
03391     kReadonlyAlias=kBufferIsReadonly,
03392     kWritableAlias=0
03393   };
03394 
03395   friend class StringThreadTest;
03396   friend class UnicodeStringAppendable;
03397 
03398   union StackBufferOrFields;        // forward declaration necessary before friend declaration
03399   friend union StackBufferOrFields; // make US_STACKBUF_SIZE visible inside fUnion
03400 
03401   /*
03402    * The following are all the class fields that are stored
03403    * in each UnicodeString object.
03404    * Note that UnicodeString has virtual functions,
03405    * therefore there is an implicit vtable pointer
03406    * as the first real field.
03407    * The fields should be aligned such that no padding is necessary.
03408    * On 32-bit machines, the size should be 32 bytes,
03409    * on 64-bit machines (8-byte pointers), it should be 40 bytes.
03410    *
03411    * We use a hack to achieve this.
03412    *
03413    * With at least some compilers, each of the following is forced to
03414    * a multiple of sizeof(pointer) [the largest field base unit here is a data pointer],
03415    * rounded up with additional padding if the fields do not already fit that requirement:
03416    * - sizeof(class UnicodeString)
03417    * - offsetof(UnicodeString, fUnion)
03418    * - sizeof(fUnion)
03419    * - sizeof(fFields)
03420    *
03421    * In order to avoid padding, we make sizeof(fStackBuffer)=16 (=8 UChars)
03422    * which is at least as large as sizeof(fFields) on 32-bit and 64-bit machines.
03423    * (Padding at the end of fFields is ok:
03424    * As long as there is no padding after fStackBuffer, it is not wasted space.)
03425    *
03426    * We further assume that the compiler does not reorder the fields,
03427    * so that fRestOfStackBuffer (which holds a few more UChars) immediately follows after fUnion,
03428    * with at most some padding (but no other field) in between.
03429    * (Padding there would be wasted space, but functionally harmless.)
03430    *
03431    * We use a few more sizeof(pointer)'s chunks of space with
03432    * fRestOfStackBuffer, fShortLength and fFlags,
03433    * to get up exactly to the intended sizeof(UnicodeString).
03434    */
03435   // (implicit) *vtable;
03436   union StackBufferOrFields {
03437     // fStackBuffer is used iff (fFlags&kUsingStackBuffer)
03438     // else fFields is used
03439     UChar fStackBuffer[8];  // buffer for short strings, together with fRestOfStackBuffer
03440     struct {
03441       UChar   *fArray;    // the Unicode data; returned by getArrayStart() when the stack buffer is not in use
03442       int32_t fCapacity;  // capacity of fArray (in UChars)
03443       int32_t fLength;    // length when fShortLength<0 (setLength() stores lengths >127 here); else undefined
03444     } fFields;
03445   } fUnion;
03446   UChar fRestOfStackBuffer[US_STACKBUF_SIZE-8];
03447   int8_t fShortLength;  // 0..127: length  <0: real length is in fUnion.fFields.fLength
03448   uint8_t fFlags;       // bit flags: see constants above
03449 };
03450 
03459 U_COMMON_API UnicodeString U_EXPORT2
03460 operator+ (const UnicodeString &s1, const UnicodeString &s2);
03461 
03462 //========================================
03463 // Inline members
03464 //========================================
03465 
03466 //========================================
03467 // Privates
03468 //========================================
03469 
03470 // Clamp start into the valid index range [0, length()].
03471 inline void
03472 UnicodeString::pinIndex(int32_t& start) const {
03473   if(start < 0) {
03474     start = 0;
03475     return;
03476   }
03477   const int32_t len = length();
03478   if(start > len) { start = len; }
03479 }
03480 
03481 // Clamp (start, _length) so the range lies inside this string:
03482 // start ends up in [0, length()], _length in [0, length() - start].
03483 inline void
03484 UnicodeString::pinIndices(int32_t& start, int32_t& _length) const {
03485   const int32_t total = length();
03486   if(start < 0) {
03487     start = 0;
03488   } else if(start > total) {
03489     start = total;
03490   }
03491   const int32_t remaining = total - start;
03492   if(_length < 0) {
03493     _length = 0;
03494   } else if(_length > remaining) {
03495     _length = remaining;
03496   }
03497 }
03498 
03499 // Pointer to the first UChar: fStackBuffer when kUsingStackBuffer is set,
03500 // otherwise fFields.fArray. Non-const flavor first, const flavor second.
03501 inline UChar* UnicodeString::getArrayStart()
03502 { if(fFlags&kUsingStackBuffer) { return fUnion.fStackBuffer; } return fUnion.fFields.fArray; }
03503
03504 inline const UChar* UnicodeString::getArrayStart() const
03505 { if(fFlags&kUsingStackBuffer) { return fUnion.fStackBuffer; } return fUnion.fFields.fArray; }
03506 
03507 //========================================
03508 // Read-only implementation methods
03509 //========================================
03510 // Length in UChars: fShortLength when non-negative, else fFields.fLength.
03511 inline int32_t UnicodeString::length() const
03512 { if(fShortLength<0) { return fUnion.fFields.fLength; } return fShortLength; }
03513 
03514 // Capacity in UChars: US_STACKBUF_SIZE for stack-buffer strings, else fFields.fCapacity.
03515 inline int32_t UnicodeString::getCapacity() const
03516 { if(fFlags&kUsingStackBuffer) { return US_STACKBUF_SIZE; } return fUnion.fFields.fCapacity; }
03517 
03518 // Hash code of this string; the work is done by doHashCode().
03519 inline int32_t UnicodeString::hashCode() const
03520 { const int32_t code = doHashCode(); return code; }
03521 
03522 // Nonzero (the kIsBogus bit of fFlags) when this string is marked bogus.
03523 inline UBool UnicodeString::isBogus() const
03524 { const int32_t bogusBit = fFlags & kIsBogus; return (UBool)bogusBit; }
03525 
03526 // TRUE unless the kIsBogus or kOpenGetBuffer flag is set.
03527 inline UBool UnicodeString::isWritable() const
03528 { return (UBool)((fFlags&(kOpenGetBuffer|kIsBogus)) == 0); }
03529 
03530 // TRUE if none of kOpenGetBuffer, kIsBogus, kBufferIsReadonly is set and
03531 // the buffer is either not ref-counted or has refCount()==1.
03532 inline UBool UnicodeString::isBufferWritable() const {
03533   if(fFlags&(kOpenGetBuffer|kIsBogus|kBufferIsReadonly)) { return (UBool)FALSE; }
03534   if(!(fFlags&kRefCounted)) { return (UBool)TRUE; }
03535   return (UBool)(refCount()==1);
03536 }
03537 
03538 // Read-only pointer to the contents, or 0 if the string is bogus or a
03539 // getBuffer(minCapacity) call is still open.
03540 inline const UChar *
03541 UnicodeString::getBuffer() const {
03542   if(fFlags&(kIsBogus|kOpenGetBuffer)) {
03543     return 0;
03544   }
03545   return (fFlags&kUsingStackBuffer) ?
03546       fUnion.fStackBuffer : fUnion.fFields.fArray;
03547 }
03548 
03549 //========================================
03550 // Read-only alias methods
03551 //========================================
03552 // Compare a range of this string with a range of srcText. A bogus srcText
03553 // yields 0 if this string is bogus too and 1 otherwise; in every other
03554 // case the source range is pinned and the UChar* overload does the work.
03555 inline int8_t
03556 UnicodeString::doCompare(int32_t start, int32_t thisLength,
03557                          const UnicodeString& srcText,
03558                          int32_t srcStart, int32_t srcLength) const {
03559   if(!srcText.isBogus()) {
03560     srcText.pinIndices(srcStart, srcLength);
03561     const UChar *srcChars = srcText.getArrayStart();
03562     return doCompare(start, thisLength, srcChars, srcStart, srcLength);
03563   }
03564   return (int8_t)(isBogus() ? 0 : 1);
03565 }
03566 
03567 // Equality: a bogus string equals only another bogus string; otherwise
03568 // the lengths must match and doCompare() must report 0.
03569 inline UBool
03570 UnicodeString::operator== (const UnicodeString& text) const {
03571   if(isBogus()) {
03572     return text.isBogus();
03573   }
03574   if(text.isBogus()) { return FALSE; }
03575   const int32_t len = length();
03576   const int32_t textLength = text.length();
03577   if(len != textLength) { return FALSE; }
03578   return doCompare(0, len, text, 0, textLength) == 0;
03579 }
03580 
03581 // Inequality is the negation of operator==.
03582 inline UBool UnicodeString::operator!= (const UnicodeString& text) const
03583 { const UBool same = operator==(text); return !same; }
03584 
03585 // Relational operators, each built on a full-string doCompare():
03586 // ">" tests for ==1, "<" for ==-1, ">=" for !=-1 and "<=" for !=1.
03587 inline UBool UnicodeString::operator> (const UnicodeString& text) const
03588 { const int8_t order = doCompare(0, length(), text, 0, text.length()); return order == 1; }
03589
03590 inline UBool UnicodeString::operator< (const UnicodeString& text) const
03591 { const int8_t order = doCompare(0, length(), text, 0, text.length()); return order == -1; }
03592
03593 inline UBool UnicodeString::operator>= (const UnicodeString& text) const
03594 { const int8_t order = doCompare(0, length(), text, 0, text.length()); return order != -1; }
03595
03596 inline UBool UnicodeString::operator<= (const UnicodeString& text) const
03597 { const int8_t order = doCompare(0, length(), text, 0, text.length()); return order != 1; }
03600 
03601 // Whole string against whole string.
03602 inline int8_t UnicodeString::compare(const UnicodeString& text) const {
03603   return doCompare(0, length(), text, 0, text.length());
03604 }
03605
03606 // (start, _length) of this string against all of srcText.
03607 inline int8_t UnicodeString::compare(int32_t start, int32_t _length,
03608                                       const UnicodeString& srcText) const {
03609   return doCompare(start, _length, srcText, 0, srcText.length());
03610 }
03611
03612 // Whole string against srcLength UChars at srcChars.
03613 inline int8_t UnicodeString::compare(const UChar *srcChars, int32_t srcLength) const {
03614   return doCompare(0, length(), srcChars, 0, srcLength);
03615 }
03616
03617 // (start, _length) of this string against (srcStart, srcLength) of srcText.
03618 inline int8_t UnicodeString::compare(int32_t start, int32_t _length,
03619                                       const UnicodeString& srcText,
03620                                       int32_t srcStart, int32_t srcLength) const {
03621   return doCompare(start, _length, srcText, srcStart, srcLength);
03622 }
03623
03624 // (start, _length) of this string against _length UChars at srcChars.
03625 inline int8_t UnicodeString::compare(int32_t start, int32_t _length,
03626                                       const UChar *srcChars) const {
03627   return doCompare(start, _length, srcChars, 0, _length);
03628 }
03629
03630 // (start, _length) of this string against (srcStart, srcLength) of srcChars.
03631 inline int8_t UnicodeString::compare(int32_t start, int32_t _length,
03632                                       const UChar *srcChars,
03633                                       int32_t srcStart, int32_t srcLength) const {
03634   return doCompare(start, _length, srcChars, srcStart, srcLength);
03635 }
03637 
03638 // compare() variant taking [start, limit) pairs instead of (start, length).
03639 inline int8_t
03640 UnicodeString::compareBetween(int32_t start, int32_t limit,
03641                               const UnicodeString& srcText,
03642                               int32_t srcStart, int32_t srcLimit) const {
03643   const int32_t thisLength = limit - start, srcLength = srcLimit - srcStart;
03644   return doCompare(start, thisLength, srcText, srcStart, srcLength);
03645 }
03646 
03647 // compareCodePointOrder worker for a UnicodeString source. A bogus srcText
03648 // yields 0 if this string is bogus too and 1 otherwise; in every other
03649 // case the source range is pinned and the UChar* overload does the work.
03650 inline int8_t
03651 UnicodeString::doCompareCodePointOrder(int32_t start, int32_t thisLength,
03652                                        const UnicodeString& srcText,
03653                                        int32_t srcStart, int32_t srcLength) const {
03654   if(!srcText.isBogus()) {
03655     srcText.pinIndices(srcStart, srcLength);
03656     const UChar *srcChars = srcText.getArrayStart();
03657     return doCompareCodePointOrder(start, thisLength, srcChars, srcStart, srcLength);
03658   }
03659   return (int8_t)(isBogus() ? 0 : 1);
03660 }
03661 
03662 // compareCodePointOrder(): whole string against whole string.
03663 inline int8_t UnicodeString::compareCodePointOrder(const UnicodeString& text) const {
03664   return doCompareCodePointOrder(0, length(), text, 0, text.length());
03665 }
03666
03667 // (start, _length) of this string against all of srcText.
03668 inline int8_t UnicodeString::compareCodePointOrder(int32_t start, int32_t _length,
03669                                                     const UnicodeString& srcText) const {
03670   return doCompareCodePointOrder(start, _length, srcText, 0, srcText.length());
03671 }
03672
03673 // Whole string against srcLength UChars at srcChars.
03674 inline int8_t UnicodeString::compareCodePointOrder(const UChar *srcChars,
03675                                                     int32_t srcLength) const {
03676   return doCompareCodePointOrder(0, length(), srcChars, 0, srcLength);
03677 }
03678
03679 // (start, _length) of this string against (srcStart, srcLength) of srcText.
03680 inline int8_t UnicodeString::compareCodePointOrder(int32_t start, int32_t _length,
03681                                                     const UnicodeString& srcText,
03682                                                     int32_t srcStart, int32_t srcLength) const {
03683   return doCompareCodePointOrder(start, _length, srcText, srcStart, srcLength);
03684 }
03685
03686 // (start, _length) of this string against _length UChars at srcChars.
03687 inline int8_t UnicodeString::compareCodePointOrder(int32_t start, int32_t _length,
03688                                                     const UChar *srcChars) const {
03689   return doCompareCodePointOrder(start, _length, srcChars, 0, _length);
03690 }
03691
03692 // (start, _length) of this string against (srcStart, srcLength) of srcChars.
03693 inline int8_t UnicodeString::compareCodePointOrder(int32_t start, int32_t _length,
03694                                                     const UChar *srcChars,
03695                                                     int32_t srcStart, int32_t srcLength) const {
03696   return doCompareCodePointOrder(start, _length, srcChars, srcStart, srcLength);
03697 }
03698 
03699 // compareCodePointOrder() variant taking [start, limit) pairs.
03700 inline int8_t
03701 UnicodeString::compareCodePointOrderBetween(int32_t start, int32_t limit,
03702                                             const UnicodeString& srcText,
03703                                             int32_t srcStart, int32_t srcLimit) const {
03704   const int32_t thisLength = limit - start, srcLength = srcLimit - srcStart;
03705   return doCompareCodePointOrder(start, thisLength, srcText, srcStart, srcLength);
03706 }
03707 
03708 // caseCompare worker for a UnicodeString source. A bogus srcText yields 0
03709 // if this string is bogus too and 1 otherwise; in every other case the
03710 // source range is pinned and the UChar* overload does the work.
03711 inline int8_t
03712 UnicodeString::doCaseCompare(int32_t start, int32_t thisLength,
03713                              const UnicodeString &srcText,
03714                              int32_t srcStart, int32_t srcLength,
03715                              uint32_t options) const {
03716   if(!srcText.isBogus()) {
03717     srcText.pinIndices(srcStart, srcLength);
03718     const UChar *srcChars = srcText.getArrayStart();
03719     return doCaseCompare(start, thisLength, srcChars, srcStart, srcLength, options);
03720   }
03721   return (int8_t)(isBogus() ? 0 : 1);
03722 }
03723 
03724 // caseCompare(): whole string against whole string, via doCaseCompare().
03725 inline int8_t
03726 UnicodeString::caseCompare(const UnicodeString &text, uint32_t options) const
03727 { return doCaseCompare(0, length(), text, 0, text.length(), options); }
03728
03729 // (start, _length) of this string against all of srcText.
03730 inline int8_t
03731 UnicodeString::caseCompare(int32_t start, int32_t _length,
03732                            const UnicodeString &srcText, uint32_t options) const
03733 { return doCaseCompare(start, _length, srcText, 0, srcText.length(), options); }
03734
03735 // Whole string against srcLength UChars at srcChars.
03736 inline int8_t
03737 UnicodeString::caseCompare(const UChar *srcChars, int32_t srcLength,
03738                            uint32_t options) const
03739 { return doCaseCompare(0, length(), srcChars, 0, srcLength, options); }
03740
03741 // (start, _length) of this string against (srcStart, srcLength) of srcText.
03742 inline int8_t
03743 UnicodeString::caseCompare(int32_t start, int32_t _length,
03744                            const UnicodeString &srcText,
03745                            int32_t srcStart, int32_t srcLength,
03746                            uint32_t options) const
03747 { return doCaseCompare(start, _length, srcText, srcStart, srcLength, options); }
03748
03749 // (start, _length) of this string against _length UChars at srcChars.
03750 inline int8_t
03751 UnicodeString::caseCompare(int32_t start, int32_t _length,
03752                            const UChar *srcChars, uint32_t options) const
03753 { return doCaseCompare(start, _length, srcChars, 0, _length, options); }
03754
03755 // (start, _length) of this string against (srcStart, srcLength) of srcChars.
03756 inline int8_t
03757 UnicodeString::caseCompare(int32_t start, int32_t _length,
03758                            const UChar *srcChars,
03759                            int32_t srcStart, int32_t srcLength,
03760                            uint32_t options) const
03761 { return doCaseCompare(start, _length, srcChars, srcStart, srcLength, options); }
03771 
03772 // caseCompare() variant taking [start, limit) pairs instead of (start, length).
03773 inline int8_t
03774 UnicodeString::caseCompareBetween(int32_t start, int32_t limit,
03775                                   const UnicodeString &srcText,
03776                                   int32_t srcStart, int32_t srcLimit,
03777                                   uint32_t options) const {
03778   const int32_t thisLength = limit - start, srcLength = srcLimit - srcStart;
03779   return doCaseCompare(start, thisLength, srcText, srcStart, srcLength, options);
03780 }
03781 
03782 // indexOf() overloads. The UnicodeString-range flavor below returns -1
03783 // for a bogus or (after pinning) empty search text; otherwise it searches
03784 // for the pinned range through the UChar* overload.
03785 inline int32_t
03786 UnicodeString::indexOf(const UnicodeString& srcText,
03787                        int32_t srcStart, int32_t srcLength,
03788                        int32_t start, int32_t _length) const {
03789   if(srcText.isBogus()) {
03790     return -1;
03791   }
03792   srcText.pinIndices(srcStart, srcLength);
03793   if(srcLength <= 0) {
03794     return -1;
03795   }
03796   return indexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length);
03797 }
03798
03799 // Search the whole string for all of text.
03800 inline int32_t UnicodeString::indexOf(const UnicodeString& text) const {
03801   return indexOf(text, 0, text.length(), 0, length());
03802 }
03803
03804 // Search from the pinned start to the end of the string for all of text.
03805 inline int32_t UnicodeString::indexOf(const UnicodeString& text, int32_t start) const {
03806   pinIndex(start);
03807   const int32_t rest = length() - start;
03808   return indexOf(text, 0, text.length(), start, rest);
03809 }
03810
03811 // Search (start, _length) for all of text.
03812 inline int32_t UnicodeString::indexOf(const UnicodeString& text,
03813                                        int32_t start, int32_t _length) const {
03814   return indexOf(text, 0, text.length(), start, _length);
03815 }
03816
03817 // Search from the pinned start to the end for srcLength UChars at srcChars.
03818 inline int32_t UnicodeString::indexOf(const UChar *srcChars, int32_t srcLength,
03819                                        int32_t start) const {
03820   pinIndex(start);
03821   const int32_t rest = length() - start;
03822   return indexOf(srcChars, 0, srcLength, start, rest);
03823 }
03824
03825 // Search (start, _length) for srcLength UChars at srcChars.
03826 inline int32_t UnicodeString::indexOf(const UChar *srcChars, int32_t srcLength,
03827                                        int32_t start, int32_t _length) const {
03828   return indexOf(srcChars, 0, srcLength, start, _length);
03829 }
03830
03831 // Search (start, _length) for the UChar c.
03832 inline int32_t UnicodeString::indexOf(UChar c, int32_t start, int32_t _length) const {
03833   return doIndexOf(c, start, _length);
03834 }
03835
03836 // Search (start, _length) for the UChar32 c.
03837 inline int32_t UnicodeString::indexOf(UChar32 c, int32_t start, int32_t _length) const {
03838   return doIndexOf(c, start, _length);
03839 }
03840
03841 // Search the whole string for the UChar c.
03842 inline int32_t UnicodeString::indexOf(UChar c) const {
03843   return doIndexOf(c, 0, length());
03844 }
03845
03846 // Search the whole string for the UChar32 c.
03847 inline int32_t UnicodeString::indexOf(UChar32 c) const {
03848   return indexOf(c, 0, length());
03849 }
03850
03851 // Search from the pinned start to the end for the UChar c.
03852 inline int32_t UnicodeString::indexOf(UChar c, int32_t start) const {
03853   pinIndex(start);
03854   return doIndexOf(c, start, length() - start);
03855 }
03856
03857 // Search from the pinned start to the end for the UChar32 c.
03858 inline int32_t UnicodeString::indexOf(UChar32 c, int32_t start) const {
03859   pinIndex(start);
03860   return indexOf(c, start, length() - start);
03861 }
03863 
03864 inline int32_t
03865 UnicodeString::lastIndexOf(const UChar *srcChars,
03866                int32_t srcLength,
03867                int32_t start,
03868                int32_t _length) const
03869 { return lastIndexOf(srcChars, 0, srcLength, start, _length); }
03870 
03871 inline int32_t
03872 UnicodeString::lastIndexOf(const UChar *srcChars,
03873                int32_t srcLength,
03874                int32_t start) const {
03875   pinIndex(start);
03876   return lastIndexOf(srcChars, 0, srcLength, start, length() - start);
03877 }
03878 
03879 inline int32_t
03880 UnicodeString::lastIndexOf(const UnicodeString& srcText,
03881                int32_t srcStart,
03882                int32_t srcLength,
03883                int32_t start,
03884                int32_t _length) const
03885 {
03886   if(!srcText.isBogus()) {
03887     srcText.pinIndices(srcStart, srcLength);
03888     if(srcLength > 0) {
03889       return lastIndexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length);
03890     }
03891   }
03892   return -1;
03893 }
03894 
03895 inline int32_t
03896 UnicodeString::lastIndexOf(const UnicodeString& text,
03897                int32_t start,
03898                int32_t _length) const
03899 { return lastIndexOf(text, 0, text.length(), start, _length); }
03900 
03901 inline int32_t
03902 UnicodeString::lastIndexOf(const UnicodeString& text,
03903                int32_t start) const {
03904   pinIndex(start);
03905   return lastIndexOf(text, 0, text.length(), start, length() - start);
03906 }
03907 
03908 inline int32_t
03909 UnicodeString::lastIndexOf(const UnicodeString& text) const
03910 { return lastIndexOf(text, 0, text.length(), 0, length()); }
03911 
03912 inline int32_t
03913 UnicodeString::lastIndexOf(UChar c,
03914                int32_t start,
03915                int32_t _length) const
03916 { return doLastIndexOf(c, start, _length); }
03917 
03918 inline int32_t
03919 UnicodeString::lastIndexOf(UChar32 c,
03920                int32_t start,
03921                int32_t _length) const {
03922   return doLastIndexOf(c, start, _length);
03923 }
03924 
03925 inline int32_t
03926 UnicodeString::lastIndexOf(UChar c) const
03927 { return doLastIndexOf(c, 0, length()); }
03928 
03929 inline int32_t
03930 UnicodeString::lastIndexOf(UChar32 c) const {
03931   return lastIndexOf(c, 0, length());
03932 }
03933 
03934 inline int32_t
03935 UnicodeString::lastIndexOf(UChar c,
03936                int32_t start) const {
03937   pinIndex(start);
03938   return doLastIndexOf(c, start, length() - start);
03939 }
03940 
03941 inline int32_t
03942 UnicodeString::lastIndexOf(UChar32 c,
03943                int32_t start) const {
03944   pinIndex(start);
03945   return lastIndexOf(c, start, length() - start);
03946 }
03947 
03948 // Prefix test: compares the first text.length() UChars of this string with text.
03949 inline UBool UnicodeString::startsWith(const UnicodeString& text) const {
03950   const int32_t prefixLength = text.length();
03951   return compare(0, prefixLength, text, 0, prefixLength) == 0;
03952 }
03953
03954 // Prefix test against the (srcStart, srcLength) range of srcText.
03955 inline UBool UnicodeString::startsWith(const UnicodeString& srcText,
03956                                         int32_t srcStart, int32_t srcLength) const {
03957   return doCompare(0, srcLength, srcText, srcStart, srcLength) == 0;
03958 }
03959
03960 // Prefix test against srcChars; srcLength<0 means "measure with u_strlen()".
03961 inline UBool UnicodeString::startsWith(const UChar *srcChars, int32_t srcLength) const {
03962   const int32_t prefixLength = (srcLength < 0) ? u_strlen(srcChars) : srcLength;
03963   return doCompare(0, prefixLength, srcChars, 0, prefixLength) == 0;
03964 }
03965 
03966 // Prefix test against srcChars+srcStart. A negative srcLength means
03967 // NUL-terminated text; measure it from srcStart, as endsWith() does.
03968 inline UBool
03969 UnicodeString::startsWith(const UChar *srcChars, int32_t srcStart, int32_t srcLength) const {
03970   if(srcLength < 0) { srcLength = u_strlen(srcChars + srcStart); }
03971   return doCompare(0, srcLength, srcChars, srcStart, srcLength) == 0;
03972 }
03973 
03974 // Suffix test: compares the last text.length() UChars of this string with text.
03975 inline UBool UnicodeString::endsWith(const UnicodeString& text) const {
03976   const int32_t suffixLength = text.length();
03977   return doCompare(length() - suffixLength, suffixLength, text, 0, suffixLength) == 0;
03978 }
03979
03980 // Suffix test against a range of srcText; the range is pinned first so
03981 // that srcLength is the number of UChars actually compared.
03982 inline UBool UnicodeString::endsWith(const UnicodeString& srcText,
03983                                       int32_t srcStart, int32_t srcLength) const {
03984   srcText.pinIndices(srcStart, srcLength);
03985   const int32_t tailStart = length() - srcLength;
03986   return doCompare(tailStart, srcLength, srcText, srcStart, srcLength) == 0;
03987 }
03988
03989 // Suffix test against srcChars; srcLength<0 means "measure with u_strlen()".
03990 inline UBool UnicodeString::endsWith(const UChar *srcChars, int32_t srcLength) const {
03991   const int32_t suffixLength = (srcLength < 0) ? u_strlen(srcChars) : srcLength;
03992   return doCompare(length() - suffixLength, suffixLength, srcChars, 0, suffixLength) == 0;
03993 }
03994
03995 // Suffix test against srcChars+srcStart; a negative srcLength is replaced
03996 // by the u_strlen() of the text beginning at srcStart.
03997 inline UBool UnicodeString::endsWith(const UChar *srcChars,
03998                                       int32_t srcStart, int32_t srcLength) const {
03999   const int32_t suffixLength =
04000       (srcLength < 0) ? u_strlen(srcChars + srcStart) : srcLength;
04001   return doCompare(length() - suffixLength, suffixLength,
04002                    srcChars, srcStart, suffixLength) == 0;
04003 }
04008 
04009 //========================================
04010 // replace
04011 //========================================
04012 // Replace (start, _length) with all of srcText.
04013 inline UnicodeString& UnicodeString::replace(int32_t start, int32_t _length,
04014                                               const UnicodeString& srcText) {
04015   return doReplace(start, _length, srcText, 0, srcText.length());
04016 }
04017
04018 // Replace (start, _length) with the (srcStart, srcLength) range of srcText.
04019 inline UnicodeString& UnicodeString::replace(int32_t start, int32_t _length,
04020                                               const UnicodeString& srcText,
04021                                               int32_t srcStart, int32_t srcLength) {
04022   return doReplace(start, _length, srcText, srcStart, srcLength);
04023 }
04024
04025 // Replace (start, _length) with srcLength UChars at srcChars.
04026 inline UnicodeString& UnicodeString::replace(int32_t start, int32_t _length,
04027                                               const UChar *srcChars, int32_t srcLength) {
04028   return doReplace(start, _length, srcChars, 0, srcLength);
04029 }
04030
04031 // Replace (start, _length) with the (srcStart, srcLength) range of srcChars.
04032 inline UnicodeString& UnicodeString::replace(int32_t start, int32_t _length,
04033                                               const UChar *srcChars,
04034                                               int32_t srcStart, int32_t srcLength) {
04035   return doReplace(start, _length, srcChars, srcStart, srcLength);
04036 }
04037
04038 // Replace (start, _length) with the single UChar srcChar.
04039 inline UnicodeString& UnicodeString::replace(int32_t start, int32_t _length,
04040                                               UChar srcChar) {
04041   const UChar *single = &srcChar;
04042   return doReplace(start, _length, single, 0, 1);
04043 }
04044
04045 // Replace (start, _length) with the UChars that U16_APPEND writes for the
04046 // UChar32 srcChar; the error flag set by the macro is not examined here.
04047 inline UnicodeString& UnicodeString::replace(int32_t start, int32_t _length,
04048                                               UChar32 srcChar) {
04049   UChar units[U16_MAX_LENGTH];
04050   int32_t unitCount = 0;
04051   UBool failed = FALSE;
04052   U16_APPEND(units, unitCount, U16_MAX_LENGTH, srcChar, failed);
04053   return doReplace(start, _length, units, 0, unitCount);
04054 }
04057 
04058 // replace() variant taking a [start, limit) range; inserts all of srcText.
04059 inline UnicodeString& UnicodeString::replaceBetween(int32_t start, int32_t limit,
04060                                                      const UnicodeString& srcText) {
04061   return doReplace(start, limit - start, srcText, 0, srcText.length());
04062 }
04063
04064 // Same, with the source also given as a [srcStart, srcLimit) range.
04065 inline UnicodeString& UnicodeString::replaceBetween(int32_t start, int32_t limit,
04066                                                      const UnicodeString& srcText,
04067                                                      int32_t srcStart, int32_t srcLimit) {
04068   const int32_t thisLength = limit - start, srcLength = srcLimit - srcStart;
04069   return doReplace(start, thisLength, srcText, srcStart, srcLength);
04070 }
04071 
04072 // Whole-string form: forwards to the full overload with complete ranges.
04073 inline UnicodeString& UnicodeString::findAndReplace(const UnicodeString& oldText,
04074                                                      const UnicodeString& newText) {
04075   return findAndReplace(0, length(), oldText, 0, oldText.length(), newText, 0, newText.length());
04076 }
04077
04078 // Same, restricted to the (start, _length) range of this string.
04079 inline UnicodeString& UnicodeString::findAndReplace(int32_t start, int32_t _length,
04080                                                      const UnicodeString& oldText,
04081                                                      const UnicodeString& newText) {
04082   return findAndReplace(start, _length, oldText, 0, oldText.length(),
04083                         newText, 0, newText.length());
04084 }
04085 
04086 // ============================
04087 // extract
04088 // ============================
04089 // Extract (start, _length) of this string into target by replacing all of
04090 // target's current contents.
04091 inline void
04092 UnicodeString::doExtract(int32_t start, int32_t _length, UnicodeString& target) const
04093 { const int32_t oldLength = target.length(); target.replace(0, oldLength, *this, start, _length); }
04094 
04095 // UChar* target: forwards to the matching doExtract() overload.
04096 inline void UnicodeString::extract(int32_t start, int32_t _length,
04097                                     UChar *target, int32_t targetStart) const {
04098   doExtract(start, _length, target, targetStart);
04099 }
04100
04101 // UnicodeString target: forwards to the matching doExtract() overload.
04102 inline void UnicodeString::extract(int32_t start, int32_t _length,
04103                                     UnicodeString& target) const {
04104   doExtract(start, _length, target);
04105 }
04107 
04108 #if !UCONFIG_NO_CONVERSION
04109 
04110 // Codepage extract without an explicit destination size: forwards to the
04111 // sized overload with 0xffffffff for a non-null dst and 0 for a null dst.
04112 inline int32_t
04113 UnicodeString::extract(int32_t start, int32_t _length,
04114                        char *dst, const char *codepage) const {
04115   // This dstSize value will be checked explicitly
04116   const uint32_t dstSize = (dst != 0) ? 0xffffffff : 0;
04117   return extract(start, _length, dst, dstSize, codepage);
04118 }
04120 
04121 #endif
04122 
04123 // Extract [start, limit): pin both indices, then pass on (start, length).
04124 inline void
04125 UnicodeString::extractBetween(int32_t start, int32_t limit,
04126                               UChar *dst, int32_t dstStart) const {
04127   pinIndex(start);
04128   pinIndex(limit);
04129   const int32_t count = limit - start;
04130   doExtract(start, count, dst, dstStart);
04131 }
04132 
04133 // tempSubString() for a [start, limit) range given instead of (start, length).
04134 inline UnicodeString
04135 UnicodeString::tempSubStringBetween(int32_t start, int32_t limit) const
04136 { const int32_t count = limit - start; return tempSubString(start, count); }
04137 
04138 // UChar at offset, or kInvalidUChar when offset is outside
04139 // [0, length()); the unsigned compare also rejects negative offsets.
04140 inline UChar
04141 UnicodeString::doCharAt(int32_t offset) const {
04142   if((uint32_t)offset >= (uint32_t)length()) {
04143     return kInvalidUChar;
04144   }
04145   return getArrayStart()[offset];
04146 }
04147 
04148 // UChar at offset; bounds handling is done by doCharAt().
04149 inline UChar UnicodeString::charAt(int32_t offset) const
04150 { const UChar unit = doCharAt(offset); return unit; }
04151 
04152 // Indexing behaves exactly like charAt(): both forward to doCharAt().
04153 inline UChar UnicodeString::operator[] (int32_t offset) const
04154 { const UChar unit = doCharAt(offset); return unit; }
04155 
04156 // UChar32 read with U16_GET at offset, or kInvalidUChar when offset
04157 // is outside [0, length()).
04158 inline UChar32
04159 UnicodeString::char32At(int32_t offset) const {
04160   const int32_t total = length();
04161   if((uint32_t)offset >= (uint32_t)total) {
04162     return kInvalidUChar;
04163   }
04164   const UChar *units = getArrayStart();
04165   UChar32 codePoint;
04166   U16_GET(units, 0, offset, total, codePoint);
04167   return codePoint;
04168 }
04169 
04170 // Offset after U16_SET_CP_START; 0 if offset is outside [0, length()).
04171 inline int32_t
04172 UnicodeString::getChar32Start(int32_t offset) const {
04173   if((uint32_t)offset >= (uint32_t)length()) {
04174     return 0;
04175   }
04176   const UChar *units = getArrayStart();
04177   U16_SET_CP_START(units, 0, offset);
04178   return offset;
04179 }
04180 
04181 // Offset after U16_SET_CP_LIMIT; length() if offset is outside [0, length()).
04182 inline int32_t
04183 UnicodeString::getChar32Limit(int32_t offset) const {
04184   const int32_t total = length();
04185   if((uint32_t)offset >= (uint32_t)total) {
04186     return total;
04187   }
04188   const UChar *units = getArrayStart();
04189   U16_SET_CP_LIMIT(units, 0, offset, total);
04190   return offset;
04191 }
04192 
04193 // Empty means fShortLength==0; a negative fShortLength marks a string
04194 // whose length is kept in fFields.fLength instead (see setLength()).
04195 inline UBool UnicodeString::isEmpty() const
04196 { return (UBool)(fShortLength == 0); }
04197 
04198 //========================================
04199 // Write implementation methods
04200 //========================================
04201 // Lengths <=127 live in fShortLength; larger ones in fFields.fLength (fShortLength=-1).
04202 inline void UnicodeString::setLength(int32_t len) {
04203   if(len > 127) {
04204     fUnion.fFields.fLength = len;
04205     fShortLength = (int8_t)-1;
04206   } else {
04207     fShortLength = (int8_t)len;
04208   }
04209 }
04210 
04211 inline void
04212 UnicodeString::setToEmpty() {
04213   fShortLength = 0;
04214   fFlags = kShortString;
04215 }
04216 
04217 inline void
04218 UnicodeString::setArray(UChar *array, int32_t len, int32_t capacity) {
04219   setLength(len);
04220   fUnion.fFields.fArray = array;
04221   fUnion.fFields.fCapacity = capacity;
04222 }
04223 
04224 inline const UChar *
04225 UnicodeString::getTerminatedBuffer() {
04226   if(!isWritable()) {
04227     return 0;
04228   } else {
04229     UChar *array = getArrayStart();
04230     int32_t len = length();
04231     if(len < getCapacity() && ((fFlags&kRefCounted) == 0 || refCount() == 1)) {
04232       /*
04233        * kRefCounted: Do not write the NUL if the buffer is shared.
04234        * That is mostly safe, except when the length of one copy was modified
04235        * without copy-on-write, e.g., via truncate(newLength) or remove(void).
04236        * Then the NUL would be written into the middle of another copy's string.
04237        */
04238       if(!(fFlags&kBufferIsReadonly)) {
04239         /*
04240          * We must not write to a readonly buffer, but it is known to be
04241          * NUL-terminated if len<capacity.
04242          * A shared, allocated buffer (refCount()>1) must not have its contents
04243          * modified, but the NUL at [len] is beyond the string contents,
04244          * and multiple string objects and threads writing the same NUL into the
04245          * same location is harmless.
04246          * In all other cases, the buffer is fully writable and it is anyway safe
04247          * to write the NUL.
04248          *
04249          * Note: An earlier version of this code tested whether there is a NUL
04250          * at [len] already, but, while safe, it generated lots of warnings from
04251          * tools like valgrind and Purify.
04252          */
04253         array[len] = 0;
04254       }
04255       return array;
04256     } else if(cloneArrayIfNeeded(len+1)) {
04257       array = getArrayStart();
04258       array[len] = 0;
04259       return array;
04260     } else {
04261       return 0;
04262     }
04263   }
04264 }
04265 
04266 inline UnicodeString&
04267 UnicodeString::operator= (UChar ch)
04268 { return doReplace(0, length(), &ch, 0, 1); }
04269 
04270 inline UnicodeString&
04271 UnicodeString::operator= (UChar32 ch)
04272 { return replace(0, length(), ch); }
04273 
04274 inline UnicodeString&
04275 UnicodeString::setTo(const UnicodeString& srcText,
04276              int32_t srcStart,
04277              int32_t srcLength)
04278 {
04279   unBogus();
04280   return doReplace(0, length(), srcText, srcStart, srcLength);
04281 }
04282 
04283 inline UnicodeString&
04284 UnicodeString::setTo(const UnicodeString& srcText,
04285              int32_t srcStart)
04286 {
04287   unBogus();
04288   srcText.pinIndex(srcStart);
04289   return doReplace(0, length(), srcText, srcStart, srcText.length() - srcStart);
04290 }
04291 
04292 inline UnicodeString&
04293 UnicodeString::setTo(const UnicodeString& srcText)
04294 {
04295   return copyFrom(srcText);
04296 }
04297 
04298 inline UnicodeString&
04299 UnicodeString::setTo(const UChar *srcChars,
04300              int32_t srcLength)
04301 {
04302   unBogus();
04303   return doReplace(0, length(), srcChars, 0, srcLength);
04304 }
04305 
04306 inline UnicodeString&
04307 UnicodeString::setTo(UChar srcChar)
04308 {
04309   unBogus();
04310   return doReplace(0, length(), &srcChar, 0, 1);
04311 }
04312 
04313 inline UnicodeString&
04314 UnicodeString::setTo(UChar32 srcChar)
04315 {
04316   unBogus();
04317   return replace(0, length(), srcChar);
04318 }
04319 
04320 inline UnicodeString&
04321 UnicodeString::append(const UnicodeString& srcText,
04322               int32_t srcStart,
04323               int32_t srcLength)
04324 { return doReplace(length(), 0, srcText, srcStart, srcLength); }
04325 
04326 inline UnicodeString&
04327 UnicodeString::append(const UnicodeString& srcText)
04328 { return doReplace(length(), 0, srcText, 0, srcText.length()); }
04329 
04330 inline UnicodeString&
04331 UnicodeString::append(const UChar *srcChars,
04332               int32_t srcStart,
04333               int32_t srcLength)
04334 { return doReplace(length(), 0, srcChars, srcStart, srcLength); }
04335 
04336 inline UnicodeString&
04337 UnicodeString::append(const UChar *srcChars,
04338               int32_t srcLength)
04339 { return doReplace(length(), 0, srcChars, 0, srcLength); }
04340 
04341 inline UnicodeString&
04342 UnicodeString::append(UChar srcChar)
04343 { return doReplace(length(), 0, &srcChar, 0, 1); }
04344 
04345 inline UnicodeString&
04346 UnicodeString::append(UChar32 srcChar) {
04347   UChar buffer[U16_MAX_LENGTH];
04348   int32_t _length = 0;
04349   UBool isError = FALSE;
04350   U16_APPEND(buffer, _length, U16_MAX_LENGTH, srcChar, isError);
04351   return doReplace(length(), 0, buffer, 0, _length);
04352 }
04353 
04354 inline UnicodeString&
04355 UnicodeString::operator+= (UChar ch)
04356 { return doReplace(length(), 0, &ch, 0, 1); }
04357 
04358 inline UnicodeString&
04359 UnicodeString::operator+= (UChar32 ch) {
04360   return append(ch);
04361 }
04362 
04363 inline UnicodeString&
04364 UnicodeString::operator+= (const UnicodeString& srcText)
04365 { return doReplace(length(), 0, srcText, 0, srcText.length()); }
04366 
04367 inline UnicodeString&
04368 UnicodeString::insert(int32_t start,
04369               const UnicodeString& srcText,
04370               int32_t srcStart,
04371               int32_t srcLength)
04372 { return doReplace(start, 0, srcText, srcStart, srcLength); }
04373 
04374 inline UnicodeString&
04375 UnicodeString::insert(int32_t start,
04376               const UnicodeString& srcText)
04377 { return doReplace(start, 0, srcText, 0, srcText.length()); }
04378 
04379 inline UnicodeString&
04380 UnicodeString::insert(int32_t start,
04381               const UChar *srcChars,
04382               int32_t srcStart,
04383               int32_t srcLength)
04384 { return doReplace(start, 0, srcChars, srcStart, srcLength); }
04385 
04386 inline UnicodeString&
04387 UnicodeString::insert(int32_t start,
04388               const UChar *srcChars,
04389               int32_t srcLength)
04390 { return doReplace(start, 0, srcChars, 0, srcLength); }
04391 
04392 inline UnicodeString&
04393 UnicodeString::insert(int32_t start,
04394               UChar srcChar)
04395 { return doReplace(start, 0, &srcChar, 0, 1); }
04396 
04397 inline UnicodeString&
04398 UnicodeString::insert(int32_t start,
04399               UChar32 srcChar)
04400 { return replace(start, 0, srcChar); }
04401 
04402 
04403 inline UnicodeString&
04404 UnicodeString::remove()
04405 {
04406   // remove() of a bogus string makes the string empty and non-bogus
04407   // we also un-alias a read-only alias to deal with NUL-termination
04408   // issues with getTerminatedBuffer()
04409   if(fFlags & (kIsBogus|kBufferIsReadonly)) {
04410     setToEmpty();
04411   } else {
04412     fShortLength = 0;
04413   }
04414   return *this;
04415 }
04416 
04417 inline UnicodeString&
04418 UnicodeString::remove(int32_t start,
04419              int32_t _length)
04420 {
04421     if(start <= 0 && _length == INT32_MAX) {
04422         // remove(guaranteed everything) of a bogus string makes the string empty and non-bogus
04423         return remove();
04424     }
04425     return doReplace(start, _length, NULL, 0, 0);
04426 }
04427 
04428 inline UnicodeString&
04429 UnicodeString::removeBetween(int32_t start,
04430                 int32_t limit)
04431 { return doReplace(start, limit - start, NULL, 0, 0); }
04432 
04433 inline UnicodeString &
04434 UnicodeString::retainBetween(int32_t start, int32_t limit) {
04435   truncate(limit);
04436   return doReplace(0, start, NULL, 0, 0);
04437 }
04438 
04439 inline UBool
04440 UnicodeString::truncate(int32_t targetLength)
04441 {
04442   if(isBogus() && targetLength == 0) {
04443     // truncate(0) of a bogus string makes the string empty and non-bogus
04444     unBogus();
04445     return FALSE;
04446   } else if((uint32_t)targetLength < (uint32_t)length()) {
04447     setLength(targetLength);
04448     if(fFlags&kBufferIsReadonly) {
04449       fUnion.fFields.fCapacity = targetLength;  // not NUL-terminated any more
04450     }
04451     return TRUE;
04452   } else {
04453     return FALSE;
04454   }
04455 }
04456 
04457 inline UnicodeString&
04458 UnicodeString::reverse()
04459 { return doReverse(0, length()); }
04460 
04461 inline UnicodeString&
04462 UnicodeString::reverse(int32_t start,
04463                int32_t _length)
04464 { return doReverse(start, _length); }
04465 
04466 U_NAMESPACE_END
04467 
04468 #endif
 All Data Structures Files Functions Variables Typedefs Enumerations Enumerator Friends Defines