ICU 4.8.1.1
4.8.1.1
|
00001 /* 00002 *************************************************************************** 00003 * Copyright (C) 1999-2011, International Business Machines Corporation 00004 * and others. All Rights Reserved. 00005 *************************************************************************** 00006 * Date Name Description 00007 * 10/20/99 alan Creation. 00008 *************************************************************************** 00009 */ 00010 00011 #ifndef UNICODESET_H 00012 #define UNICODESET_H 00013 00014 #include "unicode/unifilt.h" 00015 #include "unicode/unistr.h" 00016 #include "unicode/uset.h" 00017 00023 U_NAMESPACE_BEGIN 00024 00025 class BMPSet; 00026 class ParsePosition; 00027 class SymbolTable; 00028 class UnicodeSetStringSpan; 00029 class UVector; 00030 class RuleCharacterIterator; 00031 00272 class U_COMMON_API UnicodeSet : public UnicodeFilter { 00273 00274 int32_t len; // length of list used; 0 <= len <= capacity 00275 int32_t capacity; // capacity of list 00276 UChar32* list; // MUST be terminated with HIGH 00277 BMPSet *bmpSet; // The set is frozen iff either bmpSet or stringSpan is not NULL. 00278 UChar32* buffer; // internal buffer, may be NULL 00279 int32_t bufferCapacity; // capacity of buffer 00280 int32_t patLen; 00281 00291 UChar *pat; 00292 UVector* strings; // maintained in sorted order 00293 UnicodeSetStringSpan *stringSpan; 00294 00295 private: 00296 enum { // constants 00297 kIsBogus = 1 // This set is bogus (i.e. not valid) 00298 }; 00299 uint8_t fFlags; // Bit flag (see constants above) 00300 public: 00310 inline UBool isBogus(void) const; 00311 00328 void setToBogus(); 00329 00330 public: 00331 00332 enum { 00337 MIN_VALUE = 0, 00338 00343 MAX_VALUE = 0x10ffff 00344 }; 00345 00346 //---------------------------------------------------------------- 00347 // Constructors &c 00348 //---------------------------------------------------------------- 00349 00350 public: 00351 00356 UnicodeSet(); 00357 00366 UnicodeSet(UChar32 start, UChar32 end); 00367 00376 UnicodeSet(const UnicodeString& pattern, 00377 UErrorCode& status); 00378 00391 UnicodeSet(const UnicodeString& pattern, 00392 uint32_t options, 00393 const SymbolTable* symbols, 00394 UErrorCode& status); 00395 00409 UnicodeSet(const UnicodeString& pattern, ParsePosition& pos, 00410 uint32_t options, 00411 const SymbolTable* symbols, 00412 UErrorCode& status); 00413 00418 UnicodeSet(const UnicodeSet& o); 00419 00424 virtual ~UnicodeSet(); 00425 00431 UnicodeSet& operator=(const UnicodeSet& o); 00432 00444 virtual UBool operator==(const UnicodeSet& o) const; 00445 00451 UBool operator!=(const UnicodeSet& o) const; 00452 00462 virtual UnicodeFunctor* clone() const; 00463 00471 virtual int32_t hashCode(void) const; 00472 00481 inline static UnicodeSet *fromUSet(USet *uset); 00482 00491 inline static const UnicodeSet *fromUSet(const USet *uset); 00492 00500 inline USet *toUSet(); 00501 00502 00510 inline const USet * toUSet() const; 00511 00512 00513 //---------------------------------------------------------------- 00514 // Freezable API 00515 //---------------------------------------------------------------- 00516 00525 inline UBool isFrozen() const; 00526 00540 UnicodeFunctor *freeze(); 00541 00550 UnicodeFunctor *cloneAsThawed() const; 00551 00552 //---------------------------------------------------------------- 00553 // Public API 00554 //---------------------------------------------------------------- 00555 00566 UnicodeSet& set(UChar32 start, UChar32 end); 00567 00573 static UBool resemblesPattern(const UnicodeString& pattern, 00574 int32_t pos); 00575 00588 UnicodeSet& applyPattern(const UnicodeString& pattern, 00589 UErrorCode& status); 00590 00607 UnicodeSet& applyPattern(const UnicodeString& pattern, 00608 uint32_t options, 00609 const SymbolTable* symbols, 00610 UErrorCode& status); 00611 00643 UnicodeSet& applyPattern(const UnicodeString& pattern, 00644 ParsePosition& pos, 00645 uint32_t options, 00646 const SymbolTable* symbols, 00647 UErrorCode& status); 00648 00662 virtual UnicodeString& toPattern(UnicodeString& result, 00663 UBool escapeUnprintable = FALSE) const; 00664 00687 UnicodeSet& applyIntPropertyValue(UProperty prop, 00688 int32_t value, 00689 UErrorCode& ec); 00690 00720 UnicodeSet& applyPropertyAlias(const UnicodeString& prop, 00721 const UnicodeString& value, 00722 UErrorCode& ec); 00723 00732 virtual int32_t size(void) const; 00733 00740 virtual UBool isEmpty(void) const; 00741 00749 virtual UBool contains(UChar32 c) const; 00750 00759 virtual UBool contains(UChar32 start, UChar32 end) const; 00760 00768 UBool contains(const UnicodeString& s) const; 00769 00777 virtual UBool containsAll(const UnicodeSet& c) const; 00778 00786 UBool containsAll(const UnicodeString& s) const; 00787 00796 UBool containsNone(UChar32 start, UChar32 end) const; 00797 00805 UBool containsNone(const UnicodeSet& c) const; 00806 00814 UBool containsNone(const UnicodeString& s) const; 00815 00824 inline UBool containsSome(UChar32 start, UChar32 end) const; 00825 00833 inline UBool containsSome(const UnicodeSet& s) const; 00834 00842 inline UBool containsSome(const UnicodeString& s) const; 00843 00862 int32_t span(const UChar *s, int32_t length, USetSpanCondition spanCondition) const; 00863 00876 inline int32_t span(const UnicodeString &s, int32_t start, USetSpanCondition spanCondition) const; 00877 00895 int32_t spanBack(const UChar *s, int32_t length, USetSpanCondition spanCondition) const; 00896 00910 inline int32_t spanBack(const UnicodeString &s, int32_t limit, USetSpanCondition spanCondition) const; 00911 00930 int32_t spanUTF8(const char *s, int32_t length, USetSpanCondition spanCondition) const; 00931 00949 int32_t spanBackUTF8(const char *s, int32_t length, USetSpanCondition spanCondition) const; 00950 00955 virtual UMatchDegree matches(const Replaceable& text, 00956 int32_t& offset, 00957 int32_t limit, 00958 UBool incremental); 00959 00960 private: 00983 static int32_t matchRest(const Replaceable& text, 00984 int32_t start, int32_t limit, 00985 const UnicodeString& s); 00986 00996 int32_t findCodePoint(UChar32 c) const; 00997 00998 public: 00999 01007 virtual void addMatchSetTo(UnicodeSet& toUnionTo) const; 01008 01017 int32_t indexOf(UChar32 c) const; 01018 01028 UChar32 charAt(int32_t index) const; 01029 01044 virtual UnicodeSet& add(UChar32 start, UChar32 end); 01045 01053 UnicodeSet& add(UChar32 c); 01054 01066 UnicodeSet& add(const UnicodeString& s); 01067 01068 private: 01074 static int32_t getSingleCP(const UnicodeString& s); 01075 01076 void _add(const UnicodeString& s); 01077 01078 public: 01087 UnicodeSet& addAll(const UnicodeString& s); 01088 01097 UnicodeSet& retainAll(const UnicodeString& s); 01098 01107 UnicodeSet& complementAll(const UnicodeString& s); 01108 01117 UnicodeSet& removeAll(const UnicodeString& s); 01118 01127 static UnicodeSet* U_EXPORT2 createFrom(const UnicodeString& s); 01128 01129 01137 static UnicodeSet* U_EXPORT2 createFromAll(const UnicodeString& s); 01138 01152 virtual UnicodeSet& retain(UChar32 start, UChar32 end); 01153 01154 01160 UnicodeSet& retain(UChar32 c); 01161 01175 virtual UnicodeSet& remove(UChar32 start, UChar32 end); 01176 01184 UnicodeSet& remove(UChar32 c); 01185 01195 UnicodeSet& remove(const UnicodeString& s); 01196 01204 virtual UnicodeSet& complement(void); 01205 01220 virtual UnicodeSet& complement(UChar32 start, UChar32 end); 01221 01229 UnicodeSet& complement(UChar32 c); 01230 01241 UnicodeSet& complement(const UnicodeString& s); 01242 01255 virtual UnicodeSet& addAll(const UnicodeSet& c); 01256 01268 virtual UnicodeSet& retainAll(const UnicodeSet& c); 01269 01281 virtual UnicodeSet& removeAll(const UnicodeSet& c); 01282 01293 virtual UnicodeSet& complementAll(const UnicodeSet& c); 01294 01301 virtual UnicodeSet& clear(void); 01302 01328 UnicodeSet& closeOver(int32_t attribute); 01329 01336 virtual UnicodeSet &removeAllStrings(); 01337 01345 virtual int32_t getRangeCount(void) const; 01346 01354 virtual UChar32 getRangeStart(int32_t index) const; 01355 01363 virtual UChar32 getRangeEnd(int32_t index) const; 01364 01413 int32_t serialize(uint16_t *dest, int32_t destCapacity, UErrorCode& ec) const; 01414 01421 virtual UnicodeSet& compact(); 01422 01434 static UClassID U_EXPORT2 getStaticClassID(void); 01435 01444 virtual UClassID getDynamicClassID(void) const; 01445 01446 private: 01447 01448 // Private API for the USet API 01449 01450 friend class USetAccess; 01451 01452 int32_t getStringCount() const; 01453 01454 const UnicodeString* getString(int32_t index) const; 01455 01456 //---------------------------------------------------------------- 01457 // RuleBasedTransliterator support 01458 //---------------------------------------------------------------- 01459 01460 private: 01461 01467 virtual UBool matchesIndexValue(uint8_t v) const; 01468 01469 private: 01470 01471 //---------------------------------------------------------------- 01472 // Implementation: Clone as thawed (see ICU4J Freezable) 01473 //---------------------------------------------------------------- 01474 01475 UnicodeSet(const UnicodeSet& o, UBool /* asThawed */); 01476 01477 //---------------------------------------------------------------- 01478 // Implementation: Pattern parsing 01479 //---------------------------------------------------------------- 01480 01481 void applyPattern(RuleCharacterIterator& chars, 01482 const SymbolTable* symbols, 01483 UnicodeString& rebuiltPat, 01484 uint32_t options, 01485 UErrorCode& ec); 01486 01487 //---------------------------------------------------------------- 01488 // Implementation: Utility methods 01489 //---------------------------------------------------------------- 01490 01491 void ensureCapacity(int32_t newLen, UErrorCode& ec); 01492 01493 void ensureBufferCapacity(int32_t newLen, UErrorCode& ec); 01494 01495 void swapBuffers(void); 01496 01497 UBool allocateStrings(UErrorCode &status); 01498 01499 UnicodeString& _toPattern(UnicodeString& result, 01500 UBool escapeUnprintable) const; 01501 01502 UnicodeString& _generatePattern(UnicodeString& result, 01503 UBool escapeUnprintable) const; 01504 01505 static void _appendToPat(UnicodeString& buf, const UnicodeString& s, UBool escapeUnprintable); 01506 01507 static void _appendToPat(UnicodeString& buf, UChar32 c, UBool escapeUnprintable); 01508 01509 //---------------------------------------------------------------- 01510 // Implementation: Fundamental operators 01511 //---------------------------------------------------------------- 01512 01513 void exclusiveOr(const UChar32* other, int32_t otherLen, int8_t polarity); 01514 01515 void add(const UChar32* other, int32_t otherLen, int8_t polarity); 01516 01517 void retain(const UChar32* other, int32_t otherLen, int8_t polarity); 01518 01524 static UBool resemblesPropertyPattern(const UnicodeString& pattern, 01525 int32_t pos); 01526 01527 static UBool resemblesPropertyPattern(RuleCharacterIterator& chars, 01528 int32_t iterOpts); 01529 01569 UnicodeSet& applyPropertyPattern(const UnicodeString& pattern, 01570 ParsePosition& ppos, 01571 UErrorCode &ec); 01572 01573 void applyPropertyPattern(RuleCharacterIterator& chars, 01574 UnicodeString& rebuiltPat, 01575 UErrorCode& ec); 01576 01577 static const UnicodeSet* getInclusions(int32_t src, UErrorCode &status); 01578 01583 typedef UBool (*Filter)(UChar32 codePoint, void* context); 01584 01594 void applyFilter(Filter filter, 01595 void* context, 01596 int32_t src, 01597 UErrorCode &status); 01598 01602 void setPattern(const UnicodeString& newPat); 01606 void releasePattern(); 01607 01608 friend class UnicodeSetIterator; 01609 }; 01610 01611 01612 01613 inline UBool UnicodeSet::operator!=(const UnicodeSet& o) const { 01614 return !operator==(o); 01615 } 01616 01617 inline UBool UnicodeSet::isFrozen() const { 01618 return (UBool)(bmpSet!=NULL || stringSpan!=NULL); 01619 } 01620 01621 inline UBool UnicodeSet::containsSome(UChar32 start, UChar32 end) const { 01622 return !containsNone(start, end); 01623 } 01624 01625 inline UBool UnicodeSet::containsSome(const UnicodeSet& s) const { 01626 return !containsNone(s); 01627 } 01628 01629 inline UBool UnicodeSet::containsSome(const UnicodeString& s) const { 01630 return !containsNone(s); 01631 } 01632 01633 inline UBool UnicodeSet::isBogus() const { 01634 return (UBool)(fFlags & kIsBogus); 01635 } 01636 01637 inline UnicodeSet *UnicodeSet::fromUSet(USet *uset) { 01638 return reinterpret_cast<UnicodeSet *>(uset); 01639 } 01640 01641 inline const UnicodeSet *UnicodeSet::fromUSet(const USet *uset) { 01642 return reinterpret_cast<const UnicodeSet *>(uset); 01643 } 01644 01645 inline USet *UnicodeSet::toUSet() { 01646 return reinterpret_cast<USet *>(this); 01647 } 01648 01649 inline const USet *UnicodeSet::toUSet() const { 01650 return reinterpret_cast<const USet *>(this); 01651 } 01652 01653 inline int32_t UnicodeSet::span(const UnicodeString &s, int32_t start, USetSpanCondition spanCondition) const { 01654 int32_t sLength=s.length(); 01655 if(start<0) { 01656 start=0; 01657 } else if(start>sLength) { 01658 start=sLength; 01659 } 01660 return start+span(s.getBuffer()+start, sLength-start, spanCondition); 01661 } 01662 01663 inline int32_t UnicodeSet::spanBack(const UnicodeString &s, int32_t limit, USetSpanCondition spanCondition) const { 01664 int32_t sLength=s.length(); 01665 if(limit<0) { 01666 limit=0; 01667 } else if(limit>sLength) { 01668 limit=sLength; 01669 } 01670 return spanBack(s.getBuffer(), limit, spanCondition); 01671 } 01672 01673 U_NAMESPACE_END 01674 01675 #endif