// ICU 4.8.1.1
00001 /* 00002 ******************************************************************** 00003 * COPYRIGHT: 00004 * Copyright (c) 1996-2011, International Business Machines Corporation and 00005 * others. All Rights Reserved. 00006 ******************************************************************** 00007 */ 00008 00009 #ifndef NORMLZR_H 00010 #define NORMLZR_H 00011 00012 #include "unicode/utypes.h" 00013 00019 #if !UCONFIG_NO_NORMALIZATION 00020 00021 #include "unicode/chariter.h" 00022 #include "unicode/normalizer2.h" 00023 #include "unicode/unistr.h" 00024 #include "unicode/unorm.h" 00025 #include "unicode/uobject.h" 00026 00027 U_NAMESPACE_BEGIN 00130 class U_COMMON_API Normalizer : public UObject { 00131 public: 00137 enum { 00138 DONE=0xffff 00139 }; 00140 00141 // Constructors 00142 00153 Normalizer(const UnicodeString& str, UNormalizationMode mode); 00154 00166 Normalizer(const UChar* str, int32_t length, UNormalizationMode mode); 00167 00178 Normalizer(const CharacterIterator& iter, UNormalizationMode mode); 00179 00185 Normalizer(const Normalizer& copy); 00186 00191 virtual ~Normalizer(); 00192 00193 00194 //------------------------------------------------------------------------- 00195 // Static utility methods 00196 //------------------------------------------------------------------------- 00197 00212 static void U_EXPORT2 normalize(const UnicodeString& source, 00213 UNormalizationMode mode, int32_t options, 00214 UnicodeString& result, 00215 UErrorCode &status); 00216 00234 static void U_EXPORT2 compose(const UnicodeString& source, 00235 UBool compat, int32_t options, 00236 UnicodeString& result, 00237 UErrorCode &status); 00238 00256 static void U_EXPORT2 decompose(const UnicodeString& source, 00257 UBool compat, int32_t options, 00258 UnicodeString& result, 00259 UErrorCode &status); 00260 00281 static inline UNormalizationCheckResult 00282 quickCheck(const UnicodeString &source, UNormalizationMode mode, UErrorCode &status); 00283 00297 static 
UNormalizationCheckResult 00298 quickCheck(const UnicodeString &source, UNormalizationMode mode, int32_t options, UErrorCode &status); 00299 00320 static inline UBool 00321 isNormalized(const UnicodeString &src, UNormalizationMode mode, UErrorCode &errorCode); 00322 00338 static UBool 00339 isNormalized(const UnicodeString &src, UNormalizationMode mode, int32_t options, UErrorCode &errorCode); 00340 00370 static UnicodeString & 00371 U_EXPORT2 concatenate(const UnicodeString &left, const UnicodeString &right, 00372 UnicodeString &result, 00373 UNormalizationMode mode, int32_t options, 00374 UErrorCode &errorCode); 00375 00440 static inline int32_t 00441 compare(const UnicodeString &s1, const UnicodeString &s2, 00442 uint32_t options, 00443 UErrorCode &errorCode); 00444 00445 //------------------------------------------------------------------------- 00446 // Iteration API 00447 //------------------------------------------------------------------------- 00448 00457 UChar32 current(void); 00458 00467 UChar32 first(void); 00468 00477 UChar32 last(void); 00478 00493 UChar32 next(void); 00494 00509 UChar32 previous(void); 00510 00520 void setIndexOnly(int32_t index); 00521 00527 void reset(void); 00528 00543 int32_t getIndex(void) const; 00544 00553 int32_t startIndex(void) const; 00554 00565 int32_t endIndex(void) const; 00566 00575 UBool operator==(const Normalizer& that) const; 00576 00585 inline UBool operator!=(const Normalizer& that) const; 00586 00593 Normalizer* clone(void) const; 00594 00601 int32_t hashCode(void) const; 00602 00603 //------------------------------------------------------------------------- 00604 // Property access methods 00605 //------------------------------------------------------------------------- 00606 00622 void setMode(UNormalizationMode newMode); 00623 00634 UNormalizationMode getUMode(void) const; 00635 00652 void setOption(int32_t option, 00653 UBool value); 00654 00665 UBool getOption(int32_t option) const; 00666 00675 void 
setText(const UnicodeString& newText, 00676 UErrorCode &status); 00677 00686 void setText(const CharacterIterator& newText, 00687 UErrorCode &status); 00688 00698 void setText(const UChar* newText, 00699 int32_t length, 00700 UErrorCode &status); 00707 void getText(UnicodeString& result); 00708 00714 static UClassID U_EXPORT2 getStaticClassID(); 00715 00721 virtual UClassID getDynamicClassID() const; 00722 00723 private: 00724 //------------------------------------------------------------------------- 00725 // Private functions 00726 //------------------------------------------------------------------------- 00727 00728 Normalizer(); // default constructor not implemented 00729 Normalizer &operator=(const Normalizer &that); // assignment operator not implemented 00730 00731 // Private utility methods for iteration 00732 // For documentation, see the source code 00733 UBool nextNormalize(); 00734 UBool previousNormalize(); 00735 00736 void init(); 00737 void clearBuffer(void); 00738 00739 //------------------------------------------------------------------------- 00740 // Private data 00741 //------------------------------------------------------------------------- 00742 00743 FilteredNormalizer2*fFilteredNorm2; // owned if not NULL 00744 const Normalizer2 *fNorm2; // not owned; may be equal to fFilteredNorm2 00745 UNormalizationMode fUMode; 00746 int32_t fOptions; 00747 00748 // The input text and our position in it 00749 CharacterIterator *text; 00750 00751 // The normalization buffer is the result of normalization 00752 // of the source in [currentIndex..nextIndex[ . 
00753 int32_t currentIndex, nextIndex; 00754 00755 // A buffer for holding intermediate results 00756 UnicodeString buffer; 00757 int32_t bufferPos; 00758 }; 00759 00760 //------------------------------------------------------------------------- 00761 // Inline implementations 00762 //------------------------------------------------------------------------- 00763 00764 inline UBool 00765 Normalizer::operator!= (const Normalizer& other) const 00766 { return ! operator==(other); } 00767 00768 inline UNormalizationCheckResult 00769 Normalizer::quickCheck(const UnicodeString& source, 00770 UNormalizationMode mode, 00771 UErrorCode &status) { 00772 return quickCheck(source, mode, 0, status); 00773 } 00774 00775 inline UBool 00776 Normalizer::isNormalized(const UnicodeString& source, 00777 UNormalizationMode mode, 00778 UErrorCode &status) { 00779 return isNormalized(source, mode, 0, status); 00780 } 00781 00782 inline int32_t 00783 Normalizer::compare(const UnicodeString &s1, const UnicodeString &s2, 00784 uint32_t options, 00785 UErrorCode &errorCode) { 00786 // all argument checking is done in unorm_compare 00787 return unorm_compare(s1.getBuffer(), s1.length(), 00788 s2.getBuffer(), s2.length(), 00789 options, 00790 &errorCode); 00791 } 00792 00793 U_NAMESPACE_END 00794 00795 #endif /* #if !UCONFIG_NO_NORMALIZATION */ 00796 00797 #endif // NORMLZR_H