ICU 4.8.1.1  4.8.1.1
normlzr.h
Go to the documentation of this file.
00001 /*
00002  ********************************************************************
00003  * COPYRIGHT:
00004  * Copyright (c) 1996-2011, International Business Machines Corporation and
00005  * others. All Rights Reserved.
00006  ********************************************************************
00007  */
00008 
00009 #ifndef NORMLZR_H
00010 #define NORMLZR_H
00011 
00012 #include "unicode/utypes.h"
00013 
00019 #if !UCONFIG_NO_NORMALIZATION
00020 
00021 #include "unicode/chariter.h"
00022 #include "unicode/normalizer2.h"
00023 #include "unicode/unistr.h"
00024 #include "unicode/unorm.h"
00025 #include "unicode/uobject.h"
00026 
00027 U_NAMESPACE_BEGIN
/**
 * Normalizer: declares static one-shot normalization helpers (normalize,
 * compose, decompose, quickCheck, isNormalized, concatenate, compare) and an
 * iterator-style API (current/first/last/next/previous) over an input text.
 *
 * NOTE(review): the API documentation comments of the original header are not
 * part of this listing (see the gaps in the embedded line numbers). The notes
 * added below describe only what the declarations themselves show; every
 * statement about runtime behavior is marked as an assumption and should be
 * confirmed against the ICU API reference.
 */
00130 class U_COMMON_API Normalizer : public UObject {
00131 public:
        /**
         * Unnamed enum holding the single constant DONE (0xffff).
         * NOTE(review): presumably the value the iteration functions return
         * when no further code point is available -- confirm.
         */
00137   enum {
00138       DONE=0xffff
00139   };
00140 
00141   // Constructors
00142 
        /** Constructs from a UnicodeString and a normalization mode. */
00153   Normalizer(const UnicodeString& str, UNormalizationMode mode);
00154 
        /** Constructs from a UChar array with an explicit length, and a normalization mode. */
00166   Normalizer(const UChar* str, int32_t length, UNormalizationMode mode);
00167 
        /** Constructs from a CharacterIterator and a normalization mode. */
00178   Normalizer(const CharacterIterator& iter, UNormalizationMode mode);
00179 
        /** Copy constructor. (Copy assignment is declared private below.) */
00185   Normalizer(const Normalizer& copy);
00186 
        /** Virtual destructor. */
00191   virtual ~Normalizer();
00192 
00193 
00194   //-------------------------------------------------------------------------
00195   // Static utility methods
00196   //-------------------------------------------------------------------------
00197 
        /**
         * Static helper taking a source string, a mode and an options word.
         * `result` and `status` are non-const references, i.e. written by the callee.
         * NOTE(review): presumably `result` receives the normalized form of
         * `source` -- confirm.
         */
00212   static void U_EXPORT2 normalize(const UnicodeString& source,
00213                         UNormalizationMode mode, int32_t options,
00214                         UnicodeString& result,
00215                         UErrorCode &status);
00216 
        /**
         * Static helper with the same parameter shape as decompose().
         * NOTE(review): `compat` presumably selects compatibility rather than
         * canonical composition -- confirm.
         */
00234   static void U_EXPORT2 compose(const UnicodeString& source,
00235                       UBool compat, int32_t options,
00236                       UnicodeString& result,
00237                       UErrorCode &status);
00238 
        /**
         * Static helper with the same parameter shape as compose().
         * NOTE(review): `compat` presumably selects compatibility rather than
         * canonical decomposition -- confirm.
         */
00256   static void U_EXPORT2 decompose(const UnicodeString& source,
00257                         UBool compat, int32_t options,
00258                         UnicodeString& result,
00259                         UErrorCode &status);
00260 
        /**
         * Convenience overload without an options argument. Its inline
         * definition later in this file forwards to the four-argument
         * overload with options set to 0.
         */
00281   static inline UNormalizationCheckResult
00282   quickCheck(const UnicodeString &source, UNormalizationMode mode, UErrorCode &status);
00283 
        /** Overload taking an explicit options word; returns a UNormalizationCheckResult. */
00297   static UNormalizationCheckResult
00298   quickCheck(const UnicodeString &source, UNormalizationMode mode, int32_t options, UErrorCode &status);
00299 
        /**
         * Convenience overload without an options argument. Its inline
         * definition later in this file forwards to the four-argument
         * overload with options set to 0.
         */
00320   static inline UBool
00321   isNormalized(const UnicodeString &src, UNormalizationMode mode, UErrorCode &errorCode);
00322 
        /** Overload taking an explicit options word; returns a UBool. */
00338   static UBool
00339   isNormalized(const UnicodeString &src, UNormalizationMode mode, int32_t options, UErrorCode &errorCode);
00340 
        /**
         * Takes two input strings, a writable `result`, a mode and options, and
         * returns a UnicodeString reference.
         * NOTE(review): presumably the returned reference is `result` -- confirm.
         */
00370   static UnicodeString &
00371   U_EXPORT2 concatenate(const UnicodeString &left, const UnicodeString &right,
00372               UnicodeString &result,
00373               UNormalizationMode mode, int32_t options,
00374               UErrorCode &errorCode);
00375 
        /**
         * Compares two strings; note that `options` is unsigned here (uint32_t).
         * The inline definition later in this file passes both strings' buffers
         * and lengths, the options and the error code to unorm_compare() and
         * returns its result.
         */
00440   static inline int32_t
00441   compare(const UnicodeString &s1, const UnicodeString &s2,
00442           uint32_t options,
00443           UErrorCode &errorCode);
00444 
00445   //-------------------------------------------------------------------------
00446   // Iteration API
00447   //-------------------------------------------------------------------------
00448 
        // The five functions below each return one UChar32 and are non-const.
        // NOTE(review): presumably they move or read the iteration position and
        // return DONE when nothing is available -- confirm.
00457   UChar32              current(void);
00458 
00467   UChar32              first(void);
00468 
00477   UChar32              last(void);
00478 
00493   UChar32              next(void);
00494 
00509   UChar32              previous(void);
00510 
        /**
         * Takes an index and returns nothing.
         * NOTE(review): the name suggests the position is changed without
         * normalizing anything at that point -- confirm.
         */
00520   void                 setIndexOnly(int32_t index);
00521 
        /** Takes no arguments and returns nothing. NOTE(review): presumably rewinds the iteration -- confirm. */
00527   void                reset(void);
00528 
        // Three const accessors returning int32_t indices.
        // NOTE(review): presumably positions in the input text -- confirm.
00543   int32_t            getIndex(void) const;
00544 
00553   int32_t            startIndex(void) const;
00554 
00565   int32_t            endIndex(void) const;
00566 
        /** Equality comparison with another Normalizer. */
00575   UBool        operator==(const Normalizer& that) const;
00576 
        /** Inequality; defined inline later in this file as the negation of operator==. */
00585   inline UBool        operator!=(const Normalizer& that) const;
00586 
        /**
         * Returns a pointer to a Normalizer.
         * NOTE(review): presumably a newly allocated copy owned by the caller -- confirm.
         */
00593   Normalizer*        clone(void) const;
00594 
        /** Const function returning an int32_t hash value. */
00601   int32_t                hashCode(void) const;
00602 
00603   //-------------------------------------------------------------------------
00604   // Property access methods
00605   //-------------------------------------------------------------------------
00606 
        /** Setter taking a UNormalizationMode. */
00622   void setMode(UNormalizationMode newMode);
00623 
        /** Const getter returning a UNormalizationMode. */
00634   UNormalizationMode getUMode(void) const;
00635 
        /** Sets one option, identified by `option`, to the boolean `value`. */
00652   void setOption(int32_t option,
00653          UBool value);
00654 
        /** Const query returning the boolean state of one option. */
00665   UBool getOption(int32_t option) const;
00666 
        // Three setText overloads follow, mirroring the three text forms the
        // constructors accept (UnicodeString, CharacterIterator, UChar array
        // with length). Each one additionally takes a UErrorCode reference.
00675   void setText(const UnicodeString& newText,
00676            UErrorCode &status);
00677 
00686   void setText(const CharacterIterator& newText,
00687            UErrorCode &status);
00688 
00698   void setText(const UChar* newText,
00699                     int32_t length,
00700             UErrorCode &status);
        /**
         * Non-const function that takes a writable UnicodeString.
         * NOTE(review): presumably copies the current input text into `result` -- confirm.
         */
00707   void            getText(UnicodeString&  result);
00708 
        /** Static accessor returning a UClassID. */
00714   static UClassID U_EXPORT2 getStaticClassID();
00715 
        /** Virtual const accessor returning a UClassID. */
00721   virtual UClassID getDynamicClassID() const;
00722 
00723 private:
00724   //-------------------------------------------------------------------------
00725   // Private functions
00726   //-------------------------------------------------------------------------
00727 
        // Declared private (and, per the comments, never implemented) so that
        // default construction and assignment are unavailable to users.
00728   Normalizer(); // default constructor not implemented
00729   Normalizer &operator=(const Normalizer &that); // assignment operator not implemented
00730 
00731   // Private utility methods for iteration
00732   // For documentation, see the source code
00733   UBool nextNormalize();
00734   UBool previousNormalize();
00735 
00736   void    init();
00737   void    clearBuffer(void);
00738 
00739   //-------------------------------------------------------------------------
00740   // Private data
00741   //-------------------------------------------------------------------------
00742 
00743   FilteredNormalizer2*fFilteredNorm2;  // owned if not NULL
00744   const Normalizer2  *fNorm2;  // not owned; may be equal to fFilteredNorm2
00745   UNormalizationMode  fUMode;
00746   int32_t             fOptions;
00747 
00748   // The input text and our position in it
00749   CharacterIterator  *text;
00750 
00751   // The normalization buffer is the result of normalization
00752   // of the source in [currentIndex..nextIndex[ .
00753   int32_t         currentIndex, nextIndex;
00754 
00755   // A buffer for holding intermediate results
00756   UnicodeString       buffer;
00757   int32_t         bufferPos;
00758 };
00759 
00760 //-------------------------------------------------------------------------
00761 // Inline implementations
00762 //-------------------------------------------------------------------------
00763 
/** Inequality: evaluates operator== on this object and returns the logical negation. */
00764 inline UBool
00765 Normalizer::operator!=(const Normalizer& other) const
00766 { const UBool isEqual = this->operator==(other); return !isEqual; }
00767 
/**
 * Options-free convenience overload: forwards to the four-argument
 * quickCheck() with the options word set to 0 and returns its result.
 */
00768 inline UNormalizationCheckResult
00769 Normalizer::quickCheck(const UnicodeString& source, UNormalizationMode mode,
00770                        UErrorCode &status) {
00771     const int32_t defaultOptions = 0;
00772     return Normalizer::quickCheck(source, mode, defaultOptions, status);
00773 }
00774 
/**
 * Options-free convenience overload: forwards to the four-argument
 * isNormalized() with the options word set to 0 and returns its result.
 */
00775 inline UBool
00776 Normalizer::isNormalized(const UnicodeString& source, UNormalizationMode mode,
00777                          UErrorCode &status) {
00778     const int32_t defaultOptions = 0;
00779     return Normalizer::isNormalized(source, mode, defaultOptions, status);
00780 }
00781 
/**
 * Thin wrapper over unorm_compare(): hands it the buffer and length of each
 * string, the options word and the address of the error code, and returns
 * whatever unorm_compare() returns.
 */
00782 inline int32_t
00783 Normalizer::compare(const UnicodeString &s1, const UnicodeString &s2,
00784                     uint32_t options,
00785                     UErrorCode &errorCode) {
00786   // no checks here: unorm_compare validates every argument itself
00787   const int32_t length1 = s1.length();
00788   const int32_t length2 = s2.length();
00789   return unorm_compare(s1.getBuffer(), length1,
00790                        s2.getBuffer(), length2, options, &errorCode);
00791 }
00792 
00793 U_NAMESPACE_END
00794 
00795 #endif /* #if !UCONFIG_NO_NORMALIZATION */
00796 
00797 #endif // NORMLZR_H
 All Data Structures Files Functions Variables Typedefs Enumerations Enumerator Friends Defines