ICU 4.8.1.1  4.8.1.1
normalizer2.h
Go to the documentation of this file.
00001 /*
00002 *******************************************************************************
00003 *
00004 *   Copyright (C) 2009-2011, International Business Machines
00005 *   Corporation and others.  All Rights Reserved.
00006 *
00007 *******************************************************************************
00008 *   file name:  normalizer2.h
00009 *   encoding:   US-ASCII
00010 *   tab size:   8 (not used)
00011 *   indentation:4
00012 *
00013 *   created on: 2009nov22
00014 *   created by: Markus W. Scherer
00015 */
00016 
00017 #ifndef __NORMALIZER2_H__
00018 #define __NORMALIZER2_H__
00019 
00025 #include "unicode/utypes.h"
00026 
00027 #if !UCONFIG_NO_NORMALIZATION
00028 
00029 #include "unicode/uniset.h"
00030 #include "unicode/unistr.h"
00031 #include "unicode/unorm2.h"
00032 
00033 U_NAMESPACE_BEGIN
00034 
/**
 * Abstract interface for Unicode normalization operations.
 *
 * Every operation except the two-argument normalize() convenience overload is
 * pure virtual, so this class cannot be instantiated directly; callers get a
 * pointer-to-const instance from getInstance() and use the const member
 * functions below.
 *
 * NOTE(review): ownership and lifetime of the pointer returned by
 * getInstance() are not visible in this header -- the ICU API reference says
 * the instance is cached and must not be deleted by the caller; confirm.
 */
00078 class U_COMMON_API Normalizer2 : public UObject {
00079 public:
          /**
           * Factory: looks up a Normalizer2 for the given data package, data
           * name and normalization mode.
           *
           * @param packageName data package identifier (presumably NULL selects
           *                    ICU's built-in data -- confirm against ICU docs)
           * @param name        name of the normalization data within the package
           * @param mode        which UNormalization2Mode the instance should apply
           * @param errorCode   ICU in/out error code
           * @return pointer to a const Normalizer2 (see ownership note on the class)
           */
00101     static const Normalizer2 *
00102     getInstance(const char *packageName,
00103                 const char *name,
00104                 UNormalization2Mode mode,
00105                 UErrorCode &errorCode);
00106 
          /**
           * Convenience overload: normalizes src into a fresh local string by
           * forwarding to the three-argument normalize() and returns that
           * string by value.
           *
           * @param src       source string
           * @param errorCode ICU in/out error code, passed through unchanged
           * @return the local result string filled in by normalize(src, dest, errorCode)
           */
00117     UnicodeString
00118     normalize(const UnicodeString &src, UErrorCode &errorCode) const {
00119         UnicodeString result;
00120         normalize(src, result, errorCode);
00121         return result;
00122     }
          /**
           * Writes the normalized form of src into dest.
           *
           * @param src       source string
           * @param dest      destination string
           *                  (NOTE(review): presumably its previous contents are
           *                  replaced and it must not alias src -- confirm)
           * @param errorCode ICU in/out error code
           * @return a UnicodeString reference (presumably dest -- confirm)
           */
00136     virtual UnicodeString &
00137     normalize(const UnicodeString &src,
00138               UnicodeString &dest,
00139               UErrorCode &errorCode) const = 0;
          /**
           * Appends the normalized form of second to first.
           *
           * NOTE(review): presumably first is expected to be normalized
           * already and the join point is fixed up so the whole result is
           * normalized -- confirm against the ICU API reference.
           *
           * @param first     string that is appended to (modified in place)
           * @param second    string whose normalized form is appended
           * @param errorCode ICU in/out error code
           * @return a UnicodeString reference (presumably first -- confirm)
           */
00154     virtual UnicodeString &
00155     normalizeSecondAndAppend(UnicodeString &first,
00156                              const UnicodeString &second,
00157                              UErrorCode &errorCode) const = 0;
          /**
           * Appends second to first.
           *
           * NOTE(review): judging by the sibling normalizeSecondAndAppend(),
           * this variant presumably assumes both inputs are already
           * normalized and only repairs the boundary between them -- confirm.
           *
           * @param first     string that is appended to (modified in place)
           * @param second    string to append
           * @param errorCode ICU in/out error code
           * @return a UnicodeString reference (presumably first -- confirm)
           */
00172     virtual UnicodeString &
00173     append(UnicodeString &first,
00174            const UnicodeString &second,
00175            UErrorCode &errorCode) const = 0;
00176 
          /**
           * Retrieves the decomposition of code point c into decomposition.
           *
           * @param c             code point to look up
           * @param decomposition output string
           * @return UBool (presumably TRUE only when c has a decomposition
           *         under this normalizer -- confirm)
           */
00190     virtual UBool
00191     getDecomposition(UChar32 c, UnicodeString &decomposition) const = 0;
00192 
          /**
           * Tests whether s is normalized.
           *
           * @param s         string to test
           * @param errorCode ICU in/out error code
           * @return UBool result of the test
           */
00207     virtual UBool
00208     isNormalized(const UnicodeString &s, UErrorCode &errorCode) const = 0;
00209 
          /**
           * Normalization check that reports a UNormalizationCheckResult
           * rather than a plain boolean.
           *
           * NOTE(review): presumably a fast test that may answer "maybe"
           * instead of doing full work -- confirm against ICU docs.
           *
           * @param s         string to test
           * @param errorCode ICU in/out error code
           * @return the check result for s
           */
00225     virtual UNormalizationCheckResult
00226     quickCheck(const UnicodeString &s, UErrorCode &errorCode) const = 0;
00227 
          /**
           * Reports a position in s as an int32_t.
           *
           * NOTE(review): by its name, presumably the end index of the
           * leading part of s for which the quick check answers "yes" --
           * confirm against ICU docs.
           *
           * @param s         string to scan
           * @param errorCode ICU in/out error code
           * @return an index into s
           */
00250     virtual int32_t
00251     spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const = 0;
00252 
          /**
           * Per-code-point property query.
           * NOTE(review): presumably TRUE when c always starts a new
           * normalization segment, independent of preceding text -- confirm.
           *
           * @param c code point to test
           */
00266     virtual UBool hasBoundaryBefore(UChar32 c) const = 0;
00267 
          /**
           * Per-code-point property query.
           * NOTE(review): presumably TRUE when c always ends a normalization
           * segment, independent of following text -- confirm.
           *
           * @param c code point to test
           */
00282     virtual UBool hasBoundaryAfter(UChar32 c) const = 0;
00283 
          /**
           * Per-code-point property query.
           * NOTE(review): presumably TRUE when c is unaffected by
           * normalization and does not interact with its neighbours -- confirm.
           *
           * @param c code point to test
           */
00297     virtual UBool isInert(UChar32 c) const = 0;
00298 
00299 private:
00300     // No ICU "poor man's RTTI" for this class nor its subclasses.
          // Declared private, so code outside this class cannot call it on a Normalizer2.
00301     virtual UClassID getDynamicClassID() const;
00302 };
00303 
/**
 * Normalizer2 implementation that pairs another Normalizer2 with a
 * UnicodeSet filter.
 *
 * The wrapped normalizer and the filter set are held by reference (members
 * norm2 and set), not copied, so both objects must outlive this instance.
 *
 * NOTE(review): the filtering semantics live in the .cpp file; presumably
 * only text covered by the set is handed to the wrapped normalizer and other
 * text passes through unchanged -- confirm against the ICU API reference.
 */
00315 class U_COMMON_API FilteredNormalizer2 : public Normalizer2 {
00316 public:
          /**
           * Binds this object to an existing normalizer and filter set.
           * Nothing is copied or adopted: only references are stored.
           *
           * @param n2        normalizer to wrap; must remain valid while this
           *                  object is in use
           * @param filterSet filter set; must remain valid while this object
           *                  is in use
           */
00327     FilteredNormalizer2(const Normalizer2 &n2, const UnicodeSet &filterSet) :
00328             norm2(n2), set(filterSet) {}
00329 
          /**
           * Overrides Normalizer2::normalize(src, dest, errorCode).
           *
           * @param src       source string
           * @param dest      destination string
           * @param errorCode ICU in/out error code
           * @return a UnicodeString reference (presumably dest -- confirm)
           */
00343     virtual UnicodeString &
00344     normalize(const UnicodeString &src,
00345               UnicodeString &dest,
00346               UErrorCode &errorCode) const;
          /**
           * Overrides Normalizer2::normalizeSecondAndAppend().
           *
           * @param first     string that is appended to (modified in place)
           * @param second    string whose normalized form is appended
           * @param errorCode ICU in/out error code
           * @return a UnicodeString reference (presumably first -- confirm)
           */
00361     virtual UnicodeString &
00362     normalizeSecondAndAppend(UnicodeString &first,
00363                              const UnicodeString &second,
00364                              UErrorCode &errorCode) const;
          /**
           * Overrides Normalizer2::append().
           *
           * @param first     string that is appended to (modified in place)
           * @param second    string to append
           * @param errorCode ICU in/out error code
           * @return a UnicodeString reference (presumably first -- confirm)
           */
00379     virtual UnicodeString &
00380     append(UnicodeString &first,
00381            const UnicodeString &second,
00382            UErrorCode &errorCode) const;
00383 
          /**
           * Overrides Normalizer2::getDecomposition().
           *
           * @param c             code point to look up
           * @param decomposition output string
           * @return UBool (presumably TRUE only when c has a decomposition
           *         under this filtered normalizer -- confirm)
           */
00394     virtual UBool
00395     getDecomposition(UChar32 c, UnicodeString &decomposition) const;
00396 
          /**
           * Overrides Normalizer2::isNormalized().
           *
           * @param s         string to test
           * @param errorCode ICU in/out error code
           * @return UBool result of the test
           */
00408     virtual UBool
00409     isNormalized(const UnicodeString &s, UErrorCode &errorCode) const;
          /**
           * Overrides Normalizer2::quickCheck().
           *
           * @param s         string to test
           * @param errorCode ICU in/out error code
           * @return the check result for s
           */
00421     virtual UNormalizationCheckResult
00422     quickCheck(const UnicodeString &s, UErrorCode &errorCode) const;
          /**
           * Overrides Normalizer2::spanQuickCheckYes().
           *
           * @param s         string to scan
           * @param errorCode ICU in/out error code
           * @return an index into s
           */
00434     virtual int32_t
00435     spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const;
00436 
          /**
           * Overrides Normalizer2::hasBoundaryBefore().
           * @param c code point to test
           */
00445     virtual UBool hasBoundaryBefore(UChar32 c) const;
00446 
          /**
           * Overrides Normalizer2::hasBoundaryAfter().
           * @param c code point to test
           */
00455     virtual UBool hasBoundaryAfter(UChar32 c) const;
00456 
          /**
           * Overrides Normalizer2::isInert().
           * @param c code point to test
           */
00464     virtual UBool isInert(UChar32 c) const;
00465 private:
          // Internal overload of normalize() that additionally takes a
          // USetSpanCondition. NOTE(review): presumably the starting span
          // condition used when walking src against the filter set -- confirm
          // in the implementation file.
00466     UnicodeString &
00467     normalize(const UnicodeString &src,
00468               UnicodeString &dest,
00469               USetSpanCondition spanCondition,
00470               UErrorCode &errorCode) const;
00471 
          // Internal overload that additionally takes a doNormalize flag.
          // NOTE(review): presumably the shared worker behind the public
          // normalizeSecondAndAppend() and append() -- confirm in the
          // implementation file.
00472     UnicodeString &
00473     normalizeSecondAndAppend(UnicodeString &first,
00474                              const UnicodeString &second,
00475                              UBool doNormalize,
00476                              UErrorCode &errorCode) const;
00477 
          // Non-owning references bound by the constructor; the referenced
          // objects are never copied by this class.
00478     const Normalizer2 &norm2;
00479     const UnicodeSet &set;
00480 };
00481 
00482 U_NAMESPACE_END
00483 
00484 #endif  // !UCONFIG_NO_NORMALIZATION
00485 #endif  // __NORMALIZER2_H__
 All Data Structures Files Functions Variables Typedefs Enumerations Enumerator Friends Defines