// ICU 4.8.1.1
00001 /* 00002 ******************************************************************************* 00003 * 00004 * Copyright (C) 2009-2011, International Business Machines 00005 * Corporation and others. All Rights Reserved. 00006 * 00007 ******************************************************************************* 00008 * file name: normalizer2.h 00009 * encoding: US-ASCII 00010 * tab size: 8 (not used) 00011 * indentation:4 00012 * 00013 * created on: 2009nov22 00014 * created by: Markus W. Scherer 00015 */ 00016 00017 #ifndef __NORMALIZER2_H__ 00018 #define __NORMALIZER2_H__ 00019 00025 #include "unicode/utypes.h" 00026 00027 #if !UCONFIG_NO_NORMALIZATION 00028 00029 #include "unicode/uniset.h" 00030 #include "unicode/unistr.h" 00031 #include "unicode/unorm2.h" 00032 00033 U_NAMESPACE_BEGIN 00034 00078 class U_COMMON_API Normalizer2 : public UObject { 00079 public: 00101 static const Normalizer2 * 00102 getInstance(const char *packageName, 00103 const char *name, 00104 UNormalization2Mode mode, 00105 UErrorCode &errorCode); 00106 00117 UnicodeString 00118 normalize(const UnicodeString &src, UErrorCode &errorCode) const { 00119 UnicodeString result; 00120 normalize(src, result, errorCode); 00121 return result; 00122 } 00136 virtual UnicodeString & 00137 normalize(const UnicodeString &src, 00138 UnicodeString &dest, 00139 UErrorCode &errorCode) const = 0; 00154 virtual UnicodeString & 00155 normalizeSecondAndAppend(UnicodeString &first, 00156 const UnicodeString &second, 00157 UErrorCode &errorCode) const = 0; 00172 virtual UnicodeString & 00173 append(UnicodeString &first, 00174 const UnicodeString &second, 00175 UErrorCode &errorCode) const = 0; 00176 00190 virtual UBool 00191 getDecomposition(UChar32 c, UnicodeString &decomposition) const = 0; 00192 00207 virtual UBool 00208 isNormalized(const UnicodeString &s, UErrorCode &errorCode) const = 0; 00209 00225 virtual UNormalizationCheckResult 00226 quickCheck(const UnicodeString &s, UErrorCode &errorCode) const = 0; 
00227 00250 virtual int32_t 00251 spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const = 0; 00252 00266 virtual UBool hasBoundaryBefore(UChar32 c) const = 0; 00267 00282 virtual UBool hasBoundaryAfter(UChar32 c) const = 0; 00283 00297 virtual UBool isInert(UChar32 c) const = 0; 00298 00299 private: 00300 // No ICU "poor man's RTTI" for this class nor its subclasses. 00301 virtual UClassID getDynamicClassID() const; 00302 }; 00303 00315 class U_COMMON_API FilteredNormalizer2 : public Normalizer2 { 00316 public: 00327 FilteredNormalizer2(const Normalizer2 &n2, const UnicodeSet &filterSet) : 00328 norm2(n2), set(filterSet) {} 00329 00343 virtual UnicodeString & 00344 normalize(const UnicodeString &src, 00345 UnicodeString &dest, 00346 UErrorCode &errorCode) const; 00361 virtual UnicodeString & 00362 normalizeSecondAndAppend(UnicodeString &first, 00363 const UnicodeString &second, 00364 UErrorCode &errorCode) const; 00379 virtual UnicodeString & 00380 append(UnicodeString &first, 00381 const UnicodeString &second, 00382 UErrorCode &errorCode) const; 00383 00394 virtual UBool 00395 getDecomposition(UChar32 c, UnicodeString &decomposition) const; 00396 00408 virtual UBool 00409 isNormalized(const UnicodeString &s, UErrorCode &errorCode) const; 00421 virtual UNormalizationCheckResult 00422 quickCheck(const UnicodeString &s, UErrorCode &errorCode) const; 00434 virtual int32_t 00435 spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const; 00436 00445 virtual UBool hasBoundaryBefore(UChar32 c) const; 00446 00455 virtual UBool hasBoundaryAfter(UChar32 c) const; 00456 00464 virtual UBool isInert(UChar32 c) const; 00465 private: 00466 UnicodeString & 00467 normalize(const UnicodeString &src, 00468 UnicodeString &dest, 00469 USetSpanCondition spanCondition, 00470 UErrorCode &errorCode) const; 00471 00472 UnicodeString & 00473 normalizeSecondAndAppend(UnicodeString &first, 00474 const UnicodeString &second, 00475 UBool doNormalize, 00476 
UErrorCode &errorCode) const; 00477 00478 const Normalizer2 &norm2; 00479 const UnicodeSet &set; 00480 }; 00481 00482 U_NAMESPACE_END 00483 00484 #endif // !UCONFIG_NO_NORMALIZATION 00485 #endif // __NORMALIZER2_H__