// ICU 4.8.1.1
00001 /* 00002 ******************************************************************************* 00003 * 00004 * Copyright (C) 2011 International Business Machines 00005 * Corporation and others. All Rights Reserved. 00006 * 00007 ******************************************************************************* 00008 */ 00009 00010 #ifndef INDEXCHARS_H 00011 #define INDEXCHARS_H 00012 00013 #include "unicode/utypes.h" 00014 #include "unicode/uobject.h" 00015 #include "unicode/locid.h" 00016 00023 U_CDECL_BEGIN 00024 00031 typedef enum UAlphabeticIndexLabelType { 00037 U_ALPHAINDEX_NORMAL = 0, 00038 00044 U_ALPHAINDEX_UNDERFLOW = 1, 00045 00054 U_ALPHAINDEX_INFLOW = 2, 00055 00061 U_ALPHAINDEX_OVERFLOW = 3 00062 } UAlphabeticIndexLabelType; 00063 00064 00065 struct UHashtable; 00066 U_CDECL_END 00067 00068 U_NAMESPACE_BEGIN 00069 00070 // Forward Declarations 00071 00072 class Collator; 00073 class RuleBasedCollator; 00074 class StringEnumeration; 00075 class UnicodeSet; 00076 class UVector; 00077 00078 00079 00163 class U_I18N_API AlphabeticIndex: public UObject { 00164 00165 public: 00166 00179 AlphabeticIndex(const Locale &locale, UErrorCode &status); 00180 00181 00182 00193 virtual AlphabeticIndex &addLabels(const UnicodeSet &additions, UErrorCode &status); 00194 00208 virtual AlphabeticIndex &addLabels(const Locale &locale, UErrorCode &status); 00209 00214 virtual ~AlphabeticIndex(); 00215 00216 00229 virtual const RuleBasedCollator &getCollator() const; 00230 00231 00240 virtual const UnicodeString &getInflowLabel() const; 00241 00253 virtual AlphabeticIndex &setInflowLabel(const UnicodeString &inflowLabel, UErrorCode &status); 00254 00255 00256 00264 virtual const UnicodeString &getOverflowLabel() const; 00265 00266 00276 virtual AlphabeticIndex &setOverflowLabel(const UnicodeString &overflowLabel, UErrorCode &status); 00277 00285 virtual const UnicodeString &getUnderflowLabel() const; 00286 00296 virtual AlphabeticIndex &setUnderflowLabel(const UnicodeString 
&underflowLabel, UErrorCode &status); 00297 00298 00306 virtual int32_t getMaxLabelCount() const; 00307 00320 virtual AlphabeticIndex &setMaxLabelCount(int32_t maxLabelCount, UErrorCode &status); 00321 00322 00335 virtual const UnicodeString &getOverflowComparisonString(const UnicodeString &lowerLimit, 00336 UErrorCode &status); 00337 00338 00355 virtual AlphabeticIndex &addRecord(const UnicodeString &name, const void *data, UErrorCode &status); 00356 00365 virtual AlphabeticIndex &clearRecords(UErrorCode &status); 00366 00367 00376 virtual int32_t getBucketCount(UErrorCode &status); 00377 00378 00387 virtual int32_t getRecordCount(UErrorCode &status); 00388 00389 00390 00403 virtual int32_t getBucketIndex(const UnicodeString &itemName, UErrorCode &status); 00404 00405 00412 virtual int32_t getBucketIndex() const; 00413 00414 00426 virtual UBool nextBucket(UErrorCode &status); 00427 00436 virtual const UnicodeString &getBucketLabel() const; 00437 00445 virtual UAlphabeticIndexLabelType getBucketLabelType() const; 00446 00455 virtual int32_t getBucketRecordCount() const; 00456 00457 00466 virtual AlphabeticIndex &resetBucketIterator(UErrorCode &status); 00467 00479 virtual UBool nextRecord(UErrorCode &status); 00480 00489 virtual const UnicodeString &getRecordName() const; 00490 00491 00500 virtual const void *getRecordData() const; 00501 00502 00509 virtual AlphabeticIndex &resetRecordIterator(); 00510 00511 private: 00512 // No ICU "poor man's RTTI" for this class nor its subclasses. 00513 virtual UClassID getDynamicClassID() const; 00514 00519 AlphabeticIndex(const AlphabeticIndex &other); 00520 00524 AlphabeticIndex &operator =(const AlphabeticIndex & /*other*/) { return *this;}; 00525 00530 virtual UBool operator==(const AlphabeticIndex& other) const; 00531 00536 virtual UBool operator!=(const AlphabeticIndex& other) const; 00537 00538 // Common initialization, for use from all constructors. 
00539 void init(UErrorCode &status); 00540 00541 // Initialize & destruct static constants used by this class. 00542 static void staticInit(UErrorCode &status); 00543 00544 // Pinyin stuff. If the input name is Chinese, add the Pinyin prefix to the dest string. 00545 void hackName(UnicodeString &dest, const UnicodeString &name, const Collator *coll); 00546 void initPinyinBounds(const Collator *coll, UErrorCode &status); 00547 00548 public: 00554 static void staticCleanup(); 00555 private: 00556 00557 // Add index characters from the specified locale to the dest set. 00558 // Does not remove any previous contents from dest. 00559 static void getIndexExemplars(UnicodeSet &dest, const Locale &locale, UErrorCode &status); 00560 00561 UVector *firstStringsInScript(UErrorCode &status); 00562 00563 static UnicodeString separated(const UnicodeString &item); 00564 00565 static UnicodeSet *getScriptSet(UnicodeSet &dest, const UnicodeString &codePoint, UErrorCode &status); 00566 00567 void buildIndex(UErrorCode &status); 00568 void buildBucketList(UErrorCode &status); 00569 void bucketRecords(UErrorCode &status); 00570 00571 00572 public: 00573 00574 // The following internal items are declared public only to allow access from 00575 // implementation code written in plain C. They are not intended for 00576 // public use. 00577 00582 struct Record: public UMemory { 00583 AlphabeticIndex *alphaIndex_; 00584 const UnicodeString name_; 00585 UnicodeString sortingName_; // Usually the same as name_; different for Pinyin. 00586 const void *data_; 00587 int32_t serialNumber_; // Defines sorting order for names that compare equal. 
00588 Record(AlphabeticIndex *alphaIndex, const UnicodeString &name, const void *data); 00589 ~Record(); 00590 }; 00591 00597 UVector *inputRecords_; 00598 00604 struct Bucket: public UMemory { 00605 UnicodeString label_; 00606 UnicodeString lowerBoundary_; 00607 UAlphabeticIndexLabelType labelType_; 00608 UVector *records_; // Records are owned by inputRecords_ vector. 00609 00610 Bucket(const UnicodeString &label, // Parameter strings are copied. 00611 const UnicodeString &lowerBoundary, 00612 UAlphabeticIndexLabelType type, UErrorCode &status); 00613 ~Bucket(); 00614 }; 00615 00616 public: 00617 00622 enum ELangType { 00624 kNormal, 00626 kSimplified, 00628 kTraditional 00629 }; 00630 00635 static ELangType langTypeFromLocale(const Locale &loc); 00636 00637 00638 private: 00639 00640 // Holds the contents of this index, buckets of user items. 00641 // UVector elements are of type (Bucket *) 00642 UVector *bucketList_; 00643 00644 int32_t labelsIterIndex_; // Index of next item to return. 00645 int32_t itemsIterIndex_; 00646 Bucket *currentBucket_; // While an iteration of the index in underway, 00647 // point to the bucket for the current label. 00648 // NULL when no iteration underway. 00649 00650 UBool indexBuildRequired_; // Caller has made changes to the index that 00651 // require rebuilding & bucketing before the 00652 // contents can be iterated. 00653 00654 int32_t maxLabelCount_; // Limit on # of labels permitted in the index. 00655 00656 UHashtable *alreadyIn_; // Key=UnicodeString, value=UnicodeSet 00657 00658 UnicodeSet *initialLabels_; // Initial (unprocessed) set of Labels. Union 00659 // of those explicitly set by the user plus 00660 // those from locales. Raw values, before 00661 // crunching into bucket labels. 00662 00663 UVector *labels_; // List of Labels, after processing, sorting. 
00664 // Contents are (UnicodeString *) 00665 00666 UnicodeSet *noDistinctSorting_; // As the set of labels is built, strings may 00667 // be discarded from the exemplars. This contains 00668 // some of the discards, and is 00669 // intended for debugging. 00670 00671 UnicodeSet *notAlphabetic_; // As the set of labels is built, strings may 00672 // be discarded from the exemplars. This contains 00673 // some of the discards, and is 00674 // intended for debugging. 00675 00676 00677 UVector *firstScriptCharacters_; // The first character from each script, 00678 // in collation order. 00679 00680 Locale locale_; 00681 Collator *collator_; 00682 Collator *collatorPrimaryOnly_; 00683 00684 UnicodeString inflowLabel_; 00685 UnicodeString overflowLabel_; 00686 UnicodeString underflowLabel_; 00687 UnicodeString overflowComparisonString_; 00688 00689 ELangType langType_; // The language type, simplified Chinese, Traditional Chinese, 00690 // or not Chinese (Normal). Part of the Pinyin support 00691 00692 typedef const UChar PinyinLookup[24][3]; 00693 static PinyinLookup HACK_PINYIN_LOOKUP_SHORT; 00694 static PinyinLookup HACK_PINYIN_LOOKUP_LONG; 00695 00696 // These will be lazily set to the short or long tables based on which 00697 // Chinese collation has been configured into the ICU library. 00698 static PinyinLookup *HACK_PINYIN_LOOKUP; 00699 static const UChar *PINYIN_LOWER_BOUNDS; 00700 00701 00702 00703 int32_t recordCounter_; // Counts Records created. For minting record serial numbers. 00704 00705 // Constants. Lazily initialized the first time an AlphabeticIndex object is created. 00706 00707 static UnicodeSet *ALPHABETIC; 00708 static UnicodeSet *CORE_LATIN; 00709 static UnicodeSet *ETHIOPIC; 00710 static UnicodeSet *HANGUL; 00711 static UnicodeSet *IGNORE_SCRIPTS; 00712 static UnicodeSet *TO_TRY; 00713 static UnicodeSet *UNIHAN; 00714 static const UnicodeString *EMPTY_STRING; 00715 00716 }; 00717 00718 U_NAMESPACE_END 00719 #endif 00720