ICU 4.8.1.1  4.8.1.1
alphaindex.h
Go to the documentation of this file.
00001 /*
00002 *******************************************************************************
00003 *
00004 *   Copyright (C) 2011 International Business Machines
00005 *   Corporation and others.  All Rights Reserved.
00006 *
00007 *******************************************************************************
00008 */
00009 
00010 #ifndef INDEXCHARS_H
00011 #define INDEXCHARS_H
00012 
00013 #include "unicode/utypes.h"
00014 #include "unicode/uobject.h"
00015 #include "unicode/locid.h"
00016 
00023 U_CDECL_BEGIN
00024 
00031 typedef enum UAlphabeticIndexLabelType {
               /* Tag describing the kind of label a bucket carries.  It is the type of
                * Bucket::labelType_ and the return type of
                * AlphabeticIndex::getBucketLabelType().  The numeric values are fixed
                * explicitly at 0..3.
                * NOTE(review): the per-constant meanings below are inferred from the
                * identifiers only -- confirm against the ICU API documentation. */
               /* presumably an ordinary label bucket */
00037          U_ALPHAINDEX_NORMAL    = 0,
00038 
               /* presumably the bucket for items sorting before the first label */
00044          U_ALPHAINDEX_UNDERFLOW = 1,
00045 
               /* presumably a bucket lying between two groups of labels
                * (cf. AlphabeticIndex::getInflowLabel) */
00054          U_ALPHAINDEX_INFLOW    = 2,
00055 
               /* presumably the bucket for items sorting after the last label */
00061          U_ALPHAINDEX_OVERFLOW  = 3
00062      } UAlphabeticIndexLabelType;
00063 
00064 
00065 struct UHashtable;
00066 U_CDECL_END
00067 
00068 U_NAMESPACE_BEGIN
00069 
00070 // Forward declarations.  Within this header these types are used only
      // through pointers or references, so their full definitions are not needed:
      //   Collator           - collator_, collatorPrimaryOnly_, hackName(), initPinyinBounds()
      //   RuleBasedCollator  - return type of getCollator()
      //   UnicodeSet         - addLabels(), the static sets, initialLabels_, ...
      //   UVector            - inputRecords_, bucketList_, labels_, ...
      // StringEnumeration is declared but not referenced anywhere else in this header.
00071 
00072 class Collator;
00073 class RuleBasedCollator;
00074 class StringEnumeration;
00075 class UnicodeSet;
00076 class UVector;
00077 
00078 
00079 
00163 class U_I18N_API AlphabeticIndex: public UObject {
      // AlphabeticIndex is exported with U_I18N_API and derives from UObject.
      //
      // As far as the declarations below show: callers register labels
      // (addLabels) and records (addRecord: a name plus an opaque data pointer);
      // the records are sorted into labelled buckets (see Bucket, bucketList_);
      // and the result is walked with a bucket cursor (nextBucket /
      // resetBucketIterator) and a record cursor (nextRecord /
      // resetRecordIterator).
      //
      // The copy constructor, operator=, operator== and operator!= are private,
      // so client code cannot copy or compare instances.
      //
      // NOTE(review): the UErrorCode &status parameters presumably follow the
      //   usual ICU in/out error-code convention -- confirm in the implementation.
      // NOTE(review): setters and mutators return AlphabeticIndex & -- presumably
      //   *this, to allow call chaining -- confirm in the implementation.
      // NOTE(review): the gaps in the listing's line numbers suggest the original
      //   API doc comments were stripped from this rendering of the header.
00164 
00165   public:
00166 
           // Construct an index for `locale`.  `status` is passed by non-const reference.
00179      AlphabeticIndex(const Locale &locale, UErrorCode &status);
00180 
00181 
00182 
           // Add the labels contained in the set `additions`.
00193      virtual AlphabeticIndex &addLabels(const UnicodeSet &additions, UErrorCode &status);
00194 
           // Overload taking a locale instead of an explicit set.
           // NOTE(review): presumably adds that locale's index exemplar characters
           //   (cf. getIndexExemplars below) -- confirm.
00208      virtual AlphabeticIndex &addLabels(const Locale &locale, UErrorCode &status);
00209 
           // Virtual destructor.
00214      virtual ~AlphabeticIndex();
00215 
00216 
          // Returns a const reference to a RuleBasedCollator.
          // NOTE(review): the member collator_ is declared as Collator *, so the
          //   implementation presumably downcasts -- confirm that this is always valid.
00229     virtual const RuleBasedCollator &getCollator() const;
00230 
00231 
          // Accessors for the inflow label (presumably backed by inflowLabel_).
00240     virtual const UnicodeString &getInflowLabel() const;
00241 
00253     virtual AlphabeticIndex &setInflowLabel(const UnicodeString &inflowLabel, UErrorCode &status);
00254 
00255 
00256 
          // Accessors for the overflow label (presumably backed by overflowLabel_).
00264     virtual const UnicodeString &getOverflowLabel() const;
00265 
00266 
00276     virtual AlphabeticIndex &setOverflowLabel(const UnicodeString &overflowLabel, UErrorCode &status);
00277 
          // Accessors for the underflow label (presumably backed by underflowLabel_).
00285     virtual const UnicodeString &getUnderflowLabel() const;
00286 
00296     virtual AlphabeticIndex &setUnderflowLabel(const UnicodeString &underflowLabel, UErrorCode &status);
00297 
00298 
          // Accessors for the limit on the number of labels permitted in the index
          // (see maxLabelCount_).
00306     virtual int32_t getMaxLabelCount() const;
00307 
00320     virtual AlphabeticIndex &setMaxLabelCount(int32_t maxLabelCount, UErrorCode &status);
00321 
00322 
          // Returns a const UnicodeString reference computed from `lowerLimit`.
          // This method is non-const.
          // NOTE(review): the result presumably refers to overflowComparisonString_,
          //   in which case a later call may overwrite it -- confirm.
00335     virtual const UnicodeString &getOverflowComparisonString(const UnicodeString &lowerLimit,
00336                                                              UErrorCode &status);
00337 
00338 
          // Add a record: `name` is the record's name and `data` an opaque
          // caller-supplied pointer (see Record::name_ and Record::data_).
00355     virtual AlphabeticIndex &addRecord(const UnicodeString &name, const void *data, UErrorCode &status);
00356 
          // Remove the records from the index.
          // NOTE(review): presumably leaves the labels untouched -- confirm.
00365     virtual AlphabeticIndex &clearRecords(UErrorCode &status);
00366 
00367 
          // Number of buckets.  Non-const and takes `status`.
          // NOTE(review): presumably because the index may have to be (re)built
          //   first, see indexBuildRequired_ -- confirm.
00376     virtual int32_t  getBucketCount(UErrorCode &status);
00377 
00378 
          // Number of records.  Non-const and takes `status`, like getBucketCount().
00387     virtual int32_t  getRecordCount(UErrorCode &status);
00388 
00389 
00390 
          // Returns a bucket index for the given item name.
          // NOTE(review): presumably the bucket `itemName` would be sorted into -- confirm.
00403     virtual int32_t  getBucketIndex(const UnicodeString &itemName, UErrorCode &status);
00404 
00405 
          // Overload without arguments.
          // NOTE(review): presumably the index of the bucket iterator's current
          //   bucket -- confirm.
00412     virtual int32_t  getBucketIndex() const;
00413 
00414 
          // Advance the bucket iterator.
          // NOTE(review): the UBool result presumably reports whether a bucket is
          //   available -- confirm.
00426     virtual UBool nextBucket(UErrorCode &status);
00427 
          // Label, label type and record count of a bucket.
          // NOTE(review): presumably of the bucket iterator's current bucket
          //   (currentBucket_) -- confirm.
00436     virtual const UnicodeString &getBucketLabel() const;
00437 
00445     virtual UAlphabeticIndexLabelType getBucketLabelType() const;
00446 
00455     virtual int32_t getBucketRecordCount() const;
00456 
00457 
          // Reset the bucket iterator.
00466     virtual AlphabeticIndex &resetBucketIterator(UErrorCode &status);
00467 
          // Advance the record iterator.
          // NOTE(review): presumably within the current bucket -- confirm.
00479     virtual UBool nextRecord(UErrorCode &status);
00480 
          // Name and opaque data pointer of a record.
          // NOTE(review): presumably of the record iterator's current record -- confirm.
00489     virtual const UnicodeString &getRecordName() const;
00490 
00491 
00500     virtual const void *getRecordData() const;
00501 
00502 
          // Reset the record iterator.  Unlike resetBucketIterator(), this takes no
          // UErrorCode argument.
00509     virtual AlphabeticIndex &resetRecordIterator();
00510 
00511 private:
00512     // No ICU "poor man's RTTI" for this class nor its subclasses.
00513     virtual UClassID getDynamicClassID() const;
00514 
           // Copy constructor, private: not available to client code.
00519      AlphabeticIndex(const AlphabeticIndex &other);
00520 
           // Assignment operator, private.  The inline body ignores its argument and
           // just returns *this.
           // NOTE(review): the ';' after the closing brace is redundant.
00524      AlphabeticIndex &operator =(const AlphabeticIndex & /*other*/) { return *this;};
00525 
           // Equality comparison, private: not available to client code.
00530      virtual UBool operator==(const AlphabeticIndex& other) const;
00531 
           // Inequality comparison, private: not available to client code.
00536      virtual UBool operator!=(const AlphabeticIndex& other) const;
00537 
00538      // Common initialization, for use from all constructors.
00539      void init(UErrorCode &status);
00540 
00541      // Initialize & destruct static constants used by this class.
00542      static void staticInit(UErrorCode &status);
00543 
00544      // Pinyin stuff.  If the input name is Chinese, add the Pinyin prefix to the dest string.
00545      void hackName(UnicodeString &dest, const UnicodeString &name, const Collator *coll);
00546      void initPinyinBounds(const Collator *coll, UErrorCode &status);
00547 
00548    public:
           // Public static counterpart of staticInit().
           // NOTE(review): presumably releases the static constants declared at the
           //   end of this class -- confirm.
00554      static void staticCleanup();
00555    private:
00556 
00557      // Add index characters from the specified locale to the dest set.
00558      // Does not remove any previous contents from dest.
00559      static void getIndexExemplars(UnicodeSet &dest, const Locale &locale, UErrorCode &status);
00560 
           // Returns a UVector pointer.
           // NOTE(review): presumably the value stored in firstScriptCharacters_;
           //   ownership of the returned vector is not visible here -- confirm.
00561      UVector *firstStringsInScript(UErrorCode &status);
00562 
           // Static helper that returns a new UnicodeString by value, derived from `item`.
00563      static UnicodeString separated(const UnicodeString &item);
00564 
           // Static helper that takes an output set `dest` and a string `codePoint`
           // and returns a UnicodeSet pointer.
           // NOTE(review): presumably returns &dest -- confirm.
00565      static UnicodeSet *getScriptSet(UnicodeSet &dest, const UnicodeString &codePoint, UErrorCode &status);
00566 
           // Index construction steps.
           // NOTE(review): see indexBuildRequired_ for when a rebuild is needed; the
           //   call order among these three is not visible here.
00567      void buildIndex(UErrorCode &status);
00568      void buildBucketList(UErrorCode &status);
00569      void bucketRecords(UErrorCode &status);
00570 
00571 
00572   public:
00573 
00574     //  The following internal items are declared public only to allow access from
00575     //  implementation code written in plain C.  They are not intended for
00576     //  public use.
00577 
           // One item added to the index.
00582      struct Record: public UMemory {
               // Back-pointer to an AlphabeticIndex (presumably the owning index -- confirm).
00583          AlphabeticIndex     *alphaIndex_;
               // Name of the record; const, so fixed at construction.
00584          const UnicodeString  name_;
00585          UnicodeString        sortingName_;  // Usually the same as name_; different for Pinyin.
               // Opaque pointer supplied by the caller; pointee is const.
00586          const void           *data_;
00587          int32_t              serialNumber_;  // Defines sorting order for names that compare equal.
00588          Record(AlphabeticIndex *alphaIndex, const UnicodeString &name, const void *data);
00589          ~Record();
00590      };
00591 
           // Vector of input records; per the comment on Bucket::records_, this
           // vector owns the Record objects.
00597      UVector  *inputRecords_;
00598 
           // One bucket of the index: a label, a lower boundary string, a label
           // type, and the records sorted into the bucket.
00604      struct Bucket: public UMemory {
00605          UnicodeString     label_;
00606          UnicodeString     lowerBoundary_;
00607          UAlphabeticIndexLabelType labelType_;
00608          UVector           *records_; // Records are owned by inputRecords_ vector.
00609 
00610          Bucket(const UnicodeString &label,   // Parameter strings are copied.
00611                 const UnicodeString &lowerBoundary,
00612                 UAlphabeticIndexLabelType type, UErrorCode &status);
00613          ~Bucket();
00614      };
00615 
00616   public:
00617 
          // Language type used by the Pinyin support (see langType_ below):
          // not Chinese (kNormal), simplified Chinese, or traditional Chinese.
00622     enum ELangType {
00624         kNormal,
00626         kSimplified,
00628         kTraditional
00629     };
00630 
          // Static helper mapping a Locale to an ELangType.
00635     static ELangType  langTypeFromLocale(const Locale &loc);
00636 
00637 
00638    private:
00639 
00640      // Holds the contents of this index, buckets of user items.
00641      // UVector elements are of type (Bucket *)
00642      UVector *bucketList_;
00643 
00644      int32_t  labelsIterIndex_;      // Index of next item to return.
00645      int32_t  itemsIterIndex_;       // NOTE(review): presumably the record iterator's position -- confirm.
00646      Bucket   *currentBucket_;       // While an iteration of the index is underway,
00647                                      //   point to the bucket for the current label.
00648                                      // NULL when no iteration underway.
00649 
00650      UBool    indexBuildRequired_;   //  Caller has made changes to the index that
00651                                      //  require rebuilding & bucketing before the
00652                                      //  contents can be iterated.
00653 
00654      int32_t    maxLabelCount_;      // Limit on # of labels permitted in the index.
00655 
00656      UHashtable *alreadyIn_;         // Key=UnicodeString, value=UnicodeSet
00657 
00658      UnicodeSet *initialLabels_;     // Initial (unprocessed) set of Labels.  Union
00659                                      //   of those explicitly set by the user plus
00660                                      //   those from locales.  Raw values, before
00661                                      //   crunching into bucket labels.
00662 
00663      UVector    *labels_;            // List of Labels, after processing, sorting.
00664                                      //   Contents are (UnicodeString *)
00665 
00666      UnicodeSet *noDistinctSorting_; // As the set of labels is built, strings may
00667                                      // be discarded from the exemplars. This contains
00668                                      // some of the discards, and is
00669                                      // intended for debugging.
00670 
00671      UnicodeSet *notAlphabetic_;     // As the set of labels is built, strings may
00672                                      // be discarded from the exemplars. This contains
00673                                      // some of the discards, and is
00674                                      // intended for debugging.
00675 
00676 
00677      UVector    *firstScriptCharacters_;  // The first character from each script,
00678                                           //   in collation order.
00679 
           // Locale object and two collator pointers held by the index.
           // NOTE(review): collatorPrimaryOnly_ presumably compares at primary
           //   strength only; ownership of both collators is not visible here -- confirm.
00680      Locale    locale_;
00681      Collator  *collator_;
00682      Collator  *collatorPrimaryOnly_;
00683 
           // Label strings; see the corresponding get/set methods above.
00684      UnicodeString  inflowLabel_;
00685      UnicodeString  overflowLabel_;
00686      UnicodeString  underflowLabel_;
00687      UnicodeString  overflowComparisonString_;
00688 
00689      ELangType      langType_;        // The language type, simplified Chinese, Traditional Chinese,
00690                                       //  or not Chinese (Normal).  Part of the Pinyin support
00691 
           // Pinyin lookup table type: 24 rows of 3 const UChar each.
00692      typedef const UChar PinyinLookup[24][3];
00693      static PinyinLookup   HACK_PINYIN_LOOKUP_SHORT;
00694      static PinyinLookup   HACK_PINYIN_LOOKUP_LONG;
00695      
00696      // These will be lazily set to the short or long tables based on which
00697      //   Chinese collation has been configured into the ICU library.
00698      static PinyinLookup   *HACK_PINYIN_LOOKUP;
00699      static const UChar    *PINYIN_LOWER_BOUNDS;
00700 
00701 
00702 
00703      int32_t    recordCounter_;         // Counts Records created.  For minting record serial numbers.
00704 
00705 // Constants.  Lazily initialized the first time an AlphabeticIndex object is created.
00706 
00707      static UnicodeSet *ALPHABETIC;
00708      static UnicodeSet *CORE_LATIN;
00709      static UnicodeSet *ETHIOPIC;
00710      static UnicodeSet *HANGUL;
00711      static UnicodeSet *IGNORE_SCRIPTS;
00712      static UnicodeSet *TO_TRY;
00713      static UnicodeSet *UNIHAN;
00714      static const UnicodeString *EMPTY_STRING;
00715 
00716 };
00717 
00718 U_NAMESPACE_END
00719 #endif
00720 
 All Data Structures Files Functions Variables Typedefs Enumerations Enumerator Friends Defines