ICU 4.8.1.1  4.8.1.1
rbbi.h
Go to the documentation of this file.
00001 /*
00002 ***************************************************************************
00003 *   Copyright (C) 1999-2011 International Business Machines Corporation   *
00004 *   and others. All rights reserved.                                      *
00005 ***************************************************************************
00006 
00007 **********************************************************************
00008 *   Date        Name        Description
00009 *   10/22/99    alan        Creation.
00010 *   11/11/99    rgillam     Complete port from Java.
00011 **********************************************************************
00012 */
00013 
00014 #ifndef RBBI_H
00015 #define RBBI_H
00016 
00017 #include "unicode/utypes.h"
00018 
00024 #if !UCONFIG_NO_BREAK_ITERATION
00025 
00026 #include "unicode/brkiter.h"
00027 #include "unicode/udata.h"
00028 #include "unicode/parseerr.h"
00029 #include "unicode/schriter.h"
00030 #include "unicode/uchriter.h"
00031 
00032 
00033 struct UTrie;
00034 
00035 U_NAMESPACE_BEGIN
00036 
// Forward declarations of implementation types. Within this header they are
// used only through pointers (members and parameters of
// RuleBasedBreakIterator), so their full definitions are not needed here.
// NOTE(review): RuleBasedBreakIteratorTables is not referenced anywhere else
// in this header, and BreakIterator is already fully declared by
// unicode/brkiter.h (it is the base class below) - confirm whether these two
// declarations are still needed before removing them.
00038 struct RBBIDataHeader;
00039 class  RuleBasedBreakIteratorTables;
00040 class  BreakIterator;
00041 class  RBBIDataWrapper;
00042 class  UStack;
00043 class  LanguageBreakEngine;
00044 class  UnhandledEngine;
00045 struct RBBIStateTable;
00046 
00047 
00048 
00049 
// RuleBasedBreakIterator: a concrete BreakIterator subclass exported from the
// common library (U_COMMON_API). Per the public constructors below, an
// instance can be created from rule source text, from precompiled binary
// rules, or from a UDataMemory image.
//
// NOTE(review): the listing-number gaps in this file show where the original
// Doxygen doc comments were stripped; the comments below describe only what
// the declarations themselves establish and hedge everything else.
00065 class U_COMMON_API RuleBasedBreakIterator : public BreakIterator {
00066 
00067 protected:
          // UText handle; presumably the text currently being analyzed
          // (see setText(UText*, UErrorCode&)) - confirm in the implementation.
00072     UText  *fText;
00073 
          // Generic CharacterIterator pointer; presumably backs getText().
00079     CharacterIterator  *fCharIter;
00080 
          // Typed iterator pointers (string-backed and UChar-array-backed).
          // Their exact role and ownership are not visible in this header.
00086     StringCharacterIterator *fSCharIter;
00087 
00093     UCharCharacterIterator *fDCharIter;
00094 
          // Pointer to the RBBIDataWrapper; presumably wraps the compiled
          // rule data this iterator runs on - TODO confirm.
00099     RBBIDataWrapper    *fData;
00100 
          // Rule-status bookkeeping: an index plus a flag recording whether
          // that index is currently valid. makeRuleStatusValid() (private,
          // below) is presumably what refreshes them.
00104     int32_t             fLastRuleStatusIndex;
00105 
00112     UBool               fLastStatusIndexValid;
00113 
          // Unsigned counter of dictionary characters; what interval it
          // covers is not determinable from this header.
00119     uint32_t            fDictionaryCharCount;
00120 
          // Break-position cache: the array, the number of entries in it, and
          // the current position within it (as the member names indicate).
          // Allocation and ownership of the array are handled outside this
          // header.
00128     int32_t*            fCachedBreakPositions;
00129 
00134     int32_t             fNumCachedBreakPositions;
00135 
00141     int32_t             fPositionInCache;
00142     
          // UStack of language break engines; presumably the engines already
          // looked up via getLanguageBreakEngine() - TODO confirm.
00150     UStack              *fLanguageBreakEngines;
00151     
          // Pointer to an UnhandledEngine; presumably the fallback for
          // characters no language engine handles - TODO confirm.
00159     UnhandledEngine     *fUnhandledBreakEngine;
00160     
          // Break type stored as a plain int32_t; written via setBreakType().
          // The set of valid values is not declared in this header.
00166     int32_t             fBreakType;
00167     
00168 protected:
00169     //=======================================================================
00170     // constructors
00171     //=======================================================================
00172 
          // Single-value tag enum: its only use in this header is to select
          // the const-data constructor overload below.
00181     enum EDontAdopt {
00182         kDontAdopt
00183     };
00184 
          // Construct from a (non-const) rule data header. Errors are reported
          // through `status`.
          // NOTE(review): by contrast with the kDontAdopt overload, this one
          // presumably takes ownership of `data` - confirm.
00195     RuleBasedBreakIterator(RBBIDataHeader* data, UErrorCode &status);
00196 
          // Construct from a const rule data header, selected by passing
          // kDontAdopt; presumably the iterator does not take ownership of
          // `data` in this form.
00205     RuleBasedBreakIterator(const RBBIDataHeader* data, enum EDontAdopt dontAdopt, UErrorCode &status);
00206 
00207 
          // Friends: both classes get access to the non-public members of
          // this class, including the protected constructors above.
00208     friend class RBBIRuleBuilder;
00210     friend class BreakIterator;
00211 
00212 
00213 
00214 public:
00215 
          // Default constructor.
00220     RuleBasedBreakIterator();
00221 
          // Copy constructor.
00228     RuleBasedBreakIterator(const RuleBasedBreakIterator& that);
00229 
          // Construct from rule source text. `parseError` and `status` are
          // non-const references, i.e. outputs for reporting problems with
          // the rules.
00238     RuleBasedBreakIterator( const UnicodeString    &rules,
00239                              UParseError           &parseError,
00240                              UErrorCode            &status);
00241 
00242 
00243 
00244 
          // Construct from precompiled binary rules: a byte pointer plus the
          // length of the data. Errors are reported through `status`.
          // NOTE(review): whether the bytes are copied or merely referenced
          // (and so must outlive the iterator) is not visible here - confirm.
00268     RuleBasedBreakIterator(const uint8_t *compiledRules,
00269                            uint32_t       ruleLength,
00270                            UErrorCode    &status);
00271 
00272 
          // Construct from a UDataMemory image. Errors are reported through
          // `status`. Ownership of `image` is not visible here - confirm.
00285     RuleBasedBreakIterator(UDataMemory* image, UErrorCode &status);
00286 
          // Virtual destructor.
00291     virtual ~RuleBasedBreakIterator();
00292 
          // Copy assignment; returns a reference to a RuleBasedBreakIterator.
00300     RuleBasedBreakIterator& operator=(const RuleBasedBreakIterator& that);
00301 
          // Equality test against any BreakIterator (virtual).
00310     virtual UBool operator==(const BreakIterator& that) const;
00311 
          // Inequality test; non-virtual, defined inline after the class as
          // the negation of operator==.
00319     UBool operator!=(const BreakIterator& that) const;
00320 
          // Returns a BreakIterator pointer to a copy of this object;
          // presumably heap-allocated and owned by the caller - confirm.
00331     virtual BreakIterator* clone() const;
00332 
          // Returns an int32_t hash code for this object.
00338     virtual int32_t hashCode(void) const;
00339 
          // Returns a const reference to the rules as a UnicodeString.
00345     virtual const UnicodeString& getRules(void) const;
00346 
00347     //=======================================================================
00348     // BreakIterator overrides
00349     //=======================================================================
00350 
          // Text access. getText() returns a CharacterIterator reference;
          // getUText() takes an optional `fillIn` UText and a status and
          // returns a UText pointer.
00376     virtual  CharacterIterator& getText(void) const;
00377 
00378 
00393      virtual UText *getUText(UText *fillIn, UErrorCode &status) const;
00394 
          // Text replacement, in three forms: a CharacterIterator pointer
          // (the name adoptText suggests ownership passes to the iterator -
          // confirm), a UnicodeString reference, or a UText pointer with a
          // status out-parameter.
00402     virtual void adoptText(CharacterIterator* newText);
00403 
00410     virtual void setText(const UnicodeString& newText);
00411 
00425     virtual void  setText(UText *text, UErrorCode &status);
00426 
          // Navigation. Each returns an int32_t, presumably a boundary offset
          // in the text; the exact contract (including the end-of-iteration
          // value) is defined by BreakIterator, not visible here.
00432     virtual int32_t first(void);
00433 
00439     virtual int32_t last(void);
00440 
00451     virtual int32_t next(int32_t n);
00452 
00458     virtual int32_t next(void);
00459 
00465     virtual int32_t previous(void);
00466 
00474     virtual int32_t following(int32_t offset);
00475 
00483     virtual int32_t preceding(int32_t offset);
00484 
          // Boolean query on `offset`. Note it is not const.
00493     virtual UBool isBoundary(int32_t offset);
00494 
          // Const query returning an int32_t; presumably the current
          // iteration position.
00500     virtual int32_t current(void) const;
00501 
00502 
          // Rule status queries: a single int32_t value, or a vector written
          // into caller-supplied storage `fillInVec` of size `capacity`, with
          // errors reported through `status`.
          // NOTE(review): the meaning of getRuleStatusVec's return value
          // (presumably a count) is not visible here - confirm.
00535     virtual int32_t getRuleStatus() const;
00536 
00560     virtual int32_t getRuleStatusVec(int32_t *fillInVec, int32_t capacity, UErrorCode &status);
00561 
          // Class-ID accessors: a virtual per-object query and a static
          // per-class query, both returning UClassID.
00573     virtual UClassID getDynamicClassID(void) const;
00574 
00586     static UClassID U_EXPORT2 getStaticClassID(void);
00587 
00588     /*
00589      * Create a clone (copy) of this break iterator in memory provided
00590      *  by the caller.  The idea is to increase performance by avoiding
00591      *  a storage allocation.  Use of this function is NOT RECOMMENDED.
00592      *  Performance gains are minimal, and correct buffer management is
00593      *  tricky.  Use clone() instead.
00594      *
00595      * @param stackBuffer  The pointer to the memory into which the cloned object
00596      *                     should be placed.  If NULL,  allocate heap memory
00597      *                     for the cloned object.
00598      * @param BufferSize   The size of the buffer.  If zero, return the required
00599      *                     buffer size, but do not clone the object.  If the
00600      *                     size was too small (but not zero), allocate heap
00601      *                     storage for the cloned object.
00602      *
00603      * @param status       Error status.  U_SAFECLONE_ALLOCATED_WARNING will be
00604      *                     returned if the provided buffer was too small, and
00605      *                     the clone was therefore put on the heap.
00606      *
00607      * @return  Pointer to the clone object.  This may differ from the stackBuffer
00608      *          address if the byte alignment of the stack buffer was not suitable
00609      *          or if the stackBuffer was too small to hold the clone.
00610      * @stable ICU 2.0
00611      */
00612     virtual BreakIterator *  createBufferClone(void *stackBuffer,
00613                                                int32_t &BufferSize,
00614                                                UErrorCode &status);
00615 
00616 
          // Returns a pointer to the binary (compiled) rules as bytes;
          // `length` is a non-const reference, presumably set to the size of
          // the returned data. Lifetime of the returned pointer is not
          // visible here - confirm.
00634     virtual const uint8_t *getBinaryRules(uint32_t &length);
00635 
00636 
00637 protected:
00638     //=======================================================================
00639     // implementation
00640     //=======================================================================
          // Virtual reset hook for subclasses; what state it clears is
          // defined in the implementation.
00646     virtual void reset(void);
00647 
          // The two declarations inside this #if 0 block are compiled out and
          // are therefore not part of the class.
00648 #if 0
00649 
00657     virtual UBool isDictionaryChar(UChar32);
00658 
00663     virtual int32_t getBreakType() const;
00664 #endif
00665 
          // Sets the break type; presumably stores into fBreakType.
00670     virtual void setBreakType(int32_t type);
00671 
          // Non-virtual initialization helper; presumably shared by the
          // constructors - confirm.
00677     void init();
00678 
00679 private:
00680 
          // Private workers taking an RBBIStateTable and returning an int32_t.
          // NOTE(review): the names suggest these run the state machine
          // backward / forward to find the adjacent boundary - confirm.
00690     int32_t handlePrevious(const RBBIStateTable *statetable);
00691 
00701     int32_t handleNext(const RBBIStateTable *statetable);
00702 
00703 protected:
00704 
          // Dictionary check over the range [startPos, endPos] with a
          // direction flag; returns an int32_t. Whether the range is
          // inclusive and what the result denotes are not visible here.
00719     int32_t checkDictionary(int32_t startPos, int32_t endPos, UBool reverse);
00720 
00721 private:
00722 
          // Returns a (const) LanguageBreakEngine pointer for code point `c`;
          // whether the result can be NULL is not visible here.
00729     const LanguageBreakEngine *getLanguageBreakEngine(UChar32 c);
00730 
          // Presumably brings fLastRuleStatusIndex up to date and sets
          // fLastStatusIndexValid - confirm in the implementation.
00734     void makeRuleStatusValid();
00735 
00736 };
00737 
00738 //------------------------------------------------------------------------------
00739 //
00740 //   Inline Functions Definitions ...
00741 //
00742 //------------------------------------------------------------------------------
00743 
// Inequality is expressed purely as the logical negation of operator==.
// operator== is declared virtual in the class, so the call below dispatches
// to the most-derived override.
00744 inline UBool RuleBasedBreakIterator::operator!=(const BreakIterator& that) const {
00745     return !(this->operator==(that));
00746 }
00747 
00748 U_NAMESPACE_END
00749 
00750 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */
00751 
00752 #endif
 All Data Structures Files Functions Variables Typedefs Enumerations Enumerator Friends Defines