ICU 4.8.1.1
4.8.1.1
|
00001 /* 00002 *************************************************************************** 00003 * Copyright (C) 1999-2011 International Business Machines Corporation * 00004 * and others. All rights reserved. * 00005 *************************************************************************** 00006 00007 ********************************************************************** 00008 * Date Name Description 00009 * 10/22/99 alan Creation. 00010 * 11/11/99 rgillam Complete port from Java. 00011 ********************************************************************** 00012 */ 00013 00014 #ifndef RBBI_H 00015 #define RBBI_H 00016 00017 #include "unicode/utypes.h" 00018 00024 #if !UCONFIG_NO_BREAK_ITERATION 00025 00026 #include "unicode/brkiter.h" 00027 #include "unicode/udata.h" 00028 #include "unicode/parseerr.h" 00029 #include "unicode/schriter.h" 00030 #include "unicode/uchriter.h" 00031 00032 00033 struct UTrie; 00034 00035 U_NAMESPACE_BEGIN 00036 00038 struct RBBIDataHeader; 00039 class RuleBasedBreakIteratorTables; 00040 class BreakIterator; 00041 class RBBIDataWrapper; 00042 class UStack; 00043 class LanguageBreakEngine; 00044 class UnhandledEngine; 00045 struct RBBIStateTable; 00046 00047 00048 00049 00065 class U_COMMON_API RuleBasedBreakIterator : public BreakIterator { 00066 00067 protected: 00072 UText *fText; 00073 00079 CharacterIterator *fCharIter; 00080 00086 StringCharacterIterator *fSCharIter; 00087 00093 UCharCharacterIterator *fDCharIter; 00094 00099 RBBIDataWrapper *fData; 00100 00104 int32_t fLastRuleStatusIndex; 00105 00112 UBool fLastStatusIndexValid; 00113 00119 uint32_t fDictionaryCharCount; 00120 00128 int32_t* fCachedBreakPositions; 00129 00134 int32_t fNumCachedBreakPositions; 00135 00141 int32_t fPositionInCache; 00142 00150 UStack *fLanguageBreakEngines; 00151 00159 UnhandledEngine *fUnhandledBreakEngine; 00160 00166 int32_t fBreakType; 00167 00168 protected: 00169 //======================================================================= 00170 // constructors 00171 //======================================================================= 00172 00181 enum EDontAdopt { 00182 kDontAdopt 00183 }; 00184 00195 RuleBasedBreakIterator(RBBIDataHeader* data, UErrorCode &status); 00196 00205 RuleBasedBreakIterator(const RBBIDataHeader* data, enum EDontAdopt dontAdopt, UErrorCode &status); 00206 00207 00208 friend class RBBIRuleBuilder; 00210 friend class BreakIterator; 00211 00212 00213 00214 public: 00215 00220 RuleBasedBreakIterator(); 00221 00228 RuleBasedBreakIterator(const RuleBasedBreakIterator& that); 00229 00238 RuleBasedBreakIterator( const UnicodeString &rules, 00239 UParseError &parseError, 00240 UErrorCode &status); 00241 00242 00243 00244 00268 RuleBasedBreakIterator(const uint8_t *compiledRules, 00269 uint32_t ruleLength, 00270 UErrorCode &status); 00271 00272 00285 RuleBasedBreakIterator(UDataMemory* image, UErrorCode &status); 00286 00291 virtual ~RuleBasedBreakIterator(); 00292 00300 RuleBasedBreakIterator& operator=(const RuleBasedBreakIterator& that); 00301 00310 virtual UBool operator==(const BreakIterator& that) const; 00311 00319 UBool operator!=(const BreakIterator& that) const; 00320 00331 virtual BreakIterator* clone() const; 00332 00338 virtual int32_t hashCode(void) const; 00339 00345 virtual const UnicodeString& getRules(void) const; 00346 00347 //======================================================================= 00348 // BreakIterator overrides 00349 //======================================================================= 00350 00376 virtual CharacterIterator& getText(void) const; 00377 00378 00393 virtual UText *getUText(UText *fillIn, UErrorCode &status) const; 00394 00402 virtual void adoptText(CharacterIterator* newText); 00403 00410 virtual void setText(const UnicodeString& newText); 00411 00425 virtual void setText(UText *text, UErrorCode &status); 00426 00432 virtual int32_t first(void); 00433 00439 virtual int32_t last(void); 00440 00451 virtual int32_t next(int32_t n); 00452 00458 virtual int32_t next(void); 00459 00465 virtual int32_t previous(void); 00466 00474 virtual int32_t following(int32_t offset); 00475 00483 virtual int32_t preceding(int32_t offset); 00484 00493 virtual UBool isBoundary(int32_t offset); 00494 00500 virtual int32_t current(void) const; 00501 00502 00535 virtual int32_t getRuleStatus() const; 00536 00560 virtual int32_t getRuleStatusVec(int32_t *fillInVec, int32_t capacity, UErrorCode &status); 00561 00573 virtual UClassID getDynamicClassID(void) const; 00574 00586 static UClassID U_EXPORT2 getStaticClassID(void); 00587 00588 /* 00589 * Create a clone (copy) of this break iterator in memory provided 00590 * by the caller. The idea is to increase performance by avoiding 00591 * a storage allocation. Use of this functoin is NOT RECOMMENDED. 00592 * Performance gains are minimal, and correct buffer management is 00593 * tricky. Use clone() instead. 00594 * 00595 * @param stackBuffer The pointer to the memory into which the cloned object 00596 * should be placed. If NULL, allocate heap memory 00597 * for the cloned object. 00598 * @param BufferSize The size of the buffer. If zero, return the required 00599 * buffer size, but do not clone the object. If the 00600 * size was too small (but not zero), allocate heap 00601 * storage for the cloned object. 00602 * 00603 * @param status Error status. U_SAFECLONE_ALLOCATED_WARNING will be 00604 * returned if the the provided buffer was too small, and 00605 * the clone was therefore put on the heap. 00606 * 00607 * @return Pointer to the clone object. This may differ from the stackBuffer 00608 * address if the byte alignment of the stack buffer was not suitable 00609 * or if the stackBuffer was too small to hold the clone. 00610 * @stable ICU 2.0 00611 */ 00612 virtual BreakIterator * createBufferClone(void *stackBuffer, 00613 int32_t &BufferSize, 00614 UErrorCode &status); 00615 00616 00634 virtual const uint8_t *getBinaryRules(uint32_t &length); 00635 00636 00637 protected: 00638 //======================================================================= 00639 // implementation 00640 //======================================================================= 00646 virtual void reset(void); 00647 00648 #if 0 00649 00657 virtual UBool isDictionaryChar(UChar32); 00658 00663 virtual int32_t getBreakType() const; 00664 #endif 00665 00670 virtual void setBreakType(int32_t type); 00671 00677 void init(); 00678 00679 private: 00680 00690 int32_t handlePrevious(const RBBIStateTable *statetable); 00691 00701 int32_t handleNext(const RBBIStateTable *statetable); 00702 00703 protected: 00704 00719 int32_t checkDictionary(int32_t startPos, int32_t endPos, UBool reverse); 00720 00721 private: 00722 00729 const LanguageBreakEngine *getLanguageBreakEngine(UChar32 c); 00730 00734 void makeRuleStatusValid(); 00735 00736 }; 00737 00738 //------------------------------------------------------------------------------ 00739 // 00740 // Inline Functions Definitions ... 00741 // 00742 //------------------------------------------------------------------------------ 00743 00744 inline UBool RuleBasedBreakIterator::operator!=(const BreakIterator& that) const { 00745 return !operator==(that); 00746 } 00747 00748 U_NAMESPACE_END 00749 00750 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */ 00751 00752 #endif