ICU 4.8.1.1
4.8.1.1
|
The RuleBasedCollator class provides the simple implementation of Collator, using data-driven tables. More...
#include <tblcoll.h>
Public Member Functions | |
RuleBasedCollator (const UnicodeString &rules, UErrorCode &status) | |
RuleBasedCollator constructor. | |
RuleBasedCollator (const UnicodeString &rules, ECollationStrength collationStrength, UErrorCode &status) | |
RuleBasedCollator constructor. | |
RuleBasedCollator (const UnicodeString &rules, UColAttributeValue decompositionMode, UErrorCode &status) | |
RuleBasedCollator constructor. | |
RuleBasedCollator (const UnicodeString &rules, ECollationStrength collationStrength, UColAttributeValue decompositionMode, UErrorCode &status) | |
RuleBasedCollator constructor. | |
RuleBasedCollator (const RuleBasedCollator &other) | |
Copy constructor. | |
RuleBasedCollator (const uint8_t *bin, int32_t length, const RuleBasedCollator *base, UErrorCode &status) | |
Opens a collator from a collator binary image created using cloneBinary. | |
virtual | ~RuleBasedCollator () |
Destructor. | |
RuleBasedCollator & | operator= (const RuleBasedCollator &other) |
Assignment operator. | |
virtual UBool | operator== (const Collator &other) const |
Returns true if argument is the same as this object. | |
virtual UBool | operator!= (const Collator &other) const |
Returns true if argument is not the same as this object. | |
virtual Collator * | clone (void) const |
Makes a deep copy of the object. | |
virtual CollationElementIterator * | createCollationElementIterator (const UnicodeString &source) const |
Creates a collation element iterator for the source string. | |
virtual CollationElementIterator * | createCollationElementIterator (const CharacterIterator &source) const |
Creates a collation element iterator for the source. | |
virtual EComparisonResult | compare (const UnicodeString &source, const UnicodeString &target) const |
Compares a range of character data stored in two different strings based on the collation rules. | |
virtual UCollationResult | compare (const UnicodeString &source, const UnicodeString &target, UErrorCode &status) const |
The comparison function compares the character data stored in two different strings. | |
virtual EComparisonResult | compare (const UnicodeString &source, const UnicodeString &target, int32_t length) const |
Compares a range of character data stored in two different strings based on the collation rules up to the specified length. | |
virtual UCollationResult | compare (const UnicodeString &source, const UnicodeString &target, int32_t length, UErrorCode &status) const |
Does the same thing as compare but limits the comparison to a specified length. | |
virtual EComparisonResult | compare (const UChar *source, int32_t sourceLength, const UChar *target, int32_t targetLength) const |
The comparison function compares the character data stored in two different string arrays. | |
virtual UCollationResult | compare (const UChar *source, int32_t sourceLength, const UChar *target, int32_t targetLength, UErrorCode &status) const |
The comparison function compares the character data stored in two different string arrays. | |
virtual UCollationResult | compare (UCharIterator &sIter, UCharIterator &tIter, UErrorCode &status) const |
Compares two strings using the Collator. | |
virtual CollationKey & | getCollationKey (const UnicodeString &source, CollationKey &key, UErrorCode &status) const |
Transforms a specified region of the string into a series of characters that can be compared with CollationKey.compare. | |
virtual CollationKey & | getCollationKey (const UChar *source, int32_t sourceLength, CollationKey &key, UErrorCode &status) const |
Transforms a specified region of the string into a series of characters that can be compared with CollationKey.compare. | |
virtual int32_t | hashCode (void) const |
Generates the hash code for the rule-based collation object. | |
virtual const Locale | getLocale (ULocDataLocaleType type, UErrorCode &status) const |
Gets the locale of the Collator. | |
const UnicodeString & | getRules (void) const |
Gets the table-based rules for the collation object. | |
virtual void | getVersion (UVersionInfo info) const |
Gets the version information for a Collator. | |
int32_t | getMaxExpansion (int32_t order) const |
Return the maximum length of any expansion sequences that end with the specified comparison order. | |
virtual UClassID | getDynamicClassID (void) const |
Returns a unique class ID POLYMORPHICALLY. | |
uint8_t * | cloneRuleData (int32_t &length, UErrorCode &status) |
Returns the binary format of the class's rules. | |
int32_t | cloneBinary (uint8_t *buffer, int32_t capacity, UErrorCode &status) |
Creates a binary image of a collator. | |
void | getRules (UColRuleOption delta, UnicodeString &buffer) |
Returns current rules. | |
virtual void | setAttribute (UColAttribute attr, UColAttributeValue value, UErrorCode &status) |
Universal attribute setter. | |
virtual UColAttributeValue | getAttribute (UColAttribute attr, UErrorCode &status) |
Universal attribute getter. | |
virtual uint32_t | setVariableTop (const UChar *varTop, int32_t len, UErrorCode &status) |
Sets the variable top to a collation element value of a string supplied. | |
virtual uint32_t | setVariableTop (const UnicodeString varTop, UErrorCode &status) |
Sets the variable top to a collation element value of a string supplied. | |
virtual void | setVariableTop (const uint32_t varTop, UErrorCode &status) |
Sets the variable top to a collation element value supplied. | |
virtual uint32_t | getVariableTop (UErrorCode &status) const |
Gets the variable top value of a Collator. | |
virtual UnicodeSet * | getTailoredSet (UErrorCode &status) const |
Get a UnicodeSet that contains all the characters and sequences tailored in this collator. | |
virtual Collator * | safeClone (void) |
Thread safe cloning operation. | |
virtual int32_t | getSortKey (const UnicodeString &source, uint8_t *result, int32_t resultLength) const |
Get the sort key as an array of bytes from a UnicodeString. | |
virtual int32_t | getSortKey (const UChar *source, int32_t sourceLength, uint8_t *result, int32_t resultLength) const |
Get the sort key as an array of bytes from a UChar buffer. | |
virtual ECollationStrength | getStrength (void) const |
Determines the minimum strength that will be used in comparison or transformation. | |
virtual void | setStrength (ECollationStrength newStrength) |
Sets the minimum strength to be used in comparison or transformation. | |
virtual int32_t | getReorderCodes (int32_t *dest, int32_t destCapacity, UErrorCode &status) const |
Retrieves the reordering codes for this collator. | |
virtual void | setReorderCodes (const int32_t *reorderCodes, int32_t reorderCodesLength, UErrorCode &status) |
Sets the ordering of scripts for this collator. | |
const UCollator * | getUCollator () |
Get UCollator data struct. | |
Static Public Member Functions | |
static UClassID | getStaticClassID (void) |
Returns the class ID for this class. | |
static int32_t | getEquivalentReorderCodes (int32_t reorderCode, int32_t *dest, int32_t destCapacity, UErrorCode &status) |
Retrieves the reorder codes that are grouped with the given reorder code. | |
Protected Member Functions | |
virtual void | setLocales (const Locale &requestedLocale, const Locale &validLocale, const Locale &actualLocale) |
Used internally by registration to define the requested and valid locales. | |
Friends | |
class | CollationElementIterator |
Used to iterate over collation elements in a character source. | |
class | Collator |
Collator ONLY needs access to RuleBasedCollator(const Locale&, UErrorCode&) | |
class | StringSearch |
Searching over collation elements in a character source. |
The RuleBasedCollator class provides the simple implementation of Collator, using data-driven tables.
The user can create a customized table-based collation.
Important: The ICU collation service has been reimplemented in order to achieve better performance and UCA compliance. For details, see the collation design document.
RuleBasedCollator is a thin C++ wrapper over the C implementation.
For more information about the collation service see the users guide.
The collation service provides correct sort orders for most locales supported in ICU. If specific data for a locale is not available, the order eventually falls back to the UCA sort order.
Sort ordering may be customized by providing your own set of rules. For more on this subject see the Collation customization section of the users guide.
Note, RuleBasedCollator is not to be subclassed.
RuleBasedCollator::RuleBasedCollator | ( | const UnicodeString & | rules, |
UErrorCode & | status | ||
) |
RuleBasedCollator constructor.
This takes the table rules and builds a collation table out of them. Please see RuleBasedCollator class description for more details on the collation rule syntax.
rules | the collation rules to build the collation table from. |
status | reporting a success or an error. |
RuleBasedCollator::RuleBasedCollator | ( | const UnicodeString & | rules, |
ECollationStrength | collationStrength, | ||
UErrorCode & | status | ||
) |
RuleBasedCollator constructor.
This takes the table rules and builds a collation table out of them. Please see RuleBasedCollator class description for more details on the collation rule syntax.
rules | the collation rules to build the collation table from. |
collationStrength | default strength for comparison |
status | reporting a success or an error. |
RuleBasedCollator::RuleBasedCollator | ( | const UnicodeString & | rules, |
UColAttributeValue | decompositionMode, | ||
UErrorCode & | status | ||
) |
RuleBasedCollator constructor.
This takes the table rules and builds a collation table out of them. Please see RuleBasedCollator class description for more details on the collation rule syntax.
rules | the collation rules to build the collation table from. |
decompositionMode | the normalisation mode |
status | reporting a success or an error. |
RuleBasedCollator::RuleBasedCollator | ( | const UnicodeString & | rules, |
ECollationStrength | collationStrength, | ||
UColAttributeValue | decompositionMode, | ||
UErrorCode & | status | ||
) |
RuleBasedCollator constructor.
This takes the table rules and builds a collation table out of them. Please see RuleBasedCollator class description for more details on the collation rule syntax.
rules | the collation rules to build the collation table from. |
collationStrength | default strength for comparison |
decompositionMode | the normalisation mode |
status | reporting a success or an error. |
RuleBasedCollator::RuleBasedCollator | ( | const RuleBasedCollator & | other | ) |
Copy constructor.
other | the RuleBasedCollator object to be copied |
RuleBasedCollator::RuleBasedCollator | ( | const uint8_t * | bin, |
int32_t | length, | ||
const RuleBasedCollator * | base, | ||
UErrorCode & | status | ||
) |
Opens a collator from a collator binary image created using cloneBinary.
The binary image used to instantiate the collator remains owned by the user and should stay around for the lifetime of the collator. The API also takes a base collator, which usually should be UCA.
bin | binary image owned by the user and required through the lifetime of the collator |
length | size of the image. If negative, the API will try to figure out the length of the image |
base | fallback collator, usually UCA. Base is required to be present through the lifetime of the collator. Currently it cannot be NULL. |
status | for catching errors |
virtual RuleBasedCollator::~RuleBasedCollator | ( | ) | [virtual] |
Destructor.
virtual Collator* RuleBasedCollator::clone | ( | void | ) | const [virtual] |
int32_t RuleBasedCollator::cloneBinary | ( | uint8_t * | buffer, |
int32_t | capacity, | ||
UErrorCode & | status | ||
) |
Creates a binary image of a collator.
This binary image can be stored and later used to instantiate a collator using ucol_openBinary. This API supports preflighting.
buffer | a fill-in buffer to receive the binary image |
capacity | capacity of the destination buffer |
status | for catching errors |
uint8_t* RuleBasedCollator::cloneRuleData | ( | int32_t & | length, |
UErrorCode & | status | ||
) |
Returns the binary format of the class's rules.
The format is that of .col files.
length | Returns the length of the data, in bytes |
status | the error code status. |
virtual EComparisonResult RuleBasedCollator::compare | ( | const UnicodeString & | source, |
const UnicodeString & | target | ||
) | const [virtual] |
Compares a range of character data stored in two different strings based on the collation rules.
Returns information about whether a string is less than, greater than or equal to another string in a language. This can be overridden in a subclass.
source | the source string. |
target | the target string to be compared with the source string. |
Reimplemented from Collator.
virtual UCollationResult RuleBasedCollator::compare | ( | const UnicodeString & | source, |
const UnicodeString & | target, | ||
UErrorCode & | status | ||
) | const [virtual] |
The comparison function compares the character data stored in two different strings.
Returns information about whether a string is less than, greater than or equal to another string.
source | the source string to be compared with. |
target | the string that is to be compared with the source string. |
status | possible error code |
Implements Collator.
virtual EComparisonResult RuleBasedCollator::compare | ( | const UnicodeString & | source, |
const UnicodeString & | target, | ||
int32_t | length | ||
) | const [virtual] |
Compares a range of character data stored in two different strings based on the collation rules up to the specified length.
Returns information about whether a string is less than, greater than or equal to another string in a language. This can be overridden in a subclass.
source | the source string. |
target | the target string to be compared with the source string. |
length | compares up to the specified length |
Reimplemented from Collator.
virtual UCollationResult RuleBasedCollator::compare | ( | const UnicodeString & | source, |
const UnicodeString & | target, | ||
int32_t | length, | ||
UErrorCode & | status | ||
) | const [virtual] |
Does the same thing as compare but limits the comparison to a specified length.
source | the source string to be compared with. |
target | the string that is to be compared with the source string. |
length | the length the comparison is limited to |
status | possible error code |
Implements Collator.
virtual EComparisonResult RuleBasedCollator::compare | ( | const UChar * | source, |
int32_t | sourceLength, | ||
const UChar * | target, | ||
int32_t | targetLength | ||
) | const [virtual] |
The comparison function compares the character data stored in two different string arrays.
Returns information about whether a string array is less than, greater than or equal to another string array.
Example of use:
UChar ABC[] = {0x41, 0x42, 0x43, 0}; // = "ABC"
UChar abc[] = {0x61, 0x62, 0x63, 0}; // = "abc"
UErrorCode status = U_ZERO_ERROR;
Collator *myCollation =
    Collator::createInstance(Locale::US, status);
if (U_FAILURE(status)) return;
myCollation->setStrength(Collator::PRIMARY);
// result would be Collator::EQUAL ("abc" == "ABC")
// (no primary difference between "abc" and "ABC")
Collator::EComparisonResult result =
    myCollation->compare(abc, 3, ABC, 3);
myCollation->setStrength(Collator::TERTIARY);
// result would be Collator::LESS ("abc" <<< "ABC")
// (with tertiary difference between "abc" and "ABC")
result = myCollation->compare(abc, 3, ABC, 3);
source | the source string array to be compared with. |
sourceLength | the length of the source string array. If this value is equal to -1, the string array is null-terminated. |
target | the string that is to be compared with the source string. |
targetLength | the length of the target string array. If this value is equal to -1, the string array is null-terminated. |
Reimplemented from Collator.
virtual UCollationResult RuleBasedCollator::compare | ( | const UChar * | source, |
int32_t | sourceLength, | ||
const UChar * | target, | ||
int32_t | targetLength, | ||
UErrorCode & | status | ||
) | const [virtual] |
The comparison function compares the character data stored in two different string arrays.
Returns information about whether a string array is less than, greater than or equal to another string array.
source | the source string array to be compared with. |
sourceLength | the length of the source string array. If this value is equal to -1, the string array is null-terminated. |
target | the string that is to be compared with the source string. |
targetLength | the length of the target string array. If this value is equal to -1, the string array is null-terminated. |
status | possible error code |
Implements Collator.
virtual UCollationResult RuleBasedCollator::compare | ( | UCharIterator & | sIter, |
UCharIterator & | tIter, | ||
UErrorCode & | status | ||
) | const [virtual] |
Compares two strings using the Collator.
Returns whether the first one compares less than/equal to/greater than the second one. This version takes UCharIterator input.
sIter | the first ("source") string iterator |
tIter | the second ("target") string iterator |
status | ICU status |
Reimplemented from Collator.
virtual CollationElementIterator* RuleBasedCollator::createCollationElementIterator | ( | const UnicodeString & | source | ) | const [virtual] |
Creates a collation element iterator for the source string.
The caller of this method is responsible for the memory management of the return pointer.
source | the string over which the CollationElementIterator will iterate. |
virtual CollationElementIterator* RuleBasedCollator::createCollationElementIterator | ( | const CharacterIterator & | source | ) | const [virtual] |
Creates a collation element iterator for the source.
The caller of this method is responsible for the memory management of the returned pointer.
source | the CharacterIterator which produces the characters over which the CollationElementIterator will iterate. |
virtual UColAttributeValue RuleBasedCollator::getAttribute | ( | UColAttribute | attr, |
UErrorCode & | status | ||
) | [virtual] |
virtual CollationKey& RuleBasedCollator::getCollationKey | ( | const UnicodeString & | source, |
CollationKey & | key, | ||
UErrorCode & | status | ||
) | const [virtual] |
Transforms a specified region of the string into a series of characters that can be compared with CollationKey.compare.
Use a CollationKey when you need to do repeated comparisons on the same string. For a single comparison the compare method will be faster.
source | the source string. |
key | the transformed key of the source string. |
status | the error code status. |
Implements Collator.
virtual CollationKey& RuleBasedCollator::getCollationKey | ( | const UChar * | source, |
int32_t | sourceLength, | ||
CollationKey & | key, | ||
UErrorCode & | status | ||
) | const [virtual] |
Transforms a specified region of the string into a series of characters that can be compared with CollationKey.compare.
Use a CollationKey when you need to do repeated comparisons on the same string. For a single comparison the compare method will be faster.
source | the source string. |
sourceLength | the length of the source string. |
key | the transformed key of the source string. |
status | the error code status. |
Implements Collator.
virtual UClassID RuleBasedCollator::getDynamicClassID | ( | void | ) | const [virtual] |
Returns a unique class ID POLYMORPHICALLY.
Pure virtual override. This method is to implement a simple version of RTTI, since not all C++ compilers support genuine RTTI. Polymorphic operator==() and clone() methods call this method.
Implements Collator.
static int32_t RuleBasedCollator::getEquivalentReorderCodes | ( | int32_t | reorderCode, |
int32_t * | dest, | ||
int32_t | destCapacity, | ||
UErrorCode & | status | ||
) | [static] |
Retrieves the reorder codes that are grouped with the given reorder code.
Some reorder codes will be grouped and must reorder together.
reorderCode | The reorder code to determine equivalence for. |
dest | The array to fill with the script equivalence reordering codes. |
destCapacity | The length of dest. If it is 0, then dest may be NULL and the function will only return the length of the result without writing any of the result string (pre-flighting). |
status | A reference to an error code value, which must not indicate a failure before the function call. |
Reimplemented from Collator.
virtual const Locale RuleBasedCollator::getLocale | ( | ULocDataLocaleType | type, |
UErrorCode & | status | ||
) | const [virtual] |
Gets the locale of the Collator.
type | can be either requested, valid or actual locale. For more information see the definition of ULocDataLocaleType in uloc.h |
status | the error code status. |
Implements Collator.
int32_t RuleBasedCollator::getMaxExpansion | ( | int32_t | order | ) | const |
Return the maximum length of any expansion sequences that end with the specified comparison order.
order | a collation order returned by previous or next. |
virtual int32_t RuleBasedCollator::getReorderCodes | ( | int32_t * | dest, |
int32_t | destCapacity, | ||
UErrorCode & | status | ||
) | const [virtual] |
Retrieves the reordering codes for this collator.
dest | The array to fill with the script ordering. |
destCapacity | The length of dest. If it is 0, then dest may be NULL and the function will only return the length of the result without writing any of the result string (pre-flighting). |
status | A reference to an error code value, which must not indicate a failure before the function call. |
Reimplemented from Collator.
const UnicodeString& RuleBasedCollator::getRules | ( | void | ) | const |
Gets the table-based rules for the collation object.
void RuleBasedCollator::getRules | ( | UColRuleOption | delta, |
UnicodeString & | buffer | ||
) |
Returns current rules.
Delta defines whether full rules are returned or just the tailoring.
delta | one of UCOL_TAILORING_ONLY, UCOL_FULL_RULES. |
buffer | UnicodeString to store the result rules |
virtual int32_t RuleBasedCollator::getSortKey | ( | const UnicodeString & | source, |
uint8_t * | result, | ||
int32_t | resultLength | ||
) | const [virtual] |
Get the sort key as an array of bytes from a UnicodeString.
source | string to be processed. |
result | buffer to store result in. If NULL, number of bytes needed will be returned. |
resultLength | length of the result buffer. If it is not large enough, the buffer will be filled to capacity. |
Implements Collator.
virtual int32_t RuleBasedCollator::getSortKey | ( | const UChar * | source, |
int32_t | sourceLength, | ||
uint8_t * | result, | ||
int32_t | resultLength | ||
) | const [virtual] |
Get the sort key as an array of bytes from a UChar buffer.
source | string to be processed. |
sourceLength | length of string to be processed. If -1, the string is 0 terminated and length will be decided by the function. |
result | buffer to store result in. If NULL, number of bytes needed will be returned. |
resultLength | length of the result buffer. If it is not large enough, the buffer will be filled to capacity. |
Implements Collator.
static UClassID RuleBasedCollator::getStaticClassID | ( | void | ) | [static] |
Returns the class ID for this class.
This is useful only for comparing to a return value from getDynamicClassID(). For example:
Base* polymorphic_pointer = createPolymorphicObject();
if (polymorphic_pointer->getDynamicClassID() == Derived::getStaticClassID()) ...
virtual ECollationStrength RuleBasedCollator::getStrength | ( | void | ) | const [virtual] |
Determines the minimum strength that will be used in comparison or transformation.
E.g. with strength == SECONDARY, the tertiary difference is ignored
E.g. with strength == PRIMARY, the secondary and tertiary difference are ignored.
Implements Collator.
virtual UnicodeSet* RuleBasedCollator::getTailoredSet | ( | UErrorCode & | status | ) | const [virtual] |
Get a UnicodeSet that contains all the characters and sequences tailored in this collator.
status | error code of the operation |
Reimplemented from Collator.
const UCollator * RuleBasedCollator::getUCollator | ( | ) | [inline] |
Get UCollator data struct.
Used only by StringSearch & intltest.
virtual uint32_t RuleBasedCollator::getVariableTop | ( | UErrorCode & | status | ) | const [virtual] |
virtual void RuleBasedCollator::getVersion | ( | UVersionInfo | info | ) | const [virtual] |
virtual int32_t RuleBasedCollator::hashCode | ( | void | ) | const [virtual] |
RuleBasedCollator& RuleBasedCollator::operator= | ( | const RuleBasedCollator & | other | ) |
Assignment operator.
other | the RuleBasedCollator object to assign from. |
virtual Collator* RuleBasedCollator::safeClone | ( | void | ) | [virtual] |
virtual void RuleBasedCollator::setAttribute | ( | UColAttribute | attr, |
UColAttributeValue | value, | ||
UErrorCode & | status | ||
) | [virtual] |
virtual void RuleBasedCollator::setLocales | ( | const Locale & | requestedLocale, |
const Locale & | validLocale, | ||
const Locale & | actualLocale | ||
) | [protected, virtual] |
virtual void RuleBasedCollator::setReorderCodes | ( | const int32_t * | reorderCodes, |
int32_t | reorderCodesLength, | ||
UErrorCode & | status | ||
) | [virtual] |
Sets the ordering of scripts for this collator.
reorderCodes | An array of script codes in the new order. This can be NULL if the length is also set to 0. An empty array will clear any reordering codes on the collator. |
reorderCodesLength | The length of reorderCodes. |
status | error code |
Reimplemented from Collator.
virtual void RuleBasedCollator::setStrength | ( | ECollationStrength | newStrength | ) | [virtual] |
Sets the minimum strength to be used in comparison or transformation.
newStrength | the new comparison level. |
Implements Collator.
virtual uint32_t RuleBasedCollator::setVariableTop | ( | const UChar * | varTop, |
int32_t | len, | ||
UErrorCode & | status | ||
) | [virtual] |
Sets the variable top to a collation element value of a string supplied.
varTop | one or more (if contraction) UChars to which the variable top should be set |
len | length of variable top string. If -1 it is considered to be zero terminated. |
status | error code. If error code is set, the return value is undefined. Errors set by this function are: U_CE_NOT_FOUND_ERROR if more than one character was passed and there is no such contraction; U_PRIMARY_TOO_LONG_ERROR if the primary for the variable top has more than two bytes. |
Implements Collator.
virtual uint32_t RuleBasedCollator::setVariableTop | ( | const UnicodeString | varTop, |
UErrorCode & | status | ||
) | [virtual] |
Sets the variable top to a collation element value of a string supplied.
varTop | a UnicodeString of one or more (if contraction) UChars to which the variable top should be set |
status | error code. If error code is set, the return value is undefined. Errors set by this function are: U_CE_NOT_FOUND_ERROR if more than one character was passed and there is no such contraction; U_PRIMARY_TOO_LONG_ERROR if the primary for the variable top has more than two bytes. |
Implements Collator.
virtual void RuleBasedCollator::setVariableTop | ( | const uint32_t | varTop, |
UErrorCode & | status | ||
) | [virtual] |