ICU 4.8.1.1
messagepattern.h
Go to the documentation of this file.
00001 /*
00002 *******************************************************************************
00003 *   Copyright (C) 2011, International Business Machines
00004 *   Corporation and others.  All Rights Reserved.
00005 *******************************************************************************
00006 *   file name:  messagepattern.h
00007 *   encoding:   US-ASCII
00008 *   tab size:   8 (not used)
00009 *   indentation:4
00010 *
00011 *   created on: 2011mar14
00012 *   created by: Markus W. Scherer
00013 */
00014 
00015 #ifndef __MESSAGEPATTERN_H__
00016 #define __MESSAGEPATTERN_H__
00017 
00023 #include "unicode/utypes.h"
00024 
00025 #if !UCONFIG_NO_FORMATTING
00026 
00027 #include "unicode/parseerr.h"
00028 #include "unicode/unistr.h"
00029 
/**
 * Apostrophe-handling mode of a MessagePattern.
 * A value of this type is accepted by the MessagePattern(mode, errorCode)
 * constructor and by clearPatternAndSetApostropheMode(), and is returned by
 * getApostropheMode().
 * NOTE(review): the exact quoting rules selected by each mode are not visible
 * in this listing; confirm against the ICU API documentation.
 */
00066 enum UMessagePatternApostropheMode {
          /** Enumerator value 0. Presumably doubling an apostrophe is optional in this mode (TODO confirm). */
00078     UMSGPAT_APOS_DOUBLE_OPTIONAL,
          /** Enumerator value 1. Presumably a literal apostrophe must be doubled in this mode (TODO confirm). */
00087     UMSGPAT_APOS_DOUBLE_REQUIRED
00088 };
/** C-compatible typedef so the enum can be named without the "enum" keyword. */
00092 typedef enum UMessagePatternApostropheMode UMessagePatternApostropheMode;
00093 
/**
 * Type tag of a MessagePattern::Part.
 * Enumerators are numbered implicitly from 0 in declaration order.
 * Within this header, MessagePattern::Part::getArgType() treats ARG_START and
 * ARG_LIMIT parts as carrying a UMessagePatternArgType in their value field,
 * and MessagePattern::Part::hasNumericValue() returns true only for ARG_INT
 * and ARG_DOUBLE.
 * NOTE(review): the per-enumerator descriptions below are inferred from the
 * names only; confirm against the ICU API documentation.
 */
00098 enum UMessagePatternPartType {
          /** Value 0. Presumably marks the start of a message (TODO confirm). */
00108     UMSGPAT_PART_TYPE_MSG_START,
          /** Value 1. Presumably marks the end (limit) of a message (TODO confirm). */
00117     UMSGPAT_PART_TYPE_MSG_LIMIT,
          /** Value 2. Presumably marks pattern syntax to be skipped (TODO confirm). */
00125     UMSGPAT_PART_TYPE_SKIP_SYNTAX,
          /** Value 3. Presumably requests insertion of a character (TODO confirm). */
00132     UMSGPAT_PART_TYPE_INSERT_CHAR,
          /** Value 4. Presumably marks a placeholder to be replaced by a number (TODO confirm). */
00140     UMSGPAT_PART_TYPE_REPLACE_NUMBER,
          /** Value 5. Start of an argument; getArgType() reads the argument type from the part value. */
00147     UMSGPAT_PART_TYPE_ARG_START,
          /** Value 6. End (limit) of an argument; getArgType() reads the argument type from the part value. */
00158     UMSGPAT_PART_TYPE_ARG_LIMIT,
          /** Value 7. Presumably an argument number (TODO confirm). */
00163     UMSGPAT_PART_TYPE_ARG_NUMBER,
          /** Value 8. Presumably an argument name (TODO confirm). */
00169     UMSGPAT_PART_TYPE_ARG_NAME,
          /** Value 9. Presumably an argument type keyword (TODO confirm). */
00175     UMSGPAT_PART_TYPE_ARG_TYPE,
          /** Value 10. Presumably an argument style string (TODO confirm). */
00181     UMSGPAT_PART_TYPE_ARG_STYLE,
          /** Value 11. Presumably a selector inside a complex argument (TODO confirm). */
00187     UMSGPAT_PART_TYPE_ARG_SELECTOR,
          /** Value 12. Integer-valued part; hasNumericValue() returns true for this type. */
00194     UMSGPAT_PART_TYPE_ARG_INT,
          /** Value 13. Double-valued part; hasNumericValue() returns true for this type. */
00202     UMSGPAT_PART_TYPE_ARG_DOUBLE
00203 };
/** C-compatible typedef so the enum can be named without the "enum" keyword. */
00207 typedef enum UMessagePatternPartType UMessagePatternPartType;
00208 
/**
 * Argument type constants.
 * Enumerators are numbered implicitly from 0 in declaration order.
 * MessagePattern::Part::getArgType() returns one of these: the part value cast
 * to this type for ARG_START / ARG_LIMIT parts, otherwise UMSGPAT_ARG_TYPE_NONE.
 * NOTE(review): the per-enumerator descriptions below are inferred from the
 * names only; confirm against the ICU API documentation.
 */
00217 enum UMessagePatternArgType {
          /** Value 0. Returned by Part::getArgType() for parts that are not ARG_START / ARG_LIMIT. */
00222     UMSGPAT_ARG_TYPE_NONE,
          /** Value 1. Presumably an argument with a simple type/style (TODO confirm). */
00228     UMSGPAT_ARG_TYPE_SIMPLE,
          /** Value 2. Presumably a choice-style argument; see parseChoiceStyle() (TODO confirm). */
00234     UMSGPAT_ARG_TYPE_CHOICE,
          /** Value 3. Presumably a plural-style argument; see parsePluralStyle() (TODO confirm). */
00244     UMSGPAT_ARG_TYPE_PLURAL,
          /** Value 4. Presumably a select-style argument; see parseSelectStyle() (TODO confirm). */
00249     UMSGPAT_ARG_TYPE_SELECT
00250 };
/** C-compatible typedef so the enum can be named without the "enum" keyword. */
00254 typedef enum UMessagePatternArgType UMessagePatternArgType;
00255 
/**
 * Negative sentinel constants in an unnamed enum.
 * NOTE(review): presumably these are the special return values of
 * MessagePattern::validateArgumentName(), which returns int32_t; confirm
 * against the ICU API documentation.
 */
00259 enum {
          /** Sentinel -1. Per the name: the argument name is not a number (TODO confirm exact meaning). */
00265     UMSGPAT_ARG_NAME_NOT_NUMBER=-1,
00266 
          /** Sentinel -2. Per the name: the argument name is not valid (TODO confirm exact meaning). */
00274     UMSGPAT_ARG_NAME_NOT_VALID=-2
00275 };
00276 
/**
 * Sentinel constant: the integer -123456789 cast to double.
 * NOTE(review): presumably returned by MessagePattern::getNumericValue() when a
 * part has no numeric value; confirm against the ICU API documentation.
 */
00283 #define UMSGPAT_NO_NUMERIC_VALUE ((double)(-123456789))
00284 
00285 U_NAMESPACE_BEGIN
00286 
00287 class MessagePatternDoubleList;
00288 class MessagePatternPartsList;
00289 
/**
 * Holds a message pattern string together with the results of parsing it.
 *
 * State kept by an instance (see the private data members at the end):
 *  - msg: the pattern string, returned by getPatternString();
 *  - aposMode: the apostrophe mode, returned by getApostropheMode();
 *  - parts / partsLength: an array of Part records, exposed through
 *    countParts() and getPart();
 *  - numericValues / numericValuesLength: an array of doubles;
 *  - hasArgNames / hasArgNumbers / needsAutoQuoting: boolean flags.
 *
 * Only the inline member functions have their behavior visible here; all other
 * members are declarations whose definitions live elsewhere.
 */
00346 class U_COMMON_API MessagePattern : public UObject {
00347 public:
          /**
           * Constructs a MessagePattern. errorCode is an in/out status parameter.
           * NOTE(review): presumably the object starts with an empty pattern and a
           * default apostrophe mode (TODO confirm).
           */
00356     MessagePattern(UErrorCode &errorCode);
00357 
          /**
           * Constructs a MessagePattern for the given apostrophe mode.
           * errorCode is an in/out status parameter.
           */
00367     MessagePattern(UMessagePatternApostropheMode mode, UErrorCode &errorCode);
00368 
          /**
           * Constructs a MessagePattern from a pattern string.
           * parseError is passed as a pointer (presumably it may be NULL; TODO confirm);
           * errorCode is an in/out status parameter.
           */
00387     MessagePattern(const UnicodeString &pattern, UParseError *parseError, UErrorCode &errorCode);
00388 
          /** Copy constructor. */
00394     MessagePattern(const MessagePattern &other);
00395 
          /** Copy assignment operator; returns a MessagePattern reference. */
00402     MessagePattern &operator=(const MessagePattern &other);
00403 
          /** Virtual destructor. */
00408     virtual ~MessagePattern();
00409 
          /**
           * Parses a pattern string.
           * NOTE(review): the returned reference is presumably *this (TODO confirm).
           */
00427     MessagePattern &parse(const UnicodeString &pattern,
00428                           UParseError *parseError, UErrorCode &errorCode);
00429 
          /**
           * Public overload taking a whole pattern string, presumably in choice-style
           * syntax (TODO confirm). A private overload taking a pattern index is
           * declared further below.
           */
00447     MessagePattern &parseChoiceStyle(const UnicodeString &pattern,
00448                                      UParseError *parseError, UErrorCode &errorCode);
00449 
          /** Like parseChoiceStyle(pattern, ...) above, presumably for plural-style syntax (TODO confirm). */
00467     MessagePattern &parsePluralStyle(const UnicodeString &pattern,
00468                                      UParseError *parseError, UErrorCode &errorCode);
00469 
          /** Like parseChoiceStyle(pattern, ...) above, presumably for select-style syntax (TODO confirm). */
00487     MessagePattern &parseSelectStyle(const UnicodeString &pattern,
00488                                      UParseError *parseError, UErrorCode &errorCode);
00489 
          /**
           * Declared here, defined elsewhere. It is the first step of
           * clearPatternAndSetApostropheMode() below.
           */
00495     void clear();
00496 
          /** Calls clear() and then stores mode in aposMode. */
00503     void clearPatternAndSetApostropheMode(UMessagePatternApostropheMode mode) {
00504         clear();
00505         aposMode=mode;
00506     }
00507 
          /** Equality comparison with another MessagePattern. */
00513     UBool operator==(const MessagePattern &other) const;
00514 
          /** Returns the logical negation of operator==(other). */
00520     inline UBool operator!=(const MessagePattern &other) const {
00521         return !operator==(other);
00522     }
00523 
          /** Returns a 32-bit hash code for this object. */
00528     int32_t hashCode() const;
00529 
          /** Returns the stored apostrophe mode (aposMode). */
00534     UMessagePatternApostropheMode getApostropheMode() const {
00535         return aposMode;
00536     }
00537 
00538     // Java has package-private jdkAposMode() here.
00539     // In C++, this is declared in the MessageImpl class.
00540 
          /** Returns a const reference to the internal pattern string (msg); no copy is made. */
00545     const UnicodeString &getPatternString() const {
00546         return msg;
00547     }
00548 
          /** Returns the hasArgNames flag. */
00554     UBool hasNamedArguments() const {
00555         return hasArgNames;
00556     }
00557 
          /** Returns the hasArgNumbers flag. */
00563     UBool hasNumberedArguments() const {
00564         return hasArgNumbers;
00565     }
00566 
          /**
           * Static helper that validates an argument name and returns an int32_t.
           * NOTE(review): presumably returns a non-negative argument number, or
           * UMSGPAT_ARG_NAME_NOT_NUMBER / UMSGPAT_ARG_NAME_NOT_VALID (TODO confirm).
           */
00578     static int32_t validateArgumentName(const UnicodeString &name);
00579 
          /**
           * Returns a UnicodeString by value.
           * NOTE(review): per the name, presumably the pattern with apostrophes
           * auto-quoted, including nested messages (TODO confirm).
           */
00590     UnicodeString autoQuoteApostropheDeep() const;
00591 
          // Forward declaration; Part is defined further below in this class.
00592     class Part;
00593 
          /** Returns the number of parts (partsLength). */
00600     int32_t countParts() const {
00601         return partsLength;
00602     }
00603 
          /**
           * Returns a const reference to parts[i].
           * No bounds check is performed here; i is used directly as an array index.
           */
00610     const Part &getPart(int32_t i) const {
00611         return parts[i];
00612     }
00613 
          /** Returns the type field of part i (via getPart(i), so i is not bounds-checked). */
00621     UMessagePatternPartType getPartType(int32_t i) const {
00622         return getPart(i).type;
00623     }
00624 
          /** Returns the index field of part partIndex (via getPart(), so partIndex is not bounds-checked). */
00632     int32_t getPatternIndex(int32_t partIndex) const {
00633         return getPart(partIndex).index;
00634     }
00635 
          /**
           * Returns msg.tempSubString(part.index, part.length).
           * NOTE(review): tempSubString() presumably yields a read-only alias into msg
           * rather than an independent copy, so the result should not be kept past a
           * change to this pattern (TODO confirm against the UnicodeString docs).
           */
00643     UnicodeString getSubstring(const Part &part) const {
00644         return msg.tempSubString(part.index, part.length);
00645     }
00646 
          /**
           * Returns true when msg.compare(part.index, part.length, s) yields 0, i.e.
           * when the part.length code units of msg starting at part.index compare
           * equal to s.
           */
00654     UBool partSubstringMatches(const Part &part, const UnicodeString &s) const {
00655         return 0==msg.compare(part.index, part.length, s);
00656     }
00657 
          /**
           * Returns a double for the given part.
           * NOTE(review): presumably UMSGPAT_NO_NUMERIC_VALUE when the part is not
           * numeric (TODO confirm).
           */
00664     double getNumericValue(const Part &part) const;
00665 
          /** Returns a double for the part index pluralStart; semantics are defined elsewhere. */
00672     double getPluralOffset(int32_t pluralStart) const;
00673 
          /**
           * Returns the limitPartIndex stored in part start, unless that value is
           * smaller than start, in which case start itself is returned.
           * The result is therefore never less than start.
           */
00682     int32_t getLimitPartIndex(int32_t start) const {
00683         int32_t limit=getPart(start).limitPartIndex;
00684         if(limit<start) {
00685             return start;
00686         }
00687         return limit;
00688     }
00689 
          /**
           * One record of the parts array: a type tag plus index, length, value and
           * limitPartIndex fields. Derives from UMemory. MessagePattern is a friend
           * and reads the private fields directly.
           */
00697     class Part : public UMemory {
00698     public:
              /** Default constructor with an empty body: no field is initialized here. */
00703         Part() {}
00704 
              /** Returns the type field. */
00710         UMessagePatternPartType getType() const {
00711             return type;
00712         }
00713 
              /** Returns the index field (used by MessagePattern as an offset into msg). */
00719         int32_t getIndex() const {
00720             return index;
00721         }
00722 
              /** Returns the length field, widened from uint16_t to int32_t. */
00729         int32_t getLength() const {
00730             return length;
00731         }
00732 
              /** Returns index+length, the offset just past this part. */
00739         int32_t getLimit() const {
00740             return index+length;
00741         }
00742 
              /** Returns the value field, widened from int16_t to int32_t. */
00749         int32_t getValue() const {
00750             return value;
00751         }
00752 
              /**
               * For ARG_START and ARG_LIMIT parts, returns the value field cast to
               * UMessagePatternArgType; for every other part type returns
               * UMSGPAT_ARG_TYPE_NONE.
               * The local variable named type shadows the member of the same name; it
               * holds the same value because it is initialized from getType().
               */
00759         UMessagePatternArgType getArgType() const {
00760             UMessagePatternPartType type=getType();
00761             if(type==UMSGPAT_PART_TYPE_ARG_START || type==UMSGPAT_PART_TYPE_ARG_LIMIT) {
00762                 return (UMessagePatternArgType)value;
00763             } else {
00764                 return UMSGPAT_ARG_TYPE_NONE;
00765             }
00766         }
00767 
              /** Static: returns true only for the ARG_INT and ARG_DOUBLE part types. */
00775         static UBool hasNumericValue(UMessagePatternPartType type) {
00776             return type==UMSGPAT_PART_TYPE_ARG_INT || type==UMSGPAT_PART_TYPE_ARG_DOUBLE;
00777         }
00778 
              /** Equality comparison with another Part. */
00784         UBool operator==(const Part &other) const;
00785 
              /** Returns the logical negation of operator==(other). */
00791         inline UBool operator!=(const Part &other) const {
00792             return !operator==(other);
00793         }
00794 
              /**
               * Combines type, index, length and value, in that order, using the
               * multiplier 37. limitPartIndex does not contribute to the hash.
               */
00799         int32_t hashCode() const {
00800             return ((type*37+index)*37+length)*37+value;
00801         }
00802 
00803     private:
00804         friend class MessagePattern;
00805 
              // 0xffff is the largest value the uint16_t length field can hold.
00806         static const int32_t MAX_LENGTH=0xffff;
              // 0x7fff is the largest value the int16_t value field can hold.
00807         static const int32_t MAX_VALUE=0x7fff;
00808 
00809         // Some fields are not final because they are modified during pattern parsing.
00810         // After pattern parsing, the parts are effectively immutable.
              // Type tag; see UMessagePatternPartType.
00811         UMessagePatternPartType type;
              // Offset used with msg by getSubstring() and partSubstringMatches().
00812         int32_t index;
              // Length used together with index; 16-bit unsigned.
00813         uint16_t length;
              // 16-bit signed value; interpreted as UMessagePatternArgType by getArgType().
00814         int16_t value;
              // Read by MessagePattern::getLimitPartIndex().
00815         int32_t limitPartIndex;
00816     };
00817 
00818 private:
          // Private helpers follow. Only their declarations are visible here, so the
          // notes below describe signatures; behavior is defined elsewhere.
00819     void preParse(const UnicodeString &pattern, UParseError *parseError, UErrorCode &errorCode);
00820 
00821     void postParse();
00822 
          // The parseXxx(int32_t index, ...) helpers below take a pattern index and
          // return an int32_t (presumably the index reached after parsing; TODO confirm).
00823     int32_t parseMessage(int32_t index, int32_t msgStartLength,
00824                          int32_t nestingLevel, UMessagePatternArgType parentType,
00825                          UParseError *parseError, UErrorCode &errorCode);
00826 
00827     int32_t parseArg(int32_t index, int32_t argStartLength, int32_t nestingLevel,
00828                      UParseError *parseError, UErrorCode &errorCode);
00829 
00830     int32_t parseSimpleStyle(int32_t index, UParseError *parseError, UErrorCode &errorCode);
00831 
          // Private overload; distinct from the public parseChoiceStyle(pattern, ...).
00832     int32_t parseChoiceStyle(int32_t index, int32_t nestingLevel,
00833                              UParseError *parseError, UErrorCode &errorCode);
00834 
00835     int32_t parsePluralOrSelectStyle(UMessagePatternArgType argType, int32_t index, int32_t nestingLevel,
00836                                      UParseError *parseError, UErrorCode &errorCode);
00837 
          // Static form operating on an arbitrary string s over the range start..limit.
00846     static int32_t parseArgNumber(const UnicodeString &s, int32_t start, int32_t limit);
00847 
          // Convenience overload: forwards to the static form with msg as the string.
00848     int32_t parseArgNumber(int32_t start, int32_t limit) {
00849         return parseArgNumber(msg, start, limit);
00850     }
00851 
          // Returns void; results and failures are presumably reported through the
          // object state and errorCode (TODO confirm).
00860     void parseDouble(int32_t start, int32_t limit, UBool allowInfinity,
00861                      UParseError *parseError, UErrorCode &errorCode);
00862 
00863     // Java has package-private appendReducedApostrophes() here.
00864     // In C++, this is declared in the MessageImpl class.
00865 
          // The skipXxx(index) helpers take a pattern index and return an int32_t
          // (presumably the index after the skipped text; TODO confirm).
00866     int32_t skipWhiteSpace(int32_t index);
00867 
00868     int32_t skipIdentifier(int32_t index);
00869 
00874     int32_t skipDouble(int32_t index);
00875 
00876     static UBool isArgTypeChar(UChar32 c);
00877 
00878     UBool isChoice(int32_t index);
00879 
00880     UBool isPlural(int32_t index);
00881 
00882     UBool isSelect(int32_t index);
00883 
00888     UBool inMessageFormatPattern(int32_t nestingLevel);
00889 
00894     UBool inTopLevelChoiceMessage(int32_t nestingLevel, UMessagePatternArgType parentType);
00895 
          // The addXxx helpers presumably append to the parts / numericValues storage
          // (TODO confirm); errorCode is an in/out status parameter.
00896     void addPart(UMessagePatternPartType type, int32_t index, int32_t length,
00897                  int32_t value, UErrorCode &errorCode);
00898 
00899     void addLimitPart(int32_t start,
00900                       UMessagePatternPartType type, int32_t index, int32_t length,
00901                       int32_t value, UErrorCode &errorCode);
00902 
00903     void addArgDoublePart(double numericValue, int32_t start, int32_t length, UErrorCode &errorCode);
00904 
00905     void setParseError(UParseError *parseError, int32_t index);
00906 
00907     // No ICU "poor man's RTTI" for this class nor its subclasses.
          // Declared private, so it is not callable through the public interface.
00908     virtual UClassID getDynamicClassID() const;
00909 
00910     UBool init(UErrorCode &errorCode);
00911     UBool copyStorage(const MessagePattern &other, UErrorCode &errorCode);
00912 
          // Apostrophe mode; written by clearPatternAndSetApostropheMode().
00913     UMessagePatternApostropheMode aposMode;
          // The pattern string; returned by getPatternString().
00914     UnicodeString msg;
00915     // ArrayList<Part> parts=new ArrayList<Part>();
          // NOTE(review): parts presumably points at storage owned by partsList
          // (TODO confirm).
00916     MessagePatternPartsList *partsList;
00917     Part *parts;
          // Number of parts; returned by countParts().
00918     int32_t partsLength;
00919     // ArrayList<Double> numericValues;
          // NOTE(review): numericValues presumably points at storage owned by
          // numericValuesList (TODO confirm).
00920     MessagePatternDoubleList *numericValuesList;
00921     double *numericValues;
00922     int32_t numericValuesLength;
          // Returned by hasNamedArguments().
00923     UBool hasArgNames;
          // Returned by hasNumberedArguments().
00924     UBool hasArgNumbers;
00925     UBool needsAutoQuoting;
00926 };
00927 
00928 U_NAMESPACE_END
00929 
00930 #endif  // !UCONFIG_NO_FORMATTING
00931 
00932 #endif  // __MESSAGEPATTERN_H__
 All Data Structures Files Functions Variables Typedefs Enumerations Enumerator Friends Defines