Go to the documentation of this file.00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00032 #ifndef _UCOMMON_UNICODE_H_
00033 #define _UCOMMON_UNICODE_H_
00034
00035 #ifndef _UCOMMON_STRING_H_
00036 #include <ucommon/string.h>
00037 #endif
00038
00039 NAMESPACE_UCOMMON
00040
/**
 * 32 bit signed integer used to carry a single unicode character code.
 * This is the type returned by utf8::codepoint() and accepted wherever a
 * single character is passed to the utf8 helpers.
 */
typedef int32_t ucs4_t;

/**
 * 16 bit signed integer character code.
 * NOTE(review): because the type is signed, values above 0x7fff are stored
 * as negative numbers; confirm callers account for this.
 */
typedef int16_t ucs2_t;

/**
 * Untyped pointer to a buffer of unicode character codes.  The element
 * width is not expressed in the type.  Since this is a pointer typedef,
 * "const unicode_t" means a constant pointer (void *const), not a pointer
 * to constant data.
 */
typedef void *unicode_t;
00056
00062 class __EXPORT utf8
00063 {
00064 public:
00068 static const unsigned ucsize;
00069
00073 static const char *nil;
00074
00080 static unsigned size(const char *codepoint);
00081
00087 static size_t count(const char *string);
00088
00095 static char *offset(char *string, ssize_t position);
00096
00102 static ucs4_t codepoint(const char *encoded);
00103
00109 static size_t chars(const unicode_t string);
00110
00116 static size_t chars(ucs4_t character);
00117
00125 static size_t convert(const unicode_t string, char *buffer, size_t size);
00126
00134 static size_t extract(const char *string, unicode_t unicode, size_t size);
00135
00143 static const char *find(const char *string, ucs4_t character, size_t start = 0);
00144
00152 static const char *rfind(const char *string, ucs4_t character, size_t end = (size_t)-1l);
00153
00160 static unsigned ccount(const char *string, ucs4_t character);
00161
00167 ucs4_t getch(FILE *file);
00168
00175 ucs4_t putch(ucs4_t character, FILE *file);
00176 };
00177
00184 class __EXPORT UString : public String, public utf8
00185 {
00186 protected:
00190 UString();
00191
00196 UString(strsize_t size);
00197
00202 UString(const unicode_t text);
00203
00210 UString(const char *text, strsize_t size);
00211
00218 UString(const unicode_t *text, const unicode_t *end);
00219
00225 UString(const UString& existing);
00226
00231 virtual ~UString();
00232
00239 UString get(strsize_t codepoint, strsize_t size = 0) const;
00240
00247 inline size_t get(unicode_t unicode, size_t size) const
00248 {return utf8::extract(str->text, unicode, size);};
00249
00254 void set(const unicode_t unicode);
00255
00260 void add(const unicode_t unicode);
00261
00267 ucs4_t at(int position) const;
00268
00275 inline size_t operator()(unicode_t unicode, size_t size) const
00276 {return utf8::extract(str->text, unicode, size);};
00277
00284 UString operator()(int codepoint, strsize_t size) const;
00285
00293 const char *operator()(int offset) const;
00294
00300 inline ucs4_t operator[](int position) const
00301 {return UString::at(position);};
00302
00307 inline strsize_t count(void) const
00308 {return utf8::count(str->text);}
00309
00315 unsigned ccount(ucs4_t character) const;
00316
00323 const char *find(ucs4_t character, strsize_t start = 0) const;
00324
00331 const char *rfind(ucs4_t character, strsize_t end = npos) const;
00332 };
00333
00339 class __EXPORT utf8_pointer
00340 {
00341 protected:
00342 uint8_t *text;
00343
00344 public:
00348 utf8_pointer();
00349
00354 utf8_pointer(const char *string);
00355
00360 utf8_pointer(const utf8_pointer& copy);
00361
00366 utf8_pointer& operator ++();
00367
00372 utf8_pointer& operator --();
00373
00379 utf8_pointer& operator +=(long offset);
00380
00386 utf8_pointer& operator -=(long offset);
00387
00393 utf8_pointer operator+(long offset) const;
00394
00400 utf8_pointer operator-(long offset) const;
00401
00406 inline operator bool() const
00407 {return text != NULL;};
00408
00413 inline bool operator!() const
00414 {return text == NULL;};
00415
00421 ucs4_t operator[](long codepoint) const;
00422
00428 utf8_pointer& operator=(const char *string);
00429
00433 void inc(void);
00434
00438 void dec(void);
00439
00445 inline bool operator==(const char *string) const
00446 {return (const char *)text == string;};
00447
00453 inline bool operator!=(const char *string) const
00454 {return (const char *)text != string;};
00455
00460 inline ucs4_t operator*() const
00461 {return utf8::codepoint((const char *)text);};
00462
00467 inline char *c_str(void) const
00468 {return (char *)text;};
00469
00474 inline operator char*() const
00475 {return (char *)text;};
00476
00481 inline size_t len(void) const
00482 {return utf8::count((const char *)text);};
00483 };
00484
00488 typedef UString ustring_t;
00489
00493 typedef utf8_pointer utf8_t;
00494
00495 END_NAMESPACE
00496
00497 #endif