• Main Page
  • Namespaces
  • Data Structures
  • Files
  • Examples
  • File List
  • Globals

ucommon/unicode.h

Go to the documentation of this file.
00001 // Copyright (C) 2009-2010 David Sugar, Tycho Softworks.
00002 //
00003 // This file is part of GNU uCommon C++.
00004 //
00005 // GNU uCommon C++ is free software: you can redistribute it and/or modify
00006 // it under the terms of the GNU Lesser General Public License as published 
00007 // by the Free Software Foundation, either version 3 of the License, or
00008 // (at your option) any later version.
00009 //
00010 // GNU uCommon C++ is distributed in the hope that it will be useful,
00011 // but WITHOUT ANY WARRANTY; without even the implied warranty of
00012 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00013 // GNU Lesser General Public License for more details.
00014 //
00015 // You should have received a copy of the GNU Lesser General Public License
00016 // along with GNU uCommon C++.  If not, see <http://www.gnu.org/licenses/>.
00017 
00032 #ifndef _UCOMMON_UNICODE_H_
00033 #define _UCOMMON_UNICODE_H_
00034 
00035 #ifndef _UCOMMON_STRING_H_
00036 #include <ucommon/string.h>
00037 #endif
00038 
00039 NAMESPACE_UCOMMON
00040 
// Signed 32-bit integer type used by this header for a single character
// value (see utf8::codepoint(), which returns it).
00045 typedef int32_t ucs4_t;
00046 
// Signed 16-bit integer type.
// NOTE(review): being signed, any 16-bit value >= 0x8000 stored here reads
// back as negative; confirm that callers account for this.
00050 typedef int16_t ucs2_t;
00051 
// Untyped pointer used to pass unicode text; the element type it points to
// is not visible in this header.
// Because this is a pointer typedef, `const unicode_t` means `void *const`
// (a constant pointer), NOT a pointer to constant data.
00055 typedef void *unicode_t;
00056 
// Collection of helper functions for working with utf8 encoded character
// strings.  The class declares no instance data; every member below except
// getch() and putch() is static.  Only declarations are visible here, so
// notes on what a function computes are inferred from its signature and
// are marked as such.
00062 class __EXPORT utf8
00063 {
00064 public:
          // Constant defined elsewhere; presumably the size of a unicode
          // character unit - TODO confirm against the definition.
00068     static const unsigned ucsize;
00069 
          // Constant string pointer defined elsewhere; its value is not
          // visible in this header.
00073     static const char *nil;
00074 
          // Takes a pointer to encoded text and returns an unsigned value;
          // presumably the byte length of the code point found there.
00080     static unsigned size(const char *codepoint);
00081 
          // Takes a string and returns a size_t; presumably the number of
          // code points (not bytes) in it.
00087     static size_t count(const char *string);
00088 
          // Returns a pointer into a mutable string for a given position.
          // The position is signed (ssize_t); presumably negative values
          // are meaningful - confirm against the implementation.
00095     static char *offset(char *string, ssize_t position);
00096 
          // Returns the ucs4_t value for the encoded text pointed to.
00102     static ucs4_t codepoint(const char *encoded);
00103 
          // Two overloads returning size_t, one taking unicode text and one
          // taking a single ucs4_t; presumably the number of utf8 bytes
          // needed to encode the argument.
          // Note: `const unicode_t` is a const POINTER (unicode_t is
          // `void *`), so the pointed-to data is not const-protected.
00109     static size_t chars(const unicode_t string);
00110 
00116     static size_t chars(ucs4_t character);
00117 
          // Unicode text in, caller supplied char buffer of `size` out;
          // returns size_t.  The exact meaning of the return value (bytes
          // or code points converted) is not visible here.
00125     static size_t convert(const unicode_t string, char *buffer, size_t size);
00126 
          // Reverse direction of convert(): utf8 string in, unicode buffer
          // of `size` out; returns size_t.
00134     static size_t extract(const char *string, unicode_t unicode, size_t size);
00135 
          // Search a string for a ucs4_t character, beginning at `start`
          // (default 0).  Returns a pointer into the string; the failure
          // value is not visible here (presumably NULL).
00143     static const char *find(const char *string, ucs4_t character, size_t start = 0);
00144 
          // Counterpart of find() bounded by `end`; the default
          // (size_t)-1l is the largest size_t value.  Presumably searches
          // backwards from `end` - confirm.
00152     static const char *rfind(const char *string, ucs4_t character, size_t end = (size_t)-1l);
00153 
          // Returns an unsigned value for a string and a character;
          // presumably the number of occurrences of the character.
00160     static unsigned ccount(const char *string, ucs4_t character);
00161 
          // Read one ucs4_t value from a stdio FILE.
          // NOTE(review): unlike every other member, getch() and putch()
          // are NOT declared static, so they need a utf8 (or derived)
          // object to be called even though the class has no instance
          // data - confirm whether this is intentional.
00167     ucs4_t getch(FILE *file);
00168 
          // Write one ucs4_t value to a stdio FILE; returns a ucs4_t
          // (presumably the character written, or an error value).
00175     ucs4_t putch(ucs4_t character, FILE *file);
00176 };
00177 
// A String that is also utf8 aware: it derives from both String and the
// utf8 helper class and adds code point oriented accessors.
//
// All members are public.  Constructors, the destructor and the accessors
// must be reachable from client code, otherwise the ustring_t alias for
// this class could be neither created nor destroyed outside of a derived
// class.
//
// `str` and `npos` are not declared in this class; presumably both are
// inherited from String - confirm against ucommon/string.h.
00184 class __EXPORT UString : public String, public utf8
00185 {
00186 public:
          // Default constructor.
00190     UString();
00191 
          // Construct from a strsize_t; presumably a storage size to
          // reserve - confirm against the implementation.
00196     UString(strsize_t size);
00197 
          // Construct from unicode text.  Note that `const unicode_t` is a
          // const pointer (void *const), not a pointer to const data.
00202     UString(const unicode_t text);
00203 
          // Construct from a character string together with a size.
00210     UString(const char *text, strsize_t size);
00211 
          // Construct from a [text, end) style pair of pointers to
          // unicode_t values (the exact range convention is not visible
          // here).
00218     UString(const unicode_t *text, const unicode_t *end);
00219 
          // Copy constructor.
00225     UString(const UString& existing);
00226 
          // Virtual destructor.
00231     virtual ~UString();
00232 
          // Returns a new UString selected by a code point position and a
          // size (default 0; the meaning of 0 is not visible here).
00239     UString get(strsize_t codepoint, strsize_t size = 0) const;
00240 
          // Forwards to utf8::extract() on this string's text, writing
          // into the caller supplied unicode buffer of `size`; returns
          // whatever utf8::extract() returns.
00247     inline size_t get(unicode_t unicode, size_t size) const
00248         {return utf8::extract(str->text, unicode, size);};
00249 
          // Takes unicode text; presumably replaces the string content.
00254     void set(const unicode_t unicode);
00255 
          // Takes unicode text; presumably appends it to the string.
00260     void add(const unicode_t unicode);
00261 
          // Returns the ucs4_t value at a (signed) position.
00267     ucs4_t at(int position) const;
00268 
          // Call operator form of get(unicode_t, size_t): forwards to
          // utf8::extract() on this string's text.
00275     inline size_t operator()(unicode_t unicode, size_t size) const
00276         {return utf8::extract(str->text, unicode, size);};
00277 
          // Call operator returning a new UString for a code point
          // position and size.
00284     UString operator()(int codepoint, strsize_t size) const;
00285 
          // Call operator returning a pointer to character data for an
          // offset; presumably the offset counts code points - confirm.
00293     const char *operator()(int offset) const;
00294 
          // Index operator; forwards to UString::at(position).
00300     inline ucs4_t operator[](int position) const
00301         {return UString::at(position);};
00302 
          // Returns utf8::count() of this string's text, converted to
          // strsize_t.
00307     inline strsize_t count(void) const
00308         {return utf8::count(str->text);}
00309 
          // Member counterpart of utf8::ccount() for a single character.
00315     unsigned ccount(ucs4_t character) const;
00316 
          // Member counterpart of utf8::find(); search begins at `start`
          // (default 0).
00323     const char *find(ucs4_t character, strsize_t start = 0) const;
00324 
          // Member counterpart of utf8::rfind(); `end` defaults to npos.
00331     const char *rfind(ucs4_t character, strsize_t end = npos) const;
00332 };
00333 
// Pointer-like wrapper around utf8 encoded text.  It holds a single raw
// pointer and declares increment, decrement, offset and dereference
// operators; the inline dereference and length members go through the
// utf8 helper class.  Nothing in this header shows the wrapper allocating
// or freeing the text it points to.
00339 class __EXPORT utf8_pointer
00340 {
00341 protected:
          // Current position in the text, kept as a byte pointer; NULL is
          // the "not set" state tested by operator bool() / operator!().
00342     uint8_t *text;
00343 
00344 public:
          // Default constructor.
00348     utf8_pointer();
00349 
          // Construct from a character string.
00354     utf8_pointer(const char *string);
00355 
          // Copy constructor.
00360     utf8_pointer(const utf8_pointer& copy);
00361 
          // Prefix increment / decrement (only the prefix forms are
          // declared); presumably move by one code point - confirm.
00366     utf8_pointer& operator ++();
00367 
00372     utf8_pointer& operator --();
00373 
          // In-place adjustment by a signed offset; returns *this by
          // reference per the signature.
00379     utf8_pointer& operator +=(long offset);
00380 
00386     utf8_pointer& operator -=(long offset);
00387 
          // Non-mutating forms: return a new utf8_pointer for the offset.
00393     utf8_pointer operator+(long offset) const;
00394 
00400     utf8_pointer operator-(long offset) const;
00401 
          // True when the wrapped pointer is not NULL.
00406     inline operator bool() const
00407         {return text != NULL;};
00408 
          // True when the wrapped pointer is NULL.
00413     inline bool operator!() const
00414         {return text == NULL;};
00415 
          // Returns the ucs4_t value for a (signed) code point index.
00421     ucs4_t operator[](long codepoint) const;
00422     
          // Re-point this object at a different character string.
00428     utf8_pointer& operator=(const char *string);
00429 
          // Named increment / decrement; presumably the same operations as
          // operator++ / operator-- - confirm.
00433     void inc(void);
00434 
00438     void dec(void);
00439 
          // Equality and inequality compare pointer ADDRESSES only; the
          // characters of the two strings are never examined.
00445     inline bool operator==(const char *string) const
00446         {return (const char *)text == string;};
00447 
00453     inline bool operator!=(const char *string) const
00454         {return (const char *)text != string;};
00455 
          // Dereference: returns utf8::codepoint() of the current position.
00460     inline  ucs4_t operator*() const
00461         {return utf8::codepoint((const char *)text);};
00462 
          // Both accessors below return the wrapped pointer cast to a
          // mutable char *, even though the methods themselves are const.
00467     inline char *c_str(void) const
00468         {return (char *)text;};
00469 
00474     inline operator char*() const
00475         {return (char *)text;};
00476 
          // Returns utf8::count() of the text from the current position.
00481     inline size_t len(void) const
00482         {return utf8::count((const char *)text);};
00483 };
00484 
// Alternate type name for the UString class.
00488 typedef UString ustring_t;
00489 
// Alternate type name for the utf8_pointer class.
00493 typedef utf8_pointer utf8_t;
00494 
00495 END_NAMESPACE
00496 
00497 #endif

Generated on Sun Sep 4 2011 for UCommon by doxygen 1.7.1