MyGUI
3.0.1
|
00001 // Modified from OpenGUI under lenient license 00002 // Original copyright details and licensing below: 00003 // OpenGUI (http://opengui.sourceforge.net) 00004 // This source code is released under the BSD License 00005 00006 // Permission is given to the Ogre project to use the contents of file within its 00007 // source and binary applications, as well as any derivative works, in accordance 00008 // with the terms of any license under which Ogre is or will be distributed. 00009 // 00010 // Ogre may relicense its copy of this file, as well as any OpenGUI released updates 00011 // to this file, under any terms that it deems fit, and is not required to maintain 00012 // the original BSD licensing terms of this file, however OpenGUI retains the right 00013 // to present its copy of this file under the terms of any license under which 00014 // OpenGUI is distributed. 00015 // 00016 // Ogre is not required to release to OpenGUI any future changes that it makes to 00017 // this file, and understands and agrees that any such changes that are released 00018 // back to OpenGUI will become available under the terms of any license under which 00019 // OpenGUI is distributed. 00020 // 00021 // For brevity, this permission text may be removed from this file if desired. 00022 // The original record kept within the SourceForge (http://sourceforge.net/) tracker 00023 // is sufficient. 
00024 // 00025 // - Eric Shorkey (zero/zeroskill) <opengui@rightbracket.com> [January 20th, 2007] 00026 00027 #ifndef __MYGUI_U_STRING_H__ 00028 #define __MYGUI_U_STRING_H__ 00029 00030 #include "MyGUI_Prerequest.h" 00031 #include "MyGUI_Types.h" 00032 #include "MyGUI_Diagnostic.h" 00033 #include "MyGUI_LogManager.h" 00034 00035 // these are explained later 00036 #include <iterator> 00037 #include <string> 00038 #include <stdexcept> 00039 00040 // this pragma used to avoid warnings from some advanced gcc warnings flags 00041 #if MYGUI_COMPILER == MYGUI_COMPILER_GNUC 00042 #pragma GCC system_header 00043 #endif 00044 00045 // Workaround for VC7: 00046 // when build with /MD or /MDd, VC7 have both std::basic_string<unsigned short> and 00047 // basic_string<__wchar_t> instantiated in msvcprt[d].lib/MSVCP71[D].dll, but the header 00048 // files tells compiler that only one of them is over there (based on /Zc:wchar_t compile 00049 // option). And since this file used both of them, causing compiler instantiating another 00050 // one in user object code, which lead to duplicate symbols with msvcprt.lib/MSVCP71[D].dll. 00051 // 00052 #if MYGUI_COMPILER == MYGUI_COMPILER_MSVC && (1300 <= MYGUI_COMP_VER && MYGUI_COMP_VER <= 1310) 00053 00054 # if defined(_DLL_CPPLIB) 00055 00056 namespace std 00057 { 00058 template class _CRTIMP2 basic_string<unsigned short, char_traits<unsigned short>, 00059 allocator<unsigned short> >; 00060 00061 template class _CRTIMP2 basic_string<__wchar_t, char_traits<__wchar_t>, 00062 allocator<__wchar_t> >; 00063 } 00064 00065 # endif // defined(_DLL_CPPLIB) 00066 00067 #endif // MYGUI_COMPILER == MYGUI_COMPILER_MSVC && MYGUI_COMP_VER == 1300 00068 00069 00070 namespace MyGUI 00071 { 00072 00073 /* READ THIS NOTICE BEFORE USING IN YOUR OWN APPLICATIONS 00074 =NOTICE= 00075 This class is not a complete Unicode solution. It purposefully does not 00076 provide certain functionality, such as proper lexical sorting for 00077 Unicode values. 
It does provide comparison operators for the sole purpose 00078 of using UString as an index with std::map and other operator< sorted 00079 containers, but it should NOT be relied upon for meaningful lexical 00080 operations, such as alphabetical sorts. If you need this type of 00081 functionality, look into using ICU instead (http://icu.sourceforge.net/). 00082 00083 =REQUIREMENTS= 00084 There are a few requirements for proper operation. They are fairly small, 00085 and shouldn't restrict usage on any reasonable target. 00086 * Compiler must support unsigned 16-bit integer types 00087 * Compiler must support signed 32-bit integer types 00088 * wchar_t must be either UTF-16 or UTF-32 encoding, and specified as such 00089 using the WCHAR_UTF16 macro as outlined below. 00090 * You must include <iterator>, <string>, and <wchar.h>. Probably more, but 00091 these are the most obvious. 00092 00093 =REQUIRED PREPROCESSOR MACROS= 00094 This class requires two preprocessor macros to be defined in order to 00095 work as advertised. 00096 INT32 - must be mapped to a signed 32 bit integer (ex. #define INT32 int) 00097 UINT16 - must be mapped to an unsigned 16 bit integer (ex. #define UINT16 unsigned short) 00098 00099 Additionally, a third macro should be defined to control the evaluation of wchar_t: 00100 WCHAR_UTF16 - should be defined when wchar_t represents UTF-16 code points, 00101 such as in Windows. Otherwise it is assumed that wchar_t is a 32-bit 00102 integer representing UTF-32 code points. 00103 */ 00104 00105 // THIS IS A VERY BRIEF AUTO DETECTION. 
YOU MAY NEED TO TWEAK THIS 00106 #ifdef __STDC_ISO_10646__ 00107 // for any compiler that provides this, wchar_t is guaranteed to hold any Unicode value with a single code point (32-bit or larger) 00108 // so we can safely skip the rest of the testing 00109 #else // #ifdef __STDC_ISO_10646__ 00110 #if defined( __WIN32__ ) || defined( _WIN32 ) 00111 #define WCHAR_UTF16 // All currently known Windows platforms utilize UTF-16 encoding in wchar_t 00112 #else // #if defined( __WIN32__ ) || defined( _WIN32 ) 00113 #if WCHAR_MAX <= 0xFFFF // this is a last resort fall back test; WCHAR_MAX is defined in <wchar.h> 00114 #define WCHAR_UTF16 // best we can tell, wchar_t is not larger than 16-bit 00115 #endif // #if WCHAR_MAX <= 0xFFFF 00116 #endif // #if defined( __WIN32__ ) || defined( _WIN32 ) 00117 #endif // #ifdef __STDC_ISO_10646__ 00118 00119 00120 // MYGUI_IS_NATIVE_WCHAR_T means that wchar_t isn't a typedef of 00121 // uint16 or uint32. 00122 #if MYGUI_COMPILER == MYGUI_COMPILER_MSVC 00123 00124 // Don't define wchar_t related functions since it'll duplicate 00125 // with UString::code_point related functions when compile 00126 // without /Zc:wchar_t, because in this case both of them are 00127 // a typedef of uint16. 
00128 # if defined(_NATIVE_WCHAR_T_DEFINED) 00129 # define MYGUI_IS_NATIVE_WCHAR_T 1 00130 # else 00131 # define MYGUI_IS_NATIVE_WCHAR_T 0 00132 # endif 00133 00134 #else // MYGUI_COMPILER != MYGUI_COMPILER_MSVC 00135 00136 // Assumed wchar_t is natively for other compilers 00137 # define MYGUI_IS_NATIVE_WCHAR_T 1 00138 00139 #endif // MYGUI_COMPILER == MYGUI_COMPILER_MSVC 00140 00142 00167 class UString 00168 { 00169 // constants used in UTF-8 conversions 00170 static const unsigned char _lead1 = 0xC0; //110xxxxx 00171 static const unsigned char _lead1_mask = 0x1F; //00011111 00172 static const unsigned char _lead2 = 0xE0; //1110xxxx 00173 static const unsigned char _lead2_mask = 0x0F; //00001111 00174 static const unsigned char _lead3 = 0xF0; //11110xxx 00175 static const unsigned char _lead3_mask = 0x07; //00000111 00176 static const unsigned char _lead4 = 0xF8; //111110xx 00177 static const unsigned char _lead4_mask = 0x03; //00000011 00178 static const unsigned char _lead5 = 0xFC; //1111110x 00179 static const unsigned char _lead5_mask = 0x01; //00000001 00180 static const unsigned char _cont = 0x80; //10xxxxxx 00181 static const unsigned char _cont_mask = 0x3F; //00111111 00182 00183 public: 00185 typedef size_t size_type; 00187 static const size_type npos = ~(size_t)0; 00188 00190 typedef uint32 unicode_char; 00191 00193 typedef uint16 code_point; 00194 00196 typedef code_point value_type; 00197 00198 typedef std::basic_string<code_point> dstring; // data string 00199 00201 typedef std::basic_string<unicode_char> utf32string; 00202 00204 class invalid_data: public std::runtime_error 00205 { /* i don't know why the beautifier is freaking out on this line */ 00206 public: 00208 explicit invalid_data( const std::string& _Message ): std::runtime_error( _Message ) 00209 { 00210 /* The thing is, Bob, it's not that I'm lazy, it's that I just don't care. 
*/ 00211 } 00212 }; 00213 00214 //######################################################################### 00216 class _base_iterator: public std::iterator<std::random_access_iterator_tag, value_type> 00217 { /* i don't know why the beautifier is freaking out on this line */ 00218 friend class UString; 00219 protected: 00220 _base_iterator() 00221 { 00222 mString = 0; 00223 } 00224 00225 void _seekFwd( size_type c ) 00226 { 00227 mIter += c; 00228 } 00229 void _seekRev( size_type c ) 00230 { 00231 mIter -= c; 00232 } 00233 void _become( const _base_iterator& i ) 00234 { 00235 mIter = i.mIter; 00236 mString = i.mString; 00237 } 00238 bool _test_begin() const 00239 { 00240 return mIter == mString->mData.begin(); 00241 } 00242 bool _test_end() const 00243 { 00244 return mIter == mString->mData.end(); 00245 } 00246 size_type _get_index() const 00247 { 00248 return mIter - mString->mData.begin(); 00249 } 00250 void _jump_to( size_type index ) 00251 { 00252 mIter = mString->mData.begin() + index; 00253 } 00254 00255 unicode_char _getCharacter() const 00256 { 00257 size_type current_index = _get_index(); 00258 return mString->getChar( current_index ); 00259 } 00260 int _setCharacter( unicode_char uc ) 00261 { 00262 size_type current_index = _get_index(); 00263 int change = mString->setChar( current_index, uc ); 00264 _jump_to( current_index ); 00265 return change; 00266 } 00267 00268 void _moveNext() 00269 { 00270 _seekFwd( 1 ); // move 1 code point forward 00271 if ( _test_end() ) return; // exit if we hit the end 00272 if ( _utf16_surrogate_follow( mIter[0] ) ) 00273 { 00274 // landing on a follow code point means we might be part of a bigger character 00275 // so we test for that 00276 code_point lead_half = 0; 00277 //NB: we can't possibly be at the beginning here, so no need to test 00278 lead_half = mIter[-1]; // check the previous code point to see if we're part of a surrogate pair 00279 if ( _utf16_surrogate_lead( lead_half ) ) 00280 { 00281 _seekFwd( 1 ); // if 
so, then advance 1 more code point 00282 } 00283 } 00284 } 00285 void _movePrev() 00286 { 00287 _seekRev( 1 ); // move 1 code point backwards 00288 if ( _test_begin() ) return; // exit if we hit the beginning 00289 if ( _utf16_surrogate_follow( mIter[0] ) ) 00290 { 00291 // landing on a follow code point means we might be part of a bigger character 00292 // so we test for that 00293 code_point lead_half = 0; 00294 lead_half = mIter[-1]; // check the previous character to see if we're part of a surrogate pair 00295 if ( _utf16_surrogate_lead( lead_half ) ) 00296 { 00297 _seekRev( 1 ); // if so, then rewind 1 more code point 00298 } 00299 } 00300 } 00301 00302 dstring::iterator mIter; 00303 UString* mString; 00304 }; 00305 00306 //######################################################################### 00307 // FORWARD ITERATORS 00308 //######################################################################### 00309 class _const_fwd_iterator; // forward declaration 00310 00312 class _fwd_iterator: public _base_iterator 00313 { /* i don't know why the beautifier is freaking out on this line */ 00314 friend class _const_fwd_iterator; 00315 public: 00316 _fwd_iterator() { } 00317 _fwd_iterator( const _fwd_iterator& i ) 00318 { 00319 _become( i ); 00320 } 00321 00323 _fwd_iterator& operator++() 00324 { 00325 _seekFwd( 1 ); 00326 return *this; 00327 } 00329 _fwd_iterator operator++( int ) 00330 { 00331 _fwd_iterator tmp( *this ); 00332 _seekFwd( 1 ); 00333 return tmp; 00334 } 00335 00337 _fwd_iterator& operator--() 00338 { 00339 _seekRev( 1 ); 00340 return *this; 00341 } 00343 _fwd_iterator operator--( int ) 00344 { 00345 _fwd_iterator tmp( *this ); 00346 _seekRev( 1 ); 00347 return tmp; 00348 } 00349 00351 _fwd_iterator operator+( size_type n ) 00352 { 00353 _fwd_iterator tmp( *this ); 00354 tmp._seekFwd( n ); 00355 return tmp; 00356 } 00358 _fwd_iterator operator+( difference_type n ) 00359 { 00360 _fwd_iterator tmp( *this ); 00361 if ( n < 0 ) 00362 tmp._seekRev( -n ); 
00363 else 00364 tmp._seekFwd( n ); 00365 return tmp; 00366 } 00368 _fwd_iterator operator-( size_type n ) 00369 { 00370 _fwd_iterator tmp( *this ); 00371 tmp._seekRev( n ); 00372 return tmp; 00373 } 00375 _fwd_iterator operator-( difference_type n ) 00376 { 00377 _fwd_iterator tmp( *this ); 00378 if ( n < 0 ) 00379 tmp._seekFwd( -n ); 00380 else 00381 tmp._seekRev( n ); 00382 return tmp; 00383 } 00384 00386 _fwd_iterator& operator+=( size_type n ) 00387 { 00388 _seekFwd( n ); 00389 return *this; 00390 } 00392 _fwd_iterator& operator+=( difference_type n ) 00393 { 00394 if ( n < 0 ) 00395 _seekRev( -n ); 00396 else 00397 _seekFwd( n ); 00398 return *this; 00399 } 00401 _fwd_iterator& operator-=( size_type n ) 00402 { 00403 _seekRev( n ); 00404 return *this; 00405 } 00407 _fwd_iterator& operator-=( difference_type n ) 00408 { 00409 if ( n < 0 ) 00410 _seekFwd( -n ); 00411 else 00412 _seekRev( n ); 00413 return *this; 00414 } 00415 00417 value_type& operator*() const 00418 { 00419 return *mIter; 00420 } 00421 00423 value_type& operator[]( size_type n ) const 00424 { 00425 _fwd_iterator tmp( *this ); 00426 tmp += n; 00427 return *tmp; 00428 } 00430 value_type& operator[]( difference_type n ) const 00431 { 00432 _fwd_iterator tmp( *this ); 00433 tmp += n; 00434 return *tmp; 00435 } 00436 00438 _fwd_iterator& moveNext() 00439 { 00440 _moveNext(); 00441 return *this; 00442 } 00444 _fwd_iterator& movePrev() 00445 { 00446 _movePrev(); 00447 return *this; 00448 } 00450 unicode_char getCharacter() const 00451 { 00452 return _getCharacter(); 00453 } 00455 int setCharacter( unicode_char uc ) 00456 { 00457 return _setCharacter( uc ); 00458 } 00459 }; 00460 00461 00462 //######################################################################### 00464 class _const_fwd_iterator: public _base_iterator 00465 { /* i don't know why the beautifier is freaking out on this line */ 00466 public: 00467 _const_fwd_iterator() { } 00468 _const_fwd_iterator( const _const_fwd_iterator& i ) 00469 
{ 00470 _become( i ); 00471 } 00472 _const_fwd_iterator( const _fwd_iterator& i ) 00473 { 00474 _become( i ); 00475 } 00476 00478 _const_fwd_iterator& operator++() 00479 { 00480 _seekFwd( 1 ); 00481 return *this; 00482 } 00484 _const_fwd_iterator operator++( int ) 00485 { 00486 _const_fwd_iterator tmp( *this ); 00487 _seekFwd( 1 ); 00488 return tmp; 00489 } 00490 00492 _const_fwd_iterator& operator--() 00493 { 00494 _seekRev( 1 ); 00495 return *this; 00496 } 00498 _const_fwd_iterator operator--( int ) 00499 { 00500 _const_fwd_iterator tmp( *this ); 00501 _seekRev( 1 ); 00502 return tmp; 00503 } 00504 00506 _const_fwd_iterator operator+( size_type n ) 00507 { 00508 _const_fwd_iterator tmp( *this ); 00509 tmp._seekFwd( n ); 00510 return tmp; 00511 } 00513 _const_fwd_iterator operator+( difference_type n ) 00514 { 00515 _const_fwd_iterator tmp( *this ); 00516 if ( n < 0 ) 00517 tmp._seekRev( -n ); 00518 else 00519 tmp._seekFwd( n ); 00520 return tmp; 00521 } 00523 _const_fwd_iterator operator-( size_type n ) 00524 { 00525 _const_fwd_iterator tmp( *this ); 00526 tmp._seekRev( n ); 00527 return tmp; 00528 } 00530 _const_fwd_iterator operator-( difference_type n ) 00531 { 00532 _const_fwd_iterator tmp( *this ); 00533 if ( n < 0 ) 00534 tmp._seekFwd( -n ); 00535 else 00536 tmp._seekRev( n ); 00537 return tmp; 00538 } 00539 00541 _const_fwd_iterator& operator+=( size_type n ) 00542 { 00543 _seekFwd( n ); 00544 return *this; 00545 } 00547 _const_fwd_iterator& operator+=( difference_type n ) 00548 { 00549 if ( n < 0 ) 00550 _seekRev( -n ); 00551 else 00552 _seekFwd( n ); 00553 return *this; 00554 } 00556 _const_fwd_iterator& operator-=( size_type n ) 00557 { 00558 _seekRev( n ); 00559 return *this; 00560 } 00562 _const_fwd_iterator& operator-=( difference_type n ) 00563 { 00564 if ( n < 0 ) 00565 _seekFwd( -n ); 00566 else 00567 _seekRev( n ); 00568 return *this; 00569 } 00570 00572 const value_type& operator*() const 00573 { 00574 return *mIter; 00575 } 00576 00578 const 
value_type& operator[]( size_type n ) const 00579 { 00580 _const_fwd_iterator tmp( *this ); 00581 tmp += n; 00582 return *tmp; 00583 } 00585 const value_type& operator[]( difference_type n ) const 00586 { 00587 _const_fwd_iterator tmp( *this ); 00588 tmp += n; 00589 return *tmp; 00590 } 00591 00593 _const_fwd_iterator& moveNext() 00594 { 00595 _moveNext(); 00596 return *this; 00597 } 00599 _const_fwd_iterator& movePrev() 00600 { 00601 _movePrev(); 00602 return *this; 00603 } 00605 unicode_char getCharacter() const 00606 { 00607 return _getCharacter(); 00608 } 00609 00611 friend size_type operator-( const _const_fwd_iterator& left, const _const_fwd_iterator& right ); 00613 friend bool operator==( const _const_fwd_iterator& left, const _const_fwd_iterator& right ); 00615 friend bool operator!=( const _const_fwd_iterator& left, const _const_fwd_iterator& right ); 00617 friend bool operator<( const _const_fwd_iterator& left, const _const_fwd_iterator& right ); 00619 friend bool operator<=( const _const_fwd_iterator& left, const _const_fwd_iterator& right ); 00621 friend bool operator>( const _const_fwd_iterator& left, const _const_fwd_iterator& right ); 00623 friend bool operator>=( const _const_fwd_iterator& left, const _const_fwd_iterator& right ); 00624 00625 }; 00626 00627 //######################################################################### 00628 // REVERSE ITERATORS 00629 //######################################################################### 00630 class _const_rev_iterator; // forward declaration 00632 class _rev_iterator: public _base_iterator 00633 { /* i don't know why the beautifier is freaking out on this line */ 00634 friend class _const_rev_iterator; 00635 public: 00636 _rev_iterator() { } 00637 _rev_iterator( const _rev_iterator& i ) 00638 { 00639 _become( i ); 00640 } 00641 00643 _rev_iterator& operator++() 00644 { 00645 _seekRev( 1 ); 00646 return *this; 00647 } 00649 _rev_iterator operator++( int ) 00650 { 00651 _rev_iterator tmp( *this ); 
00652 _seekRev( 1 ); 00653 return tmp; 00654 } 00655 00657 _rev_iterator& operator--() 00658 { 00659 _seekFwd( 1 ); 00660 return *this; 00661 } 00663 _rev_iterator operator--( int ) 00664 { 00665 _rev_iterator tmp( *this ); 00666 _seekFwd( 1 ); 00667 return tmp; 00668 } 00669 00671 _rev_iterator operator+( size_type n ) 00672 { 00673 _rev_iterator tmp( *this ); 00674 tmp._seekRev( n ); 00675 return tmp; 00676 } 00678 _rev_iterator operator+( difference_type n ) 00679 { 00680 _rev_iterator tmp( *this ); 00681 if ( n < 0 ) 00682 tmp._seekFwd( -n ); 00683 else 00684 tmp._seekRev( n ); 00685 return tmp; 00686 } 00688 _rev_iterator operator-( size_type n ) 00689 { 00690 _rev_iterator tmp( *this ); 00691 tmp._seekFwd( n ); 00692 return tmp; 00693 } 00695 _rev_iterator operator-( difference_type n ) 00696 { 00697 _rev_iterator tmp( *this ); 00698 if ( n < 0 ) 00699 tmp._seekRev( -n ); 00700 else 00701 tmp._seekFwd( n ); 00702 return tmp; 00703 } 00704 00706 _rev_iterator& operator+=( size_type n ) 00707 { 00708 _seekRev( n ); 00709 return *this; 00710 } 00712 _rev_iterator& operator+=( difference_type n ) 00713 { 00714 if ( n < 0 ) 00715 _seekFwd( -n ); 00716 else 00717 _seekRev( n ); 00718 return *this; 00719 } 00721 _rev_iterator& operator-=( size_type n ) 00722 { 00723 _seekFwd( n ); 00724 return *this; 00725 } 00727 _rev_iterator& operator-=( difference_type n ) 00728 { 00729 if ( n < 0 ) 00730 _seekRev( -n ); 00731 else 00732 _seekFwd( n ); 00733 return *this; 00734 } 00735 00737 value_type& operator*() const 00738 { 00739 return mIter[-1]; 00740 } 00741 00743 value_type& operator[]( size_type n ) const 00744 { 00745 _rev_iterator tmp( *this ); 00746 tmp -= n; 00747 return *tmp; 00748 } 00750 value_type& operator[]( difference_type n ) const 00751 { 00752 _rev_iterator tmp( *this ); 00753 tmp -= n; 00754 return *tmp; 00755 } 00756 }; 00757 //######################################################################### 00759 class _const_rev_iterator: public 
_base_iterator 00760 { /* i don't know why the beautifier is freaking out on this line */ 00761 public: 00762 _const_rev_iterator() { } 00763 _const_rev_iterator( const _const_rev_iterator& i ) 00764 { 00765 _become( i ); 00766 } 00767 _const_rev_iterator( const _rev_iterator& i ) 00768 { 00769 _become( i ); 00770 } 00772 _const_rev_iterator& operator++() 00773 { 00774 _seekRev( 1 ); 00775 return *this; 00776 } 00778 _const_rev_iterator operator++( int ) 00779 { 00780 _const_rev_iterator tmp( *this ); 00781 _seekRev( 1 ); 00782 return tmp; 00783 } 00784 00786 _const_rev_iterator& operator--() 00787 { 00788 _seekFwd( 1 ); 00789 return *this; 00790 } 00792 _const_rev_iterator operator--( int ) 00793 { 00794 _const_rev_iterator tmp( *this ); 00795 _seekFwd( 1 ); 00796 return tmp; 00797 } 00798 00800 _const_rev_iterator operator+( size_type n ) 00801 { 00802 _const_rev_iterator tmp( *this ); 00803 tmp._seekRev( n ); 00804 return tmp; 00805 } 00807 _const_rev_iterator operator+( difference_type n ) 00808 { 00809 _const_rev_iterator tmp( *this ); 00810 if ( n < 0 ) 00811 tmp._seekFwd( -n ); 00812 else 00813 tmp._seekRev( n ); 00814 return tmp; 00815 } 00817 _const_rev_iterator operator-( size_type n ) 00818 { 00819 _const_rev_iterator tmp( *this ); 00820 tmp._seekFwd( n ); 00821 return tmp; 00822 } 00824 _const_rev_iterator operator-( difference_type n ) 00825 { 00826 _const_rev_iterator tmp( *this ); 00827 if ( n < 0 ) 00828 tmp._seekRev( -n ); 00829 else 00830 tmp._seekFwd( n ); 00831 return tmp; 00832 } 00833 00835 _const_rev_iterator& operator+=( size_type n ) 00836 { 00837 _seekRev( n ); 00838 return *this; 00839 } 00841 _const_rev_iterator& operator+=( difference_type n ) 00842 { 00843 if ( n < 0 ) 00844 _seekFwd( -n ); 00845 else 00846 _seekRev( n ); 00847 return *this; 00848 } 00850 _const_rev_iterator& operator-=( size_type n ) 00851 { 00852 _seekFwd( n ); 00853 return *this; 00854 } 00856 _const_rev_iterator& operator-=( difference_type n ) 00857 { 00858 if ( n 
< 0 ) 00859 _seekRev( -n ); 00860 else 00861 _seekFwd( n ); 00862 return *this; 00863 } 00864 00866 const value_type& operator*() const 00867 { 00868 return mIter[-1]; 00869 } 00870 00872 const value_type& operator[]( size_type n ) const 00873 { 00874 _const_rev_iterator tmp( *this ); 00875 tmp -= n; 00876 return *tmp; 00877 } 00879 const value_type& operator[]( difference_type n ) const 00880 { 00881 _const_rev_iterator tmp( *this ); 00882 tmp -= n; 00883 return *tmp; 00884 } 00885 00887 friend size_type operator-( const _const_rev_iterator& left, const _const_rev_iterator& right ); 00889 friend bool operator==( const _const_rev_iterator& left, const _const_rev_iterator& right ); 00891 friend bool operator!=( const _const_rev_iterator& left, const _const_rev_iterator& right ); 00893 friend bool operator<( const _const_rev_iterator& left, const _const_rev_iterator& right ); 00895 friend bool operator<=( const _const_rev_iterator& left, const _const_rev_iterator& right ); 00897 friend bool operator>( const _const_rev_iterator& left, const _const_rev_iterator& right ); 00899 friend bool operator>=( const _const_rev_iterator& left, const _const_rev_iterator& right ); 00900 }; 00901 //######################################################################### 00902 00903 typedef _fwd_iterator iterator; 00904 typedef _rev_iterator reverse_iterator; 00905 typedef _const_fwd_iterator const_iterator; 00906 typedef _const_rev_iterator const_reverse_iterator; 00907 00908 00910 00911 00912 UString() 00913 { 00914 _init(); 00915 } 00917 UString( const UString& copy ) 00918 { 00919 _init(); 00920 mData = copy.mData; 00921 } 00923 UString( size_type length, const code_point& ch ) 00924 { 00925 _init(); 00926 assign( length, ch ); 00927 } 00929 UString( const code_point* str ) 00930 { 00931 _init(); 00932 assign( str ); 00933 } 00935 UString( const code_point* str, size_type length ) 00936 { 00937 _init(); 00938 assign( str, length ); 00939 } 00941 UString( const UString& str, 
size_type index, size_type length ) 00942 { 00943 _init(); 00944 assign( str, index, length ); 00945 } 00946 #if MYGUI_IS_NATIVE_WCHAR_T 00947 00948 UString( const wchar_t* w_str ) 00949 { 00950 _init(); 00951 assign( w_str ); 00952 } 00954 UString( const wchar_t* w_str, size_type length ) 00955 { 00956 _init(); 00957 assign( w_str, length ); 00958 } 00959 #endif 00960 00961 UString( const std::wstring& wstr ) 00962 { 00963 _init(); 00964 assign( wstr ); 00965 } 00967 UString( const char* c_str ) 00968 { 00969 _init(); 00970 assign( c_str ); 00971 } 00973 UString( const char* c_str, size_type length ) 00974 { 00975 _init(); 00976 assign( c_str, length ); 00977 } 00979 UString( const std::string& str ) 00980 { 00981 _init(); 00982 assign( str ); 00983 } 00985 ~UString() 00986 { 00987 _cleanBuffer(); 00988 } 00990 00992 00994 00995 00996 size_type size() const 00997 { 00998 return mData.size(); 00999 } 01001 size_type length() const 01002 { 01003 return size(); 01004 } 01006 01007 size_type length_Characters() const 01008 { 01009 const_iterator i = begin(), ie = end(); 01010 size_type c = 0; 01011 while ( i != ie ) 01012 { 01013 i.moveNext(); 01014 ++c; 01015 } 01016 return c; 01017 } 01019 size_type max_size() const 01020 { 01021 return mData.max_size(); 01022 } 01024 void reserve( size_type size ) 01025 { 01026 mData.reserve( size ); 01027 } 01029 void resize( size_type num, const code_point& val = 0 ) 01030 { 01031 mData.resize( num, val ); 01032 } 01034 void swap( UString& from ) 01035 { 01036 mData.swap( from.mData ); 01037 } 01039 bool empty() const 01040 { 01041 return mData.empty(); 01042 } 01044 const code_point* c_str() const 01045 { 01046 return mData.c_str(); 01047 } 01049 const code_point* data() const 01050 { 01051 return c_str(); 01052 } 01054 size_type capacity() const 01055 { 01056 return mData.capacity(); 01057 } 01059 void clear() 01060 { 01061 mData.clear(); 01062 } 01064 01065 UString substr( size_type index, size_type num = npos ) const 01066 { 
01067 // this could avoid the extra copy if we used a private specialty constructor 01068 dstring data = mData.substr( index, num ); 01069 UString tmp; 01070 tmp.mData.swap( data ); 01071 return tmp; 01072 } 01074 void push_back( unicode_char val ) 01075 { 01076 code_point cp[2]; 01077 size_t c = _utf32_to_utf16( val, cp ); 01078 if ( c > 0 ) push_back( cp[0] ); 01079 if ( c > 1 ) push_back( cp[1] ); 01080 } 01081 #if MYGUI_IS_NATIVE_WCHAR_T 01082 01083 void push_back( wchar_t val ) 01084 { 01085 // we do this because the Unicode method still preserves UTF-16 code points 01086 mData.push_back( static_cast<unicode_char>( val ) ); 01087 } 01088 #endif 01089 01090 01092 void push_back( code_point val ) 01093 { 01094 mData.push_back( val ); 01095 } 01097 01098 void push_back( char val ) 01099 { 01100 mData.push_back( static_cast<code_point>( val ) ); 01101 } 01103 bool inString( unicode_char ch ) const 01104 { 01105 const_iterator i, ie = end(); 01106 for ( i = begin(); i != ie; i.moveNext() ) 01107 { 01108 if ( i.getCharacter() == ch ) 01109 return true; 01110 } 01111 return false; 01112 } 01114 01116 01118 01119 01120 const std::string& asUTF8() const 01121 { 01122 _load_buffer_UTF8(); 01123 return *m_buffer.mStrBuffer; 01124 } 01126 const char* asUTF8_c_str() const 01127 { 01128 _load_buffer_UTF8(); 01129 return m_buffer.mStrBuffer->c_str(); 01130 } 01132 const utf32string& asUTF32() const 01133 { 01134 _load_buffer_UTF32(); 01135 return *m_buffer.mUTF32StrBuffer; 01136 } 01138 const unicode_char* asUTF32_c_str() const 01139 { 01140 _load_buffer_UTF32(); 01141 return m_buffer.mUTF32StrBuffer->c_str(); 01142 } 01144 const std::wstring& asWStr() const 01145 { 01146 _load_buffer_WStr(); 01147 return *m_buffer.mWStrBuffer; 01148 } 01150 const wchar_t* asWStr_c_str() const 01151 { 01152 _load_buffer_WStr(); 01153 return m_buffer.mWStrBuffer->c_str(); 01154 } 01156 01158 01160 01161 01162 code_point& at( size_type loc ) 01163 { 01164 return mData.at( loc ); 01165 } 01167 
const code_point& at( size_type loc ) const 01168 { 01169 return mData.at( loc ); 01170 } 01172 01176 unicode_char getChar( size_type loc ) const 01177 { 01178 const code_point* ptr = c_str(); 01179 unicode_char uc; 01180 size_t len = _utf16_char_length( ptr[loc] ); 01181 code_point cp[2] = { /* blame the code beautifier */ 0, 0 }; 01182 cp[0] = ptr[loc]; 01183 01184 if ( len == 2 && ( loc + 1 ) < mData.length() ) 01185 { 01186 cp[1] = ptr[loc+1]; 01187 } 01188 _utf16_to_utf32( cp, uc ); 01189 return uc; 01190 } 01192 01200 int setChar( size_type loc, unicode_char ch ) 01201 { 01202 code_point cp[2] = { /* blame the code beautifier */ 0, 0 }; 01203 size_t lc = _utf32_to_utf16( ch, cp ); 01204 unicode_char existingChar = getChar( loc ); 01205 size_t existingSize = _utf16_char_length( existingChar ); 01206 size_t newSize = _utf16_char_length( ch ); 01207 01208 if ( newSize > existingSize ) 01209 { 01210 at( loc ) = cp[0]; 01211 insert( loc + 1, 1, cp[1] ); 01212 return 1; 01213 } 01214 if ( newSize < existingSize ) 01215 { 01216 erase( loc, 1 ); 01217 at( loc ) = cp[0]; 01218 return -1; 01219 } 01220 01221 // newSize == existingSize 01222 at( loc ) = cp[0]; 01223 if ( lc == 2 ) at( loc + 1 ) = cp[1]; 01224 return 0; 01225 } 01227 01229 01231 01232 01233 iterator begin() 01234 { 01235 iterator i; 01236 i.mIter = mData.begin(); 01237 i.mString = this; 01238 return i; 01239 } 01241 const_iterator begin() const 01242 { 01243 const_iterator i; 01244 i.mIter = const_cast<UString*>( this )->mData.begin(); 01245 i.mString = const_cast<UString*>( this ); 01246 return i; 01247 } 01249 iterator end() 01250 { 01251 iterator i; 01252 i.mIter = mData.end(); 01253 i.mString = this; 01254 return i; 01255 } 01257 const_iterator end() const 01258 { 01259 const_iterator i; 01260 i.mIter = const_cast<UString*>( this )->mData.end(); 01261 i.mString = const_cast<UString*>( this ); 01262 return i; 01263 } 01265 reverse_iterator rbegin() 01266 { 01267 reverse_iterator i; 01268 i.mIter = 
mData.end(); 01269 i.mString = this; 01270 return i; 01271 } 01273 const_reverse_iterator rbegin() const 01274 { 01275 const_reverse_iterator i; 01276 i.mIter = const_cast<UString*>( this )->mData.end(); 01277 i.mString = const_cast<UString*>( this ); 01278 return i; 01279 } 01281 reverse_iterator rend() 01282 { 01283 reverse_iterator i; 01284 i.mIter = mData.begin(); 01285 i.mString = this; 01286 return i; 01287 } 01289 const_reverse_iterator rend() const 01290 { 01291 const_reverse_iterator i; 01292 i.mIter = const_cast<UString*>( this )->mData.begin(); 01293 i.mString = const_cast<UString*>( this ); 01294 return i; 01295 } 01297 01299 01301 01302 01303 UString& assign( iterator start, iterator end ) 01304 { 01305 mData.assign( start.mIter, end.mIter ); 01306 return *this; 01307 } 01309 UString& assign( const UString& str ) 01310 { 01311 mData.assign( str.mData ); 01312 return *this; 01313 } 01315 UString& assign( const code_point* str ) 01316 { 01317 mData.assign( str ); 01318 return *this; 01319 } 01321 UString& assign( const code_point* str, size_type num ) 01322 { 01323 mData.assign( str, num ); 01324 return *this; 01325 } 01327 UString& assign( const UString& str, size_type index, size_type len ) 01328 { 01329 mData.assign( str.mData, index, len ); 01330 return *this; 01331 } 01333 UString& assign( size_type num, const code_point& ch ) 01334 { 01335 mData.assign( num, ch ); 01336 return *this; 01337 } 01339 UString& assign( const std::wstring& wstr ) 01340 { 01341 mData.clear(); 01342 mData.reserve( wstr.length() ); // best guess bulk allocate 01343 #ifdef WCHAR_UTF16 // if we're already working in UTF-16, this is easy 01344 code_point tmp; 01345 std::wstring::const_iterator i, ie = wstr.end(); 01346 for ( i = wstr.begin(); i != ie; ++i ) 01347 { 01348 tmp = static_cast<code_point>( *i ); 01349 mData.push_back( tmp ); 01350 } 01351 #else // otherwise we do it the safe way (which is still 100% safe to pass UTF-16 through, just slower) 01352 code_point cp[3] = 
{ 0, 0, 0 }; 01353 unicode_char tmp; 01354 std::wstring::const_iterator i, ie = wstr.end(); 01355 for ( i = wstr.begin(); i != ie; i++ ) 01356 { 01357 tmp = static_cast<unicode_char>( *i ); 01358 size_t lc = _utf32_to_utf16( tmp, cp ); 01359 if ( lc > 0 ) mData.push_back( cp[0] ); 01360 if ( lc > 1 ) mData.push_back( cp[1] ); 01361 } 01362 #endif 01363 return *this; 01364 } 01365 #if MYGUI_IS_NATIVE_WCHAR_T 01366 01367 UString& assign( const wchar_t* w_str ) 01368 { 01369 std::wstring tmp; 01370 tmp.assign( w_str ); 01371 return assign( tmp ); 01372 } 01374 UString& assign( const wchar_t* w_str, size_type num ) 01375 { 01376 std::wstring tmp; 01377 tmp.assign( w_str, num ); 01378 return assign( tmp ); 01379 } 01380 #endif 01381 01382 UString& assign( const std::string& str ) 01383 { 01384 size_type len = _verifyUTF8( str ); 01385 clear(); // empty our contents, if there are any 01386 reserve( len ); // best guess bulk capacity growth 01387 01388 // This is a 3 step process, converting each byte in the UTF-8 stream to UTF-32, 01389 // then converting it to UTF-16, then finally appending the data buffer 01390 01391 unicode_char uc; // temporary Unicode character buffer 01392 unsigned char utf8buf[7]; // temporary UTF-8 buffer 01393 utf8buf[6] = 0; 01394 size_t utf8len; // UTF-8 length 01395 code_point utf16buff[3]; // temporary UTF-16 buffer 01396 utf16buff[2] = 0; 01397 size_t utf16len; // UTF-16 length 01398 01399 std::string::const_iterator i, ie = str.end(); 01400 for ( i = str.begin(); i != ie; ++i ) 01401 { 01402 utf8len = _utf8_char_length( static_cast<unsigned char>( *i ) ); // estimate bytes to load 01403 for ( size_t j = 0; j < utf8len; j++ ) 01404 { // load the needed UTF-8 bytes 01405 utf8buf[j] = ( static_cast<unsigned char>( *( i + j ) ) ); // we don't increment 'i' here just in case the estimate is wrong (shouldn't happen, but we're being careful) 01406 } 01407 utf8buf[utf8len] = 0; // nul terminate so we throw an exception before running off the end 
of the buffer 01408 utf8len = _utf8_to_utf32( utf8buf, uc ); // do the UTF-8 -> UTF-32 conversion 01409 i += utf8len - 1; // we subtract 1 for the increment of the 'for' loop 01410 01411 utf16len = _utf32_to_utf16( uc, utf16buff ); // UTF-32 -> UTF-16 conversion 01412 append( utf16buff, utf16len ); // append the characters to the string 01413 } 01414 return *this; 01415 } 01417 UString& assign( const char* c_str ) 01418 { 01419 std::string tmp( c_str ); 01420 return assign( tmp ); 01421 } 01423 UString& assign( const char* c_str, size_type num ) 01424 { 01425 std::string tmp; 01426 tmp.assign( c_str, num ); 01427 return assign( tmp ); 01428 } 01430 01432 01434 01435 01436 UString& append( const UString& str ) 01437 { 01438 mData.append( str.mData ); 01439 return *this; 01440 } 01442 UString& append( const code_point* str ) 01443 { 01444 mData.append( str ); 01445 return *this; 01446 } 01448 UString& append( const UString& str, size_type index, size_type len ) 01449 { 01450 mData.append( str.mData, index, len ); 01451 return *this; 01452 } 01454 UString& append( const code_point* str, size_type num ) 01455 { 01456 mData.append( str, num ); 01457 return *this; 01458 } 01460 UString& append( size_type num, code_point ch ) 01461 { 01462 mData.append( num, ch ); 01463 return *this; 01464 } 01466 UString& append( iterator start, iterator end ) 01467 { 01468 mData.append( start.mIter, end.mIter ); 01469 return *this; 01470 } 01471 #if MYGUI_IS_NATIVE_WCHAR_T 01472 01473 UString& append( const wchar_t* w_str, size_type num ) 01474 { 01475 std::wstring tmp( w_str, num ); 01476 return append( tmp ); 01477 } 01479 UString& append( size_type num, wchar_t ch ) 01480 { 01481 return append( num, static_cast<unicode_char>( ch ) ); 01482 } 01483 #endif 01484 01485 UString& append( const char* c_str, size_type num ) 01486 { 01487 UString tmp( c_str, num ); 01488 append( tmp ); 01489 return *this; 01490 } 01492 UString& append( size_type num, char ch ) 01493 { 01494 append( num, 
static_cast<code_point>( ch ) ); 01495 return *this; 01496 } 01498 UString& append( size_type num, unicode_char ch ) 01499 { 01500 code_point cp[2] = { 0, 0 }; 01501 if ( _utf32_to_utf16( ch, cp ) == 2 ) 01502 { 01503 for ( size_type i = 0; i < num; i++ ) 01504 { 01505 append( 1, cp[0] ); 01506 append( 1, cp[1] ); 01507 } 01508 } 01509 else 01510 { 01511 for ( size_type i = 0; i < num; i++ ) 01512 { 01513 append( 1, cp[0] ); 01514 } 01515 } 01516 return *this; 01517 } 01519 01521 01523 01524 01525 iterator insert( iterator i, const code_point& ch ) 01526 { 01527 iterator ret; 01528 ret.mIter = mData.insert( i.mIter, ch ); 01529 ret.mString = this; 01530 return ret; 01531 } 01533 UString& insert( size_type index, const UString& str ) 01534 { 01535 mData.insert( index, str.mData ); 01536 return *this; 01537 } 01539 UString& insert( size_type index, const code_point* str ) 01540 { 01541 mData.insert( index, str ); 01542 return *this; 01543 } 01545 UString& insert( size_type index1, const UString& str, size_type index2, size_type num ) 01546 { 01547 mData.insert( index1, str.mData, index2, num ); 01548 return *this; 01549 } 01551 void insert( iterator i, iterator start, iterator end ) 01552 { 01553 mData.insert( i.mIter, start.mIter, end.mIter ); 01554 } 01556 UString& insert( size_type index, const code_point* str, size_type num ) 01557 { 01558 mData.insert( index, str, num ); 01559 return *this; 01560 } 01561 #if MYGUI_IS_NATIVE_WCHAR_T 01562 01563 UString& insert( size_type index, const wchar_t* w_str, size_type num ) 01564 { 01565 UString tmp( w_str, num ); 01566 insert( index, tmp ); 01567 return *this; 01568 } 01569 #endif 01570 01571 UString& insert( size_type index, const char* c_str, size_type num ) 01572 { 01573 UString tmp( c_str, num ); 01574 insert( index, tmp ); 01575 return *this; 01576 } 01578 UString& insert( size_type index, size_type num, code_point ch ) 01579 { 01580 mData.insert( index, num, ch ); 01581 return *this; 01582 } 01583 #if 
MYGUI_IS_NATIVE_WCHAR_T 01584 01585 UString& insert( size_type index, size_type num, wchar_t ch ) 01586 { 01587 insert( index, num, static_cast<unicode_char>( ch ) ); 01588 return *this; 01589 } 01590 #endif 01591 01592 UString& insert( size_type index, size_type num, char ch ) 01593 { 01594 insert( index, num, static_cast<code_point>( ch ) ); 01595 return *this; 01596 } 01598 UString& insert( size_type index, size_type num, unicode_char ch ) 01599 { 01600 code_point cp[3] = { 0, 0, 0 }; 01601 size_t lc = _utf32_to_utf16( ch, cp ); 01602 if ( lc == 1 ) 01603 { 01604 return insert( index, num, cp[0] ); 01605 } 01606 for ( size_type c = 0; c < num; c++ ) 01607 { 01608 // insert in reverse order to preserve ordering after insert 01609 insert( index, 1, cp[1] ); 01610 insert( index, 1, cp[0] ); 01611 } 01612 return *this; 01613 } 01615 void insert( iterator i, size_type num, const code_point& ch ) 01616 { 01617 mData.insert( i.mIter, num, ch ); 01618 } 01619 #if MYGUI_IS_NATIVE_WCHAR_T 01620 01621 void insert( iterator i, size_type num, const wchar_t& ch ) 01622 { 01623 insert( i, num, static_cast<unicode_char>( ch ) ); 01624 } 01625 #endif 01626 01627 void insert( iterator i, size_type num, const char& ch ) 01628 { 01629 insert( i, num, static_cast<code_point>( ch ) ); 01630 } 01632 void insert( iterator i, size_type num, const unicode_char& ch ) 01633 { 01634 code_point cp[3] = { 0, 0, 0 }; 01635 size_t lc = _utf32_to_utf16( ch, cp ); 01636 if ( lc == 1 ) 01637 { 01638 insert( i, num, cp[0] ); 01639 } 01640 else 01641 { 01642 for ( size_type c = 0; c < num; c++ ) 01643 { 01644 // insert in reverse order to preserve ordering after insert 01645 insert( i, 1, cp[1] ); 01646 insert( i, 1, cp[0] ); 01647 } 01648 } 01649 } 01651 01653 01655 01656 01657 iterator erase( iterator loc ) 01658 { 01659 iterator ret; 01660 ret.mIter = mData.erase( loc.mIter ); 01661 ret.mString = this; 01662 return ret; 01663 } 01665 iterator erase( iterator start, iterator end ) 01666 { 01667 
iterator ret; 01668 ret.mIter = mData.erase( start.mIter, end.mIter ); 01669 ret.mString = this; 01670 return ret; 01671 } 01673 UString& erase( size_type index = 0, size_type num = npos ) 01674 { 01675 if ( num == npos ) 01676 mData.erase( index ); 01677 else 01678 mData.erase( index, num ); 01679 return *this; 01680 } 01682 01684 01686 01687 01688 UString& replace( size_type index1, size_type num1, const UString& str ) 01689 { 01690 mData.replace( index1, num1, str.mData, 0, npos ); 01691 return *this; 01692 } 01694 UString& replace( size_type index1, size_type num1, const UString& str, size_type num2 ) 01695 { 01696 mData.replace( index1, num1, str.mData, 0, num2 ); 01697 return *this; 01698 } 01700 UString& replace( size_type index1, size_type num1, const UString& str, size_type index2, size_type num2 ) 01701 { 01702 mData.replace( index1, num1, str.mData, index2, num2 ); 01703 return *this; 01704 } 01706 UString& replace( iterator start, iterator end, const UString& str, size_type num = npos ) 01707 { 01708 _const_fwd_iterator st(start); //Work around for gcc, allow it to find correct overload 01709 01710 size_type index1 = begin() - st; 01711 size_type num1 = end - st; 01712 return replace( index1, num1, str, 0, num ); 01713 } 01715 UString& replace( size_type index, size_type num1, size_type num2, code_point ch ) 01716 { 01717 mData.replace( index, num1, num2, ch ); 01718 return *this; 01719 } 01721 UString& replace( iterator start, iterator end, size_type num, code_point ch ) 01722 { 01723 _const_fwd_iterator st(start); //Work around for gcc, allow it to find correct overload 01724 01725 size_type index1 = begin() - st; 01726 size_type num1 = end - st; 01727 return replace( index1, num1, num, ch ); 01728 } 01730 01732 01734 01735 01736 int compare( const UString& str ) const 01737 { 01738 return mData.compare( str.mData ); 01739 } 01741 int compare( const code_point* str ) const 01742 { 01743 return mData.compare( str ); 01744 } 01746 int compare( size_type 
index, size_type length, const UString& str ) const 01747 { 01748 return mData.compare( index, length, str.mData ); 01749 } 01751 int compare( size_type index, size_type length, const UString& str, size_type index2, size_type length2 ) const 01752 { 01753 return mData.compare( index, length, str.mData, index2, length2 ); 01754 } 01756 int compare( size_type index, size_type length, const code_point* str, size_type length2 ) const 01757 { 01758 return mData.compare( index, length, str, length2 ); 01759 } 01760 #if MYGUI_IS_NATIVE_WCHAR_T 01761 01762 int compare( size_type index, size_type length, const wchar_t* w_str, size_type length2 ) const 01763 { 01764 UString tmp( w_str, length2 ); 01765 return compare( index, length, tmp ); 01766 } 01767 #endif 01768 01769 int compare( size_type index, size_type length, const char* c_str, size_type length2 ) const 01770 { 01771 UString tmp( c_str, length2 ); 01772 return compare( index, length, tmp ); 01773 } 01775 01777 01779 01780 01781 01782 size_type find( const UString& str, size_type index = 0 ) const 01783 { 01784 return mData.find( str.c_str(), index ); 01785 } 01787 01788 size_type find( const code_point* cp_str, size_type index, size_type length ) const 01789 { 01790 UString tmp( cp_str ); 01791 return mData.find( tmp.c_str(), index, length ); 01792 } 01794 01795 size_type find( const char* c_str, size_type index, size_type length ) const 01796 { 01797 UString tmp( c_str ); 01798 return mData.find( tmp.c_str(), index, length ); 01799 } 01800 #if MYGUI_IS_NATIVE_WCHAR_T 01801 01802 01803 size_type find( const wchar_t* w_str, size_type index, size_type length ) const 01804 { 01805 UString tmp( w_str ); 01806 return mData.find( tmp.c_str(), index, length ); 01807 } 01808 #endif 01809 01810 01811 size_type find( char ch, size_type index = 0 ) const 01812 { 01813 return find( static_cast<code_point>( ch ), index ); 01814 } 01816 01817 size_type find( code_point ch, size_type index = 0 ) const 01818 { 01819 return 
mData.find( ch, index ); 01820 } 01821 #if MYGUI_IS_NATIVE_WCHAR_T 01822 01823 01824 size_type find( wchar_t ch, size_type index = 0 ) const 01825 { 01826 return find( static_cast<unicode_char>( ch ), index ); 01827 } 01828 #endif 01829 01830 01831 size_type find( unicode_char ch, size_type index = 0 ) const 01832 { 01833 code_point cp[3] = { 0, 0, 0 }; 01834 size_t lc = _utf32_to_utf16( ch, cp ); 01835 return find( UString( cp, lc ), index ); 01836 } 01837 01839 size_type rfind( const UString& str, size_type index = 0 ) const 01840 { 01841 return mData.rfind( str.c_str(), index ); 01842 } 01844 size_type rfind( const code_point* cp_str, size_type index, size_type num ) const 01845 { 01846 UString tmp( cp_str ); 01847 return mData.rfind( tmp.c_str(), index, num ); 01848 } 01850 size_type rfind( const char* c_str, size_type index, size_type num ) const 01851 { 01852 UString tmp( c_str ); 01853 return mData.rfind( tmp.c_str(), index, num ); 01854 } 01855 #if MYGUI_IS_NATIVE_WCHAR_T 01856 01857 size_type rfind( const wchar_t* w_str, size_type index, size_type num ) const 01858 { 01859 UString tmp( w_str ); 01860 return mData.rfind( tmp.c_str(), index, num ); 01861 } 01862 #endif 01863 01864 size_type rfind( char ch, size_type index = 0 ) const 01865 { 01866 return rfind( static_cast<code_point>( ch ), index ); 01867 } 01869 size_type rfind( code_point ch, size_type index ) const 01870 { 01871 return mData.rfind( ch, index ); 01872 } 01873 #if MYGUI_IS_NATIVE_WCHAR_T 01874 01875 size_type rfind( wchar_t ch, size_type index = 0 ) const 01876 { 01877 return rfind( static_cast<unicode_char>( ch ), index ); 01878 } 01879 #endif 01880 01881 size_type rfind( unicode_char ch, size_type index = 0 ) const 01882 { 01883 code_point cp[3] = { 0, 0, 0 }; 01884 size_t lc = _utf32_to_utf16( ch, cp ); 01885 return rfind( UString( cp, lc ), index ); 01886 } 01888 01890 01892 01893 01894 size_type find_first_of( const UString &str, size_type index = 0, size_type num = npos ) const 01895 
{ 01896 size_type i = 0; 01897 const size_type len = length(); 01898 while ( i < num && ( index + i ) < len ) 01899 { 01900 unicode_char ch = getChar( index + i ); 01901 if ( str.inString( ch ) ) 01902 return index + i; 01903 i += _utf16_char_length( ch ); // increment by the Unicode character length 01904 } 01905 return npos; 01906 } 01908 size_type find_first_of( code_point ch, size_type index = 0 ) const 01909 { 01910 UString tmp; 01911 tmp.assign( 1, ch ); 01912 return find_first_of( tmp, index ); 01913 } 01915 size_type find_first_of( char ch, size_type index = 0 ) const 01916 { 01917 return find_first_of( static_cast<code_point>( ch ), index ); 01918 } 01919 #if MYGUI_IS_NATIVE_WCHAR_T 01920 01921 size_type find_first_of( wchar_t ch, size_type index = 0 ) const 01922 { 01923 return find_first_of( static_cast<unicode_char>( ch ), index ); 01924 } 01925 #endif 01926 01927 size_type find_first_of( unicode_char ch, size_type index = 0 ) const 01928 { 01929 code_point cp[3] = { 0, 0, 0 }; 01930 size_t lc = _utf32_to_utf16( ch, cp ); 01931 return find_first_of( UString( cp, lc ), index ); 01932 } 01933 01935 size_type find_first_not_of( const UString& str, size_type index = 0, size_type num = npos ) const 01936 { 01937 size_type i = 0; 01938 const size_type len = length(); 01939 while ( i < num && ( index + i ) < len ) 01940 { 01941 unicode_char ch = getChar( index + i ); 01942 if ( !str.inString( ch ) ) 01943 return index + i; 01944 i += _utf16_char_length( ch ); // increment by the Unicode character length 01945 } 01946 return npos; 01947 } 01949 size_type find_first_not_of( code_point ch, size_type index = 0 ) const 01950 { 01951 UString tmp; 01952 tmp.assign( 1, ch ); 01953 return find_first_not_of( tmp, index ); 01954 } 01956 size_type find_first_not_of( char ch, size_type index = 0 ) const 01957 { 01958 return find_first_not_of( static_cast<code_point>( ch ), index ); 01959 } 01960 #if MYGUI_IS_NATIVE_WCHAR_T 01961 01962 size_type find_first_not_of( wchar_t 
ch, size_type index = 0 ) const 01963 { 01964 return find_first_not_of( static_cast<unicode_char>( ch ), index ); 01965 } 01966 #endif 01967 01968 size_type find_first_not_of( unicode_char ch, size_type index = 0 ) const 01969 { 01970 code_point cp[3] = { 0, 0, 0 }; 01971 size_t lc = _utf32_to_utf16( ch, cp ); 01972 return find_first_not_of( UString( cp, lc ), index ); 01973 } 01974 01976 size_type find_last_of( const UString& str, size_type index = npos, size_type num = npos ) const 01977 { 01978 size_type i = 0; 01979 const size_type len = length(); 01980 if ( index > len ) index = len - 1; 01981 01982 while ( i < num && ( index - i ) != npos ) 01983 { 01984 size_type j = index - i; 01985 // careful to step full Unicode characters 01986 if ( j != 0 && _utf16_surrogate_follow( at( j ) ) && _utf16_surrogate_lead( at( j - 1 ) ) ) 01987 { 01988 j = index - ++i; 01989 } 01990 // and back to the usual dull test 01991 unicode_char ch = getChar( j ); 01992 if ( str.inString( ch ) ) 01993 return j; 01994 i++; 01995 } 01996 return npos; 01997 } 01999 size_type find_last_of( code_point ch, size_type index = npos ) const 02000 { 02001 UString tmp; 02002 tmp.assign( 1, ch ); 02003 return find_last_of( tmp, index ); 02004 } 02006 size_type find_last_of( char ch, size_type index = npos ) const 02007 { 02008 return find_last_of( static_cast<code_point>( ch ), index ); 02009 } 02010 #if MYGUI_IS_NATIVE_WCHAR_T 02011 02012 size_type find_last_of( wchar_t ch, size_type index = npos ) const 02013 { 02014 return find_last_of( static_cast<unicode_char>( ch ), index ); 02015 } 02016 #endif 02017 02018 size_type find_last_of( unicode_char ch, size_type index = npos ) const 02019 { 02020 code_point cp[3] = { 0, 0, 0 }; 02021 size_t lc = _utf32_to_utf16( ch, cp ); 02022 return find_last_of( UString( cp, lc ), index ); 02023 } 02024 02026 size_type find_last_not_of( const UString& str, size_type index = npos, size_type num = npos ) const 02027 { 02028 size_type i = 0; 02029 const size_type 
len = length(); 02030 if ( index > len ) index = len - 1; 02031 02032 while ( i < num && ( index - i ) != npos ) 02033 { 02034 size_type j = index - i; 02035 // careful to step full Unicode characters 02036 if ( j != 0 && _utf16_surrogate_follow( at( j ) ) && _utf16_surrogate_lead( at( j - 1 ) ) ) 02037 { 02038 j = index - ++i; 02039 } 02040 // and back to the usual dull test 02041 unicode_char ch = getChar( j ); 02042 if ( !str.inString( ch ) ) 02043 return j; 02044 i++; 02045 } 02046 return npos; 02047 } 02049 size_type find_last_not_of( code_point ch, size_type index = npos ) const 02050 { 02051 UString tmp; 02052 tmp.assign( 1, ch ); 02053 return find_last_not_of( tmp, index ); 02054 } 02056 size_type find_last_not_of( char ch, size_type index = npos ) const 02057 { 02058 return find_last_not_of( static_cast<code_point>( ch ), index ); 02059 } 02060 #if MYGUI_IS_NATIVE_WCHAR_T 02061 02062 size_type find_last_not_of( wchar_t ch, size_type index = npos ) const 02063 { 02064 return find_last_not_of( static_cast<unicode_char>( ch ), index ); 02065 } 02066 #endif 02067 02068 size_type find_last_not_of( unicode_char ch, size_type index = npos ) const 02069 { 02070 code_point cp[3] = { 0, 0, 0 }; 02071 size_t lc = _utf32_to_utf16( ch, cp ); 02072 return find_last_not_of( UString( cp, lc ), index ); 02073 } 02075 02077 02079 02080 02081 bool operator<( const UString& right ) const 02082 { 02083 return compare( right ) < 0; 02084 } 02086 bool operator<=( const UString& right ) const 02087 { 02088 return compare( right ) <= 0; 02089 } 02091 bool operator>( const UString& right ) const 02092 { 02093 return compare( right ) > 0; 02094 } 02096 bool operator>=( const UString& right ) const 02097 { 02098 return compare( right ) >= 0; 02099 } 02101 bool operator==( const UString& right ) const 02102 { 02103 return compare( right ) == 0; 02104 } 02106 bool operator!=( const UString& right ) const 02107 { 02108 return !operator==( right ); 02109 } 02111 UString& operator=( const 
UString& s ) 02112 { 02113 return assign( s ); 02114 } 02116 UString& operator=( code_point ch ) 02117 { 02118 clear(); 02119 return append( 1, ch ); 02120 } 02122 UString& operator=( char ch ) 02123 { 02124 clear(); 02125 return append( 1, ch ); 02126 } 02127 #if MYGUI_IS_NATIVE_WCHAR_T 02128 02129 UString& operator=( wchar_t ch ) 02130 { 02131 clear(); 02132 return append( 1, ch ); 02133 } 02134 #endif 02135 02136 UString& operator=( unicode_char ch ) 02137 { 02138 clear(); 02139 return append( 1, ch ); 02140 } 02142 code_point& operator[]( size_type index ) 02143 { 02144 return at( index ); 02145 } 02147 const code_point& operator[]( size_type index ) const 02148 { 02149 return at( index ); 02150 } 02152 02154 02156 02157 02158 operator std::string() const 02159 { 02160 return std::string( asUTF8() ); 02161 } 02163 operator std::wstring() const 02164 { 02165 return std::wstring( asWStr() ); 02166 } 02168 02170 02172 02173 02174 static bool _utf16_independent_char( code_point cp ) 02175 { 02176 if ( 0xD800 <= cp && cp <= 0xDFFF ) // tests if the cp is within the surrogate pair range 02177 return false; // it matches a surrogate pair signature 02178 return true; // everything else is a standalone code point 02179 } 02181 static bool _utf16_surrogate_lead( code_point cp ) 02182 { 02183 if ( 0xD800 <= cp && cp <= 0xDBFF ) // tests if the cp is within the 2nd word of a surrogate pair 02184 return true; // it is a 1st word 02185 return false; // it isn't 02186 } 02188 static bool _utf16_surrogate_follow( code_point cp ) 02189 { 02190 if ( 0xDC00 <= cp && cp <= 0xDFFF ) // tests if the cp is within the 2nd word of a surrogate pair 02191 return true; // it is a 2nd word 02192 return false; // everything else isn't 02193 } 02195 static size_t _utf16_char_length( code_point cp ) 02196 { 02197 if ( 0xD800 <= cp && cp <= 0xDBFF ) // test if cp is the beginning of a surrogate pair 02198 return 2; // if it is, then we are 2 words long 02199 return 1; // otherwise we are only 
1 word long 02200 } 02202 static size_t _utf16_char_length( unicode_char uc ) 02203 { 02204 if ( uc > 0xFFFF ) // test if uc is greater than the single word maximum 02205 return 2; // if so, we need a surrogate pair 02206 return 1; // otherwise we can stuff it into a single word 02207 } 02209 02213 static size_t _utf16_to_utf32( const code_point in_cp[2], unicode_char& out_uc ) 02214 { 02215 const code_point& cp1 = in_cp[0]; 02216 const code_point& cp2 = in_cp[1]; 02217 bool wordPair = false; 02218 02219 // does it look like a surrogate pair? 02220 if ( 0xD800 <= cp1 && cp1 <= 0xDBFF ) 02221 { 02222 // looks like one, but does the other half match the algorithm as well? 02223 if ( 0xDC00 <= cp2 && cp2 <= 0xDFFF ) 02224 wordPair = true; // yep! 02225 } 02226 02227 if ( !wordPair ) 02228 { // if we aren't a 100% authentic surrogate pair, then just copy the value 02229 out_uc = cp1; 02230 return 1; 02231 } 02232 02233 unsigned short cU = cp1, cL = cp2; // copy upper and lower words of surrogate pair to writable buffers 02234 cU -= 0xD800; // remove the encoding markers 02235 cL -= 0xDC00; 02236 02237 out_uc = ( cU & 0x03FF ) << 10; // grab the 10 upper bits and set them in their proper location 02238 out_uc |= ( cL & 0x03FF ); // combine in the lower 10 bits 02239 out_uc += 0x10000; // add back in the value offset 02240 02241 return 2; // this whole operation takes to words, so that's what we'll return 02242 } 02244 02249 static size_t _utf32_to_utf16( const unicode_char& in_uc, code_point out_cp[2] ) 02250 { 02251 if ( in_uc <= 0xFFFF ) 02252 { // we blindly preserve sentinel values because our decoder understands them 02253 out_cp[0] = in_uc; 02254 return 1; 02255 } 02256 unicode_char uc = in_uc; // copy to writable buffer 02257 unsigned short tmp; // single code point buffer 02258 uc -= 0x10000; // subtract value offset 02259 02260 //process upper word 02261 tmp = ( uc >> 10 ) & 0x03FF; // grab the upper 10 bits 02262 tmp += 0xD800; // add encoding offset 02263 
out_cp[0] = tmp; // write 02264 02265 // process lower word 02266 tmp = uc & 0x03FF; // grab the lower 10 bits 02267 tmp += 0xDC00; // add encoding offset 02268 out_cp[1] = tmp; // write 02269 02270 return 2; // return used word count (2 for surrogate pairs) 02271 } 02273 02275 02277 02278 02279 static bool _utf8_start_char( unsigned char cp ) 02280 { 02281 return ( cp & ~_cont_mask ) != _cont; 02282 } 02284 static size_t _utf8_char_length( unsigned char cp ) 02285 { 02286 if ( !( cp & 0x80 ) ) return 1; 02287 if (( cp & ~_lead1_mask ) == _lead1 ) return 2; 02288 if (( cp & ~_lead2_mask ) == _lead2 ) return 3; 02289 if (( cp & ~_lead3_mask ) == _lead3 ) return 4; 02290 if (( cp & ~_lead4_mask ) == _lead4 ) return 5; 02291 if (( cp & ~_lead5_mask ) == _lead5 ) return 6; 02292 throw invalid_data( "invalid UTF-8 sequence header value" ); 02293 } 02295 static size_t _utf8_char_length( unicode_char uc ) 02296 { 02297 /* 02298 7 bit: U-00000000 - U-0000007F: 0xxxxxxx 02299 11 bit: U-00000080 - U-000007FF: 110xxxxx 10xxxxxx 02300 16 bit: U-00000800 - U-0000FFFF: 1110xxxx 10xxxxxx 10xxxxxx 02301 21 bit: U-00010000 - U-001FFFFF: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx 02302 26 bit: U-00200000 - U-03FFFFFF: 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 02303 31 bit: U-04000000 - U-7FFFFFFF: 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 02304 */ 02305 if ( !( uc & ~0x0000007F ) ) return 1; 02306 if ( !( uc & ~0x000007FF ) ) return 2; 02307 if ( !( uc & ~0x0000FFFF ) ) return 3; 02308 if ( !( uc & ~0x001FFFFF ) ) return 4; 02309 if ( !( uc & ~0x03FFFFFF ) ) return 5; 02310 if ( !( uc & ~0x7FFFFFFF ) ) return 6; 02311 throw invalid_data( "invalid UTF-32 value" ); 02312 } 02313 02315 static size_t _utf8_to_utf32( const unsigned char in_cp[6], unicode_char& out_uc ) 02316 { 02317 size_t len = _utf8_char_length( in_cp[0] ); 02318 if ( len == 1 ) 02319 { // if we are only 1 byte long, then just grab it and exit 02320 out_uc = in_cp[0]; 02321 return 1; 02322 } 02323 02324 
unicode_char c = 0; // temporary buffer 02325 size_t i = 0; 02326 switch ( len ) 02327 { // load header byte 02328 case 6: 02329 c = in_cp[i] & _lead5_mask; 02330 break; 02331 case 5: 02332 c = in_cp[i] & _lead4_mask; 02333 break; 02334 case 4: 02335 c = in_cp[i] & _lead3_mask; 02336 break; 02337 case 3: 02338 c = in_cp[i] & _lead2_mask; 02339 break; 02340 case 2: 02341 c = in_cp[i] & _lead1_mask; 02342 break; 02343 } 02344 02345 for ( ++i; i < len; i++ ) 02346 { // load each continuation byte 02347 if (( in_cp[i] & ~_cont_mask ) != _cont ) 02348 throw invalid_data( "bad UTF-8 continuation byte" ); 02349 c <<= 6; 02350 c |= ( in_cp[i] & _cont_mask ); 02351 } 02352 02353 out_uc = c; // write the final value and return the used byte length 02354 return len; 02355 } 02357 static size_t _utf32_to_utf8( const unicode_char& in_uc, unsigned char out_cp[6] ) 02358 { 02359 size_t len = _utf8_char_length( in_uc ); // predict byte length of sequence 02360 unicode_char c = in_uc; // copy to temp buffer 02361 02362 //stuff all of the lower bits 02363 for ( size_t i = len - 1; i > 0; i-- ) 02364 { 02365 out_cp[i] = (( c ) & _cont_mask ) | _cont; 02366 c >>= 6; 02367 } 02368 02369 //now write the header byte 02370 switch ( len ) 02371 { 02372 case 6: 02373 out_cp[0] = (( c ) & _lead5_mask ) | _lead5; 02374 break; 02375 case 5: 02376 out_cp[0] = (( c ) & _lead4_mask ) | _lead4; 02377 break; 02378 case 4: 02379 out_cp[0] = (( c ) & _lead3_mask ) | _lead3; 02380 break; 02381 case 3: 02382 out_cp[0] = (( c ) & _lead2_mask ) | _lead2; 02383 break; 02384 case 2: 02385 out_cp[0] = (( c ) & _lead1_mask ) | _lead1; 02386 break; 02387 case 1: 02388 default: 02389 out_cp[0] = ( c ) & 0x7F; 02390 break; 02391 } 02392 02393 // return the byte length of the sequence 02394 return len; 02395 } 02396 02398 static size_type _verifyUTF8( const unsigned char* c_str ) 02399 { 02400 std::string tmp( reinterpret_cast<const char*>( c_str ) ); 02401 return _verifyUTF8( tmp ); 02402 } 02404 static 
size_type _verifyUTF8( const std::string& str ) 02405 { 02406 std::string::const_iterator i, ie = str.end(); 02407 i = str.begin(); 02408 size_type length = 0; 02409 02410 while ( i != ie ) 02411 { 02412 // characters pass until we find an extended sequence 02413 if (( *i ) & 0x80 ) 02414 { 02415 unsigned char c = ( *i ); 02416 size_t contBytes = 0; 02417 02418 // get continuation byte count and test for overlong sequences 02419 if (( c & ~_lead1_mask ) == _lead1 ) 02420 { // 1 additional byte 02421 if ( c == _lead1 ) throw invalid_data( "overlong UTF-8 sequence" ); 02422 contBytes = 1; 02423 02424 } 02425 else if (( c & ~_lead2_mask ) == _lead2 ) 02426 { // 2 additional bytes 02427 contBytes = 2; 02428 if ( c == _lead2 ) 02429 { // possible overlong UTF-8 sequence 02430 c = ( *( i + 1 ) ); // look ahead to next byte in sequence 02431 if (( c & _lead2 ) == _cont ) throw invalid_data( "overlong UTF-8 sequence" ); 02432 } 02433 02434 } 02435 else if (( c & ~_lead3_mask ) == _lead3 ) 02436 { // 3 additional bytes 02437 contBytes = 3; 02438 if ( c == _lead3 ) 02439 { // possible overlong UTF-8 sequence 02440 c = ( *( i + 1 ) ); // look ahead to next byte in sequence 02441 if (( c & _lead3 ) == _cont ) throw invalid_data( "overlong UTF-8 sequence" ); 02442 } 02443 02444 } 02445 else if (( c & ~_lead4_mask ) == _lead4 ) 02446 { // 4 additional bytes 02447 contBytes = 4; 02448 if ( c == _lead4 ) 02449 { // possible overlong UTF-8 sequence 02450 c = ( *( i + 1 ) ); // look ahead to next byte in sequence 02451 if (( c & _lead4 ) == _cont ) throw invalid_data( "overlong UTF-8 sequence" ); 02452 } 02453 02454 } 02455 else if (( c & ~_lead5_mask ) == _lead5 ) 02456 { // 5 additional bytes 02457 contBytes = 5; 02458 if ( c == _lead5 ) 02459 { // possible overlong UTF-8 sequence 02460 c = ( *( i + 1 ) ); // look ahead to next byte in sequence 02461 if (( c & _lead5 ) == _cont ) throw invalid_data( "overlong UTF-8 sequence" ); 02462 } 02463 } 02464 02465 // check remaining 
continuation bytes for 02466 while ( contBytes-- ) 02467 { 02468 c = ( *( ++i ) ); // get next byte in sequence 02469 if (( c & ~_cont_mask ) != _cont ) 02470 throw invalid_data( "bad UTF-8 continuation byte" ); 02471 } 02472 } 02473 length++; 02474 i++; 02475 } 02476 return length; 02477 } 02479 02480 private: 02481 //template<class ITER_TYPE> friend class _iterator; 02482 dstring mData; 02483 02485 enum BufferType 02486 { 02487 bt_none, 02488 bt_string, 02489 bt_wstring, 02490 bt_utf32string 02491 }; 02492 02494 void _init() 02495 { 02496 m_buffer.mVoidBuffer = 0; 02497 m_bufferType = bt_none; 02498 m_bufferSize = 0; 02499 } 02500 02502 // Scratch buffer 02504 void _cleanBuffer() const 02505 { 02506 if ( m_buffer.mVoidBuffer != 0 ) 02507 { 02508 switch ( m_bufferType ) 02509 { 02510 case bt_string: 02511 delete m_buffer.mStrBuffer; 02512 break; 02513 case bt_wstring: 02514 delete m_buffer.mWStrBuffer; 02515 break; 02516 case bt_utf32string: 02517 delete m_buffer.mUTF32StrBuffer; 02518 break; 02519 case bt_none: // under the worse of circumstances, this is all we can do, and hope it works out 02520 default: 02521 //delete m_buffer.mVoidBuffer; 02522 // delete void* is undefined, don't do that 02523 MYGUI_ASSERT(false, "This should never happen - mVoidBuffer should never contain something if we " 02524 "don't know the type"); 02525 break; 02526 } 02527 m_buffer.mVoidBuffer = 0; 02528 m_bufferSize = 0; 02529 } 02530 } 02531 02533 void _getBufferStr() const 02534 { 02535 if ( m_bufferType != bt_string ) 02536 { 02537 _cleanBuffer(); 02538 m_buffer.mStrBuffer = new std::string(); 02539 m_bufferType = bt_string; 02540 } 02541 m_buffer.mStrBuffer->clear(); 02542 } 02544 void _getBufferWStr() const 02545 { 02546 if ( m_bufferType != bt_wstring ) 02547 { 02548 _cleanBuffer(); 02549 m_buffer.mWStrBuffer = new std::wstring(); 02550 m_bufferType = bt_wstring; 02551 } 02552 m_buffer.mWStrBuffer->clear(); 02553 } 02555 void _getBufferUTF32Str() const 02556 { 02557 if ( 
m_bufferType != bt_utf32string ) 02558 { 02559 _cleanBuffer(); 02560 m_buffer.mUTF32StrBuffer = new utf32string(); 02561 m_bufferType = bt_utf32string; 02562 } 02563 m_buffer.mUTF32StrBuffer->clear(); 02564 } 02565 02566 void _load_buffer_UTF8() const 02567 { 02568 _getBufferStr(); 02569 std::string& buffer = ( *m_buffer.mStrBuffer ); 02570 buffer.reserve( length() ); 02571 02572 unsigned char utf8buf[6]; 02573 char* charbuf = ( char* )utf8buf; 02574 unicode_char c; 02575 size_t len; 02576 02577 const_iterator i, ie = end(); 02578 for ( i = begin(); i != ie; i.moveNext() ) 02579 { 02580 c = i.getCharacter(); 02581 len = _utf32_to_utf8( c, utf8buf ); 02582 size_t j = 0; 02583 while ( j < len ) 02584 buffer.push_back( charbuf[j++] ); 02585 } 02586 } 02587 void _load_buffer_WStr() const 02588 { 02589 _getBufferWStr(); 02590 std::wstring& buffer = ( *m_buffer.mWStrBuffer ); 02591 buffer.reserve( length() ); // may over reserve, but should be close enough 02592 #ifdef WCHAR_UTF16 // wchar_t matches UTF-16 02593 const_iterator i, ie = end(); 02594 for ( i = begin(); i != ie; ++i ) 02595 { 02596 buffer.push_back(( wchar_t )( *i ) ); 02597 } 02598 #else // wchar_t fits UTF-32 02599 unicode_char c; 02600 const_iterator i, ie = end(); 02601 for ( i = begin(); i != ie; i.moveNext() ) 02602 { 02603 c = i.getCharacter(); 02604 buffer.push_back(( wchar_t )c ); 02605 } 02606 #endif 02607 } 02608 void _load_buffer_UTF32() const 02609 { 02610 _getBufferUTF32Str(); 02611 utf32string& buffer = ( *m_buffer.mUTF32StrBuffer ); 02612 buffer.reserve( length() ); // may over reserve, but should be close enough 02613 02614 unicode_char c; 02615 02616 const_iterator i, ie = end(); 02617 for ( i = begin(); i != ie; i.moveNext() ) 02618 { 02619 c = i.getCharacter(); 02620 buffer.push_back( c ); 02621 } 02622 } 02623 02624 mutable BufferType m_bufferType; // identifies the data type held in m_buffer 02625 mutable size_t m_bufferSize; // size of the CString buffer 02626 02627 // multi-purpose 
buffer used everywhere we need a throw-away buffer 02628 union Buffer 02629 { 02630 mutable void* mVoidBuffer; 02631 mutable std::string* mStrBuffer; 02632 mutable std::wstring* mWStrBuffer; 02633 mutable utf32string* mUTF32StrBuffer; 02634 } 02635 m_buffer; 02636 }; 02637 02639 inline UString operator+( const UString& s1, const UString& s2 ) 02640 { 02641 return UString( s1 ).append( s2 ); 02642 } 02644 inline UString operator+( const UString& s1, UString::code_point c ) 02645 { 02646 return UString( s1 ).append( 1, c ); 02647 } 02649 inline UString operator+( const UString& s1, UString::unicode_char c ) 02650 { 02651 return UString( s1 ).append( 1, c ); 02652 } 02654 inline UString operator+( const UString& s1, char c ) 02655 { 02656 return UString( s1 ).append( 1, c ); 02657 } 02658 #if MYGUI_IS_NATIVE_WCHAR_T 02659 02660 inline UString operator+( const UString& s1, wchar_t c ) 02661 { 02662 return UString( s1 ).append( 1, c ); 02663 } 02664 #endif 02665 02666 inline UString operator+( UString::code_point c, const UString& s2 ) 02667 { 02668 return UString().append( 1, c ).append( s2 ); 02669 } 02671 inline UString operator+( UString::unicode_char c, const UString& s2 ) 02672 { 02673 return UString().append( 1, c ).append( s2 ); 02674 } 02676 inline UString operator+( char c, const UString& s2 ) 02677 { 02678 return UString().append( 1, c ).append( s2 ); 02679 } 02680 #if MYGUI_IS_NATIVE_WCHAR_T 02681 02682 inline UString operator+( wchar_t c, const UString& s2 ) 02683 { 02684 return UString().append( 1, c ).append( s2 ); 02685 } 02686 #endif 02687 02688 // (const) forward iterator common operators 02689 inline UString::size_type operator-( const UString::_const_fwd_iterator& left, const UString::_const_fwd_iterator& right ) 02690 { 02691 return ( left.mIter - right.mIter ); 02692 } 02693 inline bool operator==( const UString::_const_fwd_iterator& left, const UString::_const_fwd_iterator& right ) 02694 { 02695 return left.mIter == right.mIter; 02696 } 02697 
inline bool operator!=( const UString::_const_fwd_iterator& left, const UString::_const_fwd_iterator& right ) 02698 { 02699 return left.mIter != right.mIter; 02700 } 02701 inline bool operator<( const UString::_const_fwd_iterator& left, const UString::_const_fwd_iterator& right ) 02702 { 02703 return left.mIter < right.mIter; 02704 } 02705 inline bool operator<=( const UString::_const_fwd_iterator& left, const UString::_const_fwd_iterator& right ) 02706 { 02707 return left.mIter <= right.mIter; 02708 } 02709 inline bool operator>( const UString::_const_fwd_iterator& left, const UString::_const_fwd_iterator& right ) 02710 { 02711 return left.mIter > right.mIter; 02712 } 02713 inline bool operator>=( const UString::_const_fwd_iterator& left, const UString::_const_fwd_iterator& right ) 02714 { 02715 return left.mIter >= right.mIter; 02716 } 02717 02718 // (const) reverse iterator common operators 02719 // NB: many of these operations are evaluated in reverse because this is a reverse iterator wrapping a forward iterator 02720 inline UString::size_type operator-( const UString::_const_rev_iterator& left, const UString::_const_rev_iterator& right ) 02721 { 02722 return ( right.mIter - left.mIter ); 02723 } 02724 inline bool operator==( const UString::_const_rev_iterator& left, const UString::_const_rev_iterator& right ) 02725 { 02726 return left.mIter == right.mIter; 02727 } 02728 inline bool operator!=( const UString::_const_rev_iterator& left, const UString::_const_rev_iterator& right ) 02729 { 02730 return left.mIter != right.mIter; 02731 } 02732 inline bool operator<( const UString::_const_rev_iterator& left, const UString::_const_rev_iterator& right ) 02733 { 02734 return right.mIter < left.mIter; 02735 } 02736 inline bool operator<=( const UString::_const_rev_iterator& left, const UString::_const_rev_iterator& right ) 02737 { 02738 return right.mIter <= left.mIter; 02739 } 02740 inline bool operator>( const UString::_const_rev_iterator& left, const 
UString::_const_rev_iterator& right ) 02741 { 02742 return right.mIter > left.mIter; 02743 } 02744 inline bool operator>=( const UString::_const_rev_iterator& left, const UString::_const_rev_iterator& right ) 02745 { 02746 return right.mIter >= left.mIter; 02747 } 02748 02750 inline std::ostream& operator << ( std::ostream& os, const UString& s ) 02751 { 02752 return os << s.asUTF8(); 02753 } 02754 02756 //inline std::wostream& operator << ( std::wostream& os, const UString& s ) 02757 //{ 02758 // return os << s.asWStr(); 02759 //} 02760 02761 02762 02763 } 02764 02765 #endif // __MYGUI_U_STRING_H__