• Skip to content
  • Skip to link menu
KDE 4.5 API Reference
  • KDE API Reference
  • KDE-PIM Libraries
  • Sitemap
  • Contact Us
 

KMIME Library

kmime_util.cpp

00001 /*
00002   kmime_util.cpp
00003 
00004   KMime, the KDE Internet mail/usenet news message library.
00005   Copyright (c) 2001 the KMime authors.
00006   See file AUTHORS for details
00007 
00008   This library is free software; you can redistribute it and/or
00009   modify it under the terms of the GNU Library General Public
00010   License as published by the Free Software Foundation; either
00011   version 2 of the License, or (at your option) any later version.
00012 
00013   This library is distributed in the hope that it will be useful,
00014   but WITHOUT ANY WARRANTY; without even the implied warranty of
00015   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00016   Library General Public License for more details.
00017 
00018   You should have received a copy of the GNU Library General Public License
00019   along with this library; see the file COPYING.LIB.  If not, write to
00020   the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
00021   Boston, MA 02110-1301, USA.
00022 */
00023 
00024 #include "kmime_util.h"
00025 #include "kmime_util_p.h"
00026 #include "kmime_header_parsing.h"
00027 #include "kmime_charfreq.h"
00028 #include "kmime_warning.h"
00029 
00030 #include <config-kmime.h>
00031 #include <kdefakes.h> // for strcasestr
00032 #include <kglobal.h>
00033 #include <klocale.h>
00034 #include <kcharsets.h>
00035 #include <kcodecs.h>
00036 #include <kdebug.h>
00037 
00038 #include <QtCore/QList>
00039 #include <QtCore/QString>
00040 #include <QtCore/QTextCodec>
00041 
00042 #include <ctype.h>
00043 #include <time.h>
00044 #include <stdlib.h>
00045 #include <unistd.h>
00046 #include <boost/concept_check.hpp>
00047 #include "kmime_codecs.h"
00048 
00049 using namespace KMime;
00050 
00051 namespace KMime {
00052 
// File-level state shared by the functions below.
// Upper-cased charset names handed out by cachedCharset().
00053 QList<QByteArray> c_harsetCache;
// Upper-cased language names handed out by cachedLanguage().
00054 QList<QByteArray> l_anguageCache;
// Charset name decodeRFC2047String() falls back to; empty means "no fallback".
00055 QString f_allbackCharEnc;
// Flag stored/returned by the *UseOutlookAttachmentEncoding() accessors.
00056 bool u_seOutlookEncoding = false;
00057 
00058 QByteArray cachedCharset( const QByteArray &name )
00059 {
00060   foreach ( const QByteArray& charset, c_harsetCache ) {
00061     if ( qstricmp( name.data(), charset.data() ) == 0 ) {
00062       return charset;
00063     }
00064   }
00065 
00066   c_harsetCache.append( name.toUpper() );
00067   //kDebug() << "KNMimeBase::cachedCharset() number of cs" << c_harsetCache.count();
00068   return c_harsetCache.last();
00069 }
00070 
00071 QByteArray cachedLanguage( const QByteArray &name )
00072 {
00073   foreach ( const QByteArray& language, l_anguageCache ) {
00074     if ( qstricmp( name.data(), language.data() ) == 0 ) {
00075       return language;
00076     }
00077   }
00078 
00079   l_anguageCache.append( name.toUpper() );
00080   //kDebug() << "KNMimeBase::cachedCharset() number of cs" << c_harsetCache.count();
00081   return l_anguageCache.last();
00082 }
00083 
00084 bool isUsAscii( const QString &s )
00085 {
00086   uint sLength = s.length();
00087   for ( uint i=0; i<sLength; i++ ) {
00088     if ( s.at( i ).toLatin1() <= 0 ) { // c==0: non-latin1, c<0: non-us-ascii
00089       return false;
00090     }
00091   }
00092   return true;
00093 }
00094 
00095 QString nameForEncoding( Headers::contentEncoding enc )
00096 {
00097   switch( enc ) {
00098     case Headers::CE7Bit: return QString::fromLatin1( "7bit" );
00099     case Headers::CE8Bit: return QString::fromLatin1( "8bit" );
00100     case Headers::CEquPr: return QString::fromLatin1( "quoted-printable" );
00101     case Headers::CEbase64: return QString::fromLatin1( "base64" );
00102     case Headers::CEuuenc: return QString::fromLatin1( "uuencode" );
00103     case Headers::CEbinary: return QString::fromLatin1( "binary" );
00104     default: return QString::fromLatin1( "unknown" );
00105   }
00106 }
00107 
// Returns the content transfer encodings that may be used for @p data,
// based on the CharFreq classification of the data.
// The switch deliberately falls through: seven-bit text gets 7bit, 8bit and
// the qp/base64 pair; eight-bit text gets 8bit and the pair; seven-bit data
// gets only the pair; eight-bit data gets base64 only.
// Within the pair, quoted-printable is listed first when it is estimated to
// be shorter than base64. For CharFreq::None or an unknown type the assertion
// fires and the list is returned empty.
00108 QList<Headers::contentEncoding> encodingsForData( const QByteArray &data )
00109 {
00110   QList<Headers::contentEncoding> allowed;
00111   CharFreq cf( data );
00112 
00113   switch ( cf.type() ) {
00114     case CharFreq::SevenBitText:
00115       allowed << Headers::CE7Bit;
            // intentional fall-through
00116     case CharFreq::EightBitText:
00117       allowed << Headers::CE8Bit;
            // intentional fall-through
00118     case CharFreq::SevenBitData:
00119       if ( cf.printableRatio() > 5.0/6.0 ) {
00120         // let n the length of data and p the number of printable chars.
00121         // Then base64 \approx 4n/3; qp \approx p + 3(n-p)
00122         // => qp < base64 iff p > 5n/6.
00123         allowed << Headers::CEquPr;
00124         allowed << Headers::CEbase64;
00125       } else {
00126         allowed << Headers::CEbase64;
00127         allowed << Headers::CEquPr;
00128       }
00129       break;
00130     case CharFreq::EightBitData:
00131       allowed << Headers::CEbase64;
00132       break;
00133     case CharFreq::None:
00134     default:
00135       Q_ASSERT( false );
00136   }
00137 
00138   return allowed;
00139 }
00140 
// Character-class bitmaps for 7-bit ASCII: 16 bytes x 8 bits = one bit per
// character code 0..127. Character c is represented by byte c/8, with the most
// significant bit standing for the lowest code of that byte (e.g. in
// specialsMap byte 4 = 0x20 marks code 34, the double quote).
// NOTE(review): the lookup helper that consumes these maps is not defined in
// this file; presumably it lives in kmime_util_p.h - confirm.
//
// Set bits: the "specials" listed on the next line.
00141 // "(),.:;<>@[\]
00142 const uchar specialsMap[16] = {
00143   0x00, 0x00, 0x00, 0x00, // CTLs
00144   0x20, 0xCA, 0x00, 0x3A, // SPACE ... '?'
00145   0x80, 0x00, 0x00, 0x1C, // '@' ... '_'
00146   0x00, 0x00, 0x00, 0x00  // '`' ... DEL
00147 };
00148 
// Set bits: the "tspecials" listed on the next line.
00149 // "(),:;<>@[\]/=?
00150 const uchar tSpecialsMap[16] = {
00151   0x00, 0x00, 0x00, 0x00, // CTLs
00152   0x20, 0xC9, 0x00, 0x3F, // SPACE ... '?'
00153   0x80, 0x00, 0x00, 0x1C, // '@' ... '_'
00154   0x00, 0x00, 0x00, 0x00  // '`' ... DEL
00155 };
00156 
00157 // all except specials, CTLs, SPACE.
00158 const uchar aTextMap[16] = {
00159   0x00, 0x00, 0x00, 0x00,
00160   0x5F, 0x35, 0xFF, 0xC5,
00161   0x7F, 0xFF, 0xFF, 0xE3,
00162   0xFF, 0xFF, 0xFF, 0xFE
00163 };
00164 
00165 // all except tspecials, CTLs, SPACE.
00166 const uchar tTextMap[16] = {
00167   0x00, 0x00, 0x00, 0x00,
00168   0x5F, 0x36, 0xFF, 0xC0,
00169   0x7F, 0xFF, 0xFF, 0xE3,
00170   0xFF, 0xFF, 0xFF, 0xFE
00171 };
00172 
00173 // none except a-zA-Z0-9!*+-/
00174 const uchar eTextMap[16] = {
00175   0x00, 0x00, 0x00, 0x00,
00176   0x40, 0x35, 0xFF, 0xC0,
00177   0x7F, 0xFF, 0xFF, 0xE0,
00178   0x7F, 0xFF, 0xFF, 0xE0
00179 };
00180 
// Stores the charset name that decodeRFC2047String() uses when the UTF-8
// interpretation of its result contains U+FFFD replacement characters.
00181 void setFallbackCharEncoding(const QString& fallbackCharEnc)
00182 {
00183   f_allbackCharEnc = fallbackCharEnc;
00184 }
00185 
// Returns the charset name set by setFallbackCharEncoding() (empty by default).
00186 QString fallbackCharEncoding()
00187 {
00188   return f_allbackCharEnc;
00189 }
00190 
// Stores the "use Outlook attachment encoding" flag. This file only stores
// and returns the flag; its consumers are elsewhere.
00191 void setUseOutlookAttachmentEncoding( bool violateStandard )
00192 {
00193   u_seOutlookEncoding = violateStandard;
00194 }
00195 
// Returns the flag set by setUseOutlookAttachmentEncoding() (false by default).
00196 bool useOutlookAttachmentEncoding()
00197 {
00198   return u_seOutlookEncoding;
00199 }
00200 
00201 
00202 QString decodeRFC2047String( const QByteArray &src, QByteArray &usedCS,
00203                              const QByteArray &defaultCS, bool forceCS )
00204 {
00205   QByteArray result;
00206   QByteArray spaceBuffer;
00207   const char *scursor = src.constData();
00208   const char *send = scursor + src.length();
00209   bool onlySpacesSinceLastWord = false;
00210 
00211   while ( scursor != send ) {
00212      // space
00213     if ( isspace( *scursor ) && onlySpacesSinceLastWord ) {
00214       spaceBuffer += *scursor++;
00215       continue;
00216     }
00217 
00218     // possible start of an encoded word
00219     if ( *scursor == '=' ) {
00220       QByteArray language;
00221       QString decoded;
00222       ++scursor;
00223       const char *start = scursor;
00224       if ( HeaderParsing::parseEncodedWord( scursor, send, decoded, language, usedCS, defaultCS, forceCS ) ) {
00225         result += decoded.toUtf8();
00226         onlySpacesSinceLastWord = true;
00227         spaceBuffer.clear();
00228       } else {
00229         if ( onlySpacesSinceLastWord ) {
00230           result += spaceBuffer;
00231           onlySpacesSinceLastWord = false;
00232         }
00233         result += '=';
00234         scursor = start; // reset cursor after parsing failure
00235       }
00236       continue;
00237     } else {
00238       // unencoded data
00239       if ( onlySpacesSinceLastWord ) {
00240         result += spaceBuffer;
00241         onlySpacesSinceLastWord = false;
00242       }
00243       result += *scursor;
00244       ++scursor;
00245     }
00246   }
00247   // If there are any chars that couldn't be decoded in UTF-8,
00248   //  use the fallback charset if it exists
00249   const QString tryUtf8 = QString::fromUtf8( result );
00250   if ( tryUtf8.contains( 0xFFFD ) && !f_allbackCharEnc.isEmpty() ) {
00251     QTextCodec* codec = KGlobal::charsets()->codecForName( f_allbackCharEnc );
00252     return codec->toUnicode( result );
00253   } else {
00254     return tryUtf8;
00255   }
00256 }
00257 
// Convenience overload: decodes @p src with "utf-8" as default charset,
// without forcing it, and discards the reported charset.
00258 QString decodeRFC2047String( const QByteArray &src )
00259 {
00260   QByteArray usedCS;
00261   return decodeRFC2047String( src, usedCS, "utf-8", false );
00262 }
00263 
// Encodes @p src for use in a header as at most one RFC 2047 encoded-word.
//
// @p src is first converted to bytes with the codec for @p charset (or, if no
// such codec is found, with the codec for the locale encoding). If
// @p allow8BitHeaders is true those bytes are returned unchanged. Otherwise
// the bytes are scanned for characters that need encoding: 8-bit characters,
// ESC, and - when @p addressHeader is true - the characters "()<>@,.;:\[]=
// and the double quote. If none is found the bytes are returned as they are.
// If some are found, everything from the start of the first affected word to
// the end of the last affected word is put into a single encoded-word
// ("Q" encoding when the charset name contains "8859-", "B" otherwise); the
// text before and after that span is copied verbatim.
// NOTE(review): the encoded-word is not split, so its length is unbounded here.
00264 QByteArray encodeRFC2047String( const QString &src, const QByteArray &charset,
00265                                 bool addressHeader, bool allow8BitHeaders )
00266 {
00267   QByteArray encoded8Bit, result;
00268   int start=0, end=0;
00269   bool nonAscii=false, ok=true, useQEncoding=false;
00270 
00271   const QTextCodec *codec = KGlobal::charsets()->codecForName( charset, ok );
00272 
00273   QByteArray usedCS;
00274   if ( !ok ) {
00275     //no codec available => try local8Bit and hope the best ;-)
00276     usedCS = KGlobal::locale()->encoding();
00277     codec = KGlobal::charsets()->codecForName( usedCS, ok );
00278   }
00279   else {
00280     Q_ASSERT( codec );
00281     if ( charset.isEmpty() )
00282       usedCS = codec->name();
00283     else
00284       usedCS = charset;
00285   }
00286 
00287   if ( usedCS.contains( "8859-" ) ) { // use "B"-Encoding for non iso-8859-x charsets
00288     useQEncoding = true;
00289   }
00290 
00291   encoded8Bit = codec->fromUnicode( src );
00292 
00293   if ( allow8BitHeaders ) {
00294     return encoded8Bit;
00295   }
00296 
        // Pass 1: find the first character that needs encoding; 'start' tracks
        // the beginning of the word that contains it.
00297   uint encoded8BitLength = encoded8Bit.length();
00298   for ( unsigned int i=0; i<encoded8BitLength; i++ ) {
00299     if ( encoded8Bit[i] == ' ' ) { // encoding starts at word boundaries
00300       start = i + 1;
00301     }
00302 
00303     // encode escape character, for japanese encodings...
00304     if ( ( (signed char)encoded8Bit[i] < 0 ) || ( encoded8Bit[i] == '\033' ) ||
00305          ( addressHeader && ( strchr( "\"()<>@,.;:\\[]=", encoded8Bit[i] ) != 0 ) ) ) {
00306       end = start;   // non us-ascii char found, now we determine where to stop encoding
00307       nonAscii = true;
00308       break;
00309     }
00310   }
00311 
00312   if ( nonAscii ) {
00313     while ( ( end < encoded8Bit.length() ) && ( encoded8Bit[end] != ' ' ) ) {
00314       // we encode complete words
00315       end++;
00316     }
00317 
          // Pass 2: extend 'end' to the end of the last word that needs encoding.
00318     for ( int x=end; x<encoded8Bit.length(); x++ ) {
00319       if ( ( (signed char)encoded8Bit[x]<0) || ( encoded8Bit[x] == '\033' ) ||
00320            ( addressHeader && ( strchr("\"()<>@,.;:\\[]=",encoded8Bit[x]) != 0 ) ) ) {
00321         end = x;     // we found another non-ascii word
00322 
00323         while ( ( end < encoded8Bit.length() ) && ( encoded8Bit[end] != ' ' ) ) {
00324           // we encode complete words
00325           end++;
00326         }
00327       }
00328     }
00329 
00330     result = encoded8Bit.left( start ) + "=?" + usedCS;
00331 
00332     if ( useQEncoding ) {
00333       result += "?Q?";
00334 
00335       char c, hexcode;// "Q"-encoding implementation described in RFC 2047
00336       for ( int i=start; i<end; i++ ) {
00337         c = encoded8Bit[i];
00338         if ( c == ' ' ) { // make the result readable with not MIME-capable readers
00339           result += '_';
00340         } else {
00341           if ( ( ( c >= 'a' ) && ( c <= 'z' ) ) || // paranoid mode, encode *all* special chars to avoid problems
00342               ( ( c >= 'A' ) && ( c <= 'Z' ) ) ||  // with "From" & "To" headers
00343               ( ( c >= '0' ) && ( c <= '9' ) ) ) {
00344             result += c;
00345           } else {
                  // Emit "=XX" with upper-case hex digits: 48 is '0', and adding 7
                  // to values past '9' (57) lands on 'A'..'F'.
00346             result += '=';                 // "stolen" from KMail ;-)
00347             hexcode = ((c & 0xF0) >> 4) + 48;
00348             if ( hexcode >= 58 ) {
00349               hexcode += 7;
00350             }
00351             result += hexcode;
00352             hexcode = (c & 0x0F) + 48;
00353             if ( hexcode >= 58 ) {
00354               hexcode += 7;
00355             }
00356             result += hexcode;
00357           }
00358         }
00359       }
00360     } else {
00361       result += "?B?" + encoded8Bit.mid( start, end - start ).toBase64();
00362     }
00363 
00364     result +="?=";
00365     result += encoded8Bit.right( encoded8Bit.length() - end );
00366   } else {
00367     result = encoded8Bit;
00368   }
00369 
00370   return result;
00371 }
00372 
00373 
00374 //-----------------------------------------------------------------------------
00375 QByteArray encodeRFC2231String( const QString& str, const QByteArray& charset )
00376 {
00377   if ( str.isEmpty() )
00378     return QByteArray();
00379 
00380   
00381   const QTextCodec *codec = KGlobal::charsets()->codecForName( charset );
00382   QByteArray latin;
00383   if ( charset == "us-ascii" )
00384     latin = str.toAscii();
00385   else if ( codec )
00386     latin = codec->fromUnicode( str );
00387   else
00388     latin = str.toLocal8Bit();
00389 
00390   char *l;
00391   for ( l = latin.data(); *l; ++l ) {
00392     if ( ( ( *l & 0xE0 ) == 0 ) || ( *l & 0x80 ) )
00393       // *l is control character or 8-bit char
00394       break;
00395   }
00396   if ( !*l )
00397     return latin;
00398 
00399   QByteArray result = charset + "''";
00400   for ( l = latin.data(); *l; ++l ) {
00401     bool needsQuoting = ( *l & 0x80 ) || ( *l == '%' );
00402     if( !needsQuoting ) {
00403       const QByteArray especials = "()<>@,;:\"/[]?.= \033";
00404       int len = especials.length();
00405       for ( int i = 0; i < len; i++ )
00406         if ( *l == especials[i] ) {
00407           needsQuoting = true;
00408           break;
00409         }
00410     }
00411     if ( needsQuoting ) {
00412       result += '%';
00413       unsigned char hexcode;
00414       hexcode = ( ( *l & 0xF0 ) >> 4 ) + 48;
00415       if ( hexcode >= 58 )
00416         hexcode += 7;
00417       result += hexcode;
00418       hexcode = ( *l & 0x0F ) + 48;
00419       if ( hexcode >= 58 )
00420         hexcode += 7;
00421       result += hexcode;
00422     } else {
00423       result += *l;
00424     }
00425   }
00426   return result;
00427 }
00428 
00429 
00430 //-----------------------------------------------------------------------------
00431 QString decodeRFC2231String( const QByteArray &str, QByteArray &usedCS, const QByteArray &defaultCS,
00432   bool forceCS )
00433 {
00434   int p = str.indexOf('\'');
00435   if (p < 0) return KGlobal::charsets()->codecForName( defaultCS )->toUnicode( str );
00436 
00437   
00438   QByteArray charset = str.left(p);
00439 
00440   QByteArray st = str.mid( str.lastIndexOf('\'') + 1 );
00441   
00442   char ch, ch2;
00443   p = 0;
00444   while (p < (int)st.length())
00445   {
00446     if (st.at(p) == 37)
00447     {
00448       // Only try to decode the percent-encoded character if the percent sign
00449       // is really followed by two other characters, see testcase at bug 163024
00450       if ( p + 2 < st.length() ) {
00451         ch = st.at(p+1) - 48;
00452         if (ch > 16)
00453           ch -= 7;
00454         ch2 = st.at(p+2) - 48;
00455         if (ch2 > 16)
00456           ch2 -= 7;
00457         st[p] = ch * 16 + ch2;
00458         st.remove( p+1, 2 );
00459       }
00460     }
00461     p++;
00462   }
00463   kDebug() << "Got pre-decoded:" << st;
00464   QString result;
00465   const QTextCodec * charsetcodec = KGlobal::charsets()->codecForName( charset );
00466   if ( !charsetcodec || forceCS )
00467     charsetcodec = KGlobal::charsets()->codecForName( defaultCS );
00468 
00469   usedCS = charsetcodec->name();
00470   return charsetcodec->toUnicode( st );
00471 }
00472 
// Convenience overload: decodes @p src with "utf-8" as default charset,
// without forcing it, and discards the reported charset.
00473 QString decodeRFC2231String( const QByteArray &src )
00474 {
00475   QByteArray usedCS;
00476   return decodeRFC2231String( src, usedCS, "utf-8", false );
00477 }
00478 
00479 QByteArray uniqueString()
00480 {
00481   static char chars[] = "0123456789abcdefghijklmnopqrstuvxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
00482   time_t now;
00483   char p[11];
00484   int pos, ran;
00485   unsigned int timeval;
00486 
00487   p[10] = '\0';
00488   now = time( 0 );
00489   ran = 1 + (int)(1000.0*rand() / (RAND_MAX + 1.0));
00490   timeval = (now / ran) + getpid();
00491 
00492   for ( int i=0; i<10; i++ ) {
00493     pos = (int) (61.0*rand() / (RAND_MAX + 1.0));
00494     //kDebug() << pos;
00495     p[i] = chars[pos];
00496   }
00497 
00498   QByteArray ret;
00499   ret.setNum( timeval );
00500   ret += '.';
00501   ret += p;
00502 
00503   return ret;
00504 }
00505 
// Returns "nextPart" followed by a fresh uniqueString().
00506 QByteArray multiPartBoundary()
00507 {
00508   return "nextPart" + uniqueString();
00509 }
00510 
// Returns a copy of @p header in which every line break, together with the
// whitespace directly before and after it, is replaced by a single space.
// A continuation line that starts with the literal text "=09" or "=20"
// has that text skipped as well (bug #86302). No space is inserted when the
// fold reaches the last character of, or the end of, @p header.
00511 QByteArray unfoldHeader( const QByteArray &header )
00512 {
00513   QByteArray result;
        // pos: start of the not-yet-copied text; foldMid: position of the '\n';
        // [foldBegin, foldEnd) is the span that gets collapsed.
00514   int pos = 0, foldBegin = 0, foldMid = 0, foldEnd = 0;
00515   while ( ( foldMid = header.indexOf( '\n', pos ) ) >= 0 ) {
00516     foldBegin = foldEnd = foldMid;
00517     // find the first space before the line-break
00518     while ( foldBegin > 0 ) {
00519       if ( !QChar( header[foldBegin - 1] ).isSpace() ) {
00520         break;
00521       }
00522       --foldBegin;
00523     }
00524     // find the first non-space after the line-break
00525     while ( foldEnd <= header.length() - 1 ) {
00526       if ( QChar( header[foldEnd] ).isSpace() ) {
00527         ++foldEnd;
00528       }
00529       else if ( foldEnd > 0 && header[foldEnd - 1] == '\n' &&
00530                 header[foldEnd] == '=' && foldEnd + 2 < header.length() &&
00531                 ( ( header[foldEnd + 1] == '0' &&
00532                     header[foldEnd + 2] == '9' ) ||
00533                   ( header[foldEnd + 1] == '2' &&
00534                     header[foldEnd + 2] == '0' ) ) ) {
00535         // bug #86302: malformed header continuation starting with =09/=20
00536         foldEnd += 3;
00537       }
00538       else {
00539         break;
00540       }
00541     }
00542 
00543     result += header.mid( pos, foldBegin - pos );
00544     if ( foldEnd < header.length() -1 )
00545       result += ' ';
00546     pos = foldEnd;
00547   }
        // copy whatever follows the last line break
00548   result += header.mid( pos, header.length() - pos );
00549   return result;
00550 }
00551 
// Finds the end of the (possibly folded) header whose data starts at
// @p dataBegin in @p src.
//
// @param src        the header block
// @param dataBegin  index of the first data byte; advanced by 2 when the first
//                   line is empty and the data starts on a folded next line
// @param folded     optional out-parameter, reset to false on entry and set to
//                   true if at least one continuation line was found
// @return -1 if @p dataBegin is negative; src.length() if @p dataBegin is past
//         the last byte or no terminating '\n' exists; otherwise the index of
//         the '\n' that ends the header.
00552 int findHeaderLineEnd( const QByteArray &src, int &dataBegin, bool *folded )
00553 {
00554   int end = dataBegin;
        // NB: len is the index of the last byte, not the length.
00555   int len = src.length() - 1;
00556 
00557   if ( folded )
00558     *folded = false;
00559 
00560   if ( dataBegin < 0 ) {
00561     // Not found
00562     return -1;
00563   }
00564 
00565   if ( dataBegin > len ) {
00566     // No data available
00567     return len + 1;
00568   }
00569 
00570   // If the first line contains nothing, but the next line starts with a space
00571   // or a tab, that means a stupid mail client has made the first header field line
00572   // entirely empty, and has folded the rest to the next line(s).
00573   if ( src.at(end) == '\n' && end + 1 < len &&
00574        ( src[end+1] == ' ' || src[end+1] == '\t' ) ) {
00575 
00576     // Skip \n and first whitespace
00577     dataBegin += 2;
00578     end += 2;
00579   }
00580 
00581   if ( src.at(end) != '\n' ) {  // check if the header is not empty
00582     while ( true ) {
00583       end = src.indexOf( '\n', end + 1 );
00584       if ( end == -1 || end == len ) {
00585         // end of string
00586         break;
00587       }
00588       else if ( src[end+1] == ' ' || src[end+1] == '\t' ||
00589                 ( src[end+1] == '=' && end+3 <= len &&
00590                   ( ( src[end+2] == '0' && src[end+3] == '9' ) ||
00591                     ( src[end+2] == '2' && src[end+3] == '0' ) ) ) ) {
00592         // next line is header continuation or starts with =09/=20 (bug #86302)
00593         if ( folded )
00594           *folded = true;
00595       } else {
00596         // end of header (no header continuation)
00597         break;
00598       }
00599     }
00600   }
00601 
00602   if ( end < 0 ) {
00603     end = len + 1; //take the rest of the string
00604   }
00605   return end;
00606 }
00607 
00608 int indexOfHeader( const QByteArray &src, const QByteArray &name, int &end, int &dataBegin, bool *folded )
00609 {
00610   QByteArray n = name;
00611   n.append( ':' );
00612   int begin = -1;
00613 
00614   if ( qstrnicmp( n.constData(), src.constData(), n.length() ) == 0 ) {
00615     begin = 0;
00616   } else {
00617     n.prepend('\n');
00618     const char *p = strcasestr( src.constData(), n.constData() );
00619     if ( !p ) {
00620       begin = -1;
00621     } else {
00622       begin = p - src.constData();
00623       ++begin;
00624     }
00625   }
00626 
00627   if ( begin > -1) {     //there is a header with the given name
00628     dataBegin = begin + name.length() + 1; //skip the name
00629     // skip the usual space after the colon
00630     if ( src.at( dataBegin ) == ' ' ) {
00631       ++dataBegin;
00632     }
00633     end = findHeaderLineEnd( src, dataBegin, folded );
00634     return begin;
00635 
00636   } else {
00637     dataBegin = -1;
00638     return -1; //header not found
00639   }
00640 }
00641 
00642 QByteArray extractHeader( const QByteArray &src, const QByteArray &name )
00643 {
00644   int begin, end;
00645   bool folded;
00646   indexOfHeader( src, name, end, begin, &folded );
00647 
00648   if ( begin >= 0 ) {
00649     if ( !folded ) {
00650       return src.mid( begin, end - begin );
00651     } else {
00652       QByteArray hdrValue = src.mid( begin, end - begin );
00653       return unfoldHeader( hdrValue );
00654     }
00655   } else {
00656     return QByteArray(); //header not found
00657   }
00658 }
00659 
00660 QList<QByteArray> extractHeaders( const QByteArray &src, const QByteArray &name )
00661 {
00662   int begin, end;
00663   bool folded;
00664   QList<QByteArray> result;
00665   QByteArray copySrc( src );
00666 
00667   indexOfHeader( copySrc, name, end, begin, &folded );
00668   while ( begin >= 0 ) {
00669     if ( !folded ) {
00670       result.append( copySrc.mid( begin, end - begin ) );
00671     } else {
00672       QByteArray hdrValue = copySrc.mid( begin, end - begin );
00673       result.append( unfoldHeader( hdrValue ) );
00674     }
00675 
00676     // get the next one, a tiny bit ugly, but we don't want the previous to be found again...
00677     copySrc = copySrc.mid( end );
00678     indexOfHeader( copySrc, name, end, begin, &folded );
00679   }
00680 
00681   return result;
00682 }
00683 
00684 void removeHeader( QByteArray &header, const QByteArray &name )
00685 {
00686   int begin, end, dummy;
00687   begin = indexOfHeader( header, name, end, dummy );
00688   if ( begin >= 0 ) {
00689     header.remove( begin, end - begin + 1 );
00690   }
00691 }
00692 
00693 QByteArray CRLFtoLF( const QByteArray &s )
00694 {
00695   QByteArray ret = s;
00696   ret.replace( "\r\n", "\n" );
00697   return ret;
00698 }
00699 
00700 QByteArray CRLFtoLF( const char *s )
00701 {
00702   QByteArray ret = s;
00703   return CRLFtoLF( ret );
00704 }
00705 
00706 QByteArray LFtoCRLF( const QByteArray &s )
00707 {
00708   QByteArray ret = s;
00709   ret.replace( '\n', "\r\n" );
00710   return ret;
00711 }
00712 
00713 QByteArray LFtoCRLF( const char *s )
00714 {
00715   QByteArray ret = s;
00716   return LFtoCRLF( ret );
00717 }
00718 
namespace {
/**
  Strips all double quotes from @p str in place. Inside a quoted section a
  backslash is removed and the character it escaped is kept untouched;
  outside quotes backslashes are left alone.

  T must provide length(), operator[] and remove( pos, count ).
*/
template < typename T > void removeQuotesGeneric( T & str )
{
  bool insideQuotes = false;
  int pos = 0;
  while ( pos < str.length() ) {
    if ( str[pos] == '"' ) {
      // Drop the quote and look at the same position again.
      str.remove( pos, 1 );
      insideQuotes = !insideQuotes;
      continue;
    }
    if ( insideQuotes && ( str[pos] == '\\' ) ) {
      // Drop the backslash; the advance below skips the escaped character.
      str.remove( pos, 1 );
    }
    ++pos;
  }
}
}
00736 
// Removes double quotes (and backslash escapes inside them) from @p str in
// place; see removeQuotesGeneric().
00737 void removeQuots( QByteArray &str )
00738 {
00739   removeQuotesGeneric( str );
00740 }
00741 
// Removes double quotes (and backslash escapes inside them) from @p str in
// place; see removeQuotesGeneric().
00742 void removeQuots( QString &str )
00743 {
00744   removeQuotesGeneric( str );
00745 }
00746 
00747 //
00748 // The next two helper functions just return the ASCII char at a given index of
00749 // a string or a byte array. They only exist so that addQuotes_impl() can be
00750 // written without code duplication.
00751 //
00752 
// Returns the byte of @p array at @p index.
00753 static char getCharFromQByteArray( const QByteArray &array, int index )
00754 {
00755   return array.at( index );
00756 }
00757 
// Returns the character of @p string at @p index converted with QChar::toAscii().
00758 static char getCharFromQString( const QString &string, int index )
00759 {
00760   return string.at( index ).toAscii();
00761 }
00762 
/**
  Escapes and, if needed, quotes @p str in place.

  Every backslash and double quote gets a backslash in front of it. The
  string is then wrapped in double quotes if @p forceQuotes is set or if it
  contained any of the characters ()<>@,.;:[]=\" .

  @param convertFunction  returns the character of the string at a given
                          index as a plain char
*/
template<class StringType>
void addQuotes_impl( StringType &str, bool forceQuotes,
                     char (*convertFunction)( const StringType&, int ) )
{
  bool sawSpecial = false;
  int pos = 0;
  while ( pos < str.length() ) {
    const char ch = convertFunction( str, pos );
    if ( strchr("()<>@,.;:[]=\\\"", ch ) != 0 ) {
      sawSpecial = true;
    }
    if ( ch == '\\' || ch == '\"' ) {
      // Insert the escape and step over it, so that the advance below moves
      // past the escaped character itself.
      str.insert( pos, '\\' );
      ++pos;
    }
    ++pos;
  }

  if ( forceQuotes || sawSpecial ) {
    str.insert( 0, '\"' );
    str.append( "\"" );
  }
}
00784 
// Escapes backslashes/double quotes in @p str and wraps it in double quotes
// if it contains special characters or @p forceQuotes is set; see addQuotes_impl().
00785 void addQuotes( QByteArray &str, bool forceQuotes )
00786 {
00787   addQuotes_impl( str, forceQuotes, &getCharFromQByteArray );
00788 }
00789 
// Escapes backslashes/double quotes in @p str and wraps it in double quotes
// if it contains special characters or @p forceQuotes is set; see addQuotes_impl().
00790 void addQuotes( QString &str, bool forceQuotes )
00791 {
00792   addQuotes_impl( str, forceQuotes, &getCharFromQString );
00793 }
00794 
00795 KMIME_EXPORT QString balanceBidiState( const QString &input )
00796 {
00797   const int LRO = 0x202D;
00798   const int RLO = 0x202E;
00799   const int LRE = 0x202A;
00800   const int RLE = 0x202B;
00801   const int PDF = 0x202C;
00802 
00803   QString result = input;
00804 
00805   int openDirChangers = 0;
00806   int numPDFsRemoved = 0;
00807   for ( int i = 0; i < input.length(); i++ ) {
00808     const ushort &code = input.at( i ).unicode();
00809     if ( code == LRO || code == RLO || code == LRE || code == RLE ) {
00810       openDirChangers++;
00811     }
00812     else if ( code == PDF ) {
00813       if ( openDirChangers > 0 ) {
00814         openDirChangers--;
00815       }
00816       else {
00817         // One PDF too much, remove it
00818         kWarning() << "Possible Unicode spoofing (unexpected PDF) detected in" << input;
00819         result.remove( i - numPDFsRemoved, 1 );
00820         numPDFsRemoved++;
00821       }
00822     }
00823   }
00824 
00825   if ( openDirChangers > 0 ) {
00826     kWarning() << "Possible Unicode spoofing detected in" << input;
00827 
00828     // At PDF chars to the end until the correct state is restored.
00829     // As a special exception, when encountering quoted strings, place the PDF before
00830     // the last quote.
00831     for ( int i = openDirChangers; i > 0; i-- ) {
00832       if ( result.endsWith( '"' ) )
00833         result.insert( result.length() - 1, QChar( PDF ) );
00834       else
00835         result += QChar( PDF );
00836     }
00837   }
00838 
00839   return result;
00840 }
00841 
00842 QString removeBidiControlChars( const QString &input )
00843 {
00844   const int LRO = 0x202D;
00845   const int RLO = 0x202E;
00846   const int LRE = 0x202A;
00847   const int RLE = 0x202B;
00848   QString result = input;
00849   result.remove( LRO );
00850   result.remove( RLO );
00851   result.remove( LRE );
00852   result.remove( RLE );
00853   return result;
00854 }
00855 
00856 } // namespace KMime

KMIME Library

Skip menu "KMIME Library"
  • Main Page
  • Namespace List
  • Class Hierarchy
  • Alphabetical List
  • Class List
  • File List
  • Namespace Members
  • Class Members
  • Related Pages

KDE-PIM Libraries

Skip menu "KDE-PIM Libraries"
  • akonadi
  •   contact
  •   kmime
  • kabc
  • kblog
  • kcal
  • kholidays
  • kimap
  • kioslave
  •   imap4
  •   mbox
  •   nntp
  • kldap
  • kmime
  • kontactinterface
  • kpimidentities
  • kpimtextedit
  •   richtextbuilders
  • kpimutils
  • kresources
  • ktnef
  • kxmlrpcclient
  • mailtransport
  • microblog
  • qgpgme
  • syndication
  •   atom
  •   rdf
  •   rss2
Generated for KDE-PIM Libraries by doxygen 1.7.1
This website is maintained by Adriaan de Groot and Allen Winter.
KDE® and the K Desktop Environment® logo are registered trademarks of KDE e.V. | Legal