KMIME Library
kmime_util.cpp
00001 /* 00002 kmime_util.cpp 00003 00004 KMime, the KDE Internet mail/usenet news message library. 00005 Copyright (c) 2001 the KMime authors. 00006 See file AUTHORS for details 00007 00008 This library is free software; you can redistribute it and/or 00009 modify it under the terms of the GNU Library General Public 00010 License as published by the Free Software Foundation; either 00011 version 2 of the License, or (at your option) any later version. 00012 00013 This library is distributed in the hope that it will be useful, 00014 but WITHOUT ANY WARRANTY; without even the implied warranty of 00015 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00016 Library General Public License for more details. 00017 00018 You should have received a copy of the GNU Library General Public License 00019 along with this library; see the file COPYING.LIB. If not, write to 00020 the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, 00021 Boston, MA 02110-1301, USA. 00022 */ 00023 00024 #include "kmime_util.h" 00025 #include "kmime_util_p.h" 00026 00027 #include "kmime_charfreq.h" 00028 #include "kmime_codecs.h" 00029 #include "kmime_header_parsing.h" 00030 #include "kmime_message.h" 00031 #include "kmime_warning.h" 00032 00033 #include <config-kmime.h> 00034 #include <kdefakes.h> // for strcasestr 00035 #include <kglobal.h> 00036 #include <klocale.h> 00037 #include <kcharsets.h> 00038 #include <kcodecs.h> 00039 #include <kdebug.h> 00040 00041 #include <QtCore/QList> 00042 #include <QtCore/QString> 00043 #include <QtCore/QTextCodec> 00044 00045 #include <ctype.h> 00046 #include <time.h> 00047 #include <stdlib.h> 00048 #include <unistd.h> 00049 #include <boost/concept_check.hpp> 00050 00051 using namespace KMime; 00052 00053 namespace KMime { 00054 00055 QList<QByteArray> c_harsetCache; 00056 QList<QByteArray> l_anguageCache; 00057 QString f_allbackCharEnc; 00058 bool u_seOutlookEncoding = false; 00059 00060 QByteArray cachedCharset( const QByteArray &name ) 00061 { 00062 foreach ( const QByteArray& charset, c_harsetCache ) { 00063 if ( qstricmp( name.data(), charset.data() ) == 0 ) { 00064 return charset; 00065 } 00066 } 00067 00068 c_harsetCache.append( name.toUpper() ); 00069 //kDebug() << "KNMimeBase::cachedCharset() number of cs" << c_harsetCache.count(); 00070 return c_harsetCache.last(); 00071 } 00072 00073 QByteArray cachedLanguage( const QByteArray &name ) 00074 { 00075 foreach ( const QByteArray& language, l_anguageCache ) { 00076 if ( qstricmp( name.data(), language.data() ) == 0 ) { 00077 return language; 00078 } 00079 } 00080 00081 l_anguageCache.append( name.toUpper() ); 00082 //kDebug() << "KNMimeBase::cachedCharset() number of cs" << c_harsetCache.count(); 00083 return l_anguageCache.last(); 00084 } 00085 00086 bool isUsAscii( const QString &s ) 00087 { 00088 uint sLength = s.length(); 00089 for ( uint i=0; i<sLength; i++ ) { 00090 if ( s.at( i ).toLatin1() <= 0 ) { // c==0: non-latin1, c<0: non-us-ascii 00091 return false; 00092 } 00093 } 00094 return true; 00095 } 00096 00097 QString nameForEncoding( Headers::contentEncoding enc ) 00098 { 00099 switch( enc ) { 00100 case Headers::CE7Bit: return QString::fromLatin1( "7bit" ); 00101 case Headers::CE8Bit: return QString::fromLatin1( "8bit" ); 00102 case Headers::CEquPr: return QString::fromLatin1( "quoted-printable" ); 00103 case Headers::CEbase64: return QString::fromLatin1( "base64" ); 00104 case Headers::CEuuenc: return QString::fromLatin1( "uuencode" ); 00105 case Headers::CEbinary: return QString::fromLatin1( "binary" ); 00106 default: return QString::fromLatin1( "unknown" ); 00107 } 00108 } 00109 00110 QList<Headers::contentEncoding> encodingsForData( const QByteArray &data ) 00111 { 00112 QList<Headers::contentEncoding> allowed; 00113 CharFreq cf( data ); 00114 00115 switch ( cf.type() ) { 00116 case CharFreq::SevenBitText: 00117 allowed << Headers::CE7Bit; 00118 case CharFreq::EightBitText: 00119 allowed << Headers::CE8Bit; 00120 case CharFreq::SevenBitData: 00121 if ( cf.printableRatio() > 5.0/6.0 ) { 00122 // let n the length of data and p the number of printable chars. 00123 // Then base64 \approx 4n/3; qp \approx p + 3(n-p) 00124 // => qp < base64 iff p > 5n/6. 00125 allowed << Headers::CEquPr; 00126 allowed << Headers::CEbase64; 00127 } else { 00128 allowed << Headers::CEbase64; 00129 allowed << Headers::CEquPr; 00130 } 00131 break; 00132 case CharFreq::EightBitData: 00133 allowed << Headers::CEbase64; 00134 break; 00135 case CharFreq::None: 00136 default: 00137 Q_ASSERT( false ); 00138 } 00139 00140 return allowed; 00141 } 00142 00143 // "(),.:;<>@[\] 00144 const uchar specialsMap[16] = { 00145 0x00, 0x00, 0x00, 0x00, // CTLs 00146 0x20, 0xCA, 0x00, 0x3A, // SPACE ... '?' 00147 0x80, 0x00, 0x00, 0x1C, // '@' ... '_' 00148 0x00, 0x00, 0x00, 0x00 // '`' ... DEL 00149 }; 00150 00151 // "(),:;<>@[\]/=? 00152 const uchar tSpecialsMap[16] = { 00153 0x00, 0x00, 0x00, 0x00, // CTLs 00154 0x20, 0xC9, 0x00, 0x3F, // SPACE ... '?' 00155 0x80, 0x00, 0x00, 0x1C, // '@' ... '_' 00156 0x00, 0x00, 0x00, 0x00 // '`' ... DEL 00157 }; 00158 00159 // all except specials, CTLs, SPACE. 00160 const uchar aTextMap[16] = { 00161 0x00, 0x00, 0x00, 0x00, 00162 0x5F, 0x35, 0xFF, 0xC5, 00163 0x7F, 0xFF, 0xFF, 0xE3, 00164 0xFF, 0xFF, 0xFF, 0xFE 00165 }; 00166 00167 // all except tspecials, CTLs, SPACE. 00168 const uchar tTextMap[16] = { 00169 0x00, 0x00, 0x00, 0x00, 00170 0x5F, 0x36, 0xFF, 0xC0, 00171 0x7F, 0xFF, 0xFF, 0xE3, 00172 0xFF, 0xFF, 0xFF, 0xFE 00173 }; 00174 00175 // none except a-zA-Z0-9!*+-/ 00176 const uchar eTextMap[16] = { 00177 0x00, 0x00, 0x00, 0x00, 00178 0x40, 0x35, 0xFF, 0xC0, 00179 0x7F, 0xFF, 0xFF, 0xE0, 00180 0x7F, 0xFF, 0xFF, 0xE0 00181 }; 00182 00183 void setFallbackCharEncoding(const QString& fallbackCharEnc) 00184 { 00185 f_allbackCharEnc = fallbackCharEnc; 00186 } 00187 00188 QString fallbackCharEncoding() 00189 { 00190 return f_allbackCharEnc; 00191 } 00192 00193 void setUseOutlookAttachmentEncoding( bool violateStandard ) 00194 { 00195 u_seOutlookEncoding = violateStandard; 00196 } 00197 00198 bool useOutlookAttachmentEncoding() 00199 { 00200 return u_seOutlookEncoding; 00201 } 00202 00203 00204 QString decodeRFC2047String( const QByteArray &src, QByteArray &usedCS, 00205 const QByteArray &defaultCS, bool forceCS ) 00206 { 00207 QByteArray result; 00208 QByteArray spaceBuffer; 00209 const char *scursor = src.constData(); 00210 const char *send = scursor + src.length(); 00211 bool onlySpacesSinceLastWord = false; 00212 00213 while ( scursor != send ) { 00214 // space 00215 if ( isspace( *scursor ) && onlySpacesSinceLastWord ) { 00216 spaceBuffer += *scursor++; 00217 continue; 00218 } 00219 00220 // possible start of an encoded word 00221 if ( *scursor == '=' ) { 00222 QByteArray language; 00223 QString decoded; 00224 ++scursor; 00225 const char *start = scursor; 00226 if ( HeaderParsing::parseEncodedWord( scursor, send, decoded, language, usedCS, defaultCS, forceCS ) ) { 00227 result += decoded.toUtf8(); 00228 onlySpacesSinceLastWord = true; 00229 spaceBuffer.clear(); 00230 } else { 00231 if ( onlySpacesSinceLastWord ) { 00232 result += spaceBuffer; 00233 onlySpacesSinceLastWord = false; 00234 } 00235 result += '='; 00236 scursor = start; // reset cursor after parsing failure 00237 } 00238 continue; 00239 } else { 00240 // unencoded data 00241 if ( onlySpacesSinceLastWord ) { 00242 result += spaceBuffer; 00243 onlySpacesSinceLastWord = false; 00244 } 00245 result += *scursor; 00246 ++scursor; 00247 } 00248 } 00249 // If there are any chars that couldn't be decoded in UTF-8, 00250 // use the fallback charset if it exists 00251 const QString tryUtf8 = QString::fromUtf8( result ); 00252 if ( tryUtf8.contains( 0xFFFD ) && !f_allbackCharEnc.isEmpty() ) { 00253 QTextCodec* codec = KGlobal::charsets()->codecForName( f_allbackCharEnc ); 00254 return codec->toUnicode( result ); 00255 } else { 00256 return tryUtf8; 00257 } 00258 } 00259 00260 QString decodeRFC2047String( const QByteArray &src ) 00261 { 00262 QByteArray usedCS; 00263 return decodeRFC2047String( src, usedCS, "utf-8", false ); 00264 } 00265 00266 static const char *reservedCharacters = "\"()<>@,.;:\\[]="; 00267 00268 QByteArray encodeRFC2047String( const QString &src, const QByteArray &charset, 00269 bool addressHeader, bool allow8BitHeaders ) 00270 { 00271 QByteArray result; 00272 int start=0, end=0; 00273 bool nonAscii=false, ok=true, useQEncoding=false; 00274 00275 // fromLatin1() is safe here, codecForName() uses toLatin1() internally 00276 const QTextCodec *codec = KGlobal::charsets()->codecForName( QString::fromLatin1( charset ), ok ); 00277 00278 QByteArray usedCS; 00279 if ( !ok ) { 00280 //no codec available => try local8Bit and hope the best ;-) 00281 usedCS = KGlobal::locale()->encoding(); 00282 codec = KGlobal::charsets()->codecForName( QString::fromLatin1( usedCS ), ok ); 00283 } 00284 else { 00285 Q_ASSERT( codec ); 00286 if ( charset.isEmpty() ) 00287 usedCS = codec->name(); 00288 else 00289 usedCS = charset; 00290 } 00291 00292 QTextCodec::ConverterState converterState( QTextCodec::IgnoreHeader ); 00293 QByteArray encoded8Bit = codec->fromUnicode( src.constData(), src.length(), &converterState ); 00294 if ( converterState.invalidChars > 0 ) { 00295 usedCS = "utf-8"; 00296 codec = QTextCodec::codecForName( usedCS ); 00297 encoded8Bit = codec->fromUnicode( src ); 00298 } 00299 00300 if ( usedCS.contains( "8859-" ) ) { // use "B"-Encoding for non iso-8859-x charsets 00301 useQEncoding = true; 00302 } 00303 00304 if ( allow8BitHeaders ) { 00305 return encoded8Bit; 00306 } 00307 00308 uint encoded8BitLength = encoded8Bit.length(); 00309 for ( unsigned int i=0; i<encoded8BitLength; i++ ) { 00310 if ( encoded8Bit[i] == ' ' ) { // encoding starts at word boundaries 00311 start = i + 1; 00312 } 00313 00314 // encode escape character, for japanese encodings... 00315 if ( ( (signed char)encoded8Bit[i] < 0 ) || ( encoded8Bit[i] == '\033' ) || 00316 ( addressHeader && ( strchr( "\"()<>@,.;:\\[]=", encoded8Bit[i] ) != 0 ) ) ) { 00317 end = start; // non us-ascii char found, now we determine where to stop encoding 00318 nonAscii = true; 00319 break; 00320 } 00321 } 00322 00323 if ( nonAscii ) { 00324 while ( ( end < encoded8Bit.length() ) && ( encoded8Bit[end] != ' ' ) ) { 00325 // we encode complete words 00326 end++; 00327 } 00328 00329 for ( int x=end; x<encoded8Bit.length(); x++ ) { 00330 if ( ( (signed char)encoded8Bit[x]<0) || ( encoded8Bit[x] == '\033' ) || 00331 ( addressHeader && ( strchr(reservedCharacters, encoded8Bit[x]) != 0 ) ) ) { 00332 end = x; // we found another non-ascii word 00333 00334 while ( ( end < encoded8Bit.length() ) && ( encoded8Bit[end] != ' ' ) ) { 00335 // we encode complete words 00336 end++; 00337 } 00338 } 00339 } 00340 00341 result = encoded8Bit.left( start ) + "=?" + usedCS; 00342 00343 if ( useQEncoding ) { 00344 result += "?Q?"; 00345 00346 char c, hexcode;// "Q"-encoding implementation described in RFC 2047 00347 for ( int i=start; i<end; i++ ) { 00348 c = encoded8Bit[i]; 00349 if ( c == ' ' ) { // make the result readable with not MIME-capable readers 00350 result += '_'; 00351 } else { 00352 if ( ( ( c >= 'a' ) && ( c <= 'z' ) ) || // paranoid mode, encode *all* special chars to avoid problems 00353 ( ( c >= 'A' ) && ( c <= 'Z' ) ) || // with "From" & "To" headers 00354 ( ( c >= '0' ) && ( c <= '9' ) ) ) { 00355 result += c; 00356 } else { 00357 result += '='; // "stolen" from KMail ;-) 00358 hexcode = ((c & 0xF0) >> 4) + 48; 00359 if ( hexcode >= 58 ) { 00360 hexcode += 7; 00361 } 00362 result += hexcode; 00363 hexcode = (c & 0x0F) + 48; 00364 if ( hexcode >= 58 ) { 00365 hexcode += 7; 00366 } 00367 result += hexcode; 00368 } 00369 } 00370 } 00371 } else { 00372 result += "?B?" + encoded8Bit.mid( start, end - start ).toBase64(); 00373 } 00374 00375 result +="?="; 00376 result += encoded8Bit.right( encoded8Bit.length() - end ); 00377 } else { 00378 result = encoded8Bit; 00379 } 00380 00381 return result; 00382 } 00383 00384 QByteArray encodeRFC2047Sentence(const QString& src, const QByteArray& charset ) 00385 { 00386 QByteArray result; 00387 QList<QChar> splitChars; 00388 splitChars << QLatin1Char(',') << QLatin1Char('\"') << QLatin1Char(';') << QLatin1Char('\\'); 00389 const QChar *ch = src.constData(); 00390 const int length = src.length(); 00391 int pos = 0; 00392 int wordStart = 0; 00393 00394 //qDebug() << "Input:" << src; 00395 // Loop over all characters of the string. 00396 // When encountering a split character, RFC-2047-encode the word before it, and add it to the result. 00397 while (pos < length) { 00398 //qDebug() << "Pos:" << pos << "Result:" << result << "Char:" << ch->toAscii(); 00399 const bool isAscii = ch->unicode() < 127; 00400 const bool isReserved = (strchr( reservedCharacters, ch->toAscii() ) != 0); 00401 if ( isAscii && isReserved ) { 00402 const int wordSize = pos - wordStart; 00403 if (wordSize > 0) { 00404 const QString word = src.mid( wordStart, wordSize ); 00405 result += encodeRFC2047String( word, charset ); 00406 } 00407 00408 result += ch->toAscii(); 00409 wordStart = pos + 1; 00410 } 00411 ch++; 00412 pos++; 00413 } 00414 00415 // Encode the last word 00416 const int wordSize = pos - wordStart; 00417 if (wordSize > 0) { 00418 const QString word = src.mid( wordStart, pos - wordStart ); 00419 result += encodeRFC2047String( word, charset ); 00420 } 00421 00422 return result; 00423 } 00424 00425 00426 00427 //----------------------------------------------------------------------------- 00428 QByteArray encodeRFC2231String( const QString& str, const QByteArray& charset ) 00429 { 00430 if ( str.isEmpty() ) 00431 return QByteArray(); 00432 00433 00434 const QTextCodec *codec = KGlobal::charsets()->codecForName( QString::fromLatin1( charset ) ); 00435 QByteArray latin; 00436 if ( charset == "us-ascii" ) 00437 latin = str.toAscii(); 00438 else if ( codec ) 00439 latin = codec->fromUnicode( str ); 00440 else 00441 latin = str.toLocal8Bit(); 00442 00443 char *l; 00444 for ( l = latin.data(); *l; ++l ) { 00445 if ( ( ( *l & 0xE0 ) == 0 ) || ( *l & 0x80 ) ) 00446 // *l is control character or 8-bit char 00447 break; 00448 } 00449 if ( !*l ) 00450 return latin; 00451 00452 QByteArray result = charset + "''"; 00453 for ( l = latin.data(); *l; ++l ) { 00454 bool needsQuoting = ( *l & 0x80 ) || ( *l == '%' ); 00455 if( !needsQuoting ) { 00456 const QByteArray especials = "()<>@,;:\"/[]?.= \033"; 00457 int len = especials.length(); 00458 for ( int i = 0; i < len; i++ ) 00459 if ( *l == especials[i] ) { 00460 needsQuoting = true; 00461 break; 00462 } 00463 } 00464 if ( needsQuoting ) { 00465 result += '%'; 00466 unsigned char hexcode; 00467 hexcode = ( ( *l & 0xF0 ) >> 4 ) + 48; 00468 if ( hexcode >= 58 ) 00469 hexcode += 7; 00470 result += hexcode; 00471 hexcode = ( *l & 0x0F ) + 48; 00472 if ( hexcode >= 58 ) 00473 hexcode += 7; 00474 result += hexcode; 00475 } else { 00476 result += *l; 00477 } 00478 } 00479 return result; 00480 } 00481 00482 00483 //----------------------------------------------------------------------------- 00484 QString decodeRFC2231String( const QByteArray &str, QByteArray &usedCS, const QByteArray &defaultCS, 00485 bool forceCS ) 00486 { 00487 int p = str.indexOf('\''); 00488 if (p < 0) return KGlobal::charsets()->codecForName( QString::fromLatin1( defaultCS ))->toUnicode( str ); 00489 00490 00491 QByteArray charset = str.left(p); 00492 00493 QByteArray st = str.mid( str.lastIndexOf('\'') + 1 ); 00494 00495 char ch, ch2; 00496 p = 0; 00497 while (p < (int)st.length()) 00498 { 00499 if (st.at(p) == 37) 00500 { 00501 // Only try to decode the percent-encoded character if the percent sign 00502 // is really followed by two other characters, see testcase at bug 163024 00503 if ( p + 2 < st.length() ) { 00504 ch = st.at(p+1) - 48; 00505 if (ch > 16) 00506 ch -= 7; 00507 ch2 = st.at(p+2) - 48; 00508 if (ch2 > 16) 00509 ch2 -= 7; 00510 st[p] = ch * 16 + ch2; 00511 st.remove( p+1, 2 ); 00512 } 00513 } 00514 p++; 00515 } 00516 kDebug() << "Got pre-decoded:" << st; 00517 QString result; 00518 const QTextCodec * charsetcodec = KGlobal::charsets()->codecForName( QString::fromLatin1( charset ) ); 00519 if ( !charsetcodec || forceCS ) 00520 charsetcodec = KGlobal::charsets()->codecForName( QString::fromLatin1( defaultCS ) ); 00521 00522 usedCS = charsetcodec->name(); 00523 return charsetcodec->toUnicode( st ); 00524 } 00525 00526 QString decodeRFC2231String( const QByteArray &src ) 00527 { 00528 QByteArray usedCS; 00529 return decodeRFC2231String( src, usedCS, "utf-8", false ); 00530 } 00531 00532 QByteArray uniqueString() 00533 { 00534 static char chars[] = "0123456789abcdefghijklmnopqrstuvxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"; 00535 time_t now; 00536 char p[11]; 00537 int pos, ran; 00538 unsigned int timeval; 00539 00540 p[10] = '\0'; 00541 now = time( 0 ); 00542 ran = 1 + (int)(1000.0*rand() / (RAND_MAX + 1.0)); 00543 timeval = (now / ran) + getpid(); 00544 00545 for ( int i=0; i<10; i++ ) { 00546 pos = (int) (61.0*rand() / (RAND_MAX + 1.0)); 00547 //kDebug() << pos; 00548 p[i] = chars[pos]; 00549 } 00550 00551 QByteArray ret; 00552 ret.setNum( timeval ); 00553 ret += '.'; 00554 ret += p; 00555 00556 return ret; 00557 } 00558 00559 QByteArray multiPartBoundary() 00560 { 00561 return "nextPart" + uniqueString(); 00562 } 00563 00564 QByteArray unfoldHeader( const QByteArray &header ) 00565 { 00566 QByteArray result; 00567 int pos = 0, foldBegin = 0, foldMid = 0, foldEnd = 0; 00568 while ( ( foldMid = header.indexOf( '\n', pos ) ) >= 0 ) { 00569 foldBegin = foldEnd = foldMid; 00570 // find the first space before the line-break 00571 while ( foldBegin > 0 ) { 00572 if ( !QChar::fromLatin1( header[foldBegin - 1] ).isSpace() ) { 00573 break; 00574 } 00575 --foldBegin; 00576 } 00577 // find the first non-space after the line-break 00578 while ( foldEnd <= header.length() - 1 ) { 00579 if ( QChar::fromLatin1( header[foldEnd] ).isSpace() ) { 00580 ++foldEnd; 00581 } 00582 else if ( foldEnd > 0 && header[foldEnd - 1] == '\n' && 00583 header[foldEnd] == '=' && foldEnd + 2 < header.length() && 00584 ( ( header[foldEnd + 1] == '0' && 00585 header[foldEnd + 2] == '9' ) || 00586 ( header[foldEnd + 1] == '2' && 00587 header[foldEnd + 2] == '0' ) ) ) { 00588 // bug #86302: malformed header continuation starting with =09/=20 00589 foldEnd += 3; 00590 } 00591 else { 00592 break; 00593 } 00594 } 00595 00596 result += header.mid( pos, foldBegin - pos ); 00597 if ( foldEnd < header.length() -1 ) 00598 result += ' '; 00599 pos = foldEnd; 00600 } 00601 result += header.mid( pos, header.length() - pos ); 00602 return result; 00603 } 00604 00605 int findHeaderLineEnd( const QByteArray &src, int &dataBegin, bool *folded ) 00606 { 00607 int end = dataBegin; 00608 int len = src.length() - 1; 00609 00610 if ( folded ) 00611 *folded = false; 00612 00613 if ( dataBegin < 0 ) { 00614 // Not found 00615 return -1; 00616 } 00617 00618 if ( dataBegin > len ) { 00619 // No data available 00620 return len + 1; 00621 } 00622 00623 // If the first line contains nothing, but the next line starts with a space 00624 // or a tab, that means a stupid mail client has made the first header field line 00625 // entirely empty, and has folded the rest to the next line(s). 00626 if ( src.at(end) == '\n' && end + 1 < len && 00627 ( src[end+1] == ' ' || src[end+1] == '\t' ) ) { 00628 00629 // Skip \n and first whitespace 00630 dataBegin += 2; 00631 end += 2; 00632 } 00633 00634 if ( src.at(end) != '\n' ) { // check if the header is not empty 00635 while ( true ) { 00636 end = src.indexOf( '\n', end + 1 ); 00637 if ( end == -1 || end == len ) { 00638 // end of string 00639 break; 00640 } 00641 else if ( src[end+1] == ' ' || src[end+1] == '\t' || 00642 ( src[end+1] == '=' && end+3 <= len && 00643 ( ( src[end+2] == '0' && src[end+3] == '9' ) || 00644 ( src[end+2] == '2' && src[end+3] == '0' ) ) ) ) { 00645 // next line is header continuation or starts with =09/=20 (bug #86302) 00646 if ( folded ) 00647 *folded = true; 00648 } else { 00649 // end of header (no header continuation) 00650 break; 00651 } 00652 } 00653 } 00654 00655 if ( end < 0 ) { 00656 end = len + 1; //take the rest of the string 00657 } 00658 return end; 00659 } 00660 00661 int indexOfHeader( const QByteArray &src, const QByteArray &name, int &end, int &dataBegin, bool *folded ) 00662 { 00663 QByteArray n = name; 00664 n.append( ':' ); 00665 int begin = -1; 00666 00667 if ( qstrnicmp( n.constData(), src.constData(), n.length() ) == 0 ) { 00668 begin = 0; 00669 } else { 00670 n.prepend('\n'); 00671 const char *p = strcasestr( src.constData(), n.constData() ); 00672 if ( !p ) { 00673 begin = -1; 00674 } else { 00675 begin = p - src.constData(); 00676 ++begin; 00677 } 00678 } 00679 00680 if ( begin > -1) { //there is a header with the given name 00681 dataBegin = begin + name.length() + 1; //skip the name 00682 // skip the usual space after the colon 00683 if ( src.at( dataBegin ) == ' ' ) { 00684 ++dataBegin; 00685 } 00686 end = findHeaderLineEnd( src, dataBegin, folded ); 00687 return begin; 00688 00689 } else { 00690 dataBegin = -1; 00691 return -1; //header not found 00692 } 00693 } 00694 00695 QByteArray extractHeader( const QByteArray &src, const QByteArray &name ) 00696 { 00697 int begin, end; 00698 bool folded; 00699 indexOfHeader( src, name, end, begin, &folded ); 00700 00701 if ( begin >= 0 ) { 00702 if ( !folded ) { 00703 return src.mid( begin, end - begin ); 00704 } else { 00705 QByteArray hdrValue = src.mid( begin, end - begin ); 00706 return unfoldHeader( hdrValue ); 00707 } 00708 } else { 00709 return QByteArray(); //header not found 00710 } 00711 } 00712 00713 QList<QByteArray> extractHeaders( const QByteArray &src, const QByteArray &name ) 00714 { 00715 int begin, end; 00716 bool folded; 00717 QList<QByteArray> result; 00718 QByteArray copySrc( src ); 00719 00720 indexOfHeader( copySrc, name, end, begin, &folded ); 00721 while ( begin >= 0 ) { 00722 if ( !folded ) { 00723 result.append( copySrc.mid( begin, end - begin ) ); 00724 } else { 00725 QByteArray hdrValue = copySrc.mid( begin, end - begin ); 00726 result.append( unfoldHeader( hdrValue ) ); 00727 } 00728 00729 // get the next one, a tiny bit ugly, but we don't want the previous to be found again... 00730 copySrc = copySrc.mid( end ); 00731 indexOfHeader( copySrc, name, end, begin, &folded ); 00732 } 00733 00734 return result; 00735 } 00736 00737 void removeHeader( QByteArray &header, const QByteArray &name ) 00738 { 00739 int begin, end, dummy; 00740 begin = indexOfHeader( header, name, end, dummy ); 00741 if ( begin >= 0 ) { 00742 header.remove( begin, end - begin + 1 ); 00743 } 00744 } 00745 00746 QByteArray CRLFtoLF( const QByteArray &s ) 00747 { 00748 QByteArray ret = s; 00749 ret.replace( "\r\n", "\n" ); 00750 return ret; 00751 } 00752 00753 QByteArray CRLFtoLF( const char *s ) 00754 { 00755 QByteArray ret = s; 00756 return CRLFtoLF( ret ); 00757 } 00758 00759 QByteArray LFtoCRLF( const QByteArray &s ) 00760 { 00761 QByteArray ret = s; 00762 ret.replace( '\n', "\r\n" ); 00763 return ret; 00764 } 00765 00766 QByteArray LFtoCRLF( const char *s ) 00767 { 00768 QByteArray ret = s; 00769 return LFtoCRLF( ret ); 00770 } 00771 00772 namespace { 00773 template < typename StringType, typename CharType > void removeQuotesGeneric( StringType & str ) 00774 { 00775 bool inQuote = false; 00776 for ( int i = 0; i < str.length(); ++i ) { 00777 if ( str[i] == CharType( '"' ) ) { 00778 str.remove( i, 1 ); 00779 i--; 00780 inQuote = !inQuote; 00781 } else { 00782 if ( inQuote && ( str[i] == CharType( '\\' ) ) ) { 00783 str.remove( i, 1 ); 00784 } 00785 } 00786 } 00787 } 00788 } 00789 00790 void removeQuots( QByteArray &str ) 00791 { 00792 removeQuotesGeneric<QByteArray,char>( str ); 00793 } 00794 00795 void removeQuots( QString &str ) 00796 { 00797 removeQuotesGeneric<QString,QLatin1Char>( str ); 00798 } 00799 00800 template<class StringType,class CharType,class CharConverterType,class StringConverterType,class ToString> 00801 void addQuotes_impl( StringType &str, bool forceQuotes ) 00802 { 00803 bool needsQuotes=false; 00804 for ( int i=0; i < str.length(); i++ ) { 00805 const CharType cur = str.at( i ); 00806 if ( QString( ToString( str ) ).contains( QRegExp( QLatin1String( "\"|\\\\|=|\\]|\\[|:|;|,|\\.|,|@|<|>|\\)|\\(" ) ) ) ) { 00807 needsQuotes = true; 00808 } 00809 if ( cur == CharConverterType( '\\' ) || cur == CharConverterType( '\"' ) ) { 00810 str.insert( i, CharConverterType( '\\' ) ); 00811 i++; 00812 } 00813 } 00814 00815 if ( needsQuotes || forceQuotes ) { 00816 str.insert( 0, CharConverterType( '\"' ) ); 00817 str.append( StringConverterType( "\"" ) ); 00818 } 00819 } 00820 00821 void addQuotes( QByteArray &str, bool forceQuotes ) 00822 { 00823 addQuotes_impl<QByteArray,char,char,char*,QLatin1String>( str, forceQuotes ); 00824 } 00825 00826 void addQuotes( QString &str, bool forceQuotes ) 00827 { 00828 addQuotes_impl<QString,QChar,QLatin1Char,QLatin1String,QString>( str, forceQuotes ); 00829 } 00830 00831 KMIME_EXPORT QString balanceBidiState( const QString &input ) 00832 { 00833 const int LRO = 0x202D; 00834 const int RLO = 0x202E; 00835 const int LRE = 0x202A; 00836 const int RLE = 0x202B; 00837 const int PDF = 0x202C; 00838 00839 QString result = input; 00840 00841 int openDirChangers = 0; 00842 int numPDFsRemoved = 0; 00843 for ( int i = 0; i < input.length(); i++ ) { 00844 const ushort &code = input.at( i ).unicode(); 00845 if ( code == LRO || code == RLO || code == LRE || code == RLE ) { 00846 openDirChangers++; 00847 } 00848 else if ( code == PDF ) { 00849 if ( openDirChangers > 0 ) { 00850 openDirChangers--; 00851 } 00852 else { 00853 // One PDF too much, remove it 00854 kWarning() << "Possible Unicode spoofing (unexpected PDF) detected in" << input; 00855 result.remove( i - numPDFsRemoved, 1 ); 00856 numPDFsRemoved++; 00857 } 00858 } 00859 } 00860 00861 if ( openDirChangers > 0 ) { 00862 kWarning() << "Possible Unicode spoofing detected in" << input; 00863 00864 // At PDF chars to the end until the correct state is restored. 00865 // As a special exception, when encountering quoted strings, place the PDF before 00866 // the last quote. 00867 for ( int i = openDirChangers; i > 0; i-- ) { 00868 if ( result.endsWith( QLatin1Char( '"' ) ) ) 00869 result.insert( result.length() - 1, QChar( PDF ) ); 00870 else 00871 result += QChar( PDF ); 00872 } 00873 } 00874 00875 return result; 00876 } 00877 00878 QString removeBidiControlChars( const QString &input ) 00879 { 00880 const int LRO = 0x202D; 00881 const int RLO = 0x202E; 00882 const int LRE = 0x202A; 00883 const int RLE = 0x202B; 00884 QString result = input; 00885 result.remove( LRO ); 00886 result.remove( RLO ); 00887 result.remove( LRE ); 00888 result.remove( RLE ); 00889 return result; 00890 } 00891 00892 static bool isCryptoPart( Content* content ) 00893 { 00894 if( !content->contentType( false ) ) 00895 return false; 00896 00897 if( content->contentType()->subType().toLower() == "octet-stream" && 00898 !content->contentDisposition( false ) ) 00899 return false; 00900 00901 const Headers::ContentType *contentType = content->contentType(); 00902 const QByteArray lowerSubType = contentType->subType().toLower(); 00903 return ( contentType->mediaType().toLower() == "application" && 00904 ( lowerSubType == "pgp-encrypted" || 00905 lowerSubType == "pgp-signature" || 00906 lowerSubType == "pkcs7-mime" || 00907 lowerSubType == "pkcs7-signature" || 00908 lowerSubType == "x-pkcs7-signature" || 00909 ( lowerSubType == "octet-stream" && 00910 content->contentDisposition()->filename().toLower() == QLatin1String( "msg.asc" ) ) ) ); 00911 } 00912 00913 bool hasAttachment( Content* content ) 00914 { 00915 if( !content ) 00916 return false; 00917 00918 bool emptyFilename = true; 00919 if( content->contentDisposition( false ) && !content->contentDisposition()->filename().isEmpty() ) 00920 emptyFilename = false; 00921 00922 if( emptyFilename && content->contentType( false ) && !content->contentType()->name().isEmpty() ) 00923 emptyFilename = false; 00924 00925 // ignore crypto parts 00926 if( !emptyFilename && !isCryptoPart( content ) ) 00927 return true; 00928 00929 // Ok, content itself is not an attachment. now we deal with multiparts 00930 if( content->contentType()->isMultipart() ) { 00931 Q_FOREACH( Content* child, content->contents() ) { 00932 if( hasAttachment( child ) ) 00933 return true; 00934 } 00935 } 00936 00937 return false; 00938 } 00939 00940 bool isSigned( Message *message ) 00941 { 00942 if ( !message ) 00943 return false; 00944 00945 const KMime::Headers::ContentType* const contentType = message->contentType(); 00946 if ( contentType->isSubtype( "signed" ) || 00947 contentType->isSubtype( "pgp-signature" ) || 00948 contentType->isSubtype( "pkcs7-signature" ) || 00949 contentType->isSubtype( "x-pkcs7-signature" ) || 00950 message->mainBodyPart( "multipart/signed" ) || 00951 message->mainBodyPart( "application/pgp-signature" ) || 00952 message->mainBodyPart( "application/pkcs7-signature" ) || 00953 message->mainBodyPart( "application/x-pkcs7-signature" ) ) { 00954 return true; 00955 } 00956 00957 return false; 00958 } 00959 00960 bool isEncrypted( Message *message ) 00961 { 00962 if ( !message ) 00963 return false; 00964 00965 const KMime::Headers::ContentType* const contentType = message->contentType(); 00966 if ( contentType->isSubtype( "encrypted" ) || 00967 contentType->isSubtype( "pgp-encrypted" ) || 00968 contentType->isSubtype( "pkcs7-mime" ) || 00969 message->mainBodyPart( "multipart/encrypted" ) || 00970 message->mainBodyPart( "application/pgp-encrypted" ) || 00971 message->mainBodyPart( "application/pkcs7-mime" ) ) { 00972 return true; 00973 } 00974 00975 return false; 00976 } 00977 00978 bool isInvitation( Content *content ) 00979 { 00980 if ( !content ) 00981 return false; 00982 00983 const KMime::Headers::ContentType* const contentType = content->contentType( false ); 00984 00985 if ( contentType && contentType->isMediatype( "text" ) && contentType->isSubtype( "calendar" ) ) 00986 return true; 00987 00988 return false; 00989 } 00990 00991 } // namespace KMime