KMIME Library
kmime_header_parsing.cpp
00001 /* -*- c++ -*- 00002 kmime_header_parsing.cpp 00003 00004 KMime, the KDE Internet mail/usenet news message library. 00005 Copyright (c) 2001-2002 Marc Mutz <mutz@kde.org> 00006 00007 This library is free software; you can redistribute it and/or 00008 modify it under the terms of the GNU Library General Public 00009 License as published by the Free Software Foundation; either 00010 version 2 of the License, or (at your option) any later version. 00011 00012 This library is distributed in the hope that it will be useful, 00013 but WITHOUT ANY WARRANTY; without even the implied warranty of 00014 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00015 Library General Public License for more details. 00016 00017 You should have received a copy of the GNU Library General Public License 00018 along with this library; see the file COPYING.LIB. If not, write to 00019 the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, 00020 Boston, MA 02110-1301, USA. 00021 */ 00022 00023 #include "kmime_header_parsing.h" 00024 00025 #include "kmime_codecs.h" 00026 #include "kmime_headerfactory_p.h" 00027 #include "kmime_headers.h" 00028 #include "kmime_util.h" 00029 #include "kmime_util_p.h" 00030 #include "kmime_dateformatter.h" 00031 #include "kmime_warning.h" 00032 00033 #include <kglobal.h> 00034 #include <kcharsets.h> 00035 00036 #include <QtCore/QTextCodec> 00037 #include <QtCore/QMap> 00038 #include <QtCore/QStringList> 00039 #include <QtCore/QUrl> 00040 00041 #include <ctype.h> // for isdigit 00042 #include <cassert> 00043 00044 using namespace KMime; 00045 using namespace KMime::Types; 00046 00047 namespace KMime { 00048 00049 namespace Types { 00050 00051 // QUrl::fromAce is extremely expensive, so only use it when necessary. 00052 // Fortunately, the presence of IDNA is readily detected with a substring match... 00053 static inline QString QUrl_fromAce_wrapper( const QString & domain ) 00054 { 00055 if ( domain.contains( QLatin1String( "xn--" ) ) ) 00056 return QUrl::fromAce( domain.toLatin1() ); 00057 else 00058 return domain; 00059 } 00060 00061 static QString addr_spec_as_string( const AddrSpec & as, bool pretty ) 00062 { 00063 if ( as.isEmpty() ) { 00064 return QString(); 00065 } 00066 00067 static QChar dotChar = QLatin1Char( '.' ); 00068 00069 bool needsQuotes = false; 00070 QString result; 00071 result.reserve( as.localPart.length() + as.domain.length() + 1 ); 00072 for ( int i = 0 ; i < as.localPart.length() ; ++i ) { 00073 const QChar ch = as.localPart.at( i ); 00074 if ( ch == dotChar || isAText( ch.toLatin1() ) ) { 00075 result += ch; 00076 } else { 00077 needsQuotes = true; 00078 if ( ch == QLatin1Char( '\\' ) || ch == QLatin1Char( '"' ) ) { 00079 result += QLatin1Char( '\\' ); 00080 } 00081 result += ch; 00082 } 00083 } 00084 const QString dom = pretty ? QUrl_fromAce_wrapper( as.domain ) : as.domain ; 00085 if ( needsQuotes ) { 00086 result = QLatin1Char( '"' ) + result + QLatin1Char( '"' ); 00087 } 00088 if( dom.isEmpty() ) { 00089 return result; 00090 } else { 00091 result += QLatin1Char( '@' ); 00092 result += dom; 00093 return result; 00094 } 00095 } 00096 00097 QString AddrSpec::asString() const 00098 { 00099 return addr_spec_as_string( *this, false ); 00100 } 00101 00102 QString AddrSpec::asPrettyString() const 00103 { 00104 return addr_spec_as_string( *this, true ); 00105 } 00106 00107 bool AddrSpec::isEmpty() const 00108 { 00109 return localPart.isEmpty() && domain.isEmpty(); 00110 } 00111 00112 QByteArray Mailbox::address() const 00113 { 00114 return mAddrSpec.asString().toLatin1(); 00115 } 00116 00117 AddrSpec Mailbox::addrSpec() const 00118 { 00119 return mAddrSpec; 00120 } 00121 00122 QString Mailbox::name() const 00123 { 00124 return mDisplayName; 00125 } 00126 00127 void Mailbox::setAddress( const AddrSpec &addr ) 00128 { 00129 mAddrSpec = addr; 00130 } 00131 00132 void Mailbox::setAddress( const QByteArray &addr ) 00133 { 00134 const char *cursor = addr.constData(); 00135 if ( !HeaderParsing::parseAngleAddr( cursor, 00136 cursor + addr.length(), mAddrSpec ) ) { 00137 if ( !HeaderParsing::parseAddrSpec( cursor, cursor + addr.length(), 00138 mAddrSpec ) ) { 00139 kWarning() << "Invalid address"; 00140 return; 00141 } 00142 } 00143 } 00144 00145 void Mailbox::setName( const QString &name ) 00146 { 00147 mDisplayName = removeBidiControlChars( name ); 00148 } 00149 00150 void Mailbox::setNameFrom7Bit( const QByteArray &name, 00151 const QByteArray &defaultCharset ) 00152 { 00153 QByteArray cs; 00154 setName( decodeRFC2047String( name, cs, defaultCharset, false ) ); 00155 } 00156 00157 bool Mailbox::hasAddress() const 00158 { 00159 return !mAddrSpec.isEmpty(); 00160 } 00161 00162 bool Mailbox::hasName() const 00163 { 00164 return !mDisplayName.isEmpty(); 00165 } 00166 00167 QString Mailbox::prettyAddress() const 00168 { 00169 return prettyAddress( QuoteNever ); 00170 } 00171 00172 QString Mailbox::prettyAddress( Quoting quoting ) const 00173 { 00174 if ( !hasName() ) { 00175 return QLatin1String( address() ); 00176 } 00177 QString s = name(); 00178 if ( quoting != QuoteNever ) { 00179 addQuotes( s, quoting == QuoteAlways /*bool force*/ ); 00180 } 00181 00182 if ( hasAddress() ) { 00183 s += QLatin1String(" <") + QLatin1String( address() ) + QLatin1Char('>'); 00184 } 00185 return s; 00186 } 00187 00188 void Mailbox::fromUnicodeString( const QString &s ) 00189 { 00190 from7BitString( encodeRFC2047Sentence( s, "utf-8" ) ); 00191 } 00192 00193 void Mailbox::from7BitString( const QByteArray &s ) 00194 { 00195 const char *cursor = s.constData(); 00196 HeaderParsing::parseMailbox( cursor, cursor + s.length(), *this ); 00197 } 00198 00199 QByteArray KMime::Types::Mailbox::as7BitString( const QByteArray &encCharset ) const 00200 { 00201 if ( !hasName() ) { 00202 return address(); 00203 } 00204 QByteArray rv; 00205 if ( isUsAscii( name() ) ) { 00206 QByteArray tmp = name().toLatin1(); 00207 addQuotes( tmp, false ); 00208 rv += tmp; 00209 } else { 00210 rv += encodeRFC2047String( name(), encCharset, true ); 00211 } 00212 if ( hasAddress() ) { 00213 rv += " <" + address() + '>'; 00214 } 00215 return rv; 00216 } 00217 00218 } // namespace Types 00219 00220 namespace HeaderParsing { 00221 00222 // parse the encoded-word (scursor points to after the initial '=') 00223 bool parseEncodedWord( const char* &scursor, const char * const send, 00224 QString &result, QByteArray &language, 00225 QByteArray &usedCS, const QByteArray &defaultCS, 00226 bool forceCS ) 00227 { 00228 // make sure the caller already did a bit of the work. 00229 assert( *(scursor-1) == '=' ); 00230 00231 // 00232 // STEP 1: 00233 // scan for the charset/language portion of the encoded-word 00234 // 00235 00236 char ch = *scursor++; 00237 00238 if ( ch != '?' ) { 00239 // kDebug() << "first"; 00240 //KMIME_WARN_PREMATURE_END_OF( EncodedWord ); 00241 return false; 00242 } 00243 00244 // remember start of charset (ie. just after the initial "=?") and 00245 // language (just after the first '*') fields: 00246 const char * charsetStart = scursor; 00247 const char * languageStart = 0; 00248 00249 // find delimiting '?' (and the '*' separating charset and language 00250 // tags, if any): 00251 for ( ; scursor != send ; scursor++ ) { 00252 if ( *scursor == '?') { 00253 break; 00254 } else if ( *scursor == '*' && languageStart == 0 ) { 00255 languageStart = scursor + 1; 00256 } 00257 } 00258 00259 // not found? can't be an encoded-word! 00260 if ( scursor == send || *scursor != '?' ) { 00261 // kDebug() << "second"; 00262 KMIME_WARN_PREMATURE_END_OF( EncodedWord ); 00263 return false; 00264 } 00265 00266 // extract the language information, if any (if languageStart is 0, 00267 // language will be null, too): 00268 QByteArray maybeLanguage( languageStart, scursor - languageStart ); 00269 // extract charset information (keep in mind: the size given to the 00270 // ctor is one off due to the \0 terminator): 00271 QByteArray maybeCharset( charsetStart, 00272 ( languageStart ? languageStart - 1 : scursor ) - charsetStart ); 00273 00274 // 00275 // STEP 2: 00276 // scan for the encoding portion of the encoded-word 00277 // 00278 00279 // remember start of encoding (just _after_ the second '?'): 00280 scursor++; 00281 const char * encodingStart = scursor; 00282 00283 // find next '?' (ending the encoding tag): 00284 for ( ; scursor != send ; scursor++ ) { 00285 if ( *scursor == '?' ) { 00286 break; 00287 } 00288 } 00289 00290 // not found? Can't be an encoded-word! 00291 if ( scursor == send || *scursor != '?' ) { 00292 // kDebug() << "third"; 00293 KMIME_WARN_PREMATURE_END_OF( EncodedWord ); 00294 return false; 00295 } 00296 00297 // extract the encoding information: 00298 QByteArray maybeEncoding( encodingStart, scursor - encodingStart ); 00299 00300 // kDebug() << "parseEncodedWord: found charset == \"" << maybeCharset 00301 // << "\"; language == \"" << maybeLanguage 00302 // << "\"; encoding == \"" << maybeEncoding << "\""; 00303 00304 // 00305 // STEP 3: 00306 // scan for encoded-text portion of encoded-word 00307 // 00308 00309 // remember start of encoded-text (just after the third '?'): 00310 scursor++; 00311 const char * encodedTextStart = scursor; 00312 00313 // find the '?=' sequence (ending the encoded-text): 00314 for ( ; scursor != send ; scursor++ ) { 00315 if ( *scursor == '?' ) { 00316 if ( scursor + 1 != send ) { 00317 if ( *( scursor + 1 ) != '=' ) { // We expect a '=' after the '?', but we got something else; ignore 00318 KMIME_WARN << "Stray '?' in q-encoded word, ignoring this."; 00319 continue; 00320 } 00321 else { // yep, found a '?=' sequence 00322 scursor += 2; 00323 break; 00324 } 00325 } 00326 else { // The '?' is the last char, but we need a '=' after it! 00327 KMIME_WARN_PREMATURE_END_OF( EncodedWord ); 00328 return false; 00329 } 00330 } 00331 } 00332 00333 if ( *( scursor - 2 ) != '?' || *( scursor - 1 ) != '=' || 00334 scursor < encodedTextStart + 2 ) { 00335 KMIME_WARN_PREMATURE_END_OF( EncodedWord ); 00336 return false; 00337 } 00338 00339 // set end sentinel for encoded-text: 00340 const char * const encodedTextEnd = scursor - 2; 00341 00342 // 00343 // STEP 4: 00344 // setup decoders for the transfer encoding and the charset 00345 // 00346 00347 // try if there's a codec for the encoding found: 00348 Codec * codec = Codec::codecForName( maybeEncoding ); 00349 if ( !codec ) { 00350 KMIME_WARN_UNKNOWN( Encoding, maybeEncoding ); 00351 return false; 00352 } 00353 00354 // get an instance of a corresponding decoder: 00355 Decoder * dec = codec->makeDecoder(); 00356 assert( dec ); 00357 00358 // try if there's a (text)codec for the charset found: 00359 bool matchOK = false; 00360 QTextCodec *textCodec = 0; 00361 if ( forceCS || maybeCharset.isEmpty() ) { 00362 textCodec = KGlobal::charsets()->codecForName( QLatin1String( defaultCS ), matchOK ); 00363 usedCS = cachedCharset( defaultCS ); 00364 } else { 00365 textCodec = KGlobal::charsets()->codecForName( QLatin1String( maybeCharset ), matchOK ); 00366 if ( !matchOK ) { //no suitable codec found => use default charset 00367 textCodec = KGlobal::charsets()->codecForName( QLatin1String( defaultCS ), matchOK ); 00368 usedCS = cachedCharset( defaultCS ); 00369 } else { 00370 usedCS = cachedCharset( maybeCharset ); 00371 } 00372 } 00373 00374 if ( !matchOK || !textCodec ) { 00375 KMIME_WARN_UNKNOWN( Charset, maybeCharset ); 00376 delete dec; 00377 return false; 00378 }; 00379 00380 // kDebug() << "mimeName(): \"" << textCodec->name() << "\""; 00381 00382 // allocate a temporary buffer to store the 8bit text: 00383 int encodedTextLength = encodedTextEnd - encodedTextStart; 00384 QByteArray buffer; 00385 buffer.resize( codec->maxDecodedSizeFor( encodedTextLength ) ); 00386 char *bbegin = buffer.data(); 00387 char *bend = bbegin + buffer.length(); 00388 00389 // 00390 // STEP 5: 00391 // do the actual decoding 00392 // 00393 00394 if ( !dec->decode( encodedTextStart, encodedTextEnd, bbegin, bend ) ) { 00395 KMIME_WARN << codec->name() << "codec lies about its maxDecodedSizeFor(" 00396 << encodedTextLength << ")\nresult may be truncated"; 00397 } 00398 00399 result = textCodec->toUnicode( buffer.data(), bbegin - buffer.data() ); 00400 00401 // kDebug() << "result now: \"" << result << "\""; 00402 // cleanup: 00403 delete dec; 00404 language = maybeLanguage; 00405 00406 return true; 00407 } 00408 00409 static inline void eatWhiteSpace( const char* &scursor, const char * const send ) 00410 { 00411 while ( scursor != send && 00412 ( *scursor == ' ' || *scursor == '\n' || 00413 *scursor == '\t' || *scursor == '\r' ) ) 00414 scursor++; 00415 } 00416 00417 bool parseAtom( const char * &scursor, const char * const send, 00418 QString &result, bool allow8Bit ) 00419 { 00420 QPair<const char*,int> maybeResult; 00421 00422 if ( parseAtom( scursor, send, maybeResult, allow8Bit ) ) { 00423 result += QString::fromLatin1( maybeResult.first, maybeResult.second ); 00424 return true; 00425 } 00426 00427 return false; 00428 } 00429 00430 bool parseAtom( const char * &scursor, const char * const send, 00431 QPair<const char*,int> &result, bool allow8Bit ) 00432 { 00433 bool success = false; 00434 const char *start = scursor; 00435 00436 while ( scursor != send ) { 00437 signed char ch = *scursor++; 00438 if ( ch > 0 && isAText( ch ) ) { 00439 // AText: OK 00440 success = true; 00441 } else if ( allow8Bit && ch < 0 ) { 00442 // 8bit char: not OK, but be tolerant. 00443 KMIME_WARN_8BIT( ch ); 00444 success = true; 00445 } else { 00446 // CTL or special - marking the end of the atom: 00447 // re-set sursor to point to the offending 00448 // char and return: 00449 scursor--; 00450 break; 00451 } 00452 } 00453 result.first = start; 00454 result.second = scursor - start; 00455 return success; 00456 } 00457 00458 // FIXME: Remove this and the other parseToken() method. add a new one where "result" is a 00459 // QByteArray. 00460 bool parseToken( const char * &scursor, const char * const send, 00461 QString &result, bool allow8Bit ) 00462 { 00463 QPair<const char*,int> maybeResult; 00464 00465 if ( parseToken( scursor, send, maybeResult, allow8Bit ) ) { 00466 result += QString::fromLatin1( maybeResult.first, maybeResult.second ); 00467 return true; 00468 } 00469 00470 return false; 00471 } 00472 00473 bool parseToken( const char * &scursor, const char * const send, 00474 QPair<const char*,int> &result, bool allow8Bit ) 00475 { 00476 bool success = false; 00477 const char * start = scursor; 00478 00479 while ( scursor != send ) { 00480 signed char ch = *scursor++; 00481 if ( ch > 0 && isTText( ch ) ) { 00482 // TText: OK 00483 success = true; 00484 } else if ( allow8Bit && ch < 0 ) { 00485 // 8bit char: not OK, but be tolerant. 00486 KMIME_WARN_8BIT( ch ); 00487 success = true; 00488 } else { 00489 // CTL or tspecial - marking the end of the atom: 00490 // re-set sursor to point to the offending 00491 // char and return: 00492 scursor--; 00493 break; 00494 } 00495 } 00496 result.first = start; 00497 result.second = scursor - start; 00498 return success; 00499 } 00500 00501 #define READ_ch_OR_FAIL if ( scursor == send ) { \ 00502 KMIME_WARN_PREMATURE_END_OF( GenericQuotedString ); \ 00503 return false; \ 00504 } else { \ 00505 ch = *scursor++; \ 00506 } 00507 00508 // known issues: 00509 // 00510 // - doesn't handle quoted CRLF 00511 00512 // FIXME: Why is result a QString? This should be a QByteArray, since at this level, we don't 00513 // know about encodings yet! 00514 bool parseGenericQuotedString( const char* &scursor, const char * const send, 00515 QString &result, bool isCRLF, 00516 const char openChar, const char closeChar ) 00517 { 00518 char ch; 00519 // We are in a quoted-string or domain-literal or comment and the 00520 // cursor points to the first char after the openChar. 00521 // We will apply unfolding and quoted-pair removal. 00522 // We return when we either encounter the end or unescaped openChar 00523 // or closeChar. 00524 00525 assert( *(scursor-1) == openChar || *(scursor-1) == closeChar ); 00526 00527 while ( scursor != send ) { 00528 ch = *scursor++; 00529 00530 if ( ch == closeChar || ch == openChar ) { 00531 // end of quoted-string or another opening char: 00532 // let caller decide what to do. 00533 return true; 00534 } 00535 00536 switch( ch ) { 00537 case '\\': // quoted-pair 00538 // misses "\" CRLF LWSP-char handling, see rfc822, 3.4.5 00539 READ_ch_OR_FAIL; 00540 KMIME_WARN_IF_8BIT( ch ); 00541 result += QLatin1Char( ch ); 00542 break; 00543 case '\r': 00544 // ### 00545 // The case of lonely '\r' is easy to solve, as they're 00546 // not part of Unix Line-ending conventions. 00547 // But I see a problem if we are given Unix-native 00548 // line-ending-mails, where we cannot determine anymore 00549 // whether a given '\n' was part of a CRLF or was occurring 00550 // on it's own. 00551 READ_ch_OR_FAIL; 00552 if ( ch != '\n' ) { 00553 // CR on it's own... 00554 KMIME_WARN_LONE( CR ); 00555 result += QLatin1Char('\r'); 00556 scursor--; // points to after the '\r' again 00557 } else { 00558 // CRLF encountered. 00559 // lookahead: check for folding 00560 READ_ch_OR_FAIL; 00561 if ( ch == ' ' || ch == '\t' ) { 00562 // correct folding; 00563 // position cursor behind the CRLF WSP (unfolding) 00564 // and add the WSP to the result 00565 result += QLatin1Char( ch ); 00566 } else { 00567 // this is the "shouldn't happen"-case. There is a CRLF 00568 // inside a quoted-string without it being part of FWS. 00569 // We take it verbatim. 00570 KMIME_WARN_NON_FOLDING( CRLF ); 00571 result += QLatin1String( "\r\n" ); 00572 // the cursor is decremented again, so's we need not 00573 // duplicate the whole switch here. "ch" could've been 00574 // everything (incl. openChar or closeChar). 00575 scursor--; 00576 } 00577 } 00578 break; 00579 case '\n': 00580 // Note: CRLF has been handled above already! 00581 // ### LF needs special treatment, depending on whether isCRLF 00582 // is true (we can be sure a lonely '\n' was meant this way) or 00583 // false ('\n' alone could have meant LF or CRLF in the original 00584 // message. This parser assumes CRLF iff the LF is followed by 00585 // either WSP (folding) or NULL (premature end of quoted-string; 00586 // Should be fixed, since NULL is allowed as per rfc822). 00587 READ_ch_OR_FAIL; 00588 if ( !isCRLF && ( ch == ' ' || ch == '\t' ) ) { 00589 // folding 00590 // correct folding 00591 result += QLatin1Char( ch ); 00592 } else { 00593 // non-folding 00594 KMIME_WARN_LONE( LF ); 00595 result += QLatin1Char( '\n' ); 00596 // pos is decremented, so's we need not duplicate the whole 00597 // switch here. ch could've been everything (incl. <">, "\"). 00598 scursor--; 00599 } 00600 break; 00601 case '=': 00602 { 00603 // ### Work around broken clients that send encoded words in quoted-strings 00604 // For example, older KMail versions. 00605 if( scursor == send ) 00606 break; 00607 00608 const char *oldscursor = scursor; 00609 QString tmp; 00610 QByteArray lang, charset; 00611 if( *scursor++ == '?' ) { 00612 --scursor; 00613 if( parseEncodedWord( scursor, send, tmp, lang, charset ) ) { 00614 result += tmp; 00615 break; 00616 } else { 00617 scursor = oldscursor; 00618 } 00619 } else { 00620 scursor = oldscursor; 00621 } 00622 // fall through 00623 } 00624 default: 00625 KMIME_WARN_IF_8BIT( ch ); 00626 result += QLatin1Char( ch ); 00627 } 00628 } 00629 00630 return false; 00631 } 00632 00633 // known issues: 00634 // 00635 // - doesn't handle encoded-word inside comments. 00636 00637 bool parseComment( const char* &scursor, const char * const send, 00638 QString &result, bool isCRLF, bool reallySave ) 00639 { 00640 int commentNestingDepth = 1; 00641 const char *afterLastClosingParenPos = 0; 00642 QString maybeCmnt; 00643 const char *oldscursor = scursor; 00644 00645 assert( *(scursor-1) == '(' ); 00646 00647 while ( commentNestingDepth ) { 00648 QString cmntPart; 00649 if ( parseGenericQuotedString( scursor, send, cmntPart, isCRLF, '(', ')' ) ) { 00650 assert( *(scursor-1) == ')' || *(scursor-1) == '(' ); 00651 // see the kdoc for above function for the possible conditions 00652 // we have to check: 00653 switch ( *(scursor-1) ) { 00654 case ')': 00655 if ( reallySave ) { 00656 // add the chunk that's now surely inside the comment. 00657 result += maybeCmnt; 00658 result += cmntPart; 00659 if ( commentNestingDepth > 1 ) { 00660 // don't add the outermost ')'... 00661 result += QLatin1Char( ')' ); 00662 } 00663 maybeCmnt.clear(); 00664 } 00665 afterLastClosingParenPos = scursor; 00666 --commentNestingDepth; 00667 break; 00668 case '(': 00669 if ( reallySave ) { 00670 // don't add to "result" yet, because we might find that we 00671 // are already outside the (broken) comment... 00672 maybeCmnt += cmntPart; 00673 maybeCmnt += QLatin1Char( '(' ); 00674 } 00675 ++commentNestingDepth; 00676 break; 00677 default: assert( 0 ); 00678 } // switch 00679 } else { 00680 // !parseGenericQuotedString, ie. premature end 00681 if ( afterLastClosingParenPos ) { 00682 scursor = afterLastClosingParenPos; 00683 } else { 00684 scursor = oldscursor; 00685 } 00686 return false; 00687 } 00688 } // while 00689 00690 return true; 00691 } 00692 00693 // known issues: none. 00694 00695 bool parsePhrase( const char* &scursor, const char * const send, 00696 QString &result, bool isCRLF ) 00697 { 00698 enum { 00699 None, Phrase, Atom, EncodedWord, QuotedString 00700 } found = None; 00701 00702 QString tmp; 00703 QByteArray lang, charset; 00704 const char *successfullyParsed = 0; 00705 // only used by the encoded-word branch 00706 const char *oldscursor; 00707 // used to suppress whitespace between adjacent encoded-words 00708 // (rfc2047, 6.2): 00709 bool lastWasEncodedWord = false; 00710 00711 while ( scursor != send ) { 00712 char ch = *scursor++; 00713 switch ( ch ) { 00714 case '.': // broken, but allow for intorop's sake 00715 if ( found == None ) { 00716 --scursor; 00717 return false; 00718 } else { 00719 if ( scursor != send && ( *scursor == ' ' || *scursor == '\t' ) ) { 00720 result += QLatin1String( ". " ); 00721 } else { 00722 result += QLatin1Char( '.' ); 00723 } 00724 successfullyParsed = scursor; 00725 } 00726 break; 00727 case '"': // quoted-string 00728 tmp.clear(); 00729 if ( parseGenericQuotedString( scursor, send, tmp, isCRLF, '"', '"' ) ) { 00730 successfullyParsed = scursor; 00731 assert( *(scursor-1) == '"' ); 00732 switch ( found ) { 00733 case None: 00734 found = QuotedString; 00735 break; 00736 case Phrase: 00737 case Atom: 00738 case EncodedWord: 00739 case QuotedString: 00740 found = Phrase; 00741 result += QLatin1Char(' '); // rfc822, 3.4.4 00742 break; 00743 default: 00744 assert( 0 ); 00745 } 00746 lastWasEncodedWord = false; 00747 result += tmp; 00748 } else { 00749 // premature end of quoted string. 00750 // What to do? Return leading '"' as special? Return as quoted-string? 00751 // We do the latter if we already found something, else signal failure. 00752 if ( found == None ) { 00753 return false; 00754 } else { 00755 result += QLatin1Char(' '); // rfc822, 3.4.4 00756 result += tmp; 00757 return true; 00758 } 00759 } 00760 break; 00761 case '(': // comment 00762 // parse it, but ignore content: 00763 tmp.clear(); 00764 if ( parseComment( scursor, send, tmp, isCRLF, 00765 false /*don't bother with the content*/ ) ) { 00766 successfullyParsed = scursor; 00767 lastWasEncodedWord = false; // strictly interpreting rfc2047, 6.2 00768 } else { 00769 if ( found == None ) { 00770 return false; 00771 } else { 00772 scursor = successfullyParsed; 00773 return true; 00774 } 00775 } 00776 break; 00777 case '=': // encoded-word 00778 tmp.clear(); 00779 oldscursor = scursor; 00780 lang.clear(); 00781 charset.clear(); 00782 if ( parseEncodedWord( scursor, send, tmp, lang, charset ) ) { 00783 successfullyParsed = scursor; 00784 switch ( found ) { 00785 case None: 00786 found = EncodedWord; 00787 break; 00788 case Phrase: 00789 case EncodedWord: 00790 case Atom: 00791 case QuotedString: 00792 if ( !lastWasEncodedWord ) { 00793 result += QLatin1Char(' '); // rfc822, 3.4.4 00794 } 00795 found = Phrase; 00796 break; 00797 default: assert( 0 ); 00798 } 00799 lastWasEncodedWord = true; 00800 result += tmp; 00801 break; 00802 } else { 00803 // parse as atom: 00804 scursor = oldscursor; 00805 } 00806 // fall though... 00807 00808 default: //atom 00809 tmp.clear(); 00810 scursor--; 00811 if ( parseAtom( scursor, send, tmp, true /* allow 8bit */ ) ) { 00812 successfullyParsed = scursor; 00813 switch ( found ) { 00814 case None: 00815 found = Atom; 00816 break; 00817 case Phrase: 00818 case Atom: 00819 case EncodedWord: 00820 case QuotedString: 00821 found = Phrase; 00822 result += QLatin1Char(' '); // rfc822, 3.4.4 00823 break; 00824 default: 00825 assert( 0 ); 00826 } 00827 lastWasEncodedWord = false; 00828 result += tmp; 00829 } else { 00830 if ( found == None ) { 00831 return false; 00832 } else { 00833 scursor = successfullyParsed; 00834 return true; 00835 } 00836 } 00837 } 00838 eatWhiteSpace( scursor, send ); 00839 } 00840 00841 return found != None; 00842 } 00843 00844 // FIXME: This should probably by QByteArray &result instead? 00845 bool parseDotAtom( const char* &scursor, const char * const send, 00846 QString &result, bool isCRLF ) 00847 { 00848 eatCFWS( scursor, send, isCRLF ); 00849 00850 // always points to just after the last atom parsed: 00851 const char *successfullyParsed; 00852 00853 QString tmp; 00854 if ( !parseAtom( scursor, send, tmp, false /* no 8bit */ ) ) { 00855 return false; 00856 } 00857 result += tmp; 00858 successfullyParsed = scursor; 00859 00860 while ( scursor != send ) { 00861 00862 // end of header or no '.' -> return 00863 if ( scursor == send || *scursor != '.' ) { 00864 return true; 00865 } 00866 scursor++; // eat '.' 00867 00868 if ( scursor == send || !isAText( *scursor ) ) { 00869 // end of header or no AText, but this time following a '.'!: 00870 // reset cursor to just after last successfully parsed char and 00871 // return: 00872 scursor = successfullyParsed; 00873 return true; 00874 } 00875 00876 // try to parse the next atom: 00877 QString maybeAtom; 00878 if ( !parseAtom( scursor, send, maybeAtom, false /*no 8bit*/ ) ) { 00879 scursor = successfullyParsed; 00880 return true; 00881 } 00882 00883 result += QLatin1Char('.'); 00884 result += maybeAtom; 00885 successfullyParsed = scursor; 00886 } 00887 00888 scursor = successfullyParsed; 00889 return true; 00890 } 00891 00892 void eatCFWS( const char* &scursor, const char * const send, bool isCRLF ) 00893 { 00894 QString dummy; 00895 00896 while ( scursor != send ) { 00897 const char *oldscursor = scursor; 00898 00899 char ch = *scursor++; 00900 00901 switch( ch ) { 00902 case ' ': 00903 case '\t': // whitespace 00904 case '\r': 00905 case '\n': // folding 00906 continue; 00907 00908 case '(': // comment 00909 if ( parseComment( scursor, send, dummy, isCRLF, false /*don't save*/ ) ) { 00910 continue; 00911 } 00912 scursor = oldscursor; 00913 return; 00914 00915 default: 00916 scursor = oldscursor; 00917 return; 00918 } 00919 } 00920 } 00921 00922 bool parseDomain( const char* &scursor, const char * const send, 00923 QString &result, bool isCRLF ) 00924 { 00925 eatCFWS( scursor, send, isCRLF ); 00926 if ( scursor == send ) { 00927 return false; 00928 } 00929 00930 // domain := dot-atom / domain-literal / atom *("." atom) 00931 // 00932 // equivalent to: 00933 // domain = dot-atom / domain-literal, 00934 // since parseDotAtom does allow CFWS between atoms and dots 00935 00936 if ( *scursor == '[' ) { 00937 // domain-literal: 00938 QString maybeDomainLiteral; 00939 // eat '[': 00940 scursor++; 00941 while ( parseGenericQuotedString( scursor, send, maybeDomainLiteral, 00942 isCRLF, '[', ']' ) ) { 00943 if ( scursor == send ) { 00944 // end of header: check for closing ']': 00945 if ( *(scursor-1) == ']' ) { 00946 // OK, last char was ']': 00947 result = maybeDomainLiteral; 00948 return true; 00949 } else { 00950 // not OK, domain-literal wasn't closed: 00951 return false; 00952 } 00953 } 00954 // we hit openChar in parseGenericQuotedString. 00955 // include it in maybeDomainLiteral and keep on parsing: 00956 if ( *(scursor-1) == '[' ) { 00957 maybeDomainLiteral += QLatin1Char('['); 00958 continue; 00959 } 00960 // OK, real end of domain-literal: 00961 result = maybeDomainLiteral; 00962 return true; 00963 } 00964 } else { 00965 // dot-atom: 00966 QString maybeDotAtom; 00967 if ( parseDotAtom( scursor, send, maybeDotAtom, isCRLF ) ) { 00968 result = maybeDotAtom; 00969 // Domain may end with '.', if so preserve it' 00970 if ( scursor != send && *scursor == '.' ) { 00971 result += QLatin1Char('.'); 00972 scursor++; 00973 } 00974 return true; 00975 } 00976 } 00977 return false; 00978 } 00979 00980 bool parseObsRoute( const char* &scursor, const char* const send, 00981 QStringList &result, bool isCRLF, bool save ) 00982 { 00983 while ( scursor != send ) { 00984 eatCFWS( scursor, send, isCRLF ); 00985 if ( scursor == send ) { 00986 return false; 00987 } 00988 00989 // empty entry: 00990 if ( *scursor == ',' ) { 00991 scursor++; 00992 if ( save ) { 00993 result.append( QString() ); 00994 } 00995 continue; 00996 } 00997 00998 // empty entry ending the list: 00999 if ( *scursor == ':' ) { 01000 scursor++; 01001 if ( save ) { 01002 result.append( QString() ); 01003 } 01004 return true; 01005 } 01006 01007 // each non-empty entry must begin with '@': 01008 if ( *scursor != '@' ) { 01009 return false; 01010 } else { 01011 scursor++; 01012 } 01013 01014 QString maybeDomain; 01015 if ( !parseDomain( scursor, send, maybeDomain, isCRLF ) ) { 01016 return false; 01017 } 01018 if ( save ) { 01019 result.append( maybeDomain ); 01020 } 01021 01022 // eat the following (optional) comma: 01023 eatCFWS( scursor, send, isCRLF ); 01024 if ( scursor == send ) { 01025 return false; 01026 } 01027 if ( *scursor == ':' ) { 01028 scursor++; 01029 return true; 01030 } 01031 if ( *scursor == ',' ) { 01032 scursor++; 01033 } 01034 } 01035 01036 return false; 01037 } 01038 01039 bool parseAddrSpec( const char* &scursor, const char * const send, 01040 AddrSpec &result, bool isCRLF ) 01041 { 01042 // 01043 // STEP 1: 01044 // local-part := dot-atom / quoted-string / word *("." word) 01045 // 01046 // this is equivalent to: 01047 // local-part := word *("." word) 01048 01049 QString maybeLocalPart; 01050 QString tmp; 01051 01052 while ( scursor != send ) { 01053 // first, eat any whitespace 01054 eatCFWS( scursor, send, isCRLF ); 01055 01056 char ch = *scursor++; 01057 switch ( ch ) { 01058 case '.': // dot 01059 maybeLocalPart += QLatin1Char('.'); 01060 break; 01061 01062 case '@': 01063 goto SAW_AT_SIGN; 01064 break; 01065 01066 case '"': // quoted-string 01067 tmp.clear(); 01068 if ( parseGenericQuotedString( scursor, send, tmp, isCRLF, '"', '"' ) ) { 01069 maybeLocalPart += tmp; 01070 } else { 01071 return false; 01072 } 01073 break; 01074 01075 default: // atom 01076 scursor--; // re-set scursor to point to ch again 01077 tmp.clear(); 01078 if ( parseAtom( scursor, send, tmp, false /* no 8bit */ ) ) { 01079 maybeLocalPart += tmp; 01080 } else { 01081 return false; // parseAtom can only fail if the first char is non-atext. 01082 } 01083 break; 01084 } 01085 } 01086 01087 return false; 01088 01089 // 01090 // STEP 2: 01091 // domain 01092 // 01093 01094 SAW_AT_SIGN: 01095 01096 assert( *(scursor-1) == '@' ); 01097 01098 QString maybeDomain; 01099 if ( !parseDomain( scursor, send, maybeDomain, isCRLF ) ) { 01100 return false; 01101 } 01102 01103 result.localPart = maybeLocalPart; 01104 result.domain = maybeDomain; 01105 01106 return true; 01107 } 01108 01109 bool parseAngleAddr( const char* &scursor, const char * const send, 01110 AddrSpec &result, bool isCRLF ) 01111 { 01112 // first, we need an opening angle bracket: 01113 eatCFWS( scursor, send, isCRLF ); 01114 if ( scursor == send || *scursor != '<' ) { 01115 return false; 01116 } 01117 scursor++; // eat '<' 01118 01119 eatCFWS( scursor, send, isCRLF ); 01120 if ( scursor == send ) { 01121 return false; 01122 } 01123 01124 if ( *scursor == '@' || *scursor == ',' ) { 01125 // obs-route: parse, but ignore: 01126 KMIME_WARN << "obsolete source route found! ignoring."; 01127 QStringList dummy; 01128 if ( !parseObsRoute( scursor, send, dummy, 01129 isCRLF, false /* don't save */ ) ) { 01130 return false; 01131 } 01132 // angle-addr isn't complete until after the '>': 01133 if ( scursor == send ) { 01134 return false; 01135 } 01136 } 01137 01138 // parse addr-spec: 01139 AddrSpec maybeAddrSpec; 01140 if ( !parseAddrSpec( scursor, send, maybeAddrSpec, isCRLF ) ) { 01141 return false; 01142 } 01143 01144 eatCFWS( scursor, send, isCRLF ); 01145 if ( scursor == send || *scursor != '>' ) { 01146 return false; 01147 } 01148 scursor++; 01149 01150 result = maybeAddrSpec; 01151 return true; 01152 01153 } 01154 01155 static QString stripQuotes( const QString &input ) 01156 { 01157 const QLatin1Char quotes( '"' ); 01158 if ( input.startsWith( quotes ) && input.endsWith( quotes ) ) { 01159 QString stripped( input.mid( 1, input.size() - 2 ) ); 01160 return stripped; 01161 } 01162 else return input; 01163 } 01164 01165 bool parseMailbox( const char* &scursor, const char * const send, 01166 Mailbox &result, bool isCRLF ) 01167 { 01168 eatCFWS( scursor, send, isCRLF ); 01169 if ( scursor == send ) { 01170 return false; 01171 } 01172 01173 AddrSpec maybeAddrSpec; 01174 QString maybeDisplayName; 01175 01176 // first, try if it's a vanilla addr-spec: 01177 const char * oldscursor = scursor; 01178 if ( parseAddrSpec( scursor, send, maybeAddrSpec, isCRLF ) ) { 01179 result.setAddress( maybeAddrSpec ); 01180 // check for the obsolete form of display-name (as comment): 01181 eatWhiteSpace( scursor, send ); 01182 if ( scursor != send && *scursor == '(' ) { 01183 scursor++; 01184 if ( !parseComment( scursor, send, maybeDisplayName, isCRLF, true /*keep*/ ) ) { 01185 return false; 01186 } 01187 } 01188 result.setName( stripQuotes( maybeDisplayName ) ); 01189 return true; 01190 } 01191 scursor = oldscursor; 01192 01193 // second, see if there's a display-name: 01194 if ( !parsePhrase( scursor, send, maybeDisplayName, isCRLF ) ) { 01195 // failed: reset cursor, note absent display-name 01196 maybeDisplayName.clear(); 01197 scursor = oldscursor; 01198 } else { 01199 // succeeded: eat CFWS 01200 eatCFWS( scursor, send, isCRLF ); 01201 if ( scursor == send ) { 01202 return false; 01203 } 01204 } 01205 01206 // third, parse the angle-addr: 01207 if ( !parseAngleAddr( scursor, send, maybeAddrSpec, isCRLF ) ) { 01208 return false; 01209 } 01210 01211 if ( maybeDisplayName.isNull() ) { 01212 // check for the obsolete form of display-name (as comment): 01213 eatWhiteSpace( scursor, send ); 01214 if ( scursor != send && *scursor == '(' ) { 01215 scursor++; 01216 if ( !parseComment( scursor, send, maybeDisplayName, isCRLF, true /*keep*/ ) ) { 01217 return false; 01218 } 01219 } 01220 } 01221 01222 result.setName( stripQuotes( maybeDisplayName ) ); 01223 result.setAddress( maybeAddrSpec ); 01224 return true; 01225 } 01226 01227 bool parseGroup( const char* &scursor, const char * const send, 01228 Address &result, bool isCRLF ) 01229 { 01230 // group := display-name ":" [ mailbox-list / CFWS ] ";" [CFWS] 01231 // 01232 // equivalent to: 01233 // group := display-name ":" [ obs-mbox-list ] ";" 01234 01235 eatCFWS( scursor, send, isCRLF ); 01236 if ( scursor == send ) { 01237 return false; 01238 } 01239 01240 // get display-name: 01241 QString maybeDisplayName; 01242 if ( !parsePhrase( scursor, send, maybeDisplayName, isCRLF ) ) { 01243 return false; 01244 } 01245 01246 // get ":": 01247 eatCFWS( scursor, send, isCRLF ); 01248 if ( scursor == send || *scursor != ':' ) { 01249 return false; 01250 } 01251 01252 // KDE5 TODO: Don't expose displayName as public, but rather add setter for it that 01253 // automatically calls removeBidiControlChars 01254 result.displayName = removeBidiControlChars( maybeDisplayName ); 01255 01256 // get obs-mbox-list (may contain empty entries): 01257 scursor++; 01258 while ( scursor != send ) { 01259 eatCFWS( scursor, send, isCRLF ); 01260 if ( scursor == send ) { 01261 return false; 01262 } 01263 01264 // empty entry: 01265 if ( *scursor == ',' ) { 01266 scursor++; 01267 continue; 01268 } 01269 01270 // empty entry ending the list: 01271 if ( *scursor == ';' ) { 01272 scursor++; 01273 return true; 01274 } 01275 01276 Mailbox maybeMailbox; 01277 if ( !parseMailbox( scursor, send, maybeMailbox, isCRLF ) ) { 01278 return false; 01279 } 01280 result.mailboxList.append( maybeMailbox ); 01281 01282 eatCFWS( scursor, send, isCRLF ); 01283 // premature end: 01284 if ( scursor == send ) { 01285 return false; 01286 } 01287 // regular end of the list: 01288 if ( *scursor == ';' ) { 01289 scursor++; 01290 return true; 01291 } 01292 // eat regular list entry separator: 01293 if ( *scursor == ',' ) { 01294 scursor++; 01295 } 01296 } 01297 return false; 01298 } 01299 01300 bool parseAddress( const char* &scursor, const char * const send, 01301 Address &result, bool isCRLF ) 01302 { 01303 // address := mailbox / group 01304 01305 eatCFWS( scursor, send, isCRLF ); 01306 if ( scursor == send ) { 01307 return false; 01308 } 01309 01310 // first try if it's a single mailbox: 01311 Mailbox maybeMailbox; 01312 const char * oldscursor = scursor; 01313 if ( parseMailbox( scursor, send, maybeMailbox, isCRLF ) ) { 01314 // yes, it is: 01315 result.displayName.clear(); 01316 result.mailboxList.append( maybeMailbox ); 01317 return true; 01318 } 01319 scursor = oldscursor; 01320 01321 Address maybeAddress; 01322 01323 // no, it's not a single mailbox. Try if it's a group: 01324 if ( !parseGroup( scursor, send, maybeAddress, isCRLF ) ) { 01325 return false; 01326 } 01327 01328 result = maybeAddress; 01329 return true; 01330 } 01331 01332 bool parseAddressList( const char* &scursor, const char * const send, 01333 AddressList &result, bool isCRLF ) 01334 { 01335 while ( scursor != send ) { 01336 eatCFWS( scursor, send, isCRLF ); 01337 // end of header: this is OK. 01338 if ( scursor == send ) { 01339 return true; 01340 } 01341 // empty entry: ignore: 01342 if ( *scursor == ',' ) { 01343 scursor++; 01344 continue; 01345 } 01346 // broken clients might use ';' as list delimiter, accept that as well 01347 if ( *scursor == ';' ) { 01348 scursor++; 01349 continue; 01350 } 01351 01352 // parse one entry 01353 Address maybeAddress; 01354 if ( !parseAddress( scursor, send, maybeAddress, isCRLF ) ) { 01355 return false; 01356 } 01357 result.append( maybeAddress ); 01358 01359 eatCFWS( scursor, send, isCRLF ); 01360 // end of header: this is OK. 01361 if ( scursor == send ) { 01362 return true; 01363 } 01364 // comma separating entries: eat it. 01365 if ( *scursor == ',' ) { 01366 scursor++; 01367 } 01368 } 01369 return true; 01370 } 01371 01372 static QString asterisk = QString::fromLatin1( "*0*", 1 ); 01373 static QString asteriskZero = QString::fromLatin1( "*0*", 2 ); 01374 //static QString asteriskZeroAsterisk = QString::fromLatin1( "*0*", 3 ); 01375 01376 // FIXME: Get rid of the very ugly "QStringOrQPair" thing. At this level, we are supposed to work 01377 // on byte arrays, not strings! The result parameter should be a simple 01378 // QPair<QByteArray,QByteArray>, which is the attribute name and the value. 01379 bool parseParameter( const char* &scursor, const char * const send, 01380 QPair<QString,QStringOrQPair> &result, bool isCRLF ) 01381 { 01382 // parameter = regular-parameter / extended-parameter 01383 // regular-parameter = regular-parameter-name "=" value 01384 // extended-parameter = 01385 // value = token / quoted-string 01386 // 01387 // note that rfc2231 handling is out of the scope of this function. 01388 // Therefore we return the attribute as QString and the value as 01389 // (start,length) tupel if we see that the value is encoded 01390 // (trailing asterisk), for parseParameterList to decode... 01391 01392 eatCFWS( scursor, send, isCRLF ); 01393 if ( scursor == send ) { 01394 return false; 01395 } 01396 01397 // 01398 // parse the parameter name: 01399 // 01400 // FIXME: maybeAttribute should be a QByteArray 01401 QString maybeAttribute; 01402 if ( !parseToken( scursor, send, maybeAttribute, false /* no 8bit */ ) ) { 01403 return false; 01404 } 01405 01406 eatCFWS( scursor, send, isCRLF ); 01407 // premature end: not OK (haven't seen '=' yet). 01408 if ( scursor == send || *scursor != '=' ) { 01409 return false; 01410 } 01411 scursor++; // eat '=' 01412 01413 eatCFWS( scursor, send, isCRLF ); 01414 if ( scursor == send ) { 01415 // don't choke on attribute=, meaning the value was omitted: 01416 if ( maybeAttribute.endsWith( asterisk ) ) { 01417 KMIME_WARN << "attribute ends with \"*\", but value is empty!" 01418 "Chopping away \"*\"."; 01419 maybeAttribute.truncate( maybeAttribute.length() - 1 ); 01420 } 01421 result = qMakePair( maybeAttribute.toLower(), QStringOrQPair() ); 01422 return true; 01423 } 01424 01425 const char * oldscursor = scursor; 01426 01427 // 01428 // parse the parameter value: 01429 // 01430 QStringOrQPair maybeValue; 01431 if ( *scursor == '"' ) { 01432 // value is a quoted-string: 01433 scursor++; 01434 if ( maybeAttribute.endsWith( asterisk ) ) { 01435 // attributes ending with "*" designate extended-parameters, 01436 // which cannot have quoted-strings as values. So we remove the 01437 // trailing "*" to not confuse upper layers. 01438 KMIME_WARN << "attribute ends with \"*\", but value is a quoted-string!" 01439 "Chopping away \"*\"."; 01440 maybeAttribute.truncate( maybeAttribute.length() - 1 ); 01441 } 01442 01443 if ( !parseGenericQuotedString( scursor, send, maybeValue.qstring, isCRLF ) ) { 01444 scursor = oldscursor; 01445 result = qMakePair( maybeAttribute.toLower(), QStringOrQPair() ); 01446 return false; // this case needs further processing by upper layers!! 01447 } 01448 } else { 01449 // value is a token: 01450 if ( !parseToken( scursor, send, maybeValue.qpair, false /* no 8bit */ ) ) { 01451 scursor = oldscursor; 01452 result = qMakePair( maybeAttribute.toLower(), QStringOrQPair() ); 01453 return false; // this case needs further processing by upper layers!! 01454 } 01455 } 01456 01457 result = qMakePair( maybeAttribute.toLower(), maybeValue ); 01458 return true; 01459 } 01460 01461 // FIXME: Get rid of QStringOrQPair: Use a simply QMap<QByteArray, QByteArray> for "result" 01462 // instead! 01463 bool parseRawParameterList( const char* &scursor, const char * const send, 01464 QMap<QString,QStringOrQPair> &result, 01465 bool isCRLF ) 01466 { 01467 // we use parseParameter() consecutively to obtain a map of raw 01468 // attributes to raw values. "Raw" here means that we don't do 01469 // rfc2231 decoding and concatenation. This is left to 01470 // parseParameterList(), which will call this function. 01471 // 01472 // The main reason for making this chunk of code a separate 01473 // (private) method is that we can deal with broken parameters 01474 // _here_ and leave the rfc2231 handling solely to 01475 // parseParameterList(), which will still be enough work. 01476 01477 while ( scursor != send ) { 01478 eatCFWS( scursor, send, isCRLF ); 01479 // empty entry ending the list: OK. 01480 if ( scursor == send ) { 01481 return true; 01482 } 01483 // empty list entry: ignore. 01484 if ( *scursor == ';' ) { 01485 scursor++; 01486 continue; 01487 } 01488 01489 QPair<QString,QStringOrQPair> maybeParameter; 01490 if ( !parseParameter( scursor, send, maybeParameter, isCRLF ) ) { 01491 // we need to do a bit of work if the attribute is not 01492 // NULL. These are the cases marked with "needs further 01493 // processing" in parseParameter(). Specifically, parsing of the 01494 // token or the quoted-string, which should represent the value, 01495 // failed. We take the easy way out and simply search for the 01496 // next ';' to start parsing again. (Another option would be to 01497 // take the text between '=' and ';' as value) 01498 if ( maybeParameter.first.isNull() ) { 01499 return false; 01500 } 01501 while ( scursor != send ) { 01502 if ( *scursor++ == ';' ) { 01503 goto IS_SEMICOLON; 01504 } 01505 } 01506 // scursor == send case: end of list. 01507 return true; 01508 IS_SEMICOLON: 01509 // *scursor == ';' case: parse next entry. 01510 continue; 01511 } 01512 // successful parsing brings us here: 01513 result.insert( maybeParameter.first, maybeParameter.second ); 01514 01515 eatCFWS( scursor, send, isCRLF ); 01516 // end of header: ends list. 01517 if ( scursor == send ) { 01518 return true; 01519 } 01520 // regular separator: eat it. 01521 if ( *scursor == ';' ) { 01522 scursor++; 01523 } 01524 } 01525 return true; 01526 } 01527 01528 static void decodeRFC2231Value( Codec* &rfc2231Codec, 01529 QTextCodec* &textcodec, 01530 bool isContinuation, QString &value, 01531 QPair<const char*,int> &source, QByteArray& charset ) 01532 { 01533 // 01534 // parse the raw value into (charset,language,text): 01535 // 01536 01537 const char * decBegin = source.first; 01538 const char * decCursor = decBegin; 01539 const char * decEnd = decCursor + source.second; 01540 01541 if ( !isContinuation ) { 01542 // find the first single quote 01543 while ( decCursor != decEnd ) { 01544 if ( *decCursor == '\'' ) { 01545 break; 01546 } else { 01547 decCursor++; 01548 } 01549 } 01550 01551 if ( decCursor == decEnd ) { 01552 // there wasn't a single single quote at all! 01553 // take the whole value to be in latin-1: 01554 KMIME_WARN << "No charset in extended-initial-value." 01555 "Assuming \"iso-8859-1\"."; 01556 value += QString::fromLatin1( decBegin, source.second ); 01557 return; 01558 } 01559 01560 charset = QByteArray( decBegin, decCursor - decBegin ); 01561 01562 const char * oldDecCursor = ++decCursor; 01563 // find the second single quote (we ignore the language tag): 01564 while ( decCursor != decEnd ) { 01565 if ( *decCursor == '\'' ) { 01566 break; 01567 } else { 01568 decCursor++; 01569 } 01570 } 01571 if ( decCursor == decEnd ) { 01572 KMIME_WARN << "No language in extended-initial-value." 01573 "Trying to recover."; 01574 decCursor = oldDecCursor; 01575 } else { 01576 decCursor++; 01577 } 01578 01579 // decCursor now points to the start of the 01580 // "extended-other-values": 01581 01582 // 01583 // get the decoders: 01584 // 01585 01586 bool matchOK = false; 01587 textcodec = KGlobal::charsets()->codecForName( QLatin1String( charset ), matchOK ); 01588 if ( !matchOK ) { 01589 textcodec = 0; 01590 KMIME_WARN_UNKNOWN( Charset, charset ); 01591 } 01592 } 01593 01594 if ( !rfc2231Codec ) { 01595 rfc2231Codec = Codec::codecForName("x-kmime-rfc2231"); 01596 assert( rfc2231Codec ); 01597 } 01598 01599 if ( !textcodec ) { 01600 value += QString::fromLatin1( decCursor, decEnd - decCursor ); 01601 return; 01602 } 01603 01604 Decoder * dec = rfc2231Codec->makeDecoder(); 01605 assert( dec ); 01606 01607 // 01608 // do the decoding: 01609 // 01610 01611 QByteArray buffer; 01612 buffer.resize( rfc2231Codec->maxDecodedSizeFor( decEnd - decCursor ) ); 01613 QByteArray::Iterator bit = buffer.begin(); 01614 QByteArray::ConstIterator bend = buffer.end(); 01615 01616 if ( !dec->decode( decCursor, decEnd, bit, bend ) ) { 01617 KMIME_WARN << rfc2231Codec->name() 01618 << "codec lies about its maxDecodedSizeFor()" << endl 01619 << "result may be truncated"; 01620 } 01621 01622 value += textcodec->toUnicode( buffer.begin(), bit - buffer.begin() ); 01623 01624 // kDebug() << "value now: \"" << value << "\""; 01625 // cleanup: 01626 delete dec; 01627 } 01628 01629 // known issues: 01630 // - permutes rfc2231 continuations when the total number of parts 01631 // exceeds 10 (other-sections then becomes *xy, ie. two digits) 01632 01633 bool parseParameterListWithCharset( const char* &scursor, 01634 const char * const send, 01635 QMap<QString,QString> &result, 01636 QByteArray& charset, bool isCRLF ) 01637 { 01638 // parse the list into raw attribute-value pairs: 01639 QMap<QString,QStringOrQPair> rawParameterList; 01640 if (!parseRawParameterList( scursor, send, rawParameterList, isCRLF ) ) { 01641 return false; 01642 } 01643 01644 if ( rawParameterList.isEmpty() ) { 01645 return true; 01646 } 01647 01648 // decode rfc 2231 continuations and alternate charset encoding: 01649 01650 // NOTE: this code assumes that what QMapIterator delivers is sorted 01651 // by the key! 01652 01653 Codec * rfc2231Codec = 0; 01654 QTextCodec * textcodec = 0; 01655 QString attribute; 01656 QString value; 01657 enum Mode { 01658 NoMode = 0x0, Continued = 0x1, Encoded = 0x2 01659 }; 01660 01661 enum EncodingMode { 01662 NoEncoding, 01663 RFC2047, 01664 RFC2231 01665 }; 01666 01667 QMap<QString,QStringOrQPair>::Iterator it, end = rawParameterList.end(); 01668 01669 for ( it = rawParameterList.begin() ; it != end ; ++it ) { 01670 if ( attribute.isNull() || !it.key().startsWith( attribute ) ) { 01671 // 01672 // new attribute: 01673 // 01674 01675 // store the last attribute/value pair in the result map now: 01676 if ( !attribute.isNull() ) { 01677 result.insert( attribute, value ); 01678 } 01679 // and extract the information from the new raw attribute: 01680 value.clear(); 01681 attribute = it.key(); 01682 int mode = NoMode; 01683 EncodingMode encodingMode = NoEncoding; 01684 01685 // is the value rfc2331-encoded? 01686 if ( attribute.endsWith( asterisk ) ) { 01687 attribute.truncate( attribute.length() - 1 ); 01688 mode |= Encoded; 01689 encodingMode = RFC2231; 01690 } 01691 // is the value rfc2047-encoded? 01692 if( !(*it).qstring.isNull() && (*it).qstring.contains( QLatin1String( "=?" ) ) ) { 01693 mode |= Encoded; 01694 encodingMode = RFC2047; 01695 } 01696 // is the value continued? 01697 if ( attribute.endsWith( asteriskZero ) ) { 01698 attribute.truncate( attribute.length() - 2 ); 01699 mode |= Continued; 01700 } 01701 // 01702 // decode if necessary: 01703 // 01704 if ( mode & Encoded ) { 01705 if ( encodingMode == RFC2231 ) { 01706 decodeRFC2231Value( rfc2231Codec, textcodec, 01707 false, /* isn't continuation */ 01708 value, (*it).qpair, charset ); 01709 } 01710 else if ( encodingMode == RFC2047 ) { 01711 value += decodeRFC2047String( (*it).qstring.toLatin1(), charset ); 01712 } 01713 } else { 01714 // not encoded. 01715 if ( (*it).qpair.first ) { 01716 value += QString::fromLatin1( (*it).qpair.first, (*it).qpair.second ); 01717 } else { 01718 value += (*it).qstring; 01719 } 01720 } 01721 01722 // 01723 // shortcut-processing when the value isn't encoded: 01724 // 01725 01726 if ( !(mode & Continued) ) { 01727 // save result already: 01728 result.insert( attribute, value ); 01729 // force begin of a new attribute: 01730 attribute.clear(); 01731 } 01732 } else { // it.key().startsWith( attribute ) 01733 // 01734 // continuation 01735 // 01736 01737 // ignore the section and trust QMap to have sorted the keys: 01738 if ( it.key().endsWith( asterisk ) ) { 01739 // encoded 01740 decodeRFC2231Value( rfc2231Codec, textcodec, 01741 true, /* is continuation */ 01742 value, (*it).qpair, charset ); 01743 } else { 01744 // not encoded 01745 if ( (*it).qpair.first ) { 01746 value += QString::fromLatin1( (*it).qpair.first, (*it).qpair.second ); 01747 } else { 01748 value += (*it).qstring; 01749 } 01750 } 01751 } 01752 } 01753 01754 // write last attr/value pair: 01755 if ( !attribute.isNull() ) { 01756 result.insert( attribute, value ); 01757 } 01758 01759 return true; 01760 } 01761 01762 01763 bool parseParameterList( const char* &scursor, const char * const send, 01764 QMap<QString,QString> &result, bool isCRLF ) 01765 { 01766 QByteArray charset; 01767 return parseParameterListWithCharset( scursor, send, result, charset, isCRLF ); 01768 } 01769 01770 static const char * const stdDayNames[] = { 01771 "Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat" 01772 }; 01773 static const int stdDayNamesLen = sizeof stdDayNames / sizeof *stdDayNames; 01774 01775 static bool parseDayName( const char* &scursor, const char * const send ) 01776 { 01777 // check bounds: 01778 if ( send - scursor < 3 ) { 01779 return false; 01780 } 01781 01782 for ( int i = 0 ; i < stdDayNamesLen ; ++i ) { 01783 if ( qstrnicmp( scursor, stdDayNames[i], 3 ) == 0 ) { 01784 scursor += 3; 01785 // kDebug() << "found" << stdDayNames[i]; 01786 return true; 01787 } 01788 } 01789 01790 return false; 01791 } 01792 01793 static const char * const stdMonthNames[] = { 01794 "Jan", "Feb", "Mar", "Apr", "May", "Jun", 01795 "Jul", "Aug", "Sep", "Oct", "Nov", "Dec" 01796 }; 01797 static const int stdMonthNamesLen = 01798 sizeof stdMonthNames / sizeof *stdMonthNames; 01799 01800 static bool parseMonthName( const char* &scursor, const char * const send, 01801 int &result ) 01802 { 01803 // check bounds: 01804 if ( send - scursor < 3 ) { 01805 return false; 01806 } 01807 01808 for ( result = 0 ; result < stdMonthNamesLen ; ++result ) { 01809 if ( qstrnicmp( scursor, stdMonthNames[result], 3 ) == 0 ) { 01810 scursor += 3; 01811 return true; 01812 } 01813 } 01814 01815 // not found: 01816 return false; 01817 } 01818 01819 static const struct { 01820 const char * tzName; 01821 long int secsEastOfGMT; 01822 } timeZones[] = { 01823 // rfc 822 timezones: 01824 { "GMT", 0 }, 01825 { "UT", 0 }, 01826 { "EDT", -4*3600 }, 01827 { "EST", -5*3600 }, 01828 { "MST", -5*3600 }, 01829 { "CST", -6*3600 }, 01830 { "MDT", -6*3600 }, 01831 { "MST", -7*3600 }, 01832 { "PDT", -7*3600 }, 01833 { "PST", -8*3600 }, 01834 // common, non-rfc-822 zones: 01835 { "CET", 1*3600 }, 01836 { "MET", 1*3600 }, 01837 { "UTC", 0 }, 01838 { "CEST", 2*3600 }, 01839 { "BST", 1*3600 }, 01840 // rfc 822 military timezones: 01841 { "Z", 0 }, 01842 { "A", -1*3600 }, 01843 { "B", -2*3600 }, 01844 { "C", -3*3600 }, 01845 { "D", -4*3600 }, 01846 { "E", -5*3600 }, 01847 { "F", -6*3600 }, 01848 { "G", -7*3600 }, 01849 { "H", -8*3600 }, 01850 { "I", -9*3600 }, 01851 // J is not used! 01852 { "K", -10*3600 }, 01853 { "L", -11*3600 }, 01854 { "M", -12*3600 }, 01855 { "N", 1*3600 }, 01856 { "O", 2*3600 }, 01857 { "P", 3*3600 }, 01858 { "Q", 4*3600 }, 01859 { "R", 5*3600 }, 01860 { "S", 6*3600 }, 01861 { "T", 7*3600 }, 01862 { "U", 8*3600 }, 01863 { "V", 9*3600 }, 01864 { "W", 10*3600 }, 01865 { "X", 11*3600 }, 01866 { "Y", 12*3600 }, 01867 }; 01868 static const int timeZonesLen = sizeof timeZones / sizeof *timeZones; 01869 01870 static bool parseAlphaNumericTimeZone( const char* &scursor, 01871 const char * const send, 01872 long int &secsEastOfGMT, 01873 bool &timeZoneKnown ) 01874 { 01875 QPair<const char*,int> maybeTimeZone( 0, 0 ); 01876 if ( !parseToken( scursor, send, maybeTimeZone, false /*no 8bit*/ ) ) { 01877 return false; 01878 } 01879 for ( int i = 0 ; i < timeZonesLen ; ++i ) { 01880 if ( qstrnicmp( timeZones[i].tzName, 01881 maybeTimeZone.first, maybeTimeZone.second ) == 0 ) { 01882 scursor += maybeTimeZone.second; 01883 secsEastOfGMT = timeZones[i].secsEastOfGMT; 01884 timeZoneKnown = true; 01885 return true; 01886 } 01887 } 01888 01889 // don't choke just because we don't happen to know the time zone 01890 KMIME_WARN_UNKNOWN( time zone, 01891 QByteArray( maybeTimeZone.first, maybeTimeZone.second ) ); 01892 secsEastOfGMT = 0; 01893 timeZoneKnown = false; 01894 return true; 01895 } 01896 01897 // parse a number and return the number of digits parsed: 01898 int parseDigits( const char* &scursor, const char * const send, int &result ) 01899 { 01900 result = 0; 01901 int digits = 0; 01902 for ( ; scursor != send && isdigit( *scursor ) ; scursor++, digits++ ) { 01903 result *= 10; 01904 result += int( *scursor - '0' ); 01905 } 01906 return digits; 01907 } 01908 01909 static bool parseTimeOfDay( const char* &scursor, const char * const send, 01910 int &hour, int &min, int &sec, bool isCRLF=false ) 01911 { 01912 // time-of-day := 2DIGIT [CFWS] ":" [CFWS] 2DIGIT [ [CFWS] ":" 2DIGIT ] 01913 01914 // 01915 // 2DIGIT representing "hour": 01916 // 01917 if ( !parseDigits( scursor, send, hour ) ) { 01918 return false; 01919 } 01920 01921 eatCFWS( scursor, send, isCRLF ); 01922 if ( scursor == send || *scursor != ':' ) { 01923 return false; 01924 } 01925 scursor++; // eat ':' 01926 01927 eatCFWS( scursor, send, isCRLF ); 01928 if ( scursor == send ) { 01929 return false; 01930 } 01931 01932 // 01933 // 2DIGIT representing "minute": 01934 // 01935 if ( !parseDigits( scursor, send, min ) ) { 01936 return false; 01937 } 01938 01939 eatCFWS( scursor, send, isCRLF ); 01940 if ( scursor == send ) { 01941 return true; // seconds are optional 01942 } 01943 01944 // 01945 // let's see if we have a 2DIGIT representing "second": 01946 // 01947 if ( *scursor == ':' ) { 01948 // yepp, there are seconds: 01949 scursor++; // eat ':' 01950 eatCFWS( scursor, send, isCRLF ); 01951 if ( scursor == send ) { 01952 return false; 01953 } 01954 01955 if ( !parseDigits( scursor, send, sec ) ) { 01956 return false; 01957 } 01958 } else { 01959 sec = 0; 01960 } 01961 01962 return true; 01963 } 01964 01965 bool parseTime( const char* &scursor, const char * send, 01966 int &hour, int &min, int &sec, long int &secsEastOfGMT, 01967 bool &timeZoneKnown, bool isCRLF ) 01968 { 01969 // time := time-of-day CFWS ( zone / obs-zone ) 01970 // 01971 // obs-zone := "UT" / "GMT" / 01972 // "EST" / "EDT" / ; -0500 / -0400 01973 // "CST" / "CDT" / ; -0600 / -0500 01974 // "MST" / "MDT" / ; -0700 / -0600 01975 // "PST" / "PDT" / ; -0800 / -0700 01976 // "A"-"I" / "a"-"i" / 01977 // "K"-"Z" / "k"-"z" 01978 01979 eatCFWS( scursor, send, isCRLF ); 01980 if ( scursor == send ) { 01981 return false; 01982 } 01983 01984 if ( !parseTimeOfDay( scursor, send, hour, min, sec, isCRLF ) ) { 01985 return false; 01986 } 01987 01988 eatCFWS( scursor, send, isCRLF ); 01989 if ( scursor == send ) { 01990 timeZoneKnown = false; 01991 secsEastOfGMT = 0; 01992 return true; // allow missing timezone 01993 } 01994 01995 timeZoneKnown = true; 01996 if ( *scursor == '+' || *scursor == '-' ) { 01997 // remember and eat '-'/'+': 01998 const char sign = *scursor++; 01999 // numerical timezone: 02000 int maybeTimeZone; 02001 if ( parseDigits( scursor, send, maybeTimeZone ) != 4 ) { 02002 return false; 02003 } 02004 secsEastOfGMT = 60 * ( maybeTimeZone / 100 * 60 + maybeTimeZone % 100 ); 02005 if ( sign == '-' ) { 02006 secsEastOfGMT *= -1; 02007 if ( secsEastOfGMT == 0 ) { 02008 timeZoneKnown = false; // -0000 means indetermined tz 02009 } 02010 } 02011 } else { 02012 // maybe alphanumeric timezone: 02013 if ( !parseAlphaNumericTimeZone( scursor, send, secsEastOfGMT, timeZoneKnown ) ) { 02014 return false; 02015 } 02016 } 02017 return true; 02018 } 02019 02020 bool parseDateTime( const char* &scursor, const char * const send, 02021 KDateTime &result, bool isCRLF ) 02022 { 02023 // Parsing date-time; strict mode: 02024 // 02025 // date-time := [ [CFWS] day-name [CFWS] "," ] ; wday 02026 // (expanded) [CFWS] 1*2DIGIT CFWS month-name CFWS 2*DIGIT [CFWS] ; date 02027 // time 02028 // 02029 // day-name := "Mon" / "Tue" / "Wed" / "Thu" / "Fri" / "Sat" / "Sun" 02030 // month-name := "Jan" / "Feb" / "Mar" / "Apr" / "May" / "Jun" / 02031 // "Jul" / "Aug" / "Sep" / "Oct" / "Nov" / "Dec" 02032 02033 result = KDateTime(); 02034 QDateTime maybeDateTime; 02035 02036 eatCFWS( scursor, send, isCRLF ); 02037 if ( scursor == send ) { 02038 return false; 02039 } 02040 02041 // 02042 // let's see if there's a day-of-week: 02043 // 02044 if ( parseDayName( scursor, send ) ) { 02045 eatCFWS( scursor, send, isCRLF ); 02046 if ( scursor == send ) { 02047 return false; 02048 } 02049 // day-name should be followed by ',' but we treat it as optional: 02050 if ( *scursor == ',' ) { 02051 scursor++; // eat ',' 02052 eatCFWS( scursor, send, isCRLF ); 02053 } 02054 } 02055 02056 // 02057 // 1*2DIGIT representing "day" (of month): 02058 // 02059 int maybeDay; 02060 if ( !parseDigits( scursor, send, maybeDay ) ) { 02061 return false; 02062 } 02063 02064 eatCFWS( scursor, send, isCRLF ); 02065 if ( scursor == send ) { 02066 return false; 02067 } 02068 02069 // 02070 // month-name: 02071 // 02072 int maybeMonth = 0; 02073 if ( !parseMonthName( scursor, send, maybeMonth ) ) { 02074 return false; 02075 } 02076 if ( scursor == send ) { 02077 return false; 02078 } 02079 assert( maybeMonth >= 0 ); assert( maybeMonth <= 11 ); 02080 ++maybeMonth; // 0-11 -> 1-12 02081 02082 eatCFWS( scursor, send, isCRLF ); 02083 if ( scursor == send ) { 02084 return false; 02085 } 02086 02087 // 02088 // 2*DIGIT representing "year": 02089 // 02090 int maybeYear; 02091 if ( !parseDigits( scursor, send, maybeYear ) ) { 02092 return false; 02093 } 02094 // RFC 2822 4.3 processing: 02095 if ( maybeYear < 50 ) { 02096 maybeYear += 2000; 02097 } else if ( maybeYear < 1000 ) { 02098 maybeYear += 1900; 02099 } 02100 // else keep as is 02101 if ( maybeYear < 1900 ) { 02102 return false; // rfc2822, 3.3 02103 } 02104 02105 eatCFWS( scursor, send, isCRLF ); 02106 if ( scursor == send ) { 02107 return false; 02108 } 02109 02110 maybeDateTime.setDate( QDate( maybeYear, maybeMonth, maybeDay ) ); 02111 02112 // 02113 // time 02114 // 02115 int maybeHour, maybeMinute, maybeSecond; 02116 long int secsEastOfGMT; 02117 bool timeZoneKnown = true; 02118 02119 if ( !parseTime( scursor, send, 02120 maybeHour, maybeMinute, maybeSecond, 02121 secsEastOfGMT, timeZoneKnown, isCRLF ) ) { 02122 return false; 02123 } 02124 02125 maybeDateTime.setTime( QTime( maybeHour, maybeMinute, maybeSecond ) ); 02126 if ( !maybeDateTime.isValid() ) 02127 return false; 02128 02129 result = KDateTime( maybeDateTime, KDateTime::Spec( KDateTime::OffsetFromUTC, secsEastOfGMT ) ); 02130 if ( !result.isValid() ) 02131 return false; 02132 return true; 02133 } 02134 02135 Headers::Base *extractFirstHeader( QByteArray &head ) 02136 { 02137 int endOfFieldBody = 0; 02138 bool folded = false; 02139 Headers::Base *header = 0; 02140 02141 int startOfFieldBody = head.indexOf( ':' ); 02142 const int endOfFieldHeader = startOfFieldBody; 02143 02144 if ( startOfFieldBody > -1 ) { //there is another header 02145 startOfFieldBody++; //skip the ':' 02146 if ( head[startOfFieldBody] == ' ' ) { // skip the space after the ':', if there 02147 startOfFieldBody++; 02148 } 02149 endOfFieldBody = findHeaderLineEnd( head, startOfFieldBody, &folded ); 02150 02151 QByteArray rawType = head.left( endOfFieldHeader ); 02152 QByteArray rawFieldBody = head.mid( startOfFieldBody, endOfFieldBody - startOfFieldBody ); 02153 if ( folded ) { 02154 rawFieldBody = unfoldHeader( rawFieldBody ); 02155 } 02156 // We might get an invalid mail without a field name, don't crash on that. 02157 if ( !rawType.isEmpty() ) { 02158 header = HeaderFactory::self()->createHeader( rawType ); 02159 } 02160 if( !header ) { 02161 //kWarning() << "Returning Generic header of type" << rawType; 02162 header = new Headers::Generic( rawType ); 02163 } 02164 header->from7BitString( rawFieldBody ); 02165 02166 head.remove( 0, endOfFieldBody + 1 ); 02167 } else { 02168 head.clear(); 02169 } 02170 02171 return header; 02172 } 02173 02174 void extractHeaderAndBody( const QByteArray &content, QByteArray &header, QByteArray &body ) 02175 { 02176 header.clear(); 02177 body.clear(); 02178 02179 // empty header 02180 if ( content.startsWith( '\n' ) ) { 02181 body = content.right( content.length() - 1 ); 02182 return; 02183 } 02184 02185 int pos = content.indexOf( "\n\n", 0 ); 02186 if ( pos > -1 ) { 02187 header = content.left( ++pos ); //header *must* end with "\n" !! 02188 body = content.mid( pos + 1, content.length() - pos - 1 ); 02189 } else { 02190 header = content; 02191 } 02192 } 02193 02194 Headers::Base::List parseHeaders( const QByteArray &head ) 02195 { 02196 Headers::Base::List ret; 02197 Headers::Base *h; 02198 02199 QByteArray copy = head; 02200 while( ( h = extractFirstHeader( copy ) ) ) { 02201 ret << h; 02202 } 02203 02204 return ret; 02205 } 02206 02207 } // namespace HeaderParsing 02208 02209 } // namespace KMime