00001 #ifndef _MIMETIC_PARSER_ITPARSER_H_
00002 #define _MIMETIC_PARSER_ITPARSER_H_
00003 #include <iterator>
00004 #include <algorithm>
00005 #include <stack>
00006 #include <iostream>
00007 #include <mimetic/tree.h>
00008 #include <mimetic/utils.h>
00009 #include <mimetic/mimeentity.h>
00010
00011
00012
00013
00014 namespace mimetic
00015 {
00016
00017
/// Parser selected by iterator category. This primary template is empty;
/// the category-specific partial specializations provide the members.
template<
    typename Iterator,
    typename ItCategory =
        typename std::iterator_traits<Iterator>::iterator_category
>
struct IteratorParser
{
};
00023
00024
00025
00026
00027 template<typename Iterator>
00028 struct IteratorParser<Iterator, std::input_iterator_tag>
00029 {
00030
00031 IteratorParser(MimeEntity& me)
00032 : m_me(me), m_iMask(imNone), m_lastBoundary(NoBoundary)
00033 {
00034 m_entityStack.push(&m_me);
00035 }
00036 virtual ~IteratorParser()
00037 {
00038 }
00039
00040
00041
00042 void iMask(size_t mask) { m_iMask = mask; }
00043
00044
00045
00046 size_t iMask() const { return m_iMask; }
00047
00048
00049
00050 void run(Iterator bit, Iterator eit)
00051 {
00052 m_bit = bit;
00053 m_eit = eit;
00054 doLoad();
00055 }
00056 protected:
00057 typedef std::list<std::string> BoundaryList;
00058 enum {
00059 CR = 0xD,
00060 LF = 0xA,
00061 NL = '\n'
00062 };
00063 enum {
00064 peIgnore,
00065 pePreamble,
00066 peBody,
00067 peEpilogue
00068 };
00069 enum BoundaryType {
00070 NoBoundary = 0,
00071 Boundary,
00072 ClosingBoundary,
00073 HigherLevelBoundary
00074
00075 };
00076 enum EntityType {
00077 etRfc822,
00078 etMsgRfc822,
00079 etMultipart
00080 };
00081
00082 MimeEntity& m_me;
00083 Iterator m_bit, m_eit;
00084 size_t m_iMask;
00085 BoundaryList m_boundaryList;
00086 BoundaryType m_lastBoundary;
00087 std::stack<MimeEntity*> m_entityStack;
00088
00089 protected:
00090 void appendPreambleBlock(const char* buf, int sz)
00091 {
00092 MimeEntity* pMe = m_entityStack.top();
00093 pMe->body().preamble().append(buf,sz);
00094 }
00095
00096 void appendEpilogueBlock(const char* buf, int sz)
00097 {
00098 MimeEntity* pMe = m_entityStack.top();
00099 pMe->body().epilogue().append(buf,sz);
00100 }
00101
00102 void appendBodyBlock(const char* buf, int sz)
00103 {
00104 MimeEntity* pMe = m_entityStack.top();
00105 pMe->body().append(buf, sz);
00106 }
00107
00108 std::string getBoundary()
00109 {
00110 const MimeEntity* pMe = m_entityStack.top();
00111 const ContentType& ct = pMe->header().contentType();
00112 return std::string("--") + ct.param("boundary");
00113 }
00114
00115 void popChild()
00116 {
00117 m_entityStack.pop();
00118 }
00119
00120 void pushNewChild()
00121 {
00122 MimeEntity* pMe = m_entityStack.top();
00123 MimeEntity* pChild = new MimeEntity;
00124 pMe->body().parts().push_back(pChild);
00125 m_entityStack.push(pChild);
00126 }
00127
00128 EntityType getType()
00129 {
00130 MimeEntity* pMe = m_entityStack.top();
00131 const Header& h = pMe->header();
00132
00133
00134 const ContentType& ct = h.contentType();
00135 if(ct.isMultipart())
00136 return etMultipart;
00137 else if (ct.type() == "message" && ct.subtype() == "rfc822")
00138 return etMsgRfc822;
00139 else
00140 return etRfc822;
00141 }
00142
00143 void addField(const std::string& name, const std::string& value)
00144 {
00145 MimeEntity* pMe = m_entityStack.top();
00146 Header& h = pMe->header();
00147 Header::iterator it = h.insert(h.end(), Field());
00148 it->name(name);
00149 it->value(value);
00150 }
00151
00152 BoundaryType isBoundary(const std::string& line)
00153 {
00154 if(line.length() == 0 || line[0] != '-')
00155 return m_lastBoundary = NoBoundary;
00156
00157 int level = 0;
00158 int lineLen = line.length();
00159 BoundaryList::const_iterator bit,eit;
00160 bit = m_boundaryList.begin(), eit = m_boundaryList.end();
00161 for(;bit != eit; ++bit, ++level)
00162 {
00163 const std::string& b = *bit;
00164 int bLen = b.length();
00165 if(line.compare(0, bLen, b) == 0)
00166 {
00167
00168 if(level > 0)
00169 return m_lastBoundary=HigherLevelBoundary;
00170
00171 if(lineLen > bLen && line.compare(bLen,2,"--") == 0)
00172 return m_lastBoundary = ClosingBoundary;
00173 else
00174 return m_lastBoundary = Boundary;
00175 }
00176 }
00177 return m_lastBoundary = NoBoundary;
00178 }
00179
00180 inline bool isnl(char c) const
00181 {
00182 return (c == CR || c == LF);
00183 }
00184
00185 inline bool isnl(char a, char b) const
00186 {
00187 if(a == CR || a == LF)
00188 if(b == (a == CR ? LF : CR))
00189 return true;
00190 return false;
00191 }
00192 void doLoad()
00193 {
00194 loadHeader();
00195 loadBody();
00196 }
00197 bool valid() const
00198 {
00199 return m_bit != m_eit;
00200 }
/// Stores c at buf[pos] and advances pos, growing the heap buffer when
/// needed.
///
/// @param buf    buffer allocated with new[] (or 0 when nothing has been
///               allocated yet); replaced by a larger buffer on growth, the
///               old one is released with delete[]
/// @param bufsz  usable size of buf; the allocation always holds one extra
///               byte so callers may write a terminator at buf[bufsz]
/// @param c      character to store
/// @param pos    write position, incremented on return
///
/// The buffer grows in multiples of alloc_block whenever pos is not inside
/// it (pos >= bufsz, not only pos == bufsz), and new storage is
/// zero-initialised. buf and bufsz are only updated after the allocation
/// succeeded, so they stay consistent if new[] throws.
void append(char*& buf, size_t& bufsz, char c, size_t& pos)
{
    enum { alloc_block = 128 };
    if(pos >= bufsz)
    {
        size_t grown = bufsz;
        while(pos >= grown)
            grown += alloc_block;

        // value-initialised: every byte, including the spare one, is 0
        char* bigger = new char[grown + 1]();
        if(buf != 0)
        {
            std::copy(buf, buf + bufsz, bigger);
            delete[] buf;
        }
        buf = bigger;
        bufsz = grown;
    }
    buf[pos++] = c;
}
00222
00223
00224 void loadHeader()
00225 {
00226 enum {
00227 sInit,
00228 sIgnoreLine,
00229 sNewline,
00230 sWaitingName,
00231 sWaitingValue,
00232 sWaitingFoldedValue,
00233 sName,
00234 sValue,
00235 sIgnoreHeader
00236 };
00237 register int status;
00238 int pos;
00239 char *name, *value;
00240 size_t nBufSz, vBufSz, nPos, vPos;
00241 char prev, c = 0;
00242
00243 name = value = 0;
00244 pos = nBufSz = vBufSz = nPos = vPos = 0;
00245 status = (m_iMask & imHeader ? sIgnoreHeader : sInit);
00246
00247 while(m_bit != m_eit)
00248 {
00249 c = *m_bit;
00250 switch(status)
00251 {
00252 case sInit:
00253 if(isnl(c))
00254 status = sNewline;
00255 else
00256 status = sName;
00257 continue;
00258 case sIgnoreLine:
00259 if(!isnl(c))
00260 break;
00261 status = sNewline;
00262 continue;
00263 case sNewline:
00264 status = sWaitingName;
00265 if(pos > 0)
00266 {
00267 pos = 0;
00268 prev = c;
00269 if(++m_bit == m_eit) goto out;
00270 c = *m_bit;
00271 if(c == (prev == CR ? LF : CR))
00272 {
00273 --pos;
00274 break;
00275 } else
00276 continue;
00277 } else {
00278
00279 prev = c;
00280 if(++m_bit == m_eit) goto out;
00281 c = *m_bit;
00282 if(c == (prev == CR ? LF : CR))
00283 ++m_bit;
00284 goto out;
00285 }
00286 case sWaitingName:
00287 if(isblank(c))
00288 {
00289
00290 status = sWaitingFoldedValue;
00291 continue;
00292 }
00293
00294 if(nPos)
00295 {
00296 name[nPos] = 0;
00297
00298 if(vPos)
00299 {
00300 value[vPos] = 0;
00301 addField(name,value);
00302 } else
00303 addField(name,"");
00304 nPos = vPos = 0;
00305 }
00306 status = (isnl(c) ? sNewline : sName);
00307 continue;
00308 case sWaitingValue:
00309 if(isblank(c))
00310 break;
00311 status = sValue;
00312 continue;
00313 case sWaitingFoldedValue:
00314 if(isblank(c))
00315 break;
00316 append(value, vBufSz, ' ', vPos);
00317 status = sValue;
00318 continue;
00319 case sName:
00320 if(c > 32 && c < 127 && c != ':') {
00321 if(nPos > 0 && isblank(name[nPos-1]))
00322 {
00323
00324
00325 onBlock(name, nPos, peBody);
00326 goto out;
00327 }
00328 append(name, nBufSz, c, nPos);
00329 } else if(c == ':') {
00330 if(nPos == 0)
00331 {
00332
00333 status = sIgnoreLine;
00334 continue;
00335 }
00336
00337
00338
00339 while(nPos > 0 && isblank(name[nPos-1]))
00340 nPos--;
00341
00342 status = sWaitingValue;
00343 } else if(isblank(c)) {
00344
00345
00346
00347
00348
00349 append(name, nBufSz, c, nPos);
00350 } else {
00351
00352
00353 onBlock(name, nPos, peBody);
00354 goto out;
00355 }
00356 break;
00357 case sValue:
00358 if(isnl(c))
00359 {
00360 status = sNewline;
00361 continue;
00362 }
00363 append(value, vBufSz, c, vPos);
00364 break;
00365 case sIgnoreHeader:
00366 if(isnl(c))
00367 {
00368 prev = c;
00369 if(++m_bit == m_eit) goto out;
00370 c = *m_bit;
00371 if(c == (prev == CR ? LF : CR))
00372 ++m_bit;
00373 if(pos == 0)
00374 goto out;
00375 pos = 0;
00376 continue;
00377 }
00378 break;
00379 }
00380 ++m_bit; ++pos;
00381 }
00382 out:
00383 if(name)
00384 delete[] name;
00385 if(value)
00386 delete[] value;
00387 return;
00388 }
00389 void loadBody()
00390 {
00391 switch(getType())
00392 {
00393 case etRfc822:
00394 if(m_iMask & imBody)
00395 jump_to_next_boundary();
00396 else
00397 copy_until_boundary(peBody);
00398 break;
00399 case etMultipart:
00400 loadMultipart();
00401 break;
00402 case etMsgRfc822:
00403 if(m_iMask & imChildParts)
00404 jump_to_next_boundary();
00405 else {
00406 pushNewChild();
00407 doLoad();
00408 popChild();
00409 }
00410 break;
00411 }
00412 }
00413 void loadMultipart()
00414 {
00415 std::string boundary = getBoundary();
00416 m_boundaryList.push_front(boundary);
00417 ParsingElem pe;
00418
00419 pe = (m_iMask & imPreamble ? peIgnore : pePreamble );
00420 copy_until_boundary(pe);
00421 while(m_bit != m_eit)
00422 {
00423 switch(m_lastBoundary)
00424 {
00425 case NoBoundary:
00426 return;
00427 case Boundary:
00428 if(m_iMask & imChildParts)
00429 jump_to_next_boundary();
00430 else {
00431 pushNewChild();
00432 doLoad();
00433 popChild();
00434 }
00435 break;
00436 case ClosingBoundary:
00437 m_boundaryList.erase(m_boundaryList.begin());
00438
00439 pe=(m_iMask & imEpilogue? peIgnore: peEpilogue);
00440 copy_until_boundary(pe);
00441 return;
00442 case HigherLevelBoundary:
00443 m_boundaryList.erase(m_boundaryList.begin());
00444 return;
00445 }
00446 }
00447 }
00448 inline void onBlock(const char* block, int sz, ParsingElem pe)
00449 {
00450 switch(pe)
00451 {
00452 case peIgnore:
00453 return;
00454 case pePreamble:
00455 appendPreambleBlock(block, sz);
00456 break;
00457 case peEpilogue:
00458 appendEpilogueBlock(block, sz);
00459 break;
00460 case peBody:
00461 appendBodyBlock(block, sz);
00462 break;
00463 }
00464 }
00465 void jump_to_next_boundary()
00466 {
00467 copy_until_boundary(peIgnore);
00468 }
00469
00470
00471
00472 virtual void copy_until_boundary(ParsingElem pe)
00473 {
00474 size_t pos, lines, eomsz = 0;
00475 register char c;
00476 enum { nlsz = 1 };
00477 const char *eom = 0;
00478
00479 enum { blksz = 4096 };
00480 char block[blksz];
00481 size_t blkpos = 0;
00482 size_t sl_off = 0;
00483
00484 pos = lines = 0;
00485 while(m_bit != m_eit)
00486 {
00487
00488 if(blkpos >= blksz - 2 - nlsz)
00489 {
00490 if(sl_off == 0)
00491 {
00492
00493
00494
00495 block[blkpos] = 0;
00496 onBlock(block, blkpos, pe);
00497 blkpos = sl_off = 0;
00498 } else {
00499
00500
00501 size_t llen = blkpos - sl_off;
00502 onBlock(block, sl_off, pe);
00503 memmove(block, block + sl_off, llen);
00504 sl_off = 0;
00505 blkpos = llen;
00506 }
00507 }
00508 c = *m_bit;
00509 if(isnl(c))
00510 {
00511 char nlbuf[3] = { 0, 0, 0 };
00512
00513 nlbuf[0] = c;
00514
00515
00516 if(++m_bit != m_eit)
00517 {
00518 char next = *m_bit;
00519 if(next == (c == CR ? LF : CR))
00520 {
00521 nlbuf[1] = next;
00522 ++m_bit;
00523 }
00524 }
00525
00526 if(pos)
00527 {
00528
00529 block[blkpos] = 0;
00530 if(block[sl_off] == '-' && sl_off < blkpos &&
00531 block[sl_off+1] == '-')
00532 {
00533 std::string Line(block+sl_off, blkpos-sl_off);
00534 if(isBoundary(Line))
00535 {
00536
00537 if (sl_off>=2)
00538 {
00539 int i = sl_off;
00540 char a = block[--i];
00541 char b = block[--i];
00542
00543 if(isnl(a,b))
00544 sl_off -= 2;
00545 else if(isnl(a))
00546 sl_off--;
00547
00548 } else if (sl_off==1 && isnl(block[0])) {
00549 sl_off--;
00550 }
00551 onBlock(block, sl_off, pe);
00552 return;
00553 }
00554 }
00555
00556
00557 if(eom && pos >= eomsz)
00558 {
00559 char *line = block + sl_off;
00560 size_t i = 0;
00561 for(; i < eomsz; i++)
00562 if(eom[i] != line[i])
00563 break;
00564 if(i==eomsz)
00565 {
00566 onBlock(block, sl_off,
00567 pe);
00568 return;
00569 }
00570 }
00571 }
00572
00573 for(int i = 0; nlbuf[i] != 0; i++)
00574 block[blkpos++] = nlbuf[i];
00575 block[blkpos] = 0;
00576 sl_off = blkpos;
00577 pos = 0;
00578 } else {
00579 pos++;
00580 block[blkpos++] = c;
00581 ++m_bit;
00582 }
00583 }
00584
00585 block[blkpos] = 0;
00586 onBlock(block, blkpos, pe);
00587 }
00588 };
00589
00590
00591
00592
00593
00594 template<typename Iterator>
00595 struct IteratorParser<Iterator, std::forward_iterator_tag>:
00596 public IteratorParser<Iterator, std::input_iterator_tag>
00597 {
00598
00599
00600
00601
00602
00603 typedef IteratorParser<Iterator, std::input_iterator_tag> base_type;
00604 IteratorParser(MimeEntity& me)
00605 : base_type(me)
00606 {
00607 }
00608 };
00609
00610
00611
00612
00613 template<typename Iterator>
00614 struct IteratorParser<Iterator, std::bidirectional_iterator_tag>:
00615 public IteratorParser<Iterator, std::forward_iterator_tag>
00616 {
00617 typedef IteratorParser<Iterator, std::forward_iterator_tag> base_type;
00618 IteratorParser(MimeEntity& me)
00619 : base_type(me)
00620 {
00621 }
00622 };
00623
00624
00625
00626
00627 template<typename Iterator>
00628 struct IteratorParser<Iterator, std::random_access_iterator_tag>:
00629 public IteratorParser<Iterator, std::bidirectional_iterator_tag>
00630 {
00631 typedef IteratorParser<Iterator, std::bidirectional_iterator_tag> base_type;
00632 IteratorParser(MimeEntity& me)
00633 : base_type(me)
00634 {
00635 }
00636 private:
00637 using base_type::peIgnore;
00638 using base_type::pePreamble;
00639 using base_type::peBody;
00640 using base_type::peEpilogue;
00641
00642 using base_type::NoBoundary;
00643 using base_type::Boundary;
00644 using base_type::ClosingBoundary;
00645 using base_type::HigherLevelBoundary;
00646
00647 using base_type::m_boundaryList;
00648 using base_type::m_lastBoundary;
00649 using base_type::m_entityStack;
00650 using base_type::m_me;
00651 using base_type::m_iMask;
00652 using base_type::m_bit;
00653 using base_type::m_eit;
00654 using base_type::isnl;
00655
00656 typedef TreeNode<char> BoundaryTree;
00657 inline void onBlock(Iterator bit, int size, ParsingElem pe)
00658 {
00659 if(pe == peIgnore)
00660 return;
00661 Iterator eit = bit + size;
00662 MimeEntity* pMe = m_entityStack.top();
00663 switch(pe)
00664 {
00665 case pePreamble:
00666 pMe->body().preamble().append(bit, eit);
00667 break;
00668 case peEpilogue:
00669 pMe->body().epilogue().append(bit, eit);
00670 break;
00671 case peBody:
00672 pMe->body().append(bit, eit);
00673 break;
00674 }
00675 }
00676 void copy_until_boundary(ParsingElem pe)
00677 {
00678
00679 if(m_boundaryList.empty())
00680 {
00681 onBlock(m_bit, m_eit-m_bit, pe);
00682 m_bit = m_eit;
00683 return;
00684 }
00685
00686
00687
00688 typename base_type::BoundaryList::const_iterator
00689 bBit = m_boundaryList.begin(), bEit = m_boundaryList.end();
00690 m_lastBoundary = NoBoundary;
00691 int depth = 0;
00692 for( ;bBit != bEit; ++bBit, ++depth)
00693 {
00694 const std::string& boundary = *bBit;
00695 Iterator off;
00696 if( (off=utils::find_bm(m_bit,m_eit,boundary)) != m_eit)
00697 {
00698 Iterator base = m_bit;
00699 size_t block_sz = off - base;
00700 m_lastBoundary =
00701 (depth ? HigherLevelBoundary: Boundary);
00702 off += boundary.length();
00703 m_bit = off;
00704 if(off<m_eit-1 && *off =='-' && *(off+1) == '-')
00705 {
00706 m_lastBoundary = ClosingBoundary;
00707 m_bit = off + 2;
00708 }
00709 if(m_bit < m_eit-1 && isnl(*m_bit))
00710 {
00711 char c = *m_bit++;
00712 char next = *m_bit;
00713 if(isnl(next) && next != c)
00714 ++m_bit;
00715 }
00716
00717
00718 if(block_sz)
00719 {
00720 Iterator p = base + block_sz;
00721 char a = *--p, b = *--p;
00722 if(isnl(a,b))
00723 block_sz -= 2;
00724 else if(isnl(a))
00725 block_sz--;
00726 }
00727 onBlock(base, block_sz, pe);
00728 return;
00729 } else {
00730 onBlock(m_bit, m_eit-m_bit, pe);
00731 m_bit = m_eit;
00732 }
00733 }
00734 }
00735 BoundaryTree m_boundaryTree;
00736 void buildBoundaryTree()
00737 {
00738 m_boundaryTree = BoundaryTree();
00739 typename base_type::BoundaryList::const_iterator
00740 bit = m_boundaryList.begin(), eit = m_boundaryList.end();
00741 BoundaryTree::NodeList *pChilds;
00742 BoundaryTree::NodeList::iterator it;
00743 int depth = 0;
00744 for( ; bit != eit; ++bit)
00745 {
00746 pChilds = &m_boundaryTree.childList();
00747 it = pChilds->begin();
00748 const char *w = bit->c_str();
00749 do
00750 {
00751 it = find_if(pChilds->begin(), pChilds->end(),
00752 FindNodePred<char>(*w));
00753 if( it == pChilds->end() )
00754 it = pChilds->insert(pChilds->end(),*w);
00755 pChilds = &it->childList();
00756 depth++;
00757 } while(*(++w));
00758 }
00759 }
00760
00761 };
00762
00763 }
00764
00765 #endif