STXXL 1.2.1 — source listing of include/stxxl/bits/containers/priority_queue.h
00001 /*************************************************************************** 00002 * include/stxxl/bits/containers/priority_queue.h 00003 * 00004 * Part of the STXXL. See http://stxxl.sourceforge.net 00005 * 00006 * Copyright (C) 1999 Peter Sanders <sanders@mpi-sb.mpg.de> 00007 * Copyright (C) 2003, 2004, 2007 Roman Dementiev <dementiev@mpi-sb.mpg.de> 00008 * Copyright (C) 2007 Johannes Singler <singler@ira.uka.de> 00009 * Copyright (C) 2007, 2008 Andreas Beckmann <beckmann@cs.uni-frankfurt.de> 00010 * 00011 * Distributed under the Boost Software License, Version 1.0. 00012 * (See accompanying file LICENSE_1_0.txt or copy at 00013 * http://www.boost.org/LICENSE_1_0.txt) 00014 **************************************************************************/ 00015 00016 #ifndef STXXL_PRIORITY_QUEUE_HEADER 00017 #define STXXL_PRIORITY_QUEUE_HEADER 00018 00019 #include <list> 00020 #include <iterator> 00021 #include <vector> 00022 00023 #include <stxxl/bits/mng/mng.h> 00024 #include <stxxl/bits/mng/prefetch_pool.h> 00025 #include <stxxl/bits/mng/write_pool.h> 00026 #include <stxxl/bits/common/tmeta.h> 00027 00028 00029 __STXXL_BEGIN_NAMESPACE 00030 00034 00037 namespace priority_queue_local 00038 { 00045 template <typename _Tp, typename _Sequence = std::vector<_Tp>, 00046 typename _Compare = std::less<typename _Sequence::value_type> > 00047 class internal_priority_queue 00048 { 00049 // concept requirements 00050 typedef typename _Sequence::value_type _Sequence_value_type; 00051 00052 public: 00053 typedef typename _Sequence::value_type value_type; 00054 typedef typename _Sequence::reference reference; 00055 typedef typename _Sequence::const_reference const_reference; 00056 typedef typename _Sequence::size_type size_type; 00057 typedef _Sequence container_type; 00058 00059 protected: 00060 // See queue::c for notes on these names. 
00061 _Sequence c; 00062 _Compare comp; 00063 size_type N; 00064 00065 public: 00069 explicit 00070 internal_priority_queue(size_type capacity) 00071 : c(capacity), N(0) 00072 { } 00073 00077 bool 00078 empty() const 00079 { return N == 0; } 00080 00082 size_type 00083 size() const 00084 { return N; } 00085 00090 const_reference 00091 top() const 00092 { 00093 return c.front(); 00094 } 00095 00104 void 00105 push(const value_type & __x) 00106 { 00107 c[N] = __x; 00108 ++N; 00109 std::push_heap(c.begin(), c.begin() + N, comp); 00110 } 00111 00123 void 00124 pop() 00125 { 00126 std::pop_heap(c.begin(), c.begin() + N, comp); 00127 --N; 00128 } 00129 00133 void sort_to(value_type * target) 00134 { 00135 std::sort(c.begin(), c.begin() + N, comp); 00136 std::reverse_copy(c.begin(), c.begin() + N, target); 00137 } 00138 00142 void clear() 00143 { 00144 N = 0; 00145 } 00146 }; 00147 00148 00150 // auxiliary functions 00151 00152 // merge sz element from the two sentinel terminated input 00153 // sequences from0 and from1 to "to" 00154 // advance from0 and from1 accordingly 00155 // require: at least sz nonsentinel elements available in from0, from1 00156 // require: to may overwrite one of the sources as long as 00157 // *(fromx + sz) is before the end of fromx 00158 template <class InputIterator, class OutputIterator, class Cmp_> 00159 void merge_iterator( 00160 InputIterator & from0, 00161 InputIterator & from1, 00162 OutputIterator to, unsigned_type sz, Cmp_ cmp) 00163 { 00164 OutputIterator done = to + sz; 00165 00166 while (to != done) 00167 { 00168 if (cmp(*from0, *from1)) 00169 { 00170 *to = *from1; 00171 ++from1; 00172 } 00173 else 00174 { 00175 *to = *from0; 00176 ++from0; 00177 } 00178 ++to; 00179 } 00180 } 00181 00182 // merge sz element from the three sentinel terminated input 00183 // sequences from0, from1 and from2 to "to" 00184 // advance from0, from1 and from2 accordingly 00185 // require: at least sz nonsentinel elements available in from0, from1 and 
from2 00186 // require: to may overwrite one of the sources as long as 00187 // *(fromx + sz) is before the end of fromx 00188 template <class InputIterator, class OutputIterator, class Cmp_> 00189 void merge3_iterator( 00190 InputIterator & from0, 00191 InputIterator & from1, 00192 InputIterator & from2, 00193 OutputIterator to, unsigned_type sz, Cmp_ cmp) 00194 { 00195 OutputIterator done = to + sz; 00196 00197 if (cmp(*from1, *from0)) { 00198 if (cmp(*from2, *from1)) { 00199 goto s012; 00200 } 00201 else { 00202 if (cmp(*from0, *from2)) { 00203 goto s201; 00204 } 00205 else { 00206 goto s021; 00207 } 00208 } 00209 } else { 00210 if (cmp(*from2, *from1)) { 00211 if (cmp(*from2, *from0)) { 00212 goto s102; 00213 } 00214 else { 00215 goto s120; 00216 } 00217 } else { 00218 goto s210; 00219 } 00220 } 00221 00222 #define Merge3Case(a, b, c) \ 00223 s ## a ## b ## c : \ 00224 if (to == done) \ 00225 return;\ 00226 *to = *from ## a; \ 00227 ++to; \ 00228 ++from ## a; \ 00229 if (cmp(*from ## b, *from ## a)) \ 00230 goto s ## a ## b ## c;\ 00231 if (cmp(*from ## c, *from ## a)) \ 00232 goto s ## b ## a ## c;\ 00233 goto s ## b ## c ## a; 00234 00235 // the order is chosen in such a way that 00236 // four of the trailing gotos can be eliminated by the optimizer 00237 Merge3Case(0, 1, 2); 00238 Merge3Case(1, 2, 0); 00239 Merge3Case(2, 0, 1); 00240 Merge3Case(1, 0, 2); 00241 Merge3Case(0, 2, 1); 00242 Merge3Case(2, 1, 0); 00243 00244 #undef Merge3Case 00245 } 00246 00247 00248 // merge sz element from the four sentinel terminated input 00249 // sequences from0, from1, from2 and from3 to "to" 00250 // advance from0, from1, from2 and from3 accordingly 00251 // require: at least sz nonsentinel elements available in from0, from1, from2 and from3 00252 // require: to may overwrite one of the sources as long as 00253 // *(fromx + sz) is before the end of fromx 00254 template <class InputIterator, class OutputIterator, class Cmp_> 00255 void merge4_iterator( 00256 InputIterator & 
from0, 00257 InputIterator & from1, 00258 InputIterator & from2, 00259 InputIterator & from3, 00260 OutputIterator to, unsigned_type sz, Cmp_ cmp) 00261 { 00262 OutputIterator done = to + sz; 00263 00264 #define StartMerge4(a, b, c, d) \ 00265 if ((!cmp(*from ## a, *from ## b)) && (!cmp(*from ## b, *from ## c)) && (!cmp(*from ## c, *from ## d))) \ 00266 goto s ## a ## b ## c ## d; 00267 00268 // b>a c>b d>c 00269 // a<b b<c c<d 00270 // a<=b b<=c c<=d 00271 // !(a>b) !(b>c) !(c>d) 00272 00273 StartMerge4(0, 1, 2, 3); 00274 StartMerge4(1, 2, 3, 0); 00275 StartMerge4(2, 3, 0, 1); 00276 StartMerge4(3, 0, 1, 2); 00277 00278 StartMerge4(0, 3, 1, 2); 00279 StartMerge4(3, 1, 2, 0); 00280 StartMerge4(1, 2, 0, 3); 00281 StartMerge4(2, 0, 3, 1); 00282 00283 StartMerge4(0, 2, 3, 1); 00284 StartMerge4(2, 3, 1, 0); 00285 StartMerge4(3, 1, 0, 2); 00286 StartMerge4(1, 0, 2, 3); 00287 00288 StartMerge4(2, 0, 1, 3); 00289 StartMerge4(0, 1, 3, 2); 00290 StartMerge4(1, 3, 2, 0); 00291 StartMerge4(3, 2, 0, 1); 00292 00293 StartMerge4(3, 0, 2, 1); 00294 StartMerge4(0, 2, 1, 3); 00295 StartMerge4(2, 1, 3, 0); 00296 StartMerge4(1, 3, 0, 2); 00297 00298 StartMerge4(1, 0, 3, 2); 00299 StartMerge4(0, 3, 2, 1); 00300 StartMerge4(3, 2, 1, 0); 00301 StartMerge4(2, 1, 0, 3); 00302 00303 #define Merge4Case(a, b, c, d) \ 00304 s ## a ## b ## c ## d : \ 00305 if (to == done) \ 00306 return;\ 00307 *to = *from ## a; \ 00308 ++to; \ 00309 ++from ## a; \ 00310 if (cmp(*from ## c, *from ## a)) \ 00311 { \ 00312 if (cmp(*from ## b, *from ## a)) \ 00313 goto s ## a ## b ## c ## d;\ 00314 else \ 00315 goto s ## b ## a ## c ## d;\ 00316 } \ 00317 else \ 00318 { \ 00319 if (cmp(*from ## d, *from ## a)) \ 00320 goto s ## b ## c ## a ## d;\ 00321 else \ 00322 goto s ## b ## c ## d ## a;\ 00323 } 00324 00325 Merge4Case(0, 1, 2, 3); 00326 Merge4Case(1, 2, 3, 0); 00327 Merge4Case(2, 3, 0, 1); 00328 Merge4Case(3, 0, 1, 2); 00329 00330 Merge4Case(0, 3, 1, 2); 00331 Merge4Case(3, 1, 2, 0); 00332 Merge4Case(1, 2, 
0, 3); 00333 Merge4Case(2, 0, 3, 1); 00334 00335 Merge4Case(0, 2, 3, 1); 00336 Merge4Case(2, 3, 1, 0); 00337 Merge4Case(3, 1, 0, 2); 00338 Merge4Case(1, 0, 2, 3); 00339 00340 Merge4Case(2, 0, 1, 3); 00341 Merge4Case(0, 1, 3, 2); 00342 Merge4Case(1, 3, 2, 0); 00343 Merge4Case(3, 2, 0, 1); 00344 00345 Merge4Case(3, 0, 2, 1); 00346 Merge4Case(0, 2, 1, 3); 00347 Merge4Case(2, 1, 3, 0); 00348 Merge4Case(1, 3, 0, 2); 00349 00350 Merge4Case(1, 0, 3, 2); 00351 Merge4Case(0, 3, 2, 1); 00352 Merge4Case(3, 2, 1, 0); 00353 Merge4Case(2, 1, 0, 3); 00354 00355 #undef StartMerge4 00356 #undef Merge4Case 00357 } 00358 00359 00365 template <typename Tp_, unsigned_type max_size_> 00366 class internal_bounded_stack 00367 { 00368 typedef Tp_ value_type; 00369 typedef unsigned_type size_type; 00370 enum { max_size = max_size_ }; 00371 00372 size_type size_; 00373 value_type array[max_size]; 00374 00375 public: 00376 internal_bounded_stack() : size_(0) { } 00377 00378 void push(const value_type & x) 00379 { 00380 assert(size_ < max_size); 00381 array[size_++] = x; 00382 } 00383 00384 const value_type & top() const 00385 { 00386 assert(size_ > 0); 00387 return array[size_ - 1]; 00388 } 00389 00390 void pop() 00391 { 00392 assert(size_ > 0); 00393 --size_; 00394 } 00395 00396 void clear() 00397 { 00398 size_ = 0; 00399 } 00400 00401 size_type size() const 00402 { 00403 return size_; 00404 } 00405 00406 bool empty() const 00407 { 00408 return size_ == 0; 00409 } 00410 }; 00411 00412 00417 template <class BlockType_, 00418 class Cmp_, 00419 unsigned Arity_, 00420 class AllocStr_ = STXXL_DEFAULT_ALLOC_STRATEGY> 00421 class ext_merger : private noncopyable 00422 { 00423 public: 00424 typedef stxxl::uint64 size_type; 00425 typedef BlockType_ block_type; 00426 typedef typename block_type::bid_type bid_type; 00427 typedef typename block_type::value_type value_type; 00428 typedef Cmp_ comparator_type; 00429 typedef AllocStr_ alloc_strategy; 00430 typedef value_type Element; 00431 typedef 
typed_block<sizeof(value_type), value_type> sentinel_block_type; 00432 00433 // KNKMAX / 2 < arity <= KNKMAX 00434 enum { arity = Arity_, KNKMAX = 1UL << (LOG2<Arity_>::ceil) }; 00435 00436 block_type * convert_block_pointer(sentinel_block_type * arg) 00437 { 00438 return reinterpret_cast<block_type *>(arg); 00439 } 00440 00441 protected: 00442 comparator_type cmp; 00443 00444 bool is_sentinel(const Element & a) const 00445 { 00446 return !(cmp(cmp.min_value(), a)); // a <= cmp.min_value() 00447 } 00448 00449 bool not_sentinel(const Element & a) const 00450 { 00451 return cmp(cmp.min_value(), a); // a > cmp.min_value() 00452 } 00453 00454 struct sequence_state : private noncopyable 00455 { 00456 unsigned_type current; //current index 00457 block_type * block; //current block 00458 std::list<bid_type> * bids; //list of blocks forming this sequence 00459 comparator_type cmp; 00460 ext_merger * merger; 00461 bool allocated; 00462 00464 const value_type & operator * () const 00465 { 00466 return (*block)[current]; 00467 } 00468 00469 sequence_state() : bids(NULL), allocated(false) 00470 { } 00471 00472 ~sequence_state() 00473 { 00474 STXXL_VERBOSE2("ext_merger sequence_state::~sequence_state()"); 00475 if (bids != NULL) 00476 { 00477 block_manager * bm = block_manager::get_instance(); 00478 bm->delete_blocks(bids->begin(), bids->end()); 00479 delete bids; 00480 } 00481 } 00482 00483 void make_inf() 00484 { 00485 current = 0; 00486 (*block)[0] = cmp.min_value(); 00487 } 00488 00489 bool is_sentinel(const Element & a) const 00490 { 00491 return !(cmp(cmp.min_value(), a)); 00492 } 00493 00494 bool not_sentinel(const Element & a) const 00495 { 00496 return cmp(cmp.min_value(), a); 00497 } 00498 00499 void swap(sequence_state & obj) 00500 { 00501 if (&obj != this) 00502 { 00503 std::swap(current, obj.current); 00504 std::swap(block, obj.block); 00505 std::swap(bids, obj.bids); 00506 assert(merger == obj.merger); 00507 std::swap(allocated, obj.allocated); 00508 } 00509 } 
00510 00511 sequence_state & operator ++ () 00512 { 00513 assert(not_sentinel((*block)[current])); 00514 assert(current < block->size); 00515 00516 ++current; 00517 00518 if (current == block->size) 00519 { 00520 STXXL_VERBOSE2("ext_merger sequence_state operator++ crossing block border "); 00521 // go to the next block 00522 assert(bids != NULL); 00523 if (bids->empty()) // if there is no next block 00524 { 00525 STXXL_VERBOSE2("ext_merger sequence_state operator++ it was the last block in the sequence "); 00526 delete bids; 00527 bids = NULL; 00528 make_inf(); 00529 } 00530 else 00531 { 00532 STXXL_VERBOSE2("ext_merger sequence_state operator++ there is another block "); 00533 bid_type bid = bids->front(); 00534 bids->pop_front(); 00535 if (!(bids->empty())) 00536 { 00537 STXXL_VERBOSE2("ext_merger sequence_state operator++ one more block exists in a sequence: " << 00538 "flushing this block in write cache (if not written yet) and giving hint to prefetcher"); 00539 bid_type next_bid = bids->front(); 00540 //Hint next block of sequence. 00541 //This is mandatory to ensure proper synchronization between prefetch pool and write pool. 00542 merger->p_pool->hint(next_bid, *(merger->w_pool)); 00543 } 00544 merger->p_pool->read(block, bid)->wait(); 00545 STXXL_VERBOSE2("first element of read block " << bid << " " << *(block->begin()) << " cached in " << block); 00546 block_manager::get_instance()->delete_block(bid); 00547 current = 0; 00548 } 00549 } 00550 return *this; 00551 } 00552 }; 00553 00554 00555 //a pair consisting a value 00556 struct Entry 00557 { 00558 value_type key; // Key of Loser element (winner for 0) 00559 unsigned_type index; // the number of losing segment 00560 }; 00561 00562 size_type size_; // total number of elements stored 00563 unsigned_type logK; // log of current tree size 00564 unsigned_type k; // invariant (k == 1 << logK), always a power of two 00565 // only entries 0 .. 
arity-1 may hold actual sequences, the other 00566 // entries arity .. KNKMAX-1 are sentinels to make the size of the tree 00567 // a power of two always 00568 00569 // stack of empty segment indices 00570 internal_bounded_stack<unsigned_type, arity> free_segments; 00571 00572 // upper levels of loser trees 00573 // entry[0] contains the winner info 00574 Entry entry[KNKMAX]; 00575 00576 // leaf information 00577 // note that Knuth uses indices k..k-1 00578 // while we use 0..k-1 00579 sequence_state states[KNKMAX]; // sequence including current position, dereference gives current element 00580 00581 prefetch_pool<block_type> * p_pool; 00582 write_pool<block_type> * w_pool; 00583 00584 sentinel_block_type sentinel_block; 00585 00586 public: 00587 ext_merger() : 00588 size_(0), logK(0), k(1), p_pool(0), w_pool(0) 00589 { 00590 init(); 00591 } 00592 00593 ext_merger(prefetch_pool<block_type> * p_pool_, 00594 write_pool<block_type> * w_pool_) : 00595 size_(0), logK(0), k(1), 00596 p_pool(p_pool_), 00597 w_pool(w_pool_) 00598 { 00599 init(); 00600 } 00601 00602 virtual ~ext_merger() 00603 { 00604 STXXL_VERBOSE1("ext_merger::~ext_merger()"); 00605 for (unsigned_type i = 0; i < arity; ++i) 00606 { 00607 delete states[i].block; 00608 } 00609 } 00610 00611 void set_pools(prefetch_pool<block_type> * p_pool_, 00612 write_pool<block_type> * w_pool_) 00613 { 00614 p_pool = p_pool_; 00615 w_pool = w_pool_; 00616 } 00617 00618 private: 00619 void init() 00620 { 00621 STXXL_VERBOSE2("ext_merger::init()"); 00622 assert(!cmp(cmp.min_value(), cmp.min_value())); // verify strict weak ordering 00623 00624 sentinel_block[0] = cmp.min_value(); 00625 00626 for (unsigned_type i = 0; i < KNKMAX; ++i) 00627 { 00628 states[i].merger = this; 00629 if (i < arity) 00630 states[i].block = new block_type; 00631 else 00632 states[i].block = convert_block_pointer(&(sentinel_block)); 00633 00634 states[i].make_inf(); 00635 } 00636 00637 assert(k == 1); 00638 free_segments.push(0); //total state: one 
free sequence 00639 00640 rebuildLoserTree(); 00641 assert(is_sentinel(*states[entry[0].index])); 00642 } 00643 00644 // rebuild loser tree information from the values in current 00645 void rebuildLoserTree() 00646 { 00647 unsigned_type winner = initWinner(1); 00648 entry[0].index = winner; 00649 entry[0].key = *(states[winner]); 00650 } 00651 00652 00653 // given any values in the leaves this 00654 // routing recomputes upper levels of the tree 00655 // from scratch in linear time 00656 // initialize entry[root].index and the subtree rooted there 00657 // return winner index 00658 unsigned_type initWinner(unsigned_type root) 00659 { 00660 if (root >= k) { // leaf reached 00661 return root - k; 00662 } else { 00663 unsigned_type left = initWinner(2 * root); 00664 unsigned_type right = initWinner(2 * root + 1); 00665 Element lk = *(states[left]); 00666 Element rk = *(states[right]); 00667 if (!(cmp(lk, rk))) { // right subtree looses 00668 entry[root].index = right; 00669 entry[root].key = rk; 00670 return left; 00671 } else { 00672 entry[root].index = left; 00673 entry[root].key = lk; 00674 return right; 00675 } 00676 } 00677 } 00678 00679 // first go up the tree all the way to the root 00680 // hand down old winner for the respective subtree 00681 // based on new value, and old winner and loser 00682 // update each node on the path to the root top down. 
00683 // This is implemented recursively 00684 void update_on_insert( 00685 unsigned_type node, 00686 const Element & newKey, 00687 unsigned_type newIndex, 00688 Element * winnerKey, 00689 unsigned_type * winnerIndex, // old winner 00690 unsigned_type * mask) // 1 << (ceil(log KNK) - dist-from-root) 00691 { 00692 if (node == 0) { // winner part of root 00693 *mask = 1 << (logK - 1); 00694 *winnerKey = entry[0].key; 00695 *winnerIndex = entry[0].index; 00696 if (cmp(entry[node].key, newKey)) 00697 { 00698 entry[node].key = newKey; 00699 entry[node].index = newIndex; 00700 } 00701 } else { 00702 update_on_insert(node >> 1, newKey, newIndex, winnerKey, winnerIndex, mask); 00703 Element loserKey = entry[node].key; 00704 unsigned_type loserIndex = entry[node].index; 00705 if ((*winnerIndex & *mask) != (newIndex & *mask)) { // different subtrees 00706 if (cmp(loserKey, newKey)) { // newKey will have influence here 00707 if (cmp(*winnerKey, newKey)) { // old winner loses here 00708 entry[node].key = *winnerKey; 00709 entry[node].index = *winnerIndex; 00710 } else { // new entry looses here 00711 entry[node].key = newKey; 00712 entry[node].index = newIndex; 00713 } 00714 } 00715 *winnerKey = loserKey; 00716 *winnerIndex = loserIndex; 00717 } 00718 // note that nothing needs to be done if 00719 // the winner came from the same subtree 00720 // a) newKey <= winnerKey => even more reason for the other tree to loose 00721 // b) newKey > winnerKey => the old winner will beat the new 00722 // entry further down the tree 00723 // also the same old winner is handed down the tree 00724 00725 *mask >>= 1; // next level 00726 } 00727 } 00728 00729 // make the tree two times as wide 00730 void doubleK() 00731 { 00732 STXXL_VERBOSE1("ext_merger::doubleK (before) k=" << k << " logK=" << logK << " KNKMAX=" << KNKMAX << " arity=" << arity << " #free=" << free_segments.size()); 00733 assert(k > 0); 00734 assert(k < arity); 00735 assert(free_segments.empty()); // stack was free (probably 
not needed) 00736 00737 // make all new entries free 00738 // and push them on the free stack 00739 for (unsigned_type i = 2 * k - 1; i >= k; i--) //backwards 00740 { 00741 states[i].make_inf(); 00742 if (i < arity) 00743 free_segments.push(i); 00744 } 00745 00746 // double the size 00747 k *= 2; 00748 logK++; 00749 00750 STXXL_VERBOSE1("ext_merger::doubleK (after) k=" << k << " logK=" << logK << " KNKMAX=" << KNKMAX << " arity=" << arity << " #free=" << free_segments.size()); 00751 assert(!free_segments.empty()); 00752 00753 // recompute loser tree information 00754 rebuildLoserTree(); 00755 } 00756 00757 00758 // compact nonempty segments in the left half of the tree 00759 void compactTree() 00760 { 00761 STXXL_VERBOSE1("ext_merger::compactTree (before) k=" << k << " logK=" << logK << " #free=" << free_segments.size()); 00762 assert(logK > 0); 00763 00764 // compact all nonempty segments to the left 00765 00766 unsigned_type to = 0; 00767 for (unsigned_type from = 0; from < k; from++) 00768 { 00769 if (!is_segment_empty(from)) 00770 { 00771 assert(is_segment_allocated(from)); 00772 if (from != to) { 00773 assert(!is_segment_allocated(to)); 00774 states[to].swap(states[from]); 00775 } 00776 ++to; 00777 } 00778 } 00779 00780 // half degree as often as possible 00781 while (k > 1 && to <= (k / 2)) { 00782 k /= 2; 00783 logK--; 00784 } 00785 00786 // overwrite garbage and compact the stack of free segment indices 00787 free_segments.clear(); // none free 00788 for ( ; to < k; to++) { 00789 assert(!is_segment_allocated(to)); 00790 states[to].make_inf(); 00791 if (to < arity) 00792 free_segments.push(to); 00793 } 00794 00795 STXXL_VERBOSE1("ext_merger::compactTree (after) k=" << k << " logK=" << logK << " #free=" << free_segments.size()); 00796 assert(k > 0); 00797 00798 // recompute loser tree information 00799 rebuildLoserTree(); 00800 } 00801 00802 00803 #if 0 00804 void swap(ext_merger & obj) 00805 { 00806 std::swap(cmp, obj.cmp); 00807 std::swap(free_segments, 
obj.free_segments); 00808 std::swap(size_, obj.size_); 00809 std::swap(logK, obj.logK); 00810 std::swap(k, obj.k); 00811 swap_1D_arrays(entry, obj.entry, KNKMAX); 00812 swap_1D_arrays(states, obj.states, KNKMAX); 00813 00814 // std::swap(p_pool,obj.p_pool); 00815 // std::swap(w_pool,obj.w_pool); 00816 } 00817 #endif 00818 00819 public: 00820 unsigned_type mem_cons() const // only rough estimation 00821 { 00822 return (arity * block_type::raw_size); 00823 } 00824 00825 // delete the (length = end-begin) smallest elements and write them to "begin..end" 00826 // empty segments are deallocated 00827 // require: 00828 // - there are at least length elements 00829 // - segments are ended by sentinels 00830 template <class OutputIterator> 00831 void multi_merge(OutputIterator begin, OutputIterator end) 00832 { 00833 size_type length = end - begin; 00834 00835 STXXL_VERBOSE1("ext_merger::multi_merge from " << k << " sequence(s), length = " << length); 00836 00837 if (length == 0) 00838 return; 00839 00840 assert(k > 0); 00841 assert(length <= size_); 00842 00843 //Hint first non-internal (actually second) block of each sequence. 00844 //This is mandatory to ensure proper synchronization between prefetch pool and write pool. 
00845 for (unsigned_type i = 0; i < k; ++i) 00846 { 00847 if (states[i].bids != NULL && !states[i].bids->empty()) 00848 p_pool->hint(states[i].bids->front(), *w_pool); 00849 } 00850 00851 switch (logK) { 00852 case 0: 00853 assert(k == 1); 00854 assert(entry[0].index == 0); 00855 assert(free_segments.empty()); 00856 //memcpy(to, states[0], length * sizeof(Element)); 00857 //std::copy(states[0],states[0]+length,to); 00858 for (size_type i = 0; i < length; ++i, ++(states[0]), ++begin) 00859 *begin = *(states[0]); 00860 00861 entry[0].key = **states; 00862 if (is_segment_empty(0)) 00863 deallocate_segment(0); 00864 00865 break; 00866 case 1: 00867 assert(k == 2); 00868 merge_iterator(states[0], states[1], begin, length, cmp); 00869 rebuildLoserTree(); 00870 if (is_segment_empty(0) && is_segment_allocated(0)) 00871 deallocate_segment(0); 00872 00873 if (is_segment_empty(1) && is_segment_allocated(1)) 00874 deallocate_segment(1); 00875 00876 break; 00877 case 2: 00878 assert(k == 4); 00879 if (is_segment_empty(3)) 00880 merge3_iterator(states[0], states[1], states[2], begin, length, cmp); 00881 else 00882 merge4_iterator(states[0], states[1], states[2], states[3], begin, length, cmp); 00883 rebuildLoserTree(); 00884 if (is_segment_empty(0) && is_segment_allocated(0)) 00885 deallocate_segment(0); 00886 00887 if (is_segment_empty(1) && is_segment_allocated(1)) 00888 deallocate_segment(1); 00889 00890 if (is_segment_empty(2) && is_segment_allocated(2)) 00891 deallocate_segment(2); 00892 00893 if (is_segment_empty(3) && is_segment_allocated(3)) 00894 deallocate_segment(3); 00895 00896 break; 00897 case 3: multi_merge_f<OutputIterator, 3>(begin, end); 00898 break; 00899 case 4: multi_merge_f<OutputIterator, 4>(begin, end); 00900 break; 00901 case 5: multi_merge_f<OutputIterator, 5>(begin, end); 00902 break; 00903 case 6: multi_merge_f<OutputIterator, 6>(begin, end); 00904 break; 00905 case 7: multi_merge_f<OutputIterator, 7>(begin, end); 00906 break; 00907 case 8: 
multi_merge_f<OutputIterator, 8>(begin, end); 00908 break; 00909 case 9: multi_merge_f<OutputIterator, 9>(begin, end); 00910 break; 00911 case 10: multi_merge_f<OutputIterator, 10>(begin, end); 00912 break; 00913 default: multi_merge_k(begin, end); 00914 break; 00915 } 00916 00917 00918 size_ -= length; 00919 00920 // compact tree if it got considerably smaller 00921 { 00922 const unsigned_type num_segments_used = std::min<unsigned_type>(arity, k) - free_segments.size(); 00923 const unsigned_type num_segments_trigger = k - (3 * k / 5); 00924 // using k/2 would be worst case inefficient (for large k) 00925 // for k \in {2, 4, 8} the trigger is k/2 which is good 00926 // because we have special mergers for k \in {1, 2, 4} 00927 // there is also a special 3-way-merger, that will be 00928 // triggered if k == 4 && is_segment_empty(3) 00929 STXXL_VERBOSE3("ext_merger compact? k=" << k << " #used=" << num_segments_used 00930 << " <= #trigger=" << num_segments_trigger << " ==> " 00931 << ((k > 1 && num_segments_used <= num_segments_trigger) ? "yes" : "no ") 00932 << " || " 00933 << ((k == 4 && !free_segments.empty() && !is_segment_empty(3)) ? 
"yes" : "no ") 00934 << " #free=" << free_segments.size()); 00935 if (k > 1 && ((num_segments_used <= num_segments_trigger) || 00936 (k == 4 && !free_segments.empty() && !is_segment_empty(3)))) 00937 { 00938 compactTree(); 00939 } 00940 } 00941 } 00942 00943 private: 00944 // multi-merge for arbitrary K 00945 template <class OutputIterator> 00946 void multi_merge_k(OutputIterator begin, OutputIterator end) 00947 { 00948 Entry * currentPos; 00949 Element currentKey; 00950 unsigned_type currentIndex; // leaf pointed to by current entry 00951 unsigned_type kReg = k; 00952 OutputIterator done = end; 00953 OutputIterator to = begin; 00954 unsigned_type winnerIndex = entry[0].index; 00955 Element winnerKey = entry[0].key; 00956 00957 while (to != done) 00958 { 00959 // write result 00960 *to = *(states[winnerIndex]); 00961 00962 // advance winner segment 00963 ++(states[winnerIndex]); 00964 00965 winnerKey = *(states[winnerIndex]); 00966 00967 // remove winner segment if empty now 00968 if (is_sentinel(winnerKey)) // 00969 deallocate_segment(winnerIndex); 00970 00971 00972 // go up the entry-tree 00973 for (unsigned_type i = (winnerIndex + kReg) >> 1; i > 0; i >>= 1) { 00974 currentPos = entry + i; 00975 currentKey = currentPos->key; 00976 if (cmp(winnerKey, currentKey)) { 00977 currentIndex = currentPos->index; 00978 currentPos->key = winnerKey; 00979 currentPos->index = winnerIndex; 00980 winnerKey = currentKey; 00981 winnerIndex = currentIndex; 00982 } 00983 } 00984 00985 ++to; 00986 } 00987 entry[0].index = winnerIndex; 00988 entry[0].key = winnerKey; 00989 } 00990 00991 template <class OutputIterator, unsigned LogK> 00992 void multi_merge_f(OutputIterator begin, OutputIterator end) 00993 { 00994 OutputIterator done = end; 00995 OutputIterator to = begin; 00996 unsigned_type winnerIndex = entry[0].index; 00997 Entry * regEntry = entry; 00998 sequence_state * regStates = states; 00999 Element winnerKey = entry[0].key; 01000 01001 assert(logK >= LogK); 01002 while (to 
!= done) 01003 { 01004 // write result 01005 *to = *(regStates[winnerIndex]); 01006 01007 // advance winner segment 01008 ++(regStates[winnerIndex]); 01009 01010 winnerKey = *(regStates[winnerIndex]); 01011 01012 01013 // remove winner segment if empty now 01014 if (is_sentinel(winnerKey)) 01015 deallocate_segment(winnerIndex); 01016 01017 01018 ++to; 01019 01020 // update loser tree 01021 #define TreeStep(L) \ 01022 if (1 << LogK >= 1 << L) { \ 01023 Entry * pos ## L = regEntry + ((winnerIndex + (1 << LogK)) >> (((int(LogK - L) + 1) >= 0) ? ((LogK - L) + 1) : 0)); \ 01024 Element key ## L = pos ## L->key; \ 01025 if (cmp(winnerKey, key ## L)) { \ 01026 unsigned_type index ## L = pos ## L->index; \ 01027 pos ## L->key = winnerKey; \ 01028 pos ## L->index = winnerIndex; \ 01029 winnerKey = key ## L; \ 01030 winnerIndex = index ## L; \ 01031 } \ 01032 } 01033 TreeStep(10); 01034 TreeStep(9); 01035 TreeStep(8); 01036 TreeStep(7); 01037 TreeStep(6); 01038 TreeStep(5); 01039 TreeStep(4); 01040 TreeStep(3); 01041 TreeStep(2); 01042 TreeStep(1); 01043 #undef TreeStep 01044 } 01045 regEntry[0].index = winnerIndex; 01046 regEntry[0].key = winnerKey; 01047 } 01048 01049 public: 01050 bool spaceIsAvailable() const // for new segment 01051 { 01052 return k < arity || !free_segments.empty(); 01053 } 01054 01055 01056 // insert segment beginning at to 01057 // require: spaceIsAvailable() == 1 01058 template <class Merger> 01059 void insert_segment(Merger & another_merger, size_type segment_size) 01060 { 01061 STXXL_VERBOSE1("ext_merger::insert_segment(merger,...)" << this); 01062 01063 if (segment_size > 0) 01064 { 01065 // get a free slot 01066 if (free_segments.empty()) { // tree is too small 01067 doubleK(); 01068 } 01069 assert(!free_segments.empty()); 01070 unsigned_type free_slot = free_segments.top(); 01071 free_segments.pop(); 01072 01073 01074 // link new segment 01075 assert(segment_size); 01076 unsigned_type nblocks = segment_size / block_type::size; 01077 
//assert(nblocks); // at least one block 01078 STXXL_VERBOSE1("ext_merger::insert_segment nblocks=" << nblocks); 01079 if (nblocks == 0) 01080 { 01081 STXXL_VERBOSE1("ext_merger::insert_segment(merger,...) WARNING: inserting a segment with " << 01082 nblocks << " blocks"); 01083 STXXL_VERBOSE1("THIS IS INEFFICIENT: TRY TO CHANGE PRIORITY QUEUE PARAMETERS"); 01084 } 01085 unsigned_type first_size = segment_size % block_type::size; 01086 if (first_size == 0) 01087 { 01088 first_size = block_type::size; 01089 --nblocks; 01090 } 01091 block_manager * bm = block_manager::get_instance(); 01092 std::list<bid_type> * bids = new std::list<bid_type>(nblocks); 01093 bm->new_blocks(alloc_strategy(), bids->begin(), bids->end()); 01094 block_type * first_block = new block_type; 01095 01096 another_merger.multi_merge( 01097 first_block->begin() + (block_type::size - first_size), 01098 first_block->end()); 01099 01100 STXXL_VERBOSE1("last element of first block " << *(first_block->end() - 1)); 01101 assert(!cmp(*(first_block->begin() + (block_type::size - first_size)), *(first_block->end() - 1))); 01102 01103 assert(w_pool->size() > 0); 01104 01105 for (typename std::list<bid_type>::iterator curbid = bids->begin(); curbid != bids->end(); ++curbid) 01106 { 01107 block_type * b = w_pool->steal(); 01108 another_merger.multi_merge(b->begin(), b->end()); 01109 STXXL_VERBOSE1("first element of following block " << *curbid << " " << *(b->begin())); 01110 STXXL_VERBOSE1("last element of following block " << *curbid << " " << *(b->end() - 1)); 01111 assert(!cmp(*(b->begin()), *(b->end() - 1))); 01112 w_pool->write(b, *curbid); //->wait() does not help 01113 STXXL_VERBOSE1("written to block " << *curbid << " cached in " << b); 01114 } 01115 01116 insert_segment(bids, first_block, first_size, free_slot); 01117 01118 size_ += segment_size; 01119 01120 // propagate new information up the tree 01121 Element dummyKey; 01122 unsigned_type dummyIndex; 01123 unsigned_type dummyMask; 01124 
update_on_insert((free_slot + k) >> 1, *(states[free_slot]), free_slot,
                             &dummyKey, &dummyIndex, &dummyMask);
        } else {
            // deallocate memory ?
            STXXL_VERBOSE1("Merged segment with zero size.");
        }
    }

    size_type size() const { return size_; }

protected:
    // internal helper: attach an already merged segment
    // (block id list + leading in-memory block) to the given free slot
    void insert_segment(std::list<bid_type> * bidlist, block_type * first_block,
                        unsigned_type first_size, unsigned_type slot)
    {
        STXXL_VERBOSE1("ext_merger::insert_segment(bidlist,...) " << this << " " << bidlist->size() << " " << slot);
        assert(!is_segment_allocated(slot));
        assert(first_size > 0);

        sequence_state & new_sequence = states[slot];
        // position the cursor on the first valid element of the leading block
        new_sequence.current = block_type::size - first_size;
        std::swap(new_sequence.block, first_block);
        delete first_block;          // the slot's previous block, if any
        std::swap(new_sequence.bids, bidlist);
        if (bidlist) // the old list
        {
            assert(bidlist->empty());
            delete bidlist;
        }
        new_sequence.allocated = true;
        assert(is_segment_allocated(slot));
    }

    // free an empty segment .
    void deallocate_segment(unsigned_type slot)
    {
        STXXL_VERBOSE1("ext_merger::deallocate_segment() deleting segment " << slot << " allocated=" << int(is_segment_allocated(slot)));
        assert(is_segment_allocated(slot));
        states[slot].allocated = false;
        states[slot].make_inf();     // park the cursor on the sentinel

        // push on the stack of free segment indices
        free_segments.push(slot);
    }

    // is this segment empty ?
    bool is_segment_empty(unsigned_type slot) const
    {
        return is_sentinel(*(states[slot]));
    }

    // Is this segment allocated? Otherwise it's empty,
    // already on the stack of free segment indices and can be reused.
    bool is_segment_allocated(unsigned_type slot) const
    {
        return states[slot].allocated;
    }
}; //ext_merger


// Internal-memory K-way merger based on a loser (tournament) tree.
// The data structure from Knuth, "Sorting and Searching", Section 5.4.1
template <class ValTp_, class Cmp_, unsigned KNKMAX>
class loser_tree : private noncopyable
{
public:
    typedef ValTp_ value_type;
    typedef Cmp_ comparator_type;
    typedef value_type Element;

private:
    struct Entry
    {
        value_type key;      // Key of Loser element (winner for 0)
        unsigned_type index; // number of losing segment
    };

    comparator_type cmp;
    // stack of free segment indices
    internal_bounded_stack<unsigned_type, KNKMAX> free_segments;

    unsigned_type size_; // total number of elements stored
    unsigned_type logK;  // log of current tree size
    unsigned_type k;     // invariant (k == 1 << logK), always a power of two

    Element sentinel;    // target of free segment pointers

    // upper levels of loser trees
    // entry[0] contains the winner info
    Entry entry[KNKMAX];

    // leaf information
    // note that Knuth uses indices k..k-1
    // while we use 0..k-1
    Element * current[KNKMAX];          // pointer to current element
    Element * segment[KNKMAX];          // start of Segments
    unsigned_type segment_size[KNKMAX]; // just to count the internal memory consumption

    unsigned_type mem_cons_;

    // private member functions
    unsigned_type initWinner(unsigned_type root);
    void update_on_insert(unsigned_type node, const Element & newKey, unsigned_type newIndex,
                          Element * winnerKey, unsigned_type * winnerIndex, unsigned_type * mask);
    void deallocate_segment(unsigned_type slot);
    void doubleK();
    void compactTree();
    void rebuildLoserTree();
    bool is_segment_empty(unsigned_type slot);
    void multi_merge_k(Element * to, unsigned_type length);
    // multi-merge for a compile-time number of 2^LogK segments;
    // the tree-level loop is unrolled via the TreeStep macro
    template <unsigned LogK>
    void multi_merge_f(Element * to, unsigned_type length)
    {
        //Entry *currentPos;
        //Element currentKey;
        //int currentIndex; // leaf pointed to by current entry
        Element * done = to + length;
        Entry * regEntry = entry;       // cached in locals ("registers")
        Element ** regStates = current;
        unsigned_type winnerIndex = regEntry[0].index;
        Element winnerKey = regEntry[0].key;
        Element * winnerPos;
        //Element sup = sentinel; // supremum

        assert(logK >= LogK);
        while (to != done)
        {
            winnerPos = regStates[winnerIndex];

            // write result
            *to = winnerKey;

            // advance winner segment
            ++winnerPos;
            regStates[winnerIndex] = winnerPos;
            winnerKey = *winnerPos;

            // remove winner segment if empty now
            if (is_sentinel(winnerKey))
            {
                deallocate_segment(winnerIndex);
            }
            ++to;

            // update loser tree: one TreeStep per level, root-most last;
            // levels above LogK are compiled out by the constant condition
#define TreeStep(L) \
    if (1 << LogK >= 1 << L) { \
        Entry * pos ## L = regEntry + ((winnerIndex + (1 << LogK)) >> (((int(LogK - L) + 1) >= 0) ? ((LogK - L) + 1) : 0)); \
        Element key ## L = pos ## L->key; \
        if (cmp(winnerKey, key ## L)) { \
            unsigned_type index ## L = pos ## L->index; \
            pos ## L->key = winnerKey; \
            pos ## L->index = winnerIndex; \
            winnerKey = key ## L; \
            winnerIndex = index ## L; \
        } \
    }
            TreeStep(10);
            TreeStep(9);
            TreeStep(8);
            TreeStep(7);
            TreeStep(6);
            TreeStep(5);
            TreeStep(4);
            TreeStep(3);
            TreeStep(2);
            TreeStep(1);
#undef TreeStep
        }
        regEntry[0].index = winnerIndex;
        regEntry[0].key = winnerKey;
    }

public:
    // a key equal to cmp.min_value() terminates every segment
    bool is_sentinel(const Element & a)
    {
        return !(cmp(cmp.min_value(), a));
    }
    bool not_sentinel(const Element & a)
    {
        return cmp(cmp.min_value(), a);
    }

public:
    loser_tree();
    ~loser_tree();
    void init();

    void swap(loser_tree & obj)
    {
        std::swap(cmp, obj.cmp);
        std::swap(free_segments, obj.free_segments);
        std::swap(size_, obj.size_);
        std::swap(logK, obj.logK);
        std::swap(k, obj.k);
        std::swap(sentinel, obj.sentinel);
        swap_1D_arrays(entry, obj.entry, KNKMAX);
        swap_1D_arrays(current, obj.current, KNKMAX);
        swap_1D_arrays(segment, obj.segment, KNKMAX);
        swap_1D_arrays(segment_size, obj.segment_size, KNKMAX);
        std::swap(mem_cons_, obj.mem_cons_);
    }

    void multi_merge(Element * begin, Element * end)
    {
        multi_merge(begin, end - begin);
    }
    void multi_merge(Element *, unsigned_type length);

    unsigned_type mem_cons() const { return mem_cons_; }

    bool spaceIsAvailable() const // for new segment
    {
        return k < KNKMAX || !free_segments.empty();
    }

    void insert_segment(Element * to, unsigned_type sz); // insert segment beginning at to
    unsigned_type size() { return size_; }
};

template <class
ValTp_, class Cmp_, unsigned KNKMAX>
loser_tree<ValTp_, Cmp_, KNKMAX>::loser_tree() : size_(0), logK(0), k(1), mem_cons_(0)
{
    // start with a 1-way tree whose only leaf is free
    free_segments.push(0);
    segment[0] = 0;
    current[0] = &sentinel;
    // entry and sentinel are initialized by init
    // since they need the value of supremum
    init();
}

template <class ValTp_, class Cmp_, unsigned KNKMAX>
void loser_tree<ValTp_, Cmp_, KNKMAX>::init()
{
    assert(!cmp(cmp.min_value(), cmp.min_value())); // verify strict weak ordering
    sentinel = cmp.min_value();
    rebuildLoserTree();
    assert(current[entry[0].index] == &sentinel);
}


// rebuild loser tree information from the values in current
template <class ValTp_, class Cmp_, unsigned KNKMAX>
void loser_tree<ValTp_, Cmp_, KNKMAX>::rebuildLoserTree()
{
    assert(LOG2<KNKMAX>::floor == LOG2<KNKMAX>::ceil); // KNKMAX needs to be a power of two
    unsigned_type winner = initWinner(1);
    entry[0].index = winner;
    entry[0].key = *(current[winner]);
}


// given any values in the leaves this
// routing recomputes upper levels of the tree
// from scratch in linear time
// initialize entry[root].index and the subtree rooted there
// return winner index
template <class ValTp_, class Cmp_, unsigned KNKMAX>
unsigned_type loser_tree<ValTp_, Cmp_, KNKMAX>::initWinner(unsigned_type root)
{
    if (root >= k) { // leaf reached
        return root - k;
    } else {
        // recurse into both children, record the loser, hand up the winner
        unsigned_type left = initWinner(2 * root);
        unsigned_type right = initWinner(2 * root + 1);
        Element lk = *(current[left]);
        Element rk = *(current[right]);
        if (!(cmp(lk, rk))) { // right subtree looses
            entry[root].index = right;
            entry[root].key = rk;
            return left;
        } else {
            entry[root].index = left;
            entry[root].key = lk;
            return right;
        }
    }
}
01409 01410 01411 // first go up the tree all the way to the root 01412 // hand down old winner for the respective subtree 01413 // based on new value, and old winner and loser 01414 // update each node on the path to the root top down. 01415 // This is implemented recursively 01416 template <class ValTp_, class Cmp_, unsigned KNKMAX> 01417 void loser_tree<ValTp_, Cmp_, KNKMAX>::update_on_insert( 01418 unsigned_type node, 01419 const Element & newKey, 01420 unsigned_type newIndex, 01421 Element * winnerKey, 01422 unsigned_type * winnerIndex, // old winner 01423 unsigned_type * mask) // 1 << (ceil(log KNK) - dist-from-root) 01424 { 01425 if (node == 0) { // winner part of root 01426 *mask = 1 << (logK - 1); 01427 *winnerKey = entry[0].key; 01428 *winnerIndex = entry[0].index; 01429 if (cmp(entry[node].key, newKey)) 01430 { 01431 entry[node].key = newKey; 01432 entry[node].index = newIndex; 01433 } 01434 } else { 01435 update_on_insert(node >> 1, newKey, newIndex, winnerKey, winnerIndex, mask); 01436 Element loserKey = entry[node].key; 01437 unsigned_type loserIndex = entry[node].index; 01438 if ((*winnerIndex & *mask) != (newIndex & *mask)) { // different subtrees 01439 if (cmp(loserKey, newKey)) { // newKey will have influence here 01440 if (cmp(*winnerKey, newKey)) { // old winner loses here 01441 entry[node].key = *winnerKey; 01442 entry[node].index = *winnerIndex; 01443 } else { // new entry looses here 01444 entry[node].key = newKey; 01445 entry[node].index = newIndex; 01446 } 01447 } 01448 *winnerKey = loserKey; 01449 *winnerIndex = loserIndex; 01450 } 01451 // note that nothing needs to be done if 01452 // the winner came from the same subtree 01453 // a) newKey <= winnerKey => even more reason for the other tree to loose 01454 // b) newKey > winnerKey => the old winner will beat the new 01455 // entry further down the tree 01456 // also the same old winner is handed down the tree 01457 01458 *mask >>= 1; // next level 01459 } 01460 } 01461 01462 01463 // 
// make the tree two times as wide
template <class ValTp_, class Cmp_, unsigned KNKMAX>
void loser_tree<ValTp_, Cmp_, KNKMAX>::doubleK()
{
    STXXL_VERBOSE3("loser_tree::doubleK (before) k=" << k << " logK=" << logK << " KNKMAX=" << KNKMAX << " #free=" << free_segments.size());
    assert(k > 0);
    assert(k < KNKMAX);
    assert(free_segments.empty()); // stack was free (probably not needed)

    // make all new entries free
    // and push them on the free stack
    for (unsigned_type i = 2 * k - 1; i >= k; i--) // backwards
    {
        current[i] = &sentinel;
        segment[i] = NULL;
        free_segments.push(i);
    }

    // double the size
    k *= 2;
    logK++;

    STXXL_VERBOSE3("loser_tree::doubleK (after) k=" << k << " logK=" << logK << " KNKMAX=" << KNKMAX << " #free=" << free_segments.size());
    assert(!free_segments.empty());

    // recompute loser tree information
    rebuildLoserTree();
}


// compact nonempty segments in the left half of the tree
template <class ValTp_, class Cmp_, unsigned KNKMAX>
void loser_tree<ValTp_, Cmp_, KNKMAX>::compactTree()
{
    STXXL_VERBOSE3("loser_tree::compactTree (before) k=" << k << " logK=" << logK << " #free=" << free_segments.size());
    assert(logK > 0);

    // compact all nonempty segments to the left
    unsigned_type from = 0;
    unsigned_type to = 0;
    for ( ; from < k; from++)
    {
        if (not_sentinel(*(current[from])))
        {
            // slide live segment state down to the next free position
            segment_size[to] = segment_size[from];
            current[to] = current[from];
            segment[to] = segment[from];
            to++;
        }/*
            else
            {
            if(segment[from])
            {
            STXXL_VERBOSE2("loser_tree::compactTree() deleting segment "<<from<<
                           " address: "<<segment[from]<<" size: "<<segment_size[from]);
            delete [] segment[from];
            segment[from] = 0;
            mem_cons_ -= segment_size[from];
            }
            }*/
    }

    // half degree as often as possible
    while (k > 1 && to <= (k / 2)) {
        k /= 2;
        logK--;
    }

    // overwrite garbage and compact the stack of free segment indices
    free_segments.clear(); // none free
    for ( ; to < k; to++) {
        current[to] = &sentinel;
        free_segments.push(to);
    }

    STXXL_VERBOSE3("loser_tree::compactTree (after) k=" << k << " logK=" << logK << " #free=" << free_segments.size());

    // recompute loser tree information
    rebuildLoserTree();
}


// insert segment beginning at to
// require: spaceIsAvailable() == 1
template <class ValTp_, class Cmp_, unsigned KNKMAX>
void loser_tree<ValTp_, Cmp_, KNKMAX>::insert_segment(Element * to, unsigned_type sz)
{
    STXXL_VERBOSE2("loser_tree::insert_segment(" << to << "," << sz << ")");
    //std::copy(to,to + sz,std::ostream_iterator<ValTp_>(std::cout, "\n"));

    if (sz > 0)
    {
        // the segment must hold real data and be terminated by a sentinel
        assert(not_sentinel(to[0]));
        assert(not_sentinel(to[sz - 1]));
        assert(is_sentinel(to[sz]));

        // get a free slot
        if (free_segments.empty()) { // tree is too small
            doubleK();
        }
        assert(!free_segments.empty());
        unsigned_type index = free_segments.top();
        free_segments.pop();


        // link new segment
        current[index] = segment[index] = to;
        segment_size[index] = (sz + 1) * sizeof(value_type); // bytes incl. sentinel
        mem_cons_ += (sz + 1) * sizeof(value_type);
        size_ += sz;

        // propagate new information up the tree
        Element dummyKey;
        unsigned_type dummyIndex;
        unsigned_type dummyMask;
        update_on_insert((index + k) >> 1, *to, index,
                         &dummyKey, &dummyIndex, &dummyMask);
    } else {
        // immediately deallocate
        // this is not only an optimization
        // but also needed to keep free segments from
        // clogging up the tree
        delete[] to;
    }
}
template <class ValTp_, class Cmp_, unsigned KNKMAX>
loser_tree<ValTp_, Cmp_, KNKMAX>::~loser_tree()
{
    STXXL_VERBOSE1("loser_tree::~loser_tree()");
    // release every still-attached segment
    for (unsigned_type i = 0; i < k; ++i)
    {
        if (segment[i])
        {
            STXXL_VERBOSE2("loser_tree::~loser_tree() deleting segment " << i);
            delete[] segment[i];
            mem_cons_ -= segment_size[i];
        }
    }
    // check whether we did not loose memory
    assert(mem_cons_ == 0);
}

// free an empty segment .
template <class ValTp_, class Cmp_, unsigned KNKMAX>
void loser_tree<ValTp_, Cmp_, KNKMAX>::deallocate_segment(unsigned_type slot)
{
    // reroute current pointer to some empty sentinel segment
    // with a sentinel key
    STXXL_VERBOSE2("loser_tree::deallocate_segment() deleting segment " <<
                   slot << " address: " << segment[slot] << " size: " << segment_size[slot]);
    current[slot] = &sentinel;

    // free memory
    delete[] segment[slot];
    segment[slot] = NULL;
    mem_cons_ -= segment_size[slot];

    // push on the stack of free segment indices
    free_segments.push(slot);
}


// delete the length smallest elements and write them to "to"
// empty segments are deallocated
// require:
// - there are at least length elements
// - segments are ended by sentinels
template <class ValTp_, class Cmp_, unsigned KNKMAX>
void loser_tree<ValTp_, Cmp_, KNKMAX>::multi_merge(Element * to, unsigned_type length)
{
    STXXL_VERBOSE3("loser_tree::multi_merge(to=" << to << ", len=" << length << ") k=" << k);

    if (length == 0)
        return;

    assert(k > 0);
    assert(length <= size_);

    //This is the place to make statistics about internal multi_merge calls.
    // dispatch on the current tree size: small k has special mergers,
    // k in [8, 1024] uses the unrolled variant, larger k the generic one
    switch (logK) {
    case 0:
        assert(k == 1);
        assert(entry[0].index == 0);
        assert(free_segments.empty());
        //memcpy(to, current[0], length * sizeof(Element));
        std::copy(current[0], current[0] + length, to);
        current[0] += length;
        entry[0].key = **current;
        if (is_segment_empty(0))
            deallocate_segment(0);

        break;
    case 1:
        assert(k == 2);
        merge_iterator(current[0], current[1], to, length, cmp);
        rebuildLoserTree();
        if (is_segment_empty(0))
            deallocate_segment(0);

        if (is_segment_empty(1))
            deallocate_segment(1);

        break;
    case 2:
        assert(k == 4);
        // use the cheaper 3-way merge when the fourth segment is empty
        if (is_segment_empty(3))
            merge3_iterator(current[0], current[1], current[2], to, length, cmp);
        else
            merge4_iterator(current[0], current[1], current[2], current[3], to, length, cmp);

        rebuildLoserTree();
        if (is_segment_empty(0))
            deallocate_segment(0);

        if (is_segment_empty(1))
            deallocate_segment(1);

        if (is_segment_empty(2))
            deallocate_segment(2);

        if (is_segment_empty(3))
            deallocate_segment(3);

        break;
    case 3: multi_merge_f<3>(to, length);
        break;
    case 4: multi_merge_f<4>(to, length);
        break;
    case 5: multi_merge_f<5>(to, length);
        break;
    case 6: multi_merge_f<6>(to, length);
        break;
    case 7: multi_merge_f<7>(to, length);
        break;
    case 8: multi_merge_f<8>(to, length);
        break;
    case 9: multi_merge_f<9>(to, length);
        break;
    case 10: multi_merge_f<10>(to, length);
        break;
    default: multi_merge_k(to, length);
        break;
    }


    size_ -= length;

    // compact tree if it got considerably smaller
    {
        const unsigned_type num_segments_used = k - free_segments.size();
        const unsigned_type num_segments_trigger = k - (3 * k / 5);
        // using k/2 would be
// worst case inefficient (for large k)
        // for k \in {2, 4, 8} the trigger is k/2 which is good
        // because we have special mergers for k \in {1, 2, 4}
        // there is also a special 3-way-merger, that will be
        // triggered if k == 4 && is_segment_empty(3)
        STXXL_VERBOSE3("loser_tree compact? k=" << k << " #used=" << num_segments_used
                       << " <= #trigger=" << num_segments_trigger << " ==> "
                       << ((k > 1 && num_segments_used <= num_segments_trigger) ? "yes" : "no ")
                       << " || "
                       << ((k == 4 && !free_segments.empty() && !is_segment_empty(3)) ? "yes" : "no ")
                       << " #free=" << free_segments.size());
        if (k > 1 && ((num_segments_used <= num_segments_trigger) ||
                      (k == 4 && !free_segments.empty() && !is_segment_empty(3))))
        {
            compactTree();
        }
    }
    //std::copy(to,to + length,std::ostream_iterator<ValTp_>(std::cout, "\n"));
}


// is this segment empty and does not point to sentinel yet?
template <class ValTp_, class Cmp_, unsigned KNKMAX>
inline bool loser_tree<ValTp_, Cmp_, KNKMAX>::is_segment_empty(unsigned_type slot)
{
    return (is_sentinel(*(current[slot])) && (current[slot] != &sentinel));
}

// multi-merge for arbitrary K
template <class ValTp_, class Cmp_, unsigned KNKMAX>
void loser_tree<ValTp_, Cmp_, KNKMAX>::
multi_merge_k(Element * to, unsigned_type length)
{
    Entry * currentPos;
    Element currentKey;
    unsigned_type currentIndex; // leaf pointed to by current entry
    unsigned_type kReg = k;     // cache the arity in a local
    Element * done = to + length;
    unsigned_type winnerIndex = entry[0].index;
    Element winnerKey = entry[0].key;
    Element * winnerPos;

    while (to != done)
    {
        winnerPos = current[winnerIndex];

        // write result
        *to = winnerKey;

        // advance winner segment
        ++winnerPos;
        current[winnerIndex] = winnerPos;
        winnerKey =
*winnerPos;

        // remove winner segment if empty now
        if (is_sentinel(winnerKey)) //
            deallocate_segment(winnerIndex);


        // go up the entry-tree: replay the games on the path to the root
        for (unsigned_type i = (winnerIndex + kReg) >> 1; i > 0; i >>= 1) {
            currentPos = entry + i;
            currentKey = currentPos->key;
            if (cmp(winnerKey, currentKey)) {
                currentIndex = currentPos->index;
                currentPos->key = winnerKey;
                currentPos->index = winnerIndex;
                winnerKey = currentKey;
                winnerIndex = currentIndex;
            }
        }

        ++to;
    }
    // store the final winner back into the root entry
    entry[0].index = winnerIndex;
    entry[0].key = winnerKey;
}
} // namespace priority_queue_local

/*

   KNBufferSize1 = 32;
   KNN = 512; // bandwidth
   KNKMAX = 64; // maximal arity
   KNLevels = 4; // overall capacity >= KNN*KNKMAX^KNLevels
   LogKNKMAX = 6; // ceil(log KNK)
 */

// internal memory consumption >= N_*(KMAX_^Levels_) + ext

// compile-time parameter bundle for priority_queue
template <
    class Tp_,
    class Cmp_,
    unsigned BufferSize1_ = 32,       // equalize procedure call overheads etc.
    unsigned N_ = 512,                // bandwidth
    unsigned IntKMAX_ = 64,           // maximal arity for internal mergers
    unsigned IntLevels_ = 4,
    unsigned BlockSize_ = (2 * 1024 * 1024),
    unsigned ExtKMAX_ = 64,           // maximal arity for external mergers
    unsigned ExtLevels_ = 2,
    class AllocStr_ = STXXL_DEFAULT_ALLOC_STRATEGY
    >
struct priority_queue_config
{
    typedef Tp_ value_type;
    typedef Cmp_ comparator_type;
    typedef AllocStr_ alloc_strategy_type;
    enum
    {
        BufferSize1 = BufferSize1_,
        N = N_,
        IntKMAX = IntKMAX_,
        IntLevels = IntLevels_,
        ExtLevels = ExtLevels_,
        BlockSize = BlockSize_,
        ExtKMAX = ExtKMAX_,
        E = sizeof(Tp_)   // size of one element in bytes
    };
};

__STXXL_END_NAMESPACE

// std::swap overloads so the mergers swap state instead of copying
namespace std
{
    template <class BlockType_,
              class Cmp_,
              unsigned Arity_,
              class AllocStr_>
    void swap(stxxl::priority_queue_local::ext_merger<BlockType_, Cmp_, Arity_, AllocStr_> & a,
              stxxl::priority_queue_local::ext_merger<BlockType_, Cmp_, Arity_, AllocStr_> & b)
    {
        a.swap(b);
    }
    template <class ValTp_, class Cmp_, unsigned KNKMAX>
    void swap(stxxl::priority_queue_local::loser_tree<ValTp_, Cmp_, KNKMAX> & a,
              stxxl::priority_queue_local::loser_tree<ValTp_, Cmp_, KNKMAX> & b)
    {
        a.swap(b);
    }
}

__STXXL_BEGIN_NAMESPACE

// External-memory priority queue
template <class Config_>
class priority_queue : private noncopyable
{
public:
    typedef Config_ Config;
    enum
    {
        BufferSize1 = Config::BufferSize1,
        N = Config::N,
        IntKMAX = Config::IntKMAX,
        IntLevels = Config::IntLevels,
        ExtLevels = Config::ExtLevels,
        Levels = Config::IntLevels + Config::ExtLevels,
        BlockSize = Config::BlockSize,
        ExtKMAX = Config::ExtKMAX
    };

    typedef typename Config::value_type value_type;
    typedef typename Config::comparator_type
        comparator_type;
    typedef typename Config::alloc_strategy_type alloc_strategy_type;
    typedef stxxl::uint64 size_type;
    typedef typed_block<BlockSize, value_type> block_type;

protected:
    typedef priority_queue_local::internal_priority_queue<value_type, std::vector<value_type>, comparator_type>
    insert_heap_type;

    typedef priority_queue_local::loser_tree<
        value_type,
        comparator_type,
        IntKMAX> int_merger_type;

    typedef priority_queue_local::ext_merger<
        block_type,
        comparator_type,
        ExtKMAX,
        alloc_strategy_type> ext_merger_type;


    int_merger_type itree[IntLevels];       // internal merger hierarchy
    prefetch_pool<block_type> & p_pool;
    write_pool<block_type> & w_pool;
    ext_merger_type * etree;                // external merger hierarchy (array of ExtLevels)

    // one delete buffer for each tree => group buffer
    value_type buffer2[Levels][N + 1]; // tree->buffer2->buffer1 (extra space for sentinel)
    value_type * minBuffer2[Levels];   // minBuffer2[i] is current start of buffer2[i], end is buffer2[i] + N

    // overall delete buffer
    value_type buffer1[BufferSize1 + 1];
    value_type * minBuffer1; // is current start of buffer1, end is buffer1 + BufferSize1

    comparator_type cmp;

    // insert buffer
    insert_heap_type insertHeap;

    // how many levels are active
    unsigned_type activeLevels;

    // total size not counting insertBuffer and buffer1
    size_type size_;
    bool deallocate_pools;

    // private member functions
    void refillBuffer1();
    unsigned_type refillBuffer2(unsigned_type k);

    unsigned_type makeSpaceAvailable(unsigned_type level);
    void emptyInsertHeap();

    value_type getSupremum() const { return cmp.min_value(); } //{ return buffer2[0][KNN].key; }
    unsigned_type getSize1() const { return (buffer1 + BufferSize1) - minBuffer1; }
    unsigned_type getSize2(unsigned_type i) const { return
                                                       &(buffer2[i][N]) - minBuffer2[i]; }

public:
    // construct with externally supplied prefetch/write pools
    priority_queue(prefetch_pool<block_type> & p_pool_, write_pool<block_type> & w_pool_);

    // construct with internally allocated pools of the given byte sizes
    priority_queue(unsigned_type p_pool_mem, unsigned_type w_pool_mem);

    void swap(priority_queue & obj)
    {
        //swap_1D_arrays(itree,obj.itree,IntLevels); // does not work in g++ 3.4.3 :( bug?
        for (unsigned_type i = 0; i < IntLevels; ++i)
            std::swap(itree[i], obj.itree[i]);

        // std::swap(p_pool,obj.p_pool);
        // std::swap(w_pool,obj.w_pool);
        std::swap(etree, obj.etree);
        for (unsigned_type i1 = 0; i1 < Levels; ++i1)
            for (unsigned_type i2 = 0; i2 < (N + 1); ++i2)
                std::swap(buffer2[i1][i2], obj.buffer2[i1][i2]);

        swap_1D_arrays(minBuffer2, obj.minBuffer2, Levels);
        swap_1D_arrays(buffer1, obj.buffer1, BufferSize1 + 1);
        std::swap(minBuffer1, obj.minBuffer1);
        std::swap(cmp, obj.cmp);
        std::swap(insertHeap, obj.insertHeap);
        std::swap(activeLevels, obj.activeLevels);
        std::swap(size_, obj.size_);
        //std::swap(deallocate_pools,obj.deallocate_pools);
    }

    virtual ~priority_queue();

    // number of elements contained
    size_type size() const;

    bool empty() const { return (size() == 0); }

    // reference to the largest element (w.r.t. the comparator)
    const value_type & top() const;

    void pop();

    void push(const value_type & obj);

    // internal memory consumption in bytes
    unsigned_type mem_cons() const
    {
        unsigned_type dynam_alloc_mem(0), i(0);
        //dynam_alloc_mem += w_pool.mem_cons();
        //dynam_alloc_mem += p_pool.mem_cons();
        for ( ; i < IntLevels; ++i)
            dynam_alloc_mem += itree[i].mem_cons();

        for (i = 0; i < ExtLevels; ++i)
            dynam_alloc_mem += etree[i].mem_cons();


        return (sizeof(*this) +
                sizeof(ext_merger_type) * ExtLevels +
                dynam_alloc_mem);
    }
};


template <class Config_>
inline typename priority_queue<Config_>::size_type
priority_queue<Config_>::size() const
{
    // elements in the trees + insert heap (minus its sentinel) + delete buffer
    return size_ +
           insertHeap.size() - 1 +
           ((buffer1 + BufferSize1) - minBuffer1);
}


template <class Config_>
inline const typename priority_queue<Config_>::value_type & priority_queue<Config_>::top() const
{
    assert(!insertHeap.empty());

    // the top element is either the insert heap's top or the delete buffer's head
    const typename priority_queue<Config_>::value_type & t = insertHeap.top();
    if (/*(!insertHeap.empty()) && */ cmp(*minBuffer1, t))
        return t;


    return *minBuffer1;
}

template <class Config_>
inline void priority_queue<Config_>::pop()
{
    //STXXL_VERBOSE1("priority_queue::pop()");
    assert(!insertHeap.empty());

    if (/*(!insertHeap.empty()) && */ cmp(*minBuffer1, insertHeap.top()))
    {
        insertHeap.pop();
    }
    else
    {
        assert(minBuffer1 < buffer1 + BufferSize1);
        ++minBuffer1;
        // refill the delete buffer when it runs empty
        if (minBuffer1 == buffer1 + BufferSize1)
            refillBuffer1();
    }
}

template <class Config_>
inline void priority_queue<Config_>::push(const value_type & obj)
{
    //STXXL_VERBOSE3("priority_queue::push("<< obj <<")");
    assert(itree->not_sentinel(obj));
    // flush the insert heap into the merger hierarchy when full
    if (insertHeap.size() == N + 1)
        emptyInsertHeap();


    assert(!insertHeap.empty());

    insertHeap.push(obj);
}



template <class Config_>
priority_queue<Config_>::priority_queue(prefetch_pool<block_type> & p_pool_, write_pool<block_type> & w_pool_) :
    p_pool(p_pool_), w_pool(w_pool_),
    insertHeap(N + 2),
    activeLevels(0), size_(0),
    deallocate_pools(false)
{
    STXXL_VERBOSE2("priority_queue::priority_queue()");
    assert(!cmp(cmp.min_value(), cmp.min_value())); // verify strict weak ordering
    //etree = new ext_merger_type[ExtLevels](p_pool,w_pool);
    etree = new ext_merger_type[ExtLevels];
    for (unsigned_type j = 0; j < ExtLevels;
         ++j)
        etree[j].set_pools(&p_pool, &w_pool);

    value_type sentinel = cmp.min_value();
    buffer1[BufferSize1] = sentinel;      // sentinel
    insertHeap.push(sentinel);            // always keep the sentinel
    minBuffer1 = buffer1 + BufferSize1;   // empty
    for (unsigned_type i = 0; i < Levels; i++)
    {
        buffer2[i][N] = sentinel;         // sentinel
        minBuffer2[i] = &(buffer2[i][N]); // empty
    }
}

template <class Config_>
priority_queue<Config_>::priority_queue(unsigned_type p_pool_mem, unsigned_type w_pool_mem) :
    p_pool(*(new prefetch_pool<block_type>(p_pool_mem / BlockSize))),
    w_pool(*(new write_pool<block_type>(w_pool_mem / BlockSize))),
    insertHeap(N + 2),
    activeLevels(0), size_(0),
    deallocate_pools(true)
{
    STXXL_VERBOSE2("priority_queue::priority_queue()");
    assert(!cmp(cmp.min_value(), cmp.min_value())); // verify strict weak ordering
    etree = new ext_merger_type[ExtLevels];
    for (unsigned_type j = 0; j < ExtLevels; ++j)
        etree[j].set_pools(&p_pool, &w_pool);

    value_type sentinel = cmp.min_value();
    buffer1[BufferSize1] = sentinel;      // sentinel
    insertHeap.push(sentinel);            // always keep the sentinel
    minBuffer1 = buffer1 + BufferSize1;   // empty
    for (unsigned_type i = 0; i < Levels; i++)
    {
        buffer2[i][N] = sentinel;         // sentinel
        minBuffer2[i] = &(buffer2[i][N]); // empty
    }
}

template <class Config_>
priority_queue<Config_>::~priority_queue()
{
    STXXL_VERBOSE2("priority_queue::~priority_queue()");
    // the pools are owned only when they were allocated internally
    if (deallocate_pools)
    {
        delete &p_pool;
        delete &w_pool;
    }

    delete[] etree;
}

//--------------------- Buffer refilling -------------------------------

// refill buffer2[j] and return number of elements found
template <class Config_>
unsigned_type
priority_queue<Config_>::refillBuffer2(unsigned_type j)
{
    STXXL_VERBOSE2("priority_queue::refillBuffer2(" << j << ")");

    value_type * oldTarget;
    unsigned_type deleteSize;
    size_type treeSize = (j < IntLevels) ? itree[j].size() : etree[j - IntLevels].size(); //elements left in segments
    unsigned_type bufferSize = buffer2[j] + N - minBuffer2[j]; //elements left in target buffer
    if (treeSize + bufferSize >= size_type(N))
    { // buffer will be filled completely
        oldTarget = buffer2[j];
        deleteSize = N - bufferSize;
    }
    else
    {
        // fewer than N elements remain overall: right-align them so the
        // buffer still ends at the sentinel at buffer2[j][N]
        oldTarget = buffer2[j] + N - treeSize - bufferSize;
        deleteSize = treeSize;
    }

    if (deleteSize > 0)
    {
        // shift rest to beginning
        // possible hack:
        // - use memcpy if no overlap
        // (memmove is required here because source and target may overlap)
        memmove(oldTarget, minBuffer2[j], bufferSize * sizeof(value_type));
        minBuffer2[j] = oldTarget;

        // fill remaining space from tree
        if (j < IntLevels)
            itree[j].multi_merge(oldTarget + bufferSize, deleteSize);

        else
        {
            //external
            etree[j - IntLevels].multi_merge(oldTarget + bufferSize,
                                             oldTarget + bufferSize + deleteSize);
        }
    }


    //STXXL_MSG(deleteSize + bufferSize);
    //std::copy(oldTarget,oldTarget + deleteSize + bufferSize,std::ostream_iterator<value_type>(std::cout, "\n"));

    return deleteSize + bufferSize;
}


// move elements from the 2nd level buffers
// to the buffer
// Refills the deletion buffer buffer1 by merging the heads of the (up to 4)
// active level-2 buffers; dispatches to the 1/2/3/4-way merge routine
// matching activeLevels.
template <class Config_>
void priority_queue<Config_>::refillBuffer1()
{
    STXXL_VERBOSE2("priority_queue::refillBuffer1()");

    size_type totalSize = 0;
    unsigned_type sz;
    //activeLevels is <= 4
    // first make sure each active level-2 buffer holds at least BufferSize1
    // elements (or is known to be completely drained)
    for (int_type i = activeLevels - 1; i >= 0; i--)
    {
        if ((buffer2[i] + N) - minBuffer2[i] < BufferSize1)
        {
            sz = refillBuffer2(i);
            // max active level dry now?
            if (sz == 0 && unsigned_type(i) == activeLevels - 1)
                --activeLevels;

            else
                totalSize += sz;
        }
        else
        {
            totalSize += BufferSize1; // actually only a sufficient lower bound
        }
    }

    if (totalSize >= BufferSize1) // buffer can be filled completely
    {
        minBuffer1 = buffer1;
        sz = BufferSize1; // amount to be copied
        size_ -= size_type(BufferSize1); // amount left in buffer2
    }
    else
    {
        minBuffer1 = buffer1 + BufferSize1 - totalSize;
        sz = totalSize;
        assert(size_ == size_type(sz)); // trees and buffer2 get empty
        size_ = 0;
    }

    // now call simplified refill routines
    // which can make the assumption that
    // they find all they are asked to find in the buffers
    minBuffer1 = buffer1 + BufferSize1 - sz;
    STXXL_VERBOSE2("Active levels = " << activeLevels);
    switch (activeLevels)
    {
    case 0: break;
    case 1:
        std::copy(minBuffer2[0], minBuffer2[0] + sz, minBuffer1);
        minBuffer2[0] += sz;
        break;
    case 2:
        priority_queue_local::merge_iterator(
            minBuffer2[0],
            minBuffer2[1], minBuffer1, sz, cmp);
        break;
    case 3:
        priority_queue_local::merge3_iterator(
            minBuffer2[0],
            minBuffer2[1],
            minBuffer2[2], minBuffer1, sz, cmp);
        break;
    case 4:
        priority_queue_local::merge4_iterator(
            minBuffer2[0],
            minBuffer2[1],
            minBuffer2[2],
            minBuffer2[3], minBuffer1, sz, cmp); //side effect free
        break;
    default:
        STXXL_THROW(std::runtime_error, "priority_queue<...>::refillBuffer1()",
                    "Overflow! The number of buffers on 2nd level in stxxl::priority_queue is currently limited to 4");
    }

    //std::copy(minBuffer1,minBuffer1 + sz,std::ostream_iterator<value_type>(std::cout, "\n"));
}

//--------------------------------------------------------------------

// check if space is available on level k and
// empty this level if necessary leading to a recursive call.
// return the level where space was finally available
template <class Config_>
unsigned_type priority_queue<Config_>::makeSpaceAvailable(unsigned_type level)
{
    STXXL_VERBOSE2("priority_queue::makeSpaceAvailable(" << level << ")");
    unsigned_type finalLevel;
    assert(level < Levels);
    assert(level <= activeLevels);

    if (level == activeLevels)
        activeLevels++;


    // internal levels ask their internal merger; the last level always has
    // room; intermediate external levels ask their external merger
    const bool spaceIsAvailable_ =
        (level < IntLevels) ? itree[level].spaceIsAvailable()
        : ((level == Levels - 1) ? true : (etree[level - IntLevels].spaceIsAvailable()));

    if (spaceIsAvailable_)
    {
        finalLevel = level;
    }
    else
    {
        // this level is full: recursively free the next level, then move
        // this level's contents up as one sorted segment
        finalLevel = makeSpaceAvailable(level + 1);

        if (level < IntLevels - 1) // from internal to internal tree
        {
            unsigned_type segmentSize = itree[level].size();
            value_type * newSegment = new value_type[segmentSize + 1];
            itree[level].multi_merge(newSegment, segmentSize); // empty this level

            newSegment[segmentSize] = buffer1[BufferSize1]; // sentinel
            // for queues where size << #inserts
            // it might make sense to stay in this level if
            // segmentSize < alpha * KNN * k^level for some alpha < 1
            itree[level + 1].insert_segment(newSegment, segmentSize);
        }
        else
        {
            if (level == IntLevels - 1) // from internal to external tree
            {
                const unsigned_type segmentSize = itree[IntLevels - 1].size();
                STXXL_VERBOSE1("Inserting segment into first level external: " << level << " " << segmentSize);
                etree[0].insert_segment(itree[IntLevels - 1], segmentSize);
            }
            else // from external to external tree
            {
                const size_type segmentSize = etree[level - IntLevels].size();
                STXXL_VERBOSE1("Inserting segment into second level external: " << level << " " << segmentSize);
                etree[level - IntLevels + 1].insert_segment(etree[level - IntLevels], segmentSize);
            }
        }
    }
    return finalLevel;
}


// empty the insert heap into the main data structure
// Sorts the N heap elements into a new segment, re-merges them with the
// current contents of buffer1 and buffer2[0] (so ordering invariants hold),
// and inserts the segment into the level-0 internal merger.
template <class Config_>
void priority_queue<Config_>::emptyInsertHeap()
{
    STXXL_VERBOSE2("priority_queue::emptyInsertHeap()");
    assert(insertHeap.size() == (N + 1));

    const value_type sup = getSupremum();

    // build new segment
    value_type * newSegment = new value_type[N + 1];
    value_type * newPos = newSegment;

    // put the new data there for now
    //insertHeap.sortTo(newSegment);
    value_type * SortTo = newSegment;

    insertHeap.sort_to(SortTo);

    // restore the heap to "empty": only the sentinel (sorted last) remains
    SortTo = newSegment + N;
    insertHeap.clear();
    insertHeap.push(*SortTo);

    assert(insertHeap.size() == 1);

    newSegment[N] = sup; // sentinel

    // copy the buffer1 and buffer2[0] to temporary storage
    // (the temporary can be eliminated using some dirty tricks)
    // NOTE(review): tempSize is presumably a compile-time constant here
    // (N and BufferSize1 are config constants), so this is a fixed-size
    // stack array, not a VLA — confirm against the config class
    const unsigned_type tempSize = N + BufferSize1;
    value_type temp[tempSize + 1];
    unsigned_type sz1 = getSize1();
    unsigned_type sz2 = getSize2(0);
    value_type * pos = temp + tempSize - sz1 - sz2;
    std::copy(minBuffer1, minBuffer1 + sz1, pos);
    std::copy(minBuffer2[0], minBuffer2[0] + sz2, pos + sz1);
    temp[tempSize] = sup; // sentinel

    // refill buffer1
    // (using more complicated code it could be made somewhat fuller
    // in certain circumstances)
    priority_queue_local::merge_iterator(pos, newPos, minBuffer1, sz1, cmp);

    // refill buffer2[0]
    // (as above we might want to take the opportunity
    // to make buffer2[0] fuller)
    priority_queue_local::merge_iterator(pos, newPos, minBuffer2[0], sz2, cmp);

    // merge the rest to the new segment
    // note that merge exactly trips into the footsteps
    // of itself
    priority_queue_local::merge_iterator(pos, newPos, newSegment, N, cmp);

    // and insert it
    unsigned_type freeLevel = makeSpaceAvailable(0);
    assert(freeLevel == 0 || itree[0].size() == 0);
    itree[0].insert_segment(newSegment, N);

    // get rid of invalid level 2 buffers
    // by inserting them into tree 0 (which is almost empty in this case)
    if (freeLevel > 0)
    {
        for (int_type i = freeLevel; i >= 0; i--)
        {
            // reverse order not needed
            // but would allow immediate refill
            newSegment = new value_type[getSize2(i) + 1]; // with sentinel
            std::copy(minBuffer2[i], minBuffer2[i] + getSize2(i) + 1, newSegment);
            itree[0].insert_segment(newSegment, getSize2(i));
            minBuffer2[i] = buffer2[i] + N; // empty
        }
    }

    // update size
    size_ += size_type(N);

    // special case if the tree was empty before
    if (minBuffer1 == buffer1 + BufferSize1)
        refillBuffer1();
}

// Compile-time search for priority-queue parameters (block size B and
// buffer-block count m) that satisfy the memory/size constraints encoded
// in find_B_m::fits below.
namespace priority_queue_local
{
    // result type signalling that no parameter combination fits the given
    // internal memory IntM; the name itself is the error message
    struct Parameters_for_priority_queue_not_found_Increase_IntM
    {
        enum { fits = false };
        typedef Parameters_for_priority_queue_not_found_Increase_IntM result;
    };

    // placeholder result used to cut off the search once a fit was found
    struct dummy
    {
        enum { fits = false };
        typedef dummy result;
    };

    template <unsigned_type E_, unsigned_type IntM_, unsigned_type MaxS_, unsigned_type B_, unsigned_type m_, bool stop = false>
    struct find_B_m
    {
        typedef find_B_m<E_, IntM_, MaxS_, B_, m_, stop> Self;
        enum {
            k = IntM_ / B_, // number of blocks that fit into M
            E = E_,
// element size
            IntM = IntM_,
            B = B_, // block size
            m = m_, // number of blocks fitting into buffers
            c = k - m_,
            // memory occ. by block must be at least 10 times larger than size of ext sequence
            // && satisfy memory req && if we have two ext mergers their degree must be at least 64=m/2
            fits = c > 10 && ((k - m) * (m) * (m * B / (E * 4 * 1024))) >= MaxS_ && (MaxS_ < ((k - m) * m / (2 * E)) * 1024 || m >= 128),
            step = 1
        };

        // candidate1: same block size, one more buffer block;
        // candidate2: halved block size, restart with m = 1;
        // the search stops along a branch as soon as a fit was found
        typedef typename find_B_m<E, IntM, MaxS_, B, m + step, fits || (m >= k - step)>::result candidate1;
        typedef typename find_B_m<E, IntM, MaxS_, B / 2, 1, fits || candidate1::fits>::result candidate2;
        // prefer this instantiation if it fits, else the first fitting candidate
        typedef typename IF<fits, Self, typename IF<candidate1::fits, candidate1, candidate2>::result>::result result;
    };

    // specialization for the case when no valid parameters are found
    // (block size halved all the way down to 2048 bytes without a fit)
    template <unsigned_type E_, unsigned_type IntM_, unsigned_type MaxS_, bool stop>
    struct find_B_m<E_, IntM_, MaxS_, 2048, 1, stop>
    {
        enum { fits = false };
        typedef Parameters_for_priority_queue_not_found_Increase_IntM result;
    };

    // to speedup search
    // (stop == true: an earlier instantiation already fits, so prune here)
    template <unsigned_type E_, unsigned_type IntM_, unsigned_type MaxS_, unsigned_type B_, unsigned_type m_>
    struct find_B_m<E_, IntM_, MaxS_, B_, m_, true>
    {
        enum { fits = false };
        typedef dummy result;
    };

    // E_ size of element in bytes
    template <unsigned_type E_, unsigned_type IntM_, unsigned_type MaxS_>
    struct find_settings
    {
        // start from block size (8*1024*1024) bytes
        typedef typename find_B_m<E_, IntM_, MaxS_, (8 * 1024 * 1024), 1>::result result;
    };

    // error-signalling result type for the compute_N search below
    struct Parameters_not_found_Try_to_change_the_Tune_parameter
    {
        typedef Parameters_not_found_Try_to_change_the_Tune_parameter result;
    };


    // Compile-time search for N (insertion-heap size): starting from
    // arity AI_, halve AI until N = X / (AI*AI) reaches CriticalSize_.
    template <unsigned_type AI_, unsigned_type X_, unsigned_type CriticalSize_>
    struct compute_N
    {
        typedef compute_N<AI_, X_, CriticalSize_> Self;
        enum
        {
            X = X_,
            AI = AI_,
            N = X / (AI * AI) // two stage internal
        };
        typedef typename IF<(N >= CriticalSize_), Self, typename compute_N<AI / 2, X, CriticalSize_>::result>::result result;
    };

    // recursion bottom: AI reached 1 without satisfying the size constraint
    template <unsigned_type X_, unsigned_type CriticalSize_>
    struct compute_N<1, X_, CriticalSize_>
    {
        typedef Parameters_not_found_Try_to_change_the_Tune_parameter result;
    };
}

// Generator computing suitable priority_queue template parameters from the
// element type, comparator, internal memory budget IntM_ and maximal queue
// size MaxS_; the chosen configuration is exposed as ::result.
template <class Tp_, class Cmp_, unsigned_type IntM_, unsigned MaxS_, unsigned Tune_ = 6>
class PRIORITY_QUEUE_GENERATOR
{
public:
    // actual calculation of B, m, k and E
    typedef typename priority_queue_local::find_settings<sizeof(Tp_), IntM_, MaxS_>::result settings;
    enum {
        B = settings::B,
        m = settings::m,
        X = B * (settings::k - m) / settings::E, // interpretation of result
        Buffer1Size = 32 // fixed
    };
    // derivation of N, AI, AE
    typedef typename priority_queue_local::compute_N<(1 << Tune_), X, 4 * Buffer1Size>::result ComputeN;
    enum
    {
        N = ComputeN::N,
        AI = ComputeN::AI,
        AE = (m / 2 < 2) ? 2 : (m / 2) // at least 2
    };
    enum {
        // Estimation of maximum internal memory consumption (in bytes)
        EConsumption = X * settings::E + settings::B * AE + ((MaxS_ / X) / AE) * settings::B * 1024
    };
    /*
        unsigned BufferSize1_ = 32, // equalize procedure call overheads etc.
        unsigned N_ = 512, // bandwidth
        unsigned IntKMAX_ = 64, // maximal arity for internal mergers
        unsigned IntLevels_ = 4,
        unsigned BlockSize_ = (2*1024*1024),
        unsigned ExtKMAX_ = 64, // maximal arity for external mergers
        unsigned ExtLevels_ = 2,
     */
    typedef priority_queue<priority_queue_config<Tp_, Cmp_, Buffer1Size, N, AI, 2, B, AE, 2> > result;
};


__STXXL_END_NAMESPACE


// NOTE(review): this adds an *overload* (not a specialization) to namespace
// std, which the C++ standard formally disallows; it works with common
// implementations but relies on ADL-free qualified std::swap calls finding it.
namespace std
{
    template <class Config_>
    void swap(stxxl::priority_queue<Config_> & a,
              stxxl::priority_queue<Config_> & b)
    {
        a.swap(b);
    }
}

#endif // !STXXL_PRIORITY_QUEUE_HEADER
// vim: et:ts=4:sw=4