multiway_merge.h

Go to the documentation of this file.
00001 // -*- C++ -*-
00002 
00003 // Copyright (C) 2007, 2008 Free Software Foundation, Inc.
00004 //
00005 // This file is part of the GNU ISO C++ Library.  This library is free
00006 // software; you can redistribute it and/or modify it under the terms
00007 // of the GNU General Public License as published by the Free Software
00008 // Foundation; either version 2, or (at your option) any later
00009 // version.
00010 
00011 // This library is distributed in the hope that it will be useful, but
00012 // WITHOUT ANY WARRANTY; without even the implied warranty of
00013 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00014 // General Public License for more details.
00015 
00016 // You should have received a copy of the GNU General Public License
00017 // along with this library; see the file COPYING.  If not, write to
00018 // the Free Software Foundation, 59 Temple Place - Suite 330, Boston,
00019 // MA 02111-1307, USA.
00020 
00021 // As a special exception, you may use this file as part of a free
00022 // software library without restriction.  Specifically, if other files
00023 // instantiate templates or use macros or inline functions from this
00024 // file, or you compile this file and link it with other files to
00025 // produce an executable, this file does not by itself cause the
00026 // resulting executable to be covered by the GNU General Public
00027 // License.  This exception does not however invalidate any other
00028 // reasons why the executable file might be covered by the GNU General
00029 // Public License.
00030 
00031 /** @file parallel/multiway_merge.h
00032 *  @brief Implementation of sequential and parallel multiway merge.
00033 *
00034 *  The high-speed merging routines are explained in the appendix of
00035 *
00036 *  P. Sanders.
00037 *  Fast priority queues for cached memory.
00038 *  ACM Journal of Experimental Algorithmics, 5, 2000.
00039 *
00040 *  This file is a GNU parallel extension to the Standard C++ Library.
00041 */
00042 
00043 // Written by Johannes Singler.
00044 
00045 #ifndef _GLIBCXX_PARALLEL_MULTIWAY_MERGE_H
00046 #define _GLIBCXX_PARALLEL_MULTIWAY_MERGE_H
00047 
00048 #include <vector>
00049 
00050 #include <bits/stl_algo.h>
00051 #include <parallel/features.h>
00052 #include <parallel/parallel.h>
00053 #include <parallel/merge.h>
00054 #include <parallel/losertree.h>
00055 #if _GLIBCXX_ASSERTIONS
00056 #include <parallel/checkers.h>
00057 #endif
00058 
00059 /** @brief Length of a sequence described by a pair of iterators.
       *
       *  Expands to <tt>(s).second - (s).first</tt>; the argument @c s
       *  appears twice in the expansion, so it is evaluated twice and
       *  should be free of side effects. */
00060 #define _GLIBCXX_PARALLEL_LENGTH(s) ((s).second - (s).first)
00061 
00062 // XXX need iterator typedefs
00063 namespace __gnu_parallel
00064 {
template<typename RandomAccessIterator, typename Comparator>
  class guarded_iterator;

// The comparison operators are declared ahead of the class so that the
// class can name the matching specializations as friends.
template<typename RandomAccessIterator, typename Comparator>
  inline bool
  operator<(guarded_iterator<RandomAccessIterator, Comparator>& bi1,
            guarded_iterator<RandomAccessIterator, Comparator>& bi2);

template<typename RandomAccessIterator, typename Comparator>
  inline bool
  operator<=(guarded_iterator<RandomAccessIterator, Comparator>& bi1,
             guarded_iterator<RandomAccessIterator, Comparator>& bi2);

/** @brief Iterator wrapper that behaves as if a supremum followed the
 *  last element of the sequence.
 *
 *  Once the wrapped position reaches the stored end, the comparison
 *  operators below stop dereferencing it and treat it as larger than
 *  every iterator that is not yet exhausted.
 *
 *  The wrapper holds the iterator by composition; deriving from
 *  RandomAccessIterator is not possible since RandomAccessIterator need
 *  not be a class.
 */
template<typename RandomAccessIterator, typename Comparator>
  class guarded_iterator
  {
  private:
    /** @brief Position currently pointed to. */
    RandomAccessIterator pos;

    /** @brief One-past-the-end position of the sequence. */
    RandomAccessIterator limit;

    /** @brief Comparator used by the overloaded compare operators
     *  (held by reference, not copied). */
    Comparator& less_than;

  public:
    /** @brief Constructor. Starts at the beginning of the sequence.
     *  @param begin Begin iterator of sequence.
     *  @param end End iterator of sequence.
     *  @param comp Comparator provided for associated overloaded
     *  compare operators. */
    guarded_iterator(RandomAccessIterator begin,
                     RandomAccessIterator end, Comparator& comp)
    : pos(begin), limit(end), less_than(comp)
    { }

    /** @brief Pre-increment operator; no end check is performed.
     *  @return This. */
    guarded_iterator&
    operator++()
    {
      ++pos;
      return *this;
    }

    /** @brief Dereference operator.
     *  @return Copy of the referenced element. */
    typename std::iterator_traits<RandomAccessIterator>::value_type
    operator*()
    { return *pos; }

    /** @brief Convert to wrapped iterator.
     *  @return Wrapped iterator at its current position. */
    operator RandomAccessIterator()
    { return pos; }

    friend bool
    operator< <RandomAccessIterator, Comparator>(guarded_iterator& bi1,
                                                 guarded_iterator& bi2);

    friend bool
    operator<= <RandomAccessIterator, Comparator>(guarded_iterator& bi1,
                                                  guarded_iterator& bi2);
  };

/** @brief Compare two elements referenced by guarded iterators.
 *
 *  If either iterator is exhausted, the result is @c true exactly when
 *  @c bi2 is exhausted (this includes the case where both are).
 *  Otherwise the comparator decides.
 *  @param bi1 First iterator.
 *  @param bi2 Second iterator.
 *  @return @c True if less. */
template<typename RandomAccessIterator, typename Comparator>
  inline bool
  operator<(guarded_iterator<RandomAccessIterator, Comparator>& bi1,
            guarded_iterator<RandomAccessIterator, Comparator>& bi2)
  {
    const bool lhs_exhausted = (bi1.pos == bi1.limit);
    const bool rhs_exhausted = (bi2.pos == bi2.limit);

    // At least one side is the supremum: no element may be dereferenced.
    if (lhs_exhausted || rhs_exhausted)
      return rhs_exhausted;

    // Both sides refer to real elements.
    return (bi1.less_than)(*bi1, *bi2);
  }

/** @brief Compare two elements referenced by guarded iterators.
 *
 *  If either iterator is exhausted, the result is @c true exactly when
 *  @c bi1 is not exhausted (so two exhausted iterators yield @c false).
 *  Otherwise the result is the negated comparator applied to the
 *  swapped arguments.
 *  @param bi1 First iterator.
 *  @param bi2 Second iterator.
 *  @return @c True if less equal. */
template<typename RandomAccessIterator, typename Comparator>
  inline bool
  operator<=(guarded_iterator<RandomAccessIterator, Comparator>& bi1,
             guarded_iterator<RandomAccessIterator, Comparator>& bi2)
  {
    const bool lhs_exhausted = (bi1.pos == bi1.limit);
    const bool rhs_exhausted = (bi2.pos == bi2.limit);

    // At least one side is the supremum: no element may be dereferenced.
    if (lhs_exhausted || rhs_exhausted)
      return !lhs_exhausted;

    // Both sides refer to real elements: bi1 <= bi2 iff !(bi2 < bi1).
    return !(bi1.less_than)(*bi2, *bi1);
  }
00169 
template<typename RandomAccessIterator, typename Comparator>
  class unguarded_iterator;

template<typename RandomAccessIterator, typename Comparator>
  inline bool
  operator<(unguarded_iterator<RandomAccessIterator, Comparator>& bi1,
            unguarded_iterator<RandomAccessIterator, Comparator>& bi2);

template<typename RandomAccessIterator, typename Comparator>
  inline bool
  operator<=(unguarded_iterator<RandomAccessIterator, Comparator>& bi1,
             unguarded_iterator<RandomAccessIterator, Comparator>& bi2);

/** @brief Iterator wrapper without any end-of-sequence check.
 *
 *  Offers the same interface as the guarded wrapper (same constructor
 *  signature, increment, dereference, conversion and comparison), but
 *  the comparison operators always dereference both iterators.  The
 *  caller must therefore ensure that neither iterator is compared while
 *  positioned at the end of its sequence.
 */
template<typename RandomAccessIterator, typename Comparator>
  class unguarded_iterator
  {
  private:
    /** @brief Current iterator position.
     *
     *  Stored by value: the constructor receives @c begin by value, so
     *  keeping a reference to it would dangle as soon as the constructor
     *  returns. */
    RandomAccessIterator current;

    /** @brief Comparator (held by reference, not copied). */
    Comparator& comp;

  public:
    /** @brief Constructor. Sets iterator to beginning of sequence.
    *  @param begin Begin iterator of sequence.
    *  @param end Unused, only for compatibility.
    *  @param comp Comparator provided for associated overloaded
    *  compare operators. */
    unguarded_iterator(RandomAccessIterator begin,
                       RandomAccessIterator end, Comparator& comp)
    : current(begin), comp(comp)
    { }

    /** @brief Pre-increment operator.
    *  @return This. */
    unguarded_iterator<RandomAccessIterator, Comparator>&
    operator++()
    {
      ++current;
      return *this;
    }

    /** @brief Dereference operator.
    *  @return Copy of the referenced element. */
    typename std::iterator_traits<RandomAccessIterator>::value_type
    operator*()
    { return *current; }

    /** @brief Convert to wrapped iterator.
    *  @return Wrapped iterator at its current position. */
    operator RandomAccessIterator()
    { return current; }

    friend bool
    operator< <RandomAccessIterator, Comparator>(
      unguarded_iterator<RandomAccessIterator, Comparator>& bi1,
      unguarded_iterator<RandomAccessIterator, Comparator>& bi2);

    friend bool
    operator<= <RandomAccessIterator, Comparator>(
      unguarded_iterator<RandomAccessIterator, Comparator>& bi1,
      unguarded_iterator<RandomAccessIterator, Comparator>& bi2);
  };

/** @brief Compare two elements referenced by unguarded iterators.
 *  @param bi1 First iterator.
 *  @param bi2 Second iterator.
 *  @return @c True if less. */
template<typename RandomAccessIterator, typename Comparator>
  inline bool
  operator<(unguarded_iterator<RandomAccessIterator, Comparator>& bi1,
            unguarded_iterator<RandomAccessIterator, Comparator>& bi2)
  {
    // Normal compare; both iterators are dereferenced unconditionally.
    return (bi1.comp)(*bi1, *bi2);
  }

/** @brief Compare two elements referenced by unguarded iterators.
 *  @param bi1 First iterator.
 *  @param bi2 Second iterator.
 *  @return @c True if less equal. */
template<typename RandomAccessIterator, typename Comparator>
  inline bool
  operator<=(unguarded_iterator<RandomAccessIterator, Comparator>& bi1,
             unguarded_iterator<RandomAccessIterator, Comparator>& bi2)
  {
    // bi1 <= bi2 iff !(bi2 < bi1); both iterators are dereferenced.
    return !(bi1.comp)(*bi2, *bi1);
  }
00258 
00259 /** Prepare a set of sequences to be merged without a (end) guard
00260  *  @param seqs_begin
00261  *  @param seqs_end
00262  *  @param comp
00263  *  @param min_sequence
00264  *  @param stable
00265  *  @pre (seqs_end - seqs_begin > 0) */
00266 template<typename RandomAccessIteratorIterator, typename Comparator>
00267   typename std::iterator_traits<
00268       typename std::iterator_traits<RandomAccessIteratorIterator>::value_type
00269       ::first_type>::difference_type
00270   prepare_unguarded(RandomAccessIteratorIterator seqs_begin,
00271                     RandomAccessIteratorIterator seqs_end, Comparator comp,
00272                     int& min_sequence, bool stable)
00273   {
00274     _GLIBCXX_CALL(seqs_end - seqs_begin)
00275 
00276     typedef typename std::iterator_traits<RandomAccessIteratorIterator>
00277         ::value_type::first_type
00278       RandomAccessIterator1;
00279     typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
00280       value_type;
00281     typedef typename std::iterator_traits<RandomAccessIterator1>
00282       ::difference_type
00283       difference_type;
00284 
00285     if ((*seqs_begin).first == (*seqs_begin).second)
00286       {
00287         // Empty sequence found, it's the first one.
00288         min_sequence = 0;
00289         return -1;
00290       }
00291 
00292     // Last element in sequence.
00293     value_type min = *((*seqs_begin).second - 1);
00294     min_sequence = 0;
00295     for (RandomAccessIteratorIterator s = seqs_begin + 1; s != seqs_end; ++s)
00296       {
00297         if ((*s).first == (*s).second)
00298           {
00299             // Empty sequence found.
00300             min_sequence = static_cast<int>(s - seqs_begin);
00301             return -1;
00302           }
00303 
00304         // Last element in sequence.
00305         const value_type& v = *((*s).second - 1);
00306         if (comp(v, min))   //strictly smaller
00307           {
00308             min = v;
00309             min_sequence = static_cast<int>(s - seqs_begin);
00310           }
00311       }
00312 
00313     difference_type overhang_size = 0;
00314 
00315     int s = 0;
00316     for (s = 0; s <= min_sequence; ++s)
00317       {
00318         RandomAccessIterator1 split;
00319         if (stable)
00320           split = std::upper_bound(seqs_begin[s].first, seqs_begin[s].second,
00321                                   min, comp);
00322         else
00323           split = std::lower_bound(seqs_begin[s].first, seqs_begin[s].second,
00324                                   min, comp);
00325 
00326         overhang_size += seqs_begin[s].second - split;
00327       }
00328 
00329     for (; s < (seqs_end - seqs_begin); ++s)
00330       {
00331         RandomAccessIterator1 split = std::lower_bound(
00332             seqs_begin[s].first, seqs_begin[s].second, min, comp);
00333         overhang_size += seqs_begin[s].second - split;
00334       }
00335 
00336     // So many elements will be left over afterwards.
00337     return overhang_size;
00338   }
00339 
00340 /** Prepare a set of sequences to be merged with a (end) guard (sentinel)
00341  *  @param seqs_begin
00342  *  @param seqs_end
00343  *  @param comp */
00344 template<typename RandomAccessIteratorIterator, typename Comparator>
00345   typename std::iterator_traits<typename std::iterator_traits<
00346       RandomAccessIteratorIterator>::value_type::first_type>::difference_type
00347   prepare_unguarded_sentinel(RandomAccessIteratorIterator seqs_begin,
00348                             RandomAccessIteratorIterator seqs_end,
00349                             Comparator comp)
00350   {
00351     _GLIBCXX_CALL(seqs_end - seqs_begin)
00352 
00353     typedef typename std::iterator_traits<RandomAccessIteratorIterator>
00354       ::value_type::first_type
00355       RandomAccessIterator1;
00356     typedef typename std::iterator_traits<RandomAccessIterator1>
00357       ::value_type
00358       value_type;
00359     typedef typename std::iterator_traits<RandomAccessIterator1>
00360       ::difference_type
00361       difference_type;
00362 
00363     // Last element in sequence.
00364     value_type* max = NULL;
00365     for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; ++s)
00366       {
00367         if ((*s).first == (*s).second)
00368           continue;
00369 
00370         // Last element in sequence.
00371         value_type& v = *((*s).second - 1);
00372 
00373         // Strictly greater.
00374         if (!max || comp(*max, v))
00375           max = &v;
00376       }
00377 
00378     difference_type overhang_size = 0;
00379     for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; ++s)
00380       {
00381         RandomAccessIterator1 split =
00382             std::lower_bound((*s).first, (*s).second, *max, comp);
00383         overhang_size += (*s).second - split;
00384 
00385         // Set sentinel.
00386         *((*s).second) = *max;
00387       }
00388 
00389     // So many elements will be left over afterwards.
00390     return overhang_size;
00391   }
00392 
00393 /** @brief Highly efficient 3-way merging procedure.
       *
       *  Implemented as a goto-driven state machine: each label
       *  <tt>s&lt;a&gt;&lt;b&gt;&lt;c&gt;</tt> stands for one ordering of the
       *  three sequence heads, and the code at that label emits the head of
       *  sequence @c a.  Exactly the sequences <tt>seqs_begin[0..2]</tt> are
       *  read, and their @c first members are updated to the positions
       *  reached when the function returns.
       *
       *  @tparam iterator Wrapper template used for the three sequence
       *  positions; it must provide @c operator<, @c operator<=, @c operator*,
       *  @c operator++ and a conversion back to the wrapped iterator.
00394  *  @param seqs_begin Begin iterator of iterator pair input sequence.
00395  *  @param seqs_end End iterator of iterator pair input sequence
       *  (not referenced by the body).
00396  *  @param target Begin iterator of output sequence.
00397  *  @param comp Comparator.
00398  *  @param length Maximum length to merge.  NOTE(review): presumably the
       *  caller guarantees that enough elements are available; this function
       *  itself only stops when @c length reaches zero.
00399  *  @param stable Unused, stable anyway.
00400  *  @return End iterator of output sequence. */
00401 template<template<typename RAI, typename C> class iterator,
00402      typename RandomAccessIteratorIterator,
00403      typename RandomAccessIterator3,
00404      typename _DifferenceTp,
00405      typename Comparator>
00406   RandomAccessIterator3
00407   multiway_merge_3_variant(RandomAccessIteratorIterator seqs_begin,
00408                RandomAccessIteratorIterator seqs_end,
00409                RandomAccessIterator3 target,
00410                Comparator comp, _DifferenceTp length,
00411                bool stable)
00412   {
00413     _GLIBCXX_CALL(length);
00414 
00415     typedef _DifferenceTp difference_type;
00416 
00417     typedef typename std::iterator_traits<RandomAccessIteratorIterator>
00418       ::value_type::first_type
00419       RandomAccessIterator1;
00420     typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
00421       value_type;
00422 
          // Nothing to do; must be checked here because each merge case below
          // emits an element before it tests length.
00423     if (length == 0)
00424       return target;
00425 
00426     iterator<RandomAccessIterator1, Comparator>
00427       seq0(seqs_begin[0].first, seqs_begin[0].second, comp),
00428       seq1(seqs_begin[1].first, seqs_begin[1].second, comp),
00429       seq2(seqs_begin[2].first, seqs_begin[2].second, comp);
00430 
          // Establish the initial order of the three heads and jump to the
          // matching state label.
00431     if (seq0 <= seq1)
00432       {
00433         if (seq1 <= seq2)
00434           goto s012;
00435         else
00436           if (seq2 <  seq0)
00437             goto s201;
00438           else
00439             goto s021;
00440       }
00441     else
00442       {
00443         if (seq1 <= seq2)
00444           {
00445             if (seq0 <= seq2)
00446               goto s102;
00447             else
00448               goto s120;
00449           }
00450         else
00451           goto s210;
00452       }
00453 
      // One state of the merge: emit the head of sequence a, advance it, stop
      // when length is exhausted, otherwise re-insert a into the order
      // (a, b, c) by comparing it with b (operator c0) and then c (operator c1).
      // In the table below <= is used where a has the lower sequence index and
      // < where it has the higher one, so ties go to the lower-indexed sequence.
00454 #define _GLIBCXX_PARALLEL_MERGE_3_CASE(a,b,c,c0,c1)\
00455     s ## a ## b ## c :                                  \
00456       *target = *seq ## a;                              \
00457     ++target;                                           \
00458     --length;                                           \
00459     ++seq ## a;                                         \
00460     if (length == 0) goto finish;                       \
00461     if (seq ## a c0 seq ## b) goto s ## a ## b ## c;    \
00462     if (seq ## a c1 seq ## c) goto s ## b ## a ## c;    \
00463     goto s ## b ## c ## a;
00464 
00465     _GLIBCXX_PARALLEL_MERGE_3_CASE(0, 1, 2, <=, <=);
00466     _GLIBCXX_PARALLEL_MERGE_3_CASE(1, 2, 0, <=, < );
00467     _GLIBCXX_PARALLEL_MERGE_3_CASE(2, 0, 1, < , < );
00468     _GLIBCXX_PARALLEL_MERGE_3_CASE(1, 0, 2, < , <=);
00469     _GLIBCXX_PARALLEL_MERGE_3_CASE(0, 2, 1, <=, <=);
00470     _GLIBCXX_PARALLEL_MERGE_3_CASE(2, 1, 0, < , < );
00471 
00472 #undef _GLIBCXX_PARALLEL_MERGE_3_CASE
00473 
00474   finish:
00475     ;
00476 
          // Write the reached positions back so the caller can continue from
          // where this merge stopped.
00477     seqs_begin[0].first = seq0;
00478     seqs_begin[1].first = seq1;
00479     seqs_begin[2].first = seq2;
00480 
00481     return target;
00482   }
00483 
00484 template<typename RandomAccessIteratorIterator,
00485      typename RandomAccessIterator3,
00486      typename _DifferenceTp,
00487      typename Comparator>
00488   RandomAccessIterator3
00489   multiway_merge_3_combined(RandomAccessIteratorIterator seqs_begin,
00490                             RandomAccessIteratorIterator seqs_end,
00491                             RandomAccessIterator3 target,
00492                             Comparator comp,
00493                             _DifferenceTp length, bool stable)
00494   {
00495     _GLIBCXX_CALL(length);
00496 
00497     typedef _DifferenceTp difference_type;
00498     typedef typename std::iterator_traits<RandomAccessIteratorIterator>
00499       ::value_type::first_type
00500       RandomAccessIterator1;
00501     typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
00502       value_type;
00503 
00504     int min_seq;
00505     RandomAccessIterator3 target_end;
00506 
00507     // Stable anyway.
00508     difference_type overhang =
00509         prepare_unguarded(seqs_begin, seqs_end, comp, min_seq, true);
00510 
00511     difference_type total_length = 0;
00512     for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; ++s)
00513       total_length += _GLIBCXX_PARALLEL_LENGTH(*s);
00514 
00515     if (overhang != -1)
00516       {
00517         difference_type unguarded_length =
00518             std::min(length, total_length - overhang);
00519         target_end = multiway_merge_3_variant<unguarded_iterator>
00520           (seqs_begin, seqs_end, target, comp, unguarded_length, stable);
00521         overhang = length - unguarded_length;
00522       }
00523     else
00524       {
00525         // Empty sequence found.
00526         overhang = length;
00527         target_end = target;
00528       }
00529 
00530 #if _GLIBCXX_ASSERTIONS
00531     _GLIBCXX_PARALLEL_ASSERT(target_end == target + length - overhang);
00532     _GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target_end, comp));
00533 #endif
00534 
00535     switch (min_seq)
00536       {
00537       case 0:
00538         // Iterators will be advanced accordingly.
00539         target_end = merge_advance(seqs_begin[1].first, seqs_begin[1].second,
00540                                   seqs_begin[2].first, seqs_begin[2].second,
00541                                   target_end, overhang, comp);
00542         break;
00543       case 1:
00544         target_end = merge_advance(seqs_begin[0].first, seqs_begin[0].second,
00545                                   seqs_begin[2].first, seqs_begin[2].second,
00546                                   target_end, overhang, comp);
00547         break;
00548       case 2:
00549         target_end = merge_advance(seqs_begin[0].first, seqs_begin[0].second,
00550                                   seqs_begin[1].first, seqs_begin[1].second,
00551                                   target_end, overhang, comp);
00552         break;
00553       default:
00554         _GLIBCXX_PARALLEL_ASSERT(false);
00555       }
00556 
00557 #if _GLIBCXX_ASSERTIONS
00558     _GLIBCXX_PARALLEL_ASSERT(target_end == target + length);
00559     _GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target_end, comp));
00560 #endif
00561 
00562     return target_end;
00563   }
00564 
00565 /** @brief Highly efficient 4-way merging procedure.
       *
       *  Implemented as a goto-driven state machine: each label
       *  <tt>s&lt;a&gt;&lt;b&gt;&lt;c&gt;&lt;d&gt;</tt> stands for one ordering
       *  of the four sequence heads, and the code at that label emits the head
       *  of sequence @c a.  Exactly the sequences <tt>seqs_begin[0..3]</tt>
       *  are read, and their @c first members are updated to the positions
       *  reached when the function returns.
       *
       *  @tparam iterator Wrapper template used for the four sequence
       *  positions; it must provide @c operator<, @c operator<=, @c operator*,
       *  @c operator++ and a conversion back to the wrapped iterator.
00566  *  @param seqs_begin Begin iterator of iterator pair input sequence.
00567  *  @param seqs_end End iterator of iterator pair input sequence
       *  (not referenced by the body).
00568  *  @param target Begin iterator of output sequence.
00569  *  @param comp Comparator.
00570  *  @param length Maximum length to merge.  NOTE(review): presumably the
       *  caller guarantees that enough elements are available; this function
       *  itself only stops when @c length reaches zero.
00571  *  @param stable Unused, stable anyway.
00572  *  @return End iterator of output sequence. */
00573 template<template<typename RAI, typename C> class iterator,
00574      typename RandomAccessIteratorIterator,
00575      typename RandomAccessIterator3,
00576      typename _DifferenceTp,
00577      typename Comparator>
00578   RandomAccessIterator3
00579   multiway_merge_4_variant(RandomAccessIteratorIterator seqs_begin,
00580                            RandomAccessIteratorIterator seqs_end,
00581                            RandomAccessIterator3 target,
00582                            Comparator comp, _DifferenceTp length, bool stable)
00583   {
00584     _GLIBCXX_CALL(length);
00585     typedef _DifferenceTp difference_type;
00586 
00587     typedef typename std::iterator_traits<RandomAccessIteratorIterator>
00588       ::value_type::first_type
00589       RandomAccessIterator1;
00590     typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
00591       value_type;
00592 
00593     iterator<RandomAccessIterator1, Comparator>
00594       seq0(seqs_begin[0].first, seqs_begin[0].second, comp),
00595       seq1(seqs_begin[1].first, seqs_begin[1].second, comp),
00596       seq2(seqs_begin[2].first, seqs_begin[2].second, comp),
00597       seq3(seqs_begin[3].first, seqs_begin[3].second, comp);
00598 
      // Given heads a, b, c already in order, place head d: jump to the state
      // in which d sits directly before the first of a, b, c that it is
      // strictly less than, or last if there is none.  The expansion is a
      // complete braced block, which is why the uses below carry no semicolon.
00599 #define _GLIBCXX_PARALLEL_DECISION(a,b,c,d) {                   \
00600       if (seq ## d < seq ## a) goto s ## d ## a ## b ## c;  \
00601       if (seq ## d < seq ## b) goto s ## a ## d ## b ## c;  \
00602       if (seq ## d < seq ## c) goto s ## a ## b ## d ## c;  \
00603       goto s ## a ## b ## c ## d;  }
00604 
          // Order the heads of sequences 0..2 first, then let the DECISION
          // macro insert sequence 3 and jump to the matching state label.
00605     if (seq0 <= seq1)
00606       {
00607         if (seq1 <= seq2)
00608           _GLIBCXX_PARALLEL_DECISION(0,1,2,3)
00609           else
00610             if (seq2 < seq0)
00611               _GLIBCXX_PARALLEL_DECISION(2,0,1,3)
00612               else
00613                 _GLIBCXX_PARALLEL_DECISION(0,2,1,3)
00614                   }
00615     else
00616       {
00617         if (seq1 <= seq2)
00618           {
00619             if (seq0 <= seq2)
00620               _GLIBCXX_PARALLEL_DECISION(1,0,2,3)
00621               else
00622                 _GLIBCXX_PARALLEL_DECISION(1,2,0,3)
00623                   }
00624         else
00625           _GLIBCXX_PARALLEL_DECISION(2,1,0,3)
00626             }
00627 
      // One state of the merge: stop if length is exhausted (checked on entry,
      // so length == 0 is handled without a separate test at function start),
      // otherwise emit the head of sequence a, advance it, and re-insert a into
      // the order (a, b, c, d) by comparing it with b, c and d using the
      // operators c0, c1 and c2.  In the table below <= is used where a has the
      // lower sequence index and < where it has the higher one, so ties go to
      // the lower-indexed sequence.
00628 #define _GLIBCXX_PARALLEL_MERGE_4_CASE(a,b,c,d,c0,c1,c2)        \
00629     s ## a ## b ## c ## d:                                      \
00630       if (length == 0) goto finish;                             \
00631     *target = *seq ## a;                                        \
00632     ++target;                                                   \
00633     --length;                                                   \
00634     ++seq ## a;                                                 \
00635     if (seq ## a c0 seq ## b) goto s ## a ## b ## c ## d;       \
00636     if (seq ## a c1 seq ## c) goto s ## b ## a ## c ## d;       \
00637     if (seq ## a c2 seq ## d) goto s ## b ## c ## a ## d;       \
00638     goto s ## b ## c ## d ## a;
00639 
00640     _GLIBCXX_PARALLEL_MERGE_4_CASE(0, 1, 2, 3, <=, <=, <=);
00641     _GLIBCXX_PARALLEL_MERGE_4_CASE(0, 1, 3, 2, <=, <=, <=);
00642     _GLIBCXX_PARALLEL_MERGE_4_CASE(0, 2, 1, 3, <=, <=, <=);
00643     _GLIBCXX_PARALLEL_MERGE_4_CASE(0, 2, 3, 1, <=, <=, <=);
00644     _GLIBCXX_PARALLEL_MERGE_4_CASE(0, 3, 1, 2, <=, <=, <=);
00645     _GLIBCXX_PARALLEL_MERGE_4_CASE(0, 3, 2, 1, <=, <=, <=);
00646     _GLIBCXX_PARALLEL_MERGE_4_CASE(1, 0, 2, 3, < , <=, <=);
00647     _GLIBCXX_PARALLEL_MERGE_4_CASE(1, 0, 3, 2, < , <=, <=);
00648     _GLIBCXX_PARALLEL_MERGE_4_CASE(1, 2, 0, 3, <=, < , <=);
00649     _GLIBCXX_PARALLEL_MERGE_4_CASE(1, 2, 3, 0, <=, <=, < );
00650     _GLIBCXX_PARALLEL_MERGE_4_CASE(1, 3, 0, 2, <=, < , <=);
00651     _GLIBCXX_PARALLEL_MERGE_4_CASE(1, 3, 2, 0, <=, <=, < );
00652     _GLIBCXX_PARALLEL_MERGE_4_CASE(2, 0, 1, 3, < , < , <=);
00653     _GLIBCXX_PARALLEL_MERGE_4_CASE(2, 0, 3, 1, < , <=, < );
00654     _GLIBCXX_PARALLEL_MERGE_4_CASE(2, 1, 0, 3, < , < , <=);
00655     _GLIBCXX_PARALLEL_MERGE_4_CASE(2, 1, 3, 0, < , <=, < );
00656     _GLIBCXX_PARALLEL_MERGE_4_CASE(2, 3, 0, 1, <=, < , < );
00657     _GLIBCXX_PARALLEL_MERGE_4_CASE(2, 3, 1, 0, <=, < , < );
00658     _GLIBCXX_PARALLEL_MERGE_4_CASE(3, 0, 1, 2, < , < , < );
00659     _GLIBCXX_PARALLEL_MERGE_4_CASE(3, 0, 2, 1, < , < , < );
00660     _GLIBCXX_PARALLEL_MERGE_4_CASE(3, 1, 0, 2, < , < , < );
00661     _GLIBCXX_PARALLEL_MERGE_4_CASE(3, 1, 2, 0, < , < , < );
00662     _GLIBCXX_PARALLEL_MERGE_4_CASE(3, 2, 0, 1, < , < , < );
00663     _GLIBCXX_PARALLEL_MERGE_4_CASE(3, 2, 1, 0, < , < , < );
00664 
00665 #undef _GLIBCXX_PARALLEL_MERGE_4_CASE
00666 #undef _GLIBCXX_PARALLEL_DECISION
00667 
00668   finish:
00669     ;
00670 
          // Write the reached positions back so the caller can continue from
          // where this merge stopped.
00671     seqs_begin[0].first = seq0;
00672     seqs_begin[1].first = seq1;
00673     seqs_begin[2].first = seq2;
00674     seqs_begin[3].first = seq3;
00675 
00676     return target;
00677   }
00678 
00679 template<typename RandomAccessIteratorIterator,
00680      typename RandomAccessIterator3,
00681      typename _DifferenceTp,
00682      typename Comparator>
00683   RandomAccessIterator3
00684   multiway_merge_4_combined(RandomAccessIteratorIterator seqs_begin,
00685                             RandomAccessIteratorIterator seqs_end,
00686                             RandomAccessIterator3 target,
00687                             Comparator comp,
00688                             _DifferenceTp length, bool stable)
00689   {
00690     _GLIBCXX_CALL(length);
00691     typedef _DifferenceTp difference_type;
00692 
00693     typedef typename std::iterator_traits<RandomAccessIteratorIterator>
00694       ::value_type::first_type
00695       RandomAccessIterator1;
00696     typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
00697       value_type;
00698 
00699     int min_seq;
00700     RandomAccessIterator3 target_end;
00701 
00702     // Stable anyway.
00703     difference_type overhang =
00704         prepare_unguarded(seqs_begin, seqs_end, comp, min_seq, true);
00705 
00706     difference_type total_length = 0;
00707     for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; ++s)
00708       total_length += _GLIBCXX_PARALLEL_LENGTH(*s);
00709 
00710     if (overhang != -1)
00711       {
00712         difference_type unguarded_length =
00713             std::min(length, total_length - overhang);
00714         target_end = multiway_merge_4_variant<unguarded_iterator>
00715           (seqs_begin, seqs_end, target, comp, unguarded_length, stable);
00716         overhang = length - unguarded_length;
00717       }
00718     else
00719       {
00720         // Empty sequence found.
00721         overhang = length;
00722         target_end = target;
00723       }
00724 
00725 #if _GLIBCXX_ASSERTIONS
00726     _GLIBCXX_PARALLEL_ASSERT(target_end == target + length - overhang);
00727     _GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target_end, comp));
00728 #endif
00729 
00730     std::vector<std::pair<RandomAccessIterator1, RandomAccessIterator1> >
00731         one_missing(seqs_begin, seqs_end);
00732     one_missing.erase(one_missing.begin() + min_seq);   //remove
00733 
00734     target_end = multiway_merge_3_variant<guarded_iterator>(
00735         one_missing.begin(), one_missing.end(),
00736         target_end, comp, overhang, stable);
00737 
00738     // Insert back again.
00739     one_missing.insert(one_missing.begin() + min_seq, seqs_begin[min_seq]);
00740     // Write back modified iterators.
00741     copy(one_missing.begin(), one_missing.end(), seqs_begin);
00742 
00743 #if _GLIBCXX_ASSERTIONS
00744     _GLIBCXX_PARALLEL_ASSERT(target_end == target + length);
00745     _GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target_end, comp));
00746 #endif
00747 
00748     return target_end;
00749   }
00750 
00751 /** @brief Basic multi-way merging procedure.
00752  *
00753  *  The head elements are kept in a sorted array, new heads are
00754  *  inserted linearly.
00755  *  @param seqs_begin Begin iterator of iterator pair input sequence.
00756  *  @param seqs_end End iterator of iterator pair input sequence.
00757  *  @param target Begin iterator out output sequence.
00758  *  @param comp Comparator.
00759  *  @param length Maximum length to merge.
00760  *  @param stable Stable merging incurs a performance penalty.
00761  *  @return End iterator of output sequence.
00762  */
00763 template<typename RandomAccessIteratorIterator,
00764      typename RandomAccessIterator3,
00765      typename _DifferenceTp,
00766      typename Comparator>
00767   RandomAccessIterator3
00768   multiway_merge_bubble(RandomAccessIteratorIterator seqs_begin,
00769                         RandomAccessIteratorIterator seqs_end,
00770                         RandomAccessIterator3 target,
00771                         Comparator comp, _DifferenceTp length, bool stable)
00772   {
00773     _GLIBCXX_CALL(length)
00774 
00775     typedef _DifferenceTp difference_type;
00776     typedef typename std::iterator_traits<RandomAccessIteratorIterator>
00777       ::value_type::first_type
00778       RandomAccessIterator1;
00779     typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
00780       value_type;
00781 
00782     int k = static_cast<int>(seqs_end - seqs_begin);
00783     int nrs;  // Number of remaining sequences.
00784 
00785     // Avoid default constructor.
00786     value_type* fe = static_cast<value_type*>(
00787       ::operator new(sizeof(value_type) * k));  // Front elements.
00788     int* source = new int[k];
00789     difference_type total_length = 0;
00790 
00791     // Write entries into queue.
00792     nrs = 0;
00793     for (int pi = 0; pi < k; ++pi)
00794       {
00795         if (seqs_begin[pi].first != seqs_begin[pi].second)
00796           {
00797             ::new(&(fe[nrs])) value_type(*(seqs_begin[pi].first));
00798             source[nrs] = pi;
00799             ++nrs;
00800             total_length += _GLIBCXX_PARALLEL_LENGTH(seqs_begin[pi]);
00801           }
00802       }
00803 
00804     if (stable)
00805       {
00806         // Bubble sort fe and source by fe.
00807         for (int k = 0; k < nrs - 1; ++k)
00808           for (int pi = nrs - 1; pi > k; --pi)
00809             if (comp(fe[pi], fe[pi - 1]) ||
00810                 (!comp(fe[pi - 1], fe[pi]) && source[pi] < source[pi - 1]))
00811               {
00812                 std::swap(fe[pi - 1], fe[pi]);
00813                 std::swap(source[pi - 1], source[pi]);
00814               }
00815       }
00816     else
00817       {
00818         for (int k = 0; k < nrs - 1; ++k)
00819           for (int pi = nrs - 1; pi > k; --pi)
00820             if (comp(fe[pi], fe[pi-1]))
00821               {
00822                 std::swap(fe[pi-1], fe[pi]);
00823                 std::swap(source[pi-1], source[pi]);
00824               }
00825       }
00826 
00827     // Iterate.
00828     if (stable)
00829       {
00830         int j;
00831         while (nrs > 0 && length > 0)
00832           {
00833             if (source[0] < source[1])
00834               {
00835                 // fe[0] <= fe[1]
00836                 while ((nrs == 1 || !comp(fe[1], fe[0])) && length > 0)
00837                   {
00838                     *target = fe[0];
00839                     ++target;
00840                     ++(seqs_begin[source[0]].first);
00841                     --length;
00842                     if (seqs_begin[source[0]].first
00843             == seqs_begin[source[0]].second)
00844                       {
00845                         // Move everything to the left.
00846                         for (int s = 0; s < nrs - 1; ++s)
00847                           {
00848                             fe[s] = fe[s + 1];
00849                             source[s] = source[s + 1];
00850                           }
00851                         fe[nrs - 1].~value_type();  //Destruct explicitly.
00852                         --nrs;
00853                         break;
00854                       }
00855                     else
00856                       fe[0] = *(seqs_begin[source[0]].first);
00857                   }
00858               }
00859             else
00860               {
00861                 // fe[0] < fe[1]
00862                 while ((nrs == 1 || comp(fe[0], fe[1])) && length > 0)
00863                   {
00864                     *target = fe[0];
00865                     ++target;
00866                     ++(seqs_begin[source[0]].first);
00867                     --length;
00868                     if (seqs_begin[source[0]].first
00869             == seqs_begin[source[0]].second)
00870                       {
00871                         for (int s = 0; s < nrs - 1; ++s)
00872                           {
00873                             fe[s] = fe[s + 1];
00874                             source[s] = source[s + 1];
00875                           }
00876                         fe[nrs - 1].~value_type();  //Destruct explicitly.
00877                         --nrs;
00878                         break;
00879                       }
00880                     else
00881                       fe[0] = *(seqs_begin[source[0]].first);
00882                   }
00883               }
00884 
00885             // Sink down.
00886             j = 1;
00887             while ((j < nrs) && (comp(fe[j], fe[j - 1])
00888                  || (!comp(fe[j - 1], fe[j])
00889                      && (source[j] < source[j - 1]))))
00890               {
00891                 std::swap(fe[j - 1], fe[j]);
00892                 std::swap(source[j - 1], source[j]);
00893                 ++j;
00894               }
00895           }
00896       }
00897     else
00898       {
00899         int j;
00900         while (nrs > 0 && length > 0)
00901           {
00902             // fe[0] <= fe[1]
00903             while (nrs == 1 || (!comp(fe[1], fe[0])) && length > 0)
00904               {
00905                 *target = fe[0];
00906                 ++target;
00907                 ++seqs_begin[source[0]].first;
00908                 --length;
00909                 if (seqs_begin[source[0]].first
00910             == seqs_begin[source[0]].second)
00911                   {
00912                     for (int s = 0; s < (nrs - 1); ++s)
00913                       {
00914                         fe[s] = fe[s + 1];
00915                         source[s] = source[s + 1];
00916                       }
00917                     fe[nrs - 1].~value_type();  //Destruct explicitly.
00918                     --nrs;
00919                     break;
00920                   }
00921                 else
00922                   fe[0] = *(seqs_begin[source[0]].first);
00923               }
00924 
00925             // Sink down.
00926             j = 1;
00927             while ((j < nrs) && comp(fe[j], fe[j - 1]))
00928               {
00929                 std::swap(fe[j - 1], fe[j]);
00930                 std::swap(source[j - 1], source[j]);
00931                 ++j;
00932               }
00933           }
00934       }
00935 
00936     ::operator delete(fe);  //Destructors already called.
00937     delete[] source;
00938 
00939     return target;
00940   }
00941 
00942 /** @brief Multi-way merging procedure for a high branching factor,
00943  * guarded case.
00944  *
00945  *  The head elements are kept in a loser tree.
00946  *  @param seqs_begin Begin iterator of iterator pair input sequence.
00947  *  @param seqs_end End iterator of iterator pair input sequence.
00948  *  @param target Begin iterator out output sequence.
00949  *  @param comp Comparator.
00950  *  @param length Maximum length to merge.
00951  *   @param stable Stable merging incurs a performance penalty.
00952  *  @return End iterator of output sequence.
00953  */
00954 template<typename LT,
00955      typename RandomAccessIteratorIterator,
00956      typename RandomAccessIterator3,
00957      typename _DifferenceTp,
00958      typename Comparator>
00959   RandomAccessIterator3
00960   multiway_merge_loser_tree(RandomAccessIteratorIterator seqs_begin,
00961                             RandomAccessIteratorIterator seqs_end,
00962                             RandomAccessIterator3 target,
00963                             Comparator comp,
00964                             _DifferenceTp length, bool stable)
00965   {
00966     _GLIBCXX_CALL(length)
00967 
00968     typedef _DifferenceTp difference_type;
00969     typedef typename std::iterator_traits<RandomAccessIteratorIterator>
00970       ::value_type::first_type
00971       RandomAccessIterator1;
00972     typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
00973       value_type;
00974 
00975     int k = static_cast<int>(seqs_end - seqs_begin);
00976 
00977     LT lt(k, comp);
00978 
00979     difference_type total_length = 0;
00980 
00981     // Default value for potentially non-default-constructible types.
00982     value_type* arbitrary_element = NULL;
00983 
00984     for (int t = 0; t < k; ++t)
00985       {
00986         if(arbitrary_element == NULL
00987        && _GLIBCXX_PARALLEL_LENGTH(seqs_begin[t]) > 0)
00988           arbitrary_element = &(*seqs_begin[t].first);
00989         total_length += _GLIBCXX_PARALLEL_LENGTH(seqs_begin[t]);
00990       }
00991 
00992     if(total_length == 0)
00993       return target;
00994 
00995     for (int t = 0; t < k; ++t)
00996       {
00997         if (stable)
00998           {
00999             if (seqs_begin[t].first == seqs_begin[t].second)
01000               lt.insert_start_stable(*arbitrary_element, t, true);
01001             else
01002               lt.insert_start_stable(*seqs_begin[t].first, t, false);
01003           }
01004         else
01005           {
01006             if (seqs_begin[t].first == seqs_begin[t].second)
01007               lt.insert_start(*arbitrary_element, t, true);
01008             else
01009               lt.insert_start(*seqs_begin[t].first, t, false);
01010           }
01011       }
01012 
01013     if (stable)
01014       lt.init_stable();
01015     else
01016       lt.init();
01017 
01018     total_length = std::min(total_length, length);
01019 
01020     int source;
01021 
01022     if (stable)
01023       {
01024         for (difference_type i = 0; i < total_length; ++i)
01025           {
01026             // Take out.
01027             source = lt.get_min_source();
01028 
01029             *(target++) = *(seqs_begin[source].first++);
01030 
01031             // Feed.
01032             if (seqs_begin[source].first == seqs_begin[source].second)
01033               lt.delete_min_insert_stable(*arbitrary_element, true);
01034             else
01035               // Replace from same source.
01036               lt.delete_min_insert_stable(*seqs_begin[source].first, false);
01037 
01038           }
01039       }
01040     else
01041       {
01042         for (difference_type i = 0; i < total_length; ++i)
01043           {
01044             //take out
01045             source = lt.get_min_source();
01046 
01047             *(target++) = *(seqs_begin[source].first++);
01048 
01049             // Feed.
01050             if (seqs_begin[source].first == seqs_begin[source].second)
01051               lt.delete_min_insert(*arbitrary_element, true);
01052             else
01053               // Replace from same source.
01054               lt.delete_min_insert(*seqs_begin[source].first, false);
01055           }
01056       }
01057 
01058     return target;
01059   }
01060 
01061 /** @brief Multi-way merging procedure for a high branching factor,
01062  * unguarded case.
01063  *
01064  *  The head elements are kept in a loser tree.
01065  *  @param seqs_begin Begin iterator of iterator pair input sequence.
01066  *  @param seqs_end End iterator of iterator pair input sequence.
01067  *  @param target Begin iterator out output sequence.
01068  *  @param comp Comparator.
01069  *  @param length Maximum length to merge.
01070  *  @param stable Stable merging incurs a performance penalty.
01071  *  @return End iterator of output sequence.
01072  *  @pre No input will run out of elements during the merge.
01073  */
01074 template<typename LT,
01075      typename RandomAccessIteratorIterator,
01076      typename RandomAccessIterator3,
01077      typename _DifferenceTp, typename Comparator>
01078   RandomAccessIterator3
01079   multiway_merge_loser_tree_unguarded(RandomAccessIteratorIterator seqs_begin,
01080                                       RandomAccessIteratorIterator seqs_end,
01081                                       RandomAccessIterator3 target,
01082                                       Comparator comp,
01083                                       _DifferenceTp length, bool stable)
01084   {
01085     _GLIBCXX_CALL(length)
01086     typedef _DifferenceTp difference_type;
01087 
01088     typedef typename std::iterator_traits<RandomAccessIteratorIterator>
01089       ::value_type::first_type
01090       RandomAccessIterator1;
01091     typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
01092       value_type;
01093 
01094     int k = seqs_end - seqs_begin;
01095 
01096     LT lt(k, comp);
01097 
01098     difference_type total_length = 0;
01099 
01100     for (int t = 0; t < k; ++t)
01101       {
01102 #if _GLIBCXX_ASSERTIONS
01103         _GLIBCXX_PARALLEL_ASSERT(seqs_begin[t].first != seqs_begin[t].second);
01104 #endif
01105         if (stable)
01106           lt.insert_start_stable(*seqs_begin[t].first, t, false);
01107         else
01108           lt.insert_start(*seqs_begin[t].first, t, false);
01109 
01110         total_length += _GLIBCXX_PARALLEL_LENGTH(seqs_begin[t]);
01111       }
01112 
01113     if (stable)
01114       lt.init_stable();
01115     else
01116       lt.init();
01117 
01118     // Do not go past end.
01119     length = std::min(total_length, length);
01120 
01121     int source;
01122 
01123 #if _GLIBCXX_ASSERTIONS
01124     difference_type i = 0;
01125 #endif
01126 
01127     if (stable)
01128       {
01129         RandomAccessIterator3 target_end = target + length;
01130         while (target < target_end)
01131           {
01132             // Take out.
01133             source = lt.get_min_source();
01134 
01135 #if _GLIBCXX_ASSERTIONS
01136             _GLIBCXX_PARALLEL_ASSERT(i == 0
01137                 || !comp(*(seqs_begin[source].first), *(target - 1)));
01138 #endif
01139 
01140             *(target++) = *(seqs_begin[source].first++);
01141 
01142 #if _GLIBCXX_ASSERTIONS
01143             _GLIBCXX_PARALLEL_ASSERT(
01144                 (seqs_begin[source].first != seqs_begin[source].second)
01145                 || (i == length - 1));
01146             ++i;
01147 #endif
01148             // Feed.
01149             // Replace from same source.
01150             lt.delete_min_insert_stable(*seqs_begin[source].first, false);
01151 
01152           }
01153       }
01154     else
01155       {
01156         RandomAccessIterator3 target_end = target + length;
01157         while (target < target_end)
01158           {
01159             // Take out.
01160             source = lt.get_min_source();
01161 
01162 #if _GLIBCXX_ASSERTIONS
01163             if (i > 0 && comp(*(seqs_begin[source].first), *(target - 1)))
01164               printf("         %i %i %i\n", length, i, source);
01165             _GLIBCXX_PARALLEL_ASSERT(i == 0
01166                 || !comp(*(seqs_begin[source].first), *(target - 1)));
01167 #endif
01168 
01169             *(target++) = *(seqs_begin[source].first++);
01170 
01171 #if _GLIBCXX_ASSERTIONS
01172             if (!((seqs_begin[source].first != seqs_begin[source].second)
01173                 || (i >= length - 1)))
01174               printf("         %i %i %i\n", length, i, source);
01175             _GLIBCXX_PARALLEL_ASSERT(
01176                 (seqs_begin[source].first != seqs_begin[source].second)
01177                 || (i >= length - 1));
01178             ++i;
01179 #endif
01180             // Feed.
01181             // Replace from same source.
01182             lt.delete_min_insert(*seqs_begin[source].first, false);
01183           }
01184       }
01185 
01186     return target;
01187   }
01188 
01189 template<typename RandomAccessIteratorIterator,
01190      typename RandomAccessIterator3,
01191      typename _DifferenceTp,
01192      typename Comparator>
01193   RandomAccessIterator3
01194   multiway_merge_loser_tree_combined(RandomAccessIteratorIterator seqs_begin,
01195                                      RandomAccessIteratorIterator seqs_end,
01196                                      RandomAccessIterator3 target,
01197                                      Comparator comp,
01198                                      _DifferenceTp length, bool stable)
01199   {
01200     _GLIBCXX_CALL(length)
01201 
01202     typedef _DifferenceTp difference_type;
01203 
01204     typedef typename std::iterator_traits<RandomAccessIteratorIterator>
01205       ::value_type::first_type
01206       RandomAccessIterator1;
01207     typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
01208       value_type;
01209 
01210     int min_seq;
01211     RandomAccessIterator3 target_end;
01212     difference_type overhang = prepare_unguarded(seqs_begin, seqs_end,
01213                                           comp, min_seq, stable);
01214 
01215     difference_type total_length = 0;
01216     for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; ++s)
01217       total_length += _GLIBCXX_PARALLEL_LENGTH(*s);
01218 
01219     if (overhang != -1)
01220       {
01221         difference_type unguarded_length =
01222             std::min(length, total_length - overhang);
01223         target_end = multiway_merge_loser_tree_unguarded
01224           <typename loser_tree_unguarded_traits<value_type, Comparator>::LT>
01225           (seqs_begin, seqs_end, target, comp, unguarded_length, stable);
01226         overhang = length - unguarded_length;
01227       }
01228     else
01229       {
01230         // Empty sequence found.
01231         overhang = length;
01232         target_end = target;
01233       }
01234 
01235 #if _GLIBCXX_ASSERTIONS
01236     _GLIBCXX_PARALLEL_ASSERT(target_end == target + length - overhang);
01237     _GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target_end, comp));
01238 #endif
01239 
01240     target_end = multiway_merge_loser_tree
01241       <typename loser_tree_traits<value_type, Comparator>::LT>
01242       (seqs_begin, seqs_end, target_end, comp, overhang, stable);
01243 
01244 #if _GLIBCXX_ASSERTIONS
01245     _GLIBCXX_PARALLEL_ASSERT(target_end == target + length);
01246     _GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target_end, comp));
01247 #endif
01248 
01249     return target_end;
01250   }
01251 
01252 template<typename RandomAccessIteratorIterator,
01253      typename RandomAccessIterator3,
01254      typename _DifferenceTp,
01255      typename Comparator>
01256   RandomAccessIterator3
01257   multiway_merge_loser_tree_sentinel(RandomAccessIteratorIterator seqs_begin,
01258                                      RandomAccessIteratorIterator seqs_end,
01259                      RandomAccessIterator3 target,
01260                      Comparator comp,
01261                      _DifferenceTp length, bool stable)
01262   {
01263     _GLIBCXX_CALL(length)
01264 
01265     typedef _DifferenceTp difference_type;
01266     typedef std::iterator_traits<RandomAccessIteratorIterator> traits_type;
01267     typedef typename std::iterator_traits<RandomAccessIteratorIterator>
01268       ::value_type::first_type
01269       RandomAccessIterator1;
01270     typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
01271       value_type;
01272 
01273     RandomAccessIterator3 target_end;
01274     difference_type overhang =
01275         prepare_unguarded_sentinel(seqs_begin, seqs_end, comp);
01276 
01277     difference_type total_length = 0;
01278     for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; ++s)
01279       {
01280         total_length += _GLIBCXX_PARALLEL_LENGTH(*s);
01281 
01282         // Sentinel spot.
01283         ++((*s).second);
01284       }
01285 
01286     difference_type unguarded_length =
01287         std::min(length, total_length - overhang);
01288     target_end = multiway_merge_loser_tree_unguarded
01289       <typename loser_tree_unguarded_traits<value_type, Comparator>::LT>
01290       (seqs_begin, seqs_end, target, comp, unguarded_length, stable);
01291     overhang = length - unguarded_length;
01292 
01293 #if _GLIBCXX_ASSERTIONS
01294     _GLIBCXX_PARALLEL_ASSERT(target_end == target + length - overhang);
01295     _GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target_end, comp));
01296 #endif
01297 
01298     // Copy rest stable.
01299     for (RandomAccessIteratorIterator s = seqs_begin;
01300          s != seqs_end && overhang > 0; ++s)
01301       {
01302         // Restore.
01303         --((*s).second);
01304         difference_type local_length =
01305             std::min<difference_type>(overhang, _GLIBCXX_PARALLEL_LENGTH(*s));
01306         target_end = std::copy((*s).first, (*s).first + local_length,
01307                                target_end);
01308         (*s).first += local_length;
01309         overhang -= local_length;
01310       }
01311 
01312 #if _GLIBCXX_ASSERTIONS
01313     _GLIBCXX_PARALLEL_ASSERT(overhang == 0);
01314     _GLIBCXX_PARALLEL_ASSERT(target_end == target + length);
01315     _GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target_end, comp));
01316 #endif
01317 
01318     return target_end;
01319   }
01320 
01321 /** @brief Sequential multi-way merging switch.
01322  *
01323  *  The _GLIBCXX_PARALLEL_DECISION if based on the branching factor and
01324  *  runtime settings.
01325  *  @param seqs_begin Begin iterator of iterator pair input sequence.
01326  *  @param seqs_end End iterator of iterator pair input sequence.
01327  *  @param target Begin iterator out output sequence.
01328  *  @param comp Comparator.
01329  *  @param length Maximum length to merge.
01330  *  @param stable Stable merging incurs a performance penalty.
01331  *  @param sentinel The sequences have a sentinel element.
01332  *  @return End iterator of output sequence. */
01333 template<typename RandomAccessIteratorIterator,
01334      typename RandomAccessIterator3,
01335      typename _DifferenceTp,
01336      typename Comparator>
01337   RandomAccessIterator3
01338   multiway_merge(RandomAccessIteratorIterator seqs_begin,
01339                  RandomAccessIteratorIterator seqs_end,
01340                  RandomAccessIterator3 target,
01341                  Comparator comp, _DifferenceTp length,
01342                  bool stable, bool sentinel,
01343                  sequential_tag)
01344   {
01345     _GLIBCXX_CALL(length)
01346 
01347     typedef _DifferenceTp difference_type;
01348     typedef typename std::iterator_traits<RandomAccessIteratorIterator>
01349       ::value_type::first_type
01350       RandomAccessIterator1;
01351     typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
01352       value_type;
01353 
01354 #if _GLIBCXX_ASSERTIONS
01355     for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; ++s)
01356       _GLIBCXX_PARALLEL_ASSERT(is_sorted((*s).first, (*s).second, comp));
01357 #endif
01358 
01359     RandomAccessIterator3 return_target = target;
01360     int k = static_cast<int>(seqs_end - seqs_begin);
01361 
01362     _MultiwayMergeAlgorithm mwma = _Settings::get().multiway_merge_algorithm;
01363 
01364     if (!sentinel && mwma == LOSER_TREE_SENTINEL)
01365       mwma = LOSER_TREE_COMBINED;
01366 
01367     switch (k)
01368       {
01369       case 0:
01370         break;
01371       case 1:
01372         return_target = std::copy(seqs_begin[0].first,
01373                                   seqs_begin[0].first + length,
01374                                   target);
01375         seqs_begin[0].first += length;
01376         break;
01377       case 2:
01378         return_target = merge_advance(seqs_begin[0].first,
01379                                       seqs_begin[0].second,
01380                                       seqs_begin[1].first,
01381                                       seqs_begin[1].second,
01382                                       target, length, comp);
01383         break;
01384       case 3:
01385         switch (mwma)
01386           {
01387           case LOSER_TREE_COMBINED:
01388             return_target = multiway_merge_3_combined(seqs_begin,
01389                               seqs_end,
01390                               target,
01391                               comp, length,
01392                               stable);
01393             break;
01394           case LOSER_TREE_SENTINEL:
01395             return_target =
01396           multiway_merge_3_variant<unguarded_iterator>(seqs_begin,
01397                                seqs_end,
01398                                target,
01399                                comp, length,
01400                                stable);
01401             break;
01402           default:
01403             return_target = 
01404           multiway_merge_3_variant<guarded_iterator>(seqs_begin,
01405                              seqs_end,
01406                              target,
01407                              comp, length,
01408                              stable);
01409             break;
01410           }
01411         break;
01412       case 4:
01413         switch (mwma)
01414           {
01415           case LOSER_TREE_COMBINED:
01416             return_target = multiway_merge_4_combined(seqs_begin,
01417                               seqs_end,
01418                               target,
01419                               comp, length, stable);
01420             break;
01421           case LOSER_TREE_SENTINEL:
01422             return_target = 
01423           multiway_merge_4_variant<unguarded_iterator>(seqs_begin,
01424                                seqs_end,
01425                                target,
01426                                comp, length,
01427                                stable);
01428             break;
01429           default:
01430             return_target = multiway_merge_4_variant<guarded_iterator>(
01431           seqs_begin,
01432           seqs_end,
01433           target,
01434           comp, length, stable);
01435             break;
01436           }
01437         break;
01438       default:
01439         {
01440           switch (mwma)
01441             {
01442             case BUBBLE:
01443               return_target = multiway_merge_bubble(seqs_begin,
01444                             seqs_end,
01445                             target,
01446                             comp, length, stable);
01447               break;
01448 #if _GLIBCXX_LOSER_TREE_EXPLICIT
01449             case LOSER_TREE_EXPLICIT:
01450               return_target = multiway_merge_loser_tree<
01451           LoserTreeExplicit<value_type, Comparator> >(seqs_begin,
01452                               seqs_end,
01453                               target,
01454                               comp, length,
01455                               stable);
01456               break;
01457 #endif
01458 #if _GLIBCXX_LOSER_TREE
01459             case LOSER_TREE:
01460               return_target = multiway_merge_loser_tree<
01461                     LoserTree<value_type, Comparator> >(seqs_begin,
01462                             seqs_end,
01463                             target,
01464                             comp, length,
01465                             stable);
01466               break;
01467 #endif
01468 #if _GLIBCXX_LOSER_TREE_COMBINED
01469             case LOSER_TREE_COMBINED:
01470               return_target = multiway_merge_loser_tree_combined(seqs_begin,
01471                                  seqs_end,
01472                                  target,
01473                                  comp, length,
01474                                  stable);
01475               break;
01476 #endif
01477 #if _GLIBCXX_LOSER_TREE_SENTINEL
01478             case LOSER_TREE_SENTINEL:
01479               return_target = multiway_merge_loser_tree_sentinel(seqs_begin,
01480                                  seqs_end,
01481                                  target,
01482                                  comp, length,
01483                                  stable);
01484               break;
01485 #endif
01486             default:
01487               // multiway_merge algorithm not implemented.
01488               _GLIBCXX_PARALLEL_ASSERT(0);
01489               break;
01490             }
01491         }
01492       }
01493 #if _GLIBCXX_ASSERTIONS
01494     _GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target + length, comp));
01495 #endif
01496 
01497     return return_target;
01498   }
01499 
01500 /** @brief Parallel multi-way merge routine.
01501  *
01502  *  The _GLIBCXX_PARALLEL_DECISION is based on the branching factor
01503  *  and runtime settings.
01504  *  @param seqs_begin Begin iterator of iterator pair input sequence.
01505  *  @param seqs_end End iterator of iterator pair input sequence.
01506  *  @param target Begin iterator of output sequence.
01507  *  @param comp Comparator.
01508  *  @param length Maximum length to merge.
01509  *  @param stable Stable merging incurs a performance penalty.
01510  *  @param sentinel Ignored.
01511  *  @return End iterator of output sequence.
01512  */
01513 template<typename RandomAccessIteratorIterator,
01514      typename RandomAccessIterator3,
01515      typename _DifferenceTp,
01516      typename Comparator>
01517   RandomAccessIterator3
01518   parallel_multiway_merge(RandomAccessIteratorIterator seqs_begin,
01519                           RandomAccessIteratorIterator seqs_end,
01520                            RandomAccessIterator3 target,
01521                            Comparator comp,
01522                            _DifferenceTp length, bool stable, bool sentinel)
01523     {
01524       _GLIBCXX_CALL(length)
01525 
01526       typedef _DifferenceTp difference_type;
01527       typedef typename std::iterator_traits<RandomAccessIteratorIterator>
01528         ::value_type::first_type
01529         RandomAccessIterator1;
01530       typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
01531         value_type;
01532 
01533       // k sequences.
01534       int k = static_cast<int>(seqs_end - seqs_begin);
01535 
01536       difference_type total_length = 0;
01537       for (RandomAccessIteratorIterator raii = seqs_begin;
01538            raii != seqs_end; ++raii)
01539         total_length += _GLIBCXX_PARALLEL_LENGTH(*raii);
01540 
01541       _GLIBCXX_CALL(total_length)
01542 
01543       if (total_length == 0 || k == 0)
01544         return target;
01545 
01546       bool tight = (total_length == length);
01547 
01548       std::vector<std::pair<difference_type, difference_type> >* pieces;
01549 
01550       thread_index_t num_threads = static_cast<thread_index_t>(
01551     std::min<difference_type>(get_max_threads(), total_length));
01552       const _Settings& __s = _Settings::get();
01553 
01554 #     pragma omp parallel num_threads (num_threads)
01555         {
01556 #         pragma omp single
01557             {
01558               num_threads = omp_get_num_threads();
01559               // Thread t will have to merge pieces[iam][0..k - 1]
01560               pieces = new std::vector<
01561                   std::pair<difference_type, difference_type> >[num_threads];
01562               for (int s = 0; s < num_threads; ++s)
01563                 pieces[s].resize(k);
01564 
01565               difference_type num_samples = __s.merge_oversampling 
01566                         * num_threads;
01567 
01568               if (__s.multiway_merge_splitting == SAMPLING)
01569                 {
01570                   value_type* samples = static_cast<value_type*>(
01571                     ::operator new(sizeof(value_type) * k * num_samples));
01572                   // Sample.
01573                   for (int s = 0; s < k; ++s)
01574                     for (difference_type i = 0; i < num_samples; ++i)
01575                       {
01576                         difference_type sample_index =
01577               static_cast<difference_type>(
01578                 _GLIBCXX_PARALLEL_LENGTH(seqs_begin[s])
01579                 * (double(i + 1) / (num_samples + 1))
01580                 * (double(length) / total_length));
01581                         ::new(&(samples[s * num_samples + i]))
01582                 value_type(seqs_begin[s].first[sample_index]);
01583                       }
01584 
01585                   if (stable)
01586                     __gnu_sequential::stable_sort(samples, samples
01587                           + (num_samples * k), comp);
01588                   else
01589                     __gnu_sequential::sort(samples, samples
01590                        + (num_samples * k), comp);
01591 
01592                   for (int slab = 0; slab < num_threads; ++slab)
01593                     // For each slab / processor.
01594                     for (int seq = 0; seq < k; ++seq)
01595                       {
01596                         // For each sequence.
01597                         if (slab > 0)
01598                           pieces[slab][seq].first =
01599                               std::upper_bound(seqs_begin[seq].first,
01600                            seqs_begin[seq].second,
01601                            samples[num_samples * k
01602                                * slab / num_threads],
01603                            comp)
01604                 - seqs_begin[seq].first;
01605                         else
01606                           {
01607                             // Absolute beginning.
01608                             pieces[slab][seq].first = 0;
01609                           }
01610                         if ((slab + 1) < num_threads)
01611                           pieces[slab][seq].second =
01612                 std::upper_bound(seqs_begin[seq].first,
01613                          seqs_begin[seq].second,
01614                          samples[num_samples * k
01615                              * (slab + 1)
01616                              / num_threads], comp)
01617                 - seqs_begin[seq].first;
01618                         else
01619               pieces[slab][seq].second 
01620                 = _GLIBCXX_PARALLEL_LENGTH(seqs_begin[seq]);
01621                       }
01622           ::operator delete(samples);
01623                 }
01624               else
01625                 {
01626                   // (_Settings::multiway_merge_splitting == _Settings::EXACT).
01627                   std::vector<RandomAccessIterator1>* offsets =
01628                       new std::vector<RandomAccessIterator1>[num_threads];
01629                   std::vector<
01630                       std::pair<RandomAccessIterator1, RandomAccessIterator1>
01631                       > se(k);
01632 
01633                   copy(seqs_begin, seqs_end, se.begin());
01634 
01635                   difference_type* borders =
01636                       new difference_type[num_threads + 1];
01637                   equally_split(length, num_threads, borders);
01638 
01639                   for (int s = 0; s < (num_threads - 1); ++s)
01640                     {
01641                       offsets[s].resize(k);
01642                       multiseq_partition(
01643                           se.begin(), se.end(), borders[s + 1],
01644                           offsets[s].begin(), comp);
01645 
01646                       // Last one also needed and available.
01647                       if (!tight)
01648                         {
01649                           offsets[num_threads - 1].resize(k);
01650                           multiseq_partition(se.begin(), se.end(),
01651                          difference_type(length),
01652                          offsets[num_threads - 1].begin(),
01653                          comp);
01654                         }
01655                     }
01656 
01657 
01658                   for (int slab = 0; slab < num_threads; ++slab)
01659                     {
01660                       // For each slab / processor.
01661                       for (int seq = 0; seq < k; ++seq)
01662                         {
01663                           // For each sequence.
01664                           if (slab == 0)
01665                             {
01666                               // Absolute beginning.
01667                               pieces[slab][seq].first = 0;
01668                             }
01669                           else
01670                             pieces[slab][seq].first =
01671                                 pieces[slab - 1][seq].second;
01672                           if (!tight || slab < (num_threads - 1))
01673                             pieces[slab][seq].second =
01674                   offsets[slab][seq] - seqs_begin[seq].first;
01675                           else
01676                             {
01677                               // slab == num_threads - 1
01678                               pieces[slab][seq].second =
01679                 _GLIBCXX_PARALLEL_LENGTH(seqs_begin[seq]);
01680                             }
01681                         }
01682                     }
01683                   delete[] offsets;
01684                 }
01685             } //single
01686 
01687           thread_index_t iam = omp_get_thread_num();
01688 
01689           difference_type target_position = 0;
01690 
01691           for (int c = 0; c < k; ++c)
01692             target_position += pieces[iam][c].first;
01693 
01694           if (k > 2)
01695             {
01696               std::pair<RandomAccessIterator1, RandomAccessIterator1>* chunks
01697                 = new
01698                   std::pair<RandomAccessIterator1, RandomAccessIterator1>[k];
01699 
01700               difference_type local_length = 0;
01701               for (int s = 0; s < k; ++s)
01702                 {
01703                   chunks[s] = std::make_pair(
01704             seqs_begin[s].first + pieces[iam][s].first,
01705             seqs_begin[s].first + pieces[iam][s].second);
01706                   local_length += _GLIBCXX_PARALLEL_LENGTH(chunks[s]);
01707                 }
01708 
01709               multiway_merge(
01710                     chunks, chunks + k, target + target_position, comp,
01711                     std::min(local_length, length - target_position),
01712                     stable, false, sequential_tag());
01713 
01714               delete[] chunks;
01715             }
01716           else if (k == 2)
01717             {
01718               RandomAccessIterator1
01719                   begin0 = seqs_begin[0].first + pieces[iam][0].first,
01720                   begin1 = seqs_begin[1].first + pieces[iam][1].first;
01721               merge_advance(begin0,
01722                 seqs_begin[0].first + pieces[iam][0].second,
01723                 begin1,
01724                 seqs_begin[1].first + pieces[iam][1].second,
01725                 target + target_position,
01726                 (pieces[iam][0].second - pieces[iam][0].first) +
01727                 (pieces[iam][1].second - pieces[iam][1].first),
01728                 comp);
01729             }
01730         } //parallel
01731 
01732 #if _GLIBCXX_ASSERTIONS
01733       _GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target + length, comp));
01734 #endif
01735 
01736       // Update ends of sequences.
01737       for (int s = 0; s < k; ++s)
01738         seqs_begin[s].first += pieces[num_threads - 1][s].second;
01739 
01740       delete[] pieces;
01741 
01742       return target + length;
01743     }
01744 
01745 /**
01746  *  @brief Multi-way merging front-end.
01747  *  @param seqs_begin Begin iterator of iterator pair input sequence.
01748  *  @param seqs_end End iterator of iterator pair input sequence.
01749  *  @param target Begin iterator out output sequence.
01750  *  @param comp Comparator.
01751  *  @param length Maximum length to merge.
01752  *  @param stable Stable merging incurs a performance penalty.
01753  *  @return End iterator of output sequence.
01754  */
01755 template<typename RandomAccessIteratorPairIterator,
01756      typename RandomAccessIterator3,
01757      typename _DifferenceTp,
01758      typename Comparator>
01759   RandomAccessIterator3
01760   multiway_merge(RandomAccessIteratorPairIterator seqs_begin,
01761                 RandomAccessIteratorPairIterator seqs_end,
01762                 RandomAccessIterator3 target, Comparator comp,
01763                 _DifferenceTp length, bool stable)
01764   {
01765     typedef _DifferenceTp difference_type;
01766     _GLIBCXX_CALL(seqs_end - seqs_begin)
01767 
01768     if (seqs_begin == seqs_end)
01769       return target;
01770 
01771     const _Settings& __s = _Settings::get();
01772 
01773     RandomAccessIterator3 target_end;
01774     if (_GLIBCXX_PARALLEL_CONDITION(
01775         ((seqs_end - seqs_begin) >= __s.multiway_merge_minimal_k)
01776         && ((sequence_index_t)length >= __s.multiway_merge_minimal_n)))
01777       target_end = parallel_multiway_merge(seqs_begin, seqs_end,
01778                        target, comp,
01779                       static_cast<difference_type>(length),
01780                        stable, false);
01781     else
01782       target_end = multiway_merge(seqs_begin, seqs_end, target, comp, length,
01783                   stable, false, sequential_tag());
01784 
01785     return target_end;
01786   }
01787 
01788 /** @brief Multi-way merging front-end.
01789  *  @param seqs_begin Begin iterator of iterator pair input sequence.
01790  *  @param seqs_end End iterator of iterator pair input sequence.
01791  *  @param target Begin iterator out output sequence.
01792  *  @param comp Comparator.
01793  *  @param length Maximum length to merge.
01794  *  @param stable Stable merging incurs a performance penalty.
01795  *  @return End iterator of output sequence.
01796  *  @pre For each @c i, @c seqs_begin[i].second must be the end
01797  *  marker of the sequence, but also reference the one more sentinel
01798  *  element. */
01799 template<typename RandomAccessIteratorPairIterator,
01800      typename RandomAccessIterator3,
01801      typename _DifferenceTp,
01802      typename Comparator>
01803   RandomAccessIterator3
01804   multiway_merge_sentinel(RandomAccessIteratorPairIterator seqs_begin,
01805                           RandomAccessIteratorPairIterator seqs_end,
01806                           RandomAccessIterator3 target,
01807                           Comparator comp,
01808                           _DifferenceTp length,
01809                           bool stable)
01810   {
01811     typedef _DifferenceTp difference_type;
01812 
01813     if (seqs_begin == seqs_end)
01814       return target;
01815 
01816     _GLIBCXX_CALL(seqs_end - seqs_begin)
01817 
01818     const _Settings& __s = _Settings::get();
01819     const bool cond1 = seqs_end - seqs_begin >= __s.multiway_merge_minimal_k;
01820     const bool cond2 = sequence_index_t(length) >= __s.multiway_merge_minimal_n;
01821     if (_GLIBCXX_PARALLEL_CONDITION(cond1 && cond2))
01822       return parallel_multiway_merge(seqs_begin, seqs_end, target, comp, 
01823                      length, stable, true);
01824     else
01825       return multiway_merge(seqs_begin, seqs_end, target, comp, length, stable,
01826                 true, sequential_tag());
01827   }
01828 }
01829 
01830 #endif

Generated on Wed Mar 26 00:43:05 2008 for libstdc++ by  doxygen 1.5.1