multiseq_selection.h

Go to the documentation of this file.
00001 // -*- C++ -*-
00002 
00003 // Copyright (C) 2007, 2008 Free Software Foundation, Inc.
00004 //
00005 // This file is part of the GNU ISO C++ Library.  This library is free
00006 // software; you can redistribute it and/or modify it under the terms
00007 // of the GNU General Public License as published by the Free Software
00008 // Foundation; either version 2, or (at your option) any later
00009 // version.
00010 
00011 // This library is distributed in the hope that it will be useful, but
00012 // WITHOUT ANY WARRANTY; without even the implied warranty of
00013 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00014 // General Public License for more details.
00015 
00016 // You should have received a copy of the GNU General Public License
00017 // along with this library; see the file COPYING.  If not, write to
00018 // the Free Software Foundation, 59 Temple Place - Suite 330, Boston,
00019 // MA 02111-1307, USA.
00020 
00021 // As a special exception, you may use this file as part of a free
00022 // software library without restriction.  Specifically, if other files
00023 // instantiate templates or use macros or inline functions from this
00024 // file, or you compile this file and link it with other files to
00025 // produce an executable, this file does not by itself cause the
00026 // resulting executable to be covered by the GNU General Public
00027 // License.  This exception does not however invalidate any other
00028 // reasons why the executable file might be covered by the GNU General
00029 // Public License.
00030 
00031 /** @file parallel/multiseq_selection.h
00032  *  @brief Functions to find elements of a certain global rank in
00033  *  multiple sorted sequences.  Also serves for splitting such
00034  *  sequence sets.
00035  *
00036  *  The algorithm description can be found in 
00037  *
00038  *  P. J. Varman, S. D. Scheufler, B. R. Iyer, and G. R. Ricard.
00039  *  Merging Multiple Lists on Hierarchical-Memory Multiprocessors.
00040  *  Journal of Parallel and Distributed Computing, 12(2):171–177, 1991.
00041  *
00042  *  This file is a GNU parallel extension to the Standard C++ Library.
00043  */
00044 
00045 // Written by Johannes Singler.
00046 
00047 #ifndef _GLIBCXX_PARALLEL_MULTISEQ_SELECTION_H
00048 #define _GLIBCXX_PARALLEL_MULTISEQ_SELECTION_H 1
00049 
00050 #include <vector>
00051 #include <queue>
00052 
00053 #include <bits/stl_algo.h>
00054 
00055 #include <parallel/sort.h>
00056 
00057 namespace __gnu_parallel
00058 {
/**
 *  @brief Compare a pair of types lexicographically, ascending.
 *
 *  The first members are ordered with the supplied comparator; if neither
 *  first member precedes the other, the second members are ordered with
 *  operator<.
 *
 *  The comparator is stored by reference, so it has to outlive this
 *  functor.
 */
template<typename T1, typename T2, typename Comparator>
  class lexicographic
  {
  private:
    Comparator& comp;

  public:
    // Adaptable-functor typedefs.  They are spelled out here instead of
    // being inherited from std::binary_function, which is deprecated in
    // C++11 and removed in C++17.
    typedef std::pair<T1, T2> first_argument_type;
    typedef std::pair<T1, T2> second_argument_type;
    typedef bool result_type;

    lexicographic(Comparator& _comp) : comp(_comp) { }

    /** @brief Returns true iff @c p1 is ordered strictly before @c p2. */
    bool
    operator()(const std::pair<T1, T2>& p1,
               const std::pair<T1, T2>& p2) const
    {
      if (comp(p1.first, p2.first))
        return true;

      if (comp(p2.first, p1.first))
        return false;

      // Firsts are equal.
      return p1.second < p2.second;
    }
  };
00084 
/**
 *  @brief Compare a pair of types lexicographically, descending.
 *
 *  Yields the result of the ascending comparison with the arguments
 *  swapped: the first members are ordered with the supplied comparator,
 *  ties are broken with operator< on the second members.
 *
 *  The comparator is stored by reference, so it has to outlive this
 *  functor.
 */
template<typename T1, typename T2, typename Comparator>
  class lexicographic_reverse
  {
  private:
    Comparator& comp;

  public:
    // Adaptable-functor typedefs.  Both arguments of operator() are
    // std::pair<T1, T2>, so that is what the typedefs have to name
    // (not T1 and T2 individually).
    typedef std::pair<T1, T2> first_argument_type;
    typedef std::pair<T1, T2> second_argument_type;
    typedef bool result_type;

    lexicographic_reverse(Comparator& _comp) : comp(_comp) { }

    /** @brief Returns true iff @c p2 is ordered strictly before @c p1. */
    bool
    operator()(const std::pair<T1, T2>& p1,
               const std::pair<T1, T2>& p2) const
    {
      if (comp(p2.first, p1.first))
        return true;

      if (comp(p1.first, p2.first))
        return false;

      // Firsts are equal.
      return p2.second < p1.second;
    }
  };
00109 
00110   /** 
00111    *  @brief Splits several sorted sequences at a certain global rank,
00112    *  resulting in a splitting point for each sequence.
00113    *  The sequences are passed via a sequence of random-access
00114    *  iterator pairs, none of the sequences may be empty.  If there
00115    *  are several equal elements across the split, the ones on the
00116    *  left side will be chosen from sequences with smaller number.
00117    *  @param begin_seqs Begin of the sequence of iterator pairs.
00118    *  @param end_seqs End of the sequence of iterator pairs.
00119    *  @param rank The global rank to partition at.
00120    *  @param begin_offsets A random-access sequence begin where the
00121    *  result will be stored in. Each element of the sequence is an
00122    *  iterator that points to the first element on the greater part of
00123    *  the respective sequence.
00124    *  @param comp The ordering functor, defaults to std::less<T>. 
00125    */
00126   template<typename RanSeqs, typename RankType, typename RankIterator,
00127        typename Comparator>
00128     void
00129     multiseq_partition(RanSeqs begin_seqs, RanSeqs end_seqs,
00130                RankType rank,
00131                RankIterator begin_offsets,
00132                Comparator comp = std::less<
00133                typename std::iterator_traits<typename
00134                std::iterator_traits<RanSeqs>::value_type::
00135                first_type>::value_type>()) // std::less<T>
00136     {
00137       _GLIBCXX_CALL(end_seqs - begin_seqs)
00138 
00139       typedef typename std::iterator_traits<RanSeqs>::value_type::first_type
00140     It;
00141       typedef typename std::iterator_traits<It>::difference_type
00142     difference_type;
00143       typedef typename std::iterator_traits<It>::value_type value_type;
00144 
00145       lexicographic<value_type, int, Comparator> lcomp(comp);
00146       lexicographic_reverse<value_type, int, Comparator> lrcomp(comp);
00147 
00148       // Number of sequences, number of elements in total (possibly
00149       // including padding).
00150       difference_type m = std::distance(begin_seqs, end_seqs), N = 0,
00151     nmax, n, r;
00152 
00153       for (int i = 0; i < m; i++)
00154     N += std::distance(begin_seqs[i].first, begin_seqs[i].second);
00155 
00156       if (rank == N)
00157     {
00158       for (int i = 0; i < m; i++)
00159         begin_offsets[i] = begin_seqs[i].second; // Very end.
00160       // Return m - 1;
00161     }
00162 
00163       _GLIBCXX_PARALLEL_ASSERT(m != 0 && N != 0 && rank >= 0 && rank < N);
00164 
00165       difference_type* ns = new difference_type[m];
00166       difference_type* a = new difference_type[m];
00167       difference_type* b = new difference_type[m];
00168       difference_type l;
00169 
00170       ns[0] = std::distance(begin_seqs[0].first, begin_seqs[0].second);
00171       nmax = ns[0];
00172       for (int i = 0; i < m; i++)
00173     {
00174       ns[i] = std::distance(begin_seqs[i].first, begin_seqs[i].second);
00175       nmax = std::max(nmax, ns[i]);
00176     }
00177 
00178       r = log2(nmax) + 1;
00179 
00180       // Pad all lists to this length, at least as long as any ns[i],
00181       // equality iff nmax = 2^k - 1.
00182       l = (1ULL << r) - 1;
00183 
00184       // From now on, including padding.
00185       N = l * m;
00186 
00187       for (int i = 0; i < m; i++)
00188     {
00189       a[i] = 0;
00190       b[i] = l;
00191     }
00192       n = l / 2;
00193 
00194       // Invariants:
00195       // 0 <= a[i] <= ns[i], 0 <= b[i] <= l
00196 
00197 #define S(i) (begin_seqs[i].first)
00198 
00199       // Initial partition.
00200       std::vector<std::pair<value_type, int> > sample;
00201 
00202       for (int i = 0; i < m; i++)
00203     if (n < ns[i])  //sequence long enough
00204       sample.push_back(std::make_pair(S(i)[n], i));
00205       __gnu_sequential::sort(sample.begin(), sample.end(), lcomp);
00206 
00207       for (int i = 0; i < m; i++)   //conceptual infinity
00208     if (n >= ns[i]) //sequence too short, conceptual infinity
00209       sample.push_back(std::make_pair(S(i)[0] /*dummy element*/, i));
00210 
00211       difference_type localrank = rank * m / N ;
00212 
00213       int j;
00214       for (j = 0; j < localrank && ((n + 1) <= ns[sample[j].second]); ++j)
00215     a[sample[j].second] += n + 1;
00216       for (; j < m; j++)
00217     b[sample[j].second] -= n + 1;
00218       
00219       // Further refinement.
00220       while (n > 0)
00221     {
00222       n /= 2;
00223 
00224       int lmax_seq = -1;    // to avoid warning
00225       const value_type* lmax = NULL; // impossible to avoid the warning?
00226       for (int i = 0; i < m; i++)
00227         {
00228           if (a[i] > 0)
00229         {
00230           if (!lmax)
00231             {
00232               lmax = &(S(i)[a[i] - 1]);
00233               lmax_seq = i;
00234             }
00235           else
00236             {
00237               // Max, favor rear sequences.
00238               if (!comp(S(i)[a[i] - 1], *lmax))
00239             {
00240               lmax = &(S(i)[a[i] - 1]);
00241               lmax_seq = i;
00242             }
00243             }
00244         }
00245         }
00246 
00247       int i;
00248       for (i = 0; i < m; i++)
00249         {
00250           difference_type middle = (b[i] + a[i]) / 2;
00251           if (lmax && middle < ns[i] &&
00252           lcomp(std::make_pair(S(i)[middle], i),
00253             std::make_pair(*lmax, lmax_seq)))
00254         a[i] = std::min(a[i] + n + 1, ns[i]);
00255           else
00256         b[i] -= n + 1;
00257         }
00258 
00259       difference_type leftsize = 0, total = 0;
00260       for (int i = 0; i < m; i++)
00261         {
00262           leftsize += a[i] / (n + 1);
00263           total += l / (n + 1);
00264         }
00265       
00266       difference_type skew = static_cast<difference_type>
00267         (static_cast<uint64>(total) * rank / N - leftsize);
00268 
00269       if (skew > 0)
00270         {
00271           // Move to the left, find smallest.
00272           std::priority_queue<std::pair<value_type, int>,
00273         std::vector<std::pair<value_type, int> >,
00274         lexicographic_reverse<value_type, int, Comparator> >
00275         pq(lrcomp);
00276           
00277           for (int i = 0; i < m; i++)
00278         if (b[i] < ns[i])
00279           pq.push(std::make_pair(S(i)[b[i]], i));
00280 
00281           for (; skew != 0 && !pq.empty(); --skew)
00282         {
00283           int source = pq.top().second;
00284           pq.pop();
00285 
00286           a[source] = std::min(a[source] + n + 1, ns[source]);
00287           b[source] += n + 1;
00288 
00289           if (b[source] < ns[source])
00290             pq.push(std::make_pair(S(source)[b[source]], source));
00291         }
00292         }
00293       else if (skew < 0)
00294         {
00295           // Move to the right, find greatest.
00296           std::priority_queue<std::pair<value_type, int>,
00297         std::vector<std::pair<value_type, int> >,
00298         lexicographic<value_type, int, Comparator> > pq(lcomp);
00299 
00300           for (int i = 0; i < m; i++)
00301         if (a[i] > 0)
00302           pq.push(std::make_pair(S(i)[a[i] - 1], i));
00303 
00304           for (; skew != 0; ++skew)
00305         {
00306           int source = pq.top().second;
00307           pq.pop();
00308 
00309           a[source] -= n + 1;
00310           b[source] -= n + 1;
00311 
00312           if (a[source] > 0)
00313             pq.push(std::make_pair(S(source)[a[source] - 1], source));
00314         }
00315         }
00316     }
00317 
00318       // Postconditions:
00319       // a[i] == b[i] in most cases, except when a[i] has been clamped
00320       // because of having reached the boundary
00321 
00322       // Now return the result, calculate the offset.
00323 
00324       // Compare the keys on both edges of the border.
00325 
00326       // Maximum of left edge, minimum of right edge.
00327       value_type* maxleft = NULL;
00328       value_type* minright = NULL;
00329       for (int i = 0; i < m; i++)
00330     {
00331       if (a[i] > 0)
00332         {
00333           if (!maxleft)
00334         maxleft = &(S(i)[a[i] - 1]);
00335           else
00336         {
00337           // Max, favor rear sequences.
00338           if (!comp(S(i)[a[i] - 1], *maxleft))
00339             maxleft = &(S(i)[a[i] - 1]);
00340         }
00341         }
00342       if (b[i] < ns[i])
00343         {
00344           if (!minright)
00345         minright = &(S(i)[b[i]]);
00346           else
00347         {
00348           // Min, favor fore sequences.
00349           if (comp(S(i)[b[i]], *minright))
00350             minright = &(S(i)[b[i]]);
00351         }
00352         }
00353     }
00354 
00355       int seq = 0;
00356       for (int i = 0; i < m; i++)
00357     begin_offsets[i] = S(i) + a[i];
00358 
00359       delete[] ns;
00360       delete[] a;
00361       delete[] b;
00362     }
00363 
00364 
00365   /** 
00366    *  @brief Selects the element at a certain global rank from several
00367    *  sorted sequences.
00368    *
00369    *  The sequences are passed via a sequence of random-access
00370    *  iterator pairs, none of the sequences may be empty.
00371    *  @param begin_seqs Begin of the sequence of iterator pairs.
00372    *  @param end_seqs End of the sequence of iterator pairs.
00373    *  @param rank The global rank to partition at.
00374    *  @param offset The rank of the selected element in the global
00375    *  subsequence of elements equal to the selected element. If the
00376    *  selected element is unique, this number is 0.
00377    *  @param comp The ordering functor, defaults to std::less. 
00378    */
00379   template<typename T, typename RanSeqs, typename RankType,
00380        typename Comparator>
00381     T
00382     multiseq_selection(RanSeqs begin_seqs, RanSeqs end_seqs, RankType rank,
00383                RankType& offset, Comparator comp = std::less<T>())
00384     {
00385       _GLIBCXX_CALL(end_seqs - begin_seqs)
00386 
00387       typedef typename std::iterator_traits<RanSeqs>::value_type::first_type
00388     It;
00389       typedef typename std::iterator_traits<It>::difference_type
00390     difference_type;
00391 
00392       lexicographic<T, int, Comparator> lcomp(comp);
00393       lexicographic_reverse<T, int, Comparator> lrcomp(comp);
00394 
00395       // Number of sequences, number of elements in total (possibly
00396       // including padding).
00397       difference_type m = std::distance(begin_seqs, end_seqs);
00398       difference_type N = 0;
00399       difference_type nmax, n, r;
00400 
00401       for (int i = 0; i < m; i++)
00402     N += std::distance(begin_seqs[i].first, begin_seqs[i].second);
00403 
00404       if (m == 0 || N == 0 || rank < 0 || rank >= N)
00405     {
00406       // Result undefined when there is no data or rank is outside bounds.
00407       throw std::exception();
00408     }
00409 
00410 
00411       difference_type* ns = new difference_type[m];
00412       difference_type* a = new difference_type[m];
00413       difference_type* b = new difference_type[m];
00414       difference_type l;
00415 
00416       ns[0] = std::distance(begin_seqs[0].first, begin_seqs[0].second);
00417       nmax = ns[0];
00418       for (int i = 0; i < m; ++i)
00419     {
00420       ns[i] = std::distance(begin_seqs[i].first, begin_seqs[i].second);
00421       nmax = std::max(nmax, ns[i]);
00422     }
00423 
00424       r = log2(nmax) + 1;
00425 
00426       // Pad all lists to this length, at least as long as any ns[i],
00427       // equality iff nmax = 2^k - 1
00428       l = pow2(r) - 1;
00429 
00430       // From now on, including padding.
00431       N = l * m;
00432 
00433       for (int i = 0; i < m; ++i)
00434     {
00435       a[i] = 0;
00436       b[i] = l;
00437     }
00438       n = l / 2;
00439 
00440       // Invariants:
00441       // 0 <= a[i] <= ns[i], 0 <= b[i] <= l
00442 
00443 #define S(i) (begin_seqs[i].first)
00444 
00445       // Initial partition.
00446       std::vector<std::pair<T, int> > sample;
00447 
00448       for (int i = 0; i < m; i++)
00449     if (n < ns[i])
00450       sample.push_back(std::make_pair(S(i)[n], i));
00451       __gnu_sequential::sort(sample.begin(), sample.end(),
00452                  lcomp, sequential_tag());
00453 
00454       // Conceptual infinity.
00455       for (int i = 0; i < m; i++)
00456     if (n >= ns[i])
00457       sample.push_back(std::make_pair(S(i)[0] /*dummy element*/, i));
00458 
00459       difference_type localrank = rank * m / N ;
00460 
00461       int j;
00462       for (j = 0; j < localrank && ((n + 1) <= ns[sample[j].second]); ++j)
00463     a[sample[j].second] += n + 1;
00464       for (; j < m; ++j)
00465     b[sample[j].second] -= n + 1;
00466 
00467       // Further refinement.
00468       while (n > 0)
00469     {
00470       n /= 2;
00471 
00472       const T* lmax = NULL;
00473       for (int i = 0; i < m; ++i)
00474         {
00475           if (a[i] > 0)
00476         {
00477           if (!lmax)
00478             lmax = &(S(i)[a[i] - 1]);
00479           else
00480             {
00481               if (comp(*lmax, S(i)[a[i] - 1]))  //max
00482             lmax = &(S(i)[a[i] - 1]);
00483             }
00484         }
00485         }
00486 
00487       int i;
00488       for (i = 0; i < m; i++)
00489         {
00490           difference_type middle = (b[i] + a[i]) / 2;
00491           if (lmax && middle < ns[i] && comp(S(i)[middle], *lmax))
00492         a[i] = std::min(a[i] + n + 1, ns[i]);
00493           else
00494         b[i] -= n + 1;
00495         }
00496 
00497       difference_type leftsize = 0, total = 0;
00498       for (int i = 0; i < m; ++i)
00499         {
00500           leftsize += a[i] / (n + 1);
00501           total += l / (n + 1);
00502         }
00503 
00504       difference_type skew = ((unsigned long long)total * rank / N
00505                   - leftsize);
00506 
00507       if (skew > 0)
00508         {
00509           // Move to the left, find smallest.
00510           std::priority_queue<std::pair<T, int>,
00511         std::vector<std::pair<T, int> >,
00512         lexicographic_reverse<T, int, Comparator> > pq(lrcomp);
00513 
00514           for (int i = 0; i < m; ++i)
00515         if (b[i] < ns[i])
00516           pq.push(std::make_pair(S(i)[b[i]], i));
00517 
00518           for (; skew != 0 && !pq.empty(); --skew)
00519         {
00520           int source = pq.top().second;
00521           pq.pop();
00522           
00523           a[source] = std::min(a[source] + n + 1, ns[source]);
00524           b[source] += n + 1;
00525           
00526           if (b[source] < ns[source])
00527             pq.push(std::make_pair(S(source)[b[source]], source));
00528         }
00529         }
00530       else if (skew < 0)
00531         {
00532           // Move to the right, find greatest.
00533           std::priority_queue<std::pair<T, int>,
00534         std::vector<std::pair<T, int> >,
00535         lexicographic<T, int, Comparator> > pq(lcomp);
00536 
00537           for (int i = 0; i < m; ++i)
00538         if (a[i] > 0)
00539           pq.push(std::make_pair(S(i)[a[i] - 1], i));
00540 
00541           for (; skew != 0; ++skew)
00542         {
00543           int source = pq.top().second;
00544           pq.pop();
00545 
00546           a[source] -= n + 1;
00547           b[source] -= n + 1;
00548 
00549           if (a[source] > 0)
00550             pq.push(std::make_pair(S(source)[a[source] - 1], source));
00551         }
00552         }
00553     }
00554 
00555       // Postconditions:
00556       // a[i] == b[i] in most cases, except when a[i] has been clamped
00557       // because of having reached the boundary
00558 
00559       // Now return the result, calculate the offset.
00560 
00561       // Compare the keys on both edges of the border.
00562 
00563       // Maximum of left edge, minimum of right edge.
00564       bool maxleftset = false, minrightset = false;
00565 
00566       // Impossible to avoid the warning?
00567       T maxleft, minright;
00568       for (int i = 0; i < m; ++i)
00569     {
00570       if (a[i] > 0)
00571         {
00572           if (!maxleftset)
00573         {
00574           maxleft = S(i)[a[i] - 1];
00575           maxleftset = true;
00576         }
00577           else
00578         {
00579           // Max.
00580           if (comp(maxleft, S(i)[a[i] - 1]))
00581             maxleft = S(i)[a[i] - 1];
00582         }
00583         }
00584       if (b[i] < ns[i])
00585         {
00586           if (!minrightset)
00587         {
00588           minright = S(i)[b[i]];
00589           minrightset = true;
00590         }
00591           else
00592         {
00593           // Min.
00594           if (comp(S(i)[b[i]], minright))
00595             minright = S(i)[b[i]];
00596         }
00597         }
00598       }
00599 
00600       // Minright is the splitter, in any case.
00601 
00602       if (!maxleftset || comp(minright, maxleft))
00603     {
00604       // Good luck, everything is split unambiguously.
00605       offset = 0;
00606     }
00607       else
00608     {
00609       // We have to calculate an offset.
00610       offset = 0;
00611 
00612       for (int i = 0; i < m; ++i)
00613         {
00614           difference_type lb = std::lower_bound(S(i), S(i) + ns[i],
00615                             minright,
00616                             comp) - S(i);
00617           offset += a[i] - lb;
00618         }
00619     }
00620 
00621       delete[] ns;
00622       delete[] a;
00623       delete[] b;
00624 
00625       return minright;
00626     }
00627 }
00628 
00629 #undef S
00630 
00631 #endif
00632 

Generated on Wed Mar 26 00:43:03 2008 for libstdc++ by  doxygen 1.5.1