multiway_mergesort.h

Go to the documentation of this file.
00001 // -*- C++ -*-
00002 
00003 // Copyright (C) 2007, 2008 Free Software Foundation, Inc.
00004 //
00005 // This file is part of the GNU ISO C++ Library.  This library is free
00006 // software; you can redistribute it and/or modify it under the terms
00007 // of the GNU General Public License as published by the Free Software
00008 // Foundation; either version 2, or (at your option) any later
00009 // version.
00010 
00011 // This library is distributed in the hope that it will be useful, but
00012 // WITHOUT ANY WARRANTY; without even the implied warranty of
00013 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00014 // General Public License for more details.
00015 
00016 // You should have received a copy of the GNU General Public License
00017 // along with this library; see the file COPYING.  If not, write to
00018 // the Free Software Foundation, 59 Temple Place - Suite 330, Boston,
00019 // MA 02111-1307, USA.
00020 
00021 // As a special exception, you may use this file as part of a free
00022 // software library without restriction.  Specifically, if other files
00023 // instantiate templates or use macros or inline functions from this
00024 // file, or you compile this file and link it with other files to
00025 // produce an executable, this file does not by itself cause the
00026 // resulting executable to be covered by the GNU General Public
00027 // License.  This exception does not however invalidate any other
00028 // reasons why the executable file might be covered by the GNU General
00029 // Public License.
00030 
00031 /** @file parallel/multiway_mergesort.h
00032  *  @brief Parallel multiway merge sort.
00033  *  This file is a GNU parallel extension to the Standard C++ Library.
00034  */
00035 
00036 // Written by Johannes Singler.
00037 
00038 #ifndef _GLIBCXX_PARALLEL_MERGESORT_H
00039 #define _GLIBCXX_PARALLEL_MERGESORT_H 1
00040 
00041 #include <vector>
00042 
00043 #include <parallel/basic_iterator.h>
00044 #include <bits/stl_algo.h>
00045 #include <parallel/parallel.h>
00046 #include <parallel/multiway_merge.h>
00047 
00048 namespace __gnu_parallel
00049 {
00050 
/** @brief Describes a subsequence by a pair of positions.
  *
  *  Both members are plain values of type @c _DifferenceTp; the struct
  *  itself attaches no further meaning to them. */
template<typename _DifferenceTp>
  struct Piece
  {
    /** @brief Type used for both positions. */
    typedef _DifferenceTp difference_type;

    /** @brief Position at which the subsequence begins. */
    difference_type begin;

    /** @brief Position at which the subsequence ends. */
    difference_type end;
  };
00063 
00064 /** @brief Data accessed by all threads.
00065   *
00066   *  PMWMS = parallel multiway mergesort */
00067 template<typename RandomAccessIterator>
00068   struct PMWMSSortingData
00069   {
00070     typedef std::iterator_traits<RandomAccessIterator> traits_type;
00071     typedef typename traits_type::value_type value_type;
00072     typedef typename traits_type::difference_type difference_type;
00073 
00074     /** @brief Number of threads involved. */
00075     thread_index_t num_threads;
00076 
00077     /** @brief Input begin. */
00078     RandomAccessIterator source;
00079 
00080     /** @brief Start indices, per thread. */
00081     difference_type* starts;
00082 
00083     /** @brief Temporary arrays for each thread.
00084      *
00085      *  Indirection Allows using the temporary storage in different
00086      *  ways, without code duplication.
00087      *  @see _GLIBCXX_MULTIWAY_MERGESORT_COPY_LAST */
00088     value_type** temporaries;
00089 
00090 #if _GLIBCXX_MULTIWAY_MERGESORT_COPY_LAST
00091     /** @brief Storage in which to sort. */
00092     RandomAccessIterator* sorting_places;
00093 
00094     /** @brief Storage into which to merge. */
00095     value_type** merging_places;
00096 #else
00097     /** @brief Storage in which to sort. */
00098     value_type** sorting_places;
00099 
00100     /** @brief Storage into which to merge. */
00101     RandomAccessIterator* merging_places;
00102 #endif
00103     /** @brief Samples. */
00104     value_type* samples;
00105 
00106     /** @brief Offsets to add to the found positions. */
00107     difference_type* offsets;
00108 
00109     /** @brief Pieces of data to merge @c [thread][sequence] */
00110     std::vector<Piece<difference_type> >* pieces;
00111 
00112     /** @brief Stable sorting desired. */
00113     bool stable;
00114 };
00115 
00116 /**
00117   *  @brief Select samples from a sequence.
00118   *  @param sd Pointer to algorithm data. Result will be placed in
00119   *  @c sd->samples.
00120   *  @param num_samples Number of samples to select.
00121   */
00122 template<typename RandomAccessIterator, typename _DifferenceTp>
00123   void 
00124   determine_samples(PMWMSSortingData<RandomAccessIterator>* sd,
00125                     _DifferenceTp& num_samples)
00126   {
00127     typedef std::iterator_traits<RandomAccessIterator> traits_type;
00128     typedef typename traits_type::value_type value_type;
00129     typedef _DifferenceTp difference_type;
00130 
00131     thread_index_t iam = omp_get_thread_num();
00132 
00133     num_samples = _Settings::get().sort_mwms_oversampling * sd->num_threads - 1;
00134 
00135     difference_type* es = new difference_type[num_samples + 2];
00136 
00137     equally_split(sd->starts[iam + 1] - sd->starts[iam], 
00138                   num_samples + 1, es);
00139 
00140     for (difference_type i = 0; i < num_samples; ++i)
00141       ::new(&(sd->samples[iam * num_samples + i]))
00142       value_type(sd->source[sd->starts[iam] + es[i + 1]]);
00143 
00144     delete[] es;
00145   }
00146 
00147 /** @brief PMWMS code executed by each thread.
00148   *  @param sd Pointer to algorithm data.
00149   *  @param comp Comparator.
00150   */
00151 template<typename RandomAccessIterator, typename Comparator>
00152   void 
00153   parallel_sort_mwms_pu(PMWMSSortingData<RandomAccessIterator>* sd,
00154                         Comparator& comp)
00155   {
00156     typedef std::iterator_traits<RandomAccessIterator> traits_type;
00157     typedef typename traits_type::value_type value_type;
00158     typedef typename traits_type::difference_type difference_type;
00159 
00160     thread_index_t iam = omp_get_thread_num();
00161 
00162     // Length of this thread's chunk, before merging.
00163     difference_type length_local = sd->starts[iam + 1] - sd->starts[iam];
00164 
00165 #if _GLIBCXX_MULTIWAY_MERGESORT_COPY_LAST
00166     typedef RandomAccessIterator SortingPlacesIterator;
00167 
00168     // Sort in input storage.
00169     sd->sorting_places[iam] = sd->source + sd->starts[iam];
00170 #else
00171     typedef value_type* SortingPlacesIterator;
00172 
00173     // Sort in temporary storage, leave space for sentinel.
00174     sd->sorting_places[iam] = sd->temporaries[iam] = 
00175         static_cast<value_type*>(
00176         ::operator new(sizeof(value_type) * (length_local + 1)));
00177 
00178     // Copy there.
00179     std::uninitialized_copy(sd->source + sd->starts[iam],
00180                             sd->source + sd->starts[iam] + length_local,
00181                             sd->sorting_places[iam]);
00182 #endif
00183 
00184     // Sort locally.
00185     if (sd->stable)
00186       __gnu_sequential::stable_sort(sd->sorting_places[iam],
00187                                     sd->sorting_places[iam] + length_local,
00188                                     comp);
00189     else
00190       __gnu_sequential::sort(sd->sorting_places[iam],
00191                              sd->sorting_places[iam] + length_local,
00192                              comp);
00193 
00194     // Invariant: locally sorted subsequence in sd->sorting_places[iam],
00195     // sd->sorting_places[iam] + length_local.
00196     const _Settings& __s = _Settings::get();
00197     if (__s.sort_splitting == SAMPLING)
00198       {
00199         difference_type num_samples;
00200         determine_samples(sd, num_samples);
00201 
00202 #       pragma omp barrier
00203 
00204 #       pragma omp single
00205         __gnu_sequential::sort(sd->samples,
00206                                sd->samples + (num_samples * sd->num_threads),
00207                                comp);
00208 
00209 #       pragma omp barrier
00210 
00211         for (int s = 0; s < sd->num_threads; ++s)
00212           {
00213             // For each sequence.
00214               if (num_samples * iam > 0)
00215                 sd->pieces[iam][s].begin = 
00216                     std::lower_bound(sd->sorting_places[s],
00217                         sd->sorting_places[s]
00218                             + (sd->starts[s + 1] - sd->starts[s]),
00219                         sd->samples[num_samples * iam],
00220                         comp)
00221                     - sd->sorting_places[s];
00222             else
00223               // Absolute beginning.
00224               sd->pieces[iam][s].begin = 0;
00225 
00226             if ((num_samples * (iam + 1)) < (num_samples * sd->num_threads))
00227               sd->pieces[iam][s].end =
00228                   std::lower_bound(sd->sorting_places[s],
00229                           sd->sorting_places[s]
00230                               + (sd->starts[s + 1] - sd->starts[s]),
00231                           sd->samples[num_samples * (iam + 1)],
00232                           comp)
00233                   - sd->sorting_places[s];
00234             else
00235               // Absolute end.
00236               sd->pieces[iam][s].end = sd->starts[s + 1] - sd->starts[s];
00237             }
00238       }
00239     else if (__s.sort_splitting == EXACT)
00240       {
00241 #       pragma omp barrier
00242 
00243         std::vector<std::pair<SortingPlacesIterator, SortingPlacesIterator> >
00244             seqs(sd->num_threads);
00245         for (int s = 0; s < sd->num_threads; ++s)
00246           seqs[s] = std::make_pair(sd->sorting_places[s],
00247                                    sd->sorting_places[s]
00248                                        + (sd->starts[s + 1] - sd->starts[s]));
00249 
00250         std::vector<SortingPlacesIterator> offsets(sd->num_threads);
00251 
00252         // if not last thread
00253         if (iam < sd->num_threads - 1)
00254           multiseq_partition(seqs.begin(), seqs.end(),
00255                              sd->starts[iam + 1], offsets.begin(), comp);
00256 
00257         for (int seq = 0; seq < sd->num_threads; ++seq)
00258           {
00259             // for each sequence
00260             if (iam < (sd->num_threads - 1))
00261               sd->pieces[iam][seq].end = offsets[seq] - seqs[seq].first;
00262             else
00263               // very end of this sequence
00264               sd->pieces[iam][seq].end = (sd->starts[seq + 1]
00265                       - sd->starts[seq]);
00266           }
00267 
00268 #       pragma omp barrier
00269 
00270         for (int seq = 0; seq < sd->num_threads; ++seq)
00271           {
00272             // For each sequence.
00273             if (iam > 0)
00274               sd->pieces[iam][seq].begin = sd->pieces[iam - 1][seq].end;
00275             else
00276               // Absolute beginning.
00277               sd->pieces[iam][seq].begin = 0;
00278           }
00279       }
00280 
00281     // Offset from target begin, length after merging.
00282     difference_type offset = 0, length_am = 0;
00283     for (int s = 0; s < sd->num_threads; ++s)
00284       {
00285         length_am += sd->pieces[iam][s].end - sd->pieces[iam][s].begin;
00286         offset += sd->pieces[iam][s].begin;
00287       }
00288 
00289 #if _GLIBCXX_MULTIWAY_MERGESORT_COPY_LAST
00290     // Merge to temporary storage, uninitialized creation not possible
00291     // since there is no multiway_merge calling the placement new
00292     // instead of the assignment operator.
00293     // XXX incorrect (de)construction
00294     sd->merging_places[iam] = sd->temporaries[iam] =
00295         static_cast<value_type*>(::operator new(sizeof(value_type)
00296                         * length_am));
00297 #else
00298     // Merge directly to target.
00299     sd->merging_places[iam] = sd->source + offset;
00300 #endif
00301     std::vector<std::pair<SortingPlacesIterator, SortingPlacesIterator> >
00302         seqs(sd->num_threads);
00303 
00304     for (int s = 0; s < sd->num_threads; ++s)
00305       {
00306         seqs[s] =
00307       std::make_pair(sd->sorting_places[s] + sd->pieces[iam][s].begin,
00308              sd->sorting_places[s] + sd->pieces[iam][s].end);
00309       }
00310 
00311     multiway_merge(seqs.begin(), seqs.end(), sd->merging_places[iam], comp,
00312                    length_am, sd->stable, false, sequential_tag());
00313 
00314 #   pragma omp barrier
00315 
00316 #if _GLIBCXX_MULTIWAY_MERGESORT_COPY_LAST
00317     // Write back.
00318     std::copy(sd->merging_places[iam],
00319               sd->merging_places[iam] + length_am,
00320               sd->source + offset);
00321 #endif
00322 
00323     ::operator delete(sd->temporaries[iam]);
00324   }
00325 
00326 /** @brief PMWMS main call.
00327   *  @param begin Begin iterator of sequence.
00328   *  @param end End iterator of sequence.
00329   *  @param comp Comparator.
00330   *  @param n Length of sequence.
00331   *  @param num_threads Number of threads to use.
00332   *  @param stable Stable sorting.
00333   */
00334 template<typename RandomAccessIterator, typename Comparator>
00335   void
00336   parallel_sort_mwms(RandomAccessIterator begin, RandomAccessIterator end,
00337                      Comparator comp, typename
00338              std::iterator_traits<RandomAccessIterator>::
00339              difference_type n, int num_threads, bool stable)
00340   {
00341     _GLIBCXX_CALL(n)
00342 
00343     typedef std::iterator_traits<RandomAccessIterator> traits_type;
00344     typedef typename traits_type::value_type value_type;
00345     typedef typename traits_type::difference_type difference_type;
00346 
00347     if (n <= 1)
00348       return;
00349 
00350     // at least one element per thread
00351     if (num_threads > n)
00352       num_threads = static_cast<thread_index_t>(n);
00353 
00354     // shared variables
00355     PMWMSSortingData<RandomAccessIterator> sd;
00356     difference_type* starts;
00357     const _Settings& __s = _Settings::get();
00358 
00359 #   pragma omp parallel num_threads(num_threads)
00360       {
00361         num_threads = omp_get_num_threads();  //no more threads than requested
00362 
00363 #       pragma omp single
00364           {
00365             sd.num_threads = num_threads;
00366             sd.source = begin;
00367             sd.temporaries = new value_type*[num_threads];
00368 
00369 #if _GLIBCXX_MULTIWAY_MERGESORT_COPY_LAST
00370             sd.sorting_places = new RandomAccessIterator[num_threads];
00371             sd.merging_places = new value_type*[num_threads];
00372 #else
00373             sd.sorting_places = new value_type*[num_threads];
00374             sd.merging_places = new RandomAccessIterator[num_threads];
00375 #endif
00376 
00377             if (__s.sort_splitting == SAMPLING)
00378               {
00379                 unsigned int size = 
00380                     (__s.sort_mwms_oversampling * num_threads - 1)
00381                         * num_threads;
00382                 sd.samples = static_cast<value_type*>(
00383           ::operator new(size * sizeof(value_type)));
00384               }
00385             else
00386               sd.samples = NULL;
00387 
00388             sd.offsets = new difference_type[num_threads - 1];
00389             sd.pieces = new std::vector<Piece<difference_type> >[num_threads];
00390             for (int s = 0; s < num_threads; ++s)
00391               sd.pieces[s].resize(num_threads);
00392             starts = sd.starts = new difference_type[num_threads + 1];
00393             sd.stable = stable;
00394 
00395             difference_type chunk_length = n / num_threads;
00396             difference_type split = n % num_threads;
00397             difference_type pos = 0;
00398             for (int i = 0; i < num_threads; ++i)
00399               {
00400                 starts[i] = pos;
00401                 pos += (i < split) ? (chunk_length + 1) : chunk_length;
00402               }
00403             starts[num_threads] = pos;
00404           }
00405 
00406         // Now sort in parallel.
00407         parallel_sort_mwms_pu(&sd, comp);
00408       } //parallel
00409 
00410     delete[] starts;
00411     delete[] sd.temporaries;
00412     delete[] sd.sorting_places;
00413     delete[] sd.merging_places;
00414 
00415     if (__s.sort_splitting == SAMPLING)
00416       ::operator delete(sd.samples);
00417 
00418     delete[] sd.offsets;
00419     delete[] sd.pieces;
00420   }
00421 } //namespace __gnu_parallel
00422 
00423 #endif

Generated on Wed Mar 26 00:43:05 2008 for libstdc++ by  doxygen 1.5.1