00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038 #ifndef _GLIBCXX_PARALLEL_MERGESORT_H
00039 #define _GLIBCXX_PARALLEL_MERGESORT_H 1
00040
00041 #include <vector>
00042
00043 #include <parallel/basic_iterator.h>
00044 #include <bits/stl_algo.h>
00045 #include <parallel/parallel.h>
00046 #include <parallel/multiway_merge.h>
00047
00048 namespace __gnu_parallel
00049 {
00050
00051
/** @brief Describes one subsequence as a half-open index range.
 *
 *  The range covers the positions @c [begin, end) relative to the start
 *  of the sequence the piece belongs to.
 *
 *  @param _DifferenceTp Integral type used for positions and lengths. */
template<typename _DifferenceTp>
  struct Piece
  {
    typedef _DifferenceTp difference_type;

    /** @brief Position of the first element of the subsequence. */
    difference_type begin;

    /** @brief Position one past the last element of the subsequence. */
    difference_type end;
  };
00063
00064
00065
00066
00067 template<typename RandomAccessIterator>
00068 struct PMWMSSortingData
00069 {
00070 typedef std::iterator_traits<RandomAccessIterator> traits_type;
00071 typedef typename traits_type::value_type value_type;
00072 typedef typename traits_type::difference_type difference_type;
00073
00074
00075 thread_index_t num_threads;
00076
00077
00078 RandomAccessIterator source;
00079
00080
00081 difference_type* starts;
00082
00083
00084
00085
00086
00087
00088 value_type** temporaries;
00089
00090 #if _GLIBCXX_MULTIWAY_MERGESORT_COPY_LAST
00091
00092 RandomAccessIterator* sorting_places;
00093
00094
00095 value_type** merging_places;
00096 #else
00097
00098 value_type** sorting_places;
00099
00100
00101 RandomAccessIterator* merging_places;
00102 #endif
00103
00104 value_type* samples;
00105
00106
00107 difference_type* offsets;
00108
00109
00110 std::vector<Piece<difference_type> >* pieces;
00111
00112
00113 bool stable;
00114 };
00115
00116
00117
00118
00119
00120
00121
00122 template<typename RandomAccessIterator, typename _DifferenceTp>
00123 void
00124 determine_samples(PMWMSSortingData<RandomAccessIterator>* sd,
00125 _DifferenceTp& num_samples)
00126 {
00127 typedef std::iterator_traits<RandomAccessIterator> traits_type;
00128 typedef typename traits_type::value_type value_type;
00129 typedef _DifferenceTp difference_type;
00130
00131 thread_index_t iam = omp_get_thread_num();
00132
00133 num_samples = _Settings::get().sort_mwms_oversampling * sd->num_threads - 1;
00134
00135 difference_type* es = new difference_type[num_samples + 2];
00136
00137 equally_split(sd->starts[iam + 1] - sd->starts[iam],
00138 num_samples + 1, es);
00139
00140 for (difference_type i = 0; i < num_samples; ++i)
00141 ::new(&(sd->samples[iam * num_samples + i]))
00142 value_type(sd->source[sd->starts[iam] + es[i + 1]]);
00143
00144 delete[] es;
00145 }
00146
00147
00148
00149
00150
00151 template<typename RandomAccessIterator, typename Comparator>
00152 void
00153 parallel_sort_mwms_pu(PMWMSSortingData<RandomAccessIterator>* sd,
00154 Comparator& comp)
00155 {
00156 typedef std::iterator_traits<RandomAccessIterator> traits_type;
00157 typedef typename traits_type::value_type value_type;
00158 typedef typename traits_type::difference_type difference_type;
00159
00160 thread_index_t iam = omp_get_thread_num();
00161
00162
00163 difference_type length_local = sd->starts[iam + 1] - sd->starts[iam];
00164
00165 #if _GLIBCXX_MULTIWAY_MERGESORT_COPY_LAST
00166 typedef RandomAccessIterator SortingPlacesIterator;
00167
00168
00169 sd->sorting_places[iam] = sd->source + sd->starts[iam];
00170 #else
00171 typedef value_type* SortingPlacesIterator;
00172
00173
00174 sd->sorting_places[iam] = sd->temporaries[iam] =
00175 static_cast<value_type*>(
00176 ::operator new(sizeof(value_type) * (length_local + 1)));
00177
00178
00179 std::uninitialized_copy(sd->source + sd->starts[iam],
00180 sd->source + sd->starts[iam] + length_local,
00181 sd->sorting_places[iam]);
00182 #endif
00183
00184
00185 if (sd->stable)
00186 __gnu_sequential::stable_sort(sd->sorting_places[iam],
00187 sd->sorting_places[iam] + length_local,
00188 comp);
00189 else
00190 __gnu_sequential::sort(sd->sorting_places[iam],
00191 sd->sorting_places[iam] + length_local,
00192 comp);
00193
00194
00195
00196 const _Settings& __s = _Settings::get();
00197 if (__s.sort_splitting == SAMPLING)
00198 {
00199 difference_type num_samples;
00200 determine_samples(sd, num_samples);
00201
00202 # pragma omp barrier
00203
00204 # pragma omp single
00205 __gnu_sequential::sort(sd->samples,
00206 sd->samples + (num_samples * sd->num_threads),
00207 comp);
00208
00209 # pragma omp barrier
00210
00211 for (int s = 0; s < sd->num_threads; ++s)
00212 {
00213
00214 if (num_samples * iam > 0)
00215 sd->pieces[iam][s].begin =
00216 std::lower_bound(sd->sorting_places[s],
00217 sd->sorting_places[s]
00218 + (sd->starts[s + 1] - sd->starts[s]),
00219 sd->samples[num_samples * iam],
00220 comp)
00221 - sd->sorting_places[s];
00222 else
00223
00224 sd->pieces[iam][s].begin = 0;
00225
00226 if ((num_samples * (iam + 1)) < (num_samples * sd->num_threads))
00227 sd->pieces[iam][s].end =
00228 std::lower_bound(sd->sorting_places[s],
00229 sd->sorting_places[s]
00230 + (sd->starts[s + 1] - sd->starts[s]),
00231 sd->samples[num_samples * (iam + 1)],
00232 comp)
00233 - sd->sorting_places[s];
00234 else
00235
00236 sd->pieces[iam][s].end = sd->starts[s + 1] - sd->starts[s];
00237 }
00238 }
00239 else if (__s.sort_splitting == EXACT)
00240 {
00241 # pragma omp barrier
00242
00243 std::vector<std::pair<SortingPlacesIterator, SortingPlacesIterator> >
00244 seqs(sd->num_threads);
00245 for (int s = 0; s < sd->num_threads; ++s)
00246 seqs[s] = std::make_pair(sd->sorting_places[s],
00247 sd->sorting_places[s]
00248 + (sd->starts[s + 1] - sd->starts[s]));
00249
00250 std::vector<SortingPlacesIterator> offsets(sd->num_threads);
00251
00252
00253 if (iam < sd->num_threads - 1)
00254 multiseq_partition(seqs.begin(), seqs.end(),
00255 sd->starts[iam + 1], offsets.begin(), comp);
00256
00257 for (int seq = 0; seq < sd->num_threads; ++seq)
00258 {
00259
00260 if (iam < (sd->num_threads - 1))
00261 sd->pieces[iam][seq].end = offsets[seq] - seqs[seq].first;
00262 else
00263
00264 sd->pieces[iam][seq].end = (sd->starts[seq + 1]
00265 - sd->starts[seq]);
00266 }
00267
00268 # pragma omp barrier
00269
00270 for (int seq = 0; seq < sd->num_threads; ++seq)
00271 {
00272
00273 if (iam > 0)
00274 sd->pieces[iam][seq].begin = sd->pieces[iam - 1][seq].end;
00275 else
00276
00277 sd->pieces[iam][seq].begin = 0;
00278 }
00279 }
00280
00281
00282 difference_type offset = 0, length_am = 0;
00283 for (int s = 0; s < sd->num_threads; ++s)
00284 {
00285 length_am += sd->pieces[iam][s].end - sd->pieces[iam][s].begin;
00286 offset += sd->pieces[iam][s].begin;
00287 }
00288
00289 #if _GLIBCXX_MULTIWAY_MERGESORT_COPY_LAST
00290
00291
00292
00293
00294 sd->merging_places[iam] = sd->temporaries[iam] =
00295 static_cast<value_type*>(::operator new(sizeof(value_type)
00296 * length_am));
00297 #else
00298
00299 sd->merging_places[iam] = sd->source + offset;
00300 #endif
00301 std::vector<std::pair<SortingPlacesIterator, SortingPlacesIterator> >
00302 seqs(sd->num_threads);
00303
00304 for (int s = 0; s < sd->num_threads; ++s)
00305 {
00306 seqs[s] =
00307 std::make_pair(sd->sorting_places[s] + sd->pieces[iam][s].begin,
00308 sd->sorting_places[s] + sd->pieces[iam][s].end);
00309 }
00310
00311 multiway_merge(seqs.begin(), seqs.end(), sd->merging_places[iam], comp,
00312 length_am, sd->stable, false, sequential_tag());
00313
00314 # pragma omp barrier
00315
00316 #if _GLIBCXX_MULTIWAY_MERGESORT_COPY_LAST
00317
00318 std::copy(sd->merging_places[iam],
00319 sd->merging_places[iam] + length_am,
00320 sd->source + offset);
00321 #endif
00322
00323 ::operator delete(sd->temporaries[iam]);
00324 }
00325
00326
00327
00328
00329
00330
00331
00332
00333
00334 template<typename RandomAccessIterator, typename Comparator>
00335 void
00336 parallel_sort_mwms(RandomAccessIterator begin, RandomAccessIterator end,
00337 Comparator comp, typename
00338 std::iterator_traits<RandomAccessIterator>::
00339 difference_type n, int num_threads, bool stable)
00340 {
00341 _GLIBCXX_CALL(n)
00342
00343 typedef std::iterator_traits<RandomAccessIterator> traits_type;
00344 typedef typename traits_type::value_type value_type;
00345 typedef typename traits_type::difference_type difference_type;
00346
00347 if (n <= 1)
00348 return;
00349
00350
00351 if (num_threads > n)
00352 num_threads = static_cast<thread_index_t>(n);
00353
00354
00355 PMWMSSortingData<RandomAccessIterator> sd;
00356 difference_type* starts;
00357 const _Settings& __s = _Settings::get();
00358
00359 # pragma omp parallel num_threads(num_threads)
00360 {
00361 num_threads = omp_get_num_threads();
00362
00363 # pragma omp single
00364 {
00365 sd.num_threads = num_threads;
00366 sd.source = begin;
00367 sd.temporaries = new value_type*[num_threads];
00368
00369 #if _GLIBCXX_MULTIWAY_MERGESORT_COPY_LAST
00370 sd.sorting_places = new RandomAccessIterator[num_threads];
00371 sd.merging_places = new value_type*[num_threads];
00372 #else
00373 sd.sorting_places = new value_type*[num_threads];
00374 sd.merging_places = new RandomAccessIterator[num_threads];
00375 #endif
00376
00377 if (__s.sort_splitting == SAMPLING)
00378 {
00379 unsigned int size =
00380 (__s.sort_mwms_oversampling * num_threads - 1)
00381 * num_threads;
00382 sd.samples = static_cast<value_type*>(
00383 ::operator new(size * sizeof(value_type)));
00384 }
00385 else
00386 sd.samples = NULL;
00387
00388 sd.offsets = new difference_type[num_threads - 1];
00389 sd.pieces = new std::vector<Piece<difference_type> >[num_threads];
00390 for (int s = 0; s < num_threads; ++s)
00391 sd.pieces[s].resize(num_threads);
00392 starts = sd.starts = new difference_type[num_threads + 1];
00393 sd.stable = stable;
00394
00395 difference_type chunk_length = n / num_threads;
00396 difference_type split = n % num_threads;
00397 difference_type pos = 0;
00398 for (int i = 0; i < num_threads; ++i)
00399 {
00400 starts[i] = pos;
00401 pos += (i < split) ? (chunk_length + 1) : chunk_length;
00402 }
00403 starts[num_threads] = pos;
00404 }
00405
00406
00407 parallel_sort_mwms_pu(&sd, comp);
00408 }
00409
00410 delete[] starts;
00411 delete[] sd.temporaries;
00412 delete[] sd.sorting_places;
00413 delete[] sd.merging_places;
00414
00415 if (__s.sort_splitting == SAMPLING)
00416 ::operator delete(sd.samples);
00417
00418 delete[] sd.offsets;
00419 delete[] sd.pieces;
00420 }
00421 }
00422
00423 #endif