00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038 #ifndef _GLIBCXX_PARALLEL_RANDOM_SHUFFLE_H
00039 #define _GLIBCXX_PARALLEL_RANDOM_SHUFFLE_H 1
00040
00041 #include <limits>
00042 #include <bits/stl_numeric.h>
00043 #include <parallel/parallel.h>
00044 #include <parallel/random_number.h>
00045
00046 namespace __gnu_parallel
00047 {
00048
00049
00050
00051
00052
00053 typedef unsigned short bin_index;
00054
00055
00056
00057 template<typename RandomAccessIterator>
00058 struct DRandomShufflingGlobalData
00059 {
00060 typedef std::iterator_traits<RandomAccessIterator> traits_type;
00061 typedef typename traits_type::value_type value_type;
00062 typedef typename traits_type::difference_type difference_type;
00063
00064
00065 RandomAccessIterator& source;
00066
00067
00068 value_type** temporaries;
00069
00070
00071
00072
00073 difference_type** dist;
00074
00075
00076 difference_type* starts;
00077
00078
00079
00080 thread_index_t* bin_proc;
00081
00082
00083 int num_bins;
00084
00085
00086 int num_bits;
00087
00088
00089 DRandomShufflingGlobalData(RandomAccessIterator& _source)
00090 : source(_source) { }
00091 };
00092
00093
00094
00095
00096 template<typename RandomAccessIterator, typename RandomNumberGenerator>
00097 struct DRSSorterPU
00098 {
00099
00100 int num_threads;
00101
00102
00103 bin_index bins_begin;
00104
00105
00106 bin_index bins_end;
00107
00108
00109 uint32 seed;
00110
00111
00112 DRandomShufflingGlobalData<RandomAccessIterator>* sd;
00113 };
00114
00115
00116
00117
00118
00119 template<typename RandomNumberGenerator>
00120 inline int
00121 random_number_pow2(int logp, RandomNumberGenerator& rng)
00122 { return rng.genrand_bits(logp); }
00123
00124
00125
00126 template<typename RandomAccessIterator, typename RandomNumberGenerator>
00127 void
00128 parallel_random_shuffle_drs_pu(DRSSorterPU<RandomAccessIterator,
00129 RandomNumberGenerator>* pus)
00130 {
00131 typedef std::iterator_traits<RandomAccessIterator> traits_type;
00132 typedef typename traits_type::value_type value_type;
00133 typedef typename traits_type::difference_type difference_type;
00134
00135 thread_index_t iam = omp_get_thread_num();
00136 DRSSorterPU<RandomAccessIterator, RandomNumberGenerator>* d = &pus[iam];
00137 DRandomShufflingGlobalData<RandomAccessIterator>* sd = d->sd;
00138
00139
00140 difference_type length = sd->starts[iam + 1] - sd->starts[iam];
00141 bin_index* oracles = new bin_index[length];
00142 difference_type* dist = new difference_type[sd->num_bins + 1];
00143 bin_index* bin_proc = new bin_index[sd->num_bins];
00144 value_type** temporaries = new value_type*[d->num_threads];
00145
00146
00147 for (bin_index b = 0; b < sd->num_bins + 1; ++b)
00148 dist[b] = 0;
00149 int num_bits = sd->num_bits;
00150
00151 random_number rng(d->seed);
00152
00153
00154 for (difference_type i = 0; i < length; ++i)
00155 {
00156 bin_index oracle = random_number_pow2(num_bits, rng);
00157 oracles[i] = oracle;
00158
00159
00160 ++(dist[oracle + 1]);
00161 }
00162
00163 for (bin_index b = 0; b < sd->num_bins + 1; ++b)
00164 sd->dist[b][iam + 1] = dist[b];
00165
00166 # pragma omp barrier
00167
00168 # pragma omp single
00169 {
00170
00171
00172 for (bin_index s = 0; s < sd->num_bins; ++s)
00173 __gnu_sequential::partial_sum(sd->dist[s + 1],
00174 sd->dist[s + 1] + d->num_threads + 1,
00175 sd->dist[s + 1]);
00176 }
00177
00178 # pragma omp barrier
00179
00180 sequence_index_t offset = 0, global_offset = 0;
00181 for (bin_index s = 0; s < d->bins_begin; ++s)
00182 global_offset += sd->dist[s + 1][d->num_threads];
00183
00184 # pragma omp barrier
00185
00186 for (bin_index s = d->bins_begin; s < d->bins_end; ++s)
00187 {
00188 for (int t = 0; t < d->num_threads + 1; ++t)
00189 sd->dist[s + 1][t] += offset;
00190 offset = sd->dist[s + 1][d->num_threads];
00191 }
00192
00193 sd->temporaries[iam] = static_cast<value_type*>(
00194 ::operator new(sizeof(value_type) * offset));
00195
00196 # pragma omp barrier
00197
00198
00199 for (bin_index b = 0; b < sd->num_bins + 1; ++b)
00200 dist[b] = sd->dist[b][iam];
00201 for (bin_index b = 0; b < sd->num_bins; ++b)
00202 bin_proc[b] = sd->bin_proc[b];
00203 for (thread_index_t t = 0; t < d->num_threads; ++t)
00204 temporaries[t] = sd->temporaries[t];
00205
00206 RandomAccessIterator source = sd->source;
00207 difference_type start = sd->starts[iam];
00208
00209
00210 for (difference_type i = 0; i < length; ++i)
00211 {
00212 bin_index target_bin = oracles[i];
00213 thread_index_t target_p = bin_proc[target_bin];
00214
00215
00216 ::new(&(temporaries[target_p][dist[target_bin + 1]++]))
00217 value_type(*(source + i + start));
00218 }
00219
00220 delete[] oracles;
00221 delete[] dist;
00222 delete[] bin_proc;
00223 delete[] temporaries;
00224
00225 # pragma omp barrier
00226
00227
00228 for (bin_index b = d->bins_begin; b < d->bins_end; ++b)
00229 {
00230 value_type* begin =
00231 sd->temporaries[iam] +
00232 ((b == d->bins_begin) ? 0 : sd->dist[b][d->num_threads]),
00233 * end =
00234 sd->temporaries[iam] + sd->dist[b + 1][d->num_threads];
00235 sequential_random_shuffle(begin, end, rng);
00236 std::copy(begin, end, sd->source + global_offset +
00237 ((b == d->bins_begin) ? 0 : sd->dist[b][d->num_threads]));
00238 }
00239
00240 ::operator delete(sd->temporaries[iam]);
00241 }
00242
00243
00244
00245 template<typename T>
00246 T
00247 round_up_to_pow2(T x)
00248 {
00249 if (x <= 1)
00250 return 1;
00251 else
00252 return (T)1 << (log2(x - 1) + 1);
00253 }
00254
00255
00256
00257
00258
00259
00260
00261
00262 template<typename RandomAccessIterator, typename RandomNumberGenerator>
00263 void
00264 parallel_random_shuffle_drs(RandomAccessIterator begin,
00265 RandomAccessIterator end,
00266 typename std::iterator_traits
00267 <RandomAccessIterator>::difference_type n,
00268 thread_index_t num_threads,
00269 RandomNumberGenerator& rng)
00270 {
00271 typedef std::iterator_traits<RandomAccessIterator> traits_type;
00272 typedef typename traits_type::value_type value_type;
00273 typedef typename traits_type::difference_type difference_type;
00274
00275 _GLIBCXX_CALL(n)
00276
00277 const _Settings& __s = _Settings::get();
00278
00279 if (num_threads > n)
00280 num_threads = static_cast<thread_index_t>(n);
00281
00282 bin_index num_bins, num_bins_cache;
00283
00284 #if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_L1
00285
00286
00287
00288 num_bins_cache = std::max<difference_type>(
00289 1, n / (__s.L1_cache_size_lb / sizeof(value_type)));
00290 num_bins_cache = round_up_to_pow2(num_bins_cache);
00291
00292
00293
00294 num_bins = std::min<difference_type>(n, num_bins_cache);
00295
00296 #if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_TLB
00297
00298 num_bins = std::min<difference_type>(__s.TLB_size / 2, num_bins);
00299 #endif
00300 num_bins = round_up_to_pow2(num_bins);
00301
00302 if (num_bins < num_bins_cache)
00303 {
00304 #endif
00305
00306
00307 num_bins_cache = static_cast<bin_index>(std::max<difference_type>(
00308 1, n / (__s.L2_cache_size / sizeof(value_type))));
00309 num_bins_cache = round_up_to_pow2(num_bins_cache);
00310
00311
00312 num_bins = static_cast<bin_index>(
00313 std::min(n, static_cast<difference_type>(num_bins_cache)));
00314
00315 #if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_TLB
00316
00317 num_bins = std::min(
00318 static_cast<difference_type>(__s.TLB_size / 2), num_bins);
00319 #endif
00320 num_bins = round_up_to_pow2(num_bins);
00321 #if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_L1
00322 }
00323 #endif
00324
00325 num_threads = std::min<bin_index>(num_threads, num_bins);
00326
00327 if (num_threads <= 1)
00328 return sequential_random_shuffle(begin, end, rng);
00329
00330 DRandomShufflingGlobalData<RandomAccessIterator> sd(begin);
00331 DRSSorterPU<RandomAccessIterator, random_number >* pus;
00332 difference_type* starts;
00333
00334 # pragma omp parallel num_threads(num_threads)
00335 {
00336 # pragma omp single
00337 {
00338 pus = new DRSSorterPU<RandomAccessIterator, random_number>
00339 [num_threads];
00340
00341 sd.temporaries = new value_type*[num_threads];
00342 sd.dist = new difference_type*[num_bins + 1];
00343 sd.bin_proc = new thread_index_t[num_bins];
00344 for (bin_index b = 0; b < num_bins + 1; ++b)
00345 sd.dist[b] = new difference_type[num_threads + 1];
00346 for (bin_index b = 0; b < (num_bins + 1); ++b)
00347 {
00348 sd.dist[0][0] = 0;
00349 sd.dist[b][0] = 0;
00350 }
00351 starts = sd.starts = new difference_type[num_threads + 1];
00352 int bin_cursor = 0;
00353 sd.num_bins = num_bins;
00354 sd.num_bits = log2(num_bins);
00355
00356 difference_type chunk_length = n / num_threads,
00357 split = n % num_threads, start = 0;
00358 difference_type bin_chunk_length = num_bins / num_threads,
00359 bin_split = num_bins % num_threads;
00360 for (thread_index_t i = 0; i < num_threads; ++i)
00361 {
00362 starts[i] = start;
00363 start += (i < split) ? (chunk_length + 1) : chunk_length;
00364 int j = pus[i].bins_begin = bin_cursor;
00365
00366
00367 bin_cursor += (i < bin_split) ?
00368 (bin_chunk_length + 1) : bin_chunk_length;
00369 pus[i].bins_end = bin_cursor;
00370 for (; j < bin_cursor; ++j)
00371 sd.bin_proc[j] = i;
00372 pus[i].num_threads = num_threads;
00373 pus[i].seed = rng(std::numeric_limits<uint32>::max());
00374 pus[i].sd = &sd;
00375 }
00376 starts[num_threads] = start;
00377 }
00378
00379 parallel_random_shuffle_drs_pu(pus);
00380 }
00381
00382 delete[] starts;
00383 delete[] sd.bin_proc;
00384 for (int s = 0; s < (num_bins + 1); ++s)
00385 delete[] sd.dist[s];
00386 delete[] sd.dist;
00387 delete[] sd.temporaries;
00388
00389 delete[] pus;
00390 }
00391
00392
00393
00394
00395
00396
00397 template<typename RandomAccessIterator, typename RandomNumberGenerator>
00398 void
00399 sequential_random_shuffle(RandomAccessIterator begin,
00400 RandomAccessIterator end,
00401 RandomNumberGenerator& rng)
00402 {
00403 typedef std::iterator_traits<RandomAccessIterator> traits_type;
00404 typedef typename traits_type::value_type value_type;
00405 typedef typename traits_type::difference_type difference_type;
00406
00407 difference_type n = end - begin;
00408 const _Settings& __s = _Settings::get();
00409
00410 bin_index num_bins, num_bins_cache;
00411
00412 #if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_L1
00413
00414 num_bins_cache =
00415 std::max<difference_type>
00416 (1, n / (__s.L1_cache_size_lb / sizeof(value_type)));
00417 num_bins_cache = round_up_to_pow2(num_bins_cache);
00418
00419
00420
00421 num_bins = std::min(n, (difference_type)num_bins_cache);
00422 #if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_TLB
00423
00424 num_bins = std::min((difference_type)__s.TLB_size / 2, num_bins);
00425 #endif
00426 num_bins = round_up_to_pow2(num_bins);
00427
00428 if (num_bins < num_bins_cache)
00429 {
00430 #endif
00431
00432 num_bins_cache =
00433 static_cast<bin_index>(std::max<difference_type>(
00434 1, n / (__s.L2_cache_size / sizeof(value_type))));
00435 num_bins_cache = round_up_to_pow2(num_bins_cache);
00436
00437
00438
00439 num_bins = static_cast<bin_index>
00440 (std::min(n, static_cast<difference_type>(num_bins_cache)));
00441
00442 #if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_TLB
00443
00444 num_bins =
00445 std::min<difference_type>(__s.TLB_size / 2, num_bins);
00446 #endif
00447 num_bins = round_up_to_pow2(num_bins);
00448 #if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_L1
00449 }
00450 #endif
00451
00452 int num_bits = log2(num_bins);
00453
00454 if (num_bins > 1)
00455 {
00456 value_type* target = static_cast<value_type*>(
00457 ::operator new(sizeof(value_type) * n));
00458 bin_index* oracles = new bin_index[n];
00459 difference_type* dist0 = new difference_type[num_bins + 1],
00460 * dist1 = new difference_type[num_bins + 1];
00461
00462 for (int b = 0; b < num_bins + 1; ++b)
00463 dist0[b] = 0;
00464
00465 random_number bitrng(rng(0xFFFFFFFF));
00466
00467 for (difference_type i = 0; i < n; ++i)
00468 {
00469 bin_index oracle = random_number_pow2(num_bits, bitrng);
00470 oracles[i] = oracle;
00471
00472
00473 ++(dist0[oracle + 1]);
00474 }
00475
00476
00477 __gnu_sequential::partial_sum(dist0, dist0 + num_bins + 1, dist0);
00478
00479 for (int b = 0; b < num_bins + 1; ++b)
00480 dist1[b] = dist0[b];
00481
00482
00483 for (difference_type i = 0; i < n; ++i)
00484 ::new(&(target[(dist0[oracles[i]])++])) value_type(*(begin + i));
00485
00486 for (int b = 0; b < num_bins; ++b)
00487 {
00488 sequential_random_shuffle(target + dist1[b],
00489 target + dist1[b + 1],
00490 rng);
00491 }
00492
00493 delete[] dist0;
00494 delete[] dist1;
00495 delete[] oracles;
00496 ::operator delete(target);
00497 }
00498 else
00499 __gnu_sequential::random_shuffle(begin, end, rng);
00500 }
00501
00502
00503
00504
00505
00506
00507 template<typename RandomAccessIterator, typename RandomNumberGenerator>
00508 inline void
00509 parallel_random_shuffle(RandomAccessIterator begin,
00510 RandomAccessIterator end,
00511 RandomNumberGenerator rng = random_number())
00512 {
00513 typedef std::iterator_traits<RandomAccessIterator> traits_type;
00514 typedef typename traits_type::difference_type difference_type;
00515 difference_type n = end - begin;
00516 parallel_random_shuffle_drs(begin, end, n, get_max_threads(), rng) ;
00517 }
00518
00519 }
00520
00521 #endif