00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046 #ifndef _GLIBCXX_PARALLEL_WORKSTEALING_H
00047 #define _GLIBCXX_PARALLEL_WORKSTEALING_H 1
00048
00049 #include <parallel/parallel.h>
00050 #include <parallel/random_number.h>
00051 #include <parallel/compatibility.h>
00052
00053 namespace __gnu_parallel
00054 {
00055 /// Qualifier applied to every Job member below; expands to volatile.
00056 #define _GLIBCXX_JOB_VOLATILE volatile
00057 /// One thread's share of the work, described as an inclusive index range.
00058 /// Other threads read and modify these fields when they steal work.
00059 template<typename _DifferenceTp>
00060 struct Job
00061 {
00062 typedef _DifferenceTp difference_type;
00063
00064 /// Index of the first element that has not been claimed yet.
00065 /// It is advanced with fetch_and_add by the owning thread (one chunk at
00066 /// a time) and by stealing threads (by the stolen amount), so it may
00067 /// end up greater than last.
00068 _GLIBCXX_JOB_VOLATILE difference_type first;
00069
00070 /// Index of the last element of the range (inclusive).
00071 /// The job holds no work when first > last.
00072
00073 _GLIBCXX_JOB_VOLATILE difference_type last;
00074
00075 /// Number of elements left, stored as last - first + 1.
00076 /// A stealing thread only uses a snapshot of a job when load > 0 and
00077 /// first + load - 1 == last.
00078 _GLIBCXX_JOB_VOLATILE difference_type load;
00079 };
00080
00081 /// @brief Applies f(op, it) to every position of a random-access range
00082 /// inside an OpenMP parallel region. Each thread starts with a contiguous
00083 /// block of indices; a thread that runs out of work takes part of the
00084 /// remaining range of a randomly chosen other thread.
00085 ///
00086 /// @param begin Iterator to the first element.
00087 /// @param end Iterator past the last element; determines the length
00088 /// when bound is negative.
00089 /// @param op Passed as first argument to every call of f; returned at the end.
00090 /// @param f Called as f(op, iterator); its finish_iterator member is set
00091 /// to begin + length before returning.
00092 /// @param r Binary reduction, called as r(accumulated, new_value).
00093 /// @param base Value assigned to output before the parallel region starts.
00094 /// @param output Receives base combined (via r) with each thread's result.
00095 /// A thread that processed no element contributes Result().
00096 /// @param bound Number of elements to process; if negative, end - begin
00097 /// is used instead.
00098 /// @return op
00099 template<typename RandomAccessIterator,
00100 typename Op,
00101 typename Fu,
00102 typename Red,
00103 typename Result>
00104 Op
00105 for_each_template_random_access_workstealing(RandomAccessIterator begin,
00106 RandomAccessIterator end,
00107 Op op, Fu& f, Red r,
00108 Result base, Result& output,
00109 typename std::iterator_traits
00110 <RandomAccessIterator>::
00111 difference_type bound)
00112 {
00113 _GLIBCXX_CALL(end - begin)
00114
00115 typedef std::iterator_traits<RandomAccessIterator> traits_type;
00116 typedef typename traits_type::difference_type difference_type;
00117
00118 const _Settings& __s = _Settings::get();
00119 // Number of elements a thread claims from its own job in one step.
00120 difference_type chunk_size = static_cast<difference_type>(__s.workstealing_chunk_size);
00121
00122 // Total number of elements to process.
00123 difference_type length = (bound < 0) ? (end - begin) : bound;
00124 // Distance, in Job slots, between the jobs of consecutive threads.
00125 // NOTE(review): presumably spaces the jobs apart to avoid false sharing.
00126 const int stride = __s.cache_line_size * 10 / sizeof(Job<difference_type>) + 1;
00127
00128 // Number of threads that currently hold work; updated with omp atomic.
00129 thread_index_t busy = 0;
00130 // Array of num_threads * stride slots, allocated inside the parallel region.
00131 Job<difference_type> *job;
00132 // Serializes the final combination of per-thread results into output.
00133 omp_lock_t output_lock;
00134 omp_init_lock(&output_lock);
00135
00136 // The reduction starts from base.
00137 output = base;
00138
00139 // Request min(length, get_max_threads()) threads, but at least one.
00140 thread_index_t num_threads =
00141 __gnu_parallel::max<thread_index_t>(1,
00142 __gnu_parallel::min<difference_type>(length, get_max_threads()));
00143
00144 # pragma omp parallel shared(busy) num_threads(num_threads)
00145 {
00146 // One thread records the team size actually in use and allocates the jobs.
00147 # pragma omp single
00148 {
00149 num_threads = omp_get_num_threads();
00150
00151 // Only the slots at multiples of stride are ever accessed.
00152 job = new Job<difference_type>[num_threads * stride];
00153 }
00154
00155 // The single construct above has no nowait clause, so per OpenMP all
00156 // threads wait at its end; job is therefore allocated before it is used.
00157 // Whether this thread is currently counted in busy.
00158 bool iam_working = false;
00159
00160 // Index of this thread within the team.
00161 thread_index_t iam = omp_get_thread_num();
00162
00163 // This thread's own job slot.
00164 Job<difference_type>& my_job = job[iam * stride];
00165
00166 // Index of the thread to steal from.
00167 thread_index_t victim;
00168
00169 // Thread-local partial result of the reduction.
00170 Result result = Result();
00171
00172 // Number of elements to steal.
00173 difference_type steal;
00174
00175 // Per-thread generator used to choose victims.
00176 // NOTE(review): presumably yields values in [0, num_threads) - confirm in random_number.h.
00177 random_number rand_gen(iam, num_threads);
00178
00179 // Every thread starts out counted as busy.
00180 # pragma omp atomic
00181 ++busy;
00182
00183 iam_working = true;
00184
00185 // Initial split: thread iam gets the iam-th block of length / num_threads elements.
00186 my_job.first =
00187 static_cast<difference_type>(iam * (length / num_threads));
00188 // The last thread additionally takes the remainder of the division.
00189 my_job.last = (iam == (num_threads - 1)) ?
00190 (length - 1) : ((iam + 1) * (length / num_threads) - 1);
00191 my_job.load = my_job.last - my_job.first + 1;
00192
00193 // The first element initializes result directly, without going through r.
00194 if (my_job.first <= my_job.last)
00195 {
00196
00197 difference_type my_first = my_job.first;
00198 result = f(op, begin + my_first);
00199 ++my_job.first;
00200 --my_job.load;
00201 }
00202
00203 RandomAccessIterator current;
00204 // All jobs are set up before any thread reads another thread's job below.
00205 # pragma omp barrier
00206
00207 // Main loop: work on the own job, then try to steal; ends when busy is 0.
00208
00209 while (busy > 0)
00210 {
00211
00212 # pragma omp flush(busy)
00213
00214 // Work through the own job, up to chunk_size elements per step.
00215 while (my_job.first <= my_job.last)
00216 {
00217 // Claim the next chunk by advancing first.
00218 // NOTE(review): assumes fetch_and_add returns the value before the addition.
00219 difference_type current_job =
00220 fetch_and_add<difference_type>(&(my_job.first), chunk_size);
00221
00222 // Update the remaining load that stealing threads read;
00223 // it becomes <= 0 once first has passed last.
00224 my_job.load = my_job.last - my_job.first + 1;
00225 for (difference_type job_counter = 0;
00226 job_counter < chunk_size && current_job <= my_job.last;
00227 ++job_counter)
00228 {
00229
00230 current = begin + current_job;
00231 ++current_job;
00232
00233 // Fold this element's value into the thread-local result.
00234 result = r(result, f(op, current));
00235 }
00236
00237 # pragma omp flush(busy)
00238 }
00239
00240 // Own job exhausted: leave the busy count (once per working phase).
00241 if (iam_working)
00242 {
00243
00244 # pragma omp atomic
00245 --busy;
00246
00247 iam_working = false;
00248 }
00249 // Snapshot of a victim's job; the three fields are read without a lock.
00250 difference_type supposed_first, supposed_last, supposed_load;
00251 do
00252 {
00253 // Retry with a new random victim until a usable snapshot is found or busy is 0.
00254 yield();
00255 # pragma omp flush(busy)
00256 victim = rand_gen();
00257 supposed_first = job[victim * stride].first;
00258 supposed_last = job[victim * stride].last;
00259 supposed_load = job[victim * stride].load;
00260 }
00261 while (busy > 0
00262 && ((supposed_load <= 0)
00263 || ((supposed_first + supposed_load - 1) != supposed_last)));
00264 // No thread holds work any more: leave the main loop.
00265 if (busy == 0)
00266 break;
00267 // The retry loop may also have ended because busy was 0, with an empty snapshot.
00268 if (supposed_load > 0)
00269 {
00270
00271 // Take half of the victim's load, but at least one element.
00272 steal = (supposed_load < 2) ? 1 : supposed_load / 2;
00273
00274 // Advance the victim's first by steal; the skipped indices become this thread's job.
00275 difference_type stolen_first =
00276 fetch_and_add<difference_type>(
00277 &(job[victim * stride].first), steal);
00278 difference_type stolen_try =
00279 stolen_first + steal - difference_type(1);
00280 // The stolen range is clamped to the victim's last as seen in the snapshot.
00281 my_job.first = stolen_first;
00282 my_job.last = __gnu_parallel::min(stolen_try, supposed_last);
00283 my_job.load = my_job.last - my_job.first + 1;
00284
00285 // This thread holds work again.
00286 # pragma omp atomic
00287 ++busy;
00288 iam_working = true;
00289
00290 # pragma omp flush(busy)
00291 }
00292 # pragma omp flush(busy)
00293 }
00294 // Combine the thread-local result into output while holding the lock.
00295 omp_set_lock(&output_lock);
00296 output = r(output, result);
00297 omp_unset_lock(&output_lock);
00298 }
00299
00300 delete[] job;
00301
00302
00303 // Store the end of the processed range in the functor.
00304 f.finish_iterator = begin + length;
00305
00306 omp_destroy_lock(&output_lock);
00307
00308 return op;
00309 }
00310 }
00311
00312 #endif