LLVM OpenMP* Runtime Library
kmp_sched.cpp
/*
 * kmp_sched.cpp -- static scheduling -- iteration initialization
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

/* Static scheduling initialization.

   NOTE: team->t.t_nproc is a constant inside any dispatch loop, however
   it may change between parallel regions. __kmp_max_nth
   is the largest value __kmp_nth may take, 1 is the smallest. */
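
// Illustrative sketch (not part of this file): roughly how a compiler lowers
// "#pragma omp for schedule(static)" over i = 0..99 onto the entry points
// defined below. The exact shape of the emitted code varies by compiler; the
// loc handling and the helper name are simplified assumptions.
#if 0
void lowered_static_loop(ident_t *loc, kmp_int32 gtid) {
  kmp_int32 lower = 0, upper = 99, stride = 1, lastiter = 0;
  // Ask the runtime for this thread's slice of [0, 99].
  __kmpc_for_static_init_4(loc, gtid, kmp_sch_static, &lastiter, &lower,
                           &upper, &stride, /*incr=*/1, /*chunk=*/1);
  for (kmp_int32 i = lower; i <= upper; ++i) {
    // loop body runs only for this thread's iterations
  }
  __kmpc_for_static_fini(loc, gtid); // mark the workshare complete
}
#endif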

#include "kmp.h"
#include "kmp_error.h"
#include "kmp_i18n.h"
#include "kmp_itt.h"
#include "kmp_stats.h"
#include "kmp_str.h"

#if OMPT_SUPPORT
#include "ompt-specific.h"
#endif

#ifdef KMP_DEBUG
//-------------------------------------------------------------------------
// template for debug prints specification ( d, u, lld, llu, ld )
char const *traits_t<int>::spec = "d";
char const *traits_t<unsigned int>::spec = "u";
char const *traits_t<long long>::spec = "lld";
char const *traits_t<unsigned long long>::spec = "llu";
char const *traits_t<long>::spec = "ld";
//-------------------------------------------------------------------------
#endif

#if KMP_STATS_ENABLED
#define KMP_STATS_LOOP_END(stat)                                               \
  {                                                                            \
    kmp_int64 t;                                                               \
    kmp_int64 u = (kmp_int64)(*pupper);                                        \
    kmp_int64 l = (kmp_int64)(*plower);                                        \
    kmp_int64 i = (kmp_int64)incr;                                             \
    if (i == 1) {                                                              \
      t = u - l + 1;                                                           \
    } else if (i == -1) {                                                      \
      t = l - u + 1;                                                           \
    } else if (i > 0) {                                                        \
      t = (u - l) / i + 1;                                                     \
    } else {                                                                   \
      t = (l - u) / (-i) + 1;                                                  \
    }                                                                          \
    KMP_COUNT_VALUE(stat, t);                                                  \
    KMP_POP_PARTITIONED_TIMER();                                               \
  }
#else
#define KMP_STATS_LOOP_END(stat) /* Nothing */
#endif

template <typename T>
static void __kmp_for_static_init(ident_t *loc, kmp_int32 global_tid,
                                  kmp_int32 schedtype, kmp_int32 *plastiter,
                                  T *plower, T *pupper,
                                  typename traits_t<T>::signed_t *pstride,
                                  typename traits_t<T>::signed_t incr,
                                  typename traits_t<T>::signed_t chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                  ,
                                  void *codeptr
#endif
                                  ) {
  KMP_COUNT_BLOCK(OMP_LOOP_STATIC);
  KMP_PUSH_PARTITIONED_TIMER(OMP_loop_static);
  KMP_PUSH_PARTITIONED_TIMER(OMP_loop_static_scheduling);

  typedef typename traits_t<T>::unsigned_t UT;
  typedef typename traits_t<T>::signed_t ST;
  /* this all has to be changed back to TID and such.. */
  kmp_int32 gtid = global_tid;
  kmp_uint32 tid;
  kmp_uint32 nth;
  UT trip_count;
  kmp_team_t *team;
  kmp_info_t *th = __kmp_threads[gtid];

#if OMPT_SUPPORT && OMPT_OPTIONAL
  ompt_team_info_t *team_info = NULL;
  ompt_task_info_t *task_info = NULL;
  ompt_work_t ompt_work_type = ompt_work_loop;

  static kmp_int8 warn = 0;

  if (ompt_enabled.ompt_callback_work) {
    // Only fully initialize variables needed by OMPT if OMPT is enabled.
    team_info = __ompt_get_teaminfo(0, NULL);
    task_info = __ompt_get_task_info_object(0);
    // Determine workshare type
    if (loc != NULL) {
      if ((loc->flags & KMP_IDENT_WORK_LOOP) != 0) {
        ompt_work_type = ompt_work_loop;
      } else if ((loc->flags & KMP_IDENT_WORK_SECTIONS) != 0) {
        ompt_work_type = ompt_work_sections;
      } else if ((loc->flags & KMP_IDENT_WORK_DISTRIBUTE) != 0) {
        ompt_work_type = ompt_work_distribute;
      } else {
        kmp_int8 bool_res =
            KMP_COMPARE_AND_STORE_ACQ8(&warn, (kmp_int8)0, (kmp_int8)1);
        if (bool_res)
          KMP_WARNING(OmptOutdatedWorkshare);
      }
      KMP_DEBUG_ASSERT(ompt_work_type);
    }
  }
#endif

  KMP_DEBUG_ASSERT(plastiter && plower && pupper && pstride);
  KE_TRACE(10, ("__kmpc_for_static_init called (%d)\n", global_tid));
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format(
        "__kmpc_for_static_init: T#%%d sched=%%d liter=%%d iter=(%%%s,"
        " %%%s, %%%s) incr=%%%s chunk=%%%s signed?<%s>\n",
        traits_t<T>::spec, traits_t<T>::spec, traits_t<ST>::spec,
        traits_t<ST>::spec, traits_t<ST>::spec, traits_t<T>::spec);
    KD_TRACE(100, (buff, global_tid, schedtype, *plastiter, *plower, *pupper,
                   *pstride, incr, chunk));
    __kmp_str_free(&buff);
  }
#endif

  if (__kmp_env_consistency_check) {
    __kmp_push_workshare(global_tid, ct_pdo, loc);
    if (incr == 0) {
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo,
                            loc);
    }
  }
  /* special handling for zero-trip loops */
  if (incr > 0 ? (*pupper < *plower) : (*plower < *pupper)) {
    if (plastiter != NULL)
      *plastiter = FALSE;
    /* leave pupper and plower set to entire iteration space */
    *pstride = incr; /* value should never be used */
// *plower = *pupper - incr;
// let compiler bypass the illegal loop (like for(i=1;i<10;i--))
// THE LINE COMMENTED ABOVE CAUSED shape2F/h_tests_1.f TO HAVE A FAILURE
// ON A ZERO-TRIP LOOP (lower=1, upper=0,stride=1) - JPH June 23, 2009.
#ifdef KMP_DEBUG
    {
      char *buff;
      // create format specifiers before the debug output
      buff = __kmp_str_format("__kmpc_for_static_init:(ZERO TRIP) liter=%%d "
                              "lower=%%%s upper=%%%s stride = %%%s "
                              "signed?<%s>, loc = %%s\n",
                              traits_t<T>::spec, traits_t<T>::spec,
                              traits_t<ST>::spec, traits_t<T>::spec);
      KD_TRACE(100,
               (buff, *plastiter, *plower, *pupper, *pstride, loc->psource));
      __kmp_str_free(&buff);
    }
#endif
    KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.ompt_callback_work) {
      ompt_callbacks.ompt_callback(ompt_callback_work)(
          ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
          &(task_info->task_data), 0, codeptr);
    }
#endif
    KMP_STATS_LOOP_END(OMP_loop_static_iterations);
    return;
  }

#if OMP_40_ENABLED
  // Although there are schedule enumerations above kmp_ord_upper that are not
  // "distribute" schedules, the only useful ones are dynamic, so they cannot
  // be seen here, since this codepath is only executed for static schedules.
  if (schedtype > kmp_ord_upper) {
    // we are in DISTRIBUTE construct
    schedtype += kmp_sch_static -
                 kmp_distribute_static; // AC: convert to usual schedule type
    tid = th->th.th_team->t.t_master_tid;
    team = th->th.th_team->t.t_parent;
  } else
#endif
  {
    tid = __kmp_tid_from_gtid(global_tid);
    team = th->th.th_team;
  }
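
// Sketch of the enum arithmetic above (values assumed from kmp.h's sched_type
// layout, where the distribute enumerators mirror the worksharing ones at a
// constant offset): shifting by kmp_sch_static - kmp_distribute_static maps
//   kmp_distribute_static         -> kmp_sch_static
//   kmp_distribute_static_chunked -> kmp_sch_static_chunked
// so the rest of this routine can treat a distribute loop like an ordinary
// statically scheduled loop owned by the parent team.
#if 0
// Compile-time check of the mapping (illustrative only, assumes kmp.h layout):
static_assert(kmp_distribute_static_chunked +
                      (kmp_sch_static - kmp_distribute_static) ==
                  kmp_sch_static_chunked,
              "distribute -> static schedule mapping");
#endif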

  /* determine if "for" loop is an active worksharing construct */
  if (team->t.t_serialized) {
    /* serialized parallel, each thread executes whole iteration space */
    if (plastiter != NULL)
      *plastiter = TRUE;
    /* leave pupper and plower set to entire iteration space */
    *pstride =
        (incr > 0) ? (*pupper - *plower + 1) : (-(*plower - *pupper + 1));

#ifdef KMP_DEBUG
    {
      char *buff;
      // create format specifiers before the debug output
      buff = __kmp_str_format("__kmpc_for_static_init: (serial) liter=%%d "
                              "lower=%%%s upper=%%%s stride = %%%s\n",
                              traits_t<T>::spec, traits_t<T>::spec,
                              traits_t<ST>::spec);
      KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pstride));
      __kmp_str_free(&buff);
    }
#endif
    KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.ompt_callback_work) {
      ompt_callbacks.ompt_callback(ompt_callback_work)(
          ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
          &(task_info->task_data), *pstride, codeptr);
    }
#endif
    KMP_STATS_LOOP_END(OMP_loop_static_iterations);
    return;
  }
  nth = team->t.t_nproc;
  if (nth == 1) {
    if (plastiter != NULL)
      *plastiter = TRUE;
    *pstride =
        (incr > 0) ? (*pupper - *plower + 1) : (-(*plower - *pupper + 1));
#ifdef KMP_DEBUG
    {
      char *buff;
      // create format specifiers before the debug output
      buff = __kmp_str_format("__kmpc_for_static_init: (serial) liter=%%d "
                              "lower=%%%s upper=%%%s stride = %%%s\n",
                              traits_t<T>::spec, traits_t<T>::spec,
                              traits_t<ST>::spec);
      KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pstride));
      __kmp_str_free(&buff);
    }
#endif
    KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.ompt_callback_work) {
      ompt_callbacks.ompt_callback(ompt_callback_work)(
          ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
          &(task_info->task_data), *pstride, codeptr);
    }
#endif
    KMP_STATS_LOOP_END(OMP_loop_static_iterations);
    return;
  }

  /* compute trip count */
  if (incr == 1) {
    trip_count = *pupper - *plower + 1;
  } else if (incr == -1) {
    trip_count = *plower - *pupper + 1;
  } else if (incr > 0) {
    // upper-lower can exceed the limit of signed type
    trip_count = (UT)(*pupper - *plower) / incr + 1;
  } else {
    trip_count = (UT)(*plower - *pupper) / (-incr) + 1;
  }
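
// Worked example of the formula above (illustrative only): lower=5, upper=17,
// incr=4 visits 5, 9, 13, 17, and (17 - 5) / 4 + 1 == 4 matches. Casting the
// difference to the unsigned type UT first keeps (upper - lower) well-defined
// even when it exceeds the range of the signed type.
#if 0
static_assert((17 - 5) / 4 + 1 == 4, "trip count of {5,9,13,17} step 4");
#endif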

#if KMP_STATS_ENABLED
  if (KMP_MASTER_GTID(gtid)) {
    KMP_COUNT_VALUE(OMP_loop_static_total_iterations, trip_count);
  }
#endif

  if (__kmp_env_consistency_check) {
    /* tripcount overflow? */
    if (trip_count == 0 && *pupper != *plower) {
      __kmp_error_construct(kmp_i18n_msg_CnsIterationRangeTooLarge, ct_pdo,
                            loc);
    }
  }

  /* compute remaining parameters */
  switch (schedtype) {
  case kmp_sch_static: {
    if (trip_count < nth) {
      KMP_DEBUG_ASSERT(
          __kmp_static == kmp_sch_static_greedy ||
          __kmp_static ==
              kmp_sch_static_balanced); // Unknown static scheduling type.
      if (tid < trip_count) {
        *pupper = *plower = *plower + tid * incr;
      } else {
        *plower = *pupper + incr;
      }
      if (plastiter != NULL)
        *plastiter = (tid == trip_count - 1);
    } else {
      if (__kmp_static == kmp_sch_static_balanced) {
        UT small_chunk = trip_count / nth;
        UT extras = trip_count % nth;
        *plower += incr * (tid * small_chunk + (tid < extras ? tid : extras));
        *pupper = *plower + small_chunk * incr - (tid < extras ? 0 : incr);
        if (plastiter != NULL)
          *plastiter = (tid == nth - 1);
      } else {
        T big_chunk_inc_count =
            (trip_count / nth + ((trip_count % nth) ? 1 : 0)) * incr;
        T old_upper = *pupper;

        KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy);
        // Unknown static scheduling type.

        *plower += tid * big_chunk_inc_count;
        *pupper = *plower + big_chunk_inc_count - incr;
        if (incr > 0) {
          if (*pupper < *plower)
            *pupper = traits_t<T>::max_value;
          if (plastiter != NULL)
            *plastiter = *plower <= old_upper && *pupper > old_upper - incr;
          if (*pupper > old_upper)
            *pupper = old_upper; // tracker C73258
        } else {
          if (*pupper > *plower)
            *pupper = traits_t<T>::min_value;
          if (plastiter != NULL)
            *plastiter = *plower >= old_upper && *pupper < old_upper - incr;
          if (*pupper < old_upper)
            *pupper = old_upper; // tracker C73258
        }
      }
    }
    *pstride = trip_count;
    break;
  }
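
// Illustrative sketch (not part of the runtime): the balanced partition used
// above, extracted as a standalone helper. With trip_count=10 and nth=4 the
// slices are 3,3,2,2 iterations: threads with tid < extras get one extra.
// All names here are local to the sketch.
#if 0
static void balanced_slice(kmp_uint64 trip_count, kmp_uint32 nth,
                           kmp_uint32 tid, kmp_uint64 *first,
                           kmp_uint64 *count) {
  kmp_uint64 small_chunk = trip_count / nth; // 10 / 4 == 2
  kmp_uint64 extras = trip_count % nth;      // 10 % 4 == 2
  *first = tid * small_chunk + (tid < extras ? tid : extras);
  *count = small_chunk + (tid < extras ? 1 : 0);
  // tid 0 -> [0,3), tid 1 -> [3,6), tid 2 -> [6,8), tid 3 -> [8,10)
}
#endif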
  case kmp_sch_static_chunked: {
    ST span;
    if (chunk < 1) {
      chunk = 1;
    }
    span = chunk * incr;
    *pstride = span * nth;
    *plower = *plower + (span * tid);
    *pupper = *plower + span - incr;
    if (plastiter != NULL)
      *plastiter = (tid == ((trip_count - 1) / (UT)chunk) % nth);
    break;
  }
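
// Worked numbers for the chunked case above (illustrative, not from the
// source): with lower=0, incr=1, chunk=2 and nth=3, a thread's first chunk
// starts at tid*2 and repeats every *pstride = 2*3 = 6 iterations, so tid 0
// owns {0,1, 6,7, ...}, tid 1 owns {2,3, 8,9, ...}, and tid 2 owns {4,5,
// 10,11, ...}. The last iteration falls in chunk number (trip_count-1)/chunk,
// which round-robin assignment gives to thread ((trip_count-1)/chunk) % nth.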
#if OMP_45_ENABLED
  case kmp_sch_static_balanced_chunked: {
    T old_upper = *pupper;
    // round up to make sure the chunk is enough to cover all iterations
    UT span = (trip_count + nth - 1) / nth;

    // perform chunk adjustment
    chunk = (span + chunk - 1) & ~(chunk - 1);

    span = chunk * incr;
    *plower = *plower + (span * tid);
    *pupper = *plower + span - incr;
    if (incr > 0) {
      if (*pupper > old_upper)
        *pupper = old_upper;
    } else if (*pupper < old_upper)
      *pupper = old_upper;

    if (plastiter != NULL)
      *plastiter = (tid == ((trip_count - 1) / (UT)chunk));
    break;
  }
#endif
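
// Note on the chunk adjustment above: (span + chunk - 1) & ~(chunk - 1)
// rounds span up to a multiple of chunk by bit masking, which is exact only
// when chunk is a power of two. A quick numeric check (illustrative only):
#if 0
static_assert(((10 + 4 - 1) & ~(4 - 1)) == 12,
              "10 rounded up to a multiple of 4");
#endif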
  default:
    KMP_ASSERT2(0, "__kmpc_for_static_init: unknown scheduling type");
    break;
  }

#if USE_ITT_BUILD
  // Report loop metadata
  if (KMP_MASTER_TID(tid) && __itt_metadata_add_ptr &&
      __kmp_forkjoin_frames_mode == 3 &&
#if OMP_40_ENABLED
      th->th.th_teams_microtask == NULL &&
#endif
      team->t.t_active_level == 1) {
    kmp_uint64 cur_chunk = chunk;
    // Calculate chunk in case it was not specified; it is specified for
    // kmp_sch_static_chunked
    if (schedtype == kmp_sch_static) {
      cur_chunk = trip_count / nth + ((trip_count % nth) ? 1 : 0);
    }
    // 0 - "static" schedule
    __kmp_itt_metadata_loop(loc, 0, trip_count, cur_chunk);
  }
#endif
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format("__kmpc_for_static_init: liter=%%d lower=%%%s "
                            "upper=%%%s stride = %%%s signed?<%s>\n",
                            traits_t<T>::spec, traits_t<T>::spec,
                            traits_t<ST>::spec, traits_t<T>::spec);
    KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pstride));
    __kmp_str_free(&buff);
  }
#endif
  KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_work) {
    ompt_callbacks.ompt_callback(ompt_callback_work)(
        ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
        &(task_info->task_data), trip_count, codeptr);
  }
#endif

  KMP_STATS_LOOP_END(OMP_loop_static_iterations);
  return;
}

template <typename T>
static void __kmp_dist_for_static_init(ident_t *loc, kmp_int32 gtid,
                                       kmp_int32 schedule, kmp_int32 *plastiter,
                                       T *plower, T *pupper, T *pupperDist,
                                       typename traits_t<T>::signed_t *pstride,
                                       typename traits_t<T>::signed_t incr,
                                       typename traits_t<T>::signed_t chunk) {
  KMP_COUNT_BLOCK(OMP_DISTRIBUTE);
  KMP_PUSH_PARTITIONED_TIMER(OMP_distribute);
  KMP_PUSH_PARTITIONED_TIMER(OMP_distribute_scheduling);
  typedef typename traits_t<T>::unsigned_t UT;
  typedef typename traits_t<T>::signed_t ST;
  kmp_uint32 tid;
  kmp_uint32 nth;
  kmp_uint32 team_id;
  kmp_uint32 nteams;
  UT trip_count;
  kmp_team_t *team;
  kmp_info_t *th;

  KMP_DEBUG_ASSERT(plastiter && plower && pupper && pupperDist && pstride);
  KE_TRACE(10, ("__kmpc_dist_for_static_init called (%d)\n", gtid));
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format(
        "__kmpc_dist_for_static_init: T#%%d schedLoop=%%d liter=%%d "
        "iter=(%%%s, %%%s, %%%s) chunk=%%%s signed?<%s>\n",
        traits_t<T>::spec, traits_t<T>::spec, traits_t<ST>::spec,
        traits_t<ST>::spec, traits_t<T>::spec);
    KD_TRACE(100,
             (buff, gtid, schedule, *plastiter, *plower, *pupper, incr, chunk));
    __kmp_str_free(&buff);
  }
#endif

  if (__kmp_env_consistency_check) {
    __kmp_push_workshare(gtid, ct_pdo, loc);
    if (incr == 0) {
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo,
                            loc);
    }
    if (incr > 0 ? (*pupper < *plower) : (*plower < *pupper)) {
      // The loop is illegal.
      // Some zero-trip loops are handled by the compiler, e.g.:
      //   for(i=10;i<0;++i) // lower >= upper - run-time check
      //   for(i=0;i>10;--i) // lower <= upper - run-time check
      //   for(i=0;i>10;++i) // incr > 0 - compile-time check
      //   for(i=10;i<0;--i) // incr < 0 - compile-time check
      // The compiler does not check the following illegal loops:
      //   for(i=0;i<10;i+=incr) // where incr<0
      //   for(i=10;i>0;i-=incr) // where incr<0
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc);
    }
  }
  tid = __kmp_tid_from_gtid(gtid);
  th = __kmp_threads[gtid];
  nth = th->th.th_team_nproc;
  team = th->th.th_team;
#if OMP_40_ENABLED
  KMP_DEBUG_ASSERT(th->th.th_teams_microtask); // we are in the teams construct
  nteams = th->th.th_teams_size.nteams;
#endif
  team_id = team->t.t_master_tid;
  KMP_DEBUG_ASSERT(nteams == (kmp_uint32)team->t.t_parent->t.t_nproc);

  // compute global trip count
  if (incr == 1) {
    trip_count = *pupper - *plower + 1;
  } else if (incr == -1) {
    trip_count = *plower - *pupper + 1;
  } else if (incr > 0) {
    // upper-lower can exceed the limit of signed type
    trip_count = (UT)(*pupper - *plower) / incr + 1;
  } else {
    trip_count = (UT)(*plower - *pupper) / (-incr) + 1;
  }

  *pstride = *pupper - *plower; // just in case (can be unused)
  if (trip_count <= nteams) {
    KMP_DEBUG_ASSERT(
        __kmp_static == kmp_sch_static_greedy ||
        __kmp_static ==
            kmp_sch_static_balanced); // Unknown static scheduling type.
    // only the masters of some teams get a single iteration, all other
    // threads get nothing
    if (team_id < trip_count && tid == 0) {
      *pupper = *pupperDist = *plower = *plower + team_id * incr;
    } else {
      *pupperDist = *pupper;
      *plower = *pupper + incr; // compiler should skip loop body
    }
    if (plastiter != NULL)
      *plastiter = (tid == 0 && team_id == trip_count - 1);
  } else {
    // Get the team's chunk first (each team gets at most one chunk)
    if (__kmp_static == kmp_sch_static_balanced) {
      UT chunkD = trip_count / nteams;
      UT extras = trip_count % nteams;
      *plower +=
          incr * (team_id * chunkD + (team_id < extras ? team_id : extras));
      *pupperDist = *plower + chunkD * incr - (team_id < extras ? 0 : incr);
      if (plastiter != NULL)
        *plastiter = (team_id == nteams - 1);
    } else {
      T chunk_inc_count =
          (trip_count / nteams + ((trip_count % nteams) ? 1 : 0)) * incr;
      T upper = *pupper;
      KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy);
      // Unknown static scheduling type.
      *plower += team_id * chunk_inc_count;
      *pupperDist = *plower + chunk_inc_count - incr;
      // Check/correct bounds if needed
      if (incr > 0) {
        if (*pupperDist < *plower)
          *pupperDist = traits_t<T>::max_value;
        if (plastiter != NULL)
          *plastiter = *plower <= upper && *pupperDist > upper - incr;
        if (*pupperDist > upper)
          *pupperDist = upper; // tracker C73258
        if (*plower > *pupperDist) {
          *pupper = *pupperDist; // no iterations available for the team
          goto end;
        }
      } else {
        if (*pupperDist > *plower)
          *pupperDist = traits_t<T>::min_value;
        if (plastiter != NULL)
          *plastiter = *plower >= upper && *pupperDist < upper - incr;
        if (*pupperDist < upper)
          *pupperDist = upper; // tracker C73258
        if (*plower < *pupperDist) {
          *pupper = *pupperDist; // no iterations available for the team
          goto end;
        }
      }
    }
    // Get the parallel loop chunk now (for thread)
    // compute trip count for team's chunk
    if (incr == 1) {
      trip_count = *pupperDist - *plower + 1;
    } else if (incr == -1) {
      trip_count = *plower - *pupperDist + 1;
    } else if (incr > 1) {
      // upper-lower can exceed the limit of signed type
      trip_count = (UT)(*pupperDist - *plower) / incr + 1;
    } else {
      trip_count = (UT)(*plower - *pupperDist) / (-incr) + 1;
    }
    KMP_DEBUG_ASSERT(trip_count);
    switch (schedule) {
    case kmp_sch_static: {
      if (trip_count <= nth) {
        KMP_DEBUG_ASSERT(
            __kmp_static == kmp_sch_static_greedy ||
            __kmp_static ==
                kmp_sch_static_balanced); // Unknown static scheduling type.
        if (tid < trip_count)
          *pupper = *plower = *plower + tid * incr;
        else
          *plower = *pupper + incr; // no iterations available
        if (plastiter != NULL)
          if (*plastiter != 0 && !(tid == trip_count - 1))
            *plastiter = 0;
      } else {
        if (__kmp_static == kmp_sch_static_balanced) {
          UT chunkL = trip_count / nth;
          UT extras = trip_count % nth;
          *plower += incr * (tid * chunkL + (tid < extras ? tid : extras));
          *pupper = *plower + chunkL * incr - (tid < extras ? 0 : incr);
          if (plastiter != NULL)
            if (*plastiter != 0 && !(tid == nth - 1))
              *plastiter = 0;
        } else {
          T chunk_inc_count =
              (trip_count / nth + ((trip_count % nth) ? 1 : 0)) * incr;
          T upper = *pupperDist;
          KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy);
          // Unknown static scheduling type.
          *plower += tid * chunk_inc_count;
          *pupper = *plower + chunk_inc_count - incr;
          if (incr > 0) {
            if (*pupper < *plower)
              *pupper = traits_t<T>::max_value;
            if (plastiter != NULL)
              if (*plastiter != 0 &&
                  !(*plower <= upper && *pupper > upper - incr))
                *plastiter = 0;
            if (*pupper > upper)
              *pupper = upper; // tracker C73258
          } else {
            if (*pupper > *plower)
              *pupper = traits_t<T>::min_value;
            if (plastiter != NULL)
              if (*plastiter != 0 &&
                  !(*plower >= upper && *pupper < upper - incr))
                *plastiter = 0;
            if (*pupper < upper)
              *pupper = upper; // tracker C73258
          }
        }
      }
      break;
    }
    case kmp_sch_static_chunked: {
      ST span;
      if (chunk < 1)
        chunk = 1;
      span = chunk * incr;
      *pstride = span * nth;
      *plower = *plower + (span * tid);
      *pupper = *plower + span - incr;
      if (plastiter != NULL)
        if (*plastiter != 0 && !(tid == ((trip_count - 1) / (UT)chunk) % nth))
          *plastiter = 0;
      break;
    }
    default:
      KMP_ASSERT2(0,
                  "__kmpc_dist_for_static_init: unknown loop scheduling type");
      break;
    }
  }
end:;
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format(
        "__kmpc_dist_for_static_init: last=%%d lo=%%%s up=%%%s upDist=%%%s "
        "stride=%%%s signed?<%s>\n",
        traits_t<T>::spec, traits_t<T>::spec, traits_t<T>::spec,
        traits_t<ST>::spec, traits_t<T>::spec);
    KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pupperDist, *pstride));
    __kmp_str_free(&buff);
  }
#endif
  KE_TRACE(10, ("__kmpc_dist_for_static_init: T#%d return\n", gtid));
  KMP_STATS_LOOP_END(OMP_distribute_iterations);
  return;
}
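
// Illustrative sketch (not part of the runtime): the two-level split that
// __kmp_dist_for_static_init performs for the balanced case, on made-up
// numbers. With trip_count=100 over nteams=3 the team chunks are 34,33,33
// iterations; each team's chunk is then divided over its nth threads by the
// same rule. All names here are local to the sketch.
#if 0
static void dist_then_for(kmp_uint64 trip_count, kmp_uint32 nteams,
                          kmp_uint32 team_id, kmp_uint32 nth, kmp_uint32 tid,
                          kmp_uint64 *first, kmp_uint64 *count) {
  // Level 1: distribute the whole space across teams (balanced rule).
  kmp_uint64 chunkD = trip_count / nteams, extras = trip_count % nteams;
  kmp_uint64 tfirst = team_id * chunkD + (team_id < extras ? team_id : extras);
  kmp_uint64 tcount = chunkD + (team_id < extras ? 1 : 0);
  // Level 2: split this team's chunk across its threads the same way.
  kmp_uint64 chunkL = tcount / nth, extras2 = tcount % nth;
  *first = tfirst + tid * chunkL + (tid < extras2 ? tid : extras2);
  *count = chunkL + (tid < extras2 ? 1 : 0);
}
#endif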

template <typename T>
static void __kmp_team_static_init(ident_t *loc, kmp_int32 gtid,
                                   kmp_int32 *p_last, T *p_lb, T *p_ub,
                                   typename traits_t<T>::signed_t *p_st,
                                   typename traits_t<T>::signed_t incr,
                                   typename traits_t<T>::signed_t chunk) {
  // The routine returns the first chunk distributed to the team and the
  // stride for computing the next chunks.
  // The last-iteration flag is set for the team that will execute the last
  // iteration of the loop.
  // The routine is called for dist_schedule(static, chunk) only.
  typedef typename traits_t<T>::unsigned_t UT;
  typedef typename traits_t<T>::signed_t ST;
  kmp_uint32 team_id;
  kmp_uint32 nteams;
  UT trip_count;
  T lower;
  T upper;
  ST span;
  kmp_team_t *team;
  kmp_info_t *th;

  KMP_DEBUG_ASSERT(p_last && p_lb && p_ub && p_st);
  KE_TRACE(10, ("__kmp_team_static_init called (%d)\n", gtid));
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format("__kmp_team_static_init enter: T#%%d liter=%%d "
                            "iter=(%%%s, %%%s, %%%s) chunk %%%s; signed?<%s>\n",
                            traits_t<T>::spec, traits_t<T>::spec,
                            traits_t<ST>::spec, traits_t<ST>::spec,
                            traits_t<T>::spec);
    KD_TRACE(100, (buff, gtid, *p_last, *p_lb, *p_ub, *p_st, chunk));
    __kmp_str_free(&buff);
  }
#endif

  lower = *p_lb;
  upper = *p_ub;
  if (__kmp_env_consistency_check) {
    if (incr == 0) {
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo,
                            loc);
    }
    if (incr > 0 ? (upper < lower) : (lower < upper)) {
      // The loop is illegal.
      // Some zero-trip loops are handled by the compiler, e.g.:
      //   for(i=10;i<0;++i) // lower >= upper - run-time check
      //   for(i=0;i>10;--i) // lower <= upper - run-time check
      //   for(i=0;i>10;++i) // incr > 0 - compile-time check
      //   for(i=10;i<0;--i) // incr < 0 - compile-time check
      // The compiler does not check the following illegal loops:
      //   for(i=0;i<10;i+=incr) // where incr<0
      //   for(i=10;i>0;i-=incr) // where incr<0
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc);
    }
  }
  th = __kmp_threads[gtid];
  team = th->th.th_team;
#if OMP_40_ENABLED
  KMP_DEBUG_ASSERT(th->th.th_teams_microtask); // we are in the teams construct
  nteams = th->th.th_teams_size.nteams;
#endif
  team_id = team->t.t_master_tid;
  KMP_DEBUG_ASSERT(nteams == (kmp_uint32)team->t.t_parent->t.t_nproc);

  // compute trip count
  if (incr == 1) {
    trip_count = upper - lower + 1;
  } else if (incr == -1) {
    trip_count = lower - upper + 1;
  } else if (incr > 0) {
    // upper-lower can exceed the limit of signed type
    trip_count = (UT)(upper - lower) / incr + 1;
  } else {
    trip_count = (UT)(lower - upper) / (-incr) + 1;
  }
  if (chunk < 1)
    chunk = 1;
  span = chunk * incr;
  *p_st = span * nteams;
  *p_lb = lower + (span * team_id);
  *p_ub = *p_lb + span - incr;
  if (p_last != NULL)
    *p_last = (team_id == ((trip_count - 1) / (UT)chunk) % nteams);
  // Correct upper bound if needed
  if (incr > 0) {
    if (*p_ub < *p_lb) // overflow?
      *p_ub = traits_t<T>::max_value;
    if (*p_ub > upper)
      *p_ub = upper; // tracker C73258
  } else { // incr < 0
    if (*p_ub > *p_lb)
      *p_ub = traits_t<T>::min_value;
    if (*p_ub < upper)
      *p_ub = upper; // tracker C73258
  }
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff =
        __kmp_str_format("__kmp_team_static_init exit: T#%%d team%%u liter=%%d "
                         "iter=(%%%s, %%%s, %%%s) chunk %%%s\n",
                         traits_t<T>::spec, traits_t<T>::spec,
                         traits_t<ST>::spec, traits_t<ST>::spec);
    KD_TRACE(100, (buff, gtid, team_id, *p_last, *p_lb, *p_ub, *p_st, chunk));
    __kmp_str_free(&buff);
  }
#endif
}
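
// Illustrative sketch (not part of the runtime): how a dist_schedule(static,
// chunk) distribute loop might drive __kmpc_team_static_init_4. The shape of
// the chunk loop is an assumption about typical compiler output, not a
// transcription of any particular compiler's codegen.
#if 0
void teams_distribute_sketch(ident_t *loc, kmp_int32 gtid) {
  kmp_int32 lb = 0, ub = 999, st = 1, last = 0;
  // Get this team's first chunk and the stride to its next chunk.
  __kmpc_team_static_init_4(loc, gtid, &last, &lb, &ub, &st,
                            /*incr=*/1, /*chunk=*/8);
  for (; lb <= 999; lb += st, ub += st) {
    kmp_int32 upper = ub < 999 ? ub : 999; // later chunks may overshoot
    for (kmp_int32 i = lb; i <= upper; ++i) {
      // distribute loop body
    }
  }
}
#endif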

//------------------------------------------------------------------------------
extern "C" {

void __kmpc_for_static_init_4(ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype,
                              kmp_int32 *plastiter, kmp_int32 *plower,
                              kmp_int32 *pupper, kmp_int32 *pstride,
                              kmp_int32 incr, kmp_int32 chunk) {
  __kmp_for_static_init<kmp_int32>(loc, gtid, schedtype, plastiter, plower,
                                   pupper, pstride, incr, chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                   ,
                                   OMPT_GET_RETURN_ADDRESS(0)
#endif
                                   );
}

void __kmpc_for_static_init_4u(ident_t *loc, kmp_int32 gtid,
                               kmp_int32 schedtype, kmp_int32 *plastiter,
                               kmp_uint32 *plower, kmp_uint32 *pupper,
                               kmp_int32 *pstride, kmp_int32 incr,
                               kmp_int32 chunk) {
  __kmp_for_static_init<kmp_uint32>(loc, gtid, schedtype, plastiter, plower,
                                    pupper, pstride, incr, chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                    ,
                                    OMPT_GET_RETURN_ADDRESS(0)
#endif
                                    );
}

void __kmpc_for_static_init_8(ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype,
                              kmp_int32 *plastiter, kmp_int64 *plower,
                              kmp_int64 *pupper, kmp_int64 *pstride,
                              kmp_int64 incr, kmp_int64 chunk) {
  __kmp_for_static_init<kmp_int64>(loc, gtid, schedtype, plastiter, plower,
                                   pupper, pstride, incr, chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                   ,
                                   OMPT_GET_RETURN_ADDRESS(0)
#endif
                                   );
}

void __kmpc_for_static_init_8u(ident_t *loc, kmp_int32 gtid,
                               kmp_int32 schedtype, kmp_int32 *plastiter,
                               kmp_uint64 *plower, kmp_uint64 *pupper,
                               kmp_int64 *pstride, kmp_int64 incr,
                               kmp_int64 chunk) {
  __kmp_for_static_init<kmp_uint64>(loc, gtid, schedtype, plastiter, plower,
                                    pupper, pstride, incr, chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                    ,
                                    OMPT_GET_RETURN_ADDRESS(0)
#endif
                                    );
}

void __kmpc_dist_for_static_init_4(ident_t *loc, kmp_int32 gtid,
                                   kmp_int32 schedule, kmp_int32 *plastiter,
                                   kmp_int32 *plower, kmp_int32 *pupper,
                                   kmp_int32 *pupperD, kmp_int32 *pstride,
                                   kmp_int32 incr, kmp_int32 chunk) {
  __kmp_dist_for_static_init<kmp_int32>(loc, gtid, schedule, plastiter, plower,
                                        pupper, pupperD, pstride, incr, chunk);
}

void __kmpc_dist_for_static_init_4u(ident_t *loc, kmp_int32 gtid,
                                    kmp_int32 schedule, kmp_int32 *plastiter,
                                    kmp_uint32 *plower, kmp_uint32 *pupper,
                                    kmp_uint32 *pupperD, kmp_int32 *pstride,
                                    kmp_int32 incr, kmp_int32 chunk) {
  __kmp_dist_for_static_init<kmp_uint32>(loc, gtid, schedule, plastiter, plower,
                                         pupper, pupperD, pstride, incr, chunk);
}

void __kmpc_dist_for_static_init_8(ident_t *loc, kmp_int32 gtid,
                                   kmp_int32 schedule, kmp_int32 *plastiter,
                                   kmp_int64 *plower, kmp_int64 *pupper,
                                   kmp_int64 *pupperD, kmp_int64 *pstride,
                                   kmp_int64 incr, kmp_int64 chunk) {
  __kmp_dist_for_static_init<kmp_int64>(loc, gtid, schedule, plastiter, plower,
                                        pupper, pupperD, pstride, incr, chunk);
}

void __kmpc_dist_for_static_init_8u(ident_t *loc, kmp_int32 gtid,
                                    kmp_int32 schedule, kmp_int32 *plastiter,
                                    kmp_uint64 *plower, kmp_uint64 *pupper,
                                    kmp_uint64 *pupperD, kmp_int64 *pstride,
                                    kmp_int64 incr, kmp_int64 chunk) {
  __kmp_dist_for_static_init<kmp_uint64>(loc, gtid, schedule, plastiter, plower,
                                         pupper, pupperD, pstride, incr, chunk);
}

//------------------------------------------------------------------------------
// Auxiliary routines for Distribute Parallel Loop construct implementation
// Transfer call to template< type T >
// __kmp_team_static_init( ident_t *loc, int gtid,
//     int *p_last, T *lb, T *ub, ST *st, ST incr, ST chunk )

void __kmpc_team_static_init_4(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                               kmp_int32 *p_lb, kmp_int32 *p_ub,
                               kmp_int32 *p_st, kmp_int32 incr,
                               kmp_int32 chunk) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  __kmp_team_static_init<kmp_int32>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
                                    chunk);
}

void __kmpc_team_static_init_4u(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                                kmp_uint32 *p_lb, kmp_uint32 *p_ub,
                                kmp_int32 *p_st, kmp_int32 incr,
                                kmp_int32 chunk) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  __kmp_team_static_init<kmp_uint32>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
                                     chunk);
}

void __kmpc_team_static_init_8(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                               kmp_int64 *p_lb, kmp_int64 *p_ub,
                               kmp_int64 *p_st, kmp_int64 incr,
                               kmp_int64 chunk) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  __kmp_team_static_init<kmp_int64>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
                                    chunk);
}

void __kmpc_team_static_init_8u(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                                kmp_uint64 *p_lb, kmp_uint64 *p_ub,
                                kmp_int64 *p_st, kmp_int64 incr,
                                kmp_int64 chunk) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  __kmp_team_static_init<kmp_uint64>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
                                     chunk);
}

} // extern "C"