LLVM OpenMP* Runtime Library
kmp_csupport.cpp
1 /*
2  * kmp_csupport.cpp -- kfront linkage support for OpenMP.
3  */
4 
5 //===----------------------------------------------------------------------===//
6 //
7 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
8 // See https://llvm.org/LICENSE.txt for license information.
9 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #define __KMP_IMP
14 #include "omp.h" /* extern "C" declarations of user-visible routines */
15 #include "kmp.h"
16 #include "kmp_error.h"
17 #include "kmp_i18n.h"
18 #include "kmp_itt.h"
19 #include "kmp_lock.h"
20 #include "kmp_stats.h"
21 
22 #if OMPT_SUPPORT
23 #include "ompt-specific.h"
24 #endif
25 
26 #define MAX_MESSAGE 512
27 
28 // flags will be used in the future, e.g., to implement openmp_strict library
29 // restrictions
30 
39 void __kmpc_begin(ident_t *loc, kmp_int32 flags) {
40  // By default __kmpc_begin() is no-op.
41  char *env;
42  if ((env = getenv("KMP_INITIAL_THREAD_BIND")) != NULL &&
43  __kmp_str_match_true(env)) {
44  __kmp_middle_initialize();
45  KC_TRACE(10, ("__kmpc_begin: middle initialization called\n"));
46  } else if (__kmp_ignore_mppbeg() == FALSE) {
47  // By default __kmp_ignore_mppbeg() returns TRUE.
48  __kmp_internal_begin();
49  KC_TRACE(10, ("__kmpc_begin: called\n"));
50  }
51 }
52 
61 void __kmpc_end(ident_t *loc) {
62  // By default, __kmp_ignore_mppend() returns TRUE which makes __kmpc_end()
63  // call no-op. However, this can be overridden with KMP_IGNORE_MPPEND
64  // environment variable. If KMP_IGNORE_MPPEND is 0, __kmp_ignore_mppend()
65  // returns FALSE and __kmpc_end() will unregister this root (it can cause
66  // library shut down).
67  if (__kmp_ignore_mppend() == FALSE) {
68  KC_TRACE(10, ("__kmpc_end: called\n"));
69  KA_TRACE(30, ("__kmpc_end\n"));
70 
71  __kmp_internal_end_thread(-1);
72  }
73 #if KMP_OS_WINDOWS && OMPT_SUPPORT
74  // Normal exit process on Windows does not allow worker threads of the final
75  // parallel region to finish reporting their events, so shutting down the
76  // library here fixes the issue at least for the cases where __kmpc_end() is
77  // placed properly.
78  if (ompt_enabled.enabled)
79  __kmp_internal_end_library(__kmp_gtid_get_specific());
80 #endif
81 }
82 
102  kmp_int32 gtid = __kmp_entry_gtid();
103 
104  KC_TRACE(10, ("__kmpc_global_thread_num: T#%d\n", gtid));
105 
106  return gtid;
107 }
108 
124  KC_TRACE(10,
125  ("__kmpc_global_num_threads: num_threads = %d\n", __kmp_all_nth));
126 
127  return TCR_4(__kmp_all_nth);
128 }
129 
137  KC_TRACE(10, ("__kmpc_bound_thread_num: called\n"));
138  return __kmp_tid_from_gtid(__kmp_entry_gtid());
139 }
140 
147  KC_TRACE(10, ("__kmpc_bound_num_threads: called\n"));
148 
149  return __kmp_entry_thread()->th.th_team->t.t_nproc;
150 }
151 
158 kmp_int32 __kmpc_ok_to_fork(ident_t *loc) {
159 #ifndef KMP_DEBUG
160 
161  return TRUE;
162 
163 #else
164 
165  const char *semi2;
166  const char *semi3;
167  int line_no;
168 
169  if (__kmp_par_range == 0) {
170  return TRUE;
171  }
172  semi2 = loc->psource;
173  if (semi2 == NULL) {
174  return TRUE;
175  }
176  semi2 = strchr(semi2, ';');
177  if (semi2 == NULL) {
178  return TRUE;
179  }
180  semi2 = strchr(semi2 + 1, ';');
181  if (semi2 == NULL) {
182  return TRUE;
183  }
184  if (__kmp_par_range_filename[0]) {
185  const char *name = semi2 - 1;
186  while ((name > loc->psource) && (*name != '/') && (*name != ';')) {
187  name--;
188  }
189  if ((*name == '/') || (*name == ';')) {
190  name++;
191  }
192  if (strncmp(__kmp_par_range_filename, name, semi2 - name)) {
193  return __kmp_par_range < 0;
194  }
195  }
196  semi3 = strchr(semi2 + 1, ';');
197  if (__kmp_par_range_routine[0]) {
198  if ((semi3 != NULL) && (semi3 > semi2) &&
199  (strncmp(__kmp_par_range_routine, semi2 + 1, semi3 - semi2 - 1))) {
200  return __kmp_par_range < 0;
201  }
202  }
203  if (KMP_SSCANF(semi3 + 1, "%d", &line_no) == 1) {
204  if ((line_no >= __kmp_par_range_lb) && (line_no <= __kmp_par_range_ub)) {
205  return __kmp_par_range > 0;
206  }
207  return __kmp_par_range < 0;
208  }
209  return TRUE;
210 
211 #endif /* KMP_DEBUG */
212 }
213 
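The KMP_PAR_RANGE filtering above depends on the layout of loc->psource, which conventionally packs the source file, routine name, and line information into one semicolon-separated string. A minimal standalone sketch of the same walk over the string, using a hypothetical psource value (the exact encoding is produced by the compiler, so the sample below is an assumption):

    #include <stdio.h>
    #include <string.h>

    int main(void) {
      /* Hypothetical psource value of the form ";file;routine;line;line;;". */
      const char *psource = ";work.c;compute;42;45;;";
      const char *semi2 = strchr(strchr(psource, ';') + 1, ';'); /* 2nd ';' */
      const char *semi3 = strchr(semi2 + 1, ';');                /* 3rd ';' */
      int line_no = 0;
      sscanf(semi3 + 1, "%d", &line_no);
      printf("file=%.*s routine=%.*s line=%d\n",
             (int)(semi2 - psource - 1), psource + 1,
             (int)(semi3 - semi2 - 1), semi2 + 1, line_no);
      return 0;
    }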
220 kmp_int32 __kmpc_in_parallel(ident_t *loc) {
221  return __kmp_entry_thread()->th.th_root->r.r_active;
222 }
223 
233 void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
234  kmp_int32 num_threads) {
235  KA_TRACE(20, ("__kmpc_push_num_threads: enter T#%d num_threads=%d\n",
236  global_tid, num_threads));
237 
238  __kmp_push_num_threads(loc, global_tid, num_threads);
239 }
240 
241 void __kmpc_pop_num_threads(ident_t *loc, kmp_int32 global_tid) {
242  KA_TRACE(20, ("__kmpc_pop_num_threads: enter\n"));
243 
244  /* the num_threads are automatically popped */
245 }
246 
247 #if OMP_40_ENABLED
248 
249 void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
250  kmp_int32 proc_bind) {
251  KA_TRACE(20, ("__kmpc_push_proc_bind: enter T#%d proc_bind=%d\n", global_tid,
252  proc_bind));
253 
254  __kmp_push_proc_bind(loc, global_tid, (kmp_proc_bind_t)proc_bind);
255 }
256 
257 #endif /* OMP_40_ENABLED */
258 
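As a usage illustration, the num_threads() and proc_bind() clauses on a parallel construct are what drive these push routines: compilers such as clang and icc typically emit __kmpc_push_num_threads() and, when proc binding applies, __kmpc_push_proc_bind() immediately before the __kmpc_fork_call() for the region. A small runnable example (compile with -fopenmp):

    #include <omp.h>
    #include <stdio.h>

    int main(void) {
      /* num_threads(4)   -> __kmpc_push_num_threads(&loc, gtid, 4)
         proc_bind(close) -> __kmpc_push_proc_bind(&loc, gtid, <close>)
         both issued just before the fork of this region.            */
      #pragma omp parallel num_threads(4) proc_bind(close)
      printf("thread %d of %d\n", omp_get_thread_num(), omp_get_num_threads());
      return 0;
    }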
269 void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...) {
270  int gtid = __kmp_entry_gtid();
271 
272 #if (KMP_STATS_ENABLED)
273  // If we were in a serial region, then stop the serial timer, record
274  // the event, and start parallel region timer
275  stats_state_e previous_state = KMP_GET_THREAD_STATE();
276  if (previous_state == stats_state_e::SERIAL_REGION) {
277  KMP_EXCHANGE_PARTITIONED_TIMER(OMP_parallel_overhead);
278  } else {
279  KMP_PUSH_PARTITIONED_TIMER(OMP_parallel_overhead);
280  }
281  int inParallel = __kmpc_in_parallel(loc);
282  if (inParallel) {
283  KMP_COUNT_BLOCK(OMP_NESTED_PARALLEL);
284  } else {
285  KMP_COUNT_BLOCK(OMP_PARALLEL);
286  }
287 #endif
288 
289  // maybe saving thr_state is enough here
290  {
291  va_list ap;
292  va_start(ap, microtask);
293 
294 #if OMPT_SUPPORT
295  ompt_frame_t *ompt_frame;
296  if (ompt_enabled.enabled) {
297  kmp_info_t *master_th = __kmp_threads[gtid];
298  kmp_team_t *parent_team = master_th->th.th_team;
299  ompt_lw_taskteam_t *lwt = parent_team->t.ompt_serialized_team_info;
300  if (lwt)
301  ompt_frame = &(lwt->ompt_task_info.frame);
302  else {
303  int tid = __kmp_tid_from_gtid(gtid);
304  ompt_frame = &(
305  parent_team->t.t_implicit_task_taskdata[tid].ompt_task_info.frame);
306  }
307  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
308  OMPT_STORE_RETURN_ADDRESS(gtid);
309  }
310 #endif
311 
312 #if INCLUDE_SSC_MARKS
313  SSC_MARK_FORKING();
314 #endif
315  __kmp_fork_call(loc, gtid, fork_context_intel, argc,
316  VOLATILE_CAST(microtask_t) microtask, // "wrapped" task
317  VOLATILE_CAST(launch_t) __kmp_invoke_task_func,
318 /* TODO: revert workaround for Intel(R) 64 tracker #96 */
319 #if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
320  &ap
321 #else
322  ap
323 #endif
324  );
325 #if INCLUDE_SSC_MARKS
326  SSC_MARK_JOINING();
327 #endif
328  __kmp_join_call(loc, gtid
329 #if OMPT_SUPPORT
330  ,
331  fork_context_intel
332 #endif
333  );
334 
335  va_end(ap);
336  }
337 
338 #if KMP_STATS_ENABLED
339  if (previous_state == stats_state_e::SERIAL_REGION) {
340  KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
341  } else {
342  KMP_POP_PARTITIONED_TIMER();
343  }
344 #endif // KMP_STATS_ENABLED
345 }
346 
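To make the calling convention concrete: for a plain parallel region the compiler outlines the body into a microtask and passes the addresses of shared variables as the variadic arguments counted by argc. A sketch of the source-level view (the outlined function name and exact signature are compiler details, shown here only for illustration):

    #include <omp.h>
    #include <stdio.h>

    int main(void) {
      int counter = 0;
      /* The block below is outlined into roughly
           void outlined(kmp_int32 *gtid, kmp_int32 *btid, int *counter);
         and the region becomes
           __kmpc_fork_call(&loc, 1, (kmpc_micro)outlined, &counter);     */
      #pragma omp parallel shared(counter)
      {
        #pragma omp atomic
        counter++;
      }
      printf("counter = %d\n", counter);
      return 0;
    }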
347 #if OMP_40_ENABLED
348 
359 void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
360  kmp_int32 num_teams, kmp_int32 num_threads) {
361  KA_TRACE(20,
362  ("__kmpc_push_num_teams: enter T#%d num_teams=%d num_threads=%d\n",
363  global_tid, num_teams, num_threads));
364 
365  __kmp_push_num_teams(loc, global_tid, num_teams, num_threads);
366 }
367 
378 void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro microtask,
379  ...) {
380  int gtid = __kmp_entry_gtid();
381  kmp_info_t *this_thr = __kmp_threads[gtid];
382  va_list ap;
383  va_start(ap, microtask);
384 
385 #if KMP_STATS_ENABLED
386  KMP_COUNT_BLOCK(OMP_TEAMS);
387  stats_state_e previous_state = KMP_GET_THREAD_STATE();
388  if (previous_state == stats_state_e::SERIAL_REGION) {
389  KMP_EXCHANGE_PARTITIONED_TIMER(OMP_teams_overhead);
390  } else {
391  KMP_PUSH_PARTITIONED_TIMER(OMP_teams_overhead);
392  }
393 #endif
394 
395  // remember teams entry point and nesting level
396  this_thr->th.th_teams_microtask = microtask;
397  this_thr->th.th_teams_level =
398  this_thr->th.th_team->t.t_level; // AC: can be >0 on host
399 
400 #if OMPT_SUPPORT
401  kmp_team_t *parent_team = this_thr->th.th_team;
402  int tid = __kmp_tid_from_gtid(gtid);
403  if (ompt_enabled.enabled) {
404  parent_team->t.t_implicit_task_taskdata[tid]
405  .ompt_task_info.frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
406  }
407  OMPT_STORE_RETURN_ADDRESS(gtid);
408 #endif
409 
410  // check if __kmpc_push_num_teams called, set default number of teams
411  // otherwise
412  if (this_thr->th.th_teams_size.nteams == 0) {
413  __kmp_push_num_teams(loc, gtid, 0, 0);
414  }
415  KMP_DEBUG_ASSERT(this_thr->th.th_set_nproc >= 1);
416  KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nteams >= 1);
417  KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nth >= 1);
418 
419  __kmp_fork_call(loc, gtid, fork_context_intel, argc,
420  VOLATILE_CAST(microtask_t)
421  __kmp_teams_master, // "wrapped" task
422  VOLATILE_CAST(launch_t) __kmp_invoke_teams_master,
423 #if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
424  &ap
425 #else
426  ap
427 #endif
428  );
429  __kmp_join_call(loc, gtid
430 #if OMPT_SUPPORT
431  ,
432  fork_context_intel
433 #endif
434  );
435 
436  // Pop current CG root off list
437  KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots);
438  kmp_cg_root_t *tmp = this_thr->th.th_cg_roots;
439  this_thr->th.th_cg_roots = tmp->up;
440  KA_TRACE(100, ("__kmpc_fork_teams: Thread %p popping node %p and moving up"
441  " to node %p. cg_nthreads was %d\n",
442  this_thr, tmp, this_thr->th.th_cg_roots, tmp->cg_nthreads));
443  __kmp_free(tmp);
444  // Restore current task's thread_limit from CG root
445  KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots);
446  this_thr->th.th_current_task->td_icvs.thread_limit =
447  this_thr->th.th_cg_roots->cg_thread_limit;
448 
449  this_thr->th.th_teams_microtask = NULL;
450  this_thr->th.th_teams_level = 0;
451  *(kmp_int64 *)(&this_thr->th.th_teams_size) = 0L;
452  va_end(ap);
453 #if KMP_STATS_ENABLED
454  if (previous_state == stats_state_e::SERIAL_REGION) {
455  KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
456  } else {
457  KMP_POP_PARTITIONED_TIMER();
458  }
459 #endif // KMP_STATS_ENABLED
460 }
461 #endif /* OMP_40_ENABLED */
462 
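For the host path, a teams construct maps onto the pair above: __kmpc_push_num_teams() records num_teams/thread_limit and __kmpc_fork_teams() launches the league. A small example, assuming a compiler and runtime with OpenMP 5.x host teams support:

    #include <omp.h>
    #include <stdio.h>

    int main(void) {
      /* num_teams(2) thread_limit(4) -> __kmpc_push_num_teams(&loc, gtid, 2, 4)
         followed by __kmpc_fork_teams(&loc, argc, outlined_teams_fn, ...).   */
      #pragma omp teams num_teams(2) thread_limit(4)
      printf("team %d of %d\n", omp_get_team_num(), omp_get_num_teams());
      return 0;
    }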
463 // I don't think this function should ever have been exported.
464 // The __kmpc_ prefix was misapplied. I'm fairly certain that no generated
465 // openmp code ever called it, but it's been exported from the RTL for so
466 // long that I'm afraid to remove the definition.
467 int __kmpc_invoke_task_func(int gtid) { return __kmp_invoke_task_func(gtid); }
468 
481 void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
482 // The implementation is now in kmp_runtime.cpp so that it can share static
483 // functions with kmp_fork_call since the tasks to be done are similar in
484 // each case.
485 #if OMPT_SUPPORT
486  OMPT_STORE_RETURN_ADDRESS(global_tid);
487 #endif
488  __kmp_serialized_parallel(loc, global_tid);
489 }
490 
498 void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
499  kmp_internal_control_t *top;
500  kmp_info_t *this_thr;
501  kmp_team_t *serial_team;
502 
503  KC_TRACE(10,
504  ("__kmpc_end_serialized_parallel: called by T#%d\n", global_tid));
505 
506  /* skip all this code for autopar serialized loops since it results in
507  unacceptable overhead */
508  if (loc != NULL && (loc->flags & KMP_IDENT_AUTOPAR))
509  return;
510 
511  // Not autopar code
512  if (!TCR_4(__kmp_init_parallel))
513  __kmp_parallel_initialize();
514 
515 #if OMP_50_ENABLED
516  __kmp_resume_if_soft_paused();
517 #endif
518 
519  this_thr = __kmp_threads[global_tid];
520  serial_team = this_thr->th.th_serial_team;
521 
522 #if OMP_45_ENABLED
523  kmp_task_team_t *task_team = this_thr->th.th_task_team;
524 
525  // we need to wait for the proxy tasks before finishing the thread
526  if (task_team != NULL && task_team->tt.tt_found_proxy_tasks)
527  __kmp_task_team_wait(this_thr, serial_team USE_ITT_BUILD_ARG(NULL));
528 #endif
529 
530  KMP_MB();
531  KMP_DEBUG_ASSERT(serial_team);
532  KMP_ASSERT(serial_team->t.t_serialized);
533  KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team);
534  KMP_DEBUG_ASSERT(serial_team != this_thr->th.th_root->r.r_root_team);
535  KMP_DEBUG_ASSERT(serial_team->t.t_threads);
536  KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);
537 
538 #if OMPT_SUPPORT
539  if (ompt_enabled.enabled &&
540  this_thr->th.ompt_thread_info.state != ompt_state_overhead) {
541  OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame = ompt_data_none;
542  if (ompt_enabled.ompt_callback_implicit_task) {
543  ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
544  ompt_scope_end, NULL, OMPT_CUR_TASK_DATA(this_thr), 1,
545  OMPT_CUR_TASK_INFO(this_thr)->thread_num, ompt_task_implicit);
546  }
547 
548  // reset/clear the task id only after unlinking the task
549  ompt_data_t *parent_task_data;
550  __ompt_get_task_info_internal(1, NULL, &parent_task_data, NULL, NULL, NULL);
551 
552  if (ompt_enabled.ompt_callback_parallel_end) {
553  ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
554  &(serial_team->t.ompt_team_info.parallel_data), parent_task_data,
555  ompt_parallel_invoker_program, OMPT_LOAD_RETURN_ADDRESS(global_tid));
556  }
557  __ompt_lw_taskteam_unlink(this_thr);
558  this_thr->th.ompt_thread_info.state = ompt_state_overhead;
559  }
560 #endif
561 
562  /* If necessary, pop the internal control stack values and replace the team
563  * values */
564  top = serial_team->t.t_control_stack_top;
565  if (top && top->serial_nesting_level == serial_team->t.t_serialized) {
566  copy_icvs(&serial_team->t.t_threads[0]->th.th_current_task->td_icvs, top);
567  serial_team->t.t_control_stack_top = top->next;
568  __kmp_free(top);
569  }
570 
571  // if( serial_team -> t.t_serialized > 1 )
572  serial_team->t.t_level--;
573 
574  /* pop dispatch buffers stack */
575  KMP_DEBUG_ASSERT(serial_team->t.t_dispatch->th_disp_buffer);
576  {
577  dispatch_private_info_t *disp_buffer =
578  serial_team->t.t_dispatch->th_disp_buffer;
579  serial_team->t.t_dispatch->th_disp_buffer =
580  serial_team->t.t_dispatch->th_disp_buffer->next;
581  __kmp_free(disp_buffer);
582  }
583 #if OMP_50_ENABLED
584  this_thr->th.th_def_allocator = serial_team->t.t_def_allocator; // restore
585 #endif
586 
587  --serial_team->t.t_serialized;
588  if (serial_team->t.t_serialized == 0) {
589 
590 /* return to the parallel section */
591 
592 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
593  if (__kmp_inherit_fp_control && serial_team->t.t_fp_control_saved) {
594  __kmp_clear_x87_fpu_status_word();
595  __kmp_load_x87_fpu_control_word(&serial_team->t.t_x87_fpu_control_word);
596  __kmp_load_mxcsr(&serial_team->t.t_mxcsr);
597  }
598 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
599 
600  this_thr->th.th_team = serial_team->t.t_parent;
601  this_thr->th.th_info.ds.ds_tid = serial_team->t.t_master_tid;
602 
603  /* restore values cached in the thread */
604  this_thr->th.th_team_nproc = serial_team->t.t_parent->t.t_nproc; /* JPH */
605  this_thr->th.th_team_master =
606  serial_team->t.t_parent->t.t_threads[0]; /* JPH */
607  this_thr->th.th_team_serialized = this_thr->th.th_team->t.t_serialized;
608 
609  /* TODO the below shouldn't need to be adjusted for serialized teams */
610  this_thr->th.th_dispatch =
611  &this_thr->th.th_team->t.t_dispatch[serial_team->t.t_master_tid];
612 
613  __kmp_pop_current_task_from_thread(this_thr);
614 
615  KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 0);
616  this_thr->th.th_current_task->td_flags.executing = 1;
617 
618  if (__kmp_tasking_mode != tskm_immediate_exec) {
619  // Copy the task team from the new child / old parent team to the thread.
620  this_thr->th.th_task_team =
621  this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state];
622  KA_TRACE(20,
623  ("__kmpc_end_serialized_parallel: T#%d restoring task_team %p / "
624  "team %p\n",
625  global_tid, this_thr->th.th_task_team, this_thr->th.th_team));
626  }
627  } else {
628  if (__kmp_tasking_mode != tskm_immediate_exec) {
629  KA_TRACE(20, ("__kmpc_end_serialized_parallel: T#%d decreasing nesting "
630  "depth of serial team %p to %d\n",
631  global_tid, serial_team, serial_team->t.t_serialized));
632  }
633  }
634 
635  if (__kmp_env_consistency_check)
636  __kmp_pop_parallel(global_tid, NULL);
637 #if OMPT_SUPPORT
638  if (ompt_enabled.enabled)
639  this_thr->th.ompt_thread_info.state =
640  ((this_thr->th.th_team_serialized) ? ompt_state_work_serial
641  : ompt_state_work_parallel);
642 #endif
643 }
644 
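The usual client of this pair is a parallel region whose if() clause evaluates to false: instead of __kmpc_fork_call(), the compiler typically brackets a direct call to the outlined body with __kmpc_serialized_parallel() and __kmpc_end_serialized_parallel(). A runnable example:

    #include <omp.h>
    #include <stdio.h>

    int main(void) {
      /* With if(0) the region is serialized; the generated code is roughly
           __kmpc_serialized_parallel(&loc, gtid);
           outlined(&gtid, &btid);   // body runs on the encountering thread
           __kmpc_end_serialized_parallel(&loc, gtid);                       */
      #pragma omp parallel if(0)
      printf("serialized region runs with %d thread(s)\n", omp_get_num_threads());
      return 0;
    }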
653 void __kmpc_flush(ident_t *loc) {
654  KC_TRACE(10, ("__kmpc_flush: called\n"));
655 
656  /* need explicit __mf() here since use volatile instead in library */
657  KMP_MB(); /* Flush all pending memory write invalidates. */
658 
659 #if (KMP_ARCH_X86 || KMP_ARCH_X86_64)
660 #if KMP_MIC
661 // fence-style instructions do not exist, but lock; xaddl $0,(%rsp) can be used.
662 // We shouldn't need it, though, since the ABI rules require that
663 // * If the compiler generates NGO stores it also generates the fence
664 // * If users hand-code NGO stores they should insert the fence
665 // therefore no incomplete unordered stores should be visible.
666 #else
667  // C74404
668  // This is to address non-temporal store instructions (sfence needed).
669  // The clflush instruction is also addressed (mfence needed).
670  // Probably the non-temporal load movntdqa instruction should also be
671  // addressed.
672  // mfence is a SSE2 instruction. Do not execute it if CPU is not SSE2.
673  if (!__kmp_cpuinfo.initialized) {
674  __kmp_query_cpuid(&__kmp_cpuinfo);
675  }
676  if (!__kmp_cpuinfo.sse2) {
677  // CPU cannot execute SSE2 instructions.
678  } else {
679 #if KMP_COMPILER_ICC
680  _mm_mfence();
681 #elif KMP_COMPILER_MSVC
682  MemoryBarrier();
683 #else
684  __sync_synchronize();
685 #endif // KMP_COMPILER_ICC
686  }
687 #endif // KMP_MIC
688 #elif (KMP_ARCH_ARM || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS || KMP_ARCH_MIPS64)
689 // Nothing to see here; move along.
690 #elif KMP_ARCH_PPC64
691 // Nothing needed here (we have a real MB above).
692 #if KMP_OS_CNK
693  // The flushing thread needs to yield here; this prevents a
694  // busy-waiting thread from saturating the pipeline. flush is
695  // often used in loops like this:
696  // while (!flag) {
697  // #pragma omp flush(flag)
698  // }
699  // and adding the yield here is good for at least a 10x speedup
700  // when running >2 threads per core (on the NAS LU benchmark).
701  __kmp_yield();
702 #endif
703 #else
704 #error Unknown or unsupported architecture
705 #endif
706 
707 #if OMPT_SUPPORT && OMPT_OPTIONAL
708  if (ompt_enabled.ompt_callback_flush) {
709  ompt_callbacks.ompt_callback(ompt_callback_flush)(
710  __ompt_get_thread_data_internal(), OMPT_GET_RETURN_ADDRESS(0));
711  }
712 #endif
713 }
714 
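At the source level every flush construct funnels into __kmpc_flush(); on SSE2-capable x86 that amounts to the mfence issued above. A small race-free example (the barrier supplies the ordering; the explicit flush is shown only to illustrate the mapping):

    #include <omp.h>
    #include <stdio.h>

    int main(void) {
      int data = 0;
      #pragma omp parallel num_threads(2) shared(data)
      {
        if (omp_get_thread_num() == 0) {
          data = 42;
          #pragma omp flush(data)   /* lowered to __kmpc_flush(&loc) */
        }
        #pragma omp barrier          /* a barrier also implies a flush */
        if (omp_get_thread_num() == 1)
          printf("thread 1 sees data = %d\n", data);
      }
      return 0;
    }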
715 /* -------------------------------------------------------------------------- */
723 void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid) {
724  KMP_COUNT_BLOCK(OMP_BARRIER);
725  KC_TRACE(10, ("__kmpc_barrier: called T#%d\n", global_tid));
726 
727  if (!TCR_4(__kmp_init_parallel))
728  __kmp_parallel_initialize();
729 
730 #if OMP_50_ENABLED
731  __kmp_resume_if_soft_paused();
732 #endif
733 
734  if (__kmp_env_consistency_check) {
735  if (loc == 0) {
736  KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user?
737  }
738 
739  __kmp_check_barrier(global_tid, ct_barrier, loc);
740  }
741 
742 #if OMPT_SUPPORT
743  ompt_frame_t *ompt_frame;
744  if (ompt_enabled.enabled) {
745  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
746  if (ompt_frame->enter_frame.ptr == NULL)
747  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
748  OMPT_STORE_RETURN_ADDRESS(global_tid);
749  }
750 #endif
751  __kmp_threads[global_tid]->th.th_ident = loc;
752  // TODO: explicit barrier_wait_id:
753  // this function is called when a 'barrier' directive is present or at the
754  // implicit barrier at the end of a worksharing construct.
755  // 1) better to add a per-thread barrier counter to a thread data structure
756  // 2) set to 0 when a new team is created
757  // 3) no sync is required
758 
759  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
760 #if OMPT_SUPPORT && OMPT_OPTIONAL
761  if (ompt_enabled.enabled) {
762  ompt_frame->enter_frame = ompt_data_none;
763  }
764 #endif
765 }
766 
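Both an explicit barrier directive and the implicit barrier at the end of a worksharing construct (when nowait is absent) arrive here. For example:

    #include <omp.h>
    #include <stdio.h>

    int main(void) {
      #pragma omp parallel num_threads(4)
      {
        printf("before barrier: thread %d\n", omp_get_thread_num());
        #pragma omp barrier        /* lowered to __kmpc_barrier(&loc, gtid) */
        printf("after barrier:  thread %d\n", omp_get_thread_num());
      }
      return 0;
    }

All "before" lines print before any "after" line.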
767 /* The BARRIER for a MASTER section is always explicit */
774 kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid) {
775  int status = 0;
776 
777  KC_TRACE(10, ("__kmpc_master: called T#%d\n", global_tid));
778 
779  if (!TCR_4(__kmp_init_parallel))
780  __kmp_parallel_initialize();
781 
782 #if OMP_50_ENABLED
783  __kmp_resume_if_soft_paused();
784 #endif
785 
786  if (KMP_MASTER_GTID(global_tid)) {
787  KMP_COUNT_BLOCK(OMP_MASTER);
788  KMP_PUSH_PARTITIONED_TIMER(OMP_master);
789  status = 1;
790  }
791 
792 #if OMPT_SUPPORT && OMPT_OPTIONAL
793  if (status) {
794  if (ompt_enabled.ompt_callback_master) {
795  kmp_info_t *this_thr = __kmp_threads[global_tid];
796  kmp_team_t *team = this_thr->th.th_team;
797 
798  int tid = __kmp_tid_from_gtid(global_tid);
799  ompt_callbacks.ompt_callback(ompt_callback_master)(
800  ompt_scope_begin, &(team->t.ompt_team_info.parallel_data),
801  &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
802  OMPT_GET_RETURN_ADDRESS(0));
803  }
804  }
805 #endif
806 
807  if (__kmp_env_consistency_check) {
808 #if KMP_USE_DYNAMIC_LOCK
809  if (status)
810  __kmp_push_sync(global_tid, ct_master, loc, NULL, 0);
811  else
812  __kmp_check_sync(global_tid, ct_master, loc, NULL, 0);
813 #else
814  if (status)
815  __kmp_push_sync(global_tid, ct_master, loc, NULL);
816  else
817  __kmp_check_sync(global_tid, ct_master, loc, NULL);
818 #endif
819  }
820 
821  return status;
822 }
823 
832 void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid) {
833  KC_TRACE(10, ("__kmpc_end_master: called T#%d\n", global_tid));
834 
835  KMP_DEBUG_ASSERT(KMP_MASTER_GTID(global_tid));
836  KMP_POP_PARTITIONED_TIMER();
837 
838 #if OMPT_SUPPORT && OMPT_OPTIONAL
839  kmp_info_t *this_thr = __kmp_threads[global_tid];
840  kmp_team_t *team = this_thr->th.th_team;
841  if (ompt_enabled.ompt_callback_master) {
842  int tid = __kmp_tid_from_gtid(global_tid);
843  ompt_callbacks.ompt_callback(ompt_callback_master)(
844  ompt_scope_end, &(team->t.ompt_team_info.parallel_data),
845  &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
846  OMPT_GET_RETURN_ADDRESS(0));
847  }
848 #endif
849 
850  if (__kmp_env_consistency_check) {
851  if (global_tid < 0)
852  KMP_WARNING(ThreadIdentInvalid);
853 
854  if (KMP_MASTER_GTID(global_tid))
855  __kmp_pop_sync(global_tid, ct_master, loc);
856  }
857 }
858 
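The master construct compiles into a guarded call pair: only the thread for which __kmpc_master() returns 1 executes the block and then calls __kmpc_end_master(); there is no implied barrier. For example:

    #include <omp.h>
    #include <stdio.h>

    int main(void) {
      #pragma omp parallel
      {
        /* roughly: if (__kmpc_master(&loc, gtid)) {
                      ...body...;
                      __kmpc_end_master(&loc, gtid);
                    }                                                  */
        #pragma omp master
        printf("master is thread %d of %d\n", omp_get_thread_num(),
               omp_get_num_threads());
      }
      return 0;
    }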
866 void __kmpc_ordered(ident_t *loc, kmp_int32 gtid) {
867  int cid = 0;
868  kmp_info_t *th;
869  KMP_DEBUG_ASSERT(__kmp_init_serial);
870 
871  KC_TRACE(10, ("__kmpc_ordered: called T#%d\n", gtid));
872 
873  if (!TCR_4(__kmp_init_parallel))
874  __kmp_parallel_initialize();
875 
876 #if OMP_50_ENABLED
877  __kmp_resume_if_soft_paused();
878 #endif
879 
880 #if USE_ITT_BUILD
881  __kmp_itt_ordered_prep(gtid);
882 // TODO: ordered_wait_id
883 #endif /* USE_ITT_BUILD */
884 
885  th = __kmp_threads[gtid];
886 
887 #if OMPT_SUPPORT && OMPT_OPTIONAL
888  kmp_team_t *team;
889  ompt_wait_id_t lck;
890  void *codeptr_ra;
891  if (ompt_enabled.enabled) {
892  OMPT_STORE_RETURN_ADDRESS(gtid);
893  team = __kmp_team_from_gtid(gtid);
894  lck = (ompt_wait_id_t)&team->t.t_ordered.dt.t_value;
895  /* OMPT state update */
896  th->th.ompt_thread_info.wait_id = lck;
897  th->th.ompt_thread_info.state = ompt_state_wait_ordered;
898 
899  /* OMPT event callback */
900  codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid);
901  if (ompt_enabled.ompt_callback_mutex_acquire) {
902  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
903  ompt_mutex_ordered, omp_lock_hint_none, kmp_mutex_impl_spin,
904  (ompt_wait_id_t)lck, codeptr_ra);
905  }
906  }
907 #endif
908 
909  if (th->th.th_dispatch->th_deo_fcn != 0)
910  (*th->th.th_dispatch->th_deo_fcn)(&gtid, &cid, loc);
911  else
912  __kmp_parallel_deo(&gtid, &cid, loc);
913 
914 #if OMPT_SUPPORT && OMPT_OPTIONAL
915  if (ompt_enabled.enabled) {
916  /* OMPT state update */
917  th->th.ompt_thread_info.state = ompt_state_work_parallel;
918  th->th.ompt_thread_info.wait_id = 0;
919 
920  /* OMPT event callback */
921  if (ompt_enabled.ompt_callback_mutex_acquired) {
922  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
923  ompt_mutex_ordered, (ompt_wait_id_t)lck, codeptr_ra);
924  }
925  }
926 #endif
927 
928 #if USE_ITT_BUILD
929  __kmp_itt_ordered_start(gtid);
930 #endif /* USE_ITT_BUILD */
931 }
932 
940 void __kmpc_end_ordered(ident_t *loc, kmp_int32 gtid) {
941  int cid = 0;
942  kmp_info_t *th;
943 
944  KC_TRACE(10, ("__kmpc_end_ordered: called T#%d\n", gtid));
945 
946 #if USE_ITT_BUILD
947  __kmp_itt_ordered_end(gtid);
948 // TODO: ordered_wait_id
949 #endif /* USE_ITT_BUILD */
950 
951  th = __kmp_threads[gtid];
952 
953  if (th->th.th_dispatch->th_dxo_fcn != 0)
954  (*th->th.th_dispatch->th_dxo_fcn)(&gtid, &cid, loc);
955  else
956  __kmp_parallel_dxo(&gtid, &cid, loc);
957 
958 #if OMPT_SUPPORT && OMPT_OPTIONAL
959  OMPT_STORE_RETURN_ADDRESS(gtid);
960  if (ompt_enabled.ompt_callback_mutex_released) {
961  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
962  ompt_mutex_ordered,
963  (ompt_wait_id_t)&__kmp_team_from_gtid(gtid)->t.t_ordered.dt.t_value,
964  OMPT_LOAD_RETURN_ADDRESS(gtid));
965  }
966 #endif
967 }
968 
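These two entry points bracket each ordered region inside a loop with the ordered clause, releasing iterations strictly in loop order. For example:

    #include <omp.h>
    #include <stdio.h>

    int main(void) {
      /* Each "#pragma omp ordered" body runs between __kmpc_ordered() and
         __kmpc_end_ordered(), so the lines below print in order 0..7.     */
      #pragma omp parallel for ordered schedule(static, 1)
      for (int i = 0; i < 8; ++i) {
        #pragma omp ordered
        printf("iteration %d\n", i);
      }
      return 0;
    }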
969 #if KMP_USE_DYNAMIC_LOCK
970 
971 static __forceinline void
972 __kmp_init_indirect_csptr(kmp_critical_name *crit, ident_t const *loc,
973  kmp_int32 gtid, kmp_indirect_locktag_t tag) {
974  // Pointer to the allocated indirect lock is written to crit, while indexing
975  // is ignored.
976  void *idx;
977  kmp_indirect_lock_t **lck;
978  lck = (kmp_indirect_lock_t **)crit;
979  kmp_indirect_lock_t *ilk = __kmp_allocate_indirect_lock(&idx, gtid, tag);
980  KMP_I_LOCK_FUNC(ilk, init)(ilk->lock);
981  KMP_SET_I_LOCK_LOCATION(ilk, loc);
982  KMP_SET_I_LOCK_FLAGS(ilk, kmp_lf_critical_section);
983  KA_TRACE(20,
984  ("__kmp_init_indirect_csptr: initialized indirect lock #%d\n", tag));
985 #if USE_ITT_BUILD
986  __kmp_itt_critical_creating(ilk->lock, loc);
987 #endif
988  int status = KMP_COMPARE_AND_STORE_PTR(lck, nullptr, ilk);
989  if (status == 0) {
990 #if USE_ITT_BUILD
991  __kmp_itt_critical_destroyed(ilk->lock);
992 #endif
993  // We don't really need to destroy the unclaimed lock here since it will be
994  // cleaned up at program exit.
995  // KMP_D_LOCK_FUNC(&idx, destroy)((kmp_dyna_lock_t *)&idx);
996  }
997  KMP_DEBUG_ASSERT(*lck != NULL);
998 }
999 
1000 // Fast-path acquire tas lock
1001 #define KMP_ACQUIRE_TAS_LOCK(lock, gtid) \
1002  { \
1003  kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \
1004  kmp_int32 tas_free = KMP_LOCK_FREE(tas); \
1005  kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); \
1006  if (KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \
1007  !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)) { \
1008  kmp_uint32 spins; \
1009  KMP_FSYNC_PREPARE(l); \
1010  KMP_INIT_YIELD(spins); \
1011  kmp_backoff_t backoff = __kmp_spin_backoff_params; \
1012  do { \
1013  if (TCR_4(__kmp_nth) > \
1014  (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { \
1015  KMP_YIELD(TRUE); \
1016  } else { \
1017  KMP_YIELD_SPIN(spins); \
1018  } \
1019  __kmp_spin_backoff(&backoff); \
1020  } while ( \
1021  KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \
1022  !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)); \
1023  } \
1024  KMP_FSYNC_ACQUIRED(l); \
1025  }
1026 
1027 // Fast-path test tas lock
1028 #define KMP_TEST_TAS_LOCK(lock, gtid, rc) \
1029  { \
1030  kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \
1031  kmp_int32 tas_free = KMP_LOCK_FREE(tas); \
1032  kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); \
1033  rc = KMP_ATOMIC_LD_RLX(&l->lk.poll) == tas_free && \
1034  __kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy); \
1035  }
1036 
1037 // Fast-path release tas lock
1038 #define KMP_RELEASE_TAS_LOCK(lock, gtid) \
1039  { KMP_ATOMIC_ST_REL(&((kmp_tas_lock_t *)lock)->lk.poll, KMP_LOCK_FREE(tas)); }
1040 
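For readers who want the fast path in isolation: a minimal, self-contained sketch of the same test-and-set protocol using C11 atomics instead of the kmp_* types (relaxed read first, acquire CAS, back off between attempts; sched_yield() stands in for KMP_YIELD/__kmp_spin_backoff):

    #include <stdatomic.h>
    #include <sched.h>

    typedef struct { atomic_int poll; } tas_lock_t;  /* 0 = free, gtid+1 = owner */

    static void tas_acquire(tas_lock_t *l, int gtid) {
      int busy = gtid + 1;
      for (;;) {
        int expected = 0;
        if (atomic_load_explicit(&l->poll, memory_order_relaxed) == 0 &&
            atomic_compare_exchange_strong_explicit(&l->poll, &expected, busy,
                                                    memory_order_acquire,
                                                    memory_order_relaxed))
          return;                 /* lock taken */
        sched_yield();            /* simple backoff */
      }
    }

    static void tas_release(tas_lock_t *l) {
      atomic_store_explicit(&l->poll, 0, memory_order_release);
    }

    int main(void) {
      tas_lock_t lock = {0};      /* 0 == free */
      tas_acquire(&lock, 5);
      tas_release(&lock);
      return 0;
    }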
1041 #if KMP_USE_FUTEX
1042 
1043 #include <sys/syscall.h>
1044 #include <unistd.h>
1045 #ifndef FUTEX_WAIT
1046 #define FUTEX_WAIT 0
1047 #endif
1048 #ifndef FUTEX_WAKE
1049 #define FUTEX_WAKE 1
1050 #endif
1051 
1052 // Fast-path acquire futex lock
1053 #define KMP_ACQUIRE_FUTEX_LOCK(lock, gtid) \
1054  { \
1055  kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
1056  kmp_int32 gtid_code = (gtid + 1) << 1; \
1057  KMP_MB(); \
1058  KMP_FSYNC_PREPARE(ftx); \
1059  kmp_int32 poll_val; \
1060  while ((poll_val = KMP_COMPARE_AND_STORE_RET32( \
1061  &(ftx->lk.poll), KMP_LOCK_FREE(futex), \
1062  KMP_LOCK_BUSY(gtid_code, futex))) != KMP_LOCK_FREE(futex)) { \
1063  kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1; \
1064  if (!cond) { \
1065  if (!KMP_COMPARE_AND_STORE_RET32(&(ftx->lk.poll), poll_val, \
1066  poll_val | \
1067  KMP_LOCK_BUSY(1, futex))) { \
1068  continue; \
1069  } \
1070  poll_val |= KMP_LOCK_BUSY(1, futex); \
1071  } \
1072  kmp_int32 rc; \
1073  if ((rc = syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAIT, poll_val, \
1074  NULL, NULL, 0)) != 0) { \
1075  continue; \
1076  } \
1077  gtid_code |= 1; \
1078  } \
1079  KMP_FSYNC_ACQUIRED(ftx); \
1080  }
1081 
1082 // Fast-path test futex lock
1083 #define KMP_TEST_FUTEX_LOCK(lock, gtid, rc) \
1084  { \
1085  kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
1086  if (KMP_COMPARE_AND_STORE_ACQ32(&(ftx->lk.poll), KMP_LOCK_FREE(futex), \
1087  KMP_LOCK_BUSY(gtid + 1 << 1, futex))) { \
1088  KMP_FSYNC_ACQUIRED(ftx); \
1089  rc = TRUE; \
1090  } else { \
1091  rc = FALSE; \
1092  } \
1093  }
1094 
1095 // Fast-path release futex lock
1096 #define KMP_RELEASE_FUTEX_LOCK(lock, gtid) \
1097  { \
1098  kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
1099  KMP_MB(); \
1100  KMP_FSYNC_RELEASING(ftx); \
1101  kmp_int32 poll_val = \
1102  KMP_XCHG_FIXED32(&(ftx->lk.poll), KMP_LOCK_FREE(futex)); \
1103  if (KMP_LOCK_STRIP(poll_val) & 1) { \
1104  syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAKE, \
1105  KMP_LOCK_BUSY(1, futex), NULL, NULL, 0); \
1106  } \
1107  KMP_MB(); \
1108  KMP_YIELD_OVERSUB(); \
1109  }
1110 
1111 #endif // KMP_USE_FUTEX
1112 
1113 #else // KMP_USE_DYNAMIC_LOCK
1114 
1115 static kmp_user_lock_p __kmp_get_critical_section_ptr(kmp_critical_name *crit,
1116  ident_t const *loc,
1117  kmp_int32 gtid) {
1118  kmp_user_lock_p *lck_pp = (kmp_user_lock_p *)crit;
1119 
1120  // Because of the double-check, the following load doesn't need to be volatile
1121  kmp_user_lock_p lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);
1122 
1123  if (lck == NULL) {
1124  void *idx;
1125 
1126  // Allocate & initialize the lock.
1127  // Remember alloc'ed locks in table in order to free them in __kmp_cleanup()
1128  lck = __kmp_user_lock_allocate(&idx, gtid, kmp_lf_critical_section);
1129  __kmp_init_user_lock_with_checks(lck);
1130  __kmp_set_user_lock_location(lck, loc);
1131 #if USE_ITT_BUILD
1132  __kmp_itt_critical_creating(lck);
1133 // __kmp_itt_critical_creating() should be called *before* the first usage
1134 // of the underlying lock. This is the only place where we can guarantee it.
1135 // There is a chance the lock will be destroyed without ever being used, but
1136 // that is not a problem, because this is not a real event seen by the user;
1137 // it merely sets a name for the object (lock). See kmp_itt.h for details.
1138 #endif /* USE_ITT_BUILD */
1139 
1140  // Use a cmpxchg instruction to slam the start of the critical section with
1141  // the lock pointer. If another thread beat us to it, deallocate the lock,
1142  // and use the lock that the other thread allocated.
1143  int status = KMP_COMPARE_AND_STORE_PTR(lck_pp, 0, lck);
1144 
1145  if (status == 0) {
1146 // Deallocate the lock and reload the value.
1147 #if USE_ITT_BUILD
1148  __kmp_itt_critical_destroyed(lck);
1149 // Let ITT know the lock is destroyed and the same memory location may be reused
1150 // for another purpose.
1151 #endif /* USE_ITT_BUILD */
1152  __kmp_destroy_user_lock_with_checks(lck);
1153  __kmp_user_lock_free(&idx, gtid, lck);
1154  lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);
1155  KMP_DEBUG_ASSERT(lck != NULL);
1156  }
1157  }
1158  return lck;
1159 }
1160 
1161 #endif // KMP_USE_DYNAMIC_LOCK
1162 
1173 void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
1174  kmp_critical_name *crit) {
1175 #if KMP_USE_DYNAMIC_LOCK
1176 #if OMPT_SUPPORT && OMPT_OPTIONAL
1177  OMPT_STORE_RETURN_ADDRESS(global_tid);
1178 #endif // OMPT_SUPPORT
1179  __kmpc_critical_with_hint(loc, global_tid, crit, omp_lock_hint_none);
1180 #else
1181  KMP_COUNT_BLOCK(OMP_CRITICAL);
1182 #if OMPT_SUPPORT && OMPT_OPTIONAL
1183  ompt_state_t prev_state = ompt_state_undefined;
1184  ompt_thread_info_t ti;
1185 #endif
1186  kmp_user_lock_p lck;
1187 
1188  KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));
1189 
1190  // TODO: add THR_OVHD_STATE
1191 
1192  KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
1193  KMP_CHECK_USER_LOCK_INIT();
1194 
1195  if ((__kmp_user_lock_kind == lk_tas) &&
1196  (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
1197  lck = (kmp_user_lock_p)crit;
1198  }
1199 #if KMP_USE_FUTEX
1200  else if ((__kmp_user_lock_kind == lk_futex) &&
1201  (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
1202  lck = (kmp_user_lock_p)crit;
1203  }
1204 #endif
1205  else { // ticket, queuing or drdpa
1206  lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);
1207  }
1208 
1209  if (__kmp_env_consistency_check)
1210  __kmp_push_sync(global_tid, ct_critical, loc, lck);
1211 
1212 // Since the critical directive binds to all threads, not just the current
1213 // team, we have to check this even if we are in a serialized team.
1214 // Also, even if we are the uber thread, we still have to acquire the lock,
1215 // as we have to contend with sibling threads.
1216 
1217 #if USE_ITT_BUILD
1218  __kmp_itt_critical_acquiring(lck);
1219 #endif /* USE_ITT_BUILD */
1220 #if OMPT_SUPPORT && OMPT_OPTIONAL
1221  OMPT_STORE_RETURN_ADDRESS(global_tid);
1222  void *codeptr_ra = NULL;
1223  if (ompt_enabled.enabled) {
1224  ti = __kmp_threads[global_tid]->th.ompt_thread_info;
1225  /* OMPT state update */
1226  prev_state = ti.state;
1227  ti.wait_id = (ompt_wait_id_t)lck;
1228  ti.state = ompt_state_wait_critical;
1229 
1230  /* OMPT event callback */
1231  codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(global_tid);
1232  if (ompt_enabled.ompt_callback_mutex_acquire) {
1233  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
1234  ompt_mutex_critical, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
1235  (ompt_wait_id_t)crit, codeptr_ra);
1236  }
1237  }
1238 #endif
1239  // Value of 'crit' should be good for using as a critical_id of the critical
1240  // section directive.
1241  __kmp_acquire_user_lock_with_checks(lck, global_tid);
1242 
1243 #if USE_ITT_BUILD
1244  __kmp_itt_critical_acquired(lck);
1245 #endif /* USE_ITT_BUILD */
1246 #if OMPT_SUPPORT && OMPT_OPTIONAL
1247  if (ompt_enabled.enabled) {
1248  /* OMPT state update */
1249  ti.state = prev_state;
1250  ti.wait_id = 0;
1251 
1252  /* OMPT event callback */
1253  if (ompt_enabled.ompt_callback_mutex_acquired) {
1254  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
1255  ompt_mutex_critical, (ompt_wait_id_t)crit, codeptr_ra);
1256  }
1257  }
1258 #endif
1259  KMP_POP_PARTITIONED_TIMER();
1260 
1261  KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
1262  KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
1263 #endif // KMP_USE_DYNAMIC_LOCK
1264 }
1265 
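A critical construct is lowered to this call paired with __kmpc_end_critical(), with the kmp_critical_name argument pointing at a compiler-emitted global for the (optional) name; clang, for instance, names it something like .gomp_critical_user_update.var. For example:

    #include <omp.h>
    #include <stdio.h>

    int main(void) {
      int sum = 0;
      /* roughly: __kmpc_critical(&loc, gtid, &crit_update);
                  sum += ...;
                  __kmpc_end_critical(&loc, gtid, &crit_update);           */
      #pragma omp parallel
      {
        #pragma omp critical(update)
        sum += omp_get_thread_num();
      }
      printf("sum = %d\n", sum);
      return 0;
    }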
1266 #if KMP_USE_DYNAMIC_LOCK
1267 
1268 // Converts the given hint to an internal lock implementation
1269 static __forceinline kmp_dyna_lockseq_t __kmp_map_hint_to_lock(uintptr_t hint) {
1270 #if KMP_USE_TSX
1271 #define KMP_TSX_LOCK(seq) lockseq_##seq
1272 #else
1273 #define KMP_TSX_LOCK(seq) __kmp_user_lock_seq
1274 #endif
1275 
1276 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1277 #define KMP_CPUINFO_RTM (__kmp_cpuinfo.rtm)
1278 #else
1279 #define KMP_CPUINFO_RTM 0
1280 #endif
1281 
1282  // Hints that do not require further logic
1283  if (hint & kmp_lock_hint_hle)
1284  return KMP_TSX_LOCK(hle);
1285  if (hint & kmp_lock_hint_rtm)
1286  return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm) : __kmp_user_lock_seq;
1287  if (hint & kmp_lock_hint_adaptive)
1288  return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(adaptive) : __kmp_user_lock_seq;
1289 
1290  // Rule out conflicting hints first by returning the default lock
1291  if ((hint & omp_lock_hint_contended) && (hint & omp_lock_hint_uncontended))
1292  return __kmp_user_lock_seq;
1293  if ((hint & omp_lock_hint_speculative) &&
1294  (hint & omp_lock_hint_nonspeculative))
1295  return __kmp_user_lock_seq;
1296 
1297  // Do not even consider speculation when it appears to be contended
1298  if (hint & omp_lock_hint_contended)
1299  return lockseq_queuing;
1300 
1301  // Uncontended lock without speculation
1302  if ((hint & omp_lock_hint_uncontended) && !(hint & omp_lock_hint_speculative))
1303  return lockseq_tas;
1304 
1305  // HLE lock for speculation
1306  if (hint & omp_lock_hint_speculative)
1307  return KMP_TSX_LOCK(hle);
1308 
1309  return __kmp_user_lock_seq;
1310 }
1311 
1312 #if OMPT_SUPPORT && OMPT_OPTIONAL
1313 #if KMP_USE_DYNAMIC_LOCK
1314 static kmp_mutex_impl_t
1315 __ompt_get_mutex_impl_type(void *user_lock, kmp_indirect_lock_t *ilock = 0) {
1316  if (user_lock) {
1317  switch (KMP_EXTRACT_D_TAG(user_lock)) {
1318  case 0:
1319  break;
1320 #if KMP_USE_FUTEX
1321  case locktag_futex:
1322  return kmp_mutex_impl_queuing;
1323 #endif
1324  case locktag_tas:
1325  return kmp_mutex_impl_spin;
1326 #if KMP_USE_TSX
1327  case locktag_hle:
1328  return kmp_mutex_impl_speculative;
1329 #endif
1330  default:
1331  return kmp_mutex_impl_none;
1332  }
1333  ilock = KMP_LOOKUP_I_LOCK(user_lock);
1334  }
1335  KMP_ASSERT(ilock);
1336  switch (ilock->type) {
1337 #if KMP_USE_TSX
1338  case locktag_adaptive:
1339  case locktag_rtm:
1340  return kmp_mutex_impl_speculative;
1341 #endif
1342  case locktag_nested_tas:
1343  return kmp_mutex_impl_spin;
1344 #if KMP_USE_FUTEX
1345  case locktag_nested_futex:
1346 #endif
1347  case locktag_ticket:
1348  case locktag_queuing:
1349  case locktag_drdpa:
1350  case locktag_nested_ticket:
1351  case locktag_nested_queuing:
1352  case locktag_nested_drdpa:
1353  return kmp_mutex_impl_queuing;
1354  default:
1355  return kmp_mutex_impl_none;
1356  }
1357 }
1358 #else
1359 // For locks without dynamic binding
1360 static kmp_mutex_impl_t __ompt_get_mutex_impl_type() {
1361  switch (__kmp_user_lock_kind) {
1362  case lk_tas:
1363  return kmp_mutex_impl_spin;
1364 #if KMP_USE_FUTEX
1365  case lk_futex:
1366 #endif
1367  case lk_ticket:
1368  case lk_queuing:
1369  case lk_drdpa:
1370  return kmp_mutex_impl_queuing;
1371 #if KMP_USE_TSX
1372  case lk_hle:
1373  case lk_rtm:
1374  case lk_adaptive:
1375  return kmp_mutex_impl_speculative;
1376 #endif
1377  default:
1378  return kmp_mutex_impl_none;
1379  }
1380 }
1381 #endif // KMP_USE_DYNAMIC_LOCK
1382 #endif // OMPT_SUPPORT && OMPT_OPTIONAL
1383 
1397 void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
1398  kmp_critical_name *crit, uint32_t hint) {
1399  KMP_COUNT_BLOCK(OMP_CRITICAL);
1400  kmp_user_lock_p lck;
1401 #if OMPT_SUPPORT && OMPT_OPTIONAL
1402  ompt_state_t prev_state = ompt_state_undefined;
1403  ompt_thread_info_t ti;
1404  // This is the case if called from __kmpc_critical:
1405  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid);
1406  if (!codeptr)
1407  codeptr = OMPT_GET_RETURN_ADDRESS(0);
1408 #endif
1409 
1410  KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));
1411 
1412  kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
1413  // Check if it is initialized.
1414  KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
1415  if (*lk == 0) {
1416  kmp_dyna_lockseq_t lckseq = __kmp_map_hint_to_lock(hint);
1417  if (KMP_IS_D_LOCK(lckseq)) {
1418  KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0,
1419  KMP_GET_D_TAG(lckseq));
1420  } else {
1421  __kmp_init_indirect_csptr(crit, loc, global_tid, KMP_GET_I_TAG(lckseq));
1422  }
1423  }
1424  // Branch for accessing the actual lock object and set operation. This
1425  // branching is inevitable since this lock initialization does not follow the
1426  // normal dispatch path (lock table is not used).
1427  if (KMP_EXTRACT_D_TAG(lk) != 0) {
1428  lck = (kmp_user_lock_p)lk;
1429  if (__kmp_env_consistency_check) {
1430  __kmp_push_sync(global_tid, ct_critical, loc, lck,
1431  __kmp_map_hint_to_lock(hint));
1432  }
1433 #if USE_ITT_BUILD
1434  __kmp_itt_critical_acquiring(lck);
1435 #endif
1436 #if OMPT_SUPPORT && OMPT_OPTIONAL
1437  if (ompt_enabled.enabled) {
1438  ti = __kmp_threads[global_tid]->th.ompt_thread_info;
1439  /* OMPT state update */
1440  prev_state = ti.state;
1441  ti.wait_id = (ompt_wait_id_t)lck;
1442  ti.state = ompt_state_wait_critical;
1443 
1444  /* OMPT event callback */
1445  if (ompt_enabled.ompt_callback_mutex_acquire) {
1446  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
1447  ompt_mutex_critical, (unsigned int)hint,
1448  __ompt_get_mutex_impl_type(crit), (ompt_wait_id_t)crit, codeptr);
1449  }
1450  }
1451 #endif
1452 #if KMP_USE_INLINED_TAS
1453  if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) {
1454  KMP_ACQUIRE_TAS_LOCK(lck, global_tid);
1455  } else
1456 #elif KMP_USE_INLINED_FUTEX
1457  if (__kmp_user_lock_seq == lockseq_futex && !__kmp_env_consistency_check) {
1458  KMP_ACQUIRE_FUTEX_LOCK(lck, global_tid);
1459  } else
1460 #endif
1461  {
1462  KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
1463  }
1464  } else {
1465  kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
1466  lck = ilk->lock;
1467  if (__kmp_env_consistency_check) {
1468  __kmp_push_sync(global_tid, ct_critical, loc, lck,
1469  __kmp_map_hint_to_lock(hint));
1470  }
1471 #if USE_ITT_BUILD
1472  __kmp_itt_critical_acquiring(lck);
1473 #endif
1474 #if OMPT_SUPPORT && OMPT_OPTIONAL
1475  if (ompt_enabled.enabled) {
1476  ti = __kmp_threads[global_tid]->th.ompt_thread_info;
1477  /* OMPT state update */
1478  prev_state = ti.state;
1479  ti.wait_id = (ompt_wait_id_t)lck;
1480  ti.state = ompt_state_wait_critical;
1481 
1482  /* OMPT event callback */
1483  if (ompt_enabled.ompt_callback_mutex_acquire) {
1484  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
1485  ompt_mutex_critical, (unsigned int)hint,
1486  __ompt_get_mutex_impl_type(0, ilk), (ompt_wait_id_t)crit, codeptr);
1487  }
1488  }
1489 #endif
1490  KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
1491  }
1492  KMP_POP_PARTITIONED_TIMER();
1493 
1494 #if USE_ITT_BUILD
1495  __kmp_itt_critical_acquired(lck);
1496 #endif /* USE_ITT_BUILD */
1497 #if OMPT_SUPPORT && OMPT_OPTIONAL
1498  if (ompt_enabled.enabled) {
1499  /* OMPT state update */
1500  ti.state = prev_state;
1501  ti.wait_id = 0;
1502 
1503  /* OMPT event callback */
1504  if (ompt_enabled.ompt_callback_mutex_acquired) {
1505  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
1506  ompt_mutex_critical, (ompt_wait_id_t)crit, codeptr);
1507  }
1508  }
1509 #endif
1510 
1511  KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
1512  KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
1513 } // __kmpc_critical_with_hint
1514 
1515 #endif // KMP_USE_DYNAMIC_LOCK
1516 
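When a critical construct carries a hint clause (OpenMP 4.5 and later), the compiler routes it through __kmpc_critical_with_hint(), and __kmp_map_hint_to_lock() above picks the implementation (speculative hints prefer HLE/RTM where available, contended prefers queuing, uncontended prefers test-and-set). A small example; the constant is spelled omp_sync_hint_uncontended in OpenMP 5.0 and omp_lock_hint_uncontended in 4.5 headers:

    #include <omp.h>
    #include <stdio.h>

    int main(void) {
      long x = 0;
      #pragma omp parallel
      {
        /* hint(...) -> __kmpc_critical_with_hint(&loc, gtid, &crit, hint) */
        #pragma omp critical(counter) hint(omp_sync_hint_uncontended)
        x++;
      }
      printf("x = %ld\n", x);
      return 0;
    }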
1526 void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
1527  kmp_critical_name *crit) {
1528  kmp_user_lock_p lck;
1529 
1530  KC_TRACE(10, ("__kmpc_end_critical: called T#%d\n", global_tid));
1531 
1532 #if KMP_USE_DYNAMIC_LOCK
1533  if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
1534  lck = (kmp_user_lock_p)crit;
1535  KMP_ASSERT(lck != NULL);
1536  if (__kmp_env_consistency_check) {
1537  __kmp_pop_sync(global_tid, ct_critical, loc);
1538  }
1539 #if USE_ITT_BUILD
1540  __kmp_itt_critical_releasing(lck);
1541 #endif
1542 #if KMP_USE_INLINED_TAS
1543  if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) {
1544  KMP_RELEASE_TAS_LOCK(lck, global_tid);
1545  } else
1546 #elif KMP_USE_INLINED_FUTEX
1547  if (__kmp_user_lock_seq == lockseq_futex && !__kmp_env_consistency_check) {
1548  KMP_RELEASE_FUTEX_LOCK(lck, global_tid);
1549  } else
1550 #endif
1551  {
1552  KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
1553  }
1554  } else {
1555  kmp_indirect_lock_t *ilk =
1556  (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
1557  KMP_ASSERT(ilk != NULL);
1558  lck = ilk->lock;
1559  if (__kmp_env_consistency_check) {
1560  __kmp_pop_sync(global_tid, ct_critical, loc);
1561  }
1562 #if USE_ITT_BUILD
1563  __kmp_itt_critical_releasing(lck);
1564 #endif
1565  KMP_I_LOCK_FUNC(ilk, unset)(lck, global_tid);
1566  }
1567 
1568 #else // KMP_USE_DYNAMIC_LOCK
1569 
1570  if ((__kmp_user_lock_kind == lk_tas) &&
1571  (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
1572  lck = (kmp_user_lock_p)crit;
1573  }
1574 #if KMP_USE_FUTEX
1575  else if ((__kmp_user_lock_kind == lk_futex) &&
1576  (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
1577  lck = (kmp_user_lock_p)crit;
1578  }
1579 #endif
1580  else { // ticket, queuing or drdpa
1581  lck = (kmp_user_lock_p)TCR_PTR(*((kmp_user_lock_p *)crit));
1582  }
1583 
1584  KMP_ASSERT(lck != NULL);
1585 
1586  if (__kmp_env_consistency_check)
1587  __kmp_pop_sync(global_tid, ct_critical, loc);
1588 
1589 #if USE_ITT_BUILD
1590  __kmp_itt_critical_releasing(lck);
1591 #endif /* USE_ITT_BUILD */
1592  // Value of 'crit' should be good for using as a critical_id of the critical
1593  // section directive.
1594  __kmp_release_user_lock_with_checks(lck, global_tid);
1595 
1596 #endif // KMP_USE_DYNAMIC_LOCK
1597 
1598 #if OMPT_SUPPORT && OMPT_OPTIONAL
1599  /* OMPT release event triggers after lock is released; place here to trigger
1600  * for all #if branches */
1601  OMPT_STORE_RETURN_ADDRESS(global_tid);
1602  if (ompt_enabled.ompt_callback_mutex_released) {
1603  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
1604  ompt_mutex_critical, (ompt_wait_id_t)crit, OMPT_LOAD_RETURN_ADDRESS(0));
1605  }
1606 #endif
1607 
1608  KMP_POP_PARTITIONED_TIMER();
1609  KA_TRACE(15, ("__kmpc_end_critical: done T#%d\n", global_tid));
1610 }
1611 
1621 kmp_int32 __kmpc_barrier_master(ident_t *loc, kmp_int32 global_tid) {
1622  int status;
1623 
1624  KC_TRACE(10, ("__kmpc_barrier_master: called T#%d\n", global_tid));
1625 
1626  if (!TCR_4(__kmp_init_parallel))
1627  __kmp_parallel_initialize();
1628 
1629 #if OMP_50_ENABLED
1630  __kmp_resume_if_soft_paused();
1631 #endif
1632 
1633  if (__kmp_env_consistency_check)
1634  __kmp_check_barrier(global_tid, ct_barrier, loc);
1635 
1636 #if OMPT_SUPPORT
1637  ompt_frame_t *ompt_frame;
1638  if (ompt_enabled.enabled) {
1639  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
1640  if (ompt_frame->enter_frame.ptr == NULL)
1641  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
1642  OMPT_STORE_RETURN_ADDRESS(global_tid);
1643  }
1644 #endif
1645 #if USE_ITT_NOTIFY
1646  __kmp_threads[global_tid]->th.th_ident = loc;
1647 #endif
1648  status = __kmp_barrier(bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL);
1649 #if OMPT_SUPPORT && OMPT_OPTIONAL
1650  if (ompt_enabled.enabled) {
1651  ompt_frame->enter_frame = ompt_data_none;
1652  }
1653 #endif
1654 
1655  return (status != 0) ? 0 : 1;
1656 }
1657 
1667 void __kmpc_end_barrier_master(ident_t *loc, kmp_int32 global_tid) {
1668  KC_TRACE(10, ("__kmpc_end_barrier_master: called T#%d\n", global_tid));
1669 
1670  __kmp_end_split_barrier(bs_plain_barrier, global_tid);
1671 }
1672 
1683 kmp_int32 __kmpc_barrier_master_nowait(ident_t *loc, kmp_int32 global_tid) {
1684  kmp_int32 ret;
1685 
1686  KC_TRACE(10, ("__kmpc_barrier_master_nowait: called T#%d\n", global_tid));
1687 
1688  if (!TCR_4(__kmp_init_parallel))
1689  __kmp_parallel_initialize();
1690 
1691 #if OMP_50_ENABLED
1692  __kmp_resume_if_soft_paused();
1693 #endif
1694 
1695  if (__kmp_env_consistency_check) {
1696  if (loc == 0) {
1697  KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user?
1698  }
1699  __kmp_check_barrier(global_tid, ct_barrier, loc);
1700  }
1701 
1702 #if OMPT_SUPPORT
1703  ompt_frame_t *ompt_frame;
1704  if (ompt_enabled.enabled) {
1705  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
1706  if (ompt_frame->enter_frame.ptr == NULL)
1707  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
1708  OMPT_STORE_RETURN_ADDRESS(global_tid);
1709  }
1710 #endif
1711 #if USE_ITT_NOTIFY
1712  __kmp_threads[global_tid]->th.th_ident = loc;
1713 #endif
1714  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
1715 #if OMPT_SUPPORT && OMPT_OPTIONAL
1716  if (ompt_enabled.enabled) {
1717  ompt_frame->enter_frame = ompt_data_none;
1718  }
1719 #endif
1720 
1721  ret = __kmpc_master(loc, global_tid);
1722 
1723  if (__kmp_env_consistency_check) {
1724  /* __kmpc_end_master() is not called here, so the (stats) */
1725  /* actions of __kmpc_end_master are performed here instead */
1726 
1727  if (global_tid < 0) {
1728  KMP_WARNING(ThreadIdentInvalid);
1729  }
1730  if (ret) {
1731  /* only one thread should do the pop since only */
1732  /* one did the push (see __kmpc_master()) */
1733 
1734  __kmp_pop_sync(global_tid, ct_master, loc);
1735  }
1736  }
1737 
1738  return (ret);
1739 }
1740 
1741 /* The BARRIER for a SINGLE process section is always explicit */
1753 kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid) {
1754  kmp_int32 rc = __kmp_enter_single(global_tid, loc, TRUE);
1755 
1756  if (rc) {
1757  // We are going to execute the single statement, so we should count it.
1758  KMP_COUNT_BLOCK(OMP_SINGLE);
1759  KMP_PUSH_PARTITIONED_TIMER(OMP_single);
1760  }
1761 
1762 #if OMPT_SUPPORT && OMPT_OPTIONAL
1763  kmp_info_t *this_thr = __kmp_threads[global_tid];
1764  kmp_team_t *team = this_thr->th.th_team;
1765  int tid = __kmp_tid_from_gtid(global_tid);
1766 
1767  if (ompt_enabled.enabled) {
1768  if (rc) {
1769  if (ompt_enabled.ompt_callback_work) {
1770  ompt_callbacks.ompt_callback(ompt_callback_work)(
1771  ompt_work_single_executor, ompt_scope_begin,
1772  &(team->t.ompt_team_info.parallel_data),
1773  &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1774  1, OMPT_GET_RETURN_ADDRESS(0));
1775  }
1776  } else {
1777  if (ompt_enabled.ompt_callback_work) {
1778  ompt_callbacks.ompt_callback(ompt_callback_work)(
1779  ompt_work_single_other, ompt_scope_begin,
1780  &(team->t.ompt_team_info.parallel_data),
1781  &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1782  1, OMPT_GET_RETURN_ADDRESS(0));
1783  ompt_callbacks.ompt_callback(ompt_callback_work)(
1784  ompt_work_single_other, ompt_scope_end,
1785  &(team->t.ompt_team_info.parallel_data),
1786  &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1787  1, OMPT_GET_RETURN_ADDRESS(0));
1788  }
1789  }
1790  }
1791 #endif
1792 
1793  return rc;
1794 }
1795 
1805 void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid) {
1806  __kmp_exit_single(global_tid);
1807  KMP_POP_PARTITIONED_TIMER();
1808 
1809 #if OMPT_SUPPORT && OMPT_OPTIONAL
1810  kmp_info_t *this_thr = __kmp_threads[global_tid];
1811  kmp_team_t *team = this_thr->th.th_team;
1812  int tid = __kmp_tid_from_gtid(global_tid);
1813 
1814  if (ompt_enabled.ompt_callback_work) {
1815  ompt_callbacks.ompt_callback(ompt_callback_work)(
1816  ompt_work_single_executor, ompt_scope_end,
1817  &(team->t.ompt_team_info.parallel_data),
1818  &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1,
1819  OMPT_GET_RETURN_ADDRESS(0));
1820  }
1821 #endif
1822 }
1823 
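A single construct becomes a guarded call pair much like master, except that exactly one (not necessarily the master) thread executes the block and, unless nowait is given, the construct ends with a team barrier. For example:

    #include <omp.h>
    #include <stdio.h>

    int main(void) {
      #pragma omp parallel
      {
        /* roughly: if (__kmpc_single(&loc, gtid)) {
                      ...body...;
                      __kmpc_end_single(&loc, gtid);
                    }
                    __kmpc_barrier(&loc, gtid);   // no nowait clause        */
        #pragma omp single
        printf("single executed by thread %d\n", omp_get_thread_num());
      }
      return 0;
    }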
1831 void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid) {
1832  KMP_POP_PARTITIONED_TIMER();
1833  KE_TRACE(10, ("__kmpc_for_static_fini called T#%d\n", global_tid));
1834 
1835 #if OMPT_SUPPORT && OMPT_OPTIONAL
1836  if (ompt_enabled.ompt_callback_work) {
1837  ompt_work_t ompt_work_type = ompt_work_loop;
1838  ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
1839  ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
1840  // Determine workshare type
1841  if (loc != NULL) {
1842  if ((loc->flags & KMP_IDENT_WORK_LOOP) != 0) {
1843  ompt_work_type = ompt_work_loop;
1844  } else if ((loc->flags & KMP_IDENT_WORK_SECTIONS) != 0) {
1845  ompt_work_type = ompt_work_sections;
1846  } else if ((loc->flags & KMP_IDENT_WORK_DISTRIBUTE) != 0) {
1847  ompt_work_type = ompt_work_distribute;
1848  } else {
1849  // use default set above.
1850  // a warning about this case is provided in __kmpc_for_static_init
1851  }
1852  KMP_DEBUG_ASSERT(ompt_work_type);
1853  }
1854  ompt_callbacks.ompt_callback(ompt_callback_work)(
1855  ompt_work_type, ompt_scope_end, &(team_info->parallel_data),
1856  &(task_info->task_data), 0, OMPT_GET_RETURN_ADDRESS(0));
1857  }
1858 #endif
1859  if (__kmp_env_consistency_check)
1860  __kmp_pop_workshare(global_tid, ct_pdo, loc);
1861 }
1862 
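Only the fini entry point for statically scheduled worksharing lives in this file; the matching init call is implemented elsewhere in the runtime (kmp_sched.cpp). A hedged sketch of how a static loop reaches it:

    #include <omp.h>
    #include <stdio.h>

    int main(void) {
      int a[16];
      /* Lowered roughly as:
           __kmpc_for_static_init_4(&loc, gtid, sched, &last, &lb, &ub, &stride, 1, 1);
           for (i = lb; i <= ub; ++i) a[i] = i * i;
           __kmpc_for_static_fini(&loc, gtid);
           __kmpc_barrier(&loc, gtid);   // implicit barrier, no nowait      */
      #pragma omp parallel for schedule(static)
      for (int i = 0; i < 16; ++i)
        a[i] = i * i;
      printf("a[15] = %d\n", a[15]);
      return 0;
    }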
1863 // User routines which take C-style arguments (call by value)
1864 // different from the Fortran equivalent routines
1865 
1866 void ompc_set_num_threads(int arg) {
1867  // !!!!! TODO: check the per-task binding
1868  __kmp_set_num_threads(arg, __kmp_entry_gtid());
1869 }
1870 
1871 void ompc_set_dynamic(int flag) {
1872  kmp_info_t *thread;
1873 
1874  /* For the thread-private implementation of the internal controls */
1875  thread = __kmp_entry_thread();
1876 
1877  __kmp_save_internal_controls(thread);
1878 
1879  set__dynamic(thread, flag ? TRUE : FALSE);
1880 }
1881 
1882 void ompc_set_nested(int flag) {
1883  kmp_info_t *thread;
1884 
1885  /* For the thread-private internal controls implementation */
1886  thread = __kmp_entry_thread();
1887 
1888  __kmp_save_internal_controls(thread);
1889 
1890  set__max_active_levels(thread, flag ? __kmp_dflt_max_active_levels : 1);
1891 }
1892 
1893 void ompc_set_max_active_levels(int max_active_levels) {
1894  /* TO DO */
1895  /* we want per-task implementation of this internal control */
1896 
1897  /* For the per-thread internal controls implementation */
1898  __kmp_set_max_active_levels(__kmp_entry_gtid(), max_active_levels);
1899 }
1900 
1901 void ompc_set_schedule(omp_sched_t kind, int modifier) {
1902  // !!!!! TODO: check the per-task binding
1903  __kmp_set_schedule(__kmp_entry_gtid(), (kmp_sched_t)kind, modifier);
1904 }
1905 
1906 int ompc_get_ancestor_thread_num(int level) {
1907  return __kmp_get_ancestor_thread_num(__kmp_entry_gtid(), level);
1908 }
1909 
1910 int ompc_get_team_size(int level) {
1911  return __kmp_get_team_size(__kmp_entry_gtid(), level);
1912 }
1913 
1914 #if OMP_50_ENABLED
1915 /* OpenMP 5.0 Affinity Format API */
1916 
1917 void ompc_set_affinity_format(char const *format) {
1918  if (!__kmp_init_serial) {
1919  __kmp_serial_initialize();
1920  }
1921  __kmp_strncpy_truncate(__kmp_affinity_format, KMP_AFFINITY_FORMAT_SIZE,
1922  format, KMP_STRLEN(format) + 1);
1923 }
1924 
1925 size_t ompc_get_affinity_format(char *buffer, size_t size) {
1926  size_t format_size;
1927  if (!__kmp_init_serial) {
1928  __kmp_serial_initialize();
1929  }
1930  format_size = KMP_STRLEN(__kmp_affinity_format);
1931  if (buffer && size) {
1932  __kmp_strncpy_truncate(buffer, size, __kmp_affinity_format,
1933  format_size + 1);
1934  }
1935  return format_size;
1936 }
1937 
1938 void ompc_display_affinity(char const *format) {
1939  int gtid;
1940  if (!TCR_4(__kmp_init_middle)) {
1941  __kmp_middle_initialize();
1942  }
1943  gtid = __kmp_get_gtid();
1944  __kmp_aux_display_affinity(gtid, format);
1945 }
1946 
1947 size_t ompc_capture_affinity(char *buffer, size_t buf_size,
1948  char const *format) {
1949  int gtid;
1950  size_t num_required;
1951  kmp_str_buf_t capture_buf;
1952  if (!TCR_4(__kmp_init_middle)) {
1953  __kmp_middle_initialize();
1954  }
1955  gtid = __kmp_get_gtid();
1956  __kmp_str_buf_init(&capture_buf);
1957  num_required = __kmp_aux_capture_affinity(gtid, format, &capture_buf);
1958  if (buffer && buf_size) {
1959  __kmp_strncpy_truncate(buffer, buf_size, capture_buf.str,
1960  capture_buf.used + 1);
1961  }
1962  __kmp_str_buf_free(&capture_buf);
1963  return num_required;
1964 }
1965 #endif /* OMP_50_ENABLED */
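
// Usage sketch of the OpenMP 5.0 affinity-format API served by the entries
// above (hypothetical user code; assumes an OpenMP 5.0 <omp.h>). Both
// omp_get_affinity_format() and omp_capture_affinity() return the number of
// characters required, so a caller can size its buffer from a first call and
// detect truncation.
static void example_affinity_format(void) {
  char fmt[256];
  omp_set_affinity_format("host=%H tid=%n affinity={%A}");
  size_t needed = omp_get_affinity_format(fmt, sizeof(fmt));
  if (needed >= sizeof(fmt)) {
    // truncated: a real program would allocate needed + 1 bytes and retry
  }
#pragma omp parallel
  {
    char line[256];
    // NULL format means "use the current affinity-format ICV"
    size_t n = omp_capture_affinity(line, sizeof(line), NULL);
    (void)n;
    omp_display_affinity(NULL);
  }
}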
1966 
1967 void kmpc_set_stacksize(int arg) {
1968  // __kmp_aux_set_stacksize initializes the library if needed
1969  __kmp_aux_set_stacksize(arg);
1970 }
1971 
1972 void kmpc_set_stacksize_s(size_t arg) {
1973  // __kmp_aux_set_stacksize initializes the library if needed
1974  __kmp_aux_set_stacksize(arg);
1975 }
1976 
1977 void kmpc_set_blocktime(int arg) {
1978  int gtid, tid;
1979  kmp_info_t *thread;
1980 
1981  gtid = __kmp_entry_gtid();
1982  tid = __kmp_tid_from_gtid(gtid);
1983  thread = __kmp_thread_from_gtid(gtid);
1984 
1985  __kmp_aux_set_blocktime(arg, thread, tid);
1986 }
1987 
1988 void kmpc_set_library(int arg) {
1989  // __kmp_user_set_library initializes the library if needed
1990  __kmp_user_set_library((enum library_type)arg);
1991 }
1992 
1993 void kmpc_set_defaults(char const *str) {
1994  // __kmp_aux_set_defaults initializes the library if needed
1995  __kmp_aux_set_defaults(str, KMP_STRLEN(str));
1996 }
1997 
1998 void kmpc_set_disp_num_buffers(int arg) {
1999  // ignore after initialization because some teams have already
2000  // allocated dispatch buffers
2001  if (__kmp_init_serial == 0 && arg > 0)
2002  __kmp_dispatch_num_buffers = arg;
2003 }
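
// Usage sketch of the kmp_* extension API that fronts these kmpc_* entries
// (hypothetical user code; assumes the kmp_* prototypes exported by this
// runtime's omp.h). As the guard in kmpc_set_disp_num_buffers() shows, that
// particular setting is honored only before the library initializes, so it
// should precede the first parallel region and any call that may trigger
// initialization.
static void example_kmp_extensions(void) {
  kmp_set_disp_num_buffers(16); // ignored once the library is initialized
  kmp_set_defaults("KMP_AFFINITY=compact");
  kmp_set_stacksize_s(4 * 1024 * 1024); // request 4 MiB worker stacks
  kmp_set_blocktime(50);                // spin ~50 ms before threads sleep
#pragma omp parallel
  {
    // first parallel region; it runs with the settings requested above
  }
}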
2004 
2005 int kmpc_set_affinity_mask_proc(int proc, void **mask) {
2006 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
2007  return -1;
2008 #else
2009  if (!TCR_4(__kmp_init_middle)) {
2010  __kmp_middle_initialize();
2011  }
2012  return __kmp_aux_set_affinity_mask_proc(proc, mask);
2013 #endif
2014 }
2015 
2016 int kmpc_unset_affinity_mask_proc(int proc, void **mask) {
2017 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
2018  return -1;
2019 #else
2020  if (!TCR_4(__kmp_init_middle)) {
2021  __kmp_middle_initialize();
2022  }
2023  return __kmp_aux_unset_affinity_mask_proc(proc, mask);
2024 #endif
2025 }
2026 
2027 int kmpc_get_affinity_mask_proc(int proc, void **mask) {
2028 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
2029  return -1;
2030 #else
2031  if (!TCR_4(__kmp_init_middle)) {
2032  __kmp_middle_initialize();
2033  }
2034  return __kmp_aux_get_affinity_mask_proc(proc, mask);
2035 #endif
2036 }
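
// Usage sketch of the kmp_* affinity-mask extension backed by the three
// entries above (hypothetical user code; assumes kmp_affinity_mask_t and the
// kmp_*_affinity* prototypes exported by this runtime's omp.h, and that a
// zero return means success -- the stubs above return -1 when affinity is
// not supported).
static void example_affinity_mask(void) {
  kmp_affinity_mask_t mask;
  kmp_create_affinity_mask(&mask);
  // Add logical processor 0 to the mask, then bind the calling thread to it.
  if (kmp_set_affinity_mask_proc(0, &mask) == 0)
    kmp_set_affinity(&mask);
  kmp_destroy_affinity_mask(&mask);
}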
2037 
2038 /* -------------------------------------------------------------------------- */
2083 void __kmpc_copyprivate(ident_t *loc, kmp_int32 gtid, size_t cpy_size,
2084  void *cpy_data, void (*cpy_func)(void *, void *),
2085  kmp_int32 didit) {
2086  void **data_ptr;
2087 
2088  KC_TRACE(10, ("__kmpc_copyprivate: called T#%d\n", gtid));
2089 
2090  KMP_MB();
2091 
2092  data_ptr = &__kmp_team_from_gtid(gtid)->t.t_copypriv_data;
2093 
2094  if (__kmp_env_consistency_check) {
2095  if (loc == 0) {
2096  KMP_WARNING(ConstructIdentInvalid);
2097  }
2098  }
2099 
2100  // ToDo: Optimize the following two barriers into some kind of split barrier
2101 
2102  if (didit)
2103  *data_ptr = cpy_data;
2104 
2105 #if OMPT_SUPPORT
2106  ompt_frame_t *ompt_frame;
2107  if (ompt_enabled.enabled) {
2108  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
2109  if (ompt_frame->enter_frame.ptr == NULL)
2110  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
2111  OMPT_STORE_RETURN_ADDRESS(gtid);
2112  }
2113 #endif
2114 /* This barrier is not a barrier region boundary */
2115 #if USE_ITT_NOTIFY
2116  __kmp_threads[gtid]->th.th_ident = loc;
2117 #endif
2118  __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
2119 
2120  if (!didit)
2121  (*cpy_func)(cpy_data, *data_ptr);
2122 
2123 // Consider next barrier a user-visible barrier for barrier region boundaries
2124 // Nesting checks are already handled by the single construct checks
2125 
2126 #if OMPT_SUPPORT
2127  if (ompt_enabled.enabled) {
2128  OMPT_STORE_RETURN_ADDRESS(gtid);
2129  }
2130 #endif
2131 #if USE_ITT_NOTIFY
2132  __kmp_threads[gtid]->th.th_ident = loc; // TODO: check if it is needed (e.g.
2133  // tasks can overwrite the location)
2134 #endif
2135  __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
2136 #if OMPT_SUPPORT && OMPT_OPTIONAL
2137  if (ompt_enabled.enabled) {
2138  ompt_frame->enter_frame = ompt_data_none;
2139  }
2140 #endif
2141 }
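
// At the user level this entry implements the copyprivate clause: the thread
// that executes the single region broadcasts its value to the rest of the
// team. A sketch of hypothetical user code a compiler would lower to a
// __kmpc_copyprivate call (the exact lowering is compiler-dependent):
static void example_copyprivate(void) {
  int token;
#pragma omp parallel private(token)
  {
#pragma omp single copyprivate(token)
    { token = 42; } // executed by one thread ...
    // ... and after the single, every thread's private 'token' is 42.
  }
}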
2142 
2143 /* -------------------------------------------------------------------------- */
2144 
2145 #define INIT_LOCK __kmp_init_user_lock_with_checks
2146 #define INIT_NESTED_LOCK __kmp_init_nested_user_lock_with_checks
2147 #define ACQUIRE_LOCK __kmp_acquire_user_lock_with_checks
2148 #define ACQUIRE_LOCK_TIMED __kmp_acquire_user_lock_with_checks_timed
2149 #define ACQUIRE_NESTED_LOCK __kmp_acquire_nested_user_lock_with_checks
2150 #define ACQUIRE_NESTED_LOCK_TIMED \
2151  __kmp_acquire_nested_user_lock_with_checks_timed
2152 #define RELEASE_LOCK __kmp_release_user_lock_with_checks
2153 #define RELEASE_NESTED_LOCK __kmp_release_nested_user_lock_with_checks
2154 #define TEST_LOCK __kmp_test_user_lock_with_checks
2155 #define TEST_NESTED_LOCK __kmp_test_nested_user_lock_with_checks
2156 #define DESTROY_LOCK __kmp_destroy_user_lock_with_checks
2157 #define DESTROY_NESTED_LOCK __kmp_destroy_nested_user_lock_with_checks
2158 
2159 // TODO: Make check abort messages use location info & pass it into
2160 // with_checks routines
2161 
2162 #if KMP_USE_DYNAMIC_LOCK
2163 
2164 // internal lock initializer
2165 static __forceinline void __kmp_init_lock_with_hint(ident_t *loc, void **lock,
2166  kmp_dyna_lockseq_t seq) {
2167  if (KMP_IS_D_LOCK(seq)) {
2168  KMP_INIT_D_LOCK(lock, seq);
2169 #if USE_ITT_BUILD
2170  __kmp_itt_lock_creating((kmp_user_lock_p)lock, NULL);
2171 #endif
2172  } else {
2173  KMP_INIT_I_LOCK(lock, seq);
2174 #if USE_ITT_BUILD
2175  kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
2176  __kmp_itt_lock_creating(ilk->lock, loc);
2177 #endif
2178  }
2179 }
2180 
2181 // internal nest lock initializer
2182 static __forceinline void
2183 __kmp_init_nest_lock_with_hint(ident_t *loc, void **lock,
2184  kmp_dyna_lockseq_t seq) {
2185 #if KMP_USE_TSX
2186  // Don't have nested lock implementation for speculative locks
2187  if (seq == lockseq_hle || seq == lockseq_rtm || seq == lockseq_adaptive)
2188  seq = __kmp_user_lock_seq;
2189 #endif
2190  switch (seq) {
2191  case lockseq_tas:
2192  seq = lockseq_nested_tas;
2193  break;
2194 #if KMP_USE_FUTEX
2195  case lockseq_futex:
2196  seq = lockseq_nested_futex;
2197  break;
2198 #endif
2199  case lockseq_ticket:
2200  seq = lockseq_nested_ticket;
2201  break;
2202  case lockseq_queuing:
2203  seq = lockseq_nested_queuing;
2204  break;
2205  case lockseq_drdpa:
2206  seq = lockseq_nested_drdpa;
2207  break;
2208  default:
2209  seq = lockseq_nested_queuing;
2210  }
2211  KMP_INIT_I_LOCK(lock, seq);
2212 #if USE_ITT_BUILD
2213  kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
2214  __kmp_itt_lock_creating(ilk->lock, loc);
2215 #endif
2216 }
2217 
2218 /* initialize the lock with a hint */
2219 void __kmpc_init_lock_with_hint(ident_t *loc, kmp_int32 gtid, void **user_lock,
2220  uintptr_t hint) {
2221  KMP_DEBUG_ASSERT(__kmp_init_serial);
2222  if (__kmp_env_consistency_check && user_lock == NULL) {
2223  KMP_FATAL(LockIsUninitialized, "omp_init_lock_with_hint");
2224  }
2225 
2226  __kmp_init_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));
2227 
2228 #if OMPT_SUPPORT && OMPT_OPTIONAL
2229  // This is the case, if called from omp_init_lock_with_hint:
2230  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2231  if (!codeptr)
2232  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2233  if (ompt_enabled.ompt_callback_lock_init) {
2234  ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2235  ompt_mutex_lock, (omp_lock_hint_t)hint,
2236  __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock,
2237  codeptr);
2238  }
2239 #endif
2240 }
2241 
2242 /* initialize the lock with a hint */
2243 void __kmpc_init_nest_lock_with_hint(ident_t *loc, kmp_int32 gtid,
2244  void **user_lock, uintptr_t hint) {
2245  KMP_DEBUG_ASSERT(__kmp_init_serial);
2246  if (__kmp_env_consistency_check && user_lock == NULL) {
2247  KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock_with_hint");
2248  }
2249 
2250  __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));
2251 
2252 #if OMPT_SUPPORT && OMPT_OPTIONAL
2253  // This is the case, if called from omp_init_lock_with_hint:
2254  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2255  if (!codeptr)
2256  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2257  if (ompt_enabled.ompt_callback_lock_init) {
2258  ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2259  ompt_mutex_nest_lock, (omp_lock_hint_t)hint,
2260  __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock,
2261  codeptr);
2262  }
2263 #endif
2264 }
2265 
2266 #endif // KMP_USE_DYNAMIC_LOCK
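
// Usage sketch of the OpenMP 4.5 hinted-lock API implemented by the two
// entries above (hypothetical user code). The hint is advisory: as the
// nest-lock initializer above shows, speculative lock kinds fall back to the
// default sequence when no nested speculative implementation exists.
static void example_lock_with_hint(void) {
  omp_lock_t lock;
  omp_init_lock_with_hint(&lock, omp_lock_hint_speculative);
  omp_set_lock(&lock);
  // ... short critical section ...
  omp_unset_lock(&lock);
  omp_destroy_lock(&lock);

  omp_nest_lock_t nlock;
  omp_init_nest_lock_with_hint(&nlock, omp_lock_hint_contended);
  omp_set_nest_lock(&nlock);
  omp_unset_nest_lock(&nlock);
  omp_destroy_nest_lock(&nlock);
}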
2267 
2268 /* initialize the lock */
2269 void __kmpc_init_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2270 #if KMP_USE_DYNAMIC_LOCK
2271 
2272  KMP_DEBUG_ASSERT(__kmp_init_serial);
2273  if (__kmp_env_consistency_check && user_lock == NULL) {
2274  KMP_FATAL(LockIsUninitialized, "omp_init_lock");
2275  }
2276  __kmp_init_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);
2277 
2278 #if OMPT_SUPPORT && OMPT_OPTIONAL
2279  // This is the case, if called from omp_init_lock_with_hint:
2280  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2281  if (!codeptr)
2282  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2283  if (ompt_enabled.ompt_callback_lock_init) {
2284  ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2285  ompt_mutex_lock, omp_lock_hint_none,
2286  __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock,
2287  codeptr);
2288  }
2289 #endif
2290 
2291 #else // KMP_USE_DYNAMIC_LOCK
2292 
2293  static char const *const func = "omp_init_lock";
2294  kmp_user_lock_p lck;
2295  KMP_DEBUG_ASSERT(__kmp_init_serial);
2296 
2297  if (__kmp_env_consistency_check) {
2298  if (user_lock == NULL) {
2299  KMP_FATAL(LockIsUninitialized, func);
2300  }
2301  }
2302 
2303  KMP_CHECK_USER_LOCK_INIT();
2304 
2305  if ((__kmp_user_lock_kind == lk_tas) &&
2306  (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2307  lck = (kmp_user_lock_p)user_lock;
2308  }
2309 #if KMP_USE_FUTEX
2310  else if ((__kmp_user_lock_kind == lk_futex) &&
2311  (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2312  lck = (kmp_user_lock_p)user_lock;
2313  }
2314 #endif
2315  else {
2316  lck = __kmp_user_lock_allocate(user_lock, gtid, 0);
2317  }
2318  INIT_LOCK(lck);
2319  __kmp_set_user_lock_location(lck, loc);
2320 
2321 #if OMPT_SUPPORT && OMPT_OPTIONAL
2322  // This is the case, if called from omp_init_lock_with_hint:
2323  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2324  if (!codeptr)
2325  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2326  if (ompt_enabled.ompt_callback_lock_init) {
2327  ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2328  ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
2329  (ompt_wait_id_t)user_lock, codeptr);
2330  }
2331 #endif
2332 
2333 #if USE_ITT_BUILD
2334  __kmp_itt_lock_creating(lck);
2335 #endif /* USE_ITT_BUILD */
2336 
2337 #endif // KMP_USE_DYNAMIC_LOCK
2338 } // __kmpc_init_lock
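
// User-level counterpart of the lock entry points in this block: the plain
// omp_lock_t life cycle (the consistency checks above name the corresponding
// omp_init_lock/omp_set_lock/... routines). Sketch of hypothetical user code:
static void example_plain_lock(void) {
  static int shared_counter = 0;
  omp_lock_t lock;
  omp_init_lock(&lock);
#pragma omp parallel num_threads(4)
  {
    omp_set_lock(&lock); // mutual exclusion around the shared update
    ++shared_counter;
    omp_unset_lock(&lock);
  }
  omp_destroy_lock(&lock);
}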
2339 
2340 /* initialize the lock */
2341 void __kmpc_init_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2342 #if KMP_USE_DYNAMIC_LOCK
2343 
2344  KMP_DEBUG_ASSERT(__kmp_init_serial);
2345  if (__kmp_env_consistency_check && user_lock == NULL) {
2346  KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock");
2347  }
2348  __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);
2349 
2350 #if OMPT_SUPPORT && OMPT_OPTIONAL
2351  // This is the case, if called from omp_init_lock_with_hint:
2352  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2353  if (!codeptr)
2354  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2355  if (ompt_enabled.ompt_callback_lock_init) {
2356  ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2357  ompt_mutex_nest_lock, omp_lock_hint_none,
2358  __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock,
2359  codeptr);
2360  }
2361 #endif
2362 
2363 #else // KMP_USE_DYNAMIC_LOCK
2364 
2365  static char const *const func = "omp_init_nest_lock";
2366  kmp_user_lock_p lck;
2367  KMP_DEBUG_ASSERT(__kmp_init_serial);
2368 
2369  if (__kmp_env_consistency_check) {
2370  if (user_lock == NULL) {
2371  KMP_FATAL(LockIsUninitialized, func);
2372  }
2373  }
2374 
2375  KMP_CHECK_USER_LOCK_INIT();
2376 
2377  if ((__kmp_user_lock_kind == lk_tas) &&
2378  (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2379  OMP_NEST_LOCK_T_SIZE)) {
2380  lck = (kmp_user_lock_p)user_lock;
2381  }
2382 #if KMP_USE_FUTEX
2383  else if ((__kmp_user_lock_kind == lk_futex) &&
2384  (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2385  OMP_NEST_LOCK_T_SIZE)) {
2386  lck = (kmp_user_lock_p)user_lock;
2387  }
2388 #endif
2389  else {
2390  lck = __kmp_user_lock_allocate(user_lock, gtid, 0);
2391  }
2392 
2393  INIT_NESTED_LOCK(lck);
2394  __kmp_set_user_lock_location(lck, loc);
2395 
2396 #if OMPT_SUPPORT && OMPT_OPTIONAL
2397  // This is the case, if called from omp_init_lock_with_hint:
2398  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2399  if (!codeptr)
2400  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2401  if (ompt_enabled.ompt_callback_lock_init) {
2402  ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2403  ompt_mutex_nest_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
2404  (ompt_wait_id_t)user_lock, codeptr);
2405  }
2406 #endif
2407 
2408 #if USE_ITT_BUILD
2409  __kmp_itt_lock_creating(lck);
2410 #endif /* USE_ITT_BUILD */
2411 
2412 #endif // KMP_USE_DYNAMIC_LOCK
2413 } // __kmpc_init_nest_lock
2414 
2415 void __kmpc_destroy_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2416 #if KMP_USE_DYNAMIC_LOCK
2417 
2418 #if USE_ITT_BUILD
2419  kmp_user_lock_p lck;
2420  if (KMP_EXTRACT_D_TAG(user_lock) == 0) {
2421  lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock;
2422  } else {
2423  lck = (kmp_user_lock_p)user_lock;
2424  }
2425  __kmp_itt_lock_destroyed(lck);
2426 #endif
2427 #if OMPT_SUPPORT && OMPT_OPTIONAL
2428  // This is the case, if called from omp_init_lock_with_hint:
2429  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2430  if (!codeptr)
2431  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2432  if (ompt_enabled.ompt_callback_lock_destroy) {
2433  kmp_user_lock_p lck;
2434  if (KMP_EXTRACT_D_TAG(user_lock) == 0) {
2435  lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock;
2436  } else {
2437  lck = (kmp_user_lock_p)user_lock;
2438  }
2439  ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2440  ompt_mutex_lock, (ompt_wait_id_t)user_lock, codeptr);
2441  }
2442 #endif
2443  KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);
2444 #else
2445  kmp_user_lock_p lck;
2446 
2447  if ((__kmp_user_lock_kind == lk_tas) &&
2448  (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2449  lck = (kmp_user_lock_p)user_lock;
2450  }
2451 #if KMP_USE_FUTEX
2452  else if ((__kmp_user_lock_kind == lk_futex) &&
2453  (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2454  lck = (kmp_user_lock_p)user_lock;
2455  }
2456 #endif
2457  else {
2458  lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_lock");
2459  }
2460 
2461 #if OMPT_SUPPORT && OMPT_OPTIONAL
2462  // This is the case, if called from omp_init_lock_with_hint:
2463  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2464  if (!codeptr)
2465  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2466  if (ompt_enabled.ompt_callback_lock_destroy) {
2467  ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2468  ompt_mutex_lock, (ompt_wait_id_t)user_lock, codeptr);
2469  }
2470 #endif
2471 
2472 #if USE_ITT_BUILD
2473  __kmp_itt_lock_destroyed(lck);
2474 #endif /* USE_ITT_BUILD */
2475  DESTROY_LOCK(lck);
2476 
2477  if ((__kmp_user_lock_kind == lk_tas) &&
2478  (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2479  ;
2480  }
2481 #if KMP_USE_FUTEX
2482  else if ((__kmp_user_lock_kind == lk_futex) &&
2483  (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2484  ;
2485  }
2486 #endif
2487  else {
2488  __kmp_user_lock_free(user_lock, gtid, lck);
2489  }
2490 #endif // KMP_USE_DYNAMIC_LOCK
2491 } // __kmpc_destroy_lock
2492 
2493 /* destroy the lock */
2494 void __kmpc_destroy_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2495 #if KMP_USE_DYNAMIC_LOCK
2496 
2497 #if USE_ITT_BUILD
2498  kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(user_lock);
2499  __kmp_itt_lock_destroyed(ilk->lock);
2500 #endif
2501 #if OMPT_SUPPORT && OMPT_OPTIONAL
2502  // This is the case, if called from omp_init_lock_with_hint:
2503  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2504  if (!codeptr)
2505  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2506  if (ompt_enabled.ompt_callback_lock_destroy) {
2507  ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2508  ompt_mutex_nest_lock, (ompt_wait_id_t)user_lock, codeptr);
2509  }
2510 #endif
2511  KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);
2512 
2513 #else // KMP_USE_DYNAMIC_LOCK
2514 
2515  kmp_user_lock_p lck;
2516 
2517  if ((__kmp_user_lock_kind == lk_tas) &&
2518  (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2519  OMP_NEST_LOCK_T_SIZE)) {
2520  lck = (kmp_user_lock_p)user_lock;
2521  }
2522 #if KMP_USE_FUTEX
2523  else if ((__kmp_user_lock_kind == lk_futex) &&
2524  (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2525  OMP_NEST_LOCK_T_SIZE)) {
2526  lck = (kmp_user_lock_p)user_lock;
2527  }
2528 #endif
2529  else {
2530  lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_nest_lock");
2531  }
2532 
2533 #if OMPT_SUPPORT && OMPT_OPTIONAL
2534  // This is the case, if called from omp_init_lock_with_hint:
2535  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2536  if (!codeptr)
2537  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2538  if (ompt_enabled.ompt_callback_lock_destroy) {
2539  ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2540  ompt_mutex_nest_lock, (ompt_wait_id_t)user_lock, codeptr);
2541  }
2542 #endif
2543 
2544 #if USE_ITT_BUILD
2545  __kmp_itt_lock_destroyed(lck);
2546 #endif /* USE_ITT_BUILD */
2547 
2548  DESTROY_NESTED_LOCK(lck);
2549 
2550  if ((__kmp_user_lock_kind == lk_tas) &&
2551  (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2552  OMP_NEST_LOCK_T_SIZE)) {
2553  ;
2554  }
2555 #if KMP_USE_FUTEX
2556  else if ((__kmp_user_lock_kind == lk_futex) &&
2557  (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2558  OMP_NEST_LOCK_T_SIZE)) {
2559  ;
2560  }
2561 #endif
2562  else {
2563  __kmp_user_lock_free(user_lock, gtid, lck);
2564  }
2565 #endif // KMP_USE_DYNAMIC_LOCK
2566 } // __kmpc_destroy_nest_lock
2567 
2568 void __kmpc_set_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2569  KMP_COUNT_BLOCK(OMP_set_lock);
2570 #if KMP_USE_DYNAMIC_LOCK
2571  int tag = KMP_EXTRACT_D_TAG(user_lock);
2572 #if USE_ITT_BUILD
2573  __kmp_itt_lock_acquiring(
2574  (kmp_user_lock_p)
2575  user_lock); // itt function will get to the right lock object.
2576 #endif
2577 #if OMPT_SUPPORT && OMPT_OPTIONAL
2578  // This is the case, if called from omp_init_lock_with_hint:
2579  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2580  if (!codeptr)
2581  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2582  if (ompt_enabled.ompt_callback_mutex_acquire) {
2583  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2584  ompt_mutex_lock, omp_lock_hint_none,
2585  __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock,
2586  codeptr);
2587  }
2588 #endif
2589 #if KMP_USE_INLINED_TAS
2590  if (tag == locktag_tas && !__kmp_env_consistency_check) {
2591  KMP_ACQUIRE_TAS_LOCK(user_lock, gtid);
2592  } else
2593 #elif KMP_USE_INLINED_FUTEX
2594  if (tag == locktag_futex && !__kmp_env_consistency_check) {
2595  KMP_ACQUIRE_FUTEX_LOCK(user_lock, gtid);
2596  } else
2597 #endif
2598  {
2599  __kmp_direct_set[tag]((kmp_dyna_lock_t *)user_lock, gtid);
2600  }
2601 #if USE_ITT_BUILD
2602  __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
2603 #endif
2604 #if OMPT_SUPPORT && OMPT_OPTIONAL
2605  if (ompt_enabled.ompt_callback_mutex_acquired) {
2606  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2607  ompt_mutex_lock, (ompt_wait_id_t)user_lock, codeptr);
2608  }
2609 #endif
2610 
2611 #else // KMP_USE_DYNAMIC_LOCK
2612 
2613  kmp_user_lock_p lck;
2614 
2615  if ((__kmp_user_lock_kind == lk_tas) &&
2616  (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2617  lck = (kmp_user_lock_p)user_lock;
2618  }
2619 #if KMP_USE_FUTEX
2620  else if ((__kmp_user_lock_kind == lk_futex) &&
2621  (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2622  lck = (kmp_user_lock_p)user_lock;
2623  }
2624 #endif
2625  else {
2626  lck = __kmp_lookup_user_lock(user_lock, "omp_set_lock");
2627  }
2628 
2629 #if USE_ITT_BUILD
2630  __kmp_itt_lock_acquiring(lck);
2631 #endif /* USE_ITT_BUILD */
2632 #if OMPT_SUPPORT && OMPT_OPTIONAL
2633  // This is the case, if called from omp_init_lock_with_hint:
2634  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2635  if (!codeptr)
2636  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2637  if (ompt_enabled.ompt_callback_mutex_acquire) {
2638  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2639  ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
2640  (ompt_wait_id_t)lck, codeptr);
2641  }
2642 #endif
2643 
2644  ACQUIRE_LOCK(lck, gtid);
2645 
2646 #if USE_ITT_BUILD
2647  __kmp_itt_lock_acquired(lck);
2648 #endif /* USE_ITT_BUILD */
2649 
2650 #if OMPT_SUPPORT && OMPT_OPTIONAL
2651  if (ompt_enabled.ompt_callback_mutex_acquired) {
2652  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2653  ompt_mutex_lock, (ompt_wait_id_t)lck, codeptr);
2654  }
2655 #endif
2656 
2657 #endif // KMP_USE_DYNAMIC_LOCK
2658 }
2659 
2660 void __kmpc_set_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2661 #if KMP_USE_DYNAMIC_LOCK
2662 
2663 #if USE_ITT_BUILD
2664  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
2665 #endif
2666 #if OMPT_SUPPORT && OMPT_OPTIONAL
2667  // This is the case, if called from omp_init_lock_with_hint:
2668  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2669  if (!codeptr)
2670  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2671  if (ompt_enabled.enabled) {
2672  if (ompt_enabled.ompt_callback_mutex_acquire) {
2673  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2674  ompt_mutex_nest_lock, omp_lock_hint_none,
2675  __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock,
2676  codeptr);
2677  }
2678  }
2679 #endif
2680  int acquire_status =
2681  KMP_D_LOCK_FUNC(user_lock, set)((kmp_dyna_lock_t *)user_lock, gtid);
2682  (void) acquire_status;
2683 #if USE_ITT_BUILD
2684  __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
2685 #endif
2686 
2687 #if OMPT_SUPPORT && OMPT_OPTIONAL
2688  if (ompt_enabled.enabled) {
2689  if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) {
2690  if (ompt_enabled.ompt_callback_mutex_acquired) {
2691  // lock_first
2692  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2693  ompt_mutex_nest_lock, (ompt_wait_id_t)user_lock, codeptr);
2694  }
2695  } else {
2696  if (ompt_enabled.ompt_callback_nest_lock) {
2697  // lock_next
2698  ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
2699  ompt_scope_begin, (ompt_wait_id_t)user_lock, codeptr);
2700  }
2701  }
2702  }
2703 #endif
2704 
2705 #else // KMP_USE_DYNAMIC_LOCK
2706  int acquire_status;
2707  kmp_user_lock_p lck;
2708 
2709  if ((__kmp_user_lock_kind == lk_tas) &&
2710  (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2711  OMP_NEST_LOCK_T_SIZE)) {
2712  lck = (kmp_user_lock_p)user_lock;
2713  }
2714 #if KMP_USE_FUTEX
2715  else if ((__kmp_user_lock_kind == lk_futex) &&
2716  (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2717  OMP_NEST_LOCK_T_SIZE)) {
2718  lck = (kmp_user_lock_p)user_lock;
2719  }
2720 #endif
2721  else {
2722  lck = __kmp_lookup_user_lock(user_lock, "omp_set_nest_lock");
2723  }
2724 
2725 #if USE_ITT_BUILD
2726  __kmp_itt_lock_acquiring(lck);
2727 #endif /* USE_ITT_BUILD */
2728 #if OMPT_SUPPORT && OMPT_OPTIONAL
2729  // This is the case, if called from omp_init_lock_with_hint:
2730  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2731  if (!codeptr)
2732  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2733  if (ompt_enabled.enabled) {
2734  if (ompt_enabled.ompt_callback_mutex_acquire) {
2735  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2736  ompt_mutex_nest_lock, omp_lock_hint_none,
2737  __ompt_get_mutex_impl_type(), (ompt_wait_id_t)lck, codeptr);
2738  }
2739  }
2740 #endif
2741 
2742  ACQUIRE_NESTED_LOCK(lck, gtid, &acquire_status);
2743 
2744 #if USE_ITT_BUILD
2745  __kmp_itt_lock_acquired(lck);
2746 #endif /* USE_ITT_BUILD */
2747 
2748 #if OMPT_SUPPORT && OMPT_OPTIONAL
2749  if (ompt_enabled.enabled) {
2750  if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) {
2751  if (ompt_enabled.ompt_callback_mutex_acquired) {
2752  // lock_first
2753  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2754  ompt_mutex_nest_lock, (ompt_wait_id_t)lck, codeptr);
2755  }
2756  } else {
2757  if (ompt_enabled.ompt_callback_nest_lock) {
2758  // lock_next
2759  ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
2760  ompt_scope_begin, (ompt_wait_id_t)lck, codeptr);
2761  }
2762  }
2763  }
2764 #endif
2765 
2766 #endif // KMP_USE_DYNAMIC_LOCK
2767 }
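
// The nestable variant above lets the owning thread re-acquire the lock,
// which is what makes omp_nest_lock_t usable from recursive code. Sketch of
// hypothetical user code:
static void example_nest_lock_helper(omp_nest_lock_t *nlock, int depth) {
  omp_set_nest_lock(nlock); // owner may re-enter; nesting depth is tracked
  if (depth > 0)
    example_nest_lock_helper(nlock, depth - 1);
  omp_unset_nest_lock(nlock); // released only when every set is matched
}

static void example_nest_lock(void) {
  omp_nest_lock_t nlock;
  omp_init_nest_lock(&nlock);
#pragma omp parallel num_threads(2)
  example_nest_lock_helper(&nlock, 3);
  omp_destroy_nest_lock(&nlock);
}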
2768 
2769 void __kmpc_unset_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2770 #if KMP_USE_DYNAMIC_LOCK
2771 
2772  int tag = KMP_EXTRACT_D_TAG(user_lock);
2773 #if USE_ITT_BUILD
2774  __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2775 #endif
2776 #if KMP_USE_INLINED_TAS
2777  if (tag == locktag_tas && !__kmp_env_consistency_check) {
2778  KMP_RELEASE_TAS_LOCK(user_lock, gtid);
2779  } else
2780 #elif KMP_USE_INLINED_FUTEX
2781  if (tag == locktag_futex && !__kmp_env_consistency_check) {
2782  KMP_RELEASE_FUTEX_LOCK(user_lock, gtid);
2783  } else
2784 #endif
2785  {
2786  __kmp_direct_unset[tag]((kmp_dyna_lock_t *)user_lock, gtid);
2787  }
2788 
2789 #if OMPT_SUPPORT && OMPT_OPTIONAL
2790  // This is the case, if called from omp_init_lock_with_hint:
2791  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2792  if (!codeptr)
2793  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2794  if (ompt_enabled.ompt_callback_mutex_released) {
2795  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2796  ompt_mutex_lock, (ompt_wait_id_t)user_lock, codeptr);
2797  }
2798 #endif
2799 
2800 #else // KMP_USE_DYNAMIC_LOCK
2801 
2802  kmp_user_lock_p lck;
2803 
2804  /* Can't use serial interval since not block structured */
2805  /* release the lock */
2806 
2807  if ((__kmp_user_lock_kind == lk_tas) &&
2808  (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2809 #if KMP_OS_LINUX && \
2810  (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
2811 // "fast" path implemented to fix customer performance issue
2812 #if USE_ITT_BUILD
2813  __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2814 #endif /* USE_ITT_BUILD */
2815  TCW_4(((kmp_user_lock_p)user_lock)->tas.lk.poll, 0);
2816  KMP_MB();
2817 
2818 #if OMPT_SUPPORT && OMPT_OPTIONAL
2819  // This is the case, if called from omp_init_lock_with_hint:
2820  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2821  if (!codeptr)
2822  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2823  if (ompt_enabled.ompt_callback_mutex_released) {
2824  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2825  ompt_mutex_lock, (ompt_wait_id_t)lck, codeptr);
2826  }
2827 #endif
2828 
2829  return;
2830 #else
2831  lck = (kmp_user_lock_p)user_lock;
2832 #endif
2833  }
2834 #if KMP_USE_FUTEX
2835  else if ((__kmp_user_lock_kind == lk_futex) &&
2836  (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2837  lck = (kmp_user_lock_p)user_lock;
2838  }
2839 #endif
2840  else {
2841  lck = __kmp_lookup_user_lock(user_lock, "omp_unset_lock");
2842  }
2843 
2844 #if USE_ITT_BUILD
2845  __kmp_itt_lock_releasing(lck);
2846 #endif /* USE_ITT_BUILD */
2847 
2848  RELEASE_LOCK(lck, gtid);
2849 
2850 #if OMPT_SUPPORT && OMPT_OPTIONAL
2851  // This is the case, if called from omp_init_lock_with_hint:
2852  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2853  if (!codeptr)
2854  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2855  if (ompt_enabled.ompt_callback_mutex_released) {
2856  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2857  ompt_mutex_lock, (ompt_wait_id_t)lck, codeptr);
2858  }
2859 #endif
2860 
2861 #endif // KMP_USE_DYNAMIC_LOCK
2862 }
2863 
2864 /* release the lock */
2865 void __kmpc_unset_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2866 #if KMP_USE_DYNAMIC_LOCK
2867 
2868 #if USE_ITT_BUILD
2869  __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2870 #endif
2871  int release_status =
2872  KMP_D_LOCK_FUNC(user_lock, unset)((kmp_dyna_lock_t *)user_lock, gtid);
2873  (void) release_status;
2874 
2875 #if OMPT_SUPPORT && OMPT_OPTIONAL
2876  // This is the case, if called from omp_init_lock_with_hint:
2877  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2878  if (!codeptr)
2879  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2880  if (ompt_enabled.enabled) {
2881  if (release_status == KMP_LOCK_RELEASED) {
2882  if (ompt_enabled.ompt_callback_mutex_released) {
2883  // release_lock_last
2884  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2885  ompt_mutex_nest_lock, (ompt_wait_id_t)user_lock, codeptr);
2886  }
2887  } else if (ompt_enabled.ompt_callback_nest_lock) {
2888  // release_lock_prev
2889  ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
2890  ompt_scope_end, (ompt_wait_id_t)user_lock, codeptr);
2891  }
2892  }
2893 #endif
2894 
2895 #else // KMP_USE_DYNAMIC_LOCK
2896 
2897  kmp_user_lock_p lck;
2898 
2899  /* Can't use serial interval since not block structured */
2900 
2901  if ((__kmp_user_lock_kind == lk_tas) &&
2902  (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2903  OMP_NEST_LOCK_T_SIZE)) {
2904 #if KMP_OS_LINUX && \
2905  (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
2906  // "fast" path implemented to fix customer performance issue
2907  kmp_tas_lock_t *tl = (kmp_tas_lock_t *)user_lock;
2908 #if USE_ITT_BUILD
2909  __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2910 #endif /* USE_ITT_BUILD */
2911 
2912 #if OMPT_SUPPORT && OMPT_OPTIONAL
2913  int release_status = KMP_LOCK_STILL_HELD;
2914 #endif
2915 
2916  if (--(tl->lk.depth_locked) == 0) {
2917  TCW_4(tl->lk.poll, 0);
2918 #if OMPT_SUPPORT && OMPT_OPTIONAL
2919  release_status = KMP_LOCK_RELEASED;
2920 #endif
2921  }
2922  KMP_MB();
2923 
2924 #if OMPT_SUPPORT && OMPT_OPTIONAL
2925  // This is the case, if called from omp_init_lock_with_hint:
2926  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2927  if (!codeptr)
2928  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2929  if (ompt_enabled.enabled) {
2930  if (release_status == KMP_LOCK_RELEASED) {
2931  if (ompt_enabled.ompt_callback_mutex_released) {
2932  // release_lock_last
2933  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2934  ompt_mutex_nest_lock, (ompt_wait_id_t)lck, codeptr);
2935  }
2936  } else if (ompt_enabled.ompt_callback_nest_lock) {
2937  // release_lock_previous
2938  ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
2939  ompt_scope_end, (ompt_wait_id_t)lck, codeptr);
2940  }
2941  }
2942 #endif
2943 
2944  return;
2945 #else
2946  lck = (kmp_user_lock_p)user_lock;
2947 #endif
2948  }
2949 #if KMP_USE_FUTEX
2950  else if ((__kmp_user_lock_kind == lk_futex) &&
2951  (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2952  OMP_NEST_LOCK_T_SIZE)) {
2953  lck = (kmp_user_lock_p)user_lock;
2954  }
2955 #endif
2956  else {
2957  lck = __kmp_lookup_user_lock(user_lock, "omp_unset_nest_lock");
2958  }
2959 
2960 #if USE_ITT_BUILD
2961  __kmp_itt_lock_releasing(lck);
2962 #endif /* USE_ITT_BUILD */
2963 
2964  int release_status;
2965  release_status = RELEASE_NESTED_LOCK(lck, gtid);
2966 #if OMPT_SUPPORT && OMPT_OPTIONAL
2967  // This is the case, if called from omp_init_lock_with_hint:
2968  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2969  if (!codeptr)
2970  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2971  if (ompt_enabled.enabled) {
2972  if (release_status == KMP_LOCK_RELEASED) {
2973  if (ompt_enabled.ompt_callback_mutex_released) {
2974  // release_lock_last
2975  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2976  ompt_mutex_nest_lock, (ompt_wait_id_t)lck, codeptr);
2977  }
2978  } else if (ompt_enabled.ompt_callback_nest_lock) {
2979  // release_lock_previous
2980  ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
2981  ompt_scope_end, (ompt_wait_id_t)lck, codeptr);
2982  }
2983  }
2984 #endif
2985 
2986 #endif // KMP_USE_DYNAMIC_LOCK
2987 }
2988 
2989 /* try to acquire the lock */
2990 int __kmpc_test_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2991  KMP_COUNT_BLOCK(OMP_test_lock);
2992 
2993 #if KMP_USE_DYNAMIC_LOCK
2994  int rc;
2995  int tag = KMP_EXTRACT_D_TAG(user_lock);
2996 #if USE_ITT_BUILD
2997  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
2998 #endif
2999 #if OMPT_SUPPORT && OMPT_OPTIONAL
3000  // This is the case, if called from omp_init_lock_with_hint:
3001  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3002  if (!codeptr)
3003  codeptr = OMPT_GET_RETURN_ADDRESS(0);
3004  if (ompt_enabled.ompt_callback_mutex_acquire) {
3005  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3006  ompt_mutex_lock, omp_lock_hint_none,
3007  __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock,
3008  codeptr);
3009  }
3010 #endif
3011 #if KMP_USE_INLINED_TAS
3012  if (tag == locktag_tas && !__kmp_env_consistency_check) {
3013  KMP_TEST_TAS_LOCK(user_lock, gtid, rc);
3014  } else
3015 #elif KMP_USE_INLINED_FUTEX
3016  if (tag == locktag_futex && !__kmp_env_consistency_check) {
3017  KMP_TEST_FUTEX_LOCK(user_lock, gtid, rc);
3018  } else
3019 #endif
3020  {
3021  rc = __kmp_direct_test[tag]((kmp_dyna_lock_t *)user_lock, gtid);
3022  }
3023  if (rc) {
3024 #if USE_ITT_BUILD
3025  __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
3026 #endif
3027 #if OMPT_SUPPORT && OMPT_OPTIONAL
3028  if (ompt_enabled.ompt_callback_mutex_acquired) {
3029  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3030  ompt_mutex_lock, (ompt_wait_id_t)user_lock, codeptr);
3031  }
3032 #endif
3033  return FTN_TRUE;
3034  } else {
3035 #if USE_ITT_BUILD
3036  __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
3037 #endif
3038  return FTN_FALSE;
3039  }
3040 
3041 #else // KMP_USE_DYNAMIC_LOCK
3042 
3043  kmp_user_lock_p lck;
3044  int rc;
3045 
3046  if ((__kmp_user_lock_kind == lk_tas) &&
3047  (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
3048  lck = (kmp_user_lock_p)user_lock;
3049  }
3050 #if KMP_USE_FUTEX
3051  else if ((__kmp_user_lock_kind == lk_futex) &&
3052  (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
3053  lck = (kmp_user_lock_p)user_lock;
3054  }
3055 #endif
3056  else {
3057  lck = __kmp_lookup_user_lock(user_lock, "omp_test_lock");
3058  }
3059 
3060 #if USE_ITT_BUILD
3061  __kmp_itt_lock_acquiring(lck);
3062 #endif /* USE_ITT_BUILD */
3063 #if OMPT_SUPPORT && OMPT_OPTIONAL
3064  // This is the case, if called from omp_init_lock_with_hint:
3065  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3066  if (!codeptr)
3067  codeptr = OMPT_GET_RETURN_ADDRESS(0);
3068  if (ompt_enabled.ompt_callback_mutex_acquire) {
3069  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3070  ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
3071  (ompt_wait_id_t)lck, codeptr);
3072  }
3073 #endif
3074 
3075  rc = TEST_LOCK(lck, gtid);
3076 #if USE_ITT_BUILD
3077  if (rc) {
3078  __kmp_itt_lock_acquired(lck);
3079  } else {
3080  __kmp_itt_lock_cancelled(lck);
3081  }
3082 #endif /* USE_ITT_BUILD */
3083 #if OMPT_SUPPORT && OMPT_OPTIONAL
3084  if (rc && ompt_enabled.ompt_callback_mutex_acquired) {
3085  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3086  ompt_mutex_lock, (ompt_wait_id_t)lck, codeptr);
3087  }
3088 #endif
3089 
3090  return (rc ? FTN_TRUE : FTN_FALSE);
3091 
3092 /* Can't use serial interval since not block structured */
3093 
3094 #endif // KMP_USE_DYNAMIC_LOCK
3095 }
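
// omp_test_lock, served by the entry above (see the "omp_test_lock" name used
// in the lock lookup), acquires the lock only if it is free and never blocks,
// so the caller can keep doing useful work while waiting. Sketch of
// hypothetical user code:
static void example_test_lock(omp_lock_t *lock) {
  while (!omp_test_lock(lock)) {
    // lock not available: do something else instead of blocking
  }
  // lock is held here
  omp_unset_lock(lock);
}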
3096 
3097 /* try to acquire the lock */
3098 int __kmpc_test_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
3099 #if KMP_USE_DYNAMIC_LOCK
3100  int rc;
3101 #if USE_ITT_BUILD
3102  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
3103 #endif
3104 #if OMPT_SUPPORT && OMPT_OPTIONAL
3105  // This is the case, if called from omp_init_lock_with_hint:
3106  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3107  if (!codeptr)
3108  codeptr = OMPT_GET_RETURN_ADDRESS(0);
3109  if (ompt_enabled.ompt_callback_mutex_acquire) {
3110  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3111  ompt_mutex_nest_lock, omp_lock_hint_none,
3112  __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock,
3113  codeptr);
3114  }
3115 #endif
3116  rc = KMP_D_LOCK_FUNC(user_lock, test)((kmp_dyna_lock_t *)user_lock, gtid);
3117 #if USE_ITT_BUILD
3118  if (rc) {
3119  __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
3120  } else {
3121  __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
3122  }
3123 #endif
3124 #if OMPT_SUPPORT && OMPT_OPTIONAL
3125  if (ompt_enabled.enabled && rc) {
3126  if (rc == 1) {
3127  if (ompt_enabled.ompt_callback_mutex_acquired) {
3128  // lock_first
3129  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3130  ompt_mutex_nest_lock, (ompt_wait_id_t)user_lock, codeptr);
3131  }
3132  } else {
3133  if (ompt_enabled.ompt_callback_nest_lock) {
3134  // lock_next
3135  ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3136  ompt_scope_begin, (ompt_wait_id_t)user_lock, codeptr);
3137  }
3138  }
3139  }
3140 #endif
3141  return rc;
3142 
3143 #else // KMP_USE_DYNAMIC_LOCK
3144 
3145  kmp_user_lock_p lck;
3146  int rc;
3147 
3148  if ((__kmp_user_lock_kind == lk_tas) &&
3149  (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
3150  OMP_NEST_LOCK_T_SIZE)) {
3151  lck = (kmp_user_lock_p)user_lock;
3152  }
3153 #if KMP_USE_FUTEX
3154  else if ((__kmp_user_lock_kind == lk_futex) &&
3155  (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
3156  OMP_NEST_LOCK_T_SIZE)) {
3157  lck = (kmp_user_lock_p)user_lock;
3158  }
3159 #endif
3160  else {
3161  lck = __kmp_lookup_user_lock(user_lock, "omp_test_nest_lock");
3162  }
3163 
3164 #if USE_ITT_BUILD
3165  __kmp_itt_lock_acquiring(lck);
3166 #endif /* USE_ITT_BUILD */
3167 
3168 #if OMPT_SUPPORT && OMPT_OPTIONAL
3169  // This is the case, if called from omp_init_lock_with_hint:
3170  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3171  if (!codeptr)
3172  codeptr = OMPT_GET_RETURN_ADDRESS(0);
3173  if (ompt_enabled.enabled &&
3174      ompt_enabled.ompt_callback_mutex_acquire) {
3175  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3176  ompt_mutex_nest_lock, omp_lock_hint_none,
3177  __ompt_get_mutex_impl_type(), (ompt_wait_id_t)lck, codeptr);
3178  }
3179 #endif
3180 
3181  rc = TEST_NESTED_LOCK(lck, gtid);
3182 #if USE_ITT_BUILD
3183  if (rc) {
3184  __kmp_itt_lock_acquired(lck);
3185  } else {
3186  __kmp_itt_lock_cancelled(lck);
3187  }
3188 #endif /* USE_ITT_BUILD */
3189 #if OMPT_SUPPORT && OMPT_OPTIONAL
3190  if (ompt_enabled.enabled && rc) {
3191  if (rc == 1) {
3192  if (ompt_enabled.ompt_callback_mutex_acquired) {
3193  // lock_first
3194  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3195  ompt_mutex_nest_lock, (ompt_wait_id_t)lck, codeptr);
3196  }
3197  } else {
3198  if (ompt_enabled.ompt_callback_nest_lock) {
3199  // lock_next
3200  ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3201  ompt_scope_begin, (ompt_wait_id_t)lck, codeptr);
3202  }
3203  }
3204  }
3205 #endif
3206  return rc;
3207 
3208 /* Can't use serial interval since not block structured */
3209 
3210 #endif // KMP_USE_DYNAMIC_LOCK
3211 }
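
// For the nestable variant, omp_test_nest_lock returns the new nesting count
// on success and 0 on failure, matching the rc handling above (rc == 1 marks
// the first acquisition). Sketch of hypothetical user code:
static void example_test_nest_lock(omp_nest_lock_t *nlock) {
  int count = omp_test_nest_lock(nlock); // 0 if another thread owns it
  if (count > 0) {
    // count == 1: first acquisition; count > 1: this thread already owned it
    omp_unset_nest_lock(nlock);
  }
}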
3212 
3213 // Interface to fast scalable reduce methods routines
3214 
3215 // keep the selected method in a thread local structure for cross-function
3216 // usage: will be used in __kmpc_end_reduce* functions;
3217 // another solution: to re-determine the method one more time in
3218 // __kmpc_end_reduce* functions (new prototype required then)
3219 // AT: which solution is better?
3220 #define __KMP_SET_REDUCTION_METHOD(gtid, rmethod) \
3221  ((__kmp_threads[(gtid)]->th.th_local.packed_reduction_method) = (rmethod))
3222 
3223 #define __KMP_GET_REDUCTION_METHOD(gtid) \
3224  (__kmp_threads[(gtid)]->th.th_local.packed_reduction_method)
3225 
3226 // description of the packed_reduction_method variable: look at the macros in
3227 // kmp.h
3228 
3229 // used in a critical section reduce block
3230 static __forceinline void
3231 __kmp_enter_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid,
3232  kmp_critical_name *crit) {
3233 
3234  // this lock was visible to a customer and to the threading profile tool as a
3235  // serial overhead span (although it's used for an internal purpose only)
3236  // why was it visible in previous implementation?
3237  // should we keep it visible in new reduce block?
3238  kmp_user_lock_p lck;
3239 
3240 #if KMP_USE_DYNAMIC_LOCK
3241 
3242  kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
3243  // Check if it is initialized.
3244  if (*lk == 0) {
3245  if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
3246  KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0,
3247  KMP_GET_D_TAG(__kmp_user_lock_seq));
3248  } else {
3249  __kmp_init_indirect_csptr(crit, loc, global_tid,
3250  KMP_GET_I_TAG(__kmp_user_lock_seq));
3251  }
3252  }
3253  // Branch for accessing the actual lock object and set operation. This
3254  // branching is inevitable since this lock initialization does not follow the
3255  // normal dispatch path (lock table is not used).
3256  if (KMP_EXTRACT_D_TAG(lk) != 0) {
3257  lck = (kmp_user_lock_p)lk;
3258  KMP_DEBUG_ASSERT(lck != NULL);
3259  if (__kmp_env_consistency_check) {
3260  __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
3261  }
3262  KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
3263  } else {
3264  kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
3265  lck = ilk->lock;
3266  KMP_DEBUG_ASSERT(lck != NULL);
3267  if (__kmp_env_consistency_check) {
3268  __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
3269  }
3270  KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
3271  }
3272 
3273 #else // KMP_USE_DYNAMIC_LOCK
3274 
3275  // We know that the fast reduction code is only emitted by Intel compilers
3276  // with 32 byte critical sections. If there isn't enough space, then we
3277  // have to use a pointer.
3278  if (__kmp_base_user_lock_size <= INTEL_CRITICAL_SIZE) {
3279  lck = (kmp_user_lock_p)crit;
3280  } else {
3281  lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);
3282  }
3283  KMP_DEBUG_ASSERT(lck != NULL);
3284 
3285  if (__kmp_env_consistency_check)
3286  __kmp_push_sync(global_tid, ct_critical, loc, lck);
3287 
3288  __kmp_acquire_user_lock_with_checks(lck, global_tid);
3289 
3290 #endif // KMP_USE_DYNAMIC_LOCK
3291 }
3292 
3293 // used in a critical section reduce block
3294 static __forceinline void
3295 __kmp_end_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid,
3296  kmp_critical_name *crit) {
3297 
3298  kmp_user_lock_p lck;
3299 
3300 #if KMP_USE_DYNAMIC_LOCK
3301 
3302  if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
3303  lck = (kmp_user_lock_p)crit;
3304  if (__kmp_env_consistency_check)
3305  __kmp_pop_sync(global_tid, ct_critical, loc);
3306  KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
3307  } else {
3308  kmp_indirect_lock_t *ilk =
3309  (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
3310  if (__kmp_env_consistency_check)
3311  __kmp_pop_sync(global_tid, ct_critical, loc);
3312  KMP_I_LOCK_FUNC(ilk, unset)(ilk->lock, global_tid);
3313  }
3314 
3315 #else // KMP_USE_DYNAMIC_LOCK
3316 
3317  // We know that the fast reduction code is only emitted by Intel compilers
3318  // with 32 byte critical sections. If there isn't enough space, then we have
3319  // to use a pointer.
3320  if (__kmp_base_user_lock_size > 32) {
3321  lck = *((kmp_user_lock_p *)crit);
3322  KMP_ASSERT(lck != NULL);
3323  } else {
3324  lck = (kmp_user_lock_p)crit;
3325  }
3326 
3327  if (__kmp_env_consistency_check)
3328  __kmp_pop_sync(global_tid, ct_critical, loc);
3329 
3330  __kmp_release_user_lock_with_checks(lck, global_tid);
3331 
3332 #endif // KMP_USE_DYNAMIC_LOCK
3333 } // __kmp_end_critical_section_reduce_block
3334 
3335 #if OMP_40_ENABLED
3336 static __forceinline int
3337 __kmp_swap_teams_for_teams_reduction(kmp_info_t *th, kmp_team_t **team_p,
3338  int *task_state) {
3339  kmp_team_t *team;
3340 
3341  // Check if we are inside the teams construct?
3342  if (th->th.th_teams_microtask) {
3343  *team_p = team = th->th.th_team;
3344  if (team->t.t_level == th->th.th_teams_level) {
3345  // This is reduction at teams construct.
3346  KMP_DEBUG_ASSERT(!th->th.th_info.ds.ds_tid); // AC: check that tid == 0
3347  // Let's swap teams temporarily for the reduction.
3348  th->th.th_info.ds.ds_tid = team->t.t_master_tid;
3349  th->th.th_team = team->t.t_parent;
3350  th->th.th_team_nproc = th->th.th_team->t.t_nproc;
3351  th->th.th_task_team = th->th.th_team->t.t_task_team[0];
3352  *task_state = th->th.th_task_state;
3353  th->th.th_task_state = 0;
3354 
3355  return 1;
3356  }
3357  }
3358  return 0;
3359 }
3360 
3361 static __forceinline void
3362 __kmp_restore_swapped_teams(kmp_info_t *th, kmp_team_t *team, int task_state) {
3363  // Restore thread structure swapped in __kmp_swap_teams_for_teams_reduction.
3364  th->th.th_info.ds.ds_tid = 0;
3365  th->th.th_team = team;
3366  th->th.th_team_nproc = team->t.t_nproc;
3367  th->th.th_task_team = team->t.t_task_team[task_state];
3368  th->th.th_task_state = task_state;
3369 }
3370 #endif
3371 
3372 /* 2.a.i. Reduce Block without a terminating barrier */
3388 kmp_int32
3389 __kmpc_reduce_nowait(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
3390  size_t reduce_size, void *reduce_data,
3391  void (*reduce_func)(void *lhs_data, void *rhs_data),
3392  kmp_critical_name *lck) {
3393 
3394  KMP_COUNT_BLOCK(REDUCE_nowait);
3395  int retval = 0;
3396  PACKED_REDUCTION_METHOD_T packed_reduction_method;
3397 #if OMP_40_ENABLED
3398  kmp_info_t *th;
3399  kmp_team_t *team;
3400  int teams_swapped = 0, task_state;
3401 #endif
3402  KA_TRACE(10, ("__kmpc_reduce_nowait() enter: called T#%d\n", global_tid));
3403 
3404  // why do we need this initialization here at all?
3405  // The reduction clause cannot be used as a stand-alone directive.
3406 
3407  // do not call __kmp_serial_initialize(), it will be called by
3408  // __kmp_parallel_initialize() if needed
3409  // possible detection of false-positive race by the threadchecker ???
3410  if (!TCR_4(__kmp_init_parallel))
3411  __kmp_parallel_initialize();
3412 
3413 #if OMP_50_ENABLED
3414  __kmp_resume_if_soft_paused();
3415 #endif
3416 
3417 // check correctness of reduce block nesting
3418 #if KMP_USE_DYNAMIC_LOCK
3419  if (__kmp_env_consistency_check)
3420  __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0);
3421 #else
3422  if (__kmp_env_consistency_check)
3423  __kmp_push_sync(global_tid, ct_reduce, loc, NULL);
3424 #endif
3425 
3426 #if OMP_40_ENABLED
3427  th = __kmp_thread_from_gtid(global_tid);
3428  teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
3429 #endif // OMP_40_ENABLED
3430 
3431  // packed_reduction_method value will be reused by __kmp_end_reduce* function,
3432  // the value should be kept in a variable
3433  // the variable should be either a construct-specific or thread-specific
3434  // property, not a team specific property
3435  // (a thread can reach the next reduce block on the next construct, reduce
3436  // method may differ on the next construct)
3437  // an ident_t "loc" parameter could be used as a construct-specific property
3438  // (what if loc == 0?)
3439  // (if both construct-specific and team-specific variables were shared,
3440  // then unnecessary extra syncs would be needed)
3441  // a thread-specific variable is better regarding two issues above (next
3442  // construct and extra syncs)
3443  // a thread-specific "th_local.reduction_method" variable is used currently
3444  // each thread executes the 'determine' and 'set' lines (there is no need for
3445  // a single thread to do this, and it avoids unnecessary extra syncs)
3446 
3447  packed_reduction_method = __kmp_determine_reduction_method(
3448  loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
3449  __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);
3450 
3451  if (packed_reduction_method == critical_reduce_block) {
3452 
3453  __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
3454  retval = 1;
3455 
3456  } else if (packed_reduction_method == empty_reduce_block) {
3457 
3458  // usage: if team size == 1, no synchronization is required ( Intel
3459  // platforms only )
3460  retval = 1;
3461 
3462  } else if (packed_reduction_method == atomic_reduce_block) {
3463 
3464  retval = 2;
3465 
3466  // all threads should do this pop here (because __kmpc_end_reduce_nowait()
3467  // won't be called by the code gen)
3468  // (this is not quite precise, because the checking block has been closed by
3469  // this 'pop',
3470  // but the atomic operation has not been executed yet; it will be executed
3471  // slightly later, literally on the next instruction)
3472  if (__kmp_env_consistency_check)
3473  __kmp_pop_sync(global_tid, ct_reduce, loc);
3474 
3475  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3476  tree_reduce_block)) {
3477 
3478 // AT: performance issue: a real barrier here
3479 // AT: (if master goes slow, other threads are blocked here waiting for the
3480 // master to come and release them)
3481 // AT: (it's not what a customer might expect when specifying the NOWAIT clause)
3482 // AT: (specifying NOWAIT won't improve performance here; it will only be
3483 // confusing to a customer)
3484 // AT: another implementation of *barrier_gather*nowait() (or some other design)
3485 // might go faster and be more in line with the sense of NOWAIT
3486 // AT: TO DO: run the EPCC benchmark and compare times
3487 
3488 // this barrier should be invisible to a customer and to the threading profile
3489 // tool (it's neither a terminating barrier nor customer's code, it's
3490 // used for an internal purpose)
3491 #if OMPT_SUPPORT
3492  // JP: can this barrier potentially lead to task scheduling?
3493  // JP: as long as there is a barrier in the implementation, OMPT should and
3494  // will provide the barrier events
3495  // so we set-up the necessary frame/return addresses.
3496  ompt_frame_t *ompt_frame;
3497  if (ompt_enabled.enabled) {
3498  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3499  if (ompt_frame->enter_frame.ptr == NULL)
3500  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3501  OMPT_STORE_RETURN_ADDRESS(global_tid);
3502  }
3503 #endif
3504 #if USE_ITT_NOTIFY
3505  __kmp_threads[global_tid]->th.th_ident = loc;
3506 #endif
3507  retval =
3508  __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
3509  global_tid, FALSE, reduce_size, reduce_data, reduce_func);
3510  retval = (retval != 0) ? (0) : (1);
3511 #if OMPT_SUPPORT && OMPT_OPTIONAL
3512  if (ompt_enabled.enabled) {
3513  ompt_frame->enter_frame = ompt_data_none;
3514  }
3515 #endif
3516 
3517  // all other workers except master should do this pop here
3518  // (none of the other workers will get to __kmpc_end_reduce_nowait())
3519  if (__kmp_env_consistency_check) {
3520  if (retval == 0) {
3521  __kmp_pop_sync(global_tid, ct_reduce, loc);
3522  }
3523  }
3524 
3525  } else {
3526 
3527  // should never reach this block
3528  KMP_ASSERT(0); // "unexpected method"
3529  }
3530 #if OMP_40_ENABLED
3531  if (teams_swapped) {
3532  __kmp_restore_swapped_teams(th, team, task_state);
3533  }
3534 #endif
3535  KA_TRACE(
3536  10,
3537  ("__kmpc_reduce_nowait() exit: called T#%d: method %08x, returns %08x\n",
3538  global_tid, packed_reduction_method, retval));
3539 
3540  return retval;
3541 }
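
// The construct this entry serves is a worksharing reduction with the NOWAIT
// clause; which __kmpc_reduce* entry and reduce_func a given compiler emits
// is codegen-dependent. Sketch of hypothetical user code:
static double example_reduce_nowait(const double *a, int n) {
  double sum = 0.0;
#pragma omp parallel
  {
#pragma omp for reduction(+ : sum) nowait
    for (int i = 0; i < n; ++i)
      sum += a[i];
    // no barrier here because of nowait; 'sum' is guaranteed complete only
    // after a later barrier or at the end of the parallel region
  }
  return sum;
}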
3542 
3551 void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
3552  kmp_critical_name *lck) {
3553 
3554  PACKED_REDUCTION_METHOD_T packed_reduction_method;
3555 
3556  KA_TRACE(10, ("__kmpc_end_reduce_nowait() enter: called T#%d\n", global_tid));
3557 
3558  packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);
3559 
3560  if (packed_reduction_method == critical_reduce_block) {
3561 
3562  __kmp_end_critical_section_reduce_block(loc, global_tid, lck);
3563 
3564  } else if (packed_reduction_method == empty_reduce_block) {
3565 
3566  // usage: if team size == 1, no synchronization is required ( on Intel
3567  // platforms only )
3568 
3569  } else if (packed_reduction_method == atomic_reduce_block) {
3570 
3571  // neither master nor other workers should get here
3572  // (code gen does not generate this call in case 2: atomic reduce block)
3573  // actually it would be better to remove this else-if entirely;
3574  // after removal this value would be caught by the 'else' branch and assert
3575 
3576  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3577  tree_reduce_block)) {
3578 
3579  // only master gets here
3580 
3581  } else {
3582 
3583  // should never reach this block
3584  KMP_ASSERT(0); // "unexpected method"
3585  }
3586 
3587  if (__kmp_env_consistency_check)
3588  __kmp_pop_sync(global_tid, ct_reduce, loc);
3589 
3590  KA_TRACE(10, ("__kmpc_end_reduce_nowait() exit: called T#%d: method %08x\n",
3591  global_tid, packed_reduction_method));
3592 
3593  return;
3594 }
3595 
3596 /* 2.a.ii. Reduce Block with a terminating barrier */
3597 
3613 kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
3614  size_t reduce_size, void *reduce_data,
3615  void (*reduce_func)(void *lhs_data, void *rhs_data),
3616  kmp_critical_name *lck) {
3617  KMP_COUNT_BLOCK(REDUCE_wait);
3618  int retval = 0;
3619  PACKED_REDUCTION_METHOD_T packed_reduction_method;
3620 #if OMP_40_ENABLED
3621  kmp_info_t *th;
3622  kmp_team_t *team;
3623  int teams_swapped = 0, task_state;
3624 #endif
3625 
3626  KA_TRACE(10, ("__kmpc_reduce() enter: called T#%d\n", global_tid));
3627 
3628  // why do we need this initialization here at all?
3629  // The reduction clause cannot be a stand-alone directive.
3630 
3631  // do not call __kmp_serial_initialize(), it will be called by
3632  // __kmp_parallel_initialize() if needed
3633  // possible detection of false-positive race by the threadchecker ???
3634  if (!TCR_4(__kmp_init_parallel))
3635  __kmp_parallel_initialize();
3636 
3637 #if OMP_50_ENABLED
3638  __kmp_resume_if_soft_paused();
3639 #endif
3640 
3641 // check correctness of reduce block nesting
3642 #if KMP_USE_DYNAMIC_LOCK
3643  if (__kmp_env_consistency_check)
3644  __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0);
3645 #else
3646  if (__kmp_env_consistency_check)
3647  __kmp_push_sync(global_tid, ct_reduce, loc, NULL);
3648 #endif
3649 
3650 #if OMP_40_ENABLED
3651  th = __kmp_thread_from_gtid(global_tid);
3652  teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
3653 #endif // OMP_40_ENABLED
3654 
3655  packed_reduction_method = __kmp_determine_reduction_method(
3656  loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
3657  __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);
3658 
3659  if (packed_reduction_method == critical_reduce_block) {
3660 
3661  __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
3662  retval = 1;
3663 
3664  } else if (packed_reduction_method == empty_reduce_block) {
3665 
3666  // usage: if team size == 1, no synchronization is required ( Intel
3667  // platforms only )
3668  retval = 1;
3669 
3670  } else if (packed_reduction_method == atomic_reduce_block) {
3671 
3672  retval = 2;
3673 
3674  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3675  tree_reduce_block)) {
3676 
3677 // case tree_reduce_block:
3678 // this barrier should be visible to a customer and to the threading profile
3679 // tool (it's a terminating barrier on constructs if NOWAIT not specified)
3680 #if OMPT_SUPPORT
3681  ompt_frame_t *ompt_frame;
3682  if (ompt_enabled.enabled) {
3683  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3684  if (ompt_frame->enter_frame.ptr == NULL)
3685  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3686  OMPT_STORE_RETURN_ADDRESS(global_tid);
3687  }
3688 #endif
3689 #if USE_ITT_NOTIFY
3690  __kmp_threads[global_tid]->th.th_ident =
3691  loc; // needed for correct notification of frames
3692 #endif
3693  retval =
3694  __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
3695  global_tid, TRUE, reduce_size, reduce_data, reduce_func);
3696  retval = (retval != 0) ? (0) : (1);
3697 #if OMPT_SUPPORT && OMPT_OPTIONAL
3698  if (ompt_enabled.enabled) {
3699  ompt_frame->enter_frame = ompt_data_none;
3700  }
3701 #endif
3702 
3703  // all workers except the master should do this pop here
3704  // (only the master will enter __kmpc_end_reduce())
3705  if (__kmp_env_consistency_check) {
3706  if (retval == 0) { // 0: all other workers; 1: master
3707  __kmp_pop_sync(global_tid, ct_reduce, loc);
3708  }
3709  }
3710 
3711  } else {
3712 
3713  // should never reach this block
3714  KMP_ASSERT(0); // "unexpected method"
3715  }
3716 #if OMP_40_ENABLED
3717  if (teams_swapped) {
3718  __kmp_restore_swapped_teams(th, team, task_state);
3719  }
3720 #endif
3721 
3722  KA_TRACE(10,
3723  ("__kmpc_reduce() exit: called T#%d: method %08x, returns %08x\n",
3724  global_tid, packed_reduction_method, retval));
3725 
3726  return retval;
3727 }
3728 
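/* Illustrative example (user code, not part of the runtime source): a
   blocking worksharing reduction such as the one below is what compilers
   typically lower to the __kmpc_reduce / __kmpc_end_reduce pair defined here;
   the exact lowering is compiler-specific.

     #include <stdio.h>

     int main(void) {
       long sum = 0;
     #pragma omp parallel for reduction(+ : sum) // no "nowait" clause
       for (int i = 0; i < 1000; ++i)
         sum += i;
       printf("sum = %ld\n", sum); // prints 499500
       return 0;
     }
*/
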
3739 void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
3740  kmp_critical_name *lck) {
3741 
3742  PACKED_REDUCTION_METHOD_T packed_reduction_method;
3743 #if OMP_40_ENABLED
3744  kmp_info_t *th;
3745  kmp_team_t *team;
3746  int teams_swapped = 0, task_state;
3747 #endif
3748 
3749  KA_TRACE(10, ("__kmpc_end_reduce() enter: called T#%d\n", global_tid));
3750 
3751 #if OMP_40_ENABLED
3752  th = __kmp_thread_from_gtid(global_tid);
3753  teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
3754 #endif // OMP_40_ENABLED
3755 
3756  packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);
3757 
3758  // this barrier should be visible to the user and to the threading profile
3759  // tool (it is a terminating barrier on constructs if NOWAIT is not specified)
3760 
3761  if (packed_reduction_method == critical_reduce_block) {
3762 
3763  __kmp_end_critical_section_reduce_block(loc, global_tid, lck);
3764 
3765 // TODO: implicit barrier: should be exposed
3766 #if OMPT_SUPPORT
3767  ompt_frame_t *ompt_frame;
3768  if (ompt_enabled.enabled) {
3769  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3770  if (ompt_frame->enter_frame.ptr == NULL)
3771  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3772  OMPT_STORE_RETURN_ADDRESS(global_tid);
3773  }
3774 #endif
3775 #if USE_ITT_NOTIFY
3776  __kmp_threads[global_tid]->th.th_ident = loc;
3777 #endif
3778  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
3779 #if OMPT_SUPPORT && OMPT_OPTIONAL
3780  if (ompt_enabled.enabled) {
3781  ompt_frame->enter_frame = ompt_data_none;
3782  }
3783 #endif
3784 
3785  } else if (packed_reduction_method == empty_reduce_block) {
3786 
3787 // if the team size is 1, no synchronization is required (Intel platforms only)
3788 
3789 // TODO: implicit barrier: should be exposed
3790 #if OMPT_SUPPORT
3791  ompt_frame_t *ompt_frame;
3792  if (ompt_enabled.enabled) {
3793  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3794  if (ompt_frame->enter_frame.ptr == NULL)
3795  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3796  OMPT_STORE_RETURN_ADDRESS(global_tid);
3797  }
3798 #endif
3799 #if USE_ITT_NOTIFY
3800  __kmp_threads[global_tid]->th.th_ident = loc;
3801 #endif
3802  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
3803 #if OMPT_SUPPORT && OMPT_OPTIONAL
3804  if (ompt_enabled.enabled) {
3805  ompt_frame->enter_frame = ompt_data_none;
3806  }
3807 #endif
3808 
3809  } else if (packed_reduction_method == atomic_reduce_block) {
3810 
3811 #if OMPT_SUPPORT
3812  ompt_frame_t *ompt_frame;
3813  if (ompt_enabled.enabled) {
3814  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3815  if (ompt_frame->enter_frame.ptr == NULL)
3816  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3817  OMPT_STORE_RETURN_ADDRESS(global_tid);
3818  }
3819 #endif
3820 // TODO: implicit barrier: should be exposed
3821 #if USE_ITT_NOTIFY
3822  __kmp_threads[global_tid]->th.th_ident = loc;
3823 #endif
3824  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
3825 #if OMPT_SUPPORT && OMPT_OPTIONAL
3826  if (ompt_enabled.enabled) {
3827  ompt_frame->enter_frame = ompt_data_none;
3828  }
3829 #endif
3830 
3831  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3832  tree_reduce_block)) {
3833 
3834  // only master executes here (master releases all other workers)
3835  __kmp_end_split_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
3836  global_tid);
3837 
3838  } else {
3839 
3840  // should never reach this block
3841  KMP_ASSERT(0); // "unexpected method"
3842  }
3843 #if OMP_40_ENABLED
3844  if (teams_swapped) {
3845  __kmp_restore_swapped_teams(th, team, task_state);
3846  }
3847 #endif
3848 
3849  if (__kmp_env_consistency_check)
3850  __kmp_pop_sync(global_tid, ct_reduce, loc);
3851 
3852  KA_TRACE(10, ("__kmpc_end_reduce() exit: called T#%d: method %08x\n",
3853  global_tid, packed_reduction_method));
3854 
3855  return;
3856 }
3857 
3858 #undef __KMP_GET_REDUCTION_METHOD
3859 #undef __KMP_SET_REDUCTION_METHOD
3860 
3861 /* end of interface to fast scalable reduce routines */
3862 
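/* Sketch of the call pattern a compiler typically emits around the blocking
   reduce interface above (illustrative: loc/gtid are the compiler-supplied
   source location and thread id, and shared_sum/local_sum/combine are made-up
   names for a one-variable "+" reduction). The return value of __kmpc_reduce
   selects the combining strategy: 1 = this thread combines the partial result
   itself, 2 = combine with atomics, 0 = nothing left to do (tree method,
   non-master threads).

     static kmp_critical_name red_lock; // zero-initialized lock storage

     // combines *rhs into *lhs; used internally by the tree-reduction barrier
     void combine(void *lhs, void *rhs) { *(long *)lhs += *(long *)rhs; }

     // inside the parallel region, after local_sum has been computed:
     switch (__kmpc_reduce(loc, gtid, 1, sizeof(long), &local_sum, combine,
                           &red_lock)) {
     case 1: // critical / empty / tree-master paths
       shared_sum += local_sum;
       __kmpc_end_reduce(loc, gtid, &red_lock);
       break;
     case 2: // atomic path; __kmpc_end_reduce supplies the closing barrier
       #pragma omp atomic
       shared_sum += local_sum;
       __kmpc_end_reduce(loc, gtid, &red_lock);
       break;
     default: // 0: the reduction was already completed on this thread's behalf
       break;
     }
*/
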
3863 kmp_uint64 __kmpc_get_taskid() {
3864 
3865  kmp_int32 gtid;
3866  kmp_info_t *thread;
3867 
3868  gtid = __kmp_get_gtid();
3869  if (gtid < 0) {
3870  return 0;
3871  }
3872  thread = __kmp_thread_from_gtid(gtid);
3873  return thread->th.th_current_task->td_task_id;
3874 
3875 } // __kmpc_get_taskid
3876 
3877 kmp_uint64 __kmpc_get_parent_taskid() {
3878 
3879  kmp_int32 gtid;
3880  kmp_info_t *thread;
3881  kmp_taskdata_t *parent_task;
3882 
3883  gtid = __kmp_get_gtid();
3884  if (gtid < 0) {
3885  return 0;
3886  }
3887  thread = __kmp_thread_from_gtid(gtid);
3888  parent_task = thread->th.th_current_task->td_parent;
3889  return (parent_task == NULL ? 0 : parent_task->td_task_id);
3890 
3891 } // __kmpc_get_parent_taskid
3892 
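/* Illustrative use of the two task-id queries above (they are internal runtime
   entry points rather than part of the standard OpenMP API; a tool or test
   linked against this runtime could call them as sketched below, where
   trace_point is a made-up helper).

     #include <stdio.h>

     extern "C" kmp_uint64 __kmpc_get_taskid();
     extern "C" kmp_uint64 __kmpc_get_parent_taskid();

     void trace_point(const char *msg) {
       // both queries return 0 if the calling thread is not known to the runtime
       printf("[task %llu <- parent %llu] %s\n",
              (unsigned long long)__kmpc_get_taskid(),
              (unsigned long long)__kmpc_get_parent_taskid(), msg);
     }
*/
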
3893 #if OMP_45_ENABLED
3894 
3905 void __kmpc_doacross_init(ident_t *loc, int gtid, int num_dims,
3906  const struct kmp_dim *dims) {
3907  int j, idx;
3908  kmp_int64 last, trace_count;
3909  kmp_info_t *th = __kmp_threads[gtid];
3910  kmp_team_t *team = th->th.th_team;
3911  kmp_uint32 *flags;
3912  kmp_disp_t *pr_buf = th->th.th_dispatch;
3913  dispatch_shared_info_t *sh_buf;
3914 
3915  KA_TRACE(
3916  20,
3917  ("__kmpc_doacross_init() enter: called T#%d, num dims %d, active %d\n",
3918  gtid, num_dims, !team->t.t_serialized));
3919  KMP_DEBUG_ASSERT(dims != NULL);
3920  KMP_DEBUG_ASSERT(num_dims > 0);
3921 
3922  if (team->t.t_serialized) {
3923  KA_TRACE(20, ("__kmpc_doacross_init() exit: serialized team\n"));
3924  return; // no dependencies if team is serialized
3925  }
3926  KMP_DEBUG_ASSERT(team->t.t_nproc > 1);
3927  idx = pr_buf->th_doacross_buf_idx++; // Increment index of shared buffer for
3928  // the next loop
3929  sh_buf = &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];
3930 
3931  // Save bounds info into allocated private buffer
3932  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info == NULL);
3933  pr_buf->th_doacross_info = (kmp_int64 *)__kmp_thread_malloc(
3934  th, sizeof(kmp_int64) * (4 * num_dims + 1));
3935  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
3936  pr_buf->th_doacross_info[0] =
3937  (kmp_int64)num_dims; // first element is number of dimensions
3938  // Also save the address of num_done so it can be accessed later without
3939  // knowing the buffer index
3940  pr_buf->th_doacross_info[1] = (kmp_int64)&sh_buf->doacross_num_done;
3941  pr_buf->th_doacross_info[2] = dims[0].lo;
3942  pr_buf->th_doacross_info[3] = dims[0].up;
3943  pr_buf->th_doacross_info[4] = dims[0].st;
3944  last = 5;
3945  for (j = 1; j < num_dims; ++j) {
3946  kmp_int64
3947  range_length; // range of dimension j (dims[0] is handled separately)
3948  if (dims[j].st == 1) { // most common case
3949  // AC: should we care of ranges bigger than LLONG_MAX? (not for now)
3950  range_length = dims[j].up - dims[j].lo + 1;
3951  } else {
3952  if (dims[j].st > 0) {
3953  KMP_DEBUG_ASSERT(dims[j].up > dims[j].lo);
3954  range_length = (kmp_uint64)(dims[j].up - dims[j].lo) / dims[j].st + 1;
3955  } else { // negative increment
3956  KMP_DEBUG_ASSERT(dims[j].lo > dims[j].up);
3957  range_length =
3958  (kmp_uint64)(dims[j].lo - dims[j].up) / (-dims[j].st) + 1;
3959  }
3960  }
3961  pr_buf->th_doacross_info[last++] = range_length;
3962  pr_buf->th_doacross_info[last++] = dims[j].lo;
3963  pr_buf->th_doacross_info[last++] = dims[j].up;
3964  pr_buf->th_doacross_info[last++] = dims[j].st;
3965  }
3966 
3967  // Compute total trip count.
3968  // Start with range of dims[0] which we don't need to keep in the buffer.
3969  if (dims[0].st == 1) { // most common case
3970  trace_count = dims[0].up - dims[0].lo + 1;
3971  } else if (dims[0].st > 0) {
3972  KMP_DEBUG_ASSERT(dims[0].up > dims[0].lo);
3973  trace_count = (kmp_uint64)(dims[0].up - dims[0].lo) / dims[0].st + 1;
3974  } else { // negative increment
3975  KMP_DEBUG_ASSERT(dims[0].lo > dims[0].up);
3976  trace_count = (kmp_uint64)(dims[0].lo - dims[0].up) / (-dims[0].st) + 1;
3977  }
3978  for (j = 1; j < num_dims; ++j) {
3979  trace_count *= pr_buf->th_doacross_info[4 * j + 1]; // use kept ranges
3980  }
3981  KMP_DEBUG_ASSERT(trace_count > 0);
3982 
3983  // Check that the shared buffer is not still occupied by another loop
3984  // (i.e., by the loop that used buffer index idx - __kmp_dispatch_num_buffers)
3985  if (idx != sh_buf->doacross_buf_idx) {
3986  // Shared buffer is occupied, wait for it to be free
3987  __kmp_wait_4((volatile kmp_uint32 *)&sh_buf->doacross_buf_idx, idx,
3988  __kmp_eq_4, NULL);
3989  }
3990 #if KMP_32_BIT_ARCH
3991  // Check if we are the first thread. After the CAS the first thread gets 0,
3992  // others get 1 if initialization is in progress, allocated pointer otherwise.
3993  // Treat pointer as volatile integer (value 0 or 1) until memory is allocated.
3994  flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET32(
3995  (volatile kmp_int32 *)&sh_buf->doacross_flags, NULL, 1);
3996 #else
3997  flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET64(
3998  (volatile kmp_int64 *)&sh_buf->doacross_flags, NULL, 1LL);
3999 #endif
4000  if (flags == NULL) {
4001  // we are the first thread, allocate the array of flags
4002  size_t size = trace_count / 8 + 8; // in bytes, use single bit per iteration
4003  flags = (kmp_uint32 *)__kmp_thread_calloc(th, size, 1);
4004  KMP_MB();
4005  sh_buf->doacross_flags = flags;
4006  } else if (flags == (kmp_uint32 *)1) {
4007 #if KMP_32_BIT_ARCH
4008  // initialization is still in progress, need to wait
4009  while (*(volatile kmp_int32 *)&sh_buf->doacross_flags == 1)
4010 #else
4011  while (*(volatile kmp_int64 *)&sh_buf->doacross_flags == 1LL)
4012 #endif
4013  KMP_YIELD(TRUE);
4014  KMP_MB();
4015  } else {
4016  KMP_MB();
4017  }
4018  KMP_DEBUG_ASSERT(sh_buf->doacross_flags > (kmp_uint32 *)1); // check ptr value
4019  pr_buf->th_doacross_flags =
4020  sh_buf->doacross_flags; // save private copy in order to not
4021  // touch shared buffer on each iteration
4022  KA_TRACE(20, ("__kmpc_doacross_init() exit: T#%d\n", gtid));
4023 }
4024 
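/* Illustrative example (user code): the doacross entry points in this block
   implement OpenMP 4.5 cross-iteration dependences. A compiler typically
   lowers the ordered(2) loop nest sketched here (N, M, a are made-up names)
   to one __kmpc_doacross_init per thread with a kmp_dim {lo, up, st} per
   dimension, one __kmpc_doacross_wait per depend(sink) vector, a
   __kmpc_doacross_post at the depend(source) point, and a
   __kmpc_doacross_fini at the end of the loop.

     #pragma omp parallel for ordered(2)
     for (int i = 1; i < N; ++i) {
       for (int j = 1; j < M; ++j) {
     #pragma omp ordered depend(sink : i - 1, j) depend(sink : i, j - 1)
         a[i][j] = a[i - 1][j] + a[i][j - 1];
     #pragma omp ordered depend(source)
       }
     }
*/
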
4025 void __kmpc_doacross_wait(ident_t *loc, int gtid, const kmp_int64 *vec) {
4026  kmp_int32 shft, num_dims, i;
4027  kmp_uint32 flag;
4028  kmp_int64 iter_number; // iteration number of "collapsed" loop nest
4029  kmp_info_t *th = __kmp_threads[gtid];
4030  kmp_team_t *team = th->th.th_team;
4031  kmp_disp_t *pr_buf;
4032  kmp_int64 lo, up, st;
4033 
4034  KA_TRACE(20, ("__kmpc_doacross_wait() enter: called T#%d\n", gtid));
4035  if (team->t.t_serialized) {
4036  KA_TRACE(20, ("__kmpc_doacross_wait() exit: serialized team\n"));
4037  return; // no dependencies if team is serialized
4038  }
4039 
4040  // calculate sequential iteration number and check out-of-bounds condition
4041  pr_buf = th->th.th_dispatch;
4042  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
4043  num_dims = pr_buf->th_doacross_info[0];
4044  lo = pr_buf->th_doacross_info[2];
4045  up = pr_buf->th_doacross_info[3];
4046  st = pr_buf->th_doacross_info[4];
4047  if (st == 1) { // most common case
4048  if (vec[0] < lo || vec[0] > up) {
4049  KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4050  "bounds [%lld,%lld]\n",
4051  gtid, vec[0], lo, up));
4052  return;
4053  }
4054  iter_number = vec[0] - lo;
4055  } else if (st > 0) {
4056  if (vec[0] < lo || vec[0] > up) {
4057  KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4058  "bounds [%lld,%lld]\n",
4059  gtid, vec[0], lo, up));
4060  return;
4061  }
4062  iter_number = (kmp_uint64)(vec[0] - lo) / st;
4063  } else { // negative increment
4064  if (vec[0] > lo || vec[0] < up) {
4065  KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4066  "bounds [%lld,%lld]\n",
4067  gtid, vec[0], lo, up));
4068  return;
4069  }
4070  iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
4071  }
4072  for (i = 1; i < num_dims; ++i) {
4073  kmp_int64 iter, ln;
4074  kmp_int32 j = i * 4;
4075  ln = pr_buf->th_doacross_info[j + 1];
4076  lo = pr_buf->th_doacross_info[j + 2];
4077  up = pr_buf->th_doacross_info[j + 3];
4078  st = pr_buf->th_doacross_info[j + 4];
4079  if (st == 1) {
4080  if (vec[i] < lo || vec[i] > up) {
4081  KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4082  "bounds [%lld,%lld]\n",
4083  gtid, vec[i], lo, up));
4084  return;
4085  }
4086  iter = vec[i] - lo;
4087  } else if (st > 0) {
4088  if (vec[i] < lo || vec[i] > up) {
4089  KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4090  "bounds [%lld,%lld]\n",
4091  gtid, vec[i], lo, up));
4092  return;
4093  }
4094  iter = (kmp_uint64)(vec[i] - lo) / st;
4095  } else { // st < 0
4096  if (vec[i] > lo || vec[i] < up) {
4097  KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4098  "bounds [%lld,%lld]\n",
4099  gtid, vec[i], lo, up));
4100  return;
4101  }
4102  iter = (kmp_uint64)(lo - vec[i]) / (-st);
4103  }
4104  iter_number = iter + ln * iter_number;
4105  }
4106  shft = iter_number % 32; // use 32-bit granularity
4107  iter_number >>= 5; // divided by 32
4108  flag = 1 << shft;
4109  while ((flag & pr_buf->th_doacross_flags[iter_number]) == 0) {
4110  KMP_YIELD(TRUE);
4111  }
4112  KMP_MB();
4113  KA_TRACE(20,
4114  ("__kmpc_doacross_wait() exit: T#%d wait for iter %lld completed\n",
4115  gtid, (iter_number << 5) + shft));
4116 }
4117 
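/* Worked example of the index math above (values are illustrative): for a
   two-dimensional doacross nest with dimension 0 = [0..9], step 1 and
   dimension 1 = [0..99], step 1, the saved range of dimension 1 is ln = 100,
   so the vector vec = (3, 42) linearizes to iter_number = 42 + 100 * 3 = 342.
   The flag array keeps one bit per iteration in 32-bit words, so the wait
   spins on bit 342 % 32 = 22 of word 342 >> 5 = 10 until the matching
   __kmpc_doacross_post() sets it.
*/
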
4118 void __kmpc_doacross_post(ident_t *loc, int gtid, const kmp_int64 *vec) {
4119  kmp_int32 shft, num_dims, i;
4120  kmp_uint32 flag;
4121  kmp_int64 iter_number; // iteration number of "collapsed" loop nest
4122  kmp_info_t *th = __kmp_threads[gtid];
4123  kmp_team_t *team = th->th.th_team;
4124  kmp_disp_t *pr_buf;
4125  kmp_int64 lo, st;
4126 
4127  KA_TRACE(20, ("__kmpc_doacross_post() enter: called T#%d\n", gtid));
4128  if (team->t.t_serialized) {
4129  KA_TRACE(20, ("__kmpc_doacross_post() exit: serialized team\n"));
4130  return; // no dependencies if team is serialized
4131  }
4132 
4133  // calculate sequential iteration number (same as in "wait" but no
4134  // out-of-bounds checks)
4135  pr_buf = th->th.th_dispatch;
4136  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
4137  num_dims = pr_buf->th_doacross_info[0];
4138  lo = pr_buf->th_doacross_info[2];
4139  st = pr_buf->th_doacross_info[4];
4140  if (st == 1) { // most common case
4141  iter_number = vec[0] - lo;
4142  } else if (st > 0) {
4143  iter_number = (kmp_uint64)(vec[0] - lo) / st;
4144  } else { // negative increment
4145  iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
4146  }
4147  for (i = 1; i < num_dims; ++i) {
4148  kmp_int64 iter, ln;
4149  kmp_int32 j = i * 4;
4150  ln = pr_buf->th_doacross_info[j + 1];
4151  lo = pr_buf->th_doacross_info[j + 2];
4152  st = pr_buf->th_doacross_info[j + 4];
4153  if (st == 1) {
4154  iter = vec[i] - lo;
4155  } else if (st > 0) {
4156  iter = (kmp_uint64)(vec[i] - lo) / st;
4157  } else { // st < 0
4158  iter = (kmp_uint64)(lo - vec[i]) / (-st);
4159  }
4160  iter_number = iter + ln * iter_number;
4161  }
4162  shft = iter_number % 32; // use 32-bit granularity
4163  iter_number >>= 5; // divided by 32
4164  flag = 1 << shft;
4165  KMP_MB();
4166  if ((flag & pr_buf->th_doacross_flags[iter_number]) == 0)
4167  KMP_TEST_THEN_OR32(&pr_buf->th_doacross_flags[iter_number], flag);
4168  KA_TRACE(20, ("__kmpc_doacross_post() exit: T#%d iter %lld posted\n", gtid,
4169  (iter_number << 5) + shft));
4170 }
4171 
4172 void __kmpc_doacross_fini(ident_t *loc, int gtid) {
4173  kmp_int32 num_done;
4174  kmp_info_t *th = __kmp_threads[gtid];
4175  kmp_team_t *team = th->th.th_team;
4176  kmp_disp_t *pr_buf = th->th.th_dispatch;
4177 
4178  KA_TRACE(20, ("__kmpc_doacross_fini() enter: called T#%d\n", gtid));
4179  if (team->t.t_serialized) {
4180  KA_TRACE(20, ("__kmpc_doacross_fini() exit: serialized team %p\n", team));
4181  return; // nothing to do
4182  }
4183  num_done = KMP_TEST_THEN_INC32((kmp_int32 *)pr_buf->th_doacross_info[1]) + 1;
4184  if (num_done == th->th.th_team_nproc) {
4185  // we are the last thread, need to free shared resources
4186  int idx = pr_buf->th_doacross_buf_idx - 1;
4187  dispatch_shared_info_t *sh_buf =
4188  &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];
4189  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info[1] ==
4190  (kmp_int64)&sh_buf->doacross_num_done);
4191  KMP_DEBUG_ASSERT(num_done == sh_buf->doacross_num_done);
4192  KMP_DEBUG_ASSERT(idx == sh_buf->doacross_buf_idx);
4193  __kmp_thread_free(th, CCAST(kmp_uint32 *, sh_buf->doacross_flags));
4194  sh_buf->doacross_flags = NULL;
4195  sh_buf->doacross_num_done = 0;
4196  sh_buf->doacross_buf_idx +=
4197  __kmp_dispatch_num_buffers; // free buffer for future re-use
4198  }
4199  // free private resources (need to keep buffer index forever)
4200  pr_buf->th_doacross_flags = NULL;
4201  __kmp_thread_free(th, (void *)pr_buf->th_doacross_info);
4202  pr_buf->th_doacross_info = NULL;
4203  KA_TRACE(20, ("__kmpc_doacross_fini() exit: T#%d\n", gtid));
4204 }
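
/* Sketch of the runtime call sequence behind the entry points above for the
   ordered(2) loop shown earlier (illustrative: loc/gtid are compiler-supplied,
   lb_i/ub_i stand for this thread's chunk of the parallelized i loop, and the
   worksharing-loop scheduling calls that produce them are omitted).

     struct kmp_dim dims[2] = {{1, N - 1, 1},   // {lo, up, st} for i
                               {1, M - 1, 1}};  // {lo, up, st} for j
     __kmpc_doacross_init(loc, gtid, 2, dims);  // every thread of the team
     for (kmp_int64 i = lb_i; i <= ub_i; ++i) {
       for (kmp_int64 j = 1; j < M; ++j) {
         kmp_int64 sink1[2] = {i - 1, j}, sink2[2] = {i, j - 1};
         __kmpc_doacross_wait(loc, gtid, sink1); // wait for (i-1, j)
         __kmpc_doacross_wait(loc, gtid, sink2); // wait for (i, j-1)
         a[i][j] = a[i - 1][j] + a[i][j - 1];
         kmp_int64 src[2] = {i, j};
         __kmpc_doacross_post(loc, gtid, src);   // publish (i, j)
       }
     }
     __kmpc_doacross_fini(loc, gtid);            // every thread; last one frees flags
*/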
4205 #endif
4206 
4207 #if OMP_50_ENABLED
4208 /* omp_alloc/omp_free only defined for C/C++, not for Fortran */
4209 void *omp_alloc(size_t size, omp_allocator_handle_t allocator) {
4210  return __kmpc_alloc(__kmp_entry_gtid(), size, allocator);
4211 }
4212 
4213 void omp_free(void *ptr, omp_allocator_handle_t allocator) {
4214  __kmpc_free(__kmp_entry_gtid(), ptr, allocator);
4215 }
4216 
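/* Illustrative use of the two C/C++ allocation entry points above
   (omp_default_mem_alloc is one of the predefined OpenMP 5.0 allocators; the
   buffer size and its use are arbitrary).

     #include <omp.h>

     double *buf = (double *)omp_alloc(1024 * sizeof(double),
                                       omp_default_mem_alloc);
     if (buf != NULL) {
       // ... use buf ...
       omp_free(buf, omp_default_mem_alloc);
     }
*/
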
4217 int __kmpc_get_target_offload(void) {
4218  if (!__kmp_init_serial) {
4219  __kmp_serial_initialize();
4220  }
4221  return __kmp_target_offload;
4222 }
4223 
4224 int __kmpc_pause_resource(kmp_pause_status_t level) {
4225  if (!__kmp_init_serial) {
4226  return 1; // Can't pause if runtime is not initialized
4227  }
4228  return __kmp_pause_resource(level);
4229 }
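
/* Illustrative use of the pause interface above: user code normally reaches
   __kmp_pause_resource through the standard OpenMP 5.0 routines
   omp_pause_resource / omp_pause_resource_all rather than by calling
   __kmpc_pause_resource directly (the sketch below assumes those routines are
   declared in omp.h, as they are in 5.0-conforming runtimes).

     #include <omp.h>

     // release runtime resources between two compute phases; returns 0 on success
     if (omp_pause_resource_all(omp_pause_soft) != 0) {
       // the runtime could not be paused; continue without pausing
     }
*/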
4230 #endif // OMP_50_ENABLED
4231 
4232 // end of file //