#include "kmp_error.h"
#include "kmp_stats.h"
#include "ompt-specific.h"

#define MAX_MESSAGE 512

/* __kmpc_begin */
  if ((env = getenv("KMP_INITIAL_THREAD_BIND")) != NULL &&
      __kmp_str_match_true(env)) {
    __kmp_middle_initialize();
    KC_TRACE(10, ("__kmpc_begin: middle initialization called\n"));
  } else if (__kmp_ignore_mppbeg() == FALSE) {
    __kmp_internal_begin();
    KC_TRACE(10, ("__kmpc_begin: called\n"));
  }

/* __kmpc_end */
  if (__kmp_ignore_mppend() == FALSE) {
    KC_TRACE(10, ("__kmpc_end: called\n"));
    KA_TRACE(30, ("__kmpc_end\n"));
    __kmp_internal_end_thread(-1);
  }
#if KMP_OS_WINDOWS && OMPT_SUPPORT
  if (ompt_enabled.enabled)
    __kmp_internal_end_library(__kmp_gtid_get_specific());
#endif

/* __kmpc_global_thread_num */
  kmp_int32 gtid = __kmp_entry_gtid();
  KC_TRACE(10, ("__kmpc_global_thread_num: T#%d\n", gtid));

/* __kmpc_global_num_threads */
  KC_TRACE(10,
           ("__kmpc_global_num_threads: num_threads = %d\n", __kmp_all_nth));
  return TCR_4(__kmp_all_nth);

/* __kmpc_bound_thread_num */
  KC_TRACE(10, ("__kmpc_bound_thread_num: called\n"));
  return __kmp_tid_from_gtid(__kmp_entry_gtid());

/* __kmpc_bound_num_threads */
  KC_TRACE(10, ("__kmpc_bound_num_threads: called\n"));
  return __kmp_entry_thread()->th.th_team->t.t_nproc;
/* __kmpc_ok_to_fork: KMP_PAR_RANGE filtering */
  if (__kmp_par_range == 0) {
  semi2 = strchr(semi2, ';');
  semi2 = strchr(semi2 + 1, ';');
  if (__kmp_par_range_filename[0]) {
    const char *name = semi2 - 1;
    while ((name > loc->psource) && (*name != '/') && (*name != ';')) {
    if ((*name == '/') || (*name == ';')) {
    if (strncmp(__kmp_par_range_filename, name, semi2 - name)) {
      return __kmp_par_range < 0;
  semi3 = strchr(semi2 + 1, ';');
  if (__kmp_par_range_routine[0]) {
    if ((semi3 != NULL) && (semi3 > semi2) &&
        (strncmp(__kmp_par_range_routine, semi2 + 1, semi3 - semi2 - 1))) {
      return __kmp_par_range < 0;
  if (KMP_SSCANF(semi3 + 1, "%d", &line_no) == 1) {
    if ((line_no >= __kmp_par_range_lb) && (line_no <= __kmp_par_range_ub)) {
      return __kmp_par_range > 0;
    return __kmp_par_range < 0;
/* __kmpc_in_parallel */
  return __kmp_entry_thread()->th.th_root->r.r_active;

void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
                             kmp_int32 num_threads) {
  KA_TRACE(20, ("__kmpc_push_num_threads: enter T#%d num_threads=%d\n",
                global_tid, num_threads));
  __kmp_push_num_threads(loc, global_tid, num_threads);
}

void __kmpc_pop_num_threads(ident_t *loc, kmp_int32 global_tid) {
  KA_TRACE(20, ("__kmpc_pop_num_threads: enter\n"));
}

void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
                           kmp_int32 proc_bind) {
  KA_TRACE(20, ("__kmpc_push_proc_bind: enter T#%d proc_bind=%d\n", global_tid,
                proc_bind));
  __kmp_push_proc_bind(loc, global_tid, (kmp_proc_bind_t)proc_bind);
}
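/* Usage sketch (added for orientation; not part of the runtime source).
   The entry points above receive the num_threads and proc_bind clauses of a
   parallel construct. The lowering named in the comments below (clang-style
   calls to __kmpc_push_num_threads / __kmpc_push_proc_bind before the fork)
   is an assumption, not something this excerpt guarantees. */
#if 0 /* illustrative user code, not compiled with this file */
#include <omp.h>
#include <stdio.h>

int main(void) {
  /* A compiler typically emits __kmpc_push_num_threads(&loc, gtid, 4) and
     __kmpc_push_proc_bind(&loc, gtid, proc_bind_close) right before forking
     this region (assumed lowering). */
#pragma omp parallel num_threads(4) proc_bind(close)
  printf("thread %d of %d\n", omp_get_thread_num(), omp_get_num_threads());
  return 0;
}
#endif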
/* __kmpc_fork_call */
  int gtid = __kmp_entry_gtid();
#if (KMP_STATS_ENABLED)
  if (previous_state == stats_state_e::SERIAL_REGION) {
277 KMP_EXCHANGE_PARTITIONED_TIMER(OMP_parallel_overhead);
279 KMP_PUSH_PARTITIONED_TIMER(OMP_parallel_overhead);
292 va_start(ap, microtask);
295 ompt_frame_t *ompt_frame;
296 if (ompt_enabled.enabled) {
297 kmp_info_t *master_th = __kmp_threads[gtid];
298 kmp_team_t *parent_team = master_th->th.th_team;
299 ompt_lw_taskteam_t *lwt = parent_team->t.ompt_serialized_team_info;
301 ompt_frame = &(lwt->ompt_task_info.frame);
303 int tid = __kmp_tid_from_gtid(gtid);
305 parent_team->t.t_implicit_task_taskdata[tid].ompt_task_info.frame);
307 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
308 OMPT_STORE_RETURN_ADDRESS(gtid);
#if INCLUDE_SSC_MARKS
  SSC_MARK_FORKING();
#endif
  __kmp_fork_call(loc, gtid, fork_context_intel, argc,
                  VOLATILE_CAST(microtask_t) microtask,
                  VOLATILE_CAST(launch_t) __kmp_invoke_task_func,
#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
                  &ap
#else
                  ap
#endif
                  );
#if INCLUDE_SSC_MARKS
  SSC_MARK_JOINING();
#endif
  __kmp_join_call(loc, gtid
#if OMPT_SUPPORT
                  ,
                  fork_context_intel
#endif
                  );
#if KMP_STATS_ENABLED
  if (previous_state == stats_state_e::SERIAL_REGION) {
340 KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
342 KMP_POP_PARTITIONED_TIMER();
#endif // KMP_STATS_ENABLED
}

/* __kmpc_push_num_teams */
void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
                           kmp_int32 num_teams, kmp_int32 num_threads) {
  KA_TRACE(20,
           ("__kmpc_push_num_teams: enter T#%d num_teams=%d num_threads=%d\n",
            global_tid, num_teams, num_threads));
  __kmp_push_num_teams(loc, global_tid, num_teams, num_threads);
}
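/* Usage sketch (added for orientation; not part of the runtime source).
   __kmpc_push_num_teams forwards the num_teams/thread_limit clauses that a
   host teams construct carries; __kmpc_fork_teams then runs the outlined
   region. The exact lowering is compiler-dependent and assumed here. */
#if 0 /* illustrative user code, not compiled with this file */
#include <omp.h>
#include <stdio.h>

int main(void) {
  /* Host teams (OpenMP 5.0). The clauses below are typically passed to the
     runtime via __kmpc_push_num_teams before __kmpc_fork_teams. */
#pragma omp teams num_teams(2) thread_limit(4)
  printf("team %d of %d\n", omp_get_team_num(), omp_get_num_teams());
  return 0;
}
#endif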
/* __kmpc_fork_teams */
  int gtid = __kmp_entry_gtid();
381 kmp_info_t *this_thr = __kmp_threads[gtid];
383 va_start(ap, microtask);
#if KMP_STATS_ENABLED
  if (previous_state == stats_state_e::SERIAL_REGION) {
389 KMP_EXCHANGE_PARTITIONED_TIMER(OMP_teams_overhead);
391 KMP_PUSH_PARTITIONED_TIMER(OMP_teams_overhead);
396 this_thr->th.th_teams_microtask = microtask;
  this_thr->th.th_teams_level = this_thr->th.th_team->t.t_level;
401 kmp_team_t *parent_team = this_thr->th.th_team;
402 int tid = __kmp_tid_from_gtid(gtid);
403 if (ompt_enabled.enabled) {
404 parent_team->t.t_implicit_task_taskdata[tid]
405 .ompt_task_info.frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
407 OMPT_STORE_RETURN_ADDRESS(gtid);
412 if (this_thr->th.th_teams_size.nteams == 0) {
413 __kmp_push_num_teams(loc, gtid, 0, 0);
415 KMP_DEBUG_ASSERT(this_thr->th.th_set_nproc >= 1);
416 KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nteams >= 1);
417 KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nth >= 1);
  __kmp_fork_call(loc, gtid, fork_context_intel, argc,
                  VOLATILE_CAST(microtask_t) __kmp_teams_master,
                  VOLATILE_CAST(launch_t) __kmp_invoke_teams_master,
#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
                  &ap
#else
                  ap
#endif
                  );
  __kmp_join_call(loc, gtid
#if OMPT_SUPPORT
                  ,
                  fork_context_intel
#endif
                  );
437 KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots);
438 kmp_cg_root_t *tmp = this_thr->th.th_cg_roots;
439 this_thr->th.th_cg_roots = tmp->up;
  KA_TRACE(100, ("__kmpc_fork_teams: Thread %p popping node %p and moving up"
                 " to node %p. cg_nthreads was %d\n",
                 this_thr, tmp, this_thr->th.th_cg_roots, tmp->cg_nthreads));
445 KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots);
446 this_thr->th.th_current_task->td_icvs.thread_limit =
447 this_thr->th.th_cg_roots->cg_thread_limit;
449 this_thr->th.th_teams_microtask = NULL;
450 this_thr->th.th_teams_level = 0;
451 *(kmp_int64 *)(&this_thr->th.th_teams_size) = 0L;
#if KMP_STATS_ENABLED
  if (previous_state == stats_state_e::SERIAL_REGION) {
455 KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
457 KMP_POP_PARTITIONED_TIMER();
#endif // KMP_STATS_ENABLED
}

int __kmpc_invoke_task_func(int gtid) { return __kmp_invoke_task_func(gtid); }
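/* Usage sketch (added for orientation; not part of the runtime source).
   __kmpc_fork_call receives the compiler-outlined body of a parallel region
   (the "microtask") and runs it on every thread of the team through
   __kmp_invoke_task_func. The outlined function below is only a rough
   stand-in; the real microtask signature uses internal gtid/tid pointers. */
#if 0 /* illustrative user code, not compiled with this file */
#include <omp.h>
#include <stdio.h>

static void body(int *counter) { /* roughly what the compiler outlines */
#pragma omp atomic
  (*counter)++;
}

int main(void) {
  int counter = 0;
#pragma omp parallel
  body(&counter);
  printf("ran on %d threads\n", counter);
  return 0;
}
#endif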
/* __kmpc_serialized_parallel */
  OMPT_STORE_RETURN_ADDRESS(global_tid);
  __kmp_serialized_parallel(loc, global_tid);
/* __kmpc_end_serialized_parallel */
  kmp_internal_control_t *top;
  kmp_info_t *this_thr;
  kmp_team_t *serial_team;

  KC_TRACE(10,
           ("__kmpc_end_serialized_parallel: called by T#%d\n", global_tid));
512 if (!TCR_4(__kmp_init_parallel))
513 __kmp_parallel_initialize();
516 __kmp_resume_if_soft_paused();
519 this_thr = __kmp_threads[global_tid];
520 serial_team = this_thr->th.th_serial_team;
523 kmp_task_team_t *task_team = this_thr->th.th_task_team;
526 if (task_team != NULL && task_team->tt.tt_found_proxy_tasks)
527 __kmp_task_team_wait(this_thr, serial_team USE_ITT_BUILD_ARG(NULL));
531 KMP_DEBUG_ASSERT(serial_team);
532 KMP_ASSERT(serial_team->t.t_serialized);
533 KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team);
534 KMP_DEBUG_ASSERT(serial_team != this_thr->th.th_root->r.r_root_team);
535 KMP_DEBUG_ASSERT(serial_team->t.t_threads);
536 KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);
539 if (ompt_enabled.enabled &&
540 this_thr->th.ompt_thread_info.state != ompt_state_overhead) {
541 OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame = ompt_data_none;
542 if (ompt_enabled.ompt_callback_implicit_task) {
543 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
544 ompt_scope_end, NULL, OMPT_CUR_TASK_DATA(this_thr), 1,
545 OMPT_CUR_TASK_INFO(this_thr)->thread_num, ompt_task_implicit);
549 ompt_data_t *parent_task_data;
550 __ompt_get_task_info_internal(1, NULL, &parent_task_data, NULL, NULL, NULL);
552 if (ompt_enabled.ompt_callback_parallel_end) {
553 ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
554 &(serial_team->t.ompt_team_info.parallel_data), parent_task_data,
555 ompt_parallel_invoker_program, OMPT_LOAD_RETURN_ADDRESS(global_tid));
557 __ompt_lw_taskteam_unlink(this_thr);
558 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
564 top = serial_team->t.t_control_stack_top;
565 if (top && top->serial_nesting_level == serial_team->t.t_serialized) {
566 copy_icvs(&serial_team->t.t_threads[0]->th.th_current_task->td_icvs, top);
567 serial_team->t.t_control_stack_top = top->next;
572 serial_team->t.t_level--;
575 KMP_DEBUG_ASSERT(serial_team->t.t_dispatch->th_disp_buffer);
577 dispatch_private_info_t *disp_buffer =
578 serial_team->t.t_dispatch->th_disp_buffer;
579 serial_team->t.t_dispatch->th_disp_buffer =
580 serial_team->t.t_dispatch->th_disp_buffer->next;
581 __kmp_free(disp_buffer);
584 this_thr->th.th_def_allocator = serial_team->t.t_def_allocator;
587 --serial_team->t.t_serialized;
588 if (serial_team->t.t_serialized == 0) {
592 #if KMP_ARCH_X86 || KMP_ARCH_X86_64 593 if (__kmp_inherit_fp_control && serial_team->t.t_fp_control_saved) {
594 __kmp_clear_x87_fpu_status_word();
595 __kmp_load_x87_fpu_control_word(&serial_team->t.t_x87_fpu_control_word);
596 __kmp_load_mxcsr(&serial_team->t.t_mxcsr);
600 this_thr->th.th_team = serial_team->t.t_parent;
601 this_thr->th.th_info.ds.ds_tid = serial_team->t.t_master_tid;
604 this_thr->th.th_team_nproc = serial_team->t.t_parent->t.t_nproc;
605 this_thr->th.th_team_master =
606 serial_team->t.t_parent->t.t_threads[0];
607 this_thr->th.th_team_serialized = this_thr->th.th_team->t.t_serialized;
610 this_thr->th.th_dispatch =
611 &this_thr->th.th_team->t.t_dispatch[serial_team->t.t_master_tid];
613 __kmp_pop_current_task_from_thread(this_thr);
615 KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 0);
616 this_thr->th.th_current_task->td_flags.executing = 1;
618 if (__kmp_tasking_mode != tskm_immediate_exec) {
620 this_thr->th.th_task_team =
621 this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state];
      KA_TRACE(20,
               ("__kmpc_end_serialized_parallel: T#%d restoring task_team %p / "
                "team %p\n",
                global_tid, this_thr->th.th_task_team, this_thr->th.th_team));
  if (__kmp_tasking_mode != tskm_immediate_exec) {
    KA_TRACE(20, ("__kmpc_end_serialized_parallel: T#%d decreasing nesting "
                  "depth of serial team %p to %d\n",
                  global_tid, serial_team, serial_team->t.t_serialized));
635 if (__kmp_env_consistency_check)
636 __kmp_pop_parallel(global_tid, NULL);
638 if (ompt_enabled.enabled)
639 this_thr->th.ompt_thread_info.state =
640 ((this_thr->th.th_team_serialized) ? ompt_state_work_serial
641 : ompt_state_work_parallel);
/* __kmpc_flush */
  KC_TRACE(10, ("__kmpc_flush: called\n"));
#if (KMP_ARCH_X86 || KMP_ARCH_X86_64)
  if (!__kmp_cpuinfo.initialized) {
674 __kmp_query_cpuid(&__kmp_cpuinfo);
676 if (!__kmp_cpuinfo.sse2) {
#elif KMP_COMPILER_MSVC
#else
  __sync_synchronize();
#endif // KMP_COMPILER_ICC
#elif (KMP_ARCH_ARM || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS || KMP_ARCH_MIPS64)
#else
#error Unknown or unsupported architecture
#endif

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_flush) {
709 ompt_callbacks.ompt_callback(ompt_callback_flush)(
710 __ompt_get_thread_data_internal(), OMPT_GET_RETURN_ADDRESS(0));
/* __kmpc_barrier */
  KC_TRACE(10, ("__kmpc_barrier: called T#%d\n", global_tid));
727 if (!TCR_4(__kmp_init_parallel))
728 __kmp_parallel_initialize();
731 __kmp_resume_if_soft_paused();
734 if (__kmp_env_consistency_check) {
736 KMP_WARNING(ConstructIdentInvalid);
739 __kmp_check_barrier(global_tid, ct_barrier, loc);
743 ompt_frame_t *ompt_frame;
744 if (ompt_enabled.enabled) {
745 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
746 if (ompt_frame->enter_frame.ptr == NULL)
747 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
748 OMPT_STORE_RETURN_ADDRESS(global_tid);
751 __kmp_threads[global_tid]->th.th_ident = loc;
759 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
762 ompt_frame->enter_frame = ompt_data_none;
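/* Usage sketch (added for orientation; not part of the runtime source).
   An explicit barrier directive is generally lowered to a call of
   __kmpc_barrier(&loc, gtid) (assumed lowering); the barrier also implies a
   flush of shared variables, which the example relies on. */
#if 0 /* illustrative user code, not compiled with this file */
#include <omp.h>
#include <stdio.h>

int main(void) {
  int ready = 0;
#pragma omp parallel num_threads(2) shared(ready)
  {
    if (omp_get_thread_num() == 0)
      ready = 1; /* written before the barrier */
#pragma omp barrier
    printf("thread %d sees ready=%d\n", omp_get_thread_num(), ready);
  }
  return 0;
}
#endif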
/* __kmpc_master */
  KC_TRACE(10, ("__kmpc_master: called T#%d\n", global_tid));
779 if (!TCR_4(__kmp_init_parallel))
780 __kmp_parallel_initialize();
783 __kmp_resume_if_soft_paused();
786 if (KMP_MASTER_GTID(global_tid)) {
788 KMP_PUSH_PARTITIONED_TIMER(OMP_master);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_master) {
795 kmp_info_t *this_thr = __kmp_threads[global_tid];
796 kmp_team_t *team = this_thr->th.th_team;
798 int tid = __kmp_tid_from_gtid(global_tid);
799 ompt_callbacks.ompt_callback(ompt_callback_master)(
800 ompt_scope_begin, &(team->t.ompt_team_info.parallel_data),
801 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
802 OMPT_GET_RETURN_ADDRESS(0));
807 if (__kmp_env_consistency_check) {
#if KMP_USE_DYNAMIC_LOCK
    __kmp_push_sync(global_tid, ct_master, loc, NULL, 0);
812 __kmp_check_sync(global_tid, ct_master, loc, NULL, 0);
815 __kmp_push_sync(global_tid, ct_master, loc, NULL);
817 __kmp_check_sync(global_tid, ct_master, loc, NULL);
/* __kmpc_end_master */
  KC_TRACE(10, ("__kmpc_end_master: called T#%d\n", global_tid));
835 KMP_DEBUG_ASSERT(KMP_MASTER_GTID(global_tid));
836 KMP_POP_PARTITIONED_TIMER();
#if OMPT_SUPPORT && OMPT_OPTIONAL
  kmp_info_t *this_thr = __kmp_threads[global_tid];
840 kmp_team_t *team = this_thr->th.th_team;
841 if (ompt_enabled.ompt_callback_master) {
842 int tid = __kmp_tid_from_gtid(global_tid);
843 ompt_callbacks.ompt_callback(ompt_callback_master)(
844 ompt_scope_end, &(team->t.ompt_team_info.parallel_data),
845 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
846 OMPT_GET_RETURN_ADDRESS(0));
850 if (__kmp_env_consistency_check) {
852 KMP_WARNING(ThreadIdentInvalid);
854 if (KMP_MASTER_GTID(global_tid))
855 __kmp_pop_sync(global_tid, ct_master, loc);
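/* Usage sketch (added for orientation; not part of the runtime source).
   __kmpc_master returns nonzero only on the master thread, so a compiler can
   guard the block with it and close it with __kmpc_end_master (assumed
   lowering). Unlike single, master has no implied barrier. */
#if 0 /* illustrative user code, not compiled with this file */
#include <omp.h>
#include <stdio.h>

int main(void) {
#pragma omp parallel
  {
#pragma omp master
    printf("only the master (thread %d) prints this\n", omp_get_thread_num());
  }
  return 0;
}
#endif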
869 KMP_DEBUG_ASSERT(__kmp_init_serial);
  KC_TRACE(10, ("__kmpc_ordered: called T#%d\n", gtid));
873 if (!TCR_4(__kmp_init_parallel))
874 __kmp_parallel_initialize();
877 __kmp_resume_if_soft_paused();
881 __kmp_itt_ordered_prep(gtid);
885 th = __kmp_threads[gtid];
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
892 OMPT_STORE_RETURN_ADDRESS(gtid);
893 team = __kmp_team_from_gtid(gtid);
894 lck = (ompt_wait_id_t)&team->t.t_ordered.dt.t_value;
896 th->th.ompt_thread_info.wait_id = lck;
897 th->th.ompt_thread_info.state = ompt_state_wait_ordered;
900 codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid);
901 if (ompt_enabled.ompt_callback_mutex_acquire) {
902 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
903 ompt_mutex_ordered, omp_lock_hint_none, kmp_mutex_impl_spin,
904 (ompt_wait_id_t)lck, codeptr_ra);
  if (th->th.th_dispatch->th_deo_fcn != 0)
    (*th->th.th_dispatch->th_deo_fcn)(&gtid, &cid, loc);
  else
    __kmp_parallel_deo(&gtid, &cid, loc);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
917 th->th.ompt_thread_info.state = ompt_state_work_parallel;
918 th->th.ompt_thread_info.wait_id = 0;
921 if (ompt_enabled.ompt_callback_mutex_acquired) {
922 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
923 ompt_mutex_ordered, (ompt_wait_id_t)lck, codeptr_ra);
929 __kmp_itt_ordered_start(gtid);
/* __kmpc_end_ordered */
  KC_TRACE(10, ("__kmpc_end_ordered: called T#%d\n", gtid));
947 __kmp_itt_ordered_end(gtid);
951 th = __kmp_threads[gtid];
  if (th->th.th_dispatch->th_dxo_fcn != 0)
    (*th->th.th_dispatch->th_dxo_fcn)(&gtid, &cid, loc);
  else
    __kmp_parallel_dxo(&gtid, &cid, loc);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(gtid);
960 if (ompt_enabled.ompt_callback_mutex_released) {
961 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
963 (ompt_wait_id_t)&__kmp_team_from_gtid(gtid)->t.t_ordered.dt.t_value,
964 OMPT_LOAD_RETURN_ADDRESS(gtid));
#if KMP_USE_DYNAMIC_LOCK

static __forceinline void
__kmp_init_indirect_csptr(kmp_critical_name *crit, ident_t const *loc,
                          kmp_int32 gtid, kmp_indirect_locktag_t tag) {
977 kmp_indirect_lock_t **lck;
978 lck = (kmp_indirect_lock_t **)crit;
979 kmp_indirect_lock_t *ilk = __kmp_allocate_indirect_lock(&idx, gtid, tag);
980 KMP_I_LOCK_FUNC(ilk, init)(ilk->lock);
981 KMP_SET_I_LOCK_LOCATION(ilk, loc);
982 KMP_SET_I_LOCK_FLAGS(ilk, kmp_lf_critical_section);
  KA_TRACE(20,
           ("__kmp_init_indirect_csptr: initialized indirect lock #%d\n", tag));
986 __kmp_itt_critical_creating(ilk->lock, loc);
  int status = KMP_COMPARE_AND_STORE_PTR(lck, nullptr, ilk);
991 __kmp_itt_critical_destroyed(ilk->lock);
997 KMP_DEBUG_ASSERT(*lck != NULL);
#define KMP_ACQUIRE_TAS_LOCK(lock, gtid) \
    kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \
    kmp_int32 tas_free = KMP_LOCK_FREE(tas); \
    kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); \
    if (KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \
        !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)) { \
      KMP_FSYNC_PREPARE(l); \
      KMP_INIT_YIELD(spins); \
      kmp_backoff_t backoff = __kmp_spin_backoff_params; \
      if (TCR_4(__kmp_nth) > \
          (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { \
        KMP_YIELD_SPIN(spins); \
        __kmp_spin_backoff(&backoff); \
        KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \
            !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)); \
      KMP_FSYNC_ACQUIRED(l); \

#define KMP_TEST_TAS_LOCK(lock, gtid, rc) \
    kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \
    kmp_int32 tas_free = KMP_LOCK_FREE(tas); \
    kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); \
    rc = KMP_ATOMIC_LD_RLX(&l->lk.poll) == tas_free && \
         __kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy); \

#define KMP_RELEASE_TAS_LOCK(lock, gtid) \
  { KMP_ATOMIC_ST_REL(&((kmp_tas_lock_t *)lock)->lk.poll, KMP_LOCK_FREE(tas)); }

#include <sys/syscall.h>
#define FUTEX_WAIT 0
#define FUTEX_WAKE 1

#define KMP_ACQUIRE_FUTEX_LOCK(lock, gtid) \
    kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
    kmp_int32 gtid_code = (gtid + 1) << 1; \
    KMP_FSYNC_PREPARE(ftx); \
    kmp_int32 poll_val; \
    while ((poll_val = KMP_COMPARE_AND_STORE_RET32( \
                &(ftx->lk.poll), KMP_LOCK_FREE(futex), \
                KMP_LOCK_BUSY(gtid_code, futex))) != KMP_LOCK_FREE(futex)) { \
      kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1; \
      if (!KMP_COMPARE_AND_STORE_RET32(&(ftx->lk.poll), poll_val, \
                                       KMP_LOCK_BUSY(1, futex))) { \
        poll_val |= KMP_LOCK_BUSY(1, futex); \
      if ((rc = syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAIT, poll_val, \
                        NULL, NULL, 0)) != 0) { \
    KMP_FSYNC_ACQUIRED(ftx); \

#define KMP_TEST_FUTEX_LOCK(lock, gtid, rc) \
    kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
    if (KMP_COMPARE_AND_STORE_ACQ32(&(ftx->lk.poll), KMP_LOCK_FREE(futex), \
                                    KMP_LOCK_BUSY(gtid + 1 << 1, futex))) { \
      KMP_FSYNC_ACQUIRED(ftx); \

#define KMP_RELEASE_FUTEX_LOCK(lock, gtid) \
    kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
    KMP_FSYNC_RELEASING(ftx); \
    kmp_int32 poll_val = \
        KMP_XCHG_FIXED32(&(ftx->lk.poll), KMP_LOCK_FREE(futex)); \
    if (KMP_LOCK_STRIP(poll_val) & 1) { \
      syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAKE, \
              KMP_LOCK_BUSY(1, futex), NULL, NULL, 0); \
    KMP_YIELD_OVERSUB(); \

#endif // KMP_USE_FUTEX

#else // KMP_USE_DYNAMIC_LOCK

static kmp_user_lock_p __kmp_get_critical_section_ptr(kmp_critical_name *crit,
1118 kmp_user_lock_p *lck_pp = (kmp_user_lock_p *)crit;
1121 kmp_user_lock_p lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);
1128 lck = __kmp_user_lock_allocate(&idx, gtid, kmp_lf_critical_section);
1129 __kmp_init_user_lock_with_checks(lck);
1130 __kmp_set_user_lock_location(lck, loc);
1132 __kmp_itt_critical_creating(lck);
1143 int status = KMP_COMPARE_AND_STORE_PTR(lck_pp, 0, lck);
1148 __kmp_itt_critical_destroyed(lck);
1152 __kmp_destroy_user_lock_with_checks(lck);
1153 __kmp_user_lock_free(&idx, gtid, lck);
1154 lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);
1155 KMP_DEBUG_ASSERT(lck != NULL);
1161 #endif // KMP_USE_DYNAMIC_LOCK 1174 kmp_critical_name *crit) {
#if KMP_USE_DYNAMIC_LOCK
#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(global_tid);
#endif // OMPT_SUPPORT
  __kmpc_critical_with_hint(loc, global_tid, crit, omp_lock_hint_none);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  ompt_state_t prev_state = ompt_state_undefined;
1184 ompt_thread_info_t ti;
1186 kmp_user_lock_p lck;
  KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));
1192 KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
1193 KMP_CHECK_USER_LOCK_INIT();
  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
1197 lck = (kmp_user_lock_p)crit;
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
1202 lck = (kmp_user_lock_p)crit;
1206 lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);
1209 if (__kmp_env_consistency_check)
1210 __kmp_push_sync(global_tid, ct_critical, loc, lck);
1218 __kmp_itt_critical_acquiring(lck);
1220 #if OMPT_SUPPORT && OMPT_OPTIONAL 1221 OMPT_STORE_RETURN_ADDRESS(gtid);
1222 void *codeptr_ra = NULL;
1223 if (ompt_enabled.enabled) {
1224 ti = __kmp_threads[global_tid]->th.ompt_thread_info;
1226 prev_state = ti.state;
1227 ti.wait_id = (ompt_wait_id_t)lck;
1228 ti.state = ompt_state_wait_critical;
1231 codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid);
1232 if (ompt_enabled.ompt_callback_mutex_acquire) {
1233 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
1234 ompt_mutex_critical, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
1235 (ompt_wait_id_t)crit, codeptr_ra);
1241 __kmp_acquire_user_lock_with_checks(lck, global_tid);
1244 __kmp_itt_critical_acquired(lck);
1246 #if OMPT_SUPPORT && OMPT_OPTIONAL 1247 if (ompt_enabled.enabled) {
1249 ti.state = prev_state;
1253 if (ompt_enabled.ompt_callback_mutex_acquired) {
1254 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
1255 ompt_mutex_critical, (ompt_wait_id_t)crit, codeptr_ra);
1259 KMP_POP_PARTITIONED_TIMER();
1261 KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
  KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
#endif // KMP_USE_DYNAMIC_LOCK
}

#if KMP_USE_DYNAMIC_LOCK

// Converts the given hint to an internal lock implementation.
static __forceinline kmp_dyna_lockseq_t __kmp_map_hint_to_lock(uintptr_t hint) {
#if KMP_USE_TSX
#define KMP_TSX_LOCK(seq) lockseq_##seq
#else
#define KMP_TSX_LOCK(seq) __kmp_user_lock_seq
#endif

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
#define KMP_CPUINFO_RTM (__kmp_cpuinfo.rtm)
#else
#define KMP_CPUINFO_RTM 0
#endif

  // Hints that do not require further logic.
  if (hint & kmp_lock_hint_hle)
1284 return KMP_TSX_LOCK(hle);
1285 if (hint & kmp_lock_hint_rtm)
1286 return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm) : __kmp_user_lock_seq;
1287 if (hint & kmp_lock_hint_adaptive)
1288 return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(adaptive) : __kmp_user_lock_seq;
1291 if ((hint & omp_lock_hint_contended) && (hint & omp_lock_hint_uncontended))
1292 return __kmp_user_lock_seq;
1293 if ((hint & omp_lock_hint_speculative) &&
1294 (hint & omp_lock_hint_nonspeculative))
1295 return __kmp_user_lock_seq;
1298 if (hint & omp_lock_hint_contended)
1299 return lockseq_queuing;
1302 if ((hint & omp_lock_hint_uncontended) && !(hint & omp_lock_hint_speculative))
1306 if (hint & omp_lock_hint_speculative)
1307 return KMP_TSX_LOCK(hle);
1309 return __kmp_user_lock_seq;
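/* Usage sketch (added for orientation; not part of the runtime source).
   omp_init_lock_with_hint passes the user's hint to the runtime, which maps
   it to a lock implementation much like __kmp_map_hint_to_lock above
   (e.g. contended -> queuing lock, speculative -> HLE/TSX when available). */
#if 0 /* illustrative user code, not compiled with this file */
#include <omp.h>

int main(void) {
  omp_lock_t l;
  omp_init_lock_with_hint(&l, omp_lock_hint_contended);
  omp_set_lock(&l);
  /* ... critical work ... */
  omp_unset_lock(&l);
  omp_destroy_lock(&l);
  return 0;
}
#endif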
#if OMPT_SUPPORT && OMPT_OPTIONAL
#if KMP_USE_DYNAMIC_LOCK
static kmp_mutex_impl_t
__ompt_get_mutex_impl_type(void *user_lock, kmp_indirect_lock_t *ilock = 0) {
  if (user_lock) {
    switch (KMP_EXTRACT_D_TAG(user_lock)) {
1322 return kmp_mutex_impl_queuing;
1325 return kmp_mutex_impl_spin;
1328 return kmp_mutex_impl_speculative;
1331 return kmp_mutex_impl_none;
1333 ilock = KMP_LOOKUP_I_LOCK(user_lock);
1336 switch (ilock->type) {
1338 case locktag_adaptive:
1340 return kmp_mutex_impl_speculative;
1342 case locktag_nested_tas:
1343 return kmp_mutex_impl_spin;
1345 case locktag_nested_futex:
1347 case locktag_ticket:
1348 case locktag_queuing:
1350 case locktag_nested_ticket:
1351 case locktag_nested_queuing:
1352 case locktag_nested_drdpa:
1353 return kmp_mutex_impl_queuing;
1355 return kmp_mutex_impl_none;
1360 static kmp_mutex_impl_t __ompt_get_mutex_impl_type() {
1361 switch (__kmp_user_lock_kind) {
1363 return kmp_mutex_impl_spin;
1370 return kmp_mutex_impl_queuing;
1375 return kmp_mutex_impl_speculative;
1378 return kmp_mutex_impl_none;
#endif // KMP_USE_DYNAMIC_LOCK
#endif // OMPT_SUPPORT && OMPT_OPTIONAL

void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
                               kmp_critical_name *crit, uint32_t hint) {
1400 kmp_user_lock_p lck;
1401 #if OMPT_SUPPORT && OMPT_OPTIONAL 1402 ompt_state_t prev_state = ompt_state_undefined;
1403 ompt_thread_info_t ti;
1405 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid);
1407 codeptr = OMPT_GET_RETURN_ADDRESS(0);
  KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));

  kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;

  KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
  // Check if it is initialized.
  kmp_dyna_lockseq_t lckseq = __kmp_map_hint_to_lock(hint);
  if (KMP_IS_D_LOCK(lckseq)) {
    KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0,
                                KMP_GET_D_TAG(lckseq));
  } else {
    __kmp_init_indirect_csptr(crit, loc, global_tid, KMP_GET_I_TAG(lckseq));
  }
1427 if (KMP_EXTRACT_D_TAG(lk) != 0) {
1428 lck = (kmp_user_lock_p)lk;
1429 if (__kmp_env_consistency_check) {
1430 __kmp_push_sync(global_tid, ct_critical, loc, lck,
1431 __kmp_map_hint_to_lock(hint));
1434 __kmp_itt_critical_acquiring(lck);
1436 #if OMPT_SUPPORT && OMPT_OPTIONAL 1437 if (ompt_enabled.enabled) {
1438 ti = __kmp_threads[global_tid]->th.ompt_thread_info;
1440 prev_state = ti.state;
1441 ti.wait_id = (ompt_wait_id_t)lck;
1442 ti.state = ompt_state_wait_critical;
1445 if (ompt_enabled.ompt_callback_mutex_acquire) {
1446 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
        ompt_mutex_critical, (unsigned int)hint,
        __ompt_get_mutex_impl_type(crit), (ompt_wait_id_t)crit, codeptr);
#if KMP_USE_INLINED_TAS
    if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) {
      KMP_ACQUIRE_TAS_LOCK(lck, global_tid);
    } else
#elif KMP_USE_INLINED_FUTEX
    if (__kmp_user_lock_seq == lockseq_futex && !__kmp_env_consistency_check) {
      KMP_ACQUIRE_FUTEX_LOCK(lck, global_tid);
    } else
#endif
    {
      KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
    }
1465 kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
1467 if (__kmp_env_consistency_check) {
1468 __kmp_push_sync(global_tid, ct_critical, loc, lck,
1469 __kmp_map_hint_to_lock(hint));
1472 __kmp_itt_critical_acquiring(lck);
1474 #if OMPT_SUPPORT && OMPT_OPTIONAL 1475 if (ompt_enabled.enabled) {
1476 ti = __kmp_threads[global_tid]->th.ompt_thread_info;
1478 prev_state = ti.state;
1479 ti.wait_id = (ompt_wait_id_t)lck;
1480 ti.state = ompt_state_wait_critical;
1483 if (ompt_enabled.ompt_callback_mutex_acquire) {
1484 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
        ompt_mutex_critical, (unsigned int)hint,
        __ompt_get_mutex_impl_type(0, ilk), (ompt_wait_id_t)crit, codeptr);
    KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
1492 KMP_POP_PARTITIONED_TIMER();
1495 __kmp_itt_critical_acquired(lck);
1497 #if OMPT_SUPPORT && OMPT_OPTIONAL 1498 if (ompt_enabled.enabled) {
1500 ti.state = prev_state;
1504 if (ompt_enabled.ompt_callback_mutex_acquired) {
1505 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
1506 ompt_mutex_critical, (ompt_wait_id_t)crit, codeptr);
1511 KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
  KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
#endif // KMP_USE_DYNAMIC_LOCK
}

/* __kmpc_end_critical */
void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
                         kmp_critical_name *crit) {
1528 kmp_user_lock_p lck;
  KC_TRACE(10, ("__kmpc_end_critical: called T#%d\n", global_tid));

#if KMP_USE_DYNAMIC_LOCK
  if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
1534 lck = (kmp_user_lock_p)crit;
1535 KMP_ASSERT(lck != NULL);
1536 if (__kmp_env_consistency_check) {
1537 __kmp_pop_sync(global_tid, ct_critical, loc);
1540 __kmp_itt_critical_releasing(lck);
#if KMP_USE_INLINED_TAS
    if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) {
      KMP_RELEASE_TAS_LOCK(lck, global_tid);
    } else
#elif KMP_USE_INLINED_FUTEX
    if (__kmp_user_lock_seq == lockseq_futex && !__kmp_env_consistency_check) {
      KMP_RELEASE_FUTEX_LOCK(lck, global_tid);
    } else
#endif
1552 KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
1555 kmp_indirect_lock_t *ilk =
1556 (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
1557 KMP_ASSERT(ilk != NULL);
1559 if (__kmp_env_consistency_check) {
1560 __kmp_pop_sync(global_tid, ct_critical, loc);
1563 __kmp_itt_critical_releasing(lck);
1565 KMP_I_LOCK_FUNC(ilk, unset)(lck, global_tid);
1568 #else // KMP_USE_DYNAMIC_LOCK 1570 if ((__kmp_user_lock_kind == lk_tas) &&
1571 (
sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
1572 lck = (kmp_user_lock_p)crit;
1575 else if ((__kmp_user_lock_kind == lk_futex) &&
1576 (
sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
1577 lck = (kmp_user_lock_p)crit;
1581 lck = (kmp_user_lock_p)TCR_PTR(*((kmp_user_lock_p *)crit));
1584 KMP_ASSERT(lck != NULL);
1586 if (__kmp_env_consistency_check)
1587 __kmp_pop_sync(global_tid, ct_critical, loc);
1590 __kmp_itt_critical_releasing(lck);
1594 __kmp_release_user_lock_with_checks(lck, global_tid);
1596 #endif // KMP_USE_DYNAMIC_LOCK 1598 #if OMPT_SUPPORT && OMPT_OPTIONAL 1601 OMPT_STORE_RETURN_ADDRESS(global_tid);
1602 if (ompt_enabled.ompt_callback_mutex_released) {
1603 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
1604 ompt_mutex_critical, (ompt_wait_id_t)crit, OMPT_LOAD_RETURN_ADDRESS(0));
1608 KMP_POP_PARTITIONED_TIMER();
  KA_TRACE(15, ("__kmpc_end_critical: done T#%d\n", global_tid));
/* __kmpc_barrier_master */
  KC_TRACE(10, ("__kmpc_barrier_master: called T#%d\n", global_tid));
1626 if (!TCR_4(__kmp_init_parallel))
1627 __kmp_parallel_initialize();
1630 __kmp_resume_if_soft_paused();
1633 if (__kmp_env_consistency_check)
1634 __kmp_check_barrier(global_tid, ct_barrier, loc);
1637 ompt_frame_t *ompt_frame;
1638 if (ompt_enabled.enabled) {
1639 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
1640 if (ompt_frame->enter_frame.ptr == NULL)
1641 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
1642 OMPT_STORE_RETURN_ADDRESS(global_tid);
1646 __kmp_threads[global_tid]->th.th_ident = loc;
1648 status = __kmp_barrier(bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL);
1649 #if OMPT_SUPPORT && OMPT_OPTIONAL 1650 if (ompt_enabled.enabled) {
1651 ompt_frame->enter_frame = ompt_data_none;
1655 return (status != 0) ? 0 : 1;
1668 KC_TRACE(10, (
"__kmpc_end_barrier_master: called T#%d\n", global_tid));
1670 __kmp_end_split_barrier(bs_plain_barrier, global_tid);
1686 KC_TRACE(10, (
"__kmpc_barrier_master_nowait: called T#%d\n", global_tid));
1688 if (!TCR_4(__kmp_init_parallel))
1689 __kmp_parallel_initialize();
1692 __kmp_resume_if_soft_paused();
1695 if (__kmp_env_consistency_check) {
1697 KMP_WARNING(ConstructIdentInvalid);
1699 __kmp_check_barrier(global_tid, ct_barrier, loc);
1703 ompt_frame_t *ompt_frame;
1704 if (ompt_enabled.enabled) {
1705 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
1706 if (ompt_frame->enter_frame.ptr == NULL)
1707 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
1708 OMPT_STORE_RETURN_ADDRESS(global_tid);
1712 __kmp_threads[global_tid]->th.th_ident = loc;
1714 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
1715 #if OMPT_SUPPORT && OMPT_OPTIONAL 1716 if (ompt_enabled.enabled) {
1717 ompt_frame->enter_frame = ompt_data_none;
1723 if (__kmp_env_consistency_check) {
1727 if (global_tid < 0) {
1728 KMP_WARNING(ThreadIdentInvalid);
1734 __kmp_pop_sync(global_tid, ct_master, loc);
1754 kmp_int32 rc = __kmp_enter_single(global_tid, loc, TRUE);
1759 KMP_PUSH_PARTITIONED_TIMER(OMP_single);
1762 #if OMPT_SUPPORT && OMPT_OPTIONAL 1763 kmp_info_t *this_thr = __kmp_threads[global_tid];
1764 kmp_team_t *team = this_thr->th.th_team;
1765 int tid = __kmp_tid_from_gtid(global_tid);
1767 if (ompt_enabled.enabled) {
1769 if (ompt_enabled.ompt_callback_work) {
1770 ompt_callbacks.ompt_callback(ompt_callback_work)(
1771 ompt_work_single_executor, ompt_scope_begin,
1772 &(team->t.ompt_team_info.parallel_data),
1773 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1774 1, OMPT_GET_RETURN_ADDRESS(0));
1777 if (ompt_enabled.ompt_callback_work) {
1778 ompt_callbacks.ompt_callback(ompt_callback_work)(
1779 ompt_work_single_other, ompt_scope_begin,
1780 &(team->t.ompt_team_info.parallel_data),
1781 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1782 1, OMPT_GET_RETURN_ADDRESS(0));
1783 ompt_callbacks.ompt_callback(ompt_callback_work)(
1784 ompt_work_single_other, ompt_scope_end,
1785 &(team->t.ompt_team_info.parallel_data),
1786 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1787 1, OMPT_GET_RETURN_ADDRESS(0));
1806 __kmp_exit_single(global_tid);
1807 KMP_POP_PARTITIONED_TIMER();
1809 #if OMPT_SUPPORT && OMPT_OPTIONAL 1810 kmp_info_t *this_thr = __kmp_threads[global_tid];
1811 kmp_team_t *team = this_thr->th.th_team;
1812 int tid = __kmp_tid_from_gtid(global_tid);
1814 if (ompt_enabled.ompt_callback_work) {
1815 ompt_callbacks.ompt_callback(ompt_callback_work)(
1816 ompt_work_single_executor, ompt_scope_end,
1817 &(team->t.ompt_team_info.parallel_data),
1818 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1,
1819 OMPT_GET_RETURN_ADDRESS(0));
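/* Usage sketch (added for orientation; not part of the runtime source).
   __kmpc_single returns 1 for the one thread that executes the block; the
   others skip it and wait at the implicit barrier unless nowait is present,
   and __kmpc_end_single closes the executor's block (assumed lowering). */
#if 0 /* illustrative user code, not compiled with this file */
#include <omp.h>
#include <stdio.h>

int main(void) {
#pragma omp parallel
  {
#pragma omp single
    printf("executed once, by thread %d\n", omp_get_thread_num());
  }
  return 0;
}
#endif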
1832 KMP_POP_PARTITIONED_TIMER();
  KE_TRACE(10, ("__kmpc_for_static_fini called T#%d\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_work) {
1837 ompt_work_t ompt_work_type = ompt_work_loop;
1838 ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
1839 ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
1843 ompt_work_type = ompt_work_loop;
1845 ompt_work_type = ompt_work_sections;
1847 ompt_work_type = ompt_work_distribute;
1852 KMP_DEBUG_ASSERT(ompt_work_type);
1854 ompt_callbacks.ompt_callback(ompt_callback_work)(
1855 ompt_work_type, ompt_scope_end, &(team_info->parallel_data),
1856 &(task_info->task_data), 0, OMPT_GET_RETURN_ADDRESS(0));
1859 if (__kmp_env_consistency_check)
1860 __kmp_pop_workshare(global_tid, ct_pdo, loc);
void ompc_set_num_threads(int arg) {
  __kmp_set_num_threads(arg, __kmp_entry_gtid());
}
1871 void ompc_set_dynamic(
int flag) {
1875 thread = __kmp_entry_thread();
1877 __kmp_save_internal_controls(thread);
1879 set__dynamic(thread, flag ? TRUE : FALSE);
1882 void ompc_set_nested(
int flag) {
1886 thread = __kmp_entry_thread();
1888 __kmp_save_internal_controls(thread);
1890 set__max_active_levels(thread, flag ? __kmp_dflt_max_active_levels : 1);
1893 void ompc_set_max_active_levels(
int max_active_levels) {
1898 __kmp_set_max_active_levels(__kmp_entry_gtid(), max_active_levels);
1901 void ompc_set_schedule(omp_sched_t kind,
int modifier) {
1903 __kmp_set_schedule(__kmp_entry_gtid(), (kmp_sched_t)kind, modifier);
1906 int ompc_get_ancestor_thread_num(
int level) {
1907 return __kmp_get_ancestor_thread_num(__kmp_entry_gtid(), level);
1910 int ompc_get_team_size(
int level) {
1911 return __kmp_get_team_size(__kmp_entry_gtid(), level);
1917 void ompc_set_affinity_format(
char const *format) {
1918 if (!__kmp_init_serial) {
1919 __kmp_serial_initialize();
1921 __kmp_strncpy_truncate(__kmp_affinity_format, KMP_AFFINITY_FORMAT_SIZE,
1922 format, KMP_STRLEN(format) + 1);
1925 size_t ompc_get_affinity_format(
char *buffer,
size_t size) {
1927 if (!__kmp_init_serial) {
1928 __kmp_serial_initialize();
1930 format_size = KMP_STRLEN(__kmp_affinity_format);
1931 if (buffer && size) {
1932 __kmp_strncpy_truncate(buffer, size, __kmp_affinity_format,
1938 void ompc_display_affinity(
char const *format) {
1940 if (!TCR_4(__kmp_init_middle)) {
1941 __kmp_middle_initialize();
1943 gtid = __kmp_get_gtid();
1944 __kmp_aux_display_affinity(gtid, format);
1947 size_t ompc_capture_affinity(
char *buffer,
size_t buf_size,
1948 char const *format) {
1950 size_t num_required;
1951 kmp_str_buf_t capture_buf;
1952 if (!TCR_4(__kmp_init_middle)) {
1953 __kmp_middle_initialize();
1955 gtid = __kmp_get_gtid();
1956 __kmp_str_buf_init(&capture_buf);
1957 num_required = __kmp_aux_capture_affinity(gtid, format, &capture_buf);
1958 if (buffer && buf_size) {
1959 __kmp_strncpy_truncate(buffer, buf_size, capture_buf.str,
1960 capture_buf.used + 1);
1962 __kmp_str_buf_free(&capture_buf);
1963 return num_required;
1967 void kmpc_set_stacksize(
int arg) {
1969 __kmp_aux_set_stacksize(arg);
1972 void kmpc_set_stacksize_s(
size_t arg) {
1974 __kmp_aux_set_stacksize(arg);
1977 void kmpc_set_blocktime(
int arg) {
1981 gtid = __kmp_entry_gtid();
1982 tid = __kmp_tid_from_gtid(gtid);
1983 thread = __kmp_thread_from_gtid(gtid);
1985 __kmp_aux_set_blocktime(arg, thread, tid);
1988 void kmpc_set_library(
int arg) {
1990 __kmp_user_set_library((
enum library_type)arg);
1993 void kmpc_set_defaults(
char const *str) {
1995 __kmp_aux_set_defaults(str, KMP_STRLEN(str));
1998 void kmpc_set_disp_num_buffers(
int arg) {
2001 if (__kmp_init_serial == 0 && arg > 0)
2002 __kmp_dispatch_num_buffers = arg;
2005 int kmpc_set_affinity_mask_proc(
int proc,
void **mask) {
2006 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED 2009 if (!TCR_4(__kmp_init_middle)) {
2010 __kmp_middle_initialize();
2012 return __kmp_aux_set_affinity_mask_proc(proc, mask);
2016 int kmpc_unset_affinity_mask_proc(
int proc,
void **mask) {
2017 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED 2020 if (!TCR_4(__kmp_init_middle)) {
2021 __kmp_middle_initialize();
2023 return __kmp_aux_unset_affinity_mask_proc(proc, mask);
2027 int kmpc_get_affinity_mask_proc(
int proc,
void **mask) {
2028 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED 2031 if (!TCR_4(__kmp_init_middle)) {
2032 __kmp_middle_initialize();
2034 return __kmp_aux_get_affinity_mask_proc(proc, mask);
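/* Usage sketch (added for orientation; not part of the runtime source).
   The OpenMP 5.0 affinity-format API is a thin wrapper over the
   ompc_set_affinity_format / ompc_capture_affinity entry points above. */
#if 0 /* illustrative user code, not compiled with this file */
#include <omp.h>
#include <stdio.h>

int main(void) {
  omp_set_affinity_format("thread %n binds to %A");
#pragma omp parallel num_threads(2)
  {
    char buf[128];
    size_t n = omp_capture_affinity(buf, sizeof(buf), NULL);
    if (n < sizeof(buf))
      printf("%s\n", buf);
  }
  return 0;
}
#endif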
2084 void *cpy_data,
void (*cpy_func)(
void *,
void *),
2088 KC_TRACE(10, (
"__kmpc_copyprivate: called T#%d\n", gtid));
2092 data_ptr = &__kmp_team_from_gtid(gtid)->t.t_copypriv_data;
2094 if (__kmp_env_consistency_check) {
2096 KMP_WARNING(ConstructIdentInvalid);
2103 *data_ptr = cpy_data;
2106 ompt_frame_t *ompt_frame;
2107 if (ompt_enabled.enabled) {
2108 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
2109 if (ompt_frame->enter_frame.ptr == NULL)
2110 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
2111 OMPT_STORE_RETURN_ADDRESS(gtid);
2116 __kmp_threads[gtid]->th.th_ident = loc;
2118 __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
2121 (*cpy_func)(cpy_data, *data_ptr);
2127 if (ompt_enabled.enabled) {
2128 OMPT_STORE_RETURN_ADDRESS(gtid);
2132 __kmp_threads[gtid]->th.th_ident = loc;
2135 __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
2136 #if OMPT_SUPPORT && OMPT_OPTIONAL 2137 if (ompt_enabled.enabled) {
2138 ompt_frame->enter_frame = ompt_data_none;
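/* Usage sketch (added for orientation; not part of the runtime source).
   copyprivate broadcasts the single executor's value to the other threads of
   the team; a compiler typically emits __kmpc_copyprivate with a small copy
   helper and the "did it" flag returned by __kmpc_single (assumed lowering). */
#if 0 /* illustrative user code, not compiled with this file */
#include <omp.h>
#include <stdio.h>

int main(void) {
  int token;
#pragma omp parallel private(token)
  {
#pragma omp single copyprivate(token)
    token = omp_get_thread_num();
    printf("thread %d got token %d\n", omp_get_thread_num(), token);
  }
  return 0;
}
#endif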
2145 #define INIT_LOCK __kmp_init_user_lock_with_checks 2146 #define INIT_NESTED_LOCK __kmp_init_nested_user_lock_with_checks 2147 #define ACQUIRE_LOCK __kmp_acquire_user_lock_with_checks 2148 #define ACQUIRE_LOCK_TIMED __kmp_acquire_user_lock_with_checks_timed 2149 #define ACQUIRE_NESTED_LOCK __kmp_acquire_nested_user_lock_with_checks 2150 #define ACQUIRE_NESTED_LOCK_TIMED \ 2151 __kmp_acquire_nested_user_lock_with_checks_timed 2152 #define RELEASE_LOCK __kmp_release_user_lock_with_checks 2153 #define RELEASE_NESTED_LOCK __kmp_release_nested_user_lock_with_checks 2154 #define TEST_LOCK __kmp_test_user_lock_with_checks 2155 #define TEST_NESTED_LOCK __kmp_test_nested_user_lock_with_checks 2156 #define DESTROY_LOCK __kmp_destroy_user_lock_with_checks 2157 #define DESTROY_NESTED_LOCK __kmp_destroy_nested_user_lock_with_checks 2162 #if KMP_USE_DYNAMIC_LOCK 2165 static __forceinline
void __kmp_init_lock_with_hint(
ident_t *loc,
void **lock,
2166 kmp_dyna_lockseq_t seq) {
2167 if (KMP_IS_D_LOCK(seq)) {
2168 KMP_INIT_D_LOCK(lock, seq);
2170 __kmp_itt_lock_creating((kmp_user_lock_p)lock, NULL);
2173 KMP_INIT_I_LOCK(lock, seq);
2175 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
2176 __kmp_itt_lock_creating(ilk->lock, loc);
2182 static __forceinline
void 2183 __kmp_init_nest_lock_with_hint(
ident_t *loc,
void **lock,
2184 kmp_dyna_lockseq_t seq) {
2187 if (seq == lockseq_hle || seq == lockseq_rtm || seq == lockseq_adaptive)
2188 seq = __kmp_user_lock_seq;
2192 seq = lockseq_nested_tas;
2196 seq = lockseq_nested_futex;
2199 case lockseq_ticket:
2200 seq = lockseq_nested_ticket;
2202 case lockseq_queuing:
2203 seq = lockseq_nested_queuing;
2206 seq = lockseq_nested_drdpa;
2209 seq = lockseq_nested_queuing;
2211 KMP_INIT_I_LOCK(lock, seq);
2213 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
2214 __kmp_itt_lock_creating(ilk->lock, loc);
2219 void __kmpc_init_lock_with_hint(
ident_t *loc, kmp_int32 gtid,
void **user_lock,
2221 KMP_DEBUG_ASSERT(__kmp_init_serial);
2222 if (__kmp_env_consistency_check && user_lock == NULL) {
2223 KMP_FATAL(LockIsUninitialized,
"omp_init_lock_with_hint");
2226 __kmp_init_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));
2228 #if OMPT_SUPPORT && OMPT_OPTIONAL 2230 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2232 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2233 if (ompt_enabled.ompt_callback_lock_init) {
2234 ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2235 ompt_mutex_lock, (omp_lock_hint_t)hint,
2236 __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock,
2243 void __kmpc_init_nest_lock_with_hint(
ident_t *loc, kmp_int32 gtid,
2244 void **user_lock, uintptr_t hint) {
2245 KMP_DEBUG_ASSERT(__kmp_init_serial);
2246 if (__kmp_env_consistency_check && user_lock == NULL) {
2247 KMP_FATAL(LockIsUninitialized,
"omp_init_nest_lock_with_hint");
2250 __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));
2252 #if OMPT_SUPPORT && OMPT_OPTIONAL 2254 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2256 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2257 if (ompt_enabled.ompt_callback_lock_init) {
2258 ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2259 ompt_mutex_nest_lock, (omp_lock_hint_t)hint,
2260 __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock,
2266 #endif // KMP_USE_DYNAMIC_LOCK 2269 void __kmpc_init_lock(
ident_t *loc, kmp_int32 gtid,
void **user_lock) {
2270 #if KMP_USE_DYNAMIC_LOCK 2272 KMP_DEBUG_ASSERT(__kmp_init_serial);
2273 if (__kmp_env_consistency_check && user_lock == NULL) {
2274 KMP_FATAL(LockIsUninitialized,
"omp_init_lock");
2276 __kmp_init_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);
2278 #if OMPT_SUPPORT && OMPT_OPTIONAL 2280 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2282 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2283 if (ompt_enabled.ompt_callback_lock_init) {
2284 ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2285 ompt_mutex_lock, omp_lock_hint_none,
2286 __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock,
2291 #else // KMP_USE_DYNAMIC_LOCK 2293 static char const *
const func =
"omp_init_lock";
2294 kmp_user_lock_p lck;
2295 KMP_DEBUG_ASSERT(__kmp_init_serial);
2297 if (__kmp_env_consistency_check) {
2298 if (user_lock == NULL) {
2299 KMP_FATAL(LockIsUninitialized, func);
2303 KMP_CHECK_USER_LOCK_INIT();
2305 if ((__kmp_user_lock_kind == lk_tas) &&
2306 (
sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2307 lck = (kmp_user_lock_p)user_lock;
2310 else if ((__kmp_user_lock_kind == lk_futex) &&
2311 (
sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2312 lck = (kmp_user_lock_p)user_lock;
2316 lck = __kmp_user_lock_allocate(user_lock, gtid, 0);
2319 __kmp_set_user_lock_location(lck, loc);
2321 #if OMPT_SUPPORT && OMPT_OPTIONAL 2323 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2325 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2326 if (ompt_enabled.ompt_callback_lock_init) {
2327 ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2328 ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
2329 (ompt_wait_id_t)user_lock, codeptr);
2334 __kmp_itt_lock_creating(lck);
2337 #endif // KMP_USE_DYNAMIC_LOCK 2341 void __kmpc_init_nest_lock(
ident_t *loc, kmp_int32 gtid,
void **user_lock) {
2342 #if KMP_USE_DYNAMIC_LOCK 2344 KMP_DEBUG_ASSERT(__kmp_init_serial);
2345 if (__kmp_env_consistency_check && user_lock == NULL) {
2346 KMP_FATAL(LockIsUninitialized,
"omp_init_nest_lock");
2348 __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);
2350 #if OMPT_SUPPORT && OMPT_OPTIONAL 2352 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2354 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2355 if (ompt_enabled.ompt_callback_lock_init) {
2356 ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2357 ompt_mutex_nest_lock, omp_lock_hint_none,
2358 __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock,
2363 #else // KMP_USE_DYNAMIC_LOCK 2365 static char const *
const func =
"omp_init_nest_lock";
2366 kmp_user_lock_p lck;
2367 KMP_DEBUG_ASSERT(__kmp_init_serial);
2369 if (__kmp_env_consistency_check) {
2370 if (user_lock == NULL) {
2371 KMP_FATAL(LockIsUninitialized, func);
2375 KMP_CHECK_USER_LOCK_INIT();
2377 if ((__kmp_user_lock_kind == lk_tas) &&
2378 (
sizeof(lck->tas.lk.poll) +
sizeof(lck->tas.lk.depth_locked) <=
2379 OMP_NEST_LOCK_T_SIZE)) {
2380 lck = (kmp_user_lock_p)user_lock;
2383 else if ((__kmp_user_lock_kind == lk_futex) &&
2384 (
sizeof(lck->futex.lk.poll) +
sizeof(lck->futex.lk.depth_locked) <=
2385 OMP_NEST_LOCK_T_SIZE)) {
2386 lck = (kmp_user_lock_p)user_lock;
2390 lck = __kmp_user_lock_allocate(user_lock, gtid, 0);
2393 INIT_NESTED_LOCK(lck);
2394 __kmp_set_user_lock_location(lck, loc);
2396 #if OMPT_SUPPORT && OMPT_OPTIONAL 2398 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2400 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2401 if (ompt_enabled.ompt_callback_lock_init) {
2402 ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2403 ompt_mutex_nest_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
2404 (ompt_wait_id_t)user_lock, codeptr);
2409 __kmp_itt_lock_creating(lck);
2412 #endif // KMP_USE_DYNAMIC_LOCK 2415 void __kmpc_destroy_lock(
ident_t *loc, kmp_int32 gtid,
void **user_lock) {
2416 #if KMP_USE_DYNAMIC_LOCK 2419 kmp_user_lock_p lck;
2420 if (KMP_EXTRACT_D_TAG(user_lock) == 0) {
2421 lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock;
2423 lck = (kmp_user_lock_p)user_lock;
2425 __kmp_itt_lock_destroyed(lck);
2427 #if OMPT_SUPPORT && OMPT_OPTIONAL 2429 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2431 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2432 if (ompt_enabled.ompt_callback_lock_destroy) {
2433 kmp_user_lock_p lck;
2434 if (KMP_EXTRACT_D_TAG(user_lock) == 0) {
2435 lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock;
2437 lck = (kmp_user_lock_p)user_lock;
2439 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2440 ompt_mutex_lock, (ompt_wait_id_t)user_lock, codeptr);
2443 KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);
2445 kmp_user_lock_p lck;
2447 if ((__kmp_user_lock_kind == lk_tas) &&
2448 (
sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2449 lck = (kmp_user_lock_p)user_lock;
2452 else if ((__kmp_user_lock_kind == lk_futex) &&
2453 (
sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2454 lck = (kmp_user_lock_p)user_lock;
2458 lck = __kmp_lookup_user_lock(user_lock,
"omp_destroy_lock");
2461 #if OMPT_SUPPORT && OMPT_OPTIONAL 2463 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2465 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2466 if (ompt_enabled.ompt_callback_lock_destroy) {
2467 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2468 ompt_mutex_lock, (ompt_wait_id_t)user_lock, codeptr);
2473 __kmp_itt_lock_destroyed(lck);
2477 if ((__kmp_user_lock_kind == lk_tas) &&
2478 (
sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2482 else if ((__kmp_user_lock_kind == lk_futex) &&
2483 (
sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2488 __kmp_user_lock_free(user_lock, gtid, lck);
2490 #endif // KMP_USE_DYNAMIC_LOCK 2494 void __kmpc_destroy_nest_lock(
ident_t *loc, kmp_int32 gtid,
void **user_lock) {
2495 #if KMP_USE_DYNAMIC_LOCK 2498 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(user_lock);
2499 __kmp_itt_lock_destroyed(ilk->lock);
2501 #if OMPT_SUPPORT && OMPT_OPTIONAL 2503 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2505 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2506 if (ompt_enabled.ompt_callback_lock_destroy) {
2507 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2508 ompt_mutex_nest_lock, (ompt_wait_id_t)user_lock, codeptr);
2511 KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);
2513 #else // KMP_USE_DYNAMIC_LOCK 2515 kmp_user_lock_p lck;
2517 if ((__kmp_user_lock_kind == lk_tas) &&
2518 (
sizeof(lck->tas.lk.poll) +
sizeof(lck->tas.lk.depth_locked) <=
2519 OMP_NEST_LOCK_T_SIZE)) {
2520 lck = (kmp_user_lock_p)user_lock;
2523 else if ((__kmp_user_lock_kind == lk_futex) &&
2524 (
sizeof(lck->futex.lk.poll) +
sizeof(lck->futex.lk.depth_locked) <=
2525 OMP_NEST_LOCK_T_SIZE)) {
2526 lck = (kmp_user_lock_p)user_lock;
2530 lck = __kmp_lookup_user_lock(user_lock,
"omp_destroy_nest_lock");
2533 #if OMPT_SUPPORT && OMPT_OPTIONAL 2535 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2537 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2538 if (ompt_enabled.ompt_callback_lock_destroy) {
2539 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2540 ompt_mutex_nest_lock, (ompt_wait_id_t)user_lock, codeptr);
2545 __kmp_itt_lock_destroyed(lck);
2548 DESTROY_NESTED_LOCK(lck);
2550 if ((__kmp_user_lock_kind == lk_tas) &&
2551 (
sizeof(lck->tas.lk.poll) +
sizeof(lck->tas.lk.depth_locked) <=
2552 OMP_NEST_LOCK_T_SIZE)) {
2556 else if ((__kmp_user_lock_kind == lk_futex) &&
2557 (
sizeof(lck->futex.lk.poll) +
sizeof(lck->futex.lk.depth_locked) <=
2558 OMP_NEST_LOCK_T_SIZE)) {
2563 __kmp_user_lock_free(user_lock, gtid, lck);
2565 #endif // KMP_USE_DYNAMIC_LOCK 2568 void __kmpc_set_lock(
ident_t *loc, kmp_int32 gtid,
void **user_lock) {
2570 #if KMP_USE_DYNAMIC_LOCK 2571 int tag = KMP_EXTRACT_D_TAG(user_lock);
2573 __kmp_itt_lock_acquiring(
2577 #if OMPT_SUPPORT && OMPT_OPTIONAL 2579 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2581 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2582 if (ompt_enabled.ompt_callback_mutex_acquire) {
2583 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2584 ompt_mutex_lock, omp_lock_hint_none,
2585 __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock,
2589 #if KMP_USE_INLINED_TAS 2590 if (tag == locktag_tas && !__kmp_env_consistency_check) {
2591 KMP_ACQUIRE_TAS_LOCK(user_lock, gtid);
2593 #elif KMP_USE_INLINED_FUTEX 2594 if (tag == locktag_futex && !__kmp_env_consistency_check) {
2595 KMP_ACQUIRE_FUTEX_LOCK(user_lock, gtid);
2599 __kmp_direct_set[tag]((kmp_dyna_lock_t *)user_lock, gtid);
2602 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
2604 #if OMPT_SUPPORT && OMPT_OPTIONAL 2605 if (ompt_enabled.ompt_callback_mutex_acquired) {
2606 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2607 ompt_mutex_lock, (ompt_wait_id_t)user_lock, codeptr);
2611 #else // KMP_USE_DYNAMIC_LOCK 2613 kmp_user_lock_p lck;
2615 if ((__kmp_user_lock_kind == lk_tas) &&
2616 (
sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2617 lck = (kmp_user_lock_p)user_lock;
2620 else if ((__kmp_user_lock_kind == lk_futex) &&
2621 (
sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2622 lck = (kmp_user_lock_p)user_lock;
2626 lck = __kmp_lookup_user_lock(user_lock,
"omp_set_lock");
2630 __kmp_itt_lock_acquiring(lck);
2632 #if OMPT_SUPPORT && OMPT_OPTIONAL 2634 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2636 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2637 if (ompt_enabled.ompt_callback_mutex_acquire) {
2638 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2639 ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
2640 (ompt_wait_id_t)lck, codeptr);
2644 ACQUIRE_LOCK(lck, gtid);
2647 __kmp_itt_lock_acquired(lck);
2650 #if OMPT_SUPPORT && OMPT_OPTIONAL 2651 if (ompt_enabled.ompt_callback_mutex_acquired) {
2652 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2653 ompt_mutex_lock, (ompt_wait_id_t)lck, codeptr);
2657 #endif // KMP_USE_DYNAMIC_LOCK 2660 void __kmpc_set_nest_lock(
ident_t *loc, kmp_int32 gtid,
void **user_lock) {
2661 #if KMP_USE_DYNAMIC_LOCK 2664 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
2666 #if OMPT_SUPPORT && OMPT_OPTIONAL 2668 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2670 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2671 if (ompt_enabled.enabled) {
2672 if (ompt_enabled.ompt_callback_mutex_acquire) {
2673 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2674 ompt_mutex_nest_lock, omp_lock_hint_none,
2675 __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock,
2680 int acquire_status =
2681 KMP_D_LOCK_FUNC(user_lock,
set)((kmp_dyna_lock_t *)user_lock, gtid);
2682 (void) acquire_status;
2684 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
2687 #if OMPT_SUPPORT && OMPT_OPTIONAL 2688 if (ompt_enabled.enabled) {
2689 if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) {
2690 if (ompt_enabled.ompt_callback_mutex_acquired) {
2692 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2693 ompt_mutex_nest_lock, (ompt_wait_id_t)user_lock, codeptr);
2696 if (ompt_enabled.ompt_callback_nest_lock) {
2698 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
2699 ompt_scope_begin, (ompt_wait_id_t)user_lock, codeptr);
2705 #else // KMP_USE_DYNAMIC_LOCK 2707 kmp_user_lock_p lck;
2709 if ((__kmp_user_lock_kind == lk_tas) &&
2710 (
sizeof(lck->tas.lk.poll) +
sizeof(lck->tas.lk.depth_locked) <=
2711 OMP_NEST_LOCK_T_SIZE)) {
2712 lck = (kmp_user_lock_p)user_lock;
2715 else if ((__kmp_user_lock_kind == lk_futex) &&
2716 (
sizeof(lck->futex.lk.poll) +
sizeof(lck->futex.lk.depth_locked) <=
2717 OMP_NEST_LOCK_T_SIZE)) {
2718 lck = (kmp_user_lock_p)user_lock;
2722 lck = __kmp_lookup_user_lock(user_lock,
"omp_set_nest_lock");
2726 __kmp_itt_lock_acquiring(lck);
2728 #if OMPT_SUPPORT && OMPT_OPTIONAL 2730 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2732 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2733 if (ompt_enabled.enabled) {
2734 if (ompt_enabled.ompt_callback_mutex_acquire) {
2735 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2736 ompt_mutex_nest_lock, omp_lock_hint_none,
2737 __ompt_get_mutex_impl_type(), (ompt_wait_id_t)lck, codeptr);
2742 ACQUIRE_NESTED_LOCK(lck, gtid, &acquire_status);
2745 __kmp_itt_lock_acquired(lck);
2748 #if OMPT_SUPPORT && OMPT_OPTIONAL 2749 if (ompt_enabled.enabled) {
2750 if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) {
2751 if (ompt_enabled.ompt_callback_mutex_acquired) {
2753 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2754 ompt_mutex_nest_lock, (ompt_wait_id_t)lck, codeptr);
2757 if (ompt_enabled.ompt_callback_nest_lock) {
2759 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
2760 ompt_scope_begin, (ompt_wait_id_t)lck, codeptr);
2766 #endif // KMP_USE_DYNAMIC_LOCK 2769 void __kmpc_unset_lock(
ident_t *loc, kmp_int32 gtid,
void **user_lock) {
2770 #if KMP_USE_DYNAMIC_LOCK 2772 int tag = KMP_EXTRACT_D_TAG(user_lock);
2774 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2776 #if KMP_USE_INLINED_TAS 2777 if (tag == locktag_tas && !__kmp_env_consistency_check) {
2778 KMP_RELEASE_TAS_LOCK(user_lock, gtid);
2780 #elif KMP_USE_INLINED_FUTEX 2781 if (tag == locktag_futex && !__kmp_env_consistency_check) {
2782 KMP_RELEASE_FUTEX_LOCK(user_lock, gtid);
2786 __kmp_direct_unset[tag]((kmp_dyna_lock_t *)user_lock, gtid);
2789 #if OMPT_SUPPORT && OMPT_OPTIONAL 2791 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2793 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2794 if (ompt_enabled.ompt_callback_mutex_released) {
2795 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2796 ompt_mutex_lock, (ompt_wait_id_t)user_lock, codeptr);
2800 #else // KMP_USE_DYNAMIC_LOCK 2802 kmp_user_lock_p lck;
2807 if ((__kmp_user_lock_kind == lk_tas) &&
2808 (
sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2809 #if KMP_OS_LINUX && \ 2810 (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) 2813 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2815 TCW_4(((kmp_user_lock_p)user_lock)->tas.lk.poll, 0);
2818 #if OMPT_SUPPORT && OMPT_OPTIONAL 2820 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2822 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2823 if (ompt_enabled.ompt_callback_mutex_released) {
2824 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2825 ompt_mutex_lock, (ompt_wait_id_t)lck, codeptr);
2831 lck = (kmp_user_lock_p)user_lock;
2835 else if ((__kmp_user_lock_kind == lk_futex) &&
2836 (
sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2837 lck = (kmp_user_lock_p)user_lock;
2841 lck = __kmp_lookup_user_lock(user_lock,
"omp_unset_lock");
2845 __kmp_itt_lock_releasing(lck);
2848 RELEASE_LOCK(lck, gtid);
2850 #if OMPT_SUPPORT && OMPT_OPTIONAL 2852 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2854 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2855 if (ompt_enabled.ompt_callback_mutex_released) {
2856 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2857 ompt_mutex_lock, (ompt_wait_id_t)lck, codeptr);
2861 #endif // KMP_USE_DYNAMIC_LOCK 2865 void __kmpc_unset_nest_lock(
2865 void __kmpc_unset_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2866 #if KMP_USE_DYNAMIC_LOCK
2869 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2871 int release_status =
2872 KMP_D_LOCK_FUNC(user_lock, unset)((kmp_dyna_lock_t *)user_lock, gtid);
2873 (void) release_status;
2875 #if OMPT_SUPPORT && OMPT_OPTIONAL
2877 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
     if (!codeptr)
2879 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2880 if (ompt_enabled.enabled) {
2881 if (release_status == KMP_LOCK_RELEASED) {
2882 if (ompt_enabled.ompt_callback_mutex_released) {
2884 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2885 ompt_mutex_nest_lock, (ompt_wait_id_t)user_lock, codeptr);
2887 }
else if (ompt_enabled.ompt_callback_nest_lock) {
2889 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
2890 ompt_scope_end, (ompt_wait_id_t)user_lock, codeptr);
2895 #else // KMP_USE_DYNAMIC_LOCK
2897 kmp_user_lock_p lck;
2901 if ((__kmp_user_lock_kind == lk_tas) &&
2902     (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2903      OMP_NEST_LOCK_T_SIZE)) {
2904 #if KMP_OS_LINUX && \
2905     (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
2907 kmp_tas_lock_t *tl = (kmp_tas_lock_t *)user_lock;
2909 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2912 #if OMPT_SUPPORT && OMPT_OPTIONAL
2913 int release_status = KMP_LOCK_STILL_HELD;
2916 if (--(tl->lk.depth_locked) == 0) {
2917 TCW_4(tl->lk.poll, 0);
2918 #if OMPT_SUPPORT && OMPT_OPTIONAL
2919 release_status = KMP_LOCK_RELEASED;
2924 #if OMPT_SUPPORT && OMPT_OPTIONAL
2926 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
     if (!codeptr)
2928 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2929 if (ompt_enabled.enabled) {
2930 if (release_status == KMP_LOCK_RELEASED) {
2931 if (ompt_enabled.ompt_callback_mutex_released) {
2933 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2934 ompt_mutex_nest_lock, (ompt_wait_id_t)lck, codeptr);
2936 }
else if (ompt_enabled.ompt_callback_nest_lock) {
2938 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
2939 ompt_mutex_scope_end, (ompt_wait_id_t)lck, codeptr);
2946 lck = (kmp_user_lock_p)user_lock;
2950 else if ((__kmp_user_lock_kind == lk_futex) &&
2951          (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2952           OMP_NEST_LOCK_T_SIZE)) {
2953 lck = (kmp_user_lock_p)user_lock;
2957 lck = __kmp_lookup_user_lock(user_lock, "omp_unset_nest_lock");
2961 __kmp_itt_lock_releasing(lck);
2965 release_status = RELEASE_NESTED_LOCK(lck, gtid);
2966 #if OMPT_SUPPORT && OMPT_OPTIONAL
2968 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
     if (!codeptr)
2970 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2971 if (ompt_enabled.enabled) {
2972 if (release_status == KMP_LOCK_RELEASED) {
2973 if (ompt_enabled.ompt_callback_mutex_released) {
2975 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2976 ompt_mutex_nest_lock, (ompt_wait_id_t)lck, codeptr);
2978 }
else if (ompt_enabled.ompt_callback_nest_lock) {
2980 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
2981 ompt_mutex_scope_end, (ompt_wait_id_t)lck, codeptr);
2986 #endif // KMP_USE_DYNAMIC_LOCK
2990 int __kmpc_test_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2993 #if KMP_USE_DYNAMIC_LOCK
     int rc;
2995 int tag = KMP_EXTRACT_D_TAG(user_lock);
2997 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
2999 #if OMPT_SUPPORT && OMPT_OPTIONAL
3001 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
     if (!codeptr)
3003 codeptr = OMPT_GET_RETURN_ADDRESS(0);
3004 if (ompt_enabled.ompt_callback_mutex_acquire) {
3005 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3006 ompt_mutex_lock, omp_lock_hint_none,
3007 __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock,
     codeptr);
3011 #if KMP_USE_INLINED_TAS
3012 if (tag == locktag_tas && !__kmp_env_consistency_check) {
3013 KMP_TEST_TAS_LOCK(user_lock, gtid, rc);
3015 #elif KMP_USE_INLINED_FUTEX
3016 if (tag == locktag_futex && !__kmp_env_consistency_check) {
3017 KMP_TEST_FUTEX_LOCK(user_lock, gtid, rc);
3021 rc = __kmp_direct_test[tag]((kmp_dyna_lock_t *)user_lock, gtid);
3025 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
3027 #if OMPT_SUPPORT && OMPT_OPTIONAL
3028 if (ompt_enabled.ompt_callback_mutex_acquired) {
3029 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3030 ompt_mutex_lock, (ompt_wait_id_t)user_lock, codeptr);
3036 __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
3041 #else // KMP_USE_DYNAMIC_LOCK
3043 kmp_user_lock_p lck;
3046 if ((__kmp_user_lock_kind == lk_tas) &&
3047     (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
3048 lck = (kmp_user_lock_p)user_lock;
3051 else if ((__kmp_user_lock_kind == lk_futex) &&
3052          (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
3053 lck = (kmp_user_lock_p)user_lock;
3057 lck = __kmp_lookup_user_lock(user_lock, "omp_test_lock");
3061 __kmp_itt_lock_acquiring(lck);
3063 #if OMPT_SUPPORT && OMPT_OPTIONAL
3065 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
     if (!codeptr)
3067 codeptr = OMPT_GET_RETURN_ADDRESS(0);
3068 if (ompt_enabled.ompt_callback_mutex_acquire) {
3069 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3070 ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
3071 (ompt_wait_id_t)lck, codeptr);
3075 rc = TEST_LOCK(lck, gtid);
3078 __kmp_itt_lock_acquired(lck);
3080 __kmp_itt_lock_cancelled(lck);
3083 #if OMPT_SUPPORT && OMPT_OPTIONAL
3084 if (rc && ompt_enabled.ompt_callback_mutex_acquired) {
3085 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3086 ompt_mutex_lock, (ompt_wait_id_t)lck, codeptr);
3090 return (rc ? FTN_TRUE : FTN_FALSE);
3094 #endif // KMP_USE_DYNAMIC_LOCK
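/* Illustrative usage sketch (not part of this file, compiled out):
   omp_test_lock() returns nonzero only when the lock could be taken without
   blocking; it reaches __kmpc_test_lock above, which reports
   FTN_TRUE/FTN_FALSE accordingly.  The helper name is hypothetical. */
#if 0
#include <omp.h>

static int try_do_work_sketch(omp_lock_t *l, int *shared_counter) {
  if (omp_test_lock(l)) { /* non-blocking attempt */
    ++*shared_counter;    /* we own the lock here */
    omp_unset_lock(l);
    return 1;             /* work done */
  }
  return 0;               /* lock was busy; the caller can retry later */
}
#endif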
3098 int __kmpc_test_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
3099 #if KMP_USE_DYNAMIC_LOCK
     int rc;
3102 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
3104 #if OMPT_SUPPORT && OMPT_OPTIONAL
3106 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
     if (!codeptr)
3108 codeptr = OMPT_GET_RETURN_ADDRESS(0);
3109 if (ompt_enabled.ompt_callback_mutex_acquire) {
3110 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3111 ompt_mutex_nest_lock, omp_lock_hint_none,
3112 __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock,
     codeptr);
3116 rc = KMP_D_LOCK_FUNC(user_lock, test)((kmp_dyna_lock_t *)user_lock, gtid);
3119 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
3121 __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
3124 #if OMPT_SUPPORT && OMPT_OPTIONAL
3125 if (ompt_enabled.enabled && rc) {
3127 if (ompt_enabled.ompt_callback_mutex_acquired) {
3129 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3130 ompt_mutex_nest_lock, (ompt_wait_id_t)user_lock, codeptr);
3133 if (ompt_enabled.ompt_callback_nest_lock) {
3135 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3136 ompt_scope_begin, (ompt_wait_id_t)user_lock, codeptr);
3143 #else // KMP_USE_DYNAMIC_LOCK
3145 kmp_user_lock_p lck;
3148 if ((__kmp_user_lock_kind == lk_tas) &&
3149     (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
3150      OMP_NEST_LOCK_T_SIZE)) {
3151 lck = (kmp_user_lock_p)user_lock;
3154 else if ((__kmp_user_lock_kind == lk_futex) &&
3155          (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
3156           OMP_NEST_LOCK_T_SIZE)) {
3157 lck = (kmp_user_lock_p)user_lock;
3161 lck = __kmp_lookup_user_lock(user_lock, "omp_test_nest_lock");
3165 __kmp_itt_lock_acquiring(lck);
3168 #if OMPT_SUPPORT && OMPT_OPTIONAL
3170 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
     if (!codeptr)
3172 codeptr = OMPT_GET_RETURN_ADDRESS(0);
3173 if (ompt_enabled.enabled &&
3174     ompt_enabled.ompt_callback_mutex_acquire) {
3175 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3176 ompt_mutex_nest_lock, omp_lock_hint_none,
3177 __ompt_get_mutex_impl_type(), (ompt_wait_id_t)lck, codeptr);
3181 rc = TEST_NESTED_LOCK(lck, gtid);
3184 __kmp_itt_lock_acquired(lck);
3186 __kmp_itt_lock_cancelled(lck);
3189 #if OMPT_SUPPORT && OMPT_OPTIONAL
3190 if (ompt_enabled.enabled && rc) {
3192 if (ompt_enabled.ompt_callback_mutex_acquired) {
3194 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3195 ompt_mutex_nest_lock, (ompt_wait_id_t)lck, codeptr);
3198 if (ompt_enabled.ompt_callback_nest_lock) {
3200 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3201 ompt_mutex_scope_begin, (ompt_wait_id_t)lck, codeptr);
3210 #endif // KMP_USE_DYNAMIC_LOCK
3220 #define __KMP_SET_REDUCTION_METHOD(gtid, rmethod) \
3221   ((__kmp_threads[(gtid)]->th.th_local.packed_reduction_method) = (rmethod))
3223 #define __KMP_GET_REDUCTION_METHOD(gtid) \
3224   (__kmp_threads[(gtid)]->th.th_local.packed_reduction_method)
3230 static __forceinline void
3231 __kmp_enter_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid,
3232                                           kmp_critical_name *crit) {
3238 kmp_user_lock_p lck;
3240 #if KMP_USE_DYNAMIC_LOCK
3242 kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
3245 if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
3246 KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0,
3247                             KMP_GET_D_TAG(__kmp_user_lock_seq));
3249 __kmp_init_indirect_csptr(crit, loc, global_tid,
3250 KMP_GET_I_TAG(__kmp_user_lock_seq));
3256 if (KMP_EXTRACT_D_TAG(lk) != 0) {
3257 lck = (kmp_user_lock_p)lk;
3258 KMP_DEBUG_ASSERT(lck != NULL);
3259 if (__kmp_env_consistency_check) {
3260 __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
3262 KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
3264 kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
     lck = ilk->lock;
3266 KMP_DEBUG_ASSERT(lck != NULL);
3267 if (__kmp_env_consistency_check) {
3268 __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
3270 KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
3273 #else // KMP_USE_DYNAMIC_LOCK
3278 if (__kmp_base_user_lock_size <= INTEL_CRITICAL_SIZE) {
3279 lck = (kmp_user_lock_p)crit;
3281 lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);
3283 KMP_DEBUG_ASSERT(lck != NULL);
3285 if (__kmp_env_consistency_check)
3286 __kmp_push_sync(global_tid, ct_critical, loc, lck);
3288 __kmp_acquire_user_lock_with_checks(lck, global_tid);
3290 #endif // KMP_USE_DYNAMIC_LOCK
3294 static __forceinline void
3295 __kmp_end_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid,
3296                                         kmp_critical_name *crit) {
3298 kmp_user_lock_p lck;
3300 #if KMP_USE_DYNAMIC_LOCK
3302 if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
3303 lck = (kmp_user_lock_p)crit;
3304 if (__kmp_env_consistency_check)
3305 __kmp_pop_sync(global_tid, ct_critical, loc);
3306 KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
3308 kmp_indirect_lock_t *ilk =
3309 (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
3310 if (__kmp_env_consistency_check)
3311 __kmp_pop_sync(global_tid, ct_critical, loc);
3312 KMP_I_LOCK_FUNC(ilk, unset)(ilk->lock, global_tid);
3315 #else // KMP_USE_DYNAMIC_LOCK
3320 if (__kmp_base_user_lock_size > 32) {
3321 lck = *((kmp_user_lock_p *)crit);
3322 KMP_ASSERT(lck != NULL);
3324 lck = (kmp_user_lock_p)crit;
3327 if (__kmp_env_consistency_check)
3328 __kmp_pop_sync(global_tid, ct_critical, loc);
3330 __kmp_release_user_lock_with_checks(lck, global_tid);
3332 #endif // KMP_USE_DYNAMIC_LOCK
3336 static __forceinline int
3337 __kmp_swap_teams_for_teams_reduction(kmp_info_t *th, kmp_team_t **team_p,
                                          int *task_state) {
3342 if (th->th.th_teams_microtask) {
3343 *team_p = team = th->th.th_team;
3344 if (team->t.t_level == th->th.th_teams_level) {
3346 KMP_DEBUG_ASSERT(!th->th.th_info.ds.ds_tid);
3348 th->th.th_info.ds.ds_tid = team->t.t_master_tid;
3349 th->th.th_team = team->t.t_parent;
3350 th->th.th_team_nproc = th->th.th_team->t.t_nproc;
3351 th->th.th_task_team = th->th.th_team->t.t_task_team[0];
3352 *task_state = th->th.th_task_state;
3353 th->th.th_task_state = 0;
3361 static __forceinline void
3362 __kmp_restore_swapped_teams(kmp_info_t *th, kmp_team_t *team,
                                 int task_state) {
3364 th->th.th_info.ds.ds_tid = 0;
3365 th->th.th_team = team;
3366 th->th.th_team_nproc = team->t.t_nproc;
3367 th->th.th_task_team = team->t.t_task_team[task_state];
3368 th->th.th_task_state = task_state;
kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
3390     size_t reduce_size, void *reduce_data,
3391     void (*reduce_func)(void *lhs_data, void *rhs_data),
3392     kmp_critical_name *lck) {
3396 PACKED_REDUCTION_METHOD_T packed_reduction_method;
3400 int teams_swapped = 0, task_state;
3402 KA_TRACE(10, ("__kmpc_reduce_nowait() enter: called T#%d\n", global_tid));
3410 if (!TCR_4(__kmp_init_parallel))
3411 __kmp_parallel_initialize();
3414 __kmp_resume_if_soft_paused();
3418 #if KMP_USE_DYNAMIC_LOCK
3419 if (__kmp_env_consistency_check)
3420 __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0);
3422 if (__kmp_env_consistency_check)
3423 __kmp_push_sync(global_tid, ct_reduce, loc, NULL);
3427 th = __kmp_thread_from_gtid(global_tid);
3428 teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
3429 #endif // OMP_40_ENABLED
3447 packed_reduction_method = __kmp_determine_reduction_method(
3448 loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
3449 __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);
3451 if (packed_reduction_method == critical_reduce_block) {
3453 __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
3456 }
else if (packed_reduction_method == empty_reduce_block) {
3462 }
else if (packed_reduction_method == atomic_reduce_block) {
3472 if (__kmp_env_consistency_check)
3473 __kmp_pop_sync(global_tid, ct_reduce, loc);
3475 }
else if (TEST_REDUCTION_METHOD(packed_reduction_method, tree_reduce_block)) {
3496 ompt_frame_t *ompt_frame;
3497 if (ompt_enabled.enabled) {
3498 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3499 if (ompt_frame->enter_frame.ptr == NULL)
3500 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3501 OMPT_STORE_RETURN_ADDRESS(global_tid);
3505 __kmp_threads[global_tid]->th.th_ident = loc;
3508 __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
3509 global_tid, FALSE, reduce_size, reduce_data, reduce_func);
3510 retval = (retval != 0) ? (0) : (1);
3511 #if OMPT_SUPPORT && OMPT_OPTIONAL
3512 if (ompt_enabled.enabled) {
3513 ompt_frame->enter_frame = ompt_data_none;
3519 if (__kmp_env_consistency_check) {
3521 __kmp_pop_sync(global_tid, ct_reduce, loc);
3531 if (teams_swapped) {
3532 __kmp_restore_swapped_teams(th, team, task_state);
     KA_TRACE(10, ("__kmpc_reduce_nowait() exit: called T#%d: method %08x, returns %08x\n",
3538               global_tid, packed_reduction_method, retval));
void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
3552                               kmp_critical_name *lck) {
3554 PACKED_REDUCTION_METHOD_T packed_reduction_method;
3556 KA_TRACE(10, ("__kmpc_end_reduce_nowait() enter: called T#%d\n", global_tid));
3558 packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);
3560 if (packed_reduction_method == critical_reduce_block) {
3562 __kmp_end_critical_section_reduce_block(loc, global_tid, lck);
3564 }
else if (packed_reduction_method == empty_reduce_block) {
3569 }
else if (packed_reduction_method == atomic_reduce_block) {
3576 }
else if (TEST_REDUCTION_METHOD(packed_reduction_method, tree_reduce_block)) {
3587 if (__kmp_env_consistency_check)
3588 __kmp_pop_sync(global_tid, ct_reduce, loc);
3590 KA_TRACE(10, ("__kmpc_end_reduce_nowait() exit: called T#%d: method %08x\n",
3591               global_tid, packed_reduction_method));
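/* Illustrative usage sketch (not part of this file, compiled out): a user-level
   reduction clause.  Compilers targeting this runtime typically lower it onto
   __kmpc_reduce_nowait()/__kmpc_end_reduce_nowait() or onto the atomic/critical
   fallbacks selected by __kmp_determine_reduction_method(). */
#if 0
#include <omp.h>

static double dot_sketch(const double *x, const double *y, int n) {
  double sum = 0.0;
#pragma omp parallel for reduction(+ : sum)
  for (int i = 0; i < n; ++i)
    sum += x[i] * y[i]; /* each thread accumulates privately, then combines */
  return sum;
}
#endif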
kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
3614     size_t reduce_size, void *reduce_data,
3615     void (*reduce_func)(void *lhs_data, void *rhs_data),
3616     kmp_critical_name *lck) {
3619 PACKED_REDUCTION_METHOD_T packed_reduction_method;
3623 int teams_swapped = 0, task_state;
3626 KA_TRACE(10, ("__kmpc_reduce() enter: called T#%d\n", global_tid));
3634 if (!TCR_4(__kmp_init_parallel))
3635 __kmp_parallel_initialize();
3638 __kmp_resume_if_soft_paused();
3642 #if KMP_USE_DYNAMIC_LOCK
3643 if (__kmp_env_consistency_check)
3644 __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0);
3646 if (__kmp_env_consistency_check)
3647 __kmp_push_sync(global_tid, ct_reduce, loc, NULL);
3651 th = __kmp_thread_from_gtid(global_tid);
3652 teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
3653 #endif // OMP_40_ENABLED
3655 packed_reduction_method = __kmp_determine_reduction_method(
3656 loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
3657 __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);
3659 if (packed_reduction_method == critical_reduce_block) {
3661 __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
3664 }
else if (packed_reduction_method == empty_reduce_block) {
3670 }
else if (packed_reduction_method == atomic_reduce_block) {
3674 }
else if (TEST_REDUCTION_METHOD(packed_reduction_method, tree_reduce_block)) {
3681 ompt_frame_t *ompt_frame;
3682 if (ompt_enabled.enabled) {
3683 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3684 if (ompt_frame->enter_frame.ptr == NULL)
3685 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3686 OMPT_STORE_RETURN_ADDRESS(global_tid);
3690 __kmp_threads[global_tid]->th.th_ident = loc;
3694 __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
3695 global_tid, TRUE, reduce_size, reduce_data, reduce_func);
3696 retval = (retval != 0) ? (0) : (1);
3697 #if OMPT_SUPPORT && OMPT_OPTIONAL
3698 if (ompt_enabled.enabled) {
3699 ompt_frame->enter_frame = ompt_data_none;
3705 if (__kmp_env_consistency_check) {
3707 __kmp_pop_sync(global_tid, ct_reduce, loc);
3717 if (teams_swapped) {
3718 __kmp_restore_swapped_teams(th, team, task_state);
     KA_TRACE(10, ("__kmpc_reduce() exit: called T#%d: method %08x, returns %08x\n",
3724               global_tid, packed_reduction_method, retval));
void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
3740                        kmp_critical_name *lck) {
3742 PACKED_REDUCTION_METHOD_T packed_reduction_method;
3746 int teams_swapped = 0, task_state;
3749 KA_TRACE(10, ("__kmpc_end_reduce() enter: called T#%d\n", global_tid));
3752 th = __kmp_thread_from_gtid(global_tid);
3753 teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
3754 #endif // OMP_40_ENABLED
3756 packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);
3761 if (packed_reduction_method == critical_reduce_block) {
3763 __kmp_end_critical_section_reduce_block(loc, global_tid, lck);
3767 ompt_frame_t *ompt_frame;
3768 if (ompt_enabled.enabled) {
3769 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3770 if (ompt_frame->enter_frame.ptr == NULL)
3771 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3772 OMPT_STORE_RETURN_ADDRESS(global_tid);
3776 __kmp_threads[global_tid]->th.th_ident = loc;
3778 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
3779 #if OMPT_SUPPORT && OMPT_OPTIONAL
3780 if (ompt_enabled.enabled) {
3781 ompt_frame->enter_frame = ompt_data_none;
3785 }
else if (packed_reduction_method == empty_reduce_block) {
3791 ompt_frame_t *ompt_frame;
3792 if (ompt_enabled.enabled) {
3793 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3794 if (ompt_frame->enter_frame.ptr == NULL)
3795 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3796 OMPT_STORE_RETURN_ADDRESS(global_tid);
3800 __kmp_threads[global_tid]->th.th_ident = loc;
3802 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
3803 #if OMPT_SUPPORT && OMPT_OPTIONAL
3804 if (ompt_enabled.enabled) {
3805 ompt_frame->enter_frame = ompt_data_none;
3809 }
else if (packed_reduction_method == atomic_reduce_block) {
3812 ompt_frame_t *ompt_frame;
3813 if (ompt_enabled.enabled) {
3814 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3815 if (ompt_frame->enter_frame.ptr == NULL)
3816 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3817 OMPT_STORE_RETURN_ADDRESS(global_tid);
3822 __kmp_threads[global_tid]->th.th_ident = loc;
3824 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
3825 #if OMPT_SUPPORT && OMPT_OPTIONAL
3826 if (ompt_enabled.enabled) {
3827 ompt_frame->enter_frame = ompt_data_none;
3831 }
else if (TEST_REDUCTION_METHOD(packed_reduction_method, tree_reduce_block)) {
3835 __kmp_end_split_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
                             global_tid);
3844 if (teams_swapped) {
3845 __kmp_restore_swapped_teams(th, team, task_state);
3849 if (__kmp_env_consistency_check)
3850 __kmp_pop_sync(global_tid, ct_reduce, loc);
3852 KA_TRACE(10, ("__kmpc_end_reduce() exit: called T#%d: method %08x\n",
3853               global_tid, packed_reduction_method));
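/* Illustrative usage sketch (not part of this file, compiled out): a reduction
   across a teams construct.  This is the situation the
   __kmp_swap_teams_for_teams_reduction()/__kmp_restore_swapped_teams() helpers
   above appear to handle, by temporarily re-parenting each team's initial
   thread so the combine step spans the whole league.  Assumes a compiler and
   runtime with OpenMP 5.x host teams support. */
#if 0
#include <omp.h>

static long teams_sum_sketch(int n) {
  long sum = 0;
#pragma omp teams distribute parallel for reduction(+ : sum)
  for (int i = 0; i < n; ++i)
    sum += i; /* partial sums per team are combined across the league */
  return sum;
}
#endif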
3858 #undef __KMP_GET_REDUCTION_METHOD
3859 #undef __KMP_SET_REDUCTION_METHOD
3863 kmp_uint64 __kmpc_get_taskid() {
3868 gtid = __kmp_get_gtid();
3872 thread = __kmp_thread_from_gtid(gtid);
3873 return thread->th.th_current_task->td_task_id;
3877 kmp_uint64 __kmpc_get_parent_taskid() {
3881 kmp_taskdata_t *parent_task;
3883 gtid = __kmp_get_gtid();
3887 thread = __kmp_thread_from_gtid(gtid);
3888 parent_task = thread->th.th_current_task->td_parent;
3889 return (parent_task == NULL ? 0 : parent_task->td_task_id);
3905 void __kmpc_doacross_init(ident_t *loc, int gtid, int num_dims,
3906                           const struct kmp_dim *dims) {
3908 kmp_int64 last, trace_count;
3909 kmp_info_t *th = __kmp_threads[gtid];
3910 kmp_team_t *team = th->th.th_team;
3912 kmp_disp_t *pr_buf = th->th.th_dispatch;
3913 dispatch_shared_info_t *sh_buf;
     KA_TRACE(20, ("__kmpc_doacross_init() enter: called T#%d, num dims %d, active %d\n",
3918               gtid, num_dims, !team->t.t_serialized));
3919 KMP_DEBUG_ASSERT(dims != NULL);
3920 KMP_DEBUG_ASSERT(num_dims > 0);
3922 if (team->t.t_serialized) {
3923 KA_TRACE(20, ("__kmpc_doacross_init() exit: serialized team\n"));
3926 KMP_DEBUG_ASSERT(team->t.t_nproc > 1);
3927 idx = pr_buf->th_doacross_buf_idx++;
3929 sh_buf = &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];
3932 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info == NULL);
3933 pr_buf->th_doacross_info = (kmp_int64 *)__kmp_thread_malloc(
3934     th, sizeof(kmp_int64) * (4 * num_dims + 1));
3935 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
3936 pr_buf->th_doacross_info[0] = (kmp_int64)num_dims;
3940 pr_buf->th_doacross_info[1] = (kmp_int64)&sh_buf->doacross_num_done;
3941 pr_buf->th_doacross_info[2] = dims[0].lo;
3942 pr_buf->th_doacross_info[3] = dims[0].up;
3943 pr_buf->th_doacross_info[4] = dims[0].st;
3945 for (j = 1; j < num_dims; ++j) {
3948 if (dims[j].st == 1) {
3950 range_length = dims[j].up - dims[j].lo + 1;
3952 if (dims[j].st > 0) {
3953 KMP_DEBUG_ASSERT(dims[j].up > dims[j].lo);
3954 range_length = (kmp_uint64)(dims[j].up - dims[j].lo) / dims[j].st + 1;
3956 KMP_DEBUG_ASSERT(dims[j].lo > dims[j].up);
     range_length =
3958     (kmp_uint64)(dims[j].lo - dims[j].up) / (-dims[j].st) + 1;
3961 pr_buf->th_doacross_info[last++] = range_length;
3962 pr_buf->th_doacross_info[last++] = dims[j].lo;
3963 pr_buf->th_doacross_info[last++] = dims[j].up;
3964 pr_buf->th_doacross_info[last++] = dims[j].st;
3969 if (dims[0].st == 1) {
3970 trace_count = dims[0].up - dims[0].lo + 1;
3971 }
else if (dims[0].st > 0) {
3972 KMP_DEBUG_ASSERT(dims[0].up > dims[0].lo);
3973 trace_count = (kmp_uint64)(dims[0].up - dims[0].lo) / dims[0].st + 1;
3975 KMP_DEBUG_ASSERT(dims[0].lo > dims[0].up);
3976 trace_count = (kmp_uint64)(dims[0].lo - dims[0].up) / (-dims[0].st) + 1;
3978 for (j = 1; j < num_dims; ++j) {
3979 trace_count *= pr_buf->th_doacross_info[4 * j + 1];
3981 KMP_DEBUG_ASSERT(trace_count > 0);
3985 if (idx != sh_buf->doacross_buf_idx) {
3987 __kmp_wait_4((volatile kmp_uint32 *)&sh_buf->doacross_buf_idx, idx,
              __kmp_eq_4, NULL);
3994 flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET32(
3995     (volatile kmp_int32 *)&sh_buf->doacross_flags, NULL, 1);
3997 flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET64(
3998     (volatile kmp_int64 *)&sh_buf->doacross_flags, NULL, 1LL);
4000 if (flags == NULL) {
4002 size_t size = trace_count / 8 + 8;
4003 flags = (kmp_uint32 *)__kmp_thread_calloc(th, size, 1);
4005 sh_buf->doacross_flags = flags;
4006 }
else if (flags == (kmp_uint32 *)1) {
4009 while (*(volatile kmp_int32 *)&sh_buf->doacross_flags == 1)
4011 while (*(volatile kmp_int64 *)&sh_buf->doacross_flags == 1LL)
4018 KMP_DEBUG_ASSERT(sh_buf->doacross_flags > (kmp_uint32 *)1);
4019 pr_buf->th_doacross_flags = sh_buf->doacross_flags;
4022 KA_TRACE(20, ("__kmpc_doacross_init() exit: T#%d\n", gtid));
4025 void __kmpc_doacross_wait(ident_t *loc, int gtid, const kmp_int64 *vec) {
4026 kmp_int32 shft, num_dims, i;
4028 kmp_int64 iter_number;
4029 kmp_info_t *th = __kmp_threads[gtid];
4030 kmp_team_t *team = th->th.th_team;
4032 kmp_int64 lo, up, st;
4034 KA_TRACE(20, ("__kmpc_doacross_wait() enter: called T#%d\n", gtid));
4035 if (team->t.t_serialized) {
4036 KA_TRACE(20, ("__kmpc_doacross_wait() exit: serialized team\n"));
4041 pr_buf = th->th.th_dispatch;
4042 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
4043 num_dims = pr_buf->th_doacross_info[0];
4044 lo = pr_buf->th_doacross_info[2];
4045 up = pr_buf->th_doacross_info[3];
4046 st = pr_buf->th_doacross_info[4];
4048 if (vec[0] < lo || vec[0] > up) {
4049 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4050               "bounds [%lld,%lld]\n",
4051               gtid, vec[0], lo, up));
4054 iter_number = vec[0] - lo;
4055 }
else if (st > 0) {
4056 if (vec[0] < lo || vec[0] > up) {
4057 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4058               "bounds [%lld,%lld]\n",
4059               gtid, vec[0], lo, up));
4062 iter_number = (kmp_uint64)(vec[0] - lo) / st;
4064 if (vec[0] > lo || vec[0] < up) {
4065 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4066               "bounds [%lld,%lld]\n",
4067               gtid, vec[0], lo, up));
4070 iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
4072 for (i = 1; i < num_dims; ++i) {
4074 kmp_int32 j = i * 4;
4075 ln = pr_buf->th_doacross_info[j + 1];
4076 lo = pr_buf->th_doacross_info[j + 2];
4077 up = pr_buf->th_doacross_info[j + 3];
4078 st = pr_buf->th_doacross_info[j + 4];
4080 if (vec[i] < lo || vec[i] > up) {
4081 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4082               "bounds [%lld,%lld]\n",
4083               gtid, vec[i], lo, up));
4087 }
else if (st > 0) {
4088 if (vec[i] < lo || vec[i] > up) {
4089 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4090               "bounds [%lld,%lld]\n",
4091               gtid, vec[i], lo, up));
4094 iter = (kmp_uint64)(vec[i] - lo) / st;
4096 if (vec[i] > lo || vec[i] < up) {
4097 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4098               "bounds [%lld,%lld]\n",
4099               gtid, vec[i], lo, up));
4102 iter = (kmp_uint64)(lo - vec[i]) / (-st);
4104 iter_number = iter + ln * iter_number;
4106 shft = iter_number % 32;
4109 while ((flag & pr_buf->th_doacross_flags[iter_number]) == 0) {
     KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d wait for iter %lld completed\n",
4115               gtid, (iter_number << 5) + shft));
4118 void __kmpc_doacross_post(ident_t *loc, int gtid, const kmp_int64 *vec) {
4119 kmp_int32 shft, num_dims, i;
4121 kmp_int64 iter_number;
4122 kmp_info_t *th = __kmp_threads[gtid];
4123 kmp_team_t *team = th->th.th_team;
4127 KA_TRACE(20, ("__kmpc_doacross_post() enter: called T#%d\n", gtid));
4128 if (team->t.t_serialized) {
4129 KA_TRACE(20, ("__kmpc_doacross_post() exit: serialized team\n"));
4135 pr_buf = th->th.th_dispatch;
4136 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
4137 num_dims = pr_buf->th_doacross_info[0];
4138 lo = pr_buf->th_doacross_info[2];
4139 st = pr_buf->th_doacross_info[4];
4141 iter_number = vec[0] - lo;
4142 }
else if (st > 0) {
4143 iter_number = (kmp_uint64)(vec[0] - lo) / st;
4145 iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
4147 for (i = 1; i < num_dims; ++i) {
4149 kmp_int32 j = i * 4;
4150 ln = pr_buf->th_doacross_info[j + 1];
4151 lo = pr_buf->th_doacross_info[j + 2];
4152 st = pr_buf->th_doacross_info[j + 4];
4155 }
else if (st > 0) {
4156 iter = (kmp_uint64)(vec[i] - lo) / st;
4158 iter = (kmp_uint64)(lo - vec[i]) / (-st);
4160 iter_number = iter + ln * iter_number;
4162 shft = iter_number % 32;
4166 if ((flag & pr_buf->th_doacross_flags[iter_number]) == 0)
4167 KMP_TEST_THEN_OR32(&pr_buf->th_doacross_flags[iter_number], flag);
4168 KA_TRACE(20, ("__kmpc_doacross_post() exit: T#%d iter %lld posted\n", gtid,
4169               (iter_number << 5) + shft));
4172 void __kmpc_doacross_fini(ident_t *loc, int gtid) {
4174 kmp_info_t *th = __kmp_threads[gtid];
4175 kmp_team_t *team = th->th.th_team;
4176 kmp_disp_t *pr_buf = th->th.th_dispatch;
4178 KA_TRACE(20, ("__kmpc_doacross_fini() enter: called T#%d\n", gtid));
4179 if (team->t.t_serialized) {
4180 KA_TRACE(20, ("__kmpc_doacross_fini() exit: serialized team %p\n", team));
4183 num_done = KMP_TEST_THEN_INC32((kmp_int32 *)pr_buf->th_doacross_info[1]) + 1;
4184 if (num_done == th->th.th_team_nproc) {
4186 int idx = pr_buf->th_doacross_buf_idx - 1;
4187 dispatch_shared_info_t *sh_buf =
4188 &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];
4189 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info[1] ==
4190 (kmp_int64)&sh_buf->doacross_num_done);
4191 KMP_DEBUG_ASSERT(num_done == sh_buf->doacross_num_done);
4192 KMP_DEBUG_ASSERT(idx == sh_buf->doacross_buf_idx);
4193 __kmp_thread_free(th, CCAST(kmp_uint32 *, sh_buf->doacross_flags));
4194 sh_buf->doacross_flags = NULL;
4195 sh_buf->doacross_num_done = 0;
4196 sh_buf->doacross_buf_idx += __kmp_dispatch_num_buffers;
4200 pr_buf->th_doacross_flags = NULL;
4201 __kmp_thread_free(th, (void *)pr_buf->th_doacross_info);
4202 pr_buf->th_doacross_info = NULL;
4203 KA_TRACE(20, ("__kmpc_doacross_fini() exit: T#%d\n", gtid));
4209 void *omp_alloc(size_t size, omp_allocator_handle_t allocator) {
4210 return __kmpc_alloc(__kmp_entry_gtid(), size, allocator);
4213 void omp_free(void *ptr, omp_allocator_handle_t allocator) {
4214 __kmpc_free(__kmp_entry_gtid(), ptr, allocator);
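/* Illustrative usage sketch (not part of this file, compiled out): the
   OpenMP 5.0 allocator entry points defined above, used with the predefined
   omp_default_mem_alloc handle. */
#if 0
#include <omp.h>
#include <string.h>

static void allocator_sketch(void) {
  double *buf = (double *)omp_alloc(1024 * sizeof(double), omp_default_mem_alloc);
  if (buf != NULL) {
    memset(buf, 0, 1024 * sizeof(double)); /* use the allocation */
    omp_free(buf, omp_default_mem_alloc);
  }
}
#endif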
4217 int __kmpc_get_target_offload(void) {
4218 if (!__kmp_init_serial) {
4219 __kmp_serial_initialize();
4221 return __kmp_target_offload;
4224 int __kmpc_pause_resource(kmp_pause_status_t level) {
4225 if (!__kmp_init_serial) {
     return 1; // cannot pause if the runtime has not been initialized
     }
4228 return __kmp_pause_resource(level);
4230 #endif // OMP_50_ENABLED
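/* Illustrative usage sketch (not part of this file, compiled out): the
   OpenMP 5.0 pause API that __kmpc_pause_resource() backs.  A soft pause asks
   the runtime to release reusable resources such as idle worker threads;
   assumes an OpenMP 5.0 <omp.h> declaring omp_pause_resource_all(). */
#if 0
#include <omp.h>

static void pause_sketch(void) {
#pragma omp parallel
  { /* ... some parallel work ... */ }

  /* Hint that the runtime may release worker resources until the next
     parallel region; the return value (0 on success) is ignored here. */
  omp_pause_resource_all(omp_pause_soft);
}
#endif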