17 memset(&revent->
cv, 0,
sizeof(revent->
cv));
23#ifdef USE_RAND_TIEBREAKER
/*
 * Ordering helpers for builds with USE_RAND_TIEBREAKER defined: they operate
 * on event signatures (tw_event_sig) rather than on bare timestamps.
 *
 * Every macro parameter is wrapped in parentheses so that an argument which
 * is itself an expression (e.g. `pes + 1`) binds correctly to the `->` / `.`
 * that follows it.
 */

/* recv_ts of the signature returned by tw_pq_minimum_sig_ptr() for pe's
 * priority queue. The misspelled name is kept because callers use it. */
#define PQ_MINUMUM(pe) (tw_pq_minimum_sig_ptr((pe)->pq)->recv_ts)

/* Result of tw_event_sig_compare_ptr() on the KP's last_sig and the event's
 * sig. NOTE(review): sign convention is defined by that function - confirm. */
#define CMP_KP_TO_EVENT_TIME(kp, e) tw_event_sig_compare_ptr(&(kp)->last_sig, &(e)->sig)

/* Result of tw_event_sig_compare_ptr() on the trigger's sig_at and the
 * signature returned by tw_pq_minimum_sig_ptr() for pe's priority queue. */
#define CMP_GVT_HOOK_TO_NEXT_IN_QUEUE(trigger, pe) tw_event_sig_compare_ptr(&(trigger).sig_at, tw_pq_minimum_sig_ptr((pe)->pq))

/* Calls tw_kp_rollback_to_sig() with the event's signature. Deliberately has
 * no trailing semicolon: the call site supplies it. */
#define TRIGGER_ROLLBACK_TO_EVENT_TIME(kp, e) tw_kp_rollback_to_sig((kp), &(e)->sig)

/* TW_STIME_DBL() applied to the recv_ts of the PE's GVT signature. */
#define STIME_FROM_PE(pe) TW_STIME_DBL((pe)->GVT_sig.recv_ts)

/* TW_STIME_DBL() applied to the recv_ts of the KP's last_sig. */
#define STIME_FROM_KP(kp) TW_STIME_DBL((kp)->last_sig.recv_ts)
/*
 * Ordering helpers for builds without USE_RAND_TIEBREAKER: they operate on
 * plain timestamps (tw_stime).
 *
 * Every macro parameter is wrapped in parentheses so that an argument which
 * is itself an expression (e.g. `pes + 1`) binds correctly to the `->` / `.`
 * that follows it.
 */

/* Value returned by tw_pq_minimum() for pe's priority queue. The misspelled
 * name is kept because callers use it. */
#define PQ_MINUMUM(pe) tw_pq_minimum((pe)->pq)

/* TW_STIME_CMP() of the KP's last_time against the event's recv_ts. */
#define CMP_KP_TO_EVENT_TIME(kp, e) TW_STIME_CMP((kp)->last_time, (e)->recv_ts)

/* Difference between the trigger's `at` value and tw_pq_minimum() of pe's
 * priority queue. The result is a difference, not a -1/0/1 value. */
#define CMP_GVT_HOOK_TO_NEXT_IN_QUEUE(trigger, pe) ((trigger).at - tw_pq_minimum((pe)->pq))

/* Calls tw_kp_rollback_to() with the event's recv_ts. No trailing semicolon:
 * the call site supplies it, exactly as with the tie-breaker variant of this
 * macro, so the macro is safe inside a brace-less if/else. */
#define TRIGGER_ROLLBACK_TO_EVENT_TIME(kp, e) tw_kp_rollback_to((kp), (e)->recv_ts)

/* TW_STIME_DBL() applied to the PE's GVT. */
#define STIME_FROM_PE(pe) TW_STIME_DBL((pe)->GVT)

/* TW_STIME_DBL() applied to the KP's last_time. */
#define STIME_FROM_KP(kp) TW_STIME_DBL((kp)->last_time)
53 for (; cev; cev = nev) {
107 for (; cev; cev = nev) {
164 static int no_free_event_buffers = 0;
165 static int warned_no_free_event_buffers = 0;
166 const int max_alloc_fail_count = 20;
184 if (++no_free_event_buffers > 10) {
185 if (!warned_no_free_event_buffers) {
186 fprintf(stderr,
"WARNING: No free event buffers. Try increasing memory via the --extramem option.\n");
187 warned_no_free_event_buffers = 1;
189 if (no_free_event_buffers >= max_alloc_fail_count) {
190 tw_error(
TW_LOC,
"Event allocation failed %d consecutive times. Exiting.", max_alloc_fail_count);
196 no_free_event_buffers = 0;
212#ifndef USE_RAND_TIEBREAKER
223#ifdef USE_RAND_TIEBREAKER
276#ifdef USE_RAND_TIEBREAKER
305 if (g_st_damaris_enabled)
306 st_damaris_expose_data(me, me->
GVT,
RT_COL);
324 static int no_free_event_buffers = 0;
325 static int warned_no_free_event_buffers = 0;
326 const int max_alloc_fail_count = 20;
344 if (++no_free_event_buffers > 10) {
345 if (!warned_no_free_event_buffers) {
346 fprintf(stderr,
"WARNING: No free event buffers. Try increasing memory via the --extramem option.\n");
347 warned_no_free_event_buffers = 1;
349 if (no_free_event_buffers >= max_alloc_fail_count) {
350 tw_error(
TW_LOC,
"Event allocation failed %d consecutive times. Exiting.", max_alloc_fail_count);
356 no_free_event_buffers = 0;
372#ifndef USE_RAND_TIEBREAKER
383#ifdef USE_RAND_TIEBREAKER
435#ifdef USE_RAND_TIEBREAKER
517 if(MPI_Allreduce(&val, &global_val, 1, MPI_C_BOOL, MPI_LOR,
MPI_COMM_ROSS) != MPI_SUCCESS) {
518 tw_error(
TW_LOC,
"MPI_Allreduce for custom rollback and cleanup failed");
530#ifdef USE_RAND_TIEBREAKER
534 for (
unsigned int i = 0; i <
g_tw_nkp; i++) {
541 for (
unsigned int i = 0; i <
g_tw_nkp; i++) {
544 assert(
pe->GVT == gvt);
569#ifdef USE_RAND_TIEBREAKER
574 bool global_triggered;
575 if(MPI_Allreduce(&activate_trigger, &global_triggered, 1, MPI_C_BOOL, MPI_LAND,
MPI_COMM_ROSS) != MPI_SUCCESS) {
576 tw_error(
TW_LOC,
"MPI_Allreduce to check arbitrary function activation failed");
578 return global_triggered;
586 bool has_hook_been_triggered =
false;
589 tw_error(
TW_LOC,
"This is weird. This should have never happened. This switch case is guarded by g_tw_gvt_hook_trigger.status != 0 (aka, != GVT_HOOK_STATUS_disabled). Panic.");
593 if (has_hook_been_triggered) {
600 has_hook_been_triggered = (
g_tw_gvt_done - starting_at) % every == 0;
605 bool const triggered_somewhere =
does_any_pe(triggered_here);
606 if (triggered_somewhere) {
608 has_hook_been_triggered =
true;
615 if (has_hook_been_triggered) {
616#ifdef USE_RAND_TIEBREAKER
640 bool triggered =
false;
650 static bool first_seq_warning =
true;
651 if (first_seq_warning) {
652 tw_warning(
TW_LOC,
"During sequential simulation the GVT hook cannot be triggered by other than: the timestamp trigger (set by calling `tw_trigger_gvt_hook_at`), or by an LP call (should call `tw_trigger_gvt_hook_now`). The GVT hook won't be called!");
653 first_seq_warning =
false;
664#ifdef USE_RAND_TIEBREAKER
680 tw_error(
TW_LOC,
"This is weird. This should have never happened. Only GVT_HOOK_STATUS_timestamp and GVT_HOOK_STATUS_model_call are allowed");
695 tw_error(
TW_LOC,
"Sequential Scheduler used for world size greater than 1.");
700 printf(
"*** START SEQUENTIAL SIMULATION ***\n\n");
718#ifdef USE_RAND_TIEBREAKER
770 printf(
"*** END SIMULATION ***\n\n");
783 printf(
"Warning: GVT Hook will not be triggered in Parallel Conservative Simulation by the function `tw_trigger_gvt_hook_now` (The function `tw_trigger_gvt_hook_when_model_calls` has activated the GVT hook functionality for LP modes.) Please use sequential or parallel optimistic simulation modes.\n");
787 printf(
"*** START PARALLEL CONSERVATIVE SIMULATION ***\n\n");
846#ifndef USE_RAND_TIEBREAKER
858 tw_error(
TW_LOC,
"Found KP last time %lf > current event time %lf for LP %d, PE %lu"
859 "src LP %lu, src PE %lu",
863#ifdef USE_RAND_TIEBREAKER
909 printf(
"*** END SIMULATION ***\n\n");
926 printf(
"*** START PARALLEL OPTIMISTIC SIMULATION WITH SUSPEND LP FEATURE ***\n\n");
960 printf(
"*** END SIMULATION ***\n\n");
978 printf(
"*** START PARALLEL OPTIMISTIC SIMULATION WITH SUSPEND LP FEATURE AND REAL TIME GVT ***\n\n");
1015 printf(
"*** END SIMULATION ***\n\n");
1032 tw_error(
TW_LOC,
"Sequential Scheduler used for world size greater than 1.");
1035 printf(
"/***************************************************************************/\n");
1036 printf(
"/***** WARNING: Starting Optimistic Debug Scheduler!! **********************/\n");
1037 printf(
"This schedule assumes the following: \n");
1038 printf(
" 1) One 1 Processor/Core is used.\n");
1039 printf(
" 2) One 1 KP is used.\n");
1040 printf(
" NOTE: use the --nkp=1 argument to the simulation to ensure that\n");
1041 printf(
" it only uses 1 KP.\n");
1042 printf(
" 3) Events ARE NEVER RECLAIMED (LP Commit Functions are not called).\n");
1043 printf(
" 4) Executes til out of memory (16 events left) and \n injects rollback to first before primodal init event.\n");
1045 printf(
"/***************************************************************************/\n");
1052 printf(
"Warning: GVT Hook will not be triggered in the Optimistic Debug Scheduler.\n");
1056 printf(
"*** START OPTIMISTIC DEBUG SIMULATION ***\n\n");
1064#ifdef USE_RAND_TIEBREAKER
1093 printf(
"/******************* Starting Rollback Phase ******************************/\n");
1094#ifdef USE_RAND_TIEBREAKER
1100 printf(
"/******************* Completed Rollback Phase ******************************/\n");
1104 printf(
"*** END SIMULATION ***\n\n");
1115 tw_error(
TW_LOC,
"Sequential Scheduler used for world size greater than 1.");
1119 size_t largest_lp_size = 0;
1120 for (
size_t i = 0; i <
g_tw_nlp; i++) {
1121 size_t const lp_size =
g_tw_lp[i]->type->state_sz;
1122 if (lp_size > largest_lp_size) {
1123 largest_lp_size = lp_size;
1128 prev.
state = malloc(largest_lp_size);
1129 cur.
state = malloc(largest_lp_size);
1130 if (prev.
state == NULL || cur.
state == NULL) {
1134 if (largest_lp_checkpoint > largest_lp_size) {
1135 largest_lp_size = largest_lp_checkpoint;
1138 printf(
"*** START SEQUENTIAL ROLLBACK TEST SIMULATION ***\n\n");
1151 if (!cev) {
break; }
1156#ifdef USE_RAND_TIEBREAKER
1178 tw_clock total_event_process = 0.0;
1194 crv_check_lpstates(clp, cev, &prev,
"before processing event",
"after processing event and rollback");
1207 crv_check_lpstates(clp, cev, &cur,
"after processing event",
"after processing, rollback, processing event again and commiting");
1238 printf(
"*** END SIMULATION ***\n\n");
void crv_clean_lpstate(crv_lpstate_checkpoint_internal *state, tw_lp const *clp)
void crv_copy_lpstate(crv_lpstate_checkpoint_internal *into, tw_lp const *clp)
void crv_check_lpstates(tw_lp *clp, tw_event *cev, crv_lpstate_checkpoint_internal const *before_state, char const *before_msg, char const *after_msg)
size_t crv_init_checkpoints(void)
static tw_clock tw_clock_read(void)
static double percent_complete
static void gvt_print(tw_stime gvt)
void st_collect_event_data(tw_event *cev, double recv_rt)
void st_inst_finalize(tw_pe *me)
tw_clock g_st_rt_samp_start_cycles
void st_collect_engine_data(tw_pe *me, int col_type)
tw_clock g_st_rt_interval
void st_collect_model_data(tw_pe *pe, double current_rt, int stats_type)
void tw_net_read(tw_pe *)
starts service_queues() to poll network
void tw_net_barrier(void)
void tw_pq_enqueue(tw_pq *, tw_event *)
void tw_pq_delete_any(tw_pq *, tw_event *)
tw_event * tw_pq_dequeue(tw_pq *)
tw_stime tw_pq_minimum(tw_pq *)
unsigned int tw_pq_get_size(tw_pq *)
void io_load_events(tw_pe *me)
#define TW_STIME_CMP(x, y)
unsigned int g_tw_gvt_threshold
static void * tw_event_data(tw_event *event)
void tw_kp_rollback_to_sig(tw_kp *kp, tw_event_sig const *to_sig)
unsigned long long g_tw_clock_rate
void tw_init_kps(tw_pe *me)
static void tw_event_free(tw_pe *, tw_event *)
unsigned long long g_tw_gvt_interval_start_cycles
void tw_kp_rollback_to(tw_kp *kp, tw_stime to)
void tw_kp_rollback_event(tw_event *event)
void tw_error(const char *file, int line, const char *fmt,...)
void tw_pre_run_lps(tw_pe *me)
unsigned int g_tw_gvt_done
void tw_wall_now(tw_wtime *t)
void tw_init_lps(tw_pe *me)
void tw_warning(const char *file, int line, const char *fmt,...)
unsigned int g_tw_gvt_interval
unsigned int g_tw_sim_started
void tw_event_rollback(tw_event *event)
void tw_clock_init(tw_pe *me)
void tw_all_lp_stats(tw_pe *me)
unsigned long long g_tw_gvt_realtime_interval
@ GVT_HOOK_STATUS_disabled
@ GVT_HOOK_STATUS_timestamp
@ GVT_HOOK_STATUS_every_n_gvt
void tw_gvt_step2(tw_pe *)
struct gvt_hook_trigger g_tw_gvt_hook_trigger
void tw_gvt_force_update(void)
void tw_gvt_force_update_realtime(void)
void(* g_tw_gvt_hook)(tw_pe *pe, bool is_queue_empty)
void tw_gvt_step1(tw_pe *)
void tw_gvt_step1_realtime(tw_pe *)
static int tw_event_sig_compare_ptr(tw_event_sig const *e_sig, tw_event_sig const *n_sig)
tw_event_sig const g_tw_max_sig
static void tw_copy_event_sig(tw_event_sig *e, tw_event_sig const *sig)
@ TW_pe_anti_msg
Anti-message.
@ TW_pe_free_q
In tw_pe.free_q.
@ TW_pe_event_q
In a tw_pe.event_q list.
@ TW_kp_pevent_q
In a tw_kp.pevent_q.
static tw_event * tw_eventq_peek(tw_eventq *q)
static tw_event * tw_eventq_pop_list(tw_eventq *q)
static void tw_eventq_delete_any(tw_eventq *q, tw_event *e)
static void tw_eventq_unshift(tw_eventq *q, tw_event *e)
@ GVT_HOOK_STATUS_model_call
unsigned int s_nevent_processed
unsigned int s_nevent_abort
unsigned int s_nevent_processed
unsigned int s_nevent_abort
struct tw_event::@130070134144252114152124341363102114315067064025 state
tw_stime recv_ts
Actual time to be received.
unsigned int critical_path
Critical path of this event.
unsigned char owner
Owner of the next/prev pointers; see tw_event_owner.
tw_lpid send_lp
sending LP ID for data collection uses
tw_bf cv
Used by app during reverse computation.
tw_lp * dest_lp
Destination LP ID.
tw_event * cancel_next
Next event in the cancel queue for the dest_pe.
tw_event_sig sig
Event signature, to be used by tiebreaker.
unsigned char cancel_q
Actively on a dest_lp->pe's cancel_q.
tw_eventq pevent_q
Events processed by LPs bound to this KP.
struct st_kp_stats * kp_stats
tw_event_sig last_sig
Event signature of the current event being processed.
tw_stime last_time
Time of the current event being processed.
tw_stat s_nevent_processed
Number of events processed.
long s_rb_total
Number of total rollbacks by this LP.
tw_kp * kp
kp – Kernel process that we belong to (must match pe).
tw_lptype * type
Type of this LP, including service callbacks.
struct st_lp_stats * lp_stats
unsigned int critical_path
Critical path value for this LP.
void * cur_state
Current application LP data.
unsigned int suspend_flag
event_f event
LP event handler routine.
revent_f revent
LP Reverse event handler routine.
commit_f commit
LP Commit event routine.
Holds the entire PE state.
tw_eventq free_q
Linked list of free tw_events.
tw_wtime end_time
When this PE finished its execution.
tw_petype type
Model defined PE type routines.
tw_eventq event_q
Linked list of events sent to this PE.
tw_pq * pq
Priority queue used to sort events.
tw_event * cancel_q
List of canceled events.
tw_stime GVT
Global Virtual Time.
unsigned char cev_abort
Current event being processed must be aborted.
tw_event * cur_event
Current event being processed.
unsigned char gvt_status
Bits available for gvt computation.
tw_statistics stats
per PE counters
tw_event_sig GVT_sig
Global Virtual Time Signature.
tw_wtime start_time
When this PE first started execution.
pe_final_f final
PE finalization routine.
pe_init_f post_lp_init
PE initialization routine, after LP init.
pe_init_f pre_lp_init
PE initialization routine, before LP init.
static void tw_sched_event_q(tw_pe *me)
static void tw_sched_batch_realtime(tw_pe *me)
static void tw_gvt_hook_step_seq(tw_pe *me)
void tw_scheduler_optimistic(tw_pe *me)
#define STIME_FROM_PE(pe)
static bool does_any_pe(bool val)
#define CMP_GVT_HOOK_TO_NEXT_IN_QUEUE(trigger, pe)
double g_tw_rollback_time
static void tw_gvt_hook_step(tw_pe *me)
static void reset_bitfields(tw_event *revent)
Reset the event bitfield prior to entering the event handler post-reverse - reset the bitfield so tha...
static bool is_gvt_past_hook_threshold(tw_pe *me)
void tw_scheduler_sequential_rollback_check(tw_pe *me)
void tw_scheduler_conservative(tw_pe *me)
static void tw_sched_cancel_q(tw_pe *me)
#define STIME_FROM_KP(kp)
void tw_scheduler_optimistic_realtime(tw_pe *me)
static void tw_sched_batch(tw_pe *me)
void tw_scheduler_optimistic_debug(tw_pe *me)
#define TRIGGER_ROLLBACK_TO_EVENT_TIME(kp, e)
void tw_scheduler_rollback_and_cancel_events_pe(tw_pe *pe)
void tw_scheduler_sequential(tw_pe *me)
void tw_sched_init(tw_pe *me)
#define CMP_KP_TO_EVENT_TIME(kp, e)