ROSS
tw-event.c
Go to the documentation of this file.
1#include <ross.h>
2#include <assert.h>
3
/*
 * INCREASE_ABORTED_COUNT(event): bump event->aborted_total by one.
 *
 * The counter only matters when USE_RAND_TIEBREAKER is defined; otherwise the
 * macro expands to nothing. The argument is parenthesized so that any pointer
 * expression (not just a plain identifier) expands correctly.
 */
#ifdef USE_RAND_TIEBREAKER
#define INCREASE_ABORTED_COUNT(event) ((event)->aborted_total++)
#else
#define INCREASE_ABORTED_COUNT(event)
#endif
9
10static inline void link_causality (tw_event *nev, tw_event *cev) {
11 nev->cause_next = cev->caused_by_me;
12 cev->caused_by_me = nev;
13}
14
/*
 * tw_event_send: hand a filled-in event to the engine for delivery.
 *
 * Paths visible in this listing, in order:
 *   1. `event` is the sending PE's abort_event placeholder: optionally flag
 *      cev_abort and return without sending anything.
 *   2. (USE_RIO only) recv_ts is at or past g_tw_ts_end: link the event to the
 *      PE's current event and return.
 *   3. The source LP type's map callback says the destination lives on this
 *      node (g_tw_mynode): either enqueue straight into the sending PE's
 *      priority queue and return, or push onto dest_pe->event_q.
 *   4. Otherwise the event is passed to tw_net_send().
 * After paths 3 (event_q case) and 4, if tw_gvt_inprogress() is true for the
 * sending PE, the PE's transient-message bound (trans_msg_sig / trans_msg_ts)
 * is replaced by this event's value unless the stored bound compares lower.
 *
 * NOTE(review): this text is a numbered documentation listing; listing lines
 * 32, 40 and 53 are absent from it, so the function as shown is incomplete.
 * See the notes at each gap and confirm against the repository source.
 */
15void tw_event_send(tw_event * event) {
16 tw_lp *src_lp = event->src_lp;
17 tw_pe *send_pe = src_lp->pe;
18 tw_pe *dest_pe = NULL;
19 tw_clock pq_start, net_start;
20
// tw_peid is unsigned long, so -1 converts to its maximum value. The variable
// is reassigned from the map callback before any visible read.
21 tw_peid dest_peid = -1;
22 tw_stime recv_ts = event->recv_ts;
23
// abort_event is the PE's placeholder event; it is never queued or sent.
24 if (event == send_pe->abort_event) {
25 // @bug In the "else" case. We need guarantee that the `event`'s parent
26 // is reversed! We hope it is reversed, but it might not be the case. Of
27 // course, the user will get an "--extramem" warning, but they won't be
28 // told that this might lead to a faulty simulation.
29 if (TW_STIME_DBL(recv_ts) < g_tw_ts_end) {
30 send_pe->cev_abort = 1;
31 }
// NOTE(review): listing line 32 is missing here. INCREASE_ABORTED_COUNT is
// defined at the top of this file but used on no visible line, so it
// presumably belongs on one of the missing lines -- confirm.
33 return;
34 }
35
36#ifdef USE_RIO
37 // rio saves events scheduled past end time
38 if (recv_ts >= g_tw_ts_end) {
39 link_causality(event, send_pe->cur_event);
// NOTE(review): listing line 40 is missing here; its content cannot be
// determined from this extract.
41 return;
42 }
43#endif
44
45 // moved from network-mpi.c in order to give all events a seq_num
46 event->event_id = (tw_eventid) ++send_pe->seq_num;
47
48 // call LP remote mapping function to get dest_pe
// At this point event->dest_lp is cast to tw_lpid and used as a global LP id;
// it is only replaced by a real tw_lp pointer on the local path below.
49 dest_peid = (*src_lp->type->map) ((tw_lpid) event->dest_lp);
50 event->send_lp = src_lp->gid;
51
52 //Trap lookahead violations
// NOTE(review): listing line 53 is missing here. The second closing brace
// after the tw_error() call has no visible opening statement, so an enclosing
// condition was dropped. The page's reference index lists
// g_tw_synchronization_protocol and CONSERVATIVE, which appear on no visible
// line; presumably they form that condition -- confirm.
54 if (TW_STIME_DBL(recv_ts) - TW_STIME_DBL(tw_now(src_lp)) < g_tw_lookahead) {
// NOTE(review): src_lp->gid and event->dest_lpid are tw_lpid (uint64_t) but
// are printed with %lu; that only matches where uint64_t is unsigned long.
// recv_ts is passed to %f without the TW_STIME_DBL() wrapper used above.
55 tw_error(TW_LOC, "Lookahead violation: decrease g_tw_lookahead %f\n"
56 "Event causing violation: src LP: %lu, src PE: %lu\n"
57 "dest LP %lu, dest PE %lu, recv_ts %f\n",
58 g_tw_lookahead, src_lp->gid, send_pe->id, event->dest_lpid,
59 dest_peid, recv_ts);
60 }
61 }
62
63 if (event->out_msgs) {
64 tw_error(TW_LOC, "It is an error to send an event with pre-loaded output message.");
65 }
66
// Attach the new event to the caused_by_me list of the event currently being
// processed on the sending PE.
67 link_causality(event, send_pe->cur_event);
68
69 if (dest_peid == g_tw_mynode) {
// Resolve the destination id stored in dest_lp into a local tw_lp pointer.
70 event->dest_lp = tw_getlocal_lp((tw_lpid) event->dest_lp);
71 dest_pe = event->dest_lp->pe;
72
// Fast-path test: same PE, and the destination KP's last processed
// signature/time compares <= the new event's.
73#ifdef USE_RAND_TIEBREAKER
74 if (send_pe == dest_pe && tw_event_sig_compare_ptr(&event->dest_lp->kp->last_sig, &event->sig) <= 0) {
75#else
76 if (send_pe == dest_pe && TW_STIME_CMP(event->dest_lp->kp->last_time, recv_ts) <= 0) {
77#endif
78 /* Fast case, we are sending to our own PE and there is
79 * no rollback caused by this send. We cannot have any
80 * transient messages on local sends so we can return.
81 */
// The enqueue is timed and charged to the PE's s_pq counter.
82 pq_start = tw_clock_read();
83 tw_pq_enqueue(send_pe->pq, event);
84 send_pe->stats.s_pq += tw_clock_read() - pq_start;
85 return;
86 } else {
87 /* Slower, but still local send, so put into top of
88 * dest_pe->event_q.
89 */
90 event->state.owner = TW_pe_event_q;
91
92 tw_eventq_push(&dest_pe->event_q, event);
93
94 if(send_pe != dest_pe) {
95 send_pe->stats.s_nsend_loc_remote++;
96 }
97 }
98 } else {
99 /* Slowest approach of all; this is not a local event.
100 * We need to send it over the network to the other PE
101 * for processing.
102 */
103 send_pe->stats.s_nsend_net_remote++;
104 //event->src_lp->lp_stats->s_nsend_net_remote++;
105 event->state.owner = TW_net_asend;
// The network hand-off is timed and charged to s_net_other.
106 net_start = tw_clock_read();
107 tw_net_send(event);
108 send_pe->stats.s_net_other += tw_clock_read() - net_start;
109 }
110
// Reached only from the event_q and network paths (the fast path returned).
// While a GVT computation is in progress, keep the PE's transient-message
// bound no greater than this event.
111 if(tw_gvt_inprogress(send_pe)) {
112#ifdef USE_RAND_TIEBREAKER
113 if (tw_event_sig_compare_ptr(&send_pe->trans_msg_sig, &event->sig) >= 0) {
114 tw_copy_event_sig(&send_pe->trans_msg_sig, &event->sig);
115 }
116#else
117 send_pe->trans_msg_ts = (TW_STIME_CMP(send_pe->trans_msg_ts, recv_ts) < 0) ? send_pe->trans_msg_ts : recv_ts;
118#endif
119 }
120}
121
122static inline void local_cancel(tw_pe *d, tw_event *event) {
123 event->state.cancel_q = 1;
124
125 event->cancel_next = d->cancel_q;
126 d->cancel_q = event;
127}
128
129static inline void event_cancel(tw_event * event) {
130 tw_pe *send_pe = event->src_lp->pe;
131 tw_peid dest_peid;
132 tw_clock net_start;
133
134 if( event->state.owner == TW_net_asend ||
135 event->state.owner == TW_net_outq || // need to consider this case - Chris 06/13/2018
136 event->state.owner == TW_pe_sevent_q) {
137 /* Slowest approach of all; this has to be sent over the
138 * network to let the dest_pe know it shouldn't have seen
139 * it in the first place.
140 */
141 net_start = tw_clock_read();
142 tw_net_cancel(event);
143 send_pe->stats.s_nsend_net_remote--;
144 send_pe->stats.s_net_other += tw_clock_read() - net_start;
145 //event->src_lp->lp_stats->s_nsend_net_remote--;
146
147 if(tw_gvt_inprogress(send_pe)) {
148#ifdef USE_RAND_TIEBREAKER
149 if (tw_event_sig_compare_ptr(&send_pe->trans_msg_sig, &event->sig) >= 0) {
150 tw_copy_event_sig(&send_pe->trans_msg_sig, &event->sig);
151 }
152#else
153 send_pe->trans_msg_ts = (TW_STIME_CMP(send_pe->trans_msg_ts, event->recv_ts) < 0) ? send_pe->trans_msg_ts : event->recv_ts;
154#endif
155 }
156
157 return;
158 }
159
160#ifdef USE_RIO
161 if (event->state.owner == IO_buffer) {
162 io_event_cancel(event);
163 return;
164 }
165#endif
166
167 dest_peid = event->dest_lp->pe->id;
168
169 tw_clock pq_start;
170 if (send_pe->id == dest_peid) {
171 switch (event->state.owner) {
172 case TW_pe_pq:
173 /* Currently in our pq and not processed; delete it and
174 * free the event buffer immediately. No need to wait.
175 */
176 pq_start = tw_clock_read();
177 tw_pq_delete_any(send_pe->pq, event);
178 send_pe->stats.s_pq += tw_clock_read() - pq_start;
179 tw_event_free(send_pe, event);
180 break;
181
182 case TW_pe_event_q:
183 case TW_kp_pevent_q:
184 local_cancel(send_pe, event);
185
186 if(tw_gvt_inprogress(send_pe)) {
187#ifdef USE_RAND_TIEBREAKER
188 if (tw_event_sig_compare_ptr(&send_pe->trans_msg_sig, &event->sig) >= 0) {
189 tw_copy_event_sig(&send_pe->trans_msg_sig, &event->sig);
190 }
191#else
192 send_pe->trans_msg_ts = (TW_STIME_CMP(send_pe->trans_msg_ts, event->recv_ts) < 0) ? send_pe->trans_msg_ts : event->recv_ts;
193#endif
194 }
195 break;
196
197 default:
198 tw_error(TW_LOC, "unknown fast local cancel owner %d", event->state.owner);
199 }
200 } else if ((unsigned long)send_pe->id == dest_peid) {
201 /* Slower, but still a local cancel, so put into
202 * top of dest_pe->cancel_q for final deletion.
203 */
204 local_cancel(event->dest_lp->pe, event);
205 send_pe->stats.s_nsend_loc_remote--;
206
207 if(tw_gvt_inprogress(send_pe)) {
208#ifdef USE_RAND_TIEBREAKER
209 if (tw_event_sig_compare_ptr(&send_pe->trans_msg_sig, &event->sig) >= 0) {
210 tw_copy_event_sig(&send_pe->trans_msg_sig, &event->sig);
211 }
212#else
213 send_pe->trans_msg_ts = (TW_STIME_CMP(send_pe->trans_msg_ts, event->recv_ts) < 0) ? send_pe->trans_msg_ts : event->recv_ts;
214#endif
215 }
216 } else {
217 tw_error(TW_LOC, "Should be remote cancel!");
218 }
219}
220
/*
 * Body of tw_event_rollback.
 *
 * NOTE(review): the function's opening line (listing line 221) is missing
 * from this extract. The page's reference index gives the signature as
 * `void tw_event_rollback(tw_event *event)`, defined at tw-event.c:221.
 * Listing lines 237 and 296 are missing too; see the notes at each gap and
 * confirm against the repository source.
 *
 * Visible steps, in order:
 *   1. Free the event's output messages and make it the PE's cur_event and
 *      the KP's last processed signature/time.
 *   2. If the destination LP is suspended, either lift the suspension (when
 *      this is the suspending event) or skip the reverse handler.
 *   3. Call the LP type's reverse handler (revent) and restore critical_path.
 *   4. Release the event's delta_buddy memory, if any.
 *   5. Cancel every event on this event's caused_by_me list.
 *   6. Drain aborted_total (USE_RAND_TIEBREAKER only), clear caused_by_me,
 *      and bump the KP / LP rolled-back-event counters.
 */
222 tw_event *e = event->caused_by_me;
223 tw_lp *dest_lp = event->dest_lp;
224
225 tw_free_output_messages(event, 0);
226
227 dest_lp->pe->cur_event = event;
228#ifdef USE_RAND_TIEBREAKER
229 dest_lp->kp->last_sig = event->sig;
230#else
231 dest_lp->kp->last_time = event->recv_ts;
232#endif
233
// Is this the very event that put the LP into the suspended state?
234 if( dest_lp->suspend_flag &&
235 dest_lp->suspend_event == event &&
236 // Must test time stamp since events are reused once GVT sweeps by
// NOTE(review): listing line 237 is missing here. The #else / #endif below
// have no visible opening conditional; going by the other blocks in this file
// it is presumably `#ifdef USE_RAND_TIEBREAKER` -- confirm.
238 tw_event_sig_compare_ptr(&dest_lp->suspend_sig, &event->sig) == 0)
239#else
240 TW_STIME_CMP(dest_lp->suspend_time, event->recv_ts) == 0)
241#endif
242 {
243 // unsuspend the LP
244 dest_lp->suspend_flag = 0;
245 dest_lp->suspend_event = NULL;
246#ifdef USE_RAND_TIEBREAKER
247 dest_lp->suspend_sig.recv_ts = TW_STIME_CRT(0.0);
248 dest_lp->suspend_sig.priority = 0;
249 dest_lp->suspend_sig.tie_lineage_length = 0;
250#else
251 dest_lp->suspend_time = TW_STIME_CRT(0.0);
252#endif
253 dest_lp->suspend_error_number = 0;
254
// suspend_do_orig_event_rc decides whether the suspending event still gets
// its reverse handler run.
255 if( dest_lp->suspend_do_orig_event_rc == 0 )
256 {
257 goto jump_over_rc_event_handler;
258 }
259 else
260 { // reset
261 dest_lp->suspend_do_orig_event_rc = 0;
262 // note, should fall thru and process reverse events
263 }
264 }
265 else if( dest_lp->suspend_flag )
266 { // don't rc this event since it was never forward processed
267 goto jump_over_rc_event_handler;
268 }
269
// Run the model's reverse event handler with the LP state, the event's
// bitfield (cv) and the event payload.
270 (*dest_lp->type->revent)(dest_lp->cur_state, &event->cv, tw_event_data(event), dest_lp);
271
272 // reset critical path
273 dest_lp->critical_path = event->critical_path;
274
// Everything from this label on runs whether or not the reverse handler ran.
275jump_over_rc_event_handler:
// Return the event's delta memory to the buddy allocator; the time spent is
// charged to g_tw_pe->stats.s_buddy.
276 if (event->delta_buddy) {
277 tw_clock start = tw_clock_read();
278 buddy_free(event->delta_buddy);
279 g_tw_pe->stats.s_buddy += (tw_clock_read() - start);
280 event->delta_buddy = 0;
281 }
282
// Walk the caused_by_me list, unlinking and cancelling each child event. The
// next pointer is saved first because it is cleared before event_cancel().
283 while (e) {
284 tw_event *n = e->cause_next;
285 e->cause_next = NULL;
286#ifdef USE_RAND_TIEBREAKER
287 tw_rand_reverse_unif(e->src_lp->core_rng); //undo the tiebreaker rng advanced by this LP for the subsequent event
288#endif
289 event_cancel(e);
290 e = n;
291 }
292
293#ifdef USE_RAND_TIEBREAKER
294 // undo the tiebreaker rng advanced by this LP for subsequent events that were not scheduled (aborted)
295 while (event->aborted_total) {
// NOTE(review): listing line 296 is missing here. As shown, the loop only
// counts aborted_total down to zero. The comment above describes undoing RNG
// draws, so an RNG-reversal call presumably sat on the missing line --
// confirm.
297 event->aborted_total--;
298 }
299#endif
300
301 event->caused_by_me = NULL;
302
// Count the rolled-back event on the KP and in the KP / LP instrumentation
// statistics.
303 dest_lp->kp->s_e_rbs++;
304 // instrumentation
305 dest_lp->kp->kp_stats->s_e_rbs++;
306 dest_lp->lp_stats->s_e_rbs++;
307}
#define USE_RAND_TIEBREAKER
Definition config.h:20
void buddy_free(void *ptr)
Definition buddy.c:137
static tw_clock tw_clock_read(void)
Definition aarch64.h:8
uint64_t tw_clock
Definition aarch64.h:6
static int tw_gvt_inprogress(tw_pe *pe)
void tw_net_cancel(tw_event *)
Cancel the given remote event by either removing from the outq or sending an antimessage,...
void tw_net_send(tw_event *)
Adds the event to the outgoing queue of events to be sent, polls for finished sends,...
void tw_pq_enqueue(tw_pq *, tw_event *)
Definition splay.c:245
void tw_pq_delete_any(tw_pq *, tw_event *)
Definition splay.c:341
void io_event_cancel(tw_event *e)
Definition io-mpi.c:70
#define TW_STIME_DBL(x)
Definition ross-base.h:42
unsigned long tw_peid
Definition ross-base.h:36
#define TW_STIME_CRT(x)
Definition ross-base.h:41
#define TW_STIME_CMP(x, y)
Definition ross-base.h:43
double tw_stime
Definition ross-base.h:39
uint64_t tw_lpid
Definition ross-base.h:49
static void * tw_event_data(tw_event *event)
tw_pe * g_tw_pe
Definition ross-global.c:79
static void tw_free_output_messages(tw_event *e, int print_message)
static void tw_event_free(tw_pe *, tw_event *)
void tw_error(const char *file, int line, const char *fmt,...)
Definition tw-util.c:77
tw_peid g_tw_mynode
Definition ross-global.c:92
tw_synch g_tw_synchronization_protocol
Definition ross-global.c:19
double g_tw_lookahead
Definition ross-global.c:53
double g_tw_ts_end
Definition ross-global.c:72
#define TW_LOC
static tw_stime tw_now(tw_lp const *lp)
static tw_lp * tw_getlocal_lp(tw_lpid gid)
#define tw_rand_reverse_unif(G)
static int tw_event_sig_compare_ptr(tw_event_sig const *e_sig, tw_event_sig const *n_sig)
Definition ross-types.h:512
static void tw_copy_event_sig(tw_event_sig *e, tw_event_sig const *sig)
Definition ross-types.h:493
@ CONSERVATIVE
Definition ross-types.h:38
@ TW_pe_event_q
In a tw_pe.event_q list.
Definition ross-types.h:224
@ TW_pe_sevent_q
In tw_pe.sevent_q.
Definition ross-types.h:231
@ TW_kp_pevent_q
In a tw_kp.pevent_q.
Definition ross-types.h:226
@ TW_net_asend
Network transmission in progress.
Definition ross-types.h:229
@ TW_pe_pq
In a tw_pe.pq.
Definition ross-types.h:225
@ TW_net_outq
Pending network transmission.
Definition ross-types.h:228
unsigned int tw_eventid
Definition ross-types.h:56
static void tw_eventq_push(tw_eventq *q, tw_event *e)
Definition tw-eventq.h:281
unsigned int s_e_rbs
unsigned int s_e_rbs
double priority
Definition ross-types.h:261
tw_stime recv_ts
Definition ross-types.h:260
unsigned int tie_lineage_length
Definition ross-types.h:262
Event Structure.
Definition ross-types.h:277
struct tw_event::@130070134144252114152124341363102114315067064025 state
tw_event * cause_next
Next in parent's caused_by_me chain.
Definition ross-types.h:289
tw_lp * src_lp
Sending LP ID.
Definition ross-types.h:313
tw_out * out_msgs
Output messages.
Definition ross-types.h:321
tw_stime recv_ts
Actual time to be received.
Definition ross-types.h:314
tw_lpid dest_lpid
Definition ross-types.h:318
unsigned char owner
Owner of the next/prev pointers; see tw_event_owner.
Definition ross-types.h:300
void * delta_buddy
Delta memory from buddy allocator.
Definition ross-types.h:307
tw_event * caused_by_me
Start of event list caused by this event.
Definition ross-types.h:288
tw_bf cv
Used by app during reverse computation.
Definition ross-types.h:306
tw_lp * dest_lp
Destination LP ID.
Definition ross-types.h:312
tw_event_sig sig
Event signature, to be used by tiebreaker.
Definition ross-types.h:294
size_t aborted_total
Number of events caused by this event that were not scheduled.
Definition ross-types.h:295
long s_e_rbs
Number of events rolled back by this LP.
Definition ross-types.h:404
struct st_kp_stats * kp_stats
Definition ross-types.h:407
tw_event_sig last_sig
Event signature of the current event being processed.
Definition ross-types.h:398
tw_stime last_time
Time of the current event being processed.
Definition ross-types.h:401
LP State Structure.
Definition ross-types.h:336
tw_event * suspend_event
Definition ross-types.h:360
tw_pe * pe
Definition ross-types.h:340
tw_event_sig suspend_sig
Definition ross-types.h:362
tw_kp * kp
kp – Kernel process that we belong to (must match pe).
Definition ross-types.h:345
tw_lpid gid
global LP id
Definition ross-types.h:338
tw_lptype * type
Type of this LP, including service callbacks.
Definition ross-types.h:348
tw_rng_stream * core_rng
RNG stream array for ROSS non-model operation - possible alternative to a model_rng pointer array.
Definition ross-types.h:350
unsigned int suspend_do_orig_event_rc
Definition ross-types.h:367
struct st_lp_stats * lp_stats
Definition ross-types.h:356
unsigned int suspend_error_number
Definition ross-types.h:366
unsigned int critical_path
Critical path value for this LP.
Definition ross-types.h:352
void * cur_state
Current application LP data.
Definition ross-types.h:347
unsigned int suspend_flag
Definition ross-types.h:368
tw_stime suspend_time
Definition ross-types.h:364
map_f map
LP Mapping of LP gid -> remote PE routine.
Definition ross-types.h:104
revent_f revent
LP Reverse event handler routine.
Definition ross-types.h:101
Holds the entire PE state.
Definition ross-types.h:416
tw_eventq event_q
Linked list of events sent to this PE.
Definition ross-types.h:420
tw_pq * pq
Priority queue used to sort events.
Definition ross-types.h:422
tw_event_sig trans_msg_sig
Last transient messages' time signature.
Definition ross-types.h:444
tw_event * cancel_q
List of canceled events.
Definition ross-types.h:421
unsigned char cev_abort
Current event being processed must be aborted.
Definition ross-types.h:440
tw_stime trans_msg_ts
Last transient messages' time stamp.
Definition ross-types.h:450
tw_peid id
Definition ross-types.h:417
tw_event * cur_event
Current event being processed.
Definition ross-types.h:426
tw_event * abort_event
Placeholder event for when free_q is empty.
Definition ross-types.h:425
tw_statistics stats
per PE counters
Definition ross-types.h:463
tw_eventid seq_num
Array of remote send counters for hashing on, size == g_tw_npe.
Definition ross-types.h:468
tw_stat s_nsend_loc_remote
Definition ross-types.h:135
tw_clock s_pq
Definition ross-types.h:153
tw_clock s_net_other
Definition ross-types.h:147
tw_stat s_nsend_net_remote
Definition ross-types.h:136
static void link_causality(tw_event *nev, tw_event *cev)
Definition tw-event.c:10
#define INCREASE_ABORTED_COUNT(event)
Definition tw-event.c:7
static void event_cancel(tw_event *event)
Definition tw-event.c:129
void tw_event_send(tw_event *event)
Definition tw-event.c:15
void tw_event_rollback(tw_event *event)
Definition tw-event.c:221
static void local_cancel(tw_pe *d, tw_event *event)
Definition tw-event.c:122