ROSS
tw-event.c
Go to the documentation of this file.
1 #include <ross.h>
2 #include <assert.h>
3 
4 static inline void link_causality (tw_event *nev, tw_event *cev) {
5  nev->cause_next = cev->caused_by_me;
6  cev->caused_by_me = nev;
7 }
8 
9 void tw_event_send(tw_event * event) {
10  tw_lp *src_lp = event->src_lp;
11  tw_pe *send_pe = src_lp->pe;
12  tw_pe *dest_pe = NULL;
13  tw_clock pq_start, net_start;
14 
15  tw_peid dest_peid = -1;
16  tw_stime recv_ts = event->recv_ts;
17 
18  if (event == send_pe->abort_event) {
19  if (TW_STIME_DBL(recv_ts) < g_tw_ts_end) {
20  send_pe->cev_abort = 1;
21  }
22  return;
23  }
24 
25 #ifdef USE_RIO
26  // rio saves events scheduled past end time
27  if (recv_ts >= g_tw_ts_end) {
28  link_causality(event, send_pe->cur_event);
29  return;
30  }
31 #endif
32 
33  // moved from network-mpi.c in order to give all events a seq_num
34  event->event_id = (tw_eventid) ++send_pe->seq_num;
35 
36  // call LP remote mapping function to get dest_pe
37  dest_peid = (*src_lp->type->map) ((tw_lpid) event->dest_lp);
38  event->send_lp = src_lp->gid;
39 
40  //Trap lookahead violations
42  if (TW_STIME_DBL(recv_ts) - TW_STIME_DBL(tw_now(src_lp)) < g_tw_lookahead) {
43  tw_error(TW_LOC, "Lookahead violation: decrease g_tw_lookahead %f\n"
44  "Event causing violation: src LP: %lu, src PE: %lu\n"
45  "dest LP %lu, dest PE %lu, recv_ts %f\n",
46  g_tw_lookahead, src_lp->gid, send_pe->id, event->dest_lpid,
47  dest_peid, recv_ts);
48  }
49  }
50 
51  if (event->out_msgs) {
52  tw_error(TW_LOC, "It is an error to send an event with pre-loaded output message.");
53  }
54 
55  link_causality(event, send_pe->cur_event);
56 
57  if (dest_peid == g_tw_mynode) {
58  event->dest_lp = tw_getlocal_lp((tw_lpid) event->dest_lp);
59  dest_pe = event->dest_lp->pe;
60 
61  if (send_pe == dest_pe && TW_STIME_CMP(event->dest_lp->kp->last_time, recv_ts) <= 0) {
62  /* Fast case, we are sending to our own PE and there is
63  * no rollback caused by this send. We cannot have any
64  * transient messages on local sends so we can return.
65  */
66  pq_start = tw_clock_read();
67  tw_pq_enqueue(send_pe->pq, event);
68  send_pe->stats.s_pq += tw_clock_read() - pq_start;
69  return;
70  } else {
71  /* Slower, but still local send, so put into top of
72  * dest_pe->event_q.
73  */
74  event->state.owner = TW_pe_event_q;
75 
76  tw_eventq_push(&dest_pe->event_q, event);
77 
78  if(send_pe != dest_pe) {
79  send_pe->stats.s_nsend_loc_remote++;
80  }
81  }
82  } else {
83  /* Slowest approach of all; this is not a local event.
84  * We need to send it over the network to the other PE
85  * for processing.
86  */
87  send_pe->stats.s_nsend_net_remote++;
88  //event->src_lp->lp_stats->s_nsend_net_remote++;
89  event->state.owner = TW_net_asend;
90  net_start = tw_clock_read();
91  tw_net_send(event);
92  send_pe->stats.s_net_other += tw_clock_read() - net_start;
93  }
94 
95  if(tw_gvt_inprogress(send_pe)) {
96  send_pe->trans_msg_ts = (TW_STIME_CMP(send_pe->trans_msg_ts, recv_ts) < 0) ? send_pe->trans_msg_ts : recv_ts;
97  }
98 }
99 
100 static inline void local_cancel(tw_pe *d, tw_event *event) {
101  event->state.cancel_q = 1;
102 
103  event->cancel_next = d->cancel_q;
104  d->cancel_q = event;
105 }
106 
107 static inline void event_cancel(tw_event * event) {
108  tw_pe *send_pe = event->src_lp->pe;
109  tw_peid dest_peid;
110  tw_clock net_start;
111 
112  if( event->state.owner == TW_net_asend ||
113  event->state.owner == TW_net_outq || // need to consider this case - Chris 06/13/2018
114  event->state.owner == TW_pe_sevent_q) {
115  /* Slowest approach of all; this has to be sent over the
116  * network to let the dest_pe know it shouldn't have seen
117  * it in the first place.
118  */
119  net_start = tw_clock_read();
120  tw_net_cancel(event);
121  send_pe->stats.s_nsend_net_remote--;
122  send_pe->stats.s_net_other += tw_clock_read() - net_start;
123  //event->src_lp->lp_stats->s_nsend_net_remote--;
124 
125  if(tw_gvt_inprogress(send_pe)) {
126  send_pe->trans_msg_ts = (TW_STIME_CMP(send_pe->trans_msg_ts, event->recv_ts) < 0) ? send_pe->trans_msg_ts : event->recv_ts;
127  }
128 
129  return;
130  }
131 
132 #ifdef USE_RIO
133  if (event->state.owner == IO_buffer) {
134  io_event_cancel(event);
135  return;
136  }
137 #endif
138 
139  dest_peid = event->dest_lp->pe->id;
140 
141  tw_clock pq_start;
142  if (send_pe->id == dest_peid) {
143  switch (event->state.owner) {
144  case TW_pe_pq:
145  /* Currently in our pq and not processed; delete it and
146  * free the event buffer immediately. No need to wait.
147  */
148  pq_start = tw_clock_read();
149  tw_pq_delete_any(send_pe->pq, event);
150  send_pe->stats.s_pq += tw_clock_read() - pq_start;
151  tw_event_free(send_pe, event);
152  break;
153 
154  case TW_pe_event_q:
155  case TW_kp_pevent_q:
156  local_cancel(send_pe, event);
157 
158  if(tw_gvt_inprogress(send_pe)) {
159  send_pe->trans_msg_ts = (TW_STIME_CMP(send_pe->trans_msg_ts, event->recv_ts) < 0) ? send_pe->trans_msg_ts : event->recv_ts;
160  }
161  break;
162 
163  default:
164  tw_error(TW_LOC, "unknown fast local cancel owner %d", event->state.owner);
165  }
166  } else if ((unsigned long)send_pe->id == dest_peid) {
167  /* Slower, but still a local cancel, so put into
168  * top of dest_pe->cancel_q for final deletion.
169  */
170  local_cancel(event->dest_lp->pe, event);
171  send_pe->stats.s_nsend_loc_remote--;
172 
173  if(tw_gvt_inprogress(send_pe)) {
174  send_pe->trans_msg_ts = (TW_STIME_CMP(send_pe->trans_msg_ts, event->recv_ts) < 0) ? send_pe->trans_msg_ts : event->recv_ts;
175  }
176  } else {
177  tw_error(TW_LOC, "Should be remote cancel!");
178  }
179 }
180 
182  tw_event *e = event->caused_by_me;
183  tw_lp *dest_lp = event->dest_lp;
184 
185  tw_free_output_messages(event, 0);
186 
187  dest_lp->pe->cur_event = event;
188  dest_lp->kp->last_time = event->recv_ts;
189 
190  if( dest_lp->suspend_flag &&
191  dest_lp->suspend_event == event &&
192  // Must test time stamp since events are reused once GVT sweeps by
193  TW_STIME_CMP(dest_lp->suspend_time, event->recv_ts) == 0)
194  {
195  // unsuspend the LP
196  dest_lp->suspend_flag = 0;
197  dest_lp->suspend_event = NULL;
198  dest_lp->suspend_time = TW_STIME_CRT(0.0);
199  dest_lp->suspend_error_number = 0;
200 
201  if( dest_lp->suspend_do_orig_event_rc == 0 )
202  {
203  goto jump_over_rc_event_handler;
204  }
205  else
206  { // reset
207  dest_lp->suspend_do_orig_event_rc = 0;
208  // note, should fall thru and process reverse events
209  }
210  }
211  else if( dest_lp->suspend_flag )
212  { // don't rc this event since it was never forward processed
213  goto jump_over_rc_event_handler;
214  }
215 
216  (*dest_lp->type->revent)(dest_lp->cur_state, &event->cv, tw_event_data(event), dest_lp);
217 
218  // reset critical path
219  dest_lp->critical_path = event->critical_path;
220 
221 jump_over_rc_event_handler:
222  if (event->delta_buddy) {
223  tw_clock start = tw_clock_read();
224  buddy_free(event->delta_buddy);
225  g_tw_pe->stats.s_buddy += (tw_clock_read() - start);
226  event->delta_buddy = 0;
227  }
228 
229  while (e) {
230  tw_event *n = e->cause_next;
231  e->cause_next = NULL;
232 
233  event_cancel(e);
234  e = n;
235  }
236 
237  event->caused_by_me = NULL;
238 
239  dest_lp->kp->s_e_rbs++;
240  // instrumentation
241  dest_lp->kp->kp_stats->s_e_rbs++;
242  dest_lp->lp_stats->s_e_rbs++;
243 }
tw_synch g_tw_synchronization_protocol
Definition: ross-global.c:18
#define TW_LOC
Definition: ross-extern.h:164
void * delta_buddy
Delta memory from buddy allocator.
Definition: ross-types.h:275
tw_clock s_net_other
Definition: ross-types.h:137
tw_lp * dest_lp
Destination LP ID.
Definition: ross-types.h:280
tw_eventq event_q
Linked list of events sent to this PE.
Definition: ross-types.h:379
void tw_net_cancel(tw_event *e)
Cancel the given remote event by either removing from the outq or sending an antimessage, depending on the status of the original positive send.
Definition: network-mpi.c:595
tw_event * suspend_event
Definition: ross-types.h:327
double tw_stime
Definition: ross.h:150
revent_f revent
LP Reverse event handler routine.
Definition: ross-types.h:91
#define TW_STIME_CRT(x)
Definition: ross.h:152
tw_clock s_pq
Definition: ross-types.h:143
double g_tw_ts_end
Definition: ross-global.c:68
struct st_lp_stats * lp_stats
Definition: ross-types.h:323
unsigned int critical_path
Critical path value for this LP.
Definition: ross-types.h:319
void tw_error(const char *file, int line, const char *fmt,...) NORETURN
Definition: tw-util.c:74
tw_lptype * type
Type of this LP, including service callbacks.
Definition: ross-types.h:316
struct st_kp_stats * kp_stats
Definition: ross-types.h:366
tw_statistics stats
per PE counters
Definition: ross-types.h:415
unsigned int s_e_rbs
tw_stime recv_ts
Actual time to be received.
Definition: ross-types.h:282
static tw_clock tw_clock_read(void)
Definition: aarch64.h:6
static void tw_event_free(tw_pe *, tw_event *)
map_f map
LP Mapping of LP gid -> remote PE routine.
Definition: ross-types.h:94
tw_stime trans_msg_ts
Last transient messages' time stamp.
Definition: ross-types.h:402
Holds the entire PE state.
Definition: ross-types.h:375
Network transmission in progress.
Definition: ross-types.h:219
unsigned int suspend_flag
Definition: ross-types.h:331
tw_event * cancel_q
List of canceled events.
Definition: ross-types.h:380
tw_bf cv
Used by app during reverse computation.
Definition: ross-types.h:274
In a tw_kp.pevent_q.
Definition: ross-types.h:216
tw_out * out_msgs
Output messages.
Definition: ross-types.h:289
static void event_cancel(tw_event *event)
Definition: tw-event.c:107
uint64_t tw_lpid
Definition: ross.h:160
tw_eventid seq_num
Array of remote send counters for hashing on, size == g_tw_npe.
Definition: ross-types.h:420
tw_kp * kp
kp – Kernel process that we belong to (must match pe).
Definition: ross-types.h:313
unsigned int suspend_do_orig_event_rc
Definition: ross-types.h:330
tw_stat s_nsend_net_remote
Definition: ross-types.h:126
Event Structure.
Definition: ross-types.h:250
void tw_pq_delete_any(splay_tree *st, tw_event *r)
Definition: splay.c:288
tw_stime suspend_time
Definition: ross-types.h:328
tw_lpid gid
global LP id
Definition: ross-types.h:306
#define TW_STIME_CMP(x, y)
Definition: ross.h:154
tw_event * abort_event
Placeholder event for when free_q is empty.
Definition: ross-types.h:384
tw_clock s_buddy
Definition: ross-types.h:149
static tw_stime tw_now(tw_lp const *lp)
tw_event * cur_event
Current event being processed.
Definition: ross-types.h:385
static void * tw_event_data(tw_event *event)
unsigned int suspend_error_number
Definition: ross-types.h:329
static int tw_gvt_inprogress(tw_pe *pe)
Definition: mpi_allreduce.h:8
void tw_pq_enqueue(splay_tree *st, tw_event *e)
Definition: splay.c:195
tw_event * cause_next
Next in parent's caused_by_me chain.
Definition: ross-types.h:262
tw_event * caused_by_me
Start of event list caused by this event.
Definition: ross-types.h:261
unsigned int s_e_rbs
tw_peid g_tw_mynode
Definition: ross-global.c:88
tw_lpid dest_lpid
Definition: ross-types.h:286
long s_e_rbs
Number of events rolled back by this LP.
Definition: ross-types.h:363
tw_pq * pq
Priority queue used to sort events.
Definition: ross-types.h:381
static void link_causality(tw_event *nev, tw_event *cev)
Definition: tw-event.c:4
Pending network transmission.
Definition: ross-types.h:218
static void tw_eventq_push(tw_eventq *q, tw_event *e)
Definition: tw-eventq.h:257
void buddy_free(void *ptr)
Definition: buddy.c:137
static void tw_free_output_messages(tw_event *e, int print_message)
In tw_pe.sevent_q.
Definition: ross-types.h:221
tw_stat s_nsend_loc_remote
Definition: ross-types.h:125
tw_pe * g_tw_pe
Definition: ross-global.c:75
struct tw_event::@0 state
tw_pe * pe
Definition: ross-types.h:308
unsigned long tw_peid
Definition: ross.h:147
void tw_event_send(tw_event *event)
Definition: tw-event.c:9
uint64_t tw_clock
Definition: aarch64.h:4
void * cur_state
Current application LP data.
Definition: ross-types.h:315
void tw_net_send(tw_event *e)
Adds the event to the outgoing queue of events to be sent, polls for finished sends, and attempts to start sends from outq.
Definition: network-mpi.c:578
static void local_cancel(tw_pe *d, tw_event *event)
Definition: tw-event.c:100
tw_peid id
Definition: ross-types.h:376
void io_event_cancel(tw_event *e)
Definition: io-mpi.c:70
static tw_lp * tw_getlocal_lp(tw_lpid gid)
In a tw_pe.pq.
Definition: ross-types.h:215
double g_tw_lookahead
Definition: ross-global.c:49
unsigned int tw_eventid
Definition: ross-types.h:46
#define TW_STIME_DBL(x)
Definition: ross.h:153
tw_stime last_time
Time of the current event being processed.
Definition: ross-types.h:360
unsigned char cev_abort
Current event being processed must be aborted.
Definition: ross-types.h:399
void tw_event_rollback(tw_event *event)
Definition: tw-event.c:181
LP State Structure.
Definition: ross-types.h:304
In a tw_pe.event_q list.
Definition: ross-types.h:214
unsigned char owner
Owner of the next/prev pointers; see tw_event_owner.
Definition: ross-types.h:268