ROSS
tw-lp.c
Go to the documentation of this file.
1 #include <ross.h>
2 
3 /**
4  * @file tw-lp.c
5  * @brief tw_lp_settype is defined here!
6  */
7 
8 // IMPORTANT: this function replaces tw_lp_settype
9 // g_tw_lp_types must be defined
10 // g_tw_lp_typemap must be defined
12  if ( !g_tw_lp_types ) {
13  tw_error(TW_LOC, "No LP types are defined");
14  }
15 
16  if ( !g_tw_lp_typemap ) {
17  tw_error(TW_LOC, "No LP type mapping is defined");
18  }
19 
20  unsigned int i;
21  for (i = 0; i < g_tw_nlp; i++) {
22  tw_lp *lp = g_tw_lp[i];
24 
25  if (g_st_ev_trace)
27  }
28 
29 }
30 
31 /**
32  * IMPORTANT: This function should be called after tw_define_lps. It
33  * copies the function pointers which define the LP to the appropriate
34  * location for *each* LP, i.e. you probably want to call this more than
35  * once.
36  */
37 void
39 {
40  tw_lp *lp = g_tw_lp[id];
41 
42  if(id >= g_tw_nlp + g_st_analysis_nlp)
43  tw_error(TW_LOC, "ID %ld exceeded MAX LPs (%ld)", id, g_tw_nlp + g_st_analysis_nlp);
44 
45  if(!lp || !lp->pe)
46  tw_error(TW_LOC, "LP %u has no PE assigned.", lp->gid);
47 
48  // memcpy(&lp->type, type, sizeof(*type));
49  lp->type = type;
50 
51  if (type->state_sz > g_tw_delta_sz) {
52  g_tw_delta_sz = type->state_sz;
53  }
54 }
55 
56 void
58 {
59  if(id >= g_tw_nlp + g_st_analysis_nlp)
60  tw_error(TW_LOC, "ID %d exceeded MAX LPs", id);
61 
62  if(g_tw_lp[id])
63  tw_error(TW_LOC, "LP already allocated: %lld\n", id);
64 
65  g_tw_lp[id] = (tw_lp *) tw_calloc(TW_LOC, "Local LP", sizeof(tw_lp), 1);
66 
67  g_tw_lp[id]->gid = gid;
68  g_tw_lp[id]->id = id;
69  g_tw_lp[id]->pe = pe;
70 }
71 
72 void
73 tw_lp_onkp(tw_lp * lp, tw_kp * kp)
74 {
75  if(!lp)
76  tw_error(TW_LOC, "Bad LP pointer!");
77 
78  lp->kp = kp;
79  kp->lp_count++;
80 }
81 
82 void
84 {
85  tw_lpid i;
86  int j;
87 
88  for(i = 0; i < g_tw_nlp + g_st_analysis_nlp; i++)
89  {
90  tw_lp * lp = g_tw_lp[i];
91 
92  if (lp->pe != me)
93  continue;
94 
95  // Allocate initial state vector for this LP
96  if(!lp->cur_state) {
97  lp->cur_state = tw_calloc(TW_LOC, "state vector", lp->type->state_sz, 1);
98  }
99 
100  lp->lp_stats = (st_lp_stats*) tw_calloc(TW_LOC, "LP instrumentation", sizeof(st_lp_stats), 1);
101  for (j = 0; j < 3; j++)
102  lp->last_stats[j] = (st_lp_stats*) tw_calloc(TW_LOC, "LP instrumentation", sizeof(st_lp_stats), 1);
103 
104 #ifndef USE_RIO
105  if (lp->type->init)
106  {
107  me->cur_event = me->abort_event;
108  me->cur_event->caused_by_me = NULL;
109 
110  (*(init_f)lp->type->init) (lp->cur_state, lp);
111 
112  if (me->cev_abort)
113  tw_error(TW_LOC, "ran out of events during init");
114  }
115 #endif
116  }
117 #ifdef USE_RIO
118  // RIO requires that all LPs have been allocated
119  if (g_io_load_at == PRE_INIT || g_io_load_at == INIT) {
120  tw_clock start = tw_clock_read();
122  me->stats.s_rio_load += (tw_clock_read() - start);
123  }
124  if (g_io_load_at != INIT) {
125  tw_clock start = tw_clock_read();
126  for (i = 0; i < g_tw_nlp; i++) {
127  tw_lp * lp = g_tw_lp[i];
128  me->cur_event = me->abort_event;
129  me->cur_event->caused_by_me = NULL;
130 
131  (*(init_f)lp->type->init) (lp->cur_state, lp);
132 
133  if (me->cev_abort) {
134  tw_error(TW_LOC, "ran out of events during init");
135  }
136  }
137  me->stats.s_rio_lp_init += (tw_clock_read() - start);
138  }
139  if (g_io_load_at == POST_INIT) {
140  tw_clock start = tw_clock_read();
142  me->stats.s_rio_load += (tw_clock_read() - start);
143  }
144 #endif
145 }
146 
147 void tw_pre_run_lps (tw_pe * me) {
148  tw_lpid i;
149 
150  for(i = 0; i < g_tw_nlp + g_st_analysis_nlp; i++) {
151  tw_lp * lp = g_tw_lp[i];
152 
153  if (lp->pe != me)
154  continue;
155 
156  if (lp->type->pre_run) {
157  me->cur_event = me->abort_event;
158  me->cur_event->caused_by_me = NULL;
159 
160  (*(pre_run_f)lp->type->pre_run) (lp->cur_state, lp);
161 
162  if (me->cev_abort)
163  tw_error(TW_LOC, "ran out of events during pre_run");
164  }
165  }
166 }
167 
168 /********************************************************************//**
169  LP Suspension Design Notes! (John Jenkins, ANL)
170 
171 Many times, when developing optimistic models, we are able to
172 determine < LP state, event > pairs which represent infeasible model
173 behavior. These types of simulation states typically arise when time
174 warp causes us to receive and potentially process messages in an order
175 we don't expect.
176 
177 For example, consider a client/server protocol in which a server sends
178 an ACK to a client upon completion of some event. In optimistic mode,
179 the client can see what amounts to duplicate ACKs from the server due
180 to the server LP rolling back and re-sending an ACK.
181 
182 While some models can gracefully cope with such issues, more complex
183 models can have troubles (the client in the example could for instance
184 destroy the request metadata after receiving an ACK).
185 
186 A solution, as noted in the "Dark Side of Risk" paper, is to introduce
187 LP "self-suspend" functionality. If an LP is able to detect a < state,
188 message > pair which is incorrect / unexpected in a well-behaved
189 simulation, the LP should be able to put itself into suspend mode,
190 refusing to process messages until rolled back to a pre < state,
191 message > state. There are two benefits: 1) it greatly reduces the
192 difficulty in tracking down and distinguishing proper model bugs from
193 bugs arising from time-warp related issues such as out-of-order event
194 receipt and 2) it improves simulation performance by pruning the
195 number of processed events that we know are invalid and will be rolled
196 back anyways.
197 
198 I suggest the function signature tw_suspend(tw_lp *lp, int
199 do_orig_event_rc, const char * format, ...), with the following
200 semantics:
201 
202 After a call to tw_suspend, all subsequent events (both forward and
203 reverse) that arrive at the suspended LP shall be processed as if they
204 were no-ops. The reverse event handler of the event that caused the
205 suspend will be run if do_orig_event_rc is nonzero; otherwise, the
206 reverse event handler shall additionally be a no-op. Typically,
207 do_orig_event_rc == 0 is desired, as good coding practices for
208 moderate-or-greater complexity simulations dictate state/event
209 validation prior to modifying LP state (partial rollbacks are very
210 undesirable), but there may be messy logic in the user code for which
211 a partial rollback is warranted (operations that free memory as a side
212 effect of operations, for example). An LP exits suspend state upon
213 rolling back the event that caused the suspend (whether or not that
214 event is processed as a no-op). Upon GVT, if an LP is in self-suspend
215 mode and the event that caused the suspend has a timestamp less than
216 that of GVT, then the simulator shall report the format string of
217 suspended LP(s) and exit. A NULL format string is acceptable for
218 performance purposes, e.g. when doing "production" simulation runs.
219 
220 @param lp Pointer to the LP we're suspending
221 @param do_orig_event_rc Nonzero to run the reverse (RC) handler of the event that caused the suspend; zero to treat that handler as a no-op
222 @param error_num User-specified value for tracking purposes; ROSS ignores this
223 
224 *************************************************************************/
225 
226 void
227 tw_lp_suspend(tw_lp * lp, int do_orig_event_rc, int error_num )
228 {
229  if(!lp)
230  tw_error(TW_LOC, "Bad LP pointer!");
231 
232  lp->suspend_flag=1;
233  lp->suspend_event = lp->pe->cur_event; // only valid prior to GVT
234  lp->suspend_time = tw_now(lp);
235  lp->suspend_error_number = error_num;
236  lp->suspend_do_orig_event_rc = do_orig_event_rc;
237 
238 }
#define TW_LOC
Definition: ross-extern.h:164
Definition: io.h:21
init_f init
LP setup routine.
Definition: ross-types.h:88
tw_lp ** g_tw_lp
Definition: ross-global.c:26
tw_event * suspend_event
Definition: ross-types.h:327
struct st_lp_stats * lp_stats
Definition: ross-types.h:323
void tw_lp_suspend(tw_lp *lp, int do_orig_event_rc, int error_num)
Definition: tw-lp.c:227
void tw_error(const char *file, int line, const char *fmt,...) NORETURN
Definition: tw-util.c:74
tw_lptype * type
Type of this LP, including service callbacks.
Definition: ross-types.h:316
tw_statistics stats
per PE counters
Definition: ross-types.h:415
Definition: io.h:20
static tw_clock tw_clock_read(void)
Definition: aarch64.h:6
void tw_lp_setup_types()
Definition: tw-lp.c:11
Holds the entire PE state.
Definition: ross-types.h:375
unsigned int suspend_flag
Definition: ross-types.h:331
tw_lpid g_tw_nlp
Definition: ross-global.c:23
io_load_type g_io_load_at
Definition: io-mpi.c:21
size_t g_tw_delta_sz
Definition: ross-global.c:34
void tw_pre_run_lps(tw_pe *me)
Definition: tw-lp.c:147
int lp_count
Definition: ross-types.h:345
uint64_t tw_lpid
Definition: ross.h:160
Function Pointers for ROSS Event Handlers.
Definition: ross-types.h:87
void tw_lp_settype(tw_lpid id, tw_lptype *type)
Definition: tw-lp.c:38
tw_kp * kp
kp – Kernel process that we belong to (must match pe).
Definition: ross-types.h:313
void tw_lp_onpe(tw_lpid id, tw_pe *pe, tw_lpid gid)
Definition: tw-lp.c:57
unsigned int suspend_do_orig_event_rc
Definition: ross-types.h:330
struct st_lp_stats * last_stats[3]
Definition: ross-types.h:324
tw_lpid g_st_analysis_nlp
tw_stime suspend_time
Definition: ross-types.h:328
tw_lpid gid
global LP id
Definition: ross-types.h:306
tw_event * abort_event
Placeholder event for when free_q is empty.
Definition: ross-types.h:384
static tw_stime tw_now(tw_lp const *lp)
tw_event * cur_event
Current event being processed.
Definition: ross-types.h:385
void tw_lp_onkp(tw_lp *lp, tw_kp *kp)
Definition: tw-lp.c:73
void io_read_checkpoint()
Definition: io-mpi.c:112
unsigned int suspend_error_number
Definition: ross-types.h:329
tw_event * caused_by_me
Start of event list caused by this event.
Definition: ross-types.h:261
void(* init_f)(void *sv, tw_lp *me)
Definition: ross-types.h:72
size_t state_sz
Number of bytes that SV is for the LP.
Definition: ross-types.h:95
void(* pre_run_f)(void *sv, tw_lp *me)
Definition: ross-types.h:76
void st_model_setup_types(tw_lp *lp)
Definition: st-model-data.c:10
tw_typemap_f g_tw_lp_typemap
Definition: ross-global.c:102
void tw_init_lps(tw_pe *me)
Definition: tw-lp.c:83
tw_lptype * g_tw_lp_types
Definition: ross-global.c:101
tw_pe * pe
Definition: ross-types.h:308
Definition: io.h:19
tw_lpid id
local LP id
Definition: ross-types.h:305
tw_pe * pe
Definition: avl_tree.c:11
uint64_t tw_clock
Definition: aarch64.h:4
void * cur_state
Current application LP data.
Definition: ross-types.h:315
int g_st_ev_trace
Definition: st-event-trace.c:3
void * tw_calloc(const char *file, int line, const char *for_who, size_t e_sz, size_t n)
Definition: tw-util.c:203
pre_run_f pre_run
Second stage LP initialization.
Definition: ross-types.h:89
unsigned char cev_abort
Current event being processed must be aborted.
Definition: ross-types.h:399
LP State Structure.
Definition: ross-types.h:304