ROSS
st-sim-engine.c
Go to the documentation of this file.
1 #include <ross.h>
2 #include <sys/stat.h>
3 #define __STDC_FORMAT_MACROS 1
4 
8 
9 /* wrapper to call gvt instrumentation functions depending on which granularity to use */
10 void st_collect_engine_data(tw_pe *pe, int col_type)
11 {
12  tw_clock start_time = tw_clock_read();
13  tw_kp *kp;
14  tw_lp *lp;
15  unsigned int i;
16  tw_statistics s;
17  bzero(&s, sizeof(s));
18  tw_get_stats(pe, &s);
19 
20  sample_metadata sample_md;
21  sample_md.ts = pe->GVT;
22  sample_md.real_time = (double)tw_clock_read() / g_tw_clock_rate;
23 
24  if (g_st_pe_data)
25  st_collect_engine_data_pes(pe, &sample_md, &s, col_type);
26  if (g_st_kp_data)
27  {
28  for (i = 0; i < g_tw_nkp; i++)
29  {
30  kp = tw_getkp(i);
31  st_collect_engine_data_kps(pe, kp, &sample_md, col_type);
32  }
33  }
34  if (g_st_lp_data)
35  {
36  for (i = 0; i < g_tw_nlp; i++)
37  {
38  lp = tw_getlp(i);
39  st_collect_engine_data_lps(lp, &sample_md, col_type);
40  }
41  }
42  pe->stats.s_stat_comp += tw_clock_read() - start_time;
43 }
44 
45 void st_collect_engine_data_pes(tw_pe *pe, sample_metadata *sample_md, tw_statistics *s, int col_type)
46 {
47  st_pe_stats pe_stats;
48  int buf_size = sizeof(*sample_md) + sizeof(pe_stats);
49  char buffer[buf_size];
51 
52  // sample_md time stamps were set in the calling function
53  sample_md->flag = PE_TYPE;
54  sample_md->sample_sz = sizeof(pe_stats);
55 
56  pe_stats.peid = (unsigned int) g_tw_mynode;
57  pe_stats.s_nevent_processed = (unsigned int)( s->s_nevent_processed-last_pe_stats[col_type].s_nevent_processed);
58  pe_stats.s_nevent_abort = (unsigned int)(s->s_nevent_abort-last_pe_stats[col_type].s_nevent_abort);
59  pe_stats.s_e_rbs = (unsigned int)(s->s_e_rbs-last_pe_stats[col_type].s_e_rbs);
60  pe_stats.s_rb_total = (unsigned int)( s->s_rb_total-last_pe_stats[col_type].s_rb_total);
61  pe_stats.s_rb_secondary = (unsigned int)(s->s_rb_secondary-last_pe_stats[col_type].s_rb_secondary);
62  pe_stats.s_fc_attempts = (unsigned int)(s->s_fc_attempts-last_pe_stats[col_type].s_fc_attempts);
63  pe_stats.s_pq_qsize = tw_pq_get_size(pe->pq);
64  pe_stats.s_nsend_network = (unsigned int)(s->s_nsend_network-last_pe_stats[col_type].s_nsend_network);
65  pe_stats.s_nread_network = (unsigned int)(s->s_nread_network-last_pe_stats[col_type].s_nread_network);
66  pe_stats.s_pe_event_ties = (unsigned int)(s->s_pe_event_ties-last_pe_stats[col_type].s_pe_event_ties);
67  pe_stats.s_ngvts = (unsigned int)(g_tw_gvt_done - last_pe_stats[col_type].s_ngvts);
68  pe_stats.all_reduce_count = (unsigned int)(all_reduce_cnt-last_all_reduce_cnt);
69 
70  // I think it's possible for net_events to be negative over some interval of simulation time
71  // e.g., if in the current interval we've happened to process more rollback events than forward events
72  // for now, just report efficiency as 0 in this case?
73  int net_events = pe_stats.s_nevent_processed - pe_stats.s_e_rbs;
74  if (net_events > 0)
75  pe_stats.efficiency = (float) 100.0 * (1.0 - ((float) pe_stats.s_e_rbs / (float) net_events));
76  else
77  pe_stats.efficiency = 0;
78 
79  // TODO set a starting clock rate and subtract that from the counters?
80  // because PEs on different nodes will probably have different starting points for cycle counters
81  pe_stats.s_net_read = (float)(pe->stats.s_net_read - last_pe_stats[col_type].s_net_read) / g_tw_clock_rate;
82  pe_stats.s_net_other = (float)(pe->stats.s_net_other - last_pe_stats[col_type].s_net_other) / g_tw_clock_rate;
83  pe_stats.s_gvt = (float)(pe->stats.s_gvt - last_pe_stats[col_type].s_gvt) / g_tw_clock_rate;
84  pe_stats.s_fossil_collect = (float)(pe->stats.s_fossil_collect - last_pe_stats[col_type].s_fossil_collect) / g_tw_clock_rate;
85  pe_stats.s_event_abort = (float)(pe->stats.s_event_abort - last_pe_stats[col_type].s_event_abort) / g_tw_clock_rate;
86  pe_stats.s_event_process = (float)(pe->stats.s_event_process - last_pe_stats[col_type].s_event_process) / g_tw_clock_rate;
87  pe_stats.s_pq = (float)(pe->stats.s_pq - last_pe_stats[col_type].s_pq) / g_tw_clock_rate;
88  pe_stats.s_rollback = (float)(pe->stats.s_rollback - last_pe_stats[col_type].s_rollback) / g_tw_clock_rate;
89  pe_stats.s_cancel_q = (float)(pe->stats.s_cancel_q - last_pe_stats[col_type].s_cancel_q) / g_tw_clock_rate;
90  pe_stats.s_avl = (float)(pe->stats.s_avl - last_pe_stats[col_type].s_avl) / g_tw_clock_rate;
91  pe_stats.s_buddy = (float)(pe->stats.s_buddy - last_pe_stats[col_type].s_buddy) / g_tw_clock_rate;
92  pe_stats.s_lz4 = (float)(pe->stats.s_lz4 - last_pe_stats[col_type].s_lz4) / g_tw_clock_rate;
93 
94  memcpy(&buffer[0], sample_md, sizeof(*sample_md));
95  memcpy(&buffer[sizeof(*sample_md)], &pe_stats, sizeof(pe_stats));
96  st_buffer_push(col_type, &buffer[0], buf_size);
97 
98  memcpy(&last_pe_stats[col_type], s, sizeof(tw_statistics));
100 }
101 
102 void st_collect_engine_data_kps(tw_pe *pe, tw_kp *kp, sample_metadata *sample_md, int col_type)
103 {
104  st_kp_stats kp_stats;
105  int buf_size = sizeof(*sample_md) + sizeof(kp_stats);
106  char buffer[buf_size];
107  int index = 0;
108 
109  // sample_md time stamps were set in the calling function
110  sample_md->flag = KP_TYPE;
111  sample_md->sample_sz = sizeof(kp_stats);
112 
113  kp_stats.peid = (unsigned int) g_tw_mynode;
114 
115  kp_stats.kpid = kp->id;
116 
117  kp_stats.s_nevent_processed = (unsigned int)(kp->kp_stats->s_nevent_processed - kp->last_stats[col_type]->s_nevent_processed);
118  kp_stats.s_nevent_abort = (unsigned int)(kp->kp_stats->s_nevent_abort - kp->last_stats[col_type]->s_nevent_abort);
119  kp_stats.s_e_rbs = (unsigned int)(kp->kp_stats->s_e_rbs - kp->last_stats[col_type]->s_e_rbs);
120  kp_stats.s_rb_total = (unsigned int)(kp->kp_stats->s_rb_total - kp->last_stats[col_type]->s_rb_total);
121  kp_stats.s_rb_secondary = (unsigned int)(kp->kp_stats->s_rb_secondary - kp->last_stats[col_type]->s_rb_secondary);
122  kp_stats.s_nsend_network = (unsigned int)(kp->kp_stats->s_nsend_network - kp->last_stats[col_type]->s_nsend_network);
123  kp_stats.s_nread_network = (unsigned int)(kp->kp_stats->s_nread_network - kp->last_stats[col_type]->s_nread_network);
124  kp_stats.time_ahead_gvt = (float)(TW_STIME_DBL(kp->last_time) - TW_STIME_DBL(pe->GVT));
125 
126  int net_events = kp_stats.s_nevent_processed - kp_stats.s_e_rbs;
127  if (net_events > 0)
128  kp_stats.efficiency = (float) 100.0 * (1.0 - ((float) kp_stats.s_e_rbs / (float) net_events));
129  else
130  kp_stats.efficiency = 0;
131 
132  memcpy(kp->last_stats[col_type], kp->kp_stats, sizeof(st_kp_stats));
133 
134  memcpy(&buffer[index], sample_md, sizeof(*sample_md));
135  index += sizeof(*sample_md);
136  memcpy(&buffer[index], &kp_stats, sizeof(kp_stats));
137  index += sizeof(kp_stats);
138 
139  if (index != buf_size)
140  tw_error(TW_LOC, "size of data being pushed to buffer is incorrect!\n");
141 
142  st_buffer_push(col_type, &buffer[0], buf_size);
143 }
144 
145 void st_collect_engine_data_lps(tw_lp *lp, sample_metadata *sample_md, int col_type)
146 {
147  st_lp_stats lp_stats;
148  int buf_size = sizeof(*sample_md) + sizeof(lp_stats);
149  char buffer[buf_size];
150  int index = 0;
151 
152  // sample_md time stamps were set in the calling function
153  sample_md->flag = LP_TYPE;
154  sample_md->sample_sz = sizeof(lp_stats);
155 
156  lp_stats.peid = (unsigned int) g_tw_mynode;
157 
158  lp_stats.kpid = lp->kp->id;
159  lp_stats.lpid = lp->gid;
160 
161  lp_stats.s_nevent_processed = (unsigned int)(lp->lp_stats->s_nevent_processed - lp->last_stats[col_type]->s_nevent_processed);
162  lp_stats.s_nevent_abort = (unsigned int)(lp->lp_stats->s_nevent_abort - lp->last_stats[col_type]->s_nevent_abort);
163  lp_stats.s_e_rbs = (unsigned int)(lp->lp_stats->s_e_rbs - lp->last_stats[col_type]->s_e_rbs);
164  lp_stats.s_nsend_network = (unsigned int)(lp->lp_stats->s_nsend_network - lp->last_stats[col_type]->s_nsend_network);
165  lp_stats.s_nread_network = (unsigned int)(lp->lp_stats->s_nread_network - lp->last_stats[col_type]->s_nread_network);
166 
167  int net_events = lp_stats.s_nevent_processed - lp_stats.s_e_rbs;
168  if (net_events > 0)
169  lp_stats.efficiency = (float) 100.0 * (1.0 - ((float) lp_stats.s_e_rbs / (float) net_events));
170  else
171  lp_stats.efficiency = 0;
172 
173  memcpy(lp->last_stats[col_type], lp->lp_stats, sizeof(st_lp_stats));
174 
175  memcpy(&buffer[index], sample_md, sizeof(*sample_md));
176  index += sizeof(*sample_md);
177  memcpy(&buffer[index], &lp_stats, sizeof(lp_stats));
178  index += sizeof(lp_stats);
179 
180  if (index != buf_size)
181  tw_error(TW_LOC, "size of data being pushed to buffer is incorrect!\n");
182 
183  st_buffer_push(col_type, &buffer[0], buf_size);
184 }
unsigned int kpid
unsigned int s_e_rbs
#define TW_LOC
Definition: ross-extern.h:164
unsigned int g_tw_gvt_done
Definition: ross-global.c:81
unsigned int s_nsend_network
unsigned int s_ngvts
unsigned int peid
static tw_statistics last_pe_stats[3]
Definition: st-sim-engine.c:6
tw_clock s_net_other
Definition: ross-types.h:137
tw_clock s_event_process
Definition: ross-types.h:142
unsigned long long g_tw_clock_rate
Definition: ross-global.c:98
int g_st_lp_data
void st_collect_engine_data_pes(tw_pe *pe, sample_metadata *sample_md, tw_statistics *s, int col_type)
Definition: st-sim-engine.c:45
unsigned int peid
tw_clock s_pq
Definition: ross-types.h:143
unsigned int s_rb_secondary
unsigned int lpid
struct st_lp_stats * lp_stats
Definition: ross-types.h:323
void tw_error(const char *file, int line, const char *fmt,...) NORETURN
Definition: tw-util.c:74
struct st_kp_stats * kp_stats
Definition: ross-types.h:366
tw_stat s_nevent_abort
Definition: ross-types.h:112
int g_st_pe_data
tw_statistics stats
per PE counters
Definition: ross-types.h:415
unsigned int peid
struct st_kp_stats * last_stats[3]
Definition: ross-types.h:367
unsigned int s_e_rbs
void st_collect_engine_data(tw_pe *pe, int col_type)
Definition: st-sim-engine.c:10
tw_stat s_rb_total
Definition: ross-types.h:115
unsigned int s_nevent_abort
void st_buffer_push(int type, char *data, int size)
unsigned int s_pq_qsize
static tw_clock tw_clock_read(void)
Definition: aarch64.h:6
tw_clock s_lz4
Definition: ross-types.h:150
unsigned int all_reduce_count
Holds the entire PE state.
Definition: ross-types.h:375
unsigned int s_nevent_processed
unsigned int s_pe_event_ties
tw_lpid g_tw_nlp
Definition: ross-global.c:23
tw_clock s_stat_comp
Definition: ross-types.h:151
unsigned int s_nsend_network
tw_stat s_nsend_network
Definition: ross-types.h:121
unsigned int tw_pq_get_size(splay_tree *st)
Definition: splay.c:351
static tw_kp * tw_getkp(tw_kpid id)
tw_kp * kp
kp – Kernel process that we belong to (must match pe).
Definition: ross-types.h:313
unsigned int kpid
Statistics tallied over the duration of the simulation.
Definition: ross-types.h:107
struct st_lp_stats * last_stats[3]
Definition: ross-types.h:324
void tw_get_stats(tw_pe *me, tw_statistics *s)
Definition: tw-stats.c:35
unsigned int s_nread_network
tw_stat s_nread_network
Definition: ross-types.h:122
tw_lpid gid
global LP id
Definition: ross-types.h:306
tw_clock s_buddy
Definition: ross-types.h:149
unsigned int s_rb_total
unsigned int s_rb_secondary
unsigned int s_nevent_processed
tw_stat s_e_rbs
Definition: ross-types.h:113
unsigned int s_e_rbs
tw_peid g_tw_mynode
Definition: ross-global.c:88
unsigned int s_nread_network
void st_collect_engine_data_lps(tw_lp *lp, sample_metadata *sample_md, int col_type)
tw_pq * pq
Priority queue used to sort events.
Definition: ross-types.h:381
unsigned int s_nevent_processed
tw_clock s_net_read
Definition: ross-types.h:136
unsigned int s_nread_network
tw_clock s_avl
Definition: ross-types.h:148
static tw_lp * tw_getlp(tw_lpid id)
tw_stat s_fc_attempts
Definition: ross-types.h:118
static tw_stat all_reduce_cnt
Definition: mpi_allreduce.c:8
tw_stat s_pe_event_ties
Definition: ross-types.h:130
tw_clock s_fossil_collect
Definition: ross-types.h:139
tw_stat st_get_allreduce_count()
Definition: mpi_allreduce.c:20
unsigned int s_nevent_abort
tw_stime GVT
Global Virtual Time.
Definition: ross-types.h:403
unsigned long long tw_stat
Definition: ross-types.h:48
tw_clock s_gvt
Definition: ross-types.h:138
tw_pe * pe
Definition: avl_tree.c:11
unsigned int s_rb_total
uint64_t tw_clock
Definition: aarch64.h:4
tw_kpid id
ID number; otherwise it's not available to the app.
Definition: ross-types.h:341
unsigned int s_nsend_network
int g_st_kp_data
tw_kpid g_tw_nkp
Definition: ross-global.c:25
tw_stat s_nevent_processed
Definition: ross-types.h:111
tw_clock s_event_abort
Definition: ross-types.h:141
unsigned int s_fc_attempts
tw_clock s_rollback
Definition: ross-types.h:144
long g_st_current_interval
Definition: st-sim-engine.c:5
#define TW_STIME_DBL(x)
Definition: ross.h:153
unsigned int s_nevent_abort
void st_collect_engine_data_kps(tw_pe *pe, tw_kp *kp, sample_metadata *sample_md, int col_type)
static tw_stat last_all_reduce_cnt
Definition: st-sim-engine.c:7
tw_stat s_rb_secondary
Definition: ross-types.h:117
tw_stime last_time
Time of the current event being processed.
Definition: ross-types.h:360
tw_clock s_cancel_q
Definition: ross-types.h:146
LP State Structure.
Definition: ross-types.h:304