ROSS
st-sim-engine.c
Go to the documentation of this file.
1#include <ross.h>
2#include <sys/stat.h>
3#define __STDC_FORMAT_MACROS 1
4
8
9/* wrapper to call gvt instrumentation functions depending on which granularity to use */
10void st_collect_engine_data(tw_pe *pe, int col_type)
11{
12 tw_clock start_time = tw_clock_read();
13 tw_kp *kp;
14 tw_lp *lp;
15 unsigned int i;
17 bzero(&s, sizeof(s));
18 tw_get_stats(pe, &s);
19
20 sample_metadata sample_md;
21#ifdef USE_RAND_TIEBREAKER
22 sample_md.ts = pe->GVT_sig.recv_ts;
23#else
24 sample_md.ts = pe->GVT;
25#endif
26 sample_md.real_time = (double)tw_clock_read() / g_tw_clock_rate;
27
28 if (g_st_pe_data)
29 st_collect_engine_data_pes(pe, &sample_md, &s, col_type);
30 if (g_st_kp_data)
31 {
32 for (i = 0; i < g_tw_nkp; i++)
33 {
34 kp = tw_getkp(i);
35 st_collect_engine_data_kps(pe, kp, &sample_md, col_type);
36 }
37 }
38 if (g_st_lp_data)
39 {
40 for (i = 0; i < g_tw_nlp; i++)
41 {
42 lp = tw_getlp(i);
43 st_collect_engine_data_lps(lp, &sample_md, col_type);
44 }
45 }
46 pe->stats.s_stat_comp += tw_clock_read() - start_time;
47}
48
// Body of st_collect_engine_data_pes(tw_pe *pe, sample_metadata *sample_md,
//                                    tw_statistics *s, int col_type).
// Builds one PE-granularity sample: most counters are reported as the change
// since the previous sample taken for the same col_type (the baseline kept in
// last_pe_stats[col_type]). The sample is pushed as sample_md followed by
// pe_stats via st_buffer_push(), and *s is then stored as the new baseline.
// NOTE(review): this extract is missing listing lines 49 (the signature),
// 54 and 103. all_reduce_cnt and last_all_reduce_cnt are read below but are
// not assigned in any visible line; presumably the missing lines do that --
// confirm against the real source file.
50{
51 st_pe_stats pe_stats;
// Output record size: metadata header immediately followed by the PE stats.
52 int buf_size = sizeof(*sample_md) + sizeof(pe_stats);
53 char buffer[buf_size];
55
56 // sample_md time stamps were set in the calling function
57 sample_md->flag = PE_TYPE;
58 sample_md->sample_sz = sizeof(pe_stats);
59
60 pe_stats.peid = (unsigned int) g_tw_mynode;
// Event/rollback/network counters: current snapshot minus the stored baseline.
61 pe_stats.s_nevent_processed = (unsigned int)( s->s_nevent_processed-last_pe_stats[col_type].s_nevent_processed);
62 pe_stats.s_nevent_abort = (unsigned int)(s->s_nevent_abort-last_pe_stats[col_type].s_nevent_abort);
63 pe_stats.s_e_rbs = (unsigned int)(s->s_e_rbs-last_pe_stats[col_type].s_e_rbs);
64 pe_stats.s_rb_total = (unsigned int)( s->s_rb_total-last_pe_stats[col_type].s_rb_total);
65 pe_stats.s_rb_secondary = (unsigned int)(s->s_rb_secondary-last_pe_stats[col_type].s_rb_secondary);
66 pe_stats.s_fc_attempts = (unsigned int)(s->s_fc_attempts-last_pe_stats[col_type].s_fc_attempts);
// Unlike its neighbours this is the queue size right now, not a delta.
67 pe_stats.s_pq_qsize = tw_pq_get_size(pe->pq);
68 pe_stats.s_nsend_network = (unsigned int)(s->s_nsend_network-last_pe_stats[col_type].s_nsend_network);
69 pe_stats.s_nread_network = (unsigned int)(s->s_nread_network-last_pe_stats[col_type].s_nread_network);
70 pe_stats.s_pe_event_ties = (unsigned int)(s->s_pe_event_ties-last_pe_stats[col_type].s_pe_event_ties);
// GVT count comes from the global g_tw_gvt_done rather than from *s.
// NOTE(review): the baseline s_ngvts is whatever *s held when it was copied
// at the end of the previous call -- confirm tw_get_stats() populates it.
71 pe_stats.s_ngvts = (unsigned int)(g_tw_gvt_done - last_pe_stats[col_type].s_ngvts);
// NOTE(review): last_all_reduce_cnt is not indexed by col_type, unlike
// last_pe_stats -- confirm that sharing one baseline is intended.
72 pe_stats.all_reduce_count = (unsigned int)(all_reduce_cnt-last_all_reduce_cnt);
73
74 // I think it's possible for net_events to be negative over some interval of simulation time
75 // e.g., if in the current interval we've happened to process more rollback events than forward events
76 // for now, just report efficiency as 0 in this case?
// Both operands are unsigned int, so the subtraction wraps before it is
// converted to int; the sign test below depends on that conversion.
77 int net_events = pe_stats.s_nevent_processed - pe_stats.s_e_rbs;
78 if (net_events > 0)
79 pe_stats.efficiency = (float) 100.0 * (1.0 - ((float) pe_stats.s_e_rbs / (float) net_events));
80 else
81 pe_stats.efficiency = 0;
82
83 // TODO set a starting clock rate and subtract that from the counters?
84 // because PEs on different nodes will probably have different starting points for cycle counters
// Timing counters: read from pe->stats (not from *s), differenced against the
// baseline and divided by g_tw_clock_rate.
// NOTE(review): the baseline is a copy of *s, so this assumes tw_get_stats()
// carries these cycle counters into *s -- confirm.
85 pe_stats.s_net_read = (float)(pe->stats.s_net_read - last_pe_stats[col_type].s_net_read) / g_tw_clock_rate;
86 pe_stats.s_net_other = (float)(pe->stats.s_net_other - last_pe_stats[col_type].s_net_other) / g_tw_clock_rate;
87 pe_stats.s_gvt = (float)(pe->stats.s_gvt - last_pe_stats[col_type].s_gvt) / g_tw_clock_rate;
88 pe_stats.s_fossil_collect = (float)(pe->stats.s_fossil_collect - last_pe_stats[col_type].s_fossil_collect) / g_tw_clock_rate;
89 pe_stats.s_event_abort = (float)(pe->stats.s_event_abort - last_pe_stats[col_type].s_event_abort) / g_tw_clock_rate;
90 pe_stats.s_event_process = (float)(pe->stats.s_event_process - last_pe_stats[col_type].s_event_process) / g_tw_clock_rate;
91 pe_stats.s_pq = (float)(pe->stats.s_pq - last_pe_stats[col_type].s_pq) / g_tw_clock_rate;
92 pe_stats.s_rollback = (float)(pe->stats.s_rollback - last_pe_stats[col_type].s_rollback) / g_tw_clock_rate;
93 pe_stats.s_cancel_q = (float)(pe->stats.s_cancel_q - last_pe_stats[col_type].s_cancel_q) / g_tw_clock_rate;
94 pe_stats.s_avl = (float)(pe->stats.s_avl - last_pe_stats[col_type].s_avl) / g_tw_clock_rate;
95 pe_stats.s_buddy = (float)(pe->stats.s_buddy - last_pe_stats[col_type].s_buddy) / g_tw_clock_rate;
96 pe_stats.s_lz4 = (float)(pe->stats.s_lz4 - last_pe_stats[col_type].s_lz4) / g_tw_clock_rate;
97
// Serialize header then payload, and hand the record to the buffer for col_type.
98 memcpy(&buffer[0], sample_md, sizeof(*sample_md));
99 memcpy(&buffer[sizeof(*sample_md)], &pe_stats, sizeof(pe_stats));
100 st_buffer_push(col_type, &buffer[0], buf_size);
101
// The snapshot just reported becomes the baseline for the next sample.
102 memcpy(&last_pe_stats[col_type], s, sizeof(tw_statistics));
104}
105
106void st_collect_engine_data_kps(tw_pe *pe, tw_kp *kp, sample_metadata *sample_md, int col_type)
107{
108 st_kp_stats kp_stats;
109 int buf_size = sizeof(*sample_md) + sizeof(kp_stats);
110 char buffer[buf_size];
111 int index = 0;
112
113 // sample_md time stamps were set in the calling function
114 sample_md->flag = KP_TYPE;
115 sample_md->sample_sz = sizeof(kp_stats);
116
117 kp_stats.peid = (unsigned int) g_tw_mynode;
118
119 kp_stats.kpid = kp->id;
120
121 kp_stats.s_nevent_processed = (unsigned int)(kp->kp_stats->s_nevent_processed - kp->last_stats[col_type]->s_nevent_processed);
122 kp_stats.s_nevent_abort = (unsigned int)(kp->kp_stats->s_nevent_abort - kp->last_stats[col_type]->s_nevent_abort);
123 kp_stats.s_e_rbs = (unsigned int)(kp->kp_stats->s_e_rbs - kp->last_stats[col_type]->s_e_rbs);
124 kp_stats.s_rb_total = (unsigned int)(kp->kp_stats->s_rb_total - kp->last_stats[col_type]->s_rb_total);
125 kp_stats.s_rb_secondary = (unsigned int)(kp->kp_stats->s_rb_secondary - kp->last_stats[col_type]->s_rb_secondary);
126 kp_stats.s_nsend_network = (unsigned int)(kp->kp_stats->s_nsend_network - kp->last_stats[col_type]->s_nsend_network);
127 kp_stats.s_nread_network = (unsigned int)(kp->kp_stats->s_nread_network - kp->last_stats[col_type]->s_nread_network);
128#ifdef USE_RAND_TIEBREAKER
129 kp_stats.time_ahead_gvt = (float)(TW_STIME_DBL(kp->last_sig.recv_ts) - TW_STIME_DBL(pe->GVT_sig.recv_ts));
130#else
131 kp_stats.time_ahead_gvt = (float)(TW_STIME_DBL(kp->last_time) - TW_STIME_DBL(pe->GVT));
132#endif
133
134 int net_events = kp_stats.s_nevent_processed - kp_stats.s_e_rbs;
135 if (net_events > 0)
136 kp_stats.efficiency = (float) 100.0 * (1.0 - ((float) kp_stats.s_e_rbs / (float) net_events));
137 else
138 kp_stats.efficiency = 0;
139
140 memcpy(kp->last_stats[col_type], kp->kp_stats, sizeof(st_kp_stats));
141
142 memcpy(&buffer[index], sample_md, sizeof(*sample_md));
143 index += sizeof(*sample_md);
144 memcpy(&buffer[index], &kp_stats, sizeof(kp_stats));
145 index += sizeof(kp_stats);
146
147 if (index != buf_size)
148 tw_error(TW_LOC, "size of data being pushed to buffer is incorrect!\n");
149
150 st_buffer_push(col_type, &buffer[0], buf_size);
151}
152
153void st_collect_engine_data_lps(tw_lp *lp, sample_metadata *sample_md, int col_type)
154{
155 st_lp_stats lp_stats;
156 int buf_size = sizeof(*sample_md) + sizeof(lp_stats);
157 char buffer[buf_size];
158 int index = 0;
159
160 // sample_md time stamps were set in the calling function
161 sample_md->flag = LP_TYPE;
162 sample_md->sample_sz = sizeof(lp_stats);
163
164 lp_stats.peid = (unsigned int) g_tw_mynode;
165
166 lp_stats.kpid = lp->kp->id;
167 lp_stats.lpid = lp->gid;
168
169 lp_stats.s_nevent_processed = (unsigned int)(lp->lp_stats->s_nevent_processed - lp->last_stats[col_type]->s_nevent_processed);
170 lp_stats.s_nevent_abort = (unsigned int)(lp->lp_stats->s_nevent_abort - lp->last_stats[col_type]->s_nevent_abort);
171 lp_stats.s_e_rbs = (unsigned int)(lp->lp_stats->s_e_rbs - lp->last_stats[col_type]->s_e_rbs);
172 lp_stats.s_nsend_network = (unsigned int)(lp->lp_stats->s_nsend_network - lp->last_stats[col_type]->s_nsend_network);
173 lp_stats.s_nread_network = (unsigned int)(lp->lp_stats->s_nread_network - lp->last_stats[col_type]->s_nread_network);
174
175 int net_events = lp_stats.s_nevent_processed - lp_stats.s_e_rbs;
176 if (net_events > 0)
177 lp_stats.efficiency = (float) 100.0 * (1.0 - ((float) lp_stats.s_e_rbs / (float) net_events));
178 else
179 lp_stats.efficiency = 0;
180
181 memcpy(lp->last_stats[col_type], lp->lp_stats, sizeof(st_lp_stats));
182
183 memcpy(&buffer[index], sample_md, sizeof(*sample_md));
184 index += sizeof(*sample_md);
185 memcpy(&buffer[index], &lp_stats, sizeof(lp_stats));
186 index += sizeof(lp_stats);
187
188 if (index != buf_size)
189 tw_error(TW_LOC, "size of data being pushed to buffer is incorrect!\n");
190
191 st_buffer_push(col_type, &buffer[0], buf_size);
192}
tw_pe * pe
Definition avl_tree.c:10
static tw_clock tw_clock_read(void)
Definition aarch64.h:8
uint64_t tw_clock
Definition aarch64.h:6
tw_stat st_get_allreduce_count()
@ KP_TYPE
@ LP_TYPE
@ PE_TYPE
int g_st_lp_data
void st_buffer_push(int type, char *data, int size)
int g_st_kp_data
int g_st_pe_data
unsigned int tw_pq_get_size(tw_pq *)
Definition splay.c:418
#define TW_STIME_DBL(x)
Definition ross-base.h:42
unsigned long long g_tw_clock_rate
tw_kpid g_tw_nkp
Definition ross-global.c:27
void tw_get_stats(tw_pe *me, tw_statistics *s)
Definition tw-stats.c:36
void tw_error(const char *file, int line, const char *fmt,...)
Definition tw-util.c:77
tw_peid g_tw_mynode
Definition ross-global.c:92
unsigned int g_tw_gvt_done
Definition ross-global.c:85
tw_lpid g_tw_nlp
Definition ross-global.c:24
#define TW_LOC
static tw_lp * tw_getlp(tw_lpid id)
static tw_kp * tw_getkp(tw_kpid id)
unsigned long long tw_stat
Definition ross-types.h:58
static tw_stat all_reduce_cnt
void st_collect_engine_data_lps(tw_lp *lp, sample_metadata *sample_md, int col_type)
static tw_stat last_all_reduce_cnt
long g_st_current_interval
static tw_statistics last_pe_stats[3]
void st_collect_engine_data_pes(tw_pe *pe, sample_metadata *sample_md, tw_statistics *s, int col_type)
void st_collect_engine_data_kps(tw_pe *pe, tw_kp *kp, sample_metadata *sample_md, int col_type)
void st_collect_engine_data(tw_pe *pe, int col_type)
unsigned int s_nsend_network
unsigned int s_rb_secondary
unsigned int s_nevent_processed
unsigned int s_nread_network
unsigned int peid
unsigned int kpid
unsigned int s_e_rbs
unsigned int s_rb_total
unsigned int s_nevent_abort
unsigned int s_nread_network
unsigned int lpid
unsigned int s_nevent_processed
unsigned int s_e_rbs
unsigned int peid
unsigned int s_nevent_abort
unsigned int kpid
unsigned int s_nsend_network
unsigned int all_reduce_count
unsigned int s_rb_total
unsigned int s_fc_attempts
unsigned int s_nevent_abort
unsigned int s_nread_network
unsigned int s_nsend_network
unsigned int s_nevent_processed
unsigned int s_pq_qsize
unsigned int s_rb_secondary
unsigned int peid
unsigned int s_e_rbs
unsigned int s_pe_event_ties
unsigned int s_ngvts
tw_stime recv_ts
Definition ross-types.h:260
struct st_kp_stats * kp_stats
Definition ross-types.h:407
struct st_kp_stats * last_stats[3]
Definition ross-types.h:408
tw_event_sig last_sig
Event signature of the current event being processed.
Definition ross-types.h:398
tw_stime last_time
Time of the current event being processed.
Definition ross-types.h:401
tw_kpid id
ID number; otherwise it's not available to the app.
Definition ross-types.h:378
LP State Structure.
Definition ross-types.h:336
tw_kp * kp
kp – Kernel process that we belong to (must match pe).
Definition ross-types.h:345
tw_lpid gid
global LP id
Definition ross-types.h:338
struct st_lp_stats * lp_stats
Definition ross-types.h:356
struct st_lp_stats * last_stats[3]
Definition ross-types.h:357
Holds the entire PE state.
Definition ross-types.h:416
Statistics tallied over the duration of the simulation.
Definition ross-types.h:117
tw_stat s_pe_event_ties
Definition ross-types.h:140
tw_stat s_nread_network
Definition ross-types.h:132
tw_stat s_nevent_abort
Definition ross-types.h:122
tw_stat s_nevent_processed
Definition ross-types.h:121
tw_stat s_nsend_network
Definition ross-types.h:131
tw_stat s_e_rbs
Definition ross-types.h:123
tw_stat s_rb_secondary
Definition ross-types.h:127
tw_stat s_rb_total
Definition ross-types.h:125
tw_stat s_fc_attempts
Definition ross-types.h:128