ROSS
mpi_allreduce.c
Go to the documentation of this file.
1 #include <ross.h>
2 
3 #define TW_GVT_NORMAL 0
4 #define TW_GVT_COMPUTE 1
5 
6 static unsigned int g_tw_gvt_max_no_change = 10000;
7 static unsigned int g_tw_gvt_no_change = 0;
9 static unsigned int gvt_cnt = 0;
10 static unsigned int gvt_force = 0;
11 
/*
 * Command-line option table for this GVT implementation, grouped under the
 * heading "ROSS MPI GVT". The setup routine below returns a pointer to it.
 *   gvt-interval    -> g_tw_gvt_interval   (unsigned int option)
 *   report-interval -> gvt_print_interval  (double option)
 * The table is closed by TWOPT_END().
 */
12 static const tw_optdef gvt_opts [] =
13 {
14  TWOPT_GROUP("ROSS MPI GVT"),
15  TWOPT_UINT("gvt-interval", g_tw_gvt_interval, "GVT Interval: Iterations through scheduling loop (synch=1,2,3,4), or ms between GVTs (synch=5)"),
16  TWOPT_DOUBLE("report-interval", gvt_print_interval, "percent of runtime to print GVT"),
17  TWOPT_END()
18 };
19 
21 {
22  return all_reduce_cnt;
23 }
24 
25 const tw_optdef *
27 {
28  gvt_cnt = 0;
29 
30  return gvt_opts;
31 }
32 
/*
 * GVT start hook. This implementation has nothing to do here, so the body
 * is intentionally empty.
 *
 * The declarator line was missing from the listing; it is restored from the
 * cross-reference index entry for this function.
 */
void
tw_gvt_start(void)
{
}
37 
/*
 * Records a forced-GVT request by incrementing gvt_force (the value printed
 * as "Forced GVT" in the statistics report).
 *
 * NOTE(review): this listing skips source line 39 (the declarator; the
 * cross-reference index gives it as tw_gvt_force_update(void)) and source
 * line 42 (presumably one more statement after the increment). Recover both
 * from the original file before compiling.
 */
38 void
40 {
41  gvt_force++;
43 }
44 
45 void
47 {
48  gvt_force++;
49  g_tw_gvt_interval_start_cycles = 0; // reset to start of time
50 }
51 
52 void
53 tw_gvt_stats(FILE * f)
54 {
55  fprintf(f, "\nTW GVT Statistics: MPI AllReduce\n");
56  fprintf(f, "\t%-50s %11d\n", "GVT Interval", g_tw_gvt_interval);
57  fprintf(f, "\t%-50s %llu\n", "GVT Real Time Interval (cycles)", g_tw_gvt_realtime_interval);
58  fprintf(f, "\t%-50s %11.8lf\n", "GVT Real Time Interval (sec)", (double)g_tw_gvt_realtime_interval/(double)g_tw_clock_rate);
59  fprintf(f, "\t%-50s %11d\n", "Batch Size", g_tw_mblock);
60  fprintf(f, "\n");
61  fprintf(f, "\t%-50s %11d\n", "Forced GVT", gvt_force);
62  fprintf(f, "\t%-50s %11d\n", "Total GVT Computations", g_tw_gvt_done);
63  fprintf(f, "\t%-50s %11lld\n", "Total All Reduce Calls", all_reduce_cnt);
64  fprintf(f, "\t%-50s %11.2lf\n", "Average Reduction / GVT",
65  (double) ((double) all_reduce_cnt / (double) g_tw_gvt_done));
66 }
67 
/*
 * First GVT step for the PE `me`. The visible logic returns early when
 * me->gvt_status is already TW_GVT_COMPUTE, or when a second condition
 * joined by || holds.
 *
 * NOTE(review): this listing skips source line 69 (the declarator; the
 * cross-reference index gives it as tw_gvt_step1(tw_pe *me)), line 72 (the
 * second operand of the ||) and line 75 (presumably the statement executed
 * when the guard does not return). Recover them from the original file.
 */
68 void
70 {
71  if(me->gvt_status == TW_GVT_COMPUTE ||
73  return;
74 
76 }
77 
/*
 * Real-time variant of the first GVT step for the PE `me`. The visible logic
 * declares current_rt and returns early when me->gvt_status is already
 * TW_GVT_COMPUTE, or when a further condition joined by || holds.
 *
 * NOTE(review): this listing skips source line 79 (the declarator; the
 * cross-reference index gives it as tw_gvt_step1_realtime(tw_pe *me)),
 * lines 84-85 (the rest of the guard condition, which presumably assigns
 * current_rt) and line 97 (presumably the statement executed when the guard
 * does not return). Recover them from the original file.
 */
78 void
80 {
81  unsigned long long current_rt;
82 
83  if( (me->gvt_status == TW_GVT_COMPUTE) ||
86  {
/* Disabled debug trace, kept from the original. */
87  /* if( me->id == 0 ) */
88  /* { */
89  /* printf("GVT Step 1 RT Rank %ld: found start_cycles at %llu, rt interval at %llu, current time at %llu \n", */
90  /* me->id, g_tw_gvt_interval_start_cycles, g_tw_gvt_realtime_interval, current_rt); */
91 
92  /* } */
93 
94  return;
95  }
96 
98 }
99 
100 
/*
 * Second GVT step for the PE `me`; does nothing unless me->gvt_status is
 * TW_GVT_COMPUTE.
 *
 * Visible flow:
 *   1. Repeatedly read the network and sum (s_nwhite_sent - s_nwhite_recv)
 *      across MPI_COMM_ROSS until the global sum is 0.
 *   2. Take the local minimum of me->trans_msg_ts, the priority-queue
 *      minimum and the network-buffer minimum.
 *   3. Reduce that value with MPI_MIN across MPI_COMM_ROSS to obtain gvt.
 *   4. Sanity-check gvt, store it in me->GVT, reset the white-message
 *      counters and gvt_cnt, and update timing statistics.
 *   5. Run the fossil-collection and instrumentation sections and increment
 *      g_tw_gvt_done.
 *
 * NOTE(review): this listing skips source lines 102 (the declarator; the
 * cross-reference index gives it as tw_gvt_step2(tw_pe *me)), 155, 166-167,
 * 183, 190, 193, 201-202, 205, 210-211, 220, 222, 234-235 and 243. The
 * fragment does not compile as shown; recover those lines from the original
 * file. Gaps are marked inline below.
 */
101 void
103 {
104  long long local_white = 0;
105  long long total_white = 0;
106 
107  tw_stime pq_min = TW_STIME_MAX;
108  tw_stime net_min = TW_STIME_MAX;
109 
110  tw_stime lvt;
111  tw_stime gvt;
112 
113  tw_clock net_start;
/* The timestamp is taken before the status check; it feeds s_gvt below. */
114  tw_clock start = tw_clock_read();
115 
116  if(me->gvt_status != TW_GVT_COMPUTE)
117  return;
/* Loop until the summed sent-minus-received white-message count is zero. */
118  while(1)
119  {
/* Time spent in tw_net_read is accumulated into me->stats.s_net_read. */
120  net_start = tw_clock_read();
121  tw_net_read(me);
122  me->stats.s_net_read += tw_clock_read() - net_start;
123 
124  // send message counts to create consistent cut
125  local_white = me->s_nwhite_sent - me->s_nwhite_recv;
126  all_reduce_cnt++;
127  if(MPI_Allreduce(
128  &local_white,
129  &total_white,
130  1,
131  MPI_LONG_LONG,
132  MPI_SUM,
133  MPI_COMM_ROSS) != MPI_SUCCESS)
134  tw_error(TW_LOC, "MPI_Allreduce for GVT failed");
135 
136  if(total_white == 0)
137  break;
138  }
139 
140  pq_min = tw_pq_minimum(me->pq);
141  net_min = tw_net_minimum();
142 
/* lvt = min(me->trans_msg_ts, pq_min, net_min), compared via TW_STIME_CMP. */
143  lvt = me->trans_msg_ts;
144  if(TW_STIME_CMP(lvt, pq_min) > 0)
145  lvt = pq_min;
146  if(TW_STIME_CMP(lvt, net_min) > 0)
147  lvt = net_min;
148 
149  all_reduce_cnt++;
150 
/*
 * Global minimum of lvt. NOTE(review): the datatype argument (source line
 * 155) is missing here; the index lists MPI_TYPE_TW_STIME, presumably the
 * value used -- confirm.
 */
151  if(MPI_Allreduce(
152  &lvt,
153  &gvt,
154  1,
156  MPI_MIN,
157  MPI_COMM_ROSS) != MPI_SUCCESS)
158  tw_error(TW_LOC, "MPI_Allreduce for GVT failed");
159 
/*
 * gvt below the previous value resets the no-change counter; otherwise gvt
 * is replaced by me->GVT_prev. NOTE(review): source lines 166-167 are
 * missing; they presumably update g_tw_gvt_no_change and open the condition
 * that guards the tw_error call -- confirm.
 */
160  if(TW_STIME_CMP(gvt, me->GVT_prev) < 0)
161  {
162  g_tw_gvt_no_change = 0;
163  } else
164  {
165  gvt = me->GVT_prev;
168  tw_error(
169  TW_LOC,
170  "GVT computed %d times in a row"
171  " without changing: GVT = %14.14lf, PREV %14.14lf"
172  " -- GLOBAL SYNCH -- out of memory!",
173  g_tw_gvt_no_change, gvt, me->GVT_prev);
174  }
175  }
176 
/* The new gvt must not be below the PE's current GVT. */
177  if (TW_STIME_CMP(me->GVT, gvt) > 0)
178  {
179  tw_error(TW_LOC, "PE %u GVT decreased %g -> %g",
180  me->id, me->GVT, gvt);
181  }
182 
/* NOTE(review): the condition guarding gvt_print (source line 183) is missing. */
184  {
185  gvt_print(gvt);
186  }
187 
188  me->s_nwhite_sent = 0;
189  me->s_nwhite_recv = 0;
/* NOTE(review): source lines 190 and 193 are missing around these assignments. */
191  me->GVT_prev = TW_STIME_MAX; // me->GVT;
192  me->GVT = gvt;
194 
195  gvt_cnt = 0;
196 
197  // update GVT timing stats
198  me->stats.s_gvt += tw_clock_read() - start;
199 
200  // only FC if OPTIMISTIC or REALTIME, do not do for DEBUG MODE
/*
 * NOTE(review): the guarding condition (source lines 201-202) and the timed
 * call (line 205) are missing; the index lists tw_pe_fossil_collect,
 * presumably the call timed here -- confirm.
 */
203  {
204  start = tw_clock_read();
206  me->stats.s_fossil_collect += tw_clock_read() - start;
207  }
208 
209  // do any necessary instrumentation calls
/*
 * NOTE(review): the guarding condition (source lines 210-211) and the
 * statements at lines 220 and 222 (the else branch and the non-Damaris
 * path) are missing.
 */
212  {
213 #ifdef USE_DAMARIS
214  if (g_st_damaris_enabled)
215  {
216  st_damaris_expose_data(me, gvt, GVT_COL);
217  st_damaris_end_iteration();
218  }
219  else
221 #else
223 #endif
224  }
225 #ifdef USE_DAMARIS
226  // need to make sure damaris_end_iteration is called if GVT instrumentation not turned on
227  //if (!g_st_stats_enabled && g_st_real_time_samp) //need to make sure if one PE enters this, all do; otherwise deadlock
228  if (g_st_damaris_enabled && (g_st_engine_stats == RT_STATS || g_st_engine_stats == VT_STATS))
229  {
230  st_damaris_end_iteration();
231  }
232 #endif
233 
/* NOTE(review): source lines 234-235 are missing here. */
236 
237  st_inst_dump();
238  // done with instrumentation related stuff
239 
240  g_tw_gvt_done++;
241 
242  // reset for the next gvt round -- for use in realtime GVT mode only!!
/* NOTE(review): the reset statement itself (source line 243) is missing. */
244  }
long long s_nwhite_recv
Definition: ross-types.h:409
tw_synch g_tw_synchronization_protocol
Definition: ross-global.c:18
#define TW_LOC
Definition: ross-extern.h:164
unsigned int g_tw_gvt_done
Definition: ross-global.c:81
unsigned long long g_tw_clock_rate
Definition: ross-global.c:98
unsigned char gvt_status
Bits available for gvt computation.
Definition: ross-types.h:400
double tw_stime
Definition: ross.h:150
tw_stime tw_pq_minimum(splay_tree *pq)
Definition: splay.c:345
double g_tw_ts_end
Definition: ross-global.c:68
void tw_error(const char *file, int line, const char *fmt,...) NORETURN
Definition: tw-util.c:74
const tw_optdef * tw_gvt_setup(void)
Definition: mpi_allreduce.c:26
tw_statistics stats
per PE counters
Definition: ross-types.h:415
tw_stime GVT_prev
Definition: ross-types.h:404
#define TWOPT_DOUBLE(n, v, h)
Definition: tw-opts.h:32
void tw_net_read(tw_pe *me)
starts service_queues() to poll network
Definition: network-mpi.c:572
static tw_clock tw_clock_read(void)
Definition: aarch64.h:6
int g_st_engine_stats
static unsigned int g_tw_gvt_max_no_change
Definition: mpi_allreduce.c:6
void tw_gvt_start(void)
Definition: mpi_allreduce.c:34
void tw_gvt_force_update_realtime(void)
Definition: mpi_allreduce.c:46
void st_collect_model_data(tw_pe *pe, double current_rt, int stats_type)
Definition: st-model-data.c:45
tw_stime trans_msg_ts
Last transient messages' time stamp.
Definition: ross-types.h:402
Holds the entire PE state.
Definition: ross-types.h:375
MPI_Comm MPI_COMM_ROSS
Definition: network-mpi.c:4
static unsigned int gvt_cnt
Definition: mpi_allreduce.c:9
tw_stime tw_net_minimum(void)
Obtain the lowest timestamp inside the network buffers.
Definition: network-mpi.c:182
void tw_gvt_step1(tw_pe *me)
Definition: mpi_allreduce.c:69
unsigned long long g_tw_gvt_realtime_interval
Definition: ross-global.c:65
#define TW_GVT_COMPUTE
Definition: mpi_allreduce.c:4
static double gvt_print_interval
Definition: mpi_allreduce.h:4
#define TW_STIME_CMP(x, y)
Definition: ross.h:154
unsigned int g_tw_gvt_interval
Definition: ross-global.c:63
unsigned int g_tw_mblock
Definition: ross-global.c:62
void st_inst_dump()
void tw_gvt_force_update(void)
Definition: mpi_allreduce.c:39
tw_peid g_tw_mynode
Definition: ross-global.c:88
#define TWOPT_UINT(n, v, h)
Definition: tw-opts.h:30
#define TW_STIME_MAX
Definition: ross.h:156
tw_peid g_tw_masternode
Definition: ross-global.c:89
#define TWOPT_END()
Definition: tw-opts.h:35
tw_pq * pq
Priority queue used to sort events.
Definition: ross-types.h:381
tw_clock s_net_read
Definition: ross-types.h:136
#define MPI_TYPE_TW_STIME
Definition: ross.h:151
void st_collect_engine_data(tw_pe *me, int col_type)
Definition: st-sim-engine.c:10
int g_st_model_stats
void tw_pe_fossil_collect(void)
Definition: tw-pe.c:62
static tw_stat all_reduce_cnt
Definition: mpi_allreduce.c:8
tw_clock s_fossil_collect
Definition: ross-types.h:139
tw_stat st_get_allreduce_count()
Definition: mpi_allreduce.c:20
long long s_nwhite_sent
Definition: ross-types.h:408
tw_stime GVT
Global Virtual Time.
Definition: ross-types.h:403
unsigned long long tw_stat
Definition: ross-types.h:48
static double percent_complete
Definition: mpi_allreduce.h:5
static void gvt_print(tw_stime gvt)
Definition: mpi_allreduce.h:14
tw_clock s_gvt
Definition: ross-types.h:138
uint64_t tw_clock
Definition: aarch64.h:4
#define TWOPT_GROUP(h)
Definition: tw-opts.h:27
void tw_gvt_step1_realtime(tw_pe *me)
Definition: mpi_allreduce.c:79
static unsigned int gvt_force
Definition: mpi_allreduce.c:10
void tw_gvt_step2(tw_pe *me)
unsigned long long g_tw_max_opt_lookahead
Definition: ross-global.c:64
static const tw_optdef gvt_opts[]
Definition: mpi_allreduce.c:12
#define TW_GVT_NORMAL
Definition: mpi_allreduce.c:3
tw_peid id
Definition: ross-types.h:376
int g_st_num_gvt
#define TW_STIME_DBL(x)
Definition: ross.h:153
void tw_gvt_stats(FILE *f)
Definition: mpi_allreduce.c:53
static unsigned int g_tw_gvt_no_change
Definition: mpi_allreduce.c:7
unsigned long long g_tw_gvt_interval_start_cycles
Definition: ross-global.c:66