ROSS
rand-clcg4.c
Go to the documentation of this file.
1 #include <ross.h>
2 
3 /**
4  * @file rand-clcg4.c
5  * @brief RNG Implementation module
6  *
7  * Random number generator, provides all of the features GTW requires
8  * by default. Chris hacked this pretty well, he would know the
9  * features better.
10  *
11  */
12 
13 /*********************************************************************
14  *
15  * clcg4 private data/routines
16  *
17  ********************************************************************/
18 
19 // H = 2^15 : use in MultModM.
20 #define H 32768
21 
22 // One RNG per PE
23 static tw_rng *rng = NULL;
24 
25 // default RNG seed
26 int32_t seed[4] = { 11111111, 22222222, 33333333, 44444444 };
27 
28 /**
29  * FindB
30  * @brief Find B to run CLCG4 backwards
31  *
32  * B is \f$ a_[i]^{m_[i] - 2} \mathrm{mod m_[i]} \f$
33  * which is used in running the CLCG4 backwards
34  * Added by Chris Carothers, 5/15/98
35  */
36 long long
37 FindB(long long a, long long k, long long m)
38 {
39  long long sqrs[32];
40  long long power_of_2;
41  long long b;
42 
43  int i;
44 
45  sqrs[0] = a;
46  for(i = 1; i < 32; i++)
47  {
48  sqrs[i] =(sqrs[i - 1] * sqrs[i - 1]) % m;
49  }
50 
51  power_of_2 = 1;
52  b = 1;
53  for(i = 0; i < 32; i++)
54  {
55  if(!(power_of_2 & k))
56  {
57  sqrs[i] = 1;
58 
59  }
60  b =(b * sqrs[i]) % m;
61  power_of_2 = power_of_2 * 2;
62  }
63 
64  return(b);
65 }
66 
67 /**
68  * MultiModM
69  * @brief Returns(s*t) MOD M
70  *
71  * See L'Ecuyer and Cote(1991).
72  *
73  * Returns(s*t) MOD M. Assumes that -M < s < M and -M < t < M.
74  */
75 
76 int32_t
77 MultModM(int32_t s, int32_t t, int32_t M)
78 {
79  int32_t R, S0, S1, q, qh, rh, k;
80 
81  if(s < 0)
82  s += M;
83  if(t < 0)
84  t += M;
85  if(s < H)
86  {
87  S0 = s;
88  R = 0;
89  }
90  else
91  {
92  S1 = s / H;
93  S0 = s - H * S1;
94  qh = M / H;
95  rh = M - H * qh;
96 
97  if(S1 >= H)
98  {
99  S1 -= H;
100  k = t / qh;
101  R = H *(t - k * qh) - k * rh;
102  while(R < 0)
103  R += M;
104  }
105  else
106  R = 0;
107 
108  if(S1 != 0)
109  {
110  q = M / S1;
111  k = t / q;
112  R -= k *(M - S1 * q);
113  if(R > 0)
114  R -= M;
115  R += S1 *(t - k * q);
116  while(R < 0)
117  R += M;
118  }
119  k = R / qh;
120  R = H *(R - k * qh) - k * rh;
121  while(R < 0)
122  R += M;
123  }
124  if(S0 != 0)
125  {
126  q = M / S0;
127  k = t / q;
128  R -= k *(M - S0 * q);
129  if(R > 0)
130  R -= M;
131  R += S0 *(t - k * q);
132  while(R < 0)
133  R += M;
134  }
135  return R;
136 }
137 
138 /********************************************************************
139  *
140  * clcg4 public interface
141  *
142  *******************************************************************/
143 
144 #define S0_MAX 2147483646
145 #define S1_MAX 2147483542
146 #define S2_MAX 2147483422
147 #define S3_MAX 2147483322
148 
149 /**
150  * These seeds MUST adhere to these requirements.
151  * This is explicitly stated in L'Ecuyer and Andres (1997)
152  */
153 void clamp_seed(uint32_t s[4])
154 {
155  if (s[0] < 1)
156  s[0] = 1;
157  if (s[0] > S0_MAX)
158  s[0] = S0_MAX;
159 
160  if (s[1] < 1)
161  s[1] = 1;
162  if (s[1] > S1_MAX)
163  s[1] = S1_MAX;
164 
165  if (s[2] < 1)
166  s[2] = 1;
167  if (s[2] > S2_MAX)
168  s[2] = S2_MAX;
169 
170  if (s[3] < 1)
171  s[3] = 1;
172  if (s[3] > S3_MAX)
173  s[3] = S3_MAX;
174 }
175 
176 /*
177  * rng_set_seed
178  */
179 
180 void
181 rng_set_seed(tw_rng_stream * g, uint32_t s[4])
182 {
183  int j;
184 
185  clamp_seed(s);
186 
187  for(j = 0; j < 4; j++)
188  g->Ig[j] = s[j];
189 
191 }
192 
193 /*
194  * rng_write_state
195  */
196 
197 void
199 {
200  int j;
201 
202  for(j = 0; j < 4; j++)
203  fprintf( f, "%u ", g->Cg[j]);
204  fprintf( f, "\n");
205 }
206 
207 /*
208  * rng_get_state
209  */
210 
211 void
212 rng_get_state(tw_rng_stream * g, uint32_t s[4])
213 {
214  int j;
215 
216  for(j = 0; j < 4; j++)
217  s[j] = g->Cg[j];
218 }
219 
220 
221 /*
222  * rng_get_state
223  */
224 
225 void
226 rng_put_state(tw_rng_stream * g, uint32_t s[4])
227 {
228  int j;
229 
230  for(j = 0; j < 4; j++)
231  g->Cg[j] = s[j];
232 }
233 
234 /*
235  * rng_init_generator
236  */
237 
238 void
240 {
241  int j;
242 
243  for(j = 0; j < 4; j++)
244  {
245  switch(Where)
246  {
247  case InitialSeed:
248  g->Lg[j] = g->Ig[j];
249  break;
250  case NewSeed:
251  g->Lg[j] = MultModM(rng->aw[j], g->Lg[j], rng->m[j]);
252  break;
253  case LastSeed:
254  break;
255  }
256 
257  g->Cg[j] = g->Lg[j];
258  }
259 }
260 
261 /*
262  * rng_set_initial_seed
263  */
264 void
266 {
267  tw_lpid mask_bit = 1;
268 
269  uint32_t Ig_t[4];
270  uint32_t avw_t[4];
271 
272  int i;
273  int j;
274  int positions = ((sizeof(tw_lpid)) * 8) - 1;
275 
276  //seed for zero
277  for(j = 0; j < 4; j++)
278  Ig_t[j] = rng->seed[j];
279 
280  mask_bit <<= positions;
281 
282  g->count = 0;
283 
284  do
285  {
286  if(id & mask_bit)
287  {
288  for(j = 0; j < 4; j++)
289  {
290  avw_t[j] = rng->avw[j];
291 
292  // exponentiate modulus
293  for(i = 0; i < positions; i++)
294  avw_t[j] = MultModM(avw_t[j], avw_t[j], rng->m[j]);
295 
296  Ig_t[j] = MultModM(avw_t[j], Ig_t[j], rng->m[j]);
297  }
298  }
299 
300  mask_bit >>= 1;
301  positions--;
302  } while(positions > 0);
303 
304  if(id % 2)
305  {
306  for(j = 0; j < 4; j++)
307  Ig_t[j] = MultModM(rng->avw[j], Ig_t[j], rng->m[j]);
308  }
309 
310  clamp_seed(Ig_t);
311 
312  for(j = 0; j < 4; j++)
313  g->Ig[j] = Ig_t[j];
314 
316  //rng_write_state(g);
317 }
318 
319 void
320 tw_rand_init_streams(tw_lp * lp, unsigned int nstreams)
321 {
322  unsigned int i;
323 
324  if(nstreams > g_tw_nRNG_per_lp)
325  tw_error(TW_LOC, "LP %lu asked for more RNG streams (%d) than the global maximum (g_tw_nRNG_per_lp:%d)\n", lp->gid, nstreams, g_tw_nRNG_per_lp);
326 
327  lp->rng = (tw_rng_stream *) tw_calloc(TW_LOC, "LP RNG Streams", sizeof(*lp->rng), nstreams);
328 
329  for(i = 0; i < nstreams; i++) {
330  tw_rand_initial_seed(&lp->rng[i], (lp->gid * g_tw_nRNG_per_lp) + i);
331  }
332 }
333 
334 /*
335  * rng_init
336  */
337 tw_rng *
338 rng_init(int v, int w)
339 {
340  int i;
341  int j;
342 
343  rng = (tw_rng *) tw_calloc(TW_LOC, "RNG", sizeof(*rng), 1);
344 
345  rng->m[0] = 2147483647;
346  rng->m[1] = 2147483543;
347  rng->m[2] = 2147483423;
348  rng->m[3] = 2147483323;
349 
350  rng->a[0] = 45991;
351  rng->a[1] = 207707;
352  rng->a[2] = 138556;
353  rng->a[3] = 49689;
354 
355  if(g_tw_rng_seed)
356  {
357  clamp_seed((uint32_t*)g_tw_rng_seed);
358  for(j = 0; j < 4; j++)
359  rng->seed[j] = g_tw_rng_seed[j];
360  } else
361  {
362  rng->seed[0] = 11111111;
363  rng->seed[1] = 22222222;
364  rng->seed[2] = 33333333;
365  rng->seed[3] = 44444444;
366  }
367 
368  for(j = 0; j < 4; j++)
369  rng->aw[j] = rng->a[j];
370 
371  for(j = 0; j < 4; j++)
372  {
373  for(i = 1; i <= w; i++)
374  rng->aw[j] = MultModM(rng->aw[j], rng->aw[j], rng->m[j]);
375 
376  rng->avw[j] = rng->aw[j];
377 
378  for(i = 1; i <= v; i++)
379  rng->avw[j] = MultModM(rng->avw[j], rng->avw[j], rng->m[j]);
380  }
381 
382  for(j = 0; j < 4; j++)
383  rng->b[j] = FindB(rng->a[j],(rng->m[j] - 2), rng->m[j]);
384 
385  return rng;
386 }
387 
388 /*
389  * rng_gen_val
390  */
391 double
393 {
394  int32_t k, s;
395  double u;
396 
397  g->count++;
398 
399  u = 0.0;
400 
401  s = g->Cg[0];
402  k = s / 46693;
403  s = 45991 *(s - k * 46693) - k * 25884;
404  if(s < 0)
405  s = s + 2147483647;
406  g->Cg[0] = s;
407  u = u + 4.65661287524579692e-10 * s;
408 
409  s = g->Cg[1];
410  k = s / 10339;
411  s = 207707 *(s - k * 10339) - k * 870;
412  if(s < 0)
413  s = s + 2147483543;
414  g->Cg[1] = s;
415  u = u - 4.65661310075985993e-10 * s;
416  if(u < 0)
417  u = u + 1.0;
418 
419  s = g->Cg[2];
420  k = s / 15499;
421  s = 138556 *(s - k * 15499) - k * 3979;
422  if(s < 0.0)
423  s = s + 2147483423;
424  g->Cg[2] = s;
425  u = u + 4.65661336096842131e-10 * s;
426  if(u >= 1.0)
427  u = u - 1.0;
428 
429  s = g->Cg[3];
430  k = s / 43218;
431  s = 49689 *(s - k * 43218) - k * 24121;
432  if(s < 0)
433  s = s + 2147483323;
434  g->Cg[3] = s;
435  u = u - 4.65661357780891134e-10 * s;
436  if(u < 0)
437  u = u + 1.0;
438 
439  return u;
440 }
441 
442 /*
443  * rng_gen_reverse_val
444  *
445  * computes the reverse sequence, however does not
446  * return the uniform value computed as a performance
447  * optimization -- Chris Carothers 5/15/98
448  */
449 
450 double
452 {
453  long long *b = rng->b;
454  int32_t *m = rng->m;
455  int32_t s;
456  double u;
457 
458  g->count--;
459 
460  u = 0.0;
461 
462  if(b[0] == 0)
463  tw_error(TW_LOC, "b values not calculated \n");
464 
465  s = g->Cg[0];
466  s =(b[0] * s) % m[0];
467  g->Cg[0] = s;
468  u = u + 4.65661287524579692e-10 * s;
469 
470  s = g->Cg[1];
471  s =(b[1] * s) % m[1];
472  g->Cg[1] = s;
473  u = u - 4.65661310075985993e-10 * s;
474  if(u < 0)
475  u = u + 1.0;
476 
477  s = g->Cg[2];
478  s =(b[2] * s) % m[2];
479  g->Cg[2] = s;
480  u = u + 4.65661336096842131e-10 * s;
481  if(u >= 1.0)
482  u = u - 1.0;
483 
484  s = g->Cg[3];
485  s =(b[3] * s) % m[3];
486  g->Cg[3] = s;
487  u = u - 4.65661357780891134e-10 * s;
488  if(u < 0)
489  u = u + 1.0;
490 
491  return u;
492 }
void clamp_seed(uint32_t s[4])
Definition: rand-clcg4.c:153
#define TW_LOC
Definition: ross-extern.h:164
tw_rng * rng_init(int v, int w)
Definition: rand-clcg4.c:338
void tw_error(const char *file, int line, const char *fmt,...) NORETURN
Definition: tw-util.c:74
SeedType
Definition: rand-clcg4.h:25
int32_t a[4]
Definition: rand-clcg4.h:17
int32_t avw[4]
Definition: rand-clcg4.h:19
int32_t MultModM(int32_t s, int32_t t, int32_t M)
Returns(s*t) MOD M.
Definition: rand-clcg4.c:77
double rng_gen_val(tw_rng_stream *g)
Definition: rand-clcg4.c:392
void rng_write_state(tw_rng_stream *g, FILE *f)
Definition: rand-clcg4.c:198
void tw_rand_initial_seed(tw_rng_stream *g, tw_lpid id)
Definition: rand-clcg4.c:265
#define S3_MAX
Definition: rand-clcg4.c:147
int32_t aw[4]
Definition: rand-clcg4.h:18
static tw_rng * rng
Definition: rand-clcg4.c:23
#define S0_MAX
Definition: rand-clcg4.c:144
uint64_t tw_lpid
Definition: ross.h:160
#define S1_MAX
Definition: rand-clcg4.c:145
#define H
Definition: rand-clcg4.c:20
tw_lpid gid
global LP id
Definition: ross-types.h:306
tw_seed g_tw_rng_seed
Definition: ross-global.c:31
long long b[4]
Definition: rand-clcg4.h:11
#define S2_MAX
Definition: rand-clcg4.c:146
void rng_get_state(tw_rng_stream *g, uint32_t s[4])
Definition: rand-clcg4.c:212
int32_t seed[4]
Definition: rand-clcg4.h:22
int32_t Ig[4]
Definition: rand-clcg4.h:35
unsigned int g_tw_nRNG_per_lp
Definition: ross-global.c:29
int32_t Lg[4]
Definition: rand-clcg4.h:36
void rng_init_generator(tw_rng_stream *g, SeedType Where)
Definition: rand-clcg4.c:239
void rng_set_seed(tw_rng_stream *g, uint32_t s[4])
Definition: rand-clcg4.c:181
void rng_put_state(tw_rng_stream *g, uint32_t s[4])
Definition: rand-clcg4.c:226
double rng_gen_reverse_val(tw_rng_stream *g)
Definition: rand-clcg4.c:451
unsigned long count
Definition: rand-clcg4.h:34
tw_rng_stream * rng
RNG stream array for this LP.
Definition: ross-types.h:317
int32_t m[4]
Definition: rand-clcg4.h:16
int32_t Cg[4]
Definition: rand-clcg4.h:37
void tw_rand_init_streams(tw_lp *lp, unsigned int nstreams)
Definition: rand-clcg4.c:320
long long FindB(long long a, long long k, long long m)
Find B to run CLCG4 backwards.
Definition: rand-clcg4.c:37
int32_t seed[4]
Definition: rand-clcg4.c:26
void * tw_calloc(const char *file, int line, const char *for_who, size_t e_sz, size_t n)
Definition: tw-util.c:203
LP State Structure.
Definition: ross-types.h:304