ROSS
rand-clcg4.c
Go to the documentation of this file.
1#include <ross.h>
2
3/**
4 * @file rand-clcg4.c
5 * @brief RNG Implementation module
6 *
7 * Random number generator, provides all of the features GTW requires
8 * by default. Chris hacked this pretty well, he would know the
9 * features better.
10 *
11 */
12
13/*********************************************************************
14 *
15 * clcg4 private data/routines
16 *
17 ********************************************************************/
18
19// H = 2^15 : use in MultModM.
20#define H 32768
21
22// One RNG per PE
23static tw_rng *rng = NULL;
24// Core RNG (For tiebreaker etc.)
25static tw_rng *core_rng = NULL;
26
27// default RNG seed
28int32_t seed[4] = { 11111111, 22222222, 33333333, 44444444 };
29
30/**
31 * FindB
32 * @brief Find B to run CLCG4 backwards
33 *
34 * B is \f$ a_[i]^{m_[i] - 2} \mathrm{mod m_[i]} \f$
35 * which is used in running the CLCG4 backwards
36 * Added by Chris Carothers, 5/15/98
37 */
38long long
39FindB(long long a, long long k, long long m)
40{
41 long long sqrs[32];
42 long long power_of_2;
43 long long b;
44
45 int i;
46
47 sqrs[0] = a;
48 for(i = 1; i < 32; i++)
49 {
50 sqrs[i] =(sqrs[i - 1] * sqrs[i - 1]) % m;
51 }
52
53 power_of_2 = 1;
54 b = 1;
55 for(i = 0; i < 32; i++)
56 {
57 if(!(power_of_2 & k))
58 {
59 sqrs[i] = 1;
60
61 }
62 b =(b * sqrs[i]) % m;
63 power_of_2 = power_of_2 * 2;
64 }
65
66 return(b);
67}
68
69/**
70 * MultiModM
71 * @brief Returns(s*t) MOD M
72 *
73 * See L'Ecuyer and Cote(1991).
74 *
75 * Returns(s*t) MOD M. Assumes that -M < s < M and -M < t < M.
76 */
77
78int32_t
79MultModM(int32_t s, int32_t t, int32_t M)
80{
81 int32_t R, S0, S1, q, qh, rh, k;
82
83 if(s < 0)
84 s += M;
85 if(t < 0)
86 t += M;
87 if(s < H)
88 {
89 S0 = s;
90 R = 0;
91 }
92 else
93 {
94 S1 = s / H;
95 S0 = s - H * S1;
96 qh = M / H;
97 rh = M - H * qh;
98
99 if(S1 >= H)
100 {
101 S1 -= H;
102 k = t / qh;
103 R = H *(t - k * qh) - k * rh;
104 while(R < 0)
105 R += M;
106 }
107 else
108 R = 0;
109
110 if(S1 != 0)
111 {
112 q = M / S1;
113 k = t / q;
114 R -= k *(M - S1 * q);
115 if(R > 0)
116 R -= M;
117 R += S1 *(t - k * q);
118 while(R < 0)
119 R += M;
120 }
121 k = R / qh;
122 R = H *(R - k * qh) - k * rh;
123 while(R < 0)
124 R += M;
125 }
126 if(S0 != 0)
127 {
128 q = M / S0;
129 k = t / q;
130 R -= k *(M - S0 * q);
131 if(R > 0)
132 R -= M;
133 R += S0 *(t - k * q);
134 while(R < 0)
135 R += M;
136 }
137 return R;
138}
139
140/********************************************************************
141 *
142 * clcg4 public interface
143 *
144 *******************************************************************/
145
146#define S0_MAX 2147483646
147#define S1_MAX 2147483542
148#define S2_MAX 2147483422
149#define S3_MAX 2147483322
150
151/**
152 * These seeds MUST adhere to these requirements.
153 * This is explicitly stated in L'Ecuyer and Andres (1997)
154 */
155void clamp_seed(uint32_t s[4])
156{
157 if (s[0] < 1)
158 s[0] = 1;
159 if (s[0] > S0_MAX)
160 s[0] = S0_MAX;
161
162 if (s[1] < 1)
163 s[1] = 1;
164 if (s[1] > S1_MAX)
165 s[1] = S1_MAX;
166
167 if (s[2] < 1)
168 s[2] = 1;
169 if (s[2] > S2_MAX)
170 s[2] = S2_MAX;
171
172 if (s[3] < 1)
173 s[3] = 1;
174 if (s[3] > S3_MAX)
175 s[3] = S3_MAX;
176}
177
178/*
179 * rng_set_seed
180 */
181
182void
183rng_set_seed(tw_rng_stream * g, uint32_t s[4], tw_rng * the_rng)
184{
185 int j;
186
187 clamp_seed(s);
188
189 for(j = 0; j < 4; j++)
190 g->Ig[j] = s[j];
191
192 rng_init_generator(g, InitialSeed, the_rng);
193}
194
195/*
196 * rng_write_state
197 */
198
199void
201{
202 int j;
203
204 for(j = 0; j < 4; j++)
205 fprintf( f, "%u ", g->Cg[j]);
206 fprintf( f, "\n");
207}
208
209/*
210 * rng_get_state
211 */
212
213void
214rng_get_state(tw_rng_stream * g, uint32_t s[4])
215{
216 int j;
217
218 for(j = 0; j < 4; j++)
219 s[j] = g->Cg[j];
220}
221
222
223/*
224 * rng_get_state
225 */
226
227void
228rng_put_state(tw_rng_stream * g, uint32_t s[4])
229{
230 int j;
231
232 for(j = 0; j < 4; j++)
233 g->Cg[j] = s[j];
234}
235
236/*
237 * rng_init_generator
238 */
239
240void
242{
243 int j;
244
245 for(j = 0; j < 4; j++)
246 {
247 switch(Where)
248 {
249 case InitialSeed:
250 g->Lg[j] = g->Ig[j];
251 break;
252 case NewSeed:
253 g->Lg[j] = MultModM(the_rng->aw[j], g->Lg[j], the_rng->m[j]);
254 break;
255 case LastSeed:
256 break;
257 }
258
259 g->Cg[j] = g->Lg[j];
260 }
261}
262
263/*
264 * tw_rand_initial_seed
265 * Takes in pointer to main RNG with which to seed the supplied tw_rng_stream *g
266 */
267void
269{
270 //If no specific PE rng is specified, use the default globally defiend PE rng
271 if (the_rng == NULL)
272 the_rng = rng;
273
274 tw_lpid mask_bit = 1;
275
276 uint32_t Ig_t[4];
277 uint32_t avw_t[4];
278
279 int i;
280 int j;
281 int positions = ((sizeof(tw_lpid)) * 8) - 1;
282
283 //seed for zero
284 for(j = 0; j < 4; j++)
285 Ig_t[j] = the_rng->seed[j];
286
287 mask_bit <<= positions;
288
289 g->count = 0;
290
291 do
292 {
293 if(id & mask_bit)
294 {
295 for(j = 0; j < 4; j++)
296 {
297 avw_t[j] = the_rng->avw[j];
298
299 // exponentiate modulus
300 for(i = 0; i < positions; i++)
301 avw_t[j] = MultModM(avw_t[j], avw_t[j], the_rng->m[j]);
302
303 Ig_t[j] = MultModM(avw_t[j], Ig_t[j], the_rng->m[j]);
304 }
305 }
306
307 mask_bit >>= 1;
308 positions--;
309 } while(positions > 0);
310
311 if(id % 2)
312 {
313 for(j = 0; j < 4; j++)
314 Ig_t[j] = MultModM(the_rng->avw[j], Ig_t[j], the_rng->m[j]);
315 }
316
317 clamp_seed(Ig_t);
318
319 for(j = 0; j < 4; j++)
320 g->Ig[j] = Ig_t[j];
321
322 rng_init_generator(g, InitialSeed, the_rng);
323 //rng_write_state(g);
324}
325
326
327
328void
329tw_rand_init_streams(tw_lp * lp, unsigned int nstreams, unsigned int n_core_streams)
330{
331 unsigned int i;
332 unsigned int j;
333
334 if(nstreams > g_tw_nRNG_per_lp)
335 tw_error(TW_LOC, "LP %lu asked for more RNG streams (%d) than the global maximum (g_tw_nRNG_per_lp:%d)\n", lp->gid, nstreams, g_tw_nRNG_per_lp);
336
337 if(n_core_streams > g_tw_nRNG_core_per_lp)
338 tw_error(TW_LOC, "LP %lu asked for more RNG streams (%d) than the global maximum for core (g_tw_nRNG_core_per_lp:%d)\n", lp->gid, n_core_streams, g_tw_nRNG_core_per_lp);
339
340 lp->rng = (tw_rng_stream *) tw_calloc(TW_LOC, "LP RNG Streams", sizeof(*lp->rng), nstreams);
341 lp->core_rng = (tw_rng_stream *) tw_calloc(TW_LOC, "LP Core RNG Streams", sizeof(*lp->core_rng), n_core_streams);
342
343 unsigned int total_num_noncore_rngs = g_tw_nRNG_per_lp * g_tw_total_lps;
344
345 for(i = 0; i < nstreams; i++) {
346 tw_rand_initial_seed(&lp->rng[i], (lp->gid * g_tw_nRNG_per_lp) + i, rng);
347 }
348
349 //ROSS core rng streams should be seeded "after" lp rng streams so adjusting number of core streams doesn't change seeds of the lp rng streams
350 for(j = 0; j < n_core_streams; j++) {
351 tw_rand_initial_seed(&lp->core_rng[j], total_num_noncore_rngs + (lp->gid * (g_tw_nRNG_core_per_lp)) + j, core_rng);
352 }
353}
354
355/*
356 * rng_init
357 */
358tw_rng *
359rng_init(int v, int w)
360{
361 int i;
362 int j;
363
364 rng = (tw_rng *) tw_calloc(TW_LOC, "RNG", sizeof(*rng), 1);
365
366 rng->m[0] = 2147483647;
367 rng->m[1] = 2147483543;
368 rng->m[2] = 2147483423;
369 rng->m[3] = 2147483323;
370
371 rng->a[0] = 45991;
372 rng->a[1] = 207707;
373 rng->a[2] = 138556;
374 rng->a[3] = 49689;
375
376 if(g_tw_rng_seed)
377 {
378 if (!g_tw_mynode)
379 printf("Clamping Provided LP Seeds\n");
380 clamp_seed((uint32_t*)g_tw_rng_seed);
381 for(j = 0; j < 4; j++)
382 rng->seed[j] = g_tw_rng_seed[j];
383 } else
384 {
385 rng->seed[0] = 11111111;
386 rng->seed[1] = 22222222;
387 rng->seed[2] = 33333333;
388 rng->seed[3] = 44444444;
389 }
390
391 for(j = 0; j < 4; j++)
392 rng->aw[j] = rng->a[j];
393
394 for(j = 0; j < 4; j++)
395 {
396 for(i = 1; i <= w; i++)
397 rng->aw[j] = MultModM(rng->aw[j], rng->aw[j], rng->m[j]);
398
399 rng->avw[j] = rng->aw[j];
400
401 for(i = 1; i <= v; i++)
402 rng->avw[j] = MultModM(rng->avw[j], rng->avw[j], rng->m[j]);
403 }
404
405 for(j = 0; j < 4; j++)
406 rng->b[j] = FindB(rng->a[j],(rng->m[j] - 2), rng->m[j]);
407
408 return rng;
409}
410
411/*
412 * rng_core_init
413 * Sets up the main ROSS Core RNG
414 */
415tw_rng *
416rng_core_init(int v, int w)
417{
418 int i;
419 int j;
420
421 //Do the same for the ROSS Core RNG (Ross Engine Hidden RNGs) as was done for the regular main RNG
422 core_rng = (tw_rng *) tw_calloc(TW_LOC, "CORE_RNG", sizeof(*rng), 1);
423
424 //For RC to work properly, these should all have the same values as the main RNG
425 //If this changes, it will break reverse computation of the RNG. See rng_gen_reverse_val()
426 memcpy(core_rng, rng, sizeof(tw_rng));
427
428 //But the seeds should be different
430 {
431 if(!g_tw_mynode)
432 printf("Clamping Provided Core Seeds\n");
433 clamp_seed((uint32_t*)g_tw_core_rng_seed);
434 for(j = 0; j < 4; j++)
435 core_rng->seed[j] = g_tw_core_rng_seed[j];
436 } else
437 {
438 core_rng->seed[0] = 55555555;
439 core_rng->seed[1] = 66666666;
440 core_rng->seed[2] = 77777777;
441 core_rng->seed[3] = 88888888;
442 }
443
444 return core_rng;
445}
446
447/*
448 * rng_gen_val
449 */
450double
452{
453 int32_t k, s;
454 double u;
455
456 g->count++;
457
458 u = 0.0;
459
460 s = g->Cg[0];
461 k = s / 46693;
462 s = 45991 *(s - k * 46693) - k * 25884;
463 if(s < 0)
464 s = s + 2147483647;
465 g->Cg[0] = s;
466 u = u + 4.65661287524579692e-10 * s;
467
468 s = g->Cg[1];
469 k = s / 10339;
470 s = 207707 *(s - k * 10339) - k * 870;
471 if(s < 0)
472 s = s + 2147483543;
473 g->Cg[1] = s;
474 u = u - 4.65661310075985993e-10 * s;
475 if(u < 0)
476 u = u + 1.0;
477
478 s = g->Cg[2];
479 k = s / 15499;
480 s = 138556 *(s - k * 15499) - k * 3979;
481 if(s < 0.0)
482 s = s + 2147483423;
483 g->Cg[2] = s;
484 u = u + 4.65661336096842131e-10 * s;
485 if(u >= 1.0)
486 u = u - 1.0;
487
488 s = g->Cg[3];
489 k = s / 43218;
490 s = 49689 *(s - k * 43218) - k * 24121;
491 if(s < 0)
492 s = s + 2147483323;
493 g->Cg[3] = s;
494 u = u - 4.65661357780891134e-10 * s;
495 if(u < 0)
496 u = u + 1.0;
497
498 return u;
499}
500
501/*
502 * rng_gen_reverse_val
503 *
504 * computes the reverse sequence, however does not
505 * return the uniform value computed as a performance
506 * optimization -- Chris Carothers 5/15/98
507 *
508 * Because the ROSS Core RNGs have the exact same b & m
509 * values as the main RNG, this is safe for use on the
510 * core RNG streams. Will be an issue if this is ever
511 * not the case.
512 */
513
514double
516{
517 long long *b = rng->b;
518 int32_t *m = rng->m;
519 int32_t s;
520 double u;
521
522 g->count--;
523
524 u = 0.0;
525
526 if(b[0] == 0)
527 tw_error(TW_LOC, "b values not calculated \n");
528
529 s = g->Cg[0];
530 s =(b[0] * s) % m[0];
531 g->Cg[0] = s;
532 u = u + 4.65661287524579692e-10 * s;
533
534 s = g->Cg[1];
535 s =(b[1] * s) % m[1];
536 g->Cg[1] = s;
537 u = u - 4.65661310075985993e-10 * s;
538 if(u < 0)
539 u = u + 1.0;
540
541 s = g->Cg[2];
542 s =(b[2] * s) % m[2];
543 g->Cg[2] = s;
544 u = u + 4.65661336096842131e-10 * s;
545 if(u >= 1.0)
546 u = u - 1.0;
547
548 s = g->Cg[3];
549 s =(b[3] * s) % m[3];
550 g->Cg[3] = s;
551 u = u - 4.65661357780891134e-10 * s;
552 if(u < 0)
553 u = u + 1.0;
554
555 return u;
556}
SeedType
Definition rand-clcg4.h:29
@ InitialSeed
Definition rand-clcg4.h:30
@ NewSeed
Definition rand-clcg4.h:30
@ LastSeed
Definition rand-clcg4.h:30
uint64_t tw_lpid
Definition ross-base.h:49
void * tw_calloc(const char *file, int line, const char *for_who, size_t e_sz, size_t n)
Definition tw-util.c:206
tw_seed g_tw_rng_seed
Definition ross-global.c:34
void tw_error(const char *file, int line, const char *fmt,...)
Definition tw-util.c:77
tw_peid g_tw_mynode
Definition ross-global.c:92
unsigned int g_tw_nRNG_per_lp
Definition ross-global.c:31
unsigned int g_tw_nRNG_core_per_lp
Definition ross-global.c:32
tw_seed g_tw_core_rng_seed
Definition ross-global.c:35
tw_lpid g_tw_total_lps
Definition ross-global.c:25
#define TW_LOC
#define S1_MAX
Definition rand-clcg4.c:147
void tw_rand_init_streams(tw_lp *lp, unsigned int nstreams, unsigned int n_core_streams)
Definition rand-clcg4.c:329
#define S0_MAX
Definition rand-clcg4.c:146
#define S2_MAX
Definition rand-clcg4.c:148
static tw_rng * rng
Definition rand-clcg4.c:23
static tw_rng * core_rng
Definition rand-clcg4.c:25
tw_rng * rng_core_init(int v, int w)
Definition rand-clcg4.c:416
tw_rng * rng_init(int v, int w)
Definition rand-clcg4.c:359
double rng_gen_reverse_val(tw_rng_stream *g)
Definition rand-clcg4.c:515
void rng_put_state(tw_rng_stream *g, uint32_t s[4])
Definition rand-clcg4.c:228
void rng_get_state(tw_rng_stream *g, uint32_t s[4])
Definition rand-clcg4.c:214
int32_t MultModM(int32_t s, int32_t t, int32_t M)
Returns(s*t) MOD M.
Definition rand-clcg4.c:79
#define H
Definition rand-clcg4.c:20
long long FindB(long long a, long long k, long long m)
Find B to run CLCG4 backwards.
Definition rand-clcg4.c:39
int32_t seed[4]
Definition rand-clcg4.c:28
#define S3_MAX
Definition rand-clcg4.c:149
void rng_write_state(tw_rng_stream *g, FILE *f)
Definition rand-clcg4.c:200
void rng_set_seed(tw_rng_stream *g, uint32_t s[4], tw_rng *the_rng)
Definition rand-clcg4.c:183
double rng_gen_val(tw_rng_stream *g)
Definition rand-clcg4.c:451
void clamp_seed(uint32_t s[4])
Definition rand-clcg4.c:155
void rng_init_generator(tw_rng_stream *g, SeedType Where, tw_rng *the_rng)
Definition rand-clcg4.c:241
void tw_rand_initial_seed(tw_rng_stream *g, tw_lpid id, tw_rng *the_rng)
Definition rand-clcg4.c:268
LP State Structure.
Definition ross-types.h:336
tw_rng_stream * rng
RNG stream array for this LP.
Definition ross-types.h:349
tw_lpid gid
global LP id
Definition ross-types.h:338
tw_rng_stream * core_rng
RNG stream array for ROSS non-model operation - possible alternative to a model_rng pointer array.
Definition ross-types.h:350
int32_t Ig[4]
Definition rand-clcg4.h:38
int32_t Cg[4]
Definition rand-clcg4.h:40
int32_t Lg[4]
Definition rand-clcg4.h:39
unsigned long count
Definition rand-clcg4.h:37
int32_t avw[4]
Definition rand-clcg4.h:22
int32_t aw[4]
Definition rand-clcg4.h:21
int32_t m[4]
Definition rand-clcg4.h:19
int32_t seed[4]
Definition rand-clcg4.h:25