[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Fast Cheap Unreviewed Stream Cipher
I received this code from another friend.
Here's a FAST somewhat cryptographically secure pseudo-random generator
written by a friend of mine that wishes to remain anonymous. He has given
permission for the following code to be placed in the public domain, and to
be sent to cypherpunks and others for review. Alas, I have never gotten
around to doing so. As it states, on a PowerPC it'll produce bits fast
enough to be a pseudo-one-time pad / stream cipher for good digitized video
on a LAN (the purpose for which he originally sent it to me) and hence
should do point-to-point audio without even breathing hard.
For those who haven't heard, the scenario is for point-to-point audio to
use an affordable (weak) stream cipher, but to be changing keys frequently
using our strongly secure SSL channel. Each costly break would only
compromise a few seconds or so of audio. The following code may serve as
the affordable (but maybe better than weak) stream cipher.
/******************************************************/
/** (c) 1823 Millard Fillmore, all rights expired **/
/** Full words of pseudorandom bits at video speed **/
/******************************************************/
#include <stdlib.h>
#include <stdio.h>
#include <time.h>
#include <sioux.h>
#define WORD_SIZE 32
unsigned long shiftRand();
static void refillShiftRand();
void seedShiftRand(unsigned long seed);
static unsigned long shiftRandArray[31] = { }; // to hold
internal state values
static unsigned long * arrayPtr = shiftRandArray; // incremented as
state values are consumed
static unsigned long * const end = shiftRandArray + 31; // marks end (and
need to refill array)
// Initializes shiftRandArray to pseudorandom contents using a seed value
and the system rand function,
// which is assumed to produce 16 bits per call. Low rand bits are obscured
by xoring with high bits from other
// calls, and all product bits depend on two or more calls to rand. The
resulting values are themselves obscured,
// not simply revealed at the output, hence this is probably overkill. The
seed acts as a key.
void seedShiftRand(unsigned long seed) {
srand(seed);
long cyclesPerWord = ((WORD_SIZE + 7) >> 3) - 1; // will add a net
of 8 bits per call to rand
unsigned long * arrPtr = shiftRandArray;
for (; arrPtr < end; arrPtr++) {
unsigned long rand1 = rand(); // wrap this
value around word boundary:
unsigned long element = (rand1 >> 8) | (rand1 << (WORD_SIZE
- 8));
for (long i = 0; i < cyclesPerWord; i++) {
element ^= rand() << (8 * i); //previous
8 high bits obscure 8 new low bits
}
*arrPtr = element;
}
}
// Low-order bits in the following act as an xor-based linear feedback
shift register (with moving taps
// and stationary data); the repeat period equals (1 << 31) - 1.
Higher-order bits have similar
// "intrinsic" behavior, but the state of each is determined by additional
xoring with a sequence
// of carry bits from below, causing repeated period-doubling across the
width of the word.
//
// The choice of 31 and 13 corresponds to a primitive polynomial mod 2;
Applied Cryptography has
// typos in polynomial coefficients, but the repeat period was tested
directly. This code avoids a
// shift-direction mistake in Applied Cryptography, and hence follows
Numerical Recipes.
//
static void refillShiftRand() {
unsigned long * tap0 = shiftRandArray + 0;
unsigned long * tap13 = shiftRandArray + 13;
*tap0 = *tap0 + *tap13; tap0++; tap13++;
*tap0 = *tap0 + *tap13; tap0++; tap13++;
*tap0 = *tap0 + *tap13; tap0++; tap13++;
*tap0 = *tap0 + *tap13; tap0++; tap13++;
*tap0 = *tap0 + *tap13; tap0++; tap13++;
*tap0 = *tap0 + *tap13; tap0++; tap13++;
*tap0 = *tap0 + *tap13; tap0++; tap13++;
*tap0 = *tap0 + *tap13; tap0++; tap13++;
*tap0 = *tap0 + *tap13; tap0++; tap13++;
*tap0 = *tap0 + *tap13; tap0++; tap13++;
*tap0 = *tap0 + *tap13; tap0++; tap13++;
*tap0 = *tap0 + *tap13; tap0++; tap13++;
*tap0 = *tap0 + *tap13; tap0++; tap13++;
*tap0 = *tap0 + *tap13; tap0++; tap13++;
*tap0 = *tap0 + *tap13; tap0++; tap13++;
*tap0 = *tap0 + *tap13; tap0++; tap13++;
*tap0 = *tap0 + *tap13; tap0++; tap13++;
*tap0 = *tap0 + *tap13; tap0++; tap13 = shiftRandArray;
*tap0 = *tap0 + *tap13; tap0++; tap13++;
*tap0 = *tap0 + *tap13; tap0++; tap13++;
*tap0 = *tap0 + *tap13; tap0++; tap13++;
*tap0 = *tap0 + *tap13; tap0++; tap13++;
*tap0 = *tap0 + *tap13; tap0++; tap13++;
*tap0 = *tap0 + *tap13; tap0++; tap13++;
*tap0 = *tap0 + *tap13; tap0++; tap13++;
*tap0 = *tap0 + *tap13; tap0++; tap13++;
*tap0 = *tap0 + *tap13; tap0++; tap13++;
*tap0 = *tap0 + *tap13; tap0++; tap13++;
*tap0 = *tap0 + *tap13; tap0++; tap13++;
*tap0 = *tap0 + *tap13; tap0++; tap13++;
*tap0 = *tap0 + *tap13;
return;
}
// sum at 10 million iterations = 1344076334
/*
// Slightly faster than the above on a 604, provided CodeWarrior's
instruction-scheduling "optimization" is OFF.
static void refillShiftRand() {
unsigned long * tap0a = shiftRandArray + 0;
unsigned long * tap0b = shiftRandArray + 1; // uses more registers
unsigned long * tap13a = shiftRandArray + 13;
unsigned long * tap13b = shiftRandArray + 14;
*tap0a = *tap0a + *tap13a; tap0a += 2; tap13a += 2;
*tap0b = *tap0b + *tap13b; tap0b += 2; tap13b += 2;
*tap0a = *tap0a + *tap13a; tap0a += 2; tap13a += 2;
*tap0b = *tap0b + *tap13b; tap0b += 2; tap13b += 2;
*tap0a = *tap0a + *tap13a; tap0a += 2; tap13a += 2;
*tap0b = *tap0b + *tap13b; tap0b += 2; tap13b += 2;
*tap0a = *tap0a + *tap13a; tap0a += 2; tap13a += 2;
*tap0b = *tap0b + *tap13b; tap0b += 2; tap13b += 2;
*tap0a = *tap0a + *tap13a; tap0a += 2; tap13a += 2;
*tap0b = *tap0b + *tap13b; tap0b += 2; tap13b += 2;
*tap0a = *tap0a + *tap13a; tap0a += 2; tap13a += 2;
*tap0b = *tap0b + *tap13b; tap0b += 2; tap13b += 2;
*tap0a = *tap0a + *tap13a; tap0a += 2; tap13a += 2;
*tap0b = *tap0b + *tap13b; tap0b += 2; tap13b += 2;
*tap0a = *tap0a + *tap13a; tap0a += 2; tap13a += 2;
*tap0b = *tap0b + *tap13b; tap0b += 2; tap13b += 2;
*tap0a = *tap0a + *tap13a; tap0a += 2; tap13a = shiftRandArray;
*tap0b = *tap0b + *tap13b; tap0b += 2; tap13b = (shiftRandArray +
1);
*tap0a = *tap0a + *tap13a; tap0a += 2; tap13a += 2;
*tap0b = *tap0b + *tap13b; tap0b += 2; tap13b += 2;
*tap0a = *tap0a + *tap13a; tap0a += 2; tap13a += 2;
*tap0b = *tap0b + *tap13b; tap0b += 2; tap13b += 2;
*tap0a = *tap0a + *tap13a; tap0a += 2; tap13a += 2;
*tap0b = *tap0b + *tap13b; tap0b += 2; tap13b += 2;
*tap0a = *tap0a + *tap13a; tap0a += 2; tap13a += 2;
*tap0b = *tap0b + *tap13b; tap0b += 2; tap13b += 2;
*tap0a = *tap0a + *tap13a; tap0a += 2; tap13a += 2;
*tap0b = *tap0b + *tap13b; tap0b += 2; tap13b += 2;
*tap0a = *tap0a + *tap13a; tap0a += 2; tap13a += 2;
*tap0b = *tap0b + *tap13b;
*tap0a = *tap0a + *tap13a;
return;
}
// sum at 10 million iterations = 1344076334
*/
// The array-filling mechanism above provides an information-conserving
mechanism for producing
// pseudorandom numbers with an effectively unlimited repeat period, fast,
but based on a large number of
// internal state bits, and having a bit of a given order eventually depend
on all bits of the same or
// lower order in all words in the array.
// Rare, transient states having almost all zeros in the low bits will
show strong correlations,
// since the restoration of normal bit statistics will take several cycles.
(The all-zero state
// is inaccessible unless it is the starting condition -- one might test
for this.) Correlations
// in the frequency of low-order zero bits in sequentially generated array
elements are eliminated
// in the output by xoring high bits with the low bits; adding a preceding
squaring operation
// adds the effect of making any given output consistent with a large
number of array element states,
// making inferences regarding the internal state of the system relatively
difficult.
inline unsigned long shiftRand() {
((arrayPtr == end) ? // '?:' instead of 'if' to
enable inlining by CW compiler
(refillShiftRand(),
arrayPtr = shiftRandArray,
0) : 0);
unsigned long output = *arrayPtr++;
unsigned long square = output * output; // this multiplication
seems free on a 605
return(square + (output >> (WORD_SIZE / 2)));
}
// With multiplication (above) and summing of output to prevent bogus
optimizations, 16.5 clocks per word.
// Test code for CodeWarrier environment.
void main () {
SIOUXSettings.asktosaveonclose = FALSE;
unsigned long sum = 0;
seedShiftRand(12345); // easily guessed seed
long start = clock();
for (long i = 0; i < 2000000; i++) {
sum += shiftRand();
sum += shiftRand();
sum += shiftRand();
sum += shiftRand();
sum += shiftRand();
}
unsigned long end = clock();
printf("\n sum = %u", sum);
printf("\n\n time = %d \n", (int)(1000.0 * ((float)(end - start) /
(float)CLOCKS_PER_SEC)));
}