#include #include #include #define HOST_WIDE_INT long long typedef HOST_WIDE_INT gpi_int; static gpi_int compute_checksum_original (unsigned char *buf, gpi_int size) { gpi_int sum = 0, n = 0; for (n = 0; n < size; n++) sum += n * buf[n]; return sum; } static gpi_int compute_checksum_ladd (unsigned char *buf, gpi_int size) { gpi_int sum1 = 0, sum2 = 0, n = 0; for (n = size - 1; n >= 0; n--) { sum2 += sum1; sum1 += buf[n]; } return sum2; } #define CHUNK 1024 static gpi_int compute_checksum_sadd (unsigned char *buf, gpi_int size) { gpi_int sum1 = 0, sum2 = 0, n1 = CHUNK * (size / CHUNK), n = 0; unsigned char *buf0 = buf; for (n = size - 1; n >= n1; n--) { sum2 += sum1; sum1 += buf[n]; } buf += n1; while (buf > buf0) { long in, isum1 = 0, isum2 = 0; buf -= CHUNK; for (in = CHUNK - 1; in >= 0; in --) { isum2 += isum1; isum1 += buf[in]; } sum2 += CHUNK * sum1 + isum2; sum1 += isum1; } return sum2; } static gpi_int compute_checksum_short (unsigned char *buf, gpi_int size) { long in; gpi_int sum = 0, n = 0; while (size >= CHUNK ) { long isum1 = 0, isum2 = 0; for (in = 0; in < CHUNK; in++) { long ib = buf[in]; isum1 += ib; isum2 += in * ib; } sum += isum2 + n * isum1; n += CHUNK; buf += CHUNK; size -= CHUNK; } for (in = 0; in < size; in++, n++) sum += n * buf[in]; return sum; } static gpi_int compute_checksum_lladd (unsigned char *buf, gpi_int size) { int * bp = (int *) buf; gpi_int isize = size / sizeof(int); int * be = bp + isize; gpi_int sum = 0; while (bp < be) { sum += *bp; bp++; } return sum; } static gpi_int compute_checksum_unrolled (unsigned char *buf, gpi_int size) { gpi_int sum = 0, n = 0; while ( size > 4 ) { sum += n * buf[n]; n++; sum += n * buf[n]; n++; sum += n * buf[n]; n++; sum += n * buf[n]; n++; size -= 4; } while ( size > 0 ) { sum += n * buf[n]; n++; size--; } return sum; } static gpi_int compute_checksum_native (unsigned char *buf, gpi_int size) { gpi_int sum = 0, n = 0; if ( size > 0x7fffffff || sizeof(int) < 4) { for (n = 0; n < size; n++) sum += n * buf[n]; } else { int ni = 0; int sizei = size; while ( sizei > 4 ) { sum += ni * buf[ni]; ni++; sum += ni * buf[ni]; ni++; sum += ni * buf[ni]; ni++; sum += ni * buf[ni]; ni++; sizei -= 4; } while ( sizei > 0 ) { sum += ni * buf[ni]; ni++; sizei--; } } return sum; } static gpi_int compute_checksum_shift (unsigned char *buf, gpi_int size) { gpi_int sum = 0, n = 0; if ( size > 0x7fffffff || sizeof(int) < 4) { for (n = 0; n < size; n++) sum += n * buf[n]; } else { int ni = 0; int sizei = size; while ( sizei > 8 ) { sum += (int)buf[ni] << 0; ni++; sum += (int)buf[ni] << 4; ni++; sum += (int)buf[ni] << 8; ni++; sum += (int)buf[ni] << 12; ni++; sum += (int)buf[ni] << 16; ni++; sum += (int)buf[ni] << 20; ni++; sum += (int)buf[ni] << 24; ni++; sum += (int)buf[ni] << 28; ni++; sizei -= 8; } while ( sizei > 0 ) { sum += (int)buf[ni] << 0; ni++; sizei--; } } return sum; } static gpi_int compute_checksum_add (unsigned char *buf, gpi_int size) { gpi_int sum = 0, n = 0; if ( size > 0x7fffffff || sizeof(int) < 4) { for (n = 0; n < size; n++) sum += n * buf[n]; } else { int ni = 0; int sizei = size; int sumi = 0; while ( sizei > 8 ) { sumi += (int)buf[ni]; ni++; sumi += (int)buf[ni]; ni++; sumi += (int)buf[ni]; ni++; sumi += (int)buf[ni]; ni++; sumi += (int)buf[ni]; ni++; sumi += (int)buf[ni]; ni++; sumi += (int)buf[ni]; ni++; sumi += (int)buf[ni]; ni++; sizei -= 8; } while ( sizei > 0 ) { sumi += (int)buf[ni]; ni++; sizei--; } sum = sumi; } return sum; } #define SIZE 27000000L // 10820508 unsigned char *buf; typedef gpi_int (*checksum_t)(unsigned char *buf, gpi_int size); void timeit( const char *what, checksum_t checksum ) { long long sum; struct timeval start, finish; long long used; gettimeofday( &start, NULL ); sum = checksum( buf, SIZE ); gettimeofday( &finish, NULL ); used = (finish.tv_sec - start.tv_sec) * 1000000 + finish.tv_usec - start.tv_usec; printf( "time=%8lld sum=%20lld %s\n", used, sum, what ); } int main() { gpi_int checksum; unsigned char *p; gpi_int i; buf = malloc( SIZE ); p = buf; for ( i = 1; i <= SIZE; i++ ) { *p++ = i; } printf( "sizeof: %d\n", sizeof(checksum) ); timeit( "compute_checksum_original", &compute_checksum_original ); timeit( "compute_checksum_short", & compute_checksum_short ); timeit( "compute_checksum_ladd", & compute_checksum_ladd ); timeit( "compute_checksum_sadd", &compute_checksum_sadd ); timeit( "compute_checksum_lladd", &compute_checksum_lladd ); timeit( "compute_checksum_unrolled", &compute_checksum_unrolled ); timeit( "compute_checksum_native", &compute_checksum_native ); timeit( "compute_checksum_shift", &compute_checksum_shift ); timeit( "compute_checksum_add", &compute_checksum_add ); return 0; }