diff --git a/cdrskin/cdrskin_timestamp.h b/cdrskin/cdrskin_timestamp.h index deeee30..b954ad0 100644 --- a/cdrskin/cdrskin_timestamp.h +++ b/cdrskin/cdrskin_timestamp.h @@ -1 +1 @@ -#define Cdrskin_timestamP "2009.10.19.115722" +#define Cdrskin_timestamP "2009.10.20.160131" diff --git a/libburn/ecma130ab.c b/libburn/ecma130ab.c index ed80e19..372da2f 100644 --- a/libburn/ecma130ab.c +++ b/libburn/ecma130ab.c @@ -116,6 +116,10 @@ Comparing the output of both alternatives with the old scrambler output lets 15 bit win for now. + So the prescription is to start with 15 bit value 1, to use the lowest bit + as output, to shift the bits down by one, to exor the output bit with the + next lowest bit, and to put that exor result into bit 14 of the register. + ------------------------------------------------------------------------- @@ -136,17 +140,17 @@ /* ------------------------------------------------------------------------- */ -/* Power and logarithm tables for GF(2^8). +/* Power and logarithm tables for GF(2^8), parity matrices for ECMA-130. Generated by burn_rspc_setup_tables() and burn_rspc_print_tables(). - The highest possible sum of gflog[] values is is 510. So the table gfpow[] - with period 255 was manually unrolled to 511 elements to avoid one modulo + The highest possible sum of gflog[] values is 508. So the table gfpow[] + with period 255 was manually unrolled to 509 elements to avoid one modulo 255 operation in burn_rspc_mult(). - Idea by D. Hugh Redelmeier. + Proposed by D. Hugh Redelmeier. 
*/ -static unsigned char gfpow[511] = { +static unsigned char gfpow[509] = { 1, 2, 4, 8, 16, 32, 64, 128, 29, 58, 116, 232, 205, 135, 19, 38, 76, 152, 45, 90, 180, 117, 234, 201, 143, 3, 6, 12, 24, 48, @@ -198,8 +202,7 @@ static unsigned char gfpow[511] = { 172, 69, 138, 9, 18, 36, 72, 144, 61, 122, 244, 245, 247, 243, 251, 235, 203, 139, 11, 22, 44, 88, 176, 125, 250, 233, 207, 131, 27, 54, - 108, 216, 173, 71, 142, - 1 + 108, 216, 173, 71, }; static unsigned char gflog[256] = { @@ -232,6 +235,39 @@ static unsigned char gflog[256] = { }; +#define Libburn_use_h_matriceS 1 + +#ifdef Libburn_use_h_matriceS + +/* On my AMD 2x64 bit 3000 MHz processor h[i] costs about 7 % more time + than using gfpow[25-i] resp. gfpow[44-i]. I blame this on the more + condensed data representation which slightly increases the rate of cache + hits. + Nevertheless this effect is very likely depending on the exact cache + size and architecture. In general, using h[] saves more than 8000 + subtractions per sector. +*/ + +/* Parity matrices H as prescribed by ECMA-130 Annex A. + Actually just the leading elements of gfpow[] in reversed order. +*/ +static unsigned char h26[26] = { + 3, 143, 201, 234, 117, 180, 90, 45, 152, 76, + 38, 19, 135, 205, 232, 116, 58, 29, 128, 64, + 32, 16, 8, 4, 2, 1, +}; + +static unsigned char h45[45] = { + 238, 119, 181, 212, 106, 53, 148, 74, 37, 156, + 78, 39, 157, 192, 96, 48, 24, 12, 6, 3, + 143, 201, 234, 117, 180, 90, 45, 152, 76, 38, + 19, 135, 205, 232, 116, 58, 29, 128, 64, 32, + 16, 8, 4, 2, 1, +}; + +#endif /* Libburn_use_h_matriceS */ + + /* Pseudo-random bytes which of course are exactly the same as with the previously used code. Generated by function print_ecma_130_scrambler(). @@ -487,24 +523,28 @@ static unsigned char burn_rspc_mult(unsigned char a, unsigned char b) { if (a == 0 || b == 0) return 0; + /* Optimization of (a == 0 || b == 0) by D. 
Hugh Redelmeier + if((((int)a - 1) | ((int)b - 1)) < 0) + return 0; + */ + return gfpow[gflog[a] + gflog[b]]; /* % 255 not necessary because gfpow is unrolled up to index 510 */ } -/* Divides by polynomial 0x03. Derived from burn_rspc_div() */ +/* Divide by polynomial 0x03. Derived from burn_rspc_div() and using the + unrolled size of the gfpow[] array. +*/ static unsigned char burn_rspc_div_3(unsigned char a) { if (a == 0) return 0; - if (gflog[a] >= 25) - return gfpow[gflog[a] - 25]; - else - return gfpow[230 + gflog[a]]; + return gfpow[230 + gflog[a]]; } -static int burn_rspc_p0p1(unsigned char *sector, int col, +static void burn_rspc_p0p1(unsigned char *sector, int col, unsigned char *p0_lsb, unsigned char *p0_msb, unsigned char *p1_lsb, unsigned char *p1_msb) { @@ -516,22 +556,34 @@ static int burn_rspc_p0p1(unsigned char *sector, int col, for(i = 0; i < 24; i++) { b = *start; sum_v_lsb ^= b; + +#ifdef Libburn_use_h_matriceS + hxv_lsb ^= burn_rspc_mult(b, h26[i]); +#else hxv_lsb ^= burn_rspc_mult(b, gfpow[25 - i]); +#endif + b = *(start + 1); sum_v_msb ^= b; + +#ifdef Libburn_use_h_matriceS + hxv_msb ^= burn_rspc_mult(b, h26[i]); +#else hxv_msb ^= burn_rspc_mult(b, gfpow[25 - i]); +#endif + start += 86; } + + /* 3 = gfpow[1] ^ gfpow[0] , 2 = gfpow[1] */ *p0_lsb = burn_rspc_div_3(burn_rspc_mult(2, sum_v_lsb) ^ hxv_lsb); - /* 2 = gfpow[1] , 3 = gfpow[1] ^ gfpow[0]); */ - *p1_lsb = sum_v_lsb ^ *p0_lsb; *p0_msb = burn_rspc_div_3(burn_rspc_mult(2, sum_v_msb) ^ hxv_msb); + *p1_lsb = sum_v_lsb ^ *p0_lsb; *p1_msb = sum_v_msb ^ *p0_msb; - return 1; } -int burn_rspc_parity_p(unsigned char *sector) +void burn_rspc_parity_p(unsigned char *sector) { int i; unsigned char p0_lsb, p0_msb, p1_lsb, p1_msb; @@ -555,39 +607,51 @@ int burn_rspc_parity_p(unsigned char *sector) #endif /* Libburn_with_lec_generatoR */ } - return 1 ; } -static int burn_rspc_q0q1(unsigned char *sector, int diag, +static void burn_rspc_q0q1(unsigned char *sector, int diag, unsigned char *q0_lsb, 
unsigned char *q0_msb, unsigned char *q1_lsb, unsigned char *q1_msb) { unsigned char *start, b; unsigned int i, idx, sum_v_lsb = 0, sum_v_msb = 0; - unsigned int hxv_lsb = 0, hxv_msb = 0, lsb_start; + unsigned int hxv_lsb = 0, hxv_msb = 0; start = sector + 12; - lsb_start = 2 * 43 * diag; + idx = 2 * 43 * diag; for(i = 0; i < 43; i++) { - idx = (lsb_start + i * 88) % 2236; + if (idx >= 2236) + idx -= 2236; b = start[idx]; sum_v_lsb ^= b; + +#ifdef Libburn_use_h_matriceS + hxv_lsb ^= burn_rspc_mult(b, h45[i]); +#else hxv_lsb ^= burn_rspc_mult(b, gfpow[44 - i]); +#endif + b = start[idx + 1]; sum_v_msb ^= b; + +#ifdef Libburn_use_h_matriceS + hxv_msb ^= burn_rspc_mult(b, h45[i]); +#else hxv_msb ^= burn_rspc_mult(b, gfpow[44 - i]); +#endif + + idx += 88; } + /* 3 = gfpow[1] ^ gfpow[0] , 2 = gfpow[1] */ *q0_lsb = burn_rspc_div_3(burn_rspc_mult(2, sum_v_lsb) ^ hxv_lsb); - /* 2 = gfpow[1] ; 3 = gfpow[1] ^ gfpow[0]); */ - *q1_lsb = sum_v_lsb ^ *q0_lsb; *q0_msb = burn_rspc_div_3(burn_rspc_mult(2, sum_v_msb) ^ hxv_msb); + *q1_lsb = sum_v_lsb ^ *q0_lsb; *q1_msb = sum_v_msb ^ *q0_msb; - return 1; } -int burn_rspc_parity_q(unsigned char *sector) +void burn_rspc_parity_q(unsigned char *sector) { int i; unsigned char q0_lsb, q0_msb, q1_lsb, q1_msb; @@ -611,7 +675,6 @@ int burn_rspc_parity_q(unsigned char *sector) #endif /* Libburn_with_lec_generatoR */ } - return 1; } /* ------------------------------------------------------------------------- */ @@ -622,7 +685,7 @@ int burn_rspc_parity_q(unsigned char *sector) Measurements indicate that about 50 MIPS are needed for 48x CD speed. 
*/ -int burn_ecma130_scramble(unsigned char *sector) +void burn_ecma130_scramble(unsigned char *sector) { int i; unsigned char *s; @@ -630,7 +693,6 @@ int burn_ecma130_scramble(unsigned char *sector) s = sector + 12; for (i = 0; i < 2340; i++) s[i] ^= ecma_130_annex_b[i]; - return 1; } @@ -667,6 +729,7 @@ static int burn_rspc_setup_tables(void) /* This function printed the content of gflog[] and gfpow[] as C code and compared the content with the tables of the old implementation. + h26[] and h45[] are reverted order copies of gfpow[] */ static int burn_rspc_print_tables(void) { @@ -700,16 +763,31 @@ static int burn_rspc_print_tables(void) if((i % 10) == 9) printf("\n\t"); } - printf("\n};\n"); + printf("\n};\n\n"); + + printf("static unsigned char h26[26] = {"); + printf("\n\t"); + for(i= 0; i < 26; i++) { + printf(" %3u,", gfpow[25 - i]); + if((i % 10) == 9) + printf("\n\t"); + } + printf("\n};\n\n"); + + printf("static unsigned char h45[45] = {"); + printf("\n\t"); + for(i= 0; i < 45; i++) { + printf(" %3u,",gfpow[44 - i]); + if((i % 10) == 9) + printf("\n\t"); + } + printf("\n};\n\n"); return 0; } -/* This code was used to generate the content of array ecma_130_annex_b[] - It implements the prescription to use the lowest bit as output, to shift - the bits down by one, to exor the output bit with the next lowest bit, - and to put that exor result into bit 14 of the register. +/* This code was used to generate the content of array ecma_130_annex_b[]. */ static unsigned short ecma_130_fsr = 1; @@ -746,6 +824,8 @@ static int print_ecma_130_scrambler(void) } +#ifdef Libburn_with_general_rspc_diV + /* This is a general polynomial division function. burn_rspc_div_3() has been derived from this by setting b to constant 3. 
*/ @@ -763,6 +843,8 @@ static unsigned char burn_rspc_div(unsigned char a, unsigned char b) return gfpow[d]; } +#endif /* Libburn_with_general_rspc_diV */ + #endif /* Libburn_with_lec_generatoR */ diff --git a/libburn/ecma130ab.h b/libburn/ecma130ab.h index 36e9159..39d8b20 100644 --- a/libburn/ecma130ab.h +++ b/libburn/ecma130ab.h @@ -13,11 +13,11 @@ #ifndef Libburn_ecma130ab_includeD #define Libburn_ecma130ab_includeD 1 -int burn_rspc_parity_p(unsigned char *sector); +void burn_rspc_parity_p(unsigned char *sector); -int burn_rspc_parity_q(unsigned char *sector); +void burn_rspc_parity_q(unsigned char *sector); -int burn_ecma130_scramble(unsigned char *sector); +void burn_ecma130_scramble(unsigned char *sector); #endif /* ! Libburn_ecma130ab_includeD */ diff --git a/libburn/libburn.h b/libburn/libburn.h index 2981f80..545e38b 100644 --- a/libburn/libburn.h +++ b/libburn/libburn.h @@ -148,7 +148,7 @@ enum burn_write_types if this mode is attempted. @since 0.7.2 ts A91016: Re-implemented according to ECMA-130 Annex A and B. - Slower but understood and explained. + Now understood, explained and not stemming from cdrdao. @since 0.7.4 */ BURN_WRITE_RAW,