More optimizations with parity computation

This commit is contained in:
Thomas Schmitt 2009-10-20 16:14:14 +00:00
parent e8e748d2f3
commit edabeec60a
4 changed files with 121 additions and 39 deletions

View File

@ -1 +1 @@
#define Cdrskin_timestamP "2009.10.19.115722"
#define Cdrskin_timestamP "2009.10.20.160131"

View File

@ -116,6 +116,10 @@
Comparing the output of both alternatives with the old scrambler output
lets 15 bit win for now.
So the prescription is to start with 15 bit value 1, to use the lowest bit
as output, to shift the bits down by one, to exor the output bit with the
next lowest bit, and to put that exor result into bit 14 of the register.
-------------------------------------------------------------------------
@ -136,17 +140,17 @@
/* ------------------------------------------------------------------------- */
/* Power and logarithm tables for GF(2^8).
/* Power and logarithm tables for GF(2^8), parity matrices for ECMA-130.
Generated by burn_rspc_setup_tables() and burn_rspc_print_tables().
The highest possible sum of gflog[] values is 510. So the table gfpow[]
with period 255 was manually unrolled to 511 elements to avoid one modulo
The highest possible sum of gflog[] values is 508. So the table gfpow[]
with period 255 was manually unrolled to 509 elements to avoid one modulo
255 operation in burn_rspc_mult().
Idea by D. Hugh Redelmeier.
Proposed by D. Hugh Redelmeier.
*/
static unsigned char gfpow[511] = {
static unsigned char gfpow[509] = {
1, 2, 4, 8, 16, 32, 64, 128, 29, 58,
116, 232, 205, 135, 19, 38, 76, 152, 45, 90,
180, 117, 234, 201, 143, 3, 6, 12, 24, 48,
@ -198,8 +202,7 @@ static unsigned char gfpow[511] = {
172, 69, 138, 9, 18, 36, 72, 144, 61, 122,
244, 245, 247, 243, 251, 235, 203, 139, 11, 22,
44, 88, 176, 125, 250, 233, 207, 131, 27, 54,
108, 216, 173, 71, 142,
1
108, 216, 173, 71,
};
static unsigned char gflog[256] = {
@ -232,6 +235,39 @@ static unsigned char gflog[256] = {
};
#define Libburn_use_h_matriceS 1
#ifdef Libburn_use_h_matriceS
/* On my AMD 2x64 bit 3000 MHz processor h[i] costs about 7 % more time
than using gfpow[25-i] resp. gfpow[44-i]. I blame this on the more
condensed data representation which slightly increases the rate of cache
hits.
Nevertheless this effect is very likely depending on the exact cache
size and architecture. In general, using h[] saves more than 8000
subtractions per sector.
*/
/* Parity matrices H as prescribed by ECMA-130 Annex A.
Actually just the leading elements of gfpow[] in reversed order.
*/
/* Multiplier table with 26 entries: h26[i] == gfpow[25 - i].
   Each element is the GF(2^8) double of its successor, ending with 1.
   burn_rspc_p0p1() reads h26[i] for i = 0..23 as the weight of the i-th
   byte of a column.
   The table is never written, hence const.
*/
static const unsigned char h26[26] = {
	  3, 143, 201, 234, 117, 180,  90,  45, 152,  76,
	 38,  19, 135, 205, 232, 116,  58,  29, 128,  64,
	 32,  16,   8,   4,   2,   1,
};
/* Multiplier table with 45 entries: h45[i] == gfpow[44 - i].
   Each element is the GF(2^8) double of its successor, ending with 1.
   burn_rspc_q0q1() reads h45[i] for i = 0..42 as the weight of the i-th
   byte of a diagonal.
   The table is never written, hence const.
*/
static const unsigned char h45[45] = {
	238, 119, 181, 212, 106,  53, 148,  74,  37, 156,
	 78,  39, 157, 192,  96,  48,  24,  12,   6,   3,
	143, 201, 234, 117, 180,  90,  45, 152,  76,  38,
	 19, 135, 205, 232, 116,  58,  29, 128,  64,  32,
	 16,   8,   4,   2,   1,
};
#endif /* Libburn_use_h_matriceS */
/* Pseudo-random bytes which of course are exactly the same as with the
previously used code.
Generated by function print_ecma_130_scrambler().
@ -487,24 +523,28 @@ static unsigned char burn_rspc_mult(unsigned char a, unsigned char b)
{
if (a == 0 || b == 0)
return 0;
/* Optimization of (a == 0 || b == 0) by D. Hugh Redelmeier
if((((int)a - 1) | ((int)b - 1)) < 0)
return 0;
*/
return gfpow[gflog[a] + gflog[b]];
/* % 255 not necessary because gfpow is unrolled up to index 508 */
}
/* Divides by polynomial 0x03. Derived from burn_rspc_div() */
/* Divide by polynomial 0x03. Derived from burn_rspc_div() and using the
unrolled size of the gfpow[] array.
*/
static unsigned char burn_rspc_div_3(unsigned char a)
{
	/* 0 has no logarithm; 0 divided by anything is 0 */
	if (a == 0)
		return 0;

	/* gfpow[25] == 3, so dividing by 3 means subtracting 25 from the
	   logarithm modulo 255, which is the same as adding 230.
	   gfpow[] is unrolled beyond its period of 255, so the sum
	   230 + gflog[a] can be used as index directly, without a modulo
	   operation and without distinguishing gflog[a] >= 25 from
	   gflog[a] < 25.
	*/
	return gfpow[230 + gflog[a]];
}
static int burn_rspc_p0p1(unsigned char *sector, int col,
static void burn_rspc_p0p1(unsigned char *sector, int col,
unsigned char *p0_lsb, unsigned char *p0_msb,
unsigned char *p1_lsb, unsigned char *p1_msb)
{
@ -516,22 +556,34 @@ static int burn_rspc_p0p1(unsigned char *sector, int col,
for(i = 0; i < 24; i++) {
b = *start;
sum_v_lsb ^= b;
#ifdef Libburn_use_h_matriceS
hxv_lsb ^= burn_rspc_mult(b, h26[i]);
#else
hxv_lsb ^= burn_rspc_mult(b, gfpow[25 - i]);
#endif
b = *(start + 1);
sum_v_msb ^= b;
#ifdef Libburn_use_h_matriceS
hxv_msb ^= burn_rspc_mult(b, h26[i]);
#else
hxv_msb ^= burn_rspc_mult(b, gfpow[25 - i]);
#endif
start += 86;
}
/* 3 = gfpow[1] ^ gfpow[0] , 2 = gfpow[1] */
*p0_lsb = burn_rspc_div_3(burn_rspc_mult(2, sum_v_lsb) ^ hxv_lsb);
/* 2 = gfpow[1] , 3 = gfpow[1] ^ gfpow[0] */
*p1_lsb = sum_v_lsb ^ *p0_lsb;
*p0_msb = burn_rspc_div_3(burn_rspc_mult(2, sum_v_msb) ^ hxv_msb);
*p1_lsb = sum_v_lsb ^ *p0_lsb;
*p1_msb = sum_v_msb ^ *p0_msb;
return 1;
}
int burn_rspc_parity_p(unsigned char *sector)
void burn_rspc_parity_p(unsigned char *sector)
{
int i;
unsigned char p0_lsb, p0_msb, p1_lsb, p1_msb;
@ -555,39 +607,51 @@ int burn_rspc_parity_p(unsigned char *sector)
#endif /* Libburn_with_lec_generatoR */
}
return 1 ;
}
static int burn_rspc_q0q1(unsigned char *sector, int diag,
static void burn_rspc_q0q1(unsigned char *sector, int diag,
unsigned char *q0_lsb, unsigned char *q0_msb,
unsigned char *q1_lsb, unsigned char *q1_msb)
{
unsigned char *start, b;
unsigned int i, idx, sum_v_lsb = 0, sum_v_msb = 0;
unsigned int hxv_lsb = 0, hxv_msb = 0, lsb_start;
unsigned int hxv_lsb = 0, hxv_msb = 0;
start = sector + 12;
lsb_start = 2 * 43 * diag;
idx = 2 * 43 * diag;
for(i = 0; i < 43; i++) {
idx = (lsb_start + i * 88) % 2236;
if (idx >= 2236)
idx -= 2236;
b = start[idx];
sum_v_lsb ^= b;
#ifdef Libburn_use_h_matriceS
hxv_lsb ^= burn_rspc_mult(b, h45[i]);
#else
hxv_lsb ^= burn_rspc_mult(b, gfpow[44 - i]);
#endif
b = start[idx + 1];
sum_v_msb ^= b;
#ifdef Libburn_use_h_matriceS
hxv_msb ^= burn_rspc_mult(b, h45[i]);
#else
hxv_msb ^= burn_rspc_mult(b, gfpow[44 - i]);
#endif
idx += 88;
}
/* 3 = gfpow[1] ^ gfpow[0] , 2 = gfpow[1] */
*q0_lsb = burn_rspc_div_3(burn_rspc_mult(2, sum_v_lsb) ^ hxv_lsb);
/* 2 = gfpow[1] ; 3 = gfpow[1] ^ gfpow[0] */
*q1_lsb = sum_v_lsb ^ *q0_lsb;
*q0_msb = burn_rspc_div_3(burn_rspc_mult(2, sum_v_msb) ^ hxv_msb);
*q1_lsb = sum_v_lsb ^ *q0_lsb;
*q1_msb = sum_v_msb ^ *q0_msb;
return 1;
}
int burn_rspc_parity_q(unsigned char *sector)
void burn_rspc_parity_q(unsigned char *sector)
{
int i;
unsigned char q0_lsb, q0_msb, q1_lsb, q1_msb;
@ -611,7 +675,6 @@ int burn_rspc_parity_q(unsigned char *sector)
#endif /* Libburn_with_lec_generatoR */
}
return 1;
}
/* ------------------------------------------------------------------------- */
@ -622,7 +685,7 @@ int burn_rspc_parity_q(unsigned char *sector)
Measurements indicate that about 50 MIPS are needed for 48x CD speed.
*/
int burn_ecma130_scramble(unsigned char *sector)
void burn_ecma130_scramble(unsigned char *sector)
{
int i;
unsigned char *s;
@ -630,7 +693,6 @@ int burn_ecma130_scramble(unsigned char *sector)
s = sector + 12;
for (i = 0; i < 2340; i++)
s[i] ^= ecma_130_annex_b[i];
return 1;
}
@ -667,6 +729,7 @@ static int burn_rspc_setup_tables(void)
/* This function printed the content of gflog[] and gfpow[] as C code
and compared the content with the tables of the old implementation.
h26[] and h45[] are reversed-order copies of the leading elements of gfpow[].
*/
static int burn_rspc_print_tables(void)
{
@ -700,16 +763,31 @@ static int burn_rspc_print_tables(void)
if((i % 10) == 9)
printf("\n\t");
}
printf("\n};\n");
printf("\n};\n\n");
printf("static unsigned char h26[26] = {");
printf("\n\t");
for(i= 0; i < 26; i++) {
printf(" %3u,", gfpow[25 - i]);
if((i % 10) == 9)
printf("\n\t");
}
printf("\n};\n\n");
printf("static unsigned char h45[45] = {");
printf("\n\t");
for(i= 0; i < 45; i++) {
printf(" %3u,",gfpow[44 - i]);
if((i % 10) == 9)
printf("\n\t");
}
printf("\n};\n\n");
return 0;
}
/* This code was used to generate the content of array ecma_130_annex_b[]
It implements the prescription to use the lowest bit as output, to shift
the bits down by one, to exor the output bit with the next lowest bit,
and to put that exor result into bit 14 of the register.
/* This code was used to generate the content of array ecma_130_annex_b[].
*/
static unsigned short ecma_130_fsr = 1;
@ -746,6 +824,8 @@ static int print_ecma_130_scrambler(void)
}
#ifdef Libburn_with_general_rspc_diV
/* This is a general polynomial division function.
burn_rspc_div_3() has been derived from this by setting b to constant 3.
*/
@ -763,6 +843,8 @@ static unsigned char burn_rspc_div(unsigned char a, unsigned char b)
return gfpow[d];
}
#endif /* Libburn_with_general_rspc_diV */
#endif /* Libburn_with_lec_generatoR */

View File

@ -13,11 +13,11 @@
#ifndef Libburn_ecma130ab_includeD
#define Libburn_ecma130ab_includeD 1
int burn_rspc_parity_p(unsigned char *sector);
void burn_rspc_parity_p(unsigned char *sector);
int burn_rspc_parity_q(unsigned char *sector);
void burn_rspc_parity_q(unsigned char *sector);
int burn_ecma130_scramble(unsigned char *sector);
void burn_ecma130_scramble(unsigned char *sector);
#endif /* ! Libburn_ecma130ab_includeD */

View File

@ -148,7 +148,7 @@ enum burn_write_types
if this mode is attempted.
@since 0.7.2
ts A91016: Re-implemented according to ECMA-130 Annex A and B.
Slower but understood and explained.
Now understood, explained and not stemming from cdrdao.
@since 0.7.4
*/
BURN_WRITE_RAW,