More optimizations with parity computation
This commit is contained in:
parent
f3070d2e71
commit
e646686145
@ -1 +1 @@
|
||||
#define Cdrskin_timestamP "2009.10.19.115722"
|
||||
#define Cdrskin_timestamP "2009.10.20.160131"
|
||||
|
@ -116,6 +116,10 @@
|
||||
Comparing the output of both alternatives with the old scrambler output
|
||||
lets 15 bit win for now.
|
||||
|
||||
So the prescription is to start with 15 bit value 1, to use the lowest bit
|
||||
as output, to shift the bits down by one, to exor the output bit with the
|
||||
next lowest bit, and to put that exor result into bit 14 of the register.
|
||||
|
||||
-------------------------------------------------------------------------
|
||||
|
||||
|
||||
@ -136,17 +140,17 @@
|
||||
/* ------------------------------------------------------------------------- */
|
||||
|
||||
|
||||
/* Power and logarithm tables for GF(2^8).
|
||||
/* Power and logarithm tables for GF(2^8), parity matrices for ECMA-130.
|
||||
Generated by burn_rspc_setup_tables() and burn_rspc_print_tables().
|
||||
|
||||
The highest possible sum of gflog[] values is 510. So the table gfpow[]
|
||||
with period 255 was manually unrolled to 511 elements to avoid one modulo
|
||||
The highest possible sum of gflog[] values is 508. So the table gfpow[]
|
||||
with period 255 was manually unrolled to 509 elements to avoid one modulo
|
||||
255 operation in burn_rspc_mult().
|
||||
Idea by D. Hugh Redelmeier.
|
||||
Proposed by D. Hugh Redelmeier.
|
||||
|
||||
*/
|
||||
|
||||
static unsigned char gfpow[511] = {
|
||||
static unsigned char gfpow[509] = {
|
||||
1, 2, 4, 8, 16, 32, 64, 128, 29, 58,
|
||||
116, 232, 205, 135, 19, 38, 76, 152, 45, 90,
|
||||
180, 117, 234, 201, 143, 3, 6, 12, 24, 48,
|
||||
@ -198,8 +202,7 @@ static unsigned char gfpow[511] = {
|
||||
172, 69, 138, 9, 18, 36, 72, 144, 61, 122,
|
||||
244, 245, 247, 243, 251, 235, 203, 139, 11, 22,
|
||||
44, 88, 176, 125, 250, 233, 207, 131, 27, 54,
|
||||
108, 216, 173, 71, 142,
|
||||
1
|
||||
108, 216, 173, 71,
|
||||
};
|
||||
|
||||
static unsigned char gflog[256] = {
|
||||
@ -232,6 +235,39 @@ static unsigned char gflog[256] = {
|
||||
};
|
||||
|
||||
|
||||
#define Libburn_use_h_matriceS 1
|
||||
|
||||
#ifdef Libburn_use_h_matriceS
|
||||
|
||||
/* On my AMD 2x64 bit 3000 MHz processor h[i] costs about 7 % more time
|
||||
than using gfpow[25-i] resp. gfpow[44-i]. I blame this on the more
|
||||
condensed data representation which slightly increases the rate of cache
|
||||
hits.
|
||||
Nevertheless this effect is very likely depending on the exact cache
|
||||
size and architecture. In general, using h[] saves more than 8000
|
||||
subtractions per sector.
|
||||
*/
|
||||
|
||||
/* Parity matrices H as prescribed by ECMA-130 Annex A.
|
||||
Actually just the leading elements of gfpow[] in reversed order.
|
||||
*/
|
||||
static unsigned char h26[26] = {
|
||||
3, 143, 201, 234, 117, 180, 90, 45, 152, 76,
|
||||
38, 19, 135, 205, 232, 116, 58, 29, 128, 64,
|
||||
32, 16, 8, 4, 2, 1,
|
||||
};
|
||||
|
||||
static unsigned char h45[45] = {
|
||||
238, 119, 181, 212, 106, 53, 148, 74, 37, 156,
|
||||
78, 39, 157, 192, 96, 48, 24, 12, 6, 3,
|
||||
143, 201, 234, 117, 180, 90, 45, 152, 76, 38,
|
||||
19, 135, 205, 232, 116, 58, 29, 128, 64, 32,
|
||||
16, 8, 4, 2, 1,
|
||||
};
|
||||
|
||||
#endif /* Libburn_use_h_matriceS */
|
||||
|
||||
|
||||
/* Pseudo-random bytes which of course are exactly the same as with the
|
||||
previously used code.
|
||||
Generated by function print_ecma_130_scrambler().
|
||||
@ -487,24 +523,28 @@ static unsigned char burn_rspc_mult(unsigned char a, unsigned char b)
|
||||
{
|
||||
if (a == 0 || b == 0)
|
||||
return 0;
|
||||
/* Optimization of (a == 0 || b == 0) by D. Hugh Redelmeier
|
||||
if((((int)a - 1) | ((int)b - 1)) < 0)
|
||||
return 0;
|
||||
*/
|
||||
|
||||
return gfpow[gflog[a] + gflog[b]];
|
||||
/* % 255 not necessary because gfpow is unrolled up to index 508 */
|
||||
}
|
||||
|
||||
|
||||
/* Divides by polynomial 0x03. Derived from burn_rspc_div() */
|
||||
/* Divide by polynomial 0x03. Derived from burn_rspc_div() and using the
|
||||
unrolled size of the gfpow[] array.
|
||||
*/
|
||||
static unsigned char burn_rspc_div_3(unsigned char a)
|
||||
{
|
||||
if (a == 0)
|
||||
return 0;
|
||||
if (gflog[a] >= 25)
|
||||
return gfpow[gflog[a] - 25];
|
||||
else
|
||||
return gfpow[230 + gflog[a]];
|
||||
return gfpow[230 + gflog[a]];
|
||||
}
|
||||
|
||||
|
||||
static int burn_rspc_p0p1(unsigned char *sector, int col,
|
||||
static void burn_rspc_p0p1(unsigned char *sector, int col,
|
||||
unsigned char *p0_lsb, unsigned char *p0_msb,
|
||||
unsigned char *p1_lsb, unsigned char *p1_msb)
|
||||
{
|
||||
@ -516,22 +556,34 @@ static int burn_rspc_p0p1(unsigned char *sector, int col,
|
||||
for(i = 0; i < 24; i++) {
|
||||
b = *start;
|
||||
sum_v_lsb ^= b;
|
||||
|
||||
#ifdef Libburn_use_h_matriceS
|
||||
hxv_lsb ^= burn_rspc_mult(b, h26[i]);
|
||||
#else
|
||||
hxv_lsb ^= burn_rspc_mult(b, gfpow[25 - i]);
|
||||
#endif
|
||||
|
||||
b = *(start + 1);
|
||||
sum_v_msb ^= b;
|
||||
|
||||
#ifdef Libburn_use_h_matriceS
|
||||
hxv_msb ^= burn_rspc_mult(b, h26[i]);
|
||||
#else
|
||||
hxv_msb ^= burn_rspc_mult(b, gfpow[25 - i]);
|
||||
#endif
|
||||
|
||||
start += 86;
|
||||
}
|
||||
|
||||
/* 3 = gfpow[1] ^ gfpow[0] , 2 = gfpow[1] */
|
||||
*p0_lsb = burn_rspc_div_3(burn_rspc_mult(2, sum_v_lsb) ^ hxv_lsb);
|
||||
/* 2 = gfpow[1] , 3 = gfpow[1] ^ gfpow[0] */
|
||||
*p1_lsb = sum_v_lsb ^ *p0_lsb;
|
||||
*p0_msb = burn_rspc_div_3(burn_rspc_mult(2, sum_v_msb) ^ hxv_msb);
|
||||
*p1_lsb = sum_v_lsb ^ *p0_lsb;
|
||||
*p1_msb = sum_v_msb ^ *p0_msb;
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
||||
int burn_rspc_parity_p(unsigned char *sector)
|
||||
void burn_rspc_parity_p(unsigned char *sector)
|
||||
{
|
||||
int i;
|
||||
unsigned char p0_lsb, p0_msb, p1_lsb, p1_msb;
|
||||
@ -555,39 +607,51 @@ int burn_rspc_parity_p(unsigned char *sector)
|
||||
#endif /* Libburn_with_lec_generatoR */
|
||||
|
||||
}
|
||||
return 1 ;
|
||||
}
|
||||
|
||||
|
||||
static int burn_rspc_q0q1(unsigned char *sector, int diag,
|
||||
static void burn_rspc_q0q1(unsigned char *sector, int diag,
|
||||
unsigned char *q0_lsb, unsigned char *q0_msb,
|
||||
unsigned char *q1_lsb, unsigned char *q1_msb)
|
||||
{
|
||||
unsigned char *start, b;
|
||||
unsigned int i, idx, sum_v_lsb = 0, sum_v_msb = 0;
|
||||
unsigned int hxv_lsb = 0, hxv_msb = 0, lsb_start;
|
||||
unsigned int hxv_lsb = 0, hxv_msb = 0;
|
||||
|
||||
start = sector + 12;
|
||||
lsb_start = 2 * 43 * diag;
|
||||
idx = 2 * 43 * diag;
|
||||
for(i = 0; i < 43; i++) {
|
||||
idx = (lsb_start + i * 88) % 2236;
|
||||
if (idx >= 2236)
|
||||
idx -= 2236;
|
||||
b = start[idx];
|
||||
sum_v_lsb ^= b;
|
||||
|
||||
#ifdef Libburn_use_h_matriceS
|
||||
hxv_lsb ^= burn_rspc_mult(b, h45[i]);
|
||||
#else
|
||||
hxv_lsb ^= burn_rspc_mult(b, gfpow[44 - i]);
|
||||
#endif
|
||||
|
||||
b = start[idx + 1];
|
||||
sum_v_msb ^= b;
|
||||
|
||||
#ifdef Libburn_use_h_matriceS
|
||||
hxv_msb ^= burn_rspc_mult(b, h45[i]);
|
||||
#else
|
||||
hxv_msb ^= burn_rspc_mult(b, gfpow[44 - i]);
|
||||
#endif
|
||||
|
||||
idx += 88;
|
||||
}
|
||||
/* 3 = gfpow[1] ^ gfpow[0] , 2 = gfpow[1] */
|
||||
*q0_lsb = burn_rspc_div_3(burn_rspc_mult(2, sum_v_lsb) ^ hxv_lsb);
|
||||
/* 2 = gfpow[1] , 3 = gfpow[1] ^ gfpow[0] */
|
||||
*q1_lsb = sum_v_lsb ^ *q0_lsb;
|
||||
*q0_msb = burn_rspc_div_3(burn_rspc_mult(2, sum_v_msb) ^ hxv_msb);
|
||||
*q1_lsb = sum_v_lsb ^ *q0_lsb;
|
||||
*q1_msb = sum_v_msb ^ *q0_msb;
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
||||
int burn_rspc_parity_q(unsigned char *sector)
|
||||
void burn_rspc_parity_q(unsigned char *sector)
|
||||
{
|
||||
int i;
|
||||
unsigned char q0_lsb, q0_msb, q1_lsb, q1_msb;
|
||||
@ -611,7 +675,6 @@ int burn_rspc_parity_q(unsigned char *sector)
|
||||
#endif /* Libburn_with_lec_generatoR */
|
||||
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* ------------------------------------------------------------------------- */
|
||||
@ -622,7 +685,7 @@ int burn_rspc_parity_q(unsigned char *sector)
|
||||
Measurements indicate that about 50 MIPS are needed for 48x CD speed.
|
||||
*/
|
||||
|
||||
int burn_ecma130_scramble(unsigned char *sector)
|
||||
void burn_ecma130_scramble(unsigned char *sector)
|
||||
{
|
||||
int i;
|
||||
unsigned char *s;
|
||||
@ -630,7 +693,6 @@ int burn_ecma130_scramble(unsigned char *sector)
|
||||
s = sector + 12;
|
||||
for (i = 0; i < 2340; i++)
|
||||
s[i] ^= ecma_130_annex_b[i];
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
||||
@ -667,6 +729,7 @@ static int burn_rspc_setup_tables(void)
|
||||
|
||||
/* This function printed the content of gflog[] and gfpow[] as C code
|
||||
and compared the content with the tables of the old implementation.
|
||||
h26[] and h45[] are reversed-order copies of the first 26 resp. 45
elements of gfpow[].
|
||||
*/
|
||||
static int burn_rspc_print_tables(void)
|
||||
{
|
||||
@ -700,16 +763,31 @@ static int burn_rspc_print_tables(void)
|
||||
if((i % 10) == 9)
|
||||
printf("\n\t");
|
||||
}
|
||||
printf("\n};\n");
|
||||
printf("\n};\n\n");
|
||||
|
||||
printf("static unsigned char h26[26] = {");
|
||||
printf("\n\t");
|
||||
for(i= 0; i < 26; i++) {
|
||||
printf(" %3u,", gfpow[25 - i]);
|
||||
if((i % 10) == 9)
|
||||
printf("\n\t");
|
||||
}
|
||||
printf("\n};\n\n");
|
||||
|
||||
printf("static unsigned char h45[45] = {");
|
||||
printf("\n\t");
|
||||
for(i= 0; i < 45; i++) {
|
||||
printf(" %3u,",gfpow[44 - i]);
|
||||
if((i % 10) == 9)
|
||||
printf("\n\t");
|
||||
}
|
||||
printf("\n};\n\n");
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/* This code was used to generate the content of array ecma_130_annex_b[]
|
||||
It implements the prescription to use the lowest bit as output, to shift
|
||||
the bits down by one, to exor the output bit with the next lowest bit,
|
||||
and to put that exor result into bit 14 of the register.
|
||||
/* This code was used to generate the content of array ecma_130_annex_b[].
|
||||
*/
|
||||
static unsigned short ecma_130_fsr = 1;
|
||||
|
||||
@ -746,6 +824,8 @@ static int print_ecma_130_scrambler(void)
|
||||
}
|
||||
|
||||
|
||||
#ifdef Libburn_with_general_rspc_diV
|
||||
|
||||
/* This is a general polynomial division function.
|
||||
burn_rspc_div_3() has been derived from this by setting b to constant 3.
|
||||
*/
|
||||
@ -763,6 +843,8 @@ static unsigned char burn_rspc_div(unsigned char a, unsigned char b)
|
||||
return gfpow[d];
|
||||
}
|
||||
|
||||
#endif /* Libburn_with_general_rspc_diV */
|
||||
|
||||
|
||||
#endif /* Libburn_with_lec_generatoR */
|
||||
|
||||
|
@ -13,11 +13,11 @@
|
||||
#ifndef Libburn_ecma130ab_includeD
|
||||
#define Libburn_ecma130ab_includeD 1
|
||||
|
||||
int burn_rspc_parity_p(unsigned char *sector);
|
||||
void burn_rspc_parity_p(unsigned char *sector);
|
||||
|
||||
int burn_rspc_parity_q(unsigned char *sector);
|
||||
void burn_rspc_parity_q(unsigned char *sector);
|
||||
|
||||
int burn_ecma130_scramble(unsigned char *sector);
|
||||
void burn_ecma130_scramble(unsigned char *sector);
|
||||
|
||||
#endif /* ! Libburn_ecma130ab_includeD */
|
||||
|
||||
|
@ -148,7 +148,7 @@ enum burn_write_types
|
||||
if this mode is attempted.
|
||||
@since 0.7.2
|
||||
ts A91016: Re-implemented according to ECMA-130 Annex A and B.
|
||||
Slower but understood and explained.
|
||||
Now understood, explained and not stemming from cdrdao.
|
||||
@since 0.7.4
|
||||
*/
|
||||
BURN_WRITE_RAW,
|
||||
|
Loading…
Reference in New Issue
Block a user