Optimizations with parity computation, clarification about nature of logarithms

This commit is contained in:
Thomas Schmitt 2009-10-19 11:56:25 +00:00
parent 749f3fc21e
commit e8e748d2f3
2 changed files with 108 additions and 41 deletions

View File

@ -1 +1 @@
#define Cdrskin_timestamP "2009.10.17.131852"
#define Cdrskin_timestamP "2009.10.19.115722"

View File

@ -76,7 +76,9 @@
Multiplication and division would demand polynomial division, e.g. by the
Euclidean algorithm. The computing path over logarithms and powers follows
algebra and allows reducing the arithmetic task to table lookups, additions
modulo 255, and exor operations.
modulo 255, and exor operations. Note that the logarithms are natural
numbers, not polynomials. They get added or subtracted by the usual addition
(not by exor) and their polynomial power depends on their value modulo 255.
Needed are a logarithm table and a power table (or inverse logarithm table)
for Galois Field GF(2^8) which will serve to perform the peculiar
@ -136,9 +138,15 @@
/* Power and logarithm tables for GF(2^8).
Generated by burn_rspc_setup_tables() and burn_rspc_print_tables().
The highest possible sum of gflog[] values is 510. So the table gfpow[]
with period 255 was manually unrolled to 511 elements to avoid one modulo
255 operation in burn_rspc_mult().
Idea by D. Hugh Redelmeier.
*/
static unsigned char gfpow[256] = {
static unsigned char gfpow[511] = {
1, 2, 4, 8, 16, 32, 64, 128, 29, 58,
116, 232, 205, 135, 19, 38, 76, 152, 45, 90,
180, 117, 234, 201, 143, 3, 6, 12, 24, 48,
@ -164,7 +172,34 @@ static unsigned char gfpow[256] = {
172, 69, 138, 9, 18, 36, 72, 144, 61, 122,
244, 245, 247, 243, 251, 235, 203, 139, 11, 22,
44, 88, 176, 125, 250, 233, 207, 131, 27, 54,
108, 216, 173, 71, 142
108, 216, 173, 71, 142,
1, 2, 4, 8, 16, 32, 64, 128, 29, 58,
116, 232, 205, 135, 19, 38, 76, 152, 45, 90,
180, 117, 234, 201, 143, 3, 6, 12, 24, 48,
96, 192, 157, 39, 78, 156, 37, 74, 148, 53,
106, 212, 181, 119, 238, 193, 159, 35, 70, 140,
5, 10, 20, 40, 80, 160, 93, 186, 105, 210,
185, 111, 222, 161, 95, 190, 97, 194, 153, 47,
94, 188, 101, 202, 137, 15, 30, 60, 120, 240,
253, 231, 211, 187, 107, 214, 177, 127, 254, 225,
223, 163, 91, 182, 113, 226, 217, 175, 67, 134,
17, 34, 68, 136, 13, 26, 52, 104, 208, 189,
103, 206, 129, 31, 62, 124, 248, 237, 199, 147,
59, 118, 236, 197, 151, 51, 102, 204, 133, 23,
46, 92, 184, 109, 218, 169, 79, 158, 33, 66,
132, 21, 42, 84, 168, 77, 154, 41, 82, 164,
85, 170, 73, 146, 57, 114, 228, 213, 183, 115,
230, 209, 191, 99, 198, 145, 63, 126, 252, 229,
215, 179, 123, 246, 241, 255, 227, 219, 171, 75,
150, 49, 98, 196, 149, 55, 110, 220, 165, 87,
174, 65, 130, 25, 50, 100, 200, 141, 7, 14,
28, 56, 112, 224, 221, 167, 83, 166, 81, 162,
89, 178, 121, 242, 249, 239, 195, 155, 43, 86,
172, 69, 138, 9, 18, 36, 72, 144, 61, 122,
244, 245, 247, 243, 251, 235, 203, 139, 11, 22,
44, 88, 176, 125, 250, 233, 207, 131, 27, 54,
108, 216, 173, 71, 142,
1
};
static unsigned char gflog[256] = {
@ -443,49 +478,55 @@ static unsigned char ecma_130_annex_b[2340] = {
/* This is the new implementation of P- and Q-parity generation.
It is totally unoptimized and thus needs about 50 percent more time than the
old implementation (both with gcc -O2 on AMD 64 bit). Measurements indicate
that about 400 MIPS are needed for 48x CD speed (7.1 MB/s).
It needs about the same computing time as the old implementation (both
with gcc -O2 on AMD 64 bit). Measurements indicate that about 280 MIPS
are needed for 48x CD speed (7.1 MB/s).
*/
static unsigned char burn_rspc_mult(unsigned char a, unsigned char b)
{
if (a == 0 || b == 0)
return 0;
return gfpow[(gflog[a] + gflog[b]) % 255];
return gfpow[gflog[a] + gflog[b]];
/* % 255 not necessary because gfpow is unrolled up to index 510 */
}
static unsigned char burn_rspc_div(unsigned char a, unsigned char b)
/* Divides by polynomial 0x03. Derived from burn_rspc_div() */
static unsigned char burn_rspc_div_3(unsigned char a)
{
int d;
if (a == 0)
return 0;
if (b == 0)
return -1;
d = gflog[a] - gflog[b];
if (d < 0)
d += 255;
return gfpow[d];
if (gflog[a] >= 25)
return gfpow[gflog[a] - 25];
else
return gfpow[230 + gflog[a]];
}
static int burn_rspc_p0p1(unsigned char *sector, int col, int msb,
unsigned char *p0, unsigned char *p1)
static int burn_rspc_p0p1(unsigned char *sector, int col,
unsigned char *p0_lsb, unsigned char *p0_msb,
unsigned char *p1_lsb, unsigned char *p1_msb)
{
unsigned char *start, b;
unsigned int i, sum_v = 0, hxv = 0;
unsigned int i, sum_v_lsb = 0, sum_v_msb = 0;
unsigned int hxv_lsb = 0, hxv_msb = 0;
start = sector + 12 + 2 * col + !!msb;
start = sector + 12 + 2 * col;
for(i = 0; i < 24; i++) {
b = start[i * 86];
sum_v ^= b;
hxv ^= burn_rspc_mult(b, gfpow[25 - i]);
b = *start;
sum_v_lsb ^= b;
hxv_lsb ^= burn_rspc_mult(b, gfpow[25 - i]);
b = *(start + 1);
sum_v_msb ^= b;
hxv_msb ^= burn_rspc_mult(b, gfpow[25 - i]);
start += 86;
}
*p0 = burn_rspc_div(burn_rspc_mult(gfpow[1], sum_v) ^ hxv,
3); /* gfpow[1] ^ gfpow[0]); */
*p1 = sum_v ^ *p0;
*p0_lsb = burn_rspc_div_3(burn_rspc_mult(2, sum_v_lsb) ^ hxv_lsb);
/* 2 = gfpow[1] , 3 = gfpow[1] ^ gfpow[0] */
*p1_lsb = sum_v_lsb ^ *p0_lsb;
*p0_msb = burn_rspc_div_3(burn_rspc_mult(2, sum_v_msb) ^ hxv_msb);
*p1_msb = sum_v_msb ^ *p0_msb;
return 1;
}
@ -497,8 +538,7 @@ int burn_rspc_parity_p(unsigned char *sector)
/* Loop over P columns */
for(i = 0; i < 43; i++) {
burn_rspc_p0p1(sector, i, 0, &p0_lsb, &p1_lsb);
burn_rspc_p0p1(sector, i, 1, &p0_msb, &p1_msb);
burn_rspc_p0p1(sector, i, &p0_lsb, &p0_msb, &p1_lsb, &p1_msb);
sector[2162 + 2 * i] = p0_lsb;
sector[2162 + 2 * i + 1] = p0_msb;
sector[2076 + 2 * i] = p1_lsb;
@ -519,21 +559,30 @@ int burn_rspc_parity_p(unsigned char *sector)
}
static int burn_rspc_q0q1(unsigned char *sector, int diag, int msb,
unsigned char *q0, unsigned char *q1)
static int burn_rspc_q0q1(unsigned char *sector, int diag,
unsigned char *q0_lsb, unsigned char *q0_msb,
unsigned char *q1_lsb, unsigned char *q1_msb)
{
unsigned char *start, b;
unsigned int i, sum_v = 0, hxv = 0;
unsigned int i, idx, sum_v_lsb = 0, sum_v_msb = 0;
unsigned int hxv_lsb = 0, hxv_msb = 0, lsb_start;
start = sector + 12;
lsb_start = 2 * 43 * diag;
for(i = 0; i < 43; i++) {
b = start[(2 * 43 * diag + i * 88 + !!msb) % 2236];
sum_v ^= b;
hxv ^= burn_rspc_mult(b, gfpow[44 - i]);
idx = (lsb_start + i * 88) % 2236;
b = start[idx];
sum_v_lsb ^= b;
hxv_lsb ^= burn_rspc_mult(b, gfpow[44 - i]);
b = start[idx + 1];
sum_v_msb ^= b;
hxv_msb ^= burn_rspc_mult(b, gfpow[44 - i]);
}
*q0 = burn_rspc_div(burn_rspc_mult(gfpow[1], sum_v) ^ hxv,
3); /* gfpow[1] ^ gfpow[0]); */
*q1 = sum_v ^ *q0;
*q0_lsb = burn_rspc_div_3(burn_rspc_mult(2, sum_v_lsb) ^ hxv_lsb);
/* 2 = gfpow[1] ; 3 = gfpow[1] ^ gfpow[0] */
*q1_lsb = sum_v_lsb ^ *q0_lsb;
*q0_msb = burn_rspc_div_3(burn_rspc_mult(2, sum_v_msb) ^ hxv_msb);
*q1_msb = sum_v_msb ^ *q0_msb;
return 1;
}
@ -545,8 +594,7 @@ int burn_rspc_parity_q(unsigned char *sector)
/* Loop over Q diagonals */
for(i = 0; i < 26; i++) {
burn_rspc_q0q1(sector, i, 0, &q0_lsb, &q1_lsb);
burn_rspc_q0q1(sector, i, 1, &q0_msb, &q1_msb);
burn_rspc_q0q1(sector, i, &q0_lsb, &q0_msb, &q1_lsb, &q1_msb);
sector[2300 + 2 * i] = q0_lsb;
sector[2300 + 2 * i + 1] = q0_msb;
sector[2248 + 2 * i] = q1_lsb;
@ -624,7 +672,7 @@ static int burn_rspc_print_tables(void)
{
int i;
printf("static unsigned char gfpow[256] = {");
printf("static unsigned char gfpow[255] = {");
printf("\n\t");
for(i= 0; i < 255; i++) {
printf("%3u, ", gfpow[i]);
@ -697,5 +745,24 @@ static int print_ecma_130_scrambler(void)
return 1;
}
/* General division a / b by means of the tables gflog[] and gfpow[]:
   the logarithm of b gets subtracted from the logarithm of a by usual
   integer subtraction, and the result is used as index into gfpow[].
   burn_rspc_div_3() has been derived from this by setting b to constant 3.

   @param a  Dividend. If it is 0, then the result is 0.
   @param b  Divisor. If it is 0 (and a is not 0), then -1 gets returned,
             which arrives at the caller converted to unsigned char.
             NOTE(review): the converted value (255 with 8 bit chars) also
             occurs as regular entry in gfpow[]. So the caller cannot
             distinguish it from a valid quotient - confirm that no caller
             relies on this error indication.
   @return   gfpow[gflog[a] - gflog[b]], with a negative difference lifted
             into the valid index range by adding 255.
*/
static unsigned char burn_rspc_div(unsigned char a, unsigned char b)
{
	int exponent;

	if (a == 0)
		return 0;
	if (b == 0)
		return -1;

	/* Both operands get promoted to int, so the difference may be negative */
	exponent = gflog[a] - gflog[b];
	return gfpow[exponent < 0 ? exponent + 255 : exponent];
}
#endif /* Libburn_with_lec_generatoR */