Described libisofs MD5 recording and implemented checksum array checksum

and stream detectable session checksum tag.
This commit is contained in:
Thomas Schmitt 2009-08-13 17:19:58 +02:00
parent ecf2ca044e
commit 40c39af271
4 changed files with 177 additions and 23 deletions

101
doc/checksums.txt Normal file
View File

@ -0,0 +1,101 @@
Description of libisofs MD5 checksumming
by Thomas Schmitt - mailto:scdbackup@gmx.net
Libburnia project - mailto:libburn-hackers@pykix.org
MD5 is a 128 bit message digest with a very low probability to be the same for
any pair of differing data files. It is described in RFC 1321 and can be
computed e.g. by program md5sum.
libisofs can equip its images with MD5 checksums for the whole session and
for each single data file. See libisofs.h, iso_write_opts_set_record_md5().
The checksums get loaded together with the directory tree if this is enabled by
iso_read_opts_set_no_md5(). Loaded checksums can be inquired by
iso_image_get_session_md5() and iso_file_get_md5().
libisofs has its own MD5 computation functions: iso_md5_start(), iso_md5_compute(),
iso_md5_clone(), iso_md5_end().
See iso_file_get_stream(), iso_stream_open() et al. for reading file content
from the loaded image.
Representation in the Image
The checksums are stored in an area at the end of the session, in order to
allow quick loading from media with slow random access.
There is an array of MD5 entries and a single block with a checksum tag.
Location and layout of the checksum area is recorded as AAIP attribute
"isofs.ca" of the root node.
See doc/susp_aaip_2_0.txt for a general description of AAIP and
doc/susp_aaip_isofs_names.txt for the layout of "isofs.ca".
Because the inquiry of this attribute demands loading of the image tree,
there is also a checksum tag after the checksum area.
This tag can be detected on the fly when reading and checksumming the session
from the start point as learned from a media table-of-content. It covers not
only the payload of the session but also the checksum area.
The single data files hold an index to their MD5 checksum in individual AAIP
attributes "isofs.cx". Index I means: array base address + 16 * I.
The checksums cover the data content as it was actually written into the ISO
image stream, not necessarily as it was on hard disk before or afterwards.
This implies that content filtered files bear the MD5 of the filtered data
and not of the original files on disk. When checkreading, one has to avoid
any filtering. Dig out the stream which directly reads image data by calling
iso_stream_get_input_stream() until it returns NULL and use
iso_stream_get_size() rather than iso_file_get_size().
The MD5 array
If there are N checksummed data files then the array consists of N + 2 entries
with 16 bytes each.
Entry number 0 holds a session checksum which covers the range from the session
start block up to (but not including) the start block of the checksum area.
This range is described by attribute "isofs.ca" of the root node.
Entries 1 to N hold the checksums of individual data files.
Entry number N + 1 holds the MD5 checksum of entries 0 to N.
The Checksum Tag
The next block after the array begins with the checksum tag and is padded
by 0-bytes. The tag is a single line of printable text and has the following
format:
libisofs_checksum_tag_v1 pos=... range_start=... range_size=... md5=...\n
Example:
libisofs_checksum_tag_v1 pos=81552 range_start=32 range_size=81520 md5=f172b994e8eb565a011d220b2a8b7a19
There are four parameters:
pos=
gives the block address where the tag supposes itself to be stored.
If this does not match the block address where the tag is found then this
either indicates that the tag is payload of the image or that the image has
been relocated. (The latter makes the image unusable.)
range_start=
The block address where the session is supposed to start. If this does not
match the session start on media then the volume descriptors of the image
have been relocated. (This can happen with overwriteable media.)
range_size=
The number of blocks beginning at range_start which are covered by the
checksum of the tag.
md5=
The MD5 checksum of the covered range, as recorded by the tag. Encoded as
32 hex digits.
The newline character at the end is mandatory. For now all bytes of the
block after that newline shall be zero. There may arise future extensions.

View File

@ -1057,6 +1057,9 @@ int ecma119_image_new(IsoImage *src, IsoWriteOpts *opts, Ecma119Image **img)
target->checksum_ctx = NULL; target->checksum_ctx = NULL;
target->checksum_counter = 0; target->checksum_counter = 0;
target->checksum_buffer = NULL; target->checksum_buffer = NULL;
target->checksum_array_pos = 0;
target->checksum_range_start = 0;
target->checksum_range_size = 0;
#endif #endif

View File

@ -456,6 +456,9 @@ struct ecma119_image
off_t checksum_counter; off_t checksum_counter;
char image_md5[16]; char image_md5[16];
char *checksum_buffer; char *checksum_buffer;
uint32_t checksum_array_pos;
uint32_t checksum_range_start;
uint32_t checksum_range_size;
#endif /* Libisofs_with_checksumS */ #endif /* Libisofs_with_checksumS */

View File

@ -10,6 +10,7 @@
#include <stdint.h> #include <stdint.h>
#include <stdlib.h> #include <stdlib.h>
#include <string.h> #include <string.h>
#include <stdio.h>
#include "writer.h" #include "writer.h"
#include "messages.h" #include "messages.h"
@ -364,7 +365,7 @@ int iso_md5_clone(void *old_md5_context, void **new_md5_context)
{ {
int ret; int ret;
ret = libisofs_md5(new_md5_context, old_md5_context, 0, NULL, 4); ret = libisofs_md5(new_md5_context, old_md5_context, 0, NULL, 1 | 4);
if (ret < 0) if (ret < 0)
return ISO_OUT_OF_MEM; return ISO_OUT_OF_MEM;
if (ret == 0) if (ret == 0)
@ -474,18 +475,22 @@ int checksum_writer_compute_data_blocks(IsoImageWriter *writer)
size_t size; size_t size;
Ecma119Image *t; Ecma119Image *t;
int ret; int ret;
unsigned int lba;
if (writer == NULL) { if (writer == NULL) {
return ISO_ASSERT_FAILURE; return ISO_ASSERT_FAILURE;
} }
t = writer->target; t = writer->target;
lba = t->curblock; /* (t->curblock already contains t->ms_block) */ t->checksum_array_pos = t->curblock;
size = (t->checksum_idx_counter + 2) / 128 + 1; /* (t->curblock already contains t->ms_block) */
t->curblock += size; t->checksum_range_start = t->ms_block;
size = (t->checksum_idx_counter + 2) / 128;
/* >>> ??? reserve extra block for stream detectable checksum */; if (size * 128 < t->checksum_idx_counter + 2)
size++;
t->curblock += size + 1;
/* + 1 = extra block for stream detectable checksum tag */
t->checksum_range_size = t->checksum_array_pos + size
- t->checksum_range_start;
/* Allocate array of MD5 sums */ /* Allocate array of MD5 sums */
t->checksum_buffer = calloc(size, 2048); t->checksum_buffer = calloc(size, 2048);
@ -499,7 +504,8 @@ int checksum_writer_compute_data_blocks(IsoImageWriter *writer)
/* Record lba,count,size,cecksum_type in "isofs.ca" of root node */ /* Record lba,count,size,cecksum_type in "isofs.ca" of root node */
ret = iso_root_set_isofsca((IsoNode *) t->image->root, ret = iso_root_set_isofsca((IsoNode *) t->image->root,
(unsigned int) t->ms_block, lba, t->checksum_range_start,
t->checksum_array_pos,
t->checksum_idx_counter + 2, 16, "MD5", 0); t->checksum_idx_counter + 2, 16, "MD5", 0);
if (ret < 0) if (ret < 0)
return ret; return ret;
@ -524,9 +530,11 @@ int checksum_writer_write_data(IsoImageWriter *writer)
#ifdef Libisofs_with_checksumS #ifdef Libisofs_with_checksumS
int wres, res; int wres, res, l;
size_t i, size; size_t i, size;
Ecma119Image *t; Ecma119Image *t;
void *ctx = NULL;
char md5[16], tag_block[2048];
if (writer == NULL) { if (writer == NULL) {
return ISO_ASSERT_FAILURE; return ISO_ASSERT_FAILURE;
@ -537,30 +545,69 @@ int checksum_writer_write_data(IsoImageWriter *writer)
/* Write image checksum to index 0 */ /* Write image checksum to index 0 */
if (t->checksum_ctx != NULL) { if (t->checksum_ctx != NULL) {
res = iso_md5_clone(t->checksum_ctx, &ctx);
/* >>> rather clone a result than killing t->checksum_ctx */; if (res > 0) {
res = iso_md5_end(&ctx, t->image_md5);
res = iso_md5_end(&(t->checksum_ctx), t->image_md5); if (res > 0)
if (res > 0) memcpy(t->checksum_buffer + 0 * 16, t->image_md5, 16);
memcpy(t->checksum_buffer + 0, t->image_md5, 16); }
} }
size = (t->checksum_idx_counter + 2) / 128 + 1; size = (t->checksum_idx_counter + 2) / 128;
if (size * 128 < t->checksum_idx_counter + 2)
size++;
/* >>> write overall checksum as index t->checksum_idx_counter + 1 */; /* Write checksum of checksum array as index t->checksum_idx_counter + 1 */
res = iso_md5_start(&ctx);
if (res > 0) {
for (i = 0; i < t->checksum_idx_counter + 1; i++)
iso_md5_compute(ctx,
t->checksum_buffer + ((size_t) i) * (size_t) 16, 16);
res = iso_md5_end(&ctx, md5);
if (res > 0)
memcpy(t->checksum_buffer + (t->checksum_idx_counter + 1) * 16,
md5, 16);
}
for (i = 0; i < size; i++) { for (i = 0; i < size; i++) {
wres = iso_write(t, t->checksum_buffer + ((size_t) 2048) * i, wres = iso_write(t, t->checksum_buffer + ((size_t) 2048) * i, 2048);
BLOCK_SIZE); if (wres < 0) {
if (wres < 0) res = wres;
return wres; goto ex;
}
}
if (t->checksum_ctx == NULL) {
res = ISO_SUCCESS;
goto ex;
} }
/* >>> write scdbackup checksum tag to an extra block */; /* Write stream detectable checksum tag to extra block */;
memset(tag_block, 0, 2048);
res = iso_md5_end(&(t->checksum_ctx), md5);
if (res > 0) {
sprintf(tag_block,
"libisofs_checksum_tag_v1 pos=%u range_start=%u range_size=%u md5=",
t->checksum_array_pos + (unsigned int) size,
t->checksum_range_start, t->checksum_range_size);
l = strlen(tag_block);
for (i = 0; i < 16; i++)
sprintf(tag_block + l + 2 * i, "%2.2x",
((unsigned char *) md5)[i]);
tag_block[l + 32] = '\n';
}
wres = iso_write(t, tag_block, 2048);
if (wres < 0) {
res = wres;
goto ex;
}
#endif /* Libisofs_with_checksumS */ #endif /* Libisofs_with_checksumS */
return ISO_SUCCESS; res = ISO_SUCCESS;
ex:;
if (ctx != NULL)
iso_md5_end(&ctx, md5);
return(res);
} }