diff --git a/doc/checksums.txt b/doc/checksums.txt new file mode 100644 index 0000000..3b646dd --- /dev/null +++ b/doc/checksums.txt @@ -0,0 +1,101 @@ + + Description of libisofs MD5 checksumming + + by Thomas Schmitt - mailto:scdbackup@gmx.net + Libburnia project - mailto:libburn-hackers@pykix.org + + +MD5 is a 128 bit message digest with a very low probability to be the same for +any pair of differing data files. It is described in RFC 1321 and can be +computed e.g. by program md5sum. + +libisofs can equip its images with MD5 checksums for the whole session and +for each single data file. See libisofs.h, iso_write_opts_set_record_md5(). +The checksums get loaded together with the directory tree if this is enabled by +iso_read_opts_set_no_md5(). Loaded checksums can be inquired by +iso_image_get_session_md5() and iso_file_get_md5(). +libisofs has its own MD5 computation functions: iso_md5_start(), iso_md5_compute(), +iso_md5_clone(), iso_md5_end(). +See iso_file_get_stream(), iso_stream_open() et al. for reading file content +from the loaded image. + + + Representation in the Image + +The checksums are stored in an area at the end of the session, in order to +allow quick loading from media with slow random access. +There is an array of MD5 entries and a single block with a checksum tag. + +Location and layout of the checksum area are recorded as AAIP attribute +"isofs.ca" of the root node. +See doc/susp_aaip_2_0.txt for a general description of AAIP and +doc/susp_aaip_isofs_names.txt for the layout of "isofs.ca". + +Because the inquiry of this attribute demands loading of the image tree, +there is also a checksum tag after the checksum area. +This tag can be detected on the fly when reading and checksumming the session +from the start point as learned from a media table-of-contents. It covers not +only the payload of the session but also the checksum area. + +The single data files hold an index to their MD5 checksum in individual AAIP +attributes "isofs.cx". 
Index I means: array base address + 16 * I. + +The checksums cover the data content as it was actually written into the ISO +image stream, not necessarily as it was on hard disk before or afterwards. +This implies that content filtered files bear the MD5 of the filtered data +and not of the original files on disk. When checkreading, one has to avoid +any filtering. Dig out the stream which directly reads image data by calling +iso_stream_get_input_stream() until it returns NULL and use +iso_stream_get_size() rather than iso_file_get_size(). + + + The MD5 array + +If there are N checksummed data files then the array consists of N + 2 entries +with 16 bytes each. + +Entry number 0 holds a session checksum which covers the range from the session +start block up to (but not including) the start block of the checksum area. +This range is described by attribute "isofs.ca" of the root node. + +Entries 1 to N hold the checksums of individual data files. + +Entry number N + 1 holds the MD5 checksum of entries 0 to N. + + + The Checksum Tag + +The next block after the array begins with the checksum tag and is padded +by 0-bytes. The tag is a single line of printable text and has the following +format: + + libisofs_checksum_tag_v1 pos=... range_start=... range_size=... md5=... \n + +Example: + libisofs_checksum_tag_v1 pos=81552 range_start=32 range_size=81520 md5=f172b994e8eb565a011d220b2a8b7a19 + +There are four parameters: + + pos= + gives the block address where the tag supposes itself to be stored. + If this does not match the block address where the tag is found then this + either indicates that the tag is payload of the image or that the image has + been relocated. (The latter makes the image unusable.) + + range_start= + The block address where the session is supposed to start. If this does not + match the session start on media then the volume descriptors of the + image have been relocated. (The latter can happen with overwriteable + media.) 
+ + range_size= + The number of blocks beginning at range_start which are covered by the + checksum of the tag. + + md5= + The checksum of the tag. Encoded as 32 hex digits. + + The newline character at the end is mandatory. For now all bytes of the + block after that newline shall be zero. There may arise future extensions. + + diff --git a/libisofs/ecma119.c b/libisofs/ecma119.c index fb44cbb..682e559 100644 --- a/libisofs/ecma119.c +++ b/libisofs/ecma119.c @@ -1057,6 +1057,9 @@ int ecma119_image_new(IsoImage *src, IsoWriteOpts *opts, Ecma119Image **img) target->checksum_ctx = NULL; target->checksum_counter = 0; target->checksum_buffer = NULL; + target->checksum_array_pos = 0; + target->checksum_range_start = 0; + target->checksum_range_size = 0; #endif diff --git a/libisofs/ecma119.h b/libisofs/ecma119.h index 8d6b5b7..6ef57da 100644 --- a/libisofs/ecma119.h +++ b/libisofs/ecma119.h @@ -456,6 +456,9 @@ struct ecma119_image off_t checksum_counter; char image_md5[16]; char *checksum_buffer; + uint32_t checksum_array_pos; + uint32_t checksum_range_start; + uint32_t checksum_range_size; #endif /* Libisofs_with_checksumS */ diff --git a/libisofs/md5.c b/libisofs/md5.c index b9b4313..f0f3f67 100644 --- a/libisofs/md5.c +++ b/libisofs/md5.c @@ -10,6 +10,7 @@ #include #include #include +#include #include "writer.h" #include "messages.h" @@ -364,7 +365,7 @@ int iso_md5_clone(void *old_md5_context, void **new_md5_context) { int ret; - ret = libisofs_md5(new_md5_context, old_md5_context, 0, NULL, 4); + ret = libisofs_md5(new_md5_context, old_md5_context, 0, NULL, 1 | 4); if (ret < 0) return ISO_OUT_OF_MEM; if (ret == 0) @@ -474,18 +475,22 @@ int checksum_writer_compute_data_blocks(IsoImageWriter *writer) size_t size; Ecma119Image *t; int ret; - unsigned int lba; if (writer == NULL) { return ISO_ASSERT_FAILURE; } t = writer->target; - lba = t->curblock; /* (t->curblock already contains t->ms_block) */ - size = (t->checksum_idx_counter + 2) / 128 + 1; - t->curblock += size; 
- - /* >>> ??? reserve extra block for stream detectable checksum */; + t->checksum_array_pos = t->curblock; + /* (t->curblock already contains t->ms_block) */ + t->checksum_range_start = t->ms_block; + size = (t->checksum_idx_counter + 2) / 128; + if (size * 128 < t->checksum_idx_counter + 2) + size++; + t->curblock += size + 1; + /* + 1 = extra block for stream detectable checksum tag */ + t->checksum_range_size = t->checksum_array_pos + size + - t->checksum_range_start; /* Allocate array of MD5 sums */ t->checksum_buffer = calloc(size, 2048); @@ -499,7 +504,8 @@ int checksum_writer_compute_data_blocks(IsoImageWriter *writer) /* Record lba,count,size,cecksum_type in "isofs.ca" of root node */ ret = iso_root_set_isofsca((IsoNode *) t->image->root, - (unsigned int) t->ms_block, lba, + t->checksum_range_start, + t->checksum_array_pos, t->checksum_idx_counter + 2, 16, "MD5", 0); if (ret < 0) return ret; @@ -524,9 +530,11 @@ int checksum_writer_write_data(IsoImageWriter *writer) #ifdef Libisofs_with_checksumS - int wres, res; + int wres, res, l; size_t i, size; Ecma119Image *t; + void *ctx = NULL; + char md5[16], tag_block[2048]; if (writer == NULL) { return ISO_ASSERT_FAILURE; @@ -537,30 +545,69 @@ int checksum_writer_write_data(IsoImageWriter *writer) /* Write image checksum to index 0 */ if (t->checksum_ctx != NULL) { - - /* >>> rather clone a result than killing t->checksum_ctx */; - - res = iso_md5_end(&(t->checksum_ctx), t->image_md5); - if (res > 0) - memcpy(t->checksum_buffer + 0, t->image_md5, 16); + res = iso_md5_clone(t->checksum_ctx, &ctx); + if (res > 0) { + res = iso_md5_end(&ctx, t->image_md5); + if (res > 0) + memcpy(t->checksum_buffer + 0 * 16, t->image_md5, 16); + } } - size = (t->checksum_idx_counter + 2) / 128 + 1; + size = (t->checksum_idx_counter + 2) / 128; + if (size * 128 < t->checksum_idx_counter + 2) + size++; - /* >>> write overall checksum as index t->checksum_idx_counter + 1 */; + /* Write checksum of checksum array as index 
t->checksum_idx_counter + 1 */ + res = iso_md5_start(&ctx); + if (res > 0) { + for (i = 0; i < t->checksum_idx_counter + 1; i++) + iso_md5_compute(ctx, + t->checksum_buffer + ((size_t) i) * (size_t) 16, 16); + res = iso_md5_end(&ctx, md5); + if (res > 0) + memcpy(t->checksum_buffer + (t->checksum_idx_counter + 1) * 16, + md5, 16); + } for (i = 0; i < size; i++) { - wres = iso_write(t, t->checksum_buffer + ((size_t) 2048) * i, - BLOCK_SIZE); - if (wres < 0) - return wres; + wres = iso_write(t, t->checksum_buffer + ((size_t) 2048) * i, 2048); + if (wres < 0) { + res = wres; + goto ex; + } + } + if (t->checksum_ctx == NULL) { + res = ISO_SUCCESS; + goto ex; } - /* >>> write scdbackup checksum tag to an extra block */; + /* Write stream detectable checksum tag to extra block */; + memset(tag_block, 0, 2048); + res = iso_md5_end(&(t->checksum_ctx), md5); + if (res > 0) { + sprintf(tag_block, + "libisofs_checksum_tag_v1 pos=%u range_start=%u range_size=%u md5=", + t->checksum_array_pos + (unsigned int) size, + t->checksum_range_start, t->checksum_range_size); + l = strlen(tag_block); + for (i = 0; i < 16; i++) + sprintf(tag_block + l + 2 * i, "%2.2x", + ((unsigned char *) md5)[i]); + tag_block[l + 32] = '\n'; + } + wres = iso_write(t, tag_block, 2048); + if (wres < 0) { + res = wres; + goto ex; + } #endif /* Libisofs_with_checksumS */ - return ISO_SUCCESS; + res = ISO_SUCCESS; +ex:; + if (ctx != NULL) + iso_md5_end(&ctx, md5); + return(res); }