From b4e2a60cd9adf3c900995a021be93af104efbf0c Mon Sep 17 00:00:00 2001 From: Thomas Schmitt Date: Sun, 16 Aug 2009 13:57:42 +0200 Subject: [PATCH] Introduced checksum tags for superblock and directory tree. --- doc/checksums.txt | 164 ++++++++++++++++++++++++++++++++------------ libisofs/ecma119.c | 37 +++++++++- libisofs/ecma119.h | 2 + libisofs/libisofs.h | 7 ++ libisofs/md5.c | 120 ++++++++++++++++++++++++++++++-- libisofs/md5.h | 17 +++++ libisofs/util.c | 29 ++++++-- 7 files changed, 319 insertions(+), 57 deletions(-) diff --git a/doc/checksums.txt b/doc/checksums.txt index cc64077..0c26667 100644 --- a/doc/checksums.txt +++ b/doc/checksums.txt @@ -1,57 +1,47 @@ - Description of libisofs MD5 checksumming + Description of libisofs MD5 checksumming - by Thomas Schmitt - mailto:scdbackup@gmx.net - Libburnia project - mailto:libburn-hackers@pykix.org - 13 Aug 2009 + by Thomas Schmitt - mailto:scdbackup@gmx.net + Libburnia project - mailto:libburn-hackers@pykix.org + 16 Aug 2009 MD5 is a 128 bit message digest with a very low probability to be the same for any pair of differing data files. It is described in RFC 1321. and can be computed e.g. by program md5sum. -libisofs can equip its images with MD5 checksums for the whole session and -for each single data file. See libisofs.h, iso_write_opts_set_record_md5(). -The checksums get loaded together with the directory tree if this is enabled by -iso_read_opts_set_no_md5(). Loaded checksums can be inquired by +libisofs can equip its images with MD5 checksums for superblock, directory +tree, the whole session, and for each single data file. +See libisofs.h, iso_write_opts_set_record_md5(). + +The data file checksums get loaded together with the directory tree if this +is enabled by iso_read_opts_set_no_md5(). Loaded checksums can be inquired by iso_image_get_session_md5() and iso_file_get_md5(). -libisofs has own MD5 computation functions: iso_md5_start(), iso_md5_compute(), -iso_md5_clone(), iso_md5_end(). 
-See iso_file_get_stream(), iso_stream_open() et.al. for reading file content -from the loaded image. + +Stream recognizable checksum tags occupy exactly one block each. They can +be detected by submitting a read-in block to iso_util_decode_md5_tag(). + +libisofs has its own MD5 computation functions: +iso_md5_start(), iso_md5_compute(), iso_md5_clone(), iso_md5_end() - Representation in the Image + Representation in the Image -The checksums are stored in an area at the end of the session, in order to -allow quick loading from media with slow random access. -There is an array of MD5 entries and a single block with a checksum tag. +The checksums are stored as stream recognizable checksum tags and as a compact +array at the end of the session. The latter allows quick loading of many +file checksums from media with slow random access. -Location and layout of the checksum area is recorded as AAIP attribute + + The Checksum Array + +Location and layout of the checksum array are recorded as AAIP attribute "isofs.ca" of the root node. See doc/susp_aaip_2_0.txt for a general description of AAIP and doc/susp_aaip_isofs_names.txt for the layout of "isofs.ca". -Because the inquiry of this attribute demands loading of the image tree, -there is also a checksum tag after the checksum area. -This tag can be detected on the fly when reading and checksumming the session -from the start point as learned from a media table-of-content. It covers not -only the payload of the session but also the checksum area. - The single data files hold an index to their MD5 checksum in individual AAIP attributes "isofs.cx". Index I means: array base address + 16 * I. -The checksums cover the data content as it was actually written into the ISO -image stream, not necessarily as it was on hard disk before or afterwards. -This implies that content filtered files bear the MD5 of the filtered data -and not of the original files on disk. When checkreading, one has to avoid -any filtering. 
Dig out the stream which directly reads image data by calling -iso_stream_get_input_stream() until it returns NULL and use -iso_stream_get_size() rather than iso_file_get_size(). - - - The MD5 array - If there are N checksummed data files then the array consists of N + 2 entries with 16 bytes each. @@ -64,19 +54,41 @@ Entries 1 to N hold the checksums of individual data files. Entry number N + 1 holds the MD5 checksum of entries 0 to N. - The Checksum Tag + The Checksum Tags -The next block after the array begins with the checksum tag and is padded -by 0-bytes. The tag is a single line of printable text and has the following -format: +Because the inquiry of AAIP attributes demands loading of the image tree, +there are also checksum tags which can be detected on the fly when reading +and checksumming the session from the start point as learned from a media +table-of-content. - libisofs_checksum_tag_v1 pos=# range_start=# range_size=# md5=# self=#\n +The superblock checksum tag is written after the ECMA-119 volume descriptors. +The tree checksum tag is written after the ECMA-119 directory entries. +The session checksum tag is written after all payload including the checksum +array. (Padding may follow.) -Example: -libisofs_checksum_tag_v1 pos=81552 range_start=32 range_size=81520 md5=f172b994e8eb565a011d220b2a8b7a19 self=020975b2aa1189d455db2c09560b8732 +The tags are single lines of printable text, padded by 0 bytes. They have +the following format: -There are five parameters. The first three are decimal numbers, the others -are strings of 32 hex digits. + Tag_id pos=# range_start=# range_size=# md5=# self=#\n + +Tag_id distinguishes the three tag types + "libisofs_sb_checksum_tag_v1" Superblock tag + "libisofs_tree_checksum_tag_v1" Directory tree tag + "libisofs_checksum_tag_v1" Session tag + + +Example (session starts at Logical Block Address 32): + + <... 
ECMA-119 System Area and Volume Descriptors ...> +libisofs_sb_checksum_tag_v1 pos=50 range_start=32 range_size=18 md5=17471035f1360a69eedbd1d0c67a6aa2 self=52d602210883eeababfc9cd287e28682 + <... ECMA-119 Directory Entries ...> +libisofs_tree_checksum_tag_v1 pos=334 range_start=32 range_size=302 md5=41acd50285339be5318decce39834a45 self=fe100c338c8f9a494a5432b5bfe6bf3c + <... Data file payload and checksum array ...> +libisofs_checksum_tag_v1 pos=81554 range_start=32 range_size=81522 md5=8adb404bdf7f5c0a078873bb129ee5b9 self=57c2c2192822b658240d62cbc88270cb + + +There are five tag parameters. The first three are decimal numbers, the others +are strings of 32 hex digits: pos= gives the block address where the tag supposes itself to be stored. @@ -107,4 +119,68 @@ are strings of 32 hex digits. The newline character at the end is mandatory. For now all bytes of the block after that newline shall be zero. There may arise future extensions. - + +------------------------------------------------------------------------------- + + Usage at Read Time + + Checking a Whole Session + +In order to check the trustworthiness of a whole session, read from its start +up to the session tag. Read the blocks and submit each single one of them to + + iso_util_decode_md5_tag(block, &pos, &range_start, &range_size, md5, 1); + +If this returns 1, then check whether the returned parameters pos, range_start, +and range_size match the state of block reading, and whether the returned +bytes in parameter md5 match the MD5 computed from the data blocks which were +read before the tag block. + + + Checking before Image Tree Loading + +In order to check for a trustworthy loadable image tree, read the first +32 blocks of the session and look for the superblock checksum tag by + iso_util_decode_md5_tag(block, &pos, &range_start, &range_size, md5, 2); +If one appears and has plausible parameters, then check whether its MD5 matches +the MD5 of the data blocks read before. 
+(Keep the original MD5 context of the data blocks and clone one for obtaining +the MD5 bytes.) + +If those MD5s match, then compute the checksum block into the kept MD5 context +and go on with searching for the tree checksum tag. This can be found in a +read-in block by: + iso_util_decode_md5_tag(block, &pos, &range_start, &range_size, md5, 3) +Again, if the parameters match the reading state, the MD5 must match the +MD5 computed from the data blocks before. +If so, then the tree is ok and safe to be loaded by iso_image_import(). + + + Checking Single Files in a Loaded Image + +The image has to be loaded, so you can obtain IsoNode objects which yield + iso_node_get_type(node) == LIBISO_FILE + +The recorded checksum can be obtained by + iso_file_get_md5(image, (IsoFile *) node, md5, 0); + +For accessing the file data in the loaded image use + iso_file_get_stream((IsoFile *) node); +to get the data stream of the object. +The checksums cover the data content as it was actually written into the ISO +image stream, not necessarily as it was on hard disk before or afterwards. +This implies that content filtered files bear the MD5 of the filtered data +and not of the original files on disk. When checkreading, one has to avoid +any reverse filtering. Dig out the stream which directly reads image data +by calling iso_stream_get_input_stream() until it returns NULL and use +iso_stream_get_size() rather than iso_file_get_size(). + +Now you may call iso_stream_open(), iso_stream_read(), iso_stream_close() +for reading file content from the loaded image. + + + Session Check in a Loaded Image + +iso_image_get_session_md5() gives start LBA and session payload size as of +"isofs.ca" and the session checksum as of the checksum array. 
+ diff --git a/libisofs/ecma119.c b/libisofs/ecma119.c index deead32..5be9a94 100644 --- a/libisofs/ecma119.c +++ b/libisofs/ecma119.c @@ -241,6 +241,16 @@ int ecma119_writer_compute_data_blocks(IsoImageWriter *writer) target->curblock += DIV_UP(path_table_size, BLOCK_SIZE); target->path_table_size = path_table_size; +#ifdef Libisofs_with_checksumS + + if (target->md5_session_checksum) { + /* Account for tree checksum tag */ + target->checksum_tree_tag_pos = target->curblock; + target->curblock++; + } + +#endif /* Libisofs_with_checksumS */ + return ISO_SUCCESS; } @@ -674,6 +684,17 @@ int ecma119_writer_write_data(IsoImageWriter *writer) /* and write the path tables */ ret = write_path_tables(t); + if (ret < 0) + return ret; + +#ifdef Libisofs_with_checksumS + + if (t->md5_session_checksum) { + /* Write tree checksum tag */ + ret = iso_md5_write_tag(t, t->checksum_tree_tag_pos, 3); + } + +#endif /* Libisofs_with_checksumS */ return ret; } @@ -849,6 +870,18 @@ void *write_function(void *arg) } } +#ifdef Libisofs_with_checksumS + + /* Write superblock checksum tag */ + if (target->md5_session_checksum && target->checksum_ctx != NULL) { + res = iso_md5_write_tag(target, target->checksum_sb_tag_pos, 2); + if (res < 0) + goto write_error; + } + +#endif /* Libisofs_with_checksumS */ + + /* write data for each writer */ for (i = 0; i < target->nwriters; ++i) { writer = target->writers[i]; @@ -1064,6 +1097,8 @@ int ecma119_image_new(IsoImage *src, IsoWriteOpts *opts, Ecma119Image **img) target->checksum_idx_counter = 0; target->checksum_ctx = NULL; target->checksum_counter = 0; + target->checksum_sb_tag_pos = 0; + target->checksum_tree_tag_pos = 0; target->checksum_buffer = NULL; target->checksum_array_pos = 0; target->checksum_range_start = 0; @@ -1175,8 +1210,6 @@ int ecma119_image_new(IsoImage *src, IsoWriteOpts *opts, Ecma119Image **img) #ifdef Libisofs_with_checksumS - /* ??? Is it safe to add a writer after the content writer ? 
*/ - if (target->md5_file_checksums || target->md5_session_checksum) { ret = checksum_writer_create(target); if (ret < 0) diff --git a/libisofs/ecma119.h b/libisofs/ecma119.h index 6ef57da..7a981be 100644 --- a/libisofs/ecma119.h +++ b/libisofs/ecma119.h @@ -454,6 +454,8 @@ struct ecma119_image unsigned int checksum_idx_counter; void *checksum_ctx; off_t checksum_counter; + uint32_t checksum_sb_tag_pos; + uint32_t checksum_tree_tag_pos; char image_md5[16]; char *checksum_buffer; uint32_t checksum_array_pos; diff --git a/libisofs/libisofs.h b/libisofs/libisofs.h index c5229c9..ae7812e 100644 --- a/libisofs/libisofs.h +++ b/libisofs/libisofs.h @@ -5065,6 +5065,13 @@ int iso_file_get_md5(IsoImage *image, IsoFile *file, char md5[16], int flag); * covered by parameter md5. * @param md5 * Returns 16 byte of MD5 checksum. + * @param flag + * Bitfield for control purposes: + * bit0-bit7= tag type being looked for + * 0= any checksum tag + * 1= session tag + * 2= superblock tag + * 3= tree tag * @return * 0= not a checksum tag, return parameters are invalid * 1= checksum tag found diff --git a/libisofs/md5.c b/libisofs/md5.c index 3c0a18f..6f02ea7 100644 --- a/libisofs/md5.c +++ b/libisofs/md5.c @@ -519,7 +519,12 @@ int checksum_writer_compute_data_blocks(IsoImageWriter *writer) static int checksum_writer_write_vol_desc(IsoImageWriter *writer) { - /* nothing needed */ + + /* The superblock checksum tag has to be written after + the Volume Descriptor Set Terminator and thus may not be + written by this function. (It would have been neat, though). 
+ */ + return ISO_SUCCESS; } @@ -530,11 +535,16 @@ int checksum_writer_write_data(IsoImageWriter *writer) #ifdef Libisofs_with_checksumS - int wres, res, l; + int wres, res; size_t i, size; Ecma119Image *t; void *ctx = NULL; - char md5[16], tag_block[2048]; + char md5[16]; + +#ifdef NIX + char tag_block[2048]; + int l; +#endif if (writer == NULL) { return ISO_ASSERT_FAILURE; @@ -582,6 +592,9 @@ int checksum_writer_write_data(IsoImageWriter *writer) } /* Write stream detectable checksum tag to extra block */; + +#ifdef NIX + memset(tag_block, 0, 2048); res = iso_md5_end(&(t->checksum_ctx), md5); if (res > 0) { @@ -612,13 +625,25 @@ int checksum_writer_write_data(IsoImageWriter *writer) goto ex; } -#endif /* Libisofs_with_checksumS */ +#else /* NIX */ + + res = iso_md5_write_tag(t, t->checksum_array_pos + (uint32_t) size, 1); + if (res < 0) + goto ex; + +#endif /* ! NIX */ res = ISO_SUCCESS; ex:; if (ctx != NULL) iso_md5_end(&ctx, md5); return(res); + +#else /* Libisofs_with_checksumS */ + + return ISO_SUCCESS; + +#endif /* ! Libisofs_with_checksumS */ } @@ -649,6 +674,93 @@ int checksum_writer_create(Ecma119Image *target) /* add this writer to image */ target->writers[target->nwriters++] = writer; +#ifdef Libisofs_with_checksumS + + /* Account for superblock checksum tag */ + if (target->md5_session_checksum) { + target->checksum_sb_tag_pos = target->curblock; + target->curblock++; + } + +#endif /* Libisofs_with_checksumS */ + return ISO_SUCCESS; } + +/* Write stream detectable checksum tag to extra block. + * @flag bit0-7= tag type + * 1= session tag (End checksumming.) 
+ * 2= superblock tag (System Area and Volume Descriptors) + * 3= tree tag (ECMA-119 and Rock Ridge tree) + */ +int iso_md5_write_tag(Ecma119Image *t, uint32_t pos, int flag) +{ + +#ifdef Libisofs_with_checksumS + + int res, mode, l, i, wres; + void *ctx = NULL; + char md5[16], tag_block[2048]; + uint32_t size; + static char *tag_ids[4]= {"", + "libisofs_checksum_tag_v1", + "libisofs_sb_checksum_tag_v1", + "libisofs_tree_checksum_tag_v1"}; + + memset(tag_block, 0, 2048); + mode = flag & 255; + if (mode == 1) { + res = iso_md5_end(&(t->checksum_ctx), md5); + size = t->checksum_range_size; + } else if (mode == 2 || mode == 3) { + size = pos - t->checksum_range_start; + res = iso_md5_clone(t->checksum_ctx, &ctx); + if (res < 0) + return res; + res = iso_md5_end(&ctx, md5); + } else { + return ISO_WRONG_ARG_VALUE; + } + if (res > 0) { + sprintf(tag_block, + "%s pos=%u range_start=%u range_size=%u md5=", + tag_ids[mode], pos, + t->checksum_range_start, size); + l = strlen(tag_block); + for (i = 0; i < 16; i++) + sprintf(tag_block + l + 2 * i, "%2.2x", + ((unsigned char *) md5)[i]); + + res = iso_md5_start(&ctx); + if (res > 0) { + iso_md5_compute(ctx, tag_block, l + 32); + iso_md5_end(&ctx, md5); + strcpy(tag_block + l + 32, " self="); + l += 32 + 6; + for (i = 0; i < 16; i++) + sprintf(tag_block + l + 2 * i, "%2.2x", + ((unsigned char *) md5)[i]); + } + tag_block[l + 32] = '\n'; + } + wres = iso_write(t, tag_block, 2048); + if (wres < 0) { + res = wres; + goto ex; + } + res = ISO_SUCCESS; +ex:; + if (ctx != NULL) + iso_md5_end(&ctx, md5); + return res; + +#else /* Libisofs_with_checksumS */ + + return ISO_SUCCESS; + +#endif /* ! Libisofs_with_checksumS */ + +} + + diff --git a/libisofs/md5.h b/libisofs/md5.h index e899d7e..18ce149 100644 --- a/libisofs/md5.h +++ b/libisofs/md5.h @@ -26,6 +26,23 @@ int checksum_writer_create(Ecma119Image *target); int checksum_xinfo_func(void *data, int flag); +/* Write stream detectable checksum tag to extra block. 
+ * All tag ranges start at the beginning of the System Area (i.e. t->ms_block) + * and stem from the same MD5 computation context. Tag types 2 and 3 are + * intermediate checksums. Type 2 announces the existence of type 3. + * If both match, then at least the directory tree is trustworthy. + * Type 1 is written at the very end of the session. If it matches, then + * the whole image is trustworthy. + * @param t The image being written + * @param pos The LBA where this tag block is supposed to be written + * @flag bit0-7= tag type + * 1= session tag (End checksumming.) + * 2= superblock tag (System Area and Volume Descriptors) + * 3= tree tag (ECMA-119 and Rock Ridge tree) + */ +int iso_md5_write_tag(Ecma119Image *t, uint32_t pos, int flag); + + #endif /* ! LIBISO_MD5_H_ */ diff --git a/libisofs/util.c b/libisofs/util.c index 3dccb40..9b1cae8 100644 --- a/libisofs/util.c +++ b/libisofs/util.c @@ -1567,15 +1567,30 @@ int iso_util_decode_md5_tag(char data[2048], uint32_t *pos, uint32_t *range_start, uint32_t *range_size, char md5[16], int flag) { - static char *tag_magic= "libisofs_checksum_tag_v1 pos="; - static int magic_len= 29; - int ret, bin_count, i; + static char *tag_magic[4] = {"", + "libisofs_checksum_tag_v1", + "libisofs_sb_checksum_tag_v1", + "libisofs_tree_checksum_tag_v1"}; + static int magic_len[4]= {0, 24, 27, 29}; + int ret, bin_count, i, mode, magic_first = 1, magic_last = 3, found = 0; char *cpt, self_md5[16], tag_md5[16]; void *ctx = NULL; - if (strncmp(data, tag_magic, magic_len) != 0) - return(0); - cpt = data + magic_len; + mode = flag & 255; + if (mode > magic_last) + return ISO_WRONG_ARG_VALUE; + if (mode > 0) + magic_first = magic_last = mode; + for (i = magic_first; i <= magic_last; i++) + if (strncmp(data, tag_magic[i], magic_len[i]) == 0) + break; + if (i > magic_last ) + return 0; + found = i; + cpt = data + magic_len[found] + 1; + if (strncmp(cpt, "pos=", 4) != 0) + return 0; + cpt+= 4; ret = iso_util_dec_to_uint32(cpt, pos, 0); if (ret 
<= 0) return 0; @@ -1615,6 +1630,6 @@ int iso_util_decode_md5_tag(char data[2048], uint32_t *pos, return ISO_MD5_AREA_CORRUPTED; if (*(cpt + 5 + 32) != '\n') return 0; - return(1); + return(found); }