From 56796ff55f473eff397710119f9a2f1845c08933 Mon Sep 17 00:00:00 2001
From: Thomas Schmitt
Date: Sun, 3 May 2009 17:08:29 +0200
Subject: [PATCH] Introduced hardlink unification at image generation time.
 For now it works only with data files or with files from the imported image.

---
 libisofs/ecma119_tree.c | 215 ++++++++++++++++++++++++++++++++++++++--
 libisofs/fs_image.c     |   7 --
 libisofs/image.c        |   9 --
 libisofs/libisofs.h     |  15 ++--
 libisofs/node.c         |  16 +++-
 libisofs/node.h         |   1 +
 libisofs/stream.c       |   4 +-
 7 files changed, 230 insertions(+), 37 deletions(-)

diff --git a/libisofs/ecma119_tree.c b/libisofs/ecma119_tree.c
index 7340739..10f792c 100644
--- a/libisofs/ecma119_tree.c
+++ b/libisofs/ecma119_tree.c
@@ -114,6 +114,8 @@ int create_ecma119_node(Ecma119Image *img, IsoNode *iso, Ecma119Node **node)
 
 #ifdef Libisofs_hardlink_prooF
 
+    /* >>> ts A90503 : this is obsolete with Libisofs_hardlink_matcheR */
+
     /*ts A90428 */
     /* Looking only for valid ISO image inode numbers. */
     ret = iso_node_get_id(iso, &fs_id, &dev_id, &(ecma->ino), 1);
@@ -831,6 +833,205 @@ int reorder_tree(Ecma119Image *img, Ecma119Node *dir, int level, int pathlen)
     return ISO_SUCCESS;
 }
 
+/*
+ * @param flag
+ *     bit0= recursion
+ *     bit1= count nodes rather than fill them into *nodes
+ * @return
+ *     <0 error
+ *     bit0= saw ino == 0
+ *     bit1= saw ino != 0
+ */
+static
+int make_node_array(Ecma119Image *img, Ecma119Node *dir,
+                Ecma119Node **nodes, size_t nodes_size, size_t *node_count,
+                int flag)
+{
+    int ret, result = 0;
+    size_t i;
+    Ecma119Node *child;
+
+    if (!(flag & 1)) {
+        *node_count = 0;
+        if (!(flag & 2)) {
+            /* Register the tree root node */
+            if (*node_count >= nodes_size) {
+                iso_msg_submit(img->image->id, ISO_ASSERT_FAILURE, 0,
+                        "Programming error: Overflow of hardlink sort array");
+                return ISO_ASSERT_FAILURE;
+            }
+            nodes[*node_count] = dir;
+        }
+        result|= (dir->ino == 0 ? 1 : 2);
+        (*node_count)++;
+    }
+
+    for (i = 0; i < dir->info.dir->nchildren; i++) {
+        child = dir->info.dir->children[i];
+        if (!(flag & 2)) {
+            if (*node_count >= nodes_size) {
+                iso_msg_submit(img->image->id, ISO_ASSERT_FAILURE, 0,
+                        "Programming error: Overflow of hardlink sort array");
+                return ISO_ASSERT_FAILURE;
+            }
+            nodes[*node_count] = child;
+        }
+        result|= (child->ino == 0 ? 1 : 2);
+        (*node_count)++;
+
+        if (child->type == ECMA119_DIR) {
+            ret = make_node_array(img, child,
+                                  nodes, nodes_size, node_count, flag | 1);
+            if (ret < 0)
+                return ret;
+        }
+    }
+    return result;
+}
+
+/* ts A90503 */
+/*
+ * Comparison function for qsort() on an array of (Ecma119Node *).
+ * Nodes which compare as equal are treated as one hardlink family by
+ * match_hardlinks().
+ * @param v1, v2
+ *     Pointers to the array elements, i.e. (Ecma119Node **)
+ * @return
+ *     -1, 0, 1 (or the result of iso_file_src_cmp() for file nodes)
+ */
+static
+int ecma119_node_cmp(const void *v1, const void *v2)
+{
+    int ret1, ret2;
+    Ecma119Node *n1, *n2;
+    unsigned int fs_id1, fs_id2;
+    dev_t dev_id1, dev_id2;
+    ino_t ino_id1, ino_id2;
+
+    n1 = *((Ecma119Node **) v1);
+    n2 = *((Ecma119Node **) v2);
+    if (n1 == n2)
+        return 0;
+
+    /* Imported or explicit ISO image node id has absolute priority */
+    ret1 = (iso_node_get_id(n1->node, &fs_id1, &dev_id1, &ino_id1, 1) > 0);
+    ret2 = (iso_node_get_id(n2->node, &fs_id2, &dev_id2, &ino_id2, 1) > 0);
+    if (ret1 != ret2)
+        return (ret1 < ret2 ? -1 : 1);
+    if (ret1) {
+        /* fs_id and dev_id do not matter here.
+           Both nodes have explicit inode numbers of the emerging image.
+        */
+        return (ino_id1 < ino_id2 ? -1 : ino_id1 > ino_id2 ? 1 : 0);
+    }
+
+    if (n1->type < n2->type)
+        return -1;
+    if (n1->type > n2->type)
+        return 1;
+
+    if (n1->type == ECMA119_FILE) {
+        ret1 = iso_file_src_cmp(n1->info.file, n2->info.file);
+        return ret1;
+
+        /* >>> Create means to inquire ECMA119_SYMLINK and ECMA119_SPECIAL
+               for their original fs,dev,ino tuple */
+    } else {
+
+        /* Rank by node address, not by array slot address. The result must
+           not depend on where qsort() currently keeps the elements. */
+        return (n1 < n2 ? -1 : 1); /* case n1 == n2 is handled above */
+    }
+    return 0;
+}
+
+/* ts A90503 */
+/*
+ * Set .ino and .nlink of nodes[family_start] to nodes[next_family - 1].
+ * All get the same inode number. nlink is the number of family members.
+ * @param img_ino
+ *     Inode number to be used. 0= obtain a number by img_give_ino_number()
+ * @param flag
+ *     Unused yet
+ * @return
+ *     1 always
+ */
+static
+int family_set_ino(Ecma119Image *img, Ecma119Node **nodes, size_t family_start,
+                   size_t next_family, ino_t img_ino, int flag)
+{
+    size_t i;
+
+    if (img_ino == 0) {
+        img_ino = img_give_ino_number(img->image, 0);
+    }
+    for (i = family_start; i < next_family; i++) {
+        nodes[i]->ino = img_ino;
+        nodes[i]->nlink = next_family - family_start;
+    }
+    return 1;
+}
+
+/* ts A90503 */
+/*
+ * Collect dir and all nodes below it in an array, sort the array by
+ * ecma119_node_cmp() and hand the same inode number and link count to
+ * all members of each run of nodes which compare as equal.
+ * @param flag
+ *     Unused yet
+ * @return
+ *     ISO_SUCCESS or <0 on error
+ */
+static
+int match_hardlinks(Ecma119Image *img, Ecma119Node *dir, int flag)
+{
+    int ret;
+    size_t nodes_size = 0, node_count = 0, i, family_start;
+    Ecma119Node **nodes = NULL;
+    unsigned int fs_id;
+    dev_t dev_id;
+    ino_t img_ino = 0;
+
+    ret = make_node_array(img, dir, nodes, nodes_size, &node_count, 2);
+    if (ret < 0)
+        return ret;
+    nodes_size = node_count;
+    nodes = (Ecma119Node **) calloc(nodes_size, sizeof(Ecma119Node *));
+    if (nodes == NULL)
+        return ISO_OUT_OF_MEM;
+    ret = make_node_array(img, dir, nodes, nodes_size, &node_count, 0);
+    if (ret < 0)
+        goto ex;
+
+    /* Sort according to id tuples and IsoFileSrc identity. */
+    qsort(nodes, node_count, sizeof(Ecma119Node *), ecma119_node_cmp);
+
+    /* Hand out image inode numbers to all Ecma119Node.ino == 0 .
+       Same sorting rank gets same inode number.
+    */
+    iso_node_get_id(nodes[0]->node, &fs_id, &dev_id, &img_ino, 1);
+    family_start = 0;
+    for (i = 1; i < node_count; i++) {
+        if (ecma119_node_cmp(nodes + (i - 1), nodes + i) == 0) {
+            /* Still in same ino family */
+            if (img_ino == 0) { /* Just in case any member knows its img_ino */
+                iso_node_get_id(nodes[i]->node, &fs_id, &dev_id, &img_ino, 1);
+            }
+            continue;
+        }
+        family_set_ino(img, nodes, family_start, i, img_ino, 0);
+        iso_node_get_id(nodes[i]->node, &fs_id, &dev_id, &img_ino, 1);
+        family_start = i;
+    }
+    family_set_ino(img, nodes, family_start, i, img_ino, 0);
+
+    ret = ISO_SUCCESS;
+ex:;
+    if (nodes != NULL)
+        free((char *) nodes);
+    return ret;
+}
+
 int ecma119_tree_create(Ecma119Image *img)
 {
     int ret;
@@ -848,14 +1049,12 @@ int ecma119_tree_create(Ecma119Image *img)
 
 #ifdef Libisofs_hardlink_matcheR
 
-    /* ts A90430 */
-
-    /* >>> if there are Ecma119Node.ino == 0 : */
-       >>> Sort tree according to id tuples and IsoFileSrc identity.
-       >>> Hand out image inode numbers to all Ecma119Node.ino == 0 .
-           Same sorting rank gets same inode number.
-       >>> Set Ecma119Node.nlink according to final ino outcome
-    */
+    /* ts A90503 */
+    iso_msg_debug(img->image->id, "Matching hardlinks...");
+    ret = match_hardlinks(img, img->root, 0);
+    if (ret < 0) {
+        return ret;
+    }
 
 #endif /* ! Libisofs_hardlink_matcheR */
 
diff --git a/libisofs/fs_image.c b/libisofs/fs_image.c
index 621331c..d09d1e0 100644
--- a/libisofs/fs_image.c
+++ b/libisofs/fs_image.c
@@ -1662,8 +1662,6 @@ int iso_file_source_new_ifs(IsoImageFilesystem *fs, IsoFileSource *parent,
 
 #ifdef Libisofs_new_fs_image_inO
 
-    /* >>> ts A90426 : this ifdef shall become a read option */
-
     if (fsdata->rr != RR_EXT_112) {
         if (fsdata->rr == 0) {
             atts.st_nlink = 1;
@@ -3080,11 +3078,6 @@ int iso_image_import(IsoImage *image, IsoDataSource *src,
             iso_node_builder_unref(image->builder);
             goto import_revert;
         }
-
-    /* <<< debugging */
-    } else {
-        /* <<< just for the duplicate inode check */
-        img_collect_inos(image, image->root, 0);
     }
 
 #endif /* ! Libisofs_hardlink_prooF */
diff --git a/libisofs/image.c b/libisofs/image.c
index 77dd69d..2b61e62 100644
--- a/libisofs/image.c
+++ b/libisofs/image.c
@@ -343,15 +343,6 @@ int img_register_ino(IsoImage *image, IsoNode *node, int flag)
     if (ret > 0 && ino >= image->used_inodes_start &&
         ino <= image->used_inodes_start + (ISO_USED_INODE_RANGE - 1)) {
                                   /* without -1 : rollover hazard on 32 bit */
-
-        /* <<< */
-        if (ino &&
-            image->used_inodes[(ino - image->used_inodes_start) / 8]
-            & (1 << (ino % 8)))
-            fprintf(stderr,
-                    "libisofs_DEBUG: found duplicate inode number %.f\n",
-                    (double) ino);
-
         image->used_inodes[(ino - image->used_inodes_start) / 8]
                                                    |= (1 << (ino % 8));
     }
diff --git a/libisofs/libisofs.h b/libisofs/libisofs.h
index a4c7c0e..12fb75c 100644
--- a/libisofs/libisofs.h
+++ b/libisofs/libisofs.h
@@ -5307,18 +5307,21 @@ struct burn_source {
 /* ---------------------------- Experiments ---------------------------- */
 
 /* Hardlinks : During image generation accompany the tree of IsoFileSrc
-               by a sorted structure of Ecma119Node.
+               by a sorted array of Ecma119Node.
                The sorting order shall bring together candidates for being
                hardlink siblings resp. having identical content.
 
-               This has to be in sync with the IsoFileSrc unification by
-               IsoRBTree and iso_file_src_cmp() which cannot be obsoleted
-               because Joliet and ISO1999 depend on it.
+               This is in sync with the IsoFileSrc unification by IsoRBTree
+               and iso_file_src_cmp().
+               That tree cannot be obsoleted because Joliet and ISO1999 depend
+               on it. On the other hand, the Ecma119Node array includes objects
+               which have no IsoFileSrc attached. So both, tree and array, are
+               needed.
 
-   ! INCOMPLETE AND UNDECIDED ! DO NOT USE YET !
+   ! VERY EXPERIMENTAL ! DO NOT USE YET !
 
-#define Libisofs_hardlink_matcheR yes
 */
+#define Libisofs_hardlink_matcheR yes
 
 
 /* Hardlinks : Override Libisofs_new_fs_image_inO and preserve inode numbers
diff --git a/libisofs/node.c b/libisofs/node.c
index 6ba7a70..9d16fc2 100644
--- a/libisofs/node.c
+++ b/libisofs/node.c
@@ -2259,6 +2259,7 @@ int iso_px_ino_xinfo_func(void *data, int flag)
  * @return
  *     1= reply is valid from stream, 2= reply is valid from xinfo
  *     0= no id available, <0= error
+ *     (fs_id, dev_id, ino_id) will be (0,0,0) in case of return <= 0
  */
 int iso_node_get_id(IsoNode *node, unsigned int *fs_id, dev_t *dev_id,
                     ino_t *ino_id, int flag)
@@ -2269,7 +2270,7 @@ int iso_node_get_id(IsoNode *node, unsigned int *fs_id, dev_t *dev_id,
 
     ret = iso_node_get_xinfo(node, iso_px_ino_xinfo_func, &xipt);
     if (ret < 0)
-        return ret;
+        goto no_id;
     if (ret == 1) {
         *fs_id = ISO_IMAGE_FS_ID;
         *dev_id = 0;
@@ -2280,11 +2281,18 @@ int iso_node_get_id(IsoNode *node, unsigned int *fs_id, dev_t *dev_id,
     if (node->type == LIBISO_FILE) {
         file= (IsoFile *) node;
         iso_stream_get_id(file->stream, fs_id, dev_id, ino_id);
-        if (*fs_id != ISO_IMAGE_FS_ID && (flag & 1))
-            return 0;
+        if (*fs_id != ISO_IMAGE_FS_ID && (flag & 1)) {
+            ret = 0;
+            goto no_id;
+        }
         return 1;
     }
-    return 0;
+    ret = 0;
+no_id:;
+    *fs_id = 0;
+    *dev_id = 0;
+    *ino_id = 0;
+    return ret;
 }
 
 
diff --git a/libisofs/node.h b/libisofs/node.h
index 6bec92b..a5cd1b6 100644
--- a/libisofs/node.h
+++ b/libisofs/node.h
@@ -408,6 +408,7 @@ int iso_file_zf_by_magic(IsoFile *file, int flag);
  * @return
  *     1= reply is valid from stream, 2= reply is valid from xinfo
  *     0= no id available, <0= error
+ *     (fs_id, dev_id, ino_id) will be (0,0,0) in case of return <= 0
  */
 int iso_node_get_id(IsoNode *node, unsigned int *fs_id, dev_t *dev_id,
                     ino_t *ino_id, int flag);
diff --git a/libisofs/stream.c b/libisofs/stream.c
index f82f553..e2173fa 100644
--- a/libisofs/stream.c
+++ b/libisofs/stream.c
@@ -738,10 +738,8 @@ int iso_stream_cmp_ino(IsoStream *s1, IsoStream *s2, int flag)
     off_t size1, size2;
 
     if (s1 == s2) {
-        return 0; /* Normally just a shortcut.
-                     But important if Libisofs_file_src_cmp_non_zerO */
+        return 0;
     }
-
     iso_stream_get_id(s1, &fs_id1, &dev_id1, &ino_id1);
     iso_stream_get_id(s2, &fs_id2, &dev_id2, &ino_id2);
 