From c03d4d9b333458e2612bf9f552de45446b5ad675 Mon Sep 17 00:00:00 2001 From: Vreixo Formoso Date: Sun, 16 Dec 2007 17:32:05 +0100 Subject: [PATCH] Implementation of name mangling algorithm. This has to ensure that file names are unique among all files in an iso directory. Current implementation can be improved by using a hash table to keep the names, instead of iterating over all children. An alternative method, keeping children sorted all the time, thus allowing binary search, has been considered but discarded. Current implementation supports up to 9,999,999 equal files per directory. --- src/ecma119_tree.c | 221 ++++++++++++++++++++++++++++++++++++++++++++- src/error.h | 4 +- src/util.c | 9 ++ src/util.h | 2 + 4 files changed, 234 insertions(+), 2 deletions(-) diff --git a/src/ecma119_tree.c b/src/ecma119_tree.c index 26fbdf1..66af21b 100644 --- a/src/ecma119_tree.c +++ b/src/ecma119_tree.c @@ -16,6 +16,7 @@ #include #include +#include static int get_iso_name(Ecma119Image *img, IsoNode *iso, char **name) @@ -266,6 +267,218 @@ void sort_tree(Ecma119Node *root) } } +static +int contains_name(Ecma119Node *dir, const char *name) +{ + int i; + for (i = 0; i < dir->info.dir.nchildren; i++) { + Ecma119Node *child = dir->info.dir.children[i]; + if (!strcmp(child->iso_name, name)) { + return 1; + } + } + return 0; +} + +/** + * Ensures that the ISO name of each child of the given dir is unique, + * changing some of them if needed. + * It also ensures that the resulting filename is never longer than the given + * maximum (max_file_len or max_dir_len), including extension. If needed, the extension will be reduced, + * but never under 3 characters. 
+ */ +static +int mangle_dir(Ecma119Node *dir, int max_file_len, int max_dir_len) +{ + int i, nchildren; + Ecma119Node **children; + int need_sort = 0; + + nchildren = dir->info.dir.nchildren; + children = dir->info.dir.children; + + for (i = 0; i < nchildren; ++i) { + char *name, *ext; + char full_name[40]; + int max; /* computed max len for name, without extension */ + int j = i; + int digits = 1; /* characters to change per name */ + + /* first, find all children with the same name */ + while (j + 1 < nchildren && + !cmp_node_name(children + i, children + j + 1)) { + ++j; + } + if (j == i) { + /* name is unique */ + continue; + } + + /* + * A max of 7 characters is good enough; it allows handling up to + * 9,999,999 files with the same name. We can increment this to + * max_name_len, but the int_pow() function must then be modified + * to return a bigger integer. + */ + while (digits < 8) { + int ok, k; + char *dot; + int change = 0; /* number to be written */ + + /* copy name to buffer */ + strcpy(full_name, children[i]->iso_name); + + /* compute name and extension */ + dot = strrchr(full_name, '.'); + if (dot != NULL && children[i]->type != ECMA119_DIR) { + + /* + * File (not dir) with extension + * Note that we don't need to check for placeholders, as + * tree reparent happens later, so no placeholders can be + * here at this time. + * + * TODO !!! Well, we will need a way to mangle root names + * if we do reparent! + */ + int extlen; + full_name[dot - full_name] = '\0'; + name = full_name; + ext = dot + 1; + + /* + * For iso level 1 we force ext len to be 3, as name + * can't grow on the extension space + */ + extlen = (max_file_len == 12) ? 
3 : strlen(ext); + max = max_file_len - extlen - 1 - digits; + if (max <= 0) { + /* this can happen if extension is too long */ + if (extlen + max > 3) { + /* + * reduce extension len, to give name an extra char + * note that max is negative or 0 + */ + extlen = extlen + max - 1; + ext[extlen] = '\0'; + max = max_file_len - extlen - 1 - digits; + } else { + /* + * error, we don't support extensions < 3 + * This can't happen with current limit of digits. + */ + return ISO_ERROR; + } + } + /* ok, reduce name by digits */ + if (name + max < dot) { + name[max] = '\0'; + } + } else { + /* Directory, or file without extension */ + if (children[i]->type == ECMA119_DIR) { + max = max_dir_len - digits; + dot = NULL; /* dots have no meaning in dirs */ + } else { + max = max_file_len - digits; + } + name = full_name; + if (max < strlen(name)) { + name[max] = '\0'; + } + /* let ext be an empty string */ + ext = name + strlen(name); + } + + ok = 1; + /* change name of each file */ + for (k = i; k <= j; ++k) { + char tmp[40]; + char fmt[16]; + if (dot != NULL) { + sprintf(fmt, "%%s%%0%dd.%%s", digits); + } else { + sprintf(fmt, "%%s%%0%dd%%s", digits); + } + while (1) { + sprintf(tmp, fmt, name, change, ext); + ++change; + if (change > int_pow(10, digits)) { + ok = 0; + break; + } + if (!contains_name(dir, tmp)) { + /* the name is unique, so it can be used */ + break; + } + } + if (ok) { + char *new = strdup(tmp); + if (new == NULL) { + return ISO_MEM_ERROR; + } + free(children[k]->iso_name); + children[k]->iso_name = new; + /* + * if we change a name we need to sort again children + * at the end + */ + need_sort = 1; + } else { + /* we need to increment digits */ + break; + } + } + if (ok) { + break; + } else { + ++digits; + } + } + if (digits == 8) { + return ISO_MANGLE_TOO_MUCH_FILES; + } + i = j; + } + + /* + * If needed, sort again the files inside dir + */ + if (need_sort) { + qsort(children, nchildren, sizeof(void*), cmp_node_name); + } + + /* recurse */ + for (i = 0; i < 
nchildren; ++i) { + int ret; + if (children[i]->type == ECMA119_DIR) { + ret = mangle_dir(children[i], max_file_len, max_dir_len); + if (ret < 0) { + /* error */ + return ret; + } + } + } + + return ISO_SUCCESS; +} + +static +int mangle_tree(Ecma119Image *img) +{ + int max_file, max_dir; + + // TODO take care about relaxed constraints + if (img->iso_level == 1) { + max_file = 12; /* 8 + 3 + 1 */ + max_dir = 8; + } else { + max_file = max_dir = 31; + } + return mangle_dir(img->root, max_file, max_dir); +} + + int ecma119_tree_create(Ecma119Image *img, IsoNode *iso) { int ret; @@ -278,10 +491,16 @@ int ecma119_tree_create(Ecma119Image *img, IsoNode *iso) img->root = root; sort_tree(root); + ret = mangle_tree(img); + if (ret < 0) { + return ret; + } + /* * TODO * - reparent if RR - * - mangle names + * This must be done after mangle_tree, as name mangling may increment + * file name length. After reparent, the root dir must be mangled again */ return ISO_SUCCESS; diff --git a/src/error.h b/src/error.h index ad1b406..0bbe9d8 100644 --- a/src/error.h +++ b/src/error.h @@ -36,6 +36,8 @@ #define ISO_FILE_IS_NOT_DIR -108 #define ISO_FILE_IS_NOT_SYMLINK -109 -#define ISO_CHARSET_CONV_ERROR -150 +#define ISO_CHARSET_CONV_ERROR -150 + +#define ISO_MANGLE_TOO_MUCH_FILES -200 #endif /*LIBISO_ERROR_H_*/ diff --git a/src/util.c b/src/util.c index 803fcd4..4be894e 100644 --- a/src/util.c +++ b/src/util.c @@ -27,6 +27,15 @@ int round_up(int n, int mul) return div_up(n, mul) * mul; } +int int_pow(int base, int power) +{ + int result = 1; + while (--power >= 0) { + result *= base; + } + return result; +} + /** * Convert a str in a specified codeset to WCHAR_T. 
* The result must be free() when no more needed diff --git a/src/util.h b/src/util.h index 93dc390..8e19ac0 100644 --- a/src/util.h +++ b/src/util.h @@ -13,6 +13,8 @@ extern inline int div_up(int n, int div); extern inline int round_up(int n, int mul); +int int_pow(int base, int power); + /** * Convert a given string from any input charset to ASCII *