From 9a90afcf69fc5204d804bfb02a2628bf309be251 Mon Sep 17 00:00:00 2001 From: Vreixo Formoso Date: Fri, 28 Dec 2007 00:20:02 +0100 Subject: [PATCH] Support for setting an output charset for RR NM entries. It will default to input charset (i.e. the locale charset for now). Names will be stored internally in that locale charset. Note that input charset mustn't be changed by user. Instead, we can provide an input charset property to IsoFilesystem implementations. --- demo/iso.c | 3 ++- src/ecma119.c | 17 ++++++++++++++++- src/libiso_msgs.h | 2 ++ src/libisofs.h | 12 +++++------- src/messages.h | 2 ++ src/node.h | 2 +- src/rockridge.c | 44 +++++++++++++++++++++++++++++++++++-------- test/test_rockridge.c | 36 +++++++++++++++++++++++------------ test/test_util.c | 6 ++++++ 9 files changed, 94 insertions(+), 30 deletions(-) diff --git a/demo/iso.c b/demo/iso.c index fd9b612..21021c4 100644 --- a/demo/iso.c +++ b/demo/iso.c @@ -39,7 +39,8 @@ int main(int argc, char **argv) 0, /* dir_mode */ 0, /* file_mode */ 0, /* uid */ - 0 /* gid */ + 0, /* gid */ + NULL /* output charset */ }; if (argc < 2) { diff --git a/src/ecma119.c b/src/ecma119.c index f4a319c..b03607d 100644 --- a/src/ecma119.c +++ b/src/ecma119.c @@ -755,7 +755,22 @@ int ecma119_image_new(IsoImage *src, Ecma119WriteOpts *opts, /* default to locale charset */ setlocale(LC_CTYPE, ""); target->input_charset = strdup(nl_langinfo(CODESET)); - target->output_charset = strdup(target->input_charset); //TODO + if (target->input_charset == NULL) { + iso_image_unref(src); + free(target); + return ISO_MEM_ERROR; + } + + if (opts->output_charset != NULL) { + target->output_charset = strdup(opts->output_charset); + } else { + target->output_charset = strdup(target->input_charset); + } + if (target->output_charset == NULL) { + iso_image_unref(src); + free(target); + return ISO_MEM_ERROR; + } /* * 2. Based on those options, create needed writers: iso, joliet... 
diff --git a/src/libiso_msgs.h b/src/libiso_msgs.h index 9a756f7..ceff4ad 100644 --- a/src/libiso_msgs.h +++ b/src/libiso_msgs.h @@ -389,6 +389,8 @@ Range "vreixo" : 0x00030000 to 0x0003ffff 0x00030103 (FATAL,HIGH) = Read error 0x00030110 (FATAL,HIGH) = Cannot create writer thread + 0x00030500 (SORRY,HIGH) = Charset conversion error + General: 0x00031001 (SORRY,HIGH) = Cannot read file (ignored) 0x00031002 (FATAL,HIGH) = Cannot read file (operation canceled) diff --git a/src/libisofs.h b/src/libisofs.h index 2453a95..950c69b 100644 --- a/src/libisofs.h +++ b/src/libisofs.h @@ -59,7 +59,6 @@ enum IsoHideNodeFlag { * Holds the options for the image generation. */ typedef struct { - //int volnum; /**< The volume in the set which you want to write (usually 0) */ int level; /**< ISO level to write at. */ /** Which extensions to support. */ @@ -100,11 +99,10 @@ typedef struct { mode_t dir_mode; /** Mode to use on dirs when replace_dir_mode == 2. */ mode_t file_mode; /** Mode to use on files when replace_file_mode == 2. */ - gid_t gid; /** gid to use when replace_gid == 2. */ uid_t uid; /** uid to use when replace_uid == 2. */ + gid_t gid; /** gid to use when replace_gid == 2. */ -// char *input_charset; /**< NULL to use default charset */ -// char *ouput_charset; /**< NULL to use default charset */ + char *output_charset; /**< NULL to use default charset */ // uint32_t ms_block; /**< * Start block for multisession. When this is greater than 0, @@ -312,7 +310,7 @@ enum IsoNodeType iso_node_get_type(IsoNode *node); * The node whose name you want to change. Note that you can't change * the name of the root. * @param name - * The name in UTF-8 encoding. If you supply an empty string or a + * The name for the node. If you supply an empty string or a * name greater than 255 characters this returns with failure, and * node name is not modified. 
* @return @@ -321,7 +319,7 @@ enum IsoNodeType iso_node_get_type(IsoNode *node); int iso_node_set_name(IsoNode *node, const char *name); /** - * Get the name of a node (in UTF-8). + * Get the name of a node. * The returned string belongs to the node and should not be modified nor * freed. Use strdup if you really need your own copy. */ @@ -603,7 +601,7 @@ int iso_dir_iter_take(IsoDirIter *iter); int iso_dir_iter_remove(IsoDirIter *iter); /** - * Get the destination of a node (in UTF-8). + * Get the destination of a node. * The returned string belongs to the node and should not be modified nor * freed. Use strdup if you really need your own copy. */ diff --git a/src/messages.h b/src/messages.h index fb4a229..150f59b 100644 --- a/src/messages.h +++ b/src/messages.h @@ -26,6 +26,8 @@ /** Cannot create writer thread */ #define LIBISO_THREAD_ERROR 0x00030110 +/** Charset conversion error */ +#define LIBISO_CHARSET_ERROR 0x00030500 /** Can't read file (ignored) */ #define LIBISO_CANT_READ_FILE 0x00031001 diff --git a/src/node.h b/src/node.h index 34abad5..7b423d9 100644 --- a/src/node.h +++ b/src/node.h @@ -37,7 +37,7 @@ struct Iso_Node /**< Type of the IsoNode, do not confuse with mode */ enum IsoNodeType type; - char *name; /**< Real name, supossed to be in UTF-8 */ + char *name; /**< Real name, in default charset */ mode_t mode; /**< protection */ uid_t uid; /**< user ID of owner */ diff --git a/src/rockridge.c b/src/rockridge.c index 10365fb..c806684 100644 --- a/src/rockridge.c +++ b/src/rockridge.c @@ -12,6 +12,7 @@ #include "ecma119_tree.h" #include "error.h" #include "writer.h" +#include "messages.h" #include @@ -246,6 +247,30 @@ int rrip_add_CL(Ecma119Image *t, Ecma119Node *n, struct susp_info *susp) return susp_append(t, susp, CL); } +static +char *get_rr_name(Ecma119Image *t, Ecma119Node *n) +{ + int ret; + char *name; + + if (!strcmp(t->input_charset, t->output_charset)) { + /* no conversion needed */ + return strdup(n->node->name); + } + + ret = 
strconv(n->node->name, t->input_charset, t->output_charset, &name); + if (ret < 0) { + iso_msg_sorry(t->image, LIBISO_CHARSET_ERROR, + "Charset conversion error. Can't convert %s from %s to %s", + n->node->name, t->input_charset, t->output_charset); + + /* use the original name, it's the best we can do */ + name = strdup(n->node->name); + } + + return name; +} + /** * Add a NM System Use Entry to the given tree node. The purpose of this * System Use Entry is to store the content of an Alternate Name to support @@ -544,7 +569,9 @@ size_t rrip_calc_len(Ecma119Image *t, Ecma119Node *n, int type, } if (type == 0) { - size_t namelen = strlen(n->node->name); + char *name = get_rr_name(t, n); + size_t namelen = strlen(name); + free(name); /* NM entry */ if (su_size + 5 + namelen <= space) { @@ -727,6 +754,7 @@ int rrip_get_susp_fields(Ecma119Image *t, Ecma119Node *n, int type, int ret; size_t i; Ecma119Node *node; + char *name = NULL; if (t == NULL || n == NULL || info == NULL) { return ISO_NULL_POINTER; @@ -807,7 +835,6 @@ int rrip_get_susp_fields(Ecma119Image *t, Ecma119Node *n, int type, } if (type == 0) { - char *name; size_t sua_free; /* free space in the SUA */ int nm_type = 0; /* 0 whole entry in SUA, 1 part in CE */ size_t ce_len = 0; /* len of the CE */ @@ -817,8 +844,7 @@ int rrip_get_susp_fields(Ecma119Image *t, Ecma119Node *n, int type, uint8_t **comps = NULL; /* components of the SL field */ size_t n_comp = 0; /* number of components */ - // TODO handle output charset - name = n->node->name; + name = get_rr_name(t, n); namelen = strlen(name); sua_free = space - info->suf_len; @@ -1008,12 +1034,11 @@ int rrip_get_susp_fields(Ecma119Image *t, Ecma119Node *n, int type, * Write the NM part that fits in SUA... 
Note that CE * entry and NM in the continuation area is added below */ - size_t len = space - info->suf_len - 28 - 5; - ret = rrip_add_NM(t, info, name, len, 1, 0); + namelen = space - info->suf_len - 28 - 5; + ret = rrip_add_NM(t, info, name, namelen, 1, 0); if (ret < 0) { goto add_susp_cleanup; } - name += len; } if (ce_len > 0) { @@ -1028,7 +1053,8 @@ int rrip_get_susp_fields(Ecma119Image *t, Ecma119Node *n, int type, /* * ..and the part that goes to continuation area. */ - ret = rrip_add_NM(t, info, name, strlen(name), 0, 1); + ret = rrip_add_NM(t, info, name + namelen, strlen(name + namelen), + 0, 1); if (ret < 0) { goto add_susp_cleanup; } @@ -1084,9 +1110,11 @@ int rrip_get_susp_fields(Ecma119Image *t, Ecma119Node *n, int type, */ info->suf_len += (info->suf_len % 2); + free(name); return ISO_SUCCESS; add_susp_cleanup:; + free(name); susp_info_free(info); return ret; } diff --git a/test/test_rockridge.c b/test/test_rockridge.c index 50e63b6..598a503 100644 --- a/test/test_rockridge.c +++ b/test/test_rockridge.c @@ -14,8 +14,13 @@ static void test_rrip_calc_len_file() { IsoFile *file; Ecma119Node *node; + Ecma119Image t; size_t sua_len = 0, ce_len = 0; + memset(&t, 0, sizeof(Ecma119Image)); + t.input_charset = "UTF-8"; + t.output_charset = "UTF-8"; + file = malloc(sizeof(IsoFile)); CU_ASSERT_PTR_NOT_NULL_FATAL(file); file->msblock = 0; @@ -34,7 +39,7 @@ static void test_rrip_calc_len_file() file->node.name = "a small name.txt"; node->iso_name = "A_SMALL_.TXT"; - sua_len = rrip_calc_len(NULL, node, 0, 255 - 46, &ce_len); + sua_len = rrip_calc_len(&t, node, 0, 255 - 46, &ce_len); CU_ASSERT_EQUAL(ce_len, 0); CU_ASSERT_EQUAL(sua_len, 44 + (5 + 16) + (5 + 3*7) + 1); @@ -44,7 +49,7 @@ static void test_rrip_calc_len_file() "PADPADPADADPADPADPADPAD.txt"; node->iso_name = "A_BIG_NA.TXT"; - sua_len = rrip_calc_len(NULL, node, 0, 255 - 46, &ce_len); + sua_len = rrip_calc_len(&t, node, 0, 255 - 46, &ce_len); CU_ASSERT_EQUAL(ce_len, 0); /* note that 254 is the max length 
of a directory record, as it needs to * be an even number */ @@ -56,7 +61,7 @@ static void test_rrip_calc_len_file() "PADPADPADADPADPADPADPAD1.txt"; node->iso_name = "A_BIG_NA.TXT"; - sua_len = rrip_calc_len(NULL, node, 0, 255 - 46, &ce_len); + sua_len = rrip_calc_len(&t, node, 0, 255 - 46, &ce_len); /* 28 (the chars moved to include the CE entry) + 5 (header of NM in CE) + * 1 (the char that originally didn't fit) */ CU_ASSERT_EQUAL(ce_len, 28 + 5 + 1); @@ -72,8 +77,13 @@ static void test_rrip_calc_len_symlink() { IsoSymlink *link; Ecma119Node *node; + Ecma119Image t; size_t sua_len = 0, ce_len = 0; + memset(&t, 0, sizeof(Ecma119Image)); + t.input_charset = "UTF-8"; + t.output_charset = "UTF-8"; + link = malloc(sizeof(IsoSymlink)); CU_ASSERT_PTR_NOT_NULL_FATAL(link); link->node.type = LIBISO_SYMLINK; @@ -89,7 +99,7 @@ static void test_rrip_calc_len_symlink() link->dest = "/three/components"; node->iso_name = "A_SMALL_.TXT"; - sua_len = rrip_calc_len(NULL, node, 0, 255 - 46, &ce_len); + sua_len = rrip_calc_len(&t, node, 0, 255 - 46, &ce_len); CU_ASSERT_EQUAL(ce_len, 0); CU_ASSERT_EQUAL(sua_len, 44 + (5 + 16) + (5 + 3*7) + 1 + (5 + 2 + (2+5) + (2+10)) ); @@ -99,7 +109,7 @@ static void test_rrip_calc_len_symlink() "that fits in the SU.txt"; link->dest = "./and/../a/./big/destination/with/10/components"; node->iso_name = "THIS_NAM.TXT"; - sua_len = rrip_calc_len(NULL, node, 0, 255 - 46, &ce_len); + sua_len = rrip_calc_len(&t, node, 0, 255 - 46, &ce_len); CU_ASSERT_EQUAL(ce_len, 0); CU_ASSERT_EQUAL(sua_len, 254 - 46); @@ -109,7 +119,7 @@ static void test_rrip_calc_len_symlink() "that fits in the SU.txt"; link->dest = "./and/../a/./big/destination/with/10/componentsk"; node->iso_name = "THIS_NAM.TXT"; - sua_len = rrip_calc_len(NULL, node, 0, 255 - 46, &ce_len); + sua_len = rrip_calc_len(&t, node, 0, 255 - 46, &ce_len); CU_ASSERT_EQUAL(ce_len, 60); CU_ASSERT_EQUAL(sua_len, 44 + (5 + 74) + (5 + 3*7) + 1 + 28); @@ -118,7 +128,7 @@ static void test_rrip_calc_len_symlink() 
"that fits in the SUx.txt"; link->dest = "./and/../a/./big/destination/with/10/components"; node->iso_name = "THIS_NAM.TXT"; - sua_len = rrip_calc_len(NULL, node, 0, 255 - 46, &ce_len); + sua_len = rrip_calc_len(&t, node, 0, 255 - 46, &ce_len); CU_ASSERT_EQUAL(ce_len, 59); CU_ASSERT_EQUAL(sua_len, 44 + (5 + 75) + (5 + 3*7) + 28); @@ -129,7 +139,7 @@ static void test_rrip_calc_len_symlink() "max that fits in the SU once we add the CE entry.txt"; link->dest = "./and/../a/./big/destination/with/10/components"; node->iso_name = "THIS_NAM.TXT"; - sua_len = rrip_calc_len(NULL, node, 0, 255 - 46, &ce_len); + sua_len = rrip_calc_len(&t, node, 0, 255 - 46, &ce_len); CU_ASSERT_EQUAL(ce_len, 59); CU_ASSERT_EQUAL(sua_len, 254 - 46); @@ -138,7 +148,7 @@ static void test_rrip_calc_len_symlink() "max that fits in the SU once we add the CE entry.txt"; link->dest = "./and/../a/./big/destination/with/10/components/"; node->iso_name = "THIS_NAM.TXT"; - sua_len = rrip_calc_len(NULL, node, 0, 255 - 46, &ce_len); + sua_len = rrip_calc_len(&t, node, 0, 255 - 46, &ce_len); CU_ASSERT_EQUAL(ce_len, 59); CU_ASSERT_EQUAL(sua_len, 254 - 46); @@ -147,7 +157,7 @@ static void test_rrip_calc_len_symlink() "max that fits in the SU once we add the CE entryc.txt"; link->dest = "./and/../a/./big/destination/with/10/components"; node->iso_name = "THIS_NAM.TXT"; - sua_len = rrip_calc_len(NULL, node, 0, 255 - 46, &ce_len); + sua_len = rrip_calc_len(&t, node, 0, 255 - 46, &ce_len); CU_ASSERT_EQUAL(ce_len, 59 + 6); CU_ASSERT_EQUAL(sua_len, 254 - 46); @@ -159,7 +169,7 @@ static void test_rrip_calc_len_symlink() "just/two/hundred/and/fifty/bytes/bytes/bytes/bytes/bytes" "/bytes/bytes/bytes/bytes/bytes/bytes/../bytes"; node->iso_name = "THIS_NAM.TXT"; - sua_len = rrip_calc_len(NULL, node, 0, 255 - 46, &ce_len); + sua_len = rrip_calc_len(&t, node, 0, 255 - 46, &ce_len); CU_ASSERT_EQUAL(ce_len, 255); CU_ASSERT_EQUAL(sua_len, 44 + (5 + 74) + (5 + 3*7) + 1 + 28); @@ -171,7 +181,7 @@ static void 
test_rrip_calc_len_symlink() "just/two/hundred/and/fifty/bytes/bytes/bytes/bytes/bytes" "/bytes/bytes/bytes/bytes/bytes/bytes/../bytess"; node->iso_name = "THIS_NAM.TXT"; - sua_len = rrip_calc_len(NULL, node, 0, 255 - 46, &ce_len); + sua_len = rrip_calc_len(&t, node, 0, 255 - 46, &ce_len); CU_ASSERT_EQUAL(ce_len, 261); CU_ASSERT_EQUAL(sua_len, 44 + (5 + 74) + (5 + 3*7) + 1 + 28); @@ -205,6 +215,8 @@ void test_rrip_get_susp_fields_file() uint8_t *entry; memset(&t, 0, sizeof(Ecma119Image)); + t.input_charset = "UTF-8"; + t.output_charset = "UTF-8"; file = malloc(sizeof(IsoFile)); CU_ASSERT_PTR_NOT_NULL_FATAL(file); diff --git a/test/test_util.c b/test/test_util.c index eceb54e..833d7f7 100644 --- a/test/test_util.c +++ b/test/test_util.c @@ -41,6 +41,12 @@ static void test_strconv() CU_ASSERT_STRING_EQUAL(out, (char*)out1); free(out); + /* UTF-8 to ISO-8859-15 */ + ret = strconv((char*)out1, "UTF-8", "ISO-8859-15", &out); + CU_ASSERT_EQUAL(ret, 1); + CU_ASSERT_STRING_EQUAL(out, (char*)in1); + free(out); + /* try with an incorrect input */ ret = strconv((char*)in2, "UTF-8", "ISO-8859-15", &out); CU_ASSERT_EQUAL(ret, ISO_CHARSET_CONV_ERROR);