diff --git a/demo/iso.c b/demo/iso.c index fd9b612..21021c4 100644 --- a/demo/iso.c +++ b/demo/iso.c @@ -39,7 +39,8 @@ int main(int argc, char **argv) 0, /* dir_mode */ 0, /* file_mode */ 0, /* uid */ - 0 /* gid */ + 0, /* gid */ + NULL /* output charset */ }; if (argc < 2) { diff --git a/src/ecma119.c b/src/ecma119.c index f4a319c..b03607d 100644 --- a/src/ecma119.c +++ b/src/ecma119.c @@ -755,7 +755,24 @@ int ecma119_image_new(IsoImage *src, Ecma119WriteOpts *opts, /* default to locale charset */ setlocale(LC_CTYPE, ""); target->input_charset = strdup(nl_langinfo(CODESET)); - target->output_charset = strdup(target->input_charset); //TODO + if (target->input_charset == NULL) { + iso_image_unref(src); + free(target); + return ISO_MEM_ERROR; + } + + if (opts->output_charset != NULL) { + target->output_charset = strdup(opts->output_charset); + } else { + target->output_charset = strdup(target->input_charset); + } + if (target->output_charset == NULL) { + iso_image_unref(src); + /* input_charset was duplicated above: release it too, or it leaks */ + free(target->input_charset); + free(target); + return ISO_MEM_ERROR; + } /* * 2. Based on those options, create needed writers: iso, joliet... diff --git a/src/libiso_msgs.h b/src/libiso_msgs.h index 9a756f7..ceff4ad 100644 --- a/src/libiso_msgs.h +++ b/src/libiso_msgs.h @@ -389,6 +389,8 @@ Range "vreixo" : 0x00030000 to 0x0003ffff 0x00030103 (FATAL,HIGH) = Read error 0x00030110 (FATAL,HIGH) = Cannot create writer thread + 0x00030500 (SORRY,HIGH) = Charset conversion error + General: 0x00031001 (SORRY,HIGH) = Cannot read file (ignored) 0x00031002 (FATAL,HIGH) = Cannot read file (operation canceled) diff --git a/src/libisofs.h b/src/libisofs.h index 2453a95..950c69b 100644 --- a/src/libisofs.h +++ b/src/libisofs.h @@ -59,7 +59,6 @@ enum IsoHideNodeFlag { * Holds the options for the image generation. */ typedef struct { - //int volnum; /**< The volume in the set which you want to write (usually 0) */ int level; /**< ISO level to write at. */ /** Which extensions to support. 
*/ @@ -100,11 +99,10 @@ typedef struct { mode_t dir_mode; /** Mode to use on dirs when replace_dir_mode == 2. */ mode_t file_mode; /** Mode to use on files when replace_file_mode == 2. */ - gid_t gid; /** gid to use when replace_gid == 2. */ uid_t uid; /** uid to use when replace_uid == 2. */ + gid_t gid; /** gid to use when replace_gid == 2. */ -// char *input_charset; /**< NULL to use default charset */ -// char *ouput_charset; /**< NULL to use default charset */ + char *output_charset; /**< NULL to use default charset */ // uint32_t ms_block; /**< * Start block for multisession. When this is greater than 0, @@ -312,7 +310,7 @@ enum IsoNodeType iso_node_get_type(IsoNode *node); * The node whose name you want to change. Note that you can't change * the name of the root. * @param name - * The name in UTF-8 encoding. If you supply an empty string or a + * The name for the node. If you supply an empty string or a * name greater than 255 characters this returns with failure, and * node name is not modified. * @return @@ -321,7 +319,7 @@ enum IsoNodeType iso_node_get_type(IsoNode *node); int iso_node_set_name(IsoNode *node, const char *name); /** - * Get the name of a node (in UTF-8). + * Get the name of a node. * The returned string belongs to the node and should not be modified nor * freed. Use strdup if you really need your own copy. */ @@ -603,7 +601,7 @@ int iso_dir_iter_take(IsoDirIter *iter); int iso_dir_iter_remove(IsoDirIter *iter); /** - * Get the destination of a node (in UTF-8). + * Get the destination of a node. * The returned string belongs to the node and should not be modified nor * freed. Use strdup if you really need your own copy. 
*/ diff --git a/src/messages.h b/src/messages.h index fb4a229..150f59b 100644 --- a/src/messages.h +++ b/src/messages.h @@ -26,6 +26,8 @@ /** Cannot create writer thread */ #define LIBISO_THREAD_ERROR 0x00030110 +/** Charset conversion error */ +#define LIBISO_CHARSET_ERROR 0x00030500 /** Can't read file (ignored) */ #define LIBISO_CANT_READ_FILE 0x00031001 diff --git a/src/node.h b/src/node.h index 34abad5..7b423d9 100644 --- a/src/node.h +++ b/src/node.h @@ -37,7 +37,7 @@ struct Iso_Node /**< Type of the IsoNode, do not confuse with mode */ enum IsoNodeType type; - char *name; /**< Real name, supossed to be in UTF-8 */ + char *name; /**< Real name, in default charset */ mode_t mode; /**< protection */ uid_t uid; /**< user ID of owner */ diff --git a/src/rockridge.c b/src/rockridge.c index 10365fb..c806684 100644 --- a/src/rockridge.c +++ b/src/rockridge.c @@ -12,6 +12,7 @@ #include "ecma119_tree.h" #include "error.h" #include "writer.h" +#include "messages.h" #include @@ -246,6 +247,30 @@ int rrip_add_CL(Ecma119Image *t, Ecma119Node *n, struct susp_info *susp) return susp_append(t, susp, CL); } +/* Get the name of node n expressed in t->output_charset. The name is copied with strdup() when input and output charsets are equal, otherwise converted with strconv(); if the conversion fails the unconverted name is returned instead. The result may be NULL when strdup() fails. NOTE(review): strconv() is assumed to allocate *name so that the result can always be released with free() - confirm. */ static +char *get_rr_name(Ecma119Image *t, Ecma119Node *n) +{ + int ret; + char *name; + + if (!strcmp(t->input_charset, t->output_charset)) { + /* same charset name on both sides: no conversion needed, just copy */ + return strdup(n->node->name); + } + + ret = strconv(n->node->name, t->input_charset, t->output_charset, &name); + if (ret < 0) { /* conversion failed: report it via iso_msg_sorry() and carry on */ + iso_msg_sorry(t->image, LIBISO_CHARSET_ERROR, + "Charset conversion error. Can't convert %s from %s to %s", + n->node->name, t->input_charset, t->output_charset); + + /* use the original name, it's the best we can do */ + name = strdup(n->node->name); + } + + return name; +} + /** * Add a NM System Use Entry to the given tree node. 
The purpose of this * System Use Entry is to store the content of an Alternate Name to support @@ -544,7 +569,9 @@ size_t rrip_calc_len(Ecma119Image *t, Ecma119Node *n, int type, } if (type == 0) { - size_t namelen = strlen(n->node->name); + char *name = get_rr_name(t, n); + size_t namelen = strlen(name); + free(name); /* NM entry */ if (su_size + 5 + namelen <= space) { @@ -727,6 +754,7 @@ int rrip_get_susp_fields(Ecma119Image *t, Ecma119Node *n, int type, int ret; size_t i; Ecma119Node *node; + char *name = NULL; if (t == NULL || n == NULL || info == NULL) { return ISO_NULL_POINTER; @@ -807,7 +835,6 @@ int rrip_get_susp_fields(Ecma119Image *t, Ecma119Node *n, int type, } if (type == 0) { - char *name; size_t sua_free; /* free space in the SUA */ int nm_type = 0; /* 0 whole entry in SUA, 1 part in CE */ size_t ce_len = 0; /* len of the CE */ @@ -817,8 +844,7 @@ int rrip_get_susp_fields(Ecma119Image *t, Ecma119Node *n, int type, uint8_t **comps = NULL; /* components of the SL field */ size_t n_comp = 0; /* number of components */ - // TODO handle output charset - name = n->node->name; + name = get_rr_name(t, n); namelen = strlen(name); sua_free = space - info->suf_len; @@ -1008,12 +1034,11 @@ int rrip_get_susp_fields(Ecma119Image *t, Ecma119Node *n, int type, * Write the NM part that fits in SUA... Note that CE * entry and NM in the continuation area is added below */ - size_t len = space - info->suf_len - 28 - 5; - ret = rrip_add_NM(t, info, name, len, 1, 0); + namelen = space - info->suf_len - 28 - 5; + ret = rrip_add_NM(t, info, name, namelen, 1, 0); if (ret < 0) { goto add_susp_cleanup; } - name += len; } if (ce_len > 0) { @@ -1028,7 +1053,8 @@ int rrip_get_susp_fields(Ecma119Image *t, Ecma119Node *n, int type, /* * ..and the part that goes to continuation area. 
*/ - ret = rrip_add_NM(t, info, name, strlen(name), 0, 1); + ret = rrip_add_NM(t, info, name + namelen, strlen(name + namelen), + 0, 1); if (ret < 0) { goto add_susp_cleanup; } @@ -1084,9 +1110,11 @@ int rrip_get_susp_fields(Ecma119Image *t, Ecma119Node *n, int type, */ info->suf_len += (info->suf_len % 2); + free(name); return ISO_SUCCESS; add_susp_cleanup:; + free(name); susp_info_free(info); return ret; } diff --git a/test/test_rockridge.c b/test/test_rockridge.c index 50e63b6..598a503 100644 --- a/test/test_rockridge.c +++ b/test/test_rockridge.c @@ -14,8 +14,13 @@ static void test_rrip_calc_len_file() { IsoFile *file; Ecma119Node *node; + Ecma119Image t; size_t sua_len = 0, ce_len = 0; + memset(&t, 0, sizeof(Ecma119Image)); + t.input_charset = "UTF-8"; + t.output_charset = "UTF-8"; + file = malloc(sizeof(IsoFile)); CU_ASSERT_PTR_NOT_NULL_FATAL(file); file->msblock = 0; @@ -34,7 +39,7 @@ static void test_rrip_calc_len_file() file->node.name = "a small name.txt"; node->iso_name = "A_SMALL_.TXT"; - sua_len = rrip_calc_len(NULL, node, 0, 255 - 46, &ce_len); + sua_len = rrip_calc_len(&t, node, 0, 255 - 46, &ce_len); CU_ASSERT_EQUAL(ce_len, 0); CU_ASSERT_EQUAL(sua_len, 44 + (5 + 16) + (5 + 3*7) + 1); @@ -44,7 +49,7 @@ static void test_rrip_calc_len_file() "PADPADPADADPADPADPADPAD.txt"; node->iso_name = "A_BIG_NA.TXT"; - sua_len = rrip_calc_len(NULL, node, 0, 255 - 46, &ce_len); + sua_len = rrip_calc_len(&t, node, 0, 255 - 46, &ce_len); CU_ASSERT_EQUAL(ce_len, 0); /* note that 254 is the max length of a directory record, as it needs to * be an even number */ @@ -56,7 +61,7 @@ static void test_rrip_calc_len_file() "PADPADPADADPADPADPADPAD1.txt"; node->iso_name = "A_BIG_NA.TXT"; - sua_len = rrip_calc_len(NULL, node, 0, 255 - 46, &ce_len); + sua_len = rrip_calc_len(&t, node, 0, 255 - 46, &ce_len); /* 28 (the chars moved to include the CE entry) + 5 (header of NM in CE) + * 1 (the char that originally didn't fit) */ CU_ASSERT_EQUAL(ce_len, 28 + 5 + 1); @@ -72,8 +77,13 @@ 
static void test_rrip_calc_len_symlink() { IsoSymlink *link; Ecma119Node *node; + Ecma119Image t; size_t sua_len = 0, ce_len = 0; + memset(&t, 0, sizeof(Ecma119Image)); + t.input_charset = "UTF-8"; + t.output_charset = "UTF-8"; + link = malloc(sizeof(IsoSymlink)); CU_ASSERT_PTR_NOT_NULL_FATAL(link); link->node.type = LIBISO_SYMLINK; @@ -89,7 +99,7 @@ static void test_rrip_calc_len_symlink() link->dest = "/three/components"; node->iso_name = "A_SMALL_.TXT"; - sua_len = rrip_calc_len(NULL, node, 0, 255 - 46, &ce_len); + sua_len = rrip_calc_len(&t, node, 0, 255 - 46, &ce_len); CU_ASSERT_EQUAL(ce_len, 0); CU_ASSERT_EQUAL(sua_len, 44 + (5 + 16) + (5 + 3*7) + 1 + (5 + 2 + (2+5) + (2+10)) ); @@ -99,7 +109,7 @@ static void test_rrip_calc_len_symlink() "that fits in the SU.txt"; link->dest = "./and/../a/./big/destination/with/10/components"; node->iso_name = "THIS_NAM.TXT"; - sua_len = rrip_calc_len(NULL, node, 0, 255 - 46, &ce_len); + sua_len = rrip_calc_len(&t, node, 0, 255 - 46, &ce_len); CU_ASSERT_EQUAL(ce_len, 0); CU_ASSERT_EQUAL(sua_len, 254 - 46); @@ -109,7 +119,7 @@ static void test_rrip_calc_len_symlink() "that fits in the SU.txt"; link->dest = "./and/../a/./big/destination/with/10/componentsk"; node->iso_name = "THIS_NAM.TXT"; - sua_len = rrip_calc_len(NULL, node, 0, 255 - 46, &ce_len); + sua_len = rrip_calc_len(&t, node, 0, 255 - 46, &ce_len); CU_ASSERT_EQUAL(ce_len, 60); CU_ASSERT_EQUAL(sua_len, 44 + (5 + 74) + (5 + 3*7) + 1 + 28); @@ -118,7 +128,7 @@ static void test_rrip_calc_len_symlink() "that fits in the SUx.txt"; link->dest = "./and/../a/./big/destination/with/10/components"; node->iso_name = "THIS_NAM.TXT"; - sua_len = rrip_calc_len(NULL, node, 0, 255 - 46, &ce_len); + sua_len = rrip_calc_len(&t, node, 0, 255 - 46, &ce_len); CU_ASSERT_EQUAL(ce_len, 59); CU_ASSERT_EQUAL(sua_len, 44 + (5 + 75) + (5 + 3*7) + 28); @@ -129,7 +139,7 @@ static void test_rrip_calc_len_symlink() "max that fits in the SU once we add the CE entry.txt"; link->dest = 
"./and/../a/./big/destination/with/10/components"; node->iso_name = "THIS_NAM.TXT"; - sua_len = rrip_calc_len(NULL, node, 0, 255 - 46, &ce_len); + sua_len = rrip_calc_len(&t, node, 0, 255 - 46, &ce_len); CU_ASSERT_EQUAL(ce_len, 59); CU_ASSERT_EQUAL(sua_len, 254 - 46); @@ -138,7 +148,7 @@ static void test_rrip_calc_len_symlink() "max that fits in the SU once we add the CE entry.txt"; link->dest = "./and/../a/./big/destination/with/10/components/"; node->iso_name = "THIS_NAM.TXT"; - sua_len = rrip_calc_len(NULL, node, 0, 255 - 46, &ce_len); + sua_len = rrip_calc_len(&t, node, 0, 255 - 46, &ce_len); CU_ASSERT_EQUAL(ce_len, 59); CU_ASSERT_EQUAL(sua_len, 254 - 46); @@ -147,7 +157,7 @@ static void test_rrip_calc_len_symlink() "max that fits in the SU once we add the CE entryc.txt"; link->dest = "./and/../a/./big/destination/with/10/components"; node->iso_name = "THIS_NAM.TXT"; - sua_len = rrip_calc_len(NULL, node, 0, 255 - 46, &ce_len); + sua_len = rrip_calc_len(&t, node, 0, 255 - 46, &ce_len); CU_ASSERT_EQUAL(ce_len, 59 + 6); CU_ASSERT_EQUAL(sua_len, 254 - 46); @@ -159,7 +169,7 @@ static void test_rrip_calc_len_symlink() "just/two/hundred/and/fifty/bytes/bytes/bytes/bytes/bytes" "/bytes/bytes/bytes/bytes/bytes/bytes/../bytes"; node->iso_name = "THIS_NAM.TXT"; - sua_len = rrip_calc_len(NULL, node, 0, 255 - 46, &ce_len); + sua_len = rrip_calc_len(&t, node, 0, 255 - 46, &ce_len); CU_ASSERT_EQUAL(ce_len, 255); CU_ASSERT_EQUAL(sua_len, 44 + (5 + 74) + (5 + 3*7) + 1 + 28); @@ -171,7 +181,7 @@ static void test_rrip_calc_len_symlink() "just/two/hundred/and/fifty/bytes/bytes/bytes/bytes/bytes" "/bytes/bytes/bytes/bytes/bytes/bytes/../bytess"; node->iso_name = "THIS_NAM.TXT"; - sua_len = rrip_calc_len(NULL, node, 0, 255 - 46, &ce_len); + sua_len = rrip_calc_len(&t, node, 0, 255 - 46, &ce_len); CU_ASSERT_EQUAL(ce_len, 261); CU_ASSERT_EQUAL(sua_len, 44 + (5 + 74) + (5 + 3*7) + 1 + 28); @@ -205,6 +215,8 @@ void test_rrip_get_susp_fields_file() uint8_t *entry; memset(&t, 0, 
sizeof(Ecma119Image)); + t.input_charset = "UTF-8"; + t.output_charset = "UTF-8"; file = malloc(sizeof(IsoFile)); CU_ASSERT_PTR_NOT_NULL_FATAL(file); diff --git a/test/test_util.c b/test/test_util.c index eceb54e..833d7f7 100644 --- a/test/test_util.c +++ b/test/test_util.c @@ -41,6 +41,12 @@ static void test_strconv() CU_ASSERT_STRING_EQUAL(out, (char*)out1); free(out); + /* UTF-8 to ISO-8859-15 */ + ret = strconv((char*)out1, "UTF-8", "ISO-8859-15", &out); + CU_ASSERT_EQUAL(ret, 1); + CU_ASSERT_STRING_EQUAL(out, (char*)in1); + free(out); + /* try with an incorrect input */ ret = strconv((char*)in2, "UTF-8", "ISO-8859-15", &out); CU_ASSERT_EQUAL(ret, ISO_CHARSET_CONV_ERROR);