Support for setting an output charset for RR NM entries.

It will default to the input charset (i.e. the locale charset for now). 
Names will be stored internally in that locale charset. Note that the input 
charset mustn't be changed by the user. Instead, we can provide an input 
charset property to IsoFilesystem implementations.
This commit is contained in:
Vreixo Formoso 2007-12-28 00:20:02 +01:00
parent 6c7c54af93
commit 9a90afcf69
9 changed files with 94 additions and 30 deletions

View File

@ -39,7 +39,8 @@ int main(int argc, char **argv)
0, /* dir_mode */
0, /* file_mode */
0, /* uid */
0 /* gid */
0, /* gid */
NULL /* output charset */
};
if (argc < 2) {

View File

@ -755,7 +755,22 @@ int ecma119_image_new(IsoImage *src, Ecma119WriteOpts *opts,
/* default to locale charset */
setlocale(LC_CTYPE, "");
target->input_charset = strdup(nl_langinfo(CODESET));
target->output_charset = strdup(target->input_charset); //TODO
if (target->input_charset == NULL) {
iso_image_unref(src);
free(target);
return ISO_MEM_ERROR;
}
if (opts->output_charset != NULL) {
target->output_charset = strdup(opts->output_charset);
} else {
target->output_charset = strdup(target->input_charset);
}
if (target->output_charset == NULL) {
iso_image_unref(src);
free(target);
return ISO_MEM_ERROR;
}
/*
* 2. Based on those options, create needed writers: iso, joliet...

View File

@ -389,6 +389,8 @@ Range "vreixo" : 0x00030000 to 0x0003ffff
0x00030103 (FATAL,HIGH) = Read error
0x00030110 (FATAL,HIGH) = Cannot create writer thread
0x00030500 (SORRY,HIGH) = Charset conversion error
General:
0x00031001 (SORRY,HIGH) = Cannot read file (ignored)
0x00031002 (FATAL,HIGH) = Cannot read file (operation canceled)

View File

@ -59,7 +59,6 @@ enum IsoHideNodeFlag {
* Holds the options for the image generation.
*/
typedef struct {
//int volnum; /**< The volume in the set which you want to write (usually 0) */
int level; /**< ISO level to write at. */
/** Which extensions to support. */
@ -100,11 +99,10 @@ typedef struct {
mode_t dir_mode; /** Mode to use on dirs when replace_dir_mode == 2. */
mode_t file_mode; /** Mode to use on files when replace_file_mode == 2. */
gid_t gid; /** gid to use when replace_gid == 2. */
uid_t uid; /** uid to use when replace_uid == 2. */
gid_t gid; /** gid to use when replace_gid == 2. */
// char *input_charset; /**< NULL to use default charset */
// char *ouput_charset; /**< NULL to use default charset */
char *output_charset; /**< NULL to use default charset */
// uint32_t ms_block;
/**<
* Start block for multisession. When this is greater than 0,
@ -312,7 +310,7 @@ enum IsoNodeType iso_node_get_type(IsoNode *node);
* The node whose name you want to change. Note that you can't change
* the name of the root.
* @param name
* The name in UTF-8 encoding. If you supply an empty string or a
* The name for the node. If you supply an empty string or a
* name greater than 255 characters this returns with failure, and
* node name is not modified.
* @return
@ -321,7 +319,7 @@ enum IsoNodeType iso_node_get_type(IsoNode *node);
int iso_node_set_name(IsoNode *node, const char *name);
/**
* Get the name of a node (in UTF-8).
* Get the name of a node.
* The returned string belongs to the node and should not be modified nor
* freed. Use strdup if you really need your own copy.
*/
@ -603,7 +601,7 @@ int iso_dir_iter_take(IsoDirIter *iter);
int iso_dir_iter_remove(IsoDirIter *iter);
/**
* Get the destination of a node (in UTF-8).
* Get the destination of a node.
* The returned string belongs to the node and should not be modified nor
* freed. Use strdup if you really need your own copy.
*/

View File

@ -26,6 +26,8 @@
/** Cannot create writer thread */
#define LIBISO_THREAD_ERROR 0x00030110
/** Charset conversion error */
#define LIBISO_CHARSET_ERROR 0x00030500
/** Can't read file (ignored) */
#define LIBISO_CANT_READ_FILE 0x00031001

View File

@ -37,7 +37,7 @@ struct Iso_Node
/**< Type of the IsoNode, do not confuse with mode */
enum IsoNodeType type;
char *name; /**< Real name, supossed to be in UTF-8 */
char *name; /**< Real name, in default charset */
mode_t mode; /**< protection */
uid_t uid; /**< user ID of owner */

View File

@ -12,6 +12,7 @@
#include "ecma119_tree.h"
#include "error.h"
#include "writer.h"
#include "messages.h"
#include <string.h>
@ -246,6 +247,30 @@ int rrip_add_CL(Ecma119Image *t, Ecma119Node *n, struct susp_info *susp)
return susp_append(t, susp, CL);
}
/**
 * Build the name of a node as it should be written to the RR NM entry,
 * expressed in the output charset of the image.
 *
 * When the input and output charsets are the same, or when the conversion
 * fails, a plain copy of the node name is returned instead. A conversion
 * failure is reported through iso_msg_sorry() with LIBISO_CHARSET_ERROR.
 *
 * @param t
 *      Image being written; supplies input_charset and output_charset.
 * @param n
 *      Node whose name (n->node->name) is wanted.
 * @return
 *      A string the caller must free(). It can be NULL if strdup() fails.
 */
static
char *get_rr_name(Ecma119Image *t, Ecma119Node *n)
{
    char *converted;
    int rc;

    if (strcmp(t->input_charset, t->output_charset) != 0) {
        /* charsets differ, so try to convert the name */
        rc = strconv(n->node->name, t->input_charset, t->output_charset,
                     &converted);
        if (rc >= 0) {
            return converted;
        }

        iso_msg_sorry(t->image, LIBISO_CHARSET_ERROR,
                      "Charset conversion error. Can't convert %s from %s to %s",
                      n->node->name, t->input_charset, t->output_charset);
        /* fall through: the unconverted name is the best we can offer */
    }

    /* either no conversion was needed, or it failed */
    return strdup(n->node->name);
}
/**
* Add a NM System Use Entry to the given tree node. The purpose of this
* System Use Entry is to store the content of an Alternate Name to support
@ -544,7 +569,9 @@ size_t rrip_calc_len(Ecma119Image *t, Ecma119Node *n, int type,
}
if (type == 0) {
size_t namelen = strlen(n->node->name);
char *name = get_rr_name(t, n);
size_t namelen = strlen(name);
free(name);
/* NM entry */
if (su_size + 5 + namelen <= space) {
@ -727,6 +754,7 @@ int rrip_get_susp_fields(Ecma119Image *t, Ecma119Node *n, int type,
int ret;
size_t i;
Ecma119Node *node;
char *name = NULL;
if (t == NULL || n == NULL || info == NULL) {
return ISO_NULL_POINTER;
@ -807,7 +835,6 @@ int rrip_get_susp_fields(Ecma119Image *t, Ecma119Node *n, int type,
}
if (type == 0) {
char *name;
size_t sua_free; /* free space in the SUA */
int nm_type = 0; /* 0 whole entry in SUA, 1 part in CE */
size_t ce_len = 0; /* len of the CE */
@ -817,8 +844,7 @@ int rrip_get_susp_fields(Ecma119Image *t, Ecma119Node *n, int type,
uint8_t **comps = NULL; /* components of the SL field */
size_t n_comp = 0; /* number of components */
// TODO handle output charset
name = n->node->name;
name = get_rr_name(t, n);
namelen = strlen(name);
sua_free = space - info->suf_len;
@ -1008,12 +1034,11 @@ int rrip_get_susp_fields(Ecma119Image *t, Ecma119Node *n, int type,
* Write the NM part that fits in SUA... Note that CE
* entry and NM in the continuation area is added below
*/
size_t len = space - info->suf_len - 28 - 5;
ret = rrip_add_NM(t, info, name, len, 1, 0);
namelen = space - info->suf_len - 28 - 5;
ret = rrip_add_NM(t, info, name, namelen, 1, 0);
if (ret < 0) {
goto add_susp_cleanup;
}
name += len;
}
if (ce_len > 0) {
@ -1028,7 +1053,8 @@ int rrip_get_susp_fields(Ecma119Image *t, Ecma119Node *n, int type,
/*
* ..and the part that goes to continuation area.
*/
ret = rrip_add_NM(t, info, name, strlen(name), 0, 1);
ret = rrip_add_NM(t, info, name + namelen, strlen(name + namelen),
0, 1);
if (ret < 0) {
goto add_susp_cleanup;
}
@ -1084,9 +1110,11 @@ int rrip_get_susp_fields(Ecma119Image *t, Ecma119Node *n, int type,
*/
info->suf_len += (info->suf_len % 2);
free(name);
return ISO_SUCCESS;
add_susp_cleanup:;
free(name);
susp_info_free(info);
return ret;
}

View File

@ -14,8 +14,13 @@ static void test_rrip_calc_len_file()
{
IsoFile *file;
Ecma119Node *node;
Ecma119Image t;
size_t sua_len = 0, ce_len = 0;
memset(&t, 0, sizeof(Ecma119Image));
t.input_charset = "UTF-8";
t.output_charset = "UTF-8";
file = malloc(sizeof(IsoFile));
CU_ASSERT_PTR_NOT_NULL_FATAL(file);
file->msblock = 0;
@ -34,7 +39,7 @@ static void test_rrip_calc_len_file()
file->node.name = "a small name.txt";
node->iso_name = "A_SMALL_.TXT";
sua_len = rrip_calc_len(NULL, node, 0, 255 - 46, &ce_len);
sua_len = rrip_calc_len(&t, node, 0, 255 - 46, &ce_len);
CU_ASSERT_EQUAL(ce_len, 0);
CU_ASSERT_EQUAL(sua_len, 44 + (5 + 16) + (5 + 3*7) + 1);
@ -44,7 +49,7 @@ static void test_rrip_calc_len_file()
"PADPADPADADPADPADPADPAD.txt";
node->iso_name = "A_BIG_NA.TXT";
sua_len = rrip_calc_len(NULL, node, 0, 255 - 46, &ce_len);
sua_len = rrip_calc_len(&t, node, 0, 255 - 46, &ce_len);
CU_ASSERT_EQUAL(ce_len, 0);
/* note that 254 is the max length of a directory record, as it needs to
* be an even number */
@ -56,7 +61,7 @@ static void test_rrip_calc_len_file()
"PADPADPADADPADPADPADPAD1.txt";
node->iso_name = "A_BIG_NA.TXT";
sua_len = rrip_calc_len(NULL, node, 0, 255 - 46, &ce_len);
sua_len = rrip_calc_len(&t, node, 0, 255 - 46, &ce_len);
/* 28 (the chars moved to include the CE entry) + 5 (header of NM in CE) +
* 1 (the char that originally didn't fit) */
CU_ASSERT_EQUAL(ce_len, 28 + 5 + 1);
@ -72,8 +77,13 @@ static void test_rrip_calc_len_symlink()
{
IsoSymlink *link;
Ecma119Node *node;
Ecma119Image t;
size_t sua_len = 0, ce_len = 0;
memset(&t, 0, sizeof(Ecma119Image));
t.input_charset = "UTF-8";
t.output_charset = "UTF-8";
link = malloc(sizeof(IsoSymlink));
CU_ASSERT_PTR_NOT_NULL_FATAL(link);
link->node.type = LIBISO_SYMLINK;
@ -89,7 +99,7 @@ static void test_rrip_calc_len_symlink()
link->dest = "/three/components";
node->iso_name = "A_SMALL_.TXT";
sua_len = rrip_calc_len(NULL, node, 0, 255 - 46, &ce_len);
sua_len = rrip_calc_len(&t, node, 0, 255 - 46, &ce_len);
CU_ASSERT_EQUAL(ce_len, 0);
CU_ASSERT_EQUAL(sua_len, 44 + (5 + 16) + (5 + 3*7) + 1 +
(5 + 2 + (2+5) + (2+10)) );
@ -99,7 +109,7 @@ static void test_rrip_calc_len_symlink()
"that fits in the SU.txt";
link->dest = "./and/../a/./big/destination/with/10/components";
node->iso_name = "THIS_NAM.TXT";
sua_len = rrip_calc_len(NULL, node, 0, 255 - 46, &ce_len);
sua_len = rrip_calc_len(&t, node, 0, 255 - 46, &ce_len);
CU_ASSERT_EQUAL(ce_len, 0);
CU_ASSERT_EQUAL(sua_len, 254 - 46);
@ -109,7 +119,7 @@ static void test_rrip_calc_len_symlink()
"that fits in the SU.txt";
link->dest = "./and/../a/./big/destination/with/10/componentsk";
node->iso_name = "THIS_NAM.TXT";
sua_len = rrip_calc_len(NULL, node, 0, 255 - 46, &ce_len);
sua_len = rrip_calc_len(&t, node, 0, 255 - 46, &ce_len);
CU_ASSERT_EQUAL(ce_len, 60);
CU_ASSERT_EQUAL(sua_len, 44 + (5 + 74) + (5 + 3*7) + 1 + 28);
@ -118,7 +128,7 @@ static void test_rrip_calc_len_symlink()
"that fits in the SUx.txt";
link->dest = "./and/../a/./big/destination/with/10/components";
node->iso_name = "THIS_NAM.TXT";
sua_len = rrip_calc_len(NULL, node, 0, 255 - 46, &ce_len);
sua_len = rrip_calc_len(&t, node, 0, 255 - 46, &ce_len);
CU_ASSERT_EQUAL(ce_len, 59);
CU_ASSERT_EQUAL(sua_len, 44 + (5 + 75) + (5 + 3*7) + 28);
@ -129,7 +139,7 @@ static void test_rrip_calc_len_symlink()
"max that fits in the SU once we add the CE entry.txt";
link->dest = "./and/../a/./big/destination/with/10/components";
node->iso_name = "THIS_NAM.TXT";
sua_len = rrip_calc_len(NULL, node, 0, 255 - 46, &ce_len);
sua_len = rrip_calc_len(&t, node, 0, 255 - 46, &ce_len);
CU_ASSERT_EQUAL(ce_len, 59);
CU_ASSERT_EQUAL(sua_len, 254 - 46);
@ -138,7 +148,7 @@ static void test_rrip_calc_len_symlink()
"max that fits in the SU once we add the CE entry.txt";
link->dest = "./and/../a/./big/destination/with/10/components/";
node->iso_name = "THIS_NAM.TXT";
sua_len = rrip_calc_len(NULL, node, 0, 255 - 46, &ce_len);
sua_len = rrip_calc_len(&t, node, 0, 255 - 46, &ce_len);
CU_ASSERT_EQUAL(ce_len, 59);
CU_ASSERT_EQUAL(sua_len, 254 - 46);
@ -147,7 +157,7 @@ static void test_rrip_calc_len_symlink()
"max that fits in the SU once we add the CE entryc.txt";
link->dest = "./and/../a/./big/destination/with/10/components";
node->iso_name = "THIS_NAM.TXT";
sua_len = rrip_calc_len(NULL, node, 0, 255 - 46, &ce_len);
sua_len = rrip_calc_len(&t, node, 0, 255 - 46, &ce_len);
CU_ASSERT_EQUAL(ce_len, 59 + 6);
CU_ASSERT_EQUAL(sua_len, 254 - 46);
@ -159,7 +169,7 @@ static void test_rrip_calc_len_symlink()
"just/two/hundred/and/fifty/bytes/bytes/bytes/bytes/bytes"
"/bytes/bytes/bytes/bytes/bytes/bytes/../bytes";
node->iso_name = "THIS_NAM.TXT";
sua_len = rrip_calc_len(NULL, node, 0, 255 - 46, &ce_len);
sua_len = rrip_calc_len(&t, node, 0, 255 - 46, &ce_len);
CU_ASSERT_EQUAL(ce_len, 255);
CU_ASSERT_EQUAL(sua_len, 44 + (5 + 74) + (5 + 3*7) + 1 + 28);
@ -171,7 +181,7 @@ static void test_rrip_calc_len_symlink()
"just/two/hundred/and/fifty/bytes/bytes/bytes/bytes/bytes"
"/bytes/bytes/bytes/bytes/bytes/bytes/../bytess";
node->iso_name = "THIS_NAM.TXT";
sua_len = rrip_calc_len(NULL, node, 0, 255 - 46, &ce_len);
sua_len = rrip_calc_len(&t, node, 0, 255 - 46, &ce_len);
CU_ASSERT_EQUAL(ce_len, 261);
CU_ASSERT_EQUAL(sua_len, 44 + (5 + 74) + (5 + 3*7) + 1 + 28);
@ -205,6 +215,8 @@ void test_rrip_get_susp_fields_file()
uint8_t *entry;
memset(&t, 0, sizeof(Ecma119Image));
t.input_charset = "UTF-8";
t.output_charset = "UTF-8";
file = malloc(sizeof(IsoFile));
CU_ASSERT_PTR_NOT_NULL_FATAL(file);

View File

@ -41,6 +41,12 @@ static void test_strconv()
CU_ASSERT_STRING_EQUAL(out, (char*)out1);
free(out);
/* UTF-8 to ISO-8859-15 */
ret = strconv((char*)out1, "UTF-8", "ISO-8859-15", &out);
CU_ASSERT_EQUAL(ret, 1);
CU_ASSERT_STRING_EQUAL(out, (char*)in1);
free(out);
/* try with an incorrect input */
ret = strconv((char*)in2, "UTF-8", "ISO-8859-15", &out);
CU_ASSERT_EQUAL(ret, ISO_CHARSET_CONV_ERROR);