New API call iso_write_opts_set_joliet_utf16()

and ability to read Joliet names as UTF-16BE
This commit is contained in:
Thomas Schmitt 2013-12-17 21:45:52 +01:00
parent ba47d1534c
commit 88555bd059
10 changed files with 144 additions and 16 deletions

View File

@ -1,3 +1,8 @@
bzr branch lp:libisofs/for-libisoburn (to become libisofs-1.3.6.tar.gz)
===============================================================================
* New API call iso_write_opts_set_joliet_utf16() and ability to read Joliet
names as UTF-16BE
libisofs-1.3.4.tar.gz Thu Dec 12 2013 libisofs-1.3.4.tar.gz Thu Dec 12 2013
=============================================================================== ===============================================================================
* Giving sort weight 2 as default to El Torito boot images * Giving sort weight 2 as default to El Torito boot images

View File

@ -1510,6 +1510,21 @@ ex:;
} }
/*
 * Submit the closing summary about Joliet names which did not fit into
 * character set UCS-2.
 * If more such names were counted than ISO_JOLIET_UCS2_WARN_MAX, a note
 * about the additional names is issued first. Whenever at least one name
 * was counted, the total count is reported. Nothing is submitted if the
 * counter is 0.
 *
 * @param target
 *      The image generation context whose counter joliet_ucs2_failures
 *      gets evaluated.
 */
static
void issue_write_warning_summary(Ecma119Image *target)
{
    const size_t failed_names = target->joliet_ucs2_failures;

    if (failed_names == 0)
        return;

    if (failed_names > ISO_JOLIET_UCS2_WARN_MAX) {
        iso_msg_submit(-1, ISO_NAME_NOT_UCS2, 0,
  "More filenames found which were not suitable for Joliet character set UCS-2");
    }

    /* The counter is passed as double and printed without decimals */
    iso_msg_submit(-1, ISO_NAME_NOT_UCS2, 0,
         "Sum of filenames not suitable for Joliet character set UCS-2: %.f",
                   (double) failed_names);
}
static static
void *write_function(void *arg) void *write_function(void *arg)
{ {
@ -1566,6 +1581,8 @@ void *write_function(void *arg)
if (res <= 0) if (res <= 0)
goto write_error; goto write_error;
issue_write_warning_summary(target);
target->image->generator_is_running = 0; target->image->generator_is_running = 0;
/* Give up reference claim made in ecma119_image_new(). /* Give up reference claim made in ecma119_image_new().
@ -1783,6 +1800,7 @@ int ecma119_image_new(IsoImage *src, IsoWriteOpts *opts, Ecma119Image **img)
target->relaxed_vol_atts = opts->relaxed_vol_atts; target->relaxed_vol_atts = opts->relaxed_vol_atts;
target->joliet_longer_paths = opts->joliet_longer_paths; target->joliet_longer_paths = opts->joliet_longer_paths;
target->joliet_long_names = opts->joliet_long_names; target->joliet_long_names = opts->joliet_long_names;
target->joliet_utf16 = opts->joliet_utf16;
target->rrip_version_1_10 = opts->rrip_version_1_10; target->rrip_version_1_10 = opts->rrip_version_1_10;
target->rrip_1_10_px_ino = opts->rrip_1_10_px_ino; target->rrip_1_10_px_ino = opts->rrip_1_10_px_ino;
target->aaip_susp_1_10 = opts->aaip_susp_1_10; target->aaip_susp_1_10 = opts->aaip_susp_1_10;
@ -2007,6 +2025,8 @@ int ecma119_image_new(IsoImage *src, IsoWriteOpts *opts, Ecma119Image **img)
target->filesrc_start = 0; target->filesrc_start = 0;
target->filesrc_blocks = 0; target->filesrc_blocks = 0;
target->joliet_ucs2_failures = 0;
/* /*
* 2. Based on those options, create needed writers: iso, joliet... * 2. Based on those options, create needed writers: iso, joliet...
* Each writer inits its structures and stores needed info into * Each writer inits its structures and stores needed info into
@ -2758,6 +2778,7 @@ int iso_write_opts_new(IsoWriteOpts **opts, int profile)
wopts->fat = 0; wopts->fat = 0;
wopts->fifo_size = 1024; /* 2 MB buffer */ wopts->fifo_size = 1024; /* 2 MB buffer */
wopts->sort_files = 1; /* file sorting is always good */ wopts->sort_files = 1; /* file sorting is always good */
wopts->joliet_utf16 = 0;
wopts->rr_reloc_dir = NULL; wopts->rr_reloc_dir = NULL;
wopts->rr_reloc_flags = 0; wopts->rr_reloc_flags = 0;
wopts->system_area_data = NULL; wopts->system_area_data = NULL;
@ -3035,6 +3056,15 @@ int iso_write_opts_set_joliet_long_names(IsoWriteOpts *opts, int allow)
return ISO_SUCCESS; return ISO_SUCCESS;
} }
/*
 * Choose whether Joliet names get produced as UTF-16BE rather than as its
 * subset UCS-2.
 *
 * @param opts
 *      The option set to be manipulated.
 * @param allow
 *      0 = disable, any other value = enable. Stored normalized to 0 or 1.
 * @return
 *      ISO_SUCCESS, or ISO_NULL_POINTER if opts is NULL.
 */
int iso_write_opts_set_joliet_utf16(IsoWriteOpts *opts, int allow)
{
    if (!opts)
        return ISO_NULL_POINTER;

    opts->joliet_utf16 = (allow != 0);
    return ISO_SUCCESS;
}
int iso_write_opts_set_rrip_version_1_10(IsoWriteOpts *opts, int oldvers) int iso_write_opts_set_rrip_version_1_10(IsoWriteOpts *opts, int oldvers)
{ {
if (opts == NULL) { if (opts == NULL) {

View File

@ -87,6 +87,12 @@
#define ISO_GPT_ENTRIES_MAX 248 #define ISO_GPT_ENTRIES_MAX 248
/* How many warnings to issue about writing Joliet names which cannot be
properly represented in UCS-2 and thus had to be defaulted to '_'.
*/
#define ISO_JOLIET_UCS2_WARN_MAX 3
/** /**
* Holds the options for the image generation. * Holds the options for the image generation.
*/ */
@ -192,6 +198,11 @@ struct iso_write_opts {
*/ */
unsigned int joliet_long_names :1; unsigned int joliet_long_names :1;
/**
* Use UTF-16BE rather than its subset UCS-2
*/
unsigned int joliet_utf16 :1;
/** /**
* Write Rock Ridge info as of specification RRIP-1.10 rather than * Write Rock Ridge info as of specification RRIP-1.10 rather than
* RRIP-1.12: signature "RRIP_1991A" rather than "IEEE_1282", * RRIP-1.12: signature "RRIP_1991A" rather than "IEEE_1282",
@ -540,6 +551,9 @@ struct ecma119_image
/** Allow Joliet names up to 103 characters rather than 64 */ /** Allow Joliet names up to 103 characters rather than 64 */
unsigned int joliet_long_names :1; unsigned int joliet_long_names :1;
/** Use UTF-16BE rather than its subset UCS-2 */
unsigned int joliet_utf16 :1;
/** Write old fashioned RRIP-1.10 rather than RRIP-1.12 */ /** Write old fashioned RRIP-1.10 rather than RRIP-1.12 */
unsigned int rrip_version_1_10 :1; unsigned int rrip_version_1_10 :1;
@ -642,6 +656,7 @@ struct ecma119_image
uint32_t joliet_path_table_size; uint32_t joliet_path_table_size;
uint32_t joliet_l_path_table_pos; uint32_t joliet_l_path_table_pos;
uint32_t joliet_m_path_table_pos; uint32_t joliet_m_path_table_pos;
size_t joliet_ucs2_failures;
/* /*
* HFS+ related information * HFS+ related information

View File

@ -1252,7 +1252,7 @@ char *get_name(_ImageFsData *fsdata, const char *str, size_t len)
return name; return name;
} else { } else {
ret = iso_msg_submit(fsdata->msgid, ISO_FILENAME_WRONG_CHARSET, ret, ret = iso_msg_submit(fsdata->msgid, ISO_FILENAME_WRONG_CHARSET, ret,
"Charset conversion error. Cannot convert from %s to %s", "Cannot convert from charset %s to %s",
fsdata->input_charset, fsdata->local_charset); fsdata->input_charset, fsdata->local_charset);
if (ret < 0) { if (ret < 0) {
return NULL; /* aborted */ return NULL; /* aborted */
@ -1751,7 +1751,7 @@ if (name != NULL && !namecont) {
LIBISO_FREE_MEM(msg); LIBISO_FREE_MEM(msg);
LIBISO_ALLOC_MEM(msg, char, 160); LIBISO_ALLOC_MEM(msg, char, 160);
sprintf(msg, sprintf(msg,
"Charset conversion error. Cannot convert from %.40s to %.40s", "Cannot convert from charset %.40s to %.40s",
fsdata->input_charset, fsdata->local_charset); fsdata->input_charset, fsdata->local_charset);
ret = iso_rr_msg_submit(fsdata, 17, ISO_FILENAME_WRONG_CHARSET, ret = iso_rr_msg_submit(fsdata, 17, ISO_FILENAME_WRONG_CHARSET,
ret, msg); ret, msg);
@ -2906,7 +2906,10 @@ int iso_image_filesystem_new(IsoDataSource *src, struct iso_read_opts *opts,
if (!opts->nojoliet && opts->preferjoliet && data->joliet) { if (!opts->nojoliet && opts->preferjoliet && data->joliet) {
/* if user prefers joliet, that is used */ /* if user prefers joliet, that is used */
iso_msg_debug(data->msgid, "Reading Joliet extensions."); iso_msg_debug(data->msgid, "Reading Joliet extensions.");
data->input_charset = strdup("UCS-2BE"); /* Although Joliet prescribes UCS-2BE, interpret names by its
superset UTF-16BE in order to avoid conversion failures.
*/
data->input_charset = strdup("UTF-16BE");
data->rr = RR_EXT_NO; data->rr = RR_EXT_NO;
data->iso_root_block = data->svd_root_block; data->iso_root_block = data->svd_root_block;
} else { } else {
@ -2919,7 +2922,7 @@ int iso_image_filesystem_new(IsoDataSource *src, struct iso_read_opts *opts,
if (!opts->nojoliet && data->joliet) { if (!opts->nojoliet && data->joliet) {
/* joliet will be used */ /* joliet will be used */
iso_msg_debug(data->msgid, "Reading Joliet extensions."); iso_msg_debug(data->msgid, "Reading Joliet extensions.");
data->input_charset = strdup("UCS-2BE"); data->input_charset = strdup("UTF-16BE");
data->iso_root_block = data->svd_root_block; data->iso_root_block = data->svd_root_block;
} else if (!opts->noiso1999 && data->iso1999) { } else if (!opts->noiso1999 && data->iso1999) {
/* we will read ISO 9660:1999 */ /* we will read ISO 9660:1999 */

View File

@ -31,19 +31,41 @@
static static
int get_joliet_name(Ecma119Image *t, IsoNode *iso, uint16_t **name) int get_joliet_name(Ecma119Image *t, IsoNode *iso, uint16_t **name)
{ {
int ret; int ret = ISO_SUCCESS;
uint16_t *ucs_name; uint16_t *ucs_name = NULL, *utf16_name = NULL;
uint16_t *jname = NULL; uint16_t *jname = NULL;
if (iso->name == NULL) { if (iso->name == NULL) {
/* it is not necessarily an error, it can be the root */ /* it is not necessarily an error, it can be the root */
*name = NULL;
return ISO_SUCCESS; return ISO_SUCCESS;
} }
if (t->joliet_utf16) {
ret = str2utf16be(t->input_charset, iso->name, &ucs_name);
if (ret < 0) {
iso_msg_debug(t->image->id, "Cannot convert to UTF-16 : \"%s\"",
iso->name);
goto ex;
}
} else {
ret = str2ucs(t->input_charset, iso->name, &ucs_name); ret = str2ucs(t->input_charset, iso->name, &ucs_name);
if (ret < 0) { if (ret < 0) {
iso_msg_debug(t->image->id, "Can't convert %s", iso->name); iso_msg_debug(t->image->id, "Cannot convert to UCS-2 : \"%s\"",
return ret; iso->name);
goto ex;
}
ret = str2utf16be(t->input_charset, iso->name, &utf16_name);
if (ret == ISO_SUCCESS) {
if (ucscmp(ucs_name, utf16_name) != 0) {
t->joliet_ucs2_failures++;
if (t->joliet_ucs2_failures <= ISO_JOLIET_UCS2_WARN_MAX) {
iso_msg_submit(t->image->id, ISO_NAME_NOT_UCS2, 0,
"Filename not suitable for Joliet character set UCS-2 : \"%s\"",
iso->name);
}
}
}
} }
if (iso->type == LIBISO_DIR) { if (iso->type == LIBISO_DIR) {
jname = iso_j_dir_id(ucs_name, t->joliet_long_names << 1); jname = iso_j_dir_id(ucs_name, t->joliet_long_names << 1);
@ -51,8 +73,17 @@ int get_joliet_name(Ecma119Image *t, IsoNode *iso, uint16_t **name)
jname = iso_j_file_id(ucs_name, jname = iso_j_file_id(ucs_name,
(t->joliet_long_names << 1) | !!(t->no_force_dots & 2)); (t->joliet_long_names << 1) | !!(t->no_force_dots & 2));
} }
ret = ISO_SUCCESS;
ex:;
if (ucs_name != NULL)
free(ucs_name); free(ucs_name);
if (jname != NULL) { if (utf16_name != NULL)
free(utf16_name);
if (ret != ISO_SUCCESS) {
if (jname != NULL)
free(jname);
return ret;
} else if (jname != NULL) {
*name = jname; *name = jname;
return ISO_SUCCESS; return ISO_SUCCESS;
} else { } else {
@ -828,18 +859,22 @@ void ucsncpy_pad(uint16_t *dest, const uint16_t *src, size_t max)
csrc = (char*)src; csrc = (char*)src;
if (src != NULL) { if (src != NULL) {
len = MIN(ucslen(src) * 2, max); len = MIN(ucslen(src) * 2, max - (max % 2));
} else { } else {
len = 0; len = 0;
} }
for (i = 0; i < len; ++i) for (i = 0; i < len; ++i)
cdest[i] = csrc[i]; cdest[i] = csrc[i];
if (len >= 2)
iso_handle_split_utf16(dest + (len / 2 - 1));
for (i = len; i < max; i += 2) { for (i = len; i + 1 < max; i += 2) {
cdest[i] = '\0'; cdest[i] = '\0';
cdest[i + 1] = ' '; cdest[i + 1] = ' ';
} }
if (max % 2)
cdest[max - 1] = 0;
} }
int joliet_writer_write_vol_desc(IsoImageWriter *writer) int joliet_writer_write_vol_desc(IsoImageWriter *writer)

View File

@ -1771,6 +1771,16 @@ int iso_write_opts_set_joliet_longer_paths(IsoWriteOpts *opts, int allow);
*/ */
int iso_write_opts_set_joliet_long_names(IsoWriteOpts *opts, int allow); int iso_write_opts_set_joliet_long_names(IsoWriteOpts *opts, int allow);
/**
 * Use character set UTF-16BE with Joliet, which is a superset of the
 * actually prescribed character set UCS-2.
 * This breaks Joliet specification with exotic characters which would
 * otherwise be mapped to underscore '_'. Use with caution.
 *
 * @param opts
 *      The option set to be manipulated.
 * @param allow
 *      0 = write Joliet names as UCS-2 (the default of a new option set),
 *      any other value = write Joliet names as UTF-16BE.
 * @return
 *      ISO_SUCCESS, or ISO_NULL_POINTER if opts is NULL.
 *
 * @since 1.3.6
 */
int iso_write_opts_set_joliet_utf16(IsoWriteOpts *opts, int allow);
/** /**
* Write Rock Ridge info as of specification RRIP-1.10 rather than RRIP-1.12: * Write Rock Ridge info as of specification RRIP-1.10 rather than RRIP-1.12:
* signature "RRIP_1991A" rather than "IEEE_1282", field PX without file * signature "RRIP_1991A" rather than "IEEE_1282", field PX without file
@ -7544,6 +7554,9 @@ int iso_image_hfsplus_get_blessed(IsoImage *img, IsoNode ***blessed_nodes,
/** Unrecognized file type in ISO image (FAILURE, HIGH, -396) */ /** Unrecognized file type in ISO image (FAILURE, HIGH, -396) */
#define ISO_BAD_ISO_FILETYPE 0xE830FE74 #define ISO_BAD_ISO_FILETYPE 0xE830FE74
/** Filename not suitable for character set UCS-2 (WARNING, HIGH, -397) */
#define ISO_NAME_NOT_UCS2 0xD030FE73
/* Internal developer note: /* Internal developer note:
Place new error codes directly above this comment. Place new error codes directly above this comment.

View File

@ -303,6 +303,7 @@ iso_write_opts_set_iso_level;
iso_write_opts_set_joliet; iso_write_opts_set_joliet;
iso_write_opts_set_joliet_long_names; iso_write_opts_set_joliet_long_names;
iso_write_opts_set_joliet_longer_paths; iso_write_opts_set_joliet_longer_paths;
iso_write_opts_set_joliet_utf16;
iso_write_opts_set_max_37_char_filenames; iso_write_opts_set_max_37_char_filenames;
iso_write_opts_set_ms_block; iso_write_opts_set_ms_block;
iso_write_opts_set_no_force_dots; iso_write_opts_set_no_force_dots;

View File

@ -503,6 +503,8 @@ const char *iso_error_to_msg(int errcode)
return "Too many chained symbolic links"; return "Too many chained symbolic links";
case ISO_BAD_ISO_FILETYPE: case ISO_BAD_ISO_FILETYPE:
return "Unrecognized file type in ISO image"; return "Unrecognized file type in ISO image";
case ISO_NAME_NOT_UCS2:
return "Filename not suitable for character set UCS-2";
default: default:
return "Unknown error"; return "Unknown error";
} }

View File

@ -1197,7 +1197,7 @@ uint16_t *iso_j_file_id(const uint16_t *src, int flag)
{ {
uint16_t *dot, *retval = NULL; uint16_t *dot, *retval = NULL;
size_t lname, lext, lnname, lnext, pos, i, maxchar = 64; size_t lname, lext, lnname, lnext, pos, i, maxchar = 64;
uint16_t *dest = NULL; uint16_t *dest = NULL, c;
LIBISO_ALLOC_MEM_VOID(dest, uint16_t, LIBISO_JOLIET_NAME_MAX); LIBISO_ALLOC_MEM_VOID(dest, uint16_t, LIBISO_JOLIET_NAME_MAX);
/* was: 66 = 64 (name + ext) + 1 (.) + 1 (\0) */ /* was: 66 = 64 (name + ext) + 1 (.) + 1 (\0) */
@ -1237,7 +1237,7 @@ uint16_t *iso_j_file_id(const uint16_t *src, int flag)
/* Convert up to lnname characters of the filename. */ /* Convert up to lnname characters of the filename. */
for (i = 0; i < lnname; i++) { for (i = 0; i < lnname; i++) {
uint16_t c = src[i]; c = src[i];
if (valid_j_char(c)) { if (valid_j_char(c)) {
dest[pos++] = c; dest[pos++] = c;
} else { } else {
@ -1245,6 +1245,7 @@ uint16_t *iso_j_file_id(const uint16_t *src, int flag)
pos++; pos++;
} }
} }
iso_handle_split_utf16(dest + (pos - 1));
if ((flag & 1) && lnext <= 0) if ((flag & 1) && lnext <= 0)
goto is_done; goto is_done;
@ -1262,6 +1263,7 @@ uint16_t *iso_j_file_id(const uint16_t *src, int flag)
pos++; pos++;
} }
} }
iso_handle_split_utf16(dest + (pos - 1));
is_done:; is_done:;
set_ucsbe(dest + pos, '\0'); set_ucsbe(dest + pos, '\0');
@ -1298,6 +1300,7 @@ uint16_t *iso_j_dir_id(const uint16_t *src, int flag)
set_ucsbe(dest + i, '_'); set_ucsbe(dest + i, '_');
} }
} }
iso_handle_split_utf16(dest + (len - 1));
set_ucsbe(dest + len, '\0'); set_ucsbe(dest + len, '\0');
retval = ucsdup(dest); retval = ucsdup(dest);
ex: ex:
@ -1379,6 +1382,8 @@ uint16_t *ucsncpy(uint16_t *dest, const uint16_t *src, size_t n)
{ {
n = MIN(n, ucslen(src) + 1); n = MIN(n, ucslen(src) + 1);
memcpy(dest, src, n*2); memcpy(dest, src, n*2);
if (n >= 2)
iso_handle_split_utf16(dest + (n - 2));
return dest; return dest;
} }
@ -2209,3 +2214,16 @@ uint16_t iso_htons(uint16_t v)
return ret; return ret;
} }
/* Repair the end of a truncated UTF-16 string: if utf_word is the first
   word of a surrogate pair (0xd800 to 0xdbff), then its partner was cut
   off and the word gets replaced by the UTF-16 character '_'.
   (UCS-2 is promised to reserve 0xd800 to 0xdbff for UTF-16).
   Only the first byte in memory is inspected, i.e. the word is expected
   to be stored big-endian.
*/
void iso_handle_split_utf16(uint16_t *utf_word)
{
    const unsigned char *bytes = (const unsigned char *) utf_word;

    /* First surrogates have the upper six bits 110110 in their high byte */
    if ((bytes[0] >> 2) != (0xd8 >> 2))
        return;
    set_ucsbe(utf_word, '_');
}

View File

@ -236,6 +236,12 @@ uint16_t *ucscpy(uint16_t *dest, const uint16_t *src);
*/ */
uint16_t *ucsncpy(uint16_t *dest, const uint16_t *src, size_t n); uint16_t *ucsncpy(uint16_t *dest, const uint16_t *src, size_t n);
/**
* Check whether utf_word is the first surrogate word of a pair.
* If so, change it to UTF-16 character '_'.
*/
void iso_handle_split_utf16(uint16_t *utf_word);
/** /**
* Convert a given input string to d-chars. * Convert a given input string to d-chars.
* @return * @return