New API call iso_write_opts_set_joliet_utf16()
and ability to read Joliet names as UTF-16BE
This commit is contained in:
parent
ba47d1534c
commit
88555bd059
@ -1,3 +1,8 @@
|
||||
bzr branch lp:libisofs/for-libisoburn (to become libisofs-1.3.6.tar.gz)
|
||||
===============================================================================
|
||||
* New API call iso_write_opts_set_joliet_utf16() and ability to read Joliet
|
||||
names as UTF-16BE
|
||||
|
||||
libisofs-1.3.4.tar.gz Thu Dec 12 2013
|
||||
===============================================================================
|
||||
* Giving sort weight 2 as default to El Torito boot images
|
||||
|
@ -1510,6 +1510,21 @@ ex:;
|
||||
}
|
||||
|
||||
|
||||
static
|
||||
void issue_write_warning_summary(Ecma119Image *target)
|
||||
{
|
||||
if (target->joliet_ucs2_failures > ISO_JOLIET_UCS2_WARN_MAX) {
|
||||
iso_msg_submit(-1, ISO_NAME_NOT_UCS2, 0,
|
||||
"More filenames found which were not suitable for Joliet character set UCS-2");
|
||||
}
|
||||
if (target->joliet_ucs2_failures > 0) {
|
||||
iso_msg_submit(-1, ISO_NAME_NOT_UCS2, 0,
|
||||
"Sum of filenames not suitable for Joliet character set UCS-2: %.f",
|
||||
(double) target->joliet_ucs2_failures);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static
|
||||
void *write_function(void *arg)
|
||||
{
|
||||
@ -1566,6 +1581,8 @@ void *write_function(void *arg)
|
||||
if (res <= 0)
|
||||
goto write_error;
|
||||
|
||||
issue_write_warning_summary(target);
|
||||
|
||||
target->image->generator_is_running = 0;
|
||||
|
||||
/* Give up reference claim made in ecma119_image_new().
|
||||
@ -1783,6 +1800,7 @@ int ecma119_image_new(IsoImage *src, IsoWriteOpts *opts, Ecma119Image **img)
|
||||
target->relaxed_vol_atts = opts->relaxed_vol_atts;
|
||||
target->joliet_longer_paths = opts->joliet_longer_paths;
|
||||
target->joliet_long_names = opts->joliet_long_names;
|
||||
target->joliet_utf16 = opts->joliet_utf16;
|
||||
target->rrip_version_1_10 = opts->rrip_version_1_10;
|
||||
target->rrip_1_10_px_ino = opts->rrip_1_10_px_ino;
|
||||
target->aaip_susp_1_10 = opts->aaip_susp_1_10;
|
||||
@ -2007,6 +2025,8 @@ int ecma119_image_new(IsoImage *src, IsoWriteOpts *opts, Ecma119Image **img)
|
||||
target->filesrc_start = 0;
|
||||
target->filesrc_blocks = 0;
|
||||
|
||||
target->joliet_ucs2_failures = 0;
|
||||
|
||||
/*
|
||||
* 2. Based on those options, create needed writers: iso, joliet...
|
||||
* Each writer inits its structures and stores needed info into
|
||||
@ -2758,6 +2778,7 @@ int iso_write_opts_new(IsoWriteOpts **opts, int profile)
|
||||
wopts->fat = 0;
|
||||
wopts->fifo_size = 1024; /* 2 MB buffer */
|
||||
wopts->sort_files = 1; /* file sorting is always good */
|
||||
wopts->joliet_utf16 = 0;
|
||||
wopts->rr_reloc_dir = NULL;
|
||||
wopts->rr_reloc_flags = 0;
|
||||
wopts->system_area_data = NULL;
|
||||
@ -3035,6 +3056,15 @@ int iso_write_opts_set_joliet_long_names(IsoWriteOpts *opts, int allow)
|
||||
return ISO_SUCCESS;
|
||||
}
|
||||
|
||||
/*
 * Set or clear the option to use UTF-16BE rather than UCS-2 with Joliet.
 *
 * opts   The write options to modify.
 * allow  0 clears the option, any other value sets it.
 *
 * Returns ISO_NULL_POINTER if opts is NULL, else ISO_SUCCESS.
 */
int iso_write_opts_set_joliet_utf16(IsoWriteOpts *opts, int allow)
{
    if (opts != NULL) {
        /* The bit field gets exactly 0 or 1 */
        opts->joliet_utf16 = (allow != 0);
        return ISO_SUCCESS;
    }
    return ISO_NULL_POINTER;
}
|
||||
|
||||
int iso_write_opts_set_rrip_version_1_10(IsoWriteOpts *opts, int oldvers)
|
||||
{
|
||||
if (opts == NULL) {
|
||||
|
@ -87,6 +87,12 @@
|
||||
#define ISO_GPT_ENTRIES_MAX 248
|
||||
|
||||
|
||||
/* How many warnings to issue about writing Joliet names which cannot be
|
||||
properly represented in UCS-2 and thus had to be defaulted to '_'.
|
||||
*/
|
||||
#define ISO_JOLIET_UCS2_WARN_MAX 3
|
||||
|
||||
|
||||
/**
|
||||
* Holds the options for the image generation.
|
||||
*/
|
||||
@ -192,6 +198,11 @@ struct iso_write_opts {
|
||||
*/
|
||||
unsigned int joliet_long_names :1;
|
||||
|
||||
/**
|
||||
* Use UTF-16BE rather than its subset UCS-2
|
||||
*/
|
||||
unsigned int joliet_utf16 :1;
|
||||
|
||||
/**
|
||||
* Write Rock Ridge info as of specification RRIP-1.10 rather than
|
||||
* RRIP-1.12: signature "RRIP_1991A" rather than "IEEE_1282",
|
||||
@ -540,6 +551,9 @@ struct ecma119_image
|
||||
/** Allow Joliet names up to 103 characters rather than 64 */
|
||||
unsigned int joliet_long_names :1;
|
||||
|
||||
/** Use UTF-16BE rather than its subset UCS-2 */
|
||||
unsigned int joliet_utf16 :1;
|
||||
|
||||
/** Write old fashioned RRIP-1.10 rather than RRIP-1.12 */
|
||||
unsigned int rrip_version_1_10 :1;
|
||||
|
||||
@ -642,6 +656,7 @@ struct ecma119_image
|
||||
uint32_t joliet_path_table_size;
|
||||
uint32_t joliet_l_path_table_pos;
|
||||
uint32_t joliet_m_path_table_pos;
|
||||
size_t joliet_ucs2_failures;
|
||||
|
||||
/*
|
||||
* HFS+ related information
|
||||
|
@ -1252,7 +1252,7 @@ char *get_name(_ImageFsData *fsdata, const char *str, size_t len)
|
||||
return name;
|
||||
} else {
|
||||
ret = iso_msg_submit(fsdata->msgid, ISO_FILENAME_WRONG_CHARSET, ret,
|
||||
"Charset conversion error. Cannot convert from %s to %s",
|
||||
"Cannot convert from charset %s to %s",
|
||||
fsdata->input_charset, fsdata->local_charset);
|
||||
if (ret < 0) {
|
||||
return NULL; /* aborted */
|
||||
@ -1751,7 +1751,7 @@ if (name != NULL && !namecont) {
|
||||
LIBISO_FREE_MEM(msg);
|
||||
LIBISO_ALLOC_MEM(msg, char, 160);
|
||||
sprintf(msg,
|
||||
"Charset conversion error. Cannot convert from %.40s to %.40s",
|
||||
"Cannot convert from charset %.40s to %.40s",
|
||||
fsdata->input_charset, fsdata->local_charset);
|
||||
ret = iso_rr_msg_submit(fsdata, 17, ISO_FILENAME_WRONG_CHARSET,
|
||||
ret, msg);
|
||||
@ -2906,7 +2906,10 @@ int iso_image_filesystem_new(IsoDataSource *src, struct iso_read_opts *opts,
|
||||
if (!opts->nojoliet && opts->preferjoliet && data->joliet) {
|
||||
/* if user prefers joliet, that is used */
|
||||
iso_msg_debug(data->msgid, "Reading Joliet extensions.");
|
||||
data->input_charset = strdup("UCS-2BE");
|
||||
/* Although Joliet prescribes UCS-2BE, interpret names by its
|
||||
superset UTF-16BE in order to avoid conversion failures.
|
||||
*/
|
||||
data->input_charset = strdup("UTF-16BE");
|
||||
data->rr = RR_EXT_NO;
|
||||
data->iso_root_block = data->svd_root_block;
|
||||
} else {
|
||||
@ -2919,7 +2922,7 @@ int iso_image_filesystem_new(IsoDataSource *src, struct iso_read_opts *opts,
|
||||
if (!opts->nojoliet && data->joliet) {
|
||||
/* joliet will be used */
|
||||
iso_msg_debug(data->msgid, "Reading Joliet extensions.");
|
||||
data->input_charset = strdup("UCS-2BE");
|
||||
data->input_charset = strdup("UTF-16BE");
|
||||
data->iso_root_block = data->svd_root_block;
|
||||
} else if (!opts->noiso1999 && data->iso1999) {
|
||||
/* we will read ISO 9660:1999 */
|
||||
|
@ -31,19 +31,41 @@
|
||||
static
|
||||
int get_joliet_name(Ecma119Image *t, IsoNode *iso, uint16_t **name)
|
||||
{
|
||||
int ret;
|
||||
uint16_t *ucs_name;
|
||||
int ret = ISO_SUCCESS;
|
||||
uint16_t *ucs_name = NULL, *utf16_name = NULL;
|
||||
uint16_t *jname = NULL;
|
||||
|
||||
if (iso->name == NULL) {
|
||||
/* it is not necessarily an error, it can be the root */
|
||||
*name = NULL;
|
||||
return ISO_SUCCESS;
|
||||
}
|
||||
|
||||
ret = str2ucs(t->input_charset, iso->name, &ucs_name);
|
||||
if (ret < 0) {
|
||||
iso_msg_debug(t->image->id, "Can't convert %s", iso->name);
|
||||
return ret;
|
||||
if (t->joliet_utf16) {
|
||||
ret = str2utf16be(t->input_charset, iso->name, &ucs_name);
|
||||
if (ret < 0) {
|
||||
iso_msg_debug(t->image->id, "Cannot convert to UTF-16 : \"%s\"",
|
||||
iso->name);
|
||||
goto ex;
|
||||
}
|
||||
} else {
|
||||
ret = str2ucs(t->input_charset, iso->name, &ucs_name);
|
||||
if (ret < 0) {
|
||||
iso_msg_debug(t->image->id, "Cannot convert to UCS-2 : \"%s\"",
|
||||
iso->name);
|
||||
goto ex;
|
||||
}
|
||||
ret = str2utf16be(t->input_charset, iso->name, &utf16_name);
|
||||
if (ret == ISO_SUCCESS) {
|
||||
if (ucscmp(ucs_name, utf16_name) != 0) {
|
||||
t->joliet_ucs2_failures++;
|
||||
if (t->joliet_ucs2_failures <= ISO_JOLIET_UCS2_WARN_MAX) {
|
||||
iso_msg_submit(t->image->id, ISO_NAME_NOT_UCS2, 0,
|
||||
"Filename not suitable for Joliet character set UCS-2 : \"%s\"",
|
||||
iso->name);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if (iso->type == LIBISO_DIR) {
|
||||
jname = iso_j_dir_id(ucs_name, t->joliet_long_names << 1);
|
||||
@ -51,8 +73,17 @@ int get_joliet_name(Ecma119Image *t, IsoNode *iso, uint16_t **name)
|
||||
jname = iso_j_file_id(ucs_name,
|
||||
(t->joliet_long_names << 1) | !!(t->no_force_dots & 2));
|
||||
}
|
||||
free(ucs_name);
|
||||
if (jname != NULL) {
|
||||
ret = ISO_SUCCESS;
|
||||
ex:;
|
||||
if (ucs_name != NULL)
|
||||
free(ucs_name);
|
||||
if (utf16_name != NULL)
|
||||
free(utf16_name);
|
||||
if (ret != ISO_SUCCESS) {
|
||||
if (jname != NULL)
|
||||
free(jname);
|
||||
return ret;
|
||||
} else if (jname != NULL) {
|
||||
*name = jname;
|
||||
return ISO_SUCCESS;
|
||||
} else {
|
||||
@ -828,18 +859,22 @@ void ucsncpy_pad(uint16_t *dest, const uint16_t *src, size_t max)
|
||||
csrc = (char*)src;
|
||||
|
||||
if (src != NULL) {
|
||||
len = MIN(ucslen(src) * 2, max);
|
||||
len = MIN(ucslen(src) * 2, max - (max % 2));
|
||||
} else {
|
||||
len = 0;
|
||||
}
|
||||
|
||||
for (i = 0; i < len; ++i)
|
||||
cdest[i] = csrc[i];
|
||||
if (len >= 2)
|
||||
iso_handle_split_utf16(dest + (len / 2 - 1));
|
||||
|
||||
for (i = len; i < max; i += 2) {
|
||||
for (i = len; i + 1 < max; i += 2) {
|
||||
cdest[i] = '\0';
|
||||
cdest[i + 1] = ' ';
|
||||
}
|
||||
if (max % 2)
|
||||
cdest[max - 1] = 0;
|
||||
}
|
||||
|
||||
int joliet_writer_write_vol_desc(IsoImageWriter *writer)
|
||||
|
@ -1771,6 +1771,16 @@ int iso_write_opts_set_joliet_longer_paths(IsoWriteOpts *opts, int allow);
|
||||
*/
|
||||
int iso_write_opts_set_joliet_long_names(IsoWriteOpts *opts, int allow);
|
||||
|
||||
/**
|
||||
* Use character set UTF-16BE with Joliet, which is a superset of the
|
||||
* actually prescribed character set UCS-2.
|
||||
* This breaks Joliet specification with exotic characters which would
|
||||
* otherwise be mapped to underscore '_'. Use with caution.
|
||||
*
|
||||
* @since 1.3.6
|
||||
*/
|
||||
int iso_write_opts_set_joliet_utf16(IsoWriteOpts *opts, int allow);
|
||||
|
||||
/**
|
||||
* Write Rock Ridge info as of specification RRIP-1.10 rather than RRIP-1.12:
|
||||
* signature "RRIP_1991A" rather than "IEEE_1282", field PX without file
|
||||
@ -7544,6 +7554,9 @@ int iso_image_hfsplus_get_blessed(IsoImage *img, IsoNode ***blessed_nodes,
|
||||
/** Unrecognized file type in ISO image (FAILURE, HIGH, -396) */
|
||||
#define ISO_BAD_ISO_FILETYPE 0xE830FE74
|
||||
|
||||
/** Filename not suitable for character set UCS-2 (WARNING, HIGH, -397) */
|
||||
#define ISO_NAME_NOT_UCS2 0xD030FE73
|
||||
|
||||
|
||||
/* Internal developer note:
|
||||
Place new error codes directly above this comment.
|
||||
|
@ -303,6 +303,7 @@ iso_write_opts_set_iso_level;
|
||||
iso_write_opts_set_joliet;
|
||||
iso_write_opts_set_joliet_long_names;
|
||||
iso_write_opts_set_joliet_longer_paths;
|
||||
iso_write_opts_set_joliet_utf16;
|
||||
iso_write_opts_set_max_37_char_filenames;
|
||||
iso_write_opts_set_ms_block;
|
||||
iso_write_opts_set_no_force_dots;
|
||||
|
@ -503,6 +503,8 @@ const char *iso_error_to_msg(int errcode)
|
||||
return "Too many chained symbolic links";
|
||||
case ISO_BAD_ISO_FILETYPE:
|
||||
return "Unrecognized file type in ISO image";
|
||||
case ISO_NAME_NOT_UCS2:
|
||||
return "Filename not suitable for character set UCS-2";
|
||||
default:
|
||||
return "Unknown error";
|
||||
}
|
||||
|
@ -1197,7 +1197,7 @@ uint16_t *iso_j_file_id(const uint16_t *src, int flag)
|
||||
{
|
||||
uint16_t *dot, *retval = NULL;
|
||||
size_t lname, lext, lnname, lnext, pos, i, maxchar = 64;
|
||||
uint16_t *dest = NULL;
|
||||
uint16_t *dest = NULL, c;
|
||||
|
||||
LIBISO_ALLOC_MEM_VOID(dest, uint16_t, LIBISO_JOLIET_NAME_MAX);
|
||||
/* was: 66 = 64 (name + ext) + 1 (.) + 1 (\0) */
|
||||
@ -1237,7 +1237,7 @@ uint16_t *iso_j_file_id(const uint16_t *src, int flag)
|
||||
|
||||
/* Convert up to lnname characters of the filename. */
|
||||
for (i = 0; i < lnname; i++) {
|
||||
uint16_t c = src[i];
|
||||
c = src[i];
|
||||
if (valid_j_char(c)) {
|
||||
dest[pos++] = c;
|
||||
} else {
|
||||
@ -1245,6 +1245,7 @@ uint16_t *iso_j_file_id(const uint16_t *src, int flag)
|
||||
pos++;
|
||||
}
|
||||
}
|
||||
iso_handle_split_utf16(dest + (pos - 1));
|
||||
|
||||
if ((flag & 1) && lnext <= 0)
|
||||
goto is_done;
|
||||
@ -1262,6 +1263,7 @@ uint16_t *iso_j_file_id(const uint16_t *src, int flag)
|
||||
pos++;
|
||||
}
|
||||
}
|
||||
iso_handle_split_utf16(dest + (pos - 1));
|
||||
|
||||
is_done:;
|
||||
set_ucsbe(dest + pos, '\0');
|
||||
@ -1298,6 +1300,7 @@ uint16_t *iso_j_dir_id(const uint16_t *src, int flag)
|
||||
set_ucsbe(dest + i, '_');
|
||||
}
|
||||
}
|
||||
iso_handle_split_utf16(dest + (len - 1));
|
||||
set_ucsbe(dest + len, '\0');
|
||||
retval = ucsdup(dest);
|
||||
ex:
|
||||
@ -1379,6 +1382,8 @@ uint16_t *ucsncpy(uint16_t *dest, const uint16_t *src, size_t n)
|
||||
{
|
||||
n = MIN(n, ucslen(src) + 1);
|
||||
memcpy(dest, src, n*2);
|
||||
if (n >= 2)
|
||||
iso_handle_split_utf16(dest + (n - 2));
|
||||
return dest;
|
||||
}
|
||||
|
||||
@ -2209,3 +2214,16 @@ uint16_t iso_htons(uint16_t v)
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
/* Repair a UTF-16 surrogate pair which was split:
   If the first byte of utf_word is in the range 0xd8 to 0xdb, then the word
   gets overwritten by the UTF-16 character '_'. Any other word stays
   unchanged.
   (UCS-2 is promised to reserve 0xd800 to 0xdbff for UTF-16).
*/
void iso_handle_split_utf16(uint16_t *utf_word)
{
    const unsigned char *first_byte = (const unsigned char *) utf_word;

    /* Masking with 0xfc maps 0xd8, 0xd9, 0xda, 0xdb onto 0xd8 */
    if ((*first_byte & 0xfc) != 0xd8)
        return;
    set_ucsbe(utf_word, '_');
}
|
||||
|
||||
|
@ -236,6 +236,12 @@ uint16_t *ucscpy(uint16_t *dest, const uint16_t *src);
|
||||
*/
|
||||
uint16_t *ucsncpy(uint16_t *dest, const uint16_t *src, size_t n);
|
||||
|
||||
/**
|
||||
* Check whether utf_word is the first surrogate word of a pair.
|
||||
* If so, change it to UTF-16 character '_'.
|
||||
*/
|
||||
void iso_handle_split_utf16(uint16_t *utf_word);
|
||||
|
||||
/**
|
||||
* Convert a given input string to d-chars.
|
||||
* @return
|
||||
|
Loading…
Reference in New Issue
Block a user