Implemented direct iconv conversion for the case that the traditional two-step conversion via character set "WCHAR_T" fails, e.g. on Solaris.
This commit is contained in:
Thomas Schmitt 2011-08-11 18:22:49 +02:00
parent 6892c734e2
commit 3a82f213e0

View File

@ -382,12 +382,13 @@ conv_error:;
int str2ascii(const char *icharset, const char *input, char **output) int str2ascii(const char *icharset, const char *input, char **output)
{ {
int result; int result;
wchar_t *wsrc_; wchar_t *wsrc_ = NULL;
char *ret; char *ret;
char *ret_; char *ret_ = NULL;
char *src; char *src;
struct iso_iconv_handle conv; struct iso_iconv_handle conv;
int conv_ret; int conv_ret;
int direct_conv = 0;
/* That while loop smells like a potential show stopper */ /* That while loop smells like a potential show stopper */
size_t loop_counter = 0, loop_limit = 3; size_t loop_counter = 0, loop_limit = 3;
@ -405,14 +406,17 @@ int str2ascii(const char *icharset, const char *input, char **output)
return ISO_NULL_POINTER; return ISO_NULL_POINTER;
} }
/* First try the traditional way via intermediate character set WCHAR_T.
* Up to August 2011 this was the only way. But it will not work if
* there is no character set "WCHAR_T". E.g. on Solaris.
*/
/* convert the string to a wide character string. Note: outbytes /* convert the string to a wide character string. Note: outbytes
* is in fact the number of characters in the string and doesn't * is in fact the number of characters in the string and doesn't
* include the last NULL character. * include the last NULL character.
*/ */
conv_ret = 0;
result = str2wchar(icharset, input, &wsrc_); result = str2wchar(icharset, input, &wsrc_);
if (result < 0) { if (result == (int) ISO_SUCCESS) {
goto fallback;
}
src = (char *)wsrc_; src = (char *)wsrc_;
numchars = wcslen(wsrc_); numchars = wcslen(wsrc_);
@ -431,7 +435,25 @@ int str2ascii(const char *icharset, const char *input, char **output)
if (conv_ret <= 0) { if (conv_ret <= 0) {
free(wsrc_); free(wsrc_);
free(ret_); free(ret_);
}
} else if (result != (int) ISO_CHARSET_CONV_ERROR)
return result;
/* If this did not succeed : Try the untraditional direct conversion.
*/
if (conv_ret <= 0) {
conv_ret = iso_iconv_open(&conv, "ASCII", (char *) icharset, 0);
if (conv_ret <= 0)
goto fallback; goto fallback;
direct_conv = 1;
src = (char *) input;
inbytes = strlen(input);
loop_limit = inbytes + 3;
outbytes = (inbytes + 1) * sizeof(uint16_t);
ret_ = malloc(outbytes);
if (ret == NULL)
return ISO_OUT_OF_MEM;
ret = ret_;
} }
n = iso_iconv(&conv, &src, &inbytes, &ret, &outbytes, 0); n = iso_iconv(&conv, &src, &inbytes, &ret, &outbytes, 0);
@ -458,8 +480,13 @@ int str2ascii(const char *icharset, const char *input, char **output)
/* There was an error with one character but some other remain /* There was an error with one character but some other remain
* to be converted. That's probably a multibyte character. * to be converted. That's probably a multibyte character.
* See above comment. */ * See above comment. */
if (direct_conv) {
src++;
inbytes--;
} else {
src += sizeof(wchar_t); src += sizeof(wchar_t);
inbytes -= sizeof(wchar_t); inbytes -= sizeof(wchar_t);
}
if (!inbytes) if (!inbytes)
break; break;
@ -471,7 +498,8 @@ int str2ascii(const char *icharset, const char *input, char **output)
n = iso_iconv(&conv, &src, &inbytes, &ret, &outbytes, 0); n = iso_iconv(&conv, &src, &inbytes, &ret, &outbytes, 0);
} }
iso_iconv_close(&conv, 0); iso_iconv_close(&conv, 0);
*ret='\0'; *ret = 0;
if (wsrc_ != NULL)
free(wsrc_); free(wsrc_);
*output = ret_; *output = ret_;
@ -517,12 +545,13 @@ int cmp_ucsbe(const uint16_t *ucs, char c)
int str2ucs(const char *icharset, const char *input, uint16_t **output) int str2ucs(const char *icharset, const char *input, uint16_t **output)
{ {
int result; int result;
wchar_t *wsrc_; wchar_t *wsrc_ = NULL;
char *src; char *src;
char *ret; char *ret;
char *ret_; char *ret_ = NULL;
struct iso_iconv_handle conv; struct iso_iconv_handle conv;
int conv_ret; int conv_ret = 0;
int direct_conv = 0;
/* That while loop smells like a potential show stopper */ /* That while loop smells like a potential show stopper */
size_t loop_counter = 0, loop_limit = 3; size_t loop_counter = 0, loop_limit = 3;
@ -540,10 +569,13 @@ int str2ucs(const char *icharset, const char *input, uint16_t **output)
* is in fact the number of characters in the string and doesn't * is in fact the number of characters in the string and doesn't
* include the last NULL character. * include the last NULL character.
*/ */
/* First try the traditional way via intermediate character set WCHAR_T.
* Up to August 2011 this was the only way. But it will not work if
* there is no character set "WCHAR_T". E.g. on Solaris.
*/
conv_ret = 0;
result = str2wchar(icharset, input, &wsrc_); result = str2wchar(icharset, input, &wsrc_);
if (result < 0) { if (result == (int) ISO_SUCCESS) {
return result;
}
src = (char *)wsrc_; src = (char *)wsrc_;
numchars = wcslen(wsrc_); numchars = wcslen(wsrc_);
@ -551,9 +583,8 @@ int str2ucs(const char *icharset, const char *input, uint16_t **output)
loop_limit = inbytes + 3; loop_limit = inbytes + 3;
ret_ = malloc((numchars+1) * sizeof(uint16_t)); ret_ = malloc((numchars+1) * sizeof(uint16_t));
if (ret_ == NULL) { if (ret_ == NULL)
return ISO_OUT_OF_MEM; return ISO_OUT_OF_MEM;
}
outbytes = numchars * sizeof(uint16_t); outbytes = numchars * sizeof(uint16_t);
ret = ret_; ret = ret_;
@ -562,8 +593,27 @@ int str2ucs(const char *icharset, const char *input, uint16_t **output)
if (conv_ret <= 0) { if (conv_ret <= 0) {
free(wsrc_); free(wsrc_);
free(ret_); free(ret_);
}
} else if (result != (int) ISO_CHARSET_CONV_ERROR)
return result;
/* If this did not succeed : Try the untraditional direct conversion.
*/
if (conv_ret <= 0) {
conv_ret = iso_iconv_open(&conv, "UCS-2BE", (char *) icharset, 0);
if (conv_ret <= 0) {
return ISO_CHARSET_CONV_ERROR; return ISO_CHARSET_CONV_ERROR;
} }
direct_conv = 1;
src = (char *) input;
inbytes = strlen(input);
loop_limit = inbytes + 3;
outbytes = (inbytes + 1) * sizeof(uint16_t);
ret_ = malloc(outbytes);
if (ret == NULL)
return ISO_OUT_OF_MEM;
ret = ret_;
}
n = iso_iconv(&conv, &src, &inbytes, &ret, &outbytes, 0); n = iso_iconv(&conv, &src, &inbytes, &ret, &outbytes, 0);
while (n == (size_t) -1) { while (n == (size_t) -1) {
@ -589,8 +639,13 @@ int str2ucs(const char *icharset, const char *input, uint16_t **output)
/* There was an error with one character but some other remain /* There was an error with one character but some other remain
* to be converted. That's probably a multibyte character. * to be converted. That's probably a multibyte character.
* See above comment. */ * See above comment. */
if (direct_conv) {
src++;
inbytes--;
} else {
src += sizeof(wchar_t); src += sizeof(wchar_t);
inbytes -= sizeof(wchar_t); inbytes -= sizeof(wchar_t);
}
if (!inbytes) if (!inbytes)
break; break;
@ -605,6 +660,7 @@ int str2ucs(const char *icharset, const char *input, uint16_t **output)
/* close the ucs string */ /* close the ucs string */
set_ucsbe((uint16_t*) ret, '\0'); set_ucsbe((uint16_t*) ret, '\0');
if (wsrc_ != NULL)
free(wsrc_); free(wsrc_);
*output = (uint16_t*)ret_; *output = (uint16_t*)ret_;