Util function to convert string charset.

This commit is contained in:
Vreixo Formoso 2007-12-27 21:11:29 +01:00
parent 1e4851792b
commit c3ded11773
3 changed files with 97 additions and 1 deletions

View File

@ -17,6 +17,7 @@
#include <errno.h>
#include <ctype.h>
#include <stdio.h>
#include <limits.h>
int int_pow(int base, int power)
{
@ -27,6 +28,45 @@ int int_pow(int base, int power)
return result;
}
/**
 * Convert the charset encoding of a given string using iconv.
 *
 * @param str
 *      NUL-terminated input string, encoded in icharset.
 * @param icharset
 *      Name of the input charset, as understood by iconv_open().
 * @param ocharset
 *      Name of the output charset, as understood by iconv_open().
 * @param output
 *      Location where the pointer to the converted string is stored on
 *      success. The string is heap-allocated and NUL-terminated; the caller
 *      must free() it. Not written on error.
 * @return
 *      ISO_SUCCESS on success, ISO_MEM_ERROR if the work buffer can't be
 *      allocated, ISO_CHARSET_CONV_ERROR if the conversion is unsupported
 *      or the input can't be converted.
 */
int strconv(const char *str, const char *icharset, const char *ocharset,
            char **output)
{
    size_t inbytes;
    size_t outbytes;
    size_t n;
    iconv_t conv;
    char *out;
    char *src;
    char *ret;
    char *shrunk;

    inbytes = strlen(str);

    /* Guard the size computation below against size_t overflow. */
    if (inbytes >= ((size_t)-1) / MB_LEN_MAX) {
        return ISO_MEM_ERROR;
    }

    /* Worst case: every input byte expands to MB_LEN_MAX output bytes. */
    outbytes = (inbytes + 1) * MB_LEN_MAX;
    out = malloc(outbytes);
    if (out == NULL) {
        return ISO_MEM_ERROR;
    }

    conv = iconv_open(ocharset, icharset);
    if (conv == (iconv_t)(-1)) {
        free(out);
        return ISO_CHARSET_CONV_ERROR;
    }

    /* iconv() takes a non-const pointer but does not modify the input. */
    src = (char *)str;
    ret = out;

    /* Keep one byte back so the terminating NUL always fits. */
    outbytes--;

    n = iconv(conv, &src, &inbytes, &ret, &outbytes);
    if (n != (size_t)(-1)) {
        /* Emit any pending shift sequence (needed for stateful charsets). */
        n = iconv(conv, NULL, NULL, &ret, &outbytes);
    }
    iconv_close(conv);

    if (n == (size_t)(-1)) {
        free(out);
        return ISO_CHARSET_CONV_ERROR;
    }

    *ret = '\0';

    /*
     * Shrink the buffer to the bytes actually used, including the NUL.
     * If shrinking fails the original buffer is still valid, so hand it
     * over unchanged.
     */
    shrunk = realloc(out, (size_t)(ret - out) + 1);
    *output = (shrunk != NULL) ? shrunk : out;
    return ISO_SUCCESS;
}
/**
* Convert a str in a specified codeset to WCHAR_T.
* The result must be released with free() when no longer needed

View File

@ -32,6 +32,23 @@ extern inline int round_up(unsigned int n, unsigned int mul)
int int_pow(int base, int power);
/**
* Convert the charset encoding of a given string.
*
* @param input
* Input string (NUL-terminated)
* @param icharset
* Input charset. Must be supported by iconv
* @param ocharset
* Output charset. Must be supported by iconv
* @param output
* Location where the pointer to the output string will be stored.
* On success it receives a newly allocated string that the caller
* must free() when no longer needed.
* @return
* 1 on success, < 0 on error
*/
int strconv(const char *input, const char *icharset, const char *ocharset,
char **output);
/**
* Convert a given string from any input charset to ASCII
*
@ -39,7 +56,7 @@ int int_pow(int base, int power);
* Input charset. Must be supported by iconv
* @param input
* Input string
* @param ouput
* @param output
* Location where the pointer to the output string will be stored
* @return
* 1 on success, < 0 on error

View File

@ -5,8 +5,46 @@
*/
#include "test.h"
#include "util.h"
#include "error.h"
#include <string.h>
#include <stdlib.h>
/*
 * Check strconv() with one valid conversion (ISO-8859-15 to UTF-8) and one
 * input that is not valid in the declared source charset.
 */
static void test_strconv()
{
    int ret;
    char *out = NULL;

    /*
     * The bytes spell the Portuguese sentence
     * "Prova de cadeia com codificação ISO-8859-15" (roughly "string test
     * with ISO-8859-15 encoding") followed by a newline.
     */
    unsigned char in1[45] =
        {0x50, 0x72, 0x6f, 0x76, 0x61, 0x20, 0x64, 0x65, 0x20, 0x63, 0x61,
         0x64, 0x65, 0x69, 0x61, 0x20, 0x63, 0x6f, 0x6d, 0x20, 0x63, 0x6f,
         0x64, 0x69, 0x66, 0x69, 0x63, 0x61, 0xe7, 0xe3, 0x6f, 0x20, 0x49,
         0x53, 0x4f, 0x2d, 0x38, 0x38, 0x35, 0x39, 0x2d, 0x31, 0x35, 0x0a,
         0x00}; /* encoded in ISO-8859-15 */
    unsigned char out1[47] =
        {0x50, 0x72, 0x6f, 0x76, 0x61, 0x20, 0x64, 0x65, 0x20, 0x63, 0x61,
         0x64, 0x65, 0x69, 0x61, 0x20, 0x63, 0x6f, 0x6d, 0x20, 0x63, 0x6f,
         0x64, 0x69, 0x66, 0x69, 0x63, 0x61, 0xc3, 0xa7, 0xc3, 0xa3, 0x6f,
         0x20, 0x49, 0x53, 0x4f, 0x2d, 0x38, 0x38, 0x35, 0x39, 0x2d, 0x31,
         0x35, 0x0a, 0x00}; /* encoded in UTF-8 */
    unsigned char in2[45] =
        {0x50, 0x72, 0x6f, 0x76, 0x61, 0x20, 0x64, 0x65, 0x20, 0x63, 0x61,
         0x64, 0x65, 0x69, 0x61, 0x20, 0x63, 0x6f, 0x6d, 0x20, 0x63, 0x6f,
         0x64, 0x69, 0x66, 0x69, 0x63, 0x61, 0xe7, 0xe3, 0x6f, 0x20, 0x49,
         0x53, 0x4f, 0x2d, 0x38, 0x38, 0xff, 0xff, 0x2d, 0x31, 0x35, 0x0a,
         0x00}; /* incorrect encoding */

    /* ISO-8859-15 to UTF-8 */
    ret = strconv((char*)in1, "ISO-8859-15", "UTF-8", &out);
    CU_ASSERT_EQUAL(ret, 1);
    if (ret == 1) {
        /* only inspect and release the result if one was produced */
        CU_ASSERT_STRING_EQUAL(out, (char*)out1);
        free(out);
    }
    out = NULL;

    /* try with an incorrect input */
    ret = strconv((char*)in2, "UTF-8", "ISO-8859-15", &out);
    CU_ASSERT_EQUAL(ret, ISO_CHARSET_CONV_ERROR);
    if (ret == 1) {
        /* unexpected success: the assertion above fails, but don't leak */
        free(out);
    }
}
static void test_div_up()
{
@ -260,6 +298,7 @@ void add_util_suite()
{
CU_pSuite pSuite = CU_add_suite("UtilSuite", NULL, NULL);
CU_add_test(pSuite, "strconv()", test_strconv);
CU_add_test(pSuite, "div_up()", test_div_up);
CU_add_test(pSuite, "round_up()", test_round_up);
CU_add_test(pSuite, "iso_bb()", test_iso_bb);