123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154 |
- /* Copyright (C) 1999-2001, 2003 Bruno Haible.
- This file is not part of the GNU LIBICONV Library.
- This file is put into the public domain. */
- #include "iconv_string.h"
- #include <iconv.h>
- #include <errno.h>
- #include <stdlib.h>
- #include <string.h>
/* Size of the on-stack scratch buffer used while measuring the output. */
#define tmpbufsize 4096

/* Closes CD without disturbing the errno value of the failure being
   reported. Always returns -1, so that error paths can simply write
   `return iconv_string_fail(cd);`. */
static int iconv_string_fail (iconv_t cd)
{
  int saved_errno = errno;
  iconv_close(cd);
  errno = saved_errno;
  return -1;
}

/* Converts the memory region [START, END) from encoding FROMCODE to
   encoding TOCODE.

   FROMCODE and TOCODE are passed to iconv_open(). If iconv_open() rejects
   them with EINVAL, FROMCODE may additionally be one of the autodetection
   aliases "autodetect_utf8", "autodetect_jp" or "autodetect_kr"; the
   candidate encodings of the alias are then tried in order, moving on to
   the next candidate only when the previous one failed with EILSEQ.

   The conversion is done in two passes: a first pass through a scratch
   buffer measures the output, a second pass writes it into a block of
   exactly that size.

   If LENGTHP is not NULL, the number of output bytes is stored in *LENGTHP.
   If RESULTP is NULL, only the length is computed. Otherwise *RESULTP must
   be NULL or a malloc()ed block; it is resized with realloc() and receives
   the output (no terminator is appended). For empty output *RESULTP is
   still a valid 1-byte block that can be passed to free().

   An incomplete multibyte sequence at the very end of the input (iconv
   reporting EINVAL) is dropped; everything before it is converted.

   Returns 0 on success, -1 with errno set on failure (EILSEQ for invalid
   input, ENOMEM when the result cannot be allocated, EINVAL for an
   unsupported encoding). On ENOMEM *RESULTP is left untouched, so the
   caller still owns the previous block. */
int iconv_string (const char* tocode, const char* fromcode,
                  const char* start, const char* end,
                  char** resultp, size_t* lengthp)
{
  /* Autodetection aliases and the encodings tried for each, in order.
     Every candidate list is NULL-terminated. */
  static const struct {
    const char* alias;
    const char* candidates[4];
  } autodetect[] = {
    /* Try UTF-8 first. There are very few ISO-8859-1 inputs that would
       be valid UTF-8, but many UTF-8 inputs are valid ISO-8859-1. */
    { "autodetect_utf8", { "UTF-8", "ISO-8859-1", NULL } },
    /* Try the 7-bit encoding first; it fails if the input contains bytes
       >= 0x80. Then EUC-JP, finally SHIFT_JIS. Short SHIFT_JIS inputs may
       come out wrong; this is unavoidable. If SHIFT_JIS were tried before
       EUC-JP, some short EUC-JP inputs would come out wrong instead. */
    { "autodetect_jp", { "ISO-2022-JP-2", "EUC-JP", "SHIFT_JIS", NULL } },
    /* Try the 7-bit encoding first; it fails if the input contains bytes
       >= 0x80. Then EUC-KR. */
    { "autodetect_kr", { "ISO-2022-KR", "EUC-KR", NULL } }
  };
  iconv_t cd = iconv_open(tocode,fromcode);
  size_t length;
  char* result;

  if (cd == (iconv_t)(-1)) {
    size_t i;
    if (errno != EINVAL)
      return -1;
    /* Unsupported fromcode or tocode. Check whether the caller requested
       autodetection. */
    for (i = 0; i < sizeof(autodetect)/sizeof(autodetect[0]); i++) {
      const char* const* cand;
      if (strcmp(fromcode,autodetect[i].alias) != 0)
        continue;
      for (cand = autodetect[i].candidates; ; cand++) {
        int ret = iconv_string(tocode,*cand,start,end,resultp,lengthp);
        /* The last candidate's outcome is final; an earlier one is final
           unless it failed because the input is invalid in that encoding. */
        if (cand[1] == NULL || !(ret < 0 && errno == EILSEQ))
          return ret;
      }
    }
    errno = EINVAL;
    return -1;
  }

  /* Determine the length we need. */
  {
    size_t count = 0;
    char tmpbuf[tmpbufsize];
    const char* inptr = start;
    size_t insize = (size_t)(end-start);
    while (insize > 0) {
      char* outptr = tmpbuf;
      size_t outsize = tmpbufsize;
      /* The input pointer goes through void* so that the call compiles
         against both the POSIX prototype (char **) and the older one that
         takes const char **. */
      size_t res = iconv(cd,(void*)&inptr,&insize,&outptr,&outsize);
      if (res == (size_t)(-1) && errno != E2BIG && errno != EINVAL)
        return iconv_string_fail(cd);
      /* Count what this call produced even when it stopped early: the
         second pass will emit exactly the same bytes. */
      count += (size_t)(outptr-tmpbuf);
      if (res == (size_t)(-1) && errno == EINVAL)
        break; /* incomplete sequence at the end of the input */
    }
    {
      /* Flush: account for a trailing shift sequence, if any. */
      char* outptr = tmpbuf;
      size_t outsize = tmpbufsize;
      if (iconv(cd,NULL,NULL,&outptr,&outsize) == (size_t)(-1))
        return iconv_string_fail(cd);
      count += (size_t)(outptr-tmpbuf);
    }
    length = count;
  }

  if (lengthp != NULL)
    *lengthp = length;
  if (resultp == NULL) {
    iconv_close(cd);
    return 0;
  }

  /* realloc(NULL, n) behaves like malloc(n). Never request 0 bytes (the
     outcome of a zero-sized request is not portable), and only replace
     *resultp once the call has succeeded, so that the caller's block is
     not lost on failure. */
  result = realloc(*resultp, length > 0 ? length : 1);
  if (result == NULL) {
    iconv_close(cd);
    errno = ENOMEM;
    return -1;
  }
  *resultp = result;
  if (length == 0) {
    iconv_close(cd);
    return 0;
  }

  iconv(cd,NULL,NULL,NULL,NULL); /* return to the initial state */

  /* Do the conversion for real. */
  {
    const char* inptr = start;
    size_t insize = (size_t)(end-start);
    char* outptr = result;
    size_t outsize = length;
    while (insize > 0) {
      size_t res = iconv(cd,(void*)&inptr,&insize,&outptr,&outsize);
      if (res == (size_t)(-1)) {
        if (errno == EINVAL)
          break; /* incomplete sequence at the end of the input */
        return iconv_string_fail(cd);
      }
    }
    if (iconv(cd,NULL,NULL,&outptr,&outsize) == (size_t)(-1))
      return iconv_string_fail(cd);
    /* Both passes must agree on the output size. */
    if (outsize != 0) abort();
  }

  iconv_close(cd);
  return 0;
}
|