iconv_string.c 4.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154
/* Copyright (C) 1999-2001, 2003 Bruno Haible.
   This file is not part of the GNU LIBICONV Library.
   This file is put into the public domain. */
  4. #include "iconv_string.h"
  5. #include <iconv.h>
  6. #include <errno.h>
  7. #include <stdlib.h>
  8. #include <string.h>
  9. #define tmpbufsize 4096
  10. int iconv_string (const char* tocode, const char* fromcode,
  11. const char* start, const char* end,
  12. char** resultp, size_t* lengthp)
  13. {
  14. iconv_t cd = iconv_open(tocode,fromcode);
  15. size_t length;
  16. char* result;
  17. if (cd == (iconv_t)(-1)) {
  18. if (errno != EINVAL)
  19. return -1;
  20. /* Unsupported fromcode or tocode. Check whether the caller requested
  21. autodetection. */
  22. if (!strcmp(fromcode,"autodetect_utf8")) {
  23. int ret;
  24. /* Try UTF-8 first. There are very few ISO-8859-1 inputs that would
  25. be valid UTF-8, but many UTF-8 inputs are valid ISO-8859-1. */
  26. ret = iconv_string(tocode,"UTF-8",start,end,resultp,lengthp);
  27. if (!(ret < 0 && errno == EILSEQ))
  28. return ret;
  29. ret = iconv_string(tocode,"ISO-8859-1",start,end,resultp,lengthp);
  30. return ret;
  31. }
  32. if (!strcmp(fromcode,"autodetect_jp")) {
  33. int ret;
  34. /* Try 7-bit encoding first. If the input contains bytes >= 0x80,
  35. it will fail. */
  36. ret = iconv_string(tocode,"ISO-2022-JP-2",start,end,resultp,lengthp);
  37. if (!(ret < 0 && errno == EILSEQ))
  38. return ret;
  39. /* Try EUC-JP next. Short SHIFT_JIS inputs may come out wrong. This
  40. is unavoidable. People will condemn SHIFT_JIS.
  41. If we tried SHIFT_JIS first, then some short EUC-JP inputs would
  42. come out wrong, and people would condemn EUC-JP and Unix, which
  43. would not be good. */
  44. ret = iconv_string(tocode,"EUC-JP",start,end,resultp,lengthp);
  45. if (!(ret < 0 && errno == EILSEQ))
  46. return ret;
  47. /* Finally try SHIFT_JIS. */
  48. ret = iconv_string(tocode,"SHIFT_JIS",start,end,resultp,lengthp);
  49. return ret;
  50. }
  51. if (!strcmp(fromcode,"autodetect_kr")) {
  52. int ret;
  53. /* Try 7-bit encoding first. If the input contains bytes >= 0x80,
  54. it will fail. */
  55. ret = iconv_string(tocode,"ISO-2022-KR",start,end,resultp,lengthp);
  56. if (!(ret < 0 && errno == EILSEQ))
  57. return ret;
  58. /* Finally try EUC-KR. */
  59. ret = iconv_string(tocode,"EUC-KR",start,end,resultp,lengthp);
  60. return ret;
  61. }
  62. errno = EINVAL;
  63. return -1;
  64. }
  65. /* Determine the length we need. */
  66. {
  67. size_t count = 0;
  68. char tmpbuf[tmpbufsize];
  69. const char* inptr = start;
  70. size_t insize = end-start;
  71. while (insize > 0) {
  72. char* outptr = tmpbuf;
  73. size_t outsize = tmpbufsize;
  74. size_t res = iconv(cd,&inptr,&insize,&outptr,&outsize);
  75. if (res == (size_t)(-1) && errno != E2BIG) {
  76. if (errno == EINVAL)
  77. break;
  78. else {
  79. int saved_errno = errno;
  80. iconv_close(cd);
  81. errno = saved_errno;
  82. return -1;
  83. }
  84. }
  85. count += outptr-tmpbuf;
  86. }
  87. {
  88. char* outptr = tmpbuf;
  89. size_t outsize = tmpbufsize;
  90. size_t res = iconv(cd,NULL,NULL,&outptr,&outsize);
  91. if (res == (size_t)(-1)) {
  92. int saved_errno = errno;
  93. iconv_close(cd);
  94. errno = saved_errno;
  95. return -1;
  96. }
  97. count += outptr-tmpbuf;
  98. }
  99. length = count;
  100. }
  101. if (lengthp != NULL)
  102. *lengthp = length;
  103. if (resultp == NULL) {
  104. iconv_close(cd);
  105. return 0;
  106. }
  107. result = (*resultp == NULL ? malloc(length) : realloc(*resultp,length));
  108. *resultp = result;
  109. if (length == 0) {
  110. iconv_close(cd);
  111. return 0;
  112. }
  113. if (result == NULL) {
  114. iconv_close(cd);
  115. errno = ENOMEM;
  116. return -1;
  117. }
  118. iconv(cd,NULL,NULL,NULL,NULL); /* return to the initial state */
  119. /* Do the conversion for real. */
  120. {
  121. const char* inptr = start;
  122. size_t insize = end-start;
  123. char* outptr = result;
  124. size_t outsize = length;
  125. while (insize > 0) {
  126. size_t res = iconv(cd,&inptr,&insize,&outptr,&outsize);
  127. if (res == (size_t)(-1)) {
  128. if (errno == EINVAL)
  129. break;
  130. else {
  131. int saved_errno = errno;
  132. iconv_close(cd);
  133. errno = saved_errno;
  134. return -1;
  135. }
  136. }
  137. }
  138. {
  139. size_t res = iconv(cd,NULL,NULL,&outptr,&outsize);
  140. if (res == (size_t)(-1)) {
  141. int saved_errno = errno;
  142. iconv_close(cd);
  143. errno = saved_errno;
  144. return -1;
  145. }
  146. }
  147. if (outsize != 0) abort();
  148. }
  149. iconv_close(cd);
  150. return 0;
  151. }