loop_wchar.h 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458
  1. /*
  2. * Copyright (C) 2000-2002, 2005-2006, 2008 Free Software Foundation, Inc.
  3. * This file is part of the GNU LIBICONV Library.
  4. *
  5. * The GNU LIBICONV Library is free software; you can redistribute it
  6. * and/or modify it under the terms of the GNU Library General Public
  7. * License as published by the Free Software Foundation; either version 2
  8. * of the License, or (at your option) any later version.
  9. *
  10. * The GNU LIBICONV Library is distributed in the hope that it will be
  11. * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  13. * Library General Public License for more details.
  14. *
  15. * You should have received a copy of the GNU Library General Public
  16. * License along with the GNU LIBICONV Library; see the file COPYING.LIB.
  17. * If not, write to the Free Software Foundation, Inc., 51 Franklin Street,
  18. * Fifth Floor, Boston, MA 02110-1301, USA.
  19. */
  20. /* This file defines three conversion loops:
  21. - from wchar_t to anything else,
  22. - from anything else to wchar_t,
  23. - from wchar_t to wchar_t.
  24. */
  25. #if HAVE_WCRTOMB || HAVE_MBRTOWC
  26. # include <wchar.h>
  27. # define BUF_SIZE 64 /* assume MB_LEN_MAX <= 64 */
  28. /* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */
  29. extern size_t mbrtowc ();
  30. # ifdef mbstate_t
  31. # define mbrtowc(pwc, s, n, ps) (mbrtowc)(pwc, s, n, 0)
  32. # define mbsinit(ps) 1
  33. # endif
  34. # ifndef mbsinit
  35. # if !HAVE_MBSINIT
  36. # define mbsinit(ps) 1
  37. # endif
  38. # endif
  39. #endif
  40. /*
  41. * The first two conversion loops have an extended conversion descriptor.
  42. */
  43. struct wchar_conv_struct {
  44. struct conv_struct parent;
  45. #if HAVE_WCRTOMB || HAVE_MBRTOWC
  46. mbstate_t state;
  47. #endif
  48. };
  49. #if HAVE_WCRTOMB
  50. /* From wchar_t to anything else. */
  51. #ifndef LIBICONV_PLUG
  52. #if 0
  53. struct wc_to_mb_fallback_locals {
  54. struct wchar_conv_struct * l_wcd;
  55. char* l_outbuf;
  56. size_t l_outbytesleft;
  57. int l_errno;
  58. };
  59. /* A callback that writes a string given in the locale encoding. */
  60. static void wc_to_mb_write_replacement (const char *buf, size_t buflen,
  61. void* callback_arg)
  62. {
  63. struct wc_to_mb_fallback_locals * plocals =
  64. (struct wc_to_mb_fallback_locals *) callback_arg;
  65. /* Do nothing if already encountered an error in a previous call. */
  66. if (plocals->l_errno == 0) {
  67. /* Attempt to convert the passed buffer to the target encoding.
  68. Here we don't support characters split across multiple calls. */
  69. const char* bufptr = buf;
  70. size_t bufleft = buflen;
  71. size_t res = unicode_loop_convert(&plocals->l_wcd->parent,
  72. &bufptr,&bufleft,
  73. &plocals->l_outbuf,&plocals->l_outbytesleft);
  74. if (res == (size_t)(-1)) {
  75. if (errno == EILSEQ || errno == EINVAL)
  76. /* Invalid buf contents. */
  77. plocals->l_errno = EILSEQ;
  78. else if (errno == E2BIG)
  79. /* Output buffer too small. */
  80. plocals->l_errno = E2BIG;
  81. else
  82. abort();
  83. } else {
  84. /* Successful conversion. */
  85. if (bufleft > 0)
  86. abort();
  87. }
  88. }
  89. }
  90. #else
  91. struct wc_to_mb_fallback_locals {
  92. char* l_outbuf;
  93. size_t l_outbytesleft;
  94. int l_errno;
  95. };
  96. /* A callback that writes a string given in the target encoding. */
  97. static void wc_to_mb_write_replacement (const char *buf, size_t buflen,
  98. void* callback_arg)
  99. {
  100. struct wc_to_mb_fallback_locals * plocals =
  101. (struct wc_to_mb_fallback_locals *) callback_arg;
  102. /* Do nothing if already encountered an error in a previous call. */
  103. if (plocals->l_errno == 0) {
  104. /* Attempt to copy the passed buffer to the output buffer. */
  105. if (plocals->l_outbytesleft < buflen)
  106. plocals->l_errno = E2BIG;
  107. else {
  108. memcpy(plocals->l_outbuf, buf, buflen);
  109. plocals->l_outbuf += buflen;
  110. plocals->l_outbytesleft -= buflen;
  111. }
  112. }
  113. }
  114. #endif
  115. #endif /* !LIBICONV_PLUG */
  116. static size_t wchar_from_loop_convert (iconv_t icd,
  117. const char* * inbuf, size_t *inbytesleft,
  118. char* * outbuf, size_t *outbytesleft)
  119. {
  120. struct wchar_conv_struct * wcd = (struct wchar_conv_struct *) icd;
  121. size_t result = 0;
  122. while (*inbytesleft >= sizeof(wchar_t)) {
  123. const wchar_t * inptr = (const wchar_t *) *inbuf;
  124. size_t inleft = *inbytesleft;
  125. char buf[BUF_SIZE];
  126. mbstate_t state = wcd->state;
  127. size_t bufcount = 0;
  128. while (inleft >= sizeof(wchar_t)) {
  129. /* Convert one wchar_t to multibyte representation. */
  130. size_t count = wcrtomb(buf+bufcount,*inptr,&state);
  131. if (count == (size_t)(-1)) {
  132. /* Invalid input. */
  133. if (wcd->parent.discard_ilseq) {
  134. count = 0;
  135. }
  136. #ifndef LIBICONV_PLUG
  137. else if (wcd->parent.fallbacks.wc_to_mb_fallback != NULL) {
  138. /* Drop the contents of buf[] accumulated so far, and instead
  139. pass all queued wide characters to the fallback handler. */
  140. struct wc_to_mb_fallback_locals locals;
  141. const wchar_t * fallback_inptr;
  142. #if 0
  143. locals.l_wcd = wcd;
  144. #endif
  145. locals.l_outbuf = *outbuf;
  146. locals.l_outbytesleft = *outbytesleft;
  147. locals.l_errno = 0;
  148. for (fallback_inptr = (const wchar_t *) *inbuf;
  149. fallback_inptr <= inptr;
  150. fallback_inptr++)
  151. wcd->parent.fallbacks.wc_to_mb_fallback(*fallback_inptr,
  152. wc_to_mb_write_replacement,
  153. &locals,
  154. wcd->parent.fallbacks.data);
  155. if (locals.l_errno != 0) {
  156. errno = locals.l_errno;
  157. return -1;
  158. }
  159. wcd->state = state;
  160. *inbuf = (const char *) (inptr + 1);
  161. *inbytesleft = inleft - sizeof(wchar_t);
  162. *outbuf = locals.l_outbuf;
  163. *outbytesleft = locals.l_outbytesleft;
  164. result += 1;
  165. break;
  166. }
  167. #endif
  168. else {
  169. errno = EILSEQ;
  170. return -1;
  171. }
  172. }
  173. inptr++;
  174. inleft -= sizeof(wchar_t);
  175. bufcount += count;
  176. if (count == 0) {
  177. /* Continue, append next wchar_t. */
  178. } else {
  179. /* Attempt to convert the accumulated multibyte representations
  180. to the target encoding. */
  181. const char* bufptr = buf;
  182. size_t bufleft = bufcount;
  183. char* outptr = *outbuf;
  184. size_t outleft = *outbytesleft;
  185. size_t res = unicode_loop_convert(&wcd->parent,
  186. &bufptr,&bufleft,
  187. &outptr,&outleft);
  188. if (res == (size_t)(-1)) {
  189. if (errno == EILSEQ)
  190. /* Invalid input. */
  191. return -1;
  192. else if (errno == E2BIG)
  193. /* Output buffer too small. */
  194. return -1;
  195. else if (errno == EINVAL) {
  196. /* Continue, append next wchar_t, but avoid buffer overrun. */
  197. if (bufcount + MB_CUR_MAX > BUF_SIZE)
  198. abort();
  199. } else
  200. abort();
  201. } else {
  202. /* Successful conversion. */
  203. wcd->state = state;
  204. *inbuf = (const char *) inptr;
  205. *inbytesleft = inleft;
  206. *outbuf = outptr;
  207. *outbytesleft = outleft;
  208. result += res;
  209. break;
  210. }
  211. }
  212. }
  213. }
  214. return result;
  215. }
  216. static size_t wchar_from_loop_reset (iconv_t icd,
  217. char* * outbuf, size_t *outbytesleft)
  218. {
  219. struct wchar_conv_struct * wcd = (struct wchar_conv_struct *) icd;
  220. if (outbuf == NULL || *outbuf == NULL) {
  221. /* Reset the states. */
  222. memset(&wcd->state,'\0',sizeof(mbstate_t));
  223. return unicode_loop_reset(&wcd->parent,NULL,NULL);
  224. } else {
  225. if (!mbsinit(&wcd->state)) {
  226. mbstate_t state = wcd->state;
  227. char buf[BUF_SIZE];
  228. size_t bufcount = wcrtomb(buf,(wchar_t)0,&state);
  229. if (bufcount == (size_t)(-1) || bufcount == 0 || buf[bufcount-1] != '\0')
  230. abort();
  231. else {
  232. const char* bufptr = buf;
  233. size_t bufleft = bufcount-1;
  234. char* outptr = *outbuf;
  235. size_t outleft = *outbytesleft;
  236. size_t res = unicode_loop_convert(&wcd->parent,
  237. &bufptr,&bufleft,
  238. &outptr,&outleft);
  239. if (res == (size_t)(-1)) {
  240. if (errno == E2BIG)
  241. return -1;
  242. else
  243. abort();
  244. } else {
  245. res = unicode_loop_reset(&wcd->parent,&outptr,&outleft);
  246. if (res == (size_t)(-1))
  247. return res;
  248. else {
  249. /* Successful. */
  250. wcd->state = state;
  251. *outbuf = outptr;
  252. *outbytesleft = outleft;
  253. return 0;
  254. }
  255. }
  256. }
  257. } else
  258. return unicode_loop_reset(&wcd->parent,outbuf,outbytesleft);
  259. }
  260. }
  261. #endif
  262. #if HAVE_MBRTOWC
  263. /* From anything else to wchar_t. */
  264. #ifndef LIBICONV_PLUG
  265. struct mb_to_wc_fallback_locals {
  266. char* l_outbuf;
  267. size_t l_outbytesleft;
  268. int l_errno;
  269. };
  270. static void mb_to_wc_write_replacement (const wchar_t *buf, size_t buflen,
  271. void* callback_arg)
  272. {
  273. struct mb_to_wc_fallback_locals * plocals =
  274. (struct mb_to_wc_fallback_locals *) callback_arg;
  275. /* Do nothing if already encountered an error in a previous call. */
  276. if (plocals->l_errno == 0) {
  277. /* Attempt to copy the passed buffer to the output buffer. */
  278. if (plocals->l_outbytesleft < sizeof(wchar_t)*buflen)
  279. plocals->l_errno = E2BIG;
  280. else {
  281. for (; buflen > 0; buf++, buflen--) {
  282. *(wchar_t*) plocals->l_outbuf = *buf;
  283. plocals->l_outbuf += sizeof(wchar_t);
  284. plocals->l_outbytesleft -= sizeof(wchar_t);
  285. }
  286. }
  287. }
  288. }
  289. #endif /* !LIBICONV_PLUG */
  290. static size_t wchar_to_loop_convert (iconv_t icd,
  291. const char* * inbuf, size_t *inbytesleft,
  292. char* * outbuf, size_t *outbytesleft)
  293. {
  294. struct wchar_conv_struct * wcd = (struct wchar_conv_struct *) icd;
  295. size_t result = 0;
  296. while (*inbytesleft > 0) {
  297. size_t incount;
  298. for (incount = 1; incount <= *inbytesleft; incount++) {
  299. char buf[BUF_SIZE];
  300. const char* inptr = *inbuf;
  301. size_t inleft = incount;
  302. char* bufptr = buf;
  303. size_t bufleft = BUF_SIZE;
  304. size_t res = unicode_loop_convert(&wcd->parent,
  305. &inptr,&inleft,
  306. &bufptr,&bufleft);
  307. if (res == (size_t)(-1)) {
  308. if (errno == EILSEQ)
  309. /* Invalid input. */
  310. return -1;
  311. else if (errno == EINVAL) {
  312. /* Incomplete input. Next try with one more input byte. */
  313. } else
  314. /* E2BIG shouldn't occur. */
  315. abort();
  316. } else {
  317. /* Successful conversion. */
  318. size_t bufcount = bufptr-buf; /* = BUF_SIZE-bufleft */
  319. mbstate_t state = wcd->state;
  320. wchar_t wc;
  321. res = mbrtowc(&wc,buf,bufcount,&state);
  322. if (res == (size_t)(-2)) {
  323. /* Next try with one more input byte. */
  324. } else {
  325. if (res == (size_t)(-1)) {
  326. /* Invalid input. */
  327. if (wcd->parent.discard_ilseq) {
  328. }
  329. #ifndef LIBICONV_PLUG
  330. else if (wcd->parent.fallbacks.mb_to_wc_fallback != NULL) {
  331. /* Drop the contents of buf[] accumulated so far, and instead
  332. pass all queued chars to the fallback handler. */
  333. struct mb_to_wc_fallback_locals locals;
  334. locals.l_outbuf = *outbuf;
  335. locals.l_outbytesleft = *outbytesleft;
  336. locals.l_errno = 0;
  337. wcd->parent.fallbacks.mb_to_wc_fallback(*inbuf, incount,
  338. mb_to_wc_write_replacement,
  339. &locals,
  340. wcd->parent.fallbacks.data);
  341. if (locals.l_errno != 0) {
  342. errno = locals.l_errno;
  343. return -1;
  344. }
  345. /* Restoring the state is not needed because it is the initial
  346. state anyway: For all known locale encodings, the multibyte
  347. to wchar_t conversion doesn't have shift state, and we have
  348. excluded partial accumulated characters. */
  349. /* wcd->state = state; */
  350. *inbuf += incount;
  351. *inbytesleft -= incount;
  352. *outbuf = locals.l_outbuf;
  353. *outbytesleft = locals.l_outbytesleft;
  354. result += 1;
  355. break;
  356. }
  357. #endif
  358. else
  359. return -1;
  360. } else {
  361. if (*outbytesleft < sizeof(wchar_t)) {
  362. errno = E2BIG;
  363. return -1;
  364. }
  365. *(wchar_t*) *outbuf = wc;
  366. /* Restoring the state is not needed because it is the initial
  367. state anyway: For all known locale encodings, the multibyte
  368. to wchar_t conversion doesn't have shift state, and we have
  369. excluded partial accumulated characters. */
  370. /* wcd->state = state; */
  371. *outbuf += sizeof(wchar_t);
  372. *outbytesleft -= sizeof(wchar_t);
  373. }
  374. *inbuf += incount;
  375. *inbytesleft -= incount;
  376. result += res;
  377. break;
  378. }
  379. }
  380. }
  381. }
  382. return result;
  383. }
  384. static size_t wchar_to_loop_reset (iconv_t icd,
  385. char* * outbuf, size_t *outbytesleft)
  386. {
  387. struct wchar_conv_struct * wcd = (struct wchar_conv_struct *) icd;
  388. size_t res = unicode_loop_reset(&wcd->parent,outbuf,outbytesleft);
  389. if (res == (size_t)(-1))
  390. return res;
  391. memset(&wcd->state,0,sizeof(mbstate_t));
  392. return 0;
  393. }
  394. #endif
  395. /* From wchar_t to wchar_t. */
  396. static size_t wchar_id_loop_convert (iconv_t icd,
  397. const char* * inbuf, size_t *inbytesleft,
  398. char* * outbuf, size_t *outbytesleft)
  399. {
  400. struct conv_struct * cd = (struct conv_struct *) icd;
  401. const wchar_t* inptr = (const wchar_t*) *inbuf;
  402. size_t inleft = *inbytesleft / sizeof(wchar_t);
  403. wchar_t* outptr = (wchar_t*) *outbuf;
  404. size_t outleft = *outbytesleft / sizeof(wchar_t);
  405. size_t count = (inleft <= outleft ? inleft : outleft);
  406. if (count > 0) {
  407. *inbytesleft -= count * sizeof(wchar_t);
  408. *outbytesleft -= count * sizeof(wchar_t);
  409. do {
  410. wchar_t wc = *inptr++;
  411. *outptr++ = wc;
  412. #ifndef LIBICONV_PLUG
  413. if (cd->hooks.wc_hook)
  414. (*cd->hooks.wc_hook)(wc, cd->hooks.data);
  415. #endif
  416. } while (--count > 0);
  417. *inbuf = (const char*) inptr;
  418. *outbuf = (char*) outptr;
  419. }
  420. return 0;
  421. }
  422. static size_t wchar_id_loop_reset (iconv_t icd,
  423. char* * outbuf, size_t *outbytesleft)
  424. {
  425. return 0;
  426. }