iconv.c 37 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111
  1. /* Copyright (C) 2000-2009 Free Software Foundation, Inc.
  2. This file is part of the GNU LIBICONV Library.
  3. This program is free software: you can redistribute it and/or modify
  4. it under the terms of the GNU General Public License as published by
  5. the Free Software Foundation; either version 3 of the License, or
  6. (at your option) any later version.
  7. This program is distributed in the hope that it will be useful,
  8. but WITHOUT ANY WARRANTY; without even the implied warranty of
  9. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  10. GNU General Public License for more details.
  11. You should have received a copy of the GNU General Public License
  12. along with this program. If not, see <http://www.gnu.org/licenses/>. */
  13. #include "config.h"
  14. #ifndef ICONV_CONST
  15. # define ICONV_CONST
  16. #endif
  17. #include <limits.h>
  18. #include <stddef.h>
  19. #include <stdio.h>
  20. #include <stdlib.h>
  21. #include <string.h>
  22. #include <iconv.h>
  23. #include <errno.h>
  24. #include <locale.h>
  25. #include <fcntl.h>
  26. /* Ensure that iconv_no_i18n does not depend on libintl. */
  27. #ifdef NO_I18N
  28. # undef ENABLE_NLS
  29. # undef ENABLE_RELOCATABLE
  30. #endif
  31. #include "binary-io.h"
  32. #include "progname.h"
  33. #include "relocatable.h"
  34. #include "xalloc.h"
  35. #include "uniwidth.h"
  36. #include "uniwidth/cjk.h"
  37. /* Ensure that iconv_no_i18n does not depend on libintl. */
  38. #ifdef NO_I18N
  39. #include <stdarg.h>
  40. static void
  41. error (int status, int errnum, const char *message, ...)
  42. {
  43. va_list args;
  44. fflush(stdout);
  45. fprintf(stderr,"%s: ",program_name);
  46. va_start(args,message);
  47. vfprintf(stderr,message,args);
  48. va_end(args);
  49. if (errnum) {
  50. const char *s = strerror(errnum);
  51. if (s == NULL)
  52. s = "Unknown system error";
  53. }
  54. putc('\n',stderr);
  55. fflush(stderr);
  56. if (status)
  57. exit(status);
  58. }
  59. #else
  60. # include "error.h"
  61. #endif
  62. #include "gettext.h"
  63. #define _(str) gettext(str)
  64. /* Ensure that iconv_no_i18n does not depend on libintl. */
  65. #ifdef NO_I18N
  66. # define xmalloc malloc
  67. # define xalloc_die abort
  68. #endif
  69. /* Locale independent test for a decimal digit.
  70. Argument can be 'char' or 'unsigned char'. (Whereas the argument of
  71. <ctype.h> isdigit must be an 'unsigned char'.) */
  72. #undef isdigit
  73. #define isdigit(c) ((unsigned int) ((c) - '0') < 10)
  74. /* Locale independent test for a printable character.
  75. Argument can be 'char' or 'unsigned char'. (Whereas the argument of
  76. <ctype.h> isprint must be an 'unsigned char'.) */
  77. #define c_isprint(c) ((c) >= ' ' && (c) <= '~')
  78. /* ========================================================================= */
  79. static int discard_unconvertible = 0;
  80. static int silent = 0;
  81. static void usage (int exitcode)
  82. {
  83. if (exitcode != 0) {
  84. const char* helpstring1 =
  85. /* TRANSLATORS: The first line of the short usage message. */
  86. _("Usage: iconv [-c] [-s] [-f fromcode] [-t tocode] [file ...]");
  87. const char* helpstring2 =
  88. /* TRANSLATORS: The second line of the short usage message.
  89. Align it correctly against the first line. */
  90. _("or: iconv -l");
  91. fprintf(stderr, "%s\n%s\n", helpstring1, helpstring2);
  92. fprintf(stderr, _("Try `%s --help' for more information.\n"), program_name);
  93. } else {
  94. /* xgettext: no-wrap */
  95. /* TRANSLATORS: The first line of the long usage message.
  96. The %s placeholder expands to the program name. */
  97. printf(_("\
  98. Usage: %s [OPTION...] [-f ENCODING] [-t ENCODING] [INPUTFILE...]\n"),
  99. program_name);
  100. /* xgettext: no-wrap */
  101. /* TRANSLATORS: The second line of the long usage message.
  102. Align it correctly against the first line.
  103. The %s placeholder expands to the program name. */
  104. printf(_("\
  105. or: %s -l\n"),
  106. program_name);
  107. printf("\n");
  108. /* xgettext: no-wrap */
  109. /* TRANSLATORS: Description of the iconv program. */
  110. printf(_("\
  111. Converts text from one encoding to another encoding.\n"));
  112. printf("\n");
  113. /* xgettext: no-wrap */
  114. printf(_("\
  115. Options controlling the input and output format:\n"));
  116. /* xgettext: no-wrap */
  117. printf(_("\
  118. -f ENCODING, --from-code=ENCODING\n\
  119. the encoding of the input\n"));
  120. /* xgettext: no-wrap */
  121. printf(_("\
  122. -t ENCODING, --to-code=ENCODING\n\
  123. the encoding of the output\n"));
  124. printf("\n");
  125. /* xgettext: no-wrap */
  126. printf(_("\
  127. Options controlling conversion problems:\n"));
  128. /* xgettext: no-wrap */
  129. printf(_("\
  130. -c discard unconvertible characters\n"));
  131. /* xgettext: no-wrap */
  132. printf(_("\
  133. --unicode-subst=FORMATSTRING\n\
  134. substitution for unconvertible Unicode characters\n"));
  135. /* xgettext: no-wrap */
  136. printf(_("\
  137. --byte-subst=FORMATSTRING substitution for unconvertible bytes\n"));
  138. /* xgettext: no-wrap */
  139. printf(_("\
  140. --widechar-subst=FORMATSTRING\n\
  141. substitution for unconvertible wide characters\n"));
  142. printf("\n");
  143. /* xgettext: no-wrap */
  144. printf(_("\
  145. Options controlling error output:\n"));
  146. /* xgettext: no-wrap */
  147. printf(_("\
  148. -s, --silent suppress error messages about conversion problems\n"));
  149. printf("\n");
  150. /* xgettext: no-wrap */
  151. printf(_("\
  152. Informative output:\n"));
  153. /* xgettext: no-wrap */
  154. printf(_("\
  155. -l, --list list the supported encodings\n"));
  156. /* xgettext: no-wrap */
  157. printf(_("\
  158. --help display this help and exit\n"));
  159. /* xgettext: no-wrap */
  160. printf(_("\
  161. --version output version information and exit\n"));
  162. printf("\n");
  163. /* TRANSLATORS: The placeholder indicates the bug-reporting address
  164. for this package. Please add _another line_ saying
  165. "Report translation bugs to <...>\n" with the address for translation
  166. bugs (typically your translation team's web or email address). */
  167. fputs(_("Report bugs to <bug-gnu-libiconv@gnu.org>.\n"),stdout);
  168. }
  169. exit(exitcode);
  170. }
  171. static void print_version (void)
  172. {
  173. printf("iconv (GNU libiconv %d.%d)\n",
  174. _libiconv_version >> 8, _libiconv_version & 0xff);
  175. printf("Copyright (C) %s Free Software Foundation, Inc.\n", "2000-2009");
  176. /* xgettext: no-wrap */
  177. fputs (_("\
  178. License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>\n\
  179. This is free software: you are free to change and redistribute it.\n\
  180. There is NO WARRANTY, to the extent permitted by law.\n\
  181. "),stdout);
  182. /* TRANSLATORS: The %s placeholder expands to an author's name. */
  183. printf(_("Written by %s.\n"),"Bruno Haible");
  184. exit(EXIT_SUCCESS);
  185. }
  /* Write the NAMESCOUNT strings in NAMES to stdout on a single line,
     separated by single spaces and terminated by a newline.
     DATA is unused.  Always returns 0.  */
  static int print_one (unsigned int namescount, const char * const * names,
                        void* data)
  {
    const char * const * const stop = names + namescount;
    const char * const * cursor;

    (void)data;
    for (cursor = names; cursor != stop; cursor++) {
      /* A separator goes before every name except the first.  */
      if (cursor != names)
        putc(' ',stdout);
      fputs(*cursor,stdout);
    }
    putc('\n',stdout);
    return 0;
  }
  199. /* ========================================================================= */
  200. /* Line number and column position. */
  201. static unsigned int line;
  202. static unsigned int column;
  203. static const char* cjkcode;
  204. /* Update the line number and column position after a character was
  205. successfully converted. */
  206. static void update_line_column (unsigned int uc, void* data)
  207. {
  208. if (uc == 0x000A) {
  209. line++;
  210. column = 0;
  211. } else {
  212. int width = uc_width(uc, cjkcode);
  213. if (width >= 0)
  214. column += width;
  215. else if (uc == 0x0009)
  216. column += 8 - (column % 8);
  217. }
  218. }
  219. /* ========================================================================= */
  220. /* Production of placeholder strings as fallback for unconvertible
  221. characters. */
  /* Validate FORMAT as a printf-style format string that consumes at most
     one unsigned integer argument, and return an upper bound for the number
     of characters its expansion can produce.
     PARAM_NAME is the command-line option the string came from; it is only
     used in error messages.  Any violation is reported through
     error(EXIT_FAILURE, ...).

     Accepted directive syntax (a subset of POSIX fprintf()):
       - '%',
       - any number of the flags '#', '0', '-', ' ', '+', "'",
       - an optional width: a digit sequence,
       - an optional '.' followed by an optional precision digit sequence,
       - a specifier: '%' (no argument) or one of 'o', 'u', 'x', 'X'
         (one unsigned integer argument).
     Rejected: '*' as width or precision, the size modifiers
     h l L q j z Z t, and every other conversion specifier.  The '%m$'
     reordering syntax and the 'I' flag are not recognized either.  */
  static size_t check_subst_formatstring (const char *format, const char *param_name)
  {
    /* Number of value bits assumed for the unsigned int argument.  */
    const size_t uint_bits = sizeof (unsigned int) * CHAR_BIT;
    size_t total = 0;
    unsigned int args_consumed = 0;
    const char *p = format;

    while (*p != '\0') {
      unsigned int width = 0;
      unsigned int precision = 0;
      unsigned int length;

      if (*p++ != '%') {
        /* An ordinary character contributes exactly one output character.  */
        total++;
        continue;
      }

      /* Flags.  */
      while (*p == ' ' || *p == '+' || *p == '-'
             || *p == '#' || *p == '0' || *p == '\'')
        p++;

      /* Width.  */
      if (*p == '*')
        error(EXIT_FAILURE,0,
              /* TRANSLATORS: An error message.
                 The %s placeholder expands to a command-line option.  */
              _("%s argument: A format directive with a variable width is not allowed here."),
              param_name);
      while (isdigit (*p)) {
        width = 10*width + (*p - '0');
        p++;
      }

      /* Precision.  */
      if (*p == '.') {
        p++;
        if (*p == '*')
          error(EXIT_FAILURE,0,
                /* TRANSLATORS: An error message.
                   The %s placeholder expands to a command-line option.  */
                _("%s argument: A format directive with a variable precision is not allowed here."),
                param_name);
        while (isdigit (*p)) {
          precision = 10*precision + (*p - '0');
          p++;
        }
      }

      /* Size modifiers are not accepted.  */
      switch (*p) {
        case 'h': case 'l': case 'L': case 'q':
        case 'j': case 'z': case 'Z': case 't':
          error(EXIT_FAILURE,0,
                /* TRANSLATORS: An error message.
                   The %s placeholder expands to a command-line option.  */
                _("%s argument: A format directive with a size is not allowed here."),
                param_name);
      }

      /* Conversion specifier.  */
      switch (*p) {
        case '%':
          length = 1;
          break;
        case 'u':
          /* binary -> decimal digits; the +1 turns floor into ceil */
          length = (unsigned int) (uint_bits * 0.30103) + 1;
          if (length < precision)
            length = precision;
          length *= 2; /* estimate for FLAG_GROUP */
          length += 1; /* account for leading sign */
          args_consumed++;
          break;
        case 'o':
          /* binary -> octal digits; the +1 turns floor into ceil */
          length = (unsigned int) (uint_bits * 0.333334) + 1;
          if (length < precision)
            length = precision;
          length += 1; /* account for leading sign */
          args_consumed++;
          break;
        case 'x':
        case 'X':
          /* binary -> hexadecimal digits; the +1 turns floor into ceil */
          length = (unsigned int) (uint_bits * 0.25) + 1;
          if (length < precision)
            length = precision;
          length += 2; /* account for leading sign or alternate form */
          args_consumed++;
          break;
        default:
          if (*p == '\0')
            error(EXIT_FAILURE,0,
                  /* TRANSLATORS: An error message.
                     The %s placeholder expands to a command-line option.  */
                  _("%s argument: The string ends in the middle of a directive."),
                  param_name);
          else if (c_isprint(*p))
            error(EXIT_FAILURE,0,
                  /* TRANSLATORS: An error message.
                     The %s placeholder expands to a command-line option.
                     The %c placeholder expands to an unknown format directive.  */
                  _("%s argument: The character '%c' is not a valid conversion specifier."),
                  param_name,*p);
          else
            error(EXIT_FAILURE,0,
                  /* TRANSLATORS: An error message.
                     The %s placeholder expands to a command-line option.  */
                  _("%s argument: The character that terminates the format directive is not a valid conversion specifier."),
                  param_name);
          abort(); /*NOTREACHED*/
      }
      p++;

      /* A width larger than the converted text pads the output up to it.  */
      if (length < width)
        length = width;
      total += length;
    }

    if (args_consumed > 1)
      error(EXIT_FAILURE,0,
            /* TRANSLATORS: An error message.
               The %s placeholder expands to a command-line option.
               The %u placeholder expands to the number of arguments consumed by the format string.  */
            ngettext("%s argument: The format string consumes more than one argument: %u argument.",
                     "%s argument: The format string consumes more than one argument: %u arguments.",
                     args_consumed),
            param_name,args_consumed);
    return total;
  }
  378. /* Format strings. */
  379. static const char* ilseq_byte_subst;
  380. static const char* ilseq_wchar_subst;
  381. static const char* ilseq_unicode_subst;
  382. /* Maximum result size for each format string. */
  383. static size_t ilseq_byte_subst_size;
  384. static size_t ilseq_wchar_subst_size;
  385. static size_t ilseq_unicode_subst_size;
  386. /* Buffer of size ilseq_byte_subst_size+1. */
  387. static char* ilseq_byte_subst_buffer;
  388. #if HAVE_WCHAR_T
  389. /* Buffer of size ilseq_wchar_subst_size+1. */
  390. static char* ilseq_wchar_subst_buffer;
  391. #endif
  392. /* Buffer of size ilseq_unicode_subst_size+1. */
  393. static char* ilseq_unicode_subst_buffer;
  394. /* Auxiliary variables for subst_mb_to_uc_fallback. */
  395. /* Converter from locale encoding to UCS-4. */
  396. static iconv_t subst_mb_to_uc_cd;
  397. /* Buffer of size ilseq_byte_subst_size. */
  398. static unsigned int* subst_mb_to_uc_temp_buffer;
  399. static void subst_mb_to_uc_fallback
  400. (const char* inbuf, size_t inbufsize,
  401. void (*write_replacement) (const unsigned int *buf, size_t buflen,
  402. void* callback_arg),
  403. void* callback_arg,
  404. void* data)
  405. {
  406. for (; inbufsize > 0; inbuf++, inbufsize--) {
  407. const char* inptr;
  408. size_t inbytesleft;
  409. char* outptr;
  410. size_t outbytesleft;
  411. sprintf(ilseq_byte_subst_buffer,
  412. ilseq_byte_subst, (unsigned int)(unsigned char)*inbuf);
  413. inptr = ilseq_byte_subst_buffer;
  414. inbytesleft = strlen(ilseq_byte_subst_buffer);
  415. outptr = (char*)subst_mb_to_uc_temp_buffer;
  416. outbytesleft = ilseq_byte_subst_size*sizeof(unsigned int);
  417. iconv(subst_mb_to_uc_cd,NULL,NULL,NULL,NULL);
  418. if (iconv(subst_mb_to_uc_cd, (ICONV_CONST char**)&inptr,&inbytesleft, &outptr,&outbytesleft)
  419. == (size_t)(-1)
  420. || iconv(subst_mb_to_uc_cd, NULL,NULL, &outptr,&outbytesleft)
  421. == (size_t)(-1))
  422. error(EXIT_FAILURE,0,
  423. /* TRANSLATORS: An error message.
  424. The %s placeholder expands to a piece of text, specified through --byte-subst. */
  425. _("cannot convert byte substitution to Unicode: %s"),
  426. ilseq_byte_subst_buffer);
  427. if (!(outbytesleft%sizeof(unsigned int) == 0))
  428. abort();
  429. write_replacement(subst_mb_to_uc_temp_buffer,
  430. ilseq_byte_subst_size-(outbytesleft/sizeof(unsigned int)),
  431. callback_arg);
  432. }
  433. }
  434. /* Auxiliary variables for subst_uc_to_mb_fallback. */
  435. /* Converter from locale encoding to target encoding. */
  436. static iconv_t subst_uc_to_mb_cd;
  437. /* Buffer of size ilseq_unicode_subst_size*4. */
  438. static char* subst_uc_to_mb_temp_buffer;
  439. static void subst_uc_to_mb_fallback
  440. (unsigned int code,
  441. void (*write_replacement) (const char *buf, size_t buflen,
  442. void* callback_arg),
  443. void* callback_arg,
  444. void* data)
  445. {
  446. const char* inptr;
  447. size_t inbytesleft;
  448. char* outptr;
  449. size_t outbytesleft;
  450. sprintf(ilseq_unicode_subst_buffer, ilseq_unicode_subst, code);
  451. inptr = ilseq_unicode_subst_buffer;
  452. inbytesleft = strlen(ilseq_unicode_subst_buffer);
  453. outptr = subst_uc_to_mb_temp_buffer;
  454. outbytesleft = ilseq_unicode_subst_size*4;
  455. iconv(subst_uc_to_mb_cd,NULL,NULL,NULL,NULL);
  456. if (iconv(subst_uc_to_mb_cd, (ICONV_CONST char**)&inptr,&inbytesleft, &outptr,&outbytesleft)
  457. == (size_t)(-1)
  458. || iconv(subst_uc_to_mb_cd, NULL,NULL, &outptr,&outbytesleft)
  459. == (size_t)(-1))
  460. error(EXIT_FAILURE,0,
  461. /* TRANSLATORS: An error message.
  462. The %s placeholder expands to a piece of text, specified through --unicode-subst. */
  463. _("cannot convert unicode substitution to target encoding: %s"),
  464. ilseq_unicode_subst_buffer);
  465. write_replacement(subst_uc_to_mb_temp_buffer,
  466. ilseq_unicode_subst_size*4-outbytesleft,
  467. callback_arg);
  468. }
  469. #if HAVE_WCHAR_T
  470. /* Auxiliary variables for subst_mb_to_wc_fallback. */
  471. /* Converter from locale encoding to wchar_t. */
  472. static iconv_t subst_mb_to_wc_cd;
  473. /* Buffer of size ilseq_byte_subst_size. */
  474. static wchar_t* subst_mb_to_wc_temp_buffer;
  475. static void subst_mb_to_wc_fallback
  476. (const char* inbuf, size_t inbufsize,
  477. void (*write_replacement) (const wchar_t *buf, size_t buflen,
  478. void* callback_arg),
  479. void* callback_arg,
  480. void* data)
  481. {
  482. for (; inbufsize > 0; inbuf++, inbufsize--) {
  483. const char* inptr;
  484. size_t inbytesleft;
  485. char* outptr;
  486. size_t outbytesleft;
  487. sprintf(ilseq_byte_subst_buffer,
  488. ilseq_byte_subst, (unsigned int)(unsigned char)*inbuf);
  489. inptr = ilseq_byte_subst_buffer;
  490. inbytesleft = strlen(ilseq_byte_subst_buffer);
  491. outptr = (char*)subst_mb_to_wc_temp_buffer;
  492. outbytesleft = ilseq_byte_subst_size*sizeof(wchar_t);
  493. iconv(subst_mb_to_wc_cd,NULL,NULL,NULL,NULL);
  494. if (iconv(subst_mb_to_wc_cd, (ICONV_CONST char**)&inptr,&inbytesleft, &outptr,&outbytesleft)
  495. == (size_t)(-1)
  496. || iconv(subst_mb_to_wc_cd, NULL,NULL, &outptr,&outbytesleft)
  497. == (size_t)(-1))
  498. error(EXIT_FAILURE,0,
  499. /* TRANSLATORS: An error message.
  500. The %s placeholder expands to a piece of text, specified through --byte-subst. */
  501. _("cannot convert byte substitution to wide string: %s"),
  502. ilseq_byte_subst_buffer);
  503. if (!(outbytesleft%sizeof(wchar_t) == 0))
  504. abort();
  505. write_replacement(subst_mb_to_wc_temp_buffer,
  506. ilseq_byte_subst_size-(outbytesleft/sizeof(wchar_t)),
  507. callback_arg);
  508. }
  509. }
  510. /* Auxiliary variables for subst_wc_to_mb_fallback. */
  511. /* Converter from locale encoding to target encoding. */
  512. static iconv_t subst_wc_to_mb_cd;
  513. /* Buffer of size ilseq_wchar_subst_size*4.
  514. Hardcode factor 4, because MB_LEN_MAX is not reliable on some platforms. */
  515. static char* subst_wc_to_mb_temp_buffer;
  516. static void subst_wc_to_mb_fallback
  517. (wchar_t code,
  518. void (*write_replacement) (const char *buf, size_t buflen,
  519. void* callback_arg),
  520. void* callback_arg,
  521. void* data)
  522. {
  523. const char* inptr;
  524. size_t inbytesleft;
  525. char* outptr;
  526. size_t outbytesleft;
  527. sprintf(ilseq_wchar_subst_buffer, ilseq_wchar_subst, (unsigned int) code);
  528. inptr = ilseq_wchar_subst_buffer;
  529. inbytesleft = strlen(ilseq_wchar_subst_buffer);
  530. outptr = subst_wc_to_mb_temp_buffer;
  531. outbytesleft = ilseq_wchar_subst_size*4;
  532. iconv(subst_wc_to_mb_cd,NULL,NULL,NULL,NULL);
  533. if (iconv(subst_wc_to_mb_cd, (ICONV_CONST char**)&inptr,&inbytesleft, &outptr,&outbytesleft)
  534. == (size_t)(-1)
  535. || iconv(subst_wc_to_mb_cd, NULL,NULL, &outptr,&outbytesleft)
  536. == (size_t)(-1))
  537. error(EXIT_FAILURE,0,
  538. /* TRANSLATORS: An error message.
  539. The %s placeholder expands to a piece of text, specified through --widechar-subst. */
  540. _("cannot convert widechar substitution to target encoding: %s"),
  541. ilseq_wchar_subst_buffer);
  542. write_replacement(subst_wc_to_mb_temp_buffer,
  543. ilseq_wchar_subst_size*4-outbytesleft,
  544. callback_arg);
  545. }
  546. #else
  547. #define subst_mb_to_wc_fallback NULL
  548. #define subst_wc_to_mb_fallback NULL
  549. #endif
  550. /* Auxiliary variables for subst_mb_to_mb_fallback. */
  551. /* Converter from locale encoding to target encoding. */
  552. static iconv_t subst_mb_to_mb_cd;
  553. /* Buffer of size ilseq_byte_subst_size*4. */
  554. static char* subst_mb_to_mb_temp_buffer;
  555. static void subst_mb_to_mb_fallback (const char* inbuf, size_t inbufsize)
  556. {
  557. for (; inbufsize > 0; inbuf++, inbufsize--) {
  558. const char* inptr;
  559. size_t inbytesleft;
  560. char* outptr;
  561. size_t outbytesleft;
  562. sprintf(ilseq_byte_subst_buffer,
  563. ilseq_byte_subst, (unsigned int)(unsigned char)*inbuf);
  564. inptr = ilseq_byte_subst_buffer;
  565. inbytesleft = strlen(ilseq_byte_subst_buffer);
  566. outptr = subst_mb_to_mb_temp_buffer;
  567. outbytesleft = ilseq_byte_subst_size*4;
  568. iconv(subst_mb_to_mb_cd,NULL,NULL,NULL,NULL);
  569. if (iconv(subst_mb_to_mb_cd, (ICONV_CONST char**)&inptr,&inbytesleft, &outptr,&outbytesleft)
  570. == (size_t)(-1)
  571. || iconv(subst_mb_to_mb_cd, NULL,NULL, &outptr,&outbytesleft)
  572. == (size_t)(-1))
  573. error(EXIT_FAILURE,0,
  574. /* TRANSLATORS: An error message.
  575. The %s placeholder expands to a piece of text, specified through --byte-subst. */
  576. _("cannot convert byte substitution to target encoding: %s"),
  577. ilseq_byte_subst_buffer);
  578. fwrite(subst_mb_to_mb_temp_buffer,1,ilseq_byte_subst_size*4-outbytesleft,
  579. stdout);
  580. }
  581. }
  582. /* ========================================================================= */
  583. /* Error messages during conversion. */
  584. static void conversion_error_EILSEQ (const char* infilename)
  585. {
  586. fflush(stdout);
  587. if (column > 0)
  588. putc('\n',stderr);
  589. error(0,0,
  590. /* TRANSLATORS: An error message.
  591. The placeholders expand to the input file name, a line number, and a column number. */
  592. _("%s:%u:%u: cannot convert"),
  593. infilename,line,column);
  594. }
  595. static void conversion_error_EINVAL (const char* infilename)
  596. {
  597. fflush(stdout);
  598. if (column > 0)
  599. putc('\n',stderr);
  600. error(0,0,
  601. /* TRANSLATORS: An error message.
  602. The placeholders expand to the input file name, a line number, and a column number.
  603. A "shift sequence" is a sequence of bytes that changes the state of the converter;
  604. this concept exists only for "stateful" encodings like ISO-2022-JP. */
  605. _("%s:%u:%u: incomplete character or shift sequence"),
  606. infilename,line,column);
  607. }
  608. static void conversion_error_other (int errnum, const char* infilename)
  609. {
  610. fflush(stdout);
  611. if (column > 0)
  612. putc('\n',stderr);
  613. error(0,errnum,
  614. /* TRANSLATORS: The first part of an error message.
  615. It is followed by a colon and a detail message.
  616. The placeholders expand to the input file name, a line number, and a column number. */
  617. _("%s:%u:%u"),
  618. infilename,line,column);
  619. }
  620. /* Convert the input given in infile. */
  621. static int convert (iconv_t cd, FILE* infile, const char* infilename)
  622. {
  623. char inbuf[4096+4096];
  624. size_t inbufrest = 0;
  625. char initial_outbuf[4096];
  626. char *outbuf = initial_outbuf;
  627. size_t outbufsize = sizeof(initial_outbuf);
  628. int status = 0;
  629. #if O_BINARY
  630. SET_BINARY(fileno(infile));
  631. #endif
  632. line = 1; column = 0;
  633. iconv(cd,NULL,NULL,NULL,NULL);
  634. for (;;) {
  635. size_t inbufsize = fread(inbuf+4096,1,4096,infile);
  636. if (inbufsize == 0) {
  637. if (inbufrest == 0)
  638. break;
  639. else {
  640. if (ilseq_byte_subst != NULL)
  641. subst_mb_to_mb_fallback(inbuf+4096-inbufrest, inbufrest);
  642. if (!silent)
  643. conversion_error_EINVAL(infilename);
  644. status = 1;
  645. goto done;
  646. }
  647. } else {
  648. const char* inptr = inbuf+4096-inbufrest;
  649. size_t insize = inbufrest+inbufsize;
  650. inbufrest = 0;
  651. while (insize > 0) {
  652. char* outptr = outbuf;
  653. size_t outsize = outbufsize;
  654. size_t res = iconv(cd,(ICONV_CONST char**)&inptr,&insize,&outptr,&outsize);
  655. if (outptr != outbuf) {
  656. int saved_errno = errno;
  657. if (fwrite(outbuf,1,outptr-outbuf,stdout) < outptr-outbuf) {
  658. status = 1;
  659. goto done;
  660. }
  661. errno = saved_errno;
  662. }
  663. if (res == (size_t)(-1)) {
  664. if (errno == EILSEQ) {
  665. if (discard_unconvertible == 1) {
  666. int one = 1;
  667. iconvctl(cd,ICONV_SET_DISCARD_ILSEQ,&one);
  668. discard_unconvertible = 2;
  669. status = 1;
  670. } else {
  671. if (!silent)
  672. conversion_error_EILSEQ(infilename);
  673. status = 1;
  674. goto done;
  675. }
  676. } else if (errno == EINVAL) {
  677. if (inbufsize == 0 || insize > 4096) {
  678. if (!silent)
  679. conversion_error_EINVAL(infilename);
  680. status = 1;
  681. goto done;
  682. } else {
  683. inbufrest = insize;
  684. if (insize > 0) {
  685. /* Like memcpy(inbuf+4096-insize,inptr,insize), except that
  686. we cannot use memcpy here, because source and destination
  687. regions may overlap. */
  688. char* restptr = inbuf+4096-insize;
  689. do { *restptr++ = *inptr++; } while (--insize > 0);
  690. }
  691. break;
  692. }
  693. } else if (errno == E2BIG) {
  694. if (outptr==outbuf) {
  695. /* outbuf is too small. Double its size. */
  696. if (outbuf != initial_outbuf)
  697. free(outbuf);
  698. outbufsize = 2*outbufsize;
  699. if (outbufsize==0) /* integer overflow? */
  700. xalloc_die();
  701. outbuf = (char*)xmalloc(outbufsize);
  702. }
  703. } else {
  704. if (!silent)
  705. conversion_error_other(errno,infilename);
  706. status = 1;
  707. goto done;
  708. }
  709. }
  710. }
  711. }
  712. }
  713. for (;;) {
  714. char* outptr = outbuf;
  715. size_t outsize = outbufsize;
  716. size_t res = iconv(cd,NULL,NULL,&outptr,&outsize);
  717. if (outptr != outbuf) {
  718. int saved_errno = errno;
  719. if (fwrite(outbuf,1,outptr-outbuf,stdout) < outptr-outbuf) {
  720. status = 1;
  721. goto done;
  722. }
  723. errno = saved_errno;
  724. }
  725. if (res == (size_t)(-1)) {
  726. if (errno == EILSEQ) {
  727. if (discard_unconvertible == 1) {
  728. int one = 1;
  729. iconvctl(cd,ICONV_SET_DISCARD_ILSEQ,&one);
  730. discard_unconvertible = 2;
  731. status = 1;
  732. } else {
  733. if (!silent)
  734. conversion_error_EILSEQ(infilename);
  735. status = 1;
  736. goto done;
  737. }
  738. } else if (errno == EINVAL) {
  739. if (!silent)
  740. conversion_error_EINVAL(infilename);
  741. status = 1;
  742. goto done;
  743. } else if (errno == E2BIG) {
  744. if (outptr==outbuf) {
  745. /* outbuf is too small. Double its size. */
  746. if (outbuf != initial_outbuf)
  747. free(outbuf);
  748. outbufsize = 2*outbufsize;
  749. if (outbufsize==0) /* integer overflow? */
  750. xalloc_die();
  751. outbuf = (char*)xmalloc(outbufsize);
  752. }
  753. } else {
  754. if (!silent)
  755. conversion_error_other(errno,infilename);
  756. status = 1;
  757. goto done;
  758. }
  759. } else
  760. break;
  761. }
  762. if (ferror(infile)) {
  763. fflush(stdout);
  764. if (column > 0)
  765. putc('\n',stderr);
  766. error(0,0,
  767. /* TRANSLATORS: An error message.
  768. The placeholder expands to the input file name. */
  769. _("%s: I/O error"),
  770. infilename);
  771. status = 1;
  772. goto done;
  773. }
  774. done:
  775. if (outbuf != initial_outbuf)
  776. free(outbuf);
  777. return status;
  778. }
  779. /* ========================================================================= */
  780. int main (int argc, char* argv[])
  781. {
  782. const char* fromcode = NULL;
  783. const char* tocode = NULL;
  784. int do_list = 0;
  785. iconv_t cd;
  786. struct iconv_fallbacks fallbacks;
  787. struct iconv_hooks hooks;
  788. int i;
  789. int status;
  790. set_program_name (argv[0]);
  791. #if HAVE_SETLOCALE
  792. /* Needed for the locale dependent encodings, "char" and "wchar_t",
  793. and for gettext. */
  794. setlocale(LC_CTYPE,"");
  795. #if ENABLE_NLS
  796. /* Needed for gettext. */
  797. setlocale(LC_MESSAGES,"");
  798. #endif
  799. #endif
  800. #if ENABLE_NLS
  801. bindtextdomain("libiconv",relocate(LOCALEDIR));
  802. #endif
  803. textdomain("libiconv");
  804. for (i = 1; i < argc;) {
  805. size_t len = strlen(argv[i]);
  806. if (!strcmp(argv[i],"--")) {
  807. i++;
  808. break;
  809. }
  810. if (!strcmp(argv[i],"-f")
  811. /* --f ... --from-code */
  812. || (len >= 3 && len <= 11 && !strncmp(argv[i],"--from-code",len))
  813. /* --from-code=... */
  814. || (len >= 12 && !strncmp(argv[i],"--from-code=",12))) {
  815. if (len < 12)
  816. if (i == argc-1) usage(1);
  817. if (fromcode != NULL) usage(1);
  818. if (len < 12) {
  819. fromcode = argv[i+1];
  820. i += 2;
  821. } else {
  822. fromcode = argv[i]+12;
  823. i++;
  824. }
  825. continue;
  826. }
  827. if (!strcmp(argv[i],"-t")
  828. /* --t ... --to-code */
  829. || (len >= 3 && len <= 9 && !strncmp(argv[i],"--to-code",len))
  830. /* --from-code=... */
  831. || (len >= 10 && !strncmp(argv[i],"--to-code=",10))) {
  832. if (len < 10)
  833. if (i == argc-1) usage(1);
  834. if (tocode != NULL) usage(1);
  835. if (len < 10) {
  836. tocode = argv[i+1];
  837. i += 2;
  838. } else {
  839. tocode = argv[i]+10;
  840. i++;
  841. }
  842. continue;
  843. }
  844. if (!strcmp(argv[i],"-l")
  845. /* --l ... --list */
  846. || (len >= 3 && len <= 6 && !strncmp(argv[i],"--list",len))) {
  847. do_list = 1;
  848. i++;
  849. continue;
  850. }
  851. if (/* --by ... --byte-subst */
  852. (len >= 4 && len <= 12 && !strncmp(argv[i],"--byte-subst",len))
  853. /* --byte-subst=... */
  854. || (len >= 13 && !strncmp(argv[i],"--byte-subst=",13))) {
  855. if (len < 13) {
  856. if (i == argc-1) usage(1);
  857. ilseq_byte_subst = argv[i+1];
  858. i += 2;
  859. } else {
  860. ilseq_byte_subst = argv[i]+13;
  861. i++;
  862. }
  863. ilseq_byte_subst_size =
  864. check_subst_formatstring(ilseq_byte_subst, "--byte-subst");
  865. continue;
  866. }
  867. if (/* --w ... --widechar-subst */
  868. (len >= 3 && len <= 16 && !strncmp(argv[i],"--widechar-subst",len))
  869. /* --widechar-subst=... */
  870. || (len >= 17 && !strncmp(argv[i],"--widechar-subst=",17))) {
  871. if (len < 17) {
  872. if (i == argc-1) usage(1);
  873. ilseq_wchar_subst = argv[i+1];
  874. i += 2;
  875. } else {
  876. ilseq_wchar_subst = argv[i]+17;
  877. i++;
  878. }
  879. ilseq_wchar_subst_size =
  880. check_subst_formatstring(ilseq_wchar_subst, "--widechar-subst");
  881. continue;
  882. }
  883. if (/* --u ... --unicode-subst */
  884. (len >= 3 && len <= 15 && !strncmp(argv[i],"--unicode-subst",len))
  885. /* --unicode-subst=... */
  886. || (len >= 16 && !strncmp(argv[i],"--unicode-subst=",16))) {
  887. if (len < 16) {
  888. if (i == argc-1) usage(1);
  889. ilseq_unicode_subst = argv[i+1];
  890. i += 2;
  891. } else {
  892. ilseq_unicode_subst = argv[i]+16;
  893. i++;
  894. }
  895. ilseq_unicode_subst_size =
  896. check_subst_formatstring(ilseq_unicode_subst, "--unicode-subst");
  897. continue;
  898. }
  899. if /* --s ... --silent */
  900. (len >= 3 && len <= 8 && !strncmp(argv[i],"--silent",len)) {
  901. silent = 1;
  902. continue;
  903. }
  904. if /* --h ... --help */
  905. (len >= 3 && len <= 6 && !strncmp(argv[i],"--help",len)) {
  906. usage(0);
  907. }
  908. if /* --v ... --version */
  909. (len >= 3 && len <= 9 && !strncmp(argv[i],"--version",len)) {
  910. print_version();
  911. }
  912. #if O_BINARY
  913. /* Backward compatibility with iconv <= 1.9.1. */
  914. if /* --bi ... --binary */
  915. (len >= 4 && len <= 8 && !strncmp(argv[i],"--binary",len)) {
  916. i++;
  917. continue;
  918. }
  919. #endif
  920. if (argv[i][0] == '-') {
  921. const char *option = argv[i] + 1;
  922. if (*option == '\0')
  923. usage(1);
  924. for (; *option; option++)
  925. switch (*option) {
  926. case 'c': discard_unconvertible = 1; break;
  927. case 's': silent = 1; break;
  928. default: usage(1);
  929. }
  930. i++;
  931. continue;
  932. }
  933. break;
  934. }
  935. if (do_list) {
  936. if (i != 2 || i != argc)
  937. usage(1);
  938. iconvlist(print_one,NULL);
  939. status = 0;
  940. } else {
  941. #if O_BINARY
  942. SET_BINARY(fileno(stdout));
  943. #endif
  944. if (fromcode == NULL)
  945. fromcode = "char";
  946. if (tocode == NULL)
  947. tocode = "char";
  948. cd = iconv_open(tocode,fromcode);
  949. if (cd == (iconv_t)(-1)) {
  950. if (iconv_open("UCS-4",fromcode) == (iconv_t)(-1))
  951. error(0,0,
  952. /* TRANSLATORS: An error message.
  953. The placeholder expands to the encoding name, specified through --from-code. */
  954. _("conversion from %s unsupported"),
  955. fromcode);
  956. else if (iconv_open(tocode,"UCS-4") == (iconv_t)(-1))
  957. error(0,0,
  958. /* TRANSLATORS: An error message.
  959. The placeholder expands to the encoding name, specified through --to-code. */
  960. _("conversion to %s unsupported"),
  961. tocode);
  962. else
  963. error(0,0,
  964. /* TRANSLATORS: An error message.
  965. The placeholders expand to the encoding names, specified through --from-code and --to-code, respectively. */
  966. _("conversion from %s to %s unsupported"),
  967. fromcode,tocode);
  968. error(EXIT_FAILURE,0,
  969. /* TRANSLATORS: Additional advice after an error message.
  970. The %s placeholder expands to the program name. */
  971. _("try '%s -l' to get the list of supported encodings"),
  972. program_name);
  973. }
  974. /* Look at fromcode and tocode, to determine whether character widths
  975. should be determined according to legacy CJK conventions. */
  976. cjkcode = iconv_canonicalize(tocode);
  977. if (!is_cjk_encoding(cjkcode))
  978. cjkcode = iconv_canonicalize(fromcode);
  979. /* Set up fallback routines for handling impossible conversions. */
  980. if (ilseq_byte_subst != NULL)
  981. ilseq_byte_subst_buffer = (char*)xmalloc((ilseq_byte_subst_size+1)*sizeof(char));
  982. if (!discard_unconvertible) {
  983. #if HAVE_WCHAR_T
  984. if (ilseq_wchar_subst != NULL)
  985. ilseq_wchar_subst_buffer = (char*)xmalloc((ilseq_wchar_subst_size+1)*sizeof(char));
  986. #endif
  987. if (ilseq_unicode_subst != NULL)
  988. ilseq_unicode_subst_buffer = (char*)xmalloc((ilseq_unicode_subst_size+1)*sizeof(char));
  989. if (ilseq_byte_subst != NULL) {
  990. subst_mb_to_uc_cd = iconv_open("UCS-4-INTERNAL","char");
  991. subst_mb_to_uc_temp_buffer = (unsigned int*)xmalloc(ilseq_byte_subst_size*sizeof(unsigned int));
  992. #if HAVE_WCHAR_T
  993. subst_mb_to_wc_cd = iconv_open("wchar_t","char");
  994. subst_mb_to_wc_temp_buffer = (wchar_t*)xmalloc(ilseq_byte_subst_size*sizeof(wchar_t));
  995. #endif
  996. subst_mb_to_mb_cd = iconv_open(tocode,"char");
  997. subst_mb_to_mb_temp_buffer = (char*)xmalloc(ilseq_byte_subst_size*4);
  998. }
  999. #if HAVE_WCHAR_T
  1000. if (ilseq_wchar_subst != NULL) {
  1001. subst_wc_to_mb_cd = iconv_open(tocode,"char");
  1002. subst_wc_to_mb_temp_buffer = (char*)xmalloc(ilseq_wchar_subst_size*4);
  1003. }
  1004. #endif
  1005. if (ilseq_unicode_subst != NULL) {
  1006. subst_uc_to_mb_cd = iconv_open(tocode,"char");
  1007. subst_uc_to_mb_temp_buffer = (char*)xmalloc(ilseq_unicode_subst_size*4);
  1008. }
  1009. fallbacks.mb_to_uc_fallback =
  1010. (ilseq_byte_subst != NULL ? subst_mb_to_uc_fallback : NULL);
  1011. fallbacks.uc_to_mb_fallback =
  1012. (ilseq_unicode_subst != NULL ? subst_uc_to_mb_fallback : NULL);
  1013. fallbacks.mb_to_wc_fallback =
  1014. (ilseq_byte_subst != NULL ? subst_mb_to_wc_fallback : NULL);
  1015. fallbacks.wc_to_mb_fallback =
  1016. (ilseq_wchar_subst != NULL ? subst_wc_to_mb_fallback : NULL);
  1017. fallbacks.data = NULL;
  1018. iconvctl(cd, ICONV_SET_FALLBACKS, &fallbacks);
  1019. }
  1020. /* Set up hooks for updating the line and column position. */
  1021. hooks.uc_hook = update_line_column;
  1022. hooks.wc_hook = NULL;
  1023. hooks.data = NULL;
  1024. iconvctl(cd, ICONV_SET_HOOKS, &hooks);
  1025. if (i == argc)
  1026. status = convert(cd,stdin,
  1027. /* TRANSLATORS: A filename substitute denoting standard input. */
  1028. _("(stdin)"));
  1029. else {
  1030. status = 0;
  1031. for (; i < argc; i++) {
  1032. const char* infilename = argv[i];
  1033. FILE* infile = fopen(infilename,"r");
  1034. if (infile == NULL) {
  1035. int saved_errno = errno;
  1036. error(0,saved_errno,
  1037. /* TRANSLATORS: The first part of an error message.
  1038. It is followed by a colon and a detail message.
  1039. The %s placeholder expands to the input file name. */
  1040. _("%s"),
  1041. infilename);
  1042. status = 1;
  1043. } else {
  1044. status |= convert(cd,infile,infilename);
  1045. fclose(infile);
  1046. }
  1047. }
  1048. }
  1049. iconv_close(cd);
  1050. }
  1051. if (ferror(stdout) || fclose(stdout)) {
  1052. error(0,0,
  1053. /* TRANSLATORS: An error message. */
  1054. _("I/O error"));
  1055. status = 1;
  1056. }
  1057. exit(status);
  1058. }