iconv_open1.h 5.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201
  1. /*
  2. * Copyright (C) 1999-2008 Free Software Foundation, Inc.
  3. * This file is part of the GNU LIBICONV Library.
  4. *
  5. * The GNU LIBICONV Library is free software; you can redistribute it
  6. * and/or modify it under the terms of the GNU Library General Public
  7. * License as published by the Free Software Foundation; either version 2
  8. * of the License, or (at your option) any later version.
  9. *
  10. * The GNU LIBICONV Library is distributed in the hope that it will be
  11. * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  13. * Library General Public License for more details.
  14. *
  15. * You should have received a copy of the GNU Library General Public
  16. * License along with the GNU LIBICONV Library; see the file COPYING.LIB.
  17. * If not, write to the Free Software Foundation, Inc., 51 Franklin Street,
  18. * Fifth Floor, Boston, MA 02110-1301, USA.
  19. */
  20. /* Part 1 of iconv_open.
  21. Input: const char* tocode, const char* fromcode.
  22. Output:
  23. unsigned int from_index;
  24. int from_wchar;
  25. unsigned int to_index;
  26. int to_wchar;
  27. int transliterate;
  28. int discard_ilseq;
  29. Jumps to 'invalid' in case of error.
  30. */
  31. {
  32. char buf[MAX_WORD_LENGTH+10+1];
  33. const char* cp;
  34. char* bp;
  35. const struct alias * ap;
  36. unsigned int count;
  37. transliterate = 0;
  38. discard_ilseq = 0;
  39. /* Before calling aliases_lookup, convert the input string to upper case,
  40. * and check whether it's entirely ASCII (we call gperf with option "-7"
  41. * to achieve a smaller table) and non-empty. If it's not entirely ASCII,
  42. * or if it's too long, it is not a valid encoding name.
  43. */
  44. for (to_wchar = 0;;) {
  45. /* Search tocode in the table. */
  46. for (cp = tocode, bp = buf, count = MAX_WORD_LENGTH+10+1; ; cp++, bp++) {
  47. unsigned char c = * (unsigned char *) cp;
  48. if (c >= 0x80)
  49. goto invalid;
  50. if (c >= 'a' && c <= 'z')
  51. c -= 'a'-'A';
  52. *bp = c;
  53. if (c == '\0')
  54. break;
  55. if (--count == 0)
  56. goto invalid;
  57. }
  58. for (;;) {
  59. if (bp-buf >= 10 && memcmp(bp-10,"//TRANSLIT",10)==0) {
  60. bp -= 10;
  61. *bp = '\0';
  62. transliterate = 1;
  63. continue;
  64. }
  65. if (bp-buf >= 8 && memcmp(bp-8,"//IGNORE",8)==0) {
  66. bp -= 8;
  67. *bp = '\0';
  68. discard_ilseq = 1;
  69. continue;
  70. }
  71. break;
  72. }
  73. if (buf[0] == '\0') {
  74. tocode = locale_charset();
  75. /* Avoid an endless loop that could occur when using an older version
  76. of localcharset.c. */
  77. if (tocode[0] == '\0')
  78. goto invalid;
  79. continue;
  80. }
  81. ap = aliases_lookup(buf,bp-buf);
  82. if (ap == NULL) {
  83. ap = aliases2_lookup(buf);
  84. if (ap == NULL)
  85. goto invalid;
  86. }
  87. if (ap->encoding_index == ei_local_char) {
  88. tocode = locale_charset();
  89. /* Avoid an endless loop that could occur when using an older version
  90. of localcharset.c. */
  91. if (tocode[0] == '\0')
  92. goto invalid;
  93. continue;
  94. }
  95. if (ap->encoding_index == ei_local_wchar_t) {
  96. /* On systems which define __STDC_ISO_10646__, wchar_t is Unicode.
  97. This is also the case on native Woe32 systems. */
  98. #if __STDC_ISO_10646__ || ((defined _WIN32 || defined __WIN32__) && !defined __CYGWIN__)
  99. if (sizeof(wchar_t) == 4) {
  100. to_index = ei_ucs4internal;
  101. break;
  102. }
  103. if (sizeof(wchar_t) == 2) {
  104. to_index = ei_ucs2internal;
  105. break;
  106. }
  107. if (sizeof(wchar_t) == 1) {
  108. to_index = ei_iso8859_1;
  109. break;
  110. }
  111. #endif
  112. #if HAVE_MBRTOWC
  113. to_wchar = 1;
  114. tocode = locale_charset();
  115. continue;
  116. #endif
  117. goto invalid;
  118. }
  119. to_index = ap->encoding_index;
  120. break;
  121. }
  122. for (from_wchar = 0;;) {
  123. /* Search fromcode in the table. */
  124. for (cp = fromcode, bp = buf, count = MAX_WORD_LENGTH+10+1; ; cp++, bp++) {
  125. unsigned char c = * (unsigned char *) cp;
  126. if (c >= 0x80)
  127. goto invalid;
  128. if (c >= 'a' && c <= 'z')
  129. c -= 'a'-'A';
  130. *bp = c;
  131. if (c == '\0')
  132. break;
  133. if (--count == 0)
  134. goto invalid;
  135. }
  136. for (;;) {
  137. if (bp-buf >= 10 && memcmp(bp-10,"//TRANSLIT",10)==0) {
  138. bp -= 10;
  139. *bp = '\0';
  140. continue;
  141. }
  142. if (bp-buf >= 8 && memcmp(bp-8,"//IGNORE",8)==0) {
  143. bp -= 8;
  144. *bp = '\0';
  145. continue;
  146. }
  147. break;
  148. }
  149. if (buf[0] == '\0') {
  150. fromcode = locale_charset();
  151. /* Avoid an endless loop that could occur when using an older version
  152. of localcharset.c. */
  153. if (fromcode[0] == '\0')
  154. goto invalid;
  155. continue;
  156. }
  157. ap = aliases_lookup(buf,bp-buf);
  158. if (ap == NULL) {
  159. ap = aliases2_lookup(buf);
  160. if (ap == NULL)
  161. goto invalid;
  162. }
  163. if (ap->encoding_index == ei_local_char) {
  164. fromcode = locale_charset();
  165. /* Avoid an endless loop that could occur when using an older version
  166. of localcharset.c. */
  167. if (fromcode[0] == '\0')
  168. goto invalid;
  169. continue;
  170. }
  171. if (ap->encoding_index == ei_local_wchar_t) {
  172. /* On systems which define __STDC_ISO_10646__, wchar_t is Unicode.
  173. This is also the case on native Woe32 systems. */
  174. #if __STDC_ISO_10646__ || ((defined _WIN32 || defined __WIN32__) && !defined __CYGWIN__)
  175. if (sizeof(wchar_t) == 4) {
  176. from_index = ei_ucs4internal;
  177. break;
  178. }
  179. if (sizeof(wchar_t) == 2) {
  180. from_index = ei_ucs2internal;
  181. break;
  182. }
  183. if (sizeof(wchar_t) == 1) {
  184. from_index = ei_iso8859_1;
  185. break;
  186. }
  187. #endif
  188. #if HAVE_WCRTOMB
  189. from_wchar = 1;
  190. fromcode = locale_charset();
  191. continue;
  192. #endif
  193. goto invalid;
  194. }
  195. from_index = ap->encoding_index;
  196. break;
  197. }
  198. }