/* iso2022_cn.h */
/*
 * Copyright (C) 1999-2001, 2008 Free Software Foundation, Inc.
 * This file is part of the GNU LIBICONV Library.
 *
 * The GNU LIBICONV Library is free software; you can redistribute it
 * and/or modify it under the terms of the GNU Library General Public
 * License as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * The GNU LIBICONV Library is distributed in the hope that it will be
 * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Library General Public License for more details.
 *
 * You should have received a copy of the GNU Library General Public
 * License along with the GNU LIBICONV Library; see the file COPYING.LIB.
 * If not, write to the Free Software Foundation, Inc., 51 Franklin Street,
 * Fifth Floor, Boston, MA 02110-1301, USA.
 */

/*
 * ISO-2022-CN
 */

/* Specification: RFC 1922 */
  24. #define ESC 0x1b
  25. #define SO 0x0e
  26. #define SI 0x0f
  27. /*
  28. * The state is composed of one of the following values
  29. */
  30. #define STATE_ASCII 0
  31. #define STATE_TWOBYTE 1
  32. /*
  33. * and one of the following values, << 8
  34. */
  35. #define STATE2_NONE 0
  36. #define STATE2_DESIGNATED_GB2312 1
  37. #define STATE2_DESIGNATED_CNS11643_1 2
  38. /*
  39. * and one of the following values, << 16
  40. */
  41. #define STATE3_NONE 0
  42. #define STATE3_DESIGNATED_CNS11643_2 1
  43. #define SPLIT_STATE \
  44. unsigned int state1 = state & 0xff, state2 = (state >> 8) & 0xff, state3 = state >> 16
  45. #define COMBINE_STATE \
  46. state = (state3 << 16) | (state2 << 8) | state1
  47. static int
  48. iso2022_cn_mbtowc (conv_t conv, ucs4_t *pwc, const unsigned char *s, int n)
  49. {
  50. state_t state = conv->istate;
  51. SPLIT_STATE;
  52. int count = 0;
  53. unsigned char c;
  54. for (;;) {
  55. c = *s;
  56. if (c == ESC) {
  57. if (n < count+4)
  58. goto none;
  59. if (s[1] == '$') {
  60. if (s[2] == ')') {
  61. if (s[3] == 'A') {
  62. state2 = STATE2_DESIGNATED_GB2312;
  63. s += 4; count += 4;
  64. if (n < count+1)
  65. goto none;
  66. continue;
  67. }
  68. if (s[3] == 'G') {
  69. state2 = STATE2_DESIGNATED_CNS11643_1;
  70. s += 4; count += 4;
  71. if (n < count+1)
  72. goto none;
  73. continue;
  74. }
  75. }
  76. if (s[2] == '*') {
  77. if (s[3] == 'H') {
  78. state3 = STATE3_DESIGNATED_CNS11643_2;
  79. s += 4; count += 4;
  80. if (n < count+1)
  81. goto none;
  82. continue;
  83. }
  84. }
  85. }
  86. if (s[1] == 'N') {
  87. switch (state3) {
  88. case STATE3_NONE:
  89. goto ilseq;
  90. case STATE3_DESIGNATED_CNS11643_2:
  91. if (s[2] < 0x80 && s[3] < 0x80) {
  92. int ret = cns11643_2_mbtowc(conv,pwc,s+2,2);
  93. if (ret == RET_ILSEQ)
  94. goto ilseq;
  95. if (ret != 2) abort();
  96. COMBINE_STATE;
  97. conv->istate = state;
  98. return count+4;
  99. } else
  100. goto ilseq;
  101. default: abort();
  102. }
  103. }
  104. goto ilseq;
  105. }
  106. if (c == SO) {
  107. if (state2 != STATE2_DESIGNATED_GB2312 && state2 != STATE2_DESIGNATED_CNS11643_1)
  108. goto ilseq;
  109. state1 = STATE_TWOBYTE;
  110. s++; count++;
  111. if (n < count+1)
  112. goto none;
  113. continue;
  114. }
  115. if (c == SI) {
  116. state1 = STATE_ASCII;
  117. s++; count++;
  118. if (n < count+1)
  119. goto none;
  120. continue;
  121. }
  122. break;
  123. }
  124. switch (state1) {
  125. case STATE_ASCII:
  126. if (c < 0x80) {
  127. int ret = ascii_mbtowc(conv,pwc,s,1);
  128. if (ret == RET_ILSEQ)
  129. goto ilseq;
  130. if (ret != 1) abort();
  131. if (*pwc == 0x000a || *pwc == 0x000d) {
  132. state2 = STATE2_NONE; state3 = STATE3_NONE;
  133. }
  134. COMBINE_STATE;
  135. conv->istate = state;
  136. return count+1;
  137. } else
  138. goto ilseq;
  139. case STATE_TWOBYTE:
  140. if (n < count+2)
  141. goto none;
  142. if (s[0] < 0x80 && s[1] < 0x80) {
  143. int ret;
  144. switch (state2) {
  145. case STATE2_NONE:
  146. goto ilseq;
  147. case STATE2_DESIGNATED_GB2312:
  148. ret = gb2312_mbtowc(conv,pwc,s,2); break;
  149. case STATE2_DESIGNATED_CNS11643_1:
  150. ret = cns11643_1_mbtowc(conv,pwc,s,2); break;
  151. default: abort();
  152. }
  153. if (ret == RET_ILSEQ)
  154. goto ilseq;
  155. if (ret != 2) abort();
  156. COMBINE_STATE;
  157. conv->istate = state;
  158. return count+2;
  159. } else
  160. goto ilseq;
  161. default: abort();
  162. }
  163. none:
  164. COMBINE_STATE;
  165. conv->istate = state;
  166. return RET_TOOFEW(count);
  167. ilseq:
  168. COMBINE_STATE;
  169. conv->istate = state;
  170. return RET_SHIFT_ILSEQ(count);
  171. }
  172. static int
  173. iso2022_cn_wctomb (conv_t conv, unsigned char *r, ucs4_t wc, int n)
  174. {
  175. state_t state = conv->ostate;
  176. SPLIT_STATE;
  177. unsigned char buf[3];
  178. int ret;
  179. /* There is no need to handle Unicode 3.1 tag characters and to look for
  180. "zh-CN" or "zh-TW" tags, because GB2312 and CNS11643 are disjoint. */
  181. /* Try ASCII. */
  182. ret = ascii_wctomb(conv,buf,wc,1);
  183. if (ret != RET_ILUNI) {
  184. if (ret != 1) abort();
  185. if (buf[0] < 0x80) {
  186. int count = (state1 == STATE_ASCII ? 1 : 2);
  187. if (n < count)
  188. return RET_TOOSMALL;
  189. if (state1 != STATE_ASCII) {
  190. r[0] = SI;
  191. r += 1;
  192. state1 = STATE_ASCII;
  193. }
  194. r[0] = buf[0];
  195. if (wc == 0x000a || wc == 0x000d) {
  196. state2 = STATE2_NONE; state3 = STATE3_NONE;
  197. }
  198. COMBINE_STATE;
  199. conv->ostate = state;
  200. return count;
  201. }
  202. }
  203. /* Try GB 2312-1980. */
  204. ret = gb2312_wctomb(conv,buf,wc,2);
  205. if (ret != RET_ILUNI) {
  206. if (ret != 2) abort();
  207. if (buf[0] < 0x80 && buf[1] < 0x80) {
  208. int count = (state2 == STATE2_DESIGNATED_GB2312 ? 0 : 4) + (state1 == STATE_TWOBYTE ? 0 : 1) + 2;
  209. if (n < count)
  210. return RET_TOOSMALL;
  211. if (state2 != STATE2_DESIGNATED_GB2312) {
  212. r[0] = ESC;
  213. r[1] = '$';
  214. r[2] = ')';
  215. r[3] = 'A';
  216. r += 4;
  217. state2 = STATE2_DESIGNATED_GB2312;
  218. }
  219. if (state1 != STATE_TWOBYTE) {
  220. r[0] = SO;
  221. r += 1;
  222. state1 = STATE_TWOBYTE;
  223. }
  224. r[0] = buf[0];
  225. r[1] = buf[1];
  226. COMBINE_STATE;
  227. conv->ostate = state;
  228. return count;
  229. }
  230. }
  231. ret = cns11643_wctomb(conv,buf,wc,3);
  232. if (ret != RET_ILUNI) {
  233. if (ret != 3) abort();
  234. /* Try CNS 11643-1992 Plane 1. */
  235. if (buf[0] == 1 && buf[1] < 0x80 && buf[2] < 0x80) {
  236. int count = (state2 == STATE2_DESIGNATED_CNS11643_1 ? 0 : 4) + (state1 == STATE_TWOBYTE ? 0 : 1) + 2;
  237. if (n < count)
  238. return RET_TOOSMALL;
  239. if (state2 != STATE2_DESIGNATED_CNS11643_1) {
  240. r[0] = ESC;
  241. r[1] = '$';
  242. r[2] = ')';
  243. r[3] = 'G';
  244. r += 4;
  245. state2 = STATE2_DESIGNATED_CNS11643_1;
  246. }
  247. if (state1 != STATE_TWOBYTE) {
  248. r[0] = SO;
  249. r += 1;
  250. state1 = STATE_TWOBYTE;
  251. }
  252. r[0] = buf[1];
  253. r[1] = buf[2];
  254. COMBINE_STATE;
  255. conv->ostate = state;
  256. return count;
  257. }
  258. /* Try CNS 11643-1992 Plane 2. */
  259. if (buf[0] == 2 && buf[1] < 0x80 && buf[2] < 0x80) {
  260. int count = (state3 == STATE3_DESIGNATED_CNS11643_2 ? 0 : 4) + 4;
  261. if (n < count)
  262. return RET_TOOSMALL;
  263. if (state3 != STATE3_DESIGNATED_CNS11643_2) {
  264. r[0] = ESC;
  265. r[1] = '$';
  266. r[2] = '*';
  267. r[3] = 'H';
  268. r += 4;
  269. state3 = STATE3_DESIGNATED_CNS11643_2;
  270. }
  271. r[0] = ESC;
  272. r[1] = 'N';
  273. r[2] = buf[1];
  274. r[3] = buf[2];
  275. COMBINE_STATE;
  276. conv->ostate = state;
  277. return count;
  278. }
  279. }
  280. return RET_ILUNI;
  281. }
  282. static int
  283. iso2022_cn_reset (conv_t conv, unsigned char *r, int n)
  284. {
  285. state_t state = conv->ostate;
  286. SPLIT_STATE;
  287. (void)state2;
  288. (void)state3;
  289. if (state1 != STATE_ASCII) {
  290. if (n < 1)
  291. return RET_TOOSMALL;
  292. r[0] = SI;
  293. /* conv->ostate = 0; will be done by the caller */
  294. return 1;
  295. } else
  296. return 0;
  297. }
  298. #undef COMBINE_STATE
  299. #undef SPLIT_STATE
  300. #undef STATE3_DESIGNATED_CNS11643_2
  301. #undef STATE3_NONE
  302. #undef STATE2_DESIGNATED_CNS11643_1
  303. #undef STATE2_DESIGNATED_GB2312
  304. #undef STATE2_NONE
  305. #undef STATE_TWOBYTE
  306. #undef STATE_ASCII