iso2022_kr.h 5.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222
  1. /*
  2. * Copyright (C) 1999-2001, 2008 Free Software Foundation, Inc.
  3. * This file is part of the GNU LIBICONV Library.
  4. *
  5. * The GNU LIBICONV Library is free software; you can redistribute it
  6. * and/or modify it under the terms of the GNU Library General Public
  7. * License as published by the Free Software Foundation; either version 2
  8. * of the License, or (at your option) any later version.
  9. *
  10. * The GNU LIBICONV Library is distributed in the hope that it will be
  11. * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  13. * Library General Public License for more details.
  14. *
  15. * You should have received a copy of the GNU Library General Public
  16. * License along with the GNU LIBICONV Library; see the file COPYING.LIB.
  17. * If not, write to the Free Software Foundation, Inc., 51 Franklin Street,
  18. * Fifth Floor, Boston, MA 02110-1301, USA.
  19. */
  20. /*
  21. * ISO-2022-KR
  22. */
  23. /* Specification: RFC 1557 */
  24. /* Note: CJK.INF says the SO designator needs to appear only once at the
  25. beginning of a text, but to decrease the risk of ambiguities, when
  26. producing ISO-2022-KR, we repeat the designator in every line containing
  27. SO characters. RFC 1557 does not mandate this. */
  /* Control bytes recognised and emitted by this converter. */
  #define ESC 0x1b
  #define SO 0x0e
  #define SI 0x0f

  /*
   * Conversion state layout.
   *
   * Low 8 bits: the current shift state (which character set the next
   * data bytes belong to).
   */
  #define STATE_ASCII 0
  #define STATE_TWOBYTE 1

  /*
   * Bits 8 and up: whether the KS C 5601 designator (ESC $ ) C) has been
   * seen on input / written on output.
   */
  #define STATE2_NONE 0
  #define STATE2_DESIGNATED_KSC5601 1

  /*
   * SPLIT_STATE declares the locals state1 (shift state) and state2
   * (designation) from a variable named `state` that must be in scope.
   * Use it as a declaration: `SPLIT_STATE;`.
   */
  #define SPLIT_STATE \
    unsigned int state1 = (state & 0xff), state2 = (state >> 8)

  /* COMBINE_STATE packs state1 and state2 back into `state`. */
  #define COMBINE_STATE \
    state = ((state2 << 8) | state1)
  45. static int
  46. iso2022_kr_mbtowc (conv_t conv, ucs4_t *pwc, const unsigned char *s, int n)
  47. {
  48. state_t state = conv->istate;
  49. SPLIT_STATE;
  50. int count = 0;
  51. unsigned char c;
  52. for (;;) {
  53. c = *s;
  54. if (c == ESC) {
  55. if (n < count+4)
  56. goto none;
  57. if (s[1] == '$') {
  58. if (s[2] == ')') {
  59. if (s[3] == 'C') {
  60. state2 = STATE2_DESIGNATED_KSC5601;
  61. s += 4; count += 4;
  62. if (n < count+1)
  63. goto none;
  64. continue;
  65. }
  66. }
  67. }
  68. goto ilseq;
  69. }
  70. if (c == SO) {
  71. if (state2 != STATE2_DESIGNATED_KSC5601)
  72. goto ilseq;
  73. state1 = STATE_TWOBYTE;
  74. s++; count++;
  75. if (n < count+1)
  76. goto none;
  77. continue;
  78. }
  79. if (c == SI) {
  80. state1 = STATE_ASCII;
  81. s++; count++;
  82. if (n < count+1)
  83. goto none;
  84. continue;
  85. }
  86. break;
  87. }
  88. switch (state1) {
  89. case STATE_ASCII:
  90. if (c < 0x80) {
  91. int ret = ascii_mbtowc(conv,pwc,s,1);
  92. if (ret == RET_ILSEQ)
  93. goto ilseq;
  94. if (ret != 1) abort();
  95. #if 0 /* Accept ISO-2022-KR according to CJK.INF. */
  96. if (*pwc == 0x000a || *pwc == 0x000d)
  97. state2 = STATE2_NONE;
  98. #endif
  99. COMBINE_STATE;
  100. conv->istate = state;
  101. return count+1;
  102. } else
  103. goto ilseq;
  104. case STATE_TWOBYTE:
  105. if (n < count+2)
  106. goto none;
  107. if (state2 != STATE2_DESIGNATED_KSC5601) abort();
  108. if (s[0] < 0x80 && s[1] < 0x80) {
  109. int ret = ksc5601_mbtowc(conv,pwc,s,2);
  110. if (ret == RET_ILSEQ)
  111. goto ilseq;
  112. if (ret != 2) abort();
  113. COMBINE_STATE;
  114. conv->istate = state;
  115. return count+2;
  116. } else
  117. goto ilseq;
  118. default: abort();
  119. }
  120. none:
  121. COMBINE_STATE;
  122. conv->istate = state;
  123. return RET_TOOFEW(count);
  124. ilseq:
  125. COMBINE_STATE;
  126. conv->istate = state;
  127. return RET_SHIFT_ILSEQ(count);
  128. }
  129. static int
  130. iso2022_kr_wctomb (conv_t conv, unsigned char *r, ucs4_t wc, int n)
  131. {
  132. state_t state = conv->ostate;
  133. SPLIT_STATE;
  134. unsigned char buf[2];
  135. int ret;
  136. /* Try ASCII. */
  137. ret = ascii_wctomb(conv,buf,wc,1);
  138. if (ret != RET_ILUNI) {
  139. if (ret != 1) abort();
  140. if (buf[0] < 0x80) {
  141. int count = (state1 == STATE_ASCII ? 1 : 2);
  142. if (n < count)
  143. return RET_TOOSMALL;
  144. if (state1 != STATE_ASCII) {
  145. r[0] = SI;
  146. r += 1;
  147. state1 = STATE_ASCII;
  148. }
  149. r[0] = buf[0];
  150. if (wc == 0x000a || wc == 0x000d)
  151. state2 = STATE2_NONE;
  152. COMBINE_STATE;
  153. conv->ostate = state;
  154. return count;
  155. }
  156. }
  157. /* Try KS C 5601-1992. */
  158. ret = ksc5601_wctomb(conv,buf,wc,2);
  159. if (ret != RET_ILUNI) {
  160. if (ret != 2) abort();
  161. if (buf[0] < 0x80 && buf[1] < 0x80) {
  162. int count = (state2 == STATE2_DESIGNATED_KSC5601 ? 0 : 4) + (state1 == STATE_TWOBYTE ? 0 : 1) + 2;
  163. if (n < count)
  164. return RET_TOOSMALL;
  165. if (state2 != STATE2_DESIGNATED_KSC5601) {
  166. r[0] = ESC;
  167. r[1] = '$';
  168. r[2] = ')';
  169. r[3] = 'C';
  170. r += 4;
  171. state2 = STATE2_DESIGNATED_KSC5601;
  172. }
  173. if (state1 != STATE_TWOBYTE) {
  174. r[0] = SO;
  175. r += 1;
  176. state1 = STATE_TWOBYTE;
  177. }
  178. r[0] = buf[0];
  179. r[1] = buf[1];
  180. COMBINE_STATE;
  181. conv->ostate = state;
  182. return count;
  183. }
  184. }
  185. return RET_ILUNI;
  186. }
  187. static int
  188. iso2022_kr_reset (conv_t conv, unsigned char *r, int n)
  189. {
  190. state_t state = conv->ostate;
  191. SPLIT_STATE;
  192. (void)state2;
  193. if (state1 != STATE_ASCII) {
  194. if (n < 1)
  195. return RET_TOOSMALL;
  196. r[0] = SI;
  197. /* conv->ostate = 0; will be done by the caller */
  198. return 1;
  199. } else
  200. return 0;
  201. }
  /*
   * Remove the state helper macros defined for this converter.
   * ESC, SO and SI are not undefined here.
   */
  #undef STATE_ASCII
  #undef STATE_TWOBYTE
  #undef STATE2_NONE
  #undef STATE2_DESIGNATED_KSC5601
  #undef SPLIT_STATE
  #undef COMBINE_STATE