/* cjk_tab_to_h.c */
/* Copyright (C) 1999-2004, 2006-2007 Free Software Foundation, Inc.
   This file is part of the GNU LIBICONV Tools.

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
/*
 * Generates a CJK character set table from a .TXT table as found on
 * ftp.unicode.org or in the X nls directory.
 * Examples:
 *
 *   ./cjk_tab_to_h GB2312.1980-0 gb2312 > gb2312.h < gb2312
 *   ./cjk_tab_to_h JISX0208.1983-0 jisx0208 > jisx0208.h < jis0208
 *   ./cjk_tab_to_h KSC5601.1987-0 ksc5601 > ksc5601.h < ksc5601
 *
 *   ./cjk_tab_to_h GB2312.1980-0 gb2312 > gb2312.h < GB2312.TXT
 *   ./cjk_tab_to_h JISX0208.1983-0 jisx0208 > jisx0208.h < JIS0208.TXT
 *   ./cjk_tab_to_h JISX0212.1990-0 jisx0212 > jisx0212.h < JIS0212.TXT
 *   ./cjk_tab_to_h KSC5601.1987-0 ksc5601 > ksc5601.h < KSC5601.TXT
 *   ./cjk_tab_to_h KSX1001.1992-0 ksc5601 > ksc5601.h < KSX1001.TXT
 *
 *   ./cjk_tab_to_h BIG5 big5 > big5.h < BIG5.TXT
 *
 *   ./cjk_tab_to_h JOHAB johab > johab.h < JOHAB.TXT
 *
 *   ./cjk_tab_to_h JISX0213:2004 jisx0213 > jisx0213.h < JISX0213.TXT
 */
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <string.h>
#include <ctype.h>
#include <assert.h>
  41. typedef struct {
  42. int start;
  43. int end;
  44. } Block;
  45. typedef struct {
  46. int rows; /* number of possible values for the 1st byte */
  47. int cols; /* number of possible values for the 2nd byte */
  48. int (*row_byte) (int row); /* returns the 1st byte value for a given row */
  49. int (*col_byte) (int col); /* returns the 2nd byte value for a given col */
  50. int (*byte_row) (int byte); /* converts a 1st byte value to a row, else -1 */
  51. int (*byte_col) (int byte); /* converts a 2nd byte value to a col, else -1 */
  52. const char* check_row_expr; /* format string for 1st byte value checking */
  53. const char* check_col_expr; /* format string for 2nd byte value checking */
  54. const char* byte_row_expr; /* format string for 1st byte value to row */
  55. const char* byte_col_expr; /* format string for 2nd byte value to col */
  56. int** charset2uni; /* charset2uni[0..rows-1][0..cols-1] is valid */
  57. /* You'll understand the terms "row" and "col" when you buy Ken Lunde's book.
  58. Once a row is fixed, choosing a "col" is the same as choosing a "cell". */
  59. int* charsetpage; /* charsetpage[0..rows]: how large is a page for a row */
  60. int ncharsetblocks;
  61. Block* charsetblocks; /* blocks[0..nblocks-1] */
  62. int* uni2charset; /* uni2charset[0x0000..0xffff] */
  63. int fffd; /* uni representation of the invalid character */
  64. } Encoding;
  65. /*
  66. * Outputs the file title.
  67. */
  68. static void output_title (const char *charsetname)
  69. {
  70. printf("/*\n");
  71. printf(" * Copyright (C) 1999-2007 Free Software Foundation, Inc.\n");
  72. printf(" * This file is part of the GNU LIBICONV Library.\n");
  73. printf(" *\n");
  74. printf(" * The GNU LIBICONV Library is free software; you can redistribute it\n");
  75. printf(" * and/or modify it under the terms of the GNU Library General Public\n");
  76. printf(" * License as published by the Free Software Foundation; either version 2\n");
  77. printf(" * of the License, or (at your option) any later version.\n");
  78. printf(" *\n");
  79. printf(" * The GNU LIBICONV Library is distributed in the hope that it will be\n");
  80. printf(" * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of\n");
  81. printf(" * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\n");
  82. printf(" * Library General Public License for more details.\n");
  83. printf(" *\n");
  84. printf(" * You should have received a copy of the GNU Library General Public\n");
  85. printf(" * License along with the GNU LIBICONV Library; see the file COPYING.LIB.\n");
  86. printf(" * If not, write to the Free Software Foundation, Inc., 51 Franklin Street,\n");
  87. printf(" * Fifth Floor, Boston, MA 02110-1301, USA.\n");
  88. printf(" */\n");
  89. printf("\n");
  90. printf("/*\n");
  91. printf(" * %s\n", charsetname);
  92. printf(" */\n");
  93. printf("\n");
  94. }
  95. /*
  96. * Reads the charset2uni table from standard input.
  97. */
  98. static void read_table (Encoding* enc)
  99. {
  100. int row, col, i, i1, i2, c, j;
  101. enc->charset2uni = (int**) malloc(enc->rows*sizeof(int*));
  102. for (row = 0; row < enc->rows; row++)
  103. enc->charset2uni[row] = (int*) malloc(enc->cols*sizeof(int));
  104. for (row = 0; row < enc->rows; row++)
  105. for (col = 0; col < enc->cols; col++)
  106. enc->charset2uni[row][col] = 0xfffd;
  107. c = getc(stdin);
  108. ungetc(c,stdin);
  109. if (c == '#') {
  110. /* Read a unicode.org style .TXT file. */
  111. for (;;) {
  112. c = getc(stdin);
  113. if (c == EOF)
  114. break;
  115. if (c == '\n' || c == ' ' || c == '\t')
  116. continue;
  117. if (c == '#') {
  118. do { c = getc(stdin); } while (!(c == EOF || c == '\n'));
  119. continue;
  120. }
  121. ungetc(c,stdin);
  122. if (scanf("0x%x", &j) != 1)
  123. exit(1);
  124. i1 = j >> 8;
  125. i2 = j & 0xff;
  126. row = enc->byte_row(i1);
  127. col = enc->byte_col(i2);
  128. if (row < 0 || col < 0) {
  129. fprintf(stderr, "lost entry for %02x %02x\n", i1, i2);
  130. exit(1);
  131. }
  132. if (scanf(" 0x%x", &enc->charset2uni[row][col]) != 1)
  133. exit(1);
  134. }
  135. } else {
  136. /* Read a table of hexadecimal Unicode values. */
  137. for (i1 = 32; i1 < 132; i1++)
  138. for (i2 = 32; i2 < 132; i2++) {
  139. i = scanf("%x", &j);
  140. if (i == EOF)
  141. goto read_done;
  142. if (i != 1)
  143. exit(1);
  144. if (j < 0 || j == 0xffff)
  145. j = 0xfffd;
  146. if (j != 0xfffd) {
  147. if (enc->byte_row(i1) < 0 || enc->byte_col(i2) < 0) {
  148. fprintf(stderr, "lost entry at %02x %02x\n", i1, i2);
  149. exit (1);
  150. }
  151. enc->charset2uni[enc->byte_row(i1)][enc->byte_col(i2)] = j;
  152. }
  153. }
  154. read_done: ;
  155. }
  156. }
  157. /*
  158. * Determine whether the Unicode range goes outside the BMP.
  159. */
  160. static bool is_charset2uni_large (Encoding* enc)
  161. {
  162. int row, col;
  163. for (row = 0; row < enc->rows; row++)
  164. for (col = 0; col < enc->cols; col++)
  165. if (enc->charset2uni[row][col] >= 0x10000)
  166. return true;
  167. return false;
  168. }
  169. /*
  170. * Compactify the Unicode range by use of an auxiliary table,
  171. * so 16 bits suffice to store each value.
  172. */
  173. static int compact_large_charset2uni (Encoding* enc, unsigned int **urows, unsigned int *urowshift)
  174. {
  175. unsigned int shift;
  176. for (shift = 8; ; shift--) {
  177. int *upages = (int *) malloc((0x110000>>shift) * sizeof(int));
  178. int i, row, col, nurows;
  179. for (i = 0; i < 0x110000>>shift; i++)
  180. upages[i] = -1;
  181. for (row = 0; row < enc->rows; row++)
  182. for (col = 0; col < enc->cols; col++)
  183. upages[enc->charset2uni[row][col] >> shift] = 0;
  184. nurows = 0;
  185. for (i = 0; i < 0x110000>>shift; i++)
  186. if (upages[i] == 0)
  187. nurows++;
  188. /* We want all table entries to fit in an 'unsigned short'. */
  189. if (nurows <= 1<<(16-shift)) {
  190. int** old_charset2uni;
  191. *urows = (unsigned int *) malloc(nurows * sizeof(unsigned int));
  192. *urowshift = shift;
  193. nurows = 0;
  194. for (i = 0; i < 0x110000>>shift; i++)
  195. if (upages[i] == 0) {
  196. upages[i] = nurows;
  197. (*urows)[nurows] = i;
  198. nurows++;
  199. }
  200. old_charset2uni = enc->charset2uni;
  201. enc->charset2uni = (int**) malloc(enc->rows*sizeof(int*));
  202. for (row = 0; row < enc->rows; row++)
  203. enc->charset2uni[row] = (int*) malloc(enc->cols*sizeof(int));
  204. for (row = 0; row < enc->rows; row++)
  205. for (col = 0; col < enc->cols; col++) {
  206. int u = old_charset2uni[row][col];
  207. enc->charset2uni[row][col] =
  208. (upages[u >> shift] << shift) | (u & ((1 << shift) - 1));
  209. }
  210. enc->fffd =
  211. (upages[0xfffd >> shift] << shift) | (0xfffd & ((1 << shift) - 1));
  212. return nurows;
  213. }
  214. }
  215. abort();
  216. }
  217. /*
  218. * Computes the charsetpage[0..rows] array.
  219. */
  220. static void find_charset2uni_pages (Encoding* enc)
  221. {
  222. int row, col;
  223. enc->charsetpage = (int*) malloc((enc->rows+1)*sizeof(int));
  224. for (row = 0; row <= enc->rows; row++)
  225. enc->charsetpage[row] = 0;
  226. for (row = 0; row < enc->rows; row++) {
  227. int used = 0;
  228. for (col = 0; col < enc->cols; col++)
  229. if (enc->charset2uni[row][col] != enc->fffd)
  230. used = col+1;
  231. enc->charsetpage[row] = used;
  232. }
  233. }
  234. /*
  235. * Fills in nblocks and blocks.
  236. */
  237. static void find_charset2uni_blocks (Encoding* enc)
  238. {
  239. int n, row, lastrow;
  240. enc->charsetblocks = (Block*) malloc(enc->rows*sizeof(Block));
  241. n = 0;
  242. for (row = 0; row < enc->rows; row++)
  243. if (enc->charsetpage[row] > 0 && (row == 0 || enc->charsetpage[row-1] == 0)) {
  244. for (lastrow = row; enc->charsetpage[lastrow+1] > 0; lastrow++);
  245. enc->charsetblocks[n].start = row * enc->cols;
  246. enc->charsetblocks[n].end = lastrow * enc->cols + enc->charsetpage[lastrow];
  247. n++;
  248. }
  249. enc->ncharsetblocks = n;
  250. }
  251. /*
  252. * Outputs the charset to unicode table and function.
  253. */
  254. static void output_charset2uni (const char* name, Encoding* enc)
  255. {
  256. int nurows, row, col, lastrow, col_max, i, i1_min, i1_max;
  257. bool is_large;
  258. unsigned int* urows;
  259. unsigned int urowshift;
  260. Encoding tmpenc;
  261. is_large = is_charset2uni_large(enc);
  262. if (is_large) {
  263. /* Use a temporary copy of enc. */
  264. tmpenc = *enc;
  265. enc = &tmpenc;
  266. nurows = compact_large_charset2uni(enc,&urows,&urowshift);
  267. } else {
  268. nurows = 0; urows = NULL; urowshift = 0; enc->fffd = 0xfffd;
  269. }
  270. find_charset2uni_pages(enc);
  271. find_charset2uni_blocks(enc);
  272. for (row = 0; row < enc->rows; row++)
  273. if (enc->charsetpage[row] > 0) {
  274. if (row == 0 || enc->charsetpage[row-1] == 0) {
  275. /* Start a new block. */
  276. for (lastrow = row; enc->charsetpage[lastrow+1] > 0; lastrow++);
  277. printf("static const unsigned short %s_2uni_page%02x[%d] = {\n",
  278. name, enc->row_byte(row),
  279. (lastrow-row) * enc->cols + enc->charsetpage[lastrow]);
  280. }
  281. printf(" /""* 0x%02x *""/\n ", enc->row_byte(row));
  282. col_max = (enc->charsetpage[row+1] > 0 ? enc->cols : enc->charsetpage[row]);
  283. for (col = 0; col < col_max; col++) {
  284. printf(" 0x%04x,", enc->charset2uni[row][col]);
  285. if ((col % 8) == 7 && (col+1 < col_max)) printf("\n ");
  286. }
  287. printf("\n");
  288. if (enc->charsetpage[row+1] == 0) {
  289. /* End a block. */
  290. printf("};\n");
  291. }
  292. }
  293. printf("\n");
  294. if (is_large) {
  295. printf("static const ucs4_t %s_2uni_upages[%d] = {\n ", name, nurows);
  296. for (i = 0; i < nurows; i++) {
  297. printf(" 0x%05x,", urows[i] << urowshift);
  298. if ((i % 8) == 7 && (i+1 < nurows)) printf("\n ");
  299. }
  300. printf("\n");
  301. printf("};\n");
  302. printf("\n");
  303. }
  304. printf("static int\n");
  305. printf("%s_mbtowc (conv_t conv, ucs4_t *pwc, const unsigned char *s, int n)\n", name);
  306. printf("{\n");
  307. printf(" unsigned char c1 = s[0];\n");
  308. printf(" if (");
  309. for (i = 0; i < enc->ncharsetblocks; i++) {
  310. i1_min = enc->row_byte(enc->charsetblocks[i].start / enc->cols);
  311. i1_max = enc->row_byte((enc->charsetblocks[i].end-1) / enc->cols);
  312. if (i > 0)
  313. printf(" || ");
  314. if (i1_min == i1_max)
  315. printf("(c1 == 0x%02x)", i1_min);
  316. else
  317. printf("(c1 >= 0x%02x && c1 <= 0x%02x)", i1_min, i1_max);
  318. }
  319. printf(") {\n");
  320. printf(" if (n >= 2) {\n");
  321. printf(" unsigned char c2 = s[1];\n");
  322. printf(" if (");
  323. printf(enc->check_col_expr, "c2");
  324. printf(") {\n");
  325. printf(" unsigned int i = %d * (", enc->cols);
  326. printf(enc->byte_row_expr, "c1");
  327. printf(") + (");
  328. printf(enc->byte_col_expr, "c2");
  329. printf(");\n");
  330. printf(" %s wc = 0xfffd;\n", is_large ? "ucs4_t" : "unsigned short");
  331. if (is_large) printf(" unsigned short swc;\n");
  332. for (i = 0; i < enc->ncharsetblocks; i++) {
  333. printf(" ");
  334. if (i > 0)
  335. printf("} else ");
  336. if (i < enc->ncharsetblocks-1)
  337. printf("if (i < %d) ", enc->charsetblocks[i+1].start);
  338. printf("{\n");
  339. printf(" if (i < %d)\n", enc->charsetblocks[i].end);
  340. printf(" %s = ", is_large ? "swc" : "wc");
  341. printf("%s_2uni_page%02x[i", name, enc->row_byte(enc->charsetblocks[i].start / enc->cols));
  342. if (enc->charsetblocks[i].start > 0)
  343. printf("-%d", enc->charsetblocks[i].start);
  344. printf("]");
  345. if (is_large) printf(",\n wc = %s_2uni_upages[swc>>%d] | (swc & 0x%x)", name, urowshift, (1 << urowshift) - 1);
  346. printf(";\n");
  347. }
  348. printf(" }\n");
  349. printf(" if (wc != 0xfffd) {\n");
  350. printf(" *pwc = %swc;\n", is_large ? "" : "(ucs4_t) ");
  351. printf(" return 2;\n");
  352. printf(" }\n");
  353. printf(" }\n");
  354. printf(" return RET_ILSEQ;\n");
  355. printf(" }\n");
  356. printf(" return RET_TOOFEW(0);\n");
  357. printf(" }\n");
  358. printf(" return RET_ILSEQ;\n");
  359. printf("}\n");
  360. printf("\n");
  361. }
  362. /*
  363. * Outputs the charset to unicode table and function.
  364. * (Suitable if the mapping function is well defined, i.e. has no holes, and
  365. * is monotonically increasing with small gaps only.)
  366. */
  367. static void output_charset2uni_noholes_monotonic (const char* name, Encoding* enc)
  368. {
  369. int row, col, lastrow, r, col_max, i, i1_min, i1_max;
  370. /* Choose stepsize so that stepsize*steps_per_row >= enc->cols, and
  371. enc->charset2uni[row][col] - enc->charset2uni[row][col/stepsize*stepsize]
  372. is always < 0x100. */
  373. int steps_per_row = 2;
  374. int stepsize = (enc->cols + steps_per_row-1) / steps_per_row;
  375. find_charset2uni_pages(enc);
  376. find_charset2uni_blocks(enc);
  377. for (row = 0; row < enc->rows; row++)
  378. if (enc->charsetpage[row] > 0) {
  379. if (row == 0 || enc->charsetpage[row-1] == 0) {
  380. /* Start a new block. */
  381. for (lastrow = row; enc->charsetpage[lastrow+1] > 0; lastrow++);
  382. printf("static const unsigned short %s_2uni_main_page%02x[%d] = {\n ",
  383. name, enc->row_byte(row),
  384. steps_per_row*(lastrow-row+1));
  385. for (r = row; r <= lastrow; r++) {
  386. for (i = 0; i < steps_per_row; i++)
  387. printf(" 0x%04x,", enc->charset2uni[r][i*stepsize]);
  388. if (((r-row) % 4) == 3 && (r < lastrow)) printf("\n ");
  389. }
  390. printf("\n");
  391. printf("};\n");
  392. printf("static const unsigned char %s_2uni_page%02x[%d] = {\n",
  393. name, enc->row_byte(row),
  394. (lastrow-row) * enc->cols + enc->charsetpage[lastrow]);
  395. }
  396. printf(" /""* 0x%02x *""/\n ", enc->row_byte(row));
  397. col_max = (enc->charsetpage[row+1] > 0 ? enc->cols : enc->charsetpage[row]);
  398. for (col = 0; col < col_max; col++) {
  399. printf(" 0x%02x,", enc->charset2uni[row][col] - enc->charset2uni[row][col/stepsize*stepsize]);
  400. if ((col % 8) == 7 && (col+1 < col_max)) printf("\n ");
  401. }
  402. printf("\n");
  403. if (enc->charsetpage[row+1] == 0) {
  404. /* End a block. */
  405. printf("};\n");
  406. }
  407. }
  408. printf("\n");
  409. printf("static int\n");
  410. printf("%s_mbtowc (conv_t conv, ucs4_t *pwc, const unsigned char *s, int n)\n", name);
  411. printf("{\n");
  412. printf(" unsigned char c1 = s[0];\n");
  413. printf(" if (");
  414. for (i = 0; i < enc->ncharsetblocks; i++) {
  415. i1_min = enc->row_byte(enc->charsetblocks[i].start / enc->cols);
  416. i1_max = enc->row_byte((enc->charsetblocks[i].end-1) / enc->cols);
  417. if (i > 0)
  418. printf(" || ");
  419. if (i1_min == i1_max)
  420. printf("(c1 == 0x%02x)", i1_min);
  421. else
  422. printf("(c1 >= 0x%02x && c1 <= 0x%02x)", i1_min, i1_max);
  423. }
  424. printf(") {\n");
  425. printf(" if (n >= 2) {\n");
  426. printf(" unsigned char c2 = s[1];\n");
  427. printf(" if (");
  428. printf(enc->check_col_expr, "c2");
  429. printf(") {\n");
  430. printf(" unsigned int row = ");
  431. printf(enc->byte_row_expr, "c1");
  432. printf(";\n");
  433. printf(" unsigned int col = ");
  434. printf(enc->byte_col_expr, "c2");
  435. printf(";\n");
  436. printf(" unsigned int i = %d * row + col;\n", enc->cols);
  437. printf(" unsigned short wc = 0xfffd;\n");
  438. for (i = 0; i < enc->ncharsetblocks; i++) {
  439. printf(" ");
  440. if (i > 0)
  441. printf("} else ");
  442. if (i < enc->ncharsetblocks-1)
  443. printf("if (i < %d) ", enc->charsetblocks[i+1].start);
  444. printf("{\n");
  445. printf(" if (i < %d)\n", enc->charsetblocks[i].end);
  446. printf(" wc = %s_2uni_main_page%02x[%d*", name, enc->row_byte(enc->charsetblocks[i].start / enc->cols), steps_per_row);
  447. if (enc->charsetblocks[i].start > 0)
  448. printf("(row-%d)", enc->charsetblocks[i].start / enc->cols);
  449. else
  450. printf("row");
  451. printf("+");
  452. if (steps_per_row == 2)
  453. printf("(col>=%d?1:0)", stepsize);
  454. else
  455. printf("col/%d", stepsize);
  456. printf("] + %s_2uni_page%02x[i", name, enc->row_byte(enc->charsetblocks[i].start / enc->cols));
  457. if (enc->charsetblocks[i].start > 0)
  458. printf("-%d", enc->charsetblocks[i].start);
  459. printf("];\n");
  460. }
  461. printf(" }\n");
  462. printf(" if (wc != 0xfffd) {\n");
  463. printf(" *pwc = (ucs4_t) wc;\n");
  464. printf(" return 2;\n");
  465. printf(" }\n");
  466. printf(" }\n");
  467. printf(" return RET_ILSEQ;\n");
  468. printf(" }\n");
  469. printf(" return RET_TOOFEW(0);\n");
  470. printf(" }\n");
  471. printf(" return RET_ILSEQ;\n");
  472. printf("}\n");
  473. printf("\n");
  474. }
  475. /*
  476. * Computes the uni2charset[0x0000..0x2ffff] array.
  477. */
  478. static void invert (Encoding* enc)
  479. {
  480. int row, col, j;
  481. enc->uni2charset = (int*) malloc(0x30000*sizeof(int));
  482. for (j = 0; j < 0x30000; j++)
  483. enc->uni2charset[j] = 0;
  484. for (row = 0; row < enc->rows; row++)
  485. for (col = 0; col < enc->cols; col++) {
  486. j = enc->charset2uni[row][col];
  487. if (j != 0xfffd)
  488. enc->uni2charset[j] = 0x100 * enc->row_byte(row) + enc->col_byte(col);
  489. }
  490. }
  491. /*
  492. * Outputs the unicode to charset table and function, using a linear array.
  493. * (Suitable if the table is dense.)
  494. */
  495. static void output_uni2charset_dense (const char* name, Encoding* enc)
  496. {
  497. /* Like in 8bit_tab_to_h.c */
  498. bool pages[0x300];
  499. int line[0x6000];
  500. int tableno;
  501. struct { int minline; int maxline; int usecount; } tables[0x6000];
  502. bool first;
  503. int row, col, j, p, j1, j2, t;
  504. for (p = 0; p < 0x300; p++)
  505. pages[p] = false;
  506. for (row = 0; row < enc->rows; row++)
  507. for (col = 0; col < enc->cols; col++) {
  508. j = enc->charset2uni[row][col];
  509. if (j != 0xfffd)
  510. pages[j>>8] = true;
  511. }
  512. for (j1 = 0; j1 < 0x6000; j1++) {
  513. bool all_invalid = true;
  514. for (j2 = 0; j2 < 8; j2++) {
  515. j = 8*j1+j2;
  516. if (enc->uni2charset[j] != 0)
  517. all_invalid = false;
  518. }
  519. if (all_invalid)
  520. line[j1] = -1;
  521. else
  522. line[j1] = 0;
  523. }
  524. tableno = 0;
  525. for (j1 = 0; j1 < 0x6000; j1++) {
  526. if (line[j1] >= 0) {
  527. if (tableno > 0
  528. && ((j1 > 0 && line[j1-1] == tableno-1)
  529. || ((tables[tableno-1].maxline >> 5) == (j1 >> 5)
  530. && j1 - tables[tableno-1].maxline <= 8))) {
  531. line[j1] = tableno-1;
  532. tables[tableno-1].maxline = j1;
  533. } else {
  534. tableno++;
  535. line[j1] = tableno-1;
  536. tables[tableno-1].minline = tables[tableno-1].maxline = j1;
  537. }
  538. }
  539. }
  540. for (t = 0; t < tableno; t++) {
  541. tables[t].usecount = 0;
  542. j1 = 8*tables[t].minline;
  543. j2 = 8*(tables[t].maxline+1);
  544. for (j = j1; j < j2; j++)
  545. if (enc->uni2charset[j] != 0)
  546. tables[t].usecount++;
  547. }
  548. {
  549. p = -1;
  550. for (t = 0; t < tableno; t++)
  551. if (tables[t].usecount > 1) {
  552. p = tables[t].minline >> 5;
  553. printf("static const unsigned short %s_page%02x[%d] = {\n", name, p, 8*(tables[t].maxline-tables[t].minline+1));
  554. for (j1 = tables[t].minline; j1 <= tables[t].maxline; j1++) {
  555. if ((j1 % 0x20) == 0 && j1 > tables[t].minline)
  556. printf(" /* 0x%04x */\n", 8*j1);
  557. printf(" ");
  558. for (j2 = 0; j2 < 8; j2++) {
  559. j = 8*j1+j2;
  560. printf(" 0x%04x,", enc->uni2charset[j]);
  561. }
  562. printf(" /*0x%02x-0x%02x*/\n", 8*(j1 % 0x20), 8*(j1 % 0x20)+7);
  563. }
  564. printf("};\n");
  565. }
  566. if (p >= 0)
  567. printf("\n");
  568. }
  569. printf("static int\n%s_wctomb (conv_t conv, unsigned char *r, ucs4_t wc, int n)\n", name);
  570. printf("{\n");
  571. printf(" if (n >= 2) {\n");
  572. printf(" unsigned short c = 0;\n");
  573. first = true;
  574. for (j1 = 0; j1 < 0x6000;) {
  575. t = line[j1];
  576. for (j2 = j1; j2 < 0x6000 && line[j2] == t; j2++);
  577. if (t >= 0) {
  578. if (j1 != tables[t].minline) abort();
  579. if (j2 > tables[t].maxline+1) abort();
  580. j2 = tables[t].maxline+1;
  581. if (first)
  582. printf(" ");
  583. else
  584. printf(" else ");
  585. first = false;
  586. if (tables[t].usecount == 0) abort();
  587. if (tables[t].usecount == 1) {
  588. if (j2 != j1+1) abort();
  589. for (j = 8*j1; j < 8*j2; j++)
  590. if (enc->uni2charset[j] != 0) {
  591. printf("if (wc == 0x%04x)\n c = 0x%02x;\n", j, enc->uni2charset[j]);
  592. break;
  593. }
  594. } else {
  595. if (j1 == 0) {
  596. printf("if (wc < 0x%04x)", 8*j2);
  597. } else {
  598. printf("if (wc >= 0x%04x && wc < 0x%04x)", 8*j1, 8*j2);
  599. }
  600. printf("\n c = %s_page%02x[wc", name, j1 >> 5);
  601. if (tables[t].minline > 0)
  602. printf("-0x%04x", 8*j1);
  603. printf("];\n");
  604. }
  605. }
  606. j1 = j2;
  607. }
  608. printf(" if (c != 0) {\n");
  609. printf(" r[0] = (c >> 8); r[1] = (c & 0xff);\n");
  610. printf(" return 2;\n");
  611. printf(" }\n");
  612. printf(" return RET_ILUNI;\n");
  613. printf(" }\n");
  614. printf(" return RET_TOOSMALL;\n");
  615. printf("}\n");
  616. }
  617. /*
  618. * Outputs the unicode to charset table and function, using a packed array.
  619. * (Suitable if the table is sparse.)
  620. * The argument 'monotonic' may be set to true if the mapping is monotonically
  621. * increasing with small gaps only.
  622. */
  623. static void output_uni2charset_sparse (const char* name, Encoding* enc, bool monotonic)
  624. {
  625. bool pages[0x300];
  626. Block pageblocks[0x300]; int npageblocks;
  627. int indx2charset[0x30000];
  628. int summary_indx[0x3000];
  629. int summary_used[0x3000];
  630. int i, row, col, j, p, j1, j2, indx;
  631. bool is_large;
  632. /* for monotonic: */
  633. int log2_stepsize = (!strcmp(name,"uhc_2") ? 6 : 7);
  634. int stepsize = 1 << log2_stepsize;
  635. int indxsteps;
  636. /* Fill pages[0x300]. */
  637. for (p = 0; p < 0x300; p++)
  638. pages[p] = false;
  639. for (row = 0; row < enc->rows; row++)
  640. for (col = 0; col < enc->cols; col++) {
  641. j = enc->charset2uni[row][col];
  642. if (j != 0xfffd)
  643. pages[j>>8] = true;
  644. }
  645. /* Determine whether two or three bytes are needed for each character. */
  646. is_large = false;
  647. for (j = 0; j < 0x30000; j++)
  648. if (enc->uni2charset[j] >= 0x10000)
  649. is_large = true;
  650. #if 0
  651. for (p = 0; p < 0x300; p++)
  652. if (pages[p]) {
  653. printf("static const unsigned short %s_page%02x[256] = {\n", name, p);
  654. for (j1 = 0; j1 < 32; j1++) {
  655. printf(" ");
  656. for (j2 = 0; j2 < 8; j2++)
  657. printf("0x%04x, ", enc->uni2charset[256*p+8*j1+j2]);
  658. printf("/""*0x%02x-0x%02x*""/\n", 8*j1, 8*j1+7);
  659. }
  660. printf("};\n");
  661. }
  662. printf("\n");
  663. #endif
  664. /* Fill summary_indx[] and summary_used[]. */
  665. indx = 0;
  666. for (j1 = 0; j1 < 0x3000; j1++) {
  667. summary_indx[j1] = indx;
  668. summary_used[j1] = 0;
  669. for (j2 = 0; j2 < 16; j2++) {
  670. j = 16*j1+j2;
  671. if (enc->uni2charset[j] != 0) {
  672. indx2charset[indx++] = enc->uni2charset[j];
  673. summary_used[j1] |= (1 << j2);
  674. }
  675. }
  676. }
  677. /* Fill npageblocks and pageblocks[]. */
  678. npageblocks = 0;
  679. for (p = 0; p < 0x300; ) {
  680. if (pages[p] && (p == 0 || !pages[p-1])) {
  681. pageblocks[npageblocks].start = 16*p;
  682. do p++; while (p < 0x300 && pages[p]);
  683. j1 = 16*p;
  684. while (summary_used[j1-1] == 0) j1--;
  685. pageblocks[npageblocks].end = j1;
  686. npageblocks++;
  687. } else
  688. p++;
  689. }
  690. if (monotonic) {
  691. indxsteps = (indx + stepsize-1) / stepsize;
  692. printf("static const unsigned short %s_2charset_main[%d] = {\n", name, indxsteps);
  693. for (i = 0; i < indxsteps; ) {
  694. if ((i % 8) == 0) printf(" ");
  695. printf(" 0x%04x,", indx2charset[i*stepsize]);
  696. i++;
  697. if ((i % 8) == 0 || i == indxsteps) printf("\n");
  698. }
  699. printf("};\n");
  700. printf("static const unsigned char %s_2charset[%d] = {\n", name, indx);
  701. for (i = 0; i < indx; ) {
  702. if ((i % 8) == 0) printf(" ");
  703. printf(" 0x%02x,", indx2charset[i] - indx2charset[i/stepsize*stepsize]);
  704. i++;
  705. if ((i % 8) == 0 || i == indx) printf("\n");
  706. }
  707. printf("};\n");
  708. } else {
  709. if (is_large) {
  710. printf("static const unsigned char %s_2charset[3*%d] = {\n", name, indx);
  711. for (i = 0; i < indx; ) {
  712. if ((i % 4) == 0) printf(" ");
  713. printf(" 0x%1x,0x%02x,0x%02x,", indx2charset[i] >> 16,
  714. (indx2charset[i] >> 8) & 0xff, indx2charset[i] & 0xff);
  715. i++;
  716. if ((i % 4) == 0 || i == indx) printf("\n");
  717. }
  718. printf("};\n");
  719. } else {
  720. printf("static const unsigned short %s_2charset[%d] = {\n", name, indx);
  721. for (i = 0; i < indx; ) {
  722. if ((i % 8) == 0) printf(" ");
  723. printf(" 0x%04x,", indx2charset[i]);
  724. i++;
  725. if ((i % 8) == 0 || i == indx) printf("\n");
  726. }
  727. printf("};\n");
  728. }
  729. }
  730. printf("\n");
  731. for (i = 0; i < npageblocks; i++) {
  732. printf("static const Summary16 %s_uni2indx_page%02x[%d] = {\n", name,
  733. pageblocks[i].start/16, pageblocks[i].end-pageblocks[i].start);
  734. for (j1 = pageblocks[i].start; j1 < pageblocks[i].end; ) {
  735. if (((16*j1) % 0x100) == 0) printf(" /""* 0x%04x *""/\n", 16*j1);
  736. if ((j1 % 4) == 0) printf(" ");
  737. printf(" { %4d, 0x%04x },", summary_indx[j1], summary_used[j1]);
  738. j1++;
  739. if ((j1 % 4) == 0 || j1 == pageblocks[i].end) printf("\n");
  740. }
  741. printf("};\n");
  742. }
  743. printf("\n");
  744. printf("static int\n");
  745. printf("%s_wctomb (conv_t conv, unsigned char *r, ucs4_t wc, int n)\n", name);
  746. printf("{\n");
  747. printf(" if (n >= 2) {\n");
  748. printf(" const Summary16 *summary = NULL;\n");
  749. for (i = 0; i < npageblocks; i++) {
  750. printf(" ");
  751. if (i > 0)
  752. printf("else ");
  753. printf("if (wc >= 0x%04x && wc < 0x%04x)\n",
  754. 16*pageblocks[i].start, 16*pageblocks[i].end);
  755. printf(" summary = &%s_uni2indx_page%02x[(wc>>4)", name,
  756. pageblocks[i].start/16);
  757. if (pageblocks[i].start > 0)
  758. printf("-0x%03x", pageblocks[i].start);
  759. printf("];\n");
  760. }
  761. printf(" if (summary) {\n");
  762. printf(" unsigned short used = summary->used;\n");
  763. printf(" unsigned int i = wc & 0x0f;\n");
  764. printf(" if (used & ((unsigned short) 1 << i)) {\n");
  765. if (monotonic || !is_large)
  766. printf(" unsigned short c;\n");
  767. printf(" /* Keep in `used' only the bits 0..i-1. */\n");
  768. printf(" used &= ((unsigned short) 1 << i) - 1;\n");
  769. printf(" /* Add `summary->indx' and the number of bits set in `used'. */\n");
  770. printf(" used = (used & 0x5555) + ((used & 0xaaaa) >> 1);\n");
  771. printf(" used = (used & 0x3333) + ((used & 0xcccc) >> 2);\n");
  772. printf(" used = (used & 0x0f0f) + ((used & 0xf0f0) >> 4);\n");
  773. printf(" used = (used & 0x00ff) + (used >> 8);\n");
  774. if (monotonic) {
  775. printf(" used += summary->indx;\n");
  776. printf(" c = %s_2charset_main[used>>%d] + %s_2charset[used];\n", name, log2_stepsize, name);
  777. printf(" r[0] = (c >> 8); r[1] = (c & 0xff);\n");
  778. printf(" return 2;\n");
  779. } else {
  780. if (is_large) {
  781. printf(" used += summary->indx;\n");
  782. printf(" r[0] = %s_2charset[3*used];\n", name);
  783. printf(" r[1] = %s_2charset[3*used+1];\n", name);
  784. printf(" r[2] = %s_2charset[3*used+2];\n", name);
  785. printf(" return 3;\n");
  786. } else {
  787. printf(" c = %s_2charset[summary->indx + used];\n", name);
  788. printf(" r[0] = (c >> 8); r[1] = (c & 0xff);\n");
  789. printf(" return 2;\n");
  790. }
  791. }
  792. printf(" }\n");
  793. printf(" }\n");
  794. printf(" return RET_ILUNI;\n");
  795. printf(" }\n");
  796. printf(" return RET_TOOSMALL;\n");
  797. printf("}\n");
  798. }
  799. /* ISO-2022/EUC specifics */
  /* Byte value for row index `row': rows are numbered from byte 0x21. */
  static int row_byte_normal (int row)
  {
    const int first_byte = 0x21;
    return first_byte + row;
  }
  /* Byte value for column index `col': columns are numbered from byte 0x21. */
  static int col_byte_normal (int col)
  {
    const int first_byte = 0x21;
    return first_byte + col;
  }
  /* Row index for a byte value: the byte minus 0x21 (no range check). */
  static int byte_row_normal (int byte)
  {
    const int first_byte = 0x21;
    return byte - first_byte;
  }
  /* Column index for a byte value: the byte minus 0x21 (no range check). */
  static int byte_col_normal (int byte)
  {
    const int first_byte = 0x21;
    return byte - first_byte;
  }
  804. static void do_normal (const char* name)
  805. {
  806. Encoding enc;
  807. enc.rows = 94;
  808. enc.cols = 94;
  809. enc.row_byte = row_byte_normal;
  810. enc.col_byte = col_byte_normal;
  811. enc.byte_row = byte_row_normal;
  812. enc.byte_col = byte_col_normal;
  813. enc.check_row_expr = "%1$s >= 0x21 && %1$s < 0x7f";
  814. enc.check_col_expr = "%1$s >= 0x21 && %1$s < 0x7f";
  815. enc.byte_row_expr = "%1$s - 0x21";
  816. enc.byte_col_expr = "%1$s - 0x21";
  817. read_table(&enc);
  818. output_charset2uni(name,&enc);
  819. invert(&enc); output_uni2charset_sparse(name,&enc,false);
  820. }
  821. /* Note: On first sight, the jisx0212_2charset[] table seems to be in order,
  822. starting from the charset=0x3021/uni=0x4e02 pair. But it's only mostly in
  823. order. There are 75 out-of-order values, scattered all throughout the table.
  824. */
  825. static void do_normal_only_charset2uni (const char* name)
  826. {
  827. Encoding enc;
  828. enc.rows = 94;
  829. enc.cols = 94;
  830. enc.row_byte = row_byte_normal;
  831. enc.col_byte = col_byte_normal;
  832. enc.byte_row = byte_row_normal;
  833. enc.byte_col = byte_col_normal;
  834. enc.check_row_expr = "%1$s >= 0x21 && %1$s < 0x7f";
  835. enc.check_col_expr = "%1$s >= 0x21 && %1$s < 0x7f";
  836. enc.byte_row_expr = "%1$s - 0x21";
  837. enc.byte_col_expr = "%1$s - 0x21";
  838. read_table(&enc);
  839. output_charset2uni(name,&enc);
  840. }
  841. /* CNS 11643 specifics - trick to put two tables into one */
  /* Packs a combined row index into a value whose bits above the low byte
     hold row / 94 and whose low byte is 0x21 + row % 94. */
  static int row_byte_cns11643 (int row)
  {
    int high = row / 94;
    int low = row % 94;
    return 0x100 * high + low + 0x21;
  }
  /* Inverse of row_byte_cns11643: 94 rows per unit of the bits above the
     low byte, plus the low byte minus 0x21. */
  static int byte_row_cns11643 (int byte)
  {
    int high = byte >> 8;
    int low = byte & 0xff;
    return high * 94 + low - 0x21;
  }
  848. static void do_cns11643_only_uni2charset (const char* name)
  849. {
  850. Encoding enc;
  851. enc.rows = 16*94;
  852. enc.cols = 94;
  853. enc.row_byte = row_byte_cns11643;
  854. enc.col_byte = col_byte_normal;
  855. enc.byte_row = byte_row_cns11643;
  856. enc.byte_col = byte_col_normal;
  857. enc.check_row_expr = "%1$s >= 0x21 && %1$s < 0x7f";
  858. enc.check_col_expr = "%1$s >= 0x21 && %1$s < 0x7f";
  859. enc.byte_row_expr = "%1$s - 0x21";
  860. enc.byte_col_expr = "%1$s - 0x21";
  861. read_table(&enc);
  862. invert(&enc);
  863. output_uni2charset_sparse(name,&enc,false);
  864. }
  865. /* GBK specifics */
  /* Byte value for row index `row': rows are numbered from byte 0x81. */
  static int row_byte_gbk1 (int row)
  {
    const int first_byte = 0x81;
    return first_byte + row;
  }
  /* Byte value for column index `col': columns below 0x3f start at byte 0x40,
     the remaining ones continue at 0x80, so byte 0x7f is never produced. */
  static int col_byte_gbk1 (int col)
  {
    if (col < 0x3f)
      return col + 0x40;
    return col + 0x41;
  }
  /* Row index for a byte in 0x81..0xfe, or -1 for any other byte. */
  static int byte_row_gbk1 (int byte)
  {
    if (byte < 0x81 || byte > 0xfe)
      return -1;
    return byte - 0x81;
  }
  /* Column index for a byte in 0x40..0x7e or 0x80..0xfe, or -1 otherwise. */
  static int byte_col_gbk1 (int byte)
  {
    if (byte >= 0x40 && byte <= 0x7e)
      return byte - 0x40;
    if (byte >= 0x80 && byte <= 0xfe)
      return byte - 0x41;
    return -1;
  }
  886. static void do_gbk1 (const char* name)
  887. {
  888. Encoding enc;
  889. enc.rows = 126;
  890. enc.cols = 190;
  891. enc.row_byte = row_byte_gbk1;
  892. enc.col_byte = col_byte_gbk1;
  893. enc.byte_row = byte_row_gbk1;
  894. enc.byte_col = byte_col_gbk1;
  895. enc.check_row_expr = "%1$s >= 0x81 && %1$s < 0xff";
  896. enc.check_col_expr = "(%1$s >= 0x40 && %1$s < 0x7f) || (%1$s >= 0x80 && %1$s < 0xff)";
  897. enc.byte_row_expr = "%1$s - 0x81";
  898. enc.byte_col_expr = "%1$s - (%1$s >= 0x80 ? 0x41 : 0x40)";
  899. read_table(&enc);
  900. output_charset2uni(name,&enc);
  901. invert(&enc); output_uni2charset_dense(name,&enc);
  902. }
  903. static void do_gbk1_only_charset2uni (const char* name)
  904. {
  905. Encoding enc;
  906. enc.rows = 126;
  907. enc.cols = 190;
  908. enc.row_byte = row_byte_gbk1;
  909. enc.col_byte = col_byte_gbk1;
  910. enc.byte_row = byte_row_gbk1;
  911. enc.byte_col = byte_col_gbk1;
  912. enc.check_row_expr = "%1$s >= 0x81 && %1$s < 0xff";
  913. enc.check_col_expr = "(%1$s >= 0x40 && %1$s < 0x7f) || (%1$s >= 0x80 && %1$s < 0xff)";
  914. enc.byte_row_expr = "%1$s - 0x81";
  915. enc.byte_col_expr = "%1$s - (%1$s >= 0x80 ? 0x41 : 0x40)";
  916. read_table(&enc);
  917. output_charset2uni(name,&enc);
  918. }
  /* Byte value for row index `row': rows are numbered from byte 0x81. */
  static int row_byte_gbk2 (int row)
  {
    const int first_byte = 0x81;
    return first_byte + row;
  }
  /* Byte value for column index `col': columns below 0x3f start at byte 0x40,
     the remaining ones continue at 0x80, so byte 0x7f is never produced. */
  static int col_byte_gbk2 (int col)
  {
    if (col < 0x3f)
      return col + 0x40;
    return col + 0x41;
  }
  /* Row index for a byte in 0x81..0xfe, or -1 for any other byte. */
  static int byte_row_gbk2 (int byte)
  {
    if (byte < 0x81 || byte > 0xfe)
      return -1;
    return byte - 0x81;
  }
  /* Column index for a byte in 0x40..0x7e or 0x80..0xa0, or -1 otherwise. */
  static int byte_col_gbk2 (int byte)
  {
    if (byte >= 0x40 && byte <= 0x7e)
      return byte - 0x40;
    if (byte >= 0x80 && byte <= 0xa0)
      return byte - 0x41;
    return -1;
  }
  939. static void do_gbk2_only_charset2uni (const char* name)
  940. {
  941. Encoding enc;
  942. enc.rows = 126;
  943. enc.cols = 96;
  944. enc.row_byte = row_byte_gbk2;
  945. enc.col_byte = col_byte_gbk2;
  946. enc.byte_row = byte_row_gbk2;
  947. enc.byte_col = byte_col_gbk2;
  948. enc.check_row_expr = "%1$s >= 0x81 && %1$s < 0xff";
  949. enc.check_col_expr = "(%1$s >= 0x40 && %1$s < 0x7f) || (%1$s >= 0x80 && %1$s < 0xa1)";
  950. enc.byte_row_expr = "%1$s - 0x81";
  951. enc.byte_col_expr = "%1$s - (%1$s >= 0x80 ? 0x41 : 0x40)";
  952. read_table(&enc);
  953. output_charset2uni(name,&enc);
  954. }
  955. static void do_gbk1_only_uni2charset (const char* name)
  956. {
  957. Encoding enc;
  958. enc.rows = 126;
  959. enc.cols = 190;
  960. enc.row_byte = row_byte_gbk1;
  961. enc.col_byte = col_byte_gbk1;
  962. enc.byte_row = byte_row_gbk1;
  963. enc.byte_col = byte_col_gbk1;
  964. enc.check_row_expr = "%1$s >= 0x81 && %1$s < 0xff";
  965. enc.check_col_expr = "(%1$s >= 0x40 && %1$s < 0x7f) || (%1$s >= 0x80 && %1$s < 0xff)";
  966. enc.byte_row_expr = "%1$s - 0x81";
  967. enc.byte_col_expr = "%1$s - (%1$s >= 0x80 ? 0x41 : 0x40)";
  968. read_table(&enc);
  969. invert(&enc); output_uni2charset_sparse(name,&enc,false);
  970. }
  971. /* KSC 5601 specifics */
  972. /*
  973. * Reads the charset2uni table from standard input.
  974. */
  975. static void read_table_ksc5601 (Encoding* enc)
  976. {
  977. int row, col, i, i1, i2, c, j;
  978. enc->charset2uni = (int**) malloc(enc->rows*sizeof(int*));
  979. for (row = 0; row < enc->rows; row++)
  980. enc->charset2uni[row] = (int*) malloc(enc->cols*sizeof(int));
  981. for (row = 0; row < enc->rows; row++)
  982. for (col = 0; col < enc->cols; col++)
  983. enc->charset2uni[row][col] = 0xfffd;
  984. c = getc(stdin);
  985. ungetc(c,stdin);
  986. if (c == '#') {
  987. /* Read a unicode.org style .TXT file. */
  988. for (;;) {
  989. c = getc(stdin);
  990. if (c == EOF)
  991. break;
  992. if (c == '\n' || c == ' ' || c == '\t')
  993. continue;
  994. if (c == '#') {
  995. do { c = getc(stdin); } while (!(c == EOF || c == '\n'));
  996. continue;
  997. }
  998. ungetc(c,stdin);
  999. if (scanf("0x%x", &j) != 1)
  1000. exit(1);
  1001. i1 = j >> 8;
  1002. i2 = j & 0xff;
  1003. if (scanf(" 0x%x", &j) != 1)
  1004. exit(1);
  1005. /* Take only the range covered by KS C 5601.1987-0 = KS C 5601.1989-0
  1006. = KS X 1001.1992, ignore the rest. */
  1007. if (!(i1 >= 128+33 && i1 < 128+127 && i2 >= 128+33 && i2 < 128+127))
  1008. continue; /* KSC5601 specific */
  1009. i1 &= 0x7f; /* KSC5601 specific */
  1010. i2 &= 0x7f; /* KSC5601 specific */
  1011. row = enc->byte_row(i1);
  1012. col = enc->byte_col(i2);
  1013. if (row < 0 || col < 0) {
  1014. fprintf(stderr, "lost entry for %02x %02x\n", i1, i2);
  1015. exit(1);
  1016. }
  1017. enc->charset2uni[row][col] = j;
  1018. }
  1019. } else {
  1020. /* Read a table of hexadecimal Unicode values. */
  1021. for (i1 = 33; i1 < 127; i1++)
  1022. for (i2 = 33; i2 < 127; i2++) {
  1023. i = scanf("%x", &j);
  1024. if (i == EOF)
  1025. goto read_done;
  1026. if (i != 1)
  1027. exit(1);
  1028. if (j < 0 || j == 0xffff)
  1029. j = 0xfffd;
  1030. if (j != 0xfffd) {
  1031. if (enc->byte_row(i1) < 0 || enc->byte_col(i2) < 0) {
  1032. fprintf(stderr, "lost entry at %02x %02x\n", i1, i2);
  1033. exit (1);
  1034. }
  1035. enc->charset2uni[enc->byte_row(i1)][enc->byte_col(i2)] = j;
  1036. }
  1037. }
  1038. read_done: ;
  1039. }
  1040. }
  1041. static void do_ksc5601 (const char* name)
  1042. {
  1043. Encoding enc;
  1044. enc.rows = 94;
  1045. enc.cols = 94;
  1046. enc.row_byte = row_byte_normal;
  1047. enc.col_byte = col_byte_normal;
  1048. enc.byte_row = byte_row_normal;
  1049. enc.byte_col = byte_col_normal;
  1050. enc.check_row_expr = "%1$s >= 0x21 && %1$s < 0x7f";
  1051. enc.check_col_expr = "%1$s >= 0x21 && %1$s < 0x7f";
  1052. enc.byte_row_expr = "%1$s - 0x21";
  1053. enc.byte_col_expr = "%1$s - 0x21";
  1054. read_table_ksc5601(&enc);
  1055. output_charset2uni(name,&enc);
  1056. invert(&enc); output_uni2charset_sparse(name,&enc,false);
  1057. }
  1058. /* UHC specifics */
  1059. /* UHC part 1: 0x{81..A0}{41..5A,61..7A,81..FE} */
  /* Byte value for row index `row': rows are numbered from byte 0x81. */
  static int row_byte_uhc_1 (int row)
  {
    const int first_byte = 0x81;
    return first_byte + row;
  }
  /* Byte value for column index `col'.  Columns 0..0x19 start at byte 0x41,
     columns 0x1a..0x33 at byte 0x61, and columns from 0x34 on at byte 0x81. */
  static int col_byte_uhc_1 (int col)
  {
    if (col >= 0x34)
      return col + 0x4d;
    if (col >= 0x1a)
      return col + 0x47;
    return col + 0x41;
  }
  /* Row index for a byte in 0x81..0xa0, or -1 for any other byte. */
  static int byte_row_uhc_1 (int byte)
  {
    if (byte < 0x81 || byte > 0xa0)
      return -1;
    return byte - 0x81;
  }
  /* Column index for a byte in 0x41..0x5a, 0x61..0x7a or 0x81..0xfe,
     or -1 for any other byte. */
  static int byte_col_uhc_1 (int byte)
  {
    if (byte >= 0x41 && byte <= 0x5a)
      return byte - 0x41;
    if (byte >= 0x61 && byte <= 0x7a)
      return byte - 0x47;
    if (byte >= 0x81 && byte <= 0xfe)
      return byte - 0x4d;
    return -1;
  }
  1082. static void do_uhc_1 (const char* name)
  1083. {
  1084. Encoding enc;
  1085. enc.rows = 32;
  1086. enc.cols = 178;
  1087. enc.row_byte = row_byte_uhc_1;
  1088. enc.col_byte = col_byte_uhc_1;
  1089. enc.byte_row = byte_row_uhc_1;
  1090. enc.byte_col = byte_col_uhc_1;
  1091. enc.check_row_expr = "(%1$s >= 0x81 && %1$s < 0xa1)";
  1092. enc.check_col_expr = "(%1$s >= 0x41 && %1$s < 0x5b) || (%1$s >= 0x61 && %1$s < 0x7b) || (%1$s >= 0x81 && %1$s < 0xff)";
  1093. enc.byte_row_expr = "%1$s - 0x81";
  1094. enc.byte_col_expr = "%1$s - (%1$s >= 0x81 ? 0x4d : %1$s >= 0x61 ? 0x47 : 0x41)";
  1095. read_table(&enc);
  1096. output_charset2uni_noholes_monotonic(name,&enc);
  1097. invert(&enc); output_uni2charset_sparse(name,&enc,true);
  1098. }
  1099. /* UHC part 2: 0x{A1..C6}{41..5A,61..7A,81..A0} */
  /* Byte value for row index `row': rows are numbered from byte 0xa1. */
  static int row_byte_uhc_2 (int row)
  {
    const int first_byte = 0xa1;
    return first_byte + row;
  }
  /* Byte value for column index `col'.  Columns 0..0x19 start at byte 0x41,
     columns 0x1a..0x33 at byte 0x61, and columns from 0x34 on at byte 0x81. */
  static int col_byte_uhc_2 (int col)
  {
    if (col >= 0x34)
      return col + 0x4d;
    if (col >= 0x1a)
      return col + 0x47;
    return col + 0x41;
  }
  /* Row index for a byte in 0xa1..0xfe, or -1 for any other byte. */
  static int byte_row_uhc_2 (int byte)
  {
    if (byte < 0xa1 || byte > 0xfe)
      return -1;
    return byte - 0xa1;
  }
  /* Column index for a byte in 0x41..0x5a, 0x61..0x7a or 0x81..0xa0,
     or -1 for any other byte. */
  static int byte_col_uhc_2 (int byte)
  {
    if (byte >= 0x41 && byte <= 0x5a)
      return byte - 0x41;
    if (byte >= 0x61 && byte <= 0x7a)
      return byte - 0x47;
    if (byte >= 0x81 && byte <= 0xa0)
      return byte - 0x4d;
    return -1;
  }
  1122. static void do_uhc_2 (const char* name)
  1123. {
  1124. Encoding enc;
  1125. enc.rows = 94;
  1126. enc.cols = 84;
  1127. enc.row_byte = row_byte_uhc_2;
  1128. enc.col_byte = col_byte_uhc_2;
  1129. enc.byte_row = byte_row_uhc_2;
  1130. enc.byte_col = byte_col_uhc_2;
  1131. enc.check_row_expr = "(%1$s >= 0xa1 && %1$s < 0xff)";
  1132. enc.check_col_expr = "(%1$s >= 0x41 && %1$s < 0x5b) || (%1$s >= 0x61 && %1$s < 0x7b) || (%1$s >= 0x81 && %1$s < 0xa1)";
  1133. enc.byte_row_expr = "%1$s - 0xa1";
  1134. enc.byte_col_expr = "%1$s - (%1$s >= 0x81 ? 0x4d : %1$s >= 0x61 ? 0x47 : 0x41)";
  1135. read_table(&enc);
  1136. output_charset2uni_noholes_monotonic(name,&enc);
  1137. invert(&enc); output_uni2charset_sparse(name,&enc,true);
  1138. }
  1139. /* Big5 specifics */
  /* Byte value for row index `row': rows are numbered from byte 0xa1. */
  static int row_byte_big5 (int row)
  {
    const int first_byte = 0xa1;
    return first_byte + row;
  }
  /* Byte value for column index `col': columns below 0x3f start at byte 0x40,
     the remaining ones continue at byte 0xa1. */
  static int col_byte_big5 (int col)
  {
    if (col < 0x3f)
      return col + 0x40;
    return col + 0x62;
  }
  /* Row index for a byte in 0xa1..0xfe, or -1 for any other byte. */
  static int byte_row_big5 (int byte)
  {
    if (byte < 0xa1 || byte > 0xfe)
      return -1;
    return byte - 0xa1;
  }
  /* Column index for a byte in 0x40..0x7e or 0xa1..0xfe, or -1 otherwise. */
  static int byte_col_big5 (int byte)
  {
    if (byte >= 0x40 && byte <= 0x7e)
      return byte - 0x40;
    if (byte >= 0xa1 && byte <= 0xfe)
      return byte - 0x62;
    return -1;
  }
  1160. static void do_big5 (const char* name)
  1161. {
  1162. Encoding enc;
  1163. enc.rows = 94;
  1164. enc.cols = 157;
  1165. enc.row_byte = row_byte_big5;
  1166. enc.col_byte = col_byte_big5;
  1167. enc.byte_row = byte_row_big5;
  1168. enc.byte_col = byte_col_big5;
  1169. enc.check_row_expr = "%1$s >= 0xa1 && %1$s < 0xff";
  1170. enc.check_col_expr = "(%1$s >= 0x40 && %1$s < 0x7f) || (%1$s >= 0xa1 && %1$s < 0xff)";
  1171. enc.byte_row_expr = "%1$s - 0xa1";
  1172. enc.byte_col_expr = "%1$s - (%1$s >= 0xa1 ? 0x62 : 0x40)";
  1173. read_table(&enc);
  1174. output_charset2uni(name,&enc);
  1175. invert(&enc); output_uni2charset_sparse(name,&enc,false);
  1176. }
  1177. /* HKSCS specifics */
  /* Byte value for row index `row': rows are numbered from byte 0x80. */
  static int row_byte_hkscs (int row)
  {
    const int first_byte = 0x80;
    return first_byte + row;
  }
  /* Row index for a byte in 0x80..0xfe, or -1 for any other byte. */
  static int byte_row_hkscs (int byte)
  {
    if (byte < 0x80 || byte > 0xfe)
      return -1;
    return byte - 0x80;
  }
  1187. static void do_hkscs (const char* name)
  1188. {
  1189. Encoding enc;
  1190. enc.rows = 128;
  1191. enc.cols = 157;
  1192. enc.row_byte = row_byte_hkscs;
  1193. enc.col_byte = col_byte_big5;
  1194. enc.byte_row = byte_row_hkscs;
  1195. enc.byte_col = byte_col_big5;
  1196. enc.check_row_expr = "%1$s >= 0x80 && %1$s < 0xff";
  1197. enc.check_col_expr = "(%1$s >= 0x40 && %1$s < 0x7f) || (%1$s >= 0xa1 && %1$s < 0xff)";
  1198. enc.byte_row_expr = "%1$s - 0x80";
  1199. enc.byte_col_expr = "%1$s - (%1$s >= 0xa1 ? 0x62 : 0x40)";
  1200. read_table(&enc);
  1201. output_charset2uni(name,&enc);
  1202. invert(&enc); output_uni2charset_sparse(name,&enc,false);
  1203. }
  1204. /* Johab Hangul specifics */
  /* Byte value for row index `row': rows are numbered from byte 0x84. */
  static int row_byte_johab_hangul (int row)
  {
    const int first_byte = 0x84;
    return first_byte + row;
  }
  /* Byte value for column index `col': columns below 0x3e start at byte 0x41,
     the remaining ones continue at byte 0x81. */
  static int col_byte_johab_hangul (int col)
  {
    if (col < 0x3e)
      return col + 0x41;
    return col + 0x43;
  }
  /* Row index for a byte in 0x84..0xd3, or -1 for any other byte. */
  static int byte_row_johab_hangul (int byte)
  {
    if (byte < 0x84 || byte > 0xd3)
      return -1;
    return byte - 0x84;
  }
  /* Column index for a byte in 0x41..0x7e or 0x81..0xfe, or -1 otherwise. */
  static int byte_col_johab_hangul (int byte)
  {
    if (byte >= 0x41 && byte <= 0x7e)
      return byte - 0x41;
    if (byte >= 0x81 && byte <= 0xfe)
      return byte - 0x43;
    return -1;
  }
  1225. static void do_johab_hangul (const char* name)
  1226. {
  1227. Encoding enc;
  1228. enc.rows = 80;
  1229. enc.cols = 188;
  1230. enc.row_byte = row_byte_johab_hangul;
  1231. enc.col_byte = col_byte_johab_hangul;
  1232. enc.byte_row = byte_row_johab_hangul;
  1233. enc.byte_col = byte_col_johab_hangul;
  1234. enc.check_row_expr = "%1$s >= 0x84 && %1$s < 0xd4";
  1235. enc.check_col_expr = "(%1$s >= 0x41 && %1$s < 0x7f) || (%1$s >= 0x81 && %1$s < 0xff)";
  1236. enc.byte_row_expr = "%1$s - 0x84";
  1237. enc.byte_col_expr = "%1$s - (%1$s >= 0x81 ? 0x43 : 0x41)";
  1238. read_table(&enc);
  1239. output_charset2uni(name,&enc);
  1240. invert(&enc); output_uni2charset_dense(name,&enc);
  1241. }
  1242. /* SJIS specifics */
  /* Byte value for row index `row': rows below 0x1f start at byte 0x81,
     the remaining ones continue at byte 0xe0. */
  static int row_byte_sjis (int row)
  {
    if (row < 0x1f)
      return row + 0x81;
    return row + 0xc1;
  }
  /* Byte value for column index `col': columns below 0x3f start at byte 0x40,
     the remaining ones continue at 0x80, so byte 0x7f is never produced. */
  static int col_byte_sjis (int col)
  {
    if (col < 0x3f)
      return col + 0x40;
    return col + 0x41;
  }
  /* Row index for a byte in 0x81..0x9f or for any byte >= 0xe0 (no upper
     bound is applied there); -1 for any other byte. */
  static int byte_row_sjis (int byte)
  {
    if (byte >= 0xe0)
      return byte - 0xc1;
    if (byte >= 0x81 && byte <= 0x9f)
      return byte - 0x81;
    return -1;
  }
  /* Column index for a byte in 0x40..0x7e or 0x80..0xfc, or -1 otherwise. */
  static int byte_col_sjis (int byte)
  {
    if (byte >= 0x40 && byte <= 0x7e)
      return byte - 0x40;
    if (byte >= 0x80 && byte <= 0xfc)
      return byte - 0x41;
    return -1;
  }
  1265. static void do_sjis (const char* name)
  1266. {
  1267. Encoding enc;
  1268. enc.rows = 94;
  1269. enc.cols = 188;
  1270. enc.row_byte = row_byte_sjis;
  1271. enc.col_byte = col_byte_sjis;
  1272. enc.byte_row = byte_row_sjis;
  1273. enc.byte_col = byte_col_sjis;
  1274. enc.check_row_expr = "(%1$s >= 0x81 && %1$s < 0xa0) || (%1$s >= 0xe0)";
  1275. enc.check_col_expr = "(%1$s >= 0x40 && %1$s < 0x7f) || (%1$s >= 0x80 && %1$s < 0xfd)";
  1276. enc.byte_row_expr = "%1$s - (%1$s >= 0xe0 ? 0xc1 : 0x81)";
  1277. enc.byte_col_expr = "%1$s - (%1$s >= 0x80 ? 0x41 : 0x40)";
  1278. read_table(&enc);
  1279. output_charset2uni(name,&enc);
  1280. invert(&enc); output_uni2charset_sparse(name,&enc,false);
  1281. }
  1282. /* GB18030 Unicode specifics */
  /*
   * Reads a unicode.org style .TXT file with "0xB1B2B3B4 0xUUUU" pairs from
   * standard input and prints, to standard output, range tables, a bitmap
   * and the functions <name>_mbtowc and <name>_wctomb.
   *
   * Each four-byte code (bytes in 0x81..0x84, 0x30..0x39, 0x81..0xfe,
   * 0x30..0x39) is turned into a linear index i.  The mapping i -> Unicode
   * must be monotonically increasing and piecewise of the form i + diff;
   * every run with a constant diff becomes one range.  At most 256 ranges
   * are supported.
   *
   * Exits with status 1 on malformed or empty input or when one of the
   * verified properties does not hold.
   */
  static void do_gb18030uni (const char* name)
  {
    /* Number of linear indices: 4 * 10 * 126 * 10 byte combinations. */
    enum { NCODES = 4*10*126*10 };
    int c;
    unsigned int bytes;
    unsigned int uni; /* target of the %x conversion, which needs unsigned */
    int i1, i2, i3, i4, i, j, k;
    int charset2uni[NCODES];
    struct { int low; int high; int diff; int total; } ranges[256];
    int ranges_count, ranges_total;

    for (i = 0; i < NCODES; i++)
      charset2uni[i] = 0;

    /* Read a unicode.org style .TXT file. */
    for (;;) {
      c = getc(stdin);
      if (c == EOF)
        break;
      if (c == '\n' || c == ' ' || c == '\t')
        continue;
      if (c == '#') {
        /* Skip the comment up to the end of the line. */
        do { c = getc(stdin); } while (!(c == EOF || c == '\n'));
        continue;
      }
      ungetc(c,stdin);
      if (scanf("0x%x", &bytes) != 1)
        exit(1);
      i1 = (bytes >> 24) & 0xff;
      i2 = (bytes >> 16) & 0xff;
      i3 = (bytes >> 8) & 0xff;
      i4 = bytes & 0xff;
      if (!(i1 >= 0x81 && i1 <= 0x84
            && i2 >= 0x30 && i2 <= 0x39
            && i3 >= 0x81 && i3 <= 0xfe
            && i4 >= 0x30 && i4 <= 0x39)) {
        fprintf(stderr, "lost entry for %02x %02x %02x %02x\n", i1, i2, i3, i4);
        exit(1);
      }
      i = (((i1-0x81) * 10 + (i2-0x30)) * 126 + (i3-0x81)) * 10 + (i4-0x30);
      if (scanf(" 0x%x", &uni) != 1)
        exit(1);
      if (!(uni < 0x10000))
        exit(1);
      j = (int) uni;
      charset2uni[i] = j;
    }

    /* Verify that the mapping i -> j is monotonically increasing and
       of the form
          low[k] <= i <= high[k]  =>  j = diff[k] + i
       with a set of disjoint intervals (low[k], high[k]). */
    ranges_count = 0;
    for (i = 0; i < NCODES; i++)
      if (charset2uni[i] != 0) {
        int diff;
        j = charset2uni[i];
        diff = j - i;
        if (ranges_count > 0) {
          if (!(i > ranges[ranges_count-1].high))
            exit(1);
          if (!(j > ranges[ranges_count-1].high + ranges[ranges_count-1].diff))
            exit(1);
          /* Additional property: The diffs are also increasing. */
          if (!(diff >= ranges[ranges_count-1].diff))
            exit(1);
        }
        if (ranges_count > 0 && diff == ranges[ranges_count-1].diff)
          ranges[ranges_count-1].high = i;
        else {
          if (ranges_count == 256)
            exit(1);
          ranges[ranges_count].low = i;
          ranges[ranges_count].high = i;
          ranges[ranges_count].diff = diff;
          ranges_count++;
        }
      }
    /* The code below reads ranges[0] and ranges[ranges_count-1]. */
    if (ranges_count == 0) {
      fprintf(stderr, "no mappings found in input\n");
      exit(1);
    }

    /* Determine size of bitmap. */
    ranges_total = 0;
    for (k = 0; k < ranges_count; k++) {
      ranges[k].total = ranges_total;
      ranges_total += ranges[k].high - ranges[k].low + 1;
    }

    printf("static const unsigned short %s_charset2uni_ranges[%d] = {\n", name, 2*ranges_count);
    for (k = 0; k < ranges_count; k++) {
      printf(" 0x%04x, 0x%04x", ranges[k].low, ranges[k].high);
      if (k+1 < ranges_count) printf(",");
      if ((k % 4) == 3 && k+1 < ranges_count) printf("\n");
    }
    printf("\n");
    printf("};\n");
    printf("\n");

    printf("static const unsigned short %s_uni2charset_ranges[%d] = {\n", name, 2*ranges_count);
    for (k = 0; k < ranges_count; k++) {
      printf(" 0x%04x, 0x%04x", ranges[k].low + ranges[k].diff, ranges[k].high + ranges[k].diff);
      if (k+1 < ranges_count) printf(",");
      if ((k % 4) == 3 && k+1 < ranges_count) printf("\n");
    }
    printf("\n");
    printf("};\n");
    printf("\n");

    printf("static const struct { unsigned short diff; unsigned short bitmap_offset; } %s_ranges[%d] = {\n ", name, ranges_count);
    for (k = 0; k < ranges_count; k++) {
      printf(" { %5d, 0x%04x }", ranges[k].diff, ranges[k].total);
      if (k+1 < ranges_count) printf(",");
      if ((k % 4) == 3 && k+1 < ranges_count) printf("\n ");
    }
    printf("\n");
    printf("};\n");
    printf("\n");

    /* One bit per index covered by a range, least significant bit first;
       the bit is set if the index has a mapping. */
    printf("static const unsigned char %s_bitmap[%d] = {\n ", name, (ranges_total + 7) / 8);
    {
      int accu = 0;
      for (k = 0; k < ranges_count; k++) {
        for (i = ranges[k].total; i <= ranges[k].total + (ranges[k].high - ranges[k].low);) {
          if (charset2uni[i - ranges[k].total + ranges[k].low] != 0)
            accu |= (1 << (i % 8));
          i++;
          if ((i % 8) == 0) {
            printf(" 0x%02x", accu);
            if ((i / 8) < (ranges_total + 7) / 8) printf(",");
            if (((i / 8) % 12) == 0)
              printf("\n ");
            accu = 0;
          }
        }
        if (i != (k+1 < ranges_count ? ranges[k+1].total : ranges_total)) abort();
      }
      if ((ranges_total % 8) != 0)
        printf(" 0x%02x", accu);
      printf("\n");
    }
    printf("};\n");
    printf("\n");

    /* The multibyte -> Unicode function. */
    printf("static int\n");
    printf("%s_mbtowc (conv_t conv, ucs4_t *pwc, const unsigned char *s, int n)\n", name);
    printf("{\n");
    printf(" unsigned char c1 = s[0];\n");
    printf(" if (c1 >= 0x81 && c1 <= 0x84) {\n");
    printf(" if (n >= 2) {\n");
    printf(" unsigned char c2 = s[1];\n");
    printf(" if (c2 >= 0x30 && c2 <= 0x39) {\n");
    printf(" if (n >= 3) {\n");
    printf(" unsigned char c3 = s[2];\n");
    printf(" if (c3 >= 0x81 && c3 <= 0xfe) {\n");
    printf(" if (n >= 4) {\n");
    printf(" unsigned char c4 = s[3];\n");
    printf(" if (c4 >= 0x30 && c4 <= 0x39) {\n");
    printf(" unsigned int i = (((c1 - 0x81) * 10 + (c2 - 0x30)) * 126 + (c3 - 0x81)) * 10 + (c4 - 0x30);\n");
    printf(" if (i >= %d && i <= %d) {\n", ranges[0].low, ranges[ranges_count-1].high);
    printf(" unsigned int k1 = 0;\n");
    printf(" unsigned int k2 = %d;\n", ranges_count-1);
    printf(" while (k1 < k2) {\n");
    printf(" unsigned int k = (k1 + k2) / 2;\n");
    printf(" if (i <= %s_charset2uni_ranges[2*k+1])\n", name);
    printf(" k2 = k;\n");
    printf(" else if (i >= %s_charset2uni_ranges[2*k+2])\n", name);
    printf(" k1 = k + 1;\n");
    printf(" else\n");
    printf(" return RET_ILSEQ;\n");
    printf(" }\n");
    printf(" {\n");
    printf(" unsigned int bitmap_index = i - %s_charset2uni_ranges[2*k1] + %s_ranges[k1].bitmap_offset;\n", name, name);
    printf(" if ((%s_bitmap[bitmap_index >> 3] >> (bitmap_index & 7)) & 1) {\n", name);
    printf(" unsigned int diff = %s_ranges[k1].diff;\n", name);
    printf(" *pwc = (ucs4_t) (i + diff);\n");
    printf(" return 4;\n");
    printf(" }\n");
    printf(" }\n");
    printf(" }\n");
    printf(" }\n");
    printf(" return RET_ILSEQ;\n");
    printf(" }\n");
    printf(" return RET_TOOFEW(0);\n");
    printf(" }\n");
    printf(" return RET_ILSEQ;\n");
    printf(" }\n");
    printf(" return RET_TOOFEW(0);\n");
    printf(" }\n");
    printf(" return RET_ILSEQ;\n");
    printf(" }\n");
    printf(" return RET_TOOFEW(0);\n");
    printf(" }\n");
    printf(" return RET_ILSEQ;\n");
    printf("}\n");
    printf("\n");

    /* The Unicode -> multibyte function. */
    printf("static int\n");
    printf("%s_wctomb (conv_t conv, unsigned char *r, ucs4_t wc, int n)\n", name);
    printf("{\n");
    printf(" if (n >= 4) {\n");
    printf(" unsigned int i = wc;\n");
    printf(" if (i >= 0x%04x && i <= 0x%04x) {\n", ranges[0].low + ranges[0].diff, ranges[ranges_count-1].high + ranges[ranges_count-1].diff);
    printf(" unsigned int k1 = 0;\n");
    printf(" unsigned int k2 = %d;\n", ranges_count-1);
    printf(" while (k1 < k2) {\n");
    printf(" unsigned int k = (k1 + k2) / 2;\n");
    printf(" if (i <= %s_uni2charset_ranges[2*k+1])\n", name);
    printf(" k2 = k;\n");
    printf(" else if (i >= %s_uni2charset_ranges[2*k+2])\n", name);
    printf(" k1 = k + 1;\n");
    printf(" else\n");
    printf(" return RET_ILUNI;\n");
    printf(" }\n");
    printf(" {\n");
    printf(" unsigned int bitmap_index = i - %s_uni2charset_ranges[2*k1] + %s_ranges[k1].bitmap_offset;\n", name, name);
    printf(" if ((%s_bitmap[bitmap_index >> 3] >> (bitmap_index & 7)) & 1) {\n", name);
    printf(" unsigned int diff = %s_ranges[k1].diff;\n", name);
    printf(" i -= diff;\n");
    printf(" r[3] = (i %% 10) + 0x30; i = i / 10;\n");
    printf(" r[2] = (i %% 126) + 0x81; i = i / 126;\n");
    printf(" r[1] = (i %% 10) + 0x30; i = i / 10;\n");
    printf(" r[0] = i + 0x81;\n");
    printf(" return 4;\n");
    printf(" }\n");
    printf(" }\n");
    printf(" }\n");
    printf(" return RET_ILUNI;\n");
    printf(" }\n");
    printf(" return RET_TOOSMALL;\n");
    printf("}\n");
  }
  1503. /* JISX0213 specifics */
  /* One parsed line of the JISX0213 table read from stdin. */
  struct jisx0213_line {
    unsigned int plane_row; /* first three hex digits of the code: plane digit, then row byte */
    unsigned int col;       /* last two hex digits of the code: column byte */
    bool combining;         /* true if the line maps the code to a pair of code points */
    unsigned int u1;        /* the code point, or the first one of the pair */
    unsigned int u2;        /* the second code point of a pair, 0 otherwise */
  };

  /* Reads the next table line from stdin and checks its syntax with assert().
     Accepted forms are "0xPRRCC<TAB>0xUUUU" (0x followed by at least four hex
     digits) and, for a pair, exactly "0xPRRCC<TAB>0xUUUU 0xVVVV" (21 chars).
     Returns false as soon as scanf() cannot deliver another non-empty line. */
  static bool read_jisx0213_line (struct jisx0213_line *entry)
  {
    char line[30];
    unsigned int k;

    memset(line,0,sizeof(line));
    /* The field width keeps an over-long input line from overrunning line[]. */
    if (scanf("%29[^\n]\n",line) < 1)
      return false;

    /* Character code: "0x" and five hex digits, terminated by a tab. */
    assert(line[0]=='0');
    assert(line[1]=='x');
    for (k = 2; k <= 6; k++)
      assert(isxdigit((unsigned char) line[k]));
    assert(line[7]=='\t');
    line[7] = '\0';
    entry->col = strtoul(&line[5],NULL,16);
    line[5] = '\0';
    entry->plane_row = strtoul(&line[2],NULL,16);

    /* First (or only) code point. */
    assert(line[8]=='0');
    assert(line[9]=='x');
    for (k = 10; k <= 13; k++)
      assert(isxdigit((unsigned char) line[k]));

    /* A line of exactly 21 characters carries a second code point. */
    entry->combining = (line[20] != '\0' && line[21] == '\0');
    if (entry->combining) {
      assert(line[14]==' ');
      assert(line[15]=='0');
      assert(line[16]=='x');
      for (k = 17; k <= 20; k++)
        assert(isxdigit((unsigned char) line[k]));
      entry->u2 = strtoul(&line[17],NULL,16);
      line[14] = '\0';
      entry->u1 = strtoul(&line[10],NULL,16);
    } else {
      entry->u1 = strtoul(&line[10],NULL,16);
      entry->u2 = 0;
    }
    return true;
  }

  /* Writes the text of a C header (guarded by _JISX0213_H) to stdout.  The
     header contains the tables jisx0213_to_ucs_combining, jisx0213_to_ucs_main,
     jisx0213_to_ucs_pagestart, jisx0213_from_ucs_level1 and the level2 tables,
     followed by the lookup functions jisx0213_to_ucs4 and ucs4_to_jisx0213.
     The mapping table is read from stdin twice, with rewind(stdin) between the
     two passes, so stdin presumably has to be a seekable file.
     The `name' argument is not used. */
  static void do_jisx0213 (const char* name)
  {
    (void) name;
    printf("#ifndef _JISX0213_H\n");
    printf("#define _JISX0213_H\n");
    printf("\n");
    printf("/* JISX0213 plane 1 (= ISO-IR-233) characters are in the range\n");
    printf(" 0x{21..7E}{21..7E}.\n");
    printf(" JISX0213 plane 2 (= ISO-IR-229) characters are in the range\n");
    printf(" 0x{21,23..25,28,2C..2F,6E..7E}{21..7E}.\n");
    printf(" Together this makes 120 rows of 94 characters.\n");
    printf("*/\n");
    printf("\n");
    /* Pass 1: charset -> Unicode tables. */
    {
      /* Maps a plane/row value 0xPRR to a compact row number 0..119, or -1. */
  #define row_convert(row) \
      ((row) >= 0x121 && (row) <= 0x17E ? (row)-289 : /* 0..93 */ \
       (row) == 0x221 ? (row)-451 : /* 94 */ \
       (row) >= 0x223 && (row) <= 0x225 ? (row)-452 : /* 95..97 */ \
       (row) == 0x228 ? (row)-454 : /* 98 */ \
       (row) >= 0x22C && (row) <= 0x22F ? (row)-457 : /* 99..102 */ \
       (row) >= 0x26E && (row) <= 0x27E ? (row)-519 : /* 103..119 */ \
       -1)
      unsigned int table[120][94];
      int pagemin[0x1100];
      int pagemax[0x1100];
      int pageidx[0x1100];
      unsigned int pagestart[0x1100];
      unsigned int pagestart_len = 0;
      {
        unsigned int rowc, colc;
        for (rowc = 0; rowc < 120; rowc++)
          for (colc = 0; colc < 94; colc++)
            table[rowc][colc] = 0;
      }
      {
        unsigned int page;
        for (page = 0; page < 0x1100; page++)
          pagemin[page] = -1;
        for (page = 0; page < 0x1100; page++)
          pagemax[page] = -1;
        for (page = 0; page < 0x1100; page++)
          pageidx[page] = -1;
      }
      printf("static const unsigned short jisx0213_to_ucs_combining[][2] = {\n");
      {
        /* A pair is stored in table[][] as its 1-based ordinal among the
           pairs, in the order they are emitted into the combining array. */
        unsigned int private_use = 0x0001;
        struct jisx0213_line entry;
        while (read_jisx0213_line(&entry)) {
          unsigned int row = entry.plane_row;
          unsigned int col = entry.col;
          unsigned int ucs;
          if (entry.combining) {
            printf(" { 0x%04x, 0x%04x },\n", entry.u1, entry.u2);
            ucs = private_use++;
          } else {
            ucs = entry.u1;
          }
          /* ucs >> 8 indexes pagemin[]/pagemax[]/pageidx[] below. */
          assert(ucs < 0x110000);
          assert((unsigned int) row_convert(row) < 120);
          assert((unsigned int) (col-0x21) < 94);
          table[row_convert(row)][col-0x21] = ucs;
        }
      }
      printf("};\n");
      printf("\n");
      /* Record, per 256-value page, the lowest and highest low byte in use. */
      {
        unsigned int rowc, colc;
        for (rowc = 0; rowc < 120; rowc++) {
          for (colc = 0; colc < 94; colc++) {
            unsigned int value = table[rowc][colc];
            unsigned int page = value >> 8;
            unsigned int rest = value & 0xff;
            if (pagemin[page] < 0 || (unsigned int) pagemin[page] > rest) pagemin[page] = rest;
            if (pagemax[page] < 0 || (unsigned int) pagemax[page] < rest) pagemax[page] = rest;
          }
        }
      }
      /* Assign an index and a start value to every page in use. */
      {
        unsigned int index = 0;
        unsigned int i;
        for (i = 0; i < 0x1100; ) {
          if (pagemin[i] >= 0) {
            /* i+1 is bounds-checked so the last page never reads past the arrays. */
            if (i+1 < 0x1100
                && pagemin[i+1] >= 0 && pagemin[i] >= 0x80 && pagemax[i+1] < 0x80) {
              /* Combine two pages into a single one. */
              assert(pagestart_len < sizeof(pagestart)/sizeof(pagestart[0]));
              pagestart[pagestart_len++] = (i<<8)+0x80;
              pageidx[i] = index;
              pageidx[i+1] = index;
              index++;
              i += 2;
            } else {
              /* A single page. */
              assert(pagestart_len < sizeof(pagestart)/sizeof(pagestart[0]));
              pagestart[pagestart_len++] = i<<8;
              pageidx[i] = index;
              index++;
              i += 1;
            }
          } else
            i++;
        }
      }
      printf("static const unsigned short jisx0213_to_ucs_main[120 * 94] = {\n");
      {
        unsigned int row;
        for (row = 0; row < 0x300; row++) {
          unsigned int rowc = row_convert(row);
          if (rowc != (unsigned int) (-1)) {
            printf(" /* 0x%X21..0x%X7E */\n",row,row);
            {
              unsigned int count = 0;
              unsigned int colc;
              for (colc = 0; colc < 94; colc++) {
                if ((count % 8) == 0) printf(" ");
                {
                  /* Each entry is (page index << 8) | offset from that page's start. */
                  unsigned int value = table[rowc][colc];
                  unsigned int page = value >> 8;
                  unsigned int index = pageidx[page];
                  assert(value-pagestart[index] < 0x100);
                  printf(" 0x%04x,",(index<<8)|(value-pagestart[index]));
                }
                count++;
                if ((count % 8) == 0) printf("\n");
              }
            }
            printf("\n");
          }
        }
      }
      printf("};\n");
      printf("\n");
      printf("static const ucs4_t jisx0213_to_ucs_pagestart[] = {\n");
      {
        unsigned int count = 0;
        unsigned int i;
        for (i = 0; i < pagestart_len; i++) {
          char buf[10];
          if ((count % 8) == 0) printf(" ");
          printf(" ");
          sprintf(buf,"0x%04x",pagestart[i]);
          /* The `*' width argument must be an int, not a size_t. */
          if (strlen(buf) < 7) printf("%*s",(int) (7-strlen(buf)),"");
          printf("%s,",buf);
          count++;
          if ((count % 8) == 0) printf("\n");
        }
      }
      printf("\n");
      printf("};\n");
  #undef row_convert
    }
    rewind(stdin);
    printf("\n");
    /* Pass 2: Unicode -> charset tables. */
    {
      /* static: about 4.4 MB, too large to place on the stack safely.
         Fully initialized below before any use. */
      static int table[0x110000];
      bool pages[0x4400];
      int maxpage = -1;
      unsigned int page_count;
      unsigned int combining_prefixes[100];
      unsigned int combining_prefixes_len = 0;
      {
        unsigned int i;
        for (i = 0; i < 0x110000; i++)
          table[i] = -1;
        for (i = 0; i < 0x4400; i++)
          pages[i] = false;
      }
      {
        struct jisx0213_line entry;
        while (read_jisx0213_line(&entry)) {
          if (entry.combining) {
            assert(entry.u2 == 0x02E5 || entry.u2 == 0x02E9 || entry.u2 == 0x0300
                   || entry.u2 == 0x0301 || entry.u2 == 0x309A);
            assert(combining_prefixes_len < sizeof(combining_prefixes)/sizeof(combining_prefixes[0]));
            combining_prefixes[combining_prefixes_len++] = entry.u1;
          } else {
            unsigned int plane = (entry.plane_row >> 8) - 1;
            unsigned int row = entry.plane_row & 0xff;
            unsigned int col = entry.col;
            unsigned int ucs = entry.u1;
            /* Add an entry. */
            assert(plane <= 1);
            assert(row <= 0x7f);
            assert(col <= 0x7f);
            assert(ucs < 0x110000);
            table[ucs] = (plane << 15) | (row << 8) | col;
            pages[ucs>>6] = true;
            if (maxpage < 0 || (int) (ucs>>6) > maxpage) maxpage = ucs>>6;
          }
        }
      }
      /* Flag every code point that is the first element of a pair. */
      {
        unsigned int i;
        for (i = 0; i < combining_prefixes_len; i++) {
          unsigned int u1 = combining_prefixes[i];
          assert(table[u1] >= 0);
          table[u1] |= 0x0080;
        }
      }
      /* Number of 64-value pages up to the highest one in use; 0 when no
         entry was read.  Used as an unsigned loop bound so that empty input
         does not turn `i <= maxpage' into an endless loop. */
      page_count = (unsigned int) (maxpage + 1);
      printf("static const short jisx0213_from_ucs_level1[%d] = {\n",maxpage+1);
      {
        unsigned int index = 0;
        unsigned int i;
        for (i = 0; i < page_count; i++) {
          if ((i % 8) == 0) printf(" ");
          if (pages[i]) {
            printf(" %3u,",index);
            index++;
          } else {
            printf(" %3d,",-1);
          }
          if (((i+1) % 8) == 0) printf("\n");
        }
      }
      printf("\n");
      printf("};\n");
      printf("\n");
  #if 0 /* Dense array */
      printf("static const unsigned short jisx0213_from_ucs_level2[] = {\n");
      {
        unsigned int i;
        for (i = 0; i < page_count; i++) {
          if (pages[i]) {
            printf(" /* 0x%04X */\n",i<<6);
            {
              unsigned int j;
              for (j = 0; j < 0x40; ) {
                unsigned int ucs = (i<<6)+j;
                int value = table[ucs];
                if (value < 0) value = 0;
                if ((j % 8) == 0) printf(" ");
                printf(" 0x%04x,",value);
                j++;
                if ((j % 8) == 0) printf("\n");
              }
            }
          }
        }
      }
      printf("};\n");
  #else /* Sparse array */
      {
        /* static for the same stack-size reason as table[]; every slot that
           is printed below is written first in the data loop. */
        static int summary_indx[0x11000];
        static int summary_used[0x11000];
        unsigned int i, k, indx;
        printf("static const unsigned short jisx0213_from_ucs_level2_data[] = {\n");
        /* Fill summary_indx[] and summary_used[]. */
        indx = 0;
        for (i = 0, k = 0; i < page_count; i++) {
          if (pages[i]) {
            unsigned int j1, j2;
            unsigned int count = 0;
            printf(" /* 0x%04X */\n",i<<6);
            /* Each used page is split into 4 groups of 16 code points. */
            for (j1 = 0; j1 < 4; j1++) {
              summary_indx[4*k+j1] = indx;
              summary_used[4*k+j1] = 0;
              for (j2 = 0; j2 < 16; j2++) {
                unsigned int j = 16*j1+j2;
                unsigned int ucs = (i<<6)+j;
                int value = table[ucs];
                if (value < 0) value = 0;
                if (value > 0) {
                  summary_used[4*k+j1] |= (1 << j2);
                  if ((count % 8) == 0) printf(" ");
                  printf(" 0x%04x,",value);
                  count++;
                  if ((count % 8) == 0) printf("\n");
                  indx++;
                }
              }
            }
            if ((count % 8) > 0)
              printf("\n");
            k++;
          }
        }
        printf("};\n");
        printf("\n");
        printf("static const Summary16 jisx0213_from_ucs_level2_2indx[] = {\n");
        for (i = 0, k = 0; i < page_count; i++) {
          if (pages[i]) {
            unsigned int j1;
            printf(" /* 0x%04X */\n",i<<6);
            printf(" ");
            for (j1 = 0; j1 < 4; j1++) {
              printf(" { %4d, 0x%04x },", summary_indx[4*k+j1], summary_used[4*k+j1]);
            }
            printf("\n");
            k++;
          }
        }
        printf("};\n");
      }
  #endif
      printf("\n");
    }
    /* Emit the charset -> Unicode lookup function. */
    printf("#ifdef __GNUC__\n");
    printf("__inline\n");
    printf("#else\n");
    printf("#ifdef __cplusplus\n");
    printf("inline\n");
    printf("#endif\n");
    printf("#endif\n");
    printf("static ucs4_t jisx0213_to_ucs4 (unsigned int row, unsigned int col)\n");
    printf("{\n");
    printf(" ucs4_t val;\n");
    printf("\n");
    printf(" if (row >= 0x121 && row <= 0x17e)\n");
    printf(" row -= 289;\n");
    printf(" else if (row == 0x221)\n");
    printf(" row -= 451;\n");
    printf(" else if (row >= 0x223 && row <= 0x225)\n");
    printf(" row -= 452;\n");
    printf(" else if (row == 0x228)\n");
    printf(" row -= 454;\n");
    printf(" else if (row >= 0x22c && row <= 0x22f)\n");
    printf(" row -= 457;\n");
    printf(" else if (row >= 0x26e && row <= 0x27e)\n");
    printf(" row -= 519;\n");
    printf(" else\n");
    printf(" return 0x0000;\n");
    printf("\n");
    printf(" if (col >= 0x21 && col <= 0x7e)\n");
    printf(" col -= 0x21;\n");
    printf(" else\n");
    printf(" return 0x0000;\n");
    printf("\n");
    printf(" val = jisx0213_to_ucs_main[row * 94 + col];\n");
    printf(" val = jisx0213_to_ucs_pagestart[val >> 8] + (val & 0xff);\n");
    printf(" if (val == 0xfffd)\n");
    printf(" val = 0x0000;\n");
    printf(" return val;\n");
    printf("}\n");
    printf("\n");
    /* Emit the Unicode -> charset lookup function. */
    printf("#ifdef __GNUC__\n");
    printf("__inline\n");
    printf("#else\n");
    printf("#ifdef __cplusplus\n");
    printf("inline\n");
    printf("#endif\n");
    printf("#endif\n");
    printf("static unsigned short ucs4_to_jisx0213 (ucs4_t ucs)\n");
    printf("{\n");
    printf(" if (ucs < (sizeof(jisx0213_from_ucs_level1)/sizeof(jisx0213_from_ucs_level1[0])) << 6) {\n");
    printf(" int index1 = jisx0213_from_ucs_level1[ucs >> 6];\n");
    printf(" if (index1 >= 0)");
  #if 0 /* Dense array */
    printf("\n");
    printf(" return jisx0213_from_ucs_level2[(index1 << 6) + (ucs & 0x3f)];\n");
  #else /* Sparse array */
    printf(" {\n");
    printf(" const Summary16 *summary = &jisx0213_from_ucs_level2_2indx[((index1 << 6) + (ucs & 0x3f)) >> 4];\n");
    printf(" unsigned short used = summary->used;\n");
    printf(" unsigned int i = ucs & 0x0f;\n");
    printf(" if (used & ((unsigned short) 1 << i)) {\n");
    printf(" /* Keep in `used' only the bits 0..i-1. */\n");
    printf(" used &= ((unsigned short) 1 << i) - 1;\n");
    printf(" /* Add `summary->indx' and the number of bits set in `used'. */\n");
    printf(" used = (used & 0x5555) + ((used & 0xaaaa) >> 1);\n");
    printf(" used = (used & 0x3333) + ((used & 0xcccc) >> 2);\n");
    printf(" used = (used & 0x0f0f) + ((used & 0xf0f0) >> 4);\n");
    printf(" used = (used & 0x00ff) + (used >> 8);\n");
    printf(" return jisx0213_from_ucs_level2_data[summary->indx + used];\n");
    printf(" };\n");
    printf(" };\n");
  #endif
    printf(" }\n");
    printf(" return 0x0000;\n");
    printf("}\n");
    printf("\n");
    printf("#endif /* _JISX0213_H */\n");
  }
  1945. /* Main program */
  int main (int argc, char *argv[])
  {
    /* Usage: <program> CHARSETNAME NAME
       CHARSETNAME is handed to output_title(); NAME selects the generator
       that is run.  Exits with status 1 when the argument count is not
       exactly two or when NAME matches none of the known table names. */
    const char *charset_title;
    const char *table_name;

  #define NAME_IS(s) (strcmp(table_name, (s)) == 0)

    if (argc != 3)
      exit(1);

    charset_title = argv[1];
    table_name = argv[2];

    /* The title is emitted before the table name is checked. */
    output_title(charset_title);

    if (NAME_IS("gb2312")
        || NAME_IS("isoir165ext") || NAME_IS("gb12345ext")
        || NAME_IS("jisx0208") || NAME_IS("jisx0212")) {
      do_normal(table_name);
    } else if (NAME_IS("cns11643_1") || NAME_IS("cns11643_2")
               || NAME_IS("cns11643_3") || NAME_IS("cns11643_4a")
               || NAME_IS("cns11643_4b") || NAME_IS("cns11643_5")
               || NAME_IS("cns11643_6") || NAME_IS("cns11643_7")
               || NAME_IS("cns11643_15")) {
      do_normal_only_charset2uni(table_name);
    } else if (NAME_IS("cns11643_inv")) {
      do_cns11643_only_uni2charset(table_name);
    } else if (NAME_IS("gbkext1")) {
      do_gbk1_only_charset2uni(table_name);
    } else if (NAME_IS("gbkext2")) {
      do_gbk2_only_charset2uni(table_name);
    } else if (NAME_IS("gbkext_inv")) {
      do_gbk1_only_uni2charset(table_name);
    } else if (NAME_IS("cp936ext") || NAME_IS("gb18030ext")) {
      do_gbk1(table_name);
    } else if (NAME_IS("ksc5601")) {
      do_ksc5601(table_name);
    } else if (NAME_IS("uhc_1")) {
      do_uhc_1(table_name);
    } else if (NAME_IS("uhc_2")) {
      do_uhc_2(table_name);
    } else if (NAME_IS("big5") || NAME_IS("cp950ext")) {
      do_big5(table_name);
    } else if (NAME_IS("hkscs1999") || NAME_IS("hkscs2001")
               || NAME_IS("hkscs2004")) {
      do_hkscs(table_name);
    } else if (NAME_IS("johab_hangul")) {
      do_johab_hangul(table_name);
    } else if (NAME_IS("cp932ext")) {
      do_sjis(table_name);
    } else if (NAME_IS("gb18030uni")) {
      do_gb18030uni(table_name);
    } else if (NAME_IS("jisx0213")) {
      do_jisx0213(table_name);
    } else {
      exit(1);
    }

  #undef NAME_IS
    return 0;
  }