123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201 |
- /*
- * Copyright (C) 1999-2008 Free Software Foundation, Inc.
- * This file is part of the GNU LIBICONV Library.
- *
- * The GNU LIBICONV Library is free software; you can redistribute it
- * and/or modify it under the terms of the GNU Library General Public
- * License as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
- *
- * The GNU LIBICONV Library is distributed in the hope that it will be
- * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Library General Public License for more details.
- *
- * You should have received a copy of the GNU Library General Public
- * License along with the GNU LIBICONV Library; see the file COPYING.LIB.
- * If not, write to the Free Software Foundation, Inc., 51 Franklin Street,
- * Fifth Floor, Boston, MA 02110-1301, USA.
- */
- /* Part 1 of iconv_open.
- This is a bare block rather than a function: it uses variables and the
- label 'invalid' that are not declared here and so have to come from the
- code that contains it.
- Input: const char* tocode, const char* fromcode.
- (Both are reassigned below whenever a name is replaced by the
- result of locale_charset().)
- Output:
- unsigned int from_index;
- int from_wchar; (1 if fromcode named the wchar_t alias and was resolved
- through locale_charset(), else 0)
- unsigned int to_index;
- int to_wchar; (likewise for tocode)
- int transliterate; (1 if tocode carried a "//TRANSLIT" suffix, else 0)
- int discard_ilseq; (1 if tocode carried a "//IGNORE" suffix, else 0)
- Jumps to 'invalid' in case of error.
- */
- {
- char buf[MAX_WORD_LENGTH+10+1]; /* upper-cased copy of the name being looked up */
- const char* cp; /* read position in tocode / fromcode */
- char* bp; /* write position in buf; afterwards the end of the name */
- const struct alias * ap; /* result of the alias table lookup */
- unsigned int count; /* characters that may still be stored in buf */
- transliterate = 0;
- discard_ilseq = 0;
- /* Before calling aliases_lookup, convert the input string to upper case,
- * and check whether it's entirely ASCII (we call gperf with option "-7"
- * to achieve a smaller table) and non-empty. If it's not entirely ASCII,
- * or if it's too long, it is not a valid encoding name.
- */
- for (to_wchar = 0;;) { /* the init clause runs once, so 'continue' below keeps to_wchar */
- /* Search tocode in the table.
- * First copy it into buf, including the terminating NUL. When the copy
- * loop ends normally, bp points at that NUL inside buf.
- */
- for (cp = tocode, bp = buf, count = MAX_WORD_LENGTH+10+1; ; cp++, bp++) {
- unsigned char c = * (unsigned char *) cp;
- if (c >= 0x80) /* not ASCII */
- goto invalid;
- if (c >= 'a' && c <= 'z') /* upper-case ASCII letters */
- c -= 'a'-'A';
- *bp = c;
- if (c == '\0')
- break;
- if (--count == 0) /* buf is full and no NUL was seen: name too long */
- goto invalid;
- }
- for (;;) { /* strip trailing suffixes; loops so that several may be chained in any order */
- if (bp-buf >= 10 && memcmp(bp-10,"//TRANSLIT",10)==0) { /* compared after upper-casing */
- bp -= 10;
- *bp = '\0';
- transliterate = 1;
- continue;
- }
- if (bp-buf >= 8 && memcmp(bp-8,"//IGNORE",8)==0) {
- bp -= 8;
- *bp = '\0';
- discard_ilseq = 1;
- continue;
- }
- break;
- }
- if (buf[0] == '\0') { /* nothing left of the name: retry with what locale_charset() returns */
- tocode = locale_charset();
- /* Avoid an endless loop that could occur when using an older version
- of localcharset.c. */
- if (tocode[0] == '\0')
- goto invalid;
- continue;
- }
- ap = aliases_lookup(buf,bp-buf); /* bp-buf is the length of the name without suffixes */
- if (ap == NULL) {
- ap = aliases2_lookup(buf); /* second table, consulted only when the first has no entry */
- if (ap == NULL)
- goto invalid;
- }
- if (ap->encoding_index == ei_local_char) { /* handled exactly like an empty name */
- tocode = locale_charset();
- /* Avoid an endless loop that could occur when using an older version
- of localcharset.c. */
- if (tocode[0] == '\0')
- goto invalid;
- continue;
- }
- if (ap->encoding_index == ei_local_wchar_t) {
- /* On systems which define __STDC_ISO_10646__, wchar_t is Unicode.
- This is also the case on native Woe32 systems.
- In that case the size of wchar_t selects a fixed encoding and
- to_wchar stays 0. */
- #if __STDC_ISO_10646__ || ((defined _WIN32 || defined __WIN32__) && !defined __CYGWIN__)
- if (sizeof(wchar_t) == 4) {
- to_index = ei_ucs4internal;
- break;
- }
- if (sizeof(wchar_t) == 2) {
- to_index = ei_ucs2internal;
- break;
- }
- if (sizeof(wchar_t) == 1) {
- to_index = ei_iso8859_1;
- break;
- }
- #endif
- #if HAVE_MBRTOWC
- to_wchar = 1; /* set the flag, then resolve the name returned by locale_charset() instead */
- tocode = locale_charset();
- continue;
- #endif
- goto invalid; /* reachable only when the HAVE_MBRTOWC section is compiled out */
- }
- to_index = ap->encoding_index;
- break;
- }
- for (from_wchar = 0;;) { /* same procedure for fromcode; 'continue' keeps from_wchar */
- /* Search fromcode in the table.
- * First copy it into buf, including the terminating NUL. When the copy
- * loop ends normally, bp points at that NUL inside buf.
- */
- for (cp = fromcode, bp = buf, count = MAX_WORD_LENGTH+10+1; ; cp++, bp++) {
- unsigned char c = * (unsigned char *) cp;
- if (c >= 0x80) /* not ASCII */
- goto invalid;
- if (c >= 'a' && c <= 'z') /* upper-case ASCII letters */
- c -= 'a'-'A';
- *bp = c;
- if (c == '\0')
- break;
- if (--count == 0) /* buf is full and no NUL was seen: name too long */
- goto invalid;
- }
- for (;;) { /* on fromcode the suffixes are stripped but set no flag */
- if (bp-buf >= 10 && memcmp(bp-10,"//TRANSLIT",10)==0) {
- bp -= 10;
- *bp = '\0';
- continue;
- }
- if (bp-buf >= 8 && memcmp(bp-8,"//IGNORE",8)==0) {
- bp -= 8;
- *bp = '\0';
- continue;
- }
- break;
- }
- if (buf[0] == '\0') { /* nothing left of the name: retry with what locale_charset() returns */
- fromcode = locale_charset();
- /* Avoid an endless loop that could occur when using an older version
- of localcharset.c. */
- if (fromcode[0] == '\0')
- goto invalid;
- continue;
- }
- ap = aliases_lookup(buf,bp-buf); /* bp-buf is the length of the name without suffixes */
- if (ap == NULL) {
- ap = aliases2_lookup(buf); /* second table, consulted only when the first has no entry */
- if (ap == NULL)
- goto invalid;
- }
- if (ap->encoding_index == ei_local_char) { /* handled exactly like an empty name */
- fromcode = locale_charset();
- /* Avoid an endless loop that could occur when using an older version
- of localcharset.c. */
- if (fromcode[0] == '\0')
- goto invalid;
- continue;
- }
- if (ap->encoding_index == ei_local_wchar_t) {
- /* On systems which define __STDC_ISO_10646__, wchar_t is Unicode.
- This is also the case on native Woe32 systems.
- In that case the size of wchar_t selects a fixed encoding and
- from_wchar stays 0. */
- #if __STDC_ISO_10646__ || ((defined _WIN32 || defined __WIN32__) && !defined __CYGWIN__)
- if (sizeof(wchar_t) == 4) {
- from_index = ei_ucs4internal;
- break;
- }
- if (sizeof(wchar_t) == 2) {
- from_index = ei_ucs2internal;
- break;
- }
- if (sizeof(wchar_t) == 1) {
- from_index = ei_iso8859_1;
- break;
- }
- #endif
- #if HAVE_WCRTOMB
- from_wchar = 1; /* set the flag, then resolve the name returned by locale_charset() instead */
- fromcode = locale_charset();
- continue;
- #endif
- goto invalid; /* reachable only when the HAVE_WCRTOMB section is compiled out */
- }
- from_index = ap->encoding_index;
- break;
- }
- }
|