Logo Search packages:      
Sourcecode: tcltk8.0-ja version File versions  Download package

tclKanjiUtil.c

/* 
 * tclKanjiUtil.c --
 *
 *    This file contains utility procedures that are used by many Tcl
 *    commands.
 *
 * Copyright (c) 1988-1998 Software Research Associates, Inc.
 * Permission to use, copy, modify, and distribute this software and its
 * documentation for any purpose and without fee is hereby granted, provided
 * that the above copyright notice appear in all copies and that both that
 * copyright notice and this permission notice appear in supporting
 * documentation, and that the name of Software Research Associates not be
 * used in advertising or publicity pertaining to distribution of the
 * software without specific, written prior permission.  Software Research
 * Associates makes no representations about the suitability of this software
 * for any purpose.  It is provided "as is" without express or implied
 * warranty.
 */

#ifndef lint
static char rcsid[] = "$Header: /home/m-hirano/cvsroot/tcltk/tcl8/generic/tclKanjiUtil.c,v 1.12 1998/12/04 22:42:49 m-hirano Exp $";
#endif

#ifdef KANJI

#include "tclInt.h"
#include "tclPort.h"

#ifdef ENCODE_DEBUG
#ifdef __WIN32__
#define DEBUG_OUT stdout
#else
#define DEBUG_OUT stderr
#endif /* __WIN32__ */
#endif /* ENCODE_DEBUG */

/*
 * Global flag to enable/disable kanji token scanning.
 */
int   globalDoKanjiScan = 1;

/*
 * Global flag to assume SJIS/EUC ambiguous string as SJIS.
 */
#ifdef __WIN32__
int   globalAssumeSjis = 1;
#else
int   globalAssumeSjis = 0;
#endif /* __WIN32__ */

/*
 * If C_LOCALE_SPECIAL is defined, C locale is treated specially.
 * When the locale is C, the automatic kanji encoding detection
 * feature is disabled, so that any string is recognized as a normal
 * (ISO Latin-1) string.
 * This makes Japanized Tcl to behave just like the original Tcl.
 */
#define C_LOCALE_SPECIAL

/*
 * For setlocale() call.
 */
#ifdef HAVE_SETLOCALE
#include <locale.h>
#endif /* HAVE_SETLOCALE */

/*
 * This array holds the printable kanji code name corresponding to
 * the kanji code defined in tcl.h.
 */

char *Tcl_KanjiCodeStr[] = { "JIS", "SJIS", "EUC", "ANY" };

#ifdef C_LOCALE_SPECIAL
/*
 * This variable indicates whether some special Kanji related feature
 * is disabled nor not.
 */
static int  noKanjiFeature = 0;
#endif /* C_LOCALE_SPECIAL */


/*
 * One of the following data structures exists for each font set that is
 * currently active.  The structure is indexed with two hash tables,
 * one based on font name and one based on XFontStruct address.
 */

typedef struct {
    int kanjiCode;      /* original kanji code */
    char *str[4]; /* for Tcl_DecodeWStr(), prepare other code's
                   * expression. Offset is as same as TCL_(JIS, SJIS,
                   * EUC, ANY). */
    wchar *wstr;
    int refCount;
    Tcl_HashEntry *wstrHashPtr;
} TclWStr;

/*
 * Hash table to map from a wide string's values to a TclWStr structure
 * describing a wide string with those values (used by Tcl_GetWStr).  */

static Tcl_HashTable wstrTable;

/*
 * Hash table for wchar -> TclWStr mapping. This table is indexed by the 
 * wchar identifier, and is used by Tcl_FreeWStr.
 */

static Tcl_HashTable ws_idTable;

static int ws_initialized = 0;      /* 0 means static structures haven't been
                         * initialized yet. */

/*
 * Trivial yet useful macros.
 */
#ifndef MIN
#define MIN(a, b) ((a) > (b) ? (b) : (a))
#endif
#ifndef MAX
#define MAX(a, b) ((a) > (b) ? (a) : (b))
#endif

/*
 * Function prototypes for local procedures in this file:
 */
static int        EncodingDetection _ANSI_ARGS_((unsigned char *string, unsigned char *end));
static void       WStrInit _ANSI_ARGS_((void));

#define T_ASCII   0
#define T_KANJI   1
#define T_KANA    2

/*
 *----------------------------------------------------------------------
 *
 * Tcl_KanjiEncode --
 *
 *    Encode kanji string to wide string.
 *
 * Results:
 *    Number of the encoded characters. (Not bytes)
 *
 * Side effects:
 *    None.
 *
 *----------------------------------------------------------------------
 */

int
Tcl_KanjiEncode(kanjiCode, ks, ws)
     int kanjiCode;
     unsigned char *ks;
     wchar *ws;
{
    switch (kanjiCode) {
      case TCL_JIS:
      return Tcl_EncodeJIS(ks, ws);
      case TCL_SJIS:
      return Tcl_EncodeSJIS(ks, ws);
      case TCL_EUC:
      return Tcl_EncodeEUC(ks, ws);
      case TCL_ANY:
      return Tcl_EncodeANY(ks, ws);
      default:
      panic("Tcl_KanjiEncode: Unknown kanjiCode.");
    }
    return -1;
}

/*
 *----------------------------------------------------------------------
 *
 * Tcl_KanjiDecode --
 *
 *    Decode kanji string to wide string.
 *
 * Results:
 *    Number of the encoded characters. (Not bytes)
 *
 * Side effects:
 *    None.
 *
 *----------------------------------------------------------------------
 */

int
Tcl_KanjiDecode(kanjiCode, ws, ks)
     int kanjiCode;
     wchar *ws;
     unsigned char *ks;
{
    switch (kanjiCode) {
      case TCL_JIS:
      return Tcl_DecodeJIS(ws, ks);
      case TCL_SJIS:
      return Tcl_DecodeSJIS(ws, ks);
      case TCL_EUC:
      return Tcl_DecodeEUC(ws, ks);
      case TCL_ANY:
      return Tcl_DecodeANY(ws, ks);
      default:
      panic("Tcl_KanjiDecode: Unknown kanjiCode.");
    }
    return -1;
}

/*
 *----------------------------------------------------------------------
 *
 * Tcl_EncodeJIS --
 *
 *    Encode JIS kanji string to wide string.
 *
 * Results:
 *    Number of the encoded characters. (Not bytes)
 *
 * Side effects:
 *    None.
 *
 *----------------------------------------------------------------------
 */

int
Tcl_EncodeJIS(js, ws)
     unsigned char *js;
     wchar *ws;
{
    int     c, c1;
    int     kanji = T_ASCII;
    int     n = 0;

    while( (c = *js++) != 0 ) {
      if( c == '\033' ) {
          if( !strncmp(js, "$B", 2) || !strncmp(js, "$@", 2)) {
            kanji = T_KANJI;
            js += 2;
          } else if( !strncmp(js, "(J", 2) || !strncmp(js, "(B", 2) ) {
            kanji = T_ASCII;
            js += 2;
          } else if( !strncmp(js, "(I", 2) ) {
            kanji = T_KANA;
            js += 2;
          } else {
            if( ws ) *ws++ = c;
            n++;
          }
      } else if( kanji == T_KANJI ) {
          c1 = *js++;
          if( c1 == '\0' ) break;
          if( ws ) *ws++ = (c << 8) | c1 | 0x8080;
          n++;
      } else {
          if( ws ) *ws++ = c | ((kanji == T_KANA) ? 0x80 : 0);
          n++;
      }
    }
    if( ws ) *ws = 0;

    return n;
}

/*
 *----------------------------------------------------------------------
 *
 * Tcl_DecodeJIS --
 *
 *    Decode wide string to JIS kanji string.
 *
 * Results:
 *    Bytes of the decoded kanji string.
 *
 * Side effects:
 *    None.
 *
 *----------------------------------------------------------------------
 */

int
Tcl_DecodeJIS(ws, js)
     wchar *ws;
     unsigned char *js;
{
    int     c;
    int     kanji = T_ASCII;
    int     n = 0;

    while( (c = *ws++) != 0 ) {
      switch( c & 0x8080 ) {
        case 0:
          if( kanji != T_ASCII ) {
            if( js ) {
                *js++ = '\033';
                *js++ = '(';
                *js++ = 'B';
            }
            n += 3;
          }
          if( js ) *js++ = c & 0x7f;
          n++;
          kanji = T_ASCII;
          break;
        case 0x80:
          if( kanji != T_KANA ) {
            if( js ) {
                *js++ = '\033';
                *js++ = '(';
                *js++ = 'I';
            }
            n += 3;
          }
          if( js ) *js++ = c & 0x7f;
          n++;
          kanji = T_KANA;
          break;
        case 0x8080:
          if( kanji != T_KANJI ) {
            if( js ) {
                *js++ = '\033';
                *js++ = '$';
                *js++ = 'B';
            }
            n += 3;
          }
          if( js ) {
            *js++ = (c >> 8) & 0x7f;
            *js++ = c & 0x7f;
          }
          n += 2;
          kanji = T_KANJI;
          break;
      }
    }
    if( kanji != T_ASCII ) {
      if( js ) {
          *js++ = '\033';
          *js++ = '(';
          *js++ = 'B';
      }
      n += 3;
    }
    if( js ) *js = '\0';

    return n;
}

/*
 *----------------------------------------------------------------------
 *
 * Tcl_EncodeSJIS --
 *
 *    Encode SJIS kanji string to wide string.
 *
 * Results:
 *    Number of the encoded characters. (Not bytes)
 *
 * Side effects:
 *    None.
 *
 *----------------------------------------------------------------------
 */

#define IS_SJIS(c) (((c) >= 0x81 && (c) <= 0x9f) || ((c) >= 0xe0 && (c) <= 0xfc))

int
Tcl_EncodeSJIS(ss, ws)
     unsigned char *ss;
     wchar *ws;
{
    int     c, c1;
    int     n = 0;

    while( (c = *ss++) != 0 ) {
      if( IS_SJIS(c) ) {
          c1 = *ss++;
          c -= (c>=0xa0) ? 0xc1 : 0x81;
          if( ws ) {
            if( c1 >= 0x9f ) {
                *ws++ = ((c<<9) + 0x2200 + c1 - 0x7e) | 0x8080;
            } else {
                *ws++ = ((c<<9) + 0x2100 + c1
                       - ((c1<=0x7e) ? 0x1f : 0x20)) | 0x8080;
            }
          }
          n++;
      } else {
          if( ws ) *ws++ = c;
          n++;
      }
    }
    if( ws ) *ws = 0;

    return n;
}

/*
 *----------------------------------------------------------------------
 *
 * Tcl_DecodeSJIS --
 *
 *    Decode wide string to SJIS kanji string.
 *
 * Results:
 *    Bytes of the decoded kanji string.
 *
 * Side effects:
 *    None.
 *
 *----------------------------------------------------------------------
 */

int
Tcl_DecodeSJIS(ws, ss)
     wchar *ws;
     unsigned char *ss;
{
    int     c1, c2;
    int     n = 0;

    while( (c1 = *ws++) != 0 ) {
      switch( c1 & 0x8080 ) {
        case 0:
        case 0x80:
          if( ss ) *ss++ = c1 & 0xff;
          n++;
          break;
        case 0x8080:
          c2 = c1 & 0x7f;
          c1 = (c1 >> 8) & 0x7f;
          if( ss ) {
            *ss++ = (c1 - 0x21) / 2 + ((c1 <= 0x5e) ? 0x81 : 0xc1);
            if( c1 & 1 ) {    /* odd */
                *ss++ = c2 + ((c2 <= 0x5f) ? 0x1f : 0x20);
            } else {
                *ss++ = c2 + 0x7e;
            }
          }
          n += 2;
          break;
      }
    }
    if( ss ) *ss = '\0';

    return n;
}

/*
 *----------------------------------------------------------------------
 *
 * Tcl_EncodeEUC --
 *
 *    Encode EUC kanji string to wide string.
 *
 * Results:
 *    Number of the encoded characters. (Not bytes)
 *
 * Side effects:
 *    None.
 *
 *----------------------------------------------------------------------
 */

int
Tcl_EncodeEUC(es, ws)
     unsigned char *es;
     wchar *ws;
{
    int     c;
    int     n = 0;

    while( (c = *es++) != 0 ) {
      if( c == 0x8e ) { /* SS2 */
          if( ws ) *ws++ = *es | 0x80;
          es++;
          n++;
      } else if( c == 0x8f ) {      /* SS3 */
          c = *es++;
          if( ws ) *ws++ = (c << 8) | (*es & 0x7f) | 0x8000;
          es++;
          n++;
      } else if( c & 0x80 ) {
          if( ws ) *ws++ = (c << 8) | *es | 0x8080;
          es++;
          n++;
      } else {
          if( ws ) *ws++ = c;
          n++;
      }
    }
    if( ws ) *ws = 0;

    return n;
}

/*
 *----------------------------------------------------------------------
 *
 * Tcl_DecodeEUC --
 *
 *    Decode wide string to EUC kanji string.
 *
 * Results:
 *    Bytes of the decoded kanji string.
 *
 * Side effects:
 *    None.
 *
 *----------------------------------------------------------------------
 */

int
Tcl_DecodeEUC(ws, es)
     wchar *ws;
     unsigned char *es;
{
    int     c;
    int     n = 0;

    while( (c = *ws++) != 0 ) {
      switch( c & 0x8080 ) {
        case 0:
          if( es ) *es++ = c & 0x7f;
          n++;
          break;
        case 0x80:
          if( es ) {
            *es++ = 0x8e;     /* SS2 */
            *es++ = c & 0xff;
          }
          n += 2;
          break;
        case 0x8000:
          if( es ) {
            *es++ = 0x8f;     /* SS3 */
            *es++ = (c >> 8) | 0x80;
            *es++ = (c & 0xff) | 0x80;
          }
          n += 3;
          break;
        case 0x8080:
          if( es ) {
            *es++ = c >> 8;
            *es++ = c & 0xff;
          }
          n += 2;
          break;
      }
    }
    if( es ) *es = '\0';

    return n;
}

/*
 *----------------------------------------------------------------------
 *
 * Tcl_EncodeANY --
 *
 *    Encode ANY kanji string to wide string. (as ascii string)
 *
 * Results:
 *    Number of the encoded characters. (Not bytes)
 *
 * Side effects:
 *    None.
 *
 *----------------------------------------------------------------------
 */

int
Tcl_EncodeANY(as, ws)
     unsigned char *as;
     wchar *ws;
{
    int c;
    int     n = 0;

    while( (c = *as++) != 0 ) {
      if( ws ) *ws++ = c;
      n++;
    }
    if( ws ) *ws = 0;

    return n;
}

/*
 *----------------------------------------------------------------------
 *
 * Tcl_DecodeANY --
 *
 *    Decode wide string to ANY kanji string. (as ascii string)
 *
 * Results:
 *    Bytes of the decoded kanji string.
 *
 * Side effects:
 *    None.
 *
 *----------------------------------------------------------------------
 */

int
Tcl_DecodeANY(ws, as)
     wchar *ws;
     unsigned char *as;
{
    int     c;
    int     n = 0;

    while( (c = *ws++) != 0 ) {
      switch( c & 0x8080 ) {
        case 0:
        case 0x80:
          if( as ) *as++ = c & 0xff;
          n++;
          break;
        case 0x8000:
        case 0x8080:
          if( as ) {
            *as++ = c >> 8;
            *as++ = c & 0xff;
          }
          n += 2;
          break;
      }
    }
    if( as ) *as = '\0';

    return n;
}

/*
 *----------------------------------------------------------------------
 *
 * Tcl_DefaultKanjiCode --
 *
 *    Determine the default Kanji code from current locale.
 *
 * Results:
 *    This procudure returns a kanji code to be used as a default.
 *
 * Side effects:
 *    None.
 *----------------------------------------------------------------------
 */

int
Tcl_DefaultKanjiCode()
{
    char *lang;
    int i;
    static struct lang {
      char *lang;
      int code;
    } langtab[] = {
      {"ja_JP.SJIS",          TCL_SJIS},
      {"ja_JP.EUC",           TCL_EUC},
      {"ja_JP.JIS",           TCL_JIS},
      {"ja_JP.mscode",  TCL_SJIS},  /* from Xsi nls database */
      {"ja_JP.ujis",          TCL_EUC},   /* from Xsi nls database */
      {"ja_JP",         TCL_EUC},   /* IBM */
      {"Ja_JP",         TCL_SJIS},  /* IBM */
      {"Jp_JP",         TCL_SJIS},  /* IBM */
      {"japan",         TCL_EUC},   /* MIPS, NEC */
#ifdef hpux
      {"japanese",            TCL_SJIS},  /* HP */
#else
      {"japanese",            TCL_EUC},   /* SUN */
#endif
      {"ja",                  TCL_EUC},   /* SUN */
      {"japanese.sjis", TCL_SJIS},  /* HP? */
      {"japanese.euc",  TCL_EUC},   /* HP */
      {"japanese-sjis", TCL_SJIS},  /* IBM */
      {"japanese-ujis", TCL_EUC},   /* IBM */
      {"C",             TCL_ANY},
      {NULL,                  0}
    };

#ifdef HAVE_SETLOCALE
    static int firstcall = 1;

    if (firstcall) {
      setlocale(LC_ALL, "");
      firstcall = 0;
    }

    lang = setlocale(LC_CTYPE, NULL);
#else /* HAVE_SETLOCALE */
    lang = getenv("LANG");
#endif /* HAVE_SETLOCALE */

    if (lang != NULL) {
      /*
       * If the LANG variable is "C", skip some of the
       * Kanji related feature (e.g. automatic encoding detection)
       */
#ifdef C_LOCALE_SPECIAL
      if (!strcmp(lang, "C")) noKanjiFeature = 1;
#endif /* C_LOCALE_SPECIAL */
      for (i = 0; langtab[i].lang != NULL; i++) {
          if (!strcmp(langtab[i].lang, lang)) {
            return langtab[i].code;
          }
      }
    }
    return TCL_DEFAULT_KANJI_CODE;
}


/*
 *----------------------------------------------------------------------
 *
 * Tcl_KanjiCode --
 *
 *    Returns the internal kanji code of the interpreter.
 *
 * Results:
 *    The internal kanji code.
 *
 * Side effects:
 *    None.
 *
 *----------------------------------------------------------------------
 */

int
Tcl_KanjiCode(interp)
     Tcl_Interp *interp;
{
    return ((Interp *)interp)->kanjiCode;
}

/*
 *----------------------------------------------------------------------
 *
 * Tcl_KanjiSkip --
 *
 *    Skip all kanji sequence.
 *
 * Results:
 *    Return length of the sequence.
 *
 * Side effects:
 *    None.
 *
 *----------------------------------------------------------------------
 */

int
Tcl_KanjiSkip(string, end, kanjiCodePtr)
     unsigned char *string;
     unsigned char *end;
     int *kanjiCodePtr;
{
    unsigned char *str = string;
    int len;
    int ret = 1;
    int kanjiCode = TCL_ANY;

    if (kanjiCodePtr != NULL) {
      kanjiCode = *kanjiCodePtr;
    }

    if (end == NULL) {
      len = strlen(string);
      end = string + len;
    } else {
      len = end - string;
    }

#ifdef C_LOCALE_SPECIAL
    if (noKanjiFeature == 1) return (len > 0) ? 1 : 0;
#endif /* C_LOCALE_SPECIAL */

    if (len <= 1) {
      if (kanjiCodePtr != NULL) {
          *kanjiCodePtr = TCL_ANY;
      }
      if (len <= 0) {
          panic("Tcl_KanjiSkip: get NULL.");
      }
      return len;
    }

    if (*str == '\033' &&
      kanjiCode != TCL_EUC &&
      kanjiCode != TCL_SJIS) {
      /*
       *    JIS
       *          start:      ESC $ B
       *                ESC $ ( B
       *                ESC $ @
       *                ESC $ ( @
       *
       *          end:  ESC ( J
       *                ESC ( B
       */
      unsigned char *old;
      int completeJIS = 0;
      str++;
      if (kanjiCodePtr != NULL) {
          *kanjiCodePtr = TCL_JIS;
      }
      while (*str != '\033' && str < end) str++;
      if (str == end) {
          /*
           * found leading ESC but no trailing ESC.
           */
          if (kanjiCodePtr != NULL) {
            *kanjiCodePtr = TCL_ANY;
          }
          return len;
      } else if (*str == '\033') {
          if (str >= end) {
            goto Ambig;
          }
          str++;
          old = str;
          if (*str == '(') {
            if (str >= end) {
                goto Ambig;
            }
            str++;
            if (*str == 'J' || *str == 'B') {
                if (str >= end) {
                  goto Ambig;
                }
                str++;
                completeJIS = 1;
            }
          }
          if (completeJIS) {
            ret = str - string;
          } else {
            ret = old - string;
          }
      } else {
          Ambig:
          ret = str - string;
      }
    } else {
      if (kanjiCode == TCL_NOT_KANJI || kanjiCode == TCL_ANY) {
          kanjiCode = EncodingDetection(str, end);
      }
      if (kanjiCode == TCL_NOT_KANJI || kanjiCode == TCL_ANY) {
          ret = 1;
          kanjiCode = TCL_ANY;
      } else {
          ret = Tcl_KanjiLength(str, end, kanjiCode);
      }
      if (kanjiCodePtr != NULL) {
          *kanjiCodePtr = kanjiCode;
      }
#ifdef ENCODE_DEBUG
      if (ret > 0) {
          char *x = alloca(ret + 1);
          memcpy(x, string, ret);
          x[ret] = 0;
          fprintf(DEBUG_OUT, "debug: '%s' len %d, ret %d code %s\n", x, len, ret,
                Tcl_KanjiCodeStr[kanjiCode]);
      } else {
          int ll = ((len >= 20) ? 20 : len);
          char *x = alloca(ll + 1);
          memcpy(x, string, ll);
          x[ll] = 0;
          fprintf(DEBUG_OUT, "debug: '%s'... len %d, ret %d code %s\n", x, len, ret,
                Tcl_KanjiCodeStr[kanjiCode]);
      }
#endif /* ENCODE_DEBUG */
    }
    return ((ret > 0) ? ret : 1);
}

/*
 *----------------------------------------------------------------------
 *
 * Tcl_KanjiStart --
 *
 *    Check if the string starts with kanji or not.
 *
 *    KanjiCodePtr is a pointer to an int which specifies
 *    the encoding of the given string.  This procedure
 *    checks if the first character of the string is a
 *    kanji.
 *
 *    If the value pointed by kanjiCodePtr is TCL_ANY,
 *    and if the first character of the string seems to be
 *    a kanji character, this procedure examines the string
 *    further, determines the encoding used, and assign the
 *    encoding value to *kanjiCodePtr.
 *
 * Results:
 *    If the first character of the given string is kanji,
 *    this procedure returns 1. Otherwise 0 is returned.
 *
 * Side effects:
 *    None.
 *
 *----------------------------------------------------------------------
 */

int
Tcl_KanjiStart(string, end, kanjiCodePtr)
    unsigned char *string;
    unsigned char *end;
    int *kanjiCodePtr;
{
    unsigned char c = *string;

#ifdef C_LOCALE_SPECIAL
    if (noKanjiFeature) return 0;
#endif /* C_LOCALE_SPECIAL */

    if (string == end) return 0;
 retry:
    switch (*kanjiCodePtr) {
    case TCL_ANY:
      if (c != '\033' && c < 0x80) return 0;
      *kanjiCodePtr = EncodingDetection(string, end);
      goto retry;
    case TCL_JIS: {
      int len;
      if (end == NULL) {
          len = strlen(string);
          end = string + len;
      } else {
          len = end - string;
      }
      if (len >= 4) {
          return (c == '\033' &&
                string[1] == '$' &&
                ((string[2] == 'B' || string[2] == '@') ||
                 (string[2] == '(' && (string[3] == 'B' || string[3] == '@'))));
      } else if (len >= 3) {
          return (c == '\033' &&
                string[1] == '$' &&
                (string[2] == 'B' || string[2] == '@'));
      } else {
          return 0;
      }
    }
    case TCL_SJIS:
      return ((0x81 <= c && c <= 0x9f) || (0xe0 <= c && c <= 0xfc));
    case TCL_EUC:
      return (c == 0x8e || c == 0x8f || (c & 0x80));
    default:      /* TCL_NOT_KANJI */
      return 0;
    }
    return 0;
}

/*
 *----------------------------------------------------------------------
 *
 * Tcl_KanjiEnd --
 *
 *    Check if the string ends with kanji or not.
 *
 *    KanjiCodePtr is a pointer to an int which specifies
 *    the encoding of the given string.  This procedure
 *    checks if the last character of the string is a
 *    kanji.
 *
 *    If the value pointed by kanjiCodePtr is TCL_ANY,
 *    and if the last character of the string seems to be
 *    a kanji character, this procedure examines the string
 *    further, determines the encoding used, and assign the
 *    encoding value to *kanjiCodePtr.
 *
 * Results:
 *    If the last character of the given string is kanji,
 *    this procedure returns 1. Otherwise 0 is returned.
 *
 * Side effects:
 *    None.
 *
 *----------------------------------------------------------------------
 */

int
Tcl_KanjiEnd(string, end, kanjiCodePtr)
    unsigned char *string;
    unsigned char *end;
    int *kanjiCodePtr;
{
    unsigned char *p = string;
    int foundKanji = 0;

#ifdef C_LOCALE_SPECIAL
    if (noKanjiFeature) return 0;
#endif /* C_LOCALE_SPECIAL */

    if (string == end) return 0;
    if (end == NULL) {
      end = p + strlen(string);
    }

    while (p < end) {
      if (IS_KANJISTART(UCHAR(*p))) {
          p += Tcl_KanjiSkip(p, end, kanjiCodePtr);
          foundKanji = 1;
          break;
      } else {
          p++;
      }
    }
    if (foundKanji == 1 && p == end) {
      return 1;
    } else {
      return 0;
    }
}

/*
 *----------------------------------------------------------------------
 *
 * Tcl_KanjiLength --
 *
 *    Count a byte number of the given kanji sequence.
 *
 * Results:
 *    Return value is a byte number of the kanji sequence.
 *
 * Side effects:
 *    None.
 *
 *----------------------------------------------------------------------
 */

int
Tcl_KanjiLength(string, end, kanjiCode)
    unsigned char *string;
    unsigned char *end;
    int kanjiCode;
{
    unsigned char *src = string;

    if (string == end) return 0;
    if (end == NULL) {
      end = string + strlen(string);
    }

    switch (kanjiCode) {
    case TCL_JIS: {
      int theCode = TCL_JIS;
      if (*src == '\033') {
          src += Tcl_KanjiSkip(src, end, &theCode);
      }
      return (int)(src - string);
      break;
    }
    case TCL_SJIS:
      while (src < end) {
          if ((*src >= 0x81 && *src <= 0x9f) || (*src >= 0xe0 && *src <= 0xfc)) {
            src++;
            if (src < end) {
                src++;
            } else {
                break;
            }
          } else {
            break;
          }
      }
      return (int)(src - string);
      break;
    case TCL_EUC:
      while (src < end) {
          if (*src == 0x8e) {
            src++;
            if (src < end) {
                src++;
            } else {
                break;
            }
            continue;
          } else if (*src == 0x8f) {
            src++;
            if (src < end) {
                src++;
                if (src < end) {
                  src++;
                } else {
                  break;
                }
            } else {
                break;
            }
            continue;
          } else {
            if (*src & 0x80) {
                src++;
            } else {
                break;
            }
          }
      }
      return (int)(src - string);
      break;
    }
#if 0
    panic("unknown kanji code.");
    return 0;
#else
    return 1;
#endif
}

/*
 *----------------------------------------------------------------------
 *
 * Tcl_KanjiString --
 *
 *    Check if the string contains kanji.
 *
 * Results:
 *    If the string contains kanji, set its kanji code
 *    and return TCL_OK.  Otherwise return TCL_NOT_KANJI.
 *
 * Side effects:
 *    None.
 *
 *----------------------------------------------------------------------
 */

int
Tcl_KanjiString(interp, string, end, kanjiCodePtr)
     Tcl_Interp *interp;
     unsigned char *string;
     unsigned char *end;
     int *kanjiCodePtr;
{
    int encoding;

    if (end == NULL) {
      end = string + strlen(string);
    }

    if (
#ifdef C_LOCALE_SPECIAL
      noKanjiFeature ||
#endif /* C_LOCALE_SPECIAL */
      (encoding = EncodingDetection(string, end)) == TCL_NOT_KANJI) {
      *kanjiCodePtr = TCL_ANY;
      return TCL_NOT_KANJI;
    } else {
      *kanjiCodePtr = encoding;
      return TCL_OK;
    }
}


#define TYPE_NONE       (0)
#define TYPE_SJIS_1     (1)
#define TYPE_SJIS_2     (1<<1)
#define TYPE_SKANA      (1<<2)
#define TYPE_EUC_1      (1<<3)
#define TYPE_EUC_2      (1<<4)
#define TYPE_ASCII      (1<<5)
#define TYPE_UNDEF      (1<<6)

/*
 * range of sjis byte 1 is same as IBM DBCS-PC byte 1 range.
 */

/*
 * Well, who's gonna use hankaku kana in EUC?
 * Ignore SS2/SS3.
 */

static unsigned char charTypeTbl[] = {
      /* 0x00 */ (TYPE_ASCII),
      /* 0x01 */ (TYPE_ASCII),
      /* 0x02 */ (TYPE_ASCII),
      /* 0x03 */ (TYPE_ASCII),
      /* 0x04 */ (TYPE_ASCII),
      /* 0x05 */ (TYPE_ASCII),
      /* 0x06 */ (TYPE_ASCII),
      /* 0x07 */ (TYPE_ASCII),
      /* 0x08 */ (TYPE_ASCII),
      /* 0x09 */ (TYPE_ASCII),
      /* 0x0a */ (TYPE_ASCII),
      /* 0x0b */ (TYPE_ASCII),
      /* 0x0c */ (TYPE_ASCII),
      /* 0x0d */ (TYPE_ASCII),
      /* 0x0e */ (TYPE_ASCII),
      /* 0x0f */ (TYPE_ASCII),
      /* 0x10 */ (TYPE_ASCII),
      /* 0x11 */ (TYPE_ASCII),
      /* 0x12 */ (TYPE_ASCII),
      /* 0x13 */ (TYPE_ASCII),
      /* 0x14 */ (TYPE_ASCII),
      /* 0x15 */ (TYPE_ASCII),
      /* 0x16 */ (TYPE_ASCII),
      /* 0x17 */ (TYPE_ASCII),
      /* 0x18 */ (TYPE_ASCII),
      /* 0x19 */ (TYPE_ASCII),
      /* 0x1a */ (TYPE_ASCII),
      /* 0x1b */ (TYPE_ASCII),
      /* 0x1c */ (TYPE_ASCII),
      /* 0x1d */ (TYPE_ASCII),
      /* 0x1e */ (TYPE_ASCII),
      /* 0x1f */ (TYPE_ASCII),
      /* 0x20 */ (TYPE_ASCII),
      /* 0x21 */ (TYPE_ASCII),
      /* 0x22 */ (TYPE_ASCII),
      /* 0x23 */ (TYPE_ASCII),
      /* 0x24 */ (TYPE_ASCII),
      /* 0x25 */ (TYPE_ASCII),
      /* 0x26 */ (TYPE_ASCII),
      /* 0x27 */ (TYPE_ASCII),
      /* 0x28 */ (TYPE_ASCII),
      /* 0x29 */ (TYPE_ASCII),
      /* 0x2a */ (TYPE_ASCII),
      /* 0x2b */ (TYPE_ASCII),
      /* 0x2c */ (TYPE_ASCII),
      /* 0x2d */ (TYPE_ASCII),
      /* 0x2e */ (TYPE_ASCII),
      /* 0x2f */ (TYPE_ASCII),
      /* 0x30 */ (TYPE_ASCII),
      /* 0x31 */ (TYPE_ASCII),
      /* 0x32 */ (TYPE_ASCII),
      /* 0x33 */ (TYPE_ASCII),
      /* 0x34 */ (TYPE_ASCII),
      /* 0x35 */ (TYPE_ASCII),
      /* 0x36 */ (TYPE_ASCII),
      /* 0x37 */ (TYPE_ASCII),
      /* 0x38 */ (TYPE_ASCII),
      /* 0x39 */ (TYPE_ASCII),
      /* 0x3a */ (TYPE_ASCII),
      /* 0x3b */ (TYPE_ASCII),
      /* 0x3c */ (TYPE_ASCII),
      /* 0x3d */ (TYPE_ASCII),
      /* 0x3e */ (TYPE_ASCII),
      /* 0x3f */ (TYPE_ASCII),

            /* 0x00 - 0x3f    ascii */

      /* 0x40 */ (TYPE_ASCII|TYPE_SJIS_2),
      /* 0x41 */ (TYPE_ASCII|TYPE_SJIS_2),
      /* 0x42 */ (TYPE_ASCII|TYPE_SJIS_2),
      /* 0x43 */ (TYPE_ASCII|TYPE_SJIS_2),
      /* 0x44 */ (TYPE_ASCII|TYPE_SJIS_2),
      /* 0x45 */ (TYPE_ASCII|TYPE_SJIS_2),
      /* 0x46 */ (TYPE_ASCII|TYPE_SJIS_2),
      /* 0x47 */ (TYPE_ASCII|TYPE_SJIS_2),
      /* 0x48 */ (TYPE_ASCII|TYPE_SJIS_2),
      /* 0x49 */ (TYPE_ASCII|TYPE_SJIS_2),
      /* 0x4a */ (TYPE_ASCII|TYPE_SJIS_2),
      /* 0x4b */ (TYPE_ASCII|TYPE_SJIS_2),
      /* 0x4c */ (TYPE_ASCII|TYPE_SJIS_2),
      /* 0x4d */ (TYPE_ASCII|TYPE_SJIS_2),
      /* 0x4e */ (TYPE_ASCII|TYPE_SJIS_2),
      /* 0x4f */ (TYPE_ASCII|TYPE_SJIS_2),
      /* 0x50 */ (TYPE_ASCII|TYPE_SJIS_2),
      /* 0x51 */ (TYPE_ASCII|TYPE_SJIS_2),
      /* 0x52 */ (TYPE_ASCII|TYPE_SJIS_2),
      /* 0x53 */ (TYPE_ASCII|TYPE_SJIS_2),
      /* 0x54 */ (TYPE_ASCII|TYPE_SJIS_2),
      /* 0x55 */ (TYPE_ASCII|TYPE_SJIS_2),
      /* 0x56 */ (TYPE_ASCII|TYPE_SJIS_2),
      /* 0x57 */ (TYPE_ASCII|TYPE_SJIS_2),
      /* 0x58 */ (TYPE_ASCII|TYPE_SJIS_2),
      /* 0x59 */ (TYPE_ASCII|TYPE_SJIS_2),
      /* 0x5a */ (TYPE_ASCII|TYPE_SJIS_2),
      /* 0x5b */ (TYPE_ASCII|TYPE_SJIS_2),
      /* 0x5c */ (TYPE_ASCII|TYPE_SJIS_2),
      /* 0x5d */ (TYPE_ASCII|TYPE_SJIS_2),
      /* 0x5e */ (TYPE_ASCII|TYPE_SJIS_2),
      /* 0x5f */ (TYPE_ASCII|TYPE_SJIS_2),
      /* 0x60 */ (TYPE_ASCII|TYPE_SJIS_2),
      /* 0x61 */ (TYPE_ASCII|TYPE_SJIS_2),
      /* 0x62 */ (TYPE_ASCII|TYPE_SJIS_2),
      /* 0x63 */ (TYPE_ASCII|TYPE_SJIS_2),
      /* 0x64 */ (TYPE_ASCII|TYPE_SJIS_2),
      /* 0x65 */ (TYPE_ASCII|TYPE_SJIS_2),
      /* 0x66 */ (TYPE_ASCII|TYPE_SJIS_2),
      /* 0x67 */ (TYPE_ASCII|TYPE_SJIS_2),
      /* 0x68 */ (TYPE_ASCII|TYPE_SJIS_2),
      /* 0x69 */ (TYPE_ASCII|TYPE_SJIS_2),
      /* 0x6a */ (TYPE_ASCII|TYPE_SJIS_2),
      /* 0x6b */ (TYPE_ASCII|TYPE_SJIS_2),
      /* 0x6c */ (TYPE_ASCII|TYPE_SJIS_2),
      /* 0x6d */ (TYPE_ASCII|TYPE_SJIS_2),
      /* 0x6e */ (TYPE_ASCII|TYPE_SJIS_2),
      /* 0x6f */ (TYPE_ASCII|TYPE_SJIS_2),
      /* 0x70 */ (TYPE_ASCII|TYPE_SJIS_2),
      /* 0x71 */ (TYPE_ASCII|TYPE_SJIS_2),
      /* 0x72 */ (TYPE_ASCII|TYPE_SJIS_2),
      /* 0x73 */ (TYPE_ASCII|TYPE_SJIS_2),
      /* 0x74 */ (TYPE_ASCII|TYPE_SJIS_2),
      /* 0x75 */ (TYPE_ASCII|TYPE_SJIS_2),
      /* 0x76 */ (TYPE_ASCII|TYPE_SJIS_2),
      /* 0x77 */ (TYPE_ASCII|TYPE_SJIS_2),
      /* 0x78 */ (TYPE_ASCII|TYPE_SJIS_2),
      /* 0x79 */ (TYPE_ASCII|TYPE_SJIS_2),
      /* 0x7a */ (TYPE_ASCII|TYPE_SJIS_2),
      /* 0x7b */ (TYPE_ASCII|TYPE_SJIS_2),
      /* 0x7c */ (TYPE_ASCII|TYPE_SJIS_2),
      /* 0x7d */ (TYPE_ASCII|TYPE_SJIS_2),
      /* 0x7e */ (TYPE_ASCII|TYPE_SJIS_2),

            /* 0x40 - 0x7e    ascii sjis2 */

      /* 0x7f */ (TYPE_ASCII),

            /* 0x7f - 0x7f    ascii */

      /* 0x80 */ (TYPE_SJIS_2),

            /* 0x80 - 0x80    sjis2 */

      /* 0x81 */ (TYPE_SJIS_1|TYPE_SJIS_2),
      /* 0x82 */ (TYPE_SJIS_1|TYPE_SJIS_2),
      /* 0x83 */ (TYPE_SJIS_1|TYPE_SJIS_2),
      /* 0x84 */ (TYPE_SJIS_1|TYPE_SJIS_2),
      /* 0x85 */ (TYPE_SJIS_1|TYPE_SJIS_2),
      /* 0x86 */ (TYPE_SJIS_1|TYPE_SJIS_2),
      /* 0x87 */ (TYPE_SJIS_1|TYPE_SJIS_2),
      /* 0x88 */ (TYPE_SJIS_1|TYPE_SJIS_2),
      /* 0x89 */ (TYPE_SJIS_1|TYPE_SJIS_2),
      /* 0x8a */ (TYPE_SJIS_1|TYPE_SJIS_2),
      /* 0x8b */ (TYPE_SJIS_1|TYPE_SJIS_2),
      /* 0x8c */ (TYPE_SJIS_1|TYPE_SJIS_2),
      /* 0x8d */ (TYPE_SJIS_1|TYPE_SJIS_2),
      /* 0x8e */ (TYPE_SJIS_1|TYPE_SJIS_2),
      /* 0x8f */ (TYPE_SJIS_1|TYPE_SJIS_2),
      /* 0x90 */ (TYPE_SJIS_1|TYPE_SJIS_2),
      /* 0x91 */ (TYPE_SJIS_1|TYPE_SJIS_2),
      /* 0x92 */ (TYPE_SJIS_1|TYPE_SJIS_2),
      /* 0x93 */ (TYPE_SJIS_1|TYPE_SJIS_2),
      /* 0x94 */ (TYPE_SJIS_1|TYPE_SJIS_2),
      /* 0x95 */ (TYPE_SJIS_1|TYPE_SJIS_2),
      /* 0x96 */ (TYPE_SJIS_1|TYPE_SJIS_2),
      /* 0x97 */ (TYPE_SJIS_1|TYPE_SJIS_2),
      /* 0x98 */ (TYPE_SJIS_1|TYPE_SJIS_2),
      /* 0x99 */ (TYPE_SJIS_1|TYPE_SJIS_2),
      /* 0x9a */ (TYPE_SJIS_1|TYPE_SJIS_2),
      /* 0x9b */ (TYPE_SJIS_1|TYPE_SJIS_2),
      /* 0x9c */ (TYPE_SJIS_1|TYPE_SJIS_2),
      /* 0x9d */ (TYPE_SJIS_1|TYPE_SJIS_2),
      /* 0x9e */ (TYPE_SJIS_1|TYPE_SJIS_2),
      /* 0x9f */ (TYPE_SJIS_1|TYPE_SJIS_2),

            /* 0x81 - 0x9f    sjis1 sjis2 */

      /* 0xa0 */ (TYPE_SJIS_2),

            /* 0xa0 - 0xa0    sjis2 */

      /* 0xa1 */ (TYPE_SKANA|TYPE_SJIS_2|TYPE_EUC_1|TYPE_EUC_2),
      /* 0xa2 */ (TYPE_SKANA|TYPE_SJIS_2|TYPE_EUC_1|TYPE_EUC_2),
      /* 0xa3 */ (TYPE_SKANA|TYPE_SJIS_2|TYPE_EUC_1|TYPE_EUC_2),
      /* 0xa4 */ (TYPE_SKANA|TYPE_SJIS_2|TYPE_EUC_1|TYPE_EUC_2),
      /* 0xa5 */ (TYPE_SKANA|TYPE_SJIS_2|TYPE_EUC_1|TYPE_EUC_2),
      /* 0xa6 */ (TYPE_SKANA|TYPE_SJIS_2|TYPE_EUC_1|TYPE_EUC_2),
      /* 0xa7 */ (TYPE_SKANA|TYPE_SJIS_2|TYPE_EUC_1|TYPE_EUC_2),
      /* 0xa8 */ (TYPE_SKANA|TYPE_SJIS_2|TYPE_EUC_1|TYPE_EUC_2),
      /* 0xa9 */ (TYPE_SKANA|TYPE_SJIS_2|TYPE_EUC_1|TYPE_EUC_2),
      /* 0xaa */ (TYPE_SKANA|TYPE_SJIS_2|TYPE_EUC_1|TYPE_EUC_2),
      /* 0xab */ (TYPE_SKANA|TYPE_SJIS_2|TYPE_EUC_1|TYPE_EUC_2),
      /* 0xac */ (TYPE_SKANA|TYPE_SJIS_2|TYPE_EUC_1|TYPE_EUC_2),
      /* 0xad */ (TYPE_SKANA|TYPE_SJIS_2|TYPE_EUC_1|TYPE_EUC_2),
      /* 0xae */ (TYPE_SKANA|TYPE_SJIS_2|TYPE_EUC_1|TYPE_EUC_2),
      /* 0xaf */ (TYPE_SKANA|TYPE_SJIS_2|TYPE_EUC_1|TYPE_EUC_2),
      /* 0xb0 */ (TYPE_SKANA|TYPE_SJIS_2|TYPE_EUC_1|TYPE_EUC_2),
      /* 0xb1 */ (TYPE_SKANA|TYPE_SJIS_2|TYPE_EUC_1|TYPE_EUC_2),
      /* 0xb2 */ (TYPE_SKANA|TYPE_SJIS_2|TYPE_EUC_1|TYPE_EUC_2),
      /* 0xb3 */ (TYPE_SKANA|TYPE_SJIS_2|TYPE_EUC_1|TYPE_EUC_2),
      /* 0xb4 */ (TYPE_SKANA|TYPE_SJIS_2|TYPE_EUC_1|TYPE_EUC_2),
      /* 0xb5 */ (TYPE_SKANA|TYPE_SJIS_2|TYPE_EUC_1|TYPE_EUC_2),
      /* 0xb6 */ (TYPE_SKANA|TYPE_SJIS_2|TYPE_EUC_1|TYPE_EUC_2),
      /* 0xb7 */ (TYPE_SKANA|TYPE_SJIS_2|TYPE_EUC_1|TYPE_EUC_2),
      /* 0xb8 */ (TYPE_SKANA|TYPE_SJIS_2|TYPE_EUC_1|TYPE_EUC_2),
      /* 0xb9 */ (TYPE_SKANA|TYPE_SJIS_2|TYPE_EUC_1|TYPE_EUC_2),
      /* 0xba */ (TYPE_SKANA|TYPE_SJIS_2|TYPE_EUC_1|TYPE_EUC_2),
      /* 0xbb */ (TYPE_SKANA|TYPE_SJIS_2|TYPE_EUC_1|TYPE_EUC_2),
      /* 0xbc */ (TYPE_SKANA|TYPE_SJIS_2|TYPE_EUC_1|TYPE_EUC_2),
      /* 0xbd */ (TYPE_SKANA|TYPE_SJIS_2|TYPE_EUC_1|TYPE_EUC_2),
      /* 0xbe */ (TYPE_SKANA|TYPE_SJIS_2|TYPE_EUC_1|TYPE_EUC_2),
      /* 0xbf */ (TYPE_SKANA|TYPE_SJIS_2|TYPE_EUC_1|TYPE_EUC_2),
      /* 0xc0 */ (TYPE_SKANA|TYPE_SJIS_2|TYPE_EUC_1|TYPE_EUC_2),
      /* 0xc1 */ (TYPE_SKANA|TYPE_SJIS_2|TYPE_EUC_1|TYPE_EUC_2),
      /* 0xc2 */ (TYPE_SKANA|TYPE_SJIS_2|TYPE_EUC_1|TYPE_EUC_2),
      /* 0xc3 */ (TYPE_SKANA|TYPE_SJIS_2|TYPE_EUC_1|TYPE_EUC_2),
      /* 0xc4 */ (TYPE_SKANA|TYPE_SJIS_2|TYPE_EUC_1|TYPE_EUC_2),
      /* 0xc5 */ (TYPE_SKANA|TYPE_SJIS_2|TYPE_EUC_1|TYPE_EUC_2),
      /* 0xc6 */ (TYPE_SKANA|TYPE_SJIS_2|TYPE_EUC_1|TYPE_EUC_2),
      /* 0xc7 */ (TYPE_SKANA|TYPE_SJIS_2|TYPE_EUC_1|TYPE_EUC_2),
      /* 0xc8 */ (TYPE_SKANA|TYPE_SJIS_2|TYPE_EUC_1|TYPE_EUC_2),
      /* 0xc9 */ (TYPE_SKANA|TYPE_SJIS_2|TYPE_EUC_1|TYPE_EUC_2),
      /* 0xca */ (TYPE_SKANA|TYPE_SJIS_2|TYPE_EUC_1|TYPE_EUC_2),
      /* 0xcb */ (TYPE_SKANA|TYPE_SJIS_2|TYPE_EUC_1|TYPE_EUC_2),
      /* 0xcc */ (TYPE_SKANA|TYPE_SJIS_2|TYPE_EUC_1|TYPE_EUC_2),
      /* 0xcd */ (TYPE_SKANA|TYPE_SJIS_2|TYPE_EUC_1|TYPE_EUC_2),
      /* 0xce */ (TYPE_SKANA|TYPE_SJIS_2|TYPE_EUC_1|TYPE_EUC_2),
      /* 0xcf */ (TYPE_SKANA|TYPE_SJIS_2|TYPE_EUC_1|TYPE_EUC_2),
      /* 0xd0 */ (TYPE_SKANA|TYPE_SJIS_2|TYPE_EUC_1|TYPE_EUC_2),
      /* 0xd1 */ (TYPE_SKANA|TYPE_SJIS_2|TYPE_EUC_1|TYPE_EUC_2),
      /* 0xd2 */ (TYPE_SKANA|TYPE_SJIS_2|TYPE_EUC_1|TYPE_EUC_2),
      /* 0xd3 */ (TYPE_SKANA|TYPE_SJIS_2|TYPE_EUC_1|TYPE_EUC_2),
      /* 0xd4 */ (TYPE_SKANA|TYPE_SJIS_2|TYPE_EUC_1|TYPE_EUC_2),
      /* 0xd5 */ (TYPE_SKANA|TYPE_SJIS_2|TYPE_EUC_1|TYPE_EUC_2),
      /* 0xd6 */ (TYPE_SKANA|TYPE_SJIS_2|TYPE_EUC_1|TYPE_EUC_2),
      /* 0xd7 */ (TYPE_SKANA|TYPE_SJIS_2|TYPE_EUC_1|TYPE_EUC_2),
      /* 0xd8 */ (TYPE_SKANA|TYPE_SJIS_2|TYPE_EUC_1|TYPE_EUC_2),
      /* 0xd9 */ (TYPE_SKANA|TYPE_SJIS_2|TYPE_EUC_1|TYPE_EUC_2),
      /* 0xda */ (TYPE_SKANA|TYPE_SJIS_2|TYPE_EUC_1|TYPE_EUC_2),
      /* 0xdb */ (TYPE_SKANA|TYPE_SJIS_2|TYPE_EUC_1|TYPE_EUC_2),
      /* 0xdc */ (TYPE_SKANA|TYPE_SJIS_2|TYPE_EUC_1|TYPE_EUC_2),
      /* 0xdd */ (TYPE_SKANA|TYPE_SJIS_2|TYPE_EUC_1|TYPE_EUC_2),
      /* 0xde */ (TYPE_SKANA|TYPE_SJIS_2|TYPE_EUC_1|TYPE_EUC_2),
      /* 0xdf */ (TYPE_SKANA|TYPE_SJIS_2|TYPE_EUC_1|TYPE_EUC_2),

            /* 0xa1 - 0xdf    sjiskana sjis2 euc1 euc2 */

      /* 0xe0 */ (TYPE_SJIS_1|TYPE_SJIS_2|TYPE_EUC_1|TYPE_EUC_2),
      /* 0xe1 */ (TYPE_SJIS_1|TYPE_SJIS_2|TYPE_EUC_1|TYPE_EUC_2),
      /* 0xe2 */ (TYPE_SJIS_1|TYPE_SJIS_2|TYPE_EUC_1|TYPE_EUC_2),
      /* 0xe3 */ (TYPE_SJIS_1|TYPE_SJIS_2|TYPE_EUC_1|TYPE_EUC_2),
      /* 0xe4 */ (TYPE_SJIS_1|TYPE_SJIS_2|TYPE_EUC_1|TYPE_EUC_2),
      /* 0xe5 */ (TYPE_SJIS_1|TYPE_SJIS_2|TYPE_EUC_1|TYPE_EUC_2),
      /* 0xe6 */ (TYPE_SJIS_1|TYPE_SJIS_2|TYPE_EUC_1|TYPE_EUC_2),
      /* 0xe7 */ (TYPE_SJIS_1|TYPE_SJIS_2|TYPE_EUC_1|TYPE_EUC_2),
      /* 0xe8 */ (TYPE_SJIS_1|TYPE_SJIS_2|TYPE_EUC_1|TYPE_EUC_2),
      /* 0xe9 */ (TYPE_SJIS_1|TYPE_SJIS_2|TYPE_EUC_1|TYPE_EUC_2),
      /* 0xea */ (TYPE_SJIS_1|TYPE_SJIS_2|TYPE_EUC_1|TYPE_EUC_2),
      /* 0xeb */ (TYPE_SJIS_1|TYPE_SJIS_2|TYPE_EUC_1|TYPE_EUC_2),
      /* 0xec */ (TYPE_SJIS_1|TYPE_SJIS_2|TYPE_EUC_1|TYPE_EUC_2),
      /* 0xed */ (TYPE_SJIS_1|TYPE_SJIS_2|TYPE_EUC_1|TYPE_EUC_2),
      /* 0xee */ (TYPE_SJIS_1|TYPE_SJIS_2|TYPE_EUC_1|TYPE_EUC_2),
      /* 0xef */ (TYPE_SJIS_1|TYPE_SJIS_2|TYPE_EUC_1|TYPE_EUC_2),

            /* 0xe0 - 0xef    sjis1 sjis2 euc1 euc2 */

      /* 0xf0 */ (TYPE_SJIS_2|TYPE_EUC_1|TYPE_EUC_2),
      /* 0xf1 */ (TYPE_SJIS_2|TYPE_EUC_1|TYPE_EUC_2),
      /* 0xf2 */ (TYPE_SJIS_2|TYPE_EUC_1|TYPE_EUC_2),
      /* 0xf3 */ (TYPE_SJIS_2|TYPE_EUC_1|TYPE_EUC_2),
      /* 0xf4 */ (TYPE_SJIS_2|TYPE_EUC_1|TYPE_EUC_2),
      /* 0xf5 */ (TYPE_SJIS_2|TYPE_EUC_1|TYPE_EUC_2),
      /* 0xf6 */ (TYPE_SJIS_2|TYPE_EUC_1|TYPE_EUC_2),
      /* 0xf7 */ (TYPE_SJIS_2|TYPE_EUC_1|TYPE_EUC_2),
      /* 0xf8 */ (TYPE_SJIS_2|TYPE_EUC_1|TYPE_EUC_2),
      /* 0xf9 */ (TYPE_SJIS_2|TYPE_EUC_1|TYPE_EUC_2),
      /* 0xfa */ (TYPE_SJIS_2|TYPE_EUC_1|TYPE_EUC_2),
      /* 0xfb */ (TYPE_SJIS_2|TYPE_EUC_1|TYPE_EUC_2),
      /* 0xfc */ (TYPE_SJIS_2|TYPE_EUC_1|TYPE_EUC_2),

            /* 0xf0 - 0xfc    sjis2 euc1 euc2 */

      /* 0xfd */ (TYPE_EUC_2),
      /* 0xfe */ (TYPE_EUC_2),

            /* 0xfd - 0xfe    euc2 */

      /* 0xff */ (TYPE_NONE)
};

#define IS_TYPE(x, y) (((x) & (y)) != 0)
    /* PURE SJIS: 0x80 - 0xa0. (0x81 - 0x9f, 0x80, 0xa0) */
#define IS_PURE_SJIS(x) (((x) == TYPE_SJIS_1) || ((x) == TYPE_SJIS_2))
    /* PURE ASCII: 0x00 - 0x3f */
#define IS_PURE_ASCII(x) ((x) == TYPE_ASCII)
    /* PURE EUC: 0xfd - 0xfe */
#define IS_PURE_EUC(x) (((x) == TYPE_EUC_2) || ((x) == TYPE_EUC_1))

/*
 *----------------------------------------------------------------------
 *
 * EncodingDetection --
 *
 *    Determine the encoding (kanji code) of the given string.
 *    This procedure assumes that the given string contains
 *    only ASCII and kanji (defined by the standard JIS X0208)
 *    characters. (i.e. no 1byte-kana and no user-defined
 *    characters are present)
 *
 *    The interp argument is used to retrieve the internal code
 *    of the interpreter, and the internal code is used to help
 *    determining the encoding when it is ambiguous.  Interp might
 *    be NULL.
 *
 * Results:
 *    The return value is the encoding (kanji code) of the
 *    given string.  If the string contains only ASCII
 *    characters, TCL_NOT_KANJI will be returned.
 *
 * Side effects:
 *    None.
 *
 *----------------------------------------------------------------------
 */

static int
EncodingDetection(string, end)
     unsigned char *string;
     unsigned char *end;
{
    unsigned char *s = (unsigned char *)string;
    int kanji_found = 0;
    int len;
    int i;

    int type = TYPE_NONE;
    int type2 = TYPE_NONE;

    if (end == NULL) {
      end = string + strlen(string);
    }
    len = end - s;
    if (len < 2) {
      return TCL_NOT_KANJI;
    }

    for (i = 0; i < len; i++) {
      if (kanji_found == 1 &&
          s[i] < 0x80) {
          goto scanDone;
      }
      if (s[i] == '\033') {
          int rem;
          i++;
          rem = len - i;
          /*
           * It might be JIS encoding.  The valid JIS
           * leading sequences are:
           *    ESC $ B  ESC $ ( B   -- designate JIS X0208
           *    ESC $ @  ESC $ ( @   -- designate old JIS X0208
           *    ESC ( B              -- designate ASCII
           *    ESC ( J              -- designate JIS X0201
           */
          if (rem >= 3) {
            if (s[i] == '$' && 
                ( (s[i + 1] == 'B' || s[i + 1] == '@') ||
                  (s[i + 1] == '(' && (s[i + 2] == 'B' || s[i + 2] == '@')) ) ) {
                return TCL_JIS;
            }
          } else if (rem >= 2) {
            if (s[i] == '$' &&
                (s[i + 1] == 'B' || s[i + 1] == '@')) {
                return TCL_JIS;
            } else if (s[i] == '(' &&
                     (s[i + 1] == 'B' || s[i + 1] == 'J')) {
                return TCL_JIS;
            }
          }
          continue;
      } else {
          type = charTypeTbl[s[i]];

          switch (type) {
            case TYPE_ASCII:
            case TYPE_ASCII|TYPE_SJIS_2:
            case TYPE_SJIS_2: {
                /*
                 * need further scan.
                 */
                continue;
                break;
            }

            case TYPE_SJIS_1|TYPE_SJIS_2:
            case TYPE_SJIS_1|TYPE_SJIS_2|TYPE_EUC_1|TYPE_EUC_2: {
                /*
                 * got sjis byte 1. check next.
                 */
                if (i < len) {
                  if (s[i + 1] == 0) {
                      goto scanDone;
                  }
                  type2 = charTypeTbl[s[i + 1]];
                  if (type == (TYPE_SJIS_1|TYPE_SJIS_2)) {
                      /*
                       * means byte 1 is sjis1|sjis2 (0x81 - 0x9f).
                       */
                      if (IS_TYPE(type2, TYPE_SJIS_2)) {
                        /*
                         * byte 1 (0x81 - 0x9f)
                         * byte 2 (0x40 - 0x7e, 0x80 - 0xfc)
                         * SJIS no doubt.
                         */
                        return TCL_SJIS;
                      }
                  } else {
                      /*
                       * means byte 1 is sjis1|sjis2|euc1|euc2 (0xe0 - 0xef).
                       */
                      if (IS_TYPE(type2, TYPE_SJIS_2)) {
                        if (!(IS_TYPE(type2, TYPE_EUC_2))) {
                            /*
                             * byte 1 (0xe0 - 0xef)
                             * byte 2 (0x40 - 0x7e, 0x80 - 0xa0)
                             *      THUS:
                             *            SJIS no doubt.
                             */
                            return TCL_SJIS;
                        } else {
                            /*
                             * byte 1 (0xe0 - 0xef)
                             * byte 2 (0xa1 - 0xfc)
                             * EUC/SJIS Ambig. continue scan.
                             */
                            kanji_found = 1;
                        }
                      } else if (type2 == TYPE_EUC_2) {
                        /*
                         * byte 1 (0xe0 - 0xfc)
                         * byte 2 (0xfd - 0xfe)
                         *    THUS:
                         *          EUC no doubt.
                         */
                        return TCL_EUC;
                      }
                  }
                  i++;
                }
                break;
            }

            case TYPE_SJIS_2|TYPE_EUC_1|TYPE_EUC_2:
            case TYPE_SKANA|TYPE_SJIS_2|TYPE_EUC_1|TYPE_EUC_2: {
                /*
                 * sjis hankaku kana/EUC Ambig.
                 */
                int isByte1Kana = 0;
                if (IS_TYPE(type, TYPE_SKANA)) {
                  /*
                   * return SJIS if it is prefered.
                   */
                  if (globalAssumeSjis == 1) {
                      return TCL_SJIS;
                  }
                  isByte1Kana = 1;
                }
                kanji_found = 1;
                if (i < len) {
                  if (s[i + 1] == 0) {
                      goto scanDone;
                  }
                  type2 = charTypeTbl[s[i + 1]];
                  if (isByte1Kana == 0 &&
                      (IS_TYPE(type2, TYPE_EUC_2)) &&
                      (!(IS_TYPE(type2, TYPE_SKANA)))) {
                      /*
                       * TYPE_SJIS_2 on byte 1 is ignorable.
                       *      THUS:
                       *            EUC maybe.
                       */
                      if (globalAssumeSjis == 0) {
                        return TCL_EUC;
                      }
                  }
                  i++;
                }
                break;
            }
            
            default: {
                break;
            }
          }
      }
    }

    scanDone:
    if (kanji_found) {
      /*
       * return TCL_EUC/TCL_SJIS depend on globalAssumeSjis.
       */
#ifdef ENCODE_DEBUG
      char *x = alloca(i + 1);
      memcpy(x, string, i);
      x[i] = 0;
      fprintf(DEBUG_OUT, "\tdebug: '%s' scaned %d\n", x, i);
#endif /* ENCODE_DEBUG */
      if (globalAssumeSjis == 1) {
          return TCL_SJIS;
      } else {
          return TCL_EUC;
      }
    }

    /* no kanji found */
    return TCL_NOT_KANJI;
}

/*
 *----------------------------------------------------------------------
 *
 * Tcl_ConvertToInternal
 *
 *    Convert the kanji code of given string to current internal
 *    kanji code.
 *
 * Results:
 *    Return a pointer of converted string.
 *
 * Side effects:
 *    Memory is allocated.
 *
 *----------------------------------------------------------------------
 */
char *
Tcl_ConvertToInternal(interp, string, kanjiCodePtr)
     Tcl_Interp *interp;
     char *string;
     int *kanjiCodePtr;
{
    int kanjiCode = TCL_ANY;
    int intKanjiCode = Tcl_KanjiCode(interp);
    char *ret;

    if (intKanjiCode != TCL_ANY) {
      if (Tcl_KanjiString(NULL, string, NULL, &kanjiCode) == TCL_OK) {
          if (intKanjiCode != kanjiCode) {
            wchar *ws;
            int oLength = Tcl_KanjiEncode(kanjiCode, string, NULL);
            int length;
            ws = (wchar *)ckalloc((unsigned)(sizeof(wchar) * (oLength + 1)));
            (void) Tcl_KanjiEncode(kanjiCode, string, ws);
            
            length = Tcl_KanjiDecode(intKanjiCode, ws, NULL);
            ret = (char *)ckalloc((unsigned)(sizeof(char) * (length + 1)));
            (void) Tcl_KanjiDecode(intKanjiCode, ws, ret);
            ckfree((char *)ws);
            string = ret;
          }
      }
    }
    if (kanjiCodePtr != NULL) {
      *kanjiCodePtr = kanjiCode;
    }
    return string;
}

/*
 *----------------------------------------------------------------------
 *
 * Tcl_KanjiFindNamespace
 *
 *    Find "::" in given string, except in kanji sequence.
 *
 * Results:
 *    Like a strstr(string, "::"), return address of first "::" in
 *    the string.
 *
 * Side effects:
 *    None.
 *
 *----------------------------------------------------------------------
 */

char *
Tcl_KanjiFindNamespace(s)
     char *s;
{
    while (*s != '\0') {
      if (IS_KANJISTART(UCHAR(*s))) {
          s += Tcl_KanjiSkip(s, NULL, NULL);
      } else if (*s == ':' && *(s + 1) == ':') {
          return s;
      } else {
          s++;
      }
    }
    return NULL;
}

/*
 *----------------------------------------------------------------------
 *
 * Tcl_GetKanjiCode --
 *
 *    Get the kanji code according to the string.
 *
 * Results:
 *    A standard Tcl result.
 *
 * Side effects:
 *    None.
 *
 *----------------------------------------------------------------------
 */

int
Tcl_GetKanjiCode(interp, string, kanjiCodePtr)
     Tcl_Interp *interp;
     char *string;
     int *kanjiCodePtr;
{
    if( strcmp(string, "JIS") == 0 ) {
      *kanjiCodePtr = TCL_JIS;
    } else if( strcmp(string, "SJIS") == 0 ) {
      *kanjiCodePtr = TCL_SJIS;
    } else if( strcmp(string, "EUC") == 0 ) {
      *kanjiCodePtr = TCL_EUC;
    } else if( strcmp(string, "ANY") == 0 ) {
      *kanjiCodePtr = TCL_ANY;
    } else {
      Tcl_AppendResult(interp, "bad kanjiCode \"", string,
            "\": should be JIS, SJIS, EUC, or ANY", (char *) NULL);
      return TCL_ERROR;
    }

    return TCL_OK;
}

/*
 *----------------------------------------------------------------------
 *
 * Tcl_KanjiFile --
 *
 *    Check if the file contains kanji.
 *
 * Results:
 *    If the string contains kanji, set its kanji code
 *    and return TCL_OK.  Otherwise return TCL_ERROR.
 *
 * Side effects:
 *    None.
 *
 *----------------------------------------------------------------------
 */

int
Tcl_KanjiFile(interp, fileName, kanjiCodePtr)
     Tcl_Interp *interp;
     char *fileName;
     int *kanjiCodePtr;
{
    Tcl_Channel chan;
    Tcl_DString ds, kc;
    int length, result = TCL_OK;

    chan = Tcl_OpenFileChannel(interp, fileName, "r", 0);
    if (chan == (Tcl_Channel) NULL) {
      return TCL_ERROR;
    }

    *kanjiCodePtr = TCL_ANY;
    Tcl_DStringInit(&ds);
    Tcl_DStringInit(&kc);
    (void) Tcl_GetChannelOption(interp, chan, "-inputCode", &kc);
    (void) Tcl_SetChannelOption(interp, chan, "-inputCode", "ANY");
    while ((length = Tcl_Gets(chan, &ds)) > 0) {
      (void) Tcl_KanjiString(interp, Tcl_DStringValue(&ds), NULL, kanjiCodePtr);
      if (*kanjiCodePtr != TCL_ANY) {
          break;
      }
    }
    (void) Tcl_SetChannelOption(interp, chan, "-inputCode", Tcl_DStringValue(&kc));
    Tcl_DStringFree(&ds);
    Tcl_DStringFree(&kc);

    if (length < 0) {
        if (!Tcl_Eof(chan) && !Tcl_InputBlocked(chan)) {
            Tcl_AppendResult(interp, "error reading \"",
                Tcl_GetChannelName(chan), "\": ", Tcl_PosixError(interp),
                (char *) NULL);
          result = TCL_ERROR;
        }
    }

    if (Tcl_Close(interp, chan) != TCL_OK) {
      result = TCL_ERROR;
    }

    return result;
}

/*
 *--------------------------------------------------------------
 *
 * Tcl_WStrlen --
 *
 *    Get the length of the wide string.
 *
 * Results:
 *    Number of the wide characters.
 *
 * Side effects:
 *    None.
 *
 *--------------------------------------------------------------
 */

int
Tcl_WStrlen(wstr)
     wchar *wstr;
{
    int n = 0;

    while( *wstr++ ) n++;

    return n;
}

/*
 *--------------------------------------------------------------
 *
 * Tcl_WStrcpy --
 *
 *    Copy the wide string.
 *
 * Results:
 *    Pointer to the original string.
 *
 * Side effects:
 *    None.
 *
 *--------------------------------------------------------------
 */

wchar *
Tcl_WStrcpy(wstr1, wstr2)
     wchar *wstr1, *wstr2;
{
    wchar *ans = wstr1;

    while( (*wstr1++ = *wstr2++) != 0 ) ;

    return( ans );
}

/*
 *--------------------------------------------------------------
 *
 * Tcl_WStrncpy --
 *
 *    Copy the specific number of wide characters.
 *
 * Results:
 *    Pointer to the original string.
 *
 * Side effects:
 *    None.
 *
 *--------------------------------------------------------------
 */

wchar *
Tcl_WStrncpy(wstr1, wstr2, n)
     wchar *wstr1, *wstr2;
     int n;
{
    wchar *ans = wstr1;

    while( n-- > 0 && (*wstr1++ = *wstr2++) ) ;

    while( n-- > 0 ) *wstr1++ = 0;

    return( ans );
}

/*
 *--------------------------------------------------------------
 *
 * Tcl_WStrcmp --
 *
 *    Compare two wide strings.
 *
 * Results:
 *    Return 0 if two strings are same.
 *
 * Side effects:
 *    None.
 *
 *--------------------------------------------------------------
 */

int
Tcl_WStrcmp(wstr1, wstr2)
     wchar *wstr1, *wstr2;
{
    while( *wstr1 && *wstr1 == *wstr2 ) wstr1++, wstr2++;

    return( *wstr1 - *wstr2 );
}

/*
 *--------------------------------------------------------------
 *
 * Tcl_WStrncmp --
 *
 *    Compare two wide strings.
 *
 * Results:
 *    Return 0 if two strings are same.
 *
 * Side effects:
 *    None.
 *
 *--------------------------------------------------------------
 */

int
Tcl_WStrncmp(wstr1, wstr2, n)
     wchar *wstr1, *wstr2;
     int n;
{
    while( n-- > 0 && *wstr1 && *wstr1 == *wstr2 ) wstr1++, wstr2++;

    if( n < 0 ) return( 0 );

    return( *wstr1 - *wstr2 );
}

/*
 *--------------------------------------------------------------
 *
 * Tcl_WStrstr --
 *
 *    Locate the first instance of a substring in a string.
 *
 * Results:
 *    If string contains substring, the return value is the
 *    location of the first matching instance of substring
 *    in string.  If string doesn't contain substring, the
 *    return value is 0.  Matching is done on an exact
 *    character-for-character basis with no wildcards or special
 *    characters.
 *
 * Side effects:
 *    None.
 *
 *--------------------------------------------------------------
 */

wchar *
Tcl_WStrstr(wstr, subwstr)
    register wchar *wstr;     /* String to search. */
    wchar *subwstr;           /* Substring to try to find in string. */
{
    register wchar *a, *b;

    /* First scan quickly through the two strings looking for a
     * single-character match.  When it's found, then compare the
     * rest of the substring.
     */

    b = subwstr;
    if (*b == 0) {
      return wstr;
    }
    for ( ; *wstr != 0; wstr += 1) {
      if (*wstr != *b) {
          continue;
      }
      a = wstr;
      while (1) {
          if (*b == 0) {
            return wstr;
          }
          if (*a++ != *b++) {
            break;
          }
      }
      b = subwstr;
    }
    return (wchar *) 0;
}

/*
 *----------------------------------------------------------------------
 *
 * Tcl_WStringMatch --
 *
 *    See if a particular wide string matches a particular pattern.
 *
 * Results:
 *    The return value is 1 if string matches pattern, and
 *    0 otherwise.  The matching operation permits the following
 *    special characters in the pattern: *?\[] (see the manual
 *    entry for details on what these mean).
 *
 * Side effects:
 *    None.
 *
 *----------------------------------------------------------------------
 */

int
Tcl_WStringMatch(string, pattern)
    register wchar *string;   /* String. */
    register wchar *pattern;  /* Pattern, which may contain
                         * special characters. */
{
    wchar c2;

    while (1) {
      /* See if we're at the end of both the pattern and the string.
       * If so, we succeeded.  If we're at the end of the pattern
       * but not at the end of the string, we failed.
       */
      
      if (*pattern == 0) {
          if (*string == 0) {
            return 1;
          } else {
            return 0;
          }
      }
      if ((*string == 0) && (*pattern != '*')) {
          return 0;
      }

      /* Check for a "*" as the next pattern character.  It matches
       * any substring.  We handle this by calling ourselves
       * recursively for each postfix of string, until either we
       * match or we reach the end of the string.
       */
      
      if (*pattern == '*') {
          pattern += 1;
          if (*pattern == 0) {
            return 1;
          }
          while (1) {
            if (Tcl_WStringMatch(string, pattern)) {
                return 1;
            }
            if (*string == 0) {
                return 0;
            }
            string += 1;
          }
      }
    
      /* Check for a "?" as the next pattern character.  It matches
       * any single character.
       */

      if (*pattern == '?') {
          goto thisCharOK;
      }

      /* Check for a "[" as the next pattern character.  It is followed
       * by a list of characters that are acceptable, or by a range
       * (two characters separated by "-").
       */
      
      if (*pattern == '[') {
          pattern += 1;
          while (1) {
            if ((*pattern == ']') || (*pattern == 0)) {
                return 0;
            }
            if (*pattern == *string) {
                break;
            }
            if (pattern[1] == '-') {
                c2 = pattern[2];
                if (c2 == 0) {
                  return 0;
                }
                if ((*pattern <= *string) && (c2 >= *string)) {
                  break;
                }
                if ((*pattern >= *string) && (c2 <= *string)) {
                  break;
                }
                pattern += 2;
            }
            pattern += 1;
          }
          while ((*pattern != ']') && (*pattern != 0)) {
            pattern += 1;
          }
          goto thisCharOK;
      }
    
      /* If the next pattern character is '/', just strip off the '/'
       * so we do exact matching on the character that follows.
       */
      
      if (*pattern == '\\') {
          pattern += 1;
          if (*pattern == 0) {
            return 0;
          }
      }

      /* There's no special character.  Just make sure that the next
       * characters of each string match.
       */
      
      if (*pattern != *string) {
          return 0;
      }

      thisCharOK: pattern += 1;
      string += 1;
    }
}

/*
 *----------------------------------------------------------------------
 *
 * Tcl_DWStringInit --
 *
 *    Initializes a dynamic string, discarding any previous contents
 *    of the string (Tcl_DWStringFree should have been called already
 *    if the dynamic string was previously in use).
 *
 * Results:
 *    None.
 *
 * Side effects:
 *    The dynamic string is initialized to be empty.
 *
 *----------------------------------------------------------------------
 */

void
Tcl_DWStringInit(dwsPtr)
    register Tcl_DWString *dwsPtr;  /* Pointer to structure for
                               * dynamic string. */
{
    dwsPtr->wstring = dwsPtr->staticSpace;
    dwsPtr->length = 0;
    dwsPtr->spaceAvl = TCL_DWSTRING_STATIC_SIZE;
    dwsPtr->staticSpace[0] = 0;
}

/*
 *----------------------------------------------------------------------
 *
 * Tcl_DWStringAppend --
 *
 *    Append more characters to the current value of a dynamic string.
 *
 * Results:
 *    The return value is a pointer to the dynamic string's new value.
 *
 * Side effects:
 *    Length bytes from string (or all of string if length is less
 *    than zero) are added to the current value of the string.  Memory
 *    gets reallocated if needed to accomodate the string's new size.
 *
 *----------------------------------------------------------------------
 */

wchar *
Tcl_DWStringAppend(dwsPtr, wstring, length)
    register Tcl_DWString *dwsPtr;  /* Structure describing dynamic
                               * string. */
    wchar *wstring;                 /* String to append.  If length is
                               * -1 then this must be
                               * null-terminated. */
    int length;                     /* Number of characters from string
                               * to append.  If < 0, then append all
                               * of string, up to null at end. */
{
    int newSize;
    wchar *newString, *dst, *end;

    if (length < 0) {
      length = Tcl_WStrlen(wstring);
    }
    newSize = length + dwsPtr->length;

    /*
     * Allocate a larger buffer for the string if the current one isn't
     * large enough.  Allocate extra space in the new buffer so that there
     * will be room to grow before we have to allocate again.
     */

    if (newSize >= dwsPtr->spaceAvl) {
      dwsPtr->spaceAvl = newSize*2;
      newString = (wchar *) ckalloc((unsigned) (dwsPtr->spaceAvl * sizeof(wchar)));
      memcpy((VOID *)newString, (VOID *) dwsPtr->wstring,
            (size_t) (dwsPtr->length * sizeof(wchar)));
      if (dwsPtr->wstring != dwsPtr->staticSpace) {
          ckfree((char *) dwsPtr->wstring);
      }
      dwsPtr->wstring = newString;
    }

    /*
     * Copy the new string into the buffer at the end of the old
     * one.
     */

    for (dst = dwsPtr->wstring + dwsPtr->length, end = wstring+length;
          wstring < end; wstring++, dst++) {
      *dst = *wstring;
    }
    *dst = 0;
    dwsPtr->length += length;
    return dwsPtr->wstring;
}

/*
 *----------------------------------------------------------------------
 *
 * Tcl_DWStringSetLength --
 *
 *    Change the length of a dynamic string.  This can cause the
 *    string to either grow or shrink, depending on the value of
 *    length.
 *
 * Results:
 *    None.
 *
 * Side effects:
 *    The length of dsPtr is changed to length and a null byte is
 *    stored at that position in the string.  If length is larger
 *    than the space allocated for dsPtr, then a panic occurs.
 *
 *----------------------------------------------------------------------
 */

void
Tcl_DWStringSetLength(dwsPtr, length)
    register Tcl_DWString *dwsPtr;  /* Structure describing dynamic
                               * string. */
    int length;                     /* New length for dynamic string. */
{
    if (length < 0) {
      length = 0;
    }
    if (length >= dwsPtr->spaceAvl) {
      wchar *newString;

      dwsPtr->spaceAvl = length+1;
      newString = (wchar *) ckalloc((unsigned) (dwsPtr->spaceAvl * sizeof(wchar)));

      /*
       * SPECIAL NOTE: must use memcpy, not strcpy, to copy the string
       * to a larger buffer, since there may be embedded NULLs in the
       * string in some cases.
       */

      memcpy((VOID *) newString, (VOID *) dwsPtr->wstring,
            (size_t) (dwsPtr->length * sizeof(wchar)));
      if (dwsPtr->wstring != dwsPtr->staticSpace) {
          ckfree((char *) dwsPtr->wstring);
      }
      dwsPtr->wstring = newString;
    }
    dwsPtr->length = length;
    dwsPtr->wstring[length] = 0;
}

/*
 *----------------------------------------------------------------------
 *
 * Tcl_DWStringFree --
 *
 *    Frees up any memory allocated for the dynamic string and
 *    reinitializes the string to an empty state.
 *
 * Results:
 *    None.
 *
 * Side effects:
 *    The previous contents of the dynamic string are lost, and
 *    the new value is an empty string.
 *
 *----------------------------------------------------------------------
 */

void
Tcl_DWStringFree(dwsPtr)
    register Tcl_DWString *dwsPtr;  /* Structure describing dynamic
                               * string. */
{
    if (dwsPtr->wstring != dwsPtr->staticSpace) {
      ckfree((char *) dwsPtr->wstring);
    }
    dwsPtr->wstring = dwsPtr->staticSpace;
    dwsPtr->length = 0;
    dwsPtr->spaceAvl = TCL_DWSTRING_STATIC_SIZE;
    dwsPtr->staticSpace[0] = 0;
}

/*
 *----------------------------------------------------------------------
 *
 * Tcl_DWStringResult --
 *
 *    This procedure moves the value of a dynamic string into an
 *    interpreter as its result.  The string itself is reinitialized
 *    to an empty string.
 *
 * Results:
 *    None.
 *
 * Side effects:
 *    The string is "moved" to interp's result, and any existing
 *    result for interp is freed up.  DsPtr is reinitialized to
 *    an empty string.
 *
 *----------------------------------------------------------------------
 */

void
Tcl_DWStringResult(interp, dwsPtr)
    Tcl_Interp *interp;             /* Interpreter whose result is to be
                               * reset. */
    Tcl_DWString *dwsPtr;           /* Dynamic string that is to become
                               * the result of interp. */
{
    int kanjiCode = ((Interp *) interp)->kanjiCode;
    int length;
    char* string;

    length = Tcl_KanjiDecode(kanjiCode, dwsPtr->wstring, NULL);
    string = (char *) ckalloc((unsigned) (length + 1));
    (void) Tcl_KanjiDecode(kanjiCode, dwsPtr->wstring, string);

    Tcl_ResetResult(interp);
    interp->result = string;
    interp->freeProc = (Tcl_FreeProc *) free;

    Tcl_DWStringFree(dwsPtr);
}

/*
 *----------------------------------------------------------------------
 *
 * Tcl_DWStringGetResult --
 *
 *    This procedure moves the result of an interpreter into a
 *    dynamic string.
 *
 * Results:
 *    None.
 *
 * Side effects:
 *    The interpreter's result is cleared, and the previous contents
 *    of dsPtr are freed.
 *
 *----------------------------------------------------------------------
 */

void
Tcl_DWStringGetResult(interp, dwsPtr)
    Tcl_Interp *interp;             /* Interpreter whose result is to be
                               * reset. */
    Tcl_DWString *dwsPtr;           /* Dynamic string that is to become
                               * the result of interp. */
{
    Interp *iPtr = (Interp *) interp;
    int kanjiCode = iPtr->kanjiCode;
    int length;
    wchar *wstring;

    length = Tcl_KanjiEncode(kanjiCode, iPtr->result, NULL);
    wstring = (wchar *) ckalloc((unsigned) (length * sizeof(wchar)));
    (void) Tcl_KanjiEncode(kanjiCode, iPtr->result, wstring);

    if (iPtr->freeProc != NULL) {
      (*iPtr->freeProc)(iPtr->result);
      iPtr->freeProc = NULL;
    }
    iPtr->result = iPtr->resultSpace;
    iPtr->resultSpace[0] = 0;

    if (dwsPtr->wstring != dwsPtr->staticSpace) {
      ckfree((char *) dwsPtr->wstring);
    }
    dwsPtr->length = Tcl_WStrlen(wstring);
    if (dwsPtr->length < TCL_DWSTRING_STATIC_SIZE) {
      dwsPtr->wstring = dwsPtr->staticSpace;
      dwsPtr->spaceAvl = TCL_DWSTRING_STATIC_SIZE;
      Tcl_WStrcpy(dwsPtr->wstring, wstring);
      ckfree((char *) wstring);
    } else {
      dwsPtr->wstring = wstring;
      dwsPtr->spaceAvl = dwsPtr->length + 1;
    }
}

/*
 *----------------------------------------------------------------------
 *
 * Tcl_GetWStr --
 *
 *    Given a string, map them to a wide string.
 *
 * Results:
 *    The return value is normally a pointer to the wide string.
 *
 * Side effects:
 *    The wide string is added to an internal database with a reference
 *    count.  For each call to this procedure, there should eventually
 *    be a call to Tcl_FreeWStr, so that the database is cleaned up when
 *    wide strings aren't in use anymore.
 *
 *----------------------------------------------------------------------
 */
wchar *
Tcl_GetWStr(interp, str, kanjiCodePtr)
     Tcl_Interp *interp;
     char *str;
     int *kanjiCodePtr;
{
    Tcl_HashEntry *wstrHashPtr;
    int new;
    TclWStr *wstrPtr;
    Tcl_HashEntry *idHashPtr;
    int kanjiCode;
    int length;

    if (!ws_initialized) WStrInit();

    /*
     * First, check to see if there's already a mapping for this string.
     */

    wstrHashPtr = Tcl_CreateHashEntry(&wstrTable, str, &new);
    if (!new) {
      wstrPtr = (TclWStr *)Tcl_GetHashValue(wstrHashPtr);
      wstrPtr->refCount++;
      if (kanjiCodePtr != NULL) {
          *kanjiCodePtr = wstrPtr->kanjiCode;
      }
      return wstrPtr->wstr;
    }

    /*
     * The string isn't currently known.  Map from the string to
     * a wide string, and add a new structure to the database.
     */

    /*
     * Get the kanji encoding information.
     */
    if (interp != NULL) {
      kanjiCode = Tcl_KanjiCode(interp);
      if (kanjiCode == TCL_ANY) goto Detect;
    } else {
      Detect:
      (void) Tcl_KanjiString(NULL, str, NULL, &kanjiCode);
    }

    wstrPtr = (TclWStr *) ckalloc(sizeof(TclWStr));
    memset((VOID *)wstrPtr, 0, sizeof(TclWStr));

    wstrPtr->kanjiCode = kanjiCode;
    length = strlen(str);
    wstrPtr->str[kanjiCode] = ckalloc((unsigned)(length + 1));
    memcpy((VOID *)(wstrPtr->str[kanjiCode]), (VOID *)str, (unsigned)length);
    (wstrPtr->str[kanjiCode])[length] = '\0';

    length = Tcl_KanjiEncode(kanjiCode, str, NULL);
    wstrPtr->wstr = (wchar *) ckalloc((unsigned)(length + 1) * sizeof(wchar));
    (void) Tcl_KanjiEncode(kanjiCode, str, wstrPtr->wstr);

    wstrPtr->refCount = 1;
    wstrPtr->wstrHashPtr = wstrHashPtr;
    idHashPtr = Tcl_CreateHashEntry(&ws_idTable, (char *)wstrPtr->wstr, &new);
    if (!new) {
      panic("wstr already registered in Tcl_GetWStr");
    }
    Tcl_SetHashValue(wstrHashPtr, wstrPtr);
    Tcl_SetHashValue(idHashPtr, wstrPtr);

    if (kanjiCodePtr != NULL) {
      *kanjiCodePtr = kanjiCode;
    }
    return wstrPtr->wstr;
}

/*
 *----------------------------------------------------------------------
 *
 * Tcl_FreeWStr --
 *
 *    This procedure is called to release a wide string allocated
 *    by Tcl_GetWStr.
 *
 * Results:
 *    None.
 *
 * Side effects:
 *    The reference count associated with wstr is decremented, and
 *    wstr is officially deallocated if no-one is using it anymore.
 *
 *----------------------------------------------------------------------
 */
void
Tcl_FreeWStr(wstr)
     wchar *wstr;
{
    Tcl_HashEntry *idHashPtr;
    register TclWStr *wstrPtr;

    if( !ws_initialized ) panic("Tcl_FreeWStr called before Tcl_GetWStr");

    idHashPtr = Tcl_FindHashEntry(&ws_idTable, (char *)wstr);
    if( idHashPtr == NULL ) {
      panic("Tcl_FreeWStr received unknown wstr argument");
    }
    wstrPtr = (TclWStr *)Tcl_GetHashValue(idHashPtr);
    wstrPtr->refCount--;
    if( wstrPtr->refCount == 0 ) {
      int i;
      for (i = 0; i < 4; i++) {
          if (wstrPtr->str[i] != NULL) {
            ckfree((char *)wstrPtr->str[i]);
          }
      }
      ckfree((char *)wstrPtr->wstr);
      Tcl_DeleteHashEntry(wstrPtr->wstrHashPtr);
      Tcl_DeleteHashEntry(idHashPtr);
      ckfree((char *)wstrPtr);
    }
}

/*
 *----------------------------------------------------------------------
 *
 * Tcl_InsertWStr --
 *
 *    This procedure is called to modify the existing wide
 *    string by inserting characters.
 *
 * Results:
 *    The return value is a pointer to the wide string.
 *
 * Side effects:
 *    None.
 *
 *----------------------------------------------------------------------
 */
wchar *
Tcl_InsertWStr(interp, orig, index, wstr)
     Tcl_Interp *interp;
     wchar *orig;
     int index;
     wchar *wstr;
{
    int kanjiCode = TCL_ANY;
    int origLen, wstrLen;
    wchar *newstr;
    char *str;
    int length, new;
    Tcl_HashEntry *wstrHashPtr;
    register TclWStr *wstrPtr;
    Tcl_HashEntry *idHashPtr;

    if (!ws_initialized) panic("Tcl_InsertWStr called before Tcl_GetWStr");

    origLen = Tcl_WStrlen(orig);
    wstrLen = Tcl_WStrlen(wstr);
    newstr = (wchar *) ckalloc((unsigned)(origLen + wstrLen + 1) * sizeof(wchar));
    Tcl_WStrncpy(newstr, orig, index);
    Tcl_WStrcpy(newstr+index, wstr);
    Tcl_WStrcpy(newstr+index+wstrLen, orig+index);

    /*
     * if interp != NULL, use internal kanji code.
     */
    if (interp != NULL) {
      kanjiCode = Tcl_KanjiCode(interp);
      if (kanjiCode == TCL_ANY) goto Decide;
    } else {
      Decide:
      kanjiCode = Tcl_DefaultKanjiCode();
    }

    /*
     * Check if there's already a mapping for this string.
     */
    length = Tcl_KanjiDecode(kanjiCode, newstr, NULL);
    str = (char *) ckalloc((unsigned)(length + 1));
    (void) Tcl_KanjiDecode(kanjiCode, newstr, str);

    wstrHashPtr = Tcl_CreateHashEntry(&wstrTable, str, &new);
    if (!new) {
      wstrPtr = (TclWStr *) Tcl_GetHashValue(wstrHashPtr);
      wstrPtr->refCount++;
      Tcl_FreeWStr(orig);
      ckfree((char *) newstr);
      ckfree(str);
      return wstrPtr->wstr;
    }

    /*
     * The string isn't currently known.  Map from the string to
     * a wide string, and add a new structure to the database.
     */
    wstrPtr = (TclWStr *) ckalloc(sizeof(TclWStr));
    memset((VOID *)wstrPtr, 0, sizeof(TclWStr));
    wstrPtr->kanjiCode = kanjiCode;
    wstrPtr->str[kanjiCode] = str;
    wstrPtr->wstr = newstr;
    wstrPtr->refCount = 1;
    wstrPtr->wstrHashPtr = wstrHashPtr;
    idHashPtr = Tcl_CreateHashEntry(&ws_idTable, (char *)wstrPtr->wstr, &new);
    if (!new) {
      panic("wstr already registered in Tcl_InsertWStr");
    }
    Tcl_SetHashValue(wstrPtr->wstrHashPtr, wstrPtr);
    Tcl_SetHashValue(idHashPtr, wstrPtr);

    Tcl_FreeWStr(orig);
    return wstrPtr->wstr;
}

/*
 *----------------------------------------------------------------------
 *
 * Tcl_DeleteWStr --
 *
 *    This procedure is called to modify the existing wide
 *    string by deleting characters.
 *
 * Results:
 *    The return value is a pointer to the wide string.
 *
 * Side effects:
 *    None.
 *
 *----------------------------------------------------------------------
 */
wchar *
Tcl_DeleteWStr(interp, orig, index, count)
     Tcl_Interp *interp;
     wchar *orig;
     int index;
     int count;
{
    int kanjiCode;
    int length;
    wchar *newstr;
    char *str;
    Tcl_HashEntry *wstrHashPtr;
    register TclWStr *wstrPtr;
    Tcl_HashEntry *idHashPtr;
    int new;

    if (!ws_initialized) panic("Tcl_InsertWStr called before Tcl_GetWStr");

    length = Tcl_WStrlen(orig);
    newstr = (wchar *) ckalloc((unsigned)(length - count + 1) * sizeof(wchar));
    Tcl_WStrncpy(newstr, orig, index);
    Tcl_WStrcpy(newstr+index, orig+index+count);

    /*
     * if interp != NULL, use internal kanji code.
     */
    if (interp != NULL) {
      kanjiCode = Tcl_KanjiCode(interp);
      if (kanjiCode == TCL_ANY) goto Decide;
    } else {
      Decide:
      kanjiCode = Tcl_DefaultKanjiCode();
    }

    /*
     * Check if there's already a mapping for this string.
     */
    length = Tcl_KanjiDecode(kanjiCode, newstr, NULL);
    str = (char *) ckalloc((unsigned)(length + 1));
    (void) Tcl_KanjiDecode(kanjiCode, newstr, str);

    wstrHashPtr = Tcl_CreateHashEntry(&wstrTable, str, &new);
    if (!new) {
      wstrPtr = (TclWStr *) Tcl_GetHashValue(wstrHashPtr);
      wstrPtr->refCount++;
      ckfree((char *) newstr);
      ckfree(str);
      Tcl_FreeWStr(orig);
      return wstrPtr->wstr;
    }

    /*
     * The string isn't currently known.  Map from the string to
     * a wide string, and add a new structure to the database.
     */
    wstrPtr = (TclWStr *) ckalloc(sizeof(TclWStr));
    memset((VOID *)wstrPtr, 0, sizeof(TclWStr));
    wstrPtr->kanjiCode = kanjiCode;
    wstrPtr->str[kanjiCode] = str;
    wstrPtr->wstr = newstr;
    wstrPtr->refCount = 1;
    wstrPtr->wstrHashPtr = wstrHashPtr;
    idHashPtr = Tcl_CreateHashEntry(&ws_idTable, (char *)wstrPtr->wstr, &new);
    if (!new) {
      panic("wstr already registered in Tcl_DeleteWStr");
    }
    Tcl_SetHashValue(wstrPtr->wstrHashPtr, wstrPtr);
    Tcl_SetHashValue(idHashPtr, wstrPtr);

    Tcl_FreeWStr(orig);
    return wstrPtr->wstr;
}

/*
 *--------------------------------------------------------------
 *
 * Tcl_DecodeWStr --
 *
 *    Answer the original string of the wide string.
 *
 * Results:
 *    If interp == NULL, return original string of the wide string.
 *    Otherwise return string converted to internal kanji code.
 *
 * Side effects:
 *    None.
 *
 *--------------------------------------------------------------
 */

char *
Tcl_DecodeWStr(interp, wstr, kanjiCodePtr)
     Tcl_Interp *interp;
     wchar *wstr;
     int *kanjiCodePtr;
{
    Tcl_HashEntry *idHashPtr;
    register TclWStr *wstrPtr;
    int kanjiCode;

    if (!ws_initialized) panic("Tcl_DecodeWStr called before Tcl_GetWStr");

    idHashPtr = Tcl_FindHashEntry(&ws_idTable, (char *)wstr);
    if( idHashPtr == NULL ) {
      panic("Tcl_DecodeWStr received unknown wstr argument");
    }
    wstrPtr = (TclWStr *) Tcl_GetHashValue(idHashPtr);

    if (kanjiCodePtr != NULL) {
      *kanjiCodePtr = wstrPtr->kanjiCode;
    }

    if (interp != NULL) {
      kanjiCode = Tcl_KanjiCode(interp);
      if (kanjiCode == TCL_ANY) goto Decide;
    } else {
      Decide:
      kanjiCode = wstrPtr->kanjiCode;
    }

    if (wstrPtr->str[kanjiCode] == NULL) {
      int lenght = Tcl_KanjiDecode(kanjiCode, wstrPtr->wstr, NULL);
      wstrPtr->str[kanjiCode] = ckalloc((unsigned)((lenght + 1) * sizeof(char)));
      (void)Tcl_KanjiDecode(kanjiCode, wstrPtr->wstr, wstrPtr->str[kanjiCode]);
    }
    return wstrPtr->str[kanjiCode];
}

/*
 *----------------------------------------------------------------------
 *
 * WStrInit --
 *
 *    Initialize the structures used for WStr management.
 *
 * Results:
 *    None.
 *
 * Side effects:
 *    Read the code.
 *
 *----------------------------------------------------------------------
 */

static void
WStrInit()
{
    ws_initialized = 1;
    Tcl_InitHashTable(&wstrTable, TCL_STRING_KEYS);
    Tcl_InitHashTable(&ws_idTable, TCL_ONE_WORD_KEYS);
}
#endif /* KANJI */

Generated by  Doxygen 1.6.0   Back to index