698 lines
		
	
	
		
			16 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			698 lines
		
	
	
		
			16 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
| /* $Id: conv.c,v 1.6 2001/11/29 09:34:14 ukai Exp $ */
 | |
| #include <stdio.h>
 | |
| #include <string.h>
 | |
| #include "fm.h"
 | |
| 
 | |
| #ifdef JP_CHARSET
 | |
| #include "terms.h"
 | |
| #include "Str.h"
 | |
| 
 | |
| #ifdef DEBUG
 | |
| #include <malloc.h>
 | |
| #endif				/* DEBUG */
 | |
| 
 | |
/* Short aliases for the unsigned integer types used in this file. */
#define uchar   unsigned char
#define ushort  unsigned short
#define uint    unsigned int

/* Override any earlier definitions so this file uses its own values. */
#ifdef TRUE
#undef TRUE
#endif
#define TRUE    1

#ifdef FALSE
#undef FALSE
#endif
#define FALSE   0

#ifdef ESC_CODE
#undef ESC_CODE
#endif
#define ESC_CODE        '\033'  /* ESC, introduces an escape sequence */

/*
 * Detector status words combine a verdict in the low nibble with a
 * parser state in the high nibble (see CODE_STATE() and *_STATE()).
 */

/* Verdict bits (low nibble). */
#define CODE_NORMAL     0x00
#define CODE_OK         0x01
#define CODE_BROKEN     0x02
#define CODE_ERROR      0x04

/* EUC-JP parser states (high nibble). */
#define EUC_NOSTATE     0x00
#define EUC_MBYTE1      0x10
#define EUC_SS2         0x20
#define EUC_SS3         0x40

/* Shift-JIS parser states (high nibble). */
#define SJIS_NOSTATE    0x00
#define SJIS_SHIFT_L    0x10
#define SJIS_SHIFT_H    0x20

/* ISO-2022-JP parser states (high nibble). */
#define ISO_NOSTATE     0x00
#define ISO_ESC         0x10
#define ISO_CS94        0x20
#define ISO_MBCS        0x40
#define ISO_MBYTE1      0x80

/* Accessors that split a status word into verdict and state. */
#define CODE_STATE(c)   ((c) & 0x0f)
#define EUC_STATE(c)    ((c) & 0xf0)
#define SJIS_STATE(c)   ((c) & 0xf0)
#define ISO_STATE(c)    ((c) & 0xf0)

/* Character set currently designated while reading or writing ISO-2022-JP. */
#define CSET_ASCII      0
#define CSET_X0208      1
#define CSET_X0201K     2
#define CSET_UNKNOWN    3

/*
 * Escape sequence pairs used when writing ISO-2022-JP: the "SI" sequence
 * is emitted before two-byte characters and the "SO" sequence before
 * returning to one-byte characters.  The FromEJ table pairs J*, N*, N2*,
 * N3* and J2* with CODE_JIS_J, CODE_JIS_N, CODE_JIS_n, CODE_JIS_m and
 * CODE_JIS_j respectively.
 */
#define JSIcode         "\033$@"
#define JSOcode         "\033(H"
#define J2SIcode        "\033$@"
#define J2SOcode        "\033(J"
#define NSIcode         "\033$B"
#define NSOcode         "\033(J"
#define N2SIcode        "\033$B"
#define N2SOcode        "\033(B"
#define N3SIcode        "\033$@"
#define N3SOcode        "\033(B"
/* USIcode/USOcode are not referenced by the code visible in this file. */
#define USIcode         "\033$"
#define USOcode         "\033+"
 | |
| 
 | |
| static char *SIcode, *SOcode;
 | |
| 
 | |
| static Str cConvEE(Str is);
 | |
| static Str cConvEJ(Str is);
 | |
| static Str cConvES(Str is);
 | |
| static Str cConvSE(Str is);
 | |
| static Str cConvJE(Str is);
 | |
| char checkShiftCode(Str buf, uchar);
 | |
| 
 | |
/*
 * Half-width to full-width katakana table.
 *
 * Indexed by (byte - 0xa0) for a JIS X 0201 kana byte in 0xa0..0xdf, or
 * by (byte - 0x20) for its 7-bit form.  Each entry holds the two 7-bit
 * bytes of the replacement two-byte character; callers OR in 0x80 when
 * they need the EUC-JP form.
 */
static char *han2zen_tab[64] = {
    /* 0x00 */ "!!", "!#", "!V", "!W", "!\"", "!&", "%r", "%!",
    /* 0x08 */ "%#", "%%", "%'", "%)", "%c", "%e", "%g", "%C",
    /* 0x10 */ "!<", "%\"", "%$", "%&", "%(", "%*", "%+", "%-",
    /* 0x18 */ "%/", "%1", "%3", "%5", "%7", "%9", "%;", "%=",
    /* 0x20 */ "%?", "%A", "%D", "%F", "%H", "%J", "%K", "%L",
    /* 0x28 */ "%M", "%N", "%O", "%R", "%U", "%X", "%[", "%^",
    /* 0x30 */ "%_", "%`", "%a", "%b", "%d", "%f", "%h", "%i",
    /* 0x38 */ "%j", "%k", "%l", "%m", "%o", "%s", "!+", "!,",
};
 | |
| 
 | |
| typedef struct _ConvRoutine {
 | |
|     char key;
 | |
|     Str (*routine) ();
 | |
|     char *ShiftIn, *ShiftOut;
 | |
| } ConvRoutine;
 | |
| 
 | |
| static ConvRoutine FromEJ[] = {
 | |
|     {CODE_JIS_J, cConvEJ, JSIcode, JSOcode},
 | |
|     {CODE_JIS_N, cConvEJ, NSIcode, NSOcode},
 | |
|     {CODE_JIS_n, cConvEJ, N2SIcode, N2SOcode},
 | |
|     {CODE_JIS_m, cConvEJ, N3SIcode, N3SOcode},
 | |
|     {CODE_JIS_j, cConvEJ, J2SIcode, J2SOcode},
 | |
|     {CODE_SJIS, cConvES, "", ""},
 | |
|     {CODE_EUC, cConvEE, "", ""},
 | |
|     {'\0', NULL, NULL, NULL}
 | |
| };
 | |
| 
 | |
| static ConvRoutine ToEJ[] = {
 | |
|     {CODE_JIS_J, cConvJE, JSIcode, JSOcode},
 | |
|     {CODE_JIS_N, cConvJE, NSIcode, NSOcode},
 | |
|     {CODE_JIS_n, cConvJE, N2SIcode, N2SOcode},
 | |
|     {CODE_JIS_m, cConvJE, N3SIcode, N3SOcode},
 | |
|     {CODE_JIS_j, cConvJE, J2SIcode, J2SOcode},
 | |
|     {CODE_SJIS, cConvSE, "", ""},
 | |
|     {CODE_EUC, cConvEE, "", ""},
 | |
|     {'\0', NULL, NULL, NULL}
 | |
| };
 | |
| 
 | |
| char *
 | |
| GetSICode(char key)
 | |
| {
 | |
|     int i;
 | |
|     for (i = 0; FromEJ[i].key != '\0'; i++)
 | |
| 	if (FromEJ[i].key == key)
 | |
| 	    return FromEJ[i].ShiftIn;
 | |
|     return "";
 | |
| }
 | |
| 
 | |
| char *
 | |
| GetSOCode(char key)
 | |
| {
 | |
|     int i;
 | |
|     for (i = 0; FromEJ[i].key != '\0'; i++)
 | |
| 	if (FromEJ[i].key == key)
 | |
| 	    return FromEJ[i].ShiftOut;
 | |
|     return "";
 | |
| }
 | |
| 
 | |
/*
 * Report an unsupported encoding identifier on stderr and terminate via
 * w3m_exit(1).
 *
 * The identifier is printed both as a character and as a hex byte.  It
 * is converted to unsigned char for the hex form: where plain char is
 * signed, a value with the high bit set would otherwise be sign-extended
 * and printed as e.g. 0xffffff80 instead of 0x80.
 */
static void
n_impr(char s)
{
    fprintf(stderr,
            "conv: option %c(0x%02x) is not implemented yet... sorry\n",
            s, (unsigned char) s);
    w3m_exit(1);
}
 | |
| 
 | |
| Str
 | |
| conv_str(Str is, char fc, char tc)
 | |
| {
 | |
|     int i;
 | |
|     Str os;
 | |
|     static char from_code = '\0';
 | |
|     static char to_code = '\0';
 | |
|     static Str (*conv_from) ();
 | |
|     static Str (*conv_to) ();
 | |
| 
 | |
|     if (fc == tc || fc == CODE_ASCII || tc == CODE_ASCII)
 | |
| 	return is;
 | |
| 
 | |
|     if (fc == CODE_INNER_EUC)
 | |
| 	os = is;
 | |
|     else {
 | |
| 	if (from_code != fc) {
 | |
| 	    for (i = 0; ToEJ[i].key != '\0'; i++) {
 | |
| 		if (ToEJ[i].key == fc) {
 | |
| 		    from_code = fc;
 | |
| 		    conv_from = *ToEJ[i].routine;
 | |
| 		    goto next;
 | |
| 		}
 | |
| 	    }
 | |
| 	    n_impr(fc);
 | |
| 	    return NULL;
 | |
| 	}
 | |
|       next:
 | |
| 	os = conv_from(is);
 | |
|     }
 | |
|     if (tc == CODE_INNER_EUC || tc == CODE_EUC)
 | |
| 	return os;
 | |
|     else {
 | |
| 	if (to_code != tc) {
 | |
| 	    for (i = 0; FromEJ[i].key != '\0'; i++) {
 | |
| 		if (FromEJ[i].key == tc) {
 | |
| 		    SIcode = FromEJ[i].ShiftIn;
 | |
| 		    SOcode = FromEJ[i].ShiftOut;
 | |
| 		    to_code = tc;
 | |
| 		    conv_to = *FromEJ[i].routine;
 | |
| 		    goto next2;
 | |
| 		}
 | |
| 	    }
 | |
| 	    n_impr(tc);
 | |
| 	    return NULL;
 | |
| 	}
 | |
|       next2:
 | |
| 	return conv_to(os);
 | |
|     }
 | |
| }
 | |
| 
 | |
| Str
 | |
| conv(char *is, char fc, char tc)
 | |
| {
 | |
|     return conv_str(Strnew_charp(is), fc, tc);
 | |
| }
 | |
| 
 | |
/*
 * Decode the second byte of a Shift-JIS two-byte character.
 *
 *   ptr  points at the second (trail) byte
 *   ub   in: the reduced lead byte prepared by the caller;
 *        out: doubled, and decremented by one when the trail byte is
 *        below 0x9f
 *
 * Returns the trail byte mapped to a 1-based offset; the caller adds
 * 0x20 to both results to obtain the 7-bit JIS bytes.
 */
static uchar
getSLb(uchar * ptr, uchar * ub)
{
    uchar trail = *ptr;

    *ub <<= 1;

    /* Upper half of the trail range: lead value stays as doubled. */
    if (trail >= 0x9f)
        return trail - 0x9e;

    /* Lower half: close the gap left by the unused byte 0x7f. */
    if (trail > 0x7e)
        trail--;
    *ub -= 1;
    return trail - 0x3f;
}
 | |
| 
 | |
| static Str
 | |
| cConvSE(Str is)
 | |
| {				/* Convert Shift-JIS to EUC-JP */
 | |
|     uchar *p, ub, lb;
 | |
|     int state = SJIS_NOSTATE;
 | |
|     Str os = Strnew_size(is->length);
 | |
|     uchar *endp = (uchar *) & is->ptr[is->length];
 | |
| 
 | |
|     for (p = (uchar *) is->ptr; p < endp; p++) {
 | |
| 	switch (state) {
 | |
| 	case SJIS_NOSTATE:
 | |
| 	    if (!(*p & 0x80))	/* ASCII */
 | |
| 		Strcat_char(os, (char)(*p));
 | |
| 	    else if (0x81 <= *p && *p <= 0x9f) {	/* JIS X 0208,
 | |
| 							 * 0213 */
 | |
| 		ub = *p & 0x7f;
 | |
| 		state = SJIS_SHIFT_L;
 | |
| 	    }
 | |
| 	    else if (0xe0 <= *p && *p <= 0xef) {	/* JIS X 0208 */
 | |
| /* } else if (0xe0 <= *p && *p <= 0xfc) { *//* JIS X 0213 */
 | |
| 		ub = (*p & 0x7f) - 0x40;
 | |
| 		state = SJIS_SHIFT_H;
 | |
| 	    }
 | |
| 	    else if (0xa0 <= *p && *p <= 0xdf) {	/* JIS X 0201-Kana 
 | |
| 							 */
 | |
| 		Strcat_char(os, (char)(han2zen_tab[*p - 0xa0][0] | 0x80));
 | |
| 		Strcat_char(os, (char)(han2zen_tab[*p - 0xa0][1] | 0x80));
 | |
| 	    }
 | |
| 	    break;
 | |
| 	case SJIS_SHIFT_L:
 | |
| 	case SJIS_SHIFT_H:
 | |
| 	    if ((0x40 <= *p && *p <= 0x7e) || (0x80 <= *p && *p <= 0xfc)) {	/* JIS X 0208, 0213 */
 | |
| 		lb = getSLb(p, &ub);
 | |
| 		ub += 0x20;
 | |
| 		lb += 0x20;
 | |
| 		Strcat_char(os, (char)(ub | 0x80));
 | |
| 		Strcat_char(os, (char)(lb | 0x80));
 | |
| 	    }
 | |
| 	    else if (!(*p & 0x80))	/* broken ? */
 | |
| 		Strcat_char(os, (char)(*p));
 | |
| 	    state = SJIS_NOSTATE;
 | |
| 	    break;
 | |
| 	}
 | |
|     }
 | |
|     return os;
 | |
| }
 | |
| 
 | |
| static Str
 | |
| cConvJE(Str is)
 | |
| {				/* Convert ISO-2022-JP to EUC-JP */
 | |
|     uchar *p, ub = 0;
 | |
|     char cset = CSET_ASCII;
 | |
|     int state = ISO_NOSTATE;
 | |
|     Str os = Strnew_size(is->length);
 | |
|     uchar *endp = (uchar *) & is->ptr[is->length];
 | |
| 
 | |
|     for (p = (uchar *) is->ptr; p < endp; p++) {
 | |
| 	switch (state) {
 | |
| 	case ISO_NOSTATE:
 | |
| 	    if (*p == ESC_CODE)	/* ESC sequence */
 | |
| 		state = ISO_ESC;
 | |
| 	    else if (cset == CSET_ASCII || *p < 0x21)
 | |
| 		Strcat_char(os, (char)(*p));
 | |
| 	    else if (cset == CSET_X0208 && *p <= 0x7e) {
 | |
| 		/* JIS X 0208 */
 | |
| 		ub = *p;
 | |
| 		state = ISO_MBYTE1;
 | |
| 	    }
 | |
| 	    else if (cset == CSET_X0201K && *p <= 0x5f) {
 | |
| 		/* JIS X 0201-Kana */
 | |
| 		Strcat_char(os, (char)(han2zen_tab[*p - 0x20][0] | 0x80));
 | |
| 		Strcat_char(os, (char)(han2zen_tab[*p - 0x20][1] | 0x80));
 | |
| 	    }
 | |
| 	    break;
 | |
| 	case ISO_MBYTE1:
 | |
| 	    if (*p == ESC_CODE)	/* ESC sequence */
 | |
| 		state = ISO_ESC;
 | |
| 	    else if (0x21 <= *p && *p <= 0x7e) {	/* JIS X 0208 */
 | |
| 		Strcat_char(os, (char)(ub | 0x80));
 | |
| 		Strcat_char(os, (char)(*p | 0x80));
 | |
| 		state = ISO_NOSTATE;
 | |
| 	    }
 | |
| 	    else {
 | |
| 		Strcat_char(os, (char)(*p));
 | |
| 		state = ISO_NOSTATE;
 | |
| 	    }
 | |
| 	    break;
 | |
| 	case ISO_ESC:
 | |
| 	    if (*p == '(')	/* ESC ( F */
 | |
| 		state = ISO_CS94;
 | |
| 	    else if (*p == '$')	/* ESC $ F, ESC $ ( F */
 | |
| 		state = ISO_MBCS;
 | |
| 	    else {
 | |
| 		Strcat_char(os, ESC_CODE);
 | |
| 		Strcat_char(os, (char)(*p));
 | |
| 		state = ISO_NOSTATE;
 | |
| 	    }
 | |
| 	    break;
 | |
| 	case ISO_CS94:
 | |
| 	    if (*p == 'B' || *p == 'J' || *p == 'H')
 | |
| 		cset = CSET_ASCII;
 | |
| 	    else if (*p == 'I')
 | |
| 		cset = CSET_X0201K;
 | |
| 	    else {
 | |
| 		Strcat_char(os, ESC_CODE);
 | |
| 		Strcat_char(os, '(');
 | |
| 		Strcat_char(os, (char)(*p));
 | |
| 	    }
 | |
| 	    state = ISO_NOSTATE;
 | |
| 	    break;
 | |
| 	case ISO_MBCS:
 | |
| 	    if (*p == '(') {	/* ESC $ ( F */
 | |
| 		state = ISO_MBCS | ISO_CS94;
 | |
| 		break;
 | |
| 	    }
 | |
| 	case ISO_MBCS | ISO_CS94:
 | |
| 	    if (*p == 'B' || *p == '@')
 | |
| 		cset = CSET_X0208;
 | |
| 	    else {
 | |
| 		Strcat_char(os, ESC_CODE);
 | |
| 		Strcat_char(os, '$');
 | |
| 		if (state == (ISO_MBCS | ISO_CS94))
 | |
| 		    Strcat_char(os, '(');
 | |
| 		Strcat_char(os, (char)(*p));
 | |
| 	    }
 | |
| 	    state = ISO_NOSTATE;
 | |
| 	    break;
 | |
| 	}
 | |
|     }
 | |
|     return os;
 | |
| }
 | |
| 
 | |
| static Str
 | |
| _cConvEE(Str is, char is_euc)
 | |
| {				/* Convert EUC-JP to EUC-JP / ISO-2022-JP
 | |
| 				 * (no JIS X 0201-Kana, 0212, 0213-2) */
 | |
|     uchar *p, ub = 0, euc = 0;
 | |
|     int state = EUC_NOSTATE;
 | |
|     char cset = CSET_ASCII;
 | |
|     Str os;
 | |
|     uchar *endp = (uchar *) & is->ptr[is->length];
 | |
| 
 | |
|     if (is_euc) {
 | |
| 	os = Strnew_size(is->length);
 | |
| 	euc = 0x80;
 | |
|     }
 | |
|     else
 | |
| 	os = Strnew_size(is->length * 3 / 2);
 | |
| 
 | |
|     for (p = (uchar *) is->ptr; p < endp; p++) {
 | |
| 	switch (state) {
 | |
| 	case EUC_NOSTATE:
 | |
| 	    if (!(*p & 0x80)) {	/* ASCII */
 | |
| 		if (!is_euc && cset != CSET_ASCII) {
 | |
| 		    Strcat_charp(os, SOcode);
 | |
| 		    cset = CSET_ASCII;
 | |
| 		}
 | |
| 		Strcat_char(os, (char)(*p));
 | |
| 	    }
 | |
| 	    else if (0xa1 <= *p && *p <= 0xfe) {	/* JIS X 0208,
 | |
| 							 * 0213-1 */
 | |
| 		ub = *p;
 | |
| 		state = EUC_MBYTE1;
 | |
| 	    }
 | |
| 	    else if (*p == EUC_SS2_CODE)	/* SS2 + JIS X 0201-Kana */
 | |
| 		state = EUC_SS2;
 | |
| 	    else if (*p == EUC_SS3_CODE)	/* SS3 + JIS X 0212, 0213-2 */
 | |
| 		state = EUC_SS3;
 | |
| 	    break;
 | |
| 	case EUC_MBYTE1:
 | |
| 	    if (0xa1 <= *p && *p <= 0xfe) {	/* JIS X 0208, 0213-1 */
 | |
| 		if (!is_euc && cset != CSET_X0208) {
 | |
| 		    Strcat_charp(os, SIcode);
 | |
| 		    cset = CSET_X0208;
 | |
| 		}
 | |
| 		Strcat_char(os, (char)((ub & 0x7f) | euc));
 | |
| 		Strcat_char(os, (char)((*p & 0x7f) | euc));
 | |
| 	    }
 | |
| 	    else if (!(*p & 0x80)) {	/* broken ? */
 | |
| 		if (!is_euc && cset != CSET_ASCII) {
 | |
| 		    Strcat_charp(os, SOcode);
 | |
| 		    cset = CSET_ASCII;
 | |
| 		}
 | |
| 		Strcat_char(os, (char)(*p));
 | |
| 	    }
 | |
| 	    state = EUC_NOSTATE;
 | |
| 	    break;
 | |
| 	case EUC_SS2:
 | |
| 	    if (0xa0 <= *p && *p <= 0xdf) {	/* JIS X 0201-Kana */
 | |
| 		if (!is_euc && cset != CSET_X0208) {
 | |
| 		    Strcat_charp(os, SIcode);
 | |
| 		    cset = CSET_X0208;
 | |
| 		}
 | |
| 		Strcat_char(os, (char)(han2zen_tab[*p - 0xa0][0] | euc));
 | |
| 		Strcat_char(os, (char)(han2zen_tab[*p - 0xa0][1] | euc));
 | |
| 	    }
 | |
| 	    state = EUC_NOSTATE;
 | |
| 	    break;
 | |
| 	case EUC_SS3:
 | |
| 	    state = (EUC_SS3 | EUC_MBYTE1);
 | |
| 	    break;
 | |
| 	case EUC_SS3 | EUC_MBYTE1:
 | |
| 	    state = EUC_NOSTATE;
 | |
| 	    break;
 | |
| 	}
 | |
|     }
 | |
|     if (!is_euc && cset != CSET_ASCII)
 | |
| 	Strcat_charp(os, SOcode);
 | |
|     return os;
 | |
| }
 | |
| 
 | |
| static Str
 | |
| cConvEE(Str is)
 | |
| {
 | |
|     return _cConvEE(is, TRUE);
 | |
| }
 | |
| 
 | |
| static Str
 | |
| cConvEJ(Str is)
 | |
| {
 | |
|     return _cConvEE(is, FALSE);
 | |
| }
 | |
| 
 | |
| void
 | |
| put_sjis(Str os, uchar ub, uchar lb)
 | |
| {
 | |
|     ub -= 0x20;
 | |
|     lb -= 0x20;
 | |
|     if ((ub & 1) == 0)
 | |
| 	lb += 94;
 | |
|     ub = ((ub - 1) >> 1) + 0x81;
 | |
|     lb += 0x3f;
 | |
|     if (ub > 0x9f)
 | |
| 	ub += 0x40;
 | |
|     if (lb > 0x7e)
 | |
| 	lb++;
 | |
| 
 | |
|     Strcat_char(os, (char)(ub));
 | |
|     Strcat_char(os, (char)(lb));
 | |
| }
 | |
| 
 | |
| static Str
 | |
| cConvES(Str is)
 | |
| {				/* Convert EUC-JP to Shift-JIS */
 | |
|     uchar *p, ub = 0;
 | |
|     int state = EUC_NOSTATE;
 | |
|     Str os = Strnew_size(is->length);
 | |
|     uchar *endp = (uchar *) & is->ptr[is->length];
 | |
| 
 | |
|     for (p = (uchar *) is->ptr; p < endp; p++) {
 | |
| 	switch (state) {
 | |
| 	case EUC_NOSTATE:
 | |
| 	    if (!(*p & 0x80))	/* ASCII */
 | |
| 		Strcat_char(os, (char)(*p));
 | |
| 	    else if (0xa1 <= *p && *p <= 0xfe) {	/* JIS X 0208,
 | |
| 							 * 0213-1 */
 | |
| 		ub = *p;
 | |
| 		state = EUC_MBYTE1;
 | |
| 	    }
 | |
| 	    else if (*p == EUC_SS2_CODE)	/* SS2 + JIS X 0201-Kana */
 | |
| 		state = EUC_SS2;
 | |
| 	    else if (*p == EUC_SS3_CODE)	/* SS3 + JIS X 0212, 0213-2 */
 | |
| 		state = EUC_SS3;
 | |
| 	    break;
 | |
| 	case EUC_MBYTE1:
 | |
| 	    if (0xa1 <= *p && *p <= 0xfe)	/* JIS X 0208, 0213-1 */
 | |
| 		put_sjis(os, ub & 0x7f, *p & 0x7f);
 | |
| 	    else if (!(*p & 0x80))	/* broken ? */
 | |
| 		Strcat_char(os, (char)(*p));
 | |
| 	    state = EUC_NOSTATE;
 | |
| 	    break;
 | |
| 	case EUC_SS2:
 | |
| 	    if (0xa0 <= *p && *p <= 0xdf)	/* JIS X 0201-Kana */
 | |
| 		put_sjis(os, han2zen_tab[*p - 0xa0][0],
 | |
| 			 han2zen_tab[*p - 0xa0][1]);
 | |
| 	    state = EUC_NOSTATE;
 | |
| 	    break;
 | |
| 	case EUC_SS3:
 | |
| 	    state = (EUC_SS3 | EUC_MBYTE1);
 | |
| 	    break;
 | |
| 	case EUC_SS3 | EUC_MBYTE1:
 | |
| 	    state = EUC_NOSTATE;
 | |
| 	    break;
 | |
| 	}
 | |
|     }
 | |
|     return os;
 | |
| }
 | |
| 
 | |
/*
 * Disabled lookup tables, kept for reference only (nothing in the code
 * shown here uses them):
 *
 * static ushort sjis_shift[8] = { 0x7fff, 0xffff, 0x0, 0x0, 0x0,
 *                                 0x0, 0xffff, 0x0 };
 * static ushort sjis_second[16] = { 0x0, 0x0, 0x0, 0x0, 0xffff, 0xffff,
 *                                   0xffff, 0xfffe, 0xffff, 0xffff, 0xffff,
 *                                   0xffff, 0xffff, 0xffff, 0xffff, 0xfff8 };
 */
 | |
| 
 | |
| char
 | |
| checkShiftCode(Str buf, uchar hint)
 | |
| {
 | |
|     uchar *p, si = '\0', so = '\0';
 | |
|     int euc = (CODE_NORMAL | EUC_NOSTATE),
 | |
| 	sjis = (CODE_NORMAL | SJIS_NOSTATE), sjis_kana = CODE_NORMAL,
 | |
| 	iso = (CODE_NORMAL | ISO_NOSTATE), iso_kana = CODE_NORMAL;
 | |
|     uchar *endp = (uchar *) & buf->ptr[buf->length];
 | |
| 
 | |
|     if (hint == CODE_INNER_EUC)
 | |
| 	return '\0';
 | |
|     p = (uchar *) buf->ptr;
 | |
|     while (1) {
 | |
| 	if (iso != CODE_ERROR && (si == '\0' || so == '\0')) {
 | |
| 	    switch (ISO_STATE(iso)) {
 | |
| 	    case ISO_NOSTATE:
 | |
| 		if (*p == ESC_CODE)	/* ESC sequence */
 | |
| 		    iso = (CODE_STATE(iso) | ISO_ESC);
 | |
| 		break;
 | |
| 	    case ISO_ESC:
 | |
| 		if (*p == '(')	/* ESC ( F */
 | |
| 		    iso = (CODE_STATE(iso) | ISO_CS94);
 | |
| 		else if (*p == '$')	/* ESC $ F, ESC $ ( F */
 | |
| 		    iso = (CODE_STATE(iso) | ISO_MBCS);
 | |
| 		else
 | |
| 		    iso = (CODE_STATE(iso) | ISO_NOSTATE);
 | |
| 		break;
 | |
| 	    case ISO_CS94:
 | |
| 		if (*p == 'B' || *p == 'J' || *p == 'H')
 | |
| 		    so = *p;
 | |
| 		else if (*p == 'I')
 | |
| 		    iso_kana = CODE_OK;
 | |
| 		iso = (CODE_STATE(iso) | ISO_NOSTATE);
 | |
| 		break;
 | |
| 	    case ISO_MBCS:
 | |
| 		if (*p == '(') {	/* ESC $ ( F */
 | |
| 		    iso = (CODE_STATE(iso) | ISO_MBCS | ISO_CS94);
 | |
| 		    break;
 | |
| 		}
 | |
| 	    case ISO_MBCS | ISO_CS94:
 | |
| 		if (*p == 'B' || *p == '@')
 | |
| 		    si = *p;
 | |
| 		iso = (CODE_STATE(iso) | ISO_NOSTATE);
 | |
| 		break;
 | |
| 	    }
 | |
| 	    if (*p & 0x80)
 | |
| 		iso = CODE_ERROR;
 | |
| 	}
 | |
| 	if (euc != CODE_ERROR) {
 | |
| 	    switch (EUC_STATE(euc)) {
 | |
| 	    case EUC_NOSTATE:
 | |
| 		if (!(*p & 0x80))	/* ASCII */
 | |
| 		    ;
 | |
| 		else if (0xa1 <= *p && *p <= 0xfe)	/* JIS X 0208, 0213-1 */
 | |
| 		    euc = (CODE_STATE(euc) | EUC_MBYTE1);
 | |
| 		else if (*p == EUC_SS2_CODE)	/* SS2 + JIS X 0201-Kana */
 | |
| 		    euc = (CODE_STATE(euc) | EUC_SS2);
 | |
| 		else if (*p == EUC_SS3_CODE)	/* SS3 + JIS X 0212, 0213-2 */
 | |
| 		    euc = (CODE_STATE(euc) | EUC_SS3);
 | |
| 		else
 | |
| 		    euc = CODE_ERROR;
 | |
| 		break;
 | |
| 	    case EUC_MBYTE1:
 | |
| 		if (CODE_STATE(euc) == CODE_NORMAL)
 | |
| 		    euc = CODE_OK;
 | |
| 	    case EUC_SS3 | EUC_MBYTE1:
 | |
| 		if (0xa1 <= *p && *p <= 0xfe)	/* JIS X 0208, 0213-1 */
 | |
| 		    euc = (CODE_STATE(euc) | EUC_NOSTATE);
 | |
| 		else if (euc & CODE_BROKEN)
 | |
| 		    euc = CODE_ERROR;
 | |
| 		else
 | |
| 		    euc = (CODE_BROKEN | EUC_NOSTATE);
 | |
| 		break;
 | |
| 	    case EUC_SS2:
 | |
| 		if (0xa0 <= *p && *p <= 0xdf)	/* JIS X 0201-Kana */
 | |
| 		    euc = (CODE_STATE(euc) | EUC_NOSTATE);
 | |
| 		else
 | |
| 		    euc = CODE_ERROR;
 | |
| 		break;
 | |
| 	    case EUC_SS3:
 | |
| 		if (0xa1 <= *p && *p <= 0xfe)	/* JIS X 0212, 0213-2 */
 | |
| 		    euc = (CODE_STATE(euc) | EUC_SS3 | EUC_MBYTE1);
 | |
| 		else
 | |
| 		    euc = CODE_ERROR;
 | |
| 		break;
 | |
| 	    }
 | |
| 	}
 | |
| 	if (sjis != CODE_ERROR) {
 | |
| 	    switch (SJIS_STATE(sjis)) {
 | |
| 	    case SJIS_NOSTATE:
 | |
| 		if (!(*p & 0x80))	/* ASCII */
 | |
| 		    ;
 | |
| 		else if (0x81 <= *p && *p <= 0x9f)
 | |
| 		    sjis = (CODE_STATE(sjis) | SJIS_SHIFT_L);
 | |
| 		else if (0xe0 <= *p && *p <= 0xef)
 | |
| 
 | |
| 		    /* JIS X 0208 */
 | |
| 		    /* else if (0xe0 <= *p && *p <= 0xfc) */
 | |
| 		    /* JIS X 0213 */
 | |
| 		    sjis = (CODE_STATE(sjis) | SJIS_SHIFT_H);
 | |
| 		else if (0xa0 == *p)
 | |
| 		    sjis = (CODE_BROKEN | SJIS_NOSTATE);
 | |
| 		else if (0xa1 <= *p && *p <= 0xdf)	/* JIS X 0201-Kana 
 | |
| 							 */
 | |
| 		    sjis_kana = CODE_OK;
 | |
| 		else
 | |
| 		    sjis = CODE_ERROR;
 | |
| 		break;
 | |
| 	    case SJIS_SHIFT_L:
 | |
| 	    case SJIS_SHIFT_H:
 | |
| 		if (CODE_STATE(sjis) == CODE_NORMAL)
 | |
| 		    sjis = CODE_OK;
 | |
| 		if ((0x40 <= *p && *p <= 0x7e) || (0x80 <= *p && *p <= 0xfc))	/* JIS X 0208, 0213 */
 | |
| 		    sjis = (CODE_STATE(sjis) | SJIS_NOSTATE);
 | |
| 		else if (sjis & CODE_BROKEN)
 | |
| 		    sjis = CODE_ERROR;
 | |
| 		else
 | |
| 		    sjis = (CODE_BROKEN | SJIS_NOSTATE);
 | |
| 		break;
 | |
| 	    }
 | |
| 	}
 | |
| 	if (euc == CODE_ERROR || sjis == CODE_ERROR)
 | |
| 	    break;
 | |
| 	if (p == endp)
 | |
| 	    break;
 | |
| 	p++;
 | |
|     }
 | |
|     if (iso != CODE_ERROR) {
 | |
| 	if (si == '\0' && so == '\0' && iso_kana != CODE_OK)
 | |
| 	    return '\0';
 | |
| 	switch (si) {
 | |
| 	case '@':
 | |
| 	    switch (so) {
 | |
| 	    case 'H':
 | |
| 		return CODE_JIS_J;
 | |
| 	    case 'J':
 | |
| 		return CODE_JIS_j;
 | |
| 	    case 'B':
 | |
| 		return CODE_JIS_m;
 | |
| 	    default:
 | |
| 		return CODE_JIS_m;
 | |
| 	    }
 | |
| 	case 'B':
 | |
| 	    switch (so) {
 | |
| 	    case 'J':
 | |
| 		return CODE_JIS_N;
 | |
| 	    case 'B':
 | |
| 		return CODE_JIS_n;
 | |
| 	    default:
 | |
| 		return CODE_JIS_n;
 | |
| 	    }
 | |
| 	default:
 | |
| 	    switch (so) {
 | |
| 	    case 'H':
 | |
| 		return CODE_JIS_J;
 | |
| 	    case 'J':
 | |
| 		return CODE_JIS_N;
 | |
| 	    case 'B':
 | |
| 		return CODE_JIS_n;
 | |
| 	    default:
 | |
| 		return CODE_JIS_n;
 | |
| 	    }
 | |
| 	}
 | |
|     }
 | |
|     if (hint == CODE_EUC) {
 | |
| 	if (euc != CODE_ERROR)
 | |
| 	    return CODE_EUC;
 | |
|     }
 | |
|     else if (hint == CODE_SJIS) {
 | |
| 	if (sjis != CODE_ERROR)
 | |
| 	    return CODE_SJIS;
 | |
|     }
 | |
|     if (CODE_STATE(euc) == CODE_OK)
 | |
| 	return CODE_EUC;
 | |
|     if (CODE_STATE(sjis) == CODE_OK)
 | |
| 	return CODE_SJIS;
 | |
|     if (CODE_STATE(euc) == CODE_NORMAL)
 | |
| 	return CODE_EUC;
 | |
|     if (CODE_STATE(sjis) == CODE_NORMAL)
 | |
| 	return CODE_SJIS;
 | |
|     return CODE_EUC;
 | |
| }
 | |
| #endif				/* JP_CHARSET */
 |