Origin: http://marc.info/?l=openbsd-ports&m=142090828929750&w=2 * main.c: Call exit(1) when out of memory to avoid dereferencing null pointers when gc's malloc fails. * alloc.h: Replacements for w3m's allocation macros which add overflow detection and concentrate the macros in one file. * indep.h, libwc/charset.c, libwc/status.c, matrix.c: Use the overflow-detecting allocation macros from alloc.h.
		
			
				
	
	
		
			503 lines
		
	
	
		
			11 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			503 lines
		
	
	
		
			11 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
| 
 | |
| #include <stdlib.h>
 | |
| #include <ctype.h>
 | |
| #include "../alloc.h"
 | |
| 
 | |
| #include "wc.h"
 | |
| 
 | |
| #ifdef HAVE_LANGINFO_CODESET
 | |
| #include <langinfo.h>
 | |
| #endif
 | |
| 
 | |
| wc_locale WcLocale = 0;
 | |
| 
 | |
| static struct {
 | |
|   char *lang;
 | |
|   wc_ces ces;
 | |
| } lang_ces_table[] = {
 | |
|   { "cs", WC_CES_ISO_8859_2 },	/* cs_CZ */
 | |
|   { "el", WC_CES_ISO_8859_7 },	/* el_GR */
 | |
|   { "iw", WC_CES_ISO_8859_8 },	/* iw_IL */
 | |
|   { "ja", WC_CES_EUC_JP },	/* ja_JP */
 | |
|   { "ko", WC_CES_EUC_KR },	/* ko_KR */
 | |
|   { "hu", WC_CES_ISO_8859_2 },	/* hu_HU */
 | |
|   { "pl", WC_CES_ISO_8859_2 },	/* pl_PL */
 | |
|   { "ro", WC_CES_ISO_8859_2 },	/* ro_RO */
 | |
|   { "ru", WC_CES_ISO_8859_5 },	/* ru_SU */
 | |
|   { "sk", WC_CES_ISO_8859_2 },	/* sk_SK */
 | |
|   { "sl", WC_CES_ISO_8859_2 },	/* sl_CS */
 | |
|   { "tr", WC_CES_ISO_8859_9 },	/* tr_TR */
 | |
|   { "zh", WC_CES_EUC_CN },	/* zh_CN */
 | |
|   { NULL, 0 }
 | |
| };
 | |
| 
 | |
| static wc_ces
 | |
| wc_codepage(int n)
 | |
| {
 | |
| 	switch (n) {
 | |
| 	case 437: return WC_CES_CP437;
 | |
| 	case 737: return WC_CES_CP737;
 | |
| 	case 775: return WC_CES_CP775;
 | |
| 	case 850: return WC_CES_CP850;
 | |
| 	case 852: return WC_CES_CP852;
 | |
| 	case 855: return WC_CES_CP855;
 | |
| 	case 856: return WC_CES_CP856;
 | |
| 	case 857: return WC_CES_CP857;
 | |
| 	case 860: return WC_CES_CP860;
 | |
| 	case 861: return WC_CES_CP861;
 | |
| 	case 862: return WC_CES_CP862;
 | |
| 	case 863: return WC_CES_CP863;
 | |
| 	case 864: return WC_CES_CP864;
 | |
| 	case 865: return WC_CES_CP865;
 | |
| 	case 866: return WC_CES_CP866;
 | |
| 	case 869: return WC_CES_CP869;
 | |
| 	case 874: return WC_CES_CP874;
 | |
| 	case 932: return WC_CES_CP932;		/* CP932 = Shift_JIS */
 | |
| 	case 936: return WC_CES_CP936;		/* CP936 = GBK > EUC_CN */
 | |
| 	case 943: return WC_CES_CP943;		/* CP943 = Shift_JIS */
 | |
| 	case 949: return WC_CES_CP949;		/* CP949 = UHC > EUC_KR */
 | |
| 	case 950: return WC_CES_CP950;		/* CP950 = Big5 */
 | |
| 	case 1006: return WC_CES_CP1006;
 | |
| 	case 1250: return WC_CES_CP1250;
 | |
| 	case 1251: return WC_CES_CP1251;
 | |
| 	case 1252: return WC_CES_CP1252;
 | |
| 	case 1253: return WC_CES_CP1253;
 | |
| 	case 1254: return WC_CES_CP1254;
 | |
| 	case 1255: return WC_CES_CP1255;
 | |
| 	case 1256: return WC_CES_CP1256;
 | |
| 	case 1257: return WC_CES_CP1257;
 | |
| 	case 1258: return WC_CES_CP1258;
 | |
| 	}
 | |
| 	return 0;
 | |
| }
 | |
| 
 | |
| wc_ces
 | |
| wc_guess_charset(char *charset, wc_ces orig)
 | |
| {
 | |
|     wc_ces guess;
 | |
| 
 | |
|     if (charset == NULL || *charset == '\0')
 | |
| 	return orig;
 | |
|     guess = wc_charset_to_ces(charset);
 | |
|     return guess ? guess : orig;
 | |
| }
 | |
| 
 | |
| wc_ces
 | |
| wc_guess_charset_short(char *charset, wc_ces orig)
 | |
| {
 | |
|     wc_ces guess;
 | |
| 
 | |
|     if (charset == NULL || *charset == '\0')
 | |
| 	return orig;
 | |
|     guess = wc_charset_short_to_ces(charset);
 | |
|     return guess ? guess : orig;
 | |
| }
 | |
| 
 | |
| wc_ces
 | |
| wc_guess_locale_charset(char *locale, wc_ces orig)
 | |
| {
 | |
|     wc_ces guess;
 | |
| 
 | |
|     if (locale == NULL || *locale == '\0')
 | |
| 	return orig;
 | |
|     guess = wc_locale_to_ces(locale);
 | |
|     return guess ? guess : orig;
 | |
| }
 | |
| 
 | |
| wc_ces
 | |
| wc_charset_to_ces(char *charset)
 | |
| {
 | |
|     char *p = charset;
 | |
|     char buf[16];
 | |
|     int n;
 | |
| 
 | |
|     if (tolower(*p) == 'x' && *(p+1) == '-')
 | |
| 	p += 2;
 | |
|     for (n = 0; *p && n < 15; p++) {
 | |
| 	if ((unsigned char)*p > 0x20 && *p != '_' && *p != '-')
 | |
| 	    buf[n++] = tolower(*p);
 | |
|     }
 | |
|     buf[n] = 0;
 | |
|     p = buf;
 | |
|     switch (*p) {
 | |
|     case 'e':
 | |
| 	if (! strncmp(p, "euc", 3)) {
 | |
| 	    p += 3;
 | |
| 	    switch (*p) {
 | |
| 	    case 'j': return WC_CES_EUC_JP;
 | |
| 	    case 'c': return WC_CES_EUC_CN;
 | |
| 	    case 't': return WC_CES_EUC_TW;
 | |
| 	    case 'k': return WC_CES_EUC_KR;
 | |
| 	    }
 | |
| 	    switch (WcLocale) {
 | |
| 	    case WC_LOCALE_JA_JP: return WC_CES_EUC_JP;
 | |
| 	    case WC_LOCALE_ZH_CN: return WC_CES_EUC_CN;
 | |
| 	    case WC_LOCALE_ZH_TW: return WC_CES_EUC_TW;
 | |
| 	    case WC_LOCALE_ZH_HK: return WC_CES_EUC_CN;
 | |
| 	    case WC_LOCALE_KO_KR: return WC_CES_EUC_KR;
 | |
| 	    }
 | |
| 	    return WC_CES_EUC_JP;
 | |
|         }
 | |
| 	break;
 | |
|     case 'i':
 | |
| 	if (! strncmp(p, "iso2022", 7)) {
 | |
| 	    p += 7;
 | |
| 	    switch (*p) {
 | |
| 	    case 'j':
 | |
| 		if (! strncmp(p, "jp2", 3))
 | |
| 		    return WC_CES_ISO_2022_JP_2;
 | |
| 		if (! strncmp(p, "jp3", 3))
 | |
| 		    return WC_CES_ISO_2022_JP_3;
 | |
| 		return WC_CES_ISO_2022_JP;
 | |
| 	    case 'c': return WC_CES_ISO_2022_CN;
 | |
| 	    case 'k': return WC_CES_ISO_2022_KR;
 | |
| 	    }
 | |
| 	    return WC_CES_ISO_2022_JP;
 | |
| 	} else if (! strncmp(p, "iso8859", 7)) {
 | |
| 	    n = atoi(p + 7);
 | |
| 	    if (n >= 1 && n <= 16 && n != 12)
 | |
| 		return (WC_CES_E_ISO_8859 | n);
 | |
| 	    return WC_CES_ISO_8859_1;
 | |
| 	} else if (! strncmp(p, "ibm", 3)) {
 | |
| 	    p += 3;
 | |
| 	    if (*p >= '1' && *p <= '9')
 | |
| 	    	return wc_codepage(atoi(p));
 | |
| 	    return wc_charset_to_ces(p);
 | |
| 	}
 | |
| 	break;
 | |
|     case 'j':
 | |
| 	if (! strncmp(p, "johab", 5))
 | |
| 	    return WC_CES_JOHAB;
 | |
| 	if (! strncmp(p, "jis", 3))
 | |
| 	    return WC_CES_ISO_2022_JP;
 | |
| 	break;
 | |
|     case 's':
 | |
| 	if (! strncmp(p, "shiftjisx0213", 13) ||
 | |
| 	    ! strncmp(p, "sjisx0213", 9))
 | |
| 	    return WC_CES_SHIFT_JISX0213;
 | |
| 	if (! strncmp(p, "shiftjis", 8) ||
 | |
| 	    ! strncmp(p, "sjis", 4))
 | |
| 	    return WC_CES_SHIFT_JIS;
 | |
| 	break;
 | |
|     case 'p':
 | |
| 	if (! strncmp(p, "pck", 3))
 | |
| 	    return WC_CES_SHIFT_JIS;
 | |
| 	break;
 | |
|     case 'g':
 | |
| 	if (! strncmp(p, "gb18030", 7) ||
 | |
| 	    ! strncmp(p, "gbk2k", 5))
 | |
| 	    return WC_CES_GB18030;
 | |
| 	if (! strncmp(p, "gbk", 3))
 | |
| 	    return WC_CES_GBK;
 | |
| 	if (! strncmp(p, "gb2312", 6))
 | |
| 	    return WC_CES_EUC_CN;
 | |
| 	break;
 | |
|     case 'b':
 | |
| 	if (! strncmp(p, "big5hkscs", 9))
 | |
| 	    return WC_CES_HKSCS;
 | |
| 	if (! strncmp(p, "big5", 4))
 | |
| 	    return WC_CES_BIG5;
 | |
| 	break;
 | |
|     case 'h':
 | |
| 	if (! strncmp(p, "hz", 2))
 | |
| 	    return WC_CES_HZ_GB_2312;
 | |
| 	if (! strncmp(p, "hkscs", 5))
 | |
| 	    return WC_CES_HKSCS;
 | |
| 	break;
 | |
|     case 'k':
 | |
| 	if (! strncmp(p, "koi8r", 5))
 | |
| 	    return WC_CES_KOI8_R;
 | |
| 	if (! strncmp(p, "koi8u", 5))
 | |
| 	    return WC_CES_KOI8_U;
 | |
| 	if (! strncmp(p, "ksx1001", 7))
 | |
| 	    return WC_CES_EUC_KR;
 | |
| 	if (! strncmp(p, "ksc5601", 7))
 | |
| 	    return WC_CES_EUC_KR;
 | |
| 	break;
 | |
|     case 't':
 | |
| 	if (! strncmp(p, "tis620", 6))
 | |
| 	    return WC_CES_TIS_620;
 | |
| 	if (! strncmp(p, "tcvn", 4))
 | |
| 	    return WC_CES_TCVN_5712;
 | |
| 	break;
 | |
|     case 'n':
 | |
| 	if (! strncmp(p, "next", 4))
 | |
| 	    return WC_CES_NEXTSTEP;
 | |
| 	break;
 | |
|     case 'v':
 | |
| 	if (! strncmp(p, "viet", 4)) {
 | |
| 	    p += 4;
 | |
| 	    if (! strncmp(p, "tcvn", 4))
 | |
| 		return WC_CES_TCVN_5712;
 | |
| 	}
 | |
| 	if (! strncmp(p, "viscii", 6))
 | |
| 	    return WC_CES_VISCII_11;
 | |
| 	if (! strncmp(p, "vps", 3))
 | |
| 	    return WC_CES_VPS;
 | |
| 	break;
 | |
|     case 'u':
 | |
| #ifdef USE_UNICODE
 | |
| 	if (! strncmp(p, "utf8", 4))
 | |
| 	    return WC_CES_UTF_8;
 | |
| 	if (! strncmp(p, "utf7", 4))
 | |
| 	    return WC_CES_UTF_7;
 | |
| #endif
 | |
| 	if (! strncmp(p, "uhc", 3))
 | |
| 	    return WC_CES_UHC;
 | |
| 	if (! strncmp(p, "ujis", 4))
 | |
| 	    return WC_CES_EUC_JP;
 | |
| 	if (! strncmp(p, "usascii", 7))
 | |
| 	    return WC_CES_US_ASCII;
 | |
| 	break;
 | |
|     case 'a':
 | |
| 	if (! strncmp(p, "ascii", 5))
 | |
| 	    return WC_CES_US_ASCII;
 | |
| 	break;
 | |
|     case 'c':
 | |
| 	if (! strncmp(p, "cngb", 4))
 | |
| 	    return WC_CES_EUC_CN;
 | |
| 	if (*(p+1) != 'p')
 | |
| 	    break;
 | |
| 	p += 2;
 | |
| 	if (*p >= '1' &&  *p <= '9')
 | |
| 	    return wc_codepage(atoi(p));
 | |
| 	break;
 | |
|     case 'w':
 | |
| 	if (strncmp(p, "windows", 7))
 | |
| 	    break;
 | |
| 	p += 7;
 | |
| 	if (! strncmp(p, "31j", 3))
 | |
| 	    return WC_CES_CP932;
 | |
| 	if (*p >= '1' &&  *p <= '9')
 | |
| 	    return wc_codepage(atoi(p));
 | |
| 	break;
 | |
|     }
 | |
|     return 0;
 | |
| }
 | |
| 
 | |
| wc_ces
 | |
| wc_charset_short_to_ces(char *charset)
 | |
| {
 | |
|     char *p = charset;
 | |
|     char buf[16];
 | |
|     wc_ces ces;
 | |
|     int n;
 | |
| 
 | |
|     ces = wc_charset_to_ces(charset);
 | |
|     if (ces)
 | |
| 	return ces;
 | |
| 
 | |
|     for (n = 0; *p && n < 15; p++) {
 | |
| 	if ((unsigned char)*p > 0x20 && *p != '_' && *p != '-')
 | |
| 	    buf[n++] = tolower(*p);
 | |
|     }
 | |
|     buf[n] = 0;
 | |
|     p = buf;
 | |
|     switch (*p) {
 | |
|     case 'e':
 | |
| 	switch (*(p+1)) {
 | |
| 	case 'j': return WC_CES_EUC_JP;
 | |
| 	case 'c': return WC_CES_EUC_CN;
 | |
| 	case 't': return WC_CES_EUC_TW;
 | |
| 	case 'k': return WC_CES_EUC_KR;
 | |
| 	}
 | |
| 	return WC_CES_EUC_JP;
 | |
|     case 'j':
 | |
| 	p++;
 | |
| 	if (*p == 'o')
 | |
| 	    return WC_CES_JOHAB;
 | |
| 	if (*p == 'p')
 | |
| 	   p++;
 | |
| 	if (*p == '2')
 | |
| 	   return WC_CES_ISO_2022_JP_2;
 | |
| 	if (*p == '3')
 | |
| 	   return WC_CES_ISO_2022_JP_3;
 | |
| 	return WC_CES_ISO_2022_JP;
 | |
|     case 's':
 | |
| 	return WC_CES_SHIFT_JIS;
 | |
|     case 'g':
 | |
| 	return WC_CES_EUC_CN;
 | |
|     case 'b':
 | |
| 	return WC_CES_BIG5;
 | |
|     case 'h':
 | |
| 	if (*(p+1) == 'k')
 | |
| 	    return WC_CES_HKSCS;
 | |
| 	return WC_CES_HZ_GB_2312;
 | |
|     case 'k':
 | |
| 	if (*(p+1) == 'o')
 | |
| 	    return WC_CES_KOI8_R;
 | |
| 	return WC_CES_ISO_2022_KR;
 | |
|     case 'l':
 | |
| 	n = atoi(p + 1);
 | |
| 	if (n >= 1 && n <= 16 && n != 12)
 | |
| 	    return (WC_CES_E_ISO_8859 | n);
 | |
| 	return WC_CES_ISO_8859_1;
 | |
|     case 't':
 | |
| 	if (*(p+1) == 'c')
 | |
| 	    return WC_CES_TCVN_5712;
 | |
| 	return WC_CES_TIS_620;
 | |
|     case 'n':
 | |
| 	return WC_CES_NEXTSTEP;
 | |
|     case 'v':
 | |
| 	if (*(p+1) == 'p')
 | |
| 	    return WC_CES_VPS;
 | |
| 	return WC_CES_VISCII_11;
 | |
| #ifdef USE_UNICODE
 | |
|     case 'u':
 | |
| 	if (*(p+1) == '7')
 | |
| 	    return WC_CES_UTF_7;
 | |
| 	return WC_CES_UTF_8;
 | |
| #endif
 | |
|     case 'a':
 | |
| 	return WC_CES_US_ASCII;
 | |
|     case 'c':
 | |
| 	return WC_CES_ISO_2022_CN;
 | |
|     case 'w':
 | |
| 	p++;
 | |
| 	if (*p >= '1' &&  *p <= '9')
 | |
| 	    return wc_codepage(atoi(p));
 | |
| 	break;
 | |
|     case 'r':
 | |
| 	return WC_CES_RAW;
 | |
|     }
 | |
|     return 0;
 | |
| }
 | |
| 
 | |
| wc_ces
 | |
| wc_locale_to_ces(char *locale)
 | |
| {
 | |
|     char *p = locale;
 | |
|     char buf[8];
 | |
|     int n;
 | |
| 
 | |
|     if (*p == 'C' && *(p+1) == '\0')
 | |
| 	return WC_CES_US_ASCII;
 | |
| #ifdef HAVE_LANGINFO_CODESET
 | |
|     {
 | |
| 	char *cs = nl_langinfo(CODESET);
 | |
| 	if (cs && strcmp(cs, "US-ASCII"))
 | |
| 	    return wc_charset_to_ces(cs);
 | |
|     }
 | |
| #endif
 | |
|     for (n = 0; *p && *p != '.' && n < 7; p++) {
 | |
| 	if ((unsigned char)*p > 0x20)
 | |
| 	    buf[n++] = tolower(*p);
 | |
|     }
 | |
|     buf[n] = 0;
 | |
|     if (*p == '.') {
 | |
| 	p++;
 | |
| 	if (! strcasecmp(p, "euc")) {
 | |
| 	    switch (buf[0]) {
 | |
| 	    case 'j':
 | |
| 		WcLocale = WC_LOCALE_JA_JP;
 | |
| 		break;
 | |
| 	    case 'k':
 | |
| 		WcLocale = WC_LOCALE_KO_KR;
 | |
| 		break;
 | |
| 	    case 'z':
 | |
| 	        if (!strcmp(buf, "zh_tw"))
 | |
| 		    WcLocale = WC_LOCALE_ZH_TW;
 | |
| 	        else if (!strcmp(buf, "zh_hk"))
 | |
| 		    WcLocale = WC_LOCALE_ZH_HK;
 | |
| 		else
 | |
| 		    WcLocale = WC_LOCALE_ZH_CN;
 | |
| 		break;
 | |
| 	    default:
 | |
| 		WcLocale = 0;
 | |
| 		break;
 | |
| 	    }
 | |
| 	}
 | |
| 	return wc_charset_to_ces(p);
 | |
|     }
 | |
| 
 | |
|     if (!strcmp(buf, "japanese"))
 | |
| 	return WC_CES_SHIFT_JIS;
 | |
|     if (!strcmp(buf, "zh_tw") ||
 | |
| 	!strcmp(buf, "zh_hk"))
 | |
| 	return WC_CES_BIG5;
 | |
|     for (n = 0; lang_ces_table[n].lang; n++) {
 | |
| 	if (!strncmp(buf, lang_ces_table[n].lang, 2))
 | |
| 	    return lang_ces_table[n].ces;
 | |
|     }
 | |
|     return WC_CES_ISO_8859_1;
 | |
| }
 | |
| 
 | |
| char *
 | |
| wc_ces_to_charset(wc_ces ces)
 | |
| {
 | |
|     if (ces == WC_CES_WTF)
 | |
| 	return "WTF";
 | |
|     return WcCesInfo[WC_CES_INDEX(ces)].name;
 | |
| }
 | |
| 
 | |
| char *
 | |
| wc_ces_to_charset_desc(wc_ces ces)
 | |
| {
 | |
|     if (ces == WC_CES_WTF)
 | |
| 	return "W3M Transfer Format";
 | |
|     return WcCesInfo[WC_CES_INDEX(ces)].desc;
 | |
| }
 | |
| 
 | |
| wc_ces
 | |
| wc_guess_8bit_charset(wc_ces orig)
 | |
| {
 | |
|     switch (orig) {
 | |
|     case WC_CES_ISO_2022_JP:
 | |
|     case WC_CES_ISO_2022_JP_2:
 | |
|     case WC_CES_ISO_2022_JP_3:
 | |
| 	return WC_CES_EUC_JP;
 | |
|     case WC_CES_ISO_2022_KR:
 | |
| 	return WC_CES_EUC_KR;
 | |
|     case WC_CES_ISO_2022_CN:
 | |
|     case WC_CES_HZ_GB_2312:
 | |
| 	return WC_CES_EUC_CN;
 | |
|     case WC_CES_US_ASCII:
 | |
| 	return WC_CES_ISO_8859_1;
 | |
|     }
 | |
|     return orig;
 | |
| }
 | |
| 
 | |
| wc_bool
 | |
| wc_check_ces(wc_ces ces)
 | |
| {
 | |
|     size_t i = WC_CES_INDEX(ces);
 | |
| 
 | |
|     return (i <= WC_CES_END && WcCesInfo[i].id == ces);
 | |
| }
 | |
| 
 | |
| static int
 | |
| wc_ces_list_cmp(const void *a, const void *b)
 | |
| {
 | |
|     return strcasecmp(((wc_ces_list *)a)->desc, ((wc_ces_list *)b)->desc);
 | |
| }
 | |
| 
 | |
| static wc_ces_list *list = NULL;
 | |
| 
 | |
| wc_ces_list *
 | |
| wc_get_ces_list(void)
 | |
| {
 | |
|     wc_ces_info *info;
 | |
|     size_t n;
 | |
| 
 | |
|     if (list)
 | |
| 	return list;
 | |
|     for (info = WcCesInfo, n = 0; info->id; info++) {
 | |
| 	if (info->name != NULL)
 | |
| 	    n++;
 | |
|     }
 | |
|     list = New_N(wc_ces_list, n + 1);
 | |
|     for (info = WcCesInfo, n = 0; info->id; info++) {
 | |
| 	if (info->name != NULL) {
 | |
| 	    list[n].id = info->id;
 | |
| 	    list[n].name = info->name;
 | |
| 	    list[n].desc = info->desc;
 | |
| 	    n++;
 | |
| 	}
 | |
|     }
 | |
|     list[n].id = 0;
 | |
|     list[n].name = NULL;
 | |
|     list[n].desc = NULL;
 | |
|     qsort(list, n, sizeof(wc_ces_list), wc_ces_list_cmp);
 | |
|     return list;
 | |
| }
 |