* merge ambiguous width patch

* http://www.j10n.org/files/w3m-cvs-1.914-ambwidth.patch
This commit is contained in:
Ito Hiroyuki
2010-07-25 09:55:05 +00:00
parent 1cf2e954a1
commit e2782c2ae9
17 changed files with 22236 additions and 174 deletions
File diff suppressed because it is too large Load Diff
+3 -1
View File
@@ -95,6 +95,8 @@ $(LIBRARY): $(OBJS)
test: $(LIBRARY) test.o ../Str.o ../myctype.o
$(CC) -o wctest test.o ../Str.o ../myctype.o $(LIBRARY) ../gc/gc.a
# Regenerate the ambiguous-width range table: run ambwidth_map.awk over
# EastAsianWidth.txt and store its output as map/ucs_ambwidth.map.
map/ucs_ambwidth.map: EastAsianWidth.txt
awk -f ambwidth_map.awk EastAsianWidth.txt > map/ucs_ambwidth.map
###
big5.o: wc.h wc_types.h ces.h ccs.h iso2022.h priv.h big5.h search.h wtf.h \
ucs.h
@@ -154,7 +156,7 @@ ucs.o: wc.h wc_types.h ces.h ccs.h iso2022.h priv.h ucs.h search.h big5.h \
map/hkscs_ucs_p2.map map/gb12345_ucs.map map/johab_ucs.map \
map/sjis_ext_ucs.map map/gbk_ucs.map map/uhc_ucs.map map/ucs_wide.map \
map/ucs_combining.map map/ucs_precompose.map map/ucs_hangul.map \
map/ucs_fullwidth.map
map/ucs_fullwidth.map map/ucs_ambwidth.map
uhc.o: wc.h wc_types.h ces.h ccs.h iso2022.h priv.h uhc.h wtf.h ucs.h
utf7.o: wc.h wc_types.h ces.h ccs.h iso2022.h priv.h ucs.h utf7.h wtf.h
utf8.o: wc.h wc_types.h ces.h ccs.h iso2022.h priv.h ucs.h utf8.h wtf.h
+47
View File
@@ -0,0 +1,47 @@
# Build the C table ucs_ambwidth_map from EastAsianWidth.txt.
#
# Every record whose second field is "A" and whose first code point is below
# 0x10000 is collected.  Neighbouring records are coalesced into
# { first, last } pairs and printed as a C array, followed by the
# N_ucs_ambwidth_map element-count macro.
#
# Only POSIX awk features are used (no strtonum(), no hexadecimal literals),
# so the script does not depend on gawk.

# Return the numeric value of the leading hexadecimal digits of s.
# Parsing stops at the first character that is not a hex digit.
function hex2num(s,    k, d, v) {
    v = 0
    s = toupper(s)
    for (k = 1; k <= length(s); k++) {
        d = index("0123456789ABCDEF", substr(s, k, 1))
        if (d == 0)
            break
        v = v * 16 + d - 1
    }
    return v
}

BEGIN {
    # $1 is the code point (or an "XXXX..YYYY" range), $2 the width class.
    FS = "[; ]";
    i = 0;
}

$2 == "A" {
    # Normalise every record to a first/last pair; a single code point is
    # stored as a one-element range.
    first = $1
    last = $1
    sep = index($1, "..")
    if (sep > 0) {
        first = substr($1, 1, sep - 1)
        last = substr($1, sep + 2)
    }
    # 65536 == 0x10000: keep only records that start inside the 16-bit range.
    if (hex2num(first) < 65536) {
        lo[i] = first
        hi[i] = last
        i++
    }
}

END {
    n = 0
    if (i > 0) {
        start = lo[0]
        stop = hi[0]
        for (j = 1; j < i; j++) {
            # A new output range starts only when the next record begins more
            # than 2 above the end of the current run.
            # NOTE(review): this also merges records separated by exactly one
            # unlisted code point, so that code point ends up inside the
            # emitted range -- confirm this widening is intended.
            if (hex2num(lo[j]) - hex2num(stop) > 2) {
                map2[n] = sprintf("0x%s, 0x%s", start, stop)
                n++
                start = lo[j]
            }
            stop = hi[j]
        }
        # Flush the final run; this also covers an input with one record.
        map2[n] = sprintf("0x%s, 0x%s", start, stop)
        n++
    }

    printf("static wc_map ucs_ambwidth_map[] = {\n");
    for (j = 0; j < n; j++) {
        printf(" { %s },\n", map2[j]);
    }
    printf("};\n");
    printf("#define N_ucs_ambwidth_map (sizeof(ucs_ambwidth_map) / sizeof(*ucs_ambwidth_map))\n");
}
+120
View File
@@ -0,0 +1,120 @@
/*
 * Table of UCS code point ranges, one { first, last } pair per entry; a
 * single code point appears with first == last.  Entries are listed in
 * ascending order and every value fits in 16 bits.
 *
 * The layout matches what ambwidth_map.awk prints, so this looks like
 * generated output -- regenerate it rather than editing by hand (TODO
 * confirm).  Presumably the ordering must be preserved for
 * wc_map_range_search(); verify against that function.
 */
static wc_map ucs_ambwidth_map[] = {
{ 0x00A1, 0x00A1 },
{ 0x00A4, 0x00A4 },
{ 0x00A7, 0x00AA },
{ 0x00AD, 0x00BF },
{ 0x00C6, 0x00C6 },
{ 0x00D0, 0x00D0 },
{ 0x00D7, 0x00D8 },
{ 0x00DE, 0x00E1 },
{ 0x00E6, 0x00ED },
{ 0x00F0, 0x00F3 },
{ 0x00F7, 0x00FE },
{ 0x0101, 0x0101 },
{ 0x0111, 0x0113 },
{ 0x011B, 0x011B },
{ 0x0126, 0x0127 },
{ 0x012B, 0x012B },
{ 0x0131, 0x0133 },
{ 0x0138, 0x0138 },
{ 0x013F, 0x0144 },
{ 0x0148, 0x014D },
{ 0x0152, 0x0153 },
{ 0x0166, 0x0167 },
{ 0x016B, 0x016B },
{ 0x01CE, 0x01DC },
{ 0x0251, 0x0251 },
{ 0x0261, 0x0261 },
{ 0x02C4, 0x02C4 },
{ 0x02C7, 0x02CD },
{ 0x02D0, 0x02D0 },
{ 0x02D8, 0x02DF },
{ 0x0300, 0x036F },
{ 0x0391, 0x03A9 },
{ 0x03B1, 0x03C9 },
{ 0x0401, 0x0401 },
{ 0x0410, 0x0451 },
{ 0x2010, 0x2010 },
{ 0x2013, 0x2019 },
{ 0x201C, 0x201D },
{ 0x2020, 0x2027 },
{ 0x2030, 0x2035 },
{ 0x203B, 0x203B },
{ 0x203E, 0x203E },
{ 0x2074, 0x2074 },
{ 0x207F, 0x2084 },
{ 0x20AC, 0x20AC },
{ 0x2103, 0x2105 },
{ 0x2109, 0x2109 },
{ 0x2113, 0x2113 },
{ 0x2116, 0x2116 },
{ 0x2121, 0x2122 },
{ 0x2126, 0x2126 },
{ 0x212B, 0x212B },
{ 0x2153, 0x2154 },
{ 0x215B, 0x216B },
{ 0x2170, 0x2179 },
{ 0x2189, 0x2189 },
{ 0x2190, 0x2199 },
{ 0x21B8, 0x21B9 },
{ 0x21D2, 0x21D4 },
{ 0x21E7, 0x21E7 },
{ 0x2200, 0x2203 },
{ 0x2207, 0x2208 },
{ 0x220B, 0x220B },
{ 0x220F, 0x2211 },
{ 0x2215, 0x2215 },
{ 0x221A, 0x221A },
{ 0x221D, 0x2220 },
{ 0x2223, 0x222E },
{ 0x2234, 0x2237 },
{ 0x223C, 0x223D },
{ 0x2248, 0x2248 },
{ 0x224C, 0x224C },
{ 0x2252, 0x2252 },
{ 0x2260, 0x2261 },
{ 0x2264, 0x2267 },
{ 0x226A, 0x226B },
{ 0x226E, 0x226F },
{ 0x2282, 0x2283 },
{ 0x2286, 0x2287 },
{ 0x2295, 0x2295 },
{ 0x2299, 0x2299 },
{ 0x22A5, 0x22A5 },
{ 0x22BF, 0x22BF },
{ 0x2312, 0x2312 },
{ 0x2460, 0x254B },
{ 0x2550, 0x2573 },
{ 0x2580, 0x258F },
{ 0x2592, 0x2595 },
{ 0x25A0, 0x25A9 },
{ 0x25B2, 0x25B3 },
{ 0x25B6, 0x25B7 },
{ 0x25BC, 0x25BD },
{ 0x25C0, 0x25C1 },
{ 0x25C6, 0x25C8 },
{ 0x25CB, 0x25CB },
{ 0x25CE, 0x25D1 },
{ 0x25E2, 0x25E5 },
{ 0x25EF, 0x25EF },
{ 0x2605, 0x2606 },
{ 0x2609, 0x2609 },
{ 0x260E, 0x260F },
{ 0x2614, 0x2615 },
{ 0x261C, 0x261E },
{ 0x2640, 0x2642 },
{ 0x2660, 0x266F },
{ 0x269E, 0x269F },
{ 0x26BE, 0x26BF },
{ 0x26C4, 0x26E3 },
{ 0x26E8, 0x26FF },
{ 0x273D, 0x273D },
{ 0x2757, 0x2757 },
{ 0x2776, 0x277F },
{ 0x2B55, 0x2B59 },
{ 0x3248, 0x324F },
{ 0xE000, 0xF8FF },
{ 0xFE00, 0xFE0F },
{ 0xFFFD, 0xFFFD },
};
/* Number of entries in ucs_ambwidth_map. */
#define N_ucs_ambwidth_map (sizeof(ucs_ambwidth_map) / sizeof(*ucs_ambwidth_map))
+1
View File
@@ -25,6 +25,7 @@ wc_option WcOption = {
WC_FALSE, /* gb18030_as_ucs */
WC_FALSE, /* no_replace */
WC_TRUE, /* use_wide */
WC_FALSE, /* east_asian_width */
};
static wc_status output_st;
+16
View File
@@ -17,6 +17,7 @@
#include "ucs.map"
#include "map/ucs_ambwidth.map"
#include "map/ucs_wide.map"
#include "map/ucs_combining.map"
#include "map/ucs_precompose.map"
@@ -511,10 +512,25 @@ wc_ucs_to_ccs(wc_uint32 ucs)
if (0x80 <= ucs && ucs <= 0x9F)
return WC_CCS_C1;
return ((ucs <= WC_C_UCS2_END) ? WC_CCS_UCS2 : WC_CCS_UCS4)
| ((WcOption.east_asian_width && wc_is_ucs_ambiguous_width(ucs))
? WC_CCS_A_WIDE : 0)
| (wc_is_ucs_wide(ucs) ? WC_CCS_A_WIDE : 0)
| (wc_is_ucs_combining(ucs) ? WC_CCS_A_COMB : 0);
}
/*
 * Tell whether the code point `ucs` is classified as ambiguous-width.
 *
 * Three cases are distinguished:
 *   - 0xA1..0xFE is always reported as ambiguous while
 *     WcOption.use_jisx0213 is set;
 *   - any other code point up to WC_C_UCS2_END is looked up in
 *     ucs_ambwidth_map;
 *   - above WC_C_UCS2_END only 0xF0000..0xFFFFD and 0x100000..0x10FFFD
 *     are reported as ambiguous.
 *
 * Returns non-zero for ambiguous code points and 0 otherwise.
 */
wc_bool
wc_is_ucs_ambiguous_width(wc_uint32 ucs)
{
    if (WcOption.use_jisx0213 && ucs >= 0xa1 && ucs <= 0xfe)
	return 1;

    if (ucs > WC_C_UCS2_END) {
	/* No table exists for this range; the two blocks are hard-coded. */
	if (ucs >= 0xF0000 && ucs <= 0xFFFFD)
	    return 1;
	return (ucs >= 0x100000 && ucs <= 0x10FFFD);
    }

    /* The table search takes a 16-bit key, hence the narrowing cast. */
    return (NULL != wc_map_range_search((wc_uint16)ucs,
					ucs_ambwidth_map,
					N_ucs_ambwidth_map));
}
wc_bool
wc_is_ucs_wide(wc_uint32 ucs)
{
+1
View File
@@ -44,6 +44,7 @@ extern wc_wchar_t wc_any_to_iso2022(wc_wchar_t cc, wc_status *st);
extern wc_wchar_t wc_ucs_to_iso2022(wc_uint32 ucs);
extern wc_wchar_t wc_ucs_to_iso2022w(wc_uint32 ucs);
extern wc_ccs wc_ucs_to_ccs(wc_uint32 ucs);
extern wc_bool wc_is_ucs_ambiguous_width(wc_uint32 ucs);
extern wc_bool wc_is_ucs_wide(wc_uint32 ucs);
extern wc_bool wc_is_ucs_combining(wc_uint32 ucs);
extern wc_bool wc_is_ucs_hangul(wc_uint32 ucs);
+1
View File
@@ -91,6 +91,7 @@ typedef struct {
wc_bool gb18030_as_ucs; /* treat 4 bytes char. of GB18030 as Unicode */
wc_bool no_replace; /* don't output replace character */
wc_bool use_wide; /* use wide characters */
wc_bool east_asian_width; /* East Asian Ambiguous characters are wide */
} wc_option;
typedef struct {