Skip soft hyphen when reading token
The soft hyphen (U+00AD) should only appear if a word is actually broken at the hyphen position, so filter it out while reading a token. Adjust the entity test files to reflect the new behaviour.

This fixes Issue #224 and Debian Bug #830173.

Bug-Debian: https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=830173
Bug-Debian: https://github.com/tats/w3m/issues/224
This commit is contained in:
@@ -1997,8 +1997,8 @@ function getEntities() {
|
||||
"ш": { "codepoints": [1096], "characters": "\u0448" },
|
||||
"∣": { "codepoints": [8739], "characters": "\u2223" },
|
||||
"∥": { "codepoints": [8741], "characters": "\u2225" },
|
||||
"­": { "codepoints": [173], "characters": "\u00AD" },
|
||||
"­": { "codepoints": [173], "characters": "\u00AD" },
|
||||
"­": { "codepoints": [173], "characters": "" },
|
||||
"­": { "codepoints": [173], "characters": "" },
|
||||
"σ": { "codepoints": [963], "characters": "\u03C3" },
|
||||
"ς": { "codepoints": [962], "characters": "\u03C2" },
|
||||
"ς": { "codepoints": [962], "characters": "\u03C2" },
|
||||
|
||||
@@ -834,6 +834,15 @@ read_token(Str buf, char **instr, int *status, int pre, int append)
|
||||
if (**instr == '\0')
|
||||
return 0;
|
||||
for (p = *instr; *p; p++) {
|
||||
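/* Drop the soft hyphen (U+00AD). The four bytes matched below (0x88 0x80 0x81 0xAD) are not its UTF-8 form (0xC2 0xAD); presumably they are its encoding in w3m's internal character representation -- TODO confirm. */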
|
||||
if (*(unsigned char *)p == 0210
|
||||
&& *(unsigned char *)(p + 1) == 0200
|
||||
&& *(unsigned char *)(p + 2) == 0201
|
||||
&& *(unsigned char *)(p + 3) == 0255) {
|
||||
p += 3;
|
||||
continue;
|
||||
}
|
||||
|
||||
prev_status = *status;
|
||||
next_status(*p, status);
|
||||
switch (*status) {
|
||||
|
||||
@@ -3274,8 +3274,9 @@ feed_table(struct table *tbl, char *line, struct table_mode *mode,
|
||||
break;
|
||||
default:
|
||||
r = conv_entity(ec);
|
||||
if (r != NULL && strlen(r) == 1 &&
|
||||
ec == (unsigned char)*r) {
|
||||
if (!r || !*r)
|
||||
break;
|
||||
if (strlen(r) == 1 && ec == (unsigned char)*r) {
|
||||
Strcat_char(tmp, *r);
|
||||
break;
|
||||
}
|
||||
|
||||
@@ -1726,7 +1726,7 @@ CounterClockwiseContourIntegral;
|
||||
ш 0x448 ш ш ш
|
||||
∣ 0x2223 ∣ ∣ ∣
|
||||
∥ 0x2225 ∥ ∥ ∥
|
||||
­ 0xAD
|
||||
­ 0xAD
|
||||
σ 0x3C3 σ σ σ
|
||||
ς 0x3C2 ς ς ς
|
||||
ς 0x3C2 ς ς ς
|
||||
@@ -2125,7 +2125,7 @@ CounterClockwiseContourIntegral;
|
||||
» 0xBB » » »
|
||||
® 0xAE ® ® ®
|
||||
§ 0xA7 § § §
|
||||
­ 0xAD ­
|
||||
­ 0xAD ­
|
||||
¹ 0xB9 ¹ ¹ ¹
|
||||
² 0xB2 ² ² ²
|
||||
³ 0xB3 ³ ³ ³
|
||||
|
||||
@@ -5184,7 +5184,7 @@
|
||||
<td>&shortparallel;</td><td>0x2225</td><td></td><td>∥</td><td>∥</td><td>∥</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>&shy;</td><td>0xAD</td><td></td><td>­</td><td>­</td><td></td>
|
||||
<td>&shy;</td><td>0xAD</td><td></td><td>­</td><td>­</td><td></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>&sigma;</td><td>0x3C3</td><td></td><td>σ</td><td>σ</td><td>σ</td>
|
||||
@@ -6379,7 +6379,7 @@
|
||||
<td>&sect</td><td>0xA7</td><td></td><td>§</td><td>§</td><td>§</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>&shy</td><td>0xAD</td><td></td><td>­</td><td>­</td><td></td>
|
||||
<td>&shy</td><td>0xAD</td><td></td><td>­</td><td>­</td><td></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>&sup1</td><td>0xB9</td><td></td><td>¹</td><td>¹</td><td>¹</td>
|
||||
|
||||
Reference in New Issue
Block a user