Skip soft hyphen when reading token

A soft hyphen should only be displayed when a word is actually broken at
the hyphen position. Filter it out while reading tokens.

Adjust the entity test files to reflect the new behaviour.

This fixes Issue #224 and Debian Bug #830173.

Bug-Debian: https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=830173
Bug: https://github.com/tats/w3m/issues/224
This commit is contained in:
Rene Kita
2022-04-24 14:37:08 +02:00
parent 2692d22006
commit e8287f36b0
5 changed files with 18 additions and 8 deletions
+2 -2
View File
@@ -1997,8 +1997,8 @@ function getEntities() {
"ш": { "codepoints": [1096], "characters": "\u0448" },
"∣": { "codepoints": [8739], "characters": "\u2223" },
"∥": { "codepoints": [8741], "characters": "\u2225" },
"&shy": { "codepoints": [173], "characters": "\u00AD" },
"­": { "codepoints": [173], "characters": "\u00AD" },
"&shy": { "codepoints": [173], "characters": "" },
"­": { "codepoints": [173], "characters": "" },
"σ": { "codepoints": [963], "characters": "\u03C3" },
"ς": { "codepoints": [962], "characters": "\u03C2" },
"ς": { "codepoints": [962], "characters": "\u03C2" },
+9
View File
@@ -834,6 +834,15 @@ read_token(Str buf, char **instr, int *status, int pre, int append)
if (**instr == '\0')
return 0;
for (p = *instr; *p; p++) {
/* Drop Unicode soft hyphen */
if (*(unsigned char *)p == 0210
&& *(unsigned char *)(p + 1) == 0200
&& *(unsigned char *)(p + 2) == 0201
&& *(unsigned char *)(p + 3) == 0255) {
p += 3;
continue;
}
prev_status = *status;
next_status(*p, status);
switch (*status) {
+3 -2
View File
@@ -3274,8 +3274,9 @@ feed_table(struct table *tbl, char *line, struct table_mode *mode,
break;
default:
r = conv_entity(ec);
if (r != NULL && strlen(r) == 1 &&
ec == (unsigned char)*r) {
if (!r || !*r)
break;
if (strlen(r) == 1 && ec == (unsigned char)*r) {
Strcat_char(tmp, *r);
break;
}
+2 -2
View File
@@ -1726,7 +1726,7 @@ CounterClockwiseContourIntegral;
ш 0x448 ш ш ш
∣ 0x2223
∥ 0x2225 ∥ ∥ ∥
­ 0xAD ­
­ 0xAD
σ 0x3C3 σ σ σ
ς 0x3C2 ς ς ς
ς 0x3C2 ς ς ς
@@ -2125,7 +2125,7 @@ CounterClockwiseContourIntegral;
&raquo 0xBB » &raquo »
&reg 0xAE ® &reg ®
&sect 0xA7 § &sect §
&shy 0xAD &shy ­
&shy 0xAD &shy
&sup1 0xB9 ¹ &sup1 ¹
&sup2 0xB2 ² &sup2 ²
&sup3 0xB3 ³ &sup3 ³
+2 -2
View File
@@ -5184,7 +5184,7 @@
<td>&amp;shortparallel;</td><td>0x2225</td><td></td><td>&#x2225;</td><td>&shortparallel;</td><td></td>
</tr>
<tr>
<td>&amp;shy;</td><td>0xAD</td><td></td><td>&#xAD;</td><td>&shy;</td><td>­</td>
<td>&amp;shy;</td><td>0xAD</td><td></td><td>&#xAD;</td><td>&shy;</td><td></td>
</tr>
<tr>
<td>&amp;sigma;</td><td>0x3C3</td><td></td><td>&#x3C3;</td><td>&sigma;</td><td>σ</td>
@@ -6379,7 +6379,7 @@
<td>&amp;sect</td><td>0xA7</td><td></td><td>&#xA7;</td><td>&sect</td><td>§</td>
</tr>
<tr>
<td>&amp;shy</td><td>0xAD</td><td></td><td>&#xAD;</td><td>&shy</td><td>­</td>
<td>&amp;shy</td><td>0xAD</td><td></td><td>&#xAD;</td><td>&shy</td><td></td>
</tr>
<tr>
<td>&amp;sup1</td><td>0xB9</td><td></td><td>&#xB9;</td><td>&sup1</td><td>¹</td>