Merge pull request #256 from rkta/softhyphen

Skip soft hyphen when reading token
This commit is contained in:
Tatsuya Kinoshita
2023-01-06 19:38:36 +09:00
committed by GitHub
5 changed files with 2259 additions and 2248 deletions
+10 -9
View File
@@ -1,4 +1,5 @@
//Usage: qjs --std entity.js [test/tab] > outfile
// Usage: qjs --std entity.js [test/tab] > outfile
// See: https://github.com/bellard/quickjs
const a = getEntities();
@@ -15,9 +16,9 @@ function gentable(criteria, item) {
}
function gentestitem(b, c, d) {
let ha = "0x" + d[0].toString(16).toUpperCase() + "</td>";
let ha = "0x" + d[0].toString(16).toUpperCase();
if (d.length == 2)
var hb = "0x" + d[1].toString(16).toUpperCase() + "</td>";
var hb = "0x" + d[1].toString(16).toUpperCase();
else
var hb = "";
console.log(" <tr>");
@@ -54,19 +55,19 @@ function gentest() {
console.log(" <tr>");
console.log(" " +
"<th>Name</th><th>Hex 1</th><th>Hex 2</th><th>Hex result</th><th>Name result</th><th>Byte result</th>");
console.log(" </th>");
console.log(" </tr>");
gentable((b, d) => d.length == 1 && b[b.length - 1] == ';', gentestitem);
console.log("<th><hr></th><th><hr></th><th><hr></th><th><hr></th><th><hr></th><th><hr></th>");
console.log("<tr><th><hr></th><th><hr></th><th><hr></th><th><hr></th><th><hr></th><th><hr></th></tr>");
gentable((b, d) => d.length == 1 && b[b.length - 1] != ';', gentestitem);
console.log("<th><hr></th><th><hr></th><th><hr></th><th><hr></th><th><hr></th><th><hr></th>");
console.log("<tr><th><hr></th><th><hr></th><th><hr></th><th><hr></th><th><hr></th><th><hr></th></tr>");
gentable((b, d) => d.length == 2 && b[b.length - 1] == ';', gentestitem);
console.log("<th><hr></th><th><hr></th><th><hr></th><th><hr></th><th><hr></th><th><hr></th>");
console.log("<tr><th><hr></th><th><hr></th><th><hr></th><th><hr></th><th><hr></th><th><hr></th></tr>");
gentable((b, d) => d.length == 2 && b[b.length - 1] != ';', gentestitem);
@@ -1996,8 +1997,8 @@ function getEntities() {
"&shcy;": { "codepoints": [1096], "characters": "\u0448" },
"&shortmid;": { "codepoints": [8739], "characters": "\u2223" },
"&shortparallel;": { "codepoints": [8741], "characters": "\u2225" },
"&shy": { "codepoints": [173], "characters": "\u00AD" },
"&shy;": { "codepoints": [173], "characters": "\u00AD" },
"&shy": { "codepoints": [173], "characters": "" },
"&shy;": { "codepoints": [173], "characters": "" },
"&sigma;": { "codepoints": [963], "characters": "\u03C3" },
"&sigmaf;": { "codepoints": [962], "characters": "\u03C2" },
"&sigmav;": { "codepoints": [962], "characters": "\u03C2" },
+9
View File
@@ -834,6 +834,15 @@ read_token(Str buf, char **instr, int *status, int pre, int append)
if (**instr == '\0')
return 0;
for (p = *instr; *p; p++) {
/* Drop Unicode soft hyphen */
if (*(unsigned char *)p == 0210
&& *(unsigned char *)(p + 1) == 0200
&& *(unsigned char *)(p + 2) == 0201
&& *(unsigned char *)(p + 3) == 0255) {
p += 3;
continue;
}
prev_status = *status;
next_status(*p, status);
switch (*status) {
+3 -2
View File
@@ -3274,8 +3274,9 @@ feed_table(struct table *tbl, char *line, struct table_mode *mode,
break;
default:
r = conv_entity(ec);
if (r != NULL && strlen(r) == 1 &&
ec == (unsigned char)*r) {
if (!r || !*r)
break;
if (strlen(r) == 1 && ec == (unsigned char)*r) {
Strcat_char(tmp, *r);
break;
}
+2 -2
View File
@@ -1726,7 +1726,7 @@ CounterClockwiseContourIntegral;
&shcy; 0x448 ш ш ш
&shortmid; 0x2223
&shortparallel; 0x2225 ∥ ∥ ∥
&shy; 0xAD ­
&shy; 0xAD
&sigma; 0x3C3 σ σ σ
&sigmaf; 0x3C2 ς ς ς
&sigmav; 0x3C2 ς ς ς
@@ -2125,7 +2125,7 @@ CounterClockwiseContourIntegral;
&raquo 0xBB » &raquo »
&reg 0xAE ® &reg ®
&sect 0xA7 § &sect §
&shy 0xAD &shy ­
&shy 0xAD &shy
&sup1 0xB9 ¹ &sup1 ¹
&sup2 0xB2 ² &sup2 ²
&sup3 0xB3 ³ &sup3 ³
+2235 -2235
View File
File diff suppressed because it is too large Load Diff