Merge pull request #122 from mackyle/master

entities: support &apos; entity
This commit is contained in:
Tatsuya Kinoshita
2019-11-10 19:57:02 +09:00
committed by GitHub
3 changed files with 6 additions and 4 deletions

View File

@@ -7,6 +7,8 @@ amp 0x26
AMP 0x26
quot 0x22
QUOT 0x22
apos 0x27
APOS 0x27
nbsp 0xA0
NBSP 0xA0
iexcl 0xA1

View File

@@ -19,7 +19,7 @@ unsigned char QUOTE_MAP[0x100] = {
/* DLE DC1 DC2 DC3 DC4 NAK SYN ETB CAN EM SUB ESC FS GS RS US */
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
/* SPC ! " # $ % & ' ( ) * + , - . / */
24, 72, 76, 40, 8, 40, 41, 72, 72, 72, 72, 40, 72, 8, 0, 64,
24, 72, 76, 40, 8, 40, 41, 77, 72, 72, 72, 40, 72, 8, 0, 64,
/* 0 1 2 3 4 5 6 7 8 9 : ; < = > ? */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 32, 72, 74, 72, 75, 40,
/* @ A B C D E F G H I J K L M N O */
@@ -47,7 +47,7 @@ char *HTML_QUOTE_MAP[] = {
"&lt;",
"&gt;",
"&quot;",
NULL,
"&apos;",
NULL,
NULL,
};
@@ -462,7 +462,7 @@ getescapechar(char **str)
q = p;
for (p++; IS_ALNUM(*p); p++) ;
q = allocStr(q, p - q);
if (strcasestr("lt gt amp quot nbsp", q) && *p != '=') {
if (strcasestr("lt gt amp quot apos nbsp", q) && *p != '=') {
/* a character entity MUST be terminated with ";". However,
* there's MANY web pages which uses &lt , &gt or something
* like them as &lt;, &gt;, etc. Therefore, we treat the most

View File

@@ -27,7 +27,7 @@ struct growbuf {
extern unsigned char QUOTE_MAP[];
extern char *HTML_QUOTE_MAP[];
#define HTML_QUOTE_MASK 0x07 /* &, <, >, " */
#define HTML_QUOTE_MASK 0x07 /* &, <, >, ", ' */
#define SHELL_UNSAFE_MASK 0x08 /* [^A-Za-z0-9_./:\200-\377] */
#define URL_QUOTE_MASK 0x10 /* [\0- \177-\377] */
#define FILE_QUOTE_MASK 0x30 /* [\0- #%&+:?\177-\377] */