entities: support &apos; entity

The XHTML standard encompasses the XML standard.

From the beginning, the XML standard [1] has always included required
support for five character entities:

 1. the ampersand (&) as &amp;
 2. the left angle bracket (<) as &lt;
 3. the right angle bracket (>) as &gt;
 4. the double-quote character (") as &quot;
 5. the apostrophe or single-quote character (') as &apos;

See section "2.4 Character Data and Markup" of the XML standard [1]
for further details.

Add support for the single-quote character entity (&apos;)
in order to fully support XHTML pages.

[1]: https://www.w3.org/TR/REC-xml/

Signed-off-by: Kyle J. McKay <mackyle@gmail.com>
This commit is contained in:
Kyle J. McKay
2019-11-08 15:53:20 -07:00
parent b3054364d1
commit e8948ec3a3
3 changed files with 6 additions and 4 deletions

View File

@@ -7,6 +7,8 @@ amp 0x26
AMP 0x26 AMP 0x26
quot 0x22 quot 0x22
QUOT 0x22 QUOT 0x22
apos 0x27
APOS 0x27
nbsp 0xA0 nbsp 0xA0
NBSP 0xA0 NBSP 0xA0
iexcl 0xA1 iexcl 0xA1

View File

@@ -19,7 +19,7 @@ unsigned char QUOTE_MAP[0x100] = {
/* DLE DC1 DC2 DC3 DC4 NAK SYN ETB CAN EM SUB ESC FS GS RS US */ /* DLE DC1 DC2 DC3 DC4 NAK SYN ETB CAN EM SUB ESC FS GS RS US */
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
/* SPC ! " # $ % & ' ( ) * + , - . / */ /* SPC ! " # $ % & ' ( ) * + , - . / */
24, 72, 76, 40, 8, 40, 41, 72, 72, 72, 72, 40, 72, 8, 0, 64, 24, 72, 76, 40, 8, 40, 41, 77, 72, 72, 72, 40, 72, 8, 0, 64,
/* 0 1 2 3 4 5 6 7 8 9 : ; < = > ? */ /* 0 1 2 3 4 5 6 7 8 9 : ; < = > ? */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 32, 72, 74, 72, 75, 40, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 32, 72, 74, 72, 75, 40,
/* @ A B C D E F G H I J K L M N O */ /* @ A B C D E F G H I J K L M N O */
@@ -47,7 +47,7 @@ char *HTML_QUOTE_MAP[] = {
"&lt;", "&lt;",
"&gt;", "&gt;",
"&quot;", "&quot;",
NULL, "&apos;",
NULL, NULL,
NULL, NULL,
}; };
@@ -462,7 +462,7 @@ getescapechar(char **str)
q = p; q = p;
for (p++; IS_ALNUM(*p); p++) ; for (p++; IS_ALNUM(*p); p++) ;
q = allocStr(q, p - q); q = allocStr(q, p - q);
if (strcasestr("lt gt amp quot nbsp", q) && *p != '=') { if (strcasestr("lt gt amp quot apos nbsp", q) && *p != '=') {
/* a character entity MUST be terminated with ";". However, /* a character entity MUST be terminated with ";". However,
* there's MANY web pages which uses &lt , &gt or something * there's MANY web pages which uses &lt , &gt or something
* like them as &lt;, &gt;, etc. Therefore, we treat the most * like them as &lt;, &gt;, etc. Therefore, we treat the most

View File

@@ -27,7 +27,7 @@ struct growbuf {
extern unsigned char QUOTE_MAP[]; extern unsigned char QUOTE_MAP[];
extern char *HTML_QUOTE_MAP[]; extern char *HTML_QUOTE_MAP[];
#define HTML_QUOTE_MASK 0x07 /* &, <, >, " */ #define HTML_QUOTE_MASK 0x07 /* &, <, >, ", ' */
#define SHELL_UNSAFE_MASK 0x08 /* [^A-Za-z0-9_./:\200-\377] */ #define SHELL_UNSAFE_MASK 0x08 /* [^A-Za-z0-9_./:\200-\377] */
#define URL_QUOTE_MASK 0x10 /* [\0- \177-\377] */ #define URL_QUOTE_MASK 0x10 /* [\0- \177-\377] */
#define FILE_QUOTE_MASK 0x30 /* [\0- #%&+:?\177-\377] */ #define FILE_QUOTE_MASK 0x30 /* [\0- #%&+:?\177-\377] */