entities: support &apos; entity
The XHTML standard encompasses the XML standard. From the beginning, the XML standard [1] has always included required support for five character entities: 1. the ampersand (&) as &amp; 2. the left angle bracket (<) as &lt; 3. the right angle bracket (>) as &gt; 4. the double-quote character (") as &quot; 5. the apostrophe or single-quote character (') as &apos; See section "2.4 Character Data and Markup" of the XML standard [1] for further details. Add support for the single-quote character entity (&apos;) in order to fully support XHTML pages. [1]: https://www.w3.org/TR/REC-xml/ Signed-off-by: Kyle J. McKay <mackyle@gmail.com>
This commit is contained in:
@@ -7,6 +7,8 @@ amp 0x26
|
|||||||
AMP 0x26
|
AMP 0x26
|
||||||
quot 0x22
|
quot 0x22
|
||||||
QUOT 0x22
|
QUOT 0x22
|
||||||
|
apos 0x27
|
||||||
|
APOS 0x27
|
||||||
nbsp 0xA0
|
nbsp 0xA0
|
||||||
NBSP 0xA0
|
NBSP 0xA0
|
||||||
iexcl 0xA1
|
iexcl 0xA1
|
||||||
|
6
indep.c
6
indep.c
@@ -19,7 +19,7 @@ unsigned char QUOTE_MAP[0x100] = {
|
|||||||
/* DLE DC1 DC2 DC3 DC4 NAK SYN ETB CAN EM SUB ESC FS GS RS US */
|
/* DLE DC1 DC2 DC3 DC4 NAK SYN ETB CAN EM SUB ESC FS GS RS US */
|
||||||
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
|
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
|
||||||
/* SPC ! " # $ % & ' ( ) * + , - . / */
|
/* SPC ! " # $ % & ' ( ) * + , - . / */
|
||||||
24, 72, 76, 40, 8, 40, 41, 72, 72, 72, 72, 40, 72, 8, 0, 64,
|
24, 72, 76, 40, 8, 40, 41, 77, 72, 72, 72, 40, 72, 8, 0, 64,
|
||||||
/* 0 1 2 3 4 5 6 7 8 9 : ; < = > ? */
|
/* 0 1 2 3 4 5 6 7 8 9 : ; < = > ? */
|
||||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 32, 72, 74, 72, 75, 40,
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 32, 72, 74, 72, 75, 40,
|
||||||
/* @ A B C D E F G H I J K L M N O */
|
/* @ A B C D E F G H I J K L M N O */
|
||||||
@@ -47,7 +47,7 @@ char *HTML_QUOTE_MAP[] = {
|
|||||||
"&lt;",
|
"&lt;",
|
||||||
"&gt;",
|
"&gt;",
|
||||||
"&quot;",
|
"&quot;",
|
||||||
NULL,
|
"&apos;",
|
||||||
NULL,
|
NULL,
|
||||||
NULL,
|
NULL,
|
||||||
};
|
};
|
||||||
@@ -462,7 +462,7 @@ getescapechar(char **str)
|
|||||||
q = p;
|
q = p;
|
||||||
for (p++; IS_ALNUM(*p); p++) ;
|
for (p++; IS_ALNUM(*p); p++) ;
|
||||||
q = allocStr(q, p - q);
|
q = allocStr(q, p - q);
|
||||||
if (strcasestr("lt gt amp quot nbsp", q) && *p != '=') {
|
if (strcasestr("lt gt amp quot apos nbsp", q) && *p != '=') {
|
||||||
/* a character entity MUST be terminated with ";". However,
|
/* a character entity MUST be terminated with ";". However,
|
||||||
* there's MANY web pages which uses &lt , &gt or something
|
* there's MANY web pages which uses &lt , &gt or something
|
||||||
* like them as &lt;, &gt;, etc. Therefore, we treat the most
|
* like them as &lt;, &gt;, etc. Therefore, we treat the most
|
||||||
|
2
indep.h
2
indep.h
@@ -27,7 +27,7 @@ struct growbuf {
|
|||||||
|
|
||||||
extern unsigned char QUOTE_MAP[];
|
extern unsigned char QUOTE_MAP[];
|
||||||
extern char *HTML_QUOTE_MAP[];
|
extern char *HTML_QUOTE_MAP[];
|
||||||
#define HTML_QUOTE_MASK 0x07 /* &, <, >, " */
|
#define HTML_QUOTE_MASK 0x07 /* &, <, >, ", ' */
|
||||||
#define SHELL_UNSAFE_MASK 0x08 /* [^A-Za-z0-9_./:\200-\377] */
|
#define SHELL_UNSAFE_MASK 0x08 /* [^A-Za-z0-9_./:\200-\377] */
|
||||||
#define URL_QUOTE_MASK 0x10 /* [\0- \177-\377] */
|
#define URL_QUOTE_MASK 0x10 /* [\0- \177-\377] */
|
||||||
#define FILE_QUOTE_MASK 0x30 /* [\0- #%&+:?\177-\377] */
|
#define FILE_QUOTE_MASK 0x30 /* [\0- #%&+:?\177-\377] */
|
||||||
|
Reference in New Issue
Block a user