entities: support &apos; entity
The XHTML standard encompasses the XML standard.
From the beginning, the XML standard [1] has always included required
support for five character entities:
 1. the ampersand (&) as &amp;
 2. the left angle bracket (<) as &lt;
 3. the right angle bracket (>) as &gt;
 4. the double-quote character (") as &quot;
 5. the apostrophe or single-quote character (') as &apos;
See section "2.4 Character Data and Markup" of the XML standard [1]
for further details.
Add support for the single-quote character entity (&apos;)
in order to fully support XHTML pages.
[1]: https://www.w3.org/TR/REC-xml/
Signed-off-by: Kyle J. McKay <mackyle@gmail.com>
			
			
This commit is contained in:
		| @@ -7,6 +7,8 @@ amp	0x26 | ||||
| AMP	0x26 | ||||
| quot	0x22 | ||||
| QUOT	0x22 | ||||
| apos	0x27 | ||||
| APOS	0x27 | ||||
| nbsp	0xA0 | ||||
| NBSP	0xA0 | ||||
| iexcl	0xA1 | ||||
|   | ||||
							
								
								
									
										6
									
								
								indep.c
									
									
									
									
									
								
							
							
						
						
									
										6
									
								
								indep.c
									
									
									
									
									
								
							| @@ -19,7 +19,7 @@ unsigned char QUOTE_MAP[0x100] = { | ||||
|     /* DLE DC1 DC2 DC3 DC4 NAK SYN ETB CAN  EM SUB ESC  FS  GS  RS  US */ | ||||
|     24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, | ||||
|     /* SPC   !   "   #   $   %   &   '   (   )   *   +   ,   -   .   / */ | ||||
|     24, 72, 76, 40, 8, 40, 41, 72, 72, 72, 72, 40, 72, 8, 0, 64, | ||||
|     24, 72, 76, 40, 8, 40, 41, 77, 72, 72, 72, 40, 72, 8, 0, 64, | ||||
|     /*   0   1   2   3   4   5   6   7   8   9   :   ;   <   =   >   ? */ | ||||
|     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 32, 72, 74, 72, 75, 40, | ||||
|     /*   @   A   B   C   D   E   F   G   H   I   J   K   L   M   N   O */ | ||||
| @@ -47,7 +47,7 @@ char *HTML_QUOTE_MAP[] = { | ||||
|     "&lt;", | ||||
|     "&gt;", | ||||
|     "&quot;", | ||||
|     NULL, | ||||
|     "&apos;", | ||||
|     NULL, | ||||
|     NULL, | ||||
| }; | ||||
| @@ -462,7 +462,7 @@ getescapechar(char **str) | ||||
|     q = p; | ||||
|     for (p++; IS_ALNUM(*p); p++) ; | ||||
|     q = allocStr(q, p - q); | ||||
|     if (strcasestr("lt gt amp quot nbsp", q) && *p != '=') { | ||||
|     if (strcasestr("lt gt amp quot apos nbsp", q) && *p != '=') { | ||||
| 	/* a character entity MUST be terminated with ";". However, | ||||
| 	 * there's MANY web pages which uses &lt , &gt or something | ||||
| 	 * like them as &lt;, &gt;, etc. Therefore, we treat the most | ||||
|   | ||||
							
								
								
									
										2
									
								
								indep.h
									
									
									
									
									
								
							
							
						
						
									
										2
									
								
								indep.h
									
									
									
									
									
								
							| @@ -27,7 +27,7 @@ struct growbuf { | ||||
|  | ||||
| extern unsigned char QUOTE_MAP[]; | ||||
| extern char *HTML_QUOTE_MAP[]; | ||||
| #define HTML_QUOTE_MASK   0x07	/* &, <, >, " */ | ||||
| #define HTML_QUOTE_MASK   0x07	/* &, <, >, ", ' */ | ||||
| #define SHELL_UNSAFE_MASK 0x08	/* [^A-Za-z0-9_./:\200-\377] */ | ||||
| #define URL_QUOTE_MASK    0x10	/* [\0- \177-\377] */ | ||||
| #define FILE_QUOTE_MASK   0x30	/* [\0- #%&+:?\177-\377] */ | ||||
|   | ||||
		Reference in New Issue
	
	Block a user