entities: support &apos; entity
The XHTML standard encompasses the XML standard.
From the beginning, the XML standard [1] has always included required
support for five character entities:
 1. the ampersand (&) as &amp;
 2. the left angle bracket (<) as &lt;
 3. the right angle bracket (>) as &gt;
 4. the double-quote character (") as &quot;
 5. the apostrophe or single-quote character (') as &apos;
See section "2.4 Character Data and Markup" of the XML standard [1]
for further details.
Add support for the single-quote character entity (&apos;)
in order to fully support XHTML pages.
[1]: https://www.w3.org/TR/REC-xml/
Signed-off-by: Kyle J. McKay <mackyle@gmail.com>
			
			
This commit is contained in:
		| @@ -7,6 +7,8 @@ amp	0x26 | |||||||
| AMP	0x26 | AMP	0x26 | ||||||
| quot	0x22 | quot	0x22 | ||||||
| QUOT	0x22 | QUOT	0x22 | ||||||
|  | apos	0x27 | ||||||
|  | APOS	0x27 | ||||||
| nbsp	0xA0 | nbsp	0xA0 | ||||||
| NBSP	0xA0 | NBSP	0xA0 | ||||||
| iexcl	0xA1 | iexcl	0xA1 | ||||||
|   | |||||||
							
								
								
									
										6
									
								
								indep.c
									
									
									
									
									
								
							
							
						
						
									
										6
									
								
								indep.c
									
									
									
									
									
								
							| @@ -19,7 +19,7 @@ unsigned char QUOTE_MAP[0x100] = { | |||||||
|     /* DLE DC1 DC2 DC3 DC4 NAK SYN ETB CAN  EM SUB ESC  FS  GS  RS  US */ |     /* DLE DC1 DC2 DC3 DC4 NAK SYN ETB CAN  EM SUB ESC  FS  GS  RS  US */ | ||||||
|     24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, |     24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, | ||||||
|     /* SPC   !   "   #   $   %   &   '   (   )   *   +   ,   -   .   / */ |     /* SPC   !   "   #   $   %   &   '   (   )   *   +   ,   -   .   / */ | ||||||
|     24, 72, 76, 40, 8, 40, 41, 72, 72, 72, 72, 40, 72, 8, 0, 64, |     24, 72, 76, 40, 8, 40, 41, 77, 72, 72, 72, 40, 72, 8, 0, 64, | ||||||
|     /*   0   1   2   3   4   5   6   7   8   9   :   ;   <   =   >   ? */ |     /*   0   1   2   3   4   5   6   7   8   9   :   ;   <   =   >   ? */ | ||||||
|     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 32, 72, 74, 72, 75, 40, |     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 32, 72, 74, 72, 75, 40, | ||||||
|     /*   @   A   B   C   D   E   F   G   H   I   J   K   L   M   N   O */ |     /*   @   A   B   C   D   E   F   G   H   I   J   K   L   M   N   O */ | ||||||
| @@ -47,7 +47,7 @@ char *HTML_QUOTE_MAP[] = { | |||||||
|     "&lt;", |     "&lt;", | ||||||
|     "&gt;", |     "&gt;", | ||||||
|     "&quot;", |     "&quot;", | ||||||
|     NULL, |     "&apos;", | ||||||
|     NULL, |     NULL, | ||||||
|     NULL, |     NULL, | ||||||
| }; | }; | ||||||
| @@ -462,7 +462,7 @@ getescapechar(char **str) | |||||||
|     q = p; |     q = p; | ||||||
|     for (p++; IS_ALNUM(*p); p++) ; |     for (p++; IS_ALNUM(*p); p++) ; | ||||||
|     q = allocStr(q, p - q); |     q = allocStr(q, p - q); | ||||||
|     if (strcasestr("lt gt amp quot nbsp", q) && *p != '=') { |     if (strcasestr("lt gt amp quot apos nbsp", q) && *p != '=') { | ||||||
| 	/* a character entity MUST be terminated with ";". However, | 	/* a character entity MUST be terminated with ";". However, | ||||||
| 	 * there's MANY web pages which uses &lt , &gt or something | 	 * there's MANY web pages which uses &lt , &gt or something | ||||||
| 	 * like them as &lt;, &gt;, etc. Therefore, we treat the most | 	 * like them as &lt;, &gt;, etc. Therefore, we treat the most | ||||||
|   | |||||||
							
								
								
									
										2
									
								
								indep.h
									
									
									
									
									
								
							
							
						
						
									
										2
									
								
								indep.h
									
									
									
									
									
								
							| @@ -27,7 +27,7 @@ struct growbuf { | |||||||
|  |  | ||||||
| extern unsigned char QUOTE_MAP[]; | extern unsigned char QUOTE_MAP[]; | ||||||
| extern char *HTML_QUOTE_MAP[]; | extern char *HTML_QUOTE_MAP[]; | ||||||
| #define HTML_QUOTE_MASK   0x07	/* &, <, >, " */ | #define HTML_QUOTE_MASK   0x07	/* &, <, >, ", ' */ | ||||||
| #define SHELL_UNSAFE_MASK 0x08	/* [^A-Za-z0-9_./:\200-\377] */ | #define SHELL_UNSAFE_MASK 0x08	/* [^A-Za-z0-9_./:\200-\377] */ | ||||||
| #define URL_QUOTE_MASK    0x10	/* [\0- \177-\377] */ | #define URL_QUOTE_MASK    0x10	/* [\0- \177-\377] */ | ||||||
| #define FILE_QUOTE_MASK   0x30	/* [\0- #%&+:?\177-\377] */ | #define FILE_QUOTE_MASK   0x30	/* [\0- #%&+:?\177-\377] */ | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user