[w3m-dev 02503]

From: aito@fw.ipsj.or.jp
closes: Debian Bug#120540
This commit is contained in:
Fumitoshi UKAI
2001-11-22 14:15:19 +00:00
parent 06680ddf4c
commit 6f0999cbc2
2 changed files with 24 additions and 1 deletions

19
indep.c
View File

@@ -1,4 +1,4 @@
/* $Id: indep.c,v 1.7 2001/11/22 13:30:02 ukai Exp $ */
/* $Id: indep.c,v 1.8 2001/11/22 14:15:19 ukai Exp $ */
#include "fm.h"
#include <stdio.h>
#include <pwd.h>
@@ -275,6 +275,7 @@ getescapechar(char **str)
{
int dummy = -1;
char *p = *str, *q;
int strict_entity = TRUE;
if (*p == '&')
p++;
@@ -319,8 +320,24 @@ getescapechar(char **str)
for (p++; IS_ALNUM(*p); p++)
;
q = allocStr(q, p - q);
if (strcasestr("lt gt amp quot nbsp",q) &&
*p != '=') {
/* A character entity MUST be terminated with ";". However,
there are MANY web pages which use &lt, &gt or something
like them in place of &lt;, &gt;, etc. Therefore, we treat the most
popular character entities (including &#xxxx;) without
the trailing ";" as character entities. If the trailing character
is "=", it must be part of a query in a URL, so &lt=, &gt=, etc.
are not regarded as character entities.
*/
strict_entity = FALSE;
}
if (*p == ';')
p++;
else if (strict_entity) {
*str = p;
return -1;
}
*str = p;
return getHash_si(&entity, q, -1);
}