In HTML5, anchors should not be closed when encountering elements such as divs, but should be closed when encountering elements such as buttons. Many sites that use HTML5-style anchors end up having links displayed with zero-length link texts. The proposed patch corrects this behaviour by detecting whether the document is HTML5, then suppressing the close-anchor action in CLOSE_A if it is an HTML5 document. A new macro, HTML5_CLOSE_A, handles the HTML5-specific cases (button, embed, and img with a usemap attribute) where an anchor must be closed but the existing code did not already close it.
This also fixes a bug in the tokenizing FSM in etc.c that prevented the !doctype element from being recognized; the fix is necessary because HTML5 detection depends on checking the !doctype element.
This commit is contained in:
@@ -727,6 +727,11 @@ next_status(char c, int *status)
|
||||
case '>':
|
||||
*status = R_ST_NORMAL;
|
||||
break;
|
||||
case 'D':
|
||||
case 'd':
|
||||
/* could be a !doctype */
|
||||
*status = R_ST_TAG;
|
||||
break;
|
||||
default:
|
||||
*status = R_ST_IRRTAG;
|
||||
}
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
/* $Id: file.c,v 1.266 2012/05/22 09:45:56 inu Exp $ */
|
||||
/* vi: set sw=4 ts=8 ai sm noet : */
|
||||
#include "fm.h"
|
||||
#include <sys/types.h>
|
||||
#include "myctype.h"
|
||||
@@ -4322,9 +4323,18 @@ process_idattr(struct readbuffer *obuf, int cmd, struct parsed_tag *tag)
|
||||
obuf->flag &= ~RB_P;\
|
||||
}
|
||||
|
||||
#define CLOSE_A \
|
||||
CLOSE_P; \
|
||||
close_anchor(h_env, obuf);
|
||||
#define HTML5_CLOSE_A do { \
|
||||
if (obuf->flag & RB_HTML5) { \
|
||||
close_anchor(h_env, obuf); \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
#define CLOSE_A do { \
|
||||
CLOSE_P; \
|
||||
if (!(obuf->flag & RB_HTML5)) { \
|
||||
close_anchor(h_env, obuf); \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
#define CLOSE_DT \
|
||||
if (obuf->flag & RB_IN_DT) { \
|
||||
@@ -4930,6 +4940,8 @@ HTMLtagproc1(struct parsed_tag *tag, struct html_feed_environ *h_env)
|
||||
close_anchor(h_env, obuf);
|
||||
return 1;
|
||||
case HTML_IMG:
|
||||
if (parsedtag_exists(tag, ATTR_USEMAP))
|
||||
HTML5_CLOSE_A;
|
||||
tmp = process_img(tag, h_env->limit);
|
||||
HTMLlineproc1(tmp->ptr, h_env);
|
||||
return 1;
|
||||
@@ -5125,6 +5137,7 @@ HTMLtagproc1(struct parsed_tag *tag, struct html_feed_environ *h_env)
|
||||
HTMLlineproc1(tmp->ptr, h_env);
|
||||
return 1;
|
||||
case HTML_BUTTON:
|
||||
HTML5_CLOSE_A;
|
||||
tmp = process_button(tag);
|
||||
if (tmp)
|
||||
HTMLlineproc1(tmp->ptr, h_env);
|
||||
@@ -5180,6 +5193,11 @@ HTMLtagproc1(struct parsed_tag *tag, struct html_feed_environ *h_env)
|
||||
NULL);
|
||||
HTMLlineproc1(tmp->ptr, h_env);
|
||||
return 1;
|
||||
case HTML_DOCTYPE:
|
||||
if (!parsedtag_exists(tag, ATTR_PUBLIC)) {
|
||||
obuf->flag |= RB_HTML5;
|
||||
}
|
||||
return 1;
|
||||
case HTML_META:
|
||||
p = q = r = NULL;
|
||||
parsedtag_get_value(tag, ATTR_HTTP_EQUIV, &p);
|
||||
@@ -5378,6 +5396,7 @@ HTMLtagproc1(struct parsed_tag *tag, struct html_feed_environ *h_env)
|
||||
}
|
||||
return 1;
|
||||
case HTML_EMBED:
|
||||
HTML5_CLOSE_A;
|
||||
if (view_unseenobject) {
|
||||
if (parsedtag_get_value(tag, ATTR_SRC, &p)) {
|
||||
Str s;
|
||||
|
||||
@@ -661,6 +661,7 @@ struct readbuffer {
|
||||
#endif /* FORMAT_NICE */
|
||||
#define RB_DEL 0x100000
|
||||
#define RB_S 0x200000
|
||||
#define RB_HTML5 0x400000
|
||||
|
||||
#define RB_GET_ALIGN(obuf) ((obuf)->flag&RB_ALIGN)
|
||||
#define RB_SET_ALIGN(obuf,align) {(obuf)->flag &= ~RB_ALIGN; (obuf)->flag |= (align); }
|
||||
@@ -673,7 +674,7 @@ struct readbuffer {
|
||||
RB_SET_ALIGN(obuf,(obuf)->flag_stack[--(obuf)->flag_sp]); \
|
||||
}
|
||||
|
||||
/* status flags */
|
||||
/* state of token scanning finite state machine */
|
||||
#define R_ST_NORMAL 0 /* normal */
|
||||
#define R_ST_TAG0 1 /* within tag, just after < */
|
||||
#define R_ST_TAG 2 /* within tag */
|
||||
|
||||
@@ -37,6 +37,8 @@ unsigned char ALST_TABLE[] =
|
||||
ATTR_CELLPADDING, ATTR_VSPACE, ATTR_CORE
|
||||
};
|
||||
#define MAXA_TABLE MAXA_CORE + 6
|
||||
unsigned char ALST_DOCTYPE[] = { ATTR_PUBLIC }; /* only (html and) public should be checked */
|
||||
#define MAXA_DOCTYPE 1
|
||||
unsigned char ALST_META[] = { ATTR_HTTP_EQUIV, ATTR_CONTENT, ATTR_CHARSET, ATTR_CORE };
|
||||
#define MAXA_META MAXA_CORE + 3
|
||||
unsigned char ALST_FRAME[] = { ATTR_SRC, ATTR_NAME, ATTR_CORE };
|
||||
@@ -221,7 +223,7 @@ TagInfo TagMAP[MAX_HTMLTAG] = {
|
||||
{"/option", NULL, 0, TFLG_END}, /* 94 HTML_N_OPTION */
|
||||
{"head", ALST_NOP, MAXA_NOP, 0}, /* 95 HTML_HEAD */
|
||||
{"/head", NULL, 0, TFLG_END}, /* 96 HTML_N_HEAD */
|
||||
{"doctype", ALST_NOP, MAXA_NOP, 0}, /* 97 HTML_DOCTYPE */
|
||||
{"doctype", ALST_DOCTYPE, MAXA_DOCTYPE, 0}, /* 97 HTML_DOCTYPE */
|
||||
{"noframes", ALST_NOFRAMES, MAXA_NOFRAMES, 0}, /* 98 HTML_NOFRAMES */
|
||||
{"/noframes", NULL, 0, TFLG_END}, /* 99 HTML_N_NOFRAMES */
|
||||
|
||||
@@ -367,7 +369,7 @@ TagAttrInfo AttrMAP[MAX_TAGATTR] = {
|
||||
{"rev", VTYPE_STR, 0}, /* 48 ATTR_REV */
|
||||
{"title", VTYPE_STR, 0}, /* 49 ATTR_TITLE */
|
||||
{"accesskey", VTYPE_STR, 0}, /* 50 ATTR_ACCESSKEY */
|
||||
{NULL, VTYPE_NONE, 0}, /* 51 Undefined */
|
||||
{"public", VTYPE_NONE, 0}, /* 51 ATTR_PUBLIC */
|
||||
{NULL, VTYPE_NONE, 0}, /* 52 Undefined */
|
||||
{NULL, VTYPE_NONE, 0}, /* 53 Undefined */
|
||||
{NULL, VTYPE_NONE, 0}, /* 54 Undefined */
|
||||
|
||||
@@ -318,6 +318,7 @@ typedef struct {
|
||||
#define ATTR_REV 48
|
||||
#define ATTR_TITLE 49
|
||||
#define ATTR_ACCESSKEY 50
|
||||
#define ATTR_PUBLIC 51
|
||||
|
||||
/* Internal attribute */
|
||||
#define ATTR_XOFFSET 60
|
||||
|
||||
@@ -0,0 +1,2 @@
|
||||
|
||||
test
|
||||
@@ -0,0 +1 @@
|
||||
<a href="example"><div>test</div></a>
|
||||
@@ -0,0 +1 @@
|
||||
test
|
||||
@@ -0,0 +1,3 @@
|
||||
<!doctype html>
|
||||
<meta charset=utf-8>
|
||||
<a href="example"><div>test</div></a>
|
||||
@@ -0,0 +1,31 @@
|
||||
total=0
|
||||
pass=0
|
||||
fail=0
|
||||
w3m="../w3m
|
||||
-config
|
||||
/dev/null
|
||||
-o
|
||||
ignore_null_img_alt=false"
|
||||
for i in *.html; do
|
||||
cmd="$w3m
|
||||
-I
|
||||
utf-8
|
||||
-O
|
||||
utf-8
|
||||
-T
|
||||
text/html"
|
||||
opts="`basename "$i" .html`.opts"
|
||||
test -f "$opts" && cmd="$cmd
|
||||
`grep -v '^#' $opts`"
|
||||
if (set -x;IFS='
|
||||
';$cmd) < "$i" | diff -u - "`basename "$i" .html`.expected"; then
|
||||
pass="`expr 1 + "$pass"`"
|
||||
else
|
||||
fail="`expr 1 + "$fail"`"
|
||||
fi
|
||||
total="`expr 1 + "$total"`"
|
||||
done
|
||||
echo "TOTAL: $total test(s)"
|
||||
echo "PASS : $pass"
|
||||
echo "FAIL : $fail"
|
||||
test 0 -eq "$fail"
|
||||
Reference in New Issue
Block a user