Cleaned-up version of the 20200823_q branch. Changes the behaviour of the q tag (when m17n and Unicode are configured) to use "smart" quotes if the display charset can handle them. Falls back to the old behaviour (ASCII quotes, with the characters at 6/0 and 2/7, i.e. grave accent and apostrophe, acting as left and right quotes) if the display charset is us-ascii.
Also changes the behaviour of conv_entity() so that left/right quotes and some dashes are converted to ASCII substitutes when the display charset cannot represent them, because named entities are needed by the new code for the q tag.
This commit is contained in:
14
entity.c
14
entity.c
@@ -58,11 +58,23 @@ conv_entity(unsigned int c)
|
||||
#ifdef USE_M17N
|
||||
#ifdef USE_UNICODE
|
||||
if (c <= WC_C_UCS4_END) { /* Unicode */
|
||||
char *chk;
|
||||
wc_uchar utf8[7];
|
||||
wc_ucs_to_utf8(c, utf8);
|
||||
return wc_conv((char *)utf8, WC_CES_UTF_8, InnerCharset)->ptr;
|
||||
/* we eventually need to display it so check DisplayCharset */
|
||||
chk = wc_conv((char *)utf8, WC_CES_UTF_8, DisplayCharset ? DisplayCharset : WC_CES_US_ASCII)->ptr;
|
||||
if (strcmp(chk, "?") != 0)
|
||||
return wc_conv((char *)utf8, WC_CES_UTF_8, InnerCharset)->ptr;
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
if (c == 0x201c || c == 0x201f || c == 0x201d || c == 0x2033)
|
||||
return "\"";
|
||||
if (c == 0x2018 || c == 0x201b || c == 0x2019 || c == 0x2032)
|
||||
return "'";
|
||||
if (c >= 0x2010 && c < 0x2014)
|
||||
return "-";
|
||||
if (c == 0x2014)
|
||||
return "--";
|
||||
return "?";
|
||||
}
|
||||
|
18
file.c
18
file.c
@@ -4487,9 +4487,27 @@ HTMLtagproc1(struct parsed_tag *tag, struct html_feed_environ *h_env)
|
||||
HTMLlineproc1("</b>", h_env);
|
||||
return 1;
|
||||
case HTML_Q:
|
||||
#ifdef USE_M17N
|
||||
#ifdef USE_UNICODE
|
||||
if (DisplayCharset != WC_CES_US_ASCII) {
|
||||
HTMLlineproc1((obuf->q_level & 1 ? "“": "‘"), h_env);
|
||||
obuf->q_level += 1;
|
||||
}
|
||||
else
|
||||
#endif
|
||||
#endif
|
||||
HTMLlineproc1("`", h_env);
|
||||
return 1;
|
||||
case HTML_N_Q:
|
||||
#ifdef USE_M17N
|
||||
#ifdef USE_UNICODE
|
||||
if (DisplayCharset != WC_CES_US_ASCII) {
|
||||
obuf->q_level -= 1;
|
||||
HTMLlineproc1((obuf->q_level & 1 ? "”": "’"), h_env);
|
||||
}
|
||||
else
|
||||
#endif
|
||||
#endif
|
||||
HTMLlineproc1("'", h_env);
|
||||
return 1;
|
||||
case HTML_FIGURE:
|
||||
|
1
fm.h
1
fm.h
@@ -610,6 +610,7 @@ struct readbuffer {
|
||||
int flag_sp;
|
||||
int status;
|
||||
unsigned char end_tag;
|
||||
unsigned char q_level;
|
||||
short table_level;
|
||||
short nobr_level;
|
||||
Anchor anchor;
|
||||
|
2
tests/name_entity_1.expected
Normal file
2
tests/name_entity_1.expected
Normal file
@@ -0,0 +1,2 @@
|
||||
This is an example sentence that contains some "quoted words" --
|
||||
punctuation that would be displayed as question marks but should not.
|
2
tests/name_entity_1.html
Normal file
2
tests/name_entity_1.html
Normal file
@@ -0,0 +1,2 @@
|
||||
This is an example sentence that contains some “quoted words” —
|
||||
<br>punctuation that would be displayed as question marks but should not.
|
2
tests/name_entity_1.opts
Normal file
2
tests/name_entity_1.opts
Normal file
@@ -0,0 +1,2 @@
|
||||
-O
|
||||
us-ascii
|
1
tests/name_entity_2.expected
Normal file
1
tests/name_entity_2.expected
Normal file
@@ -0,0 +1 @@
|
||||
2πr
|
1
tests/name_entity_2.html
Normal file
1
tests/name_entity_2.html
Normal file
@@ -0,0 +1 @@
|
||||
2πr
|
1
tests/q1.expected
Normal file
1
tests/q1.expected
Normal file
@@ -0,0 +1 @@
|
||||
`test'
|
2
tests/q1.html
Normal file
2
tests/q1.html
Normal file
@@ -0,0 +1,2 @@
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
|
||||
<q>test</q>
|
2
tests/q1.opts
Normal file
2
tests/q1.opts
Normal file
@@ -0,0 +1,2 @@
|
||||
-O
|
||||
us-ascii
|
1
tests/q2.expected
Normal file
1
tests/q2.expected
Normal file
@@ -0,0 +1 @@
|
||||
“test”
|
3
tests/q2.html
Normal file
3
tests/q2.html
Normal file
@@ -0,0 +1,3 @@
|
||||
<!doctype html>
|
||||
<meta charset=utf-8>
|
||||
<q>test</q>
|
1
tests/q3.expected
Normal file
1
tests/q3.expected
Normal file
@@ -0,0 +1 @@
|
||||
“test”
|
3
tests/q3.html
Normal file
3
tests/q3.html
Normal file
@@ -0,0 +1,3 @@
|
||||
<!doctype html>
|
||||
<meta charset=Big5>
|
||||
<q>test</q>
|
4
tests/q3.opts
Normal file
4
tests/q3.opts
Normal file
@@ -0,0 +1,4 @@
|
||||
-I
|
||||
windows-1252
|
||||
-O
|
||||
windows-1252
|
1
tests/q4.expected
Normal file
1
tests/q4.expected
Normal file
@@ -0,0 +1 @@
|
||||
“test”
|
3
tests/q4.html
Normal file
3
tests/q4.html
Normal file
@@ -0,0 +1,3 @@
|
||||
<!doctype html>
|
||||
<meta charset=Big5>
|
||||
<q>test</q>
|
2
tests/q4.opts
Normal file
2
tests/q4.opts
Normal file
@@ -0,0 +1,2 @@
|
||||
-O
|
||||
windows-1252
|
1
tests/q5.expected
Normal file
1
tests/q5.expected
Normal file
@@ -0,0 +1 @@
|
||||
“example of a ‘nested’ quote”
|
3
tests/q5.html
Normal file
3
tests/q5.html
Normal file
@@ -0,0 +1,3 @@
|
||||
<!doctype html>
|
||||
<meta charset=utf-8>
|
||||
<q>example of a <q>nested</q> quote</q>
|
1
tests/q6.expected
Normal file
1
tests/q6.expected
Normal file
@@ -0,0 +1 @@
|
||||
"example of a 'nested' quote"
|
3
tests/q6.html
Normal file
3
tests/q6.html
Normal file
@@ -0,0 +1,3 @@
|
||||
<!doctype html>
|
||||
<meta charset=big5>
|
||||
<q>example of a <q>nested</q> quote</q>
|
1
tests/q6.opts
Normal file
1
tests/q6.opts
Normal file
@@ -0,0 +1 @@
|
||||
-O Big5
|
31
tests/run_tests
Normal file
31
tests/run_tests
Normal file
@@ -0,0 +1,31 @@
|
||||
# Regression-test driver.
#
# For every NAME.html in the current directory, feed the file to ../w3m on
# standard input and compare the output with NAME.expected using diff -u.
# If NAME.opts exists, its lines (except those starting with '#') are
# appended to the w3m command line, one argument per line.
# Prints a summary and exits with status 0 only when no test failed.
#
# Arguments are stored in newline-separated strings and split with IFS set
# to a single newline, so every line inside the quoted strings below becomes
# exactly one argument. Do not indent those continuation lines.
total=0
pass=0
fail=0
w3m="../w3m
-config
/dev/null
-o
ignore_null_img_alt=false"
for i in *.html; do
    # Default options; a later -I/-O coming from the .opts file is simply
    # appended after these on the command line.
    cmd="$w3m
-I
utf-8
-O
utf-8
-T
text/html"
    opts="`basename "$i" .html`.opts"
    # "$opts" is quoted so that a file name containing blanks or glob
    # characters reaches grep as a single operand.
    test -f "$opts" && cmd="$cmd
`grep -v '^#' "$opts"`"
    # $cmd is intentionally unquoted: it is split on newlines inside the
    # subshell, where set -x also traces the resulting command.
    if (set -x;IFS='
';$cmd) < "$i" | diff -u - "`basename "$i" .html`.expected"; then
        pass="`expr 1 + "$pass"`"
    else
        fail="`expr 1 + "$fail"`"
    fi
    total="`expr 1 + "$total"`"
done
echo "TOTAL: $total test(s)"
echo "PASS : $pass"
echo "FAIL : $fail"
test 0 -eq "$fail"
|
Reference in New Issue
Block a user