Merge branch 'bug/w3mman2html-utf8'

This commit is contained in:
Tatsuya Kinoshita
2013-08-02 07:01:06 +09:00

View File

@@ -126,12 +126,14 @@ while(<F>) {
s/\&/\&amp;/g;
s/\</\&lt;/g;
s/\>/\&gt;/g;
# non ASCII UTF-8 codepoint
my $utf8="[\300-\337][\200-\277]|[\340-\357][\200-\277]{2}|[\360-\367][\200-\277]{3}|[\370-\373][\200-\277]{4}|[\374\375][\200-\277]{5}";
s@([\200-\377].)(\010{1,2}\1)+@<b>$1</b>@g;
s@($utf8)(\010\1)+@<b>$1</b>@g;
s@(\&\w+;|.)(\010\1)+@<b>$1</b>@g;
s@__\010{1,2}((\<b\>)?[\200-\377].(\</b\>)?)@<u>$1</u>@g;
s@_\010((\<b\>)?($utf8)(\</b\>)?)@<u>$1</u>@g;
s@_\010((\<b\>)?(\&\w+\;|.)(\</b\>)?)@<u>$1</u>@g;
s@((\<b\>)?[\200-\377].(\</b\>)?)\010{1,2}__@<u>$1</u>@g;
s@((\<b\>)?($utf8)(\</b\>)?)\010_@<u>$1</u>@g;
s@((\<b\>)?(\&\w+\;|.)(\</b\>)?)\010_@<u>$1</u>@g;
s@.\010(.)@$1@g;