Initial revision

This commit is contained in:
Akinori Ito
2001-11-08 05:14:08 +00:00
commit 68a07bf03b
305 changed files with 104639 additions and 0 deletions
+48
View File
@@ -0,0 +1,48 @@
html2latex
HTMLの文書をLaTeX に変換します.Rubyスクリプトです.不完全です.
ある程度の役には立つかもしれません.
使用法
html2latex file.html > file.tex
ここに置いてある理由
makeref のための部品取りです :-)
makeref
HTMLの文書を読み,アンカーに番号を振ります.番号を振った文書を
標準出力に書き出し,最後にその一覧を出力します.Ruby スクリプトです.
使用法
makeref [-url base_url] [file]
-url: 文書のURLを指定します.リンクの一覧を出すときに,そのURLを
補完するために使います.
バグ
HTMLの記述ミス( &lt; ではなく < を使う,&... の最後に ; を付けない
等)があると,悲惨な結果になることがあります.
半角カナ(JIS X-0201カナ)に対応していません.
htmldump
URL からHTML文書を読み,アンカーに番号を振って整形し,標準出力に
書き出します.
使用法
htmldump [URL]
URL を省略すると,$WWW_HOME の内容を読みます.
バグ
URL の指す文書がHTMLでなかった場合,かわいそうなことになります.
makeref を使っているので,makeref がうまく処理できない文書の表示
は変になります.
+49
View File
@@ -0,0 +1,49 @@
html2latex
Converts an HTML document into LaTeX. It is a Ruby script, and it is incomplete.
Usage:
html2latex file.html > file.tex
Why is this script here?
Its code is reused as spare parts for makeref. :-)
makeref
Read HTML document and number the anchors. Print numbered document
into standard output and append reference index. Ruby script.
Usage:
makeref [-u] [-url base_url] [file]
-url: Specify URL of the document. It is used to complete link
in the document.
-u: Append URL after each anchor, instead of reference number.
Bugs
If there are any errors in the HTML (unbalanced < , character entity
without ; , etc.), the output will be miserable.
htmldump
Read HTML document from URL, number the anchors and format it,
and output it on standard output.
Usage
htmldump [-u] [URL]
-u: Append URL after each anchor, instead of reference number.
If URL is omitted, $WWW_HOME is used instead.
Bugs
It assumes that the document on URL is HTML.
As it uses makeref to number the anchor, it can't handle any document
makeref can't handle.
+517
View File
@@ -0,0 +1,517 @@
#!/usr/local/bin/ruby
#
# HTML to LaTeX converter
# by A. Ito, 16 June, 1997
#
require 'kconv'
# configuration
# Convert an image file to EPS by running the external `convert` command.
#
# giffile:: path of the source image (taken from the SRC of an <IMG> tag)
# epsfile:: path of the EPS file to create
#
# The command line is echoed to standard error first.  Returns whatever
# Kernel#system returns.
def gif2eps(giffile,epsfile)
  STDERR.print "convert #{giffile} #{epsfile}","\n"
  # Pass the file names as separate arguments: they come from the HTML
  # being converted, and the single-string form of system would let a
  # shell interpret spaces, quotes or `;` inside them.
  system("convert", giffile, epsfile)
end
###########################################################################
# One HTML tag: a lower-cased tag name plus its attributes.
#
# Attribute names are lower-cased.  An attribute written without a value
# (e.g. NOWRAP) is stored as +true+; otherwise the value is stored as a
# String with surrounding quotes removed.
class Tag
  # A single attribute: a name, optionally followed by `=` and a
  # double-quoted, single-quoted or bare value.  Whitespace is allowed
  # around the `=` and inside quoted values.
  ATTRIBUTE = /([^\s=]+)(?:\s*=\s*(?:"([^"]*)"|'([^']*)'|(\S*)))?/

  # str:: the tag text, with or without the enclosing < and >.
  def initialize(str)
    str = $1 if str =~ /<(.+)>/m
    name, rest = str.strip.split(/\s+/, 2)
    # An empty tag such as "< >" yields an empty name instead of raising.
    @tagname = (name || "").downcase
    @vals = {}
    (rest || "").scan(ATTRIBUTE) do |key, dquoted, squoted, bare|
      # A bare value may still carry a stray quote (unterminated string).
      bare = bare.sub(/^"/, "").sub(/"$/, "") unless bare.nil?
      value = dquoted || squoted || bare
      @vals[key.downcase] = value.nil? ? true : value
    end
  end

  # The lower-cased tag name ("a", "/a", "!--", ...).
  def tagname
    @tagname
  end

  # Yield every attribute as (name, value).
  def each
    @vals.each do |k,v|
      yield k,v
    end
  end

  # Value of attribute +k+ (lower-case name), +true+ for a valueless
  # attribute, or nil when the attribute is absent.
  def switch(k)
    @vals[k]
  end
end
class TokenStream
TAG_START = ?<
TAG_END = ?>
AMP_START = ?&
AMP_END = ?;
AMP_REPLACE_TABLE = {
'&amp;' => '\\&',
'&gt;' => '$>$',
'&lt;' => '$<$',
'&nbsp;' => '~',
'&quot;' => '"',
}
def initialize(file)
if file.kind_of?(File) then
@f = file
else
@f = File.new(file)
end
@buf = nil
@bpos = 0
end
def read_until(endsym)
complete = FALSE
tag = []
begin
while @bpos < @buf.size
c = @buf[@bpos]
if c == endsym then
tag.push(c.chr)
complete = TRUE
@bpos += 1
break
end
if c == 10 || c == 13 then
tag.push(' ')
else
tag.push(c.chr)
end
@bpos += 1
end
unless complete
@buf = @f.gets
@bpos = 0
break if @f.eof?
end
end until complete
return tag.join('')
end
def get
while TRUE
if @buf.nil? then
@buf = Kconv.toeuc(@f.gets)
if @f.eof? then
return nil
end
@bpos = 0
end
if @buf[@bpos] == TAG_START then
return Tag.new(read_until(TAG_END))
elsif @buf[@bpos] == AMP_START then
return replace_amp(read_until(AMP_END))
else
i = @bpos
while i < @buf.size && @buf[i] != TAG_START && @buf[i] != AMP_START
i += 1
end
r = @buf[@bpos,i-@bpos]
if i == @buf.size then
@buf = nil
else
@bpos = i
end
redo if r =~ /^\s+$/
return r
end
end
end
public :eof?
def eof?
@f.eof?
end
def replace_amp(s)
if AMP_REPLACE_TABLE.key?(s) then
return AMP_REPLACE_TABLE[s]
else
return s
end
end
end
# Write the fixed LaTeX preamble to standard output: the document style
# line plus the \hr, \pre and \gt helper definitions.
def print_header
  preamble = [
    '',
    '\documentstyle[epsf]{jarticle}',
    '\def\hr{\par\hbox to \textwidth{\hrulefill}}',
    '\def\pre{\begin{quote}\def\baselinestretch{0.8}\tt\obeylines}',
    '\def\endpre{\end{quote}}',
    '\makeatletter',
    '\@ifundefined{gt}{\let\gt=\dg}{}',
    '\makeatother',
    ''
  ]
  print preamble.join("\n")
end
# A stack of environments.  Tag lookups start at the innermost (most
# recently pushed) environment and fall back towards the outermost.
class Environ_stack
  # envs:: the initial environments, outermost first.
  def initialize(*envs)
    @stack = envs
  end

  # Look up the action for tag name +tag+.  Names starting with "!" are
  # answered with an empty action; otherwise the first non-nil answer
  # found from the top of the stack downwards is returned, or nil.
  def action(tag)
    return ["",nil] if tag =~ /^!/
    @stack.reverse_each do |env|
      found = env.action(tag)
      return found unless found.nil?
    end
    nil
  end

  # Remove and return the innermost environment.
  def pop
    @stack.pop
  end

  # Make +env+ the innermost environment.
  def push(env)
    @stack.push(env)
  end

  # The innermost environment.
  def top
    @stack.last
  end

  # Push a clone of the innermost environment.
  def dup
    push(top.clone)
  end
end
# Output state for one nesting level of the document: the tag
# interpretation table in force, whether output is suppressed, the
# paragraph alignment, and the table currently being collected.
class Environment
  # LaTeX special characters and the text that #flush emits for them.
  # `&` is not escaped here: entity tokens arrive already translated
  # (e.g. "&amp;" as "\&") and must not be escaped a second time.
  TEXT_ESCAPES = {
    '%' => '\\%',
    '#' => '\\#',
    '$' => '\\$',
    '_' => '\\verb+_+',
    '^' => '\\verb+^+',
    '~' => '\\verb+~+',
  }
  # ALIGN values of <P> and the LaTeX environment used for each.
  PARAGRAPH_ENVS = {
    'left' => 'flushleft',
    'center' => 'center',
    'right' => 'flushright',
  }

  # interp:: Hash mapping tag names to action arrays.
  def initialize(interp)
    @silent = false
    @in_table = false
    @interp = interp
    @align = nil
    @endparagraph = ""
  end

  # The action registered for tag name +tag+, or nil.
  def action(tag)
    @interp[tag]
  end

  # Output +tok+.  Strings are escaped for LaTeX first.  Inside a table
  # the text is appended to the current cell; otherwise it is printed
  # unless output is silenced.  In an aligned paragraph a line ending in
  # a newline is terminated with "\\".
  def flush(tok)
    tok = escape_text(tok) if tok.kind_of?(String)
    if @in_table
      append_to_cell(tok)
    elsif !@silent
      if !@align.nil? && tok.kind_of?(String) && tok =~ /\n$/
        print tok.chop,"\\\\\n"
      else
        print tok
      end
    end
  end

  # Replace the tag interpretation table.
  def set_interp(interp)
    @interp = interp
  end

  # tag processing methods
  # <TITLE>: suppress output.
  def do_silent(tag)
    @silent = true
  end

  # </TITLE>: resume output.
  def undo_silent(tag)
    @silent = false
  end

  # <IMG>: convert the image to EPS and include it.  A tag without a SRC
  # value is ignored.
  def img_proc(tag)
    src = tag.switch('src')
    return unless src.kind_of?(String)
    # Swap the final extension for .eps (the old pattern replaced the
    # first ".gif" anywhere in the path and left other formats unnamed).
    newfile = src.sub(/\.[^.\/]*$/,"") + ".eps"
    gif2eps(src,newfile)
    emit "\\epsfile{file=#{newfile}}\n"
  end

  # <TABLE>: start collecting cells.
  def starttable(tag)
    @table = []          # @table[row][col]: text of the cell ending at col
    @tablespan = []      # @tablespan[row][col]: span of the cell starting at col
    @table_rows = -1
    @table_cols = -1
    @table_cols_max = 0
    @in_table = true
    @table_border = !tag.switch('border').nil?
  end

  # <TR>: begin a new row.
  def start_row(tag)
    @table_rows += 1
    @table[@table_rows] = []
    @tablespan[@table_rows] = []
    @table_cols = -1
    @colspan = 1
  end

  # <TD>: begin a new cell, honouring COLSPAN.
  def start_col(tag)
    start_row(tag) if @table_rows < 0      # <TD> without an enclosing <TR>
    span = tag.switch('colspan')
    span = span.kind_of?(String) ? span.to_i : 1
    span = 1 if span < 1                   # "0" or garbage would never advance
    @colspan = span
    @tablespan[@table_rows][@table_cols+1] = span
    @table_cols += span
    @table_cols_max = @table_cols if @table_cols > @table_cols_max
  end

  # </TABLE>: write the collected cells as a tabular environment.
  def endtable(tag)
    @in_table = false
    width = @table_cols_max+1
    colspec = @table_border ? "}{|l}|}\n\\hline\n" : "}{l}}\n"
    emit "\\begin{tabular}{*{" + width.to_s + colspec
    (0..@table_rows).each do |i|
      spans = @tablespan[i]
      cells = @table[i]
      j = 0
      # Walk this row's own cells; rows may be shorter than the widest one.
      while j < spans.size
        span = spans[j] || 1
        text = cells[j+span-1] || ""
        if span == 1
          emit text
        else
          form = "l"
          if @table_border
            # Only a cell reaching the last column closes the right border.
            form = (j+span >= width) ? "|l|" : "|l"
          end
          emit "\\multicolumn{"+span.to_s+"}{"+form+"}{"+text+"}"
        end
        j += span
        emit "&" if j < spans.size
      end
      emit "\\\\\n"
      emit "\\hline\n" if @table_border
    end
    emit "\\end{tabular}\n"
  end

  # <CENTER>
  def startcenter(tag)
    if @in_table
      emit "\\hfil"
    else
      emit "\\begin{center}\n"
    end
  end

  # </CENTER>
  def endcenter(tag)
    if @in_table
      emit "\\hfil"
    else
      emit "\\end{center}\n"
    end
  end

  # <P>: a plain paragraph break, or the opening of an alignment
  # environment when ALIGN is left, center or right.  Any other ALIGN
  # value is treated as a plain paragraph.
  def paragraph(tag)
    align = tag.switch('align')
    env = align.kind_of?(String) ? PARAGRAPH_ENVS[align.downcase] : nil
    if env.nil?
      emit "\\par\n"
      @endparagraph = ""
      @align = nil
    else
      emit "\\begin{#{env}}\n"
      @endparagraph = "\\end{#{env}}\n"
      @align = align.downcase
    end
  end

  # </P>: close the alignment environment opened by #paragraph, if any.
  def endparagraph(tag)
    return if @align.nil?
    @align = nil
    emit @endparagraph
  end

  private

  # Escape the LaTeX special characters of body text.
  def escape_text(text)
    text.gsub(/[%\#\$_\^~]/) { |ch| TEXT_ESCAPES[ch] }
  end

  # Output LaTeX generated by this class (or cell text that was escaped
  # when it was stored) without escaping it again.
  def emit(latex)
    if @in_table
      append_to_cell(latex)
    elsif !@silent
      print latex
    end
  end

  # Append +text+ to the current table cell.  Text that appears inside a
  # table but outside any cell is dropped.
  def append_to_cell(text)
    return if @table_rows < 0 || @table_cols < 0
    row = @table[@table_rows]
    row[@table_cols] = (row[@table_cols] || "") + text.to_s
  end
end
# Tag interpretation tables.  Each entry maps a lower-case tag name to
#   [action, environment, needs_document]
# * action: a String of LaTeX to output, or the name (Symbol) of an
#   Environment method to call with the tag.
# * environment: nil, a Hash (a nested table pushed as a new environment
#   when the tag opens), or "pop" (leave the current environment).
# * needs_document: when true, \begin{document} is emitted first if the
#   document body has not started yet.  The nested tables omit it.
enum_interp = {
  'li' => ["\\item ",nil]
}
item_interp = {
  'li' => ["\\item ",nil]
}
desc_interp = {
  'dt' => ["\\item[",nil],
  'dd' => ["]\n",nil]
}
table_interp = {
  'tr' => [:start_row,nil],
  'td' => [:start_col,nil],
  '/tr' => ["",nil],
  '/td' => ["",nil],
}
para_interp = {
  '/p' => [:endparagraph ,"pop",true],
}
# Font switches end in a blank so that the control word does not run into
# the text that follows it ("{\it foo}", not "{\itfoo}").
# NOTE(review): the main loop sends these strings through
# Environment#flush, which escapes `$`, `_` and `^`; the math-mode markup
# of 'u', 'sub' and 'sup' is therefore probably still not typeset as
# intended -- confirm.
main_interp = {
  'body' => ["\\begin{document}\n",nil,false],
  '/body' => ["\\end{document}\n",nil,false],
  'head' => ["",nil,false],
  '/head' => ["",nil,false],
  'html' => ["",nil,false],
  '/html' => ["",nil,false],
  'title' => [:do_silent,nil,false],
  '/title' => [:undo_silent,nil,false],
  '!' => ["",nil,false],
  'h1' => ["\\section{",nil,true],
  'h2' => ["\\subsection{",nil,true],
  'h3' => ["\\subsubsection{",nil,true],
  'h4' => ["\\paragraph{",nil,true],
  '/h1' => ["}\n",nil,true],
  '/h2' => ["}\n",nil,true],
  '/h3' => ["}\n",nil,true],
  '/h4' => ["}\n",nil,true],
  'a' => ["",nil,true],
  '/a' => ["",nil,true],
  'center' => [:startcenter,nil,true],
  '/center' => [:endcenter,nil,true],
  'ol' => ["\\begin{enumerate}\n",enum_interp,true],
  '/ol' => ["\\end{enumerate}\n","pop",true],
  'ul' => ["\\begin{itemize}\n",item_interp,true],
  '/ul' => ["\\end{itemize}\n","pop",true],
  'dl' => ["\\begin{description}\n",desc_interp,true],
  '/dl' => ["\\end{description}\n","pop",true],
  'pre' => ["\\begin{pre}\n",nil,true],
  '/pre' => ["\\end{pre}\n",nil,true],
  'p' => [:paragraph ,para_interp,true],
  'br' => ["\\par ",nil,true],
  'img' => [:img_proc,nil,true],
  'hr' => ["\\hr ",nil,true],
  'b' => ["{\\bf\\gt ",nil,true],
  '/b' => ["}",nil,true],
  'strong' => ["{\\bf\\gt ",nil,true],
  '/strong' => ["}",nil,true],
  'dfn' => ["{\\bf\\gt ",nil,true],
  '/dfn' => ["}",nil,true],
  'i' => ["{\\it ",nil,true],
  '/i' => ["}",nil,true],
  'address' => ["{\\it ",nil,true],
  '/address'=> ["}",nil,true],
  'cite' => ["{\\it ",nil,true],
  '/cite' => ["}",nil,true],
  'code' => ["{\\tt ",nil,true],
  '/code' => ["}",nil,true],
  'kbd' => ["{\\tt ",nil,true],
  '/kbd' => ["}",nil,true],
  'tt' => ["{\\tt ",nil,true],
  '/tt' => ["}",nil,true],
  'samp' => ["{\\tt ",nil,true],
  '/samp' => ["}",nil,true],
  'em' => ["{\\em ",nil,true],
  '/em' => ["}",nil,true],
  'u' => ["$\\underline{\\mbox{",nil,true],
  '/u' => ["}}$",nil,true],
  # "\\mbox": a single backslash before "m" is dropped by Ruby.
  'sub' => ["${}_\\mbox{",nil,true],
  '/sub' => ["}$",nil,true],
  'sup' => ["${}^\\mbox{",nil,true],
  '/sup' => ["}$",nil,true],
  'table' => [:starttable, table_interp,true],
  '/table' => [:endtable, "pop",true],
  'font' => ["",nil,true],
  '/font' => ["",nil,true],
}
################################ MAIN ####################################
# Read the HTML file named on the command line and write LaTeX to
# standard output.
if ARGV.size == 0 then
  STDERR.print "usage: html2latex file.html > file.tex\n"
  exit 1
end
$in_document = false
print_header
intp = Environ_stack.new(Environment.new(main_interp))
File.open(ARGV[0]) do |input|
  f = TokenStream.new(input)
  until f.eof?
    tok = f.get
    if tok.kind_of?(Tag) then
      case tok.tagname
      when "body"
        $in_document = true
      when "/body"
        $in_document = false
      end
      act = intp.action(tok.tagname)
      if act.nil? then
        STDERR.print "tag ",tok.tagname," ignored\n"
      else
        # Tags that produce body output open the document if <BODY> has
        # not been seen yet.
        if act[2] && !$in_document then
          print "\\begin{document}\n"
          $in_document = true
        end
        # environment push (a <P> without ALIGN needs no environment)
        if act[1].kind_of?(Hash) &&
            (tok.tagname != "p" || tok.switch('align') != nil) then
          intp.dup
          intp.top.set_interp(act[1])
        end
        handler = act[0]
        if handler.kind_of?(String) then
          intp.top.flush handler
        elsif !handler.nil? then
          # Anything else names an Environment method.  Testing for
          # Fixnum only worked while symbols were integers.
          intp.top.send(handler,tok)
        end
        # environment pop
        if act[1] == "pop" then
          intp.pop
        end
      end
    elsif !tok.nil? then
      intp.top.flush tok
    end
  end
end
if $in_document then
  print "\\end{document}\n"
end
Executable
+12
View File
@@ -0,0 +1,12 @@
#!/bin/sh
# htmldump [-u] [URL]
# Fetch the HTML source of URL with w3m, let makeref number its anchors,
# and render the result as text with w3m.  Without a URL, $WWW_HOME is
# used.  -u is passed on to makeref.
OPT=
if [ $# -gt 0 ] && [ "$1" = "-u" ]; then
  OPT=-u
  shift
fi
if [ $# = 0 ]; then
  URL=$WWW_HOME
else
  URL=$1
fi
if [ -z "$URL" ]; then
  echo "usage: htmldump [-u] [URL]" >&2
  exit 1
fi
# $OPT is left unquoted on purpose: when empty it must vanish entirely.
# The URL is quoted so that characters such as ? and * are not globbed.
w3m -dump_source "$URL" | makeref $OPT -url "$URL" | w3m -dump -F -T text/html
Executable
+266
View File
@@ -0,0 +1,266 @@
#!/usr/local/bin/ruby
# HTML reference generator
# by A.Ito 1999/3/30
require 'kconv'
###########################################################################
# A URL split into scheme, host, port, file (path) and label (fragment).
# A reference without a scheme has the scheme 'unknown' until it is
# completed against a base URL with #complete.
class URL
  attr_reader :scheme, :host, :port, :file, :label

  # Schemes that are written "scheme:rest" with no //host part.
  OPAQUE_SCHEMES = ['news', 'mailto']

  # str:: the URL or relative reference to parse.
  def initialize(str)
    str = str.to_s.strip
    # The scheme must start the string; otherwise a colon later in a
    # relative path would be mistaken for one.
    if /^([a-zA-Z+\-]+):(.*)/ =~ str then
      @scheme = $1
      str = $2
    else
      @scheme = 'unknown'
    end
    hostpart = ''
    if %r{^//([^/]*)(/.*)?$} =~ str then
      hostpart = $1
      str = $2 || ''
    end
    if /^(.*):(\d+)$/ =~ hostpart then
      @host = $1
      @port = $2
    else
      @host = hostpart
      @port = ''
    end
    if /(.*)#(.*)/ =~ str then
      @file = $1
      @label = $2
    else
      @file = str
      @label = ''
    end
  end

  # The URL as a string.  news: and mailto: URLs are written without
  # "//" (and, as before, without a label).
  def to_s
    s = "#{@scheme}:"
    return s+@file if OPAQUE_SCHEMES.include?(@scheme.downcase)
    s += "//"+@host
    s += ":"+@port if @port.size > 0
    s += @file
    s += "#"+@label if @label.size > 0
    s
  end

  # Fill in the parts this URL lacks from +current+ (the URL of the
  # document containing the reference) and return self.  news: and
  # mailto: URLs are left untouched.
  def complete(current)
    return self if OPAQUE_SCHEMES.include?(@scheme.downcase)
    @scheme = current.scheme if @scheme == 'unknown'
    if @host == '' then
      @port = current.port if @port == ''
      @host = current.host
      @file = resolve_path(current.file, @file)
    end
    self
  end

  private

  # Resolve the path +rel+ against the path +base_file+ of the base URL.
  def resolve_path(base_file, rel)
    return base_file if rel == ''            # "#label": same document
    return rel if rel =~ %r{^/}
    base_path = base_file.sub(/\?.*$/, '')
    rel_path, query = rel.split('?', 2)
    if rel_path == '' then
      path = base_path
    else
      # Directory of the base document.  A base without a path counts as
      # "/", so the result never depends on the working directory.
      base_dir = base_path.sub(%r{[^/]*$}, '')
      base_dir = '/' unless base_dir =~ %r{^/}
      joined = base_dir + rel_path
      path = File.expand_path(joined)
      # expand_path drops a trailing slash; directory references keep it.
      path += '/' if joined =~ %r{/$} && path !~ %r{/$}
    end
    query.nil? ? path : path + '?' + query
  end
end
# One HTML tag: a lower-cased tag name plus its attributes.
#
# Attribute names are lower-cased.  An attribute written without a value
# (e.g. NOWRAP) is stored as +true+; otherwise the value is stored as a
# String with surrounding quotes removed.
class Tag
  # A single attribute: a name, optionally followed by `=` and a
  # double-quoted, single-quoted or bare value.  Whitespace is allowed
  # around the `=` and inside quoted values.
  ATTRIBUTE = /([^\s=]+)(?:\s*=\s*(?:"([^"]*)"|'([^']*)'|(\S*)))?/

  # str:: the tag text, with or without the enclosing < and >.
  def initialize(str)
    str = $1 if str =~ /<(.+)>/m
    name, rest = str.strip.split(/\s+/, 2)
    # An empty tag such as "< >" yields an empty name instead of raising.
    @tagname = (name || "").downcase
    @vals = {}
    (rest || "").scan(ATTRIBUTE) do |key, dquoted, squoted, bare|
      # A bare value may still carry a stray quote (unterminated string).
      bare = bare.sub(/^"/, "").sub(/"$/, "") unless bare.nil?
      value = dquoted || squoted || bare
      @vals[key.downcase] = value.nil? ? true : value
    end
  end

  # The lower-cased tag name ("a", "/a", "!--", ...).
  def tagname
    @tagname
  end

  # Yield every attribute as (name, value).
  def each
    @vals.each do |k,v|
      yield k,v
    end
  end

  # Value of attribute +k+ (lower-case name), +true+ for a valueless
  # attribute, or nil when the attribute is absent.
  def switch(k)
    @vals[k]
  end

  # The tag as HTML text.  Comments produce an empty string.
  def to_s
    return '' if tagname =~ /!--/
    t = "<"+tagname
    each do |a,v|
      if v == true then
        t += " #{a}"
      elsif v.include?('"') then
        # Only a single-quoted value can contain a double quote.
        t += " #{a}='#{v}'"
      else
        t += " #{a}=\"#{v}\""
      end
    end
    t+">"
  end
end
class TokenStream
TAG_START = ?<
TAG_END = ?>
AMP_START = ?&
AMP_END = ?;
def initialize(file)
if file.kind_of?(IO) then
@f = file
else
@f = File.new(file)
end
@buf = nil
@bpos = 0
end
def read_until(endsym)
complete = FALSE
tag = []
begin
while @bpos < @buf.size
c = @buf[@bpos]
if c == endsym then
tag.push(c.chr)
complete = TRUE
@bpos += 1
break
end
if c == 10 || c == 13 then
tag.push(' ')
else
tag.push(c.chr)
end
@bpos += 1
end
unless complete
@buf = @f.gets
@bpos = 0
break if @f.eof?
end
end until complete
return tag.join('')
end
def get
while TRUE
if @buf.nil? then
@buf = @f.gets
if @f.eof? then
return nil
end
@buf = Kconv.toeuc(@buf)
@bpos = 0
end
if @buf[@bpos] == TAG_START then
return Tag.new(read_until(TAG_END))
elsif @buf[@bpos] == AMP_START then
return read_until(AMP_END)
else
i = @bpos
while i < @buf.size && @buf[i] != TAG_START && @buf[i] != AMP_START
i += 1
end
r = @buf[@bpos,i-@bpos]
if i == @buf.size then
@buf = nil
else
@bpos = i
end
redo if r =~ /^\s+$/
return r
end
end
end
public :eof?
def eof?
@f.eof?
end
end
################################ MAIN ####################################
# Copy the HTML input to standard output, appending a reference number
# (or, with -u, the URL itself) after every link, followed by the list
# of referenced URLs.
refs = []                # href of every link seen, in document order
currentURL = nil         # base URL given with -url
immediate_ref = false    # -u: print the URL after each link
link_open = false        # true while inside an <A> that has an HREF
while ARGV[0] =~ /^-/
  case ARGV.shift
  when '-url'
    base = ARGV.shift
    if base.nil? then
      STDERR.print "usage: makeref [-u] [-url base_url] [file]\n"
      exit 1
    end
    currentURL = URL.new(base)
  when '-u'
    immediate_ref = true
  end
end
if ARGV.size > 0 then
  f = TokenStream.new(ARGV[0])
else
  f = TokenStream.new(STDIN)
end
until f.eof?
  tok = f.get
  if tok.kind_of?(Tag) then
    if tok.tagname == 'a' then
      href = tok.switch('href')
      # An HREF without a value is not a usable reference.
      link_open = href.kind_of?(String)
      refs.push(href) if link_open
    elsif tok.tagname == '/a' then
      # Only the end of a real link gets a marker; the end of an
      # <A NAME=...> anchor must not repeat the previous reference.
      if link_open then
        if immediate_ref then
          r = refs.last
          r = URL.new(r).complete(currentURL).to_s unless currentURL.nil?
          print "[#{r}]"
        else
          print "[#{refs.size}]"
        end
      end
      link_open = false
    elsif tok.tagname == '/body' or tok.tagname == '/html' then
      # Stop before the closing tag so the reference list stays inside
      # the body; the closing tags are written below.
      break
    end
    print tok.to_s
  elsif !tok.nil? then
    print tok
  end
end
if !immediate_ref and refs.size > 0 then
  print "<hr><h2>References</h2>\n"
  refs.each_with_index do |ref,i|
    if currentURL.nil? then
      r = ref
    else
      r = URL.new(ref).complete(currentURL).to_s
    end
    print "[#{i+1}] #{r}<br>\n"
  end
end
# The loop never copies </body> or </html>, so both are always emitted.
print "</body>\n"
print "</html>\n"
+88
View File
@@ -0,0 +1,88 @@
#!/usr/local/bin/ruby
# scan history
# Print the command-line synopsis to standard error and exit with
# status 1.
def usage
  synopsis = "usage: scanhist -h HISTORY ML-archive1 ML-archive2 ...\n"
  STDERR.print(synopsis)
  exit(1)
end
# Return a copy of +s+ with &, < and > replaced by their HTML character
# entities.  The argument itself is left unmodified, so frozen strings
# and strings still needed by the caller are safe to pass.
def html_quote(s)
  # & must be handled first, or the entities produced below would be
  # escaped a second time.
  s.gsub(/&/,"&amp;").gsub(/</,"&lt;").gsub(/>/,"&gt;")
end
# Cross-reference a history file with mailing-list archives and print an
# HTML index of the [w3m-dev ...] subjects found in the archives:
#   [+] mail contains a patch and the subject appears in the history
#   [-] mail contains a patch, subject not in the history
#   [o] no patch detected, but the subject appears in the history
usage if ARGV.size == 0
histfile = nil
while ARGV[0] =~ /^-/
  case ARGV.shift
  when "-h"
    histfile = ARGV.shift
  else
    usage
  end
end
usage if histfile.nil?

# Subjects mentioned in the history file, and the line of each mention.
patched = {}
histline = {}
File.open(histfile) do |f|
  f.each_line do |line|
    if line =~ /Subject: (\[w3m-dev.*\])/ then
      patched[$1] = true
      histline[$1] = f.lineno
    end
  end
end

# archive[subject] = [subject line text, contains a patch?, "file#line"]
archive = {}
subject = nil
ARGV.each do |fn|
  File.open(fn) do |f|
    f.each_line do |line|
      if line =~ /^From / then
        # beginning of a mail
        subject = nil
      elsif subject.nil? and line =~ /^Subject: / then
        # Only [w3m-dev ...] subjects are indexed.  Other subjects are
        # skipped instead of being stored under a nil key, which made
        # the sort below fail.
        if line =~ /Subject: (\[w3m-dev.*\])/ then
          subject = $1
          archive[subject] = [line.chomp.sub(/^Subject:\s*/,""),false,fn+"#"+f.lineno.to_s]
        end
      elsif line =~ /^\+\+\+/ or line =~ /\*\*\*/ or
          line =~ /filename=.*(patch|diff).*/ or line =~ /^begin \d\d\d/
        # Looks like a diff or an attachment; ignore it when no indexed
        # subject is current.
        archive[subject][1] = true unless subject.nil?
      end
    end
  end
end

print "<html><head><title>w3m patch configuration\n</title></head><body>\n"
print "<pre>\n"
archive.keys.sort.each do |subj|
  a = archive[subj]
  if patched[subj] then
    mark = a[1] ? "+" : "o"
    print "[<a href=\"#{histfile}\##{histline[subj]}\">#{mark}</a>]"
  else
    print(a[1] ? "[-]" : " ")
  end
  print "<a href=\"#{a[2]}\">"
  print "<b>",html_quote(a[0]),"</b></a>\n"
end
print "</pre></body></html>\n"
Executable
+33
View File
@@ -0,0 +1,33 @@
#!/bin/sh
# Start w3m on the given URLs, or on HOMEPAGE when none is given.
# Arguments starting with "-" are passed to w3m as options.  An argument
# that is neither a scheme://... URL nor an existing file or directory
# gets "http://" put in front of it.
# Arguments are still joined into blank-separated lists, so an argument
# containing blanks is split.
HOMEPAGE=http://ei5nazha.yz.yamagata-u.ac.jp/~aito/w3m/
OPT=""
URL=""
for i in "$@"
do
  case $i in
  -*)
    OPT="$OPT $i"
    ;;
  *)
    URL="$URL $i"
    ;;
  esac
done
if [ -z "$URL" ]; then
  URL=$HOMEPAGE
fi
URLARG=""
for u in $URL
do
  # The leading x keeps expr from reading $u as one of its own keywords.
  if [ `expr "x$u" : 'x[a-z][a-z]*://'` -gt 0 ]; then
    URLARG="$URLARG $u"
  elif [ -f "$u" ] || [ -d "$u" ]; then
    URLARG="$URLARG $u"
  else
    URLARG="$URLARG http://$u"
  fi
done
# The options are collected in OPT; the former $OPTS was never set, so
# every option was silently dropped.
w3m $OPT $URLARG