#!/usr/local/bin/ruby
#
# HTML to LaTeX converter
# by A. Ito, 16 June, 1997
#

require 'kconv'

# configuration
# Convert an image file to EPS by running the external `convert` command
# (presumably ImageMagick's -- confirm on the target system).
#
# giffile:: path of the source image
# epsfile:: path of the EPS file to write
#
# The command line is echoed to STDERR before it is run.
# Returns the result of Kernel#system: true on success, false on a non-zero
# exit status, nil if the command could not be executed.
def gif2eps(giffile,epsfile)
  cmd = ["convert", giffile.to_s, epsfile.to_s]
  STDERR.print cmd.join(" "),"\n"
  # The file names come from <IMG SRC=...> attributes of the input document.
  # Passing them as separate arguments runs the program without a shell, so
  # characters such as ';' or '`' in a name are never interpreted.
  system(*cmd)
end

###########################################################################

# One HTML tag: a lower-cased tag name plus its attributes.
#
# Attribute names are lower-cased.  An attribute written without a value
# (e.g. BORDER) is stored with the value true.
class Tag
  # One attribute: name, optionally followed by = and a double-quoted,
  # single-quoted or bare value.  Whitespace around '=' is allowed.
  ATTRIBUTE = /([^\s=]+)(?:\s*=\s*(?:"([^"]*)"|'([^']*)'|(\S*)))?/

  # str is the tag text, with or without the surrounding '<' and '>'.
  def initialize(str)
    text = str.to_s
    # /m lets the brackets be stripped even when the tag spans several lines.
    inner = text[/<(.+)>/m, 1] || text
    name, rest = inner.strip.split(/\s+/, 2)
    # name is nil for an empty tag such as "< >"; use "" instead of failing.
    @tagname = name.to_s.downcase
    @vals = {}
    rest.to_s.scan(ATTRIBUTE) do |key, dquoted, squoted, bare|
      # For an unquoted value, drop a stray quote at either end (this is
      # what an unterminated quoted value looks like).
      bare = bare.sub(/\A"/, "").sub(/"\z/, "") unless bare.nil?
      value = dquoted || squoted || bare
      # All three captures are nil exactly when there was no '=' at all.
      @vals[key.downcase] = value.nil? ? true : value
    end
  end

  # The lower-cased tag name; closing tags keep their leading '/'.
  def tagname
    @tagname
  end

  # Yield every attribute as (name, value).
  def each
    @vals.each do |k,v|
      yield k,v
    end
  end

  # Value of attribute k (a lower-case name): a String, true for a
  # value-less attribute, or nil when the attribute is absent.
  def switch(k)
    @vals[k]
  end
end

# Splits an HTML input into tokens: Tag objects for <...> markup and Strings
# for text and character entities.  Each input line is converted with
# Kconv.toeuc before it is scanned.
class TokenStream
  TAG_START = '<'
  TAG_END = '>'
  AMP_START = '&'
  AMP_END = ';'

  # LaTeX text for the character entities that are understood.  The values
  # are later passed through Environment#flush, which escapes '$' and '~',
  # so they are written without those characters: \( \) instead of $ $, and
  # \nobreak\  for a non-breaking space.
  AMP_REPLACE_TABLE = {
    '&amp;' => '\\&',
    '&gt;' => '\\(>\\)',
    '&lt;' => '\\(<\\)',
    '&nbsp;' => '\\nobreak\\ ',
    '&quot;' => '"',
  }

  # A character entity at the start of a string: &name; or &#digits;
  ENTITY = /\A&#?[A-Za-z0-9]+;/

  # file is a file name, or an already opened object that responds to
  # gets and eof? (a File, STDIN, a StringIO, ...).
  def initialize(file)
    @f = file.respond_to?(:gets) ? file : File.new(file)
    @buf = nil
    @bpos = 0
  end

  # Read from the current position up to and including endsym, continuing
  # on following lines if needed.  Line breaks inside the result become
  # spaces.  If the input ends first, whatever was read is returned.
  def read_until(endsym)
    pieces = []
    return '' if @buf.nil? && !fill_buffer
    loop do
      stop = @buf.index(endsym, @bpos)
      if stop
        pieces << @buf[@bpos...stop].tr("\r\n", '  ') << @buf[stop, endsym.size]
        @bpos = stop + endsym.size
        break
      end
      pieces << @buf[@bpos..-1].tr("\r\n", '  ')
      break unless fill_buffer
    end
    pieces.join
  end

  # Return the next token: a Tag, a String (text, or the replacement for a
  # character entity), or nil when the input is exhausted.  Text made up of
  # whitespace only is skipped.
  def get
    loop do
      return nil if @buf.nil? && !fill_buffer
      case @buf[@bpos]
      when TAG_START
        return Tag.new(read_until(TAG_END))
      when AMP_START
        return next_entity
      else
        text = next_text
        return text unless text =~ /\A\s*\z/
      end
    end
  end

  # True when the current line is used up and the underlying input has no
  # further lines.
  def eof?
    (@buf.nil? || @bpos >= @buf.size) && @f.eof?
  end

  # Replacement text for the entity s; unknown entities are returned as is.
  def replace_amp(s)
    AMP_REPLACE_TABLE.fetch(s, s)
  end

  private

  # Load the next input line into the buffer.  Returns false at end of
  # input, leaving the buffer empty (nil).
  def fill_buffer
    line = @f.gets
    @bpos = 0
    @buf = line.nil? ? nil : Kconv.toeuc(line)
    !@buf.nil?
  end

  # Consume the entity at the current position.  An '&' that does not start
  # an entity is consumed alone and treated like &amp;, instead of
  # swallowing the text up to the next ';'.
  def next_entity
    entity = @buf[@bpos..-1][ENTITY]
    if entity.nil?
      @bpos += 1
      replace_amp('&amp;')
    else
      @bpos += entity.size
      replace_amp(entity)
    end
  end

  # Consume and return the text up to the next '<' or '&' on this line.
  def next_text
    stop = @buf.index(/[<&]/, @bpos) || @buf.size
    text = @buf[@bpos, stop - @bpos]
    if stop == @buf.size
      @buf = nil
    else
      @bpos = stop
    end
    text
  end
end

# Print the LaTeX preamble to standard output: the document style line and
# the definitions of \hr, \pre/\endpre and \gt that the generated body uses.
# The output starts with an empty line, as the original literal did.
def print_header
  print <<'EOS'

\documentstyle[epsf]{jarticle}
\def\hr{\par\hbox to \textwidth{\hrulefill}}
\def\pre{\begin{quote}\def\baselinestretch{0.8}\tt\obeylines}
\def\endpre{\end{quote}}
\makeatletter
\@ifundefined{gt}{\let\gt=\dg}{}
\makeatother
EOS
end

# A stack of environments.  A tag is looked up from the innermost (top)
# environment outwards, so inner environments override outer ones.
class Environ_stack
  # envs are the initial environments, outermost first.
  def initialize(*envs)
    @stack = envs
  end

  # Look up the action for the tag name tag.  Names starting with '!'
  # (comments, declarations) always yield the no-op action ["",nil].
  # Returns nil when no environment on the stack knows the tag.
  def action(tag)
    return ["",nil] if tag =~ /^!/ # comment
    @stack.reverse_each do |env|
      found = env.action(tag)
      return found unless found.nil?
    end
    nil
  end

  # Remove and return the top environment.  The last remaining environment
  # is never removed (nil is returned instead): an unmatched closing tag
  # would otherwise empty the stack and leave top returning nil.
  def pop
    return nil if @stack.size <= 1
    @stack.pop
  end

  # Push env on top of the stack.
  def push(env)
    @stack.push(env)
  end

  # The innermost environment, or nil if the stack is empty.
  def top
    @stack.last
  end

  # Push a clone of the top environment.  Note that this replaces
  # Object#dup: it changes the stack instead of returning a copy of it.
  def dup
    @stack.push(top.clone)
  end
end

# Output state for one level of the environment stack: the tag table in
# force, whether output is suppressed, paragraph alignment, and the table
# currently being collected.  The tag processing methods are called with
# the Tag being processed.
class Environment
  # LaTeX replacements for characters that are special in LaTeX text.
  # NOTE(review): \verb cannot be used inside the argument of another
  # command such as \section{...}; \_ , \^{} and \~{} would be safer.
  TEXT_ESCAPES = {
    "&" => "\\&",
    "%" => "\\%",
    "#" => "\\#",
    "$" => "\\$",
    "_" => "\\verb+_+",
    "^" => "\\verb+^+",
    "~" => "\\verb+~+",
  }

  # The characters above.  An '&' that already follows a backslash is left
  # alone, so the \& produced for &amp; is not escaped a second time.
  TEXT_SPECIALS = /(?<!\\)&|[%\#\$_\^~]/

  # LaTeX environment for each supported ALIGN value of <P>.
  PARAGRAPH_ENVIRONMENTS = {
    "left" => "flushleft",
    "center" => "center",
    "right" => "flushright",
  }

  # interp maps tag names to actions.
  def initialize(interp)
    @silent = false
    @in_table = false
    @interp = interp
    @align = nil
  end

  # The action registered for the tag name tag, or nil.
  def action(tag)
    @interp[tag]
  end

  # Output tok.  Strings get their LaTeX special characters escaped first.
  # Inside a table the result is added to the current cell; otherwise it is
  # printed unless output is silenced.  In an aligned paragraph a trailing
  # newline is turned into a LaTeX line break.
  def flush(tok)
    tok = escape_text(tok) if tok.kind_of?(String)
    if @in_table
      append_to_cell(tok)
    elsif !@silent
      text = tok.to_s
      if !@align.nil? && text =~ /\n\z/
        print text.chop,"\\\\\n"
      else
        print text
      end
    end
  end

  # Replace the tag table.
  def set_interp(interp)
    @interp = interp
  end

  # tag processing methods

  # <TITLE>
  def do_silent(tag)
    @silent = true
  end

  # </TITLE>
  def undo_silent(tag)
    @silent = false
  end

  # <IMG>: convert the image to EPS with gif2eps and include the result.
  def img_proc(tag)
    src = tag.switch('src')
    unless src.kind_of?(String) && !src.empty?
      STDERR.print "img tag without src ignored\n"
      return
    end
    # Replace the extension at the end of the name only; the previous
    # unanchored /\.GIF/i could rewrite a directory name instead.
    newfile = src.sub(/\.[A-Za-z0-9]+\z/, "") + ".eps"
    gif2eps(src,newfile) unless newfile == src
    # Emitted unescaped: '_' in a file name must stay as it is.
    emit "\\epsfile{file=#{newfile}}\n"
  end

  # <TABLE>: start collecting cells; output is produced by endtable.
  def starttable(tag)
    @table = []
    @tablespan = []
    @table_rows = -1
    @table_cols = -1
    @table_cols_max = 0
    @in_table = true
    @table_border = !tag.switch('border').nil?
  end

  # <TR>
  def start_row(tag)
    @table_rows += 1
    @table[@table_rows] = []
    @tablespan[@table_rows] = []
    @table_cols = -1
    @colspan = 1
  end

  # <TD>: open a new cell.  The cell text is stored at the index of the
  # last column the cell covers; the span is stored at its first column.
  def start_col(tag)
    start_row(tag) if @table_rows < 0 # <TD> without a preceding <TR>
    requested = tag.respond_to?(:switch) ? tag.switch('colspan') : nil
    # A span below 1 would never advance the column loop in endtable.
    @colspan = requested.nil? ? 1 : [requested.to_s.to_i, 1].max
    @tablespan[@table_rows][@table_cols+1] = @colspan
    @table_cols += @colspan
    @table[@table_rows][@table_cols] = ""
    @table_cols_max = @table_cols if @table_cols > @table_cols_max
  end

  # </TABLE>: print the collected cells as a tabular environment.  Cell
  # text was escaped when it was collected, so it is emitted as is.
  def endtable(tag)
    @in_table = false
    emit "\\begin{tabular}{*{#{@table_cols_max+1}}"
    emit(@table_border ? "{|l}|}\n\\hline\n" : "{l}}\n")
    (0..@table_rows).each do |i|
      j = 0
      # Every row is padded to the widest row of the table.
      while j <= @table_cols_max
        span = @tablespan[i][j] || 1
        cell = @table[i][j+span-1].to_s
        if span == 1
          emit cell
        else
          form = "l"
          if @table_border
            # the last cell of a row also closes the right border
            form = j+span > @table_cols_max ? "|l|" : "|l"
          end
          emit "\\multicolumn{#{span}}{#{form}}{#{cell}}"
        end
        j += span
        emit "&" if j <= @table_cols_max
      end
      emit "\\\\\n"
      emit "\\hline\n" if @table_border
    end
    emit "\\end{tabular}\n"
  end

  # <CENTER>
  def startcenter(tag)
    emit(@in_table ? "\\hfil" : "\\begin{center}\n")
  end

  # </CENTER>
  def endcenter(tag)
    emit(@in_table ? "\\hfil" : "\\end{center}\n")
  end

  # <P>: a plain paragraph break, or the start of an aligned environment
  # when ALIGN is left, center or right.  Any other ALIGN value is treated
  # like a plain paragraph.
  def paragraph(tag)
    align = tag.switch('align')
    align = align.kind_of?(String) ? align.downcase : nil
    environment = PARAGRAPH_ENVIRONMENTS[align]
    if environment.nil?
      emit "\\par\n"
      @endparagraph = ""
      @align = nil
    else
      emit "\\begin{#{environment}}\n"
      @endparagraph = "\\end{#{environment}}\n"
      @align = align
    end
  end

  # </P>: close the environment opened by an aligned <P>.
  def endparagraph(tag)
    unless @align.nil?
      @align = nil
      emit @endparagraph
    end
  end

  private

  # text with the LaTeX special characters replaced.
  def escape_text(text)
    text.gsub(TEXT_SPECIALS) { |ch| TEXT_ESCAPES[ch] }
  end

  # Output LaTeX markup generated by this class, without escaping and
  # without the line-break handling of aligned paragraphs.
  def emit(markup)
    if @in_table
      append_to_cell(markup)
    elsif !@silent
      print markup.to_s
    end
  end

  # Add tok to the current table cell.  Content that arrives before any
  # cell was opened starts a row/cell of its own, unless it is blank.
  def append_to_cell(tok)
    text = tok.to_s
    if @table_rows < 0 || @table_cols < 0
      return if text.strip.empty?
      start_col(nil)
    end
    row = @table[@table_rows]
    row[@table_cols] = row[@table_cols].to_s + text
  end
end

# Tag tables.  Each entry maps a lower-case tag name to
#   [action, environment, in_document]
# action::      a String that is output, or the Symbol of the Environment
#               method that handles the tag
# environment:: a Hash (tag table to install in a new environment for the
#               contents of the tag), "pop" (leave that environment), or nil
# in_document:: true if the tag may only occur inside the document body, so
#               \begin{document} must be output before it if still missing
#
# Action strings are output through Environment#flush, which escapes the
# characters & % # $ _ ^ ~ .  They must therefore not contain any of these:
# math is written with \( \) and \sb / \sp instead of $ _ ^ .

# inside <OL>
enum_interp = {
  'li' => ["\\item ",nil]
}

# inside <UL>
item_interp = {
  'li' => ["\\item ",nil]
}

# inside <DL>
desc_interp = {
  'dt' => ["\\item[",nil],
  'dd' => ["]\n",nil]
}

# inside <TABLE>
table_interp = {
  'tr' => [:start_row,nil],
  'td' => [:start_col,nil],
  '/tr' => ["",nil],
  '/td' => ["",nil],
}

# inside <P ALIGN=...>
para_interp = {
  '/p' => [:endparagraph ,"pop",true],
}

main_interp = {
  'body' => ["\\begin{document}\n",nil,false],
  '/body' => ["\\end{document}\n",nil,false],
  'head' => ["",nil,false],
  '/head' => ["",nil,false],
  'html' => ["",nil,false],
  '/html' => ["",nil,false],
  'title' => [:do_silent,nil,false],
  '/title' => [:undo_silent,nil,false],
  '!' => ["",nil,false],
  'h1' => ["\\section{",nil,true],
  'h2' => ["\\subsection{",nil,true],
  'h3' => ["\\subsubsection{",nil,true],
  'h4' => ["\\paragraph{",nil,true],
  '/h1' => ["}\n",nil,true],
  '/h2' => ["}\n",nil,true],
  '/h3' => ["}\n",nil,true],
  '/h4' => ["}\n",nil,true],
  'a' => ["",nil,true],
  '/a' => ["",nil,true],
  'center' => [:startcenter,nil,true],
  '/center' => [:endcenter,nil,true],
  'ol' => ["\\begin{enumerate}\n",enum_interp,true],
  '/ol' => ["\\end{enumerate}\n","pop",true],
  'ul' => ["\\begin{itemize}\n",item_interp,true],
  '/ul' => ["\\end{itemize}\n","pop",true],
  'dl' => ["\\begin{description}\n",desc_interp,true],
  '/dl' => ["\\end{description}\n","pop",true],
  'pre' => ["\\begin{pre}\n",nil,true],
  '/pre' => ["\\end{pre}\n",nil,true],
  'p' => [:paragraph ,para_interp,true],
  'br' => ["\\par ",nil,true],
  'img' => [:img_proc,nil,true],
  'hr' => ["\\hr ",nil,true],
  'b' => ["{\\bf\\gt ",nil,true],
  '/b' => ["}",nil,true],
  'strong' => ["{\\bf\\gt ",nil,true],
  '/strong' => ["}",nil,true],
  'dfn' => ["{\\bf\\gt ",nil,true],
  '/dfn' => ["}",nil,true],
  # The font switches end with a space: without it the command name would
  # run into the following text ("{\it" + "foo" = "{\itfoo").
  'i' => ["{\\it ",nil,true],
  '/i' => ["}",nil,true],
  'address' => ["{\\it ",nil,true],
  '/address'=> ["}",nil,true],
  'cite' => ["{\\it ",nil,true],
  '/cite' => ["}",nil,true],
  'code' => ["{\\tt ",nil,true],
  '/code' => ["}",nil,true],
  'kbd' => ["{\\tt ",nil,true],
  '/kbd' => ["}",nil,true],
  'tt' => ["{\\tt ",nil,true],
  '/tt' => ["}",nil,true],
  'samp' => ["{\\tt ",nil,true],
  '/samp' => ["}",nil,true],
  'em' => ["{\\em ",nil,true],
  '/em' => ["}",nil,true],
  'u' => ["\\(\\underline{\\mbox{",nil,true],
  '/u' => ["}}\\)",nil,true],
  # "\\mbox", not "\mbox": in a double-quoted string "\m" is just "m".
  'sub' => ["\\({}\\sb{\\mbox{",nil,true],
  '/sub' => ["}}\\)",nil,true],
  'sup' => ["\\({}\\sp{\\mbox{",nil,true],
  '/sup' => ["}}\\)",nil,true],
  'table' => [:starttable, table_interp,true],
  '/table' => [:endtable, "pop",true],
  'font' => ["",nil,true],
  '/font' => ["",nil,true],
}

################################ MAIN ####################################

# Convert the HTML file named on the command line and write the LaTeX
# source to standard output.  Tags without an entry in the tag tables are
# reported on STDERR and skipped.

if ARGV.empty?
  STDERR.print "usage: ",$0," file.html\n"
  exit 1
end

# true while the output is between \begin{document} and \end{document}
$in_document = false
print_header
intp = Environ_stack.new(Environment.new(main_interp))
f = TokenStream.new(ARGV[0])
until f.eof?
  tok = f.get
  next if tok.nil?

  # plain text (or an entity replacement)
  unless tok.kind_of?(Tag)
    intp.top.flush tok
    next
  end

  case tok.tagname
  when "body"
    $in_document = true
  when "/body"
    $in_document = false
  end

  act = intp.action(tok.tagname)
  if act.nil?
    STDERR.print "tag ",tok.tagname," ignored\n"
    next
  end

  # a body-only tag in a document without <BODY>: open the document first
  if act[2] && !$in_document
    print "\\begin{document}\n"
    $in_document = true
  end

  # environment push; a <P> only opens one when it carries ALIGN, because
  # only then is the closing </P> looked up and the environment popped
  if act[1].kind_of?(Hash) &&
      (tok.tagname != "p" || !tok.switch('align').nil?)
    intp.dup
    intp.top.set_interp(act[1])
  end

  case act[0]
  when String
    intp.top.flush act[0]
  when Symbol # name of the Environment method handling the tag
    intp.top.send(act[0],tok)
  end

  # environment pop
  intp.pop if act[1] == "pop"
end
print "\\end{document}\n" if $in_document