Adding upstream version 0.5.1
This commit is contained in:
517
Bonus/html2latex
Executable file
517
Bonus/html2latex
Executable file
@@ -0,0 +1,517 @@
|
||||
#!/usr/local/bin/ruby
|
||||
|
||||
#
|
||||
# HTML to LaTeX converter
|
||||
# by A. Ito, 16 June, 1997
|
||||
#
|
||||
|
||||
require 'kconv'
|
||||
|
||||
# configuration
|
||||
# Convert an image file to EPS by running the external `convert` program.
#
# giffile:: path of the source image
# epsfile:: path of the EPS file to create
#
# The command line is echoed to STDERR before it is run. Returns whatever
# Kernel#system returns (true on success, false on a non-zero exit status,
# nil if the command could not be executed).
def gif2eps(giffile, epsfile)
  argv = ["convert", giffile, epsfile]
  STDERR.print argv.join(" "), "\n"
  # Pass the arguments as a vector so no shell is involved: file names come
  # from HTML attributes and may contain spaces or shell metacharacters.
  system(*argv)
end
|
||||
|
||||
###########################################################################
|
||||
# A parsed HTML tag: a lower-cased tag name plus its attributes.
#
# The tag name keeps a leading "/" for closing tags ("/table") and is the
# first whitespace-delimited word inside the angle brackets. Attribute names
# are lower-cased; an attribute without a value is stored as +true+.
class Tag
  # name, optionally followed by = and a double-quoted, single-quoted or
  # bare value. Quoted values may contain whitespace.
  ATTR_PATTERN = /([^\s=]+)(?:\s*=\s*(?:"([^"]*)"|'([^']*)'|(\S*)))?/

  # str:: the tag text, with or without the surrounding "<" and ">"
  def initialize(str)
    inner = str =~ /<(.+)>/ ? $1 : str
    name, rest = inner.strip.split(/\s+/, 2)
    # An empty tag ("< >") yields an empty name instead of raising.
    @tagname = (name || "").downcase
    @vals = {}
    (rest || "").scan(ATTR_PATTERN) do |key, dquoted, squoted, bare|
      # A bare value may still carry a stray quote (unterminated string).
      bare = bare.sub(/\A"/, "").sub(/"\z/, "") unless bare.nil?
      value = dquoted || squoted || bare
      @vals[key.downcase] = value.nil? ? true : value
    end
  end

  # Returns the lower-cased tag name.
  def tagname
    @tagname
  end

  # Yields each attribute as (name, value).
  def each(&block)
    @vals.each(&block)
  end

  # Returns the value of attribute +k+ (a lower-case name): a String,
  # +true+ for a value-less attribute, or nil when the attribute is absent.
  def switch(k)
    @vals[k]
  end
end
|
||||
|
||||
# Splits an HTML input into a stream of tokens.
#
# #get returns, in document order:
# * a Tag object for each "<...>" construct,
# * a String for each character entity (mapped through AMP_REPLACE_TABLE),
# * a String for each run of text between tags and entities.
# Text consisting only of whitespace is skipped. Each input line is converted
# with Kconv.toeuc before it is scanned.
#
# NOTE(review): the replacement strings below are LaTeX. In this file the
# main loop hands them to Environment#flush, which escapes "$" and "~"
# again, so "&gt;", "&lt;" and "&nbsp;" still come out mangled. Fixing that
# needs a coordinated change in the consumer.
class TokenStream
  TAG_START = "<"
  TAG_END = ">"
  AMP_START = "&"
  AMP_END = ";"

  # Keys are complete entities, exactly as they appear in the input.
  AMP_REPLACE_TABLE = {
    "&amp;"  => '\\&',
    "&gt;"   => '$>$',
    "&lt;"   => '$<$',
    "&nbsp;" => '~',
    "&quot;" => '"',
  }

  # file:: an object that responds to +gets+ (File, StringIO, ...) or a path
  #        to open for reading.
  def initialize(file)
    @f = file.respond_to?(:gets) ? file : File.new(file)
    @buf = nil
    @bpos = 0
  end

  # Reads from the current position up to and including the first occurrence
  # of +endsym+, continuing over line boundaries. CR and LF inside the result
  # are replaced by spaces. If the input ends first, returns what was read.
  def read_until(endsym)
    return "" unless fill_buffer
    parts = []
    loop do
      stop = @buf.index(endsym, @bpos)
      unless stop.nil?
        parts << @buf[@bpos..stop]
        @bpos = stop + 1
        break
      end
      parts << @buf[@bpos..-1]
      break unless next_line
    end
    parts.join("").tr("\r\n", "  ")
  end

  # Returns the next token, or nil when the input is exhausted.
  def get
    loop do
      return nil unless fill_buffer
      case @buf[@bpos]
      when TAG_START
        return Tag.new(read_until(TAG_END))
      when AMP_START
        entity = /\G&#?[A-Za-z0-9]+;/.match(@buf, @bpos)
        if entity.nil?
          # A lone "&" is not the start of an entity; treat it as a literal
          # ampersand instead of reading on until some later ";".
          @bpos += 1
          return AMP_REPLACE_TABLE["&amp;"]
        end
        @bpos += entity[0].size
        return replace_amp(entity[0])
      else
        stop = @buf.index(/[<&]/, @bpos) || @buf.size
        text = @buf[@bpos...stop]
        @bpos = stop
        next if text =~ /\A\s*\z/
        return text
      end
    end
  end

  # True when neither the buffer nor the underlying input has any characters
  # left. (#get may still return nil afterwards if only whitespace remains.)
  def eof?
    !fill_buffer
  end

  # Maps a complete entity such as "&amp;" to its replacement; unknown
  # entities are returned unchanged.
  def replace_amp(s)
    AMP_REPLACE_TABLE.fetch(s, s)
  end

  private

  # Loads the next input line into the buffer. Returns false at end of input.
  def next_line
    line = @f.gets
    @bpos = 0
    @buf = line.nil? ? nil : Kconv.toeuc(line)
    !@buf.nil?
  end

  # Makes sure the buffer holds at least one unread character.
  # Returns false when the input is exhausted.
  def fill_buffer
    while @buf.nil? || @bpos >= @buf.size
      return false unless next_line
    end
    true
  end
end
|
||||
|
||||
|
||||
# Writes the LaTeX preamble to standard output: the document style line,
# the \hr, \pre and \endpre macros used by the generated text, and a
# fallback that defines \gt as \dg when \gt is undefined.
def print_header
  # Quoted heredoc: backslashes are taken literally. The first line is
  # intentionally empty, so the output starts with a newline.
  print <<'PREAMBLE'

\documentstyle[epsf]{jarticle}
\def\hr{\par\hbox to \textwidth{\hrulefill}}
\def\pre{\begin{quote}\def\baselinestretch{0.8}\tt\obeylines}
\def\endpre{\end{quote}}
\makeatletter
\@ifundefined{gt}{\let\gt=\dg}{}
\makeatother
PREAMBLE
end
|
||||
|
||||
|
||||
# A stack of environments. Tag lookups are answered by the innermost
# (most recently pushed) environment that knows the tag.
class Environ_stack
  # envs:: the initial environments, bottom of the stack first
  def initialize(*envs)
    @stack = envs
  end

  # Looks +tag+ up from the top of the stack downwards and returns the first
  # non-nil answer, or nil if no environment has one. Names starting with
  # "!" (comments) always yield an empty action.
  def action(tag)
    return ["", nil] if tag =~ /^!/ # comment

    @stack.reverse_each do |env|
      found = env.action(tag)
      return found unless found.nil?
    end
    nil
  end

  # Removes and returns the top environment.
  def pop
    @stack.pop
  end

  # Puts +env+ on top of the stack.
  def push(env)
    @stack.push(env)
  end

  # Returns the top environment (nil when the stack is empty).
  def top
    @stack.last
  end

  # Pushes a clone of the current top environment.
  def dup
    @stack.push(top.clone)
  end
end
|
||||
|
||||
|
||||
# One level of output state: the active tag table, whether output is
# suppressed, the current paragraph alignment, and the table being collected.
#
# Text from the document goes through #flush, which escapes LaTeX special
# characters. LaTeX generated by the tag methods themselves is written with
# the private #emit, which does not escape, so separators, file names and
# stored cell text are not escaped a second time.
#
# The +tag+ argument of the tag methods must respond to +switch(name)+,
# returning the attribute value or nil.
class Environment
  # Replacements applied to document text by #flush.
  ESCAPES = {
    "&" => '\\&',
    "%" => '\\%',
    "#" => '\\#',
    "$" => '\\$',
    "_" => '\\verb+_+',
    "^" => '\\verb+^+',
    "~" => '\\verb+~+',
  }

  # ALIGN attribute value => LaTeX environment name.
  PARAGRAPH_ENVIRONMENTS = {
    "left" => "flushleft",
    "center" => "center",
    "right" => "flushright",
  }

  # interp:: Hash mapping tag names to actions
  def initialize(interp)
    @silent = false
    @in_table = false
    @interp = interp
    @align = nil
  end

  # Returns the action registered for +tag+, or nil.
  def action(tag)
    @interp[tag]
  end

  # Outputs +tok+. Strings are escaped for LaTeX first; an "&" that already
  # follows a backslash is left alone so an existing "\&" is not doubled.
  # Inside a table the text is appended to the current cell; otherwise it is
  # printed unless output is silenced. While a paragraph alignment is
  # active, a trailing newline is turned into a LaTeX line break.
  def flush(tok)
    if tok.kind_of?(String)
      # Block form: in a replacement *string*, "\\&" would mean "the whole
      # match" and leave the ampersand unescaped.
      tok = tok.gsub(/(?<!\\)&|[%$_^~#]/) { |ch| ESCAPES[ch] }
    end
    emit(tok)
  end

  # Replaces the tag table of this environment.
  def set_interp(interp)
    @interp = interp
  end

  # tag processing methods

  # <TITLE>: suppress output.
  def do_silent(tag)
    @silent = true
  end

  # </TITLE>: resume output.
  def undo_silent(tag)
    @silent = false
  end

  # <IMG>: convert the image named by SRC to EPS (via gif2eps) and include
  # it. The EPS name is the source name with its extension replaced by
  # ".eps". Tags without a usable SRC are ignored.
  def img_proc(tag)
    src = tag.switch('src')
    return unless src.kind_of?(String) && !src.empty?

    newfile = src.sub(/\.[^.\/]*\z/, "") + ".eps"
    gif2eps(src, newfile)
    emit "\\epsfile{file=#{newfile}}\n"
  end

  # <TABLE>: start collecting cells. A BORDER attribute enables rules.
  def starttable(tag)
    @table = []
    @tablespan = []
    @table_rows = -1
    @table_cols = -1
    @table_cols_max = 0
    @in_table = true
    @table_border = !tag.switch('border').nil?
  end

  # <TR>: open a new, empty row.
  def start_row(tag)
    @table_rows += 1
    @table[@table_rows] = []
    @tablespan[@table_rows] = []
    @table_cols = -1
    @colspan = 1
  end

  # <TD>: open a new cell. The span is recorded at the cell's first column;
  # the text is stored at its last column.
  def start_col(tag)
    start_row(tag) if @table_rows < 0 # <TD> without a preceding <TR>
    requested = tag.switch('colspan')
    @colspan = requested.kind_of?(String) ? requested.to_i : 1
    # A span below 1 would never advance the column counter in #endtable.
    @colspan = 1 if @colspan < 1
    @tablespan[@table_rows][@table_cols + 1] = @colspan
    @table_cols += @colspan
    @table[@table_rows][@table_cols] = ""
    @table_cols_max = @table_cols if @table_cols > @table_cols_max
  end

  # </TABLE>: write the collected cells as a tabular environment.
  def endtable(tag)
    @in_table = false
    colspec = @table_border ? "|l" : "l"
    out = "\\begin{tabular}{*{#{@table_cols_max + 1}}{#{colspec}}"
    out += @table_border ? "|}\n\\hline\n" : "}\n"
    @table.each_with_index do |cells, row|
      spans = @tablespan[row]
      last = cells.size - 1 # each row is laid out from its own cells
      col = 0
      while col <= last
        span = spans[col] || 1
        content = cells[col + span - 1].to_s
        if span == 1
          out += content
        else
          form = "l"
          if @table_border
            # close the right-hand rule when the cell reaches the last column
            form = col + span > @table_cols_max ? "|l|" : "|l"
          end
          out += "\\multicolumn{#{span}}{#{form}}{#{content}}"
        end
        col += span
        out += "&" if col <= last
      end
      out += "\\\\\n"
      out += "\\hline\n" if @table_border
    end
    out += "\\end{tabular}\n"
    emit out
  end

  # <CENTER>
  def startcenter(tag)
    emit(@in_table ? "\\hfil" : "\\begin{center}\n")
  end

  # </CENTER>
  def endcenter(tag)
    emit(@in_table ? "\\hfil" : "\\end{center}\n")
  end

  # <P>: a plain paragraph break, or the start of an aligned environment when
  # ALIGN is left, center or right. Any other ALIGN value is treated as a
  # plain paragraph.
  def paragraph(tag)
    align = tag.switch('align')
    align = align.kind_of?(String) ? align.downcase : nil
    envname = PARAGRAPH_ENVIRONMENTS[align]
    if envname.nil?
      emit "\\par\n"
      @endparagraph = ""
      @align = nil
    else
      # Emit before setting @align so the \begin line gets no line break.
      emit "\\begin{#{envname}}\n"
      @endparagraph = "\\end{#{envname}}\n"
      @align = align
    end
  end

  # </P>: close the aligned environment opened by #paragraph, if any.
  def endparagraph(tag)
    return if @align.nil?

    @align = nil
    emit @endparagraph
  end

  private

  # Writes +tok+ without escaping: into the current table cell while a table
  # is being collected, otherwise to standard output unless silenced.
  def emit(tok)
    text = tok.to_s
    if @in_table
      # Text that is not inside any cell has no place in the tabular.
      return if @table_rows < 0 || @table_cols < 0
      @table[@table_rows][@table_cols] += text
    elsif !@silent
      if !@align.nil? && text =~ /\n$/
        print text.chop, "\\\\\n"
      else
        print text
      end
    end
  end
end
|
||||
|
||||
|
||||
# Tag interpretation tables.
#
# Each entry maps a lower-case tag name to [action, environment, in_body]:
# * action      - a String to output, or a Symbol naming the Environment
#                 method to call with the tag;
# * environment - a Hash to activate as a nested tag table, the string "pop"
#                 to leave the current nested table, or nil;
# * in_body     - when true, the tag implies that the document body has
#                 started. Entries of the nested tables omit it.
#
# Action strings are passed through Environment#flush, which escapes
# & % # $ _ ^ ~, so they must not contain any of those characters.

# inside <OL>
enum_interp = {
  'li' => ["\\item ", nil]
}

# inside <UL>
item_interp = {
  'li' => ["\\item ", nil]
}

# inside <DL>
desc_interp = {
  'dt' => ["\\item[", nil],
  'dd' => ["]\n", nil]
}

# inside <TABLE>
table_interp = {
  'tr'  => [:start_row, nil],
  'td'  => [:start_col, nil],
  '/tr' => ["", nil],
  '/td' => ["", nil],
}

# inside <P ALIGN=...>
para_interp = {
  '/p' => [:endparagraph, "pop", true],
}

main_interp = {
  'body'     => ["\\begin{document}\n", nil, false],
  '/body'    => ["\\end{document}\n", nil, false],
  'head'     => ["", nil, false],
  '/head'    => ["", nil, false],
  'html'     => ["", nil, false],
  '/html'    => ["", nil, false],
  'title'    => [:do_silent, nil, false],
  '/title'   => [:undo_silent, nil, false],
  '!'        => ["", nil, false],
  'h1'       => ["\\section{", nil, true],
  'h2'       => ["\\subsection{", nil, true],
  'h3'       => ["\\subsubsection{", nil, true],
  'h4'       => ["\\paragraph{", nil, true],
  '/h1'      => ["}\n", nil, true],
  '/h2'      => ["}\n", nil, true],
  '/h3'      => ["}\n", nil, true],
  '/h4'      => ["}\n", nil, true],
  'a'        => ["", nil, true],
  '/a'       => ["", nil, true],
  'center'   => [:startcenter, nil, true],
  '/center'  => [:endcenter, nil, true],
  'ol'       => ["\\begin{enumerate}\n", enum_interp, true],
  '/ol'      => ["\\end{enumerate}\n", "pop", true],
  'ul'       => ["\\begin{itemize}\n", item_interp, true],
  '/ul'      => ["\\end{itemize}\n", "pop", true],
  'dl'       => ["\\begin{description}\n", desc_interp, true],
  '/dl'      => ["\\end{description}\n", "pop", true],
  'pre'      => ["\\begin{pre}\n", nil, true],
  '/pre'     => ["\\end{pre}\n", nil, true],
  'p'        => [:paragraph, para_interp, true],
  'br'       => ["\\par ", nil, true],
  'img'      => [:img_proc, nil, true],
  'hr'       => ["\\hr ", nil, true],
  'b'        => ["{\\bf\\gt ", nil, true],
  '/b'       => ["}", nil, true],
  'strong'   => ["{\\bf\\gt ", nil, true],
  '/strong'  => ["}", nil, true],
  'dfn'      => ["{\\bf\\gt ", nil, true],
  '/dfn'     => ["}", nil, true],
  # The trailing space ends the control word; without it "<i>foo" would
  # produce the undefined command \itfoo.
  'i'        => ["{\\it ", nil, true],
  '/i'       => ["}", nil, true],
  'address'  => ["{\\it ", nil, true],
  '/address' => ["}", nil, true],
  'cite'     => ["{\\it ", nil, true],
  '/cite'    => ["}", nil, true],
  'code'     => ["{\\tt ", nil, true],
  '/code'    => ["}", nil, true],
  'kbd'      => ["{\\tt ", nil, true],
  '/kbd'     => ["}", nil, true],
  'tt'       => ["{\\tt ", nil, true],
  '/tt'      => ["}", nil, true],
  'samp'     => ["{\\tt ", nil, true],
  '/samp'    => ["}", nil, true],
  'em'       => ["{\\em ", nil, true],
  '/em'      => ["}", nil, true],
  # Underline, subscript and superscript are written without math mode:
  # "$", "_" and "^" would be escaped on output.
  'u'        => ["\\underline{", nil, true],
  '/u'       => ["}", nil, true],
  'sub'      => ["\\raisebox{-0.5ex}{\\scriptsize ", nil, true],
  '/sub'     => ["}", nil, true],
  'sup'      => ["\\raisebox{0.8ex}{\\scriptsize ", nil, true],
  '/sup'     => ["}", nil, true],
  'table'    => [:starttable, table_interp, true],
  '/table'   => [:endtable, "pop", true],
  'font'     => ["", nil, true],
  '/font'    => ["", nil, true],
}
|
||||
|
||||
|
||||
|
||||
|
||||
################################ MAIN ####################################

# Usage: html2latex FILE
# Reads the HTML file named by the first argument and prints LaTeX on
# standard output. Unknown tags are reported on STDERR and skipped.

abort "usage: html2latex file.html" if ARGV.empty?

# true between \begin{document} and \end{document}
$in_document = false
print_header
intp = Environ_stack.new(Environment.new(main_interp))
f = TokenStream.new(ARGV[0])
until f.eof?
  tok = f.get
  next if tok.nil?

  # plain text and entity replacements
  unless tok.kind_of?(Tag)
    intp.top.flush tok
    next
  end

  name = tok.tagname
  case name
  when "body"
    $in_document = true
  when "/body"
    $in_document = false
  end

  act = intp.action(name)
  if act.nil?
    STDERR.print "tag ", name, " ignored\n"
    next
  end

  # A body-level tag before any <BODY> opens the document implicitly.
  if act[2] && !$in_document
    print "\\begin{document}\n"
    $in_document = true
  end

  # environment push (a <P> only opens one when it carries ALIGN, because
  # only then does a matching </P> have something to close)
  if act[1].kind_of?(Hash) && (name != "p" || !tok.switch('align').nil?)
    intp.dup
    intp.top.set_interp(act[1])
  end

  case act[0]
  when String
    intp.top.flush act[0]
  when Symbol # name of an Environment method
    intp.top.send(act[0], tok)
  end

  # environment pop
  intp.pop if act[1] == "pop"
end
print "\\end{document}\n" if $in_document
|
Reference in New Issue
Block a user