#!/usr/local/bin/ruby
#
#       HTML to LaTeX converter
#         by A. Ito, 16 June, 1997
#

require 'kconv'

# configuration
# Converts an image file to EPS by running the external `convert` command
# (presumably ImageMagick's -- confirm on the target system).
#
# The command line is echoed to STDERR first.  The program name and the two
# paths are handed to Kernel#system as separate arguments, so no shell is
# started: paths containing spaces or shell metacharacters (they come straight
# from HTML attributes) reach `convert` untouched and cannot inject commands.
#
# giffile:: path of the source image
# epsfile:: path of the EPS file to create
#
# Returns the result of Kernel#system: true on success, false on a non-zero
# exit status, nil when the command could not be executed.
def gif2eps(giffile, epsfile)
  argv = ['convert', giffile.to_s, epsfile.to_s]
  STDERR.print argv.join(' '), "\n"
  system(*argv)
end

###########################################################################
# A parsed HTML tag: a lower-cased tag name plus its attributes.
#
# Attribute names are lower-cased.  An attribute written without a value
# (e.g. BORDER) is stored as +true+; every other value is stored as a String
# with its surrounding quotes removed.
class Tag
  # One attribute: a name, optionally followed by "=" and a double-quoted,
  # single-quoted or bare value.  Whitespace around "=" is allowed and quoted
  # values may contain whitespace.
  ATTRIBUTE = /([^\s=]+)(?:\s*=\s*(?:"([^"]*)"|'([^']*)'|(\S+)))?/

  # The tag name in lower case, including a leading "/" for closing tags.
  attr_reader :tagname

  # str:: the tag text, with or without the enclosing "<" and ">".
  #       An empty string yields an empty tag name instead of raising.
  def initialize(str)
    # /m lets the tag span several lines.
    str = Regexp.last_match(1) if str =~ /<(.+)>/m
    name, rest = str.strip.split(/\s+/, 2)
    @tagname = name.to_s.downcase
    @vals = {}
    rest.to_s.scan(ATTRIBUTE) do |key, double_quoted, single_quoted, bare|
      # A bare value may still carry a stray quote (unterminated string).
      bare &&= bare.sub(/\A"/, '').sub(/"\z/, '')
      @vals[key.downcase] = double_quoted || single_quoted || bare || true
    end
  end

  # Yields each attribute as (name, value), in the order they appeared.
  def each
    @vals.each do |key, value|
      yield key, value
    end
  end

  # Returns the value of attribute +k+ (a lower-case name), +true+ for a
  # valueless attribute, or nil when the attribute is absent.
  def switch(k)
    @vals[k]
  end
end
# Splits an HTML source into tokens, reading it one line at a time.
#
# #get returns, in document order:
# * a Tag for every "<...>" construct (it may span several lines),
# * the replacement text for a character entity such as "&amp;",
# * a String for each run of plain text (whitespace-only runs are skipped),
# * nil once the input is exhausted.
class TokenStream
  TAG_START = '<'
  TAG_END = '>'
  AMP_START = '&'
  AMP_END = ';'

  # Character entities and the LaTeX text that replaces them.  The keys are
  # the complete entity as it appears in the source, "&" and ";" included,
  # because that is the string #replace_amp is called with.
  # NOTE(review): the main loop passes these replacements through
  # Environment#flush, which escapes "$" and "~" -- confirm the rendering of
  # &gt;, &lt; and &nbsp;.
  AMP_REPLACE_TABLE = {
    '&amp;'  => '\\&',
    '&gt;'   => '$>$',
    '&lt;'   => '$<$',
    '&nbsp;' => '~',
    '&quot;' => '"',
  }.freeze

  # A complete entity at the start of the remaining buffer.  A "&" that does
  # not start one (as in "AT&T") is treated as ordinary text.
  ENTITY = /\A&#?[A-Za-z0-9]+;/

  # file:: an object that responds to #gets and #eof? (File, $stdin,
  #        StringIO, ...), or the name of a file to open.
  def initialize(file)
    @f = file.respond_to?(:gets) ? file : File.new(file)
    @buf = nil
    @bpos = 0
  end

  # Collects characters from the current position up to and including the
  # first +endsym+, continuing over line boundaries.  Line terminators are
  # replaced by a space.  At end of input the characters collected so far are
  # returned.
  def read_until(endsym)
    pieces = []
    while buffer_ready?
      c = @buf[@bpos]
      @bpos += 1
      if c == endsym
        pieces.push(c)
        break
      end
      pieces.push(c == "\n" || c == "\r" ? ' ' : c)
    end
    pieces.join('')
  end

  # Returns the next token (see the class comment), or nil at end of input.
  def get
    loop do
      return nil unless buffer_ready?

      c = @buf[@bpos]
      if c == TAG_START
        return Tag.new(read_until(TAG_END))
      elsif c == AMP_START && (entity = @buf[@bpos..-1][ENTITY])
        @bpos += entity.size
        return replace_amp(entity)
      else
        # Plain text runs to the next "<" or "&".  The search starts one
        # character further on so that a stray "&" is consumed as text.
        stop = @buf.index(/[<&]/, @bpos + 1) || @buf.size
        text = @buf[@bpos...stop]
        @bpos = stop
        next if text =~ /\A\s+\z/
        return text
      end
    end
  end

  # True once both the line buffer and the underlying input are exhausted.
  # Text still waiting in the buffer counts as "not at end", so tokens on the
  # last line of the input are not lost.
  def eof?
    !buffered? && @f.eof?
  end

  # Returns the LaTeX replacement for entity +s+ ("&name;"), or +s+ itself
  # when the entity is not in AMP_REPLACE_TABLE.
  def replace_amp(s)
    AMP_REPLACE_TABLE.fetch(s, s)
  end

  private

  # True while unread characters remain in the current line.
  def buffered?
    !@buf.nil? && @bpos < @buf.size
  end

  # Makes sure unread characters are available, reading the next line when
  # the current one is used up.  Returns false at end of input.
  def buffer_ready?
    buffered? || fill_buffer
  end

  # Reads the next line into the buffer and converts it to EUC with Kconv.
  # Kconv is required at file level; the guard only keeps the class usable
  # where it has not been loaded.  Returns false at end of input.
  def fill_buffer
    line = @f.gets
    @bpos = 0
    if line.nil?
      @buf = nil
      return false
    end
    @buf = defined?(Kconv) ? Kconv.toeuc(line) : line
    true
  end
end
# Prints the LaTeX preamble to standard output: a blank line, the
# \documentstyle line, and the helper macros the converter's output relies on
# (\hr, the "pre" environment, and a fallback that defines \gt as \dg when
# \gt is undefined).
#
# The text is a single-quoted heredoc, so every backslash is emitted
# literally.
def print_header
  print <<'PREAMBLE'

\documentstyle[epsf]{jarticle}
\def\hr{\par\hbox to \textwidth{\hrulefill}}
\def\pre{\begin{quote}\def\baselinestretch{0.8}\tt\obeylines}
\def\endpre{\end{quote}}
\makeatletter
\@ifundefined{gt}{\let\gt=\dg}{}
\makeatother
PREAMBLE
end
# A stack of environments.  Tag lookups start at the innermost (most recently
# pushed) environment and fall back to the ones below it.
class Environ_stack
  # envs:: the initial environments, outermost first.
  def initialize(*envs)
    @stack = envs
  end

  # Returns the action for +tag+ from the innermost environment that knows
  # it, or nil when none does.  Tags whose name starts with "!" (comments and
  # declarations) always map to the empty action ["", nil].
  def action(tag)
    return ["", nil] if tag =~ /^!/ # comment

    @stack.reverse_each do |env|
      found = env.action(tag)
      return found unless found.nil?
    end
    nil
  end

  # Removes and returns the innermost environment.
  def pop
    @stack.pop
  end

  # Makes +env+ the innermost environment.
  def push(env)
    @stack.push(env)
  end

  # Returns the innermost environment, or nil when the stack is empty.
  def top
    @stack.last
  end

  # Pushes a clone of the innermost environment.  With an empty stack there
  # is nothing to clone and nothing is pushed, so a nil entry can never end
  # up on the stack.
  # NOTE: this intentionally replaces Object#dup for this class.
  def dup
    return @stack if @stack.empty?

    @stack.push(top.clone)
  end
end
# Output state for one nesting level of the document, plus the handlers for
# tags that need more than a fixed piece of LaTeX (tables, images, aligned
# paragraphs, ...).
#
# All output is written with Kernel#print.  While a table is being collected,
# output is appended to the current cell instead and printed by #endtable.
class Environment
  # Replacement for each character that #flush escapes.
  # NOTE(review): \verb cannot be used inside the argument of another
  # command (e.g. \section{...}); "_" "^" "~" are kept as in the original
  # output -- confirm whether \_ \^{} \~{} would be preferable.
  LATEX_ESCAPES = {
    '&' => '\\&',
    '%' => '\\%',
    '#' => '\\#',
    '$' => '\\$',
    '_' => '\\verb+_+',
    '^' => '\\verb+^+',
    '~' => '\\verb+~+'
  }.freeze

  # The characters to escape.  An "&" that already carries a backslash is
  # left alone so that an already escaped ampersand is not escaped twice.
  LATEX_SPECIAL = /(?<!\\)&|[%$\#_^~]/

  # LaTeX environment for each supported ALIGN value of <P>.
  ALIGN_ENVIRONMENTS = {
    'left'   => 'flushleft',
    'center' => 'center',
    'right'  => 'flushright'
  }.freeze

  # interp:: Hash mapping tag names to actions (see #action).
  def initialize(interp)
    @silent = false
    @in_table = false
    @interp = interp
    @align = nil
  end

  # Returns the action registered for +tag+, or nil.
  def action(tag)
    @interp[tag]
  end

  # Writes +tok+ to the output.  Strings have the characters & % # $ _ ^ ~
  # escaped first; any other object is written unchanged.
  #
  # The substitution uses a Hash, so each replacement is inserted literally.
  # (In a replacement *string* "\\&" means "the matched text", which would
  # turn the ampersand substitution into a no-op.)
  def flush(tok)
    tok = tok.gsub(LATEX_SPECIAL, LATEX_ESCAPES) if tok.kind_of?(String)
    emit(tok)
  end

  # Replaces the tag table consulted by #action.
  def set_interp(interp)
    @interp = interp
  end

  # tag processing methods

  # <TITLE>: suppress output until #undo_silent.
  def do_silent(tag)
    @silent = true
  end

  # </TITLE>: resume output.
  def undo_silent(tag)
    @silent = false
  end

  # <IMG>: converts the image named by SRC to EPS with gif2eps and emits an
  # \epsfile command for the result.  The EPS name is the source name with
  # its extension replaced by ".eps".  A tag without a SRC value is ignored.
  # The file name is emitted unescaped: it is a path, not document text.
  def img_proc(tag)
    src = tag.switch('src')
    return unless src.kind_of?(String)

    newfile = src.sub(/\.[A-Za-z0-9]+\z/, '') + '.eps'
    gif2eps(src, newfile)
    emit "\\epsfile{file=#{newfile}}\n"
  end

  # <TABLE>: start collecting cells.  A BORDER attribute selects ruled output.
  def starttable(tag)
    @table = []
    @tablespan = []
    @table_rows = -1
    @table_cols_max = 0
    @in_table = true
    @table_border = !tag.switch('border').nil?
  end

  # <TR>: open a new, empty row.
  def start_row(tag)
    @table_rows += 1
    @table[@table_rows] = []
    @tablespan[@table_rows] = []
    @table_cols = -1
    @colspan = 1
  end

  # <TD>: open a new cell spanning COLSPAN columns (at least one).  The span
  # is recorded at the cell's first column; the text is collected at its
  # last column.  A cell that appears before any <TR> opens a row itself.
  def start_col(tag)
    start_row(tag) if @table_rows < 0
    span = tag.switch('colspan')
    # A span below 1 would keep #endtable from advancing.
    @colspan = span.kind_of?(String) ? [span.to_i, 1].max : 1
    @tablespan[@table_rows][@table_cols + 1] = @colspan
    @table_cols += @colspan
    @table_cols_max = @table_cols if @table_cols > @table_cols_max
  end

  # </TABLE>: stop collecting and write the table as a tabular environment.
  # Cell text was escaped when it was collected, so it is written as is.
  # Rows may have different numbers of cells.
  def endtable(tag)
    @in_table = false
    emit "\\begin{tabular}{*{#{@table_cols_max + 1}}"
    emit(@table_border ? "{|l}|}\n\\hline\n" : "{l}}\n")
    @table.each_with_index do |row, i|
      emit_row(row, @tablespan[i])
      emit "\\\\\n"
      emit "\\hline\n" if @table_border
    end
    emit "\\end{tabular}\n"
  end

  # <CENTER>
  def startcenter(tag)
    if @in_table
      emit "\\hfil"
    else
      emit "\\begin{center}\n"
    end
  end

  # </CENTER>
  def endcenter(tag)
    if @in_table
      emit "\\hfil"
    else
      emit "\\end{center}\n"
    end
  end

  # <P>: with ALIGN=left/center/right, opens the matching LaTeX environment
  # and remembers how to close it; otherwise (no ALIGN, or a value that has
  # no LaTeX counterpart) starts a plain paragraph.
  def paragraph(tag)
    align = tag.switch('align')
    align = align.kind_of?(String) ? align.downcase : nil
    env = ALIGN_ENVIRONMENTS[align]
    if env.nil?
      emit "\\par\n"
      @endparagraph = ""
      align = nil
    else
      emit "\\begin{#{env}}\n"
      @endparagraph = "\\end{#{env}}\n"
    end
    # Set only now, so the markup above is not treated as an aligned line.
    @align = align
  end

  # </P>: closes the environment opened by an aligned <P>, if any.
  def endparagraph(tag)
    unless @align.nil?
      @align = nil
      emit @endparagraph
    end
  end

  private

  # Writes +text+ without escaping: into the current table cell while a table
  # is being collected, otherwise to standard output unless output is
  # suppressed.  Inside an aligned paragraph a string ending in a newline
  # gets a LaTeX line break ("\\") in place of its last character.
  def emit(text)
    if @in_table
      append_to_cell(text.to_s)
    elsif !@silent
      if !@align.nil? && text.kind_of?(String) && text =~ /\n$/
        print text.chop, "\\\\\n"
      else
        print text
      end
    end
  end

  # Appends +text+ to the cell being collected.  Text that appears before the
  # first row or cell has nowhere to go and is dropped.
  def append_to_cell(text)
    return if @table_rows < 0 || @table_cols < 0

    row = @table[@table_rows]
    row[@table_cols] = (row[@table_cols] || "") + text
  end

  # Writes the cells of one table row, separated by "&".
  # row::   the collected cell texts, indexed by each cell's last column
  # spans:: the column spans, indexed by each cell's first column
  def emit_row(row, spans)
    j = 0
    while j < spans.size
      span = spans[j] || 1
      last = j + span - 1
      cell = row[last].to_s
      if span == 1
        emit cell
      else
        form = if !@table_border
                 "l"
               elsif last >= @table_cols_max
                 "|l|" # reaches the right edge: close the ruled box
               else
                 "|l"
               end
        emit "\\multicolumn{#{span}}{#{form}}{"
        emit cell
        emit "}"
      end
      j += span
      emit "&" if j < spans.size
    end
  end
end
# Tag tables.  Each entry maps a lower-case tag name to
#   [output, environment, needs_document]
# as used by the main loop below:
# * output is a String passed to Environment#flush, or a Symbol naming the
#   Environment method to call with the tag;
# * environment is a Hash (a nested table, made current by pushing a new
#   environment), "pop" (leave the current environment) or nil;
# * needs_document, when true, makes the main loop emit \begin{document}
#   first if the document body has not been opened yet.  Entries of the
#   nested tables omit it.
#
# Output strings go through Environment#flush, which escapes $ _ ^ and ~,
# so the markup below avoids those characters: math mode is written
# \( ... \) and sub/superscripts use \sb / \sp.  Font switches end in a space
# so that following text cannot merge with the control word ({\itfoo).

# Inside <OL>.
enum_interp = {
  'li' => ["\\item ",nil]
}

# Inside <UL>.
item_interp = {
  'li' => ["\\item ",nil]
}

# Inside <DL>: <DT> opens the item label, <DD> closes it.
desc_interp = {
  'dt' => ["\\item[",nil],
  'dd' => ["]\n",nil]
}

# Inside <TABLE>.
table_interp = {
  'tr' => [:start_row,nil],
  'td' => [:start_col,nil],
  '/tr' => ["",nil],
  '/td' => ["",nil],
}

# Inside an aligned <P>.
para_interp = {
  '/p'      => [:endparagraph ,"pop",true],
}

# Top level.
main_interp = {
  'body'    => ["\\begin{document}\n",nil,false],
  '/body'   => ["\\end{document}\n",nil,false],
  'head'    => ["",nil,false],
  '/head'   => ["",nil,false],
  'html'    => ["",nil,false],
  '/html'   => ["",nil,false],
  'title'   => [:do_silent,nil,false],
  '/title'  => [:undo_silent,nil,false],
  '!'       => ["",nil,false],
  'h1'      => ["\\section{",nil,true],
  'h2'      => ["\\subsection{",nil,true],
  'h3'      => ["\\subsubsection{",nil,true],
  'h4'      => ["\\paragraph{",nil,true],
  '/h1'     => ["}\n",nil,true],
  '/h2'     => ["}\n",nil,true],
  '/h3'     => ["}\n",nil,true],
  '/h4'     => ["}\n",nil,true],
  'a'       => ["",nil,true],
  '/a'      => ["",nil,true],
  'center'  => [:startcenter,nil,true],
  '/center' => [:endcenter,nil,true],
  'ol'      => ["\\begin{enumerate}\n",enum_interp,true],
  '/ol'     => ["\\end{enumerate}\n","pop",true],
  'ul'      => ["\\begin{itemize}\n",item_interp,true],
  '/ul'     => ["\\end{itemize}\n","pop",true],
  'dl'      => ["\\begin{description}\n",desc_interp,true],
  '/dl'     => ["\\end{description}\n","pop",true],
  'pre'     => ["\\begin{pre}\n",nil,true],
  '/pre'    => ["\\end{pre}\n",nil,true],
  'p'       => [:paragraph ,para_interp,true],
  'br'      => ["\\par ",nil,true],
  'img'     => [:img_proc,nil,true],
  'hr'      => ["\\hr ",nil,true],
  'b'       => ["{\\bf\\gt ",nil,true],
  '/b'      => ["}",nil,true],
  'strong'  => ["{\\bf\\gt ",nil,true],
  '/strong' => ["}",nil,true],
  'dfn'     => ["{\\bf\\gt ",nil,true],
  '/dfn'    => ["}",nil,true],
  'i'       => ["{\\it ",nil,true],
  '/i'      => ["}",nil,true],
  'address' => ["{\\it ",nil,true],
  '/address'=> ["}",nil,true],
  'cite'    => ["{\\it ",nil,true],
  '/cite'   => ["}",nil,true],
  'code'    => ["{\\tt ",nil,true],
  '/code'   => ["}",nil,true],
  'kbd'     => ["{\\tt ",nil,true],
  '/kbd'    => ["}",nil,true],
  'tt'      => ["{\\tt ",nil,true],
  '/tt'     => ["}",nil,true],
  'samp'    => ["{\\tt ",nil,true],
  '/samp'   => ["}",nil,true],
  'em'      => ["{\\em ",nil,true],
  '/em'     => ["}",nil,true],
  # \underline works in text mode, so no math mode is needed.
  'u'       => ["\\underline{",nil,true],
  '/u'      => ["}",nil,true],
  # The script argument is braced: \sb\mbox{...} is not valid TeX.
  'sub'     => ["\\({}\\sb{\\mbox{",nil,true],
  '/sub'    => ["}}\\)",nil,true],
  'sup'     => ["\\({}\\sp{\\mbox{",nil,true],
  '/sup'    => ["}}\\)",nil,true],
  'table'   => [:starttable, table_interp,true],
  '/table'  => [:endtable, "pop",true],
  'font'    => ["",nil,true],
  '/font'   => ["",nil,true],
}


################################ MAIN ####################################

# Driver: reads the HTML file named by the first command line argument and
# prints the LaTeX translation to standard output.  Unknown tags and stray
# closing tags are reported on STDERR and otherwise ignored.
abort "usage: #{$0} file.html" if ARGV.empty?

# True between \begin{document} and \end{document}.
$in_document = false
print_header
intp = Environ_stack.new(Environment.new(main_interp))
# Number of environments pushed on top of the base one.
depth = 0
f = TokenStream.new(ARGV[0])
until f.eof?
  tok = f.get
  if tok.kind_of?(Tag)
    case tok.tagname
    when "body"
      $in_document = true
    when "/body"
      $in_document = false
    end
    act = intp.action(tok.tagname)
    if act.nil?
      STDERR.print "tag ",tok.tagname," ignored\n"
      next
    end
    if act[1] == "pop" && depth.zero?
      # Nothing to close: popping would remove the base environment.
      STDERR.print "tag ",tok.tagname," ignored\n"
      next
    end

    # A tag that belongs to the body opens the document if <BODY> was omitted.
    if act[2] && !$in_document
      print "\\begin{document}\n"
      $in_document = true
    end

    # environment push (a <P> only opens one when it carries ALIGN)
    if act[1].kind_of?(Hash) &&
       (tok.tagname != "p" || !tok.switch('align').nil?)
      intp.dup
      intp.top.set_interp(act[1])
      depth += 1
    end

    case act[0]
    when String
      intp.top.flush act[0]
    when Symbol # name of the Environment method handling this tag
      intp.top.send(act[0], tok)
    end

    # environment pop
    if act[1] == "pop"
      intp.pop
      depth -= 1
    end
  elsif !tok.nil?
    intp.top.flush tok
  end
end
# Close the document if the input never reached </BODY>.
print "\\end{document}\n" if $in_document