#!/usr/local/bin/ruby

#
#       HTML to LaTeX converter
#         by A. Ito, 16 June, 1997
#

require 'kconv'

# configuration

# Converts an image file to EPS by running the external `convert` command
# (presumably ImageMagick's -- confirm for the target system).
#
# The command line is echoed to STDERR before it runs.
#
# giffile:: path of the image to read
# epsfile:: path of the EPS file to write
#
# Returns whatever Kernel#system returns: true on success, false on a
# non-zero exit status, nil if the command could not be started.
def gif2eps(giffile,epsfile)
  cmd = ["convert", giffile.to_s, epsfile.to_s]
  STDERR.print cmd.join(" "),"\n"
  # Argument-list form: no shell is started, so spaces or shell
  # metacharacters in a file name (it comes from the HTML being converted)
  # cannot split the arguments or run extra commands.
  system(*cmd)
end

###########################################################################
# One HTML tag, split into a lower-cased tag name and its attributes.
#
# Attribute names are lower-cased.  An attribute written without a value
# (e.g. ISMAP) is stored as +true+; otherwise the value is stored as a
# String with surrounding quotes removed.
class Tag
  # name, optionally followed by = and a "double-quoted", 'single-quoted'
  # or bare value.  Quoted values may contain white space.
  ATTRIBUTE = /([^\s=]+)(?:\s*=\s*(?:"([^"]*)"|'([^']*)'|(\S*)))?/

  # The lower-cased tag name ("p", "/p", "!--", ...); "" for an empty tag.
  attr_reader :tagname

  # str:: the tag text, normally including the enclosing < and >.  When no
  #       <...> pair is found the whole string is parsed as the tag body.
  def initialize(str)
    m = /<(.+)>/.match(str)
    inner = m ? m[1] : str
    name, attributes = inner.strip.split(/\s+/, 2)
    # name is nil for an empty tag such as "< >"
    @tagname = name.to_s.downcase
    @vals = {}
    attributes.to_s.scan(ATTRIBUTE) do |key, dquoted, squoted, bare|
      # a bare value can still carry a stray quote (unterminated string)
      bare = bare.sub(/\A"/, "").sub(/"\z/, "") if bare
      value = dquoted || squoted || bare
      @vals[key.downcase] = value.nil? ? true : value
    end
  end

  # Yields each attribute as |name, value|.
  def each(&block)
    @vals.each(&block)
  end

  # Returns the value of attribute +k+ (lower-case name): a String, +true+
  # for a value-less attribute, or nil when the attribute is absent.
  def switch(k)
    @vals[k]
  end
end

# Splits an HTML file into a stream of tokens.
#
# #get returns, in document order:
# * a Tag for everything between < and > (line breaks inside a tag are
#   turned into spaces),
# * a String for a character entity such as &amp; (see #replace_amp),
# * a String for each run of plain text up to the next <, & or end of line.
# Text runs consisting only of white space are skipped.  Every line read is
# converted with Kconv.toeuc before it is tokenised.
class TokenStream
  TAG_START = '<'
  TAG_END = '>'
  AMP_START = '&'
  AMP_END = ';'

  # Character entity => LaTeX code.  The keys are the complete entity as it
  # appears in the HTML, including the leading & and trailing ;.
  AMP_REPLACE_TABLE = {
    '&amp;'  => '\\&',
    '&gt;'   => '$>$',
    '&lt;'   => '$<$',
    '&nbsp;' => '~',
    '&quot;' => '"',
  }.freeze

  # A complete entity (named or numeric) starting at the match position.
  ENTITY = /\G&#?[0-9A-Za-z]+;/
  # The characters that end a run of plain text.
  TEXT_END = /[<&]/

  # A String that already is LaTeX code.  It answers +latex?+ so that a
  # consumer which escapes plain text can tell it apart and leave it alone;
  # in every other respect it behaves like the String it wraps.
  class Latex < String
    def latex?
      true
    end
  end

  # file:: an open File (or any object with +gets+ and +eof?+, e.g. a
  #        StringIO), or the name of a file to open.
  def initialize(file)
    @f = file.respond_to?(:gets) ? file : File.new(file)
    @buf = nil   # current line, nil before the first read and at the end
    @bpos = 0    # index of the next unread character in @buf
  end

  # Reads from the current position up to and including the next +endsym+,
  # continuing over line boundaries, and returns it with CR/LF replaced by
  # spaces.  At end of input the text collected so far is returned.
  def read_until(endsym)
    endsym = endsym.chr if endsym.kind_of?(Integer)
    pieces = []
    while fill_buffer
      hit = @buf.index(endsym, @bpos)
      stop = hit ? hit + endsym.size : @buf.size
      pieces << @buf[@bpos...stop]
      @bpos = stop
      break if hit
    end
    pieces.join.tr("\r\n", "  ")
  end

  # Returns the next token (Tag or String), or nil when the input is used up.
  def get
    while fill_buffer
      case @buf[@bpos]
      when TAG_START
        return Tag.new(read_until(TAG_END))
      when AMP_START
        return read_entity
      else
        stop = @buf.index(TEXT_END, @bpos) || @buf.size
        text = @buf[@bpos...stop]
        @bpos = stop
        return text unless text =~ /\A\s+\z/
      end
    end
    nil
  end

  # True once the file is exhausted AND the buffered line has been consumed.
  # Checking only the file would report the end as soon as the last line
  # has been read, before its tokens were handed out.
  def eof?
    (@buf.nil? || @bpos >= @buf.size) && @f.eof?
  end

  # Maps a complete entity (e.g. "&amp;") to its LaTeX code, returned as a
  # Latex string.  Entities missing from AMP_REPLACE_TABLE are returned
  # unchanged as plain text.
  def replace_amp(s)
    if AMP_REPLACE_TABLE.key?(s)
      Latex.new(AMP_REPLACE_TABLE[s])
    else
      s
    end
  end

  private

  # Makes sure @buf holds at least one unread character, reading further
  # lines as needed.  Returns false at end of input.
  def fill_buffer
    while @buf.nil? || @bpos >= @buf.size
      line = @f.gets
      if line.nil?
        @buf = nil
        return false
      end
      @buf = Kconv.toeuc(line)
      @bpos = 0
    end
    true
  end

  # Called with @buf[@bpos] == "&".  Consumes a complete entity if one
  # starts here; otherwise only the ampersand itself, which is returned as
  # plain text so that a stray "&" (as in "AT&T") cannot swallow the input
  # up to the next ";".
  def read_entity
    m = ENTITY.match(@buf, @bpos)
    if m
      @bpos = m.end(0)
      replace_amp(m[0])
    else
      @bpos += 1
      '&'
    end
  end
end

# Prints the LaTeX preamble to standard output: the jarticle document style
# with the epsf option, followed by the macros the generated code uses:
# \hr (a \hrulefill across \textwidth), the pre environment (a quote block
# in \tt with \obeylines) and a fallback that defines \gt as \dg when \gt
# is undefined.
def print_header
  # Quoted heredoc: backslashes are taken literally.  The blank first line
  # is part of the output.
  print <<'LATEX'

\documentstyle[epsf]{jarticle}
\def\hr{\par\hbox to \textwidth{\hrulefill}}
\def\pre{\begin{quote}\def\baselinestretch{0.8}\tt\obeylines}
\def\endpre{\end{quote}}
\makeatletter
\@ifundefined{gt}{\let\gt=\dg}{}
\makeatother
LATEX
end

# A stack of environments.  Tag names are looked up from the top of the
# stack downwards, so handlers of an inner environment shadow the outer
# ones.
class Environ_stack
  # envs:: the initial environments, bottom of the stack first.
  def initialize(*envs)
    @stack = envs
  end

  # Returns the action for tag name +tag+ from the topmost environment that
  # knows it, or nil when none does.  Names starting with "!" (comments and
  # declarations) always map to an action that outputs nothing.
  def action(tag)
    return ["",nil] if tag =~ /\A!/
    @stack.reverse_each do |env|
      found = env.action(tag)
      return found unless found.nil?
    end
    nil
  end

  # Removes and returns the top environment.  The last remaining environment
  # is never removed (nil is returned instead): an unbalanced closing tag
  # would otherwise empty the stack and leave #top nil.
  def pop
    @stack.pop if @stack.size > 1
  end

  # Pushes +env+ on top of the stack.
  def push(env)
    @stack.push(env)
  end

  # Returns the top environment.
  def top
    @stack.last
  end

  # Pushes a clone of the top environment.  Note that this deliberately
  # replaces Object#dup: it changes the stack and does not copy it.
  def dup
    @stack.push(top.clone)
  end
end

# Output state plus the tag-handling methods.
#
# An Environment holds a handler table (tag name => action), knows whether
# output is currently suppressed (@silent), collected into a table cell
# (@in_table) or inside an aligned paragraph (@align), and writes text and
# LaTeX code accordingly.  The handler methods (do_silent, starttable, ...)
# are named in the handler tables and invoked with the Tag being processed.
class Environment
  # Characters that are special to LaTeX and the code that typesets them.
  TEXT_ESCAPES = {
    '&' => '\\&',
    '%' => '\\%',
    '#' => '\\#',
    '$' => '\\$',
    '_' => '\\verb+_+',
    '^' => '\\verb+^+',
    '~' => '\\verb+~+',
  }.freeze
  TEXT_ESCAPE_PATTERN = Regexp.union(TEXT_ESCAPES.keys)

  # ALIGN attribute value => LaTeX environment.
  ALIGN_ENVIRONMENTS = {
    "left"   => "flushleft",
    "center" => "center",
    "right"  => "flushright",
  }.freeze

  # interp:: the handler table (Hash: tag name => action) of this
  #          environment.
  def initialize(interp)
    @silent = false
    @in_table = false
    @interp = interp
    @align = nil
  end

  # Returns the action registered for tag name +tag+, or nil.
  def action(tag)
    @interp[tag]
  end

  # Writes one token.
  #
  # A plain String has the LaTeX special characters & % # $ _ ^ ~ escaped;
  # inside an aligned paragraph (outside tables) a trailing line break is
  # additionally turned into a forced LaTeX line break.  A String that
  # answers +latex?+ with a true value, and any non-String, is written as
  # it is.  Output goes to the current table cell while a table is being
  # collected, is dropped while silent, and is printed otherwise.
  def flush(tok)
    if tok.kind_of?(String) && !latex_code?(tok)
      # Hash replacement: the values are inserted literally.  With a
      # replacement *string*, "\\&" would mean "the matched text" and
      # leave the ampersand unescaped.
      tok = tok.gsub(TEXT_ESCAPE_PATTERN, TEXT_ESCAPES)
      if @align && !@in_table && tok.end_with?("\n")
        tok = tok.chop + "\\\\\n"
      end
    end
    emit tok
  end

  # Replaces the handler table.
  def set_interp(interp)
    @interp = interp
  end

  # tag processing methods

  # <TITLE>: suppress output
  def do_silent(tag)
    @silent = true
  end

  # </TITLE>: resume output
  def undo_silent(tag)
    @silent = false
  end

  # <IMG>: converts the SRC image to EPS with gif2eps and includes the
  # result.  A trailing ".gif" (any case) is replaced by ".eps"; for any
  # other name ".eps" is appended, so the source file is never used as the
  # conversion target.  Tags without a SRC value are ignored.
  def img_proc(tag)
    src = tag.switch('src')
    return unless src.kind_of?(String)
    newfile = src.sub(/\.gif\z/i,"") + ".eps"
    gif2eps(src,newfile)
    emit "\\epsfile{file=#{newfile}}\n"
  end

  # <TABLE>: start collecting cells.  Nothing is written until </TABLE>.
  def starttable(tag)
    @table = []          # @table[row][col]: LaTeX text of the cell ENDING at col
    @tablespan = []      # @tablespan[row][col]: column span of the cell STARTING at col
    @table_rows = -1     # index of the current row, -1 before the first <TR>
    @table_cols_max = 0  # highest column index seen in any row
    @in_table = true
    @table_border = !tag.switch('border').nil?
  end

  # <TR>
  def start_row(tag)
    begin_row
  end

  # <TD>: opens a cell spanning COLSPAN columns (1 if absent or not a
  # positive number).  A <TD> before any <TR> opens the first row itself.
  def start_col(tag)
    begin_row if @table_rows < 0
    colspan = tag.switch('colspan')
    span = colspan.kind_of?(String) ? colspan.to_i : 1
    begin_cell(span < 1 ? 1 : span)
  end

  # </TABLE>: writes the collected cells as a tabular environment with one
  # left-aligned column per table column; with BORDER all rules are drawn.
  def endtable(tag)
    @in_table = false
    columns = @table_cols_max + 1
    if @table_border
      emit "\\begin{tabular}{*{#{columns}}{|l}|}\n\\hline\n"
    else
      emit "\\begin{tabular}{*{#{columns}}{l}}\n"
    end
    @table.each_with_index do |cells, row|
      emit row_latex(cells, @tablespan[row])
      emit "\\\\\n"
      emit "\\hline\n" if @table_border
    end
    emit "\\end{tabular}\n"
  end

  # <CENTER>
  def startcenter(tag)
    if @in_table
      emit "\\hfil"
    else
      emit "\\begin{center}\n"
    end
  end

  # </CENTER>
  def endcenter(tag)
    if @in_table
      emit "\\hfil"
    else
      emit "\\end{center}\n"
    end
  end

  # <P>: with ALIGN=left/center/right (any case) opens the matching LaTeX
  # environment and remembers how to close it; without ALIGN, or with any
  # other value, it starts a plain paragraph.
  def paragraph(tag)
    align = tag.switch('align').to_s.downcase
    environment = ALIGN_ENVIRONMENTS[align]
    if environment
      emit "\\begin{#{environment}}\n"
      @endparagraph = "\\end{#{environment}}\n"
      @align = align
    else
      emit "\\par\n"
      @endparagraph = ""
      @align = nil
    end
  end

  # </P>: closes the environment opened by an aligned <P>, if any.
  def endparagraph(tag)
    return if @align.nil?
    @align = nil
    emit @endparagraph
  end

  private

  # True for tokens that declare themselves to be finished LaTeX code.
  def latex_code?(tok)
    tok.respond_to?(:latex?) && tok.latex?
  end

  # Writes LaTeX code without any escaping: appended to the current cell
  # inside a table, dropped while silent, printed otherwise.
  def emit(latex)
    if @in_table
      ensure_cell
      cell = @table[@table_rows][@table_cols].to_s
      @table[@table_rows][@table_cols] = cell + latex.to_s
    elsif !@silent
      print latex
    end
  end

  # Opens a row and/or a cell if output arrives before <TR>/<TD>.
  def ensure_cell
    begin_row if @table_rows < 0
    begin_cell(1) if @table_cols < 0
  end

  # Starts a new, empty table row.
  def begin_row
    @table_rows += 1
    @table[@table_rows] = []
    @tablespan[@table_rows] = []
    @table_cols = -1
    @colspan = 1
  end

  # Starts a new, empty cell covering +span+ columns in the current row.
  def begin_cell(span)
    @colspan = span
    @tablespan[@table_rows][@table_cols+1] = span
    @table_cols += span
    @table[@table_rows][@table_cols] = ""
    @table_cols_max = @table_cols if @table_cols > @table_cols_max
  end

  # Returns the LaTeX code for one row: its cells joined by "&", cells that
  # span several columns wrapped in \multicolumn.  Each row is laid out
  # from its own cells, so rows may differ in length.
  def row_latex(cells, spans)
    parts = []
    col = 0
    while col < cells.size
      span = spans[col] || 1
      text = cells[col+span-1].to_s
      if span == 1
        parts << text
      else
        form = "l"
        if @table_border
          # the cell that reaches the last column also draws the right rule
          form = col+span > @table_cols_max ? "|l|" : "|l"
        end
        parts << "\\multicolumn{#{span}}{#{form}}{#{text}}"
      end
      col += span
    end
    parts.join("&")
  end
end

# Handler tables: tag name => [output, scope, in_body].
#
# output::  a String of LaTeX code to write, or a Symbol naming the
#           Environment method that handles the tag.
# scope::   a Hash (another handler table) to push a new environment that
#           uses it, the String "pop" to drop the current environment
#           afterwards, or nil.
# in_body:: when true, the main loop opens \begin{document} first if the
#           document body has not started yet.
#
# NOTE(review): the main loop writes String outputs through
# Environment#flush, which escapes $, _ and ^ in Strings, so the math-mode
# snippets of u/sub/sup look like they are emitted escaped -- confirm.

# inside <OL>
enum_interp = {
  'li' => ["\\item ",nil]
}

# inside <UL>
item_interp = {
  'li' => ["\\item ",nil]
}

# inside <DL>: <DT> opens the optional argument of \item, <DD> closes it
desc_interp = {
  'dt' => ["\\item[",nil],
  'dd' => ["]\n",nil]
}

# inside <TABLE>
table_interp = {
  'tr' => [:start_row,nil],
  'td' => [:start_col,nil],
  '/tr' => ["",nil],
  '/td' => ["",nil],
}

# inside <P ALIGN=...>
para_interp = {
  '/p'      => [:endparagraph ,"pop",true],
}

# Font switches end in a space so that the control word cannot run into
# the text that follows it ("{\it" + "word" would read as \itword).
main_interp = {
  'body'    => ["\\begin{document}\n",nil,false],
  '/body'   => ["\\end{document}\n",nil,false],
  'head'    => ["",nil,false],
  '/head'   => ["",nil,false],
  'html'    => ["",nil,false],
  '/html'   => ["",nil,false],
  'title'   => [:do_silent,nil,false],
  '/title'  => [:undo_silent,nil,false],
  '!'       => ["",nil,false],
  'h1'      => ["\\section{",nil,true],
  'h2'      => ["\\subsection{",nil,true],
  'h3'      => ["\\subsubsection{",nil,true],
  'h4'      => ["\\paragraph{",nil,true],
  '/h1'     => ["}\n",nil,true],
  '/h2'     => ["}\n",nil,true],
  '/h3'     => ["}\n",nil,true],
  '/h4'     => ["}\n",nil,true],
  'a'       => ["",nil,true],
  '/a'      => ["",nil,true],
  'center'  => [:startcenter,nil,true],
  '/center' => [:endcenter,nil,true],
  'ol'      => ["\\begin{enumerate}\n",enum_interp,true],
  '/ol'     => ["\\end{enumerate}\n","pop",true],
  'ul'      => ["\\begin{itemize}\n",item_interp,true],
  '/ul'     => ["\\end{itemize}\n","pop",true],
  'dl'      => ["\\begin{description}\n",desc_interp,true],
  '/dl'     => ["\\end{description}\n","pop",true],
  'pre'     => ["\\begin{pre}\n",nil,true],
  '/pre'    => ["\\end{pre}\n",nil,true],
  'p'       => [:paragraph ,para_interp,true],
  'br'      => ["\\par ",nil,true],
  'img'     => [:img_proc,nil,true],
  'hr'      => ["\\hr ",nil,true],
  'b'       => ["{\\bf\\gt ",nil,true],
  '/b'      => ["}",nil,true],
  'strong'  => ["{\\bf\\gt ",nil,true],
  '/strong' => ["}",nil,true],
  'dfn'     => ["{\\bf\\gt ",nil,true],
  '/dfn'    => ["}",nil,true],
  'i'       => ["{\\it ",nil,true],
  '/i'      => ["}",nil,true],
  'address' => ["{\\it ",nil,true],
  '/address'=> ["}",nil,true],
  'cite'    => ["{\\it ",nil,true],
  '/cite'   => ["}",nil,true],
  'code'    => ["{\\tt ",nil,true],
  '/code'   => ["}",nil,true],
  'kbd'     => ["{\\tt ",nil,true],
  '/kbd'    => ["}",nil,true],
  'tt'      => ["{\\tt ",nil,true],
  '/tt'     => ["}",nil,true],
  'samp'    => ["{\\tt ",nil,true],
  '/samp'   => ["}",nil,true],
  'em'      => ["{\\em ",nil,true],
  '/em'     => ["}",nil,true],
  'u'       => ["$\\underline{\\mbox{",nil,true],
  '/u'      => ["}}$",nil,true],
  # "\\mbox", not "\mbox": in a double-quoted string "\m" is just "m"
  'sub'     => ["${}_\\mbox{",nil,true],
  '/sub'    => ["}$",nil,true],
  'sup'     => ["${}^\\mbox{",nil,true],
  '/sup'    => ["}$",nil,true],
  'table'   => [:starttable, table_interp,true],
  '/table'  => [:endtable, "pop",true],
  'font'    => ["",nil,true],
  '/font'   => ["",nil,true],
}

################################ MAIN ####################################
# Reads the HTML file named by the first command-line argument and writes
# the LaTeX translation to standard output.  Tags without a handler are
# reported on STDERR and skipped.

if ARGV.empty?
  STDERR.print "usage: #{$0} file.html\n"
  exit 1
end

# true while between \begin{document} and \end{document}
$in_document = false
print_header
intp = Environ_stack.new(Environment.new(main_interp))
f = TokenStream.new(ARGV[0])
until f.eof?
  tok = f.get
  next if tok.nil?

  # plain text and character entities
  unless tok.kind_of?(Tag)
    intp.top.flush tok
    next
  end

  name = tok.tagname
  case name
  when "body"
    $in_document = true
  when "/body"
    $in_document = false
  end

  act = intp.action(name)
  if act.nil?
    STDERR.print "tag ",name," ignored\n"
    next
  end
  output, scope, in_body = act

  # a body-level tag before <BODY>: open the document ourselves
  if in_body && !$in_document
    print "\\begin{document}\n"
    $in_document = true
  end

  # environment push; a <P> only opens a scope when it carries ALIGN,
  # because only then a closing </P> has something to undo
  if scope.kind_of?(Hash) && (name != "p" || !tok.switch('align').nil?)
    intp.dup
    intp.top.set_interp(scope)
  end

  case output
  when String
    intp.top.flush output
  when Symbol
    # handler method named in the table (the former test for Fixnum dates
    # from a Ruby in which an interned symbol was an integer)
    intp.top.send(output,tok)
  end

  # environment pop
  intp.pop if scope == "pop"
end
if $in_document
  print "\\end{document}\n"
end