All Downloads are FREE. Search and download functionalities are using the official Maven repository.

docbook.epub.bin.lib.docbook.rb Maven / Gradle / Ivy

There is a newer version: 2.4
Show newest version
require 'fileutils'
require 'rexml/parsers/pullparser'

module DocBook

  # Converts a DocBook XML document into an .epub archive.
  #
  # The work is done by shelling out to external tools: xmllint collapses
  # entities and XIncludes, xsltproc applies the EPUB stylesheet inside a
  # working directory, and zip bundles the result. Every intermediate file
  # created along the way is recorded in @to_delete and removed once
  # render_to_file finishes, whether it succeeded or raised.
  class Epub
    # Command used by Epub.invalid? to validate a finished .epub file.
    CHECKER = "epubcheck"
    # Stylesheet used when no customization layer is supplied.
    STYLESHEET = File.expand_path(File.join(File.dirname(__FILE__), '..', '..', "docbook.xsl"))
    # Location of callout graphics, relative to the OEBPS directory.
    CALLOUT_PATH = File.join('images', 'callouts')
    # Directory the callout graphics are copied from.
    CALLOUT_FULL_PATH = File.expand_path(File.join(File.dirname(__FILE__), '..', '..', '..', CALLOUT_PATH))
    CALLOUT_LIMIT = 15
    CALLOUT_EXT = ".png"
    XSLT_PROCESSOR = "xsltproc"
    # Default working directory; the name is fixed once, when this file is loaded.
    OUTPUT_DIR = ".epubtmp#{Time.now.to_f}"
    MIMETYPE = "application/epub+zip"
    META_DIR = "META-INF"
    OEBPS_DIR = "OEBPS"
    ZIPPER = "zip"

    attr_reader :output_dir

    # docbook_file::        path of the DocBook XML source; must exist.
    # output_dir::          working directory the EPUB tree is built in.
    # css_file::            optional CSS file copied into the EPUB.
    # customization_layer:: optional XSLT stylesheet used instead of STYLESHEET.
    # embedded_fonts::      paths of font files copied into the EPUB
    #                       (nil is treated as an empty list).
    #
    # Raises ArgumentError when docbook_file does not exist.
    def initialize(docbook_file, output_dir=OUTPUT_DIR, css_file=nil, customization_layer=nil, embedded_fonts=[])
      @docbook_file = docbook_file
      @output_dir = output_dir
      @meta_dir  = File.join(@output_dir, META_DIR)
      @oebps_dir = File.join(@output_dir, OEBPS_DIR)
      @css_file = css_file ? File.expand_path(css_file) : css_file
      @embedded_fonts = embedded_fonts || []
      @to_delete = []
      @stylesheet = customization_layer ? File.expand_path(customization_layer) : STYLESHEET

      unless File.exist?(@docbook_file)
        raise ArgumentError, "File #{@docbook_file} does not exist"
      end
    end

    # Renders the document and bundles it into output_file. Intermediate
    # files are removed even when rendering or bundling raises, so a failed
    # run does not leave the collapsed source or partial output behind.
    #
    # Returns the list of paths that were cleaned up.
    def render_to_file(output_file, verbose=false)
      begin
        render_to_epub(output_file, verbose)
        bundle_epub(output_file, verbose)
      ensure
        removed = cleanup_files(@to_delete)
      end
      removed
    end

    # Runs CHECKER against file. Returns false when the checker exits with
    # status 0; otherwise returns the checker's combined stdout/stderr text
    # (which is truthy).
    def self.invalid?(file)
      # Obnoxiously, we can't just check for a non-zero output...
      cmd = %Q(#{CHECKER} "#{file}")
      output = `#{cmd} 2>&1`
      return false if $?.to_i == 0

      STDERR.puts output if $DEBUG
      output
    end

    private

    # Collapses the source document and runs XSLT_PROCESSOR over it from
    # inside the working directory. Raises RuntimeError when the command
    # fails.
    def render_to_epub(output_file, verbose)
      @collapsed_docbook_file = collapse_docbook

      # The command below starts with `cd`, so the directory has to exist.
      FileUtils.mkdir_p(@output_dir)

      options = [
        "--stringparam chunk.quietly " + (verbose ? '0' : '1'),
        "--stringparam callout.graphics.path #{CALLOUT_PATH}/",
        "--stringparam callout.graphics.number.limit #{CALLOUT_LIMIT}",
        "--stringparam callout.graphics.extension #{CALLOUT_EXT}",
        "--stringparam base.dir #{OEBPS_DIR}/"
      ]
      unless @embedded_fonts.empty?
        font_names = @embedded_fonts.map { |f| File.basename(f) }.join(',')
        options << "--stringparam epub.embedded.fonts \"#{font_names}\""
      end
      options << "--stringparam epub.metainf.dir #{META_DIR}/"
      options << "--stringparam epub.oebps.dir #{OEBPS_DIR}/"
      options << "--stringparam html.stylesheet #{File.basename(@css_file)}" if @css_file

      # Double-quote stylesheet & file to help Windows cmd.exe
      db2epub_cmd = %Q(cd "#{@output_dir}" && #{XSLT_PROCESSOR} #{options.join(" ")} "#{@stylesheet}" "#{@collapsed_docbook_file}")
      STDERR.puts db2epub_cmd if $DEBUG
      success = system(db2epub_cmd)
      raise "Could not render as .epub to #{output_file} (#{db2epub_cmd})" unless success

      # Nested arrays are fine here: cleanup_files flattens the list.
      @to_delete << Dir["#{@meta_dir}/*"]
      @to_delete << Dir["#{@oebps_dir}/*"]
    end

    # Writes the mimetype file, copies images, CSS, fonts and callout
    # graphics into the OEBPS tree, then zips everything into output_file.
    # Raises RuntimeError when the zip command fails.
    def bundle_epub(output_file, verbose)
      quiet = verbose ? "" : "-q"
      mimetype_filename = write_mimetype
      meta  = File.basename(@meta_dir)
      oebps = File.basename(@oebps_dir)

      copy_images
      copy_csses
      copy_fonts
      copy_callouts

      # zip -X -r ../book.epub mimetype META-INF OEBPS
      # Double-quote stylesheet & file to help Windows cmd.exe
      zip_cmd = %Q(cd "#{@output_dir}" &&  #{ZIPPER} #{quiet} -X -r  "#{File.expand_path(output_file)}" "#{mimetype_filename}" "#{meta}" "#{oebps}")
      puts zip_cmd if $DEBUG
      success = system(zip_cmd)
      raise "Could not bundle into .epub file to #{output_file}" unless success
    end

    # Input must be collapsed because REXML couldn't find figures in files that
    # were XIncluded or added by ENTITY
    #   http://sourceforge.net/tracker/?func=detail&aid=2750442&group_id=21935&atid=373747
    #
    # Writes ".collapsed.<basename>" next to the source document and returns
    # its path. Raises RuntimeError when either xmllint pass fails.
    def collapse_docbook
      collapsed_file = File.join(File.expand_path(File.dirname(@docbook_file)),
                                 '.collapsed.' + File.basename(@docbook_file))
      # Registered before xmllint runs so that a partially written file is
      # still removed if one of the passes fails.
      @to_delete << collapsed_file

      # Double-quote stylesheet & file to help Windows cmd.exe
      entity_collapse_command = %Q(xmllint --loaddtd --noent -o "#{collapsed_file}" "#{@docbook_file}")
      entity_success = system(entity_collapse_command)
      raise "Could not collapse named entites in #{@docbook_file}" unless entity_success

      xinclude_collapse_command = %Q(xmllint --xinclude -o "#{collapsed_file}" "#{collapsed_file}")
      xinclude_success = system(xinclude_collapse_command)
      raise "Could not collapse XIncludes in #{@docbook_file}" unless xinclude_success

      collapsed_file
    end

    # Copies every callout graphic from CALLOUT_FULL_PATH into the OEBPS
    # tree, but only when the document uses callouts. Returns the source
    # paths of the copied graphics.
    def copy_callouts
      return [] unless has_callouts?

      new_callout_images = []
      Dir.glob("#{CALLOUT_FULL_PATH}/*#{CALLOUT_EXT}").each do |img|
        img_new_filename = File.join(@oebps_dir, CALLOUT_PATH, File.basename(img))

        # TODO: What to rescue for these two?
        FileUtils.mkdir_p(File.dirname(img_new_filename))
        FileUtils.cp(img, img_new_filename)
        @to_delete << img_new_filename
        new_callout_images << img
      end
      new_callout_images
    end

    # Copies the embedded fonts into the OEBPS directory and registers the
    # copies for cleanup. Returns the source paths of the copied fonts.
    def copy_fonts
      new_fonts = []
      @embedded_fonts.each do |font_file|
        font_new_filename = File.join(@oebps_dir, File.basename(font_file))
        FileUtils.mkdir_p(File.dirname(font_new_filename))
        FileUtils.cp(font_file, font_new_filename)
        @to_delete << font_new_filename
        new_fonts << font_file
      end
      new_fonts
    end

    # Copies the CSS file, if one was given, into the OEBPS directory and
    # registers the copy for cleanup. Returns the source paths copied
    # (an empty Array when there is no CSS file).
    def copy_csses
      return [] unless @css_file

      css_new_filename = File.join(@oebps_dir, File.basename(@css_file))
      FileUtils.mkdir_p(File.dirname(css_new_filename))
      FileUtils.cp(@css_file, css_new_filename)
      @to_delete << css_new_filename
      [@css_file]
    end

    # Copies every referenced image with a recognised extension from the
    # source document's directory into the OEBPS tree, keeping its relative
    # path. Returns the full source paths of the copied images.
    def copy_images
      source_dir = File.expand_path(File.dirname(@docbook_file))
      new_images = []
      get_image_refs.each do |img|
        # TODO: It'd be cooler if we had a filetype lookup rather than just
        # extension
        next unless img =~ /\.(svg|png|gif|jpe?g|xml)/i

        img_new_filename = File.join(@oebps_dir, img)
        img_full = File.join(source_dir, img)

        # TODO: What to rescue for these two?
        FileUtils.mkdir_p(File.dirname(img_new_filename))
        puts(img_full + ": " + img_new_filename) if $DEBUG
        FileUtils.cp(img_full, img_new_filename)
        @to_delete << img_new_filename
        new_images << img_full
      end
      new_images
    end

    # Writes the "mimetype" file into the working directory and returns its
    # basename. `print` is used so no trailing newline is written.
    def write_mimetype
      mimetype_filename = File.join(@output_dir, "mimetype")
      File.open(mimetype_filename, "w") { |f| f.print MIMETYPE }
      @to_delete << mimetype_filename
      File.basename(mimetype_filename)
    end

    # Recursively removes every path in file_list (nested Arrays allowed).
    # Missing paths are ignored. Returns the flattened list.
    def cleanup_files(file_list)
      file_list.flatten.each do |f|
        # Yikes
        FileUtils.rm_r(f, :force => true)
      end
    end

    # Yields the name and attribute Hash of each start tag in the collapsed
    # document. The file handle is closed when iteration ends, including
    # when the caller returns early from the block.
    def each_start_element
      File.open(@collapsed_docbook_file) do |xml|
        parser = REXML::Parsers::PullParser.new(xml)
        while parser.has_next?
          event = parser.pull
          yield(event[0], event[1]) if event.start_element?
        end
      end
    end

    # Returns an Array of all of the (image) @filerefs in a document.
    # Elements without a fileref attribute are skipped; duplicates are
    # removed.
    def get_image_refs
      image_refs = []
      each_start_element do |name, attrs|
        image_refs << attrs['fileref'] if name == "imagedata" || name == "graphic"
      end
      image_refs.compact.uniq
    end

    # Returns true if the document has code callouts
    def has_callouts?
      each_start_element do |name, _attrs|
        return true if name == "calloutlist" || name == "co"
      end
      false
    end
  end
end




© 2015 - 2025 Weber Informatics LLC | Privacy Policy