"""
Uses the pdftohtml binary (http://sf.net/projects/pdftohtml) to do its handiwork.

"""
from Products.PortalTransforms.interfaces import itransform
from Products.PortalTransforms.libtransforms.utils import bin_search, sansext
from Products.PortalTransforms.libtransforms.commandtransform import commandtransform
from Products.PortalTransforms.libtransforms.commandtransform import popentransform
from Products.CMFDefault.utils import bodyfinder
import os

class pdf_to_html(popentransform):
    """Transform ``application/pdf`` data into ``text/html`` via ``pdftohtml``.

    This class only supplies configuration and the output post-processing
    hook; actually running the command is left to the ``popentransform``
    base class, which is defined outside this module.
    """

    __implements__ = itransform

    __version__ = '2004-07-02.01'
    __name__ = "pdf_to_html"

    # MIME types accepted and produced by this transform.
    inputs = ('application/pdf',)
    output = 'text/html'
    output_encoding = 'utf-8'

    # Command-line configuration for the external converter.  The
    # ``%(infile)s`` placeholder is presumably filled in by popentransform
    # with the path of a file holding the input data, since ``useStdin`` is
    # disabled -- NOTE(review): confirm against the base class.
    binaryName = "pdftohtml"
    binaryArgs = "%(infile)s -noframes -stdout -enc UTF-8"
    useStdin = False

    def getData(self, couterr):
        """Return the converter output after passing it through ``bodyfinder``.

        ``couterr`` is a readable file-like object; it is read in full and
        the resulting text is handed to ``bodyfinder`` (imported from
        ``Products.CMFDefault.utils``), which presumably keeps only the
        content of the HTML ``<body>`` element -- verify against that helper.
        """
        raw_output = couterr.read()
        return bodyfinder(raw_output)

class old_pdf_to_html(commandtransform):
    """Older PDF to HTML transform that runs ``pdftohtml`` through a shell.

    The input is written to a temporary directory, ``pdftohtml -noframes``
    is executed there, and the generated ``.html`` file is read back.  The
    temporary-directory helpers used here (``initialize_tmpdir``,
    ``subObjects``, ``fixImages`` and ``cleanDir``) are not defined in this
    class; presumably they are inherited from ``commandtransform``.
    """
    __implements__ = itransform

    __name__ = "pdf_to_html"
    inputs   = ('application/pdf',)
    output  = 'text/html'

    binaryName = "pdftohtml"
    binaryArgs = "-noframes"

    def __init__(self):
        """Initialise the base class with the configured binary name."""
        commandtransform.__init__(self, binary=self.binaryName)

    def convert(self, data, cache, **kwargs):
        """Convert PDF ``data`` to HTML and store the result in ``cache``.

        ``kwargs['filename']`` is reduced to its base name (defaulting to
        ``unknown.pdf`` when missing or empty) before the temporary
        directory is set up.  The HTML is stored with ``cache.setData`` and
        any image sub-objects with ``cache.setSubObjects``; ``cache`` itself
        is returned.
        """
        kwargs['filename'] = os.path.basename(
            kwargs.get('filename') or 'unknown.pdf')

        tmpdir, fullname = self.initialize_tmpdir(data, **kwargs)
        try:
            html = self.invokeCommand(tmpdir, fullname)
            path, images = self.subObjects(tmpdir)
            objects = {}
            if images:
                self.fixImages(path, images, objects)
        finally:
            # Always remove the temporary directory, even when the
            # conversion or the image handling raises.
            self.cleanDir(tmpdir)
        cache.setData(html)
        cache.setSubObjects(objects)
        return cache

    def invokeCommand(self, tmpdir, fullname):
        """Run the converter inside ``tmpdir`` and return the produced HTML.

        If the expected ``<name>.html`` file cannot be read, the content of
        the ``error_log`` file (the converter's stderr) is returned instead,
        or an empty string when that cannot be read either.
        """
        # FIXME: windows users...
        # Both paths are single-quoted so that shell metacharacters in the
        # caller-supplied file name (quotes, ``$``, backticks, ...) are
        # passed to the converter literally instead of being interpreted.
        cmd = 'cd %s && %s %s %s 2>error_log 1>/dev/null' % (
            self._shellQuote(tmpdir), self.binary, self.binaryArgs,
            self._shellQuote(fullname))
        os.system(cmd)

        html_path = "%s/%s.html" % (tmpdir, sansext(fullname))
        try:
            return self._readFile(html_path)
        except (IOError, OSError):
            # No HTML was produced; fall back to the error log below.
            pass
        try:
            return self._readFile("%s/error_log" % tmpdir)
        except (IOError, OSError):
            return ''

    def _shellQuote(self, value):
        """Return ``value`` quoted as a single POSIX shell word."""
        # Inside single quotes nothing is special to the shell; an embedded
        # single quote is written by closing the quotes, adding an escaped
        # quote, and re-opening them:  '  ->  '\''
        return "'%s'" % value.replace("'", "'\\''")

    def _readFile(self, path):
        """Return the content of ``path``, closing the file in all cases."""
        handle = open(path, 'r')
        try:
            return handle.read()
        finally:
            handle.close()

def register():
    """Return a new ``pdf_to_html`` transform instance."""
    transform = pdf_to_html()
    return transform
