#!/usr/bin/python

# Print the md5sum of the first CHUNK_SIZE bytes of each file named on the command line

# Default number of leading bytes md5sum() reads and hashes; md5sum() also
# passes it to open() as the buffer size.
CHUNK_SIZE=512 # author's note: use 4096 when summing the full file

# Note: when checksumming the whole file this takes about 60% more CPU
# than md5sum, at least on FC4.
# When I retested on F7 it took around 20% more CPU.

#TODO: see how kernel readahead affects this

import errno
import os
import sys

# Bind md5_new to a callable that returns a fresh md5 hash object,
# whichever module ends up supplying it.
try:
    # md5 module is deprecated on python 2.6
    # so try the newer hashlib first
    import hashlib
    md5_new = hashlib.md5
except ImportError:
    # No hashlib available: fall back to the older md5 module.
    import md5
    md5_new = md5.new

def md5sum(filename, CHUNK=CHUNK_SIZE):
    """Return the hex MD5 digest of the first CHUNK bytes of filename.

    The file is opened in binary mode (CHUNK is also passed to open() as
    the buffer size) and the result of a single read(CHUNK) is hashed, so
    at most CHUNK bytes contribute to the digest.

    IOError/OSError raised while opening or reading propagate to the
    caller, except for EINVAL, in which case the digest of empty input
    is returned.
    """

    try:
        # Binary mode: a checksum must see the raw bytes.  Text mode may
        # translate line endings on some platforms and, on Python 3,
        # returns str, which the hash object will not accept.
        fo = open(filename, 'rb', CHUNK)
        try:
            digest = md5_new()

            # To sum the whole file instead, loop:
            #   chunk = fo.read(CHUNK)
            #   while chunk:
            #       digest.update(chunk)
            #       chunk = fo.read(CHUNK)

            # Note if one seeks past the end of the file first
            # (e.g. fo.seek(1024)) then read() returns an empty string.
            # Consequently the md5 will be the same as if one did:
            # md5sum /dev/null
            digest.update(fo.read(CHUNK))
        finally:
            # Close even when read() fails so the descriptor is not leaked.
            fo.close()

        return digest.hexdigest()
    except (IOError, OSError):
        # sys.exc_info() is used instead of "except ..., value" (python 2
        # only) or "except ... as value" (python >= 2.6) so that this
        # parses on every python version.
        #
        # One gets EINVAL if you do
        # fo.seek(-CHUNK, 2) above and file is too small
        if sys.exc_info()[1].errno == errno.EINVAL:
            return "d41d8cd98f00b204e9800998ecf8427e"  # md5 of empty input
        raise

def printsum(filename):
    """Write "<md5>  <filename>" for filename to stdout.

    If an IOError/OSError occurs while checksumming or writing the
    result, a "md5sum: <filename>: <reason>" line is written to stderr
    instead and the error is not propagated.
    """
    try:
        sys.stdout.write("%s  %s\n" % (md5sum(filename),filename))
    except (IOError, OSError):
        # sys.exc_info() is used instead of "except ..., value" (python 2
        # only) or "except ... as value" (python >= 2.6) so that this
        # parses on every python version.
        err = sys.exc_info()[1]
        # strerror is None when the exception carries no errno/strerror
        # pair; fall back to the exception text rather than print "None".
        reason = err.strerror or str(err)
        sys.stderr.write("md5sum: %s: %s\n" % (filename, reason))
# Checksum every file named on the command line.  A plain loop is used
# because map() is lazy on python 3 (printsum would never be called) and
# on python 2 it would only build a throwaway list of None.
for filename in sys.argv[1:]:
    printsum(filename)
