Python札记1-HTTP Download

Python代码
# -*- coding: utf-8 -*-
__author__ = 'gull'

import os, urllib2, log_factory
from urlparse import urlsplit

def get(url, filePath, fileName = None, buffer = 16 * 1024):
    """Download ``url`` over HTTP and save the response body under ``filePath``.

    Args:
        url: Address to request.
        filePath: Directory in which the downloaded file is stored.
        fileName: Name to save the file as. When empty, the name is taken
            from the ``filename`` parameter of the response's
            Content-Disposition header; if that is missing too, the last
            path segment of the response URL is used.
        buffer: Number of bytes read from the response per chunk.

    Returns:
        A ``(fileName, totalLength, fullfileName)`` tuple, where
        ``totalLength`` is the raw Content-Length header value (None when
        the server did not send one) and ``fullfileName`` is the path the
        data was written to.

    Errors raised by ``urllib2.urlopen`` or by opening/writing the target
    file are not caught here; the response is closed in every case.
    """
    log = log_factory.getLogger()
    log.info("send http request to %s", url)

    def writefile(fsrc, fdst, totalLength):
        """Copy fsrc to fdst in ``buffer``-sized chunks, logging progress."""
        # A missing, malformed or zero Content-Length is treated as unknown
        # so the percentage below can never divide by zero.
        try:
            expected = float(totalLength)
        except (TypeError, ValueError):
            expected = 0.0

        bytesRead = 0.0
        while True:
            buf = fsrc.read(buffer)
            if not buf:
                break
            fdst.write(buf)

            bytesRead += len(buf)
            if expected > 0:
                log.info("%s: %.02f/%.02f kb (%d%%)",
                         fileName,
                         bytesRead / 1024.0,
                         expected / 1024.0,
                         100 * bytesRead / expected)
            else:
                log.info("%s: %.02f/? kb (?%%)",
                         fileName,
                         bytesRead / 1024.0)

    def getFileName(openUrl):
        """Pick a file name from Content-Disposition, else from the URL path."""
        disposition = openUrl.info().getheader("Content-Disposition")
        if disposition:
            for part in disposition.split(';'):
                # partition splits on the first '=' only, so values that
                # themselves contain '=' stay intact.
                key, _, value = part.strip().partition('=')
                if key.strip().lower() != 'filename':
                    continue
                # The name comes from the server: keep only its last path
                # component so it cannot point outside filePath.
                name = os.path.basename(value.strip().strip("\"'"))
                if name:
                    return name
        # No usable filename in the header: fall back to the response URL.
        return os.path.basename(urlsplit(openUrl.url)[2])

    def getFileLength(openUrl):
        """Return the raw Content-Length header value, or None if absent."""
        return openUrl.info().getheader("Content-Length")

    r = urllib2.urlopen(urllib2.Request(url), timeout = 120) #timeout is 120s
    try:
        fileName = fileName or getFileName(r)
        fullfileName = os.path.join(filePath, fileName)
        totalLength = getFileLength(r)
        log.info("write response date to %s", fullfileName)
        with open(fullfileName, 'wb') as f:
            writefile(r, f, totalLength)
    finally:
        r.close()

    # Logged after the try block so it is actually reached on success.
    log.info("http request finished.")
    return fileName, totalLength, fullfileName

参数说明：
url:即下载路径，如http://apache.etoak.com/tomcat/tomcat-7/v7.0.20/bin/apache-tomcat-7.0.20.tar.gz
filePath:下载文件保存的文件夹

fileName:下载后保存的文件名，可选参数。若为空，则会取response header中的filename信息（如下图）

若仍为空，则取url路径的最后一段作为文件名（如:apache-tomcat-7.0.20.tar.gz)
buffer:下载缓冲区大小，默认16k

可继续加入以下特性:
支持代理
文件分块，多线程下载
异步下载，回调机制
...

补充：Web开发 , Python ,