当前位置:编程学习 > 网站相关 >>

Python札记1-HTTP Download

Python代码 
# -*- coding: utf-8 -*- 
__author__ = 'gull' 
 
import os, urllib2, log_factory 
from urlparse import urlsplit 
 
def get(url, filePath, fileName = None, buffer = 16 * 1024):
    """Download ``url`` over HTTP and save the response body under ``filePath``.

    Parameters:
        url: address to request.
        filePath: directory the downloaded file is written into.
        fileName: name of the saved file. When empty, the name is taken from
            the response's Content-Disposition header and, failing that, from
            the last path segment of the response URL.
        buffer: number of bytes read from the response per chunk.

    Returns:
        A tuple ``(fileName, totalLength, fullfileName)``. ``totalLength`` is
        the raw Content-Length header value (``None`` when the header is
        absent); ``fullfileName`` is the path the data was written to.

    Errors raised while opening the URL or while opening/writing the target
    file are not caught here and propagate to the caller.
    """
    log = log_factory.getLogger()
    log.info("send http request to %s", url)

    def writefile(fsrc, fdst, totalLength):
        """Copy fsrc to fdst in ``buffer``-sized chunks, logging progress."""
        # Content-Length may be missing, malformed or zero; in all of those
        # cases progress is reported without a percentage instead of failing.
        try:
            total = float(totalLength)
        except (TypeError, ValueError):
            total = 0.0

        bytesRead = 0.0
        while 1:
            buf = fsrc.read(buffer)
            if not buf:
                break
            fdst.write(buf)

            bytesRead += len(buf)
            if total > 0:
                log.info("%s: %.02f/%.02f kb (%d%%)",
                         fileName,
                         bytesRead / 1024.0,
                         total / 1024.0,
                         100 * bytesRead / total)
            else:
                log.info("%s: %.02f/? kb (?%%)",
                         fileName,
                         bytesRead / 1024.0)

    def getFileName(openUrl):
        """Pick a file name from Content-Disposition, else from the URL path."""
        disposition = openUrl.info().getheader("Content-Disposition")
        if disposition:
            # Parse "attachment; filename=foo.txt" style parameters. Splitting
            # on the first '=' only keeps values that themselves contain '='.
            params = {}
            for part in disposition.split(';'):
                key, _, value = part.strip().partition('=')
                params[key.strip().lower()] = value.strip()
            candidate = params.get('filename', '').strip("\"'")
            # The header is controlled by the server: keep only the last path
            # component so the file cannot be written outside filePath.
            candidate = os.path.basename(candidate.replace("\\", "/"))
            if candidate:
                return candidate
        # No usable filename in the header: fall back to the URL path.
        return os.path.basename(urlsplit(openUrl.url)[2])

    def getFileLength(openUrl):
        """Return the raw Content-Length header value, or None if absent."""
        return openUrl.info().getheader("Content-Length")

    r = urllib2.urlopen(urllib2.Request(url), timeout = 120) #timeout is 120s
    try:
        fileName = fileName or getFileName(r)
        fullfileName = "%s%s%s" % (filePath, os.path.sep, fileName)
        totalLength = getFileLength(r)
        log.info("write response date to %s", fullfileName)
        with open(fullfileName, 'wb') as f:
            writefile(r, f, totalLength)
    finally:
        r.close()

    # Logged after the response is closed; returning from inside the try block
    # would make this statement unreachable.
    log.info("http request finished.")
    return fileName, totalLength, fullfileName
 
参数说明:
url:即下载路径,如http://apache.etoak.com/tomcat/tomcat-7/v7.0.20/bin/apache-tomcat-7.0.20.tar.gz
filePath:下载文件保存的文件夹

fileName:下载后保存的文件名,可选参数。若为空,则会取 response header 中的 filename 信息(如下图)

继续判空,则取url后缀名(如:apache-tomcat-7.0.20.tar.gz)
buffer:下载缓冲区大小,默认16k
 
可继续加入以下特性:
支持代理
文件分块,多线程下载
异步下载,回调机制
...

补充:Web开发 , Python ,
CopyRight © 2012 站长网 编程知识问答 www.zzzyk.com All Rights Reserved
部份技术文章来自网络,