删除相同图片,Python实现
原理:读取图片的二进制内容,用 MD5(或 SHA-1)计算散列值,作为图片的唯一编码,再与已记录的图片字典做比对;若该编码已存在,说明图片重复,将其移出(下面的实现是把重复图片移动到备份目录)。Python 实现:
import hashlib
import os
import re
import sys
from time import time

# Directory tree that the script scans for duplicate images.
rootPath = 'F:/Image/照片'
# Directory that receives the duplicates moved out of the scanned tree.
backupPath = 'F:/Image/backup'
# Maps the MD5 hex digest of a file's content to the first path seen with
# that content.
picDic = {}
# Matches any path ending in a supported image extension. The match is
# case-insensitive so mixed-case names such as "photo.Jpg" are not skipped.
regular = re.compile(r'^(.*)\.(jpg|jpeg|bmp|gif|png)$', re.IGNORECASE)


def RemoverRePic(dirPath, backupDir=None, seen=None, pattern=None):
    """Move duplicate images found under ``dirPath`` into a backup directory.

    The directory is walked recursively. Every file whose path matches
    ``pattern`` is identified by the MD5 digest of its content. The first
    file seen with a given digest is recorded in ``seen`` and left in place;
    each later file with the same digest is renamed into ``backupDir`` as
    ``<md5 of its original path><original extension>``.

    Args:
        dirPath: Directory to scan (text or bytes path).
        backupDir: Directory receiving the duplicates. Defaults to the
            module-level ``backupPath``. It is created on first use if it
            does not exist. It should not live inside ``dirPath``, otherwise
            it is scanned as well.
        seen: Dict mapping content digest to the first path seen with that
            content. Defaults to the module-level ``picDic``.
        pattern: Compiled regex selecting the paths to examine. Defaults to
            the module-level ``regular``.

    Returns:
        The number of files moved, including those in subdirectories.

    Raises:
        OSError: If a directory cannot be listed, a file cannot be read, or
            a duplicate cannot be renamed.
    """
    # Resolve the defaults at call time so existing single-argument callers
    # keep using the module-level configuration.
    if backupDir is None:
        backupDir = backupPath
    if seen is None:
        seen = picDic
    if pattern is None:
        pattern = regular

    # Decode byte paths up front: os.listdir then returns text names and the
    # concatenations below never mix bytes with text (Python 2 and 3).
    fsEncoding = sys.getfilesystemencoding() or 'utf-8'
    if isinstance(dirPath, bytes):
        dirPath = dirPath.decode(fsEncoding)
    if isinstance(backupDir, bytes):
        backupDir = backupDir.decode(fsEncoding)

    quantity = 0
    for name in os.listdir(dirPath):
        childPath = dirPath + '/' + name
        if os.path.isdir(childPath):
            # Accumulate (the original "=+" overwrote the running total).
            quantity += RemoverRePic(childPath, backupDir, seen, pattern)
            continue
        if not pattern.match(childPath):
            continue

        # The context manager closes the file even if reading fails.
        with open(childPath, 'rb') as pic:
            picMd5 = hashlib.md5(pic.read()).hexdigest()

        if picMd5 not in seen:
            seen[picMd5] = childPath
            continue

        # Duplicate content: move it aside under a name derived from its
        # original path. The path is encoded because md5 needs bytes.
        pathDigest = hashlib.md5(childPath.encode('utf-8')).hexdigest()
        # splitext looks only at the file name, so dots in directory names
        # cannot leak into the extension.
        extension = os.path.splitext(childPath)[1]
        if not os.path.isdir(backupDir):
            os.makedirs(backupDir)
        os.rename(childPath, backupDir + '/' + pathDigest + extension)
        quantity += 1
    return quantity


if __name__ == '__main__':
    # Print the start timestamp, the number of duplicates moved out of
    # rootPath, and finally the elapsed time in seconds.
    # Single-argument print(...) calls behave the same on Python 2 and 3.
    t = time()
    print('start:')
    print(t)
    print(RemoverRePic(rootPath))
    print('end:')
    print(time() - t)
作者:西麦
补充:Web开发 , Python ,