Python写的BloomFilter
由于Python没有内建的bitset数据结构,不过有需要自己安装的BitVector,用起来还是很方便的安装BitVector过程同Python安装第三方模块的方法:
命令行进入目录后,输入 python setup.py install
不过由于是在windows上做的实验,安装后只能在那个目录下使用BitVector这点有点迷惑,待解决...
下面是我根据java版的BloomFilter改写的代码:
[python]
#_*_coding:utf_8_
import BitVector
import os
import sys
class SimpleHash():
def __init__(self, cap, seed):
self.cap = cap
self.seed = seed
def hash(self, value):
ret = 0
for i in range(len(value)):
ret += self.seed*ret + ord(value[i])
return (self.cap-1) & ret
class BloomFilter():
def __init__(self, BIT_SIZE=1<<25):
self.BIT_SIZE = 1 << 25
self.seeds = [5, 7, 11, 13, 31, 37, 61]
self.bitset = BitVector.BitVector(size=self.BIT_SIZE)
self.hashFunc = []
for i in range(len(self.seeds)):
self.hashFunc.append(SimpleHash(self.BIT_SIZE, self.seeds[i]))
def insert(self, value):
for f in self.hashFunc:
loc = f.hash(value)
self.bitset[loc] = 1
def isContaions(self, value):
if value == None:
return False
ret = True
for f in self.hashFunc:
loc = f.hash(value)
ret = ret & self.bitset[loc]
return ret
def main():
fd = open("urls.txt")
bloomfilter = BloomFilter()
while True:
#url = raw_input()
url = fd.readline()
if cmp(url, 'exit') == 0: #if url is equal exit break
break
if bloomfilter.isContaions(url) == False:
bloomfilter.insert(url)
else:
print 'url :%s has exist' % url
main()
补充:Web开发 , Python ,