布隆过滤器
数据结构,C++,大数据2016-11-12
#pragma once
#include<vector>
#include<iostream>
using namespace std;
class BitMap
{
public:
BitMap(size_t range)
{
_bitMap.resize((range>>5) +1);
}
void Set(size_t x)
{
size_t index=x>>5;
size_t num =x%32;
/*size_t tmp=1;
tmp<<=num;*/
//_bitMap[index]|=tmp;
_bitMap[index]=(1<<num);
}
void Reset(size_t x)
{
size_t index=x>>5;
size_t num =x%32;
size_t tmp=0;
tmp<<=num;
_bitMap[index] &=(~(tmp));
}
bool Test(size_t x)
{
size_t index=x>>5;
size_t num =x%32;
size_t tmp=1;
tmp<<=num;
_bitMap[index] &=tmp;
return _bitMap[index];
}
~BitMap()
{}
protected:
vector<size_t> _bitMap;
};BloonFilter.h#pragma once
#include <iostream>
#include <string>
#include <vector>
#include "BitMap.h"
using namespace std;
struct __HashFunc1
{
size_t BKDRHash(const char *str)
{
register size_t hash = 0;
while (size_t ch = (size_t)*str++)
{
hash = hash * 131 + ch; // 也可以乘以31、131、1313、13131、131313..
}
return hash;
}
size_t operator()(const string& str)
{
return BKDRHash(str.c_str());
}
};
struct __HashFunc2
{
size_t SDBMHash(const char *str)
{
register size_t hash = 0;
while (size_t ch = (size_t)*str++)
{
hash = 65599 * hash + ch;
//hash = (size_t)ch + (hash << 6) + (hash << 16) - hash;
}
return hash;
}
size_t operator()(const string& str)
{
return SDBMHash(str.c_str());
}
};
struct __HashFunc3
{
size_t RSHash(const char *str)
{
register size_t hash = 0;
size_t magic = 63689;
while (size_t ch = (size_t)*str++)
{
hash = hash * magic + ch;
magic *= 378551;
}
return hash;
}
size_t operator()(const string& str)
{
return RSHash(str.c_str());
}
};
struct __HashFunc4
{
size_t APHash(const char *str)
{
register size_t hash = 0;
size_t ch;
for (long i = 0; ch = (size_t)*str++; i++)
{
if ((i & 1) == 0)
{
hash ^= ((hash << 7) ^ ch ^ (hash >> 3));
}
else
{
hash ^= (~((hash << 11) ^ ch ^ (hash >> 5)));
}
}
return hash;
}
size_t operator()(const string& str)
{
return APHash(str.c_str());
}
};
struct __HashFunc5
{
size_t JSHash(const char *str)
{
if(!*str)
return 0;
register size_t hash = 1315423911;
while (size_t ch = (size_t)*str++)
{
hash ^= ((hash << 5) + ch + (hash >> 2));
}
return hash;
}
size_t operator()(const string& str)
{
return JSHash(str.c_str());
}
};
template<class K=string
,class HashFunc1=__HashFunc1
,class HashFunc2=__HashFunc2
,class HashFunc3=__HashFunc3
,class HashFunc4=__HashFunc4
,class HashFunc5=__HashFunc5>
class RefBloomFilter
{
public:
RefBloomFilter(size_t num)
:_range(num*5)
,_bitMap(num*5)
{}
void Set(const K& key)
{
size_t index1=HashFunc1()(key)%_range;
size_t index2=HashFunc2()(key)%_range;
size_t index3=HashFunc3()(key)%_range;
size_t index4=HashFunc4()(key)%_range;
size_t index5=HashFunc5()(key)%_range;
_bitMap.Set(index1);
_bitMap.Set(index2);
_bitMap.Set(index3);
_bitMap.Set(index4);
_bitMap.Set(index5);
cout<<index1<<endl;
cout<<index2<<endl;
cout<<index3<<endl;
cout<<index4<<endl;
cout<<index5<<endl<<endl;
}
bool Reset(const K& key)
{
size_t index1=HashFunc1()(key)%_range;
size_t index2=HashFunc2()(key)%_range;
size_t index3=HashFunc3()(key)%_range;
size_t index4=HashFunc4()(key)%_range;
size_t index5=HashFunc5()(key)%_range;
if(_bitMap[index1]==0
||_bitMap[index2]==0
||_bitMap[index3]==0
||_bitMap[index4]==0
||_bitMap[index5]==0)
return false;
_bitMap.Reset(index1);
_bitMap.Reset(index2);
_bitMap.Reset(index3);
_bitMap.Reset(index4);
_bitMap.Reset(index5);
cout<<index1<<endl;
cout<<index2<<endl;
cout<<index3<<endl;
cout<<index4<<endl;
cout<<index5<<endl<<endl;
return true;
}
bool Test(const K& key)
{
size_t index1=HashFunc1()(key)%_range;
size_t index2=HashFunc2()(key)%_range;
size_t index3=HashFunc3()(key)%_range;
size_t index4=HashFunc4()(key)%_range;
size_t index5=HashFunc5()(key)%_range;
if(_bitMap.Test(index1)!=0
&&_bitMap.Test(index2)!=0
&&_bitMap.Test(index3)!=0
&&_bitMap.Test(index4)!=0
&&_bitMap.Test(index5)!=0)
{
cout<<index1<<endl;
cout<<index2<<endl;
cout<<index3<<endl;
cout<<index4<<endl;
cout<<index5<<endl<<endl;
return true;
}
cout<<index1<<endl;
cout<<index2<<endl;
cout<<index3<<endl;
cout<<index4<<endl;
cout<<index5<<endl<<endl;
return false;
}
protected:
BitMap _bitMap;
size_t _range;
};
void test()
{
RefBloomFilter<> bf(1);
char* str1="http://blog.csdn.net/shangguan_1234";
char* str2="http://blog.csdn.net/shangguan_1235";
char* str3="http://blog.csdn.net/shangguan_1233";
char* str4="http://blog.csdn.net/shangguan_1232";
char* str5="http://blog.csdn.net/shangguan_1231";
bf.Set(str1);
bf.Set(str2);
bf.Set(str3);
bf.Set(str4);
bf.Set(str5);
cout<<bf.Test(str1)<<endl;
cout<<bf.Test(str2)<<endl;
cout<<bf.Test(str3)<<endl;
cout<<bf.Test(str4)<<endl;
cout<<bf.Test(str5)<<endl;
}#include "BloomFilter.h"
int main()
{
test();
system("pause");
return 0;
}