void Main()
{
var filterSize = 8192;
var words = 5000;
var streamLength = 200000;
var filter = new BloomFilter(filterSize);
var source = new List(Words.Take(words));
var bloomCount = 0;
var realCount = 0;
foreach (var word in source)
filter.Add(word);
var bloomTime = Stopwatch.StartNew();
foreach (var word in Text.Take(streamLength))
{
if (filter.Contains(word))
bloomCount++;
}
bloomTime.Stop();
var realTime = Stopwatch.StartNew();
foreach (var word in Text.Take(streamLength))
{
if (source.Contains(word))
realCount++;
}
realTime.Stop();
Console.WriteLine("False positives: {0}.", (bloomCount - realCount));
Console.WriteLine("Time exact: {0}ms.", realTime.ElapsedMilliseconds);
Console.WriteLine();
Console.WriteLine("Passed filter: {0}.", bloomCount);
Console.WriteLine("Time estimate: {0}ms.", bloomTime.ElapsedMilliseconds);
}
String basePath = @"C:\Users\Florian\Dropbox\Studienstiftung\Academy\Code\";
IEnumerable Words
{
get
{
using (var fs = File.Open(basePath + "words.txt", FileMode.Open))
{
using (var reader = new StreamReader(fs))
{
while (!reader.EndOfStream)
yield return reader.ReadLine();
}
}
}
}
IEnumerable Text
{
get
{
using (var fs = File.Open(basePath + "text.txt", FileMode.Open))
{
using (var reader = new StreamReader(fs))
{
while (!reader.EndOfStream)
{
var line = reader.ReadLine();
var words = line.Split(' ');
foreach (var word in words)
yield return word;
}
}
}
}
}
public class BloomFilter
{
readonly Int32 hashFunctionCount;
readonly BitArray hashBits;
readonly Func getHashSecondary;
public BloomFilter(Int32 capacity) :
this(capacity, null)
{
}
public BloomFilter(Int32 capacity, Func hashFunction) :
this(capacity, BestErrorRate(capacity), hashFunction)
{
}
public BloomFilter(Int32 capacity, Single errorRate, Func hashFunction) :
this(capacity, errorRate, hashFunction, BestN(capacity, errorRate), BestK(capacity, errorRate))
{
}
public BloomFilter(Int32 capacity, Single errorRate, Func hashFunction, Int32 n, Int32 k)
{
if (capacity < 1)
throw new ArgumentOutOfRangeException("capacity", capacity, "capacity must be > 0");
if (errorRate >= 1 || errorRate <= 0)
throw new ArgumentOutOfRangeException("errorRate", errorRate, String.Format("errorRate must be between 0 and 1, exclusive. Was {0}", errorRate));
if (n < 1) // from overflow in bestM calculation
throw new ArgumentOutOfRangeException(String.Format("The provided capacity and errorRate values would result in an array of length > int.MaxValue. Please reduce either of these values. Capacity: {0}, Error rate: {1}", capacity, errorRate));
if (hashFunction == null)
{
if (typeof(T) == typeof(String))
getHashSecondary = HashString;
else if (typeof(T) == typeof(Int32))
getHashSecondary = HashInt32;
else
throw new ArgumentNullException("hashFunction", "Please provide a hash function for your type T, when T is not a string or int.");
}
else
getHashSecondary = hashFunction;
hashFunctionCount = k;
hashBits = new BitArray(n);
}
public void Add(T item)
{
// start flipping bits for each hash of item
var primaryHash = item.GetHashCode();
var secondaryHash = getHashSecondary(item);
for (int i = 0; i < hashFunctionCount; i++)
{
var hash = ComputeHash(primaryHash, secondaryHash, i);
hashBits[hash] = true;
}
}
public Boolean Contains(T item)
{
var primaryHash = item.GetHashCode();
var secondaryHash = getHashSecondary(item);
for (int i = 0; i < hashFunctionCount; i++)
{
var hash = ComputeHash(primaryHash, secondaryHash, i);
if (hashBits[hash] == false)
return false;
}
return true;
}
public Double Truthiness
{
get { return (Double)TrueBits / hashBits.Count; }
}
Int32 TrueBits
{
get
{
int output = 0;
foreach (bool bit in hashBits)
{
if (bit)
output++;
}
return output;
}
}
Int32 ComputeHash(Int32 primaryHash, Int32 secondaryHash, Int32 i)
{
var resultingHash = (primaryHash + (i * secondaryHash)) % hashBits.Count;
return Math.Abs(resultingHash);
}
static Int32 BestK(Int32 capacity, Single errorRate)
{
return (Int32)Math.Round(Math.Log(2.0) * BestN(capacity, errorRate) / capacity);
}
static Int32 BestN(Int32 capacity, Single errorRate)
{
return (Int32)Math.Ceiling(capacity * Math.Log(errorRate, (1.0 / Math.Pow(2, Math.Log(2.0)))));
}
static Single BestErrorRate(Int32 capacity)
{
var c = 1f / capacity;
if (c != 0)
return c;
// http://www.cs.princeton.edu/courses/archive/spring02/cs493/lec7.pdf
return (Single)Math.Pow(0.6185, (Double)Int32.MaxValue / capacity);
}
static Int32 HashInt32(T input)
{
var x = (input as Int32?).Value;
unchecked
{
x = ~x + (x << 15); // x = (x << 15) - x- 1, as (~x) + y is equivalent to y - x - 1 in two's complement representation
x = x ^ (x >> 12);
x = x + (x << 2);
x = x ^ (x >> 4);
x = x * 2057; // x = (x + (x << 3)) + (x << 11);
x = x ^ (x >> 16);
return (int)x;
}
}
static Int32 HashString(T input)
{
var s = input as String;
int hash = 0;
for (int i = 0; i < s.Length; i++)
{
hash += s[i];
hash += (hash << 10);
hash ^= (hash >> 6);
}
hash += (hash << 3);
hash ^= (hash >> 11);
hash += (hash << 15);
return hash;
}
}