bloom_8cpp_source.html

// Copyright (c) 2012-2020 The Bitcoin Core developers

// Distributed under the MIT software license, see the accompanying

// file COPYING or http://www.opensource.org/licenses/mit-license.php.


#include <common/bloom.h>


#include <hash.h>

#include <primitives/transaction.h>

#include <random.h>

#include <script/script.h>

#include <script/standard.h>

#include <span.h>

#include <streams.h>


#include <algorithm>

#include <cmath>

#include <cstdlib>

#include <limits>

#include <vector>


// ln(2)^2: divisor in the CBloomFilter constructor's filter-size formula.
static constexpr double LN2SQUARED = 0.4804530139182014246671025263266649717305529515945455;

// ln(2): factor in the CBloomFilter constructor's hash-function-count formula.
static constexpr double LN2 = 0.6931471805599453094172321214581765680755001343602552;


/**
 * Build a filter sized for nElements entries at false-positive rate nFPRate.
 *
 * - Filter size in bits is -nElements * ln(nFPRate) / ln(2)^2, capped at
 *   MAX_BLOOM_FILTER_SIZE * 8 bits and then rounded down to whole bytes.
 * - Hash function count is (filter bits / nElements) * ln(2), capped at
 *   MAX_HASH_FUNCS.
 *
 * @param nElements  Number of entries the filter is sized for. Zero yields an
 *                   empty filter with no hash functions; insert() ignores an
 *                   empty filter and contains() reports a match for any key.
 * @param nFPRate    Target false-positive rate. It is not validated here.
 *                   NOTE(review): presumably expected to lie strictly between
 *                   0 and 1 -- confirm against callers.
 * @param nTweakIn   Value mixed into every hash seed (see Hash()).
 * @param nFlagsIn   Update-mode flags consulted by IsRelevantAndUpdate().
 */
CBloomFilter::CBloomFilter(const unsigned int nElements, const double nFPRate, const unsigned int nTweakIn, unsigned char nFlagsIn) :
    vData(std::min(static_cast<unsigned int>(-1 / LN2SQUARED * nElements * log(nFPRate)), MAX_BLOOM_FILTER_SIZE * 8) / 8),
    // The quotient below is an integer division, so nElements == 0 must be
    // special-cased to avoid dividing by zero.
    nHashFuncs(nElements == 0 ? 0U : std::min(static_cast<unsigned int>(vData.size() * 8 / nElements * LN2), MAX_HASH_FUNCS)),
    nTweak(nTweakIn),
    nFlags(nFlagsIn)
{
}


/**
 * Map a key to a bit position for hash function number nHashNum.
 *
 * The result is reduced modulo the filter size in bits, so callers must make
 * sure vData is non-empty before calling.
 */
inline unsigned int CBloomFilter::Hash(unsigned int nHashNum, Span<const unsigned char> vDataToHash) const
{
    // 0xFBA4C795 chosen as it guarantees a reasonable bit difference between nHashNum values.
    const unsigned int nSeed = nHashNum * 0xFBA4C795 + nTweak;
    const auto nFilterBits = vData.size() * 8;
    return MurmurHash3(nSeed, vDataToHash) % nFilterBits;
}


/**
 * Add a key to the filter by setting one bit per hash function.
 *
 * Does nothing when the filter is empty.
 */
void CBloomFilter::insert(Span<const unsigned char> vKey)
{
    // Avoid divide-by-zero (CVE-2013-5700): Hash() reduces modulo the filter size.
    if (vData.empty()) return;

    for (unsigned int nHashNum = 0; nHashNum < nHashFuncs; ++nHashNum) {
        const unsigned int nBit = Hash(nHashNum, vKey);
        // Set bit nBit of vData: byte nBit / 8, bit nBit % 8 within that byte.
        const unsigned int nByte = nBit >> 3;
        const int nBitMask = 1 << (7 & nBit);
        vData[nByte] |= nBitMask;
    }
}


void CBloomFilter::insert(const COutPoint& outpoint)

{

    CDataStream stream(SER_NETWORK, PROTOCOL_VERSION);

    stream << outpoint;

    insert(stream);

}


/**
 * Test whether a key may be in the filter.
 *
 * @return false as soon as one of the key's bits is found unset; true when
 *         every bit is set, or when the filter is empty.
 */
bool CBloomFilter::contains(Span<const unsigned char> vKey) const
{
    // Avoid divide-by-zero (CVE-2013-5700): Hash() reduces modulo the filter size.
    if (vData.empty()) return true;

    for (unsigned int nHashNum = 0; nHashNum < nHashFuncs; ++nHashNum) {
        const unsigned int nBit = Hash(nHashNum, vKey);
        // Check bit nBit of vData.
        const bool fBitSet = (vData[nBit >> 3] & (1 << (7 & nBit))) != 0;
        if (!fBitSet) return false;
    }
    return true;
}


bool CBloomFilter::contains(const COutPoint& outpoint) const

{

    CDataStream stream(SER_NETWORK, PROTOCOL_VERSION);

    stream << outpoint;

    return contains(stream);

}


bool CBloomFilter::IsWithinSizeConstraints() const

{

    return vData.size() <= MAX_BLOOM_FILTER_SIZE && nHashFuncs <= MAX_HASH_FUNCS;

}


bool CBloomFilter::IsRelevantAndUpdate(const CTransaction& tx)

{

    bool fFound = false;

    // Match if the filter contains the hash of tx

    //  for finding tx when they appear in a block

    if (vData.empty()) // zero-size = "match-all" filter

        return true;

    const uint256& hash = tx.GetHash();

    if (contains(hash))

        fFound = true;


    for (unsigned int i = 0; i < tx.vout.size(); i++)

    {

        const CTxOut& txout = tx.vout[i];

        // Match if the filter contains any arbitrary script data element in any scriptPubKey in tx

        // If this matches, also add the specific output that was matched.

        // This means clients don't have to update the filter themselves when a new relevant tx

        // is discovered in order to find spending transactions, which avoids round-tripping and race conditions.

        CScript::const_iterator pc = txout.scriptPubKey.begin();

        std::vector<unsigned char> data;

        while (pc < txout.scriptPubKey.end())

        {

            opcodetype opcode;

            if (!txout.scriptPubKey.GetOp(pc, opcode, data))

                break;

            if (data.size() != 0 && contains(data))

            {

                fFound = true;

                if ((nFlags & BLOOM_UPDATE_MASK) == BLOOM_UPDATE_ALL)

                    insert(COutPoint(hash, i));

                else if ((nFlags & BLOOM_UPDATE_MASK) == BLOOM_UPDATE_P2PUBKEY_ONLY)

                {

                    std::vector<std::vector<unsigned char> > vSolutions;

                    TxoutType type = Solver(txout.scriptPubKey, vSolutions);

                    if (type == TxoutType::PUBKEY || type == TxoutType::MULTISIG) {

                        insert(COutPoint(hash, i));

                    }

                }

                break;

            }

        }

    }


    if (fFound)

        return true;


    for (const CTxIn& txin : tx.vin)

    {

        // Match if the filter contains an outpoint tx spends

        if (contains(txin.prevout))

            return true;


        // Match if the filter contains any arbitrary script data element in any scriptSig in tx

        CScript::const_iterator pc = txin.scriptSig.begin();

        std::vector<unsigned char> data;

        while (pc < txin.scriptSig.end())

        {

            opcodetype opcode;

            if (!txin.scriptSig.GetOp(pc, opcode, data))

                break;

            if (data.size() != 0 && contains(data))

                return true;

        }

    }


    return false;

}


/**
 * Size a rolling bloom filter for nElements recent entries at false-positive
 * rate fpRate, then reset it.
 */
CRollingBloomFilter::CRollingBloomFilter(const unsigned int nElements, const double fpRate)
{
    const double lnFpRate = log(fpRate);
    /* The optimal number of hash functions is log(fpRate) / log(0.5), but
     * restrict it to the range 1-50. */
    const int nIdealHashFuncs = static_cast<int>(round(lnFpRate / log(0.5)));
    nHashFuncs = std::max(1, std::min(nIdealHashFuncs, 50));
    /* In this rolling bloom filter, we'll store between 2 and 3 generations of nElements / 2 entries. */
    nEntriesPerGeneration = (nElements + 1) / 2;
    const uint32_t nMaxElements = nEntriesPerGeneration * 3;
    /* The maximum fpRate = pow(1.0 - exp(-nHashFuncs * nMaxElements / nFilterBits), nHashFuncs)
     * =>          pow(fpRate, 1.0 / nHashFuncs) = 1.0 - exp(-nHashFuncs * nMaxElements / nFilterBits)
     * =>          1.0 - pow(fpRate, 1.0 / nHashFuncs) = exp(-nHashFuncs * nMaxElements / nFilterBits)
     * =>          log(1.0 - pow(fpRate, 1.0 / nHashFuncs)) = -nHashFuncs * nMaxElements / nFilterBits
     * =>          nFilterBits = -nHashFuncs * nMaxElements / log(1.0 - pow(fpRate, 1.0 / nHashFuncs))
     * =>          nFilterBits = -nHashFuncs * nMaxElements / log(1.0 - exp(logFpRate / nHashFuncs))
     */
    const double dFilterBits = -1.0 * nHashFuncs * nMaxElements / log(1.0 - exp(lnFpRate / nHashFuncs));
    const uint32_t nFilterBits = static_cast<uint32_t>(ceil(dFilterBits));
    /* For each data element we need to store 2 bits. If both bits are 0, the
     * bit is treated as unset. If the bits are (01), (10), or (11), the bit is
     * treated as set in generation 1, 2, or 3 respectively.
     * These bits are stored in separate integers: position P corresponds to bit
     * (P & 63) of the integers data[(P >> 6) * 2] and data[(P >> 6) * 2 + 1]. */
    const uint32_t nWordPairs = (nFilterBits + 63) / 64;
    data.clear();
    data.resize(nWordPairs << 1);
    reset();
}


/* Similar to CBloomFilter::Hash, but returns the raw 32-bit hash without
 * reducing it to a bit position. */
static inline uint32_t RollingBloomHash(unsigned int nHashNum, uint32_t nTweak, Span<const unsigned char> vDataToHash)
{
    const unsigned int nSeed = nHashNum * 0xFBA4C795 + nTweak;
    return MurmurHash3(nSeed, vDataToHash);
}


// A replacement for x % n. This assumes that x and n are 32bit integers, and x is a uniformly random distributed 32bit value
// which should be the case for a good hash.
// The result is the upper 32 bits of the 64-bit product x * n.
// See https://lemire.me/blog/2016/06/27/a-fast-alternative-to-the-modulo-reduction/
static inline uint32_t FastMod(uint32_t x, size_t n) {
    const uint64_t product = static_cast<uint64_t>(x) * static_cast<uint64_t>(n);
    return static_cast<uint32_t>(product >> 32);
}


/**
 * Add a key to the rolling filter, tagging its bits with the current
 * generation number.
 *
 * When the current generation is full, the generation number advances
 * (cycling 1 -> 2 -> 3 -> 1) and every position still tagged with the new
 * number is cleared before the key is inserted.
 */
void CRollingBloomFilter::insert(Span<const unsigned char> vKey)
{
    if (nEntriesThisGeneration == nEntriesPerGeneration) {
        nEntriesThisGeneration = 0;
        nGeneration = (nGeneration == 3) ? 1 : nGeneration + 1;

        // All-ones when the corresponding bit of the generation number is set, else zero.
        const uint64_t nLowMask = 0 - static_cast<uint64_t>(nGeneration & 1);
        const uint64_t nHighMask = 0 - static_cast<uint64_t>(nGeneration >> 1);
        /* Wipe old entries that used this generation number. */
        for (uint32_t i = 0; i < data.size(); i += 2) {
            const uint64_t lo = data[i];
            const uint64_t hi = data[i + 1];
            // A bit of nKeep is zero only where (lo, hi) equals the new generation number.
            const uint64_t nKeep = (lo ^ nLowMask) | (hi ^ nHighMask);
            data[i] = lo & nKeep;
            data[i + 1] = hi & nKeep;
        }
    }
    ++nEntriesThisGeneration;

    // nGeneration is fixed for the rest of the call, so split it into its two bits once.
    const uint64_t nGenLow = static_cast<uint64_t>(nGeneration & 1);
    const uint64_t nGenHigh = static_cast<uint64_t>(nGeneration >> 1);
    for (int n = 0; n < nHashFuncs; ++n) {
        const uint32_t h = RollingBloomHash(n, nTweak, vKey);
        const int bit = h & 0x3F;
        /* FastMod works with the upper bits of h, so it is safe to ignore that the lower bits of h are already used for bit. */
        const uint32_t pos = FastMod(h, data.size());
        /* The lowest bit of pos is ignored, and set to zero for the first bit, and to one for the second. */
        uint64_t& lo = data[pos & ~1U];
        uint64_t& hi = data[pos | 1];
        const uint64_t nClear = ~(static_cast<uint64_t>(1) << bit);
        lo = (lo & nClear) | (nGenLow << bit);
        hi = (hi & nClear) | (nGenHigh << bit);
    }
}


/**
 * Test whether a key may have been inserted recently.
 *
 * @return false as soon as one of the key's positions is unset in both of its
 *         integers; true otherwise.
 */
bool CRollingBloomFilter::contains(Span<const unsigned char> vKey) const
{
    for (int n = 0; n < nHashFuncs; ++n) {
        const uint32_t h = RollingBloomHash(n, nTweak, vKey);
        const int bit = h & 0x3F;
        const uint32_t pos = FastMod(h, data.size());
        /* If the relevant bit is not set in either data[pos & ~1] or data[pos | 1], the filter does not contain vKey */
        const uint64_t nEither = data[pos & ~1U] | data[pos | 1];
        if (((nEither >> bit) & 1) == 0) return false;
    }
    return true;
}


/**
 * Empty the filter: zero every word of data (keeping its size), restart at
 * generation 1 with no entries, and pick a fresh tweak from GetRand().
 */
void CRollingBloomFilter::reset()
{
    nTweak = GetRand(std::numeric_limits<unsigned int>::max());
    nGeneration = 1;
    nEntriesThisGeneration = 0;
    data.assign(data.size(), 0);
}

LN2SQUARED
static constexpr double LN2SQUARED
Definition: bloom.cpp:21

FastMod
static uint32_t FastMod(uint32_t x, size_t n)
Definition: bloom.cpp:198

LN2
static constexpr double LN2
Definition: bloom.cpp:22

RollingBloomHash
static uint32_t RollingBloomHash(unsigned int nHashNum, uint32_t nTweak, Span< const unsigned char > vDataToHash)
Definition: bloom.cpp:189

bloom.h

MAX_BLOOM_FILTER_SIZE
static constexpr unsigned int MAX_BLOOM_FILTER_SIZE
20,000 items with fp rate < 0.1% or 10,000 items and <0.0001%
Definition: bloom.h:17

MAX_HASH_FUNCS
static constexpr unsigned int MAX_HASH_FUNCS
Definition: bloom.h:18

BLOOM_UPDATE_P2PUBKEY_ONLY
@ BLOOM_UPDATE_P2PUBKEY_ONLY
Definition: bloom.h:29

BLOOM_UPDATE_ALL
@ BLOOM_UPDATE_ALL
Definition: bloom.h:27

BLOOM_UPDATE_MASK
@ BLOOM_UPDATE_MASK
Definition: bloom.h:30

CBloomFilter::IsWithinSizeConstraints
bool IsWithinSizeConstraints() const
True if the size is <= MAX_BLOOM_FILTER_SIZE and the number of hash functions is <= MAX_HASH_FUNCS (c...
Definition: bloom.cpp:88

CBloomFilter::insert
void insert(Span< const unsigned char > vKey)
Definition: bloom.cpp:48

CBloomFilter::contains
bool contains(Span< const unsigned char > vKey) const
Definition: bloom.cpp:67

CBloomFilter::nFlags
unsigned char nFlags
Definition: bloom.h:50

CBloomFilter::vData
std::vector< unsigned char > vData
Definition: bloom.h:47

CBloomFilter::nHashFuncs
unsigned int nHashFuncs
Definition: bloom.h:48

CBloomFilter::Hash
unsigned int Hash(unsigned int nHashNum, Span< const unsigned char > vDataToHash) const
Definition: bloom.cpp:42

CBloomFilter::CBloomFilter
CBloomFilter()
Definition: bloom.h:65

CBloomFilter::IsRelevantAndUpdate
bool IsRelevantAndUpdate(const CTransaction &tx)
Also adds any outputs which match the filter to the filter (to match their spending txes)
Definition: bloom.cpp:93

CBloomFilter::nTweak
unsigned int nTweak
Definition: bloom.h:49

CDataStream
Double ended buffer combining vector and stream-like interfaces.
Definition: streams.h:205

COutPoint
An outpoint - a combination of a transaction hash and an index n into its vout.
Definition: transaction.h:27

CRollingBloomFilter::contains
bool contains(Span< const unsigned char > vKey) const
Definition: bloom.cpp:233

CRollingBloomFilter::nTweak
unsigned int nTweak
Definition: bloom.h:123

CRollingBloomFilter::nGeneration
int nGeneration
Definition: bloom.h:121

CRollingBloomFilter::nHashFuncs
int nHashFuncs
Definition: bloom.h:124

CRollingBloomFilter::CRollingBloomFilter
CRollingBloomFilter(const unsigned int nElements, const double nFPRate)
Definition: bloom.cpp:161

CRollingBloomFilter::reset
void reset()
Definition: bloom.cpp:247

CRollingBloomFilter::insert
void insert(Span< const unsigned char > vKey)
Definition: bloom.cpp:202

CRollingBloomFilter::nEntriesPerGeneration
int nEntriesPerGeneration
Definition: bloom.h:119

CRollingBloomFilter::nEntriesThisGeneration
int nEntriesThisGeneration
Definition: bloom.h:120

CRollingBloomFilter::data
std::vector< uint64_t > data
Definition: bloom.h:122

CScript::GetOp
bool GetOp(const_iterator &pc, opcodetype &opcodeRet, std::vector< unsigned char > &vchRet) const
Definition: script.h:487

CTransaction
The basic transaction that is broadcast on the network and contained in blocks.
Definition: transaction.h:260

CTransaction::GetHash
const uint256 & GetHash() const
Definition: transaction.h:302

CTransaction::vout
const std::vector< CTxOut > vout
Definition: transaction.h:271

CTransaction::vin
const std::vector< CTxIn > vin
Definition: transaction.h:270

CTxIn
An input of a transaction.
Definition: transaction.h:66

CTxIn::scriptSig
CScript scriptSig
Definition: transaction.h:69

CTxIn::prevout
COutPoint prevout
Definition: transaction.h:68

CTxOut
An output of a transaction.
Definition: transaction.h:129

CTxOut::scriptPubKey
CScript scriptPubKey
Definition: transaction.h:132

Span
A Span is an object that can refer to a contiguous sequence of objects.
Definition: span.h:93

prevector::const_iterator
Definition: prevector.h:98

prevector::begin
iterator begin()
Definition: prevector.h:290

prevector::end
iterator end()
Definition: prevector.h:292

uint256
256-bit opaque blob.
Definition: uint256.h:124

MurmurHash3
unsigned int MurmurHash3(unsigned int nHashSeed, Span< const unsigned char > vDataToHash)
Definition: hash.cpp:17

transaction.h

GetRand
uint64_t GetRand(uint64_t nMax) noexcept
Generate a uniform random integer in the range [0..nMax).
Definition: random.cpp:591

random.h

script.h

opcodetype
opcodetype
Script opcodes.
Definition: script.h:67

SER_NETWORK
@ SER_NETWORK
Definition: serialize.h:138

span.h

Solver
TxoutType Solver(const CScript &scriptPubKey, std::vector< std::vector< unsigned char > > &vSolutionsRet)
Parse a scriptPubKey and identify script type for standard scripts.
Definition: standard.cpp:144

standard.h

TxoutType
TxoutType
Definition: standard.h:59

TxoutType::MULTISIG
@ MULTISIG

TxoutType::PUBKEY
@ PUBKEY

streams.h

PROTOCOL_VERSION
static const int PROTOCOL_VERSION
network protocol versioning
Definition: version.h:12