All Downloads are FREE. Search and download functionalities are using the official Maven repository.

z3-z3-4.13.0.src.util.chashtable.h Maven / Gradle / Ivy

The newest version!
/*++
Copyright (c) 2011 Microsoft Corporation

Module Name:

    chashtable.h

Abstract:

    Hashtable with chaining.  

    The performance of the hashtable in hashtable.h deteriorates if
    there is a huge number of deletions. In this case, the hashtable
    starts to contain many cells marked as deleted, and insertion/deletion
    start to suffer.
    
    The hashtable defined in this class addresses this problem by using
    chaining. Of course, there is the cost of storing the link to the next
    cell.
    
Author:

    Leonardo de Moura (leonardo) 2011-04-14.

Revision History:

--*/
#pragma once

#include "util/memory_manager.h"
#include "util/debug.h"
#include "util/trace.h"
#include "util/tptr.h"
#include "util/util.h"
#ifdef Z3DEBUG
#include "util/hashtable.h"
#endif

#define CH_STATISTICS

#ifdef CH_STATISTICS
#define CHS_CODE(CODE) { CODE }
#else
#define CHS_CODE(CODE) 
#endif

template
class chashtable : private HashProc, private EqProc {
public:
    static const unsigned default_init_slots  = 8;
    static const unsigned default_init_cellar = 2;

protected:
    struct cell {
        cell *  m_next;
        T       m_data;
        cell():m_next(reinterpret_cast(1)) {}
        bool is_free() const { return GET_TAG(m_next) == 1; }
        void mark_free() { m_next = TAG(cell*, m_next, 1); }
        void unmark_free() { m_next = UNTAG(cell*, m_next); }
    };
    
    cell *    m_table;         // array of cells.
    unsigned  m_capacity;      // size of the array of cells.
    unsigned  m_init_slots; 
    unsigned  m_init_cellar;   
    unsigned  m_slots;         // m_slots < m_capacity, and m_slots is a power of two, the cells [m_slots, m_capacity) are used for chaining. 
    unsigned  m_used_slots;    // m_used_slots <= m_slots (number of used slots).
    unsigned  m_size;          // number of occupied cells.
#ifdef CH_STATISTICS
    unsigned  m_collisions;
#endif
    cell *    m_next_cell;   
    cell *    m_free_cell;   
    cell *    m_tofree_cell;
    
    unsigned get_hash(T const & d) const { return HashProc::operator()(d); }
    bool equals(T const & e1, T const & e2) const { return EqProc::operator()(e1, e2); }

    static cell * alloc_table(unsigned sz) {
        return alloc_vect(sz); 
    }
    
    void delete_table() {
        dealloc_vect(m_table, m_capacity);
    }
    
    // Return the next free cell in the cellar, and the number of used slots
    // Return 0 if the cellar is too small (unlikely but it might happen with a bad hash)
    cell * copy_table(cell * source, unsigned source_slots, unsigned source_capacity, 
                      cell * target, unsigned target_slots, unsigned target_capacity,
                      unsigned & used_slots) {
        TRACE("chashtable", tout << "copy_table...\n";);
        SASSERT(target_slots >= source_slots);
        SASSERT(target_capacity >= source_capacity);
        unsigned target_mask  = target_slots - 1;
        used_slots   = 0;
        cell *  source_end    = source + source_slots;
        cell *  target_cellar = target + target_slots;
        cell *  target_end    = target + target_capacity;
        for (cell * source_it = source; source_it != source_end; ++source_it) {
            if (!source_it->is_free()) {
                cell * list_it = source_it;
                do {
                    unsigned h    = get_hash(list_it->m_data);
                    unsigned idx  = h & target_mask;
                    cell * target_it = target + idx;
                    SASSERT(target_it >= target);
                    SASSERT(target_it < target + target_slots);
                    if (target_it->is_free()) {
                        target_it->m_data = list_it->m_data;
                        target_it->m_next = nullptr;
                        used_slots++;
                    }
                    else {
                        SASSERT((get_hash(target_it->m_data) & target_mask) == idx);
                        if (target_cellar == target_end)
                            return nullptr; // the cellar is too small...
                        SASSERT(target_cellar >= target + target_slots);
                        SASSERT(target_cellar < target_end);
                        *target_cellar    = *target_it;
                        target_it->m_data = list_it->m_data;
                        target_it->m_next = target_cellar;
                        target_cellar++;
                    }
                    SASSERT(!target_it->is_free());
                    list_it = list_it->m_next;
                }
                while (list_it != nullptr);
            }
        }
#if 0
        TRACE("chashtable", 
              for (unsigned i = 0; i < source_capacity; i++) {
                  tout << i << ":[";
                  if (source[i].m_next == 0)
                      tout << "null";
                  else if (source[i].m_next == reinterpret_cast(1))
                      tout << "X";
                  else
                      tout << (source[i].m_next - source);
                  tout << ", " << source[i].m_data << "]\n";
              }
              tout << "\n";
              for (unsigned i = 0; i < target_capacity; i++) {
                  tout << i << ":[";
                  if (target[i].m_next == 0)
                      tout << "null";
                  else if (target[i].m_next == reinterpret_cast(1))
                      tout << "X";
                  else
                      tout << (target[i].m_next - target);
                  tout << ", " << target[i].m_data << "]\n";
              }
              tout << "\n";);
#endif
        return target_cellar;
    }
    
    void expand_table() {
        unsigned curr_cellar  = (m_capacity - m_slots);
        unsigned new_slots    = m_slots * 2;
        unsigned new_cellar   = curr_cellar * 2;
        if (new_slots < m_slots || new_cellar < curr_cellar)
            throw default_exception("table overflow");            
        while (true) {
            unsigned new_capacity = new_slots + new_cellar;
            if (new_capacity < new_slots)
                throw default_exception("table overflow");
            cell * new_table      = alloc_table(new_capacity);
            cell * next_cell      = copy_table(m_table, m_slots, m_capacity,
                                               new_table, new_slots, new_capacity,
                                               m_used_slots);
            if (next_cell != nullptr) {
                delete_table();
                m_table      = new_table;
                m_capacity   = new_capacity;
                m_slots      = new_slots;
                m_next_cell  = next_cell;
                m_free_cell  = nullptr;
                m_tofree_cell = nullptr;
                CASSERT("chashtable", check_invariant());
                return;
            }
            dealloc_vect(new_table, new_capacity);
            if (2*new_cellar < new_cellar)
                throw default_exception("table overflow");                
            new_cellar *= 2;
        }
    }

    bool has_free_cells() const {
        return m_free_cell != nullptr || m_next_cell < m_table + m_capacity;
    }

    cell * get_free_cell() {
        if (m_free_cell != nullptr) {
            cell * c    = m_free_cell;
            m_free_cell = c->m_next;
            return c;
        }
        else {
            cell * c = m_next_cell;
            m_next_cell++;
            return c;
        }
    }

    void recycle_cell(cell * c) {
        // c is in the cellar
        SASSERT(c >= m_table + m_slots);
        SASSERT(c < m_table + m_capacity);
        c->m_next   = m_free_cell;
        m_free_cell = c;
    }

    void push_recycle_cell(cell* c) {
        SASSERT(c < m_table + m_capacity);
        c->m_next = m_tofree_cell;
        m_tofree_cell = c;
    }

    void init(unsigned slots, unsigned cellar) {
        m_capacity   = slots + cellar;
        m_table      = alloc_table(m_capacity);
        m_slots      = slots;
        m_used_slots = 0;
        m_size       = 0;
        m_next_cell  = m_table + slots;
        m_free_cell  = nullptr;
        m_tofree_cell = nullptr;
    }

public:
    chashtable(HashProc const & h = HashProc(),
               EqProc const & e = EqProc(),
               unsigned init_slots  = default_init_slots,
               unsigned init_cellar = default_init_cellar):
        HashProc(h),
        EqProc(e) {
        SASSERT(is_power_of_two(init_slots));
        SASSERT(init_cellar > 0);
        m_init_slots = init_slots;
        m_init_cellar = init_cellar;
        init(m_init_slots, m_init_cellar);
        CHS_CODE(m_collisions = 0;);
    }

    ~chashtable() {
#if 0
        cell * it  = m_table;
        cell * end = m_table + m_slots;
        verbose_stream() << "[chashtable] free slots: ";
        for (; it != end; ++it) {
            if (it->is_free())
                verbose_stream() << (it - m_table) << " ";
        }
        verbose_stream() << "\n";
#endif
        delete_table();
    }

    void reset() {
        if (m_size == 0) 
            return;
        finalize();
    }

    void finalize() {
        delete_table();
        init(m_init_slots, m_init_cellar);
    }

    bool empty() const {
        return m_size == 0;
    }

    unsigned size() const {
        return m_size;
    }

    unsigned capacity() const {
        return m_capacity;
    }

    unsigned used_slots() const {
        return m_used_slots;
    }

    void insert_fresh(T const& d) {
        if (!has_free_cells()) {
            expand_table();
        }
        unsigned mask = m_slots - 1;
        unsigned h    = get_hash(d);
        unsigned idx  = h & mask;
        cell * c      = m_table + idx;
        cell * new_c  = nullptr;
        m_size++;
        if (c->is_free()) {
            m_used_slots++;
        }
        else {
            new_c = get_free_cell();
            *new_c = *c;
        }
        c->m_next = new_c;
        c->m_data = d;
        CASSERT("chashtable_bug", check_invariant());
    }
    
    void insert(T const & d) {
        if (!has_free_cells())
            expand_table();
        unsigned mask = m_slots - 1;
        unsigned h    = get_hash(d);
        unsigned idx  = h & mask;
        cell * c      = m_table + idx;
        if (c->is_free()) {
            m_size++;
            m_used_slots++;
            c->m_data = d;
            c->m_next = nullptr;
            CASSERT("chashtable_bug", check_invariant());
            return;
        }
        else {
            cell * it = c;
            do { 
                if (equals(it->m_data, d)) {
                    // already there
                    it->m_data = d;
                    CASSERT("chashtable_bug", check_invariant());
                    return;
                }
                CHS_CODE(m_collisions++;);
                it = it->m_next;
            }
            while (it != nullptr);
            // d is not in the table.
            m_size++;
            cell * new_c = get_free_cell();
            *new_c = *c;
            c->m_data    = d;
            c->m_next    = new_c;
            CASSERT("chashtable_bug", check_invariant());
            return;
        }
    }

    T & insert_if_not_there(T const & d) {
        if (!has_free_cells())
            expand_table();
        unsigned mask = m_slots - 1;
        unsigned h    = get_hash(d);
        unsigned idx  = h & mask;
        cell * c      = m_table + idx;
        if (c->is_free()) {
            m_size++;
            m_used_slots++;
            c->m_data = d;
            c->m_next = nullptr;
            CASSERT("chashtable_bug", check_invariant());
            return c->m_data;
        }
        else {
            cell * it = c;
            do { 
                if (equals(it->m_data, d)) {
                    // already there
                    CASSERT("chashtable_bug", check_invariant());
                    return it->m_data;
                }
                CHS_CODE(m_collisions++;);
                it = it->m_next;
            }
            while (it != nullptr);
            // d is not in the table.
            m_size++;
            cell * new_c = get_free_cell();
            *new_c = *c;
            c->m_data    = d;
            c->m_next    = new_c;
            CASSERT("chashtable_bug", check_invariant());
            return c->m_data;
        }
    }

    bool insert_if_not_there2(T const & d) {
        if (!has_free_cells())
            expand_table();
        unsigned mask = m_slots - 1;
        unsigned h    = get_hash(d);
        unsigned idx  = h & mask;
        cell * c      = m_table + idx;
        if (c->is_free()) {
            m_size++;
            m_used_slots++;
            c->m_data = d;
            c->m_next = nullptr;
            CASSERT("chashtable_bug", check_invariant());
            return true;
        }
        else {
            cell * it = c;
            do { 
                if (equals(it->m_data, d)) {
                    // already there
                    CASSERT("chashtable_bug", check_invariant());
                    return false;
                }
                CHS_CODE(m_collisions++;);
                it = it->m_next;
            }
            while (it != nullptr);
            // d is not in the table.
            m_size++;
            cell * new_c = get_free_cell();
            *new_c = *c;
            c->m_data    = d;
            c->m_next    = new_c;
            CASSERT("chashtable_bug", check_invariant());
            return true;
        }
    }

    bool contains(T const & d) const {
        unsigned mask = m_slots - 1;
        unsigned h    = get_hash(d);
        unsigned idx  = h & mask;
        cell * c      = m_table + idx;
        if (c->is_free())
            return false;
        do { 
            if (equals(c->m_data, d)) {
                return true;
            }
            CHS_CODE(const_cast(this)->m_collisions++;);
            c = c->m_next;
        }
        while (c != nullptr);
        return false;
    }

    T * find_core(T const & d) const {
        unsigned mask = m_slots - 1;
        unsigned h    = get_hash(d);
        unsigned idx  = h & mask;
        cell * c      = m_table + idx;
        if (c->is_free())
            return nullptr;
        do { 
            if (equals(c->m_data, d)) {
                return &(c->m_data);
            }
            CHS_CODE(const_cast(this)->m_collisions++;);
            c = c->m_next;
        }
        while (c != nullptr);
        return nullptr;
    }

    bool find(T const & d, T & r) {
        unsigned mask = m_slots - 1;
        unsigned h    = get_hash(d);
        unsigned idx  = h & mask;
        cell * c      = m_table + idx;
        if (c->is_free())
            return false;
        do { 
            if (equals(c->m_data, d)) {
                r = c->m_data;
                return true;
            }
            CHS_CODE(const_cast(this)->m_collisions++;);
            c = c->m_next;
        }
        while (c != nullptr);
        return false;
    }

    void erase(T const & d) {
        unsigned mask = m_slots - 1;
        unsigned h    = get_hash(d);
        unsigned idx  = h & mask;
        cell * c      = m_table + idx;
        if (c->is_free())
            return; 
        cell * prev = nullptr;
        do { 
            if (equals(c->m_data, d)) {
                m_size--;
                if (prev == nullptr) {
                    cell * next = c->m_next;
                    if (next == nullptr) {
                        m_used_slots--;
                        c->mark_free();
                        SASSERT(c->is_free());
                    }
                    else {
                        *c = *next;
                        recycle_cell(next);
                    }
                }
                else {
                    prev->m_next = c->m_next;
                    recycle_cell(c);
                }
                CASSERT("chashtable_bug", check_invariant());
                return;
            }
            CHS_CODE(m_collisions++;);
            prev = c;
            c = c->m_next;
        }
        while (c != nullptr);
    }

    class iterator {
        cell  * m_it;
        cell  * m_end;
        cell  * m_list_it;

        void move_to_used() {
            while (m_it != m_end) {
                if (!m_it->is_free()) {
                    m_list_it = m_it;
                    return;
                }
                m_it++;
            }
            m_list_it = nullptr;
        }
        
    public:
        iterator(cell * start, cell * end): m_it(start), m_end(end) { move_to_used(); }
        iterator():m_it(nullptr), m_end(nullptr), m_list_it(nullptr) {}
        T & operator*() { 
            return m_list_it->m_data; 
        }
        T const & operator*() const { 
            return m_list_it->m_data; 
        }
        T const * operator->() const { return &(operator*()); }
        T * operator->() { return &(operator*()); }
        iterator & operator++() { 
            m_list_it = m_list_it->m_next;
            if (m_list_it == nullptr) {
                m_it++;
                move_to_used();
            }
            return *this;
        }
        iterator operator++(int) { iterator tmp = *this; ++*this; return tmp; }
        bool operator==(iterator const & it) const { return m_list_it == it.m_list_it; }
        bool operator!=(iterator const & it) const { return m_list_it != it.m_list_it; }
    };
    
    iterator begin() const { return iterator(m_table, m_table + m_slots); }
    iterator end() const { return iterator(); }

    void swap(chashtable & other) noexcept {
        std::swap(m_table,       other.m_table);
        std::swap(m_capacity,    other.m_capacity);
        std::swap(m_init_slots,  other.m_init_slots);
        std::swap(m_init_cellar, other.m_init_cellar);
        std::swap(m_slots,       other.m_slots);
        std::swap(m_used_slots,  other.m_used_slots);
        std::swap(m_size,        other.m_size);
#ifdef CH_STATISTICS
        std::swap(m_collisions,  other.m_collisions);
#endif
        std::swap(m_next_cell,   other.m_next_cell);
        std::swap(m_free_cell,   other.m_free_cell);
    }

    unsigned collisions() const {
#ifdef CH_STATISTICS
        return m_collisions;
#else
        return 0;
#endif
    }

#ifdef Z3DEBUG
    bool check_invariant() const {
        ptr_addr_hashtable visited;
        unsigned sz = 0;
        cell *  _end = m_table + m_slots;
        for (cell * it = m_table; it != _end; ++it) {
            if (!it->is_free()) {
                cell * list_it = it;
                while (list_it != 0) {
                    sz++;
                    SASSERT(!visited.contains(list_it));
                    visited.insert(list_it);
                    list_it = list_it->m_next;
                }
            }
        }
        SASSERT(m_size == sz);
        return true;
    }
#endif
};

template
class cmap {
public:
    struct key_value {
        Key    m_key;
        Value  m_value;
        key_value() {}
        key_value(Key const & k):m_key(k) {}
        key_value(Key const & k, Value const & v):m_key(k), m_value(v) {}
    };

protected:
    struct key_value_hash_proc : private HashProc {
        key_value_hash_proc(HashProc const & p):HashProc(p) {}
        unsigned operator()(key_value const & d) const { return HashProc::operator()(d.m_key); }
    };

    struct key_value_eq_proc : private EqProc {
        key_value_eq_proc(EqProc const & p):EqProc(p) {}
        bool operator()(key_value const & d1, key_value const & d2) const { return EqProc::operator()(d1.m_key, d2.m_key); }
    };

    typedef chashtable table;
    
    table m_table;

public:
    cmap(HashProc const & h = HashProc(),
         EqProc const & e = EqProc(),
         unsigned init_slots  = table::default_init_slots,
         unsigned init_cellar = table::default_init_cellar):
        m_table(key_value_hash_proc(h),
                key_value_eq_proc(e),
                init_slots,
                init_cellar) {
    }

    typedef typename table::iterator iterator;
    
    void reset() {
        m_table.reset();
    }

    void finalize() {
        m_table.finalize();
    }
    
    bool empty() const { 
        return m_table.empty();
    }
    
    unsigned size() const { 
        return m_table.size(); 
    }
    
    unsigned capacity() const { 
        return m_table.capacity();
    }

    unsigned used_slots() const { 
        return m_table.used_slots();
    }

    unsigned collisions() const {
        return m_table.collisions();
    }
    
    iterator begin() const { 
        return m_table.begin();
    }
    
    iterator end() const { 
        return m_table.end();
    }
    
    void insert(Key const & k, Value const & v) {
        return m_table.insert(key_value(k, v));
    }

    key_value & insert_if_not_there(Key const & k, Value const & v) {
        return m_table.insert_if_not_there(key_value(k, v));
    }

    bool contains(Key const & k) const {
        return m_table.contains(key_value(k));
    }

    key_value * find_core(Key const & k) const {
        return m_table.find_core(key_value(k)); 
    }

    bool find(Key const & k, Value & v) const {
        key_value * e = m_table.find_core(key_value(k));
        if (e == nullptr)
            return false;
        v = e->m_value;
        return true;
    }

    void erase(Key const & k) {
        m_table.erase(key_value(k));
    }
};





© 2015 - 2024 Weber Informatics LLC | Privacy Policy