// Source: z3 4.13.0, src/util/chashtable.h
/*++
Copyright (c) 2011 Microsoft Corporation
Module Name:
chashtable.h
Abstract:
Hashtable with chaining.
The performance of the hashtable in hashtable.h deteriorates if
there is a huge number of deletions. In this case, the hashtable
starts to contain many cells marked as deleted, and insertion/deletion
start to suffer.
The hashtable defined in this class addresses this problem by using
chaining. Of course, there is the cost of storing the link to the next
cell.
Author:
Leonardo de Moura (leonardo) 2011-04-14.
Revision History:
--*/
#pragma once
#include "util/memory_manager.h"
#include "util/debug.h"
#include "util/trace.h"
#include "util/tptr.h"
#include "util/util.h"
#ifdef Z3DEBUG
#include "util/hashtable.h"
#endif
// CH_STATISTICS turns on the collision counter kept by the hashtable below.
// Comment out the following line to compile the counter away.
#define CH_STATISTICS
#ifdef CH_STATISTICS
// Statistics enabled: CHS_CODE(CODE) expands to CODE wrapped in its own block.
#define CHS_CODE(CODE) { CODE }
#else
// Statistics disabled: CHS_CODE(CODE) expands to nothing.
#define CHS_CODE(CODE)
#endif
template
class chashtable : private HashProc, private EqProc {
public:
static const unsigned default_init_slots = 8;
static const unsigned default_init_cellar = 2;
protected:
struct cell {
cell * m_next;
T m_data;
cell():m_next(reinterpret_cast(1)) {}
bool is_free() const { return GET_TAG(m_next) == 1; }
void mark_free() { m_next = TAG(cell*, m_next, 1); }
void unmark_free() { m_next = UNTAG(cell*, m_next); }
};
cell * m_table; // array of cells.
unsigned m_capacity; // size of the array of cells.
unsigned m_init_slots;
unsigned m_init_cellar;
unsigned m_slots; // m_slots < m_capacity, and m_slots is a power of two, the cells [m_slots, m_capacity) are used for chaining.
unsigned m_used_slots; // m_used_slots <= m_slots (number of used slots).
unsigned m_size; // number of occupied cells.
#ifdef CH_STATISTICS
unsigned m_collisions;
#endif
cell * m_next_cell;
cell * m_free_cell;
cell * m_tofree_cell;
unsigned get_hash(T const & d) const { return HashProc::operator()(d); }
bool equals(T const & e1, T const & e2) const { return EqProc::operator()(e1, e2); }
static cell * alloc_table(unsigned sz) {
return alloc_vect(sz);
}
void delete_table() {
dealloc_vect(m_table, m_capacity);
}
// Return the next free cell in the cellar, and the number of used slots
// Return 0 if the cellar is too small (unlikely but it might happen with a bad hash)
cell * copy_table(cell * source, unsigned source_slots, unsigned source_capacity,
cell * target, unsigned target_slots, unsigned target_capacity,
unsigned & used_slots) {
TRACE("chashtable", tout << "copy_table...\n";);
SASSERT(target_slots >= source_slots);
SASSERT(target_capacity >= source_capacity);
unsigned target_mask = target_slots - 1;
used_slots = 0;
cell * source_end = source + source_slots;
cell * target_cellar = target + target_slots;
cell * target_end = target + target_capacity;
for (cell * source_it = source; source_it != source_end; ++source_it) {
if (!source_it->is_free()) {
cell * list_it = source_it;
do {
unsigned h = get_hash(list_it->m_data);
unsigned idx = h & target_mask;
cell * target_it = target + idx;
SASSERT(target_it >= target);
SASSERT(target_it < target + target_slots);
if (target_it->is_free()) {
target_it->m_data = list_it->m_data;
target_it->m_next = nullptr;
used_slots++;
}
else {
SASSERT((get_hash(target_it->m_data) & target_mask) == idx);
if (target_cellar == target_end)
return nullptr; // the cellar is too small...
SASSERT(target_cellar >= target + target_slots);
SASSERT(target_cellar < target_end);
*target_cellar = *target_it;
target_it->m_data = list_it->m_data;
target_it->m_next = target_cellar;
target_cellar++;
}
SASSERT(!target_it->is_free());
list_it = list_it->m_next;
}
while (list_it != nullptr);
}
}
#if 0
TRACE("chashtable",
for (unsigned i = 0; i < source_capacity; i++) {
tout << i << ":[";
if (source[i].m_next == 0)
tout << "null";
else if (source[i].m_next == reinterpret_cast(1))
tout << "X";
else
tout << (source[i].m_next - source);
tout << ", " << source[i].m_data << "]\n";
}
tout << "\n";
for (unsigned i = 0; i < target_capacity; i++) {
tout << i << ":[";
if (target[i].m_next == 0)
tout << "null";
else if (target[i].m_next == reinterpret_cast(1))
tout << "X";
else
tout << (target[i].m_next - target);
tout << ", " << target[i].m_data << "]\n";
}
tout << "\n";);
#endif
return target_cellar;
}
void expand_table() {
unsigned curr_cellar = (m_capacity - m_slots);
unsigned new_slots = m_slots * 2;
unsigned new_cellar = curr_cellar * 2;
if (new_slots < m_slots || new_cellar < curr_cellar)
throw default_exception("table overflow");
while (true) {
unsigned new_capacity = new_slots + new_cellar;
if (new_capacity < new_slots)
throw default_exception("table overflow");
cell * new_table = alloc_table(new_capacity);
cell * next_cell = copy_table(m_table, m_slots, m_capacity,
new_table, new_slots, new_capacity,
m_used_slots);
if (next_cell != nullptr) {
delete_table();
m_table = new_table;
m_capacity = new_capacity;
m_slots = new_slots;
m_next_cell = next_cell;
m_free_cell = nullptr;
m_tofree_cell = nullptr;
CASSERT("chashtable", check_invariant());
return;
}
dealloc_vect(new_table, new_capacity);
if (2*new_cellar < new_cellar)
throw default_exception("table overflow");
new_cellar *= 2;
}
}
bool has_free_cells() const {
return m_free_cell != nullptr || m_next_cell < m_table + m_capacity;
}
cell * get_free_cell() {
if (m_free_cell != nullptr) {
cell * c = m_free_cell;
m_free_cell = c->m_next;
return c;
}
else {
cell * c = m_next_cell;
m_next_cell++;
return c;
}
}
void recycle_cell(cell * c) {
// c is in the cellar
SASSERT(c >= m_table + m_slots);
SASSERT(c < m_table + m_capacity);
c->m_next = m_free_cell;
m_free_cell = c;
}
void push_recycle_cell(cell* c) {
SASSERT(c < m_table + m_capacity);
c->m_next = m_tofree_cell;
m_tofree_cell = c;
}
void init(unsigned slots, unsigned cellar) {
m_capacity = slots + cellar;
m_table = alloc_table(m_capacity);
m_slots = slots;
m_used_slots = 0;
m_size = 0;
m_next_cell = m_table + slots;
m_free_cell = nullptr;
m_tofree_cell = nullptr;
}
public:
chashtable(HashProc const & h = HashProc(),
EqProc const & e = EqProc(),
unsigned init_slots = default_init_slots,
unsigned init_cellar = default_init_cellar):
HashProc(h),
EqProc(e) {
SASSERT(is_power_of_two(init_slots));
SASSERT(init_cellar > 0);
m_init_slots = init_slots;
m_init_cellar = init_cellar;
init(m_init_slots, m_init_cellar);
CHS_CODE(m_collisions = 0;);
}
~chashtable() {
#if 0
cell * it = m_table;
cell * end = m_table + m_slots;
verbose_stream() << "[chashtable] free slots: ";
for (; it != end; ++it) {
if (it->is_free())
verbose_stream() << (it - m_table) << " ";
}
verbose_stream() << "\n";
#endif
delete_table();
}
void reset() {
if (m_size == 0)
return;
finalize();
}
void finalize() {
delete_table();
init(m_init_slots, m_init_cellar);
}
bool empty() const {
return m_size == 0;
}
unsigned size() const {
return m_size;
}
unsigned capacity() const {
return m_capacity;
}
unsigned used_slots() const {
return m_used_slots;
}
void insert_fresh(T const& d) {
if (!has_free_cells()) {
expand_table();
}
unsigned mask = m_slots - 1;
unsigned h = get_hash(d);
unsigned idx = h & mask;
cell * c = m_table + idx;
cell * new_c = nullptr;
m_size++;
if (c->is_free()) {
m_used_slots++;
}
else {
new_c = get_free_cell();
*new_c = *c;
}
c->m_next = new_c;
c->m_data = d;
CASSERT("chashtable_bug", check_invariant());
}
void insert(T const & d) {
if (!has_free_cells())
expand_table();
unsigned mask = m_slots - 1;
unsigned h = get_hash(d);
unsigned idx = h & mask;
cell * c = m_table + idx;
if (c->is_free()) {
m_size++;
m_used_slots++;
c->m_data = d;
c->m_next = nullptr;
CASSERT("chashtable_bug", check_invariant());
return;
}
else {
cell * it = c;
do {
if (equals(it->m_data, d)) {
// already there
it->m_data = d;
CASSERT("chashtable_bug", check_invariant());
return;
}
CHS_CODE(m_collisions++;);
it = it->m_next;
}
while (it != nullptr);
// d is not in the table.
m_size++;
cell * new_c = get_free_cell();
*new_c = *c;
c->m_data = d;
c->m_next = new_c;
CASSERT("chashtable_bug", check_invariant());
return;
}
}
T & insert_if_not_there(T const & d) {
if (!has_free_cells())
expand_table();
unsigned mask = m_slots - 1;
unsigned h = get_hash(d);
unsigned idx = h & mask;
cell * c = m_table + idx;
if (c->is_free()) {
m_size++;
m_used_slots++;
c->m_data = d;
c->m_next = nullptr;
CASSERT("chashtable_bug", check_invariant());
return c->m_data;
}
else {
cell * it = c;
do {
if (equals(it->m_data, d)) {
// already there
CASSERT("chashtable_bug", check_invariant());
return it->m_data;
}
CHS_CODE(m_collisions++;);
it = it->m_next;
}
while (it != nullptr);
// d is not in the table.
m_size++;
cell * new_c = get_free_cell();
*new_c = *c;
c->m_data = d;
c->m_next = new_c;
CASSERT("chashtable_bug", check_invariant());
return c->m_data;
}
}
bool insert_if_not_there2(T const & d) {
if (!has_free_cells())
expand_table();
unsigned mask = m_slots - 1;
unsigned h = get_hash(d);
unsigned idx = h & mask;
cell * c = m_table + idx;
if (c->is_free()) {
m_size++;
m_used_slots++;
c->m_data = d;
c->m_next = nullptr;
CASSERT("chashtable_bug", check_invariant());
return true;
}
else {
cell * it = c;
do {
if (equals(it->m_data, d)) {
// already there
CASSERT("chashtable_bug", check_invariant());
return false;
}
CHS_CODE(m_collisions++;);
it = it->m_next;
}
while (it != nullptr);
// d is not in the table.
m_size++;
cell * new_c = get_free_cell();
*new_c = *c;
c->m_data = d;
c->m_next = new_c;
CASSERT("chashtable_bug", check_invariant());
return true;
}
}
bool contains(T const & d) const {
unsigned mask = m_slots - 1;
unsigned h = get_hash(d);
unsigned idx = h & mask;
cell * c = m_table + idx;
if (c->is_free())
return false;
do {
if (equals(c->m_data, d)) {
return true;
}
CHS_CODE(const_cast(this)->m_collisions++;);
c = c->m_next;
}
while (c != nullptr);
return false;
}
T * find_core(T const & d) const {
unsigned mask = m_slots - 1;
unsigned h = get_hash(d);
unsigned idx = h & mask;
cell * c = m_table + idx;
if (c->is_free())
return nullptr;
do {
if (equals(c->m_data, d)) {
return &(c->m_data);
}
CHS_CODE(const_cast(this)->m_collisions++;);
c = c->m_next;
}
while (c != nullptr);
return nullptr;
}
bool find(T const & d, T & r) {
unsigned mask = m_slots - 1;
unsigned h = get_hash(d);
unsigned idx = h & mask;
cell * c = m_table + idx;
if (c->is_free())
return false;
do {
if (equals(c->m_data, d)) {
r = c->m_data;
return true;
}
CHS_CODE(const_cast(this)->m_collisions++;);
c = c->m_next;
}
while (c != nullptr);
return false;
}
void erase(T const & d) {
unsigned mask = m_slots - 1;
unsigned h = get_hash(d);
unsigned idx = h & mask;
cell * c = m_table + idx;
if (c->is_free())
return;
cell * prev = nullptr;
do {
if (equals(c->m_data, d)) {
m_size--;
if (prev == nullptr) {
cell * next = c->m_next;
if (next == nullptr) {
m_used_slots--;
c->mark_free();
SASSERT(c->is_free());
}
else {
*c = *next;
recycle_cell(next);
}
}
else {
prev->m_next = c->m_next;
recycle_cell(c);
}
CASSERT("chashtable_bug", check_invariant());
return;
}
CHS_CODE(m_collisions++;);
prev = c;
c = c->m_next;
}
while (c != nullptr);
}
class iterator {
cell * m_it;
cell * m_end;
cell * m_list_it;
void move_to_used() {
while (m_it != m_end) {
if (!m_it->is_free()) {
m_list_it = m_it;
return;
}
m_it++;
}
m_list_it = nullptr;
}
public:
iterator(cell * start, cell * end): m_it(start), m_end(end) { move_to_used(); }
iterator():m_it(nullptr), m_end(nullptr), m_list_it(nullptr) {}
T & operator*() {
return m_list_it->m_data;
}
T const & operator*() const {
return m_list_it->m_data;
}
T const * operator->() const { return &(operator*()); }
T * operator->() { return &(operator*()); }
iterator & operator++() {
m_list_it = m_list_it->m_next;
if (m_list_it == nullptr) {
m_it++;
move_to_used();
}
return *this;
}
iterator operator++(int) { iterator tmp = *this; ++*this; return tmp; }
bool operator==(iterator const & it) const { return m_list_it == it.m_list_it; }
bool operator!=(iterator const & it) const { return m_list_it != it.m_list_it; }
};
iterator begin() const { return iterator(m_table, m_table + m_slots); }
iterator end() const { return iterator(); }
void swap(chashtable & other) noexcept {
std::swap(m_table, other.m_table);
std::swap(m_capacity, other.m_capacity);
std::swap(m_init_slots, other.m_init_slots);
std::swap(m_init_cellar, other.m_init_cellar);
std::swap(m_slots, other.m_slots);
std::swap(m_used_slots, other.m_used_slots);
std::swap(m_size, other.m_size);
#ifdef CH_STATISTICS
std::swap(m_collisions, other.m_collisions);
#endif
std::swap(m_next_cell, other.m_next_cell);
std::swap(m_free_cell, other.m_free_cell);
}
unsigned collisions() const {
#ifdef CH_STATISTICS
return m_collisions;
#else
return 0;
#endif
}
#ifdef Z3DEBUG
bool check_invariant() const {
ptr_addr_hashtable visited;
unsigned sz = 0;
cell * _end = m_table + m_slots;
for (cell * it = m_table; it != _end; ++it) {
if (!it->is_free()) {
cell * list_it = it;
while (list_it != 0) {
sz++;
SASSERT(!visited.contains(list_it));
visited.insert(list_it);
list_it = list_it->m_next;
}
}
}
SASSERT(m_size == sz);
return true;
}
#endif
};
template
class cmap {
public:
struct key_value {
Key m_key;
Value m_value;
key_value() {}
key_value(Key const & k):m_key(k) {}
key_value(Key const & k, Value const & v):m_key(k), m_value(v) {}
};
protected:
struct key_value_hash_proc : private HashProc {
key_value_hash_proc(HashProc const & p):HashProc(p) {}
unsigned operator()(key_value const & d) const { return HashProc::operator()(d.m_key); }
};
struct key_value_eq_proc : private EqProc {
key_value_eq_proc(EqProc const & p):EqProc(p) {}
bool operator()(key_value const & d1, key_value const & d2) const { return EqProc::operator()(d1.m_key, d2.m_key); }
};
typedef chashtable table;
table m_table;
public:
cmap(HashProc const & h = HashProc(),
EqProc const & e = EqProc(),
unsigned init_slots = table::default_init_slots,
unsigned init_cellar = table::default_init_cellar):
m_table(key_value_hash_proc(h),
key_value_eq_proc(e),
init_slots,
init_cellar) {
}
typedef typename table::iterator iterator;
void reset() {
m_table.reset();
}
void finalize() {
m_table.finalize();
}
bool empty() const {
return m_table.empty();
}
unsigned size() const {
return m_table.size();
}
unsigned capacity() const {
return m_table.capacity();
}
unsigned used_slots() const {
return m_table.used_slots();
}
unsigned collisions() const {
return m_table.collisions();
}
iterator begin() const {
return m_table.begin();
}
iterator end() const {
return m_table.end();
}
void insert(Key const & k, Value const & v) {
return m_table.insert(key_value(k, v));
}
key_value & insert_if_not_there(Key const & k, Value const & v) {
return m_table.insert_if_not_there(key_value(k, v));
}
bool contains(Key const & k) const {
return m_table.contains(key_value(k));
}
key_value * find_core(Key const & k) const {
return m_table.find_core(key_value(k));
}
bool find(Key const & k, Value & v) const {
key_value * e = m_table.find_core(key_value(k));
if (e == nullptr)
return false;
v = e->m_value;
return true;
}
void erase(Key const & k) {
m_table.erase(key_value(k));
}
};
// End of chashtable.h