diff options
Diffstat (limited to 'kernel/hashlib.h')
-rw-r--r-- | kernel/hashlib.h | 887 |
1 files changed, 887 insertions, 0 deletions
diff --git a/kernel/hashlib.h b/kernel/hashlib.h new file mode 100644 index 000000000..94b573e47 --- /dev/null +++ b/kernel/hashlib.h @@ -0,0 +1,887 @@ +// This is free and unencumbered software released into the public domain. +// +// Anyone is free to copy, modify, publish, use, compile, sell, or +// distribute this software, either in source code form or as a compiled +// binary, for any purpose, commercial or non-commercial, and by any +// means. + +// ------------------------------------------------------- +// Written by Clifford Wolf <clifford@clifford.at> in 2014 +// ------------------------------------------------------- + +#ifndef HASHLIB_H + +#include <stdexcept> +#include <algorithm> +#include <string> +#include <vector> + +namespace hashlib { + +const int hashtable_size_trigger = 2; +const int hashtable_size_factor = 3; + +// The XOR version of DJB2 +inline unsigned int mkhash(unsigned int a, unsigned int b) { + return ((a << 5) + a) ^ b; +} + +// traditionally 5381 is used as starting value for the djb2 hash +const unsigned int mkhash_init = 5381; + +// The ADD version of DJB2 +// (usunsigned int mkhashe this version for cache locality in b) +inline unsigned int mkhash_add(unsigned int a, unsigned int b) { + return ((a << 5) + a) + b; +} + +inline unsigned int mkhash_xorshift(unsigned int a) { + if (sizeof(a) == 4) { + a ^= a << 13; + a ^= a >> 17; + a ^= a << 5; + } else if (sizeof(a) == 8) { + a ^= a << 13; + a ^= a >> 7; + a ^= a << 17; + } else + throw std::runtime_error("mkhash_xorshift() only implemented for 32 bit and 64 bit ints"); + return a; +} + +template<typename T> struct hash_ops { + static inline bool cmp(const T &a, const T &b) { + return a == b; + } + static inline unsigned int hash(const T &a) { + return a.hash(); + } +}; + +template<> struct hash_ops<int> { + template<typename T> + static inline bool cmp(T a, T b) { + return a == b; + } + template<typename T> + static inline unsigned int hash(T a) { + return a; + } +}; + +template<> struct hash_ops<std::string> { + static inline bool cmp(const std::string &a, const std::string &b) { + return a == b; + } + static inline unsigned int hash(const std::string &a) { + unsigned int v = 0; + for (auto c : a) + v = mkhash(v, c); + return v; + } +}; + +template<typename P, typename Q> struct hash_ops<std::pair<P, Q>> { + static inline bool cmp(std::pair<P, Q> a, std::pair<P, Q> b) { + return a == b; + } + static inline unsigned int hash(std::pair<P, Q> a) { + hash_ops<P> p_ops; + hash_ops<Q> q_ops; + return mkhash(p_ops.hash(a.first), q_ops.hash(a.second)); + } +}; + +template<typename T> struct hash_ops<std::vector<T>> { + static inline bool cmp(std::vector<T> a, std::vector<T> b) { + return a == b; + } + static inline unsigned int hash(std::vector<T> a) { + hash_ops<T> t_ops; + unsigned int h = mkhash_init; + for (auto k : a) + h = mkhash(h, t_ops.hash(k)); + return h; + } +}; + +struct hash_cstr_ops { + static inline bool cmp(const char *a, const char *b) { + for (int i = 0; a[i] || b[i]; i++) + if (a[i] != b[i]) + return false; + return true; + } + static inline unsigned int hash(const char *a) { + unsigned int hash = mkhash_init; + while (*a) + hash = mkhash(hash, *(a++)); + return hash; + } +}; + +struct hash_ptr_ops { + static inline bool cmp(const void *a, const void *b) { + return a == b; + } + static inline unsigned int hash(const void *a) { + return (unsigned long)a; + } +}; + +struct hash_obj_ops { + static inline bool cmp(const void *a, const void *b) { + return a == b; + } + template<typename T> + static inline unsigned int hash(const T *a) { + return a->hash(); + } +}; + +inline int hashtable_size(int min_size) +{ + static std::vector<int> zero_and_some_primes = { + 0, 23, 29, 37, 47, 59, 79, 101, 127, 163, 211, 269, 337, 431, 541, 677, + 853, 1069, 1361, 1709, 2137, 2677, 3347, 4201, 5261, 6577, 8231, 10289, + 12889, 16127, 20161, 25219, 31531, 39419, 49277, 61603, 77017, 96281, + 120371, 150473, 188107, 235159, 293957, 367453, 459317, 574157, 717697, + 897133, 1121423, 1401791, 1752239, 2190299, 2737937, 3422429, 4278037, + 5347553, 6684443, 8355563, 10444457, 13055587, 16319519, 20399411, + 25499291, 31874149, 39842687, 49803361, 62254207, 77817767, 97272239, + 121590311, 151987889, 189984863, 237481091, 296851369, 371064217 + }; + + for (auto p : zero_and_some_primes) + if (p >= min_size) return p; + + if (sizeof(int) == 4) + throw std::length_error("hash table exceeded maximum size. use a ILP64 abi for larger tables."); + + for (auto p : zero_and_some_primes) + if (100129 * p > min_size) return 100129 * p; + + throw std::length_error("hash table exceeded maximum size."); +} + +template<typename K, typename T, typename OPS = hash_ops<K>> class dict; +template<typename K, int offset = 0, typename OPS = hash_ops<K>> class idict; +template<typename K, typename OPS = hash_ops<K>> class pool; + +template<typename K, typename T, typename OPS> +class dict +{ + struct entry_t + { + std::pair<K, T> udata; + int next; + + entry_t() { } + entry_t(const std::pair<K, T> &udata, int next) : udata(udata), next(next) { } + entry_t(std::pair<K, T> &&udata, int next) : udata(std::move(udata)), next(next) { } + }; + + std::vector<int> hashtable; + std::vector<entry_t> entries; + OPS ops; + +#ifdef NDEBUG + static inline void do_assert(bool) { } +#else + static inline void do_assert(bool cond) { + if (!cond) throw std::runtime_error("dict<> assert failed."); + } +#endif + + int do_hash(const K &key) const + { + unsigned int hash = 0; + if (!hashtable.empty()) + hash = ops.hash(key) % (unsigned int)(hashtable.size()); + return hash; + } + + void do_rehash() + { + hashtable.clear(); + hashtable.resize(hashtable_size(entries.size() * hashtable_size_factor), -1); + + for (int i = 0; i < int(entries.size()); i++) { + do_assert(-1 <= entries[i].next && entries[i].next < int(entries.size())); + int hash = do_hash(entries[i].udata.first); + entries[i].next = hashtable[hash]; + hashtable[hash] = i; + } + } + + int do_erase(int index, int hash) + { + do_assert(index < int(entries.size())); + if (hashtable.empty() || index < 0) + return 0; + + int k = hashtable[hash]; + do_assert(0 <= k && k < int(entries.size())); + + if (k == index) { + hashtable[hash] = entries[index].next; + } else { + while (entries[k].next != index) { + k = entries[k].next; + do_assert(0 <= k && k < int(entries.size())); + } + entries[k].next = entries[index].next; + } + + int back_idx = entries.size()-1; + + if (index != back_idx) + { + int back_hash = do_hash(entries[back_idx].udata.first); + + k = hashtable[back_hash]; + do_assert(0 <= k && k < int(entries.size())); + + if (k == back_idx) { + hashtable[back_hash] = index; + } else { + while (entries[k].next != back_idx) { + k = entries[k].next; + do_assert(0 <= k && k < int(entries.size())); + } + entries[k].next = index; + } + + entries[index] = std::move(entries[back_idx]); + } + + entries.pop_back(); + + if (entries.empty()) + hashtable.clear(); + + return 1; + } + + int do_lookup(const K &key, int &hash) const + { + if (hashtable.empty()) + return -1; + + if (entries.size() * hashtable_size_trigger > hashtable.size()) { + ((dict*)this)->do_rehash(); + hash = do_hash(key); + } + + int index = hashtable[hash]; + + while (index >= 0 && !ops.cmp(entries[index].udata.first, key)) { + index = entries[index].next; + do_assert(-1 <= index && index < int(entries.size())); + } + + return index; + } + + int do_insert(const K &key, int &hash) + { + if (hashtable.empty()) { + entries.push_back(entry_t(std::pair<K, T>(key, T()), -1)); + do_rehash(); + hash = do_hash(key); + } else { + entries.push_back(entry_t(std::pair<K, T>(key, T()), hashtable[hash])); + hashtable[hash] = entries.size() - 1; + } + return entries.size() - 1; + } + + int do_insert(const std::pair<K, T> &value, int &hash) + { + if (hashtable.empty()) { + entries.push_back(entry_t(value, -1)); + do_rehash(); + hash = do_hash(value.first); + } else { + entries.push_back(entry_t(value, hashtable[hash])); + hashtable[hash] = entries.size() - 1; + } + return entries.size() - 1; + } + +public: + class const_iterator : public std::iterator<std::forward_iterator_tag, std::pair<K, T>> + { + friend class dict; + protected: + const dict *ptr; + int index; + const_iterator(const dict *ptr, int index) : ptr(ptr), index(index) { } + public: + const_iterator() { } + const_iterator operator++() { index--; return *this; } + bool operator<(const const_iterator &other) const { return index > other.index; } + bool operator==(const const_iterator &other) const { return index == other.index; } + bool operator!=(const const_iterator &other) const { return index != other.index; } + const std::pair<K, T> &operator*() const { return ptr->entries[index].udata; } + const std::pair<K, T> *operator->() const { return &ptr->entries[index].udata; } + }; + + class iterator : public std::iterator<std::forward_iterator_tag, std::pair<K, T>> + { + friend class dict; + protected: + dict *ptr; + int index; + iterator(dict *ptr, int index) : ptr(ptr), index(index) { } + public: + iterator() { } + iterator operator++() { index--; return *this; } + bool operator<(const iterator &other) const { return index > other.index; } + bool operator==(const iterator &other) const { return index == other.index; } + bool operator!=(const iterator &other) const { return index != other.index; } + std::pair<K, T> &operator*() { return ptr->entries[index].udata; } + std::pair<K, T> *operator->() { return &ptr->entries[index].udata; } + const std::pair<K, T> &operator*() const { return ptr->entries[index].udata; } + const std::pair<K, T> *operator->() const { return &ptr->entries[index].udata; } + operator const_iterator() const { return const_iterator(ptr, index); } + }; + + dict() + { + } + + dict(const dict &other) + { + entries = other.entries; + do_rehash(); + } + + dict(dict &&other) + { + swap(other); + } + + dict &operator=(const dict &other) { + entries = other.entries; + do_rehash(); + return *this; + } + + dict &operator=(dict &&other) { + clear(); + swap(other); + return *this; + } + + dict(const std::initializer_list<std::pair<K, T>> &list) + { + for (auto &it : list) + insert(it); + } + + template<class InputIterator> + dict(InputIterator first, InputIterator last) + { + insert(first, last); + } + + template<class InputIterator> + void insert(InputIterator first, InputIterator last) + { + for (; first != last; ++first) + insert(*first); + } + + std::pair<iterator, bool> insert(const K &key) + { + int hash = do_hash(key); + int i = do_lookup(key, hash); + if (i >= 0) + return std::pair<iterator, bool>(iterator(this, i), false); + i = do_insert(key, hash); + return std::pair<iterator, bool>(iterator(this, i), true); + } + + std::pair<iterator, bool> insert(const std::pair<K, T> &value) + { + int hash = do_hash(value.first); + int i = do_lookup(value.first, hash); + if (i >= 0) + return std::pair<iterator, bool>(iterator(this, i), false); + i = do_insert(value, hash); + return std::pair<iterator, bool>(iterator(this, i), true); + } + + int erase(const K &key) + { + int hash = do_hash(key); + int index = do_lookup(key, hash); + return do_erase(index, hash); + } + + iterator erase(iterator it) + { + int hash = do_hash(it->first); + do_erase(it.index, hash); + return ++it; + } + + int count(const K &key) const + { + int hash = do_hash(key); + int i = do_lookup(key, hash); + return i < 0 ? 0 : 1; + } + + int count(const K &key, const_iterator it) const + { + int hash = do_hash(key); + int i = do_lookup(key, hash); + return i < 0 || i > it.index ? 0 : 1; + } + + iterator find(const K &key) + { + int hash = do_hash(key); + int i = do_lookup(key, hash); + if (i < 0) + return end(); + return iterator(this, i); + } + + const_iterator find(const K &key) const + { + int hash = do_hash(key); + int i = do_lookup(key, hash); + if (i < 0) + return end(); + return const_iterator(this, i); + } + + T& at(const K &key) + { + int hash = do_hash(key); + int i = do_lookup(key, hash); + if (i < 0) + throw std::out_of_range("dict::at()"); + return entries[i].udata.second; + } + + const T& at(const K &key) const + { + int hash = do_hash(key); + int i = do_lookup(key, hash); + if (i < 0) + throw std::out_of_range("dict::at()"); + return entries[i].udata.second; + } + + T& operator[](const K &key) + { + int hash = do_hash(key); + int i = do_lookup(key, hash); + if (i < 0) + i = do_insert(std::pair<K, T>(key, T()), hash); + return entries[i].udata.second; + } + + template<typename Compare = std::less<K>> + void sort(Compare comp = Compare()) + { + std::sort(entries.begin(), entries.end(), [comp](const entry_t &a, const entry_t &b){ return comp(b.udata.first, a.udata.first); }); + do_rehash(); + } + + void swap(dict &other) + { + hashtable.swap(other.hashtable); + entries.swap(other.entries); + } + + bool operator==(const dict &other) const { + if (size() != other.size()) + return false; + for (auto &it : entries) { + auto oit = other.find(it.udata.first); + if (oit == other.end() || !(oit->second == it.udata.second)) + return false; + } + return true; + } + + bool operator!=(const dict &other) const { + return !operator==(other); + } + + size_t size() const { return entries.size(); } + bool empty() const { return entries.empty(); } + void clear() { hashtable.clear(); entries.clear(); } + + iterator begin() { return iterator(this, int(entries.size())-1); } + iterator end() { return iterator(nullptr, -1); } + + const_iterator begin() const { return const_iterator(this, int(entries.size())-1); } + const_iterator end() const { return const_iterator(nullptr, -1); } +}; + +template<typename K, typename OPS> +class pool +{ + template<typename, int, typename> friend class idict; + +protected: + struct entry_t + { + K udata; + int next; + + entry_t() { } + entry_t(const K &udata, int next) : udata(udata), next(next) { } + }; + + std::vector<int> hashtable; + std::vector<entry_t> entries; + OPS ops; + +#ifdef NDEBUG + static inline void do_assert(bool) { } +#else + static inline void do_assert(bool cond) { + if (!cond) throw std::runtime_error("pool<> assert failed."); + } +#endif + + int do_hash(const K &key) const + { + unsigned int hash = 0; + if (!hashtable.empty()) + hash = ops.hash(key) % (unsigned int)(hashtable.size()); + return hash; + } + + void do_rehash() + { + hashtable.clear(); + hashtable.resize(hashtable_size(entries.size() * hashtable_size_factor), -1); + + for (int i = 0; i < int(entries.size()); i++) { + do_assert(-1 <= entries[i].next && entries[i].next < int(entries.size())); + int hash = do_hash(entries[i].udata); + entries[i].next = hashtable[hash]; + hashtable[hash] = i; + } + } + + int do_erase(int index, int hash) + { + do_assert(index < int(entries.size())); + if (hashtable.empty() || index < 0) + return 0; + + int k = hashtable[hash]; + if (k == index) { + hashtable[hash] = entries[index].next; + } else { + while (entries[k].next != index) { + k = entries[k].next; + do_assert(0 <= k && k < int(entries.size())); + } + entries[k].next = entries[index].next; + } + + int back_idx = entries.size()-1; + + if (index != back_idx) + { + int back_hash = do_hash(entries[back_idx].udata); + + k = hashtable[back_hash]; + if (k == back_idx) { + hashtable[back_hash] = index; + } else { + while (entries[k].next != back_idx) { + k = entries[k].next; + do_assert(0 <= k && k < int(entries.size())); + } + entries[k].next = index; + } + + entries[index] = std::move(entries[back_idx]); + } + + entries.pop_back(); + + if (entries.empty()) + hashtable.clear(); + + return 1; + } + + int do_lookup(const K &key, int &hash) const + { + if (hashtable.empty()) + return -1; + + if (entries.size() * hashtable_size_trigger > hashtable.size()) { + ((pool*)this)->do_rehash(); + hash = do_hash(key); + } + + int index = hashtable[hash]; + + while (index >= 0 && !ops.cmp(entries[index].udata, key)) { + index = entries[index].next; + do_assert(-1 <= index && index < int(entries.size())); + } + + return index; + } + + int do_insert(const K &value, int &hash) + { + if (hashtable.empty()) { + entries.push_back(entry_t(value, -1)); + do_rehash(); + hash = do_hash(value); + } else { + entries.push_back(entry_t(value, hashtable[hash])); + hashtable[hash] = entries.size() - 1; + } + return entries.size() - 1; + } + +public: + class const_iterator : public std::iterator<std::forward_iterator_tag, K> + { + friend class pool; + protected: + const pool *ptr; + int index; + const_iterator(const pool *ptr, int index) : ptr(ptr), index(index) { } + public: + const_iterator() { } + const_iterator operator++() { index--; return *this; } + bool operator==(const const_iterator &other) const { return index == other.index; } + bool operator!=(const const_iterator &other) const { return index != other.index; } + const K &operator*() const { return ptr->entries[index].udata; } + const K *operator->() const { return &ptr->entries[index].udata; } + }; + + class iterator : public std::iterator<std::forward_iterator_tag, K> + { + friend class pool; + protected: + pool *ptr; + int index; + iterator(pool *ptr, int index) : ptr(ptr), index(index) { } + public: + iterator() { } + iterator operator++() { index--; return *this; } + bool operator==(const iterator &other) const { return index == other.index; } + bool operator!=(const iterator &other) const { return index != other.index; } + K &operator*() { return ptr->entries[index].udata; } + K *operator->() { return &ptr->entries[index].udata; } + const K &operator*() const { return ptr->entries[index].udata; } + const K *operator->() const { return &ptr->entries[index].udata; } + operator const_iterator() const { return const_iterator(ptr, index); } + }; + + pool() + { + } + + pool(const pool &other) + { + entries = other.entries; + do_rehash(); + } + + pool(pool &&other) + { + swap(other); + } + + pool &operator=(const pool &other) { + entries = other.entries; + do_rehash(); + return *this; + } + + pool &operator=(pool &&other) { + clear(); + swap(other); + return *this; + } + + pool(const std::initializer_list<K> &list) + { + for (auto &it : list) + insert(it); + } + + template<class InputIterator> + pool(InputIterator first, InputIterator last) + { + insert(first, last); + } + + template<class InputIterator> + void insert(InputIterator first, InputIterator last) + { + for (; first != last; ++first) + insert(*first); + } + + std::pair<iterator, bool> insert(const K &value) + { + int hash = do_hash(value); + int i = do_lookup(value, hash); + if (i >= 0) + return std::pair<iterator, bool>(iterator(this, i), false); + i = do_insert(value, hash); + return std::pair<iterator, bool>(iterator(this, i), true); + } + + int erase(const K &key) + { + int hash = do_hash(key); + int index = do_lookup(key, hash); + return do_erase(index, hash); + } + + iterator erase(iterator it) + { + int hash = do_hash(*it); + do_erase(it.index, hash); + return ++it; + } + + int count(const K &key) const + { + int hash = do_hash(key); + int i = do_lookup(key, hash); + return i < 0 ? 0 : 1; + } + + int count(const K &key, const_iterator it) const + { + int hash = do_hash(key); + int i = do_lookup(key, hash); + return i < 0 || i > it.index ? 0 : 1; + } + + iterator find(const K &key) + { + int hash = do_hash(key); + int i = do_lookup(key, hash); + if (i < 0) + return end(); + return iterator(this, i); + } + + const_iterator find(const K &key) const + { + int hash = do_hash(key); + int i = do_lookup(key, hash); + if (i < 0) + return end(); + return const_iterator(this, i); + } + + bool operator[](const K &key) + { + int hash = do_hash(key); + int i = do_lookup(key, hash); + return i >= 0; + } + + template<typename Compare = std::less<K>> + void sort(Compare comp = Compare()) + { + std::sort(entries.begin(), entries.end(), [comp](const entry_t &a, const entry_t &b){ return comp(b.udata, a.udata); }); + do_rehash(); + } + + void swap(pool &other) + { + hashtable.swap(other.hashtable); + entries.swap(other.entries); + } + + bool operator==(const pool &other) const { + if (size() != other.size()) + return false; + for (auto &it : entries) + if (!other.count(it.udata)) + return false; + return true; + } + + bool operator!=(const pool &other) const { + return !operator==(other); + } + + size_t size() const { return entries.size(); } + bool empty() const { return entries.empty(); } + void clear() { hashtable.clear(); entries.clear(); } + + iterator begin() { return iterator(this, int(entries.size())-1); } + iterator end() { return iterator(nullptr, -1); } + + const_iterator begin() const { return const_iterator(this, int(entries.size())-1); } + const_iterator end() const { return const_iterator(nullptr, -1); } +}; + +template<typename K, int offset, typename OPS> +class idict +{ + pool<K, OPS> database; + +public: + typedef typename pool<K, OPS>::const_iterator const_iterator; + + int operator()(const K &key) + { + int hash = database.do_hash(key); + int i = database.do_lookup(key, hash); + if (i < 0) + i = database.do_insert(key, hash); + return i + offset; + } + + int at(const K &key) const + { + int hash = database.do_hash(key); + int i = database.do_lookup(key, hash); + if (i < 0) + throw std::out_of_range("idict::at()"); + return i + offset; + } + + int count(const K &key) const + { + int hash = database.do_hash(key); + int i = database.do_lookup(key, hash); + return i < 0 ? 0 : 1; + } + + void expect(const K &key, int i) + { + int j = (*this)(key); + if (i != j) + throw std::out_of_range("idict::expect()"); + } + + const K &operator[](int index) const + { + return database.entries.at(index - offset).udata; + } + + const_iterator begin() const { return database.begin(); } + const_iterator end() const { return database.end(); } +}; + +} /* namespace hashlib */ + +#endif |