diff options
Diffstat (limited to 'kernel/hashlib.h')
-rw-r--r-- | kernel/hashlib.h | 241 |
1 files changed, 217 insertions, 24 deletions
diff --git a/kernel/hashlib.h b/kernel/hashlib.h index c96023920..3c824b8c3 100644 --- a/kernel/hashlib.h +++ b/kernel/hashlib.h @@ -1,5 +1,5 @@ // This is free and unencumbered software released into the public domain. -// +// // Anyone is free to copy, modify, publish, use, compile, sell, or // distribute this software, either in source code form or as a compiled // binary, for any purpose, commercial or non-commercial, and by any @@ -10,6 +10,7 @@ // ------------------------------------------------------- #ifndef HASHLIB_H +#define HASHLIB_H #include <stdexcept> #include <algorithm> @@ -30,7 +31,7 @@ inline unsigned int mkhash(unsigned int a, unsigned int b) { const unsigned int mkhash_init = 5381; // The ADD version of DJB2 -// (usunsigned int mkhashe this version for cache locality in b) +// (use this version for cache locality in b) inline unsigned int mkhash_add(unsigned int a, unsigned int b) { return ((a << 5) + a) + b; } @@ -58,16 +59,25 @@ template<typename T> struct hash_ops { } }; -template<> struct hash_ops<int> { +struct hash_int_ops { template<typename T> static inline bool cmp(T a, T b) { return a == b; } - template<typename T> - static inline unsigned int hash(T a) { +}; + +template<> struct hash_ops<int32_t> : hash_int_ops +{ + static inline unsigned int hash(int32_t a) { return a; } }; +template<> struct hash_ops<int64_t> : hash_int_ops +{ + static inline unsigned int hash(int64_t a) { + return mkhash((unsigned int)(a), (unsigned int)(a >> 32)); + } +}; template<> struct hash_ops<std::string> { static inline bool cmp(const std::string &a, const std::string &b) { @@ -86,9 +96,22 @@ template<typename P, typename Q> struct hash_ops<std::pair<P, Q>> { return a == b; } static inline unsigned int hash(std::pair<P, Q> a) { - hash_ops<P> p_ops; - hash_ops<Q> q_ops; - return mkhash(p_ops.hash(a.first), q_ops.hash(a.second)); + return mkhash(hash_ops<P>::hash(a.first), hash_ops<Q>::hash(a.second)); + } +}; + +template<typename... T> struct hash_ops<std::tuple<T...>> { + static inline bool cmp(std::tuple<T...> a, std::tuple<T...> b) { + return a == b; + } + template<size_t I = 0> + static inline typename std::enable_if<I == sizeof...(T), unsigned int>::type hash(std::tuple<T...>) { + return mkhash_init; + } + template<size_t I = 0> + static inline typename std::enable_if<I != sizeof...(T), unsigned int>::type hash(std::tuple<T...> a) { + typedef hash_ops<typename std::tuple_element<I, std::tuple<T...>>::type> element_ops_t; + return mkhash(hash<I+1>(a), element_ops_t::hash(std::get<I>(a))); } }; @@ -97,10 +120,9 @@ template<typename T> struct hash_ops<std::vector<T>> { return a == b; } static inline unsigned int hash(std::vector<T> a) { - hash_ops<T> t_ops; unsigned int h = mkhash_init; for (auto k : a) - h = mkhash(h, t_ops.hash(k)); + h = mkhash(h, hash_ops<T>::hash(k)); return h; } }; @@ -115,8 +137,8 @@ struct hash_cstr_ops { static inline unsigned int hash(const char *a) { unsigned int hash = mkhash_init; while (*a) - hash = mkhash(hash, *(a++)); - return hash; + hash = mkhash(hash, *(a++)); + return hash; } }; @@ -135,10 +157,15 @@ struct hash_obj_ops { } template<typename T> static inline unsigned int hash(const T *a) { - return a->hash(); + return a ? a->hash() : 0; } }; +template<typename T> +inline unsigned int mkhash(const T &v) { + return hash_ops<T>().hash(v); +} + inline int hashtable_size(int min_size) { static std::vector<int> zero_and_some_primes = { @@ -167,6 +194,7 @@ inline int hashtable_size(int min_size) template<typename K, typename T, typename OPS = hash_ops<K>> class dict; template<typename K, int offset = 0, typename OPS = hash_ops<K>> class idict; template<typename K, typename OPS = hash_ops<K>> class pool; +template<typename K, typename OPS = hash_ops<K>> class mfp; template<typename K, typename T, typename OPS> class dict @@ -178,18 +206,19 @@ class dict entry_t() { } entry_t(const std::pair<K, T> &udata, int next) : udata(udata), next(next) { } + entry_t(std::pair<K, T> &&udata, int next) : udata(std::move(udata)), next(next) { } }; std::vector<int> hashtable; std::vector<entry_t> entries; OPS ops; -#if 0 +#ifdef NDEBUG + static inline void do_assert(bool) { } +#else static inline void do_assert(bool cond) { if (!cond) throw std::runtime_error("dict<> assert failed."); } -#else - static inline void do_assert(bool) { } #endif int do_hash(const K &key) const @@ -203,7 +232,7 @@ class dict void do_rehash() { hashtable.clear(); - hashtable.resize(hashtable_size(entries.size() * hashtable_size_factor), -1); + hashtable.resize(hashtable_size(entries.capacity() * hashtable_size_factor), -1); for (int i = 0; i < int(entries.size()); i++) { do_assert(-1 <= entries[i].next && entries[i].next < int(entries.size())); @@ -220,6 +249,8 @@ class dict return 0; int k = hashtable[hash]; + do_assert(0 <= k && k < int(entries.size())); + if (k == index) { hashtable[hash] = entries[index].next; } else { @@ -237,6 +268,8 @@ class dict int back_hash = do_hash(entries[back_idx].udata.first); k = hashtable[back_hash]; + do_assert(0 <= k && k < int(entries.size())); + if (k == back_idx) { hashtable[back_hash] = index; } else { @@ -278,6 +311,19 @@ class dict return index; } + int do_insert(const K &key, int &hash) + { + if (hashtable.empty()) { + entries.push_back(entry_t(std::pair<K, T>(key, T()), -1)); + do_rehash(); + hash = do_hash(key); + } else { + entries.push_back(entry_t(std::pair<K, T>(key, T()), hashtable[hash])); + hashtable[hash] = entries.size() - 1; + } + return entries.size() - 1; + } + int do_insert(const std::pair<K, T> &value, int &hash) { if (hashtable.empty()) { @@ -375,6 +421,16 @@ public: insert(*first); } + std::pair<iterator, bool> insert(const K &key) + { + int hash = do_hash(key); + int i = do_lookup(key, hash); + if (i >= 0) + return std::pair<iterator, bool>(iterator(this, i), false); + i = do_insert(key, hash); + return std::pair<iterator, bool>(iterator(this, i), true); + } + std::pair<iterator, bool> insert(const std::pair<K, T> &value) { int hash = do_hash(value.first); @@ -449,6 +505,15 @@ public: return entries[i].udata.second; } + T at(const K &key, const T &defval) const + { + int hash = do_hash(key); + int i = do_lookup(key, hash); + if (i < 0) + return defval; + return entries[i].udata.second; + } + T& operator[](const K &key) { int hash = do_hash(key); @@ -476,16 +541,17 @@ public: return false; for (auto &it : entries) { auto oit = other.find(it.udata.first); - if (oit == other.end() || oit->second != it.udata.second) + if (oit == other.end() || !(oit->second == it.udata.second)) return false; } return true; } bool operator!=(const dict &other) const { - return !(*this == other); + return !operator==(other); } + void reserve(size_t n) { entries.reserve(n); } size_t size() const { return entries.size(); } bool empty() const { return entries.empty(); } void clear() { hashtable.clear(); entries.clear(); } @@ -516,12 +582,12 @@ protected: std::vector<entry_t> entries; OPS ops; -#if 0 +#ifdef NDEBUG + static inline void do_assert(bool) { } +#else static inline void do_assert(bool cond) { if (!cond) throw std::runtime_error("pool<> assert failed."); } -#else - static inline void do_assert(bool) { } #endif int do_hash(const K &key) const @@ -535,7 +601,7 @@ protected: void do_rehash() { hashtable.clear(); - hashtable.resize(hashtable_size(entries.size() * hashtable_size_factor), -1); + hashtable.resize(hashtable_size(entries.capacity() * hashtable_size_factor), -1); for (int i = 0; i < int(entries.size()); i++) { do_assert(-1 <= entries[i].next && entries[i].next < int(entries.size())); @@ -775,6 +841,14 @@ public: do_rehash(); } + K pop() + { + iterator it = begin(); + K ret = *it; + erase(it); + return ret; + } + void swap(pool &other) { hashtable.swap(other.hashtable); @@ -791,9 +865,10 @@ public: } bool operator!=(const pool &other) const { - return !(*this == other); + return !operator==(other); } + void reserve(size_t n) { entries.reserve(n); } size_t size() const { return entries.size(); } bool empty() const { return entries.empty(); } void clear() { hashtable.clear(); entries.clear(); } @@ -831,6 +906,15 @@ public: return i + offset; } + int at(const K &key, int defval) const + { + int hash = database.do_hash(key); + int i = database.do_lookup(key, hash); + if (i < 0) + return defval; + return i + offset; + } + int count(const K &key) const { int hash = database.do_hash(key); @@ -850,6 +934,115 @@ public: return database.entries.at(index - offset).udata; } + void swap(idict &other) + { + database.swap(other.database); + } + + void reserve(size_t n) { database.reserve(n); } + size_t size() const { return database.size(); } + bool empty() const { return database.empty(); } + void clear() { database.clear(); } + + const_iterator begin() const { return database.begin(); } + const_iterator end() const { return database.end(); } +}; + +template<typename K, typename OPS> +class mfp +{ + mutable idict<K, 0, OPS> database; + mutable std::vector<int> parents; + +public: + typedef typename idict<K, 0, OPS>::const_iterator const_iterator; + + int operator()(const K &key) const + { + int i = database(key); + parents.resize(database.size(), -1); + return i; + } + + const K &operator[](int index) const + { + return database[index]; + } + + int ifind(int i) const + { + int p = i, k = i; + + while (parents[p] != -1) + p = parents[p]; + + while (k != p) { + int next_k = parents[k]; + parents[k] = p; + k = next_k; + } + + return p; + } + + void imerge(int i, int j) + { + i = ifind(i); + j = ifind(j); + + if (i != j) + parents[i] = j; + } + + void ipromote(int i) + { + int k = i; + + while (k != -1) { + int next_k = parents[k]; + parents[k] = i; + k = next_k; + } + + parents[i] = -1; + } + + int lookup(const K &a) const + { + return ifind((*this)(a)); + } + + const K &find(const K &a) const + { + int i = database.at(a, -1); + if (i < 0) + return a; + return (*this)[ifind(i)]; + } + + void merge(const K &a, const K &b) + { + imerge((*this)(a), (*this)(b)); + } + + void promote(const K &a) + { + int i = database.at(a, -1); + if (i >= 0) + ipromote(i); + } + + void swap(mfp &other) + { + database.swap(other.database); + parents.swap(other.parents); + } + + void reserve(size_t n) { database.reserve(n); } + size_t size() const { return database.size(); } + bool empty() const { return database.empty(); } + void clear() { database.clear(); parents.clear(); } + const_iterator begin() const { return database.begin(); } const_iterator end() const { return database.end(); } }; |