diff options
Diffstat (limited to 'kernel/hashlib.h')
| -rw-r--r-- | kernel/hashlib.h | 887 | 
1 files changed, 887 insertions, 0 deletions
diff --git a/kernel/hashlib.h b/kernel/hashlib.h new file mode 100644 index 000000000..94b573e47 --- /dev/null +++ b/kernel/hashlib.h @@ -0,0 +1,887 @@ +// This is free and unencumbered software released into the public domain. +//  +// Anyone is free to copy, modify, publish, use, compile, sell, or +// distribute this software, either in source code form or as a compiled +// binary, for any purpose, commercial or non-commercial, and by any +// means. + +// ------------------------------------------------------- +// Written by Clifford Wolf <clifford@clifford.at> in 2014 +// ------------------------------------------------------- + +#ifndef HASHLIB_H + +#include <stdexcept> +#include <algorithm> +#include <string> +#include <vector> + +namespace hashlib { + +const int hashtable_size_trigger = 2; +const int hashtable_size_factor = 3; + +// The XOR version of DJB2 +inline unsigned int mkhash(unsigned int a, unsigned int b) { +	return ((a << 5) + a) ^ b; +} + +// traditionally 5381 is used as starting value for the djb2 hash +const unsigned int mkhash_init = 5381; + +// The ADD version of DJB2 +// (usunsigned int mkhashe this version for cache locality in b) +inline unsigned int mkhash_add(unsigned int a, unsigned int b) { +	return ((a << 5) + a) + b; +} + +inline unsigned int mkhash_xorshift(unsigned int a) { +	if (sizeof(a) == 4) { +		a ^= a << 13; +		a ^= a >> 17; +		a ^= a << 5; +	} else if (sizeof(a) == 8) { +		a ^= a << 13; +		a ^= a >> 7; +		a ^= a << 17; +	} else +		throw std::runtime_error("mkhash_xorshift() only implemented for 32 bit and 64 bit ints"); +	return a; +} + +template<typename T> struct hash_ops { +	static inline bool cmp(const T &a, const T &b) { +		return a == b; +	} +	static inline unsigned int hash(const T &a) { +		return a.hash(); +	} +}; + +template<> struct hash_ops<int> { +	template<typename T> +	static inline bool cmp(T a, T b) { +		return a == b; +	} +	template<typename T> +	static inline unsigned int hash(T a) { +		return a; +	} +}; + +template<> struct hash_ops<std::string> { +	static inline bool cmp(const std::string &a, const std::string &b) { +		return a == b; +	} +	static inline unsigned int hash(const std::string &a) { +		unsigned int v = 0; +		for (auto c : a) +			v = mkhash(v, c); +		return v; +	} +}; + +template<typename P, typename Q> struct hash_ops<std::pair<P, Q>> { +	static inline bool cmp(std::pair<P, Q> a, std::pair<P, Q> b) { +		return a == b; +	} +	static inline unsigned int hash(std::pair<P, Q> a) { +		hash_ops<P> p_ops; +		hash_ops<Q> q_ops; +		return mkhash(p_ops.hash(a.first), q_ops.hash(a.second)); +	} +}; + +template<typename T> struct hash_ops<std::vector<T>> { +	static inline bool cmp(std::vector<T> a, std::vector<T> b) { +		return a == b; +	} +	static inline unsigned int hash(std::vector<T> a) { +		hash_ops<T> t_ops; +		unsigned int h = mkhash_init; +		for (auto k : a) +			h = mkhash(h, t_ops.hash(k)); +		return h; +	} +}; + +struct hash_cstr_ops { +	static inline bool cmp(const char *a, const char *b) { +		for (int i = 0; a[i] || b[i]; i++) +			if (a[i] != b[i]) +				return false; +		return true; +	} +	static inline unsigned int hash(const char *a) { +		unsigned int hash = mkhash_init; +		while (*a) +			 hash = mkhash(hash, *(a++)); +		 return hash; +	} +}; + +struct hash_ptr_ops { +	static inline bool cmp(const void *a, const void *b) { +		return a == b; +	} +	static inline unsigned int hash(const void *a) { +		return (unsigned long)a; +	} +}; + +struct hash_obj_ops { +	static inline bool cmp(const void *a, const void *b) { +		return a == b; +	} +	template<typename T> +	static inline unsigned int hash(const T *a) { +		return a->hash(); +	} +}; + +inline int hashtable_size(int min_size) +{ +	static std::vector<int> zero_and_some_primes = { +		0, 23, 29, 37, 47, 59, 79, 101, 127, 163, 211, 269, 337, 431, 541, 677, +		853, 1069, 1361, 1709, 2137, 2677, 3347, 4201, 5261, 6577, 8231, 10289, +		12889, 16127, 20161, 25219, 31531, 39419, 49277, 61603, 77017, 96281, +		120371, 150473, 188107, 235159, 293957, 367453, 459317, 574157, 717697, +		897133, 1121423, 1401791, 1752239, 2190299, 2737937, 3422429, 4278037, +		5347553, 6684443, 8355563, 10444457, 13055587, 16319519, 20399411, +		25499291, 31874149, 39842687, 49803361, 62254207, 77817767, 97272239, +		121590311, 151987889, 189984863, 237481091, 296851369, 371064217 +	}; + +	for (auto p : zero_and_some_primes) +		if (p >= min_size) return p; + +	if (sizeof(int) == 4) +		throw std::length_error("hash table exceeded maximum size. use a ILP64 abi for larger tables."); + +	for (auto p : zero_and_some_primes) +		if (100129 * p > min_size) return 100129 * p; + +	throw std::length_error("hash table exceeded maximum size."); +} + +template<typename K, typename T, typename OPS = hash_ops<K>> class dict; +template<typename K, int offset = 0, typename OPS = hash_ops<K>> class idict; +template<typename K, typename OPS = hash_ops<K>> class pool; + +template<typename K, typename T, typename OPS> +class dict +{ +	struct entry_t +	{ +		std::pair<K, T> udata; +		int next; + +		entry_t() { } +		entry_t(const std::pair<K, T> &udata, int next) : udata(udata), next(next) { } +		entry_t(std::pair<K, T> &&udata, int next) : udata(std::move(udata)), next(next) { } +	}; + +	std::vector<int> hashtable; +	std::vector<entry_t> entries; +	OPS ops; + +#ifdef NDEBUG +	static inline void do_assert(bool) { } +#else +	static inline void do_assert(bool cond) { +		if (!cond) throw std::runtime_error("dict<> assert failed."); +	} +#endif + +	int do_hash(const K &key) const +	{ +		unsigned int hash = 0; +		if (!hashtable.empty()) +			hash = ops.hash(key) % (unsigned int)(hashtable.size()); +		return hash; +	} + +	void do_rehash() +	{ +		hashtable.clear(); +		hashtable.resize(hashtable_size(entries.size() * hashtable_size_factor), -1); + +		for (int i = 0; i < int(entries.size()); i++) { +			do_assert(-1 <= entries[i].next && entries[i].next < int(entries.size())); +			int hash = do_hash(entries[i].udata.first); +			entries[i].next = hashtable[hash]; +			hashtable[hash] = i; +		} +	} + +	int do_erase(int index, int hash) +	{ +		do_assert(index < int(entries.size())); +		if (hashtable.empty() || index < 0) +			return 0; + +		int k = hashtable[hash]; +		do_assert(0 <= k && k < int(entries.size())); + +		if (k == index) { +			hashtable[hash] = entries[index].next; +		} else { +			while (entries[k].next != index) { +				k = entries[k].next; +				do_assert(0 <= k && k < int(entries.size())); +			} +			entries[k].next = entries[index].next; +		} + +		int back_idx = entries.size()-1; + +		if (index != back_idx) +		{ +			int back_hash = do_hash(entries[back_idx].udata.first); + +			k = hashtable[back_hash]; +			do_assert(0 <= k && k < int(entries.size())); + +			if (k == back_idx) { +				hashtable[back_hash] = index; +			} else { +				while (entries[k].next != back_idx) { +					k = entries[k].next; +					do_assert(0 <= k && k < int(entries.size())); +				} +				entries[k].next = index; +			} + +			entries[index] = std::move(entries[back_idx]); +		} + +		entries.pop_back(); + +		if (entries.empty()) +			hashtable.clear(); + +		return 1; +	} + +	int do_lookup(const K &key, int &hash) const +	{ +		if (hashtable.empty()) +			return -1; + +		if (entries.size() * hashtable_size_trigger > hashtable.size()) { +			((dict*)this)->do_rehash(); +			hash = do_hash(key); +		} + +		int index = hashtable[hash]; + +		while (index >= 0 && !ops.cmp(entries[index].udata.first, key)) { +			index = entries[index].next; +			do_assert(-1 <= index && index < int(entries.size())); +		} + +		return index; +	} + +	int do_insert(const K &key, int &hash) +	{ +		if (hashtable.empty()) { +			entries.push_back(entry_t(std::pair<K, T>(key, T()), -1)); +			do_rehash(); +			hash = do_hash(key); +		} else { +			entries.push_back(entry_t(std::pair<K, T>(key, T()), hashtable[hash])); +			hashtable[hash] = entries.size() - 1; +		} +		return entries.size() - 1; +	} + +	int do_insert(const std::pair<K, T> &value, int &hash) +	{ +		if (hashtable.empty()) { +			entries.push_back(entry_t(value, -1)); +			do_rehash(); +			hash = do_hash(value.first); +		} else { +			entries.push_back(entry_t(value, hashtable[hash])); +			hashtable[hash] = entries.size() - 1; +		} +		return entries.size() - 1; +	} + +public: +	class const_iterator : public std::iterator<std::forward_iterator_tag, std::pair<K, T>> +	{ +		friend class dict; +	protected: +		const dict *ptr; +		int index; +		const_iterator(const dict *ptr, int index) : ptr(ptr), index(index) { } +	public: +		const_iterator() { } +		const_iterator operator++() { index--; return *this; } +		bool operator<(const const_iterator &other) const { return index > other.index; } +		bool operator==(const const_iterator &other) const { return index == other.index; } +		bool operator!=(const const_iterator &other) const { return index != other.index; } +		const std::pair<K, T> &operator*() const { return ptr->entries[index].udata; } +		const std::pair<K, T> *operator->() const { return &ptr->entries[index].udata; } +	}; + +	class iterator : public std::iterator<std::forward_iterator_tag, std::pair<K, T>> +	{ +		friend class dict; +	protected: +		dict *ptr; +		int index; +		iterator(dict *ptr, int index) : ptr(ptr), index(index) { } +	public: +		iterator() { } +		iterator operator++() { index--; return *this; } +		bool operator<(const iterator &other) const { return index > other.index; } +		bool operator==(const iterator &other) const { return index == other.index; } +		bool operator!=(const iterator &other) const { return index != other.index; } +		std::pair<K, T> &operator*() { return ptr->entries[index].udata; } +		std::pair<K, T> *operator->() { return &ptr->entries[index].udata; } +		const std::pair<K, T> &operator*() const { return ptr->entries[index].udata; } +		const std::pair<K, T> *operator->() const { return &ptr->entries[index].udata; } +		operator const_iterator() const { return const_iterator(ptr, index); } +	}; + +	dict() +	{ +	} + +	dict(const dict &other) +	{ +		entries = other.entries; +		do_rehash(); +	} + +	dict(dict &&other) +	{ +		swap(other); +	} + +	dict &operator=(const dict &other) { +		entries = other.entries; +		do_rehash(); +		return *this; +	} + +	dict &operator=(dict &&other) { +		clear(); +		swap(other); +		return *this; +	} + +	dict(const std::initializer_list<std::pair<K, T>> &list) +	{ +		for (auto &it : list) +			insert(it); +	} + +	template<class InputIterator> +	dict(InputIterator first, InputIterator last) +	{ +		insert(first, last); +	} + +	template<class InputIterator> +	void insert(InputIterator first, InputIterator last) +	{ +		for (; first != last; ++first) +			insert(*first); +	} + +	std::pair<iterator, bool> insert(const K &key) +	{ +		int hash = do_hash(key); +		int i = do_lookup(key, hash); +		if (i >= 0) +			return std::pair<iterator, bool>(iterator(this, i), false); +		i = do_insert(key, hash); +		return std::pair<iterator, bool>(iterator(this, i), true); +	} + +	std::pair<iterator, bool> insert(const std::pair<K, T> &value) +	{ +		int hash = do_hash(value.first); +		int i = do_lookup(value.first, hash); +		if (i >= 0) +			return std::pair<iterator, bool>(iterator(this, i), false); +		i = do_insert(value, hash); +		return std::pair<iterator, bool>(iterator(this, i), true); +	} + +	int erase(const K &key) +	{ +		int hash = do_hash(key); +		int index = do_lookup(key, hash); +		return do_erase(index, hash); +	} + +	iterator erase(iterator it) +	{ +		int hash = do_hash(it->first); +		do_erase(it.index, hash); +		return ++it; +	} + +	int count(const K &key) const +	{ +		int hash = do_hash(key); +		int i = do_lookup(key, hash); +		return i < 0 ? 0 : 1; +	} + +	int count(const K &key, const_iterator it) const +	{ +		int hash = do_hash(key); +		int i = do_lookup(key, hash); +		return i < 0 || i > it.index ? 0 : 1; +	} + +	iterator find(const K &key) +	{ +		int hash = do_hash(key); +		int i = do_lookup(key, hash); +		if (i < 0) +			return end(); +		return iterator(this, i); +	} + +	const_iterator find(const K &key) const +	{ +		int hash = do_hash(key); +		int i = do_lookup(key, hash); +		if (i < 0) +			return end(); +		return const_iterator(this, i); +	} + +	T& at(const K &key) +	{ +		int hash = do_hash(key); +		int i = do_lookup(key, hash); +		if (i < 0) +			throw std::out_of_range("dict::at()"); +		return entries[i].udata.second; +	} + +	const T& at(const K &key) const +	{ +		int hash = do_hash(key); +		int i = do_lookup(key, hash); +		if (i < 0) +			throw std::out_of_range("dict::at()"); +		return entries[i].udata.second; +	} + +	T& operator[](const K &key) +	{ +		int hash = do_hash(key); +		int i = do_lookup(key, hash); +		if (i < 0) +			i = do_insert(std::pair<K, T>(key, T()), hash); +		return entries[i].udata.second; +	} + +	template<typename Compare = std::less<K>> +	void sort(Compare comp = Compare()) +	{ +		std::sort(entries.begin(), entries.end(), [comp](const entry_t &a, const entry_t &b){ return comp(b.udata.first, a.udata.first); }); +		do_rehash(); +	} + +	void swap(dict &other) +	{ +		hashtable.swap(other.hashtable); +		entries.swap(other.entries); +	} + +	bool operator==(const dict &other) const { +		if (size() != other.size()) +			return false; +		for (auto &it : entries) { +			auto oit = other.find(it.udata.first); +			if (oit == other.end() || !(oit->second == it.udata.second)) +				return false; +		} +		return true; +	} + +	bool operator!=(const dict &other) const { +		return !operator==(other); +	} + +	size_t size() const { return entries.size(); } +	bool empty() const { return entries.empty(); } +	void clear() { hashtable.clear(); entries.clear(); } + +	iterator begin() { return iterator(this, int(entries.size())-1); } +	iterator end() { return iterator(nullptr, -1); } + +	const_iterator begin() const { return const_iterator(this, int(entries.size())-1); } +	const_iterator end() const { return const_iterator(nullptr, -1); } +}; + +template<typename K, typename OPS> +class pool +{ +	template<typename, int, typename> friend class idict; + +protected: +	struct entry_t +	{ +		K udata; +		int next; + +		entry_t() { } +		entry_t(const K &udata, int next) : udata(udata), next(next) { } +	}; + +	std::vector<int> hashtable; +	std::vector<entry_t> entries; +	OPS ops; + +#ifdef NDEBUG +	static inline void do_assert(bool) { } +#else +	static inline void do_assert(bool cond) { +		if (!cond) throw std::runtime_error("pool<> assert failed."); +	} +#endif + +	int do_hash(const K &key) const +	{ +		unsigned int hash = 0; +		if (!hashtable.empty()) +			hash = ops.hash(key) % (unsigned int)(hashtable.size()); +		return hash; +	} + +	void do_rehash() +	{ +		hashtable.clear(); +		hashtable.resize(hashtable_size(entries.size() * hashtable_size_factor), -1); + +		for (int i = 0; i < int(entries.size()); i++) { +			do_assert(-1 <= entries[i].next && entries[i].next < int(entries.size())); +			int hash = do_hash(entries[i].udata); +			entries[i].next = hashtable[hash]; +			hashtable[hash] = i; +		} +	} + +	int do_erase(int index, int hash) +	{ +		do_assert(index < int(entries.size())); +		if (hashtable.empty() || index < 0) +			return 0; + +		int k = hashtable[hash]; +		if (k == index) { +			hashtable[hash] = entries[index].next; +		} else { +			while (entries[k].next != index) { +				k = entries[k].next; +				do_assert(0 <= k && k < int(entries.size())); +			} +			entries[k].next = entries[index].next; +		} + +		int back_idx = entries.size()-1; + +		if (index != back_idx) +		{ +			int back_hash = do_hash(entries[back_idx].udata); + +			k = hashtable[back_hash]; +			if (k == back_idx) { +				hashtable[back_hash] = index; +			} else { +				while (entries[k].next != back_idx) { +					k = entries[k].next; +					do_assert(0 <= k && k < int(entries.size())); +				} +				entries[k].next = index; +			} + +			entries[index] = std::move(entries[back_idx]); +		} + +		entries.pop_back(); + +		if (entries.empty()) +			hashtable.clear(); + +		return 1; +	} + +	int do_lookup(const K &key, int &hash) const +	{ +		if (hashtable.empty()) +			return -1; + +		if (entries.size() * hashtable_size_trigger > hashtable.size()) { +			((pool*)this)->do_rehash(); +			hash = do_hash(key); +		} + +		int index = hashtable[hash]; + +		while (index >= 0 && !ops.cmp(entries[index].udata, key)) { +			index = entries[index].next; +			do_assert(-1 <= index && index < int(entries.size())); +		} + +		return index; +	} + +	int do_insert(const K &value, int &hash) +	{ +		if (hashtable.empty()) { +			entries.push_back(entry_t(value, -1)); +			do_rehash(); +			hash = do_hash(value); +		} else { +			entries.push_back(entry_t(value, hashtable[hash])); +			hashtable[hash] = entries.size() - 1; +		} +		return entries.size() - 1; +	} + +public: +	class const_iterator : public std::iterator<std::forward_iterator_tag, K> +	{ +		friend class pool; +	protected: +		const pool *ptr; +		int index; +		const_iterator(const pool *ptr, int index) : ptr(ptr), index(index) { } +	public: +		const_iterator() { } +		const_iterator operator++() { index--; return *this; } +		bool operator==(const const_iterator &other) const { return index == other.index; } +		bool operator!=(const const_iterator &other) const { return index != other.index; } +		const K &operator*() const { return ptr->entries[index].udata; } +		const K *operator->() const { return &ptr->entries[index].udata; } +	}; + +	class iterator : public std::iterator<std::forward_iterator_tag, K> +	{ +		friend class pool; +	protected: +		pool *ptr; +		int index; +		iterator(pool *ptr, int index) : ptr(ptr), index(index) { } +	public: +		iterator() { } +		iterator operator++() { index--; return *this; } +		bool operator==(const iterator &other) const { return index == other.index; } +		bool operator!=(const iterator &other) const { return index != other.index; } +		K &operator*() { return ptr->entries[index].udata; } +		K *operator->() { return &ptr->entries[index].udata; } +		const K &operator*() const { return ptr->entries[index].udata; } +		const K *operator->() const { return &ptr->entries[index].udata; } +		operator const_iterator() const { return const_iterator(ptr, index); } +	}; + +	pool() +	{ +	} + +	pool(const pool &other) +	{ +		entries = other.entries; +		do_rehash(); +	} + +	pool(pool &&other) +	{ +		swap(other); +	} + +	pool &operator=(const pool &other) { +		entries = other.entries; +		do_rehash(); +		return *this; +	} + +	pool &operator=(pool &&other) { +		clear(); +		swap(other); +		return *this; +	} + +	pool(const std::initializer_list<K> &list) +	{ +		for (auto &it : list) +			insert(it); +	} + +	template<class InputIterator> +	pool(InputIterator first, InputIterator last) +	{ +		insert(first, last); +	} + +	template<class InputIterator> +	void insert(InputIterator first, InputIterator last) +	{ +		for (; first != last; ++first) +			insert(*first); +	} + +	std::pair<iterator, bool> insert(const K &value) +	{ +		int hash = do_hash(value); +		int i = do_lookup(value, hash); +		if (i >= 0) +			return std::pair<iterator, bool>(iterator(this, i), false); +		i = do_insert(value, hash); +		return std::pair<iterator, bool>(iterator(this, i), true); +	} + +	int erase(const K &key) +	{ +		int hash = do_hash(key); +		int index = do_lookup(key, hash); +		return do_erase(index, hash); +	} + +	iterator erase(iterator it) +	{ +		int hash = do_hash(*it); +		do_erase(it.index, hash); +		return ++it; +	} + +	int count(const K &key) const +	{ +		int hash = do_hash(key); +		int i = do_lookup(key, hash); +		return i < 0 ? 0 : 1; +	} + +	int count(const K &key, const_iterator it) const +	{ +		int hash = do_hash(key); +		int i = do_lookup(key, hash); +		return i < 0 || i > it.index ? 0 : 1; +	} + +	iterator find(const K &key) +	{ +		int hash = do_hash(key); +		int i = do_lookup(key, hash); +		if (i < 0) +			return end(); +		return iterator(this, i); +	} + +	const_iterator find(const K &key) const +	{ +		int hash = do_hash(key); +		int i = do_lookup(key, hash); +		if (i < 0) +			return end(); +		return const_iterator(this, i); +	} + +	bool operator[](const K &key) +	{ +		int hash = do_hash(key); +		int i = do_lookup(key, hash); +		return i >= 0; +	} + +	template<typename Compare = std::less<K>> +	void sort(Compare comp = Compare()) +	{ +		std::sort(entries.begin(), entries.end(), [comp](const entry_t &a, const entry_t &b){ return comp(b.udata, a.udata); }); +		do_rehash(); +	} + +	void swap(pool &other) +	{ +		hashtable.swap(other.hashtable); +		entries.swap(other.entries); +	} + +	bool operator==(const pool &other) const { +		if (size() != other.size()) +			return false; +		for (auto &it : entries) +			if (!other.count(it.udata)) +				return false; +		return true; +	} + +	bool operator!=(const pool &other) const { +		return !operator==(other); +	} + +	size_t size() const { return entries.size(); } +	bool empty() const { return entries.empty(); } +	void clear() { hashtable.clear(); entries.clear(); } + +	iterator begin() { return iterator(this, int(entries.size())-1); } +	iterator end() { return iterator(nullptr, -1); } + +	const_iterator begin() const { return const_iterator(this, int(entries.size())-1); } +	const_iterator end() const { return const_iterator(nullptr, -1); } +}; + +template<typename K, int offset, typename OPS> +class idict +{ +	pool<K, OPS> database; + +public: +	typedef typename pool<K, OPS>::const_iterator const_iterator; + +	int operator()(const K &key) +	{ +		int hash = database.do_hash(key); +		int i = database.do_lookup(key, hash); +		if (i < 0) +			i = database.do_insert(key, hash); +		return i + offset; +	} + +	int at(const K &key) const +	{ +		int hash = database.do_hash(key); +		int i = database.do_lookup(key, hash); +		if (i < 0) +			throw std::out_of_range("idict::at()"); +		return i + offset; +	} + +	int count(const K &key) const +	{ +		int hash = database.do_hash(key); +		int i = database.do_lookup(key, hash); +		return i < 0 ? 0 : 1; +	} + +	void expect(const K &key, int i) +	{ +		int j = (*this)(key); +		if (i != j) +			throw std::out_of_range("idict::expect()"); +	} + +	const K &operator[](int index) const +	{ +		return database.entries.at(index - offset).udata; +	} + +	const_iterator begin() const { return database.begin(); } +	const_iterator end() const { return database.end(); } +}; + +} /* namespace hashlib */ + +#endif  | 
