author    Keir Fraser <keir.fraser@citrix.com>    2009-12-17 06:27:56 +0000
committer Keir Fraser <keir.fraser@citrix.com>    2009-12-17 06:27:56 +0000
commit    ab562bd46c7041d82523322dde38d42494fb37ca (patch)
tree      d249f5ba799f603f43370f9eccac5b98821762b3 /tools/memshr
parent    7e31226c7a62a1b88727b9e718eb11b745de16ab (diff)
Generic bi-directional map, and related initialisation functions.

At the moment a single map is used to store mappings between sharing handles and disk blocks. This is used to share pages which store data read from the same blocks on (virtual) disk. Note that the map is stored in a shared memory region, as it needs to be accessed by multiple tapdisk processes. This complicates memory allocation (malloc cannot be used), prevents pointers from being stored directly (as the shared memory region might be, and is, mapped at a different base address in each process), and finally requires the pthread locks to be multi-process aware.

Signed-off-by: Grzegorz Milos <Grzegorz.Milos@citrix.com>
Diffstat (limited to 'tools/memshr')
-rw-r--r--  tools/memshr/Makefile          |   47
-rw-r--r--  tools/memshr/bidir-hash.c      | 1275
-rw-r--r--  tools/memshr/bidir-hash.h      |  113
-rw-r--r--  tools/memshr/bidir-namedefs.h  |   80
-rw-r--r--  tools/memshr/interface.c       |   92
-rw-r--r--  tools/memshr/memshr-priv.h     |   34
-rw-r--r--  tools/memshr/memshr.h          |   29
-rw-r--r--  tools/memshr/shm.c             |  182
-rw-r--r--  tools/memshr/shm.h             |   35
9 files changed, 1887 insertions(+), 0 deletions(-)
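
The constraint the message describes, that the region might be mapped at a different base address in each process, is why bidir-hash.c (below) stores all pointers as base-relative offsets and converts them on access via its C2L()/L2C() macros. A minimal standalone sketch of that scheme (illustrative only, not part of the patch; the names are invented):

    #include <stddef.h>
    #include <stdint.h>

    /* A node linked by region offsets rather than raw pointers.  Offset 0
     * plays the role of NULL, which is safe because offset 0 is the region
     * header and never a valid node. */
    struct node {
        uintptr_t next_off;
    };

    static struct node *to_local(void *base, uintptr_t off)     /* cf. C2L() */
    {
        return off ? (struct node *)((char *)base + off) : NULL;
    }

    static uintptr_t to_canonical(void *base, struct node *n)   /* cf. L2C() */
    {
        return n ? (uintptr_t)((char *)n - (char *)base) : 0;
    }
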
diff --git a/tools/memshr/Makefile b/tools/memshr/Makefile
new file mode 100644
index 0000000000..e7cf33b360
--- /dev/null
+++ b/tools/memshr/Makefile
@@ -0,0 +1,47 @@
+XEN_ROOT = ../..
+include $(XEN_ROOT)/tools/Rules.mk
+
+LIBMEMSHR-BUILD := libmemshr.a
+
+CFLAGS += -Werror
+CFLAGS += -Wno-unused
+CFLAGS += -I../include
+CFLAGS += $(CFLAGS_libxenctrl)
+CFLAGS += -D_GNU_SOURCE
+CFLAGS += -fPIC
+CFLAGS += -g
+
+# Get gcc to generate the dependencies for us.
+CFLAGS += -Wp,-MD,.$(@F).d
+DEPS = .*.d
+
+LIB-SRCS := interface.c
+LIB-SRCS += shm.c
+LIB-SRCS += bidir-hash.c
+
+LIB-OBJS := interface.o
+LIB-OBJS += shm.o
+LIB-OBJS += bidir-hash-fgprtshr.o
+LIB-OBJS += bidir-hash-blockshr.o
+
+all: build
+
+build: $(LIBMEMSHR-BUILD)
+
+bidir-hash-fgprtshr.o: bidir-hash.c
+ $(CC) $(CFLAGS) -DFINGERPRINT_MAP -c -o $*.o bidir-hash.c
+
+bidir-hash-blockshr.o: bidir-hash.c
+ $(CC) $(CFLAGS) -DBLOCK_MAP -c -o $*.o bidir-hash.c
+
+libmemshr.a: $(LIB-OBJS)
+ $(AR) rc $@ $^
+
+install: all
+
+clean:
+ rm -rf *.a *.o *~ $(DEPS)
+
+.PHONY: all build clean install
+
+-include $(DEPS)
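
Note how the Makefile compiles the single bidir-hash.c source twice, once with -DFINGERPRINT_MAP and once with -DBLOCK_MAP: each define selects one map in bidir-hash.h, and bidir-namedefs.h prefixes every symbol, so the two objects carry disjoint fgprtshr_*/blockshr_* functions that link into one libmemshr.a. A hypothetical consumer would then call the prefixed names directly (sketch only, not part of the patch; record_block is an invented helper):

    #include "bidir-hash.h"   /* declares both blockshr_* and fgprtshr_* APIs */

    /* Record that (virtual) disk block 'blk' is backed by sharing handle
     * 'hnd'.  blockshr_insert() is __insert() from bidir-hash.c as compiled
     * by the bidir-hash-blockshr.o rule above; it returns 1 on success, 0 if
     * no free entries remain, or -ENOLCK if a lock timed out. */
    static int record_block(struct blockshr_hash *h, vbdblk_t blk, uint64_t hnd)
    {
        return blockshr_insert(h, blk, hnd);
    }
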
diff --git a/tools/memshr/bidir-hash.c b/tools/memshr/bidir-hash.c
new file mode 100644
index 0000000000..9304ad15d8
--- /dev/null
+++ b/tools/memshr/bidir-hash.c
@@ -0,0 +1,1275 @@
+/******************************************************************************
+ *
+ * Copyright (c) 2009 Citrix (R&D) Inc. (Grzegorz Milos)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+#include <assert.h>
+#include <errno.h>
+#include <math.h>
+#include <pthread.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <unistd.h>
+
+#include "bidir-hash.h"
+
+static const uint32_t hash_sizes[] = {53, 97, 193, 389, 769, 1543, 3079, 6151,
+ 12289, 24593, 49157, 98317, 196613, 393241, 786433, 1572869, 3145739,
+ 6291469, 12582917, 25165843, 50331653, 100663319, 201326611, 402653189,
+ 805306457, 1610612741};
+static const uint16_t hash_sizes_len =
+ sizeof(hash_sizes)/sizeof(hash_sizes[0]);
+static const float hash_max_load_fact = 0.65;
+static const float hash_min_load_fact = 0.10;
+
+/* How many buckets will be covered by a single rw lock */
+#define BUCKETS_PER_LOCK 64
+#define nr_locks(_nr_buckets) (1 + (_nr_buckets) / BUCKETS_PER_LOCK)
+
+
+#define HASH_LOCK \
+ pthread_rwlock_t hash_lock
+
+#define BUCKET_LOCK \
+ pthread_rwlock_t bucket_lock
+
+struct hash_entry
+{
+ __k_t key;
+ __v_t value;
+ /* This structure will belong to two buckets, one in each hash table */
+ struct hash_entry *key_next;
+ struct hash_entry *value_next;
+};
+
+struct bucket
+{
+ struct hash_entry *hash_entry;
+};
+
+struct bucket_lock
+{
+ BUCKET_LOCK;
+};
+
+struct __hash
+{
+ int lock_alive;
+ HASH_LOCK; /* protects:
+ * *_tab, tab_size, size_idx, *_load
+ * (all writes with wrlock)
+ */
+ volatile uint32_t nr_ent; /* # entries held in hashtables */
+ struct bucket *key_tab; /* forward mapping hashtable */
+ struct bucket *value_tab; /* backward mapping hashtable */
+ struct bucket_lock *key_lock_tab; /* key table bucket locks */
+ struct bucket_lock *value_lock_tab; /* value table bucket locks */
+    uint32_t tab_size;        /* # buckets in hashtables  */
+ uint16_t size_idx; /* table size index */
+ uint32_t max_load; /* # entries before rehash */
+ uint32_t min_load; /* # entries before rehash */
+};
+
+struct __hash *__hash_init (struct __hash *h, uint32_t min_size);
+int __key_lookup (struct __hash *h, __k_t k, __v_t *vp);
+int __value_lookup(struct __hash *h, __v_t v, __k_t *kp);
+int __insert (struct __hash *h, __k_t k, __v_t v);
+int __key_remove (struct __hash *h, __k_t k, __v_t *vp);
+int __value_remove(struct __hash *h, __v_t v, __k_t *kp);
+int __hash_destroy(struct __hash *h,
+ void (*entry_consumer)(__k_t k, __v_t v, void *p),
+ void *d);
+int __hash_iterator(struct __hash *h,
+ int (*entry_consumer)(__k_t k, __v_t v, void *p),
+ void *d);
+static void hash_resize(struct __hash *h);
+
+
+#ifdef BIDIR_USE_STDMALLOC
+
+static void* alloc_entry(struct __hash *h, int size)
+{
+ return malloc(size);
+}
+
+static void alloc_buckets(struct __hash *h,
+ int nr_buckets,
+ struct bucket **bucket_tab,
+ struct bucket_lock **bucket_locks_tab)
+{
+ *bucket_tab = (struct bucket*)
+ malloc(nr_buckets * sizeof(struct bucket));
+ *bucket_locks_tab = (struct bucket_lock*)
+ malloc(nr_locks(nr_buckets) * sizeof(struct bucket_lock));
+}
+
+static void free_entry(struct __hash *h, void *p)
+{
+ free(p);
+}
+
+static void free_buckets(struct __hash *h,
+ struct bucket *buckets,
+ struct bucket_lock *bucket_locks)
+{
+ if(buckets) free(buckets);
+ if(bucket_locks) free(bucket_locks);
+}
+
+static int max_entries(struct __hash *h)
+{
+    /* There are no explicit restrictions on how many entries we can store */
+ return -1;
+}
+
+#else
+
+/*****************************************************************************/
+/** Memory allocator for shared memory region **/
+/*****************************************************************************/
+#define SHM_TABLE_SLOTS 4
+
+struct shm_hdr
+{
+ int hash_allocated;
+ pthread_mutex_t mutex;
+ int free_tab_slots[SHM_TABLE_SLOTS];
+
+ unsigned long freelist_offset;
+
+ unsigned long entries_offset;
+ unsigned long nr_entries;
+
+ unsigned long tabs_offset;
+ unsigned long max_tab_size;
+ unsigned long max_lock_tab_size;
+
+ struct __hash hash;
+};
+
+static unsigned long get_shm_baddr(void *hdr)
+{
+ return ((unsigned long)hdr - offsetof(struct shm_hdr, hash));
+}
+
+
+/** Locations of various structures/memory areas **/
+static struct shm_hdr* get_shm_hdr(struct __hash *h)
+{
+ return (struct shm_hdr *)
+ ((unsigned long)h - offsetof(struct shm_hdr, hash));
+}
+
+static uint32_t* get_shm_freelist(struct shm_hdr *hdr)
+{
+ unsigned long shm_baddr = (unsigned long)hdr;
+ return ((uint32_t *)(shm_baddr + hdr->freelist_offset));
+}
+
+static struct hash_entry* get_shm_entries(struct shm_hdr *hdr)
+{
+ unsigned long shm_baddr = (unsigned long)hdr;
+ return ((struct hash_entry *)(shm_baddr + hdr->entries_offset));
+}
+
+static struct bucket* get_shm_tab(struct shm_hdr *hdr, int i)
+{
+ unsigned long shm_baddr = (unsigned long)hdr;
+ return ((struct bucket *)
+ ((shm_baddr + hdr->tabs_offset) +
+ i * (hdr->max_tab_size + hdr->max_lock_tab_size)));
+}
+
+static struct bucket_lock* get_shm_lock_tab(struct shm_hdr *hdr, int i)
+{
+ unsigned long shm_baddr = (unsigned long)hdr;
+ return ((struct bucket_lock *)
+ ((shm_baddr + hdr->tabs_offset) +
+ i * (hdr->max_tab_size + hdr->max_lock_tab_size) +
+ hdr->max_tab_size));
+}
+
+static int get_shm_slot(struct shm_hdr *hdr, void *p)
+{
+ unsigned long shm_baddr = (unsigned long)hdr;
+ return ((unsigned long)p - (shm_baddr + hdr->tabs_offset)) /
+ (hdr->max_tab_size + hdr->max_lock_tab_size);
+}
+
+/* Shared memory allocator locks */
+static int shm_mutex_init(struct shm_hdr *h)
+{
+ int ret;
+ pthread_mutexattr_t _attr;
+
+ ret = pthread_mutexattr_init(&_attr);
+ if(ret == 0)
+ ret = pthread_mutexattr_setpshared(&_attr, PTHREAD_PROCESS_SHARED);
+ if(ret == 0)
+ ret = pthread_mutex_init(&h->mutex, &_attr);
+ if(ret == 0)
+ ret = pthread_mutexattr_destroy(&_attr);
+
+ return ret;
+}
+
+static int shm_mutex_lock(struct shm_hdr *h)
+{
+ return pthread_mutex_lock(&h->mutex);
+}
+
+static int shm_mutex_unlock(struct shm_hdr *h)
+{
+ return pthread_mutex_unlock(&h->mutex);
+}
+
+
+/* Shared memory allocator freelist */
+static void shm_add_to_freelist(struct shm_hdr *hdr, uint32_t sl)
+{
+ uint32_t *freelist = get_shm_freelist(hdr);
+
+ shm_mutex_lock(hdr);
+ freelist[sl+1] = freelist[0];
+ freelist[0] = sl;
+ shm_mutex_unlock(hdr);
+}
+
+static uint32_t shm_get_from_freelist(struct shm_hdr *hdr)
+{
+ uint32_t *freelist = get_shm_freelist(hdr);
+ uint32_t slot;
+
+ shm_mutex_lock(hdr);
+ slot = freelist[0];
+ freelist[0] = freelist[slot+1];
+ shm_mutex_unlock(hdr);
+
+ return (slot == 0 ? -1 : slot);
+}
+
+
+#define SHM_ALLOC_MAIN(_n)
+
+static unsigned long shm_init_offsets(
+ struct shm_hdr *hdr, int nr_entries)
+{
+ hdr->freelist_offset = sizeof(struct shm_hdr);
+
+ /* Freelist needs one extra slot in the array for the freelist head */
+ hdr->entries_offset =
+ hdr->freelist_offset + (nr_entries + 1) * sizeof(uint32_t);
+ hdr->nr_entries = nr_entries;
+
+ hdr->tabs_offset = hdr->entries_offset +
+ nr_entries * sizeof(struct hash_entry);
+    /* We want to allocate the table 1.5 times larger than the number of
+       entries we want to hold in it */
+ hdr->max_tab_size =
+ (nr_entries * 3 / 2) * sizeof(struct bucket);
+ hdr->max_lock_tab_size =
+ nr_locks(hdr->max_tab_size) * sizeof(struct bucket_lock);
+
+    return hdr->tabs_offset +
+           (hdr->max_tab_size + hdr->max_lock_tab_size) * SHM_TABLE_SLOTS;
+}
+
+struct __hash* __shm_hash_init(unsigned long shm_baddr, unsigned long shm_size)
+{
+ uint32_t i;
+ struct shm_hdr *hdr;
+
+    /* Lay the header at the base of the region and clear it */
+ hdr = (struct shm_hdr *)shm_baddr;
+ memset(hdr, 0, sizeof(struct shm_hdr));
+
+ /* Find the maximum number of entries we can store in the given shm_size */
+ for(i=1; shm_init_offsets(hdr, i) < shm_size; i++){};
+ shm_init_offsets(hdr, (i-1));
+
+ memset(get_shm_freelist(hdr), 0,
+ (hdr->nr_entries + 1) * sizeof(uint32_t));
+ if(shm_mutex_init(hdr) != 0)
+ return NULL;
+ for(i=0; i<hdr->nr_entries; i++)
+ shm_add_to_freelist(hdr, i);
+ for(i=0; i<SHM_TABLE_SLOTS; i++)
+ hdr->free_tab_slots[i] = 1;
+
+ shm_mutex_lock(hdr);
+ assert(!hdr->hash_allocated);
+ hdr->hash_allocated = 1;
+ shm_mutex_unlock(hdr);
+
+ return __hash_init(&hdr->hash, 1000);
+}
+
+struct __hash* __shm_hash_get(unsigned long shm_baddr)
+{
+ struct shm_hdr *hdr = (struct shm_hdr *)shm_baddr;
+
+ return (hdr->hash_allocated ? &hdr->hash : NULL);
+}
+
+static void* alloc_entry(struct __hash *h, int size)
+{
+ struct shm_hdr *hdr = get_shm_hdr(h);
+ uint32_t slot = shm_get_from_freelist(hdr);
+
+ assert(size == sizeof(struct hash_entry));
+ if(slot == -1)
+ return NULL;
+
+ return (get_shm_entries(hdr) + slot);
+}
+
+static void alloc_buckets(struct __hash *h,
+ int nr_buckets,
+ struct bucket **buckets_tab,
+ struct bucket_lock **bucket_locks_tab)
+{
+ struct shm_hdr *hdr = get_shm_hdr(h);
+ int free_slot;
+
+ *buckets_tab = NULL;
+ *bucket_locks_tab = NULL;
+
+ if(((nr_buckets * sizeof(struct bucket)) > hdr->max_tab_size) ||
+ ((nr_locks(nr_buckets) * sizeof(struct bucket_lock)) >
+ hdr->max_lock_tab_size))
+ return;
+
+ shm_mutex_lock(hdr);
+ for(free_slot=0; free_slot<SHM_TABLE_SLOTS; free_slot++)
+ if(hdr->free_tab_slots[free_slot])
+ break;
+ if(free_slot == SHM_TABLE_SLOTS)
+ {
+ shm_mutex_unlock(hdr);
+ return;
+ }
+ hdr->free_tab_slots[free_slot] = 0;
+ shm_mutex_unlock(hdr);
+ *buckets_tab = get_shm_tab(hdr, free_slot);
+ *bucket_locks_tab = get_shm_lock_tab(hdr, free_slot);
+}
+
+static void free_entry(struct __hash *h, void *p)
+{
+ struct shm_hdr *hdr = get_shm_hdr(h);
+ uint32_t slot;
+
+ slot = ((uint32_t)((struct hash_entry *)p -
+ get_shm_entries(hdr)));
+ shm_add_to_freelist(hdr, slot);
+}
+
+static void free_buckets(struct __hash *h,
+ struct bucket *buckets,
+ struct bucket_lock *bucket_locks)
+{
+ struct shm_hdr *hdr = get_shm_hdr(h);
+ int slot;
+
+ if(!buckets || !bucket_locks)
+ {
+ assert(!buckets && !bucket_locks);
+ return;
+ }
+ slot = get_shm_slot(hdr, buckets);
+ assert(slot < SHM_TABLE_SLOTS);
+ assert((char *)bucket_locks == (char *)buckets + hdr->max_tab_size);
+ shm_mutex_lock(hdr);
+ assert(hdr->free_tab_slots[slot] == 0);
+ hdr->free_tab_slots[slot] = 1;
+ shm_mutex_unlock(hdr);
+}
+
+static int max_entries(struct __hash *h)
+{
+ struct shm_hdr *hdr = get_shm_hdr(h);
+
+ return hdr->nr_entries;
+}
+
+#endif /* !BIDIR_USE_STDMALLOC */
+
+
+/* The structures may be stored in a shared memory region, whose base     */
+/* address is known via get_shm_baddr(). All pointers in the structures   */
+/* above are stored relative to this base address (raw pointers would not */
+/* make sense to other processes, which may map the region at a different */
+/* address). The accessor macros below convert between canonical (base    */
+/* address relative) and local addresses.                                 */
+/* C2L stands for CANONICAL_TO_LOCAL, and vice versa                      */
+#define C2L(_h, _p) ((typeof(_p))((unsigned long)(_p) + \
+ get_shm_baddr(_h)))
+#define L2C(_h, _p) ((typeof(_p))((unsigned long)(_p) - \
+ get_shm_baddr(_h)))
+
+
+#define HASH_LOCK_INIT(_h) ({ \
+ int _ret; \
+ pthread_rwlockattr_t _attr; \
+ \
+    (_h)->lock_alive = 1;                                               \
+ _ret = pthread_rwlockattr_init(&_attr); \
+ if(_ret == 0) \
+ _ret = pthread_rwlockattr_setpshared(&_attr, \
+ PTHREAD_PROCESS_SHARED); \
+ if(_ret == 0) \
+ _ret = pthread_rwlock_init(&(_h)->hash_lock, &_attr); \
+ if(_ret == 0) \
+ _ret = pthread_rwlockattr_destroy(&_attr); \
+ \
+ _ret; \
+})
+
+#define HASH_LOCK_RDLOCK(_h) ({ \
+ int _ret; \
+ \
+ if(!_h->lock_alive) _ret = ENOLCK; \
+ else \
+ { \
+ struct timespec _ts; \
+ /* 10s timeout, long but ~matches disk spin-up times */ \
+ _ts.tv_sec = time(NULL) + 10; \
+ _ts.tv_nsec = 0; \
+ _ret = pthread_rwlock_timedrdlock(&(_h)->hash_lock, &_ts); \
+ if(_ret == ETIMEDOUT) _h->lock_alive = 0; \
+ } \
+ _ret; \
+})
+
+#define HASH_LOCK_RDUNLOCK(_h) \
+ pthread_rwlock_unlock(&(_h)->hash_lock)
+
+#define HASH_LOCK_WRLOCK(_h) ({ \
+ int _ret; \
+ \
+ if(!_h->lock_alive) _ret = ENOLCK; \
+ else \
+ { \
+ struct timespec _ts; \
+ _ts.tv_sec = time(NULL) + 10; \
+ _ts.tv_nsec = 0UL; \
+ _ret = pthread_rwlock_timedwrlock(&(_h)->hash_lock, &_ts); \
+ if(_ret == ETIMEDOUT) _h->lock_alive = 0; \
+ } \
+ _ret; \
+})
+
+#define HASH_LOCK_TRYWRLOCK(_h) ({ \
+ int _ret = (_h->lock_alive ? \
+ pthread_rwlock_trywrlock(&(_h)->hash_lock) : \
+ ENOLCK); \
+ _ret; \
+})
+
+#define HASH_LOCK_WRUNLOCK(_h) \
+ pthread_rwlock_unlock(&(_h)->hash_lock)
+
+
+#define BUCKET_LOCK_INIT(_h, _b) ({ \
+ int _ret; \
+ pthread_rwlockattr_t _attr; \
+ \
+ _ret = pthread_rwlockattr_init(&_attr); \
+ if(_ret == 0) \
+ _ret = pthread_rwlockattr_setpshared(&_attr, \
+ PTHREAD_PROCESS_SHARED); \
+ if(_ret == 0) \
+ _ret = pthread_rwlock_init(&(_b)->bucket_lock, &_attr); \
+ if(_ret == 0) \
+ _ret = pthread_rwlockattr_destroy(&_attr); \
+ \
+ _ret; \
+})
+
+
+#define BUCKET_LOCK_RDLOCK(_h, _lock_tab, _idx) ({ \
+ int _ret; \
+ struct timespec _ts; \
+ struct bucket_lock *_lock = &(_lock_tab)[(_idx) / BUCKETS_PER_LOCK]; \
+ \
+ _ts.tv_sec = time(NULL) + 10; \
+ _ts.tv_nsec = 0; \
+ _ret = pthread_rwlock_timedrdlock(&(_lock)->bucket_lock, &_ts); \
+ if(_ret == ETIMEDOUT) (_h)->lock_alive = 0; \
+ _ret; \
+})
+
+
+#define BUCKET_LOCK_RDUNLOCK(_h, _lock_tab, _idx) ({ \
+ struct bucket_lock *_lock = &(_lock_tab)[(_idx) / BUCKETS_PER_LOCK]; \
+ pthread_rwlock_unlock(&(_lock)->bucket_lock); \
+})
+
+#define BUCKET_LOCK_WRLOCK(_h, _lock_tab, _idx) ({ \
+ int _ret; \
+ struct timespec _ts; \
+ struct bucket_lock *_lock = &(_lock_tab)[(_idx) / BUCKETS_PER_LOCK]; \
+ \
+ _ts.tv_sec = time(NULL) + 10; \
+ _ts.tv_nsec = 0; \
+ _ret = pthread_rwlock_timedwrlock(&(_lock)->bucket_lock, &_ts); \
+ if(_ret == ETIMEDOUT) (_h)->lock_alive = 0; \
+ _ret; \
+})
+
+#define BUCKET_LOCK_WRUNLOCK(_h, _lock_tab, _idx) ({ \
+ struct bucket_lock *_lock = &(_lock_tab)[(_idx) / BUCKETS_PER_LOCK]; \
+ pthread_rwlock_unlock(&(_lock)->bucket_lock); \
+})
+
+#define TWO_BUCKETS_LOCK_WRLOCK(_h, _blt1, _idx1, _blt2, _idx2) ({ \
+ int _ret; \
+ pthread_rwlock_t *_l1, *_l2; \
+ struct timespec _ts; \
+ struct bucket_lock *_bl1 = &(_blt1)[(_idx1) / BUCKETS_PER_LOCK]; \
+ struct bucket_lock *_bl2 = &(_blt2)[(_idx2) / BUCKETS_PER_LOCK]; \
+ \
+ assert((_bl1) != (_bl2)); \
+ if((_bl1) < (_bl2)) \
+ { \
+ _l1 = &(_bl1)->bucket_lock; \
+ _l2 = &(_bl2)->bucket_lock; \
+ } \
+ else \
+ { \
+ _l1 = &(_bl2)->bucket_lock; \
+ _l2 = &(_bl1)->bucket_lock; \
+ } \
+ _ts.tv_sec = time(NULL) + 10; \
+ _ts.tv_nsec = 0; \
+ _ret = pthread_rwlock_timedwrlock(_l1, &_ts); \
+ _ts.tv_sec = time(NULL) + 10; \
+ _ts.tv_nsec = 0; \
+ if(_ret == 0) \
+ _ret = pthread_rwlock_timedwrlock(_l2, &_ts); \
+ if(_ret == ETIMEDOUT) (_h)->lock_alive = 0; \
+ \
+ _ret; \
+})
+
+#define TWO_BUCKETS_LOCK_WRUNLOCK(_h, _blt1, _idx1, _blt2, _idx2) ({ \
+ int _ret; \
+ struct bucket_lock *_bl1 = &(_blt1)[(_idx1) / BUCKETS_PER_LOCK]; \
+ struct bucket_lock *_bl2 = &(_blt2)[(_idx2) / BUCKETS_PER_LOCK]; \
+ \
+ _ret = pthread_rwlock_unlock(&(_bl1)->bucket_lock); \
+ if(_ret == 0) \
+ _ret = pthread_rwlock_unlock(&(_bl2)->bucket_lock); \
+ \
+ _ret; \
+})
+
+
+
+
+static uint32_t hash_to_idx(struct __hash *h, uint32_t hash)
+{
+ return (hash % h->tab_size);
+}
+
+static void alloc_tab(struct __hash *h,
+ int size,
+ struct bucket **buckets_tab,
+ struct bucket_lock **bucket_locks_tab)
+{
+ int i;
+
+ alloc_buckets(h, size, buckets_tab, bucket_locks_tab);
+ if(!(*buckets_tab) || !(*bucket_locks_tab))
+ goto error_out;
+ memset(*buckets_tab, 0, size * sizeof(struct bucket));
+ memset(*bucket_locks_tab, 0, nr_locks(size) * sizeof(struct bucket_lock));
+ for(i=0; i<nr_locks(size); i++)
+ if(BUCKET_LOCK_INIT(h, *bucket_locks_tab + i) != 0)
+ goto error_out;
+
+ return;
+error_out:
+ free_buckets(h, *buckets_tab, *bucket_locks_tab);
+ *buckets_tab = NULL;
+ *bucket_locks_tab = NULL;
+ return;
+}
+
+
+struct __hash *__hash_init(struct __hash *h, uint32_t min_size)
+{
+ uint32_t size;
+ uint16_t size_idx;
+ struct bucket *buckets;
+ struct bucket_lock *bucket_locks;
+
+ /* Sanity check on args */
+ if (min_size > hash_sizes[hash_sizes_len-1]) return NULL;
+    /* Find the least size greater than or equal to min_size */
+ for(size_idx = 0; size_idx < hash_sizes_len; size_idx++)
+ if(hash_sizes[size_idx] >= min_size)
+ break;
+ size = hash_sizes[size_idx];
+
+ if(!h) return NULL;
+ alloc_tab(h, size, &buckets, &bucket_locks);
+ if(!buckets || !bucket_locks) goto alloc_fail;
+ h->key_tab = L2C(h, buckets);
+ h->key_lock_tab = L2C(h, bucket_locks);
+ alloc_tab(h, size, &buckets, &bucket_locks);
+ if(!buckets || !bucket_locks) goto alloc_fail;
+ h->value_tab = L2C(h, buckets);
+ h->value_lock_tab = L2C(h, bucket_locks);
+ /* Init all h variables */
+ if(HASH_LOCK_INIT(h) != 0) goto alloc_fail;
+ h->nr_ent = 0;
+ h->tab_size = size;
+ h->size_idx = size_idx;
+ h->max_load = (uint32_t)ceilf(hash_max_load_fact * size);
+ h->min_load = (uint32_t)ceilf(hash_min_load_fact * size);
+
+ return h;
+
+alloc_fail:
+ if(h->key_tab || h->key_lock_tab)
+ free_buckets(h, C2L(h, h->key_tab), C2L(h, h->key_lock_tab));
+ return NULL;
+}
+
+#undef __prim
+#undef __prim_t
+#undef __prim_tab
+#undef __prim_lock_tab
+#undef __prim_hash
+#undef __prim_cmp
+#undef __prim_next
+#undef __sec
+#undef __sec_t
+
+#define __prim key
+#define __prim_t __k_t
+#define __prim_tab key_tab
+#define __prim_lock_tab key_lock_tab
+#define __prim_hash __key_hash
+#define __prim_cmp __key_cmp
+#define __prim_next key_next
+#define __sec value
+#define __sec_t __v_t
+int __key_lookup(struct __hash *h, __prim_t k, __sec_t *vp)
+{
+ struct hash_entry *entry;
+ struct bucket *b;
+ struct bucket_lock *blt;
+ uint32_t idx;
+
+ if(HASH_LOCK_RDLOCK(h) != 0) return -ENOLCK;
+ idx = hash_to_idx(h, __prim_hash(k));
+ b = C2L(h, &h->__prim_tab[idx]);
+ blt = C2L(h, h->__prim_lock_tab);
+ if(BUCKET_LOCK_RDLOCK(h, blt, idx) != 0) return -ENOLCK;
+ entry = b->hash_entry;
+ while(entry != NULL)
+ {
+ entry = C2L(h, entry);
+ if(__prim_cmp(k, entry->__prim))
+ {
+ /* Unlock here */
+ *vp = entry->__sec;
+ BUCKET_LOCK_RDUNLOCK(h, blt, idx);
+ HASH_LOCK_RDUNLOCK(h);
+ return 1;
+ }
+ entry = entry->__prim_next;
+ }
+ BUCKET_LOCK_RDUNLOCK(h, blt, idx);
+ HASH_LOCK_RDUNLOCK(h);
+ return 0;
+}
+
+/* value lookup is an almost exact copy of key lookup */
+#undef __prim
+#undef __prim_t
+#undef __prim_tab
+#undef __prim_lock_tab
+#undef __prim_hash
+#undef __prim_cmp
+#undef __prim_next
+#undef __sec
+#undef __sec_t
+
+#define __prim value
+#define __prim_t __v_t
+#define __prim_tab value_tab
+#define __prim_lock_tab value_lock_tab
+#define __prim_hash __value_hash
+#define __prim_cmp __value_cmp
+#define __prim_next value_next
+#define __sec key
+#define __sec_t __k_t
+int __value_lookup(struct __hash *h, __prim_t k, __sec_t *vp)
+{
+ struct hash_entry *entry;
+ struct bucket *b;
+ struct bucket_lock *blt;
+ uint32_t idx;
+
+ if(HASH_LOCK_RDLOCK(h) != 0) return -ENOLCK;
+ idx = hash_to_idx(h, __prim_hash(k));
+ b = C2L(h, &h->__prim_tab[idx]);
+ blt = C2L(h, h->__prim_lock_tab);
+ if(BUCKET_LOCK_RDLOCK(h, blt, idx) != 0) return -ENOLCK;
+ entry = b->hash_entry;
+ while(entry != NULL)
+ {
+ entry = C2L(h, entry);
+ if(__prim_cmp(k, entry->__prim))
+ {
+ /* Unlock here */
+ *vp = entry->__sec;
+ BUCKET_LOCK_RDUNLOCK(h, blt, idx);
+ HASH_LOCK_RDUNLOCK(h);
+ return 1;
+ }
+ entry = entry->__prim_next;
+ }
+ BUCKET_LOCK_RDUNLOCK(h, blt, idx);
+ HASH_LOCK_RDUNLOCK(h);
+ return 0;
+}
+
+int __insert(struct __hash *h, __k_t k, __v_t v)
+{
+ uint32_t k_idx, v_idx;
+ struct hash_entry *entry;
+ struct bucket *bk, *bv;
+ struct bucket_lock *bltk, *bltv;
+
+
+ /* Allocate new entry before any locks (in case it fails) */
+ entry = (struct hash_entry*)
+ alloc_entry(h, sizeof(struct hash_entry));
+ if(!entry) return 0;
+
+ if(HASH_LOCK_RDLOCK(h) != 0) return -ENOLCK;
+    /* Reads from nr_ent are atomic (TODO: check), no need for fancy accessors */
+ if(h->nr_ent+1 > h->max_load)
+ {
+ /* Resize needs the write lock, drop read lock temporarily */
+ HASH_LOCK_RDUNLOCK(h);
+ hash_resize(h);
+ if(HASH_LOCK_RDLOCK(h) != 0) return -ENOLCK;
+ }
+
+ /* Init the entry */
+ entry->key = k;
+ entry->value = v;
+
+    /* Work out the indices */
+ k_idx = hash_to_idx(h, __key_hash(k));
+ v_idx = hash_to_idx(h, __value_hash(v));
+
+ /* Insert */
+ bk = C2L(h, &h->key_tab[k_idx]);
+ bv = C2L(h, &h->value_tab[v_idx]);
+ bltk = C2L(h, h->key_lock_tab);
+ bltv = C2L(h, h->value_lock_tab);
+ if(TWO_BUCKETS_LOCK_WRLOCK(h, bltk, k_idx, bltv, v_idx) != 0)
+ return -ENOLCK;
+ entry->key_next = bk->hash_entry;
+ bk->hash_entry = L2C(h, entry);
+ entry->value_next = bv->hash_entry;
+ bv->hash_entry = L2C(h, entry);
+ TWO_BUCKETS_LOCK_WRUNLOCK(h, bltk, k_idx, bltv, v_idx);
+
+    /* Bookkeeping */
+ __sync_add_and_fetch(&h->nr_ent, 1);
+
+ HASH_LOCK_RDUNLOCK(h);
+
+ return 1;
+}
+
+
+#undef __prim
+#undef __prim_t
+#undef __prim_tab
+#undef __prim_lock_tab
+#undef __prim_hash
+#undef __prim_cmp
+#undef __prim_next
+#undef __sec
+#undef __sec_t
+#undef __sec_tab
+#undef __sec_lock_tab
+#undef __sec_hash
+#undef __sec_next
+
+#define __prim key
+#define __prim_t __k_t
+#define __prim_tab key_tab
+#define __prim_lock_tab key_lock_tab
+#define __prim_hash __key_hash
+#define __prim_cmp __key_cmp
+#define __prim_next key_next
+#define __sec value
+#define __sec_t __v_t
+#define __sec_tab value_tab
+#define __sec_lock_tab value_lock_tab
+#define __sec_hash __value_hash
+#define __sec_next value_next
+
+int __key_remove(struct __hash *h, __prim_t k, __sec_t *vp)
+{
+ struct hash_entry *e, *es, **pek, **pev;
+ struct bucket *bk, *bv;
+ struct bucket_lock *bltk, *bltv;
+ uint32_t old_kidx, kidx, vidx, min_load, nr_ent;
+ __prim_t ks;
+ __sec_t vs;
+
+ if(HASH_LOCK_RDLOCK(h) != 0) return -ENOLCK;
+
+again:
+ old_kidx = kidx = hash_to_idx(h, __prim_hash(k));
+ bk = C2L(h, &h->__prim_tab[kidx]);
+ bltk = C2L(h, h->__prim_lock_tab);
+ if(BUCKET_LOCK_RDLOCK(h, bltk, kidx) != 0) return -ENOLCK;
+ pek = &(bk->hash_entry);
+ e = *pek;
+ while(e != NULL)
+ {
+ e = C2L(h, e);
+ if(__prim_cmp(k, e->__prim))
+ {
+ goto found;
+ }
+ pek = &(e->__prim_next);
+ e = *pek;
+ }
+
+ BUCKET_LOCK_RDUNLOCK(h, bltk, kidx);
+ HASH_LOCK_RDUNLOCK(h);
+
+ return 0;
+
+found:
+ /*
+ * Make local copy of key and value.
+ */
+ es = e;
+ ks = e->__prim;
+ vs = e->__sec;
+ kidx = hash_to_idx(h, __prim_hash(ks));
+    /* Being paranoid: check that kidx has not changed, so that we unlock
+     * the right bucket */
+ assert(old_kidx == kidx);
+ vidx = hash_to_idx(h, __sec_hash(vs));
+ bk = C2L(h, &h->__prim_tab[kidx]);
+ bv = C2L(h, &h->__sec_tab[vidx]);
+ bltk = C2L(h, h->__prim_lock_tab);
+ bltv = C2L(h, h->__sec_lock_tab);
+ BUCKET_LOCK_RDUNLOCK(h, bltk, kidx);
+ if(TWO_BUCKETS_LOCK_WRLOCK(h, bltk, kidx, bltv, vidx) != 0) return -ENOLCK;
+ pek = &(bk->hash_entry);
+ pev = &(bv->hash_entry);
+
+ /* Find the entry in both tables */
+ e = *pek;
+ while(e != NULL)
+ {
+ e = C2L(h, e);
+ if(e == es)
+ {
+            /* Being paranoid: make sure that the key and value are
+             * still the same. This is still not 100% safe because, in
+             * principle, the entry could have been deleted while we
+             * didn't hold the locks for a little while, and exactly
+             * the same entry reinserted. If __k_t & __v_t are simple
+             * types then it probably doesn't matter, but if either is
+             * a pointer type, the actual structure might now be
+             * different. The chances of that happening are very slim,
+             * but still, if that's a problem, the user needs to pay
+             * attention to structure re-allocation */
+ if((memcmp(&(e->__prim), &ks, sizeof(__prim_t))) ||
+ (memcmp(&(e->__sec), &vs, sizeof(__sec_t))))
+ break;
+ goto found_again;
+ }
+ pek = &(e->__prim_next);
+ e = *pek;
+ }
+
+ TWO_BUCKETS_LOCK_WRUNLOCK(h, bltk, kidx, bltv, vidx);
+
+ /* Entry got removed in the meantime, try again */
+ goto again;
+
+found_again:
+    /* We are now committed to the removal */
+ e = *pev;
+ while(e != NULL)
+ {
+ e = C2L(h, e);
+ if(e == es)
+ {
+ /* Both pek and pev are pointing to the right place, remove */
+ *pek = e->__prim_next;
+ *pev = e->__sec_next;
+
+ nr_ent = __sync_sub_and_fetch(&h->nr_ent, 1);
+ /* read min_load still under the hash lock! */
+ min_load = h->min_load;
+
+ TWO_BUCKETS_LOCK_WRUNLOCK(h, bltk, kidx, bltv, vidx);
+ HASH_LOCK_RDUNLOCK(h);
+
+ if(nr_ent < min_load)
+ hash_resize(h);
+ if(vp != NULL)
+ *vp = e->__sec;
+ free_entry(h, e);
+ return 1;
+ }
+ pev = &(e->__sec_next);
+ e = *pev;
+ }
+
+    /* We should never get here; no need to unlock anything */
+ return -ENOLCK;
+}
+
+#undef __prim
+#undef __prim_t
+#undef __prim_tab
+#undef __prim_lock_tab
+#undef __prim_hash
+#undef __prim_cmp
+#undef __prim_next
+#undef __sec
+#undef __sec_t
+#undef __sec_tab
+#undef __sec_lock_tab
+#undef __sec_hash
+#undef __sec_next
+
+#define __prim value
+#define __prim_t __v_t
+#define __prim_tab value_tab
+#define __prim_lock_tab value_lock_tab
+#define __prim_hash __value_hash
+#define __prim_cmp __value_cmp
+#define __prim_next value_next
+#define __sec key
+#define __sec_t __k_t
+#define __sec_tab key_tab
+#define __sec_lock_tab key_lock_tab
+#define __sec_hash __key_hash
+#define __sec_next key_next
+
+int __value_remove(struct __hash *h, __prim_t k, __sec_t *vp)
+{
+ struct hash_entry *e, *es, **pek, **pev;
+ struct bucket *bk, *bv;
+ struct bucket_lock *bltk, *bltv;
+ uint32_t old_kidx, kidx, vidx, min_load, nr_ent;
+ __prim_t ks;
+ __sec_t vs;
+
+ if(HASH_LOCK_RDLOCK(h) != 0) return -ENOLCK;
+
+again:
+ old_kidx = kidx = hash_to_idx(h, __prim_hash(k));
+ bk = C2L(h, &h->__prim_tab[kidx]);
+ bltk = C2L(h, h->__prim_lock_tab);
+ if(BUCKET_LOCK_RDLOCK(h, bltk, kidx) != 0) return -ENOLCK;
+ pek = &(bk->hash_entry);
+ e = *pek;
+ while(e != NULL)
+ {
+ e = C2L(h, e);
+ if(__prim_cmp(k, e->__prim))
+ {
+ goto found;
+ }
+ pek = &(e->__prim_next);
+ e = *pek;
+ }
+
+ BUCKET_LOCK_RDUNLOCK(h, bltk, kidx);
+ HASH_LOCK_RDUNLOCK(h);
+
+ return 0;
+
+found:
+ /*
+ * Make local copy of key and value.
+ */
+ es = e;
+ ks = e->__prim;
+ vs = e->__sec;
+ kidx = hash_to_idx(h, __prim_hash(ks));
+    /* Being paranoid: check that kidx has not changed, so that we unlock
+     * the right bucket */
+ assert(old_kidx == kidx);
+ vidx = hash_to_idx(h, __sec_hash(vs));
+ bk = C2L(h, &h->__prim_tab[kidx]);
+ bv = C2L(h, &h->__sec_tab[vidx]);
+ bltk = C2L(h, h->__prim_lock_tab);
+ bltv = C2L(h, h->__sec_lock_tab);
+ BUCKET_LOCK_RDUNLOCK(h, bltk, kidx);
+ if(TWO_BUCKETS_LOCK_WRLOCK(h, bltk, kidx, bltv, vidx) != 0) return -ENOLCK;
+ pek = &(bk->hash_entry);
+ pev = &(bv->hash_entry);
+
+ /* Find the entry in both tables */
+ e = *pek;
+ while(e != NULL)
+ {
+ e = C2L(h, e);
+ if(e == es)
+ {
+            /* Being paranoid: make sure that the key and value are
+             * still the same. This is still not 100% safe because, in
+             * principle, the entry could have been deleted while we
+             * didn't hold the locks for a little while, and exactly
+             * the same entry reinserted. If __k_t & __v_t are simple
+             * types then it probably doesn't matter, but if either is
+             * a pointer type, the actual structure might now be
+             * different. The chances of that happening are very slim,
+             * but still, if that's a problem, the user needs to pay
+             * attention to structure re-allocation */
+ if((memcmp(&(e->__prim), &ks, sizeof(__prim_t))) ||
+ (memcmp(&(e->__sec), &vs, sizeof(__sec_t))))
+ break;
+ goto found_again;
+ }
+ pek = &(e->__prim_next);
+ e = *pek;
+ }
+
+ TWO_BUCKETS_LOCK_WRUNLOCK(h, bltk, kidx, bltv, vidx);
+
+ /* Entry got removed in the meantime, try again */
+ goto again;
+
+found_again:
+    /* We are now committed to the removal */
+ e = *pev;
+ while(e != NULL)
+ {
+ e = C2L(h, e);
+ if(e == es)
+ {
+ /* Both pek and pev are pointing to the right place, remove */
+ *pek = e->__prim_next;
+ *pev = e->__sec_next;
+
+ nr_ent = __sync_sub_and_fetch(&h->nr_ent, 1);
+ /* read min_load still under the hash lock! */
+ min_load = h->min_load;
+
+ TWO_BUCKETS_LOCK_WRUNLOCK(h, bltk, kidx, bltv, vidx);
+ HASH_LOCK_RDUNLOCK(h);
+
+ if(nr_ent < min_load)
+ hash_resize(h);
+ if(vp != NULL)
+ *vp = e->__sec;
+ free_entry(h, e);
+ return 1;
+ }
+ pev = &(e->__sec_next);
+ e = *pev;
+ }
+
+    /* We should never get here; no need to unlock anything */
+ return -ENOLCK;
+}
+
+
+int __hash_destroy(struct __hash *h,
+ void (*entry_consumer)(__k_t k, __v_t v, void *p),
+ void *d)
+{
+ struct hash_entry *e, *n;
+ struct bucket *b;
+ int i;
+
+ if(HASH_LOCK_WRLOCK(h) != 0) return -ENOLCK;
+
+ /* No need to lock individual buckets, with hash write lock */
+ for(i=0; i < h->tab_size; i++)
+ {
+ b = C2L(h, &h->key_tab[i]);
+ e = b->hash_entry;
+ while(e != NULL)
+ {
+ e = C2L(h, e);
+ n = e->key_next;
+ if(entry_consumer)
+ entry_consumer(e->key, e->value, d);
+ free_entry(h, e);
+ e = n;
+ }
+ }
+ free_buckets(h, C2L(h, h->key_tab), C2L(h, h->key_lock_tab));
+ free_buckets(h, C2L(h, h->value_tab), C2L(h, h->value_lock_tab));
+
+ HASH_LOCK_WRUNLOCK(h);
+ h->lock_alive = 0;
+
+ return 0;
+}
+
+static void hash_resize(struct __hash *h)
+{
+ int new_size_idx, i, lock_ret;
+ uint32_t size, old_size, kidx, vidx;
+ struct bucket *t1, *t2, *b;
+ struct bucket_lock *l1, *l2;
+ struct hash_entry *e, *n;
+
+    /* We may fail to acquire the lock if the resize is triggered while
+       we are iterating (under the read lock) */
+ lock_ret = HASH_LOCK_TRYWRLOCK(h);
+ if(lock_ret != 0) return;
+
+ new_size_idx = h->size_idx;
+ /* Work out the new size */
+ if(h->nr_ent >= h->max_load)
+ new_size_idx = h->size_idx+1;
+ if(h->nr_ent < h->min_load)
+ new_size_idx = h->size_idx-1;
+ if((new_size_idx == h->size_idx) ||
+ (new_size_idx >= hash_sizes_len) ||
+ (new_size_idx < 0))
+ {
+ HASH_LOCK_WRUNLOCK(h);
+ return;
+ }
+
+ size = hash_sizes[new_size_idx];
+
+ /* Allocate the new sizes */
+ t1 = t2 = NULL;
+ l1 = l2 = NULL;
+ alloc_tab(h, size, &t1, &l1);
+ if(!t1 || !l1) goto alloc_fail;
+ alloc_tab(h, size, &t2, &l2);
+ if(!t2 || !l2) goto alloc_fail;
+
+ old_size = h->tab_size;
+ h->tab_size = size;
+ h->size_idx = new_size_idx;
+ h->max_load = (uint32_t)ceilf(hash_max_load_fact * size);
+ h->min_load = (uint32_t)ceilf(hash_min_load_fact * size);
+
+ /* Move the entries */
+ for(i=0; i < old_size; i++)
+ {
+ b = C2L(h, &h->key_tab[i]);
+ e = b->hash_entry;
+ while(e != NULL)
+ {
+ e = C2L(h, e);
+ n = e->key_next;
+            kidx = hash_to_idx(h, __key_hash(e->key));
+            vidx = hash_to_idx(h, __value_hash(e->value));
+ /* Move to the correct bucket */
+ e->key_next = t1[kidx].hash_entry;
+ t1[kidx].hash_entry = L2C(h, e);
+ e->value_next = t2[vidx].hash_entry;
+ t2[vidx].hash_entry = L2C(h, e);
+ e = n;
+ }
+ }
+ free_buckets(h, C2L(h, h->key_tab), C2L(h, h->key_lock_tab));
+ free_buckets(h, C2L(h, h->value_tab), C2L(h, h->value_lock_tab));
+ h->key_tab = L2C(h, t1);
+ h->key_lock_tab = L2C(h, l1);
+ h->value_tab = L2C(h, t2);
+ h->value_lock_tab = L2C(h, l2);
+
+ HASH_LOCK_WRUNLOCK(h);
+
+ return;
+
+alloc_fail:
+ /* If we failed to resize, adjust max/min load. This will stop us from
+ * retrying resize too frequently */
+ if(new_size_idx > h->size_idx)
+ h->max_load = (h->max_load + 2 * h->tab_size) / 2 + 1;
+ else
+ if (new_size_idx < h->size_idx)
+ h->min_load = h->min_load / 2;
+ HASH_LOCK_WRUNLOCK(h);
+ if(t1 || l1) free_buckets(h, t1, l1);
+ if(t2 || l2) free_buckets(h, t2, l2);
+ return;
+}
+
+int __hash_iterator(struct __hash *h,
+ int (*entry_consumer)(__k_t k, __v_t v, void *p),
+ void *d)
+{
+ struct hash_entry *e, *n;
+ struct bucket *b;
+ struct bucket_lock *blt;
+ int i, brk_early;
+
+ if(HASH_LOCK_RDLOCK(h) != 0) return -ENOLCK;
+
+ for(i=0; i < h->tab_size; i++)
+ {
+ b = C2L(h, &h->key_tab[i]);
+ blt = C2L(h, h->key_lock_tab);
+ if(BUCKET_LOCK_RDLOCK(h, blt, i) != 0) return -ENOLCK;
+ e = b->hash_entry;
+ while(e != NULL)
+ {
+ e = C2L(h, e);
+ n = e->key_next;
+ brk_early = entry_consumer(e->key, e->value, d);
+ if(brk_early)
+ {
+ BUCKET_LOCK_RDUNLOCK(h, blt, i);
+ goto out;
+ }
+ e = n;
+ }
+ BUCKET_LOCK_RDUNLOCK(h, blt, i);
+ }
+out:
+ HASH_LOCK_RDUNLOCK(h);
+ return 0;
+}
+
+void __hash_sizes(struct __hash *h,
+ uint32_t *nr_ent,
+ uint32_t *max_nr_ent,
+ uint32_t *tab_size,
+ uint32_t *max_load,
+ uint32_t *min_load)
+{
+ if(nr_ent != NULL) *nr_ent = h->nr_ent;
+ if(max_nr_ent != NULL) *max_nr_ent = max_entries(h);
+ if(tab_size != NULL) *tab_size = h->tab_size;
+ if(max_load != NULL) *max_load = h->max_load;
+ if(min_load != NULL) *min_load = h->min_load;
+}
+
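
The allocator above keeps its freelist of entry slots inside the shared region itself, as indices rather than pointers: freelist[0] holds the head slot index and freelist[sl+1] holds the link of slot sl, with a head of 0 meaning empty, so slot 0 acts as a sentinel and is never handed out (see shm_add_to_freelist()/shm_get_from_freelist()). A standalone sketch of that encoding (illustrative only, not part of the patch):

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint32_t freelist[4 + 1] = {0};        /* 4 slots plus head, all zero */
        uint32_t sl;

        for (sl = 0; sl < 4; sl++) {           /* push slots 0..3 */
            freelist[sl + 1] = freelist[0];
            freelist[0] = sl;
        }
        for (;;) {                             /* pop until empty */
            uint32_t slot = freelist[0];
            freelist[0] = freelist[slot + 1];
            if (slot == 0)                     /* head 0 == empty sentinel */
                break;
            printf("got slot %u\n", slot);     /* prints 3, 2, 1 */
        }
        return 0;
    }
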
diff --git a/tools/memshr/bidir-hash.h b/tools/memshr/bidir-hash.h
new file mode 100644
index 0000000000..ed9ec58ff4
--- /dev/null
+++ b/tools/memshr/bidir-hash.h
@@ -0,0 +1,113 @@
+/******************************************************************************
+ *
+ * Copyright (c) 2009 Citrix (R&D) Inc. (Grzegorz Milos)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+#ifndef __BIDIR_HASH_H__
+#define __BIDIR_HASH_H__
+
+#include <stdint.h>
+#include "memshr-priv.h"
+
+typedef struct vbdblk {
+ uint64_t sec;
+ uint16_t disk_id;
+} vbdblk_t;
+
+
+#if defined FINGERPRINT_MAP || defined BLOCK_MAP
+#define DEFINE_SINGLE_MAP
+#endif
+
+/*******************************************************/
+/* Fingerprint map */
+/*******************************************************/
+#if defined FINGERPRINT_MAP || !defined DEFINE_SINGLE_MAP
+
+#undef BIDIR_NAME_PREFIX
+#undef BIDIR_KEY
+#undef BIDIR_VALUE
+#undef BIDIR_KEY_T
+#undef BIDIR_VALUE_T
+static inline uint32_t fgprtshr_fgprt_hash(uint32_t h)
+{
+    return h;
+}
+
+static inline uint32_t fgprtshr_mfn_hash(uint64_t m)
+{
+    return (uint32_t)m;
+}
+
+static inline int fgprtshr_fgprt_cmp(uint32_t h1, uint32_t h2)
+{
+    return (h1 == h2);
+}
+
+static inline int fgprtshr_mfn_cmp(uint64_t m1, uint64_t m2)
+{
+    return (m1 == m2);
+}
+#define BIDIR_NAME_PREFIX fgprtshr
+#define BIDIR_KEY fgprt
+#define BIDIR_VALUE mfn
+#define BIDIR_KEY_T uint32_t
+#define BIDIR_VALUE_T xen_mfn_t
+#include "bidir-namedefs.h"
+
+#endif /* FINGERPRINT_MAP */
+
+
+/*******************************************************/
+/* Block<->Memory sharing handles */
+/*******************************************************/
+#if defined BLOCK_MAP || !defined DEFINE_SINGLE_MAP
+
+#undef BIDIR_NAME_PREFIX
+#undef BIDIR_KEY
+#undef BIDIR_VALUE
+#undef BIDIR_KEY_T
+#undef BIDIR_VALUE_T
+/* TODO better hashes! */
+static inline uint32_t blockshr_block_hash(vbdblk_t block)
+{
+ return (uint32_t)(block.sec) ^ (uint32_t)(block.disk_id);
+}
+
+static inline uint32_t blockshr_shrhnd_hash(uint64_t shrhnd)
+{
+ return (uint32_t)shrhnd;
+}
+
+static inline int blockshr_block_cmp(vbdblk_t b1, vbdblk_t b2)
+{
+ return (b1.sec == b2.sec) && (b1.disk_id == b2.disk_id);
+}
+
+static inline int blockshr_shrhnd_cmp(uint64_t h1, uint64_t h2)
+{
+ return (h1 == h2);
+}
+#define BIDIR_NAME_PREFIX blockshr
+#define BIDIR_KEY block
+#define BIDIR_VALUE shrhnd
+#define BIDIR_KEY_T vbdblk_t
+#define BIDIR_VALUE_T uint64_t
+#include "bidir-namedefs.h"
+
+#endif /* BLOCK_MAP */
+
+#endif /* __BIDIR_HASH_H__ */
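
Each map instantiation above supplies only four hash/compare helpers plus the five BIDIR_* parameters before including bidir-namedefs.h. A hypothetical third map, say grant refs to handles, would follow the same pattern (sketch only, not part of the patch; the gntshr names are invented):

    #undef BIDIR_NAME_PREFIX
    #undef BIDIR_KEY
    #undef BIDIR_VALUE
    #undef BIDIR_KEY_T
    #undef BIDIR_VALUE_T
    static inline uint32_t gntshr_gref_hash(uint32_t g)       { return g; }
    static inline uint32_t gntshr_hnd_hash(uint64_t h)        { return (uint32_t)h; }
    static inline int gntshr_gref_cmp(uint32_t a, uint32_t b) { return (a == b); }
    static inline int gntshr_hnd_cmp(uint64_t a, uint64_t b)  { return (a == b); }
    #define BIDIR_NAME_PREFIX gntshr
    #define BIDIR_KEY         gref
    #define BIDIR_VALUE       hnd
    #define BIDIR_KEY_T       uint32_t
    #define BIDIR_VALUE_T     uint64_t
    #include "bidir-namedefs.h"
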
diff --git a/tools/memshr/bidir-namedefs.h b/tools/memshr/bidir-namedefs.h
new file mode 100644
index 0000000000..649f063b70
--- /dev/null
+++ b/tools/memshr/bidir-namedefs.h
@@ -0,0 +1,80 @@
+/******************************************************************************
+ *
+ * Copyright (c) 2009 Citrix (R&D) Inc. (Grzegorz Milos)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+#include "memshr-priv.h"
+
+/* Macros used to assemble the names */
+#define BIDIR_NAME_ONE_INTERNAL(prefix, name) \
+ prefix ## _ ## name
+#define BIDIR_NAME_TWO_INTERNAL(prefix, name1, name2) \
+ prefix ## _ ## name1 ## _ ## name2
+
+#define BIDIR_NAME_ONE(prefix, name) \
+ BIDIR_NAME_ONE_INTERNAL(prefix, name)
+#define BIDIR_NAME_TWO(prefix, name1, name2) \
+ BIDIR_NAME_TWO_INTERNAL(prefix, name1, name2)
+
+#define INTERNAL_NAME_ONE(name) BIDIR_NAME_ONE(BIDIR_NAME_PREFIX, name)
+#define INTERNAL_NAME_TWO(name1, name2) \
+ BIDIR_NAME_TWO(BIDIR_NAME_PREFIX, name1, name2)
+
+/* Function/type names */
+#define __k_t BIDIR_KEY_T
+#define __v_t BIDIR_VALUE_T
+
+#define __hash INTERNAL_NAME_ONE(hash)
+#define __shm_hash_init INTERNAL_NAME_ONE(shm_hash_init)
+#define __shm_hash_get INTERNAL_NAME_ONE(shm_hash_get)
+#define __hash_init INTERNAL_NAME_ONE(hash_init)
+#define __key_lookup INTERNAL_NAME_TWO(BIDIR_KEY, lookup)
+#define __value_lookup INTERNAL_NAME_TWO(BIDIR_VALUE, lookup)
+#define __insert INTERNAL_NAME_ONE(insert)
+#define __key_remove INTERNAL_NAME_TWO(BIDIR_KEY, remove)
+#define __value_remove INTERNAL_NAME_TWO(BIDIR_VALUE, remove)
+#define __hash_destroy INTERNAL_NAME_ONE(hash_destroy)
+#define __hash_iterator INTERNAL_NAME_ONE(hash_iterator)
+
+#define __key_hash INTERNAL_NAME_TWO(BIDIR_KEY, hash)
+#define __key_cmp INTERNAL_NAME_TWO(BIDIR_KEY, cmp)
+#define __value_hash INTERNAL_NAME_TWO(BIDIR_VALUE, hash)
+#define __value_cmp INTERNAL_NAME_TWO(BIDIR_VALUE, cmp)
+
+#define __hash_sizes INTERNAL_NAME_ONE(hash_sizes)
+
+
+/* Final function exports */
+struct __hash* __shm_hash_init(unsigned long shm_baddr, unsigned long shm_size);
+struct __hash* __shm_hash_get(unsigned long shm_baddr);
+struct __hash *__hash_init (struct __hash *h, uint32_t min_size);
+int __key_lookup (struct __hash *h, __k_t k, __v_t *vp);
+int __value_lookup(struct __hash *h, __v_t v, __k_t *kp);
+int __insert (struct __hash *h, __k_t k, __v_t v);
+int __key_remove (struct __hash *h, __k_t k, __v_t *vp);
+int __value_remove(struct __hash *h, __v_t v, __k_t *kp);
+int __hash_destroy(struct __hash *h,
+ void (*entry_consumer)(__k_t k, __v_t v, void *p),
+ void *d);
+int __hash_iterator(struct __hash *h,
+ int (*entry_consumer)(__k_t k, __v_t v, void *p),
+ void *d);
+void __hash_sizes(struct __hash *h,
+ uint32_t *nr_ent,
+ uint32_t *max_nr_ent,
+ uint32_t *tab_size,
+ uint32_t *max_load,
+ uint32_t *min_load);
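
To make the token pasting above concrete: with the BLOCK_MAP parameters from bidir-hash.h (prefix blockshr, key block/vbdblk_t, value shrhnd/uint64_t), the generic prototypes expand to the following (illustration, not part of the patch):

    struct blockshr_hash *blockshr_shm_hash_init(unsigned long shm_baddr,
                                                 unsigned long shm_size);
    struct blockshr_hash *blockshr_shm_hash_get(unsigned long shm_baddr);
    int blockshr_insert       (struct blockshr_hash *h, vbdblk_t k, uint64_t v);
    int blockshr_block_lookup (struct blockshr_hash *h, vbdblk_t k, uint64_t *vp);
    int blockshr_shrhnd_lookup(struct blockshr_hash *h, uint64_t v, vbdblk_t *kp);
    int blockshr_block_remove (struct blockshr_hash *h, vbdblk_t k, uint64_t *vp);
    int blockshr_shrhnd_remove(struct blockshr_hash *h, uint64_t v, vbdblk_t *kp);
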
diff --git a/tools/memshr/interface.c b/tools/memshr/interface.c
new file mode 100644
index 0000000000..9ab20c213c
--- /dev/null
+++ b/tools/memshr/interface.c
@@ -0,0 +1,92 @@
+/******************************************************************************
+ *
+ * Copyright (c) 2009 Citrix (R&D) Inc. (Grzegorz Milos)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+#include <string.h>
+
+#include "memshr-priv.h"
+#include "shm.h"
+
+
+typedef struct {
+ struct shared_memshr_info *shared_info;
+ struct fgprtshr_hash *fgprts;
+ struct blockshr_hash *blks;
+} private_memshr_info_t;
+
+private_memshr_info_t memshr;
+
+#define SHARED_INFO (memshr.shared_info)
+
+void memshr_daemon_initialize(void)
+{
+
+ memset(&memshr, 0, sizeof(private_memshr_info_t));
+
+ if((SHARED_INFO = shm_shared_info_open(1)) == NULL)
+ {
+ DPRINTF("Failed to init shared info.\n");
+ return;
+ }
+
+ if((memshr.fgprts = shm_fgprtshr_hash_open(1)) == NULL)
+ {
+ DPRINTF("Failed to init fgprtshr hash.\n");
+ return;
+ }
+ memshr.shared_info->fgprtshr_hash_inited = 1;
+
+ if((memshr.blks = shm_blockshr_hash_open(1)) == NULL)
+ {
+ DPRINTF("Failed to init blockshr hash.\n");
+ return;
+ }
+ memshr.shared_info->blockshr_hash_inited = 1;
+}
+
+
+void memshr_vbd_initialize(void)
+{
+ memset(&memshr, 0, sizeof(private_memshr_info_t));
+
+ if((SHARED_INFO = shm_shared_info_open(0)) == NULL)
+ {
+ DPRINTF("Failed to open shared info.\n");
+ return;
+ }
+
+ if(!SHARED_INFO->fgprtshr_hash_inited)
+ {
+ DPRINTF("fgprtshr hash not inited.\n");
+ return;
+ }
+
+ if((memshr.fgprts = shm_fgprtshr_hash_open(0)) == NULL)
+ {
+ DPRINTF("Failed to open fgprtshr_hash.\n");
+ return;
+ }
+
+ if((memshr.blks = shm_blockshr_hash_open(0)) == NULL)
+ {
+ DPRINTF("Failed to open blockshr_hash.\n");
+ return;
+ }
+}
+
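
The two entry points above split the roles: exactly one process, the daemon (blktapctrl, per the comment in shm.c below), creates and initialises the shared state, while every tapdisk process merely attaches to it. A usage sketch (not part of the patch; the command-line flag is invented):

    #include "memshr.h"

    int main(int argc, char **argv)
    {
        if (argc > 1)                    /* hypothetical "act as daemon" flag */
            memshr_daemon_initialize();  /* creates the shm files, inits hashes */
        else
            memshr_vbd_initialize();     /* attaches to the existing state */
        /* ... serve block requests, using the maps via libmemshr ... */
        return 0;
    }
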
diff --git a/tools/memshr/memshr-priv.h b/tools/memshr/memshr-priv.h
new file mode 100644
index 0000000000..d83179889e
--- /dev/null
+++ b/tools/memshr/memshr-priv.h
@@ -0,0 +1,34 @@
+/******************************************************************************
+ *
+ * Copyright (c) 2009 Citrix (R&D) Inc. (Grzegorz Milos)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+#ifndef __MEMSHR_PRIV_H__
+#define __MEMSHR_PRIV_H__
+
+#include <syslog.h>
+#include <xenctrl.h>
+#include "memshr.h"
+
+#if 1
+#define DPRINTF(_f, _a...) syslog(LOG_INFO, _f, ##_a)
+#else
+#define DPRINTF(_f, _a...) ((void)0)
+#endif
+
+#define EPRINTF(_f, _a...) syslog(LOG_ERR, "memshr:%s: " _f, __func__, ##_a)
+
+#endif /* __MEMSHR_PRIV_H__ */
diff --git a/tools/memshr/memshr.h b/tools/memshr/memshr.h
new file mode 100644
index 0000000000..412ddb1a42
--- /dev/null
+++ b/tools/memshr/memshr.h
@@ -0,0 +1,29 @@
+/******************************************************************************
+ *
+ * Copyright (c) 2009 Citrix (R&D) Inc. (Grzegorz Milos)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+#ifndef __MEMSHR_H__
+#define __MEMSHR_H__
+
+#include <stdint.h>
+
+typedef uint64_t xen_mfn_t;
+
+extern void memshr_daemon_initialize(void);
+extern void memshr_vbd_initialize(void);
+
+#endif /* __MEMSHR_H__ */
diff --git a/tools/memshr/shm.c b/tools/memshr/shm.c
new file mode 100644
index 0000000000..6489f61466
--- /dev/null
+++ b/tools/memshr/shm.c
@@ -0,0 +1,182 @@
+/******************************************************************************
+ *
+ * Copyright (c) 2009 Citrix (R&D) Inc. (Grzegorz Milos)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+#include <assert.h>
+#include <stdlib.h>
+#include <string.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+
+#include "memshr-priv.h"
+#include "bidir-hash.h"
+#include "shm.h"
+
+#define MEMSHR_INFO_SHM_FILE "/memshr-info"
+#define MEMSHR_INFO_MAGIC 0x15263748
+
+#define FGPRT_HASH_SHM_FILE "/blktap-fgprts"
+#define FGPRT_HASH_PAGES 10000
+
+#define BLOCK_HASH_SHM_FILE "/blktap-blks"
+#define BLOCK_HASH_PAGES 10000
+
+typedef struct shm_area {
+ void* base_addr;
+ size_t size;
+ int fd;
+} shm_area_t;
+
+typedef struct {
+ struct shm_area shared_info_area;
+ struct shm_area fgprts_area;
+ struct shm_area blocks_area;
+} private_shm_info_t;
+
+private_shm_info_t shm_info;
+
+
+
+static int shm_area_open(const char *file, size_t size, int unlink, shm_area_t *shma)
+{
+ /* TODO: If blktapctrl can be restarted while system is running, this needs
+ * to be cleverer */
+ if(unlink) shm_unlink(file);
+
+ shma->size = size;
+ shma->fd = shm_open(file,
+ (O_CREAT | O_RDWR),
+ (S_IREAD | S_IWRITE));
+
+ if(shma->fd < 0) return -1;
+
+ if(ftruncate(shma->fd, size) < 0) return -2;
+
+ shma->base_addr = mmap(NULL,
+ size,
+ PROT_READ | PROT_WRITE,
+ MAP_SHARED,
+ shma->fd,
+ 0);
+
+ if(shma->base_addr == MAP_FAILED) return -2;
+
+ return 0;
+}
+
+static void shm_area_close(shm_area_t *shma)
+{
+ munmap(shma->base_addr, shma->size);
+ close(shma->fd);
+}
+
+
+shared_memshr_info_t * shm_shared_info_open(int unlink)
+{
+ shared_memshr_info_t *shared_info;
+ pthread_mutexattr_t lock_attr;
+
+ assert(sizeof(shared_memshr_info_t) < XC_PAGE_SIZE);
+ if(shm_area_open(MEMSHR_INFO_SHM_FILE,
+ XC_PAGE_SIZE,
+ unlink,
+ &(shm_info.shared_info_area)) < 0)
+ {
+ DPRINTF("Failed to open shma for shared info.\n");
+ return NULL;
+ }
+ shared_info = (shared_memshr_info_t *)
+ shm_info.shared_info_area.base_addr;
+ if(unlink)
+ {
+ memset(shared_info, 0, sizeof(shared_memshr_info_t));
+ if(pthread_mutexattr_init(&lock_attr) ||
+ pthread_mutexattr_setpshared(&lock_attr, PTHREAD_PROCESS_SHARED) ||
+ pthread_mutex_init(&shared_info->lock, &lock_attr) ||
+ pthread_mutexattr_destroy(&lock_attr))
+ {
+ DPRINTF("Failed to init shared info lock.\n");
+ return NULL;
+ }
+ shared_info->magic = MEMSHR_INFO_MAGIC;
+ }
+ else
+ if(shared_info->magic != MEMSHR_INFO_MAGIC)
+ {
+ DPRINTF("Incorrect magic in shared info.\n");
+ return NULL;
+ }
+
+ return shared_info;
+}
+
+
+struct fgprtshr_hash * shm_fgprtshr_hash_open(int unlink)
+{
+ struct fgprtshr_hash *h;
+ if(shm_area_open(FGPRT_HASH_SHM_FILE,
+ FGPRT_HASH_PAGES * XC_PAGE_SIZE,
+ unlink,
+ &(shm_info.fgprts_area)) < 0)
+ {
+ DPRINTF("Failed to init shma for fgprtshr_hash.\n");
+ return NULL;
+ }
+
+ if(unlink)
+ {
+ h = fgprtshr_shm_hash_init(
+ (unsigned long) shm_info.fgprts_area.base_addr,
+ FGPRT_HASH_PAGES * XC_PAGE_SIZE);
+ } else
+ {
+ h = fgprtshr_shm_hash_get(
+ (unsigned long) shm_info.fgprts_area.base_addr);
+ }
+
+ return h;
+}
+
+struct blockshr_hash * shm_blockshr_hash_open(int unlink)
+{
+ struct blockshr_hash *h;
+ if(shm_area_open(BLOCK_HASH_SHM_FILE,
+ BLOCK_HASH_PAGES * XC_PAGE_SIZE,
+ unlink,
+ &(shm_info.blocks_area)) < 0)
+ {
+ DPRINTF("Failed to init shma for blockshr_hash.\n");
+ return NULL;
+ }
+
+ if(unlink)
+ {
+ h = blockshr_shm_hash_init(
+ (unsigned long) shm_info.blocks_area.base_addr,
+ BLOCK_HASH_PAGES * XC_PAGE_SIZE);
+ } else
+ {
+ h = blockshr_shm_hash_get(
+ (unsigned long) shm_info.blocks_area.base_addr);
+ }
+
+ return h;
+}
+
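
On Linux the POSIX shared memory objects above appear under /dev/shm, and the unlink argument effectively means "start fresh". A minimal attach-only open, mirroring what shm_area_open() does for the unlink=0 callers (standalone sketch, not part of the patch; 4096 assumes the x86 XC_PAGE_SIZE, and older glibc needs -lrt):

    #include <fcntl.h>
    #include <stdio.h>
    #include <sys/mman.h>
    #include <unistd.h>

    int main(void)
    {
        /* No O_CREAT: attach only; the daemon must have created it already */
        int fd = shm_open("/memshr-info", O_RDWR, 0);
        void *p;

        if (fd < 0) { perror("shm_open"); return 1; }
        p = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
        if (p == MAP_FAILED) { perror("mmap"); close(fd); return 1; }
        printf("shared info mapped at %p\n", p); /* address differs per process */
        munmap(p, 4096);
        close(fd);
        return 0;
    }
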
diff --git a/tools/memshr/shm.h b/tools/memshr/shm.h
new file mode 100644
index 0000000000..0f6f665c6d
--- /dev/null
+++ b/tools/memshr/shm.h
@@ -0,0 +1,35 @@
+/******************************************************************************
+ *
+ * Copyright (c) 2009 Citrix (R&D) Inc. (Grzegorz Milos)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+#ifndef __SHM_H__
+#define __SHM_H__
+
+#include <pthread.h>
+
+typedef struct shared_memshr_info {
+ unsigned long magic;
+ pthread_mutex_t lock;
+ int fgprtshr_hash_inited;
+ int blockshr_hash_inited;
+} shared_memshr_info_t;
+
+shared_memshr_info_t * shm_shared_info_open(int unlink);
+struct fgprtshr_hash * shm_fgprtshr_hash_open(int unlink);
+struct blockshr_hash * shm_blockshr_hash_open(int unlink);
+
+#endif /* __SHM_H__ */