diff options
Diffstat (limited to 'tools/memshr')
-rw-r--r-- | tools/memshr/Makefile | 47 | ||||
-rw-r--r-- | tools/memshr/bidir-hash.c | 1275 | ||||
-rw-r--r-- | tools/memshr/bidir-hash.h | 113 | ||||
-rw-r--r-- | tools/memshr/bidir-namedefs.h | 80 | ||||
-rw-r--r-- | tools/memshr/interface.c | 92 | ||||
-rw-r--r-- | tools/memshr/memshr-priv.h | 34 | ||||
-rw-r--r-- | tools/memshr/memshr.h | 29 | ||||
-rw-r--r-- | tools/memshr/shm.c | 182 | ||||
-rw-r--r-- | tools/memshr/shm.h | 35 |
9 files changed, 1887 insertions, 0 deletions
diff --git a/tools/memshr/Makefile b/tools/memshr/Makefile new file mode 100644 index 0000000000..e7cf33b360 --- /dev/null +++ b/tools/memshr/Makefile @@ -0,0 +1,47 @@ +XEN_ROOT = ../.. +include $(XEN_ROOT)/tools/Rules.mk + +LIBMEMSHR-BUILD := libmemshr.a + +CFLAGS += -Werror +CFLAGS += -Wno-unused +CFLAGS += -I../include +CFLAGS += $(CFLAGS_libxenctrl) +CFLAGS += -D_GNU_SOURCE +CFLAGS += -fPIC +CFLAGS += -g + +# Get gcc to generate the dependencies for us. +CFLAGS += -Wp,-MD,.$(@F).d +DEPS = .*.d + +LIB-SRCS := interface.c +LIB-SRCS += shm.c +LIB-SRCS += bidir-hash.c + +LIB-OBJS := interface.o +LIB-OBJS += shm.o +LIB-OBJS += bidir-hash-fgprtshr.o +LIB-OBJS += bidir-hash-blockshr.o + +all: build + +build: $(LIBMEMSHR-BUILD) + +bidir-hash-fgprtshr.o: bidir-hash.c + $(CC) $(CFLAGS) -DFINGERPRINT_MAP -c -o $*.o bidir-hash.c + +bidir-hash-blockshr.o: bidir-hash.c + $(CC) $(CFLAGS) -DBLOCK_MAP -c -o $*.o bidir-hash.c + +libmemshr.a: $(LIB-OBJS) + $(AR) rc $@ $^ + +install: all + +clean: + rm -rf *.a *.o *~ $(DEPS) + +.PHONY: all build clean install + +-include $(DEPS) diff --git a/tools/memshr/bidir-hash.c b/tools/memshr/bidir-hash.c new file mode 100644 index 0000000000..9304ad15d8 --- /dev/null +++ b/tools/memshr/bidir-hash.c @@ -0,0 +1,1275 @@ +/****************************************************************************** + * + * Copyright (c) 2009 Citrix (R&D) Inc. (Grzegorz Milos) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#include <assert.h> +#include <errno.h> +#include <math.h> +#include <pthread.h> +#include <stddef.h> +#include <stdint.h> +#include <stdlib.h> +#include <string.h> +#include <time.h> +#include <unistd.h> + +#include "bidir-hash.h" + +static const uint32_t hash_sizes[] = {53, 97, 193, 389, 769, 1543, 3079, 6151, + 12289, 24593, 49157, 98317, 196613, 393241, 786433, 1572869, 3145739, + 6291469, 12582917, 25165843, 50331653, 100663319, 201326611, 402653189, + 805306457, 1610612741}; +static const uint16_t hash_sizes_len = + sizeof(hash_sizes)/sizeof(hash_sizes[0]); +static const float hash_max_load_fact = 0.65; +static const float hash_min_load_fact = 0.10; + +/* How many buckets will be covered by a single rw lock */ +#define BUCKETS_PER_LOCK 64 +#define nr_locks(_nr_buckets) (1 + (_nr_buckets) / BUCKETS_PER_LOCK) + + +#define HASH_LOCK \ + pthread_rwlock_t hash_lock + +#define BUCKET_LOCK \ + pthread_rwlock_t bucket_lock + +struct hash_entry +{ + __k_t key; + __v_t value; + /* This structure will belong to two buckets, one in each hash table */ + struct hash_entry *key_next; + struct hash_entry *value_next; +}; + +struct bucket +{ + struct hash_entry *hash_entry; +}; + +struct bucket_lock +{ + BUCKET_LOCK; +}; + +struct __hash +{ + int lock_alive; + HASH_LOCK; /* protects: + * *_tab, tab_size, size_idx, *_load + * (all writes with wrlock) + */ + volatile uint32_t nr_ent; /* # entries held in hashtables */ + struct bucket *key_tab; /* forward mapping hashtable */ + struct bucket *value_tab; /* backward mapping hashtable */ + struct bucket_lock *key_lock_tab; /* key table bucket locks */ + struct bucket_lock *value_lock_tab; /* value table bucket locks */ + uint32_t tab_size; /* # buckets is hashtables */ + uint16_t size_idx; /* table size index */ + 
uint32_t max_load; /* # entries before rehash */ + uint32_t min_load; /* # entries before rehash */ +}; + +struct __hash *__hash_init (struct __hash *h, uint32_t min_size); +int __key_lookup (struct __hash *h, __k_t k, __v_t *vp); +int __value_lookup(struct __hash *h, __v_t v, __k_t *kp); +int __insert (struct __hash *h, __k_t k, __v_t v); +int __key_remove (struct __hash *h, __k_t k, __v_t *vp); +int __value_remove(struct __hash *h, __v_t v, __k_t *kp); +int __hash_destroy(struct __hash *h, + void (*entry_consumer)(__k_t k, __v_t v, void *p), + void *d); +int __hash_iterator(struct __hash *h, + int (*entry_consumer)(__k_t k, __v_t v, void *p), + void *d); +static void hash_resize(struct __hash *h); + + +#ifdef BIDIR_USE_STDMALLOC + +static void* alloc_entry(struct __hash *h, int size) +{ + return malloc(size); +} + +static void alloc_buckets(struct __hash *h, + int nr_buckets, + struct bucket **bucket_tab, + struct bucket_lock **bucket_locks_tab) +{ + *bucket_tab = (struct bucket*) + malloc(nr_buckets * sizeof(struct bucket)); + *bucket_locks_tab = (struct bucket_lock*) + malloc(nr_locks(nr_buckets) * sizeof(struct bucket_lock)); +} + +static void free_entry(struct __hash *h, void *p) +{ + free(p); +} + +static void free_buckets(struct __hash *h, + struct bucket *buckets, + struct bucket_lock *bucket_locks) +{ + if(buckets) free(buckets); + if(bucket_locks) free(bucket_locks); +} + +static int max_entries(struct __hash *h) +{ + /* There are no explicit restrictions to how many entries we can store */ + return -1; +} + +#else + +/*****************************************************************************/ +/** Memory allocator for shared memory region **/ +/*****************************************************************************/ +#define SHM_TABLE_SLOTS 4 + +struct shm_hdr +{ + int hash_allocated; + pthread_mutex_t mutex; + int free_tab_slots[SHM_TABLE_SLOTS]; + + unsigned long freelist_offset; + + unsigned long entries_offset; + unsigned long nr_entries; + 
+ unsigned long tabs_offset; + unsigned long max_tab_size; + unsigned long max_lock_tab_size; + + struct __hash hash; +}; + +static unsigned long get_shm_baddr(void *hdr) +{ + return ((unsigned long)hdr - offsetof(struct shm_hdr, hash)); +} + + +/** Locations of various structures/memory areas **/ +static struct shm_hdr* get_shm_hdr(struct __hash *h) +{ + return (struct shm_hdr *) + ((unsigned long)h - offsetof(struct shm_hdr, hash)); +} + +static uint32_t* get_shm_freelist(struct shm_hdr *hdr) +{ + unsigned long shm_baddr = (unsigned long)hdr; + return ((uint32_t *)(shm_baddr + hdr->freelist_offset)); +} + +static struct hash_entry* get_shm_entries(struct shm_hdr *hdr) +{ + unsigned long shm_baddr = (unsigned long)hdr; + return ((struct hash_entry *)(shm_baddr + hdr->entries_offset)); +} + +static struct bucket* get_shm_tab(struct shm_hdr *hdr, int i) +{ + unsigned long shm_baddr = (unsigned long)hdr; + return ((struct bucket *) + ((shm_baddr + hdr->tabs_offset) + + i * (hdr->max_tab_size + hdr->max_lock_tab_size))); +} + +static struct bucket_lock* get_shm_lock_tab(struct shm_hdr *hdr, int i) +{ + unsigned long shm_baddr = (unsigned long)hdr; + return ((struct bucket_lock *) + ((shm_baddr + hdr->tabs_offset) + + i * (hdr->max_tab_size + hdr->max_lock_tab_size) + + hdr->max_tab_size)); +} + +static int get_shm_slot(struct shm_hdr *hdr, void *p) +{ + unsigned long shm_baddr = (unsigned long)hdr; + return ((unsigned long)p - (shm_baddr + hdr->tabs_offset)) / + (hdr->max_tab_size + hdr->max_lock_tab_size); +} + +/* Shared memory allocator locks */ +static int shm_mutex_init(struct shm_hdr *h) +{ + int ret; + pthread_mutexattr_t _attr; + + ret = pthread_mutexattr_init(&_attr); + if(ret == 0) + ret = pthread_mutexattr_setpshared(&_attr, PTHREAD_PROCESS_SHARED); + if(ret == 0) + ret = pthread_mutex_init(&h->mutex, &_attr); + if(ret == 0) + ret = pthread_mutexattr_destroy(&_attr); + + return ret; +}; + +static int shm_mutex_lock(struct shm_hdr *h) +{ + return 
pthread_mutex_lock(&h->mutex); +} + +static int shm_mutex_unlock(struct shm_hdr *h) +{ + return pthread_mutex_unlock(&h->mutex); +} + + +/* Shared memory allocator freelist */ +static void shm_add_to_freelist(struct shm_hdr *hdr, uint32_t sl) +{ + uint32_t *freelist = get_shm_freelist(hdr); + + shm_mutex_lock(hdr); + freelist[sl+1] = freelist[0]; + freelist[0] = sl; + shm_mutex_unlock(hdr); +} + +static uint32_t shm_get_from_freelist(struct shm_hdr *hdr) +{ + uint32_t *freelist = get_shm_freelist(hdr); + uint32_t slot; + + shm_mutex_lock(hdr); + slot = freelist[0]; + freelist[0] = freelist[slot+1]; + shm_mutex_unlock(hdr); + + return (slot == 0 ? -1 : slot); +} + + +#define SHM_ALLOC_MAIN(_n) + +static unsigned long shm_init_offsets( + struct shm_hdr *hdr, int nr_entries) +{ + hdr->freelist_offset = sizeof(struct shm_hdr); + + /* Freelist needs one extra slot in the array for the freelist head */ + hdr->entries_offset = + hdr->freelist_offset + (nr_entries + 1) * sizeof(uint32_t); + hdr->nr_entries = nr_entries; + + hdr->tabs_offset = hdr->entries_offset + + nr_entries * sizeof(struct hash_entry); + /* We want to allocate table 1.5 larger than the number of entries + we want to hold in it */ + hdr->max_tab_size = + (nr_entries * 3 / 2) * sizeof(struct bucket); + hdr->max_lock_tab_size = + nr_locks(hdr->max_tab_size) * sizeof(struct bucket_lock); + + return hdr->tabs_offset + (hdr->max_tab_size + hdr->max_lock_tab_size) * 4; +} + +struct __hash* __shm_hash_init(unsigned long shm_baddr, unsigned long shm_size) +{ + uint32_t i; + struct shm_hdr *hdr; + + /* Some sanity checks */ + hdr = (struct shm_hdr *)shm_baddr; + memset(hdr, 0, sizeof(struct shm_hdr)); + + /* Find the maximum number of entries we can store in the given shm_size */ + for(i=1; shm_init_offsets(hdr, i) < shm_size; i++){}; + shm_init_offsets(hdr, (i-1)); + + memset(get_shm_freelist(hdr), 0, + (hdr->nr_entries + 1) * sizeof(uint32_t)); + if(shm_mutex_init(hdr) != 0) + return NULL; + for(i=0; 
i<hdr->nr_entries; i++) + shm_add_to_freelist(hdr, i); + for(i=0; i<SHM_TABLE_SLOTS; i++) + hdr->free_tab_slots[i] = 1; + + shm_mutex_lock(hdr); + assert(!hdr->hash_allocated); + hdr->hash_allocated = 1; + shm_mutex_unlock(hdr); + + return __hash_init(&hdr->hash, 1000); +} + +struct __hash* __shm_hash_get(unsigned long shm_baddr) +{ + struct shm_hdr *hdr = (struct shm_hdr *)shm_baddr; + + return (hdr->hash_allocated ? &hdr->hash : NULL); +} + +static void* alloc_entry(struct __hash *h, int size) +{ + struct shm_hdr *hdr = get_shm_hdr(h); + uint32_t slot = shm_get_from_freelist(hdr); + + assert(size == sizeof(struct hash_entry)); + if(slot == -1) + return NULL; + + return (get_shm_entries(hdr) + slot); +} + +static void alloc_buckets(struct __hash *h, + int nr_buckets, + struct bucket **buckets_tab, + struct bucket_lock **bucket_locks_tab) +{ + struct shm_hdr *hdr = get_shm_hdr(h); + int free_slot; + + *buckets_tab = NULL; + *bucket_locks_tab = NULL; + + if(((nr_buckets * sizeof(struct bucket)) > hdr->max_tab_size) || + ((nr_locks(nr_buckets) * sizeof(struct bucket_lock)) > + hdr->max_lock_tab_size)) + return; + + shm_mutex_lock(hdr); + for(free_slot=0; free_slot<SHM_TABLE_SLOTS; free_slot++) + if(hdr->free_tab_slots[free_slot]) + break; + if(free_slot == SHM_TABLE_SLOTS) + { + shm_mutex_unlock(hdr); + return; + } + hdr->free_tab_slots[free_slot] = 0; + shm_mutex_unlock(hdr); + *buckets_tab = get_shm_tab(hdr, free_slot); + *bucket_locks_tab = get_shm_lock_tab(hdr, free_slot); +} + +static void free_entry(struct __hash *h, void *p) +{ + struct shm_hdr *hdr = get_shm_hdr(h); + uint32_t slot; + + slot = ((uint32_t)((struct hash_entry *)p - + get_shm_entries(hdr))); + shm_add_to_freelist(hdr, slot); +} + +static void free_buckets(struct __hash *h, + struct bucket *buckets, + struct bucket_lock *bucket_locks) +{ + struct shm_hdr *hdr = get_shm_hdr(h); + int slot; + + if(!buckets || !bucket_locks) + { + assert(!buckets && !bucket_locks); + return; + } + slot = 
get_shm_slot(hdr, buckets); + assert(slot < SHM_TABLE_SLOTS); + assert((char *)bucket_locks == (char *)buckets + hdr->max_tab_size); + shm_mutex_lock(hdr); + assert(hdr->free_tab_slots[slot] == 0); + hdr->free_tab_slots[slot] = 1; + shm_mutex_unlock(hdr); +} + +static int max_entries(struct __hash *h) +{ + struct shm_hdr *hdr = get_shm_hdr(h); + + return hdr->nr_entries; +} + +#endif /* !BIDIR_USE_STDMALLOC */ + + +/* The structures may be stored in shared memory region, with base address */ +/* stored in shm_base_addr. All the pointers in the above structures need */ +/* to be relative to this base address (otherwise they would not make */ +/* sense to other processes). Bellow accessor functions are used to */ +/* convert between canonical (base address relative) and local addresses. */ +/* C2L stands for CANONICAL_TO_LOCAL, and vice versa */ +#define C2L(_h, _p) ((typeof(_p))((unsigned long)(_p) + \ + get_shm_baddr(_h))) +#define L2C(_h, _p) ((typeof(_p))((unsigned long)(_p) - \ + get_shm_baddr(_h))) + + +#define HASH_LOCK_INIT(_h) ({ \ + int _ret; \ + pthread_rwlockattr_t _attr; \ + \ + h->lock_alive = 1; \ + _ret = pthread_rwlockattr_init(&_attr); \ + if(_ret == 0) \ + _ret = pthread_rwlockattr_setpshared(&_attr, \ + PTHREAD_PROCESS_SHARED); \ + if(_ret == 0) \ + _ret = pthread_rwlock_init(&(_h)->hash_lock, &_attr); \ + if(_ret == 0) \ + _ret = pthread_rwlockattr_destroy(&_attr); \ + \ + _ret; \ +}) + +#define HASH_LOCK_RDLOCK(_h) ({ \ + int _ret; \ + \ + if(!_h->lock_alive) _ret = ENOLCK; \ + else \ + { \ + struct timespec _ts; \ + /* 10s timeout, long but ~matches disk spin-up times */ \ + _ts.tv_sec = time(NULL) + 10; \ + _ts.tv_nsec = 0; \ + _ret = pthread_rwlock_timedrdlock(&(_h)->hash_lock, &_ts); \ + if(_ret == ETIMEDOUT) _h->lock_alive = 0; \ + } \ + _ret; \ +}) + +#define HASH_LOCK_RDUNLOCK(_h) \ + pthread_rwlock_unlock(&(_h)->hash_lock) + +#define HASH_LOCK_WRLOCK(_h) ({ \ + int _ret; \ + \ + if(!_h->lock_alive) _ret = ENOLCK; \ + else \ + { \ + 
struct timespec _ts; \ + _ts.tv_sec = time(NULL) + 10; \ + _ts.tv_nsec = 0UL; \ + _ret = pthread_rwlock_timedwrlock(&(_h)->hash_lock, &_ts); \ + if(_ret == ETIMEDOUT) _h->lock_alive = 0; \ + } \ + _ret; \ +}) + +#define HASH_LOCK_TRYWRLOCK(_h) ({ \ + int _ret = (_h->lock_alive ? \ + pthread_rwlock_trywrlock(&(_h)->hash_lock) : \ + ENOLCK); \ + _ret; \ +}) + +#define HASH_LOCK_WRUNLOCK(_h) \ + pthread_rwlock_unlock(&(_h)->hash_lock) + + +#define BUCKET_LOCK_INIT(_h, _b) ({ \ + int _ret; \ + pthread_rwlockattr_t _attr; \ + \ + _ret = pthread_rwlockattr_init(&_attr); \ + if(_ret == 0) \ + _ret = pthread_rwlockattr_setpshared(&_attr, \ + PTHREAD_PROCESS_SHARED); \ + if(_ret == 0) \ + _ret = pthread_rwlock_init(&(_b)->bucket_lock, &_attr); \ + if(_ret == 0) \ + _ret = pthread_rwlockattr_destroy(&_attr); \ + \ + _ret; \ +}) + + +#define BUCKET_LOCK_RDLOCK(_h, _lock_tab, _idx) ({ \ + int _ret; \ + struct timespec _ts; \ + struct bucket_lock *_lock = &(_lock_tab)[(_idx) / BUCKETS_PER_LOCK]; \ + \ + _ts.tv_sec = time(NULL) + 10; \ + _ts.tv_nsec = 0; \ + _ret = pthread_rwlock_timedrdlock(&(_lock)->bucket_lock, &_ts); \ + if(_ret == ETIMEDOUT) (_h)->lock_alive = 0; \ + _ret; \ +}) + + +#define BUCKET_LOCK_RDUNLOCK(_h, _lock_tab, _idx) ({ \ + struct bucket_lock *_lock = &(_lock_tab)[(_idx) / BUCKETS_PER_LOCK]; \ + pthread_rwlock_unlock(&(_lock)->bucket_lock); \ +}) + +#define BUCKET_LOCK_WRLOCK(_h, _lock_tab, _idx) ({ \ + int _ret; \ + struct timespec _ts; \ + struct bucket_lock *_lock = &(_lock_tab)[(_idx) / BUCKETS_PER_LOCK]; \ + \ + _ts.tv_sec = time(NULL) + 10; \ + _ts.tv_nsec = 0; \ + _ret = pthread_rwlock_timedwrlock(&(_lock)->bucket_lock, &_ts); \ + if(_ret == ETIMEDOUT) (_h)->lock_alive = 0; \ + _ret; \ +}) + +#define BUCKET_LOCK_WRUNLOCK(_h, _lock_tab, _idx) ({ \ + struct bucket_lock *_lock = &(_lock_tab)[(_idx) / BUCKETS_PER_LOCK]; \ + pthread_rwlock_unlock(&(_lock)->bucket_lock); \ +}) + +#define TWO_BUCKETS_LOCK_WRLOCK(_h, _blt1, _idx1, _blt2, _idx2) ({ \ + int 
_ret; \ + pthread_rwlock_t *_l1, *_l2; \ + struct timespec _ts; \ + struct bucket_lock *_bl1 = &(_blt1)[(_idx1) / BUCKETS_PER_LOCK]; \ + struct bucket_lock *_bl2 = &(_blt2)[(_idx2) / BUCKETS_PER_LOCK]; \ + \ + assert((_bl1) != (_bl2)); \ + if((_bl1) < (_bl2)) \ + { \ + _l1 = &(_bl1)->bucket_lock; \ + _l2 = &(_bl2)->bucket_lock; \ + } \ + else \ + { \ + _l1 = &(_bl2)->bucket_lock; \ + _l2 = &(_bl1)->bucket_lock; \ + } \ + _ts.tv_sec = time(NULL) + 10; \ + _ts.tv_nsec = 0; \ + _ret = pthread_rwlock_timedwrlock(_l1, &_ts); \ + _ts.tv_sec = time(NULL) + 10; \ + _ts.tv_nsec = 0; \ + if(_ret == 0) \ + _ret = pthread_rwlock_timedwrlock(_l2, &_ts); \ + if(_ret == ETIMEDOUT) (_h)->lock_alive = 0; \ + \ + _ret; \ +}) + +#define TWO_BUCKETS_LOCK_WRUNLOCK(_h, _blt1, _idx1, _blt2, _idx2) ({ \ + int _ret; \ + struct bucket_lock *_bl1 = &(_blt1)[(_idx1) / BUCKETS_PER_LOCK]; \ + struct bucket_lock *_bl2 = &(_blt2)[(_idx2) / BUCKETS_PER_LOCK]; \ + \ + _ret = pthread_rwlock_unlock(&(_bl1)->bucket_lock); \ + if(_ret == 0) \ + _ret = pthread_rwlock_unlock(&(_bl2)->bucket_lock); \ + \ + _ret; \ +}) + + + + +static uint32_t hash_to_idx(struct __hash *h, uint32_t hash) +{ + return (hash % h->tab_size); +} + +static void alloc_tab(struct __hash *h, + int size, + struct bucket **buckets_tab, + struct bucket_lock **bucket_locks_tab) +{ + int i; + + alloc_buckets(h, size, buckets_tab, bucket_locks_tab); + if(!(*buckets_tab) || !(*bucket_locks_tab)) + goto error_out; + memset(*buckets_tab, 0, size * sizeof(struct bucket)); + memset(*bucket_locks_tab, 0, nr_locks(size) * sizeof(struct bucket_lock)); + for(i=0; i<nr_locks(size); i++) + if(BUCKET_LOCK_INIT(h, *bucket_locks_tab + i) != 0) + goto error_out; + + return; +error_out: + free_buckets(h, *buckets_tab, *bucket_locks_tab); + *buckets_tab = NULL; + *bucket_locks_tab = NULL; + return; +} + + +struct __hash *__hash_init(struct __hash *h, uint32_t min_size) +{ + uint32_t size; + uint16_t size_idx; + struct bucket *buckets; + struct 
bucket_lock *bucket_locks; + + /* Sanity check on args */ + if (min_size > hash_sizes[hash_sizes_len-1]) return NULL; + /* Find least size greater than init_size */ + for(size_idx = 0; size_idx < hash_sizes_len; size_idx++) + if(hash_sizes[size_idx] >= min_size) + break; + size = hash_sizes[size_idx]; + + if(!h) return NULL; + alloc_tab(h, size, &buckets, &bucket_locks); + if(!buckets || !bucket_locks) goto alloc_fail; + h->key_tab = L2C(h, buckets); + h->key_lock_tab = L2C(h, bucket_locks); + alloc_tab(h, size, &buckets, &bucket_locks); + if(!buckets || !bucket_locks) goto alloc_fail; + h->value_tab = L2C(h, buckets); + h->value_lock_tab = L2C(h, bucket_locks); + /* Init all h variables */ + if(HASH_LOCK_INIT(h) != 0) goto alloc_fail; + h->nr_ent = 0; + h->tab_size = size; + h->size_idx = size_idx; + h->max_load = (uint32_t)ceilf(hash_max_load_fact * size); + h->min_load = (uint32_t)ceilf(hash_min_load_fact * size); + + return h; + +alloc_fail: + if(h->key_tab || h->key_lock_tab) + free_buckets(h, C2L(h, h->key_tab), C2L(h, h->key_lock_tab)); + return NULL; +} + +#undef __prim +#undef __prim_t +#undef __prim_tab +#undef __prim_lock_tab +#undef __prim_hash +#undef __prim_cmp +#undef __prim_next +#undef __sec +#undef __sec_t + +#define __prim key +#define __prim_t __k_t +#define __prim_tab key_tab +#define __prim_lock_tab key_lock_tab +#define __prim_hash __key_hash +#define __prim_cmp __key_cmp +#define __prim_next key_next +#define __sec value +#define __sec_t __v_t +int __key_lookup(struct __hash *h, __prim_t k, __sec_t *vp) +{ + struct hash_entry *entry; + struct bucket *b; + struct bucket_lock *blt; + uint32_t idx; + + if(HASH_LOCK_RDLOCK(h) != 0) return -ENOLCK; + idx = hash_to_idx(h, __prim_hash(k)); + b = C2L(h, &h->__prim_tab[idx]); + blt = C2L(h, h->__prim_lock_tab); + if(BUCKET_LOCK_RDLOCK(h, blt, idx) != 0) return -ENOLCK; + entry = b->hash_entry; + while(entry != NULL) + { + entry = C2L(h, entry); + if(__prim_cmp(k, entry->__prim)) + { + /* Unlock here 
*/ + *vp = entry->__sec; + BUCKET_LOCK_RDUNLOCK(h, blt, idx); + HASH_LOCK_RDUNLOCK(h); + return 1; + } + entry = entry->__prim_next; + } + BUCKET_LOCK_RDUNLOCK(h, blt, idx); + HASH_LOCK_RDUNLOCK(h); + return 0; +} + +/* value lookup is an almost exact copy of key lookup */ +#undef __prim +#undef __prim_t +#undef __prim_tab +#undef __prim_lock_tab +#undef __prim_hash +#undef __prim_cmp +#undef __prim_next +#undef __sec +#undef __sec_t + +#define __prim value +#define __prim_t __v_t +#define __prim_tab value_tab +#define __prim_lock_tab value_lock_tab +#define __prim_hash __value_hash +#define __prim_cmp __value_cmp +#define __prim_next value_next +#define __sec key +#define __sec_t __k_t +int __value_lookup(struct __hash *h, __prim_t k, __sec_t *vp) +{ + struct hash_entry *entry; + struct bucket *b; + struct bucket_lock *blt; + uint32_t idx; + + if(HASH_LOCK_RDLOCK(h) != 0) return -ENOLCK; + idx = hash_to_idx(h, __prim_hash(k)); + b = C2L(h, &h->__prim_tab[idx]); + blt = C2L(h, h->__prim_lock_tab); + if(BUCKET_LOCK_RDLOCK(h, blt, idx) != 0) return -ENOLCK; + entry = b->hash_entry; + while(entry != NULL) + { + entry = C2L(h, entry); + if(__prim_cmp(k, entry->__prim)) + { + /* Unlock here */ + *vp = entry->__sec; + BUCKET_LOCK_RDUNLOCK(h, blt, idx); + HASH_LOCK_RDUNLOCK(h); + return 1; + } + entry = entry->__prim_next; + } + BUCKET_LOCK_RDUNLOCK(h, blt, idx); + HASH_LOCK_RDUNLOCK(h); + return 0; +} + +int __insert(struct __hash *h, __k_t k, __v_t v) +{ + uint32_t k_idx, v_idx; + struct hash_entry *entry; + struct bucket *bk, *bv; + struct bucket_lock *bltk, *bltv; + + + /* Allocate new entry before any locks (in case it fails) */ + entry = (struct hash_entry*) + alloc_entry(h, sizeof(struct hash_entry)); + if(!entry) return 0; + + if(HASH_LOCK_RDLOCK(h) != 0) return -ENOLCK; + /* Read from nr_ent is atomic(TODO check), no need for fancy accessors */ + if(h->nr_ent+1 > h->max_load) + { + /* Resize needs the write lock, drop read lock temporarily */ + 
HASH_LOCK_RDUNLOCK(h); + hash_resize(h); + if(HASH_LOCK_RDLOCK(h) != 0) return -ENOLCK; + } + + /* Init the entry */ + entry->key = k; + entry->value = v; + + /* Work out the indicies */ + k_idx = hash_to_idx(h, __key_hash(k)); + v_idx = hash_to_idx(h, __value_hash(v)); + + /* Insert */ + bk = C2L(h, &h->key_tab[k_idx]); + bv = C2L(h, &h->value_tab[v_idx]); + bltk = C2L(h, h->key_lock_tab); + bltv = C2L(h, h->value_lock_tab); + if(TWO_BUCKETS_LOCK_WRLOCK(h, bltk, k_idx, bltv, v_idx) != 0) + return -ENOLCK; + entry->key_next = bk->hash_entry; + bk->hash_entry = L2C(h, entry); + entry->value_next = bv->hash_entry; + bv->hash_entry = L2C(h, entry); + TWO_BUCKETS_LOCK_WRUNLOCK(h, bltk, k_idx, bltv, v_idx); + + /* Book keeping */ + __sync_add_and_fetch(&h->nr_ent, 1); + + HASH_LOCK_RDUNLOCK(h); + + return 1; +} + + +#undef __prim +#undef __prim_t +#undef __prim_tab +#undef __prim_lock_tab +#undef __prim_hash +#undef __prim_cmp +#undef __prim_next +#undef __sec +#undef __sec_t +#undef __sec_tab +#undef __sec_lock_tab +#undef __sec_hash +#undef __sec_next + +#define __prim key +#define __prim_t __k_t +#define __prim_tab key_tab +#define __prim_lock_tab key_lock_tab +#define __prim_hash __key_hash +#define __prim_cmp __key_cmp +#define __prim_next key_next +#define __sec value +#define __sec_t __v_t +#define __sec_tab value_tab +#define __sec_lock_tab value_lock_tab +#define __sec_hash __value_hash +#define __sec_next value_next + +int __key_remove(struct __hash *h, __prim_t k, __sec_t *vp) +{ + struct hash_entry *e, *es, **pek, **pev; + struct bucket *bk, *bv; + struct bucket_lock *bltk, *bltv; + uint32_t old_kidx, kidx, vidx, min_load, nr_ent; + __prim_t ks; + __sec_t vs; + + if(HASH_LOCK_RDLOCK(h) != 0) return -ENOLCK; + +again: + old_kidx = kidx = hash_to_idx(h, __prim_hash(k)); + bk = C2L(h, &h->__prim_tab[kidx]); + bltk = C2L(h, h->__prim_lock_tab); + if(BUCKET_LOCK_RDLOCK(h, bltk, kidx) != 0) return -ENOLCK; + pek = &(bk->hash_entry); + e = *pek; + while(e != NULL) 
+ { + e = C2L(h, e); + if(__prim_cmp(k, e->__prim)) + { + goto found; + } + pek = &(e->__prim_next); + e = *pek; + } + + BUCKET_LOCK_RDUNLOCK(h, bltk, kidx); + HASH_LOCK_RDUNLOCK(h); + + return 0; + +found: + /* + * Make local copy of key and value. + */ + es = e; + ks = e->__prim; + vs = e->__sec; + kidx = hash_to_idx(h, __prim_hash(ks)); + /* Being paranoid: check if kidx has not changed, so that we unlock the + * right bucket */ + assert(old_kidx == kidx); + vidx = hash_to_idx(h, __sec_hash(vs)); + bk = C2L(h, &h->__prim_tab[kidx]); + bv = C2L(h, &h->__sec_tab[vidx]); + bltk = C2L(h, h->__prim_lock_tab); + bltv = C2L(h, h->__sec_lock_tab); + BUCKET_LOCK_RDUNLOCK(h, bltk, kidx); + if(TWO_BUCKETS_LOCK_WRLOCK(h, bltk, kidx, bltv, vidx) != 0) return -ENOLCK; + pek = &(bk->hash_entry); + pev = &(bv->hash_entry); + + /* Find the entry in both tables */ + e = *pek; + while(e != NULL) + { + e = C2L(h, e); + if(e == es) + { + /* Being paranoid: make sure that the key and value are + * still the same. This is still not 100%, because, in + * principle, the entry could have got deleted, when we + * didn't hold the locks for a little while, and exactly + * the same entry reinserted. If the __k_t & __v_t are + * simple types than it probably doesn't matter, but if + * either is a pointer type, the actual structure might + * now be different. 
The chances that happens are very + * slim, but still, if that's a problem, the user needs to + * pay attention to the structure re-allocation */ + if((memcmp(&(e->__prim), &ks, sizeof(__prim_t))) || + (memcmp(&(e->__sec), &vs, sizeof(__sec_t)))) + break; + goto found_again; + } + pek = &(e->__prim_next); + e = *pek; + } + + TWO_BUCKETS_LOCK_WRUNLOCK(h, bltk, kidx, bltv, vidx); + + /* Entry got removed in the meantime, try again */ + goto again; + +found_again: + /* We are now comitted to the removal */ + e = *pev; + while(e != NULL) + { + e = C2L(h, e); + if(e == es) + { + /* Both pek and pev are pointing to the right place, remove */ + *pek = e->__prim_next; + *pev = e->__sec_next; + + nr_ent = __sync_sub_and_fetch(&h->nr_ent, 1); + /* read min_load still under the hash lock! */ + min_load = h->min_load; + + TWO_BUCKETS_LOCK_WRUNLOCK(h, bltk, kidx, bltv, vidx); + HASH_LOCK_RDUNLOCK(h); + + if(nr_ent < min_load) + hash_resize(h); + if(vp != NULL) + *vp = e->__sec; + free_entry(h, e); + return 1; + } + pev = &(e->__sec_next); + e = *pev; + } + + /* We should never get here!, no need to unlock anything */ + return -ENOLCK; +} + +#undef __prim +#undef __prim_t +#undef __prim_tab +#undef __prim_lock_tab +#undef __prim_hash +#undef __prim_cmp +#undef __prim_next +#undef __sec +#undef __sec_t +#undef __sec_tab +#undef __sec_lock_tab +#undef __sec_hash +#undef __sec_next + +#define __prim value +#define __prim_t __v_t +#define __prim_tab value_tab +#define __prim_lock_tab value_lock_tab +#define __prim_hash __value_hash +#define __prim_cmp __value_cmp +#define __prim_next value_next +#define __sec key +#define __sec_t __k_t +#define __sec_tab key_tab +#define __sec_lock_tab key_lock_tab +#define __sec_hash __key_hash +#define __sec_next key_next + +int __value_remove(struct __hash *h, __prim_t k, __sec_t *vp) +{ + struct hash_entry *e, *es, **pek, **pev; + struct bucket *bk, *bv; + struct bucket_lock *bltk, *bltv; + uint32_t old_kidx, kidx, vidx, min_load, nr_ent; + 
__prim_t ks; + __sec_t vs; + + if(HASH_LOCK_RDLOCK(h) != 0) return -ENOLCK; + +again: + old_kidx = kidx = hash_to_idx(h, __prim_hash(k)); + bk = C2L(h, &h->__prim_tab[kidx]); + bltk = C2L(h, h->__prim_lock_tab); + if(BUCKET_LOCK_RDLOCK(h, bltk, kidx) != 0) return -ENOLCK; + pek = &(bk->hash_entry); + e = *pek; + while(e != NULL) + { + e = C2L(h, e); + if(__prim_cmp(k, e->__prim)) + { + goto found; + } + pek = &(e->__prim_next); + e = *pek; + } + + BUCKET_LOCK_RDUNLOCK(h, bltk, kidx); + HASH_LOCK_RDUNLOCK(h); + + return 0; + +found: + /* + * Make local copy of key and value. + */ + es = e; + ks = e->__prim; + vs = e->__sec; + kidx = hash_to_idx(h, __prim_hash(ks)); + /* Being paranoid: check if kidx has not changed, so that we unlock the + * right bucket */ + assert(old_kidx == kidx); + vidx = hash_to_idx(h, __sec_hash(vs)); + bk = C2L(h, &h->__prim_tab[kidx]); + bv = C2L(h, &h->__sec_tab[vidx]); + bltk = C2L(h, h->__prim_lock_tab); + bltv = C2L(h, h->__sec_lock_tab); + BUCKET_LOCK_RDUNLOCK(h, bltk, kidx); + if(TWO_BUCKETS_LOCK_WRLOCK(h, bltk, kidx, bltv, vidx) != 0) return -ENOLCK; + pek = &(bk->hash_entry); + pev = &(bv->hash_entry); + + /* Find the entry in both tables */ + e = *pek; + while(e != NULL) + { + e = C2L(h, e); + if(e == es) + { + /* Being paranoid: make sure that the key and value are + * still the same. This is still not 100%, because, in + * principle, the entry could have got deleted, when we + * didn't hold the locks for a little while, and exactly + * the same entry reinserted. If the __k_t & __v_t are + * simple types than it probably doesn't matter, but if + * either is a pointer type, the actual structure might + * now be different. 
The chances that happens are very + * slim, but still, if that's a problem, the user needs to + * pay attention to the structure re-allocation */ + if((memcmp(&(e->__prim), &ks, sizeof(__prim_t))) || + (memcmp(&(e->__sec), &vs, sizeof(__sec_t)))) + break; + goto found_again; + } + pek = &(e->__prim_next); + e = *pek; + } + + TWO_BUCKETS_LOCK_WRUNLOCK(h, bltk, kidx, bltv, vidx); + + /* Entry got removed in the meantime, try again */ + goto again; + +found_again: + /* We are now comitted to the removal */ + e = *pev; + while(e != NULL) + { + e = C2L(h, e); + if(e == es) + { + /* Both pek and pev are pointing to the right place, remove */ + *pek = e->__prim_next; + *pev = e->__sec_next; + + nr_ent = __sync_sub_and_fetch(&h->nr_ent, 1); + /* read min_load still under the hash lock! */ + min_load = h->min_load; + + TWO_BUCKETS_LOCK_WRUNLOCK(h, bltk, kidx, bltv, vidx); + HASH_LOCK_RDUNLOCK(h); + + if(nr_ent < min_load) + hash_resize(h); + if(vp != NULL) + *vp = e->__sec; + free_entry(h, e); + return 1; + } + pev = &(e->__sec_next); + e = *pev; + } + + /* We should never get here!, no need to unlock anything */ + return -ENOLCK; +} + + +int __hash_destroy(struct __hash *h, + void (*entry_consumer)(__k_t k, __v_t v, void *p), + void *d) +{ + struct hash_entry *e, *n; + struct bucket *b; + int i; + + if(HASH_LOCK_WRLOCK(h) != 0) return -ENOLCK; + + /* No need to lock individual buckets, with hash write lock */ + for(i=0; i < h->tab_size; i++) + { + b = C2L(h, &h->key_tab[i]); + e = b->hash_entry; + while(e != NULL) + { + e = C2L(h, e); + n = e->key_next; + if(entry_consumer) + entry_consumer(e->key, e->value, d); + free_entry(h, e); + e = n; + } + } + free_buckets(h, C2L(h, h->key_tab), C2L(h, h->key_lock_tab)); + free_buckets(h, C2L(h, h->value_tab), C2L(h, h->value_lock_tab)); + + HASH_LOCK_WRUNLOCK(h); + h->lock_alive = 0; + + return 0; +} + +static void hash_resize(struct __hash *h) +{ + int new_size_idx, i, lock_ret; + uint32_t size, old_size, kidx, vidx; + struct 
bucket *t1, *t2, *b; + struct bucket_lock *l1, *l2; + struct hash_entry *e, *n; + + /* We may fail to allocate the lock, if the resize is triggered while + we are iterating (under read lock) */ + lock_ret = HASH_LOCK_TRYWRLOCK(h); + if(lock_ret != 0) return; + + new_size_idx = h->size_idx; + /* Work out the new size */ + if(h->nr_ent >= h->max_load) + new_size_idx = h->size_idx+1; + if(h->nr_ent < h->min_load) + new_size_idx = h->size_idx-1; + if((new_size_idx == h->size_idx) || + (new_size_idx >= hash_sizes_len) || + (new_size_idx < 0)) + { + HASH_LOCK_WRUNLOCK(h); + return; + } + + size = hash_sizes[new_size_idx]; + + /* Allocate the new sizes */ + t1 = t2 = NULL; + l1 = l2 = NULL; + alloc_tab(h, size, &t1, &l1); + if(!t1 || !l1) goto alloc_fail; + alloc_tab(h, size, &t2, &l2); + if(!t2 || !l2) goto alloc_fail; + + old_size = h->tab_size; + h->tab_size = size; + h->size_idx = new_size_idx; + h->max_load = (uint32_t)ceilf(hash_max_load_fact * size); + h->min_load = (uint32_t)ceilf(hash_min_load_fact * size); + + /* Move the entries */ + for(i=0; i < old_size; i++) + { + b = C2L(h, &h->key_tab[i]); + e = b->hash_entry; + while(e != NULL) + { + e = C2L(h, e); + n = e->key_next; + kidx =hash_to_idx(h, __key_hash(e->key)); + vidx =hash_to_idx(h, __value_hash(e->value)); + /* Move to the correct bucket */ + e->key_next = t1[kidx].hash_entry; + t1[kidx].hash_entry = L2C(h, e); + e->value_next = t2[vidx].hash_entry; + t2[vidx].hash_entry = L2C(h, e); + e = n; + } + } + free_buckets(h, C2L(h, h->key_tab), C2L(h, h->key_lock_tab)); + free_buckets(h, C2L(h, h->value_tab), C2L(h, h->value_lock_tab)); + h->key_tab = L2C(h, t1); + h->key_lock_tab = L2C(h, l1); + h->value_tab = L2C(h, t2); + h->value_lock_tab = L2C(h, l2); + + HASH_LOCK_WRUNLOCK(h); + + return; + +alloc_fail: + /* If we failed to resize, adjust max/min load. 
This will stop us from + * retrying resize too frequently */ + if(new_size_idx > h->size_idx) + h->max_load = (h->max_load + 2 * h->tab_size) / 2 + 1; + else + if (new_size_idx < h->size_idx) + h->min_load = h->min_load / 2; + HASH_LOCK_WRUNLOCK(h); + if(t1 || l1) free_buckets(h, t1, l1); + if(t2 || l2) free_buckets(h, t2, l2); + return; +} + +int __hash_iterator(struct __hash *h, + int (*entry_consumer)(__k_t k, __v_t v, void *p), + void *d) +{ + struct hash_entry *e, *n; + struct bucket *b; + struct bucket_lock *blt; + int i, brk_early; + + if(HASH_LOCK_RDLOCK(h) != 0) return -ENOLCK; + + for(i=0; i < h->tab_size; i++) + { + b = C2L(h, &h->key_tab[i]); + blt = C2L(h, h->key_lock_tab); + if(BUCKET_LOCK_RDLOCK(h, blt, i) != 0) return -ENOLCK; + e = b->hash_entry; + while(e != NULL) + { + e = C2L(h, e); + n = e->key_next; + brk_early = entry_consumer(e->key, e->value, d); + if(brk_early) + { + BUCKET_LOCK_RDUNLOCK(h, blt, i); + goto out; + } + e = n; + } + BUCKET_LOCK_RDUNLOCK(h, blt, i); + } +out: + HASH_LOCK_RDUNLOCK(h); + return 0; +} + +void __hash_sizes(struct __hash *h, + uint32_t *nr_ent, + uint32_t *max_nr_ent, + uint32_t *tab_size, + uint32_t *max_load, + uint32_t *min_load) +{ + if(nr_ent != NULL) *nr_ent = h->nr_ent; + if(max_nr_ent != NULL) *max_nr_ent = max_entries(h); + if(tab_size != NULL) *tab_size = h->tab_size; + if(max_load != NULL) *max_load = h->max_load; + if(min_load != NULL) *min_load = h->min_load; +} + diff --git a/tools/memshr/bidir-hash.h b/tools/memshr/bidir-hash.h new file mode 100644 index 0000000000..ed9ec58ff4 --- /dev/null +++ b/tools/memshr/bidir-hash.h @@ -0,0 +1,113 @@ +/****************************************************************************** + * + * Copyright (c) 2009 Citrix (R&D) Inc. 
(Grzegorz Milos) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#ifndef __BIDIR_HASH_H__ +#define __BIDIR_HASH_H__ + +#include <stdint.h> +#include "memshr-priv.h" + +typedef struct vbdblk { + uint64_t sec; + uint16_t disk_id; +} vbdblk_t; + + +#if defined FINGERPRINT_MAP || BLOCK_MAP +#define DEFINE_SINGLE_MAP +#endif + +/*******************************************************/ +/* Fingerprint map */ +/*******************************************************/ +#if defined FINGERPRINT_MAP || !defined DEFINE_SINGLE_MAP + +#undef BIDIR_NAME_PREFIX +#undef BIDIR_KEY +#undef BIDIR_VALUE +#undef BIDIR_KEY_T +#undef BIDIR_VALUE_T +static uint32_t fgprtshr_fgprt_hash(uint32_t h) +{ + return h; +} + +static uint32_t fgprtshr_mfn_hash(uint64_t m) +{ + return (uint32_t)m; +} + +static int fgprtshr_fgprt_cmp(uint32_t h1, uint32_t h2) +{ + return (h1 == h2); +} + +static int fgprtshr_mfn_cmp(uint32_t m1, uint32_t m2) +{ + return (m1 == m2); +} +#define BIDIR_NAME_PREFIX fgprtshr +#define BIDIR_KEY fgprt +#define BIDIR_VALUE mfn +#define BIDIR_KEY_T uint32_t +#define BIDIR_VALUE_T xen_mfn_t +#include "bidir-namedefs.h" + +#endif /* FINGERPRINT_MAP */ + + +/*******************************************************/ +/* Block<->Memory sharing handles */ 
/*******************************************************/
#if defined BLOCK_MAP || !defined DEFINE_SINGLE_MAP

#undef BIDIR_NAME_PREFIX
#undef BIDIR_KEY
#undef BIDIR_VALUE
#undef BIDIR_KEY_T
#undef BIDIR_VALUE_T
/* TODO better hashes! */
/* Hash a VBD block id; XOR of truncated sector and disk id — weak
 * (sectors beyond 2^32 alias), hence the TODO above. */
static inline uint32_t blockshr_block_hash(vbdblk_t block)
{
    return (uint32_t)(block.sec) ^ (uint32_t)(block.disk_id);
}

/* Hash a sharing handle by its low 32 bits. */
static inline uint32_t blockshr_shrhnd_hash(uint64_t shrhnd)
{
    return (uint32_t)shrhnd;
}

/* Full equality on both fields (hash above may collide). */
static inline int blockshr_block_cmp(vbdblk_t b1, vbdblk_t b2)
{
    return (b1.sec == b2.sec) && (b1.disk_id == b2.disk_id);
}

static inline int blockshr_shrhnd_cmp(uint64_t h1, uint64_t h2)
{
    return (h1 == h2);
}
/* Parameters consumed by bidir-namedefs.h to emit the blockshr_* API. */
#define BIDIR_NAME_PREFIX blockshr
#define BIDIR_KEY block
#define BIDIR_VALUE shrhnd
#define BIDIR_KEY_T vbdblk_t
#define BIDIR_VALUE_T uint64_t
#include "bidir-namedefs.h"

#endif /* BLOCK_MAP */

#endif /* __BIDIR_HASH_H__ */
diff --git a/tools/memshr/bidir-namedefs.h b/tools/memshr/bidir-namedefs.h
new file mode 100644
index 0000000000..649f063b70
--- /dev/null
+++ b/tools/memshr/bidir-namedefs.h
@@ -0,0 +1,80 @@
/******************************************************************************
 *
 * Copyright (c) 2009 Citrix (R&D) Inc. (Grzegorz Milos)
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */
/* Poor-man's C template: included once per map flavour from
 * bidir-hash.h with BIDIR_NAME_PREFIX/KEY/VALUE/KEY_T/VALUE_T set;
 * each inclusion renames the generic __* identifiers below to
 * prefix-specific ones (e.g. fgprtshr_insert, blockshr_block_lookup).
 * Deliberately NO include guard: multiple inclusion is the mechanism. */
#include "memshr-priv.h"

/* Macros used to assemble the names */
/* The extra ONE/TWO indirection forces expansion of the prefix macro
 * before ## pasting (pasting directly would glue the literal token
 * 'BIDIR_NAME_PREFIX'). */
#define BIDIR_NAME_ONE_INTERNAL(prefix, name) \
    prefix ## _ ## name
#define BIDIR_NAME_TWO_INTERNAL(prefix, name1, name2) \
    prefix ## _ ## name1 ## _ ## name2

#define BIDIR_NAME_ONE(prefix, name) \
    BIDIR_NAME_ONE_INTERNAL(prefix, name)
#define BIDIR_NAME_TWO(prefix, name1, name2) \
    BIDIR_NAME_TWO_INTERNAL(prefix, name1, name2)

#define INTERNAL_NAME_ONE(name) BIDIR_NAME_ONE(BIDIR_NAME_PREFIX, name)
#define INTERNAL_NAME_TWO(name1, name2) \
    BIDIR_NAME_TWO(BIDIR_NAME_PREFIX, name1, name2)

/* Function/type names */
/* NOTE(review): double-underscore identifiers are reserved for the
 * implementation; renaming would touch bidir-hash.c too — flagged only. */
#define __k_t BIDIR_KEY_T
#define __v_t BIDIR_VALUE_T

#define __hash INTERNAL_NAME_ONE(hash)
#define __shm_hash_init INTERNAL_NAME_ONE(shm_hash_init)
#define __shm_hash_get INTERNAL_NAME_ONE(shm_hash_get)
#define __hash_init INTERNAL_NAME_ONE(hash_init)
#define __key_lookup INTERNAL_NAME_TWO(BIDIR_KEY, lookup)
#define __value_lookup INTERNAL_NAME_TWO(BIDIR_VALUE, lookup)
#define __insert INTERNAL_NAME_ONE(insert)
#define __key_remove INTERNAL_NAME_TWO(BIDIR_KEY, remove)
#define __value_remove INTERNAL_NAME_TWO(BIDIR_VALUE, remove)
#define __hash_destroy INTERNAL_NAME_ONE(hash_destroy)
#define __hash_iterator INTERNAL_NAME_ONE(hash_iterator)

/* Hash/compare callbacks the including header must have defined. */
#define __key_hash INTERNAL_NAME_TWO(BIDIR_KEY, hash)
#define __key_cmp INTERNAL_NAME_TWO(BIDIR_KEY, cmp)
#define __value_hash INTERNAL_NAME_TWO(BIDIR_VALUE, hash)
#define __value_cmp INTERNAL_NAME_TWO(BIDIR_VALUE, cmp)

#define __hash_sizes INTERNAL_NAME_ONE(hash_sizes)


/* Final function exports */
struct __hash* __shm_hash_init(unsigned long shm_baddr, unsigned long shm_size);
struct __hash* __shm_hash_get(unsigned long shm_baddr);
struct __hash *__hash_init   (struct __hash *h, uint32_t min_size);
int __key_lookup   (struct __hash *h, __k_t k, __v_t *vp);
int __value_lookup(struct __hash *h, __v_t v, __k_t *kp);
int __insert      (struct __hash *h, __k_t k, __v_t v);
int __key_remove   (struct __hash *h, __k_t k, __v_t *vp);
int __value_remove(struct __hash *h, __v_t v, __k_t *kp);
int __hash_destroy(struct __hash *h,
                   void (*entry_consumer)(__k_t k, __v_t v, void *p),
                   void *d);
int __hash_iterator(struct __hash *h,
                    int (*entry_consumer)(__k_t k, __v_t v, void *p),
                    void *d);
void __hash_sizes(struct __hash *h,
                  uint32_t *nr_ent,
                  uint32_t *max_nr_ent,
                  uint32_t *tab_size,
                  uint32_t *max_load,
                  uint32_t *min_load);
diff --git a/tools/memshr/interface.c b/tools/memshr/interface.c
new file mode 100644
index 0000000000..9ab20c213c
--- /dev/null
+++ b/tools/memshr/interface.c
@@ -0,0 +1,92 @@
/******************************************************************************
 *
 * Copyright (c) 2009 Citrix (R&D) Inc. (Grzegorz Milos)
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#include <string.h> + +#include "memshr-priv.h" +#include "shm.h" + + +typedef struct { + struct shared_memshr_info *shared_info; + struct fgprtshr_hash *fgprts; + struct blockshr_hash *blks; +} private_memshr_info_t; + +private_memshr_info_t memshr; + +#define SHARED_INFO (memshr.shared_info) + +void memshr_daemon_initialize(void) +{ + void *shm_base_addr; + struct fgprtshr_hash *h; + + memset(&memshr, 0, sizeof(private_memshr_info_t)); + + if((SHARED_INFO = shm_shared_info_open(1)) == NULL) + { + DPRINTF("Failed to init shared info.\n"); + return; + } + + if((memshr.fgprts = shm_fgprtshr_hash_open(1)) == NULL) + { + DPRINTF("Failed to init fgprtshr hash.\n"); + return; + } + memshr.shared_info->fgprtshr_hash_inited = 1; + + if((memshr.blks = shm_blockshr_hash_open(1)) == NULL) + { + DPRINTF("Failed to init blockshr hash.\n"); + return; + } + memshr.shared_info->blockshr_hash_inited = 1; +} + + +void memshr_vbd_initialize(void) +{ + memset(&memshr, 0, sizeof(private_memshr_info_t)); + + if((SHARED_INFO = shm_shared_info_open(0)) == NULL) + { + DPRINTF("Failed to open shared info.\n"); + return; + } + + if(!SHARED_INFO->fgprtshr_hash_inited) + { + DPRINTF("fgprtshr hash not inited.\n"); + return; + } + + if((memshr.fgprts = shm_fgprtshr_hash_open(0)) == NULL) + { + DPRINTF("Failed to open fgprtshr_hash.\n"); + return; + } + + if((memshr.blks = shm_blockshr_hash_open(0)) == NULL) + { + DPRINTF("Failed to open blockshr_hash.\n"); + return; + } +} + diff --git a/tools/memshr/memshr-priv.h b/tools/memshr/memshr-priv.h new file mode 100644 index 0000000000..d83179889e --- /dev/null +++ b/tools/memshr/memshr-priv.h @@ -0,0 +1,34 @@ +/****************************************************************************** + * + * Copyright (c) 2009 Citrix (R&D) Inc. 
(Grzegorz Milos)
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */
/* Internal helpers shared by the memshr sources: syslog-based logging. */
#ifndef __MEMSHR_PRIV_H__
#define __MEMSHR_PRIV_H__

#include <syslog.h>
#include <xenctrl.h>
#include "memshr.h"

/* Debug logging to syslog at INFO level; switch the '#if 1' to 0 to
 * compile it out entirely. */
#if 1
#define DPRINTF(_f, _a...) syslog(LOG_INFO, _f, ##_a)
#else
#define DPRINTF(_f, _a...) ((void)0)
#endif

/* Error logging; always enabled, prefixes the calling function's name. */
#define EPRINTF(_f, _a...) syslog(LOG_ERR, "memshr:%s: " _f, __func__, ##_a)

#endif /* __MEMSHR_PRIV_H__ */
diff --git a/tools/memshr/memshr.h b/tools/memshr/memshr.h
new file mode 100644
index 0000000000..412ddb1a42
--- /dev/null
+++ b/tools/memshr/memshr.h
@@ -0,0 +1,29 @@
/******************************************************************************
 *
 * Copyright (c) 2009 Citrix (R&D) Inc. (Grzegorz Milos)
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */
/* Public interface of libmemshr. */
#ifndef __MEMSHR_H__
#define __MEMSHR_H__

#include <stdint.h>

/* Machine frame number; fixed at 64 bits regardless of build
 * architecture (used as the value type of the fingerprint hash). */
typedef uint64_t xen_mfn_t;

/* Create/reset the shared-memory state; daemon (blktapctrl) side. */
extern void memshr_daemon_initialize(void);
/* Attach to the daemon-created shared state; VBD client side. */
extern void memshr_vbd_initialize(void);

#endif /* __MEMSHR_H__ */
diff --git a/tools/memshr/shm.c b/tools/memshr/shm.c
new file mode 100644
index 0000000000..6489f61466
--- /dev/null
+++ b/tools/memshr/shm.c
@@ -0,0 +1,182 @@
/******************************************************************************
 *
 * Copyright (c) 2009 Citrix (R&D) Inc. (Grzegorz Milos)
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#include <assert.h> +#include <stdlib.h> +#include <string.h> +#include <fcntl.h> +#include <unistd.h> +#include <sys/mman.h> +#include <sys/stat.h> +#include <sys/types.h> + +#include "memshr-priv.h" +#include "bidir-hash.h" +#include "shm.h" + +#define MEMSHR_INFO_SHM_FILE "/memshr-info" +#define MEMSHR_INFO_MAGIC 0x15263748 + +#define FGPRT_HASH_SHM_FILE "/blktap-fgprts" +#define FGPRT_HASH_PAGES 10000 + +#define BLOCK_HASH_SHM_FILE "/blktap-blks" +#define BLOCK_HASH_PAGES 10000 + +typedef struct shm_area { + void* base_addr; + size_t size; + int fd; +} shm_area_t; + +typedef struct { + struct shm_area shared_info_area; + struct shm_area fgprts_area; + struct shm_area blocks_area; +} private_shm_info_t; + +private_shm_info_t shm_info; + + + +static int shm_area_open(const char *file, size_t size, int unlink, shm_area_t *shma) +{ + /* TODO: If blktapctrl can be restarted while system is running, this needs + * to be cleverer */ + if(unlink) shm_unlink(file); + + shma->size = size; + shma->fd = shm_open(file, + (O_CREAT | O_RDWR), + (S_IREAD | S_IWRITE)); + + if(shma->fd < 0) return -1; + + if(ftruncate(shma->fd, size) < 0) return -2; + + shma->base_addr = mmap(NULL, + size, + PROT_READ | PROT_WRITE, + MAP_SHARED, + shma->fd, + 0); + + if(shma->base_addr == MAP_FAILED) return -2; + + return 0; +} + +static void shm_area_close(shm_area_t *shma) +{ + munmap(shma->base_addr, shma->size); + close(shma->fd); +} + + +shared_memshr_info_t * shm_shared_info_open(int unlink) +{ + shared_memshr_info_t *shared_info; + pthread_mutexattr_t lock_attr; + + assert(sizeof(shared_memshr_info_t) < XC_PAGE_SIZE); + if(shm_area_open(MEMSHR_INFO_SHM_FILE, + XC_PAGE_SIZE, + unlink, + &(shm_info.shared_info_area)) < 0) + { + DPRINTF("Failed to open shma for shared 
info.\n"); + return NULL; + } + shared_info = (shared_memshr_info_t *) + shm_info.shared_info_area.base_addr; + if(unlink) + { + memset(shared_info, 0, sizeof(shared_memshr_info_t)); + if(pthread_mutexattr_init(&lock_attr) || + pthread_mutexattr_setpshared(&lock_attr, PTHREAD_PROCESS_SHARED) || + pthread_mutex_init(&shared_info->lock, &lock_attr) || + pthread_mutexattr_destroy(&lock_attr)) + { + DPRINTF("Failed to init shared info lock.\n"); + return NULL; + } + shared_info->magic = MEMSHR_INFO_MAGIC; + } + else + if(shared_info->magic != MEMSHR_INFO_MAGIC) + { + DPRINTF("Incorrect magic in shared info.\n"); + return NULL; + } + + return shared_info; +} + + +struct fgprtshr_hash * shm_fgprtshr_hash_open(int unlink) +{ + struct fgprtshr_hash *h; + if(shm_area_open(FGPRT_HASH_SHM_FILE, + FGPRT_HASH_PAGES * XC_PAGE_SIZE, + unlink, + &(shm_info.fgprts_area)) < 0) + { + DPRINTF("Failed to init shma for fgprtshr_hash.\n"); + return NULL; + } + + if(unlink) + { + h = fgprtshr_shm_hash_init( + (unsigned long) shm_info.fgprts_area.base_addr, + FGPRT_HASH_PAGES * XC_PAGE_SIZE); + } else + { + h = fgprtshr_shm_hash_get( + (unsigned long) shm_info.fgprts_area.base_addr); + } + + return h; +} + +struct blockshr_hash * shm_blockshr_hash_open(int unlink) +{ + struct blockshr_hash *h; + if(shm_area_open(BLOCK_HASH_SHM_FILE, + BLOCK_HASH_PAGES * XC_PAGE_SIZE, + unlink, + &(shm_info.blocks_area)) < 0) + { + DPRINTF("Failed to init shma for blockshr_hash.\n"); + return NULL; + } + + if(unlink) + { + h = blockshr_shm_hash_init( + (unsigned long) shm_info.blocks_area.base_addr, + BLOCK_HASH_PAGES * XC_PAGE_SIZE); + } else + { + h = blockshr_shm_hash_get( + (unsigned long) shm_info.blocks_area.base_addr); + } + + return h; +} + diff --git a/tools/memshr/shm.h b/tools/memshr/shm.h new file mode 100644 index 0000000000..0f6f665c6d --- /dev/null +++ b/tools/memshr/shm.h @@ -0,0 +1,35 @@ +/****************************************************************************** + * + * Copyright (c) 
2009 Citrix (R&D) Inc. (Grzegorz Milos)
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */
/* Layout of, and accessors for, the shared-memory areas (see shm.c). */
#ifndef __SHM_H__
#define __SHM_H__

#include <pthread.h>

/* Header of the "/memshr-info" area, shared between daemon and VBDs. */
typedef struct shared_memshr_info {
    unsigned long magic;        /* MEMSHR_INFO_MAGIC once initialised */
    pthread_mutex_t lock;       /* process-shared (PTHREAD_PROCESS_SHARED) */
    int fgprtshr_hash_inited;   /* set by daemon when fgprt hash is usable */
    int blockshr_hash_inited;   /* set by daemon when block hash is usable */
} shared_memshr_info_t;

/* unlink != 0: recreate the area (daemon); 0: attach to it (client).
 * All return NULL on failure. */
shared_memshr_info_t * shm_shared_info_open(int unlink);
struct fgprtshr_hash * shm_fgprtshr_hash_open(int unlink);
struct blockshr_hash * shm_blockshr_hash_open(int unlink);

#endif /* __SHM_H__ */