/****************************************************************************** * * Copyright (c) 2009 Citrix Systems, Inc. (Grzegorz Milos) * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #include #include #include #include #include #include "bidir-hash.h" static const uint32_t hash_sizes[] = {53, 97, 193, 389, 769, 1543, 3079, 6151, 12289, 24593, 49157, 98317, 196613, 393241, 786433, 1572869, 3145739, 6291469, 12582917, 25165843, 50331653, 100663319, 201326611, 402653189, 805306457, 1610612741}; static const uint16_t hash_sizes_len = sizeof(hash_sizes)/sizeof(hash_sizes[0]); static const float hash_max_load_fact = 0.65; static const float hash_min_load_fact = 0.10; /* How many buckets will be covered by a single rw lock */ #define BUCKETS_PER_LOCK 64 #define nr_locks(_nr_buckets) (1 + (_nr_buckets) / BUCKETS_PER_LOCK) #define HASH_LOCK \ pthread_rwlock_t hash_lock #define BUCKET_LOCK \ pthread_rwlock_t bucket_lock struct hash_entry { __k_t key; __v_t value; /* This structure will belong to two buckets, one in each hash table */ struct hash_entry *key_next; struct hash_entry *value_next; }; struct bucket { struct hash_entry *hash_entry; }; struct bucket_lock { BUCKET_LOCK; }; struct __hash { int lock_alive; HASH_LOCK; /* protects: * *_tab, tab_size, size_idx, *_load * (all writes with wrlock) */ uint32_t nr_ent; /* # entries held in hashtables */ struct bucket *key_tab; /* forward mapping hashtable */ struct bucket *value_tab; /* backward mapping hashtable */ struct bucket_lock *key_lock_tab; /* key table bucket locks */ struct bucket_lock *value_lock_tab; /* value table bucket locks */ uint32_t tab_size; /* # buckets is hashtables */ uint16_t size_idx; /* table size index */ uint32_t max_load; /* # entries before rehash */ uint32_t min_load; /* # entries before rehash */ }; struct __hash *__hash_init (struct __hash *h, uint32_t min_size); int __key_lookup (struct __hash *h, __k_t k, __v_t *vp); int __value_lookup(struct __hash *h, __v_t v, __k_t *kp); int __insert (struct __hash *h, __k_t k, __v_t v); int __key_remove (struct __hash *h, __k_t k, __v_t *vp); int __value_remove(struct __hash *h, __v_t v, __k_t *kp); int __hash_destroy(struct __hash *h, void (*entry_consumer)(__k_t k, __v_t v, void *p), void *d); int __hash_iterator(struct __hash *h, int (*entry_consumer)(__k_t k, __v_t v, void *p), void *d); static void hash_resize(struct __hash *h); #if defined(__arm__) static inline void atomic_inc(uint32_t *v) { unsigned long tmp; int result; __asm__ __volatile__("@ atomic_inc\n" "1: ldrex %0, [%3]\n" " add %0, %0, #1\n" " strex %1, %0, [%3]\n" " teq %1, #0\n" " bne 1b" : "=&r" (result), "=&r" (tmp), "+Qo" (*v) : "r" (v) : "cc"); } static inline void atomic_dec(uint32_t *v) { unsigned long tmp; int result; __asm__ __volatile__("@ atomic_dec\n" "1: ldrex %0, [%3]\n" " sub %0, %0, #1\n" " strex %1, %0, [%3]\n" " teq %1, #0\n" " bne 1b" : "=&r" (result), "=&r" (tmp), "+Qo" (*v) : "r" (v) : "cc"); } #elif defined(__aarch64__) static inline void atomic_inc(uint32_t *v) { unsigned long tmp; int result; asm volatile("// atomic_inc\n" "1: ldxr %w0, [%3]\n" " add %w0, %w0, #1\n" " stxr %w1, %w0, [%3]\n" " cbnz %w1, 1b" : "=&r" (result), "=&r" (tmp), "+o" (v) : "r" (v) : "cc"); } static inline void atomic_dec(uint32_t *v) { unsigned long tmp; int result; asm volatile("// atomic_dec\n" "1: ldxr %w0, [%3]\n" " sub %w0, %w0, #1\n" " stxr %w1, %w0, [%3]\n" " cbnz %w1, 1b" : "=&r" (result), "=&r" (tmp), "+o" (v) : "r" (v) : "cc"); } #else /* __x86__ */ static inline void atomic_inc(uint32_t *v) { asm volatile ( "lock ; incl %0" : "=m" (*(volatile uint32_t *)v) : "m" (*(volatile uint32_t *)v) ); } static inline void atomic_dec(uint32_t *v) { asm volatile ( "lock ; decl %0" : "=m" (*(volatile uint32_t *)v) : "m" (*(volatile uint32_t *)v) ); } #endif #ifdef BIDIR_USE_STDMALLOC static void* alloc_entry(struct __hash *h, int size) { return malloc(size); } static void alloc_buckets(struct __hash *h, int nr_buckets, struct bucket **bucket_tab, struct bucket_lock **bucket_locks_tab) { *bucket_tab = (struct bucket*) malloc(nr_buckets * sizeof(struct bucket)); *bucket_locks_tab = (struct bucket_lock*) malloc(nr_locks(nr_buckets) * sizeof(struct bucket_lock)); } static void free_entry(struct __hash *h, void *p) { free(p); } static void free_buckets(struct __hash *h, struct bucket *buckets, struct bucket_lock *bucket_locks) { if(buckets) free(buckets); if(bucket_locks) free(bucket_locks); } static int max_entries(struct __hash *h) { /* There are no explicit restrictions to how many entries we can store */ return -1; } #else /*****************************************************************************/ /** Memory allocator for shared memory region **/ /*****************************************************************************/ #define SHM_TABLE_SLOTS 4 struct shm_hdr { int hash_allocated; pthread_mutex_t mutex; int free_tab_slots[SHM_TABLE_SLOTS]; unsigned long freelist_offset; unsigned long entries_offset; unsigned long nr_entries; unsigned long tabs_offset; unsigned long max_tab_size; unsigned long max_lock_tab_size; struct __hash hash; }; static unsigned long get_shm_baddr(void *hdr) { return ((unsigned long)hdr - offsetof(struct shm_hdr, hash)); } /** Locations of various structures/memory areas **/ static struct shm_hdr* get_shm_hdr(struct __hash *h) { return (struct shm_hdr *) ((unsigned long)h - offsetof(struct shm_hdr, hash)); } static uint32_t* get_shm_freelist(struct shm_hdr *hdr) { unsigned long shm_baddr = (unsigned long)hdr; return ((uint32_t *)(shm_baddr + hdr->freelist_offset)); } static struct hash_entry* get_shm_entries(struct shm_hdr *hdr) { unsigned long shm_baddr = (unsigned long)hdr; return ((struct hash_entry *)(shm_baddr + hdr->entries_offset)); } static struct bucket* get_shm_tab(struct shm_hdr *hdr, int i) { unsigned long shm_baddr = (unsigned long)hdr; return ((struct bucket *) ((shm_baddr + hdr->tabs_offset) + i * (hdr->max_tab_size + hdr->max_lock_tab_size))); } static struct bucket_lock* get_shm_lock_tab(struct shm_hdr *hdr, int i) { unsigned long shm_baddr = (unsigned long)hdr; return ((struct bucket_lock *) ((shm_baddr + hdr->tabs_offset) + i * (hdr->max_tab_size + hdr->max_lock_tab_size) + hdr->max_tab_size)); } static int get_shm_slot(struct shm_hdr *hdr, void *p) { unsigned long shm_baddr = (unsigned long)hdr; return ((unsigned long)p - (shm_baddr + hdr->tabs_offset)) / (hdr->max_tab_size + hdr->max_lock_tab_size); } /* Shared memory allocator locks */ static int shm_mutex_init(struct shm_hdr *h) { int ret; pthread_mutexattr_t _attr; ret = pthread_mutexattr_init(&_attr); if(ret == 0) ret = pthread_mutexattr_setpshared(&_attr, PTHREAD_PROCESS_SHARED); if(ret == 0) ret = pthread_mutex_init(&h->mutex, &_attr); if(ret == 0) ret = pthread_mutexattr_destroy(&_attr); return ret; }; static int shm_mutex_lock(struct shm_hdr *h) { return pthread_mutex_lock(&h->mutex); } static int shm_mutex_unlock(struct shm_hdr *h) { return pthread_mutex_unlock(&h->mutex); } /* Shared memory allocator freelist */ static void shm_add_to_freelist(struct shm_hdr *hdr, uint32_t sl) { uint32_t *freelist = get_shm_freelist(hdr); shm_mutex_lock(hdr); freelist[sl+1] = freelist[0]; freelist[0] = sl; shm_mutex_unlock(hdr); } static uint32_t shm_get_from_freelist(struct shm_hdr *hdr) { uint32_t *freelist = get_shm_freelist(hdr); uint32_t slot; shm_mutex_lock(hdr); slot = freelist[0]; freelist[0] = freelist[slot+1]; shm_mutex_unlock(hdr); return (slot == 0 ? -1 : slot); } #define SHM_ALLOC_MAIN(_n) static unsigned long shm_init_offsets( struct shm_hdr *hdr, int nr_entries) { hdr->freelist_offset = sizeof(struct shm_hdr); /* Freelist needs one extra slot in the array for the freelist head */ hdr->entries_offset = hdr->freelist_offset + (nr_entries + 1) * sizeof(uint32_t); hdr->nr_entries = nr_entries; hdr->tabs_offset = hdr->entries_offset + nr_entries * sizeof(struct hash_entry); /* We want to allocate table 1.5 larger than the number of entries we want to hold in it */ hdr->max_tab_size = (nr_entries * 3 / 2) * sizeof(struct bucket); hdr->max_lock_tab_size = nr_locks(hdr->max_tab_size) * sizeof(struct bucket_lock); return hdr->tabs_offset + (hdr->max_tab_size + hdr->max_lock_tab_size) * 4; } struct __hash* __shm_hash_init(unsigned long shm_baddr, unsigned long shm_size) { uint32_t i; struct shm_hdr *hdr; /* Some sanity checks */ hdr = (struct shm_hdr *)shm_baddr; memset(hdr, 0, sizeof(struct shm_hdr)); /* Find the maximum number of entries we can store in the given shm_size */ for(i=1; shm_init_offsets(hdr, i) < shm_size; i++){}; shm_init_offsets(hdr, (i-1)); memset(get_shm_freelist(hdr), 0, (hdr->nr_entries + 1) * sizeof(uint32_t)); if(shm_mutex_init(hdr) != 0) return NULL; for(i=0; inr_entries; i++) shm_add_to_freelist(hdr, i); for(i=0; ifree_tab_slots[i] = 1; shm_mutex_lock(hdr); assert(!hdr->hash_allocated); hdr->hash_allocated = 1; shm_mutex_unlock(hdr); return __hash_init(&hdr->hash, 1000); } struct __hash* __shm_hash_get(unsigned long shm_baddr) { struct shm_hdr *hdr = (struct shm_hdr *)shm_baddr; return (hdr->hash_allocated ? &hdr->hash : NULL); } static void* alloc_entry(struct __hash *h, int size) { struct shm_hdr *hdr = get_shm_hdr(h); uint32_t slot = shm_get_from_freelist(hdr); assert(size == sizeof(struct hash_entry)); if(slot == -1) return NULL; return (get_shm_entries(hdr) + slot); } static void alloc_buckets(struct __hash *h, int nr_buckets, struct bucket **buckets_tab, struct bucket_lock **bucket_locks_tab) { struct shm_hdr *hdr = get_shm_hdr(h); int free_slot; *buckets_tab = NULL; *bucket_locks_tab = NULL; if(((nr_buckets * sizeof(struct bucket)) > hdr->max_tab_size) || ((nr_locks(nr_buckets) * sizeof(struct bucket_lock)) > hdr->max_lock_tab_size)) return; shm_mutex_lock(hdr); for(free_slot=0; free_slotfree_tab_slots[free_slot]) break; if(free_slot == SHM_TABLE_SLOTS) { shm_mutex_unlock(hdr); return; } hdr->free_tab_slots[free_slot] = 0; shm_mutex_unlock(hdr); *buckets_tab = get_shm_tab(hdr, free_slot); *bucket_locks_tab = get_shm_lock_tab(hdr, free_slot); } static void free_entry(struct __hash *h, void *p) { struct shm_hdr *hdr = get_shm_hdr(h); uint32_t slot; slot = ((uint32_t)((struct hash_entry *)p - get_shm_entries(hdr))); shm_add_to_freelist(hdr, slot); } static void free_buckets(struct __hash *h, struct bucket *buckets, struct bucket_lock *bucket_locks) { struct shm_hdr *hdr = get_shm_hdr(h); int slot; if(!buckets || !bucket_locks) { assert(!buckets && !bucket_locks); return; } slot = get_shm_slot(hdr, buckets); assert(slot < SHM_TABLE_SLOTS); assert((char *)bucket_locks == (char *)buckets + hdr->max_tab_size); shm_mutex_lock(hdr); assert(hdr->free_tab_slots[slot] == 0); hdr->free_tab_slots[slot] = 1; shm_mutex_unlock(hdr); } static int max_entries(struct __hash *h) { struct shm_hdr *hdr = get_shm_hdr(h); return hdr->nr_entries; } #endif /* !BIDIR_USE_STDMALLOC */ /* The structures may be stored in shared memory region, with base address */ /* stored in shm_base_addr. All the pointers in the above structures need */ /* to be relative to this base address (otherwise they would not make */ /* sense to other processes). Bellow accessor functions are used to */ /* convert between canonical (base address relative) and local addresses. */ /* C2L stands for CANONICAL_TO_LOCAL, and vice versa */ #define C2L(_h, _p) ((typeof(_p))((unsigned long)(_p) + \ get_shm_baddr(_h))) #define L2C(_h, _p) ((typeof(_p))((unsigned long)(_p) - \ get_shm_baddr(_h))) #define HASH_LOCK_INIT(_h) ({ \ int _ret; \ pthread_rwlockattr_t _attr; \ \ h->lock_alive = 1; \ _ret = pthread_rwlockattr_init(&_attr); \ if(_ret == 0) \ _ret = pthread_rwlockattr_setpshared(&_attr, \ PTHREAD_PROCESS_SHARED); \ if(_ret == 0) \ _ret = pthread_rwlock_init(&(_h)->hash_lock, &_attr); \ if(_ret == 0) \ _ret = pthread_rwlockattr_destroy(&_attr); \ \ _ret; \ }) #define HASH_LOCK_RDLOCK(_h) ({ \ int _ret; \ \ if(!_h->lock_alive) _ret = ENOLCK; \ else \ { \ struct timespec _ts; \ /* 10s timeout, long but ~matches disk spin-up times */ \ _ts.tv_sec = time(NULL) + 10; \ _ts.tv_nsec = 0; \ _ret = pthread_rwlock_timedrdlock(&(_h)->hash_lock, &_ts); \ if(_ret == ETIMEDOUT) _h->lock_alive = 0; \ } \ _ret; \ }) #define HASH_LOCK_RDUNLOCK(_h) \ pthread_rwlock_unlock(&(_h)->hash_lock) #define HASH_LOCK_WRLOCK(_h) ({ \ int _ret; \ \ if(!_h->lock_alive) _ret = ENOLCK; \ else \ { \ struct timespec _ts; \ _ts.tv_sec = time(NULL) + 10; \ _ts.tv_nsec = 0UL; \ _ret = pthread_rwlock_timedwrlock(&(_h)->hash_lock, &_ts); \ if(_ret == ETIMEDOUT) _h->lock_alive = 0; \ } \ _ret; \ }) #define HASH_LOCK_TRYWRLOCK(_h) ({ \ int _ret = (_h->lock_alive ? \ pthread_rwlock_trywrlock(&(_h)->hash_lock) : \ ENOLCK); \ _ret; \ }) #define HASH_LOCK_WRUNLOCK(_h) \ pthread_rwlock_unlock(&(_h)->hash_lock) #define BUCKET_LOCK_INIT(_h, _b) ({ \ int _ret; \ pthread_rwlockattr_t _attr; \ \ _ret = pthread_rwlockattr_init(&_attr); \ if(_ret == 0) \ _ret = pthread_rwlockattr_setpshared(&_attr, \ PTHREAD_PROCESS_SHARED); \ if(_ret == 0) \ _ret = pthread_rwlock_init(&(_b)->bucket_lock, &_attr); \ if(_ret == 0) \ _ret = pthread_rwlockattr_destroy(&_attr); \ \ _ret; \ }) #define BUCKET_LOCK_RDLOCK(_h, _lock_tab, _idx) ({ \ int _ret; \ struct timespec _ts; \ struct bucket_lock *_lock = &(_lock_tab)[(_idx) / BUCKETS_PER_LOCK]; \ \ _ts.tv_sec = time(NULL) + 10; \ _ts.tv_nsec = 0; \ _ret = pthread_rwlock_timedrdlock(&(_lock)->bucket_lock, &_ts); \ if(_ret == ETIMEDOUT) (_h)->lock_alive = 0; \ _ret; \ }) #define BUCKET_LOCK_RDUNLOCK(_h, _lock_tab, _idx) ({ \ struct bucket_lock *_lock = &(_lock_tab)[(_idx) / BUCKETS_PER_LOCK]; \ pthread_rwlock_unlock(&(_lock)->bucket_lock); \ }) #define BUCKET_LOCK_WRLOCK(_h, _lock_tab, _idx) ({ \ int _ret; \ struct timespec _ts; \ struct bucket_lock *_lock = &(_lock_tab)[(_idx) / BUCKETS_PER_LOCK]; \ \ _ts.tv_sec = time(NULL) + 10; \ _ts.tv_nsec = 0; \ _ret = pthread_rwlock_timedwrlock(&(_lock)->bucket_lock, &_ts); \ if(_ret == ETIMEDOUT) (_h)->lock_alive = 0; \ _ret; \ }) #define BUCKET_LOCK_WRUNLOCK(_h, _lock_tab, _idx) ({ \ struct bucket_lock *_lock = &(_lock_tab)[(_idx) / BUCKETS_PER_LOCK]; \ pthread_rwlock_unlock(&(_lock)->bucket_lock); \ }) #define TWO_BUCKETS_LOCK_WRLOCK(_h, _blt1, _idx1, _blt2, _idx2) ({ \ int _ret; \ pthread_rwlock_t *_l1, *_l2; \ struct timespec _ts; \ struct bucket_lock *_bl1 = &(_blt1)[(_idx1) / BUCKETS_PER_LOCK]; \ struct bucket_lock *_bl2 = &(_blt2)[(_idx2) / BUCKETS_PER_LOCK]; \ \ assert((_bl1) != (_bl2)); \ if((_bl1) < (_bl2)) \ { \ _l1 = &(_bl1)->bucket_lock; \ _l2 = &(_bl2)->bucket_lock; \ } \ else \ { \ _l1 = &(_bl2)->bucket_lock; \ _l2 = &(_bl1)->bucket_lock; \ } \ _ts.tv_sec = time(NULL) + 10; \ _ts.tv_nsec = 0; \ _ret = pthread_rwlock_timedwrlock(_l1, &_ts); \ _ts.tv_sec = time(NULL) + 10; \ _ts.tv_nsec = 0; \ if(_ret == 0) \ _ret = pthread_rwlock_timedwrlock(_l2, &_ts); \ if(_ret == ETIMEDOUT) (_h)->lock_alive = 0; \ \ _ret; \ }) #define TWO_BUCKETS_LOCK_WRUNLOCK(_h, _blt1, _idx1, _blt2, _idx2) ({ \ int _ret; \ struct bucket_lock *_bl1 = &(_blt1)[(_idx1) / BUCKETS_PER_LOCK]; \ struct bucket_lock *_bl2 = &(_blt2)[(_idx2) / BUCKETS_PER_LOCK]; \ \ _ret = pthread_rwlock_unlock(&(_bl1)->bucket_lock); \ if(_ret == 0) \ _ret = pthread_rwlock_unlock(&(_bl2)->bucket_lock); \ \ _ret; \ }) static uint32_t hash_to_idx(struct __hash *h, uint32_t hash) { return (hash % h->tab_size); } static void alloc_tab(struct __hash *h, int size, struct bucket **buckets_tab, struct bucket_lock **bucket_locks_tab) { int i; alloc_buckets(h, size, buckets_tab, bucket_locks_tab); if(!(*buckets_tab) || !(*bucket_locks_tab)) goto error_out; memset(*buckets_tab, 0, size * sizeof(struct bucket)); memset(*bucket_locks_tab, 0, nr_locks(size) * sizeof(struct bucket_lock)); for(i=0; i hash_sizes[hash_sizes_len-1]) return NULL; /* Find least size greater than init_size */ for(size_idx = 0; size_idx < hash_sizes_len; size_idx++) if(hash_sizes[size_idx] >= min_size) break; size = hash_sizes[size_idx]; if(!h) return NULL; alloc_tab(h, size, &buckets, &bucket_locks); if(!buckets || !bucket_locks) goto alloc_fail; h->key_tab = L2C(h, buckets); h->key_lock_tab = L2C(h, bucket_locks); alloc_tab(h, size, &buckets, &bucket_locks); if(!buckets || !bucket_locks) goto alloc_fail; h->value_tab = L2C(h, buckets); h->value_lock_tab = L2C(h, bucket_locks); /* Init all h variables */ if(HASH_LOCK_INIT(h) != 0) goto alloc_fail; h->nr_ent = 0; h->tab_size = size; h->size_idx = size_idx; h->max_load = (uint32_t)ceilf(hash_max_load_fact * size); h->min_load = (uint32_t)ceilf(hash_min_load_fact * size); return h; alloc_fail: if(h->key_tab || h->key_lock_tab) free_buckets(h, C2L(h, h->key_tab), C2L(h, h->key_lock_tab)); return NULL; } #undef __prim #undef __prim_t #undef __prim_tab #undef __prim_lock_tab #undef __prim_hash #undef __prim_cmp #undef __prim_next #undef __sec #undef __sec_t #define __prim key #define __prim_t __k_t #define __prim_tab key_tab #define __prim_lock_tab key_lock_tab #define __prim_hash __key_hash #define __prim_cmp __key_cmp #define __prim_next key_next #define __sec value #define __sec_t __v_t int __key_lookup(struct __hash *h, __prim_t k, __sec_t *vp) { struct hash_entry *entry; struct bucket *b; struct bucket_lock *blt; uint32_t idx; if(HASH_LOCK_RDLOCK(h) != 0) return -ENOLCK; idx = hash_to_idx(h, __prim_hash(k)); b = C2L(h, &h->__prim_tab[idx]); blt = C2L(h, h->__prim_lock_tab); if(BUCKET_LOCK_RDLOCK(h, blt, idx) != 0) return -ENOLCK; entry = b->hash_entry; while(entry != NULL) { entry = C2L(h, entry); if(__prim_cmp(k, entry->__prim)) { /* Unlock here */ *vp = entry->__sec; BUCKET_LOCK_RDUNLOCK(h, blt, idx); HASH_LOCK_RDUNLOCK(h); return 1; } entry = entry->__prim_next; } BUCKET_LOCK_RDUNLOCK(h, blt, idx); HASH_LOCK_RDUNLOCK(h); return 0; } /* value lookup is an almost exact copy of key lookup */ #undef __prim #undef __prim_t #undef __prim_tab #undef __prim_lock_tab #undef __prim_hash #undef __prim_cmp #undef __prim_next #undef __sec #undef __sec_t #define __prim value #define __prim_t __v_t #define __prim_tab value_tab #define __prim_lock_tab value_lock_tab #define __prim_hash __value_hash #define __prim_cmp __value_cmp #define __prim_next value_next #define __sec key #define __sec_t __k_t int __value_lookup(struct __hash *h, __prim_t k, __sec_t *vp) { struct hash_entry *entry; struct bucket *b; struct bucket_lock *blt; uint32_t idx; if(HASH_LOCK_RDLOCK(h) != 0) return -ENOLCK; idx = hash_to_idx(h, __prim_hash(k)); b = C2L(h, &h->__prim_tab[idx]); blt = C2L(h, h->__prim_lock_tab); if(BUCKET_LOCK_RDLOCK(h, blt, idx) != 0) return -ENOLCK; entry = b->hash_entry; while(entry != NULL) { entry = C2L(h, entry); if(__prim_cmp(k, entry->__prim)) { /* Unlock here */ *vp = entry->__sec; BUCKET_LOCK_RDUNLOCK(h, blt, idx); HASH_LOCK_RDUNLOCK(h); return 1; } entry = entry->__prim_next; } BUCKET_LOCK_RDUNLOCK(h, blt, idx); HASH_LOCK_RDUNLOCK(h); return 0; } int __insert(struct __hash *h, __k_t k, __v_t v) { uint32_t k_idx, v_idx; struct hash_entry *entry; struct bucket *bk, *bv; struct bucket_lock *bltk, *bltv; /* Allocate new entry before any locks (in case it fails) */ entry = (struct hash_entry*) alloc_entry(h, sizeof(struct hash_entry)); if(!entry) return 0; if(HASH_LOCK_RDLOCK(h) != 0) return -ENOLCK; /* Read from nr_ent is atomic(TODO check), no need for fancy accessors */ if(h->nr_ent+1 > h->max_load) { /* Resize needs the write lock, drop read lock temporarily */ HASH_LOCK_RDUNLOCK(h); hash_resize(h); if(HASH_LOCK_RDLOCK(h) != 0) return -ENOLCK; } /* Init the entry */ entry->key = k; entry->value = v; /* Work out the indicies */ k_idx = hash_to_idx(h, __key_hash(k)); v_idx = hash_to_idx(h, __value_hash(v)); /* Insert */ bk = C2L(h, &h->key_tab[k_idx]); bv = C2L(h, &h->value_tab[v_idx]); bltk = C2L(h, h->key_lock_tab); bltv = C2L(h, h->value_lock_tab); if(TWO_BUCKETS_LOCK_WRLOCK(h, bltk, k_idx, bltv, v_idx) != 0) return -ENOLCK; entry->key_next = bk->hash_entry; bk->hash_entry = L2C(h, entry); entry->value_next = bv->hash_entry; bv->hash_entry = L2C(h, entry); TWO_BUCKETS_LOCK_WRUNLOCK(h, bltk, k_idx, bltv, v_idx); /* Book keeping */ atomic_inc(&h->nr_ent); HASH_LOCK_RDUNLOCK(h); return 1; } #undef __prim #undef __prim_t #undef __prim_tab #undef __prim_lock_tab #undef __prim_hash #undef __prim_cmp #undef __prim_next #undef __sec #undef __sec_t #undef __sec_tab #undef __sec_lock_tab #undef __sec_hash #undef __sec_next #define __prim key #define __prim_t __k_t #define __prim_tab key_tab #define __prim_lock_tab key_lock_tab #define __prim_hash __key_hash #define __prim_cmp __key_cmp #define __prim_next key_next #define __sec value #define __sec_t __v_t #define __sec_tab value_tab #define __sec_lock_tab value_lock_tab #define __sec_hash __value_hash #define __sec_next value_next int __key_remove(struct __hash *h, __prim_t k, __sec_t *vp) { struct hash_entry *e, *es, **pek, **pev; struct bucket *bk, *bv; struct bucket_lock *bltk, *bltv; uint32_t old_kidx, kidx, vidx, min_load, nr_ent; __prim_t ks; __sec_t vs; if(HASH_LOCK_RDLOCK(h) != 0) return -ENOLCK; again: old_kidx = kidx = hash_to_idx(h, __prim_hash(k)); bk = C2L(h, &h->__prim_tab[kidx]); bltk = C2L(h, h->__prim_lock_tab); if(BUCKET_LOCK_RDLOCK(h, bltk, kidx) != 0) return -ENOLCK; pek = &(bk->hash_entry); e = *pek; while(e != NULL) { e = C2L(h, e); if(__prim_cmp(k, e->__prim)) { goto found; } pek = &(e->__prim_next); e = *pek; } BUCKET_LOCK_RDUNLOCK(h, bltk, kidx); HASH_LOCK_RDUNLOCK(h); return 0; found: /* * Make local copy of key and value. */ es = e; ks = e->__prim; vs = e->__sec; kidx = hash_to_idx(h, __prim_hash(ks)); /* Being paranoid: check if kidx has not changed, so that we unlock the * right bucket */ assert(old_kidx == kidx); vidx = hash_to_idx(h, __sec_hash(vs)); bk = C2L(h, &h->__prim_tab[kidx]); bv = C2L(h, &h->__sec_tab[vidx]); bltk = C2L(h, h->__prim_lock_tab); bltv = C2L(h, h->__sec_lock_tab); BUCKET_LOCK_RDUNLOCK(h, bltk, kidx); if(TWO_BUCKETS_LOCK_WRLOCK(h, bltk, kidx, bltv, vidx) != 0) return -ENOLCK; pek = &(bk->hash_entry); pev = &(bv->hash_entry); /* Find the entry in both tables */ e = *pek; while(e != NULL) { e = C2L(h, e); if(e == es) { /* Being paranoid: make sure that the key and value are * still the same. This is still not 100%, because, in * principle, the entry could have got deleted, when we * didn't hold the locks for a little while, and exactly * the same entry reinserted. If the __k_t & __v_t are * simple types than it probably doesn't matter, but if * either is a pointer type, the actual structure might * now be different. The chances that happens are very * slim, but still, if that's a problem, the user needs to * pay attention to the structure re-allocation */ if((memcmp(&(e->__prim), &ks, sizeof(__prim_t))) || (memcmp(&(e->__sec), &vs, sizeof(__sec_t)))) break; goto found_again; } pek = &(e->__prim_next); e = *pek; } TWO_BUCKETS_LOCK_WRUNLOCK(h, bltk, kidx, bltv, vidx); /* Entry got removed in the meantime, try again */ goto again; found_again: /* We are now comitted to the removal */ e = *pev; while(e != NULL) { e = C2L(h, e); if(e == es) { /* Both pek and pev are pointing to the right place, remove */ *pek = e->__prim_next; *pev = e->__sec_next; atomic_dec(&h->nr_ent); nr_ent = h->nr_ent; /* read min_load still under the hash lock! */ min_load = h->min_load; TWO_BUCKETS_LOCK_WRUNLOCK(h, bltk, kidx, bltv, vidx); HASH_LOCK_RDUNLOCK(h); if(nr_ent < min_load) hash_resize(h); if(vp != NULL) *vp = e->__sec; free_entry(h, e); return 1; } pev = &(e->__sec_next); e = *pev; } /* We should never get here!, no need to unlock anything */ return -ENOLCK; } #undef __prim #undef __prim_t #undef __prim_tab #undef __prim_lock_tab #undef __prim_hash #undef __prim_cmp #undef __prim_next #undef __sec #undef __sec_t #undef __sec_tab #undef __sec_lock_tab #undef __sec_hash #undef __sec_next #define __prim value #define __prim_t __v_t #define __prim_tab value_tab #define __prim_lock_tab value_lock_tab #define __prim_hash __value_hash #define __prim_cmp __value_cmp #define __prim_next value_next #define __sec key #define __sec_t __k_t #define __sec_tab key_tab #define __sec_lock_tab key_lock_tab #define __sec_hash __key_hash #define __sec_next key_next int __value_remove(struct __hash *h, __prim_t k, __sec_t *vp) { struct hash_entry *e, *es, **pek, **pev; struct bucket *bk, *bv; struct bucket_lock *bltk, *bltv; uint32_t old_kidx, kidx, vidx, min_load, nr_ent; __prim_t ks; __sec_t vs; if(HASH_LOCK_RDLOCK(h) != 0) return -ENOLCK; again: old_kidx = kidx = hash_to_idx(h, __prim_hash(k)); bk = C2L(h, &h->__prim_tab[kidx]); bltk = C2L(h, h->__prim_lock_tab); if(BUCKET_LOCK_RDLOCK(h, bltk, kidx) != 0) return -ENOLCK; pek = &(bk->hash_entry); e = *pek; while(e != NULL) { e = C2L(h, e); if(__prim_cmp(k, e->__prim)) { goto found; } pek = &(e->__prim_next); e = *pek; } BUCKET_LOCK_RDUNLOCK(h, bltk, kidx); HASH_LOCK_RDUNLOCK(h); return 0; found: /* * Make local copy of key and value. */ es = e; ks = e->__prim; vs = e->__sec; kidx = hash_to_idx(h, __prim_hash(ks)); /* Being paranoid: check if kidx has not changed, so that we unlock the * right bucket */ assert(old_kidx == kidx); vidx = hash_to_idx(h, __sec_hash(vs)); bk = C2L(h, &h->__prim_tab[kidx]); bv = C2L(h, &h->__sec_tab[vidx]); bltk = C2L(h, h->__prim_lock_tab); bltv = C2L(h, h->__sec_lock_tab); BUCKET_LOCK_RDUNLOCK(h, bltk, kidx); if(TWO_BUCKETS_LOCK_WRLOCK(h, bltk, kidx, bltv, vidx) != 0) return -ENOLCK; pek = &(bk->hash_entry); pev = &(bv->hash_entry); /* Find the entry in both tables */ e = *pek; while(e != NULL) { e = C2L(h, e); if(e == es) { /* Being paranoid: make sure that the key and value are * still the same. This is still not 100%, because, in * principle, the entry could have got deleted, when we * didn't hold the locks for a little while, and exactly * the same entry reinserted. If the __k_t & __v_t are * simple types than it probably doesn't matter, but if * either is a pointer type, the actual structure might * now be different. The chances that happens are very * slim, but still, if that's a problem, the user needs to * pay attention to the structure re-allocation */ if((memcmp(&(e->__prim), &ks, sizeof(__prim_t))) || (memcmp(&(e->__sec), &vs, sizeof(__sec_t)))) break; goto found_again; } pek = &(e->__prim_next); e = *pek; } TWO_BUCKETS_LOCK_WRUNLOCK(h, bltk, kidx, bltv, vidx); /* Entry got removed in the meantime, try again */ goto again; found_again: /* We are now comitted to the removal */ e = *pev; while(e != NULL) { e = C2L(h, e); if(e == es) { /* Both pek and pev are pointing to the right place, remove */ *pek = e->__prim_next; *pev = e->__sec_next; atomic_dec(&h->nr_ent); nr_ent = h->nr_ent; /* read min_load still under the hash lock! */ min_load = h->min_load; TWO_BUCKETS_LOCK_WRUNLOCK(h, bltk, kidx, bltv, vidx); HASH_LOCK_RDUNLOCK(h); if(nr_ent < min_load) hash_resize(h); if(vp != NULL) *vp = e->__sec; free_entry(h, e); return 1; } pev = &(e->__sec_next); e = *pev; } /* We should never get here!, no need to unlock anything */ return -ENOLCK; } int __hash_destroy(struct __hash *h, void (*entry_consumer)(__k_t k, __v_t v, void *p), void *d) { struct hash_entry *e, *n; struct bucket *b; int i; if(HASH_LOCK_WRLOCK(h) != 0) return -ENOLCK; /* No need to lock individual buckets, with hash write lock */ for(i=0; i < h->tab_size; i++) { b = C2L(h, &h->key_tab[i]); e = b->hash_entry; while(e != NULL) { e = C2L(h, e); n = e->key_next; if(entry_consumer) entry_consumer(e->key, e->value, d); free_entry(h, e); e = n; } } free_buckets(h, C2L(h, h->key_tab), C2L(h, h->key_lock_tab)); free_buckets(h, C2L(h, h->value_tab), C2L(h, h->value_lock_tab)); HASH_LOCK_WRUNLOCK(h); h->lock_alive = 0; return 0; } static void hash_resize(struct __hash *h) { int new_size_idx, i, lock_ret; uint32_t size, old_size, kidx, vidx; struct bucket *t1, *t2, *b; struct bucket_lock *l1, *l2; struct hash_entry *e, *n; /* We may fail to allocate the lock, if the resize is triggered while we are iterating (under read lock) */ lock_ret = HASH_LOCK_TRYWRLOCK(h); if(lock_ret != 0) return; new_size_idx = h->size_idx; /* Work out the new size */ if(h->nr_ent >= h->max_load) new_size_idx = h->size_idx+1; if(h->nr_ent < h->min_load) new_size_idx = h->size_idx-1; if((new_size_idx == h->size_idx) || (new_size_idx >= hash_sizes_len) || (new_size_idx < 0)) { HASH_LOCK_WRUNLOCK(h); return; } size = hash_sizes[new_size_idx]; /* Allocate the new sizes */ t1 = t2 = NULL; l1 = l2 = NULL; alloc_tab(h, size, &t1, &l1); if(!t1 || !l1) goto alloc_fail; alloc_tab(h, size, &t2, &l2); if(!t2 || !l2) goto alloc_fail; old_size = h->tab_size; h->tab_size = size; h->size_idx = new_size_idx; h->max_load = (uint32_t)ceilf(hash_max_load_fact * size); h->min_load = (uint32_t)ceilf(hash_min_load_fact * size); /* Move the entries */ for(i=0; i < old_size; i++) { b = C2L(h, &h->key_tab[i]); e = b->hash_entry; while(e != NULL) { e = C2L(h, e); n = e->key_next; kidx =hash_to_idx(h, __key_hash(e->key)); vidx =hash_to_idx(h, __value_hash(e->value)); /* Move to the correct bucket */ e->key_next = t1[kidx].hash_entry; t1[kidx].hash_entry = L2C(h, e); e->value_next = t2[vidx].hash_entry; t2[vidx].hash_entry = L2C(h, e); e = n; } } free_buckets(h, C2L(h, h->key_tab), C2L(h, h->key_lock_tab)); free_buckets(h, C2L(h, h->value_tab), C2L(h, h->value_lock_tab)); h->key_tab = L2C(h, t1); h->key_lock_tab = L2C(h, l1); h->value_tab = L2C(h, t2); h->value_lock_tab = L2C(h, l2); HASH_LOCK_WRUNLOCK(h); return; alloc_fail: /* If we failed to resize, adjust max/min load. This will stop us from * retrying resize too frequently */ if(new_size_idx > h->size_idx) h->max_load = (h->max_load + 2 * h->tab_size) / 2 + 1; else if (new_size_idx < h->size_idx) h->min_load = h->min_load / 2; HASH_LOCK_WRUNLOCK(h); if(t1 || l1) free_buckets(h, t1, l1); if(t2 || l2) free_buckets(h, t2, l2); return; } int __hash_iterator(struct __hash *h, int (*entry_consumer)(__k_t k, __v_t v, void *p), void *d) { struct hash_entry *e, *n; struct bucket *b; struct bucket_lock *blt; int i, brk_early; if(HASH_LOCK_RDLOCK(h) != 0) return -ENOLCK; for(i=0; i < h->tab_size; i++) { b = C2L(h, &h->key_tab[i]); blt = C2L(h, h->key_lock_tab); if(BUCKET_LOCK_RDLOCK(h, blt, i) != 0) return -ENOLCK; e = b->hash_entry; while(e != NULL) { e = C2L(h, e); n = e->key_next; brk_early = entry_consumer(e->key, e->value, d); if(brk_early) { BUCKET_LOCK_RDUNLOCK(h, blt, i); goto out; } e = n; } BUCKET_LOCK_RDUNLOCK(h, blt, i); } out: HASH_LOCK_RDUNLOCK(h); return 0; } void __hash_sizes(struct __hash *h, uint32_t *nr_ent, uint32_t *max_nr_ent, uint32_t *tab_size, uint32_t *max_load, uint32_t *min_load) { if(nr_ent != NULL) *nr_ent = h->nr_ent; if(max_nr_ent != NULL) *max_nr_ent = max_entries(h); if(tab_size != NULL) *tab_size = h->tab_size; if(max_load != NULL) *max_load = h->max_load; if(min_load != NULL) *min_load = h->min_load; }