/* map.c - associative array code */ /* PUBLIC DOMAIN - Jon Mayo - July 15, 2007 * initial version: 2007-07-15 * last modified: 2007-12-29 * $Id$ */ /* TODO: * - support map_cursor_next() * - implement case-insensitive mode * - hold cached hash value in entry to faster compare and resize * - support incremental resize of table */ #include #include #include #include #include "map.h" #ifdef NDEBUG #define TRACE(x) #else #define TRACE(x) printf x #endif /** this clears the cursor used by map_cursor_next() */ #define MAP_NEXT_CURSOR_CLEAR(m) { (m)->map_next_cursor.bucket=0; (m)->map_next_cursor.entry=0; } struct map_entry { struct map_entry *next; /* hash collision resolution */ /* TODO: global list of all elements for full table scans */ const char *key; /* immutable when allocated_key flag is false */ void *data; /* TODO: full hash could be stored to speed up rehashing and for walking * through collision lists */ }; struct map_container { unsigned auto_resize:1, /* support automatic resize */ allocated_key:1, /* 0 indicates that key points into 'data' */ allow_replace:1; /* allow map_add to replace a duplicate entry */ void (*free_cb)(void*); /* callback for freeing (used for replace) */ struct map_entry **table; /* hash table */ unsigned table_mask; /* mask used for hashing on this table */ unsigned count; /* total number of elements */ struct { /* current state of map_cursor_next() call - used to track position */ unsigned bucket; struct map_entry *entry; } map_next_cursor; }; static unsigned long hash(const char *key) { unsigned long h = 0; while(*key) { h=h*65599+*key++; /* this might be faster on some systems with fast shifts and slow mult: * h=(h<<6)+(h<<16)-h+*key++; */ } return h; } /* notice - weird semantics! * return NULL on success, a pointer to a duplicate entry on failure */ static struct map_entry *insert_entry(struct map_container *m, unsigned long slot, struct map_entry *ent) { struct map_entry **curr; int compare_result; MAP_NEXT_CURSOR_CLEAR(m); /* we're modifying the table, clear the cursor */ assert(m!=NULL); assert(m->table!=NULL); assert(ent!=NULL); assert(slot<=m->table_mask); /* point to the slot according to the hash result * use a pointer to a pointer so we can modify this entry when the correct * position in the linked list is found. */ curr=&m->table[slot]; /* find a duplicate entry and keep the collision list sorted */ while(*curr) { assert((*curr)->key!=NULL); assert((*curr)->data!=NULL); compare_result=strcmp((*curr)->key, ent->key); if(compare_result>=0) { if(compare_result==0) { /* duplicate entry detected */ return *curr; } else { /* >0 */ /* insert entry at this point, because the strings are getting * bigger and we wish to keep the list sorted. */ break; } } curr=&(*curr)->next; } ent->next=*curr; *curr=ent; return 0; /* success */ } static void redistribute(struct map_container *m, unsigned slot) { struct map_entry **curr, *ent; unsigned long newslot; assert(m!=NULL); assert(slot<=m->table_mask); curr=&m->table[slot]; if(*curr) { TRACE(("Redistributing nodes in slot %u @ size %u\n", slot, m->table_mask+1)); } while(*curr) { ent=*curr; newslot=hash(ent->key)&m->table_mask; if(newslot!=slot) { /* we need to move this to one of the expanded slots */ /* remove from original table */ *curr=ent->next; /* insert into new position */ insert_entry(m, newslot, ent); } else { curr=&ent->next; } } } /* TODO: write unit test for this */ /* call before adding entry to see if we should resize */ static int auto_resize(struct map_container *m) { struct map_entry **tmp; unsigned i; unsigned newmask; /* will the next operation keep us at 100% utilization or less? */ if(m->count <= m->table_mask) return 0; newmask=(m->table_mask<<1)|1; /* increase the mask by a power of 2 */ TRACE(("Resizing table from %u to %u\n", m->table_mask, newmask)); /* reallocate the data */ tmp=realloc(m->table, sizeof **m->table * newmask+1); if(!tmp) { /* verify the allocation */ perror("realloc()"); return 0; /* could not resize */ } /* clear newly allocated entries * starting from the original mask and going to the new mask */ for(i=m->table_mask+1;i<=newmask;i++) { tmp[i]=0; } /* everything is ready, set the data */ m->table=tmp; m->table_mask=newmask; /* redistribute everything in the old part of the table */ for(i=0;i<=m->table_mask/2;i++) { redistribute(m, i); } return 1; } /** initialized the map container * option_flags : * MAP_FEATURE_FLAG_ALLOCATE_KEY * - if set, then key is duplicated. if clear then the key must point inside * the data structure. * MAP_FEATURE_FLAG_REPLACE * - entries can be replaced (free_cb must be set) * MAP_FEATURE_FLAG_RESIZE * - can be increased in size when full * initial_size_bits - number of bits for initial table. */ struct map_container *map_create(unsigned initial_size_bits, unsigned option_flags, void (*free_cb)(void*)) { struct map_container *m; m=malloc(sizeof *m); if(!m) { /* verify the allocation */ perror("malloc()"); return 0; } m->auto_resize=(option_flags&MAP_FEATURE_FLAG_RESIZE)!=0; m->allocated_key=(option_flags&MAP_FEATURE_FLAG_ALLOCATE_KEY)!=0; /* free_cb function needed to support replace feature */ m->allow_replace=(option_flags&MAP_FEATURE_FLAG_REPLACE) && free_cb!=NULL; m->free_cb=free_cb; /* callback for freeing data */ m->table_mask=(1<table_mask) { /* overflow - fill in a fake value */ fprintf(stderr, "WARNING: map_create() called with too large of a size bits value(%u).\n", initial_size_bits); m->table_mask=255; } m->table=calloc(m->table_mask+1, sizeof **m->table); if(!m->table) { /* verify the allocation */ perror("calloc()"); free(m); return 0; } m->count=0; MAP_NEXT_CURSOR_CLEAR(m); TRACE(("map_create() (table=%p) allow_replace:%u auto_resize:%u\n", m, m->allow_replace, m->auto_resize)); return m; } /** call before using map_cursor_next to rewind the cursor */ void map_cursor_clear(struct map_container *m) { MAP_NEXT_CURSOR_CLEAR(m); } #if 0 /** return the next element after the one returned by the previous lookup */ void *map_cursor_next(struct map_container *m) { /* TODO: implement this function */ /* TODO: add both table sizes together to find the limit for the state * variable. */ return 0; /* END OF MAP */ } #endif /** look up some data */ void *map_lookup(struct map_container *m, const char *key) { unsigned long h; struct map_entry *curr; assert(m!=NULL); assert(key!=NULL); MAP_NEXT_CURSOR_CLEAR(m); h=hash(key); /* point to the slot according to the hash result * loop through the linked lists until a match is found. */ for(curr=m->table[h & m->table_mask];curr;curr=curr->next) { if(strcmp(curr->key, key)==0) { return curr->data; } } return 0; } /** * if allocated_key is unset, then key must point inside of data * else the key will be strdup'd. * adding a duplicate entry is an error and returns a failure. * running out of memory is an error and returns a failure. * data cannot be NULL, because map_lookup treats NULL specially. */ int map_add(struct map_container *m, const char *key, void *data) { unsigned long h; struct map_entry *ent, *tmp; assert(m!=NULL); assert(key!=NULL); assert(data!=NULL); if(m->auto_resize) { /* use only if we support resizing */ auto_resize(m); /* checks if we should resize, then resize */ } h=hash(key); /* insert the new entry at the perfect sorted position in the list */ ent=malloc(sizeof *ent); if(!ent) { /* verify the allocation */ perror("malloc()"); return 0; } ent->next=0; if(m->allocated_key) { ent->key=strdup(key); if(!ent->key) { /* verify the allocation */ free(ent); perror("strdup()"); return 0; } } else { ent->key=key; } ent->data=data; tmp=insert_entry(m, h&m->table_mask, ent); if(tmp) { /* duplicate entry found */ /* back out the memory we allocated earlier */ if(m->allocated_key) { free((char*)ent->key); } free(ent); if(!m->allow_replace) { return 0; /* failed - we refuse duplicate entries */ } if(!m->free_cb) { return 0; /* free the node only if a free callback was defined */ } TRACE(("Replacing a duplicate entry (table=%p) key='%s'\n", m, key)); /* replace the duplicate entry */ m->free_cb(tmp->data); tmp->data=data; if(!m->allocated_key) /* update the key pointer into data */ tmp->key=key; } else { /* only increment if we added a fresh entry, * replace doesn't alter this count */ m->count++; } return 1; /* success */ } /** looks up and removes a key. * it is up to the user to catch the result and free it * return 0 if key is not found */ void *map_remove(struct map_container *m, const char *key) { unsigned long h; struct map_entry **curr, *ent; int compare_result; assert(m!=NULL); assert(key!=NULL); MAP_NEXT_CURSOR_CLEAR(m); h=hash(key); /* point to the slot according to the hash result * use a pointer to a pointer so we can modify this entry when the correct * position in the linked list is found. */ curr=&m->table[h & m->table_mask]; /* look for a matching entry */ while(*curr) { compare_result=strcmp((*curr)->key, key); if(compare_result>=0) { if(compare_result==0) { void *ret; ent=*curr; ret=ent->data; *curr=ent->next; /* TRACE(("Removed entry (table=%p) key='%s'\n", m, ent->key)); */ if(m->allocated_key) { free((char*)ent->key); } free(ent); m->count--; return ret; } else { /* >0 */ break; } } curr=&(*curr)->next; } TRACE(("Cannot remove, entry not found (table=%p) key='%s'\n", m, key)); return 0; /* failed - not found */ } #if !defined(NDEBUG) || defined(STAND_ALONE) /* test if the m->count field matches the actual node count */ static void test_count(struct map_container *m) { unsigned total; unsigned i; struct map_entry *curr; assert(m!=NULL); fprintf(stderr, "%s:%u:Counting test ... \n", __FILE__, __LINE__); total=0; for(i=0;i<=m->table_mask;i++) { for(curr=m->table[i];curr;curr=curr->next) { total++; } } fprintf(stderr, "%s:%u:Total: %u (count=%u) -- %s\n", __FILE__, __LINE__, total, m->count, (total==m->count) ? "PASSED" : "FAILED"); } #endif /** frees the map and all elements. * does not free any elements in the hash if m->free is NULL */ void map_free(struct map_container *m) { unsigned i; struct map_entry **curr, *tmp; assert(m!=NULL); /* free the nodes only if a free callback was defined */ if(m->free_cb) { for(i=0;m->count && i<=m->table_mask;i++) { for(curr=&m->table[i];*curr;) { tmp=*curr; /* remove node from list */ *curr=(*curr)->next; /* free the node */ if(m->allocated_key) free((char*)tmp->key); m->free_cb(tmp->data); free(tmp); m->count--; } } } #ifndef NDEBUG test_count(m); #endif /* finished returning elements */ free(m->table); free(m); } /***** test code *****/ #ifdef STAND_ALONE static char *random_string(unsigned lower, unsigned upper) { char *ret; unsigned i, len; len=rand()%(upper-lower)+lower; ret=malloc(len+1); if(!ret) { /* verify the allocation */ perror("malloc()"); return 0; } for(i=0;icount); if(i!=m->count) { printf("Count %u does not match total added.\n", m->count); abort(); } printf("PASSED\n"); } static void verify_sorting(struct map_container *m) { unsigned i; struct map_entry *curr; unsigned count; count=0; printf("Verify sorting ... "); fflush(stdout); for(i=0;i<=m->table_mask;i++) { for(curr=m->table[i];curr;curr=curr->next) { /* verify that the data is sorted */ if(curr->next) { if(strcmp(curr->key, curr->next->key)>0) { printf("FAILED sorting verify @ entry %u: '%s'\n", i, curr->key); abort(); } } count++; } } printf("PASSED\n"); printf("Total: %u (count=%u)\n", i, m->count); } static void test_lookup(struct map_container *m) { unsigned i, max; char *result, *tmp; /* look up things we know aren't in the table */ printf("Lookup negative test ... "); fflush(stdout); result=map_lookup(m, "Does not exist."); if(!result) { printf("PASSED\n"); } else { printf("Failed\n"); } srand(31337); /* reuse the same seed to get the same random strings */ printf("Lookup test ... "); fflush(stdout); max=m->count; for(i=0;icount); } static void test_remove(struct map_container *m) { unsigned i, max; srand(31337); /* reuse the same seed to get the same random strings */ printf("Removing entries ...\n"); max=m->count/2; for(i=0;icount); printf("PASSED\n"); } int main(int argc, char **argv) { struct map_container *testmap; testmap=map_create(4, MAP_FEATURE_FLAG_RESIZE, free); /* TODO: test alloc mode */ /* TODO: test replace mode */ /* TODO: test no resize mode */ test_add_noreplace(testmap); verify_sorting(testmap); test_lookup(testmap); /* can display some warnings because it will remove duplicate entries */ test_remove(testmap); #if !defined(NDEBUG) || defined(STAND_ALONE) test_count(testmap); #endif map_free(testmap); return 0; } #endif /* vim: ts=4 sts=0 noet sw=4 */