| htable.c |
| /*************************************************************************** * * * Hash Table Program in C * * * * COSC 242 Assignment 06/09/01 * * * * JAMES LITTLE * * * ***************************************************************************/ #include <stdio.h> #include <stdlib.h> #include "htable.h" #include "mylib.h" struct hashtable { int size; int num_entries; hashing_t method; char **keys; int *freqs; int *stats; }; /************************** * * * htable_new * * * ************************** Declares and initialises a reference to a pointer of the same type as htable. Allocates memory to the new pointer. And initialises all of the hashtables attributes. Asigning memory to pointers and setting the size of the hashtable, setting the method type and setting num_entries and all cells in the arrays to zero or NULL values. PARAMETERS: size = the size of the hashtable. method = a special enumeration type which defines which hashing technique should be used. RETURN VALUE: the newly created hashtable, hashing technique set to the requested method, size to the requested size. Pointers alocated memory of the required size. All other variables set to NULL or zero values. */ htable htable_new(int size, hashing_t method){ int i; htable ht = emalloc(sizeof *ht); ht->size = size; ht->method = method; ht->num_entries = 0; ht->keys = emalloc(size * sizeof ht->keys[0]); ht->freqs = emalloc(size * sizeof ht->freqs[0]); ht->stats = emalloc(size * sizeof ht->stats[0]); for(i = 0; i<size; i++){ ht->keys[i] = NULL; ht->freqs[i] = 0; ht->stats[i] = 0; } return ht; } /************************** * * * htable_step * * * ************************** The secondary hashing function used by DOUBLE hashing routine. Uses an integer to take the modulo that is slightly less than table size, this improves the appearance of random hashing. And improves our hashing performance. PARAMETERS: word = the string to be converted to integer. RETURN VALUE: the integer representation of the word. */ static unsigned int htable_step(htable h, unsigned int i_key){ return 1 + (i_key % (h->size - 1)); } /************************** * * * htable_wtoi * * * ************************** Converts word to integer. This function returns an integer representation of the string word argument. Result - accumulates results of integer creating loop. PARAMETERS: word = the string to be converted to integer. RETURN VALUE: the integer representation of the word. */ static unsigned int htable_wtoi(char *word){ unsigned int result = 0; while(*word != '\0') result = (*word++ +31 * result); return result; } /************************** * * * htable_hash * * * ************************** Returns the current position index, for searching and inserting. PARAMETERS: h = hashtable to free memory from. i_key = the current integer key. RETURN VALUE: the integer key modulo the hash table size, used to index the hash table. */ static unsigned int htable_hash(htable h, unsigned int i_key){ return i_key % h->size; } /************************** * * * htable_insert * * * ************************** Inserts entries into the hashtable, using either LINEAR probing or DOUBLE hashing to deal with collisions. Keeps count of collisions and stores this information in the hashtable stats array. Number of Collisions - the most collisions that have occurred while placing a key. Integer Key - an integer representation of our original string key. Position - initially the integer key modulo the hashtable size. Is used to access array elements in the hashtable. If hash values collide with previous entries then we use add step to position to find the new poistion. Step - is added to position to find the new position. If we are LINEAR probing, step is always 1. If we are DOUBLE hashing, step is a secondary hash value. PARAMETERS: h = hashtable to free memory from. key = string to insert into the hashtable RETURN VALUE: the updated frequency of the entry at that position. If that position was NULL previous to insert, then updated frequency is 1. */ int htable_insert(htable h, char *key){ int num_collisions = 0; int i_key = htable_wtoi(key); int pos = htable_hash(h, i_key); int step = 1; if(h->method == DOUBLE) step = htable_step(h, i_key); while(h->keys[pos]!=NULL && strcmp(h->keys[pos],key)!=0 && num_collisions < h->size ){ pos = htable_hash(h, pos + step); num_collisions++; } if(h->keys[pos] == NULL){ h->keys[pos] = emalloc((strlen(key)+1) * sizeof h->keys[0][0]); strcpy(h->keys[pos],key); h->stats[h->num_entries] = num_collisions; h->num_entries++; } if(num_collisions >= h->size) /* We must be full, so return zero.*/ return 0; return ++(h->freqs[pos]); } /************************** * * * htable_print * * * ************************** Iterates the hashtable sending to output frequencies and keys of all entries that are not NULL. PARAMETERS: h = hashtable to free memory from. stream = where to send output. RETURN VALUE: none */ void htable_print(htable h, FILE *stream){ int i; for(i = 0; i<h->size; i++){ if(h->keys[i] != NULL) fprintf(stream, "%d\t%s\n",i, h->freqs[i], h->keys[i]); } } /************************** * * * htable_print_stats * * * ************************** Searches hashtable for a specific entry, called key. If this key is found the frequency of that key is returned. Otherwise zero is returned. Number of Collisions - the most collisions that have occurred while placing a key. Integer Key - an integer representation of our original string key. Position - initially the integer key modulo the hashtable size. Is used to access array elements in the hashtable. If hash values collide with previous entries then we use add step to position to find the new poistion. Step - is added to position to find the new position. If we are LINEAR probing, step is always 1. If we are DOUBLE hashing, step is a secondary hash value. PARAMETERS: ht = hashtable to search for a key in. key = the string to search for. RETURN VALUE: The number of hits for this key entry. So if collisions are greater than or equal to the size of the hashtable, then the key is not in the table, hence its frequency is zero. Otherwise the frequency of that key is returned. */ int htable_search(htable h, char *key){ int num_collisions = 0; int i_key = htable_wtoi(key); int pos = htable_hash(h, i_key); int step = 1; if(h->method == DOUBLE) step = htable_step(h, i_key); while(h->keys[pos]!=NULL && strcmp(h->keys[pos],key)!=0 && num_collisions < h->size ){ pos = htable_hash(h, pos + step); num_collisions++; } if(num_collisions >= h->size) return 0; else return h->freqs[pos]; } /************************** * * * htable_destroy * * * ************************** Is used to free the memory allocated to a hashtable. First freeing the memory allocated to the key strings, then freeing the memory allocated to the pointers to the keys and frequencies. Then freeing the memory to the pointer to the hashtable itself. PARAMETERS: h = hashtable to free memory from. RETURN VALUE: none */ void htable_destroy(htable h){ int i; for(i = 0; i<h->size; i++){ if(h->keys[i] != NULL){ free(h->keys[i]); } } if(h->keys != NULL){ free(h->keys); } if(h->freqs != NULL){ free(h->freqs); } free(h); } /************************** * * * print_stats_line * * * ************************** Prints stats for one segment (snap shot) of hash table stats. Which were stored while hash table was being built. Percent At Home - how many keys were placed without a collision occurring. Average Collisions - how many collisions have occurred on average while placing all of the keys so far. Maximum Collisions - the most collisions that have occurred while placing a key. Current Entries - the number of entries that this percentage of the hashtables includes. PARAMETERS: ht = hashtable to print statistics summary from. stream = where to send output. (use stdout for the assignment) percent_full = the section [0 to percent_full] of data we are printing. RETURN VALUE: none */ static void print_stats_line(htable ht, FILE *stream, int percent_full) { int current_entries = ht->size * percent_full / 100; double average_collisions = 0.0; int at_home = 0; int max_collisions = 0; int i; if(current_entries > ht->num_entries); else { for (i = 0; i < current_entries; i++) { average_collisions += ht->stats[i]; if(ht->stats[i] == 0) at_home++; if(ht->stats[i] > max_collisions) max_collisions = ht->stats[i]; } /* this line is exact code so your output will be standard */ fprintf(stream, "%4d %10d %10.1f %10.2f %11d\n", percent_full, current_entries, at_home * 100.0 / current_entries, average_collisions / current_entries, max_collisions); } } /************************** * * * htable_print_stats * * * ************************** Prints out a table showing what the following attributes were like at regular intervals (as determined by num_stats) while the hashtable was being built. Percent At Home - how many keys were placed without a collision occurring. Average Collisions - how many collisions have occurred on average while placing all of the keys so far. Maximum Collisions - the most collisions that have occurred while placing a key. Note: print_stats_line() prints a line of data when given what percent full the hashtable should be as its third parameter. If the hashtable is less full than that, then no data will be printed. PARAMETERS: ht = hashtable to print statistics summary from. stream = where to send output. (use stdout for the assignment) num_stats = the maximum number of statistical snapshots to print. RETURN VALUE: none */ void htable_print_stats(htable ht, FILE *stream, int num_stats) { int i; fprintf(stream, "\nUsing %s\n\n", ht->method == LINEAR ? "Linear Probing. " : "Double Hashing."); fprintf(stream, "Percent Current Percent Average Maximum\n"); fprintf(stream, " Full Entries At Home Collisions Collisions\n"); fprintf(stream, "-----------------------------------------------------\n"); for (i = 1; i <= num_stats; i++) { print_stats_line(ht, stream, 100 * i / num_stats); } fprintf(stream, "-----------------------------------------------------\n\n"); } |
| James Little |