
/*****************************************************************************
                Copyright Carnegie Mellon University 1992

                      All Rights Reserved

 Permission to use, copy, modify, and distribute this software and its
 documentation for any purpose and without fee is hereby granted,
 provided that the above copyright notice appear in all copies and that
 both that copyright notice and this permission notice appear in
 supporting documentation, and that the name of CMU not be
 used in advertising or publicity pertaining to distribution of the
 software without specific, written prior permission.

 CMU DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
 ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL
 CMU BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR
 ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
 WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
 ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
 SOFTWARE.
*****************************************************************************/

#include <stdio.h>
#include "hash.h"

/*  hash.c -- Generic routines for implementing a closed hash table.

$Header: hash.c,v 1.2 90/12/12 07:25:28 mwm Exp $

	Program by:  Mark Maimone  27 June 90  CMU Computer Science
	Last Modified:  7 August 90

	These routines implement double hashing on a closed hash table.
   The user must provide the hash functions (although several defaults are
   available).

	Some annoying features of this implementation:

	- Very little static type checking can be done, because most
   pointers are cast to (char *) right away.

	- The key field must have at least one designated illegal value.
   For example, strings might use NULL, and user ids might use -1.

	- Its generality means things go a little slowly.  For instance,
   when integers are key values you've still got to pass them by reference.

$Log:	hash.c,v $
 * Revision 1.2  90/12/12  07:25:28  mwm
 * added check for empty key to Insert_hlp
 * 
 * Revision 1.1  90/11/27  23:17:53  mwm
 * Initial revision
 * 
 * Revision 1.1  90/09/11  13:39:57  mwm
 * Initial revision
 * 

*/

/* Size in bytes of each of the two static buffers used by default_print */

#define DEFAULT_PRINT_BUF_SIZE 1000

/* Forward declarations.  The helpers that are defined   static   further
   down are declared   static   here too:  declaring an identifier with
   external linkage and then defining it with internal linkage is undefined
   behavior, and current compilers reject it outright.  Lookup and Empty
   are defined without   static   and so keep external linkage. */

static char *myalloc (), *default_print (), *memcpy ();
static int default_hash1 (), default_hash2 (), digits ();
char *Lookup ();
int Empty ();

/* default_str_hash -- first hashing function for tables whose keys are
   string *pointers*.  That is, each key in the table takes only
   sizeof (char *) bytes and the actual text of the string is stored
   elsewhere.

	key	address of the key slot, i.e. really a (char **)
	max	passed through to default_hash1 as the modulus
	size	size of a key slot in bytes; expected to be sizeof (char *)
	cmp	key comparison function, passed through to default_hash1

   Returns the default_hash1 value of the characters of the string (the
   terminating NUL is not included), or 0 when   key   or the string it
   points to is NULL.  An unexpected   size   is reported on stderr but
   hashing continues anyway.
*/

int default_str_hash (key, max, size, cmp)
char *key;
int max, size;
int (*cmp) ();
{
    char *str;

    if (key == NULL)
	return 0;

/* The input address points to a pointer; dereference that first pointer */

    str = *(char **) key;

/* sizeof yields a size_t, which must be converted to match %d */

    if (size != sizeof (char *))
	fprintf (stderr, "default_str_hash:  Bad key size %d (should be %d)\n",
		size, (int) sizeof (char *));

/* A NULL string has nothing to hash, and strlen (NULL) would crash */

    if (str == NULL)
	return 0;

    return default_hash1 (str, max, (int) strlen (str), cmp);
} /* default_str_hash */



/* default_hash1 -- useful first hashing function when the key is actually
   stored in the table.  This adds up the bytes in the key (modulo the
   current range of index values).  For example, if integers were stored
   as the key values, then 1, 256, and 65536 would all map onto hash value
   1 because one byte holds the value 1 and the others are all 0.
*/

static int default_hash1 (key, max, size, cmp)
char *key;
int max, size;
int (*cmp) ();
{
    int sum, i;

    for (sum = 0, i = 0; i < size; i++)
	sum = (sum + key[i]) % max;

    return sum;
} /* default_hash1 */



/* default_hash2 -- Default second hashing function:  linear stepping from
   the first hash value with a stride chosen to be relatively prime to the
   table size.

	key	unused
	max	number of slots in the table (the modulus)
	h1	value the first hash function produced for this key
	probe	how many steps to take away from   h1
	size	unused
	cmp	unused

   The stride is the first of 3, 5 and 7 that does not divide   max
   evenly; when all three divide it the stride is 1.  Returns
   (h1 + stride * probe) modulo   max.
*/

static int default_hash2 (key, max, h1, probe, size, cmp)
char *key;
int max, h1, size, probe;
int (*cmp) ();
{
    static int candidates[] = { 3, 5, 7 };
    int n_candidates = (int) (sizeof candidates / sizeof candidates[0]);
    int stride = 1;
    int n;

/* Take the first candidate that leaves a remainder */

    for (n = 0; n < n_candidates; n++)
	if (max % candidates[n] != 0) {
	    stride = candidates[n];
	    break;
	} /* if max % candidates[n] */

    return (h1 + stride * probe) % max;
} /* default_hash2 */



/* InitHashTable_hlp -- Initialize a new hash table.  The new table will
   be allocated on the heap.

	h		where to store the pointer to the new table
	bad_key		address of a copy of the key value that marks an
			unused slot;   key_size   bytes are copied from it
	table_max	initial number of slots; must be positive
	key_size	bytes per key; must be positive
	value_size	bytes per value; may be 0
	key_cmp		stored as given; Lookup calls it with (slot, key)
			and treats a 0 result as a match
	hash1, hash2	hash functions; NULL selects default_hash1 /
			default_hash2 respectively

   Returns 1 on success, 0 on failure.  On failure a message is written
   to stderr, everything allocated so far is released, and   *h   is set
   to NULL so that no half-built table is handed back.
*/

int InitHashTable_hlp (h, bad_key, table_max, key_size, value_size, key_cmp,
	hash1, hash2)
HashTableT **h;				/* points to uninitialized pointer */
char *bad_key;				/* address of a copy of a bad key */
int table_max, key_size, value_size;
int (*key_cmp) (), (*hash1) (), (*hash2) ();
{
    char *malloc ();
    HashTableT *t;

    if (h == NULL || bad_key == NULL) {
	fprintf (stderr, "InitHashTable:  NULL table or bad key pointer\n");
	return 0;
    } /* if h == NULL */
    *h = NULL;

/* A table with no slots would make the default hash functions divide by
   zero, and a key of no bytes always compares equal to the bad key */

    if (table_max <= 0 || key_size <= 0 || value_size < 0) {
	fprintf (stderr, "InitHashTable:  Bad table size %d, key size %d ",
		table_max, key_size);
	fprintf (stderr, "or value size %d\n", value_size);
	return 0;
    } /* if table_max <= 0 */

    t = (HashTableT *) malloc (sizeof (HashTableT));
    if (t == NULL) {
	fprintf (stderr, "InitHashTable:  Can't allocate initial storage\n");
	return 0;
    } /* if t == NULL */

/* The   iter   field is not set here.  NOTE(review):  HashEnumTest reads
   it, so presumably it gets positioned elsewhere before an enumeration
   starts -- confirm against hash.h */

    t -> key_size = (unsigned) key_size;
    t -> value_size = (unsigned) value_size;
    t -> key_cmp = key_cmp;
    t -> current_size = 0;
    t -> table_max = table_max;
    t -> hash1 = (hash1 == (int (*)()) NULL) ? default_hash1 : hash1;
    t -> hash2 = (hash2 == (int (*)()) NULL) ? default_hash2 : hash2;

/* Make a private copy of the unused key (bad_key) */

    t -> bad_key = malloc ((unsigned) key_size);
    if (t -> bad_key == NULL) {
	fprintf (stderr, "InitHashTable:  Can't allocate storage for the ");
	fprintf (stderr, "%d byte empty key\n", key_size);
	free ((char *) t);
	return 0;
    } /* if t -> bad_key == NULL */
    memcpy (t -> bad_key, bad_key, key_size);

/* Initialize the hash table with all bad keys */

    t -> table = myalloc (table_max, (unsigned) (key_size + value_size),
	    bad_key, (unsigned) key_size);
    if (t -> table == NULL) {

/* Each entry holds a key followed by a value, so the request was for
   (key_size + value_size) bytes per slot */

	fprintf (stderr, "InitHashTable:  Can't allocate table storage for ");
	fprintf (stderr, "%d bytes (%d entries)\n", (key_size + value_size) *
		table_max, table_max);
	fprintf (stderr, "                Try using a smaller table\n");
	free (t -> bad_key);
	free ((char *) t);
	return 0;
    } /* if t -> table == NULL */

    *h = t;
    return 1;
} /* InitHashTable_hlp */



/* Insert_hlp -- Store a (key, value) pair in a hash table.

	h	the table; not checked for NULL here
	key	address of   h -> key_size   bytes of key
	value	address of   h -> value_size   bytes of value

   If the key is already present its value is overwritten, otherwise the
   pair takes a free slot and the entry count goes up by one.  Once the
   count reaches half of   table_max   a table of twice the size is
   allocated and every old entry is inserted again; if that allocation
   fails the old table simply stays in use.

   Returns 1 on success.  Returns 0, after a message on stderr, when the
   key equals the empty key, when the table is already completely full, or
   when Lookup can't find a slot.
*/

int Insert_hlp (h, key, value)
HashTableT *h;
char *key, *value;
{
    char *slot;
    int entry_len;
    unsigned key_size;
    char *old, *end, *bad_key, *bigger, *ptr;

/* The empty key marks unused slots, so it can't be stored */

    if (Empty (key, h -> bad_key, h -> key_size)) {
	fprintf (stderr, "Insert_hlp:  Can't insert empty key!\n");
	return 0;
    }

/* There has to be room left in the table */

    if (h -> current_size >= h -> table_max) {
	fprintf (stderr, "Insert_hlp:  Hash Table size exceeded\n");
	return 0;
    }

/* Find either the slot that already holds this key or a free one */

    slot = Lookup (h, key);
    if (slot == NULL) {
	fprintf (stderr, "Insert_hlp:  NULL lookup value!\n");
	return 0;
    }

/* A free slot needs the key written and counts as a new entry; the value
   is (over)written in either case */

    if (Empty (slot, h -> bad_key, h -> key_size)) {
	memcpy (slot, key, (int) h -> key_size);
	++(h -> current_size);
    }
    memcpy (slot + h -> key_size, value, (int) h -> value_size);

/* Performance degrades if the table is more than 50% full; below that
   mark there is nothing more to do */

    if (h -> current_size < h -> table_max / 2)
	return 1;

/* Redistribute the entries in a table twice the current size */

    entry_len = h -> key_size + h -> value_size;
    key_size = h -> key_size;
    bad_key = h -> bad_key;
    old = h -> table;
    end = old + h -> table_max * entry_len;
    bigger = myalloc (h -> table_max * 2, (unsigned) entry_len, bad_key,
	    key_size);

/* Allocation failed, so we'll just let the table fill up and sacrifice
   some performance */

    if (bigger == NULL)
	return 1;

/* Switch to the new, empty table before re-inserting the old data so
   that the insertions land in it */

    h -> table = bigger;
    h -> table_max *= 2;
    h -> current_size = 0;

/* NOTE(review):  Insert is not defined in this file -- presumably a macro
   from hash.h that ends up in Insert_hlp; confirm */

    for (ptr = old; ptr < end; ptr += entry_len)
	if (!Empty (ptr, bad_key, key_size))
	    Insert (h, ptr, ptr + key_size);
    free (old);

    return 1;
} /* Insert_hlp */



/* Empty -- See if one of the keys in the hash table is empty.

	where	address of the key to examine
	bad_key	address of the unused key value
	size	number of bytes to compare

   Returns 1 if the first   size   bytes at   where   equal those at
   bad_key   (i.e. the entry is empty), 0 if any byte differs.  A NULL
   where   is reported on stderr and counts as empty.   bad_key   is not
   checked for NULL.
*/

int Empty (where, bad_key, size)
char *where, *bad_key;
unsigned size;
{
    unsigned i;

    if (where == NULL) {
	fprintf (stderr, "Empty:  NULL hash entry!\n");

/* Pretend it's empty -- any attempt to write to NULL will give a core
   dump */

	return 1;
    } /* if where == NULL */

/* Compare from the last byte down to the first.  The index is offset by
   one so that neither pointer has to be moved in front of its object,
   which C does not allow even when the result is never dereferenced. */

    for (i = size; i > 0; i--)
	if (where[i - 1] != bad_key[i - 1])
	    return 0;
    return 1;
} /* Empty */



/* Lookup -- Locate   key   in the hash table, or failing that the slot
   where it should be stored.

	h	the table
	key	address of the key to look for

   Returns the address of the *key* part of the slot (*not* the value):
   either the slot whose key compares equal (key_cmp returns 0) or the
   first unused slot met along the probe sequence.  Returns NULL, with a
   message on stderr, when   h   is NULL or   key   is the empty key, and
   NULL without a message when every probed slot holds some other key.

   The probe sequence starts at the hash1 value and continues with the
   hash2 values for probe numbers 1 .. table_max - 1.

   The   ACCESS   macro does array indexing into the hash table.  It
   expects   h   and   entry_len   to be in scope where it is used.
*/

#define ACCESS(i) (h -> table + (i) * entry_len)

char *Lookup (h, key)
HashTableT *h;
char *key;
{
    int v1, entry_len;
    int table_max, i, index;
    unsigned key_size;
    char *slot;

    if (h == NULL) {
	fprintf (stderr, "Lookup:  NULL HashTableT pointer!\n");
	return NULL;
    } /* if h == NULL */

    key_size = h -> key_size;
    if (Empty (key, h -> bad_key, key_size)) {
	fprintf (stderr, "Lookup:  Can't lookup empty key!\n");
	return NULL;
    } /* if Empty (key, key_size) */

    entry_len = key_size + h -> value_size;
    v1 = (*(h -> hash1)) (key, h -> table_max, key_size, h -> key_cmp);
    table_max = h -> table_max;

/* Examine the first-choice slot, then one slot per further probe.  The
   slot is tested before the next index is computed, so hash2 is only
   called once a slot has turned out to be taken by a different key. */

    index = v1;
    for (i = 1; ; i++) {
	slot = ACCESS (index);
	if (Empty (slot, h -> bad_key, key_size) ||
		(*(h -> key_cmp)) (slot, key) == 0)
	    return slot;
	if (i >= table_max)
	    break;
	index = (*(h -> hash2)) (key, table_max, v1, i, key_size,
		h -> key_cmp);
    } /* for */

/* Assuming that   hash2   maps onto every possible index, we now know
   that the table is full and   key   is not in it. */

    return NULL;
} /* Lookup */



/* ShowTable -- display the contents of a hash table in more readable form
   */

ShowTable (h, fp, key_print, value_print)
HashTableT *h;
FILE *fp;
char *(*key_print)(), *(*value_print) ();
{
    int i, entry_len;
    unsigned key_size, value_size;
    char cntl[100], *bad_key;

    if (h == NULL || fp == NULL) {
	fprintf (stderr, "ShowTable:  NULL table or output file\n");
	return;
    } /* if h == NULL */
    if (key_print == (char *(*) ()) NULL)
	key_print = default_print;
    if (value_print == (char *(*) ()) NULL)
	value_print = default_print;

    fprintf (fp, "Hash Table has %d (of %d) entries, using ",
	    h -> current_size, h -> table_max);
    if (h -> hash1 == default_hash1)
	if (h -> hash2 == default_hash2)
	    fprintf (fp, "default");
	else
	    fprintf (fp, "default and user-defined");
    else if (h -> hash2 == default_hash2)
	fprintf (fp, "default and user-defined");
    else
	fprintf (fp, "user-defined");
    fprintf (fp, "\n   hash functions.  Each key requires %d bytes, each ",
	    h -> key_size);
    fprintf (fp, "value %d.\n", h -> value_size);
    key_size = h -> key_size;
    value_size = h -> value_size;
    entry_len = h -> key_size + h -> value_size;
    bad_key = h -> bad_key;

/* Format the output nicely */

    sprintf (cntl, "%%%dd.  (%%s, %%s)\n", digits (h -> table_max));

    for (i = 0; i < h -> table_max; i++)
	if (!Empty (ACCESS (i), bad_key, key_size))
	    fprintf (fp, cntl, i, (*key_print) (ACCESS (i), key_size),
		    (*value_print) (ACCESS (i) + key_size, value_size));
} /* ShowTable */



/* HashEnumTest -- used to iterate through all elements of the hash
   table, in no particular order.

	h	the table;   h -> iter   is the enumeration cursor
	key	if not NULL, receives the address of the next entry's key
	value	if not NULL, receives the address of the next entry's value

   Starting at the cursor, skips unused slots until an occupied one is
   found, hands back its addresses and leaves the cursor on the slot after
   it.  Returns 1 when an entry was produced and 0 when the end of the
   table was reached.  Also returns 0 when   h   is NULL, and (after a
   message on stderr) when both   key   and   value   are NULL.

   Once the end has been reached the cursor stays at the end of the table,
   so further calls keep returning 0 without moving it any further.

   NOTE(review):  nothing in this file positions   h -> iter   at the
   start of the table; presumably that is done elsewhere before the first
   call -- confirm against hash.h.
*/

int HashEnumTest (h, key, value)
HashTableT *h;
char **key;
char **value;
{
    int entrysize;
    unsigned keysize;
    char *ptr, *endptr, *badkey;

    if (h == NULL)
	return 0;

    if (key == NULL && value == NULL) {
	fprintf (stderr,
		"HashEnumTest -- empty key and value params!\n");
	return 0;
    } /* if key == NULL && value == NULL */

    keysize = h -> key_size;
    entrysize = keysize + h -> value_size;
    badkey = h -> bad_key;
    endptr = h -> table + h -> table_max * entrysize;

    for (ptr = h -> iter; ptr < endptr; ptr += entrysize)
	if (!Empty (ptr, badkey, keysize))
	    break;

/* Nothing left.  Park the cursor at the end of the table instead of
   stepping beyond it; a pointer further than one past the end of the
   table is not valid even if it is never dereferenced. */

    if (ptr >= endptr) {
	h -> iter = endptr;
	return 0;
    } /* if ptr >= endptr */

    h -> iter = ptr + entrysize;
    if (key) *key = ptr;
    if (value) *value = ptr + keysize;

    return 1;
} /* HashEnumTest */



/* digits -- Count the digits in the base 10 representation of   x.
   Zero counts as one digit, and a minus sign is not counted.  */

static int digits (x)
int x;
{
    int count = 0;

/* Strip one digit per pass; there is always at least one */

    do {
	count++;
	x /= 10;
    } while (x != 0);

    return count;
} /* digits */



/* default_print -- Write out memory contents in hex.

	start	first byte to show
	len	number of bytes to show

   Returns a NUL-terminated string with two characters per byte (a value
   below 0x10 is padded with a blank on the left).  The string lives in
   static storage, but there are two buffers used in turn, so the return
   value remains valid for two calls, not just one.

   At most DEFAULT_PRINT_BUF_SIZE / 2 - 1 bytes fit in a buffer.  A longer
   request is reported on stderr and cut down to that many bytes.  A NULL
   start   or a   len   of 0 gives the empty string.
*/

static char *default_print (start, len)
char *start;
unsigned len;
{
    static char store1[DEFAULT_PRINT_BUF_SIZE], store2[DEFAULT_PRINT_BUF_SIZE];
    static int which = 1;
    unsigned max_len = DEFAULT_PRINT_BUF_SIZE / 2 - 1;
    unsigned i;
    char *result, *ptr;

/* Pick this call's buffer and leave the other one for the next call */

    result = which ? store1 : store2;
    which = !which;

/* Writing more than   max_len   bytes would run off the end of the
   buffer, so truncate as well as complain */

    if (len > max_len) {
	fprintf (stderr, "default_print:  length '%u' is too long, max=%u\n",
		len, max_len);
	len = max_len;
    } /* if len > max_len */

/* Start from an empty string so that nothing left over from an earlier
   call is returned when there are no bytes to show */

    ptr = result;
    *ptr = '\0';
    if (start == NULL)
	return result;

/* Go through unsigned char:  a negative char would be sign extended and
   print as eight hex digits instead of two */

    for (i = 0; i < len; i++) {
	sprintf (ptr, "%2x", (unsigned) (unsigned char) start[i]);
	ptr += 2;
    } /* for i = 0 */

    return result;
} /* default_print */



/* memcpy -- Private byte copy routine.  Copies   n   bytes from   s2   to
   s1,   lowest address first, and returns   s1.   Nothing is copied when
   n   is zero or negative.  */

static char *memcpy(s1, s2, n)
register char *s1, *s2;
int n;
{
    int i;

    for (i = 0; i < n; i++)
	s1[i] = s2[i];

    return s1;
} /* memcpy */



/* myalloc -- Allocate storage for (key, value) pairs using malloc, but
   initialize all key values with the   init   value.

	entries		number of pairs to allocate
	entry_len	bytes per pair
	init		address of the initial key value
	init_len	bytes to copy from   init   to the start of each pair

   Only the first   init_len   bytes of each pair are written; the rest is
   left as malloc returned it.

   Returns the new storage, or NULL on failure:  when malloc fails, when
   entries   is negative, when   init_len   exceeds   entry_len,   or when
   the total size doesn't fit in an unsigned.
*/

static char *myalloc (entries, entry_len, init, init_len)
int entries;
unsigned entry_len, init_len;
char *init;
{
/* Without a declaration in scope malloc would be taken to return int,
   which loses half of the pointer where pointers are wider than ints */

    char *malloc ();
    char *result, *ptr;
    unsigned total;

/* A negative count would never count down to zero in the loop below, and
   an initial value longer than a pair would run past the last pair */

    if (entries < 0 || init_len > entry_len)
	return NULL;

/* Unsigned multiplication wraps around silently; dividing the product
   back shows whether that happened */

    total = (unsigned) entries * entry_len;
    if (entry_len != 0 && total / entry_len != (unsigned) entries)
	return NULL;

    result = malloc (total);
    if (result == NULL)
	return NULL;

    for (ptr = result; entries; entries--, ptr += entry_len)
	memcpy (ptr, init, (int) init_len);

    return result;
} /* myalloc */
