#include <sys/time.h>

#include <err.h>
#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include "hashes.h"

/*
 * NOTE(review): not referenced anywhere in the visible code; the bucket
 * count actually used is the run-time `hashsize` below.
 */
#define	HASH_SIZE	65536

/* Text mode: array of `items` NUL-terminated strings (see read_text_data). */
unsigned char **tdata;
/*
 * Binary mode: `items` records of `datasize` bytes each, stored back to
 * back in one buffer (see read_binary_data).
 */
unsigned char *bdata;
/*
 * binary   - nonzero when the first command-line argument starts with 'b'
 * datasize - record size in bytes for binary mode (third argument)
 * hashsize - number of buckets used by dohash() (second argument)
 * items    - number of items loaded by the read_*_data() functions
 */
int binary, datasize, hashsize, items;

/* Input loaders: fill tdata/bdata and set `items` from standard input. */
void read_text_data(void);
void read_binary_data(void);
/* Run one named hash function over the loaded data and print statistics. */
void dohash(char *, unsigned int (*)(unsigned char *, size_t));
int main(int, char **);

/*
 * Load text items from standard input into the global `tdata` array and
 * set the global `items` to the number of entries stored.
 *
 * Every chunk returned by fgets() becomes one item, with its last newline
 * (if any) removed.  Because the line buffer holds 1024 bytes, a longer
 * input line is split across several consecutive items.
 *
 * The pointer array starts with 256 slots and doubles whenever it is full.
 * Any allocation failure terminates the program through err().
 */
void
read_text_data(void)
{
	char buf[1024], *cp;
	unsigned char **grown;
	int maxitems;

	items = 0;
	maxitems = 256;
	tdata = malloc(maxitems * sizeof(*tdata));
	if (tdata == NULL)
		err(1, "malloc(%ld)", (long)(maxitems * sizeof(*tdata)));

	while (fgets(buf, sizeof(buf), stdin) != NULL) {
		if ((cp = strrchr(buf, '\n')) != NULL)
			*cp = '\0';
		if (items >= maxitems) {
			maxitems <<= 1;
			/* Keep the old pointer until the resize is known good. */
			grown = realloc(tdata, maxitems * sizeof(*tdata));
			if (grown == NULL)
				err(1, "malloc(%ld)",
				    (long)(maxitems * sizeof(*tdata)));
			tdata = grown;
		}

		/*
		 * A NULL entry would crash later in strlen(), so fail here
		 * where the cause is still obvious.
		 */
		tdata[items] = (unsigned char *)strdup(buf);
		if (tdata[items] == NULL)
			err(1, "strdup");
		items++;
	}
}

/*
 * Convert a record count into a byte count, terminating the program if
 * the product does not fit in a size_t.  `recsize` must be non-zero.
 */
static size_t
records_to_bytes(int count, size_t recsize)
{
	if ((size_t)count > (size_t)-1 / recsize)
		errx(1, "binary data too large");
	return ((size_t)count * recsize);
}

/*
 * Read up to `len` bytes from standard input into `dst`, retrying after
 * short reads (read(2) may legitimately return fewer bytes than requested,
 * e.g. on pipes).  Returns the number of bytes stored; a value smaller
 * than `len` means end of input was reached first.  A read error
 * terminates the program through err().
 */
static size_t
read_record(unsigned char *dst, size_t len)
{
	size_t got;
	ssize_t n;

	got = 0;
	while (got < len) {
		n = read(STDIN_FILENO, dst + got, len - got);
		if (n < 0)
			err(1, "read");
		if (n == 0)
			break;
		got += (size_t)n;
	}
	return (got);
}

/*
 * Load fixed-size binary records from standard input into the global
 * `bdata` buffer and set the global `items` to the number of complete
 * records stored.  The record size is taken from the global `datasize`.
 *
 * Reading stops at end of input; a trailing partial record is discarded.
 * A non-positive `datasize`, an allocation failure, a size overflow or a
 * read error terminates the program.
 */
void
read_binary_data(void)
{
	unsigned char *grown;
	size_t recsize, nbytes;
	int maxitems;

	/*
	 * A zero record size would make every read "succeed" with 0 bytes
	 * and loop forever; a negative one is meaningless.
	 */
	if (datasize <= 0)
		errx(1, "invalid record size %d", datasize);
	recsize = (size_t)datasize;

	items = 0;
	maxitems = 256;
	nbytes = records_to_bytes(maxitems, recsize);
	bdata = malloc(nbytes);
	if (bdata == NULL)
		err(1, "malloc(%lu)", (unsigned long)nbytes);

	while (read_record(bdata + (size_t)items * recsize, recsize) ==
	    recsize) {
		items++;
		if (items >= maxitems) {
			/*
			 * Fudge so 1M elements don't need 2M slots,
			 * only works nicely for powers-of-two :-)
			 */
			maxitems = (maxitems << 1) + 64;
			nbytes = records_to_bytes(maxitems, recsize);
			/* Keep the old pointer until the resize is known good. */
			grown = realloc(bdata, nbytes);
			if (grown == NULL)
				err(1, "malloc(%lu)", (unsigned long)nbytes);
			bdata = grown;
		}
	}
}

/*
 * Run one hash function over the loaded data set and print a one-line
 * report on standard output.
 *
 * name - label printed in the report
 * hash - function under test, called as hash(data, length)
 *
 * In binary mode each of the `items` records in `bdata` is hashed with
 * length `datasize`; in text mode each string in `tdata` is hashed with
 * its strlen().  Every hash value is reduced modulo `hashsize` and the
 * matching bucket counter is incremented.
 *
 * The report shows how many buckets received at least one item ("used"),
 * the largest bucket population ("max"), and the elapsed time of the
 * hashing loop only, measured with gettimeofday().
 *
 * The bucket array is allocated on the first call, sized by the value of
 * `hashsize` at that time, and reused afterwards; `hashsize` must not grow
 * between calls.  A non-positive `hashsize` or an allocation failure
 * terminates the program.
 */
void
dohash(char *name, unsigned int (*hash)(unsigned char *, size_t))
{
	struct timeval t1, t2, t3;
	static int *buckets = NULL;
	int i, max, used;
	unsigned int h, nbuckets;

	/* Zero would divide by zero below; negative would corrupt sizes. */
	if (hashsize <= 0)
		errx(1, "invalid hash table size %d", hashsize);
	nbuckets = (unsigned int)hashsize;

	if (buckets == NULL) {
		buckets = malloc(sizeof(*buckets) * nbuckets);
		if (buckets == NULL)
			err(1, "malloc(%ld)", (long)sizeof(int) * hashsize);
	}

	/* Start every run from empty buckets. */
	memset(buckets, 0, sizeof(*buckets) * nbuckets);

	gettimeofday(&t1, NULL);
	if (binary) {
		unsigned char *data;

		data = bdata;
		for (i = 0; i < items; i++, data += datasize) {
			h = (*hash)(data, datasize);
			buckets[h % nbuckets]++;
		}
	} else {
		for (i = 0; i < items; i++) {
			h = (*hash)(tdata[i], strlen((char *)tdata[i]));
			buckets[h % nbuckets]++;
		}
	}

	gettimeofday(&t2, NULL);

	timersub(&t2, &t1, &t3);
	max = used = 0;
	for (i = 0; i < hashsize; i++) {
		if (buckets[i] > 0)
			used++;
		if (buckets[i] > max)
			max = buckets[i];
	}

	/* Time is printed as seconds plus milliseconds. */
	printf("hash %-10s   used %6d   max %6d   time %ld.%03ldsec\n",
	    name, used, max, (long)t3.tv_sec, (long)t3.tv_usec / 1000);
}

/*
 * Parse `arg` as a strictly positive decimal integer that fits in an int.
 * `what` names the value in the error message.  Anything else (empty
 * string, trailing garbage, zero, negative, out of range) terminates the
 * program through errx().
 */
static int
parse_positive_int(const char *arg, const char *what)
{
	char *end;
	long val;

	errno = 0;
	val = strtol(arg, &end, 10);
	if (end == arg || *end != '\0' || errno == ERANGE ||
	    val <= 0 || val > INT_MAX)
		errx(1, "invalid %s: %s", what, arg);
	return ((int)val);
}

/*
 * Entry point.
 *
 * argv[1] - mode: a first character of 'b' selects binary input, anything
 *           else selects text input
 * argv[2] - number of hash buckets
 * argv[3] - record size in bytes (required in binary mode only)
 *
 * Loads the data set from standard input, prints the item count, then
 * runs every hash function under test over it.
 */
int
main(int argc, char **argv)
{

	if (argc < 3)
		errx(1, "bad usage");
	binary = *argv[1] == 'b';
	/* Binary mode reads argv[3], so it must actually be present. */
	if (binary && argc < 4)
		errx(1, "bad usage");

	hashsize = parse_positive_int(argv[2], "hash size");
	if (binary)
		datasize = parse_positive_int(argv[3], "data size");

	if (binary)
		read_binary_data();
	else
		read_text_data();

	printf("total items:  %d\n", items);

	dohash("dumb", dumbhash);
	dohash("fnv", fnv);
	dohash("lennart", lennart);
	dohash("crc", crchash);
	dohash("perl", perlhash);
	dohash("mouse", mousehash);

	exit(0);
}
