/*
 * xferstats, a wu-ftpd and ncftpd logfile parser and report generator
 * Copyright 1997, 1998 Phil Schwan <pschwan@cmu.edu>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *
 */

#ifdef HAVE_CONFIG_H
#  include "config.h"
#else
#  error You did not run configure, did you?
#endif

#ifdef HAVE_STRING_H
#  include <string.h> /* needed everywhere */
#else
#  error The <string.h> header file is required to compile xferstats
#endif

#ifdef HAVE_CTYPE_H
#  include <ctype.h> /* needed for isdigit, tolower, isalpha */
#else
#  error The <ctype.h> header file is required to compile xferstats
#endif

#ifdef HAVE_FCNTL_H
#  include <fcntl.h>
#else
#  error The <fcntl.h> header file is required to compile xferstats
#endif

#ifdef HAVE_MALLOC_STATS
#  include <malloc.h> /* needed for malloc_stats() */
#endif

#include <stdlib.h> /* needed everywhere */
#include <stdio.h> /* needed everywhere */
#include <time.h>

#include "xferstats.h"
#include "hashstr.h"

/* Three-letter weekday abbreviations, Sunday first (the same order that
 * day2int() numbers them, minus one).  Each row is exactly three bytes wide,
 * so there is no room for a terminating NUL: the rows are NOT C strings and
 * must be printed with a bounded conversion such as "%.3s". */
const char DAYS[7][3] = {"Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"};
/* Human-readable report type names, used as T_NAMES[type] in the debug
 * messages of generate_misc_data().  Rows are NUL-terminated ("domain" plus
 * its terminator fills all seven bytes).  Presumably the order matches the
 * T_FILE .. T_DOMAIN constants from xferstats.h -- TODO confirm. */
const char T_NAMES[5][7] = {"file", "dir", "host", "TLD", "domain"};


/* Converts an abbreviated English month name ("Jan", "Feb", ...) to its
 * number, 1-12.
 *
 * In the interest of efficiency only as many leading characters are examined
 * as are needed to tell the months apart.  Therefore "Jan", "Jar" and
 * "Jagawagavoovoo" all return 1.
 *
 * Returns 0 when the examined characters cannot belong to any month name
 * (or when month is NULL).  Every inner switch is followed by an explicit
 * break so that an unmatched string can never fall through into the next
 * month's case. */
int
month2int(char *month)
{
  if (month == NULL)
    return 0;

  switch (month[0])
    {
    case 'J':
      switch (month[1])
        {
        case 'a':
          return 1;
        case 'u':
          /* "Jun" and "Jul" differ only in the third letter */
          switch (month[2])
            {
            case 'n':
              return 6;
            case 'l':
              return 7;
            }
          break;
        }
      break;
    case 'F':
      return 2;
    case 'M':
      /* "Mar" and "May" differ only in the third letter; make sure the
       * string is long enough before looking at it */
      if (month[1] == '\0')
        break;
      switch (month[2])
        {
        case 'r':
          return 3;
        case 'y':
          return 5;
        }
      break;
    case 'A':
      switch (month[1])
        {
        case 'p':
          return 4;
        case 'u':
          return 8;
        }
      break;
    case 'S':
      return 9;
    case 'O':
      return 10;
    case 'N':
      return 11;
    case 'D':
      return 12;
    }

  return 0;
} /* month2int */


/* Decides which of two 24-character date strings falls on the earlier
 * calendar day.  Only year, month and day-of-month are looked at; the time of
 * day is ignored.  Returns:
 *
 *  0 when both fall on the same day (or when either string is not exactly
 *    24 characters long)
 *  1 when date1 is later than date2
 * -1 when date1 is earlier than date2 */
int
compare_dates(char date1[25], char date2[25])
{
  char digits[5] = {0};
  int lhs, rhs;

  /* refuse to compare anything that is not a full-length date string */
  if (!(strlen(date1) == 24 && strlen(date2) == 24))
    return 0;

  /* year: the four characters at offsets 20-23 */
  memcpy(digits, date1 + 20, 4);
  lhs = atoi(digits);
  memcpy(digits, date2 + 20, 4);
  rhs = atoi(digits);
#ifdef DEBUG
  fprintf(stderr, "Year1: %d     Year2: %d\n", lhs, rhs);
#endif
  if (lhs != rhs)
    return (lhs > rhs) ? 1 : -1;

  /* month: abbreviated name starting at offset 4 */
  lhs = month2int(date1 + 4);
  rhs = month2int(date2 + 4);
#ifdef DEBUG
  fprintf(stderr, "Month1: %d     Month2: %d\n", lhs, rhs);
#endif
  if (lhs != rhs)
    return (lhs > rhs) ? 1 : -1;

  /* day of the month: the two characters at offsets 8-9.  Cut the buffer
   * down to two digits so the leftover year digits are not parsed. */
  digits[2] = '\0';
  digits[0] = date1[8];
  digits[1] = date1[9];
  lhs = atoi(digits);
  digits[0] = date2[8];
  digits[1] = date2[9];
  rhs = atoi(digits);
#ifdef DEBUG
  fprintf(stderr, "Day1: %d     Day2: %d\n", lhs, rhs);
#endif

  if (lhs > rhs)
    return 1;
  if (lhs < rhs)
    return -1;
  return 0;
} /* compare_dates */


/* Runs down the transfer list starting at pointers->first_ftp_line and
 * accumulates per-hour totals (seconds, file count and data) into
 * pointers->hour_data[0-23].  The hour is taken from the two characters at
 * offsets 11-12 of each entry's date string.
 *
 * Entries whose hour field does not parse to 0-23 are skipped rather than
 * being used as an array index.
 *
 * When built with PTHREADS the function holds hourly_data_running_mutex for
 * its whole run and, on completion, decrements the active thread count and
 * signals the waiting conditions. */
void
generate_hourly_data(pointers_t * pointers)
{
  ftp_entry_t * ftp_line = pointers->first_ftp_line;
  int index;
  char hour_str[3] = {0};
  
#ifdef PTHREADS
  pthread_mutex_lock(pointers->hourly_data_running_mutex);
#endif
#ifdef DEBUGS
  fprintf(stderr, "Beginning hourly data generation...\n");
#endif

  /* start every hour bucket from zero */
  for (index = 0; index <= 23; index++)
    pointers->hour_data[index].seconds = pointers->hour_data[index].file_count
      = pointers->hour_data[index].data = 0;

  for (; ftp_line; ftp_line = ftp_line->next_ptr)
    {
      /* pluck the hour from the date into a string */
      hour_str[0] = ftp_line->date[11];
      hour_str[1] = ftp_line->date[12];
      
      /* convert it to an integer */
      index = atoi(hour_str);

      /* a malformed date must not be allowed to index outside the array */
      if (index < 0 || index > 23)
        {
#ifdef DEBUG
          fprintf(stderr, "generate_hourly_data: Invalid hour: %d\n", index);
#endif
          continue;
        }
      
      /* add it to the already-compiled data in the array */
      pointers->hour_data[index].seconds += ftp_line->seconds;
      pointers->hour_data[index].file_count++;
      pointers->hour_data[index].data += ftp_line->data;
    }
  
#ifdef DEBUGS
  fprintf(stderr, "Hourly data generation complete.\n");
#endif
#ifdef PTHREADS
  pthread_mutex_lock(pointers->active_threads_mutex);
  if (--pointers->active_threads < MAX_ACTIVE_THREADS)
    pthread_cond_signal(pointers->active_threshold_cond);
  pthread_mutex_unlock(pointers->active_threads_mutex);
  pthread_mutex_unlock(pointers->hourly_data_running_mutex);
  pthread_cond_signal(pointers->data_gen_finished_cond);
#endif
} /* generate_hourly_data */


/* Accumulates per-month totals (seconds, file count and data) into
 * pointers->month_data[0-11].
 *
 * Two sources are possible:
 *  - when config->number_daily_stats is zero and a day list exists, the
 *    already-compiled per-day summaries are added up (presumably because a
 *    non-zero limit means the day list may not cover every transfer --
 *    TODO confirm);
 *  - otherwise the raw transfer list is walked and each transfer is
 *    credited to the month named in its own date string.
 *
 * Entries whose month cannot be recognised by month2int() are ignored.
 *
 * When built with PTHREADS the function holds monthly_data_running_mutex
 * for its whole run and, on completion, decrements the active thread count
 * and signals the waiting conditions. */
void
generate_monthly_data(pointers_t * pointers)
{
  xfdays_t * day = pointers->first_day_ptr;
  ftp_entry_t * ftp_line;
  int index;

#ifdef PTHREADS
  pthread_mutex_lock(pointers->monthly_data_running_mutex);
#endif  
#ifdef DEBUGS
  fprintf(stderr, "Beginning monthly data generation...\n");
#endif

  /* start every month bucket from zero */
  for (index = 0; index <= 11; index++)
    pointers->month_data[index].seconds =
      pointers->month_data[index].file_count =
      pointers->month_data[index].data = 0;

  if (!pointers->config->number_daily_stats && day)
    for (; day; day = day->next_ptr)
      {
        /* the month abbreviation starts at offset 4 of the day name */
        if ((index = month2int(day->name + 4) - 1) >= 0)
          {
            /* add it to the already-compiled data in the array */
            pointers->month_data[index].seconds += day->seconds;
            pointers->month_data[index].file_count += day->file_count;
            pointers->month_data[index].data += day->data;
          }
      }
  else
    for (ftp_line = pointers->first_ftp_line; ftp_line;
         ftp_line = ftp_line->next_ptr)
      {
        /* take the month from this transfer's own date; the day list may
         * be empty (NULL) on this path and must not be touched */
        if ((index = month2int(ftp_line->date + 4) - 1) >= 0)
          {
            /* add it to the already-compiled data in the array */
            pointers->month_data[index].seconds += ftp_line->seconds;
            pointers->month_data[index].file_count++;
            pointers->month_data[index].data += ftp_line->data;
          }
      }
  
#ifdef DEBUGS
  fprintf(stderr, "Monthly data generation complete.\n");
#endif
#ifdef PTHREADS
  pthread_mutex_lock(pointers->active_threads_mutex);
  if (--pointers->active_threads < MAX_ACTIVE_THREADS)
    pthread_cond_signal(pointers->active_threshold_cond);
  pthread_mutex_unlock(pointers->active_threads_mutex);
  pthread_mutex_unlock(pointers->monthly_data_running_mutex);
  pthread_cond_signal(pointers->data_gen_finished_cond);
#endif
} /* generate_monthly_data */


/* Builds the day-of-the-month report: every entry of the day list is added
 * to pointers->dom_data[0-30], slot 0 being the 1st of the month.  The day
 * number is read from the two characters at offset 8 of the day's name;
 * days whose number falls outside 1-31 are left out (and reported when
 * DEBUG is defined).
 *
 * When built with PTHREADS the function holds dom_data_running_mutex for
 * its whole run and, on completion, decrements the active thread count and
 * signals the waiting conditions. */
void
generate_dom_data(pointers_t * pointers)
{
  xfdays_t * cur = pointers->first_day_ptr;
  char dom_text[3] = {0};
  int slot;

#ifdef PTHREADS
  pthread_mutex_lock(pointers->dom_data_running_mutex);
#endif
#ifdef DEBUGS
  fprintf(stderr, "Beginning day-of-the-month data generation...\n");
#endif

  /* wipe all 31 buckets */
  for (slot = 0; slot < 31; slot++)
    {
      pointers->dom_data[slot].data = 0;
      pointers->dom_data[slot].file_count = 0;
      pointers->dom_data[slot].seconds = 0;
    }

  while (cur != NULL)
    {
      /* two-character day field -> zero-based bucket number */
      strncpy(dom_text, cur->name + 8, 2);
      slot = atoi(dom_text) - 1;

      if (slot >= 0 && slot <= 30)
        {
          /* fold this day's totals into its bucket */
          pointers->dom_data[slot].seconds += cur->seconds;
          pointers->dom_data[slot].file_count += cur->file_count;
          pointers->dom_data[slot].data += cur->data;
        }
#ifdef DEBUG
      else
        fprintf(stderr, "generate_dom_data: Invalid day: %d\n", slot);
#endif

      cur = cur->next_ptr;
    }

#ifdef DEBUGS
  fprintf(stderr, "Day-of-the-month data generation complete.\n");
#endif
#ifdef PTHREADS
  pthread_mutex_lock(pointers->active_threads_mutex);
  --pointers->active_threads;
  if (pointers->active_threads < MAX_ACTIVE_THREADS)
    pthread_cond_signal(pointers->active_threshold_cond);
  pthread_mutex_unlock(pointers->active_threads_mutex);
  pthread_mutex_unlock(pointers->dom_data_running_mutex);
  pthread_cond_signal(pointers->data_gen_finished_cond);
#endif
} /* generate_dom_data */


/* Converts an abbreviated English weekday name ("Sun", "Mon", ...) to its
 * number, with Sunday = 1 through Saturday = 7.
 *
 * Like month2int(), only as many leading characters are examined as are
 * needed to tell the days apart (at most two).
 *
 * Returns 0 when the examined characters cannot belong to any weekday name
 * (or when day is NULL).  Every inner switch is followed by an explicit
 * break so that an unmatched string can never fall through into the next
 * day's case. */
int
day2int(char *day)
{
  if (day == NULL)
    return 0;

  switch (day[0])
    {
    case 'S':
      switch (day[1])
        {
        case 'u':
          return 1;
        case 'a':
          return 7;
        }
      break;
    case 'M':
      return 2;
    case 'T':
      switch (day[1])
        {
        case 'u':
          return 3;
        case 'h':
          return 5;
        }
      break;
    case 'W':
      return 4;
    case 'F':
      return 6;
    }

  return 0;
} /* day2int */


/* Builds the day-of-the-week report: every entry of the day list is added
 * to pointers->weekday_data[0-6], selected by day2int() on the entry's name
 * (minus one, so Sunday lands in slot 0).  Entries whose weekday is not
 * recognised are left out (and reported when DEBUG is defined).
 *
 * When built with PTHREADS the function holds dow_data_running_mutex for
 * its whole run and, on completion, decrements the active thread count and
 * signals the waiting conditions. */
void
generate_dow_data(pointers_t * pointers)
{
  xfdays_t * cur = pointers->first_day_ptr;
  int slot;

#ifdef PTHREADS
  pthread_mutex_lock(pointers->dow_data_running_mutex);
#endif
#ifdef DEBUGS
  fprintf(stderr, "Beginning day-of-the-week data generation...\n");
#endif

  /* wipe all seven buckets */
  for (slot = 0; slot < 7; slot++)
    {
      pointers->weekday_data[slot].data = 0;
      pointers->weekday_data[slot].file_count = 0;
      pointers->weekday_data[slot].seconds = 0;
    }

  while (cur != NULL)
    {
      slot = day2int(cur->name) - 1;

      if (slot >= 0)
        {
          /* fold this day's totals into its weekday bucket */
          pointers->weekday_data[slot].seconds += cur->seconds;
          pointers->weekday_data[slot].file_count += cur->file_count;
          pointers->weekday_data[slot].data += cur->data;
        }
#ifdef DEBUG
      else
        fprintf(stderr, "Invalid day in date \"%s\"\n", cur->name);
#endif

      cur = cur->next_ptr;
    }

#ifdef DEBUGS
  fprintf(stderr, "Day-of-the-week data generation complete.\n");
#endif
#ifdef PTHREADS
  pthread_mutex_lock(pointers->active_threads_mutex);
  --pointers->active_threads;
  if (pointers->active_threads < MAX_ACTIVE_THREADS)
    pthread_cond_signal(pointers->active_threshold_cond);
  pthread_mutex_unlock(pointers->active_threads_mutex);
  pthread_mutex_unlock(pointers->dow_data_running_mutex);
  pthread_cond_signal(pointers->data_gen_finished_cond);
#endif
} /* generate_dow_data */


/* Groups the transfer list (pointers->first_ftp_line) by calendar day.
 *
 * For every transfer a day name is built from the first 10 characters of
 * its date plus the 5 characters starting at offset 19 (i.e. the date with
 * the time of day cut out).  The transfer is then added to the xfdays_t
 * node carrying that name, or a new node is allocated from
 * pointers->day_arena and appended to the end of the day list
 * (first_day_ptr .. last_day_ptr).
 *
 * Along the way pointers->low_date and pointers->high_date are set to the
 * earliest and latest transfer dates seen, as judged by compare_dates().
 *
 * An empty transfer list is tolerated: nothing is allocated and the
 * low/high dates are left untouched.
 *
 * When built with PTHREADS the function decrements the active thread count
 * and signals the waiting conditions on completion. */
void
generate_daily_data(pointers_t * pointers)
{
  ftp_entry_t * ftp_line = pointers->first_ftp_line;
  xfdays_t * new_day_ptr, * current_day_ptr;
  char temp_date[16];

#ifdef DEBUGS
  fprintf(stderr, "Beginning daily data generation...\n");
#endif
  
  pointers->day_arena = NULL;

  if (ftp_line != NULL)
    {
      /* seed the date range with the first transfer; the terminators
       * written here stay in place for the rest of the run because later
       * updates overwrite only the first 24 bytes */
      strncpy(pointers->low_date, ftp_line->date, 24);
      pointers->low_date[24] = '\0';
      strncpy(pointers->high_date, ftp_line->date, 24);
      pointers->high_date[24] = '\0';

      if (pointers->first_day_ptr == NULL)
        {
          /* this must be the first time through: the first transfer
           * becomes the one and only node of the day list */
          new_day_ptr =
            (xfdays_t *)amalloc(&pointers->day_arena, sizeof(xfdays_t));
      
          strncpy(new_day_ptr->name, ftp_line->date, 10);
          new_day_ptr->name[10] = '\0';
          strncat(new_day_ptr->name, ftp_line->date + 19, 5);
          new_day_ptr->data = ftp_line->data;
          new_day_ptr->file_count = 1;
          new_day_ptr->next_ptr = new_day_ptr->prev_ptr = NULL;
          new_day_ptr->seconds = ftp_line->seconds;
          pointers->first_day_ptr = new_day_ptr;
          pointers->last_day_ptr = new_day_ptr;

          ftp_line = ftp_line->next_ptr;
        }
    }
  
  for (; ftp_line; ftp_line = ftp_line->next_ptr)
    {
      /* widen the overall date range if this transfer lies outside it */
      if (compare_dates(ftp_line->date, pointers->high_date) > 0)
        strncpy(pointers->high_date, ftp_line->date, 24);
      else if (compare_dates(ftp_line->date, pointers->low_date) < 0)
        strncpy(pointers->low_date, ftp_line->date, 24);

      /* build the day name for this transfer (date without the time) */
      strncpy(temp_date, ftp_line->date, 10);
      temp_date[10] = '\0';
      strncat(temp_date, ftp_line->date + 19, 5);
      
      /* search backwards from the newest day for a node with that name */
      for (current_day_ptr = pointers->last_day_ptr;
           current_day_ptr;
           current_day_ptr = current_day_ptr->prev_ptr)
        {
          if (!strcmp(current_day_ptr->name, temp_date))
            {
              current_day_ptr->file_count++;
              current_day_ptr->data += ftp_line->data;
              current_day_ptr->seconds += ftp_line->seconds;
#ifdef DEBUG
              fprintf(stderr, "Added stats to the day \"%s\".\n",
                      current_day_ptr->name);
#endif
              break;
            }
        }
      
      /* a non-NULL pointer means the search above found (and updated) a
       * matching day */
      if (current_day_ptr)
        continue;
      
      /* no such day yet: create it and append it to the end of the list */
      new_day_ptr =
        (xfdays_t *)amalloc(&pointers->day_arena, sizeof(xfdays_t));
      
      strncpy(new_day_ptr->name, ftp_line->date, 10);
      new_day_ptr->name[10] = '\0';
      strncat(new_day_ptr->name, ftp_line->date + 19, 5);
      new_day_ptr->data = ftp_line->data;
      new_day_ptr->file_count = 1;
      new_day_ptr->seconds = ftp_line->seconds;

      pointers->last_day_ptr->next_ptr = new_day_ptr;
      new_day_ptr->next_ptr = NULL;
      new_day_ptr->prev_ptr = pointers->last_day_ptr;
      pointers->last_day_ptr = new_day_ptr;
#ifdef DEBUG
      fprintf(stderr, "New day structure for \"%s\" created.\n",
              new_day_ptr->name);
#endif
    }

#ifdef DEBUGS
  fprintf(stderr, "Daily data generation complete.\n");
#endif
#ifdef PTHREADS
  pthread_mutex_lock(pointers->active_threads_mutex);
  pointers->active_threads--;
  if (pointers->active_threads < MAX_ACTIVE_THREADS)
    pthread_cond_signal(pointers->active_threshold_cond);
  pthread_mutex_unlock(pointers->active_threads_mutex);
  pthread_cond_signal(pointers->data_gen_finished_cond);
#endif
} /* generate_daily_data */



/* This is probably the hardest function in xferstats to read or change.
 * It builds five different reports -- file, directory, host, TLD and domain --
 * in a single routine because they are all processed in much the same way.
 * In addition to the pointers struct, it is passed the report type (defined
 * in xferstats.h). */
void
generate_misc_data(pointers_t *pointers, int type)
{
  htable * ht;

  arena_t ** arena = NULL;
  ftp_entry_t * ftp_line = pointers->first_ftp_line;
  xfmisc_t * high_ptr = NULL, * prev_high_ptr = NULL,
    * current_data_ptr = NULL, * prev_path_ptr,
    * misc_data_list = NULL, ** sorted_list = NULL, ** pointers_ptr = NULL,
    * actual_sorted_list = NULL, * file_data_ptr, * bottom_sorted_list = NULL;
  int a, count = 0;
  unsigned LONGLONG n = 0, number = 0, max_n = 0, prev_min = -1;
  unsigned int num_keep = 0;
  char name[MAXPATHLEN], * tmpchar, * prevchar, tempcount[11];
  
#ifdef DEBUGS
  fprintf(stderr, "Beginning %s data generation...(%ld)\n", T_NAMES[type],
	  time(NULL));
#endif

  ht = create_hash_table(270371);

  switch (type)
    {
    case T_FILE:
      arena = &pointers->file_arena;
      pointers_ptr = &pointers->first_file_ptr;
      break;
    case T_DIR:
      arena = &pointers->dir_arena;
      pointers_ptr = &pointers->first_dir_ptr;
      break;
    case T_HOST:
      arena = &pointers->host_arena;
      pointers_ptr = &pointers->first_host_ptr;
      break;
    case T_TLD:
      arena = &pointers->tld_arena;
      pointers_ptr = &pointers->first_tld_ptr;
      break;
    case T_DOMAIN:
      arena = &pointers->domain_arena;
      pointers_ptr = &pointers->first_domain_ptr;
      break;
    }

  if (type == T_DOMAIN && pointers->config->host_traffic &&
      pointers->first_host_ptr && !pointers->config->number_host_stats)
    {
      for (file_data_ptr = pointers->first_host_ptr;
	   file_data_ptr;
	   file_data_ptr = file_data_ptr->next_ptr)
	{
	  /* make sure it's resolved first */
	  if ((tmpchar = strrchr(file_data_ptr->name, '.')) &&
	      tmpchar + 1)
	    {
#ifdef DEBUG2
	      fprintf(stderr, "First letter of TLD: %d:%c\n", *(tmpchar + 1),
		      *(tmpchar + 1));
#endif
	      if (isdigit((int)*(tmpchar + 1)))
		{
		  strcpy(name, "unresolved");
		  break;
		}
	    }
	  else
	    {
	      strcpy(name, "unresolved");
	      continue;
	    }
      
	  /* now check to see if we have another '.' */
	  if (tmpchar == file_data_ptr->name)
	    {
	      /* tmpchar is already the first character */
	      
	      /* convert all to lower case */
	      for(prevchar = tmpchar + 1;
		  (*prevchar = tolower((int)*prevchar)); prevchar++);
	  
	      strncpy(name, tmpchar + 1, MAXHOSTNAMELEN - 1);
	      name[MAXHOSTNAMELEN - 1] = '\0';
	      break;
	    }
	  else if ((tmpchar = strrchr(tmpchar - 1, '.')) == NULL)
	    {
	      /* there is only one '.' in the host */
	      
	      /* convert all to lower case */
	      for(prevchar = file_data_ptr->name;
		  (*prevchar = tolower((int)*prevchar)); prevchar++);
	      
	      strncpy(name, file_data_ptr->name, MAXHOSTNAMELEN - 1);
	      name[MAXHOSTNAMELEN - 1] = '\0';
	      break;
	    }
	  else
	    {
	      /* take everything after that '.' */
	      
	      /* convert all to lower case */
	      for(prevchar = tmpchar + 1;
		  (*prevchar = tolower((int)*prevchar)); prevchar++);
	      
	      strncpy(name, tmpchar + 1, MAXHOSTNAMELEN - 1);
	      name[MAXHOSTNAMELEN - 1] = '\0';
	      break;
	    }
	}      
    }
  else if (type == T_DIR && pointers->config->file_traffic &&
	   pointers->first_file_ptr && !pointers->config->number_file_stats)
    {
      for (file_data_ptr = pointers->first_file_ptr;
	   file_data_ptr;
	   file_data_ptr = file_data_ptr->next_ptr)
	{
	  strncpy(name, file_data_ptr->name, MAXPATHLEN - 1);
	  /* if we're doing directory processing, cut off at least the
	   * filename (and probably some directories) based on
	   * pointers->config->depth */
	  if (pointers->config->depth)
	    {
	      if ((tmpchar = strrchr(name, '/')))
		*(tmpchar + 1) = '\0';
	      for (a = 1, prevchar = name + 1, tmpchar = name;
		   a <= pointers->config->depth; prevchar = tmpchar, a++)
		{
		  if ((tmpchar = strchr(tmpchar + 1, '/')) == NULL)
		    {
		      *prevchar = '\0';
		      break;
		    }
		  if (a == pointers->config->depth)
		    *tmpchar = '\0';
		}
	    }
	  else
	    if ((tmpchar = strrchr(name, '/')))
	      *tmpchar = '\0';
	  
#ifdef DEBUG2
	  fprintf(stderr, "path: %s dir: %s\n", file_data_ptr->name, name);
#endif

	  if (get_value(ht, name, (void **)&current_data_ptr))
	    {
	      current_data_ptr->file_count += file_data_ptr->file_count;
	      current_data_ptr->seconds += file_data_ptr->seconds;
	      current_data_ptr->data += file_data_ptr->data;
#ifdef DEBUG
	      fprintf(stderr, "Added stats to the %s entry \"%s\".\n",
		      T_NAMES[type], name);
#endif
	      continue;
	    }
	  else
	    {
	      current_data_ptr =
		(xfmisc_t *)amalloc(arena, sizeof(xfmisc_t));
	      current_data_ptr->name = amalloc(arena, strlen(name) + 1);
	      
	      strcpy(current_data_ptr->name, name);
	      current_data_ptr->data = file_data_ptr->data;
	      current_data_ptr->seconds = file_data_ptr->seconds;
	      current_data_ptr->file_count = file_data_ptr->file_count;

	      add_hash_string(ht, name, current_data_ptr);

#ifdef DEBUG
	      fprintf(stderr, "New %s structure for \"%s\" created.\n",
		      T_NAMES[type], name);
#endif
	    }
	}
    }
  else
    {
      for (; ftp_line; ftp_line = ftp_line->next_ptr)
	{
	  /* if we're doing file or host processing, all we need to do is
	   * strncpy the name and go */
	  switch (type)
	    {
	    case T_FILE:
	      strncpy(name, ftp_line->path, MAXPATHLEN - 1);
	      break;
	    case T_HOST:
	      strncpy(name, ftp_line->host, MAXHOSTNAMELEN - 1);
	      break;
	    case T_DIR:
	      strncpy(name, ftp_line->path, MAXPATHLEN - 1);
	      /* if we're doing directory processing, cut off at least the
	       * filename (and probably some directories) based on
	       * pointers->config->depth */
	      if (pointers->config->depth)
		{
		  if ((tmpchar = strrchr(name, '/')))
		    *(tmpchar + 1) = '\0';
		  for (a = 1, prevchar = name + 1, tmpchar = name;
		       a <= pointers->config->depth;
		       prevchar = tmpchar, a++)
		    {
		      if ((tmpchar = strchr(tmpchar + 1, '/')) == NULL)
			{
			  *prevchar = '\0';
			  break;
			}
		      if (a == pointers->config->depth)
			*tmpchar = '\0';
		    }
		}
	      else
		if ((tmpchar = strrchr(name, '/')))
		  *tmpchar = '\0';
	      
#ifdef DEBUG2
	      fprintf(stderr, "date: %s path: %s dir: %s\n", ftp_line->date,
		      ftp_line->path, name);
#endif
	      break;
	    case T_TLD:
	      strncpy(name, ftp_line->host, MAXHOSTNAMELEN - 1);
	      /* if we're doing TLD processing, we only want what's after the
	       * last '.' */
	      if ((tmpchar = strrchr(ftp_line->host, '.')) &&
		  tmpchar + 1)
		{
		  strncpy(name, tmpchar + 1, MAXHOSTNAMELEN - 1);
#ifdef DEBUG2
		  fprintf(stderr, "First letter of TLD: %d:%c\n", name[0],
			  name[0]);
#endif
		  if (isdigit((int)name[0]))
		    strcpy(name, "unresolved");
		  else
		    /* convert all to lower case */
		    for(tmpchar = name; (*tmpchar = tolower((int)*tmpchar));
			tmpchar++);
		}
	      else
		strcpy(name, "unresolved");
#ifdef DEBUG2
	      fprintf(stderr, "host: %s TLD: %s\n", ftp_line->host, name);
#endif
	      break;
	    case T_DOMAIN:
	      /* if we're doing domain processing, check to see if it's
	       * resolved first */
	      if ((tmpchar = strrchr(ftp_line->host, '.')) &&
		  tmpchar + 1)
		{
#ifdef DEBUG2
		  fprintf(stderr, "First letter of TLD: %d:%c\n",
			  *(tmpchar + 1), *(tmpchar + 1));
#endif
		  if (isdigit((int)*(tmpchar + 1)))
		    {
		      strcpy(name, "unresolved");
		      break;
		    }
		}
	      else
		{
		  strcpy(name, "unresolved");
		  break;
		}

	      /* now check to see if we have another '.' */
	      if (tmpchar == ftp_line->host)
		{
		  /* tmpchar is already the first character */

		  /* convert all to lower case */
		  for(prevchar = tmpchar + 1;
		      (*prevchar = tolower((int)*prevchar)); prevchar++);

		  strncpy(name, tmpchar + 1, MAXHOSTNAMELEN - 1);
		  name[MAXHOSTNAMELEN - 1] = '\0';
		  break;
		}

	      *tmpchar = '\0';
	      if ((prevchar = strrchr(ftp_line->host, '.')) == NULL)
		{
		  /* there is only one '.' in the host */

		  *tmpchar = '.';
		  tmpchar = prevchar;

		  /* convert all to lower case */
		  for(prevchar = ftp_line->host;
		      (*prevchar = tolower((int)*prevchar)); prevchar++);

		  strncpy(name, ftp_line->host, MAXHOSTNAMELEN - 1);
		  name[MAXHOSTNAMELEN - 1] = '\0';
		  break;
		}
	      else
		{
		  /* take everything after that '.' */

		  *tmpchar = '.';
		  tmpchar = prevchar;

		  /* convert all to lower case */
		  for(prevchar = tmpchar + 1;
		      (*prevchar = tolower((int)*prevchar)); prevchar++);

		  strncpy(name, tmpchar + 1, MAXHOSTNAMELEN - 1);
		  name[MAXHOSTNAMELEN - 1] = '\0';
		  break;
		}
	    } /* switch(type) */
	  
	  if (get_value(ht, name, (void **)&current_data_ptr))
	    {
	      current_data_ptr->file_count++;
	      current_data_ptr->seconds += ftp_line->seconds;
	      current_data_ptr->data += ftp_line->data;
#ifdef DEBUG
	      fprintf(stderr, "Added stats to the %s entry \"%s\".\n",
		      T_NAMES[type], name);
#endif
	      continue;
	    }
	  else
	    {
	      current_data_ptr =
		(xfmisc_t *)amalloc(arena, sizeof(xfmisc_t));
	      current_data_ptr->name = amalloc(arena, strlen(name) + 1);
	      
	      strcpy(current_data_ptr->name, name);
	      current_data_ptr->data = ftp_line->data;
	      current_data_ptr->seconds = ftp_line->seconds;
	      current_data_ptr->file_count = 1;
	      
	      add_hash_string(ht, name, current_data_ptr);
	      
#ifdef DEBUG
	      fprintf(stderr, "New %s structure for \"%s\" created.\n",
		      T_NAMES[type], name);
#endif
	    }
	}
    }

#ifdef DEBUG
  fprintf(stderr, "Starting to sort...\n");
#endif

  /* ------ SORTING ROUTINES START HERE ------ */

  /* start by making file_data_tree = the first first_file_ptr bucket with data
   * in it.  n is the largest value of 10^n that a file_count can be divided by
   * at least once.  ie, for a file_count of 10-99, n = 1.  for a file_count of
   * 1000-9999, n = 3, etc. this bit finds the biggest n out of the whole tree
   * so we can allocate an array */

  misc_data_list = make_linked_list(ht);
  clean_table(ht);
  free(ht);

  for (current_data_ptr = misc_data_list;
       current_data_ptr;
       current_data_ptr = current_data_ptr->next_ptr)
    {
      switch(type)
	{
	case T_FILE:
	  n = current_data_ptr->file_count;
	  break;
	case T_HOST:
	case T_DIR:
	case T_TLD:
	case T_DOMAIN:
	  n = current_data_ptr->data;
	  break;
	}
      if (n < 10)
	n = 0;
      else if (n < 100)
	n = 1;
      else if (n < 1000)
	n = 2;
      else if (n < 10000)
	n = 3;
      else if (n < 100000)
	n = 4;
      else if (n < 1000000)
	n = 5;
      else if (n < 10000000)
	n = 6;
      else
	{
#ifdef BROKEN_LL
	  sprintf(tempcount, "%lu", n);
#else
	  sprintf(tempcount, "%llu", n);
#endif
	  n = strlen(tempcount) - 1;
	}
      if (n > max_n)
	max_n = n;
    }

#ifdef DEBUG
  fprintf(stderr, "It's all in one list now, max_n == %llu.\n", max_n);
#endif

  /* allocate our array for the top of the n-buckets */
  sorted_list = (xfmisc_t **)amalloc(arena, (max_n + 1) * sizeof(xfmisc_t *));

  /* empty the top array out */
  for (a = 0; a <= max_n; a++)
    {
#ifdef DEBUG
      fprintf(stderr, "Cleaning node %d\n", a);
#endif
      sorted_list[a] = NULL;
    }
#ifdef DEBUG
  fprintf(stderr, "The n-buckets are clean.\n");
#endif

  /* this loop finds 'n' and places the node in the appropriate bucket for
   * later further sorting */
  for (current_data_ptr = misc_data_list, prev_path_ptr = NULL;
       current_data_ptr;
       current_data_ptr = misc_data_list)
    {
      switch(type)
	{
	case T_FILE:
	  n = current_data_ptr->file_count;
	  break;
	case T_HOST:
	case T_DIR:
	case T_TLD:
	case T_DOMAIN:
	  n = current_data_ptr->data;
	  break;
	}
      if (n < 10)
	n = 0;
      else if (n < 100)
	n = 1;
      else if (n < 1000)
	n = 2;
      else if (n < 10000)
	n = 3;
      else if (n < 100000)
	n = 4;
      else if (n < 1000000)
	n = 5;
      else if (n < 10000000)
	n = 6;
      else
	{
#ifdef BROKEN_LL
	  sprintf(tempcount, "%lu", n);
#else
	  sprintf(tempcount, "%llu", n);
#endif
	  n = strlen(tempcount) - 1;
	}

#ifdef DEBUG
#ifdef BROKEN_LL
      fprintf(stderr, "Attacking %s %s, file_count = %u, data = %ld, "
	      "n = %ld\n", T_NAMES[type], current_data_ptr->name,
	      current_data_ptr->file_count, current_data_ptr->data, n);
#else
      fprintf(stderr, "Attacking %s %s, file_count = %u, data = %lld, "
	      "n = %lld\n", T_NAMES[type], current_data_ptr->name,
	      current_data_ptr->file_count, current_data_ptr->data, n);
#endif /* BROKEN_LL */
#endif /* DEBUG */

      /* remove that item from the original giant linked list */
      misc_data_list = current_data_ptr->next_ptr;

      if (sorted_list[n] == NULL)
	{
	  /* if sorted_list[n] is NULL then this is the first entry in that
	   * bucket */
	  sorted_list[n] = current_data_ptr;
	  current_data_ptr->next_ptr = NULL;
	}
      else
	{
	  /* otherwise, we just stick the entry onto the top of the bucket */
	  current_data_ptr->next_ptr = sorted_list[n];
	  sorted_list[n] = current_data_ptr;
	}
    }

#ifdef DEBUG
  /* this is really just a debug function to make sure that the data is in the
   * correct buckets.  loop until we've gone through all of the 'n' buckets */
  for (a = 0; a <= max_n; a++)
    {
      fprintf(stderr, "PROBLEMATIC DATA IN BUCKET %d:\n", a);
      for (current_data_ptr = sorted_list[a];
	   current_data_ptr;
	   current_data_ptr = current_data_ptr->next_ptr)
	{
	  switch(type)
	    {
	    case T_FILE:
	      n = current_data_ptr->file_count;
	      break;
	    case T_HOST:
	    case T_DIR:
	    case T_TLD:
	    case T_DOMAIN:
	      n = current_data_ptr->data;
	      break;
	    }
#ifdef BROKEN_LL
	  sprintf(tempcount, "%lu", n);
#else
	  sprintf(tempcount, "%llu", n);
#endif /* BROKEN_LL */
	  if (strlen(tempcount) != a + 1)
#ifdef BROKEN_LL
	    fprintf(stderr, "ERROR: item in wrong bucket! (name: %s %lu %s)\n",
		    current_data_ptr->name, n,
		    (type == T_FILE || type == T_HOST) ? "transfers" :
		    "bytes");
#else
	    fprintf(stderr, "ERROR: item in wrong bucket! (name: %s %llu %s)\n"
		    , current_data_ptr->name, n,
		    (type == T_FILE || type == T_HOST) ? "transfers" :
		    "bytes");
#endif /* BROKEN_LL */
	}
      fprintf(stderr, "\n");
    }
#endif /* DEBUG2 */

  switch (type)
    {
    case T_FILE:
      num_keep = pointers->config->number_file_stats;
      break;
    case T_DIR:
      num_keep = pointers->config->number_dir_stats;
      break;
    case T_HOST:
      num_keep = pointers->config->number_host_stats;
      break;
    case T_TLD:
      num_keep = pointers->config->number_tld_stats;
      break;
    case T_DOMAIN:
      num_keep = pointers->config->number_domain_stats;
      break;
    }

  /* this giant loop does the actual sorting of the individual smaller buckets.
   * I decided to use a selection sort since the data sets shouldn't get too
   * massive and I didn't see the point in wasting time coding a better way
   * right now (FIXME: when they do get massive, it's ugly) */
  for (a = max_n; a >= 0; a--)
    {
      if (sorted_list[a] == NULL)
	{
#ifdef DEBUGS
	  fprintf(stderr, "SKIPPING EMPTY BUCKET %d\n", a);
#endif
	  continue;
	}
#ifdef DEBUGS
      else
	fprintf(stderr, "SORTING bucket %d\n", a);
#endif

      /* actual_sorted_list is the temporary holder for each sorted bucket. */
      actual_sorted_list = NULL;

      /* we keep looping until we've gone through the entire 'n' bucket */
      while (sorted_list[a])
	{
#ifdef DEBUG
	  if (!(count % 1000))
	    fprintf(stderr, "Sorting (%d)...\n", count);
#endif

	  /* Find which of the remaining nodes in the bucket has the HIGHEST
	   * file_count or data value (depending on the type of data) and set
	   * high_ptr to it.  Additionally, if we run into a 'n' that is the
	   * -same- as the last	highest one we found, we can assume that it is
	   * the highest of the remaining values and stop looping.
	   * prev_high_ptr is the pointer immediately before (and pointing to)
	   * high_ptr. */

	  for (current_data_ptr = sorted_list[a], prev_path_ptr = NULL, n = 0;
	       current_data_ptr;
	       prev_path_ptr = current_data_ptr,
	       current_data_ptr = current_data_ptr->next_ptr)
	    {
	      if (num_keep && count > num_keep)
		{
#ifdef DEBUGS
		  fprintf(stderr, "Count == %d, num_keep == %u\n", count,
			  num_keep);
#endif
#ifndef SMFS
		  for (current_data_ptr = sorted_list[a]; current_data_ptr;)
		    {
		      prev_path_ptr = current_data_ptr->next_ptr;
		      afree(*arena, (__ptr_t) current_data_ptr->name);
		      afree(*arena, (__ptr_t) current_data_ptr);
		      current_data_ptr = prev_path_ptr;
		    }
#endif /* ifndef SMFS */
		  sorted_list[a] = NULL;
		  break;
		}

	      switch(type)
		{
		case T_FILE:
		  number = current_data_ptr->file_count;
		  break;
		case T_HOST:
		case T_DIR:
		case T_TLD:
		case T_DOMAIN:
		  number = current_data_ptr->data;
		  break;
		}

	      if (number == prev_min || number > n)
		{
		  prev_high_ptr = prev_path_ptr;

		  high_ptr = current_data_ptr;
		  if ((n = number) == prev_min)
		    break;
		}
	    }

	  if (sorted_list[a] == NULL)
	    {
	      /* the loop broke because count > num_keep */
	      break;
	    }

	  count++;

	  if (n != prev_min)
	    prev_min = n;
	      
	  /* #ifdef DEBUG*/
#ifdef DEBUG
#ifdef BROKEN_LL
	  fprintf(stderr, "Highest seems to be %60s with %lu %s\n",
		  high_ptr->name, n,
		  (type == T_FILE || type == T_HOST) ? "transfers" : "bytes");
#else
	  fprintf(stderr, "Highest seems to be %60s with %llu %s\n",
		  high_ptr->name, n,
		  (type == T_FILE || type == T_HOST) ? "transfers" : "bytes");
	  if (type == T_FILE)
	    {
	      if (n != high_ptr->file_count)
		fprintf(stderr, "n != high_ptr->file_count (%lld != %d).  "
			"is is bad.\n", n, high_ptr->file_count);
	    }
	  else
	    if (n != high_ptr->data)
	      fprintf(stderr, "n != high_ptr->data (%lld != %lld).  "
		      "is is bad.\n", n, high_ptr->data);
#endif /* BROKEN_LL */
#endif /* DEBUG */
	  
	  if (prev_high_ptr == NULL)
	    {
	      /* the item  happened to be the first item
		 in the n bucket, so we just move the top down */
#ifdef DEBUG
	      fprintf(stderr, "Moving the top of the n bucket down one "
		      "node.\n");
#endif
	      sorted_list[a] = sorted_list[a]->next_ptr;
	    }
	  else
	    {
	      /* the item was in the middle of the
	       * bucket so we remove it by setting the previous pointer's
	       * next_ptr to the one after the one we want to remove.  pouf,
	       * it's gone */
#ifdef DEBUG
	      fprintf(stderr, "Removing this node from the n bucket.\n");
#endif
	      prev_high_ptr->next_ptr = high_ptr->next_ptr;
	    }
	  
	  if (actual_sorted_list == NULL)
	    {
	      /* if this is the first node in the sorted list, adding it is
	       * simple */
#ifdef DEBUG
	      fprintf(stderr, "Creating new sorted list...\n");
#endif
	      actual_sorted_list = bottom_sorted_list = high_ptr;
	    }
	  else
	    {
	      /* ...but not much harder if we have to add to an existing
	       * tree */
#ifdef DEBUG
	      fprintf(stderr, "Adding it onto the bottom of the sorted "
		      "list...\n");
#endif
	      /* actual_sorted_list is the top of the linked list.  we add the
	       * value on to the bottom so that we can grab the highest values.
	       * If I grabbed the lowest values and pushed the list down, I
	       * wouldn't be allowed to abort when count > num_keep unless the
	       * bucket was finished */
	      bottom_sorted_list->next_ptr = high_ptr;
	      bottom_sorted_list = high_ptr;
	    }
	  
	}
      /* don't forget to terminate the list! */
      bottom_sorted_list->next_ptr = NULL;

      /* we now replace the old non-sorted list (which -should- be empty,
       * unless I've fucked up) with our sorted one.  rinse, repeat as
       * necessary for more 'n' buckets */
      sorted_list[a] = actual_sorted_list;
    }
  
  /* finally, we combine them into one giant tree */

  /* *pointers_ptr should already BE null, but... */
  *pointers_ptr = NULL;

  n = max_n;
  while (1)
    {
      if (sorted_list[n])
	{
	  /* append an n-bucket list to the end of our current giant list */
	  if (*pointers_ptr == NULL)
	    *pointers_ptr = current_data_ptr = sorted_list[n];
	  else
	    current_data_ptr->next_ptr = sorted_list[n];

	  /* and re-seek the end of the list */
	  for (; current_data_ptr->next_ptr;
	       current_data_ptr = current_data_ptr->next_ptr)
	    ;
	}

      if (n == 0)
	break;
      else
        n--;
    }

#ifdef DEBUGS
  fprintf(stderr, "%s data generation complete (%ld)\n", T_NAMES[type],
	  time(NULL));
#endif
#ifdef PTHREADS
  pthread_mutex_lock(pointers->active_threads_mutex);
  pointers->active_threads--;
  if (pointers->active_threads < MAX_ACTIVE_THREADS)
    pthread_cond_signal(pointers->active_threshold_cond);
  pthread_mutex_unlock(pointers->active_threads_mutex);
#endif
} /* generate_misc_data */


/* Run generate_misc_data() for the T_FILE category.
 *
 * In PTHREADS builds the call is bracketed by file_data_running_mutex, so
 * the mutex is held for as long as the generation is in progress, and
 * data_gen_finished_cond is signalled after the mutex has been released. */
void
generate_file_data(pointers_t *pointers)
{
#ifdef PTHREADS
  pthread_mutex_lock(pointers->file_data_running_mutex);
#endif
  generate_misc_data(pointers, T_FILE);
#ifdef PTHREADS
  pthread_mutex_unlock(pointers->file_data_running_mutex);
  pthread_cond_signal(pointers->data_gen_finished_cond);
#endif
} /* generate_file_data */


/* Run generate_misc_data() for the T_DIR category.
 *
 * In PTHREADS builds the call is bracketed by dir_data_running_mutex, so
 * the mutex is held for as long as the generation is in progress, and
 * data_gen_finished_cond is signalled after the mutex has been released. */
void
generate_dir_data(pointers_t *pointers)
{
#ifdef PTHREADS
  pthread_mutex_lock(pointers->dir_data_running_mutex);
#endif
  generate_misc_data(pointers, T_DIR);
#ifdef PTHREADS
  pthread_mutex_unlock(pointers->dir_data_running_mutex);
  pthread_cond_signal(pointers->data_gen_finished_cond);
#endif
} /* generate_dir_data */


/* Run generate_misc_data() for the T_HOST category.
 *
 * In PTHREADS builds the call is bracketed by host_data_running_mutex, so
 * the mutex is held for as long as the generation is in progress, and
 * data_gen_finished_cond is signalled after the mutex has been released. */
void
generate_host_data(pointers_t *pointers)
{
#ifdef PTHREADS
  pthread_mutex_lock(pointers->host_data_running_mutex);
#endif
  generate_misc_data(pointers, T_HOST);
#ifdef PTHREADS
  pthread_mutex_unlock(pointers->host_data_running_mutex);
  pthread_cond_signal(pointers->data_gen_finished_cond);
#endif
} /* generate_host_data */


/* Run generate_misc_data() for the T_TLD category.
 *
 * In PTHREADS builds the call is bracketed by tld_data_running_mutex, so
 * the mutex is held for as long as the generation is in progress, and
 * data_gen_finished_cond is signalled after the mutex has been released. */
void
generate_tld_data(pointers_t *pointers)
{
#ifdef PTHREADS
  pthread_mutex_lock(pointers->tld_data_running_mutex);
#endif
  generate_misc_data(pointers, T_TLD);
#ifdef PTHREADS
  pthread_mutex_unlock(pointers->tld_data_running_mutex);
  pthread_cond_signal(pointers->data_gen_finished_cond);
#endif
} /* generate_tld_data */


/* Run generate_misc_data() for the T_DOMAIN category.
 *
 * In PTHREADS builds the call is bracketed by domain_data_running_mutex, so
 * the mutex is held for as long as the generation is in progress, and
 * data_gen_finished_cond is signalled after the mutex has been released. */
void
generate_domain_data(pointers_t *pointers)
{
#ifdef PTHREADS
  pthread_mutex_lock(pointers->domain_data_running_mutex);
#endif
  generate_misc_data(pointers, T_DOMAIN);
#ifdef PTHREADS
  pthread_mutex_unlock(pointers->domain_data_running_mutex);
  pthread_cond_signal(pointers->data_gen_finished_cond);
#endif
} /* generate_domain_data */


/* Print the version string and the command-line help to stdout.
 *
 * bad_arg: the option character that was rejected, or '\0' when the help
 *          was asked for explicitly.  When it is non-zero an
 *          "Invalid argument" line is printed first.
 *
 * This function only prints; the caller decides whether to exit and with
 * which status.
 *
 * The -T line describes what parse_cmdline() really accepts: a log type
 * NAME (wu-ftpd, ncftpd or apache), not a number. */
void
usage(char bad_arg)
{
  if (bad_arg)
    printf("Invalid argument -- %c\n", bad_arg);

  printf("%s\n", VERSION);
  printf("usage: xferstats <options> <filename>\n\n"
         "Any command line arguments inside <> are required, inside [] are optional.  All\n"
         "defaults listed assume that the configuration file has not changed these\n"
         "settings.  If in doubt, check the configuration file or explicitly set them on\n"
         "the command line.\n"
         "\n"
         " -                    get the log from a file (default)\n"
         " +                    get the log from stdin\n"
         " -c <config file>     specify a path and filename for the configuration file\n"
         " -T <type>            logfile type (wu-ftpd, ncftpd or apache)\n"
         "\n"
         "The following options are enabled with a \"+\" and disabled with a \"-\".  See the\n"
         "man page for more information.  Any arguments apply only to enabling:\n"
         "\n"
         "  H                   HTML output\n"
         "  n                   no HTML headers option (see man page)\n"
         "  s                   single-page output (see man page)\n"
         "  r                   real user data\n"
         "  a                   anonymous user data\n"
         "  g                   guest user data\n"
         "  i                   inbound traffic data\n"
         "  u                   outbound traffic data\n"
         "  h                   report on hourly traffic\n"
         "  m                   report on monthly traffic\n"
         "  w                   report on days-of-the-week traffic\n"
         "  M                   report on days-of-the-month traffic\n"
         "  f [number]          report on file traffic\n"
         "  d [number]          report on directory traffic\n"
         "  t [number]          report on top-level domain traffic\n"
         "  O [number]          report on domain traffic\n"
         "  o [number]          report on host traffic\n"
         "\n"
         "\n"
         " +L <number>          limit the report-by-day report to <number> listings\n"
         " +A                   include all users, all reports\n"
         /* " +T <TLD>             report only on traffic from the <TLD> top-level domain\n" */
         /* This will have to wait for another version */
         " -D <number>          depth of path detail for directories (default 3)\n"
         " +D <directory>       directory to report on, for example: +D /pub will\n"
         "                      report only on paths under /pub\n"
         "\n"
         " -v, --version        display version information\n"
         " --help               this listing\n"
         "\n"
         "Report bugs to pschwan@cmu.edu\n");
} /* usage */


/* Parse the optional non-negative number that may follow an option letter.
 *
 * argc, argv: the program's argument vector.
 * i:          in/out index of the argument holding the option letter; it
 *             is advanced by one when the number is taken from the next
 *             argument.
 * j:          in/out offset of the option letter inside argv[*i].  On
 *             return it is the offset of the terminating '\0' of argv[*i]
 *             (for the possibly updated *i), so a caller that loops
 *             "while (argv[i][j] != '\0')" and does NOT increment j itself
 *             moves on to the next argument.
 * value:      receives the parsed number; left untouched when no number
 *             is present.
 * arg_str:    option name used in the error message (e.g. "+f").
 *
 * The number may be attached to the letter ("+f25") or be the next
 * argument ("+f 25"), provided that argument does not start with '-' or
 * '+'.  A negative number is fatal: a message is written to stderr and the
 * process exits with status 1.
 *
 * When no number is present *j is still stepped past the option letter.
 * Callers skip their own j++ after calling this function, so leaving *j
 * alone would make them re-read the same letter forever. */
void
check_cmd_arg(int argc, char * argv[], int * i, int * j, unsigned int * value,
              char * arg_str)
{
  char *number_str;
  long foo;

  if (argv[*i][*j + 1] != '\0')
    {
      /* the number is glued to the option letter */
      number_str = argv[*i] + *j + 1;
    }
  else if (*i < (argc - 1) && *argv[*i + 1] != '-' && *argv[*i + 1] != '+')
    {
      /* the number is the following argument; consume it */
      number_str = argv[++*i];
    }
  else
    {
      /* no number at all: argv[*i][*j + 1] is '\0', so this lands on the
       * terminator and ends the caller's scan of this argument */
      ++*j;
      return;
    }

  /* strtol() instead of atoi(): same result for valid input, but out of
   * range input is clamped rather than being undefined behaviour */
  if ((foo = strtol(number_str, NULL, 10)) < 0)
    {
      fprintf(stderr, "fatal: the %s parameter accepts only an "
              "optional number (>= 0) following it.\n", arg_str);
      exit(1);
    }

  *value = foo;
  *j = strlen(argv[*i]);
} /* check_cmd_arg */


/* Apply the command-line arguments to *config.
 *
 * Arguments are handled left to right, starting at argv[1]:
 *
 *   --            stop option processing; later arguments are file names
 *   --help        print the usage text and exit(0)
 *   --version     print the version banner and exit(0)
 *   --<other>     silently ignored
 *   -<letters>    "disabling" options: the matching config field is set to
 *                 0.  A lone "-" selects input from a file.  -c, -T and -D
 *                 take a value and -v prints the version.
 *   +<letters>    "enabling" options.  A lone "+" selects input from
 *                 stdin.  User and report selections are set to 2, the
 *                 HTML switches (H, n, s) to 1.
 *   other         taken as the log file name; the last one wins.  The
 *                 pointer into argv is stored, not a copy.
 *
 * Letters may be grouped ("+hwm").  A letter that takes a value ends its
 * group: the value is either the rest of that argument or the following
 * argument.  An unknown letter prints the usage text and exits with
 * status 1, as does a missing or invalid value for -c, -T, -D or +D.
 *
 * The -c value itself is read earlier by get_config_arg(); here it is only
 * skipped over. */
void
parse_cmdline(int argc, char *argv[], config_t * config)
{
  int i = 1, j;
  char c, no_more_options = 0;
  long foo;
  only_dir_t * item;

  for (; i < argc; i++)
    {
      if (!no_more_options && !strncmp(argv[i], "--", 2))
        {
          /* argv[i] contains a long option (or is --) */
          if (argv[i][2] == '\0')
            no_more_options = 1;
          else if (!strcmp(argv[i]+2, "help"))
            {
              usage('\0');
              exit(0);
            }
          else if (!strcmp(argv[i]+2, "version"))
            {
              printf("%s\n", VERSION);
              printf("Copyright (C) 1997, 1998 Phil Schwan\n");
              exit(0);
            }
        }
      else if (!no_more_options && *argv[i] == '-')
        {
          /* argv[i] contains a disabling option */
          j = 1;
          if (*(argv[i] + 1) == '\0') config->use_stdin = 0;
          while ((c = *(argv[i] + j)) != '\0')
            {
              switch (c)
                {
                case 'c':
                  /* this (config_file) was parsed in get_config_arg; just
                   * step over the file name.  Without a file name there is
                   * nothing to step over, and looping on the same letter
                   * would never terminate, so treat it as an error. */
                  if (i >= argc - 1)
                    {
                      fprintf(stderr, "fatal: the -c parameter requires a "
                              "configuration file following it.\n");
                      exit(1);
                    }
                  j = strlen(argv[++i]);
                  continue;
                case 'T':
                  if (i < argc - 1 && *argv[i + 1] != '-' &&
                      *argv[i + 1] != '+')
                    {
                      if (!strcasecmp(argv[i + 1], "wu-ftpd") ||
                          !strcasecmp(argv[i + 1], "wuftpd") ||
                          !strcasecmp(argv[i + 1], "wu-ftp") ||
                          !strcasecmp(argv[i + 1], "wuftp"))
                        config->log_type = 0;
                      else if (!strcasecmp(argv[i + 1], "ncftpd") ||
                               !strcasecmp(argv[i + 1], "ncftp"))
                        config->log_type = 1;
                      else if (!strcasecmp(argv[i + 1], "apache"))
                        config->log_type = 2;
                      else
                        {
                          fprintf(stderr, "fatal: the -T parameter requires a "
                                  "log type following it.\n");
                          exit(1);
                        }
                      /* consume the log type argument */
                      j = strlen(argv[++i]);
                      continue;
                    }
                  else
                    {
                      fprintf(stderr, "fatal: the -T parameter requires a log "
                              "type following it.\n");
                      exit(1);
                    }
                  break;
                case 'H':
                  config->html_output = 0;
                  break;
                case 'n':
                  config->no_html_headers = 0;
                  break;
                case 's':
                  config->single_page = 0;
                  break;
                case 'r':
                  config->real_traffic = 0;
                  break;
                case 'a':
                  config->anon_traffic = 0;
                  break;
                case 'g':
                  config->guest_traffic = 0;
                  break;
                case 'i':
                  config->inbound = 0;
                  break;
                case 'u':
                  config->outbound = 0;
                  break;
                case 'h':
                  config->hourly_traffic = 0;
                  break;
                case 'w':
                  config->dow_traffic = 0;
                  break;
                case 'M':
                  config->dom_traffic = 0;
                  break;
                case 't':
                  config->tld_traffic = 0;
                  break;
                case 'O':
                  config->domain_traffic = 0;
                  break;
                case 'o':
                  config->host_traffic = 0;
                  break;
                case 'm':
                  config->monthly_traffic = 0;
                  break;
                case 'd':
                  config->dir_traffic = 0;
                  break;
                case 'f':
                  /* -f turns off the FILE report (not the directory one) */
                  config->file_traffic = 0;
                  break;
                case 'D':
                  /* we duplicate the stuff in check_cmd_arg because this
                   * argument requires (instead of being optional) a
                   * value > 0 (instead of >= 0) */
                  if (*(argv[i]+j+1) == '\0')
                    {
                      /* "-D <number>": the number is the next argument,
                       * which must be consumed so that it is not mistaken
                       * for a file name afterwards */
                      if (i < (argc - 1) && *argv[i + 1] != '-' &&
                          *argv[i + 1] != '+')
                        foo = atoi(argv[++i]);
                      else
                        {
                          fprintf(stderr, "fatal: the -D parameter requires a "
                                  "number (> 0) following it.\n");
                          exit(1);
                        }
                    }
                  else
                    {
                      /* "-D<number>": the number is the rest of this
                       * argument; nothing else is consumed */
                      foo = atoi(argv[i]+j+1);
                    }

                  if (foo <= 0)
                    {
                      fprintf(stderr, "fatal: the -D parameter requires a "
                              "number (> 0) following it.\n");
                      exit(1);
                    }
                  config->depth = foo;
                  j = strlen(argv[i]);
                  continue;
                case 'v':
                  printf("%s\n", VERSION);
                  printf("Copyright (C) 1997, 1998 Phil Schwan\n");
                  exit(0);
                default:
                  usage(argv[i][j]);
                  exit(1);
                }
              j++;
            }
        }
      else if (!no_more_options && *argv[i] == '+')
        {
          /* argv[i] contains an enabling option */
          j = 1;
          if (*(argv[i] + 1) == '\0') config->use_stdin = 1;
          while ((c = *(argv[i] + j)) != '\0')
            {
              switch (c)
                {
                case 'a':
                  config->anon_traffic = 2;
                  break;
                case 'g':
                  config->guest_traffic = 2;
                  break;
                case 'r':
                  config->real_traffic = 2;
                  break;
                case 'i':
                  config->inbound = 2;
                  break;
                case 'u':
                  config->outbound = 2;
                  break;
                case 'h':
                  config->hourly_traffic = 2;
                  break;
                case 'w':
                  config->dow_traffic = 2;
                  break;
                case 'M':
                  config->dom_traffic = 2;
                  break;
                case 'm':
                  config->monthly_traffic = 2;
                  break;
                  /* the options below take an optional count;
                   * check_cmd_arg() positions j itself, hence "continue"
                   * (which skips the j++ at the bottom of the loop) */
                case 'd':
                  config->dir_traffic = 2;
                  check_cmd_arg(argc, argv, &i, &j,
                                &config->number_dir_stats, "+d");
                  continue;
                case 'O':
                  config->domain_traffic = 2;
                  check_cmd_arg(argc, argv, &i, &j,
                                &config->number_domain_stats, "+O");
                  continue;
                case 'o':
                  config->host_traffic = 2;
                  check_cmd_arg(argc, argv, &i, &j,
                                &config->number_host_stats, "+o");
                  continue;
                case 't':
                  config->tld_traffic = 2;
                  check_cmd_arg(argc, argv, &i, &j,
                                &config->number_tld_stats, "+t");
                  continue;
                case 'f':
                  config->file_traffic = 2;
                  check_cmd_arg(argc, argv, &i, &j,
                                &config->number_file_stats, "+f");
                  continue;
                case 'L':
                  check_cmd_arg(argc, argv, &i, &j,
                                &config->number_daily_stats, "+L");
                  continue;
                case 'R':
                  check_cmd_arg(argc, argv, &i, &j,
                                &config->max_report_size, "+R");
                  continue;
                case 'H':
                  config->html_output = 1;
                  break;
                case 'n':
                  config->no_html_headers = 1;
                  break;
                case 's':
                  config->single_page = 1;
                  break;
                case 'D':
                  if (i < (argc - 1) && *argv[i + 1] != '-' &&
                      *argv[i + 1] != '+')
                    {
                      /* remember a private copy of the directory */
                      MY_MALLOC(item, sizeof(only_dir_t));

                      MY_MALLOC(item->dir,
                                (item->len = strlen(argv[i + 1])) + 1);
                      strcpy(item->dir, argv[i + 1]);

                      list_add_item(&config->only_dir, item);
                    }
                  else
                    {
                      fprintf(stderr, "fatal: the +D parameter requires a "
                              "path following it.\n");
                      exit(1);
                    }
                  j = strlen(argv[++i]);
                  continue;
                  /* "+T <TLD>" (report on a single top-level domain only)
                   * will have to wait for another version */
                case 'A':
                  config->anon_traffic = 2;
                  config->real_traffic = 2;
                  config->guest_traffic = 2;

                  config->hourly_traffic = 2;
                  config->monthly_traffic = 2;

                  config->tld_traffic = 2;
                  config->domain_traffic = 2;
                  config->host_traffic = 2;

                  config->dir_traffic = 2;
                  config->file_traffic = 2;

                  config->dow_traffic = 2;
                  config->dom_traffic = 2;
                  break;
                default:
                  usage(argv[i][j]);
                  exit(1);
                }
              j++;
            }
        }
      else
        /* it's a filename */
        config->file_name = argv[i];
    }
} /* parse_cmdline */


/* Find "-c <config file>" on the command line and store a private copy of
 * the file name in pointers->config->config_file.
 *
 * Only an argument that is exactly "-c" and is followed by another
 * argument is recognised; the first such pair wins.  When there is none,
 * config_file is left as it was.
 *
 * The copy is allocated with room for the terminating '\0': strcpy()
 * writes strlen() + 1 bytes. */
void
get_config_arg(int argc, char *argv[], pointers_t *pointers)
{
  int argno;

  /* argv[0] is the program name; the last argument cannot be a "-c" with a
   * value after it, so stop one short of argc */
  for (argno = 1; argno < argc - 1; argno++)
    if (!strcmp(argv[argno], "-c"))
      {
        MY_MALLOC(pointers->config->config_file,
                  strlen(argv[argno + 1]) + 1);
        strcpy(pointers->config->config_file, argv[argno + 1]);
        return;
      }
} /* get_config_arg */


/* Reset the run-time state held in *pointers before any log is parsed.
 *
 * - the list heads (ftp lines, days, TLDs, domains, hosts, files) are set
 *   to NULL and the file_count/data totals to 0;
 * - a non-zero config->max_report_size below 10 is raised to 10 (0 is
 *   left alone);
 * - in PTHREADS builds every mutex and condition variable reachable from
 *   *pointers is allocated with MY_MALLOC and initialised with default
 *   attributes.
 *
 * pointers->config must already be set up: it is read here.
 *
 * NOTE(review): no list head for the directory data is reset here, unlike
 * the other categories -- confirm where (or whether) that is initialised. */
void
xferstats_init(pointers_t *pointers)
{
  /* initialize the pointers and clear the totals/dates */
  pointers->first_ftp_line = NULL;
  pointers->first_day_ptr = NULL;
  pointers->first_tld_ptr = NULL;
  pointers->first_domain_ptr = NULL;
  pointers->first_host_ptr = NULL;
  pointers->first_file_ptr = NULL;
  pointers->file_count = pointers->data = 0;

  /* make sure our max report size is something sane (>= 10 or 0) */
  if (pointers->config->max_report_size &&
      pointers->config->max_report_size < 10)
    pointers->config->max_report_size = 10;

#ifdef PTHREADS
  /* malloc and initialize the mutexes and condition variables */

  /* guards the active_threads counter */
  MY_MALLOC(pointers->active_threads_mutex, sizeof(pthread_mutex_t));
  pthread_mutex_init(pointers->active_threads_mutex, NULL);

  /* one "data generation running" mutex per report category */
  MY_MALLOC(pointers->file_data_running_mutex, sizeof(pthread_mutex_t));
  pthread_mutex_init(pointers->file_data_running_mutex, NULL);
  MY_MALLOC(pointers->host_data_running_mutex, sizeof(pthread_mutex_t));
  pthread_mutex_init(pointers->host_data_running_mutex, NULL);
  MY_MALLOC(pointers->dir_data_running_mutex, sizeof(pthread_mutex_t));
  pthread_mutex_init(pointers->dir_data_running_mutex, NULL);
  MY_MALLOC(pointers->tld_data_running_mutex, sizeof(pthread_mutex_t));
  pthread_mutex_init(pointers->tld_data_running_mutex, NULL);
  MY_MALLOC(pointers->domain_data_running_mutex, sizeof(pthread_mutex_t));
  pthread_mutex_init(pointers->domain_data_running_mutex, NULL);
  MY_MALLOC(pointers->monthly_data_running_mutex, sizeof(pthread_mutex_t));
  pthread_mutex_init(pointers->monthly_data_running_mutex, NULL);
  MY_MALLOC(pointers->hourly_data_running_mutex, sizeof(pthread_mutex_t));
  pthread_mutex_init(pointers->hourly_data_running_mutex, NULL);
  MY_MALLOC(pointers->dom_data_running_mutex, sizeof(pthread_mutex_t));
  pthread_mutex_init(pointers->dom_data_running_mutex, NULL);
  MY_MALLOC(pointers->dow_data_running_mutex, sizeof(pthread_mutex_t));
  pthread_mutex_init(pointers->dow_data_running_mutex, NULL);

  /* display-side mutex and the mutex paired with data_gen_finished_cond */
  MY_MALLOC(pointers->active_display_thread_mutex, sizeof(pthread_mutex_t));
  pthread_mutex_init(pointers->active_display_thread_mutex, NULL);
  MY_MALLOC(pointers->data_gen_finished_cond_mutex, sizeof(pthread_mutex_t));
  pthread_mutex_init(pointers->data_gen_finished_cond_mutex, NULL);

  /* condition variables: "a thread slot became free" and "a data
   * generation pass finished" */
  MY_MALLOC(pointers->active_threshold_cond, sizeof(pthread_cond_t));
  pthread_cond_init(pointers->active_threshold_cond, NULL);
  MY_MALLOC(pointers->data_gen_finished_cond, sizeof(pthread_cond_t));
  pthread_cond_init(pointers->data_gen_finished_cond, NULL);  
#endif /* PTHREADS */
} /* xferstats_init */


/* Program entry point.
 *
 * 1. allocate the pointers/config structures and fill in the built-in
 *    defaults;
 * 2. pick up "-c <file>" (get_config_arg), read the configuration
 *    (init_config), then let the command line override it (parse_cmdline);
 * 3. parse the log selected by config->log_type (0 wu-ftpd, 1 ncftpd,
 *    2 apache); when nothing was read, print "No data to process." and
 *    exit(0);
 * 4. generate and display the daily totals, then every enabled report.
 *
 * Without PTHREADS the reports run one after another.  With PTHREADS each
 * generator and each display routine runs in its own thread, never more
 * than MAX_ACTIVE_THREADS at a time; a display thread is only started once
 * the "data running" mutex of its generator can be acquired.
 *
 * Returns 0. */
int
main(int argc, char **argv)
{
  pointers_t *pointers;
#ifdef PTHREADS
  /* generator slot 0 is the daily data (displayed directly by main);
   * generator slot k + 1 feeds display slot k */
  enum { GEN_SLOTS = 10, DISPLAY_SLOTS = 9 };
  int index, must_wait = 0, finished = 0;
  void * function_pointers[GEN_SLOTS];
  void * display_function_pointers[DISPLAY_SLOTS];
  pthread_t threads[GEN_SLOTS];
  pthread_t display_threads[DISPLAY_SLOTS];
  pthread_mutex_t *data_gen_mutexs[DISPLAY_SLOTS];
  /* pthread_t is opaque and the arrays above are only written for threads
   * that were really created, so "still needs joining" is tracked here */
  int thread_started[GEN_SLOTS];
  int display_thread_started[DISPLAY_SLOTS];
#endif

  /* unbuffer stdout and stderr so output isn't lost */
  setbuf(stdout, NULL);
  setbuf(stderr, NULL);

  /* allocate memory for our pointers */
  MY_MALLOC(pointers, sizeof(pointers_t));
  MY_MALLOC(pointers->config, sizeof(config_t));

  /* we malloc and strcpy so that the config file parsing can free it without
   * worry */
  MY_MALLOC(pointers->config->file_name, strlen(FILENAME) + 1);
  strcpy(pointers->config->file_name, FILENAME);

  /* initialize our config structure with remaining default values */
  pointers->config->html_output = 0;
  pointers->config->log_type = 1;
  pointers->config->number_file_stats = 50;
  pointers->config->number_daily_stats = 50;
  pointers->config->number_dir_stats = 50;
  pointers->config->number_tld_stats = 50;
  pointers->config->number_domain_stats = 50;
  pointers->config->number_host_stats = 50;
  pointers->config->real_traffic = 0;
  pointers->config->anon_traffic = 0;
  pointers->config->guest_traffic = 0;
  pointers->config->inbound = 0;
  pointers->config->outbound = 0;
  pointers->config->tld_traffic = 0;
  pointers->config->domain_traffic = 0;
  pointers->config->host_traffic = 0;
  pointers->config->hourly_traffic = 0;
  pointers->config->dow_traffic = 0;
  pointers->config->dom_traffic = 0;
  pointers->config->dir_traffic = 0;
  pointers->config->file_traffic = 0;
  pointers->config->monthly_traffic = 0;
  pointers->config->max_report_size = 30;
  pointers->config->use_stdin = 0;
  pointers->config->depth = 4;
  pointers->config->only_dir = list_new();
  /* pointers->config->only_tld = NULL; */
  /* pointers->config->only_tld_length = 0; */
  /* This will have to wait for another version */
  pointers->config->config_file = NULL;
  pointers->config->graph_path = NULL;
  pointers->config->no_html_headers = 0;
  pointers->config->single_page = 0;
  pointers->config->additive_db = 1;

  /* grab the -c <configfile> argument if it exists */
  get_config_arg(argc, argv, pointers);

  /* do the config-file thing */
  init_config(pointers);

  /* change the config structure based on command line args */
  parse_cmdline(argc, argv, pointers->config);

  /* init linked lists (and mutexs/condition vars if threaded) */
  xferstats_init(pointers);

  /* load the xferlog into ftp_line structures */
  if (pointers->config->log_type == 0)
    parse_wuftpd_logfile(pointers);
  else if (pointers->config->log_type == 1)
    parse_ncftpd_logfile(pointers);
  else if (pointers->config->log_type == 2)
    parse_apache_logfile(pointers);

  if (pointers->first_ftp_line == NULL)
    {
      fprintf(stderr, "No data to process.\n");
      exit(0);
    }

#ifdef PTHREADS
  pointers->active_threads = 0;
  for (index = 0; index < GEN_SLOTS; index++)
    {
      function_pointers[index] = (void *)NULL;
      thread_started[index] = 0;
    }
  for (index = 0; index < DISPLAY_SLOTS; index++)
    {
      display_function_pointers[index] = (void *)NULL;
      data_gen_mutexs[index] = NULL;
      display_thread_started[index] = 0;
    }

  function_pointers[0] = (void *)generate_daily_data;
  if (pointers->config->file_traffic)
    {
      function_pointers[1] = (void *)generate_file_data;
      display_function_pointers[0] = (void *)display_file_totals;
      data_gen_mutexs[0] = pointers->file_data_running_mutex;
    }
  if (pointers->config->dir_traffic)
    {
      function_pointers[2] = (void *)generate_dir_data;
      display_function_pointers[1] = (void *)display_dir_totals;
      data_gen_mutexs[1] = pointers->dir_data_running_mutex;
    }
  if (pointers->config->host_traffic)
    {
      function_pointers[3] = (void *)generate_host_data;
      display_function_pointers[2] = (void *)display_host_totals;
      data_gen_mutexs[2] = pointers->host_data_running_mutex;
    }
  if (pointers->config->tld_traffic)
    {
      function_pointers[4] = (void *)generate_tld_data;
      display_function_pointers[3] = (void *)display_tld_totals;
      data_gen_mutexs[3] = pointers->tld_data_running_mutex;
    }
  if (pointers->config->dom_traffic)
    {
      function_pointers[5] = (void *)generate_dom_data;
      display_function_pointers[4] = (void *)display_dom_totals;
      data_gen_mutexs[4] = pointers->dom_data_running_mutex;
    }
  if (pointers->config->hourly_traffic)
    {
      function_pointers[6] = (void *)generate_hourly_data;
      display_function_pointers[5] = (void *)display_hourly_totals;
      data_gen_mutexs[5] = pointers->hourly_data_running_mutex;
    }
  if (pointers->config->monthly_traffic)
    {
      function_pointers[7] = (void *)generate_monthly_data;
      display_function_pointers[6] = (void *)display_monthly_totals;
      data_gen_mutexs[6] = pointers->monthly_data_running_mutex;
    }
  if (pointers->config->dow_traffic)
    {
      function_pointers[8] = (void *)generate_dow_data;
      display_function_pointers[7] = (void *)display_dow_totals;
      data_gen_mutexs[7] = pointers->dow_data_running_mutex;
    }
  /* the domain report exists in the non-threaded build as well; schedule
   * it here too so that +O is not silently dropped */
  if (pointers->config->domain_traffic)
    {
      function_pointers[9] = (void *)generate_domain_data;
      display_function_pointers[8] = (void *)display_domain_totals;
      data_gen_mutexs[8] = pointers->domain_data_running_mutex;
    }

  for (index = 0; index < GEN_SLOTS; index++)
    {
      if (function_pointers[index])
        {
          /* wait for a free thread slot, then claim it */
          pthread_mutex_lock(pointers->active_threads_mutex);
          while (pointers->active_threads >= MAX_ACTIVE_THREADS)
            pthread_cond_wait(pointers->active_threshold_cond,
                              pointers->active_threads_mutex);
#ifdef DEBUGS
          fprintf(stderr, "Active threads: %d -- starting another.\n",
                  pointers->active_threads);
#endif
          pointers->active_threads++;
          pthread_mutex_unlock(pointers->active_threads_mutex);
          if (pthread_create(&threads[index], NULL, function_pointers[index],
                             (void *)pointers))
            {
              fprintf(stderr, "fatal: unable to create a data generation "
                      "thread.\n");
              exit(1);
            }
          thread_started[index] = 1;
#ifdef DEBUGS
          fprintf(stderr, "threads[%d]: %ld\n", index, threads[index]);
#endif
        }
    }

  /* since the daily display also prints the totals (and it's traditional!),
   * we like to do the daily totals before anything else.  after that we can do
   * them in any order (besides, they probably go to individual files anyways)
   *
   * make sure that the daily data thread is done generating data before we try
   * to display that data. */
  pthread_join(threads[0], NULL);
  /* a thread may be joined only once; keep the final loop away from it */
  thread_started[0] = 0;
  display_daily_totals(pointers);

  /* pthread_join() is blocking--a no-no.  what we want to do instead is loop
   * until all of the display threads have been run.  we can check the mutexs
   * (file_data_running_mutex, etc) to see if the data generation threads are
   * still running.  If they are, move along and try the next one, etc.  If,
   * after looping through all of them, none were ready to display, go to
   * sleep until a data thread exits, and then try them all again.
   *
   * pthread_cond_wait() requires its mutex to be locked by the caller, so
   * data_gen_finished_cond_mutex is held around the whole loop (the wait
   * itself releases it while sleeping).
   */
  pthread_mutex_lock(pointers->data_gen_finished_cond_mutex);
  while (!finished)
    {
      if (must_wait)
        {
#ifdef DEBUG_THREADS
          fprintf(stderr, "Sleeping waiting for data_gen_finished "
                  "(threads: %d)\n", pointers->active_threads);
#endif
          pthread_cond_wait(pointers->data_gen_finished_cond,
                            pointers->data_gen_finished_cond_mutex);
        }

      /* assume we are done and must sleep until proven otherwise below */
      finished = must_wait = 1;
      for (index = 0; index < DISPLAY_SLOTS; index++)
        {
#ifdef DEBUG_THREADS
          fprintf(stderr, "Looking...\n");
#endif
          if (display_function_pointers[index])
            {
#ifdef DEBUG_THREADS
              fprintf(stderr, "Found (index = %d) (trying data_gen_mutex)!\n",
                      index);
#endif
              /* at least one display is still pending */
              finished = 0;
              if (!pthread_mutex_trylock(data_gen_mutexs[index]))
                {
                  /* the generator does not hold its mutex; we only wanted
                   * to probe it */
                  pthread_mutex_unlock(data_gen_mutexs[index]);
#ifdef DEBUG_THREADS
                  fprintf(stderr, "Trying adtm\n");
#endif
                  /* NOTE(review): this mutex is not released here;
                   * presumably the display thread unlocks it -- confirm */
                  if (!pthread_mutex_trylock
                      (pointers->active_display_thread_mutex))
                    {
#ifdef DEBUG_THREADS
                      fprintf(stderr, "Mutexs are free, locking atm.\n");
#endif
                      must_wait = 0;
                      pthread_mutex_lock(pointers->active_threads_mutex);
                      while (pointers->active_threads >= MAX_ACTIVE_THREADS)
                        {
#ifdef DEBUG_THREADS
                          fprintf(stderr, "Sleeping waiting for a free thread "
                                  "slot...\n");
#endif
                          pthread_cond_wait(pointers->active_threshold_cond,
                                            pointers->active_threads_mutex);
                        }
#ifdef DEBUGS
                      fprintf(stderr, "Active threads: %d -- starting "
                              "another.\n", pointers->active_threads);
#endif
                      pointers->active_threads++;
                      pthread_mutex_unlock(pointers->active_threads_mutex);
                      if (pthread_create(&display_threads[index], NULL,
                                         display_function_pointers[index],
                                         (void *)pointers))
                        {
                          fprintf(stderr, "fatal: unable to create a display "
                                  "thread.\n");
                          exit(1);
                        }
                      display_thread_started[index] = 1;
                      display_function_pointers[index] = NULL;
#ifdef DEBUGS
                      fprintf(stderr, "display_threads[%d]: %ld\n", index,
                              display_threads[index]);
#endif
                    }
                }
            }
        }
    }
  pthread_mutex_unlock(pointers->data_gen_finished_cond_mutex);

  /* make sure all threads exit before we let the process finish */
  for (index = 0; index < GEN_SLOTS; index++)
    {
      if (thread_started[index])
        {
#ifdef DEBUGS
          fprintf(stderr, "Waiting on the exit of thread %d...\n", index);
#endif
          pthread_join(threads[index], NULL);
        }
      if (index < DISPLAY_SLOTS && display_thread_started[index])
        {
#ifdef DEBUGS
          fprintf(stderr, "Waiting on the exit of display thread %d...\n",
                  index);
#endif
          pthread_join(display_threads[index], NULL);
        }
    }
#ifdef DEBUGS
  fprintf(stderr, "All threads finished.\n");
#endif

#else /* !PTHREADS */
  generate_daily_data(pointers);
  display_daily_totals(pointers);
  if (pointers->config->tld_traffic)
    {
      generate_tld_data(pointers);
      display_tld_totals(pointers);
    }
  if (pointers->config->file_traffic)
    {
      generate_file_data(pointers);
      display_file_totals(pointers);
    }
  if (pointers->config->dir_traffic)
    {
      generate_dir_data(pointers);
      display_dir_totals(pointers);
    }
  if (pointers->config->hourly_traffic)
    {
      generate_hourly_data(pointers);
      display_hourly_totals(pointers);
    }
  if (pointers->config->dow_traffic)
    {
      generate_dow_data(pointers);
      display_dow_totals(pointers);
    }
  if (pointers->config->dom_traffic)
    {
      generate_dom_data(pointers);
      display_dom_totals(pointers);
    }
  if (pointers->config->host_traffic)
    {
      generate_host_data(pointers);
      display_host_totals(pointers);
    }
  if (pointers->config->domain_traffic)
    {
      generate_domain_data(pointers);
      display_domain_totals(pointers);
    }
  if (pointers->config->monthly_traffic)
    {
      generate_monthly_data(pointers);
      display_monthly_totals(pointers);
    }
#endif /* PTHREADS */

#if defined(DEBUGS) && defined(HAVE_MALLOC_STATS)
  /* display memory information to stderr */
  malloc_stats();
#endif
  return 0;
}
